From e0f1fecc338d481833c200e63c3dd3eb60d1ecdc Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 25 Oct 2023 08:29:41 +0200 Subject: [PATCH 001/119] [oct23av] rerun 78 tput alltees, all ok (itscrd90 Silver4216 el9 VM, after hard reboot including downfall mitigation) New performance baseline (move from Alma8/itscrd80 to Alma9/itscrd90 on Silver4216+V100 and include downfall mitigation) Using codebases as of commit bd255c01fb1cf5377de344c42089765756fd75e1 (Wed Aug 16 15:05:27 2023 +0200) STARTED AT Wed Oct 25 06:25:59 PM CEST 2023 ./tput/teeThroughputX.sh -mix -hrd -makej -eemumu -ggtt -ggttg -ggttgg -gqttq -ggttggg -makeclean ENDED(1) AT Wed Oct 25 06:50:29 PM CEST 2023 [Status=0] ./tput/teeThroughputX.sh -flt -hrd -makej -eemumu -ggtt -ggttgg -inlonly -makeclean ENDED(2) AT Wed Oct 25 06:59:19 PM CEST 2023 [Status=0] ./tput/teeThroughputX.sh -makej -eemumu -ggtt -ggttg -gqttq -ggttgg -ggttggg -flt -bridge -makeclean ENDED(3) AT Wed Oct 25 07:08:28 PM CEST 2023 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -rmbhst ENDED(4) AT Wed Oct 25 07:11:30 PM CEST 2023 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -curhst ENDED(5) AT Wed Oct 25 07:14:30 PM CEST 2023 [Status=0] [avalassi@itscrd90 gcc11/usr] /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp> cat /etc/redhat-release AlmaLinux release 9.2 (Turquoise Kodkod) [avalassi@itscrd90 gcc11/usr] /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp> grep 'stepping\|model\|microcode' /proc/cpuinfo | sort -u microcode : 0x5003604 model : 85 model name : Intel(R) Xeon(R) Silver 4216 CPU @ 2.10GHz stepping : 7 --- .../log_eemumu_mad_d_inl0_hrd0.txt | 100 ++++++------- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 100 ++++++------- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 100 ++++++------- .../log_eemumu_mad_d_inl0_hrd0_curhst.txt | 100 ++++++------- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 100 ++++++------- .../log_eemumu_mad_d_inl0_hrd1.txt | 100 ++++++------- 
.../log_eemumu_mad_d_inl1_hrd0.txt | 100 ++++++------- .../log_eemumu_mad_d_inl1_hrd1.txt | 100 ++++++------- .../log_eemumu_mad_f_inl0_hrd0.txt | 104 +++++++------- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 106 +++++++------- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 104 +++++++------- .../log_eemumu_mad_f_inl0_hrd0_curhst.txt | 104 +++++++------- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 106 +++++++------- .../log_eemumu_mad_f_inl0_hrd1.txt | 104 +++++++------- .../log_eemumu_mad_f_inl1_hrd0.txt | 104 +++++++------- .../log_eemumu_mad_f_inl1_hrd1.txt | 104 +++++++------- .../log_eemumu_mad_m_inl0_hrd0.txt | 100 ++++++------- .../log_eemumu_mad_m_inl0_hrd1.txt | 100 ++++++------- .../log_ggtt_mad_d_inl0_hrd0.txt | 102 +++++++------- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 102 +++++++------- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 102 +++++++------- .../log_ggtt_mad_d_inl0_hrd0_curhst.txt | 102 +++++++------- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 102 +++++++------- .../log_ggtt_mad_d_inl0_hrd1.txt | 100 ++++++------- .../log_ggtt_mad_d_inl1_hrd0.txt | 102 +++++++------- .../log_ggtt_mad_d_inl1_hrd1.txt | 104 +++++++------- .../log_ggtt_mad_f_inl0_hrd0.txt | 106 +++++++------- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 108 +++++++------- .../log_ggtt_mad_f_inl0_hrd0_common.txt | 106 +++++++------- .../log_ggtt_mad_f_inl0_hrd0_curhst.txt | 106 +++++++------- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 108 +++++++------- .../log_ggtt_mad_f_inl0_hrd1.txt | 104 +++++++------- .../log_ggtt_mad_f_inl1_hrd0.txt | 104 +++++++------- .../log_ggtt_mad_f_inl1_hrd1.txt | 108 +++++++------- .../log_ggtt_mad_m_inl0_hrd0.txt | 102 +++++++------- .../log_ggtt_mad_m_inl0_hrd1.txt | 104 +++++++------- .../log_ggttg_mad_d_inl0_hrd0.txt | 118 ++++++++-------- .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 118 ++++++++-------- .../log_ggttg_mad_d_inl0_hrd1.txt | 120 ++++++++-------- .../log_ggttg_mad_f_inl0_hrd0.txt | 124 ++++++++-------- .../log_ggttg_mad_f_inl0_hrd0_bridge.txt 
| 126 ++++++++--------- .../log_ggttg_mad_f_inl0_hrd1.txt | 124 ++++++++-------- .../log_ggttg_mad_m_inl0_hrd0.txt | 124 ++++++++-------- .../log_ggttg_mad_m_inl0_hrd1.txt | 124 ++++++++-------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 120 ++++++++-------- .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 120 ++++++++-------- .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 120 ++++++++-------- .../log_ggttgg_mad_d_inl0_hrd0_curhst.txt | 120 ++++++++-------- .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 120 ++++++++-------- .../log_ggttgg_mad_d_inl0_hrd1.txt | 126 ++++++++--------- .../log_ggttgg_mad_d_inl1_hrd0.txt | 122 ++++++++-------- .../log_ggttgg_mad_d_inl1_hrd1.txt | 124 ++++++++-------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 124 ++++++++-------- .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 126 ++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 124 ++++++++-------- .../log_ggttgg_mad_f_inl0_hrd0_curhst.txt | 124 ++++++++-------- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 126 ++++++++--------- .../log_ggttgg_mad_f_inl0_hrd1.txt | 130 ++++++++--------- .../log_ggttgg_mad_f_inl1_hrd0.txt | 122 ++++++++-------- .../log_ggttgg_mad_f_inl1_hrd1.txt | 122 ++++++++-------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 124 ++++++++-------- .../log_ggttgg_mad_m_inl0_hrd1.txt | 126 ++++++++--------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 120 ++++++++-------- .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 120 ++++++++-------- .../log_ggttggg_mad_d_inl0_hrd1.txt | 122 ++++++++-------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 128 ++++++++--------- .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 128 ++++++++--------- .../log_ggttggg_mad_f_inl0_hrd1.txt | 132 +++++++++--------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 120 ++++++++-------- .../log_ggttggg_mad_m_inl0_hrd1.txt | 120 ++++++++-------- .../log_gqttq_mad_d_inl0_hrd0.txt | 118 ++++++++-------- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 118 ++++++++-------- .../log_gqttq_mad_d_inl0_hrd1.txt | 116 +++++++-------- 
.../log_gqttq_mad_f_inl0_hrd0.txt | 122 ++++++++-------- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 124 ++++++++-------- .../log_gqttq_mad_f_inl0_hrd1.txt | 122 ++++++++-------- .../log_gqttq_mad_m_inl0_hrd0.txt | 120 ++++++++-------- .../log_gqttq_mad_m_inl0_hrd1.txt | 118 ++++++++-------- 78 files changed, 4427 insertions(+), 4427 deletions(-) diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index a87822d822..fb3e759147 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-08-15_07:54:35 +DATE: 2023-10-25_18:31:49 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.672193e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.550427e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.701539e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.992610e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.677560e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.800497e+08 ) sec^-1 
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.694267 sec - 2,657,264,310 cycles # 2.858 GHz - 3,765,362,313 instructions # 1.42 insn per cycle - 1.008172040 seconds time elapsed +TOTAL : 0.651038 sec + 2,636,526,277 cycles # 3.033 GHz + 4,084,504,000 instructions # 1.55 insn per cycle + 0.937514788 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -64,19 +64,19 @@ Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.201139e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.486063e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.486063e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.223824e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.458038e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.458038e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.637064 sec - 17,374,480,524 cycles # 3.081 GHz - 41,066,855,128 instructions # 2.36 insn per cycle - 5.643507902 seconds time elapsed +TOTAL : 5.535744 sec + 17,174,922,423 cycles # 3.101 GHz + 40,422,775,862 instructions # 2.35 insn per cycle + 5.540757574 
seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.072551e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.176931e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.176931e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.137087e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.061371e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.061371e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.433727 sec - 10,651,941,944 cycles # 3.099 GHz - 25,327,627,259 instructions # 2.38 insn per cycle - 3.446001784 seconds time elapsed +TOTAL : 3.338481 sec + 10,270,021,527 cycles # 3.072 GHz + 24,681,672,230 instructions # 2.40 insn per cycle + 3.343524574 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1284) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -116,19 +116,19 @@ Relative 
difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.915398e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.790165e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.790165e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.319309e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.049977e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.049977e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.559896 sec - 7,470,114,400 cycles # 2.913 GHz - 14,323,698,348 instructions # 1.92 insn per cycle - 2.572490985 seconds time elapsed +TOTAL : 2.283025 sec + 6,897,531,665 cycles # 3.016 GHz + 13,676,914,709 instructions # 1.98 insn per cycle + 2.287967204 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1057) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.074566e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.307252e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.307252e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.461379e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.455419e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.455419e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.437591 sec - 7,267,006,379 cycles # 2.975 GHz - 14,030,605,607 instructions # 1.93 insn per cycle - 2.449534359 seconds time elapsed +TOTAL : 2.201231 sec + 6,644,736,732 cycles # 3.013 GHz + 13,369,268,411 instructions # 2.01 insn per cycle + 2.206080825 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1009) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -168,19 +168,19 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.914292e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.670582e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.670582e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.225436e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.708340e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.708340e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.557249 sec - 6,520,525,266 cycles # 2.545 GHz - 10,814,200,252 instructions # 1.66 insn per cycle - 2.569336783 seconds time elapsed +TOTAL : 2.342191 sec + 5,886,532,421 cycles # 2.509 GHz + 10,160,262,547 instructions # 1.73 insn per cycle + 2.347341313 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 268) (512y: 0) (512z: 683) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index 791e95c3e1..ff2ab6ab12 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -36,26 +36,26 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-08-15_08:27:14 +DATE: 2023-10-25_19:02:17 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.169671e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.772595e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.772595e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.965333e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.255447e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.255447e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.412440 sec - 8,085,879,441 cycles # 3.031 GHz - 13,804,456,818 instructions # 1.71 insn per cycle - 2.727864220 seconds time elapsed +TOTAL : 2.135108 sec + 7,250,256,577 cycles # 3.055 GHz + 13,026,966,701 instructions # 1.80 insn per cycle + 2.429348584 seconds time elapsed runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -73,19 +73,19 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.145466e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.404196e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.404196e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.180043e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.393531e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.393531e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.106495 sec - 18,655,748,758 cycles # 3.053 GHz - 41,377,014,729 instructions # 2.22 insn per cycle - 6.113887499 seconds time elapsed +TOTAL : 5.930110 sec + 18,404,007,216 cycles # 3.102 GHz + 40,649,787,986 instructions # 2.21 insn per cycle + 5.936245491 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -100,19 +100,19 @@ OK (relative difference <= 
5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.930164e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.865257e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.865257e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.001560e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.785736e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.785736e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.856414 sec - 11,884,784,674 cycles # 3.077 GHz - 26,175,543,746 instructions # 2.20 insn per cycle - 3.870932619 seconds time elapsed +TOTAL : 3.743997 sec + 11,597,299,620 cycles # 3.094 GHz + 25,525,941,371 instructions # 2.20 insn per cycle + 3.750076018 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1284) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -127,19 +127,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.705471e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.966789e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.966789e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.959223e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.997724e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.997724e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.932639 sec - 8,729,428,772 cycles # 2.970 GHz - 15,688,784,706 instructions # 1.80 insn per cycle - 2.946907461 seconds time elapsed +TOTAL : 2.737253 sec + 8,251,537,282 cycles # 3.009 GHz + 15,038,208,979 instructions # 1.82 insn per cycle + 2.743376591 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1057) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -154,19 +154,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.816691e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.292126e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.292126e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.076188e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.294642e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.294642e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.837926 sec - 8,512,441,591 cycles # 2.996 GHz - 15,397,705,654 instructions # 1.81 insn per cycle - 2.854571954 seconds time elapsed +TOTAL : 2.656399 sec + 7,925,615,204 cycles # 2.978 GHz + 14,731,067,513 instructions # 1.86 insn per cycle + 2.662615813 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1009) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -181,19 +181,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.572890e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.618725e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.618725e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.899896e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.732839e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.732839e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.067289 sec - 7,897,531,929 cycles # 2.570 GHz - 11,965,513,983 instructions # 1.52 insn per cycle - 3.083738006 seconds time elapsed +TOTAL : 2.778907 sec + 7,285,067,282 cycles # 2.617 GHz + 11,305,402,811 instructions # 1.55 insn per cycle + 2.785056032 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 268) (512y: 0) (512z: 683) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index f7b9c99682..ee209006c3 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-08-15_08:40:19 +DATE: 2023-10-25_19:14:42 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.908839e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.278507e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.626738e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.734802e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.548595e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.695477e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.338283 sec - 4,710,085,675 cycles # 2.990 GHz - 7,055,082,394 instructions # 1.50 insn per cycle - 1.632987098 seconds time elapsed +TOTAL : 1.288103 sec + 4,585,947,033 cycles # 3.031 GHz + 7,017,628,406 instructions # 1.53 insn per cycle + 1.570944429 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -64,19 +64,19 @@ Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) 
========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.202837e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.487646e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.487646e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.218596e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.449391e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.449391e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.982127 sec - 18,484,607,427 cycles # 3.088 GHz - 41,193,413,905 instructions # 2.23 insn per cycle - 5.988145512 seconds time elapsed +TOTAL : 5.906919 sec + 18,244,110,475 cycles # 3.087 GHz + 40,525,216,964 instructions # 2.22 insn per cycle + 5.912008529 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.062982e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.164129e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.164129e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.094791e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.999575e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.999575e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.793432 sec - 11,774,969,685 cycles # 3.102 GHz - 25,353,299,382 instructions # 2.15 insn per cycle - 3.799461430 seconds time elapsed +TOTAL : 3.757966 sec + 11,362,621,772 cycles # 3.020 GHz + 24,684,545,006 instructions # 2.17 insn per cycle + 3.762920389 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1284) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
2.964788e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.886297e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.886297e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.221039e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.851480e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.851480e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.869115 sec - 8,608,684,116 cycles # 2.996 GHz - 14,248,463,131 instructions # 1.66 insn per cycle - 2.887780495 seconds time elapsed +TOTAL : 2.709730 sec + 7,962,642,894 cycles # 2.935 GHz + 13,579,072,475 instructions # 1.71 insn per cycle + 2.714759310 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1057) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.096822e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.314817e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.314817e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.422533e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.356319e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.356319e+06 ) sec^-1 MeanMatrixElemValue = ( 
1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.784058 sec - 8,355,473,574 cycles # 2.997 GHz - 13,754,388,286 instructions # 1.65 insn per cycle - 2.789826895 seconds time elapsed +TOTAL : 2.582578 sec + 7,748,421,467 cycles # 2.996 GHz + 13,080,984,196 instructions # 1.69 insn per cycle + 2.587647023 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1009) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -168,19 +168,19 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.901063e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.664325e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.664325e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.234611e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.715248e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.715248e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.921492 sec - 7,710,551,616 cycles # 2.635 GHz - 10,537,417,253 instructions # 1.37 insn per cycle - 2.936780421 seconds time elapsed +TOTAL : 2.694072 sec + 7,028,142,015 cycles # 2.605 GHz + 9,860,263,834 instructions # 1.40 insn per cycle + 2.699027311 seconds time elapsed 
=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 268) (512y: 0) (512z: 683) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt index 2f4b273270..8ad22bdaab 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-08-15_08:37:08 +DATE: 2023-10-25_19:11:43 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.942692e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.330516e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.705460e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.751984e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.562686e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.712190e+08 ) sec^-1 MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.977802 sec - 3,622,628,001 cycles # 2.987 GHz - 6,887,030,840 instructions # 1.90 insn per cycle - 1.270674651 seconds time elapsed +TOTAL : 0.934603 sec + 3,531,370,849 cycles # 3.027 GHz + 7,056,706,138 instructions # 2.00 insn per cycle + 1.223131814 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -64,19 +64,19 @@ Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.208005e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.495038e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.495038e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.229006e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.461428e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.461428e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.605516 sec - 17,363,648,733 cycles # 3.095 GHz - 41,066,644,485 instructions # 2.37 insn per cycle - 5.612220833 seconds time elapsed +TOTAL : 5.514347 sec + 17,134,759,816 cycles # 3.105 GHz + 40,421,384,674 instructions # 2.36 insn per cycle + 5.519235575 seconds 
time elapsed =Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.056139e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.158689e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.158689e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.136550e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.078809e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.078809e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.459827 sec - 10,651,024,283 cycles # 3.074 GHz - 25,327,710,293 instructions # 2.38 insn per cycle - 3.471243867 seconds time elapsed +TOTAL : 3.338536 sec + 10,358,915,958 cycles # 3.099 GHz + 24,681,209,780 instructions # 2.38 insn per cycle + 3.343472671 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1284) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -116,19 +116,19 @@ Relative 
difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.979620e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.939152e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.939152e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.254346e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.883713e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.883713e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.502516 sec - 7,522,714,318 cycles # 3.000 GHz - 14,323,295,499 instructions # 1.90 insn per cycle - 2.508864166 seconds time elapsed +TOTAL : 2.325759 sec + 6,909,272,329 cycles # 2.965 GHz + 13,676,492,702 instructions # 1.98 insn per cycle + 2.330695390 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1057) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.103932e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.359476e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.359476e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.418862e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.369541e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.369541e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.417909 sec - 7,244,725,933 cycles # 2.990 GHz - 14,030,681,101 instructions # 1.94 insn per cycle - 2.429759330 seconds time elapsed +TOTAL : 2.228021 sec + 6,650,960,186 cycles # 2.980 GHz + 13,380,296,124 instructions # 2.01 insn per cycle + 2.233066593 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1009) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -168,19 +168,19 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.921840e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.718079e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.718079e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.184006e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.596559e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.596559e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.547151 sec - 6,577,388,785 cycles # 2.577 GHz - 10,813,767,193 instructions # 1.64 insn per cycle - 2.559250592 seconds time elapsed +TOTAL : 2.371480 sec + 5,899,234,125 cycles # 2.483 GHz + 10,159,638,956 instructions # 1.72 insn per cycle + 2.376624580 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 268) (512y: 0) (512z: 683) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 01ad82e89d..683b401a34 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -36,23 +36,23 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-08-15_08:33:54 +DATE: 2023-10-25_19:08:41 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --rmbhst OMP= WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.246182e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.255428e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.587144e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.450047e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.536531e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.695667e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.052115 sec - 6,878,654,346 cycles # 3.005 GHz - 11,917,245,523 instructions # 1.73 insn per cycle - 2.346890943 seconds time elapsed +TOTAL : 1.791948 sec + 6,196,047,830 cycles # 3.061 GHz + 11,389,938,256 instructions # 1.84 insn per cycle + 2.082150692 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 @@ -66,19 +66,19 @@ Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.199648e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.484757e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.484757e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.225914e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.460719e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.460719e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.644079 sec - 17,374,294,583 cycles # 3.078 GHz - 41,067,072,666 instructions # 2.36 insn per cycle - 5.650187028 seconds time elapsed +TOTAL : 5.523506 sec + 17,169,419,080 cycles # 3.106 GHz + 40,421,624,196 instructions # 2.35 insn per cycle + 5.528429327 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -92,19 +92,19 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.057242e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.159791e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.159791e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.159688e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.087862e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.087862e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.458639 sec - 10,663,165,741 cycles # 3.080 GHz - 25,328,035,686 instructions # 2.38 insn per cycle - 3.470060797 seconds time elapsed +TOTAL : 3.303087 sec + 10,283,099,650 cycles # 3.109 GHz + 24,681,354,623 instructions # 2.40 insn per cycle + 3.308198240 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1284) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -118,19 +118,19 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow 
summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.976299e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.977860e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.977860e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.299969e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.994161e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.994161e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.506470 sec - 7,495,523,027 cycles # 2.985 GHz - 14,323,185,828 instructions # 1.91 insn per cycle - 2.521547342 seconds time elapsed +TOTAL : 2.296796 sec + 6,872,706,594 cycles # 2.987 GHz + 13,676,412,365 instructions # 1.99 insn per cycle + 2.301612504 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1057) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -144,19 +144,19 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.096324e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.377515e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 6.377515e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.398777e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.314475e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.314475e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.422127 sec - 7,261,731,842 cycles # 2.995 GHz - 14,029,800,668 instructions # 1.93 insn per cycle - 2.428445118 seconds time elapsed +TOTAL : 2.238868 sec + 6,648,921,003 cycles # 2.964 GHz + 13,380,372,785 instructions # 2.01 insn per cycle + 2.243705065 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1009) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -170,19 +170,19 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.935302e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.737567e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.737567e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.257611e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.762023e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.762023e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.538004 sec - 6,512,072,673 cycles # 
2.561 GHz - 10,813,676,057 instructions # 1.66 insn per cycle - 2.544311743 seconds time elapsed +TOTAL : 2.324047 sec + 5,897,358,226 cycles # 2.533 GHz + 10,159,451,394 instructions # 1.72 insn per cycle + 2.329040547 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 268) (512y: 0) (512z: 683) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index 0e3156c822..8f12496d4e 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-08-15_07:55:07 +DATE: 2023-10-25_18:32:17 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.797502e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.304738e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.068661e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.135499e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.480702e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.088764e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.668941 sec - 2,618,550,010 cycles # 2.895 GHz - 3,725,914,034 instructions # 1.42 insn per cycle - 0.964586682 seconds time elapsed +TOTAL : 0.649915 sec + 2,521,027,262 cycles # 2.870 GHz + 3,932,562,496 instructions # 1.56 insn per cycle + 0.935284453 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 118 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -64,19 +64,19 @@ Relative difference = 1.027708011645137e-08 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.207948e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.493925e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.493925e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.221519e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.452240e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.452240e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.606225 sec - 17,352,611,992 cycles # 3.095 GHz - 41,015,581,318 instructions # 
2.36 insn per cycle - 5.612273631 seconds time elapsed +TOTAL : 5.545860 sec + 17,128,162,100 cycles # 3.086 GHz + 40,370,576,437 instructions # 2.36 insn per cycle + 5.550872301 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 362) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 1.0277102294013186e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.055642e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.147243e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.147243e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.147682e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.063332e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.063332e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.458988 sec - 10,650,896,510 cycles # 3.075 GHz - 25,289,274,202 instructions # 2.37 insn per cycle - 3.470313959 seconds time elapsed +TOTAL : 3.321610 sec + 10,270,541,691 cycles # 3.088 GHz + 24,643,021,754 instructions # 2.40 insn per cycle + 3.326647423 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1271) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 1.0277102294013186e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.994887e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.987724e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.987724e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.302607e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.009317e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.009317e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.494936 sec - 7,463,237,722 cycles # 2.986 GHz - 14,297,586,390 instructions # 1.92 insn per cycle - 2.501385204 seconds time elapsed +TOTAL : 2.292563 sec + 6,895,121,758 cycles # 3.004 GHz + 13,651,253,610 instructions # 1.98 insn per cycle + 2.297509965 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1037) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 1.0277088906338675e-08 OK (relative difference <= 5E-3) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.101945e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.402549e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.402549e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.466481e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.436410e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.436410e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.419494 sec - 7,255,354,650 cycles # 2.993 GHz - 14,017,249,829 instructions # 1.93 insn per cycle - 2.435526163 seconds time elapsed +TOTAL : 2.199117 sec + 6,634,520,360 cycles # 3.011 GHz + 13,355,581,160 instructions # 2.01 insn per cycle + 2.204141246 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 989) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest.exe @@ -168,19 +168,19 @@ Relative difference = 1.0277088906338675e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.025481e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.091522e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.091522e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.383490e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.139882e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.139882e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.474734 sec - 6,401,014,262 cycles # 2.581 GHz - 10,693,180,264 instructions # 1.67 insn per cycle - 2.491261488 seconds time elapsed +TOTAL : 2.249115 sec + 5,748,008,303 cycles # 2.551 GHz + 10,038,931,524 instructions # 1.75 insn per cycle + 2.254226968 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 246) (512y: 0) (512z: 663) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index a36b8c245f..bad17671a9 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-08-15_08:16:26 +DATE: 2023-10-25_18:52:22 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.914166e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.327317e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.704399e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.866442e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.650091e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.818114e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.682445 sec - 2,681,111,270 cycles # 2.917 GHz - 3,835,048,245 instructions # 1.43 insn per cycle - 0.985161352 seconds time elapsed +TOTAL : 0.673872 sec + 2,565,505,221 cycles # 2.855 GHz + 3,938,465,714 instructions # 1.54 insn per cycle + 0.962945607 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -64,19 +64,19 @@ Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) 
========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.537359e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.293434e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.293434e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.788792e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.362239e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.362239e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.888959 sec - 8,793,923,135 cycles # 3.041 GHz - 18,106,828,171 instructions # 2.06 insn per cycle - 2.895668902 seconds time elapsed +TOTAL : 2.649101 sec + 8,210,295,285 cycles # 3.094 GHz + 17,459,406,832 instructions # 2.13 insn per cycle + 2.654141725 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 125) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.326429e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.963979e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.963979e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.776841e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.067159e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.067159e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.299886 sec - 6,945,957,949 cycles # 3.016 GHz - 13,421,286,049 instructions # 1.93 insn per cycle - 2.311641853 seconds time elapsed +TOTAL : 2.060741 sec + 6,367,782,565 cycles # 3.084 GHz + 12,773,139,369 instructions # 2.01 insn per cycle + 2.065923417 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 810) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.146868e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.354064e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.354064e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.730005e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.360397e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.360397e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.914407 sec - 5,714,721,147 cycles # 2.978 GHz - 10,018,011,296 instructions # 1.75 insn per cycle - 1.926003662 seconds time elapsed +TOTAL : 1.721074 sec + 5,169,510,873 cycles # 2.996 GHz + 9,371,577,717 instructions # 1.81 insn per cycle + 1.726231344 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 720) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.305174e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.522058e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.522058e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.006682e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.519447e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.519447e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL 
: 1.852893 sec - 5,605,274,427 cycles # 3.019 GHz - 9,879,619,284 instructions # 1.76 insn per cycle - 1.868960977 seconds time elapsed +TOTAL : 1.646907 sec + 4,985,951,497 cycles # 3.019 GHz + 9,229,216,123 instructions # 1.85 insn per cycle + 1.652062250 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 641) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest.exe @@ -168,19 +168,19 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.706880e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.000723e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.000723e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.264702e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.000270e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.000270e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.095172 sec - 5,643,037,786 cycles # 2.689 GHz - 9,343,938,303 instructions # 1.66 insn per cycle - 2.107089803 seconds time elapsed +TOTAL : 1.871689 sec + 5,009,273,536 cycles # 2.672 GHz + 8,693,527,346 instructions # 1.74 insn per cycle + 1.876855811 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 200) (512y: 0) 
(512z: 276) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index 55c6ae7bc3..e9aad49fe2 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-08-15_08:16:50 +DATE: 2023-10-25_18:52:44 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.015497e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.041509e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.076611e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.999685e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.416020e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.082814e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.671217 sec - 2,648,430,808 cycles # 2.914 GHz - 
3,872,567,566 instructions # 1.46 insn per cycle - 0.968378046 seconds time elapsed +TOTAL : 0.643446 sec + 2,620,726,592 cycles # 3.012 GHz + 4,087,111,468 instructions # 1.56 insn per cycle + 0.931265549 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 118 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -64,19 +64,19 @@ Relative difference = 1.027708011645137e-08 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.170088e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.615486e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.615486e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.490278e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.520777e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.520777e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.384664 sec - 7,263,421,980 cycles # 3.040 GHz - 14,876,504,014 instructions # 2.05 insn per cycle - 2.391073812 seconds time elapsed +TOTAL : 2.208507 sec + 6,644,704,561 cycles # 3.003 GHz + 14,230,584,763 instructions # 2.14 insn per cycle + 2.213619219 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 122) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 1.0277102294013186e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.987019e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.200512e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.200512e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.496209e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.203520e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.203520e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.975075 sec - 6,103,568,200 cycles # 3.084 GHz - 11,421,166,399 instructions # 1.87 insn per cycle - 1.981152289 seconds time elapsed +TOTAL : 1.793256 sec + 5,547,608,796 cycles # 3.086 GHz + 10,773,719,188 instructions # 1.94 insn per cycle + 1.798601488 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 610) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 1.0277102294013186e-08 OK (relative difference <= 5E-3) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.378307e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.651197e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.651197e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.103748e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.641142e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.641142e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.835051 sec - 5,528,943,122 cycles # 3.004 GHz - 9,375,389,845 instructions # 1.70 insn per cycle - 1.865320631 seconds time elapsed +TOTAL : 1.621275 sec + 4,941,467,155 cycles # 3.041 GHz + 8,728,712,502 instructions # 1.77 insn per cycle + 1.626388139 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 552) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 1.0277088906338675e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.607711e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.890314e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.890314e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.063835e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.792404e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.792404e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.756986 sec - 5,347,368,521 cycles # 3.036 GHz - 9,390,869,751 instructions # 1.76 insn per cycle - 1.768916861 seconds time elapsed +TOTAL : 1.652560 sec + 4,758,216,156 cycles # 2.885 GHz + 8,734,044,090 instructions # 1.84 insn per cycle + 1.657554337 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 519) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest.exe @@ -168,19 +168,19 @@ Relative difference = 1.0277088906338675e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.859935e+06 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.101667e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.101667e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.451379e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.103695e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.103695e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.025711 sec - 5,554,713,597 cycles # 2.736 GHz - 9,060,598,111 instructions # 1.63 insn per cycle - 2.037110763 seconds time elapsed +TOTAL : 1.809541 sec + 4,867,548,247 cycles # 2.684 GHz + 8,406,231,727 instructions # 1.73 insn per cycle + 1.814605280 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 168) (512y: 0) (512z: 227) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index d1fccd5830..a96360aa15 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-08-15_07:55:37 +DATE: 2023-10-25_18:32:45 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=2, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.545574e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.429400e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.762898e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.622469e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.481172e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.826768e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 -TOTAL : 0.574402 sec - 2,306,684,617 cycles # 2.879 GHz - 3,317,349,009 instructions # 1.44 insn per cycle - 0.861431911 seconds time elapsed +TOTAL : 0.551831 sec + 2,329,387,313 cycles # 3.021 GHz + 3,650,775,745 instructions # 1.57 insn per cycle + 0.828584114 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -59,24 +59,24 @@ runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112026909366E-002 -Relative difference = 7.173898182689807e-06 +Avg ME (F77/CUDA) = 1.2828112108763889E-002 +Relative difference = 7.180279099086847e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.265149e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.524518e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.524518e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.243845e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.497749e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.497749e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 5.331314 sec - 16,491,409,917 cycles # 3.093 GHz - 40,103,873,366 instructions # 2.43 insn per cycle - 5.337381440 seconds time elapsed +TOTAL : 5.410831 sec + 16,844,648,065 cycles # 3.111 GHz + 40,088,965,912 instructions # 2.38 insn per cycle + 5.415719530 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 1.500049293219082e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.255546e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.158473e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.158473e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.198454e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.967626e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.967626e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.289624 sec - 7,065,393,860 cycles # 3.080 GHz - 16,746,102,273 instructions # 2.37 insn per cycle - 2.295533984 seconds time elapsed +TOTAL : 2.324919 sec + 7,138,346,939 cycles # 3.065 GHz + 16,729,497,470 instructions # 2.34 insn per cycle + 2.329783883 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 3.8113554068418534e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.577100e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.247291e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.247291e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.643489e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.234035e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.234035e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.728483 sec - 5,246,476,226 cycles # 3.028 GHz - 10,646,051,029 instructions # 2.03 insn per cycle - 1.744312540 seconds time elapsed +TOTAL : 1.702583 sec + 5,155,065,818 cycles # 3.021 GHz + 10,628,955,239 instructions # 2.06 insn per cycle + 1.707446133 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1122) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.684671e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.304884e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.304884e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.817440e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.344869e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.344869e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.701889 sec - 5,084,915,133 cycles # 2.980 GHz - 10,499,674,438 instructions # 2.06 insn per cycle - 1.716700420 seconds time elapsed +TOTAL : 1.653471 sec + 5,044,234,982 cycles # 3.043 GHz + 10,475,715,128 instructions # 2.08 insn per cycle + 1.658543423 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1074) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -168,19 +168,19 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.560201e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.172152e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] 
(3a) = ( 1.172152e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.608343e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.175337e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.175337e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 1.731968 sec - 4,700,187,956 cycles # 2.707 GHz - 8,947,738,240 instructions # 1.90 insn per cycle - 1.744130504 seconds time elapsed +TOTAL : 1.712484 sec + 4,683,701,478 cycles # 2.728 GHz + 8,926,870,251 instructions # 1.91 insn per cycle + 1.717292179 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 343) (512y: 0) (512z: 710) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index 777be3dddb..efa36acd38 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -36,9 +36,9 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-08-15_08:27:48 +DATE: 2023-10-25_19:02:51 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -52,17 +52,17 @@ WARNING! 
flagging abnormal ME for ievt=66427 WARNING! flagging abnormal ME for ievt=465318 WARNING! flagging abnormal ME for ievt=458848 WARNING! flagging abnormal ME for ievt=247522 -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=7, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.218476e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.056373e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.056373e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371709e-02 +- 3.270386e-06 ) GeV^0 -TOTAL : 1.698014 sec - 5,764,608,219 cycles # 2.984 GHz - 10,199,099,127 instructions # 1.77 insn per cycle - 1.992360425 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.717117e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.761186e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.761186e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371709e-02 +- 3.270385e-06 ) GeV^0 +TOTAL : 1.602340 sec + 5,595,842,121 cycles # 3.065 GHz + 10,162,817,469 instructions # 1.82 insn per cycle + 1.883301086 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -74,8 +74,8 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112026909366E-002 -Relative difference = 7.173898182689807e-06 +Avg ME (F77/CUDA) = 1.2828112108763889E-002 +Relative difference = 7.180279099086847e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= @@ -86,19 +86,19 @@ WARNING! flagging abnormal ME for ievt=152898 WARNING! flagging abnormal ME for ievt=66427 WARNING! flagging abnormal ME for ievt=164749 WARNING! 
flagging abnormal ME for ievt=247522 -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.238322e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.485077e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.485077e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.194651e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.427134e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.427134e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 5.549150 sec - 17,156,844,123 cycles # 3.090 GHz - 40,268,372,978 instructions # 2.35 insn per cycle - 5.556229262 seconds time elapsed +TOTAL : 5.727507 sec + 17,468,478,153 cycles # 3.048 GHz + 40,238,549,213 instructions # 2.30 insn per cycle + 5.733108996 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -119,19 +119,19 @@ WARNING! flagging abnormal ME for ievt=152898 WARNING! flagging abnormal ME for ievt=66427 WARNING! flagging abnormal ME for ievt=164749 WARNING! 
flagging abnormal ME for ievt=247522 -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.021228e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.444863e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.444863e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.015918e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.363005e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.363005e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.567788 sec - 7,842,409,842 cycles # 3.048 GHz - 18,081,290,969 instructions # 2.31 insn per cycle - 2.581763386 seconds time elapsed +TOTAL : 2.562966 sec + 7,929,821,727 cycles # 3.088 GHz + 18,064,430,946 instructions # 2.28 insn per cycle + 2.568572939 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -150,19 +150,19 @@ WARNING! flagging abnormal ME for ievt=53874 WARNING! flagging abnormal ME for ievt=66427 WARNING! flagging abnormal ME for ievt=164749 WARNING! 
flagging abnormal ME for ievt=247522 -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.215444e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.001558e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.001558e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.272473e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.009652e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.009652e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.972841 sec - 5,968,906,543 cycles # 3.019 GHz - 11,766,354,244 instructions # 1.97 insn per cycle - 1.989154993 seconds time elapsed +TOTAL : 1.944574 sec + 5,929,261,497 cycles # 3.042 GHz + 11,749,715,523 instructions # 1.98 insn per cycle + 1.950245306 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1122) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -181,19 +181,19 @@ WARNING! flagging abnormal ME for ievt=53874 WARNING! flagging abnormal ME for ievt=66427 WARNING! flagging abnormal ME for ievt=164749 WARNING! 
flagging abnormal ME for ievt=247522 -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.331823e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.057903e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.057903e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.364659e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.078675e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.078675e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.929002 sec - 5,852,668,636 cycles # 3.024 GHz - 11,619,769,746 instructions # 1.99 insn per cycle - 1.946679853 seconds time elapsed +TOTAL : 1.914043 sec + 5,843,415,161 cycles # 3.045 GHz + 11,595,784,393 instructions # 1.98 insn per cycle + 1.919813375 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1074) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -212,19 +212,19 @@ WARNING! flagging abnormal ME for ievt=53874 WARNING! flagging abnormal ME for ievt=66427 WARNING! flagging abnormal ME for ievt=164749 WARNING! 
flagging abnormal ME for ievt=247522 -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.096575e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.324841e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.324841e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.130829e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.360401e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.360401e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 2.014606 sec - 5,539,109,033 cycles # 2.743 GHz - 10,154,997,244 instructions # 1.83 insn per cycle - 2.031462529 seconds time elapsed +TOTAL : 2.001099 sec + 5,491,473,730 cycles # 2.738 GHz + 10,134,991,267 instructions # 1.85 insn per cycle + 2.006879900 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 343) (512y: 0) (512z: 710) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index a8859eaa23..3649e05b61 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-08-15_08:40:51 +DATE: 2023-10-25_19:15:13 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.388130e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.322176e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.699521e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.566300e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.421516e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.742992e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 -TOTAL : 1.171138 sec - 4,175,234,437 cycles # 2.980 GHz - 6,374,434,859 instructions # 1.53 insn per cycle - 1.458340956 seconds time elapsed +TOTAL : 1.134749 sec + 4,098,993,410 cycles # 3.029 GHz + 6,605,372,981 instructions # 1.61 insn per cycle + 1.409722045 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -59,24 +59,24 @@ runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112026909366E-002 -Relative difference = 7.173898182689807e-06 +Avg ME (F77/CUDA) = 1.2828112108763889E-002 +Relative difference = 7.180279099086847e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.254138e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.512198e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.512198e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.244098e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.493114e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.493114e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 -TOTAL : 5.696069 sec - 17,480,760,791 cycles # 3.066 GHz - 40,290,171,415 instructions # 2.30 insn per cycle - 5.702403171 seconds time elapsed +TOTAL : 5.726200 sec + 17,811,812,788 cycles # 3.109 GHz + 40,270,712,628 instructions # 2.26 insn per cycle + 5.730949174 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 1.500049293219082e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.227003e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.133484e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.133484e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.215182e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.000060e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.000060e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 -TOTAL : 2.620604 sec - 8,082,957,151 cycles # 3.079 GHz - 16,832,196,831 instructions # 2.08 insn per cycle - 2.632153852 seconds time elapsed +TOTAL : 2.623227 sec + 8,152,502,492 cycles # 3.103 GHz + 16,810,279,631 instructions # 2.06 insn per cycle + 2.628033716 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 3.8113554068418534e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.550214e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.231450e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.231450e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.607786e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.225348e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.225348e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 -TOTAL : 2.049014 sec - 6,214,591,019 cycles # 3.026 GHz - 10,561,469,244 instructions # 1.70 insn per cycle - 2.061054298 seconds time elapsed +TOTAL : 2.027947 sec + 6,177,720,088 cycles # 3.041 GHz + 10,540,553,410 instructions # 1.71 insn per cycle + 2.032791206 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1122) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow 
summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.661615e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.311531e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.311531e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.739337e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.329843e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.329843e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 -TOTAL : 2.026023 sec - 6,148,544,502 cycles # 3.028 GHz - 10,213,907,636 instructions # 1.66 insn per cycle - 2.031850914 seconds time elapsed +TOTAL : 1.998605 sec + 6,104,707,264 cycles # 3.049 GHz + 10,185,815,617 instructions # 1.67 insn per cycle + 2.003437292 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1074) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -168,19 +168,19 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.520391e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.158289e+07 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.158289e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.461301e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.134325e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.134325e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371884e-02 +- 3.270111e-06 ) GeV^0 -TOTAL : 2.066306 sec - 5,722,268,726 cycles # 2.765 GHz - 8,658,387,445 instructions # 1.51 insn per cycle - 2.072030119 seconds time elapsed +TOTAL : 2.092643 sec + 5,695,345,035 cycles # 2.717 GHz + 8,637,352,141 instructions # 1.52 insn per cycle + 2.097505728 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 343) (512y: 0) (512z: 710) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt index 08f9d90beb..482f335b2b 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-08-15_08:37:38 +DATE: 2023-10-25_19:12:11 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=2, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.397106e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.350042e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.750966e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.572199e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.443581e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.789713e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 -TOTAL : 0.856969 sec - 3,231,922,783 cycles # 2.982 GHz - 6,241,038,070 instructions # 1.93 insn per cycle - 1.141635259 seconds time elapsed +TOTAL : 0.852228 sec + 3,102,837,181 cycles # 2.900 GHz + 6,399,466,575 instructions # 2.06 insn per cycle + 1.127081538 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -59,24 +59,24 @@ runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112026909366E-002 -Relative difference = 7.173898182689807e-06 +Avg ME (F77/CUDA) = 1.2828112108763889E-002 +Relative difference = 7.180279099086847e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.266454e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.526174e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.526174e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.242033e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.490396e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.490396e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 5.327368 sec - 16,480,103,424 cycles # 3.091 GHz - 40,103,730,146 instructions # 2.43 insn per cycle - 5.333346765 seconds time elapsed +TOTAL : 5.420944 sec + 16,812,615,332 cycles # 3.099 GHz + 40,088,432,090 instructions # 2.38 insn per cycle + 5.425745402 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 1.500049293219082e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.240289e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.142776e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.142776e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.220212e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.000588e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.000588e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.307276 sec - 7,096,546,849 cycles # 3.073 GHz - 16,746,922,642 instructions # 2.36 insn per cycle - 2.319547037 seconds time elapsed +TOTAL : 2.308004 sec + 7,155,170,314 cycles # 3.095 GHz + 16,729,446,787 instructions # 2.34 insn per cycle + 2.312771061 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 3.8113554068418534e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.581291e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.238177e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.238177e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.640154e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.225902e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.225902e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.729502 sec - 5,212,976,460 cycles # 3.005 GHz - 10,646,012,351 instructions # 2.04 insn per cycle - 1.735817439 seconds time elapsed +TOTAL : 1.703655 sec + 5,164,709,474 cycles # 3.025 GHz + 10,629,819,542 instructions # 2.06 insn per cycle + 1.708526413 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1122) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow 
summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.726597e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.323769e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.323769e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.770352e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.326396e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.326396e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.684285 sec - 5,114,786,489 cycles # 3.029 GHz - 10,499,635,148 instructions # 2.05 insn per cycle - 1.695702980 seconds time elapsed +TOTAL : 1.669695 sec + 5,072,425,715 cycles # 3.031 GHz + 10,481,476,531 instructions # 2.07 insn per cycle + 1.674496390 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1074) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -168,19 +168,19 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.552900e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.166453e+07 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.166453e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.608761e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.188893e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.188893e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 1.734671 sec - 4,714,487,833 cycles # 2.710 GHz - 8,947,783,662 instructions # 1.90 insn per cycle - 1.740864411 seconds time elapsed +TOTAL : 1.713521 sec + 4,682,934,639 cycles # 2.727 GHz + 8,926,936,750 instructions # 1.91 insn per cycle + 1.718311206 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 343) (512y: 0) (512z: 710) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index 32be1777db..efc5436b49 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -36,23 +36,23 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-08-15_08:34:26 +DATE: 2023-10-25_19:09:10 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=7, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.059394e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.303614e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.586022e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371709e-02 +- 3.270386e-06 ) GeV^0 -TOTAL : 1.557569 sec - 5,330,102,791 cycles # 2.986 GHz - 9,105,022,833 instructions # 1.71 insn per cycle - 1.843537042 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 9.218117e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.394390e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.636278e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371709e-02 +- 3.270385e-06 ) GeV^0 +TOTAL : 1.461263 sec + 4,982,734,203 cycles # 2.962 GHz + 9,103,785,620 instructions # 1.83 insn per cycle + 1.744448034 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 @@ -61,24 +61,24 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112026909366E-002 -Relative difference = 7.173898182689807e-06 +Avg ME (F77/CUDA) = 1.2828112108763889E-002 +Relative difference = 7.180279099086847e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.246233e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.501484e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.501484e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.245708e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.494542e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.494542e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 5.409994 sec - 16,491,394,473 cycles # 3.046 GHz - 40,104,848,105 instructions # 2.43 insn per cycle - 5.415867559 seconds time elapsed +TOTAL : 5.404955 sec + 16,819,106,248 cycles # 3.110 GHz + 40,088,802,052 instructions # 2.38 insn per cycle + 5.409792938 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -92,19 +92,19 @@ Relative difference = 1.500049293219082e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.248425e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.165572e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.165572e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.222846e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.010307e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.010307e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.296217 sec - 7,073,167,410 cycles # 3.077 GHz - 16,744,469,995 instructions # 2.37 insn per cycle - 2.301756332 seconds time elapsed +TOTAL : 2.309298 sec + 7,155,727,700 cycles # 3.093 GHz + 16,729,709,933 instructions # 2.34 insn per cycle + 2.314175580 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -118,19 +118,19 @@ Relative difference = 3.8113554068418534e-08 OK (relative difference <= 5E-3) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.567663e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.238042e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.238042e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.653137e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.236145e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.236145e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.731450 sec - 5,235,103,499 cycles # 3.017 GHz - 10,646,278,392 instructions # 2.03 insn per cycle - 1.746601624 seconds time elapsed +TOTAL : 1.698626 sec + 5,157,602,141 cycles # 3.030 GHz + 10,629,000,968 instructions # 2.06 insn per cycle + 1.703452950 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1122) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -144,19 +144,19 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.721614e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.333374e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.333374e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.773001e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.326358e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.326358e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.686175 sec - 5,126,659,727 cycles # 3.034 GHz - 10,499,853,434 instructions # 2.05 insn per cycle - 1.692340474 seconds time elapsed +TOTAL : 1.667901 sec + 5,061,515,411 cycles # 3.028 GHz + 10,480,815,680 instructions # 2.07 insn per cycle + 1.672594775 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1074) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -170,19 +170,19 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
4.567673e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.172234e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.172234e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.555200e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.166909e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.166909e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 1.728805 sec - 4,709,895,789 cycles # 2.717 GHz - 8,947,780,732 instructions # 1.90 insn per cycle - 1.740285725 seconds time elapsed +TOTAL : 1.731879 sec + 4,689,690,631 cycles # 2.701 GHz + 8,927,107,008 instructions # 1.90 insn per cycle + 1.736672094 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 343) (512y: 0) (512z: 710) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index 77eb4b6776..5dbfdd3213 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-08-15_07:56:03 +DATE: 2023-10-25_18:33:09 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=2, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.549024e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.454205e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.867590e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.625913e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.503566e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.918133e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 -TOTAL : 0.573247 sec - 2,306,223,093 cycles # 2.885 GHz - 3,304,353,770 instructions # 1.43 insn per cycle - 0.859595196 seconds time elapsed +TOTAL : 0.552329 sec + 2,328,888,712 cycles # 3.020 GHz + 3,648,506,478 instructions # 1.57 insn per cycle + 0.830116688 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 80 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -59,24 +59,24 @@ runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112026909366E-002 -Relative difference = 7.173898182689807e-06 +Avg ME (F77/CUDA) = 1.2828112108763889E-002 +Relative difference = 7.180279099086847e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.261287e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.517761e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.517761e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.244101e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.497364e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.497364e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 5.347587 sec - 16,519,340,323 cycles # 3.087 GHz - 40,052,333,384 instructions # 2.42 insn per cycle - 5.353348824 seconds time elapsed +TOTAL : 5.479428 sec + 17,033,310,610 cycles # 3.106 GHz + 40,038,122,508 instructions # 2.35 insn per cycle + 5.484429062 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 347) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 1.500049293219082e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.258683e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.177997e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.177997e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.023210e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.906828e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.906828e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.291784 sec - 7,066,394,987 cycles # 3.077 GHz - 16,669,891,798 instructions # 2.36 insn per cycle - 2.303456803 seconds time elapsed +TOTAL : 2.441591 sec + 7,417,759,581 cycles # 3.033 GHz + 16,653,923,334 instructions # 2.25 insn per cycle + 2.446386116 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1335) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 3.8113554068418534e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.586500e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.234895e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.234895e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.697456e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.249241e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.249241e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.721421 sec - 5,214,494,623 cycles # 3.021 GHz - 10,632,672,125 instructions # 2.04 insn per cycle - 1.732750457 seconds time elapsed +TOTAL : 1.685112 sec + 5,138,959,489 cycles # 3.042 GHz + 10,615,393,712 instructions # 2.07 insn per cycle + 1.690001992 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1092) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.718916e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.326614e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.326614e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.752164e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.307340e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.307340e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.686615 sec - 5,108,977,210 cycles # 3.020 GHz - 10,490,613,336 instructions # 2.05 insn per cycle - 1.698547089 seconds time elapsed +TOTAL : 1.671544 sec + 5,088,363,074 cycles # 3.037 GHz + 10,468,790,591 instructions # 2.06 insn per cycle + 1.676598026 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1044) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest.exe @@ -168,19 +168,19 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.651051e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.268382e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] 
(3a) = ( 1.268382e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.538235e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.204282e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.204282e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 1.709054 sec - 4,687,414,523 cycles # 2.736 GHz - 8,876,964,748 instructions # 1.89 insn per cycle - 1.715057097 seconds time elapsed +TOTAL : 1.747955 sec + 4,622,637,035 cycles # 2.639 GHz + 8,857,108,339 instructions # 1.92 insn per cycle + 1.752985428 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 312) (512y: 0) (512z: 678) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index 9bd4d8b42a..7778235778 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-08-15_08:17:14 +DATE: 2023-10-25_18:53:05 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=2, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.386329e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.343893e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.734394e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.591002e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.452620e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.766126e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 -TOTAL : 0.573807 sec - 2,338,736,868 cycles # 2.911 GHz - 3,361,616,652 instructions # 1.44 insn per cycle - 0.860710088 seconds time elapsed +TOTAL : 0.556490 sec + 2,367,622,596 cycles # 3.019 GHz + 3,659,745,020 instructions # 1.55 insn per cycle + 0.843534071 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -59,24 +59,24 @@ runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112026909366E-002 -Relative difference = 7.173898182689807e-06 +Avg ME (F77/CUDA) = 1.2828112108763889E-002 +Relative difference = 7.180279099086847e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.907458e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.789549e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.789549e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.920065e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.817963e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.817963e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.524937 sec - 7,695,908,300 cycles # 3.042 GHz - 17,419,908,512 instructions # 2.26 insn per cycle - 2.530886883 seconds time elapsed +TOTAL : 2.512043 sec + 7,713,453,992 cycles # 3.066 GHz + 17,403,928,818 instructions # 2.26 insn per cycle + 2.516888756 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 1.4858695011109669e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.654228e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.447937e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.447937e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.681101e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.456058e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.456058e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 1.715670 sec - 5,250,624,000 cycles # 3.055 GHz - 10,778,731,487 instructions # 2.05 insn per cycle - 1.726909018 seconds time elapsed +TOTAL : 1.700110 sec + 5,232,564,400 cycles # 3.070 GHz + 10,761,247,884 instructions # 2.06 insn per cycle + 1.704947526 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 941) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 3.924793743706775e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.003576e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.423971e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.423971e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.136284e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.424889e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.424889e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.401866 sec - 4,289,992,122 cycles # 3.051 GHz - 8,353,467,362 instructions # 1.95 insn per cycle - 1.413100405 seconds time elapsed +TOTAL : 1.396345 sec + 4,222,666,465 cycles # 3.047 GHz + 8,344,159,796 instructions # 1.98 insn per cycle + 1.401275041 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 855) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 2.5235104658031306e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.193956e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.892438e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.892438e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.252461e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.832504e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.832504e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.370603 sec - 4,199,967,342 cycles # 3.054 GHz - 8,332,687,737 instructions # 1.98 insn per cycle - 1.376565532 seconds time elapsed +TOTAL : 1.356528 sec + 4,156,578,728 cycles # 3.055 GHz + 8,308,294,757 instructions # 2.00 insn per cycle + 1.361324117 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 779) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest.exe @@ -168,19 +168,19 @@ Relative difference = 2.5235104658031306e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.581405e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.201035e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] 
(3a) = ( 2.201035e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.632818e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.213085e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.213085e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 1.480284 sec - 4,226,325,107 cycles # 2.846 GHz - 8,217,989,555 instructions # 1.94 insn per cycle - 1.495814004 seconds time elapsed +TOTAL : 1.468207 sec + 4,189,515,183 cycles # 2.846 GHz + 8,197,193,406 instructions # 1.96 insn per cycle + 1.473030833 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 280) (512y: 0) (512z: 301) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index 9afa2da28d..0d46a7bcf5 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-08-15_08:17:35 +DATE: 2023-10-25_18:53:26 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=2, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.385884e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.371282e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.861308e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.594388e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.488622e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.908806e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 -TOTAL : 0.576503 sec - 2,326,678,847 cycles # 2.894 GHz - 3,347,071,515 instructions # 1.44 insn per cycle - 0.861093880 seconds time elapsed +TOTAL : 0.555353 sec + 2,369,008,611 cycles # 3.015 GHz + 3,693,428,004 instructions # 1.56 insn per cycle + 0.844152711 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 80 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -59,24 +59,24 @@ runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112026909366E-002 -Relative difference = 7.173898182689807e-06 +Avg ME (F77/CUDA) = 1.2828112108763889E-002 +Relative difference = 7.180279099086847e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.826942e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.871964e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.871964e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.824530e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.823115e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.823115e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.005736 sec - 6,187,454,993 cycles # 3.079 GHz - 14,177,459,076 instructions # 2.29 insn per cycle - 2.011637210 seconds time elapsed +TOTAL : 2.002512 sec + 6,203,899,879 cycles # 3.095 GHz + 14,161,126,790 instructions # 2.28 insn per cycle + 2.007246903 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 133) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 1.3015322037054697e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.290599e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.243867e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.243867e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.299369e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.234812e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.234812e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 1.550551 sec - 4,789,377,245 cycles # 3.083 GHz - 9,583,428,039 instructions # 2.00 insn per cycle - 1.562945446 seconds time elapsed +TOTAL : 1.542994 sec + 4,763,831,720 cycles # 3.079 GHz + 9,566,058,895 instructions # 2.01 insn per cycle + 1.547857940 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 663) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 3.8113554068418534e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.222388e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.095648e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.095648e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.326543e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.012502e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.012502e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.363861 sec - 4,178,762,719 cycles # 3.053 GHz - 8,137,629,720 instructions # 1.95 insn per cycle - 1.370140957 seconds time elapsed +TOTAL : 1.344736 sec + 4,139,920,255 cycles # 3.069 GHz + 8,120,823,500 instructions # 1.96 insn per cycle + 1.349591931 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 623) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.352788e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.538240e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.538240e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.344788e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.506915e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.506915e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.344937 sec - 4,128,391,295 cycles # 3.058 GHz - 8,145,545,731 instructions # 1.97 insn per cycle - 1.360573478 seconds time elapsed +TOTAL : 1.344145 sec + 4,090,602,024 cycles # 3.034 GHz + 8,121,231,635 instructions # 1.99 insn per cycle + 1.349056597 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 590) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest.exe @@ -168,19 +168,19 @@ Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.792847e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.582026e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] 
(3a) = ( 2.582026e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.650060e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.477330e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.477330e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 1.440562 sec - 4,167,353,431 cycles # 2.883 GHz - 8,053,376,857 instructions # 1.93 insn per cycle - 1.446842959 seconds time elapsed +TOTAL : 1.474656 sec + 4,125,251,957 cycles # 2.790 GHz + 8,033,155,651 instructions # 1.95 insn per cycle + 1.479796548 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 238) (512y: 0) (512z: 234) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 0a3064edc4..42cb535764 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-08-15_07:56:29 +DATE: 2023-10-25_18:33:34 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.669670e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.534549e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.653764e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.989272e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.677595e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.794677e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.663511 sec - 2,639,528,093 cycles # 2.904 GHz - 3,824,856,160 instructions # 1.45 insn per cycle - 0.966243935 seconds time elapsed +TOTAL : 0.639629 sec + 2,624,746,782 cycles # 3.027 GHz + 4,095,202,506 instructions # 1.56 insn per cycle + 0.927049463 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -64,19 +64,19 @@ Relative difference = 7.671454200650844e-09 OK (relative difference <= 5E-3) 
========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.183307e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.458687e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.458687e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.199685e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.421947e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.421947e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.714648 sec - 17,642,893,641 cycles # 3.087 GHz - 41,243,812,397 instructions # 2.34 insn per cycle - 5.720997255 seconds time elapsed +TOTAL : 5.639670 sec + 17,406,394,399 cycles # 3.084 GHz + 40,598,366,537 instructions # 2.33 insn per cycle + 5.644674626 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 377) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.077069e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.197446e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.197446e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.181362e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.136383e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.136383e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.426962 sec - 10,562,613,964 cycles # 3.078 GHz - 25,488,240,263 instructions # 2.41 insn per cycle - 3.432996829 seconds time elapsed +TOTAL : 3.277504 sec + 10,153,584,525 cycles # 3.094 GHz + 24,841,830,142 instructions # 2.45 insn per cycle + 3.282502956 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1318) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.041178e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 6.115231e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.115231e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.329721e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.123732e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.123732e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.464007 sec - 7,360,734,505 cycles # 2.983 GHz - 14,281,547,404 instructions # 1.94 insn per cycle - 2.476037508 seconds time elapsed +TOTAL : 2.277389 sec + 6,843,760,468 cycles # 3.000 GHz + 13,635,441,327 instructions # 1.99 insn per cycle + 2.282659453 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1211) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.160027e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.603041e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.603041e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.425597e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.470588e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.470588e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL 
: 2.381011 sec - 7,144,233,987 cycles # 2.993 GHz - 13,977,358,492 instructions # 1.96 insn per cycle - 2.393452159 seconds time elapsed +TOTAL : 2.232215 sec + 6,546,202,423 cycles # 2.927 GHz + 13,316,237,781 instructions # 2.03 insn per cycle + 2.237429944 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1141) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest.exe @@ -168,19 +168,19 @@ Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.929943e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.721420e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.721420e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.286536e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.835267e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.835267e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.545022 sec - 6,534,578,577 cycles # 2.563 GHz - 10,866,400,512 instructions # 1.66 insn per cycle - 2.556667131 seconds time elapsed +TOTAL : 2.304970 sec + 5,866,744,695 cycles # 2.541 GHz + 10,212,406,703 instructions # 1.74 insn per cycle + 2.310030357 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 406) (512y: 
0) (512z: 707) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index e2daa7fa37..84b58b8eae 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-08-15_07:56:59 +DATE: 2023-10-25_18:34:02 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.792190e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.296331e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.073214e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.129836e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.454495e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.086141e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.658774 sec - 2,632,532,045 cycles # 2.934 GHz - 
3,722,497,876 instructions # 1.41 insn per cycle - 0.954702315 seconds time elapsed +TOTAL : 0.640256 sec + 2,612,587,104 cycles # 3.007 GHz + 4,030,428,105 instructions # 1.54 insn per cycle + 0.928744648 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 118 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -64,19 +64,19 @@ Relative difference = 7.671454200650844e-09 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.186828e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.463171e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.463171e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.208951e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.433392e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.433392e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.706749 sec - 17,621,138,449 cycles # 3.088 GHz - 41,192,591,924 instructions # 2.34 insn per cycle - 5.713150892 seconds time elapsed +TOTAL : 5.597792 sec + 17,342,645,679 cycles # 3.096 GHz + 40,546,867,973 instructions # 2.34 insn per cycle + 5.602682073 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.066061e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.186765e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.186765e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.133508e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.059933e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.059933e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.443089 sec - 10,636,292,355 cycles # 3.085 GHz - 25,449,644,894 instructions # 2.39 insn per cycle - 3.458603313 seconds time elapsed +TOTAL : 3.349360 sec + 10,185,466,163 cycles # 3.037 GHz + 24,803,480,189 instructions # 2.44 insn per cycle + 3.354498074 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1305) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.025537e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.156439e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.156439e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.351442e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.169056e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.169056e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.471270 sec - 7,412,247,060 cycles # 2.994 GHz - 14,255,493,791 instructions # 1.92 insn per cycle - 2.482605852 seconds time elapsed +TOTAL : 2.263569 sec + 6,798,954,008 cycles # 2.998 GHz + 13,608,714,241 instructions # 2.00 insn per cycle + 2.268496012 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1191) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.180644e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.589038e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.589038e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.513470e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.629747e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.629747e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.367834 sec - 7,129,277,655 cycles # 3.007 GHz - 13,963,683,721 instructions # 1.96 insn per cycle - 2.383982852 seconds time elapsed +TOTAL : 2.175520 sec + 6,534,951,743 cycles # 2.998 GHz + 13,313,454,459 instructions # 2.04 insn per cycle + 2.180544895 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1121) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest.exe @@ -168,19 +168,19 @@ Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.998078e+06 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.969254e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.969254e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.364985e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.081130e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.081130e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.494071 sec - 6,425,672,029 cycles # 2.572 GHz - 10,745,918,092 instructions # 1.67 insn per cycle - 2.500013882 seconds time elapsed +TOTAL : 2.260031 sec + 5,775,308,811 cycles # 2.550 GHz + 10,091,603,442 instructions # 1.75 insn per cycle + 2.265220222 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 386) (512y: 0) (512z: 688) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 64d227ea2d..fc7d3d5581 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-08-15_07:57:28 +DATE: 2023-10-25_18:34:30 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.008846e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.166760e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.264507e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.189330e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.175818e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270057e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.526553 sec - 2,210,322,248 cycles # 2.904 GHz - 2,874,286,221 instructions # 1.30 insn per cycle - 0.819598652 seconds time elapsed +TOTAL : 0.512252 sec + 2,155,173,458 cycles # 2.909 GHz + 3,041,305,881 instructions # 1.41 insn per cycle + 0.798678690 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -64,19 +64,19 @@ Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) 
========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.959920e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.022893e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.022893e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.926195e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.975396e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.975396e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.455757 sec - 16,830,029,467 cycles # 3.082 GHz - 45,503,274,966 instructions # 2.70 insn per cycle - 5.462080500 seconds time elapsed +TOTAL : 5.545999 sec + 17,154,877,090 cycles # 3.091 GHz + 45,384,595,667 instructions # 2.65 insn per cycle + 5.551026412 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow 
summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.384153e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.591052e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.591052e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.351243e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.515324e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.515324e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.203803 sec - 9,917,768,655 cycles # 3.090 GHz - 27,884,562,243 instructions # 2.81 insn per cycle - 3.210298938 seconds time elapsed +TOTAL : 3.231698 sec + 10,007,778,960 cycles # 3.093 GHz + 27,771,257,423 instructions # 2.77 insn per cycle + 3.236933218 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2543) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.261175e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.764281e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
5.764281e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.340358e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.757627e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.757627e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.098374 sec - 6,126,947,640 cycles # 2.912 GHz - 12,622,456,584 instructions # 2.06 insn per cycle - 2.110538594 seconds time elapsed +TOTAL : 2.065747 sec + 6,039,122,425 cycles # 2.917 GHz + 12,507,446,858 instructions # 2.07 insn per cycle + 2.070933576 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.687482e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.294742e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.294742e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.874280e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.375597e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.375597e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.950153 sec - 5,678,878,812 cycles # 2.907 GHz - 12,001,198,241 instructions # 2.11 insn per cycle - 
1.962021066 seconds time elapsed +TOTAL : 1.886900 sec + 5,532,181,520 cycles # 2.925 GHz + 11,883,413,800 instructions # 2.15 insn per cycle + 1.892172826 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2414) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -168,20 +168,20 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.771194e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.025396e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.025396e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.697439e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.891301e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.891301e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.887803 sec - 5,812,534,828 cycles # 2.009 GHz - 8,395,770,130 instructions # 1.44 insn per cycle - 2.900336232 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1797) +TOTAL : 2.940719 sec + 5,705,029,690 cycles # 1.938 GHz + 8,291,496,940 instructions # 1.45 insn per cycle + 2.945995320 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1451) (512y: 122) (512z: 1797) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index 3015f84fb6..bde7cbdb09 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -36,26 +36,26 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-08-15_08:28:16 +DATE: 2023-10-25_19:03:18 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.000928e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.975262e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.975262e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.773797e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.294173e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.294173e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.861220 sec - 3,260,802,553 cycles # 2.935 GHz - 4,618,775,963 instructions # 1.42 insn per cycle - 1.169456478 seconds time elapsed +TOTAL : 0.785416 sec + 3,063,164,499 cycles # 3.000 GHz + 4,792,639,654 instructions # 1.56 insn per cycle + 1.079850324 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -73,19 +73,19 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.944448e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.006332e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.006332e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.892855e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.940587e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.940587e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.578239 sec - 17,180,177,907 cycles # 3.077 GHz - 45,580,640,750 instructions # 2.65 insn per cycle - 5.586327034 seconds time elapsed +TOTAL : 5.719758 sec + 17,497,619,767 cycles # 3.056 GHz + 45,446,099,914 instructions # 2.60 insn per cycle + 5.726041885 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -100,19 +100,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.334544e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.533842e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.533842e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.325585e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.487595e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.487595e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.328630 sec - 10,265,865,180 cycles # 3.079 GHz - 28,067,447,778 instructions # 2.73 insn per cycle - 3.335847383 seconds time elapsed +TOTAL : 3.333037 sec + 10,354,211,298 cycles # 3.101 GHz + 27,955,092,209 instructions # 2.70 insn per cycle + 3.339341303 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2543) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -127,19 +127,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.196651e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.682146e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.682146e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.229345e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.633133e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.633133e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.201402 sec - 6,462,352,884 cycles # 2.927 GHz - 12,911,212,720 instructions # 2.00 insn per cycle - 2.218781232 seconds time elapsed +TOTAL : 2.186507 sec + 6,395,865,888 cycles # 2.918 GHz + 12,794,721,791 instructions # 2.00 insn per cycle + 2.192626317 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -154,19 +154,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.619696e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.207150e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.207150e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.718956e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.198053e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.198053e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.049198 sec - 5,990,685,994 cycles # 2.915 GHz - 12,289,610,651 instructions # 2.05 insn per cycle - 2.062389699 seconds time elapsed +TOTAL : 2.015312 sec + 5,880,292,764 cycles # 2.910 GHz + 12,172,549,562 instructions # 2.07 insn per cycle + 2.021489543 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2414) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -181,20 +181,20 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.732022e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.981030e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.981030e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.795700e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.000887e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.000887e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.995536 sec - 6,173,358,415 cycles # 2.057 GHz - 8,641,899,643 instructions # 1.40 insn per cycle - 3.012705154 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1797) +TOTAL : 2.946657 sec + 6,072,408,903 cycles # 2.057 GHz + 8,534,252,358 instructions # 1.41 insn per cycle + 2.952768361 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1451) (512y: 122) (512z: 1797) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index 76ed0eee8f..531c093860 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-08-15_08:41:20 +DATE: 2023-10-25_19:15:40 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.644029e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.145205e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.252895e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.085961e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.172637e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270805e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 0.635318 sec - 2,561,422,666 cycles # 2.945 GHz - 3,393,296,507 instructions # 1.32 insn per cycle - 0.929392556 seconds time elapsed +TOTAL : 0.603556 sec + 2,510,793,087 cycles # 3.030 GHz + 3,673,074,819 instructions # 1.46 insn per cycle + 0.885869264 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -64,19 +64,19 @@ Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) 
========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.979808e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.043822e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.043822e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.938086e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.988073e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.988073e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 5.460485 sec - 17,004,589,822 cycles # 3.113 GHz - 45,522,952,314 instructions # 2.68 insn per cycle - 5.466956367 seconds time elapsed +TOTAL : 5.572089 sec + 17,339,920,977 cycles # 3.110 GHz + 45,401,031,280 instructions # 2.62 insn per cycle + 5.577099493 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.390158e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.596269e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.596269e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.366267e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.533818e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.533818e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.256443 sec - 10,096,422,053 cycles # 3.096 GHz - 27,887,588,220 instructions # 2.76 insn per cycle - 3.262761314 seconds time elapsed +TOTAL : 3.275090 sec + 10,190,622,487 cycles # 3.108 GHz + 27,770,717,333 instructions # 2.73 insn per cycle + 3.280028443 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2543) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.186500e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.673124e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.673124e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.358680e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.785880e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.785880e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.186284 sec - 6,324,981,711 cycles # 2.887 GHz - 12,609,506,928 instructions # 1.99 insn per cycle - 2.202007823 seconds time elapsed +TOTAL : 2.118513 sec + 6,227,899,517 cycles # 2.934 GHz + 12,490,736,505 instructions # 2.01 insn per cycle + 2.123796775 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.757703e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.364269e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.364269e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.791831e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.300158e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.300158e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.989394 sec - 5,802,568,748 cycles # 2.909 GHz - 11,953,910,414 
instructions # 2.06 insn per cycle - 2.013447216 seconds time elapsed +TOTAL : 1.975789 sec + 5,740,046,861 cycles # 2.900 GHz + 11,834,515,828 instructions # 2.06 insn per cycle + 1.980847261 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2414) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -168,20 +168,20 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.789206e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.042238e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.042238e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.818538e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.030811e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.030811e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.931825 sec - 6,005,179,047 cycles # 2.045 GHz - 8,349,149,388 instructions # 1.39 insn per cycle - 2.943653883 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1797) +TOTAL : 2.912399 sec + 5,874,294,388 cycles # 2.014 GHz + 8,239,488,482 instructions # 1.40 insn per cycle + 2.917538602 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1451) (512y: 122) 
(512z: 1797) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt index 490b24f7f8..ee80d49776 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-08-15_08:38:04 +DATE: 2023-10-25_19:12:36 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.668263e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.155660e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.265888e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.085112e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.169879e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.269257e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.576348 sec - 2,379,994,863 cycles # 2.928 GHz 
- 3,380,609,648 instructions # 1.42 insn per cycle - 0.869854185 seconds time elapsed +TOTAL : 0.542204 sec + 2,298,212,603 cycles # 3.002 GHz + 3,616,714,256 instructions # 1.57 insn per cycle + 0.823159797 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -64,19 +64,19 @@ Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.967394e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.031238e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.031238e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.902640e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.950734e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.950734e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.438543 sec - 16,825,741,082 cycles # 3.092 GHz - 45,503,772,977 instructions # 2.70 insn per cycle - 5.444908289 seconds time elapsed +TOTAL : 5.613770 sec + 17,167,591,716 cycles # 3.056 GHz + 45,385,422,779 instructions # 2.64 insn per cycle + 5.618850316 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.394004e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.599065e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.599065e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.356859e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.522784e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.522784e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.196288 sec - 9,907,973,976 cycles # 3.097 GHz - 27,884,963,666 instructions # 2.81 insn per cycle - 3.217592118 seconds time elapsed +TOTAL : 3.227055 sec + 10,007,434,150 cycles # 3.097 GHz + 27,771,321,943 instructions # 2.78 insn per cycle + 3.232084319 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2543) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.302667e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.812822e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.812822e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.339247e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.757472e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.757472e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.081335 sec - 6,127,956,387 cycles # 2.939 GHz - 12,618,745,029 instructions # 2.06 insn per cycle - 2.096904001 seconds time elapsed +TOTAL : 2.067924 sec + 6,064,030,334 cycles # 2.927 GHz + 12,508,006,764 instructions # 2.06 insn per cycle + 2.072907317 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.775458e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.382901e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.382901e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.874688e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.380385e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.380385e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.920774 sec - 5,660,233,376 cycles # 2.940 GHz - 12,001,022,164 instructions # 2.12 insn per cycle - 1.933401381 seconds time elapsed +TOTAL : 1.887776 sec + 5,540,691,746 cycles # 2.929 GHz + 11,883,645,896 instructions # 2.14 insn per cycle + 1.892908998 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2414) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -168,20 +168,20 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.842171e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
4.103040e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.103040e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.817386e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.022798e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.022798e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.834457 sec - 5,802,842,720 cycles # 2.045 GHz - 8,396,122,638 instructions # 1.45 insn per cycle - 2.846418059 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1797) +TOTAL : 2.850472 sec + 5,706,017,461 cycles # 1.999 GHz + 8,290,142,366 instructions # 1.45 insn per cycle + 2.855563486 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1451) (512y: 122) (512z: 1797) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index 0d615af3df..1b9c9ee7df 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -36,23 +36,23 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-08-15_08:34:53 +DATE: 2023-10-25_19:09:35 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.019428e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.157075e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.265817e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.145651e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.173920e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.273175e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.759005 sec - 2,920,260,499 cycles # 2.930 GHz - 4,235,878,653 instructions # 1.45 insn per cycle - 1.056436552 seconds time elapsed +TOTAL : 0.691895 sec + 2,781,064,477 cycles # 3.018 GHz + 4,402,279,507 instructions # 1.58 insn per cycle + 0.980225226 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -66,19 +66,19 @@ Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.955683e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.020031e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.020031e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.929230e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.978722e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.978722e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.468306 sec - 16,841,401,584 cycles # 3.078 GHz - 45,503,912,146 instructions # 2.70 insn per cycle - 5.474662171 seconds time elapsed +TOTAL : 5.538774 sec + 17,159,821,844 cycles # 3.096 GHz + 45,385,185,100 instructions # 2.64 insn per cycle + 5.543842374 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -92,19 +92,19 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.390321e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.596833e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.596833e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.287540e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.449504e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.449504e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.199564 sec - 9,905,525,381 cycles # 3.092 GHz - 27,884,768,172 instructions # 2.82 insn per cycle - 3.215066669 seconds time elapsed +TOTAL : 3.292731 sec + 10,016,590,673 cycles # 3.038 GHz + 27,771,485,458 instructions # 2.77 insn per cycle + 3.297740861 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2543) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -118,19 +118,19 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.267750e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.776839e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.776839e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.325678e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.741826e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.741826e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.094602 sec - 6,143,328,303 cycles # 2.927 GHz - 12,622,735,883 instructions # 2.05 insn per cycle - 2.110762418 seconds time elapsed +TOTAL : 2.072513 sec + 6,058,236,283 cycles # 2.917 GHz + 12,507,380,733 instructions # 2.06 insn per cycle + 2.077678265 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -144,19 +144,19 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.728979e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.335326e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
6.335326e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.871793e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.376853e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.376853e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.934965 sec - 5,651,469,044 cycles # 2.914 GHz - 12,001,153,045 instructions # 2.12 insn per cycle - 1.946292203 seconds time elapsed +TOTAL : 1.888593 sec + 5,531,298,122 cycles # 2.922 GHz + 11,883,369,769 instructions # 2.15 insn per cycle + 1.893708699 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2414) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -170,20 +170,20 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.774699e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.028447e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.028447e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.825270e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.031884e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.031884e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.882840 sec - 5,816,807,873 cycles # 2.015 GHz - 8,396,518,147 instructions # 1.44 insn per 
cycle - 2.894278819 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1797) +TOTAL : 2.844855 sec + 5,686,333,748 cycles # 1.997 GHz + 8,290,317,138 instructions # 1.46 insn per cycle + 2.849943056 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1451) (512y: 122) (512z: 1797) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index 22a0517515..11300e6895 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-08-15_07:57:57 +DATE: 2023-10-25_18:34:57 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.008544e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.166017e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.262922e+08 ) 
sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.181607e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.171006e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.264343e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.525464 sec - 2,217,740,109 cycles # 2.909 GHz - 2,859,445,292 instructions # 1.29 insn per cycle - 0.819678835 seconds time elapsed +TOTAL : 0.507998 sec + 2,247,373,928 cycles # 3.001 GHz + 3,245,400,581 instructions # 1.44 insn per cycle + 0.806174140 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -64,19 +64,19 @@ Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.012615e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.079882e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.079882e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.976266e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.029293e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.029293e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.316949 sec - 16,451,778,287 cycles # 3.092 GHz - 44,494,546,010 instructions # 2.70 insn per cycle - 5.323396072 
seconds time elapsed +TOTAL : 5.409225 sec + 16,681,628,869 cycles # 3.082 GHz + 44,378,235,380 instructions # 2.66 insn per cycle + 5.414507921 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 576) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.525477e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.747634e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.747634e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.514456e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.696441e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.696441e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.089064 sec - 9,483,642,291 cycles # 3.070 GHz - 26,735,070,417 instructions # 2.82 insn per cycle - 3.100712316 seconds time elapsed +TOTAL : 3.086332 sec + 9,510,708,832 cycles # 3.078 GHz + 26,620,808,250 instructions # 2.80 insn per cycle + 3.091424217 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2339) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.814360e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.235072e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.235072e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.735122e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.074181e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.074181e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.284241 sec - 6,672,382,951 cycles # 2.914 GHz - 14,170,881,099 instructions # 2.12 insn per cycle - 2.296317691 seconds time elapsed +TOTAL : 2.319139 sec + 6,584,764,475 cycles # 2.834 GHz + 14,057,249,658 instructions # 2.13 insn per cycle + 2.324235278 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2753) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.075753e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.538831e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.538831e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.117258e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.494373e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.494373e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.170660 sec - 6,388,748,184 cycles # 2.937 GHz - 13,691,137,225 instructions # 2.14 insn per cycle - 2.183487516 seconds time elapsed +TOTAL : 2.150419 sec + 6,273,792,377 cycles # 2.911 GHz + 13,574,431,184 instructions # 2.16 insn per cycle + 2.155695099 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2405) (512y: 296) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest.exe @@ -168,19 +168,19 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.631677e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.866426e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.866426e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.685746e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.878776e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.878776e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.993412 sec - 6,037,646,643 cycles # 2.013 GHz - 10,189,515,870 instructions # 1.69 insn per cycle - 3.005471354 seconds time elapsed +TOTAL : 2.947596 sec + 5,924,739,734 cycles # 2.007 GHz + 10,074,038,054 instructions # 1.70 insn per cycle + 2.952990655 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1324) (512y: 208) (512z: 1980) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 3f97b3c235..72e4f7ff9f 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-08-15_08:17:56 +DATE: 2023-10-25_18:53:45 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.684024e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.156752e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.265433e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.139570e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.179113e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.275835e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.527445 sec - 2,196,196,584 cycles # 2.883 GHz - 2,837,159,945 instructions # 1.29 insn per cycle - 0.820745401 seconds time elapsed +TOTAL : 0.507615 sec + 2,255,394,284 cycles # 3.018 GHz + 3,233,142,981 instructions # 1.43 insn per cycle + 0.804645133 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -64,20 +64,20 @@ Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) 
========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.556304e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.666245e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.666245e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.504791e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.591296e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.591296e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.208653 sec - 12,868,804,128 cycles # 3.055 GHz - 34,451,363,346 instructions # 2.68 insn per cycle - 4.214976170 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 680) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.290630 sec + 13,015,421,868 cycles # 3.031 GHz + 34,406,707,609 instructions # 2.64 insn per cycle + 4.295722046 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 686) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -90,19 +90,19 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.168050e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.345967e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.345967e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.162383e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.308946e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.308946e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.416327 sec - 10,630,599,247 cycles # 3.109 GHz - 24,137,030,173 instructions # 2.27 insn per cycle - 3.432016204 seconds time elapsed +TOTAL : 3.418889 sec + 10,608,875,216 cycles # 3.099 GHz + 24,023,081,327 instructions # 2.26 insn per cycle + 3.424022271 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] 
[inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.809442e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.228508e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.228508e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.707866e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.031974e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.031974e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.285822 sec - 6,681,741,196 cycles # 2.917 GHz - 12,529,897,185 instructions # 1.88 insn per cycle - 2.301347809 seconds time elapsed +TOTAL : 2.329926 sec + 6,692,684,984 cycles # 2.867 GHz + 12,415,083,748 instructions # 1.86 insn per cycle + 2.335033574 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3156) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 3.2588037208240405e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
5.063333e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.530304e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.530304e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.126119e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.508462e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.508462e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.175701 sec - 6,360,455,844 cycles # 2.917 GHz - 11,704,234,219 instructions # 1.84 insn per cycle - 2.187563538 seconds time elapsed +TOTAL : 2.148404 sec + 6,244,000,039 cycles # 2.900 GHz + 11,586,646,765 instructions # 1.86 insn per cycle + 2.153653502 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2692) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest.exe @@ -168,19 +168,19 @@ Relative difference = 3.2588037208240405e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.982609e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.263683e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.263683e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.121660e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.365055e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.365055e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 
-TOTAL : 2.738643 sec - 5,460,660,660 cycles # 1.991 GHz - 9,426,098,964 instructions # 1.73 insn per cycle - 2.754227372 seconds time elapsed +TOTAL : 2.647363 sec + 5,336,886,997 cycles # 2.013 GHz + 9,309,895,095 instructions # 1.74 insn per cycle + 2.652547834 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2116) (512y: 282) (512z: 1958) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index b296b295a7..b8d2933568 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-08-15_08:18:24 +DATE: 2023-10-25_18:54:12 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.675639e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.156622e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.263843e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.136826e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.174839e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270632e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.526110 sec - 2,203,683,409 cycles # 2.889 GHz - 2,882,164,152 instructions # 1.31 insn per cycle - 0.820053431 seconds time elapsed +TOTAL : 0.507009 sec + 2,214,441,092 cycles # 3.000 GHz + 3,174,554,342 instructions # 1.43 insn per cycle + 0.795728101 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -64,20 +64,20 @@ Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.681420e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.800838e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.800838e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.647788e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.745237e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.745237e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.015999 sec - 12,417,250,293 cycles # 3.088 GHz - 35,154,287,962 instructions # 2.83 insn per cycle - 
4.022456422 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 456) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.064620 sec + 12,384,265,473 cycles # 3.044 GHz + 35,059,405,316 instructions # 2.83 insn per cycle + 4.069669316 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 457) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -90,19 +90,19 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.100804e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.279717e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.279717e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.138033e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.282936e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.282936e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.491921 sec - 10,723,909,681 cycles # 3.069 GHz - 23,214,463,378 instructions # 2.16 insn per cycle - 3.504911524 seconds time elapsed +TOTAL : 3.444702 sec + 10,688,625,544 cycles # 3.099 GHz + 23,099,820,217 instructions # 2.16 insn per cycle + 3.449755846 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2363) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.133216e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.615370e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.615370e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.237103e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.643499e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.643499e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.147355 sec - 6,243,347,073 cycles # 2.902 GHz - 12,085,563,659 instructions # 1.94 insn per cycle - 2.162840079 seconds time elapsed +TOTAL : 2.105153 sec + 6,160,181,225 cycles # 2.920 GHz + 11,969,984,936 instructions # 1.94 insn per cycle + 2.110284671 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2511) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.286872e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.794622e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.794622e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.378177e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.801357e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.801357e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.089602 sec - 6,108,057,445 cycles # 2.919 GHz - 11,259,587,097 instructions # 1.84 insn per cycle - 2.105276019 seconds time elapsed +TOTAL : 2.052827 sec + 6,017,899,320 cycles # 2.925 GHz + 11,142,057,093 instructions # 1.85 insn per cycle + 2.058039153 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2128) (512y: 174) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest.exe @@ -168,20 +168,20 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] 
[hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.171723e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.483529e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.483529e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.233582e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.488303e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.488303e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.619031 sec - 5,330,469,490 cycles # 2.031 GHz - 9,148,199,525 instructions # 1.72 insn per cycle - 2.634939963 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1651) (512y: 208) (512z: 1566) +TOTAL : 2.581242 sec + 5,224,244,555 cycles # 2.021 GHz + 9,033,433,625 instructions # 1.73 insn per cycle + 2.586440370 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1651) (512y: 208) (512z: 1567) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 34fa02a67a..24b477c6c2 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-08-15_07:58:26 +DATE: 2023-10-25_18:35:25 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.029364e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.652279e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.917363e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.085170e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.712610e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.977210e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.480244 sec - 2,046,925,223 cycles # 2.893 GHz - 2,612,431,160 instructions # 1.28 insn per cycle - 0.764545118 seconds time elapsed +TOTAL : 0.469467 sec + 2,068,301,293 cycles # 3.004 GHz + 3,012,364,622 instructions # 1.46 insn per cycle + 0.747476379 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -59,24 +59,24 @@ runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 2.028811e+00 -Avg ME (F77/CUDA) = 2.0288499668240547 -Relative difference = 1.920672948568199e-05 +Avg ME (F77/CUDA) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.043990e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.101952e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.101952e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.956122e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.010570e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.010570e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.215979 sec - 16,072,416,204 cycles # 3.078 GHz - 45,313,248,503 instructions # 2.82 insn per cycle - 5.222636507 seconds time elapsed +TOTAL : 5.445413 sec + 16,510,915,611 cycles # 3.030 GHz + 45,308,404,518 instructions # 2.74 insn per cycle + 5.450456954 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 6.443528218283898e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.863735e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.234843e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.234843e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.773825e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.129881e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.129881e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.242573 sec - 6,925,966,298 cycles # 3.081 GHz - 17,681,196,805 instructions # 2.55 insn per cycle - 2.256043920 seconds time elapsed +TOTAL : 2.278501 sec + 7,055,633,229 cycles # 3.091 GHz + 17,671,724,757 instructions # 2.50 insn per cycle + 2.283347357 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.888993e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.014224e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.014224e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.823101e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.001328e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.001328e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.264930 sec - 3,728,910,193 cycles # 2.939 GHz - 8,261,400,317 instructions # 2.22 insn per cycle - 1.276544799 seconds time elapsed +TOTAL : 1.269969 sec + 3,728,138,097 cycles # 2.926 GHz + 8,250,735,018 instructions # 2.21 insn per cycle + 1.274926428 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = 
FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.349259e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.071142e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.071142e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.356043e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.069996e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.069996e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.206356 sec - 3,546,231,866 cycles # 2.927 GHz - 7,872,842,225 instructions # 2.22 insn per cycle - 1.212871301 seconds time elapsed +TOTAL : 1.201889 sec + 3,525,312,934 cycles # 2.923 GHz + 7,861,079,341 instructions # 2.23 insn per cycle + 1.206782783 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3193) (512y: 20) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -168,20 +168,20 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.069995e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.813167e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.813167e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.081368e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 7.847086e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.847086e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.568624 sec - 3,264,339,653 cycles # 2.075 GHz - 6,096,363,429 instructions # 1.87 insn per cycle - 1.580964642 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2367) (512y: 24) (512z: 2156) +TOTAL : 1.564981 sec + 3,252,144,204 cycles # 2.073 GHz + 6,095,772,749 instructions # 1.87 insn per cycle + 1.569858235 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2377) (512y: 24) (512z: 2156) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index 699719261c..c228b2c37b 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -36,26 +36,26 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-08-15_08:28:46 +DATE: 2023-10-25_19:03:47 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.113852e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.906495e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.906495e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086808e+00 +- 3.414090e-03 ) GeV^0 -TOTAL : 0.687407 sec - 2,703,287,943 cycles # 2.930 GHz - 3,880,423,043 instructions # 1.44 insn per cycle - 0.979473385 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.513457e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.340252e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.340252e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 +TOTAL : 0.650789 sec + 2,641,546,515 cycles # 3.016 GHz + 4,117,903,371 instructions # 1.56 insn per cycle + 0.935043223 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -67,25 +67,25 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 2.028811e+00 -Avg ME (F77/CUDA) = 2.0288499668240547 -Relative difference = 1.920672948568199e-05 +Avg ME (F77/CUDA) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.040547e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.098289e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.098289e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.972248e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.026239e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.026239e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.266229 sec - 16,257,367,161 cycles # 3.084 GHz - 45,360,585,545 instructions # 2.79 insn per cycle - 5.273291355 seconds time elapsed +TOTAL : 5.440996 sec + 16,702,681,594 cycles # 3.067 GHz + 45,351,045,297 instructions # 2.72 insn per cycle + 5.446683603 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -100,19 +100,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.841948e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.207039e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.207039e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.605636e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.935791e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.935791e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.298048 sec - 7,125,726,743 cycles # 3.097 GHz - 17,962,120,405 instructions # 2.52 insn per cycle - 2.311237641 seconds time elapsed +TOTAL : 2.405458 sec + 7,261,686,615 cycles # 3.014 GHz + 17,953,553,750 instructions # 2.47 insn per cycle + 2.411441099 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -127,19 +127,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.492655e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.673572e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.673572e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.560330e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.721128e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.721128e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.367552 sec - 3,923,165,806 cycles # 2.856 GHz - 8,499,549,873 instructions # 2.17 insn per cycle - 1.385449516 seconds time elapsed +TOTAL : 1.356872 sec + 3,928,188,681 cycles # 2.884 GHz + 8,488,830,304 instructions # 2.16 insn per cycle + 1.362856063 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -154,19 +154,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.178718e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.052721e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.052721e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.116761e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.040482e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.040482e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.270952 sec - 3,751,683,787 cycles # 2.940 GHz - 8,110,729,315 instructions # 2.16 insn per cycle - 1.283890460 seconds time elapsed +TOTAL : 1.278534 sec + 3,740,578,395 cycles # 2.919 GHz + 8,100,523,605 instructions # 2.17 insn per cycle + 1.284258782 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3193) (512y: 20) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -181,20 +181,20 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.936240e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.658700e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.658700e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.953315e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.671270e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.671270e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.643742 sec - 3,464,570,040 cycles # 2.100 GHz - 6,351,528,205 instructions # 1.83 insn per cycle - 1.660374822 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2367) (512y: 24) (512z: 2156) +TOTAL : 1.636801 sec + 3,469,634,780 cycles # 2.114 GHz + 6,351,136,410 instructions # 1.83 insn per cycle + 1.642694122 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2377) (512y: 24) (512z: 2156) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index 8ab67a0fea..5dc74dfed7 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-08-15_08:41:49 +DATE: 2023-10-25_19:16:08 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.538632e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.606886e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.900636e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.063632e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.693440e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.968571e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 -TOTAL : 0.580696 sec - 2,338,833,755 cycles # 2.895 GHz - 3,122,120,979 instructions # 1.33 insn per cycle - 0.865057805 seconds time elapsed +TOTAL : 0.553622 sec + 2,317,649,288 cycles # 3.011 GHz + 3,439,584,300 instructions # 1.48 insn per cycle + 0.828678237 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -59,24 +59,24 @@ runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 2.028811e+00 -Avg ME (F77/CUDA) = 2.0288499668240547 -Relative difference = 1.920672948568199e-05 +Avg ME (F77/CUDA) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.041632e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.100080e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.100080e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.989342e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.044248e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.044248e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 5.280524 sec - 16,237,390,286 cycles # 3.076 GHz - 45,344,009,234 instructions # 2.79 insn per cycle - 5.286363951 seconds time elapsed +TOTAL : 5.407392 sec + 16,682,462,365 cycles # 3.083 GHz + 45,337,082,640 instructions # 2.72 insn per cycle + 5.412277054 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 6.443528218283898e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.818343e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.185778e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.185778e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.799956e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.158589e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.158589e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.314082 sec - 7,088,214,755 cycles # 3.056 GHz - 17,695,395,796 instructions # 2.50 insn per cycle - 2.326567496 seconds time elapsed +TOTAL : 2.320759 sec + 7,223,788,046 cycles # 3.108 GHz + 17,685,035,831 instructions # 2.45 insn per cycle + 2.325560432 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.843902e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.007868e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.007868e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.822474e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.004978e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.004978e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.324203 sec - 3,892,773,802 cycles # 2.929 GHz - 8,246,686,406 instructions # 2.12 insn per cycle - 1.335693323 seconds time elapsed +TOTAL : 1.324170 sec + 3,914,841,287 cycles # 2.948 GHz + 8,235,477,108 instructions # 2.10 insn per cycle + 1.328966517 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.319473e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.067358e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.067358e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.391242e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.078618e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.078618e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.265326 sec - 3,710,510,965 cycles # 2.921 GHz - 7,824,604,210 instructions # 2.11 insn per cycle - 1.271514391 seconds time elapsed +TOTAL : 1.253021 sec + 3,707,252,167 cycles # 2.949 GHz + 7,811,825,096 instructions # 2.11 insn per cycle + 1.257855017 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3193) (512y: 20) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -168,20 +168,20 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.065974e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.817314e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
7.817314e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.106245e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.850378e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.850378e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.622939 sec - 3,424,491,515 cycles # 2.104 GHz - 6,048,054,064 instructions # 1.77 insn per cycle - 1.628727410 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2367) (512y: 24) (512z: 2156) +TOTAL : 1.611554 sec + 3,420,500,042 cycles # 2.117 GHz + 6,046,541,541 instructions # 1.77 insn per cycle + 1.616370057 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2377) (512y: 24) (512z: 2156) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt index b810c04a14..7b90f03855 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-08-15_08:38:33 +DATE: 2023-10-25_19:13:04 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.598687e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.638860e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.937584e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.065322e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.700283e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.974533e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.532463 sec - 2,194,687,457 cycles # 2.866 GHz - 3,113,635,867 instructions # 1.42 insn per cycle - 0.825483096 seconds time elapsed +TOTAL : 0.498915 sec + 2,192,440,414 cycles # 3.004 GHz + 3,416,060,899 instructions # 1.56 insn per cycle + 0.787716102 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -59,24 +59,24 @@ runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 2.028811e+00 -Avg ME (F77/CUDA) = 2.0288499668240547 -Relative difference = 1.920672948568199e-05 +Avg ME (F77/CUDA) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.046386e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.104740e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.104740e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.966466e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.020648e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.020648e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.222255 sec - 16,104,081,465 cycles # 3.082 GHz - 45,313,673,726 instructions # 2.81 insn per cycle - 5.228069360 seconds time elapsed +TOTAL : 5.415648 sec + 16,529,341,853 cycles # 3.050 GHz + 45,309,866,535 instructions # 2.74 insn per cycle + 5.420402206 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 6.443528218283898e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.837866e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.208016e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.208016e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.693978e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.035776e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.035776e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.253970 sec - 6,919,832,712 cycles # 3.063 GHz - 17,681,383,889 instructions # 2.56 insn per cycle - 2.265893838 seconds time elapsed +TOTAL : 2.316382 sec + 7,054,105,370 cycles # 3.040 GHz + 17,671,721,016 instructions # 2.51 insn per cycle + 2.321167806 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.842268e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.006130e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.006130e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.830241e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.005280e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.005280e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.270817 sec - 3,727,622,493 cycles # 2.922 GHz - 8,261,948,722 instructions # 2.22 insn per cycle - 1.283347035 seconds time elapsed +TOTAL : 1.269396 sec + 3,738,194,435 cycles # 2.936 GHz + 8,251,074,147 instructions # 2.21 insn per cycle + 1.274249735 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.342312e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.070330e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.070330e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.392639e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.077705e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.077705e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.206395 sec - 3,540,586,382 cycles # 2.922 GHz - 7,873,268,970 instructions # 2.22 insn per cycle - 1.218817952 seconds time elapsed +TOTAL : 1.198333 sec + 3,534,628,897 cycles # 2.940 GHz + 7,862,127,936 instructions # 2.22 insn per cycle + 1.203142647 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3193) (512y: 20) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -168,20 +168,20 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.961762e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.678036e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
7.678036e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.097193e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.836994e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.836994e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.591067 sec - 3,266,654,573 cycles # 2.048 GHz - 6,096,994,037 instructions # 1.87 insn per cycle - 1.597459012 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2367) (512y: 24) (512z: 2156) +TOTAL : 1.558825 sec + 3,254,164,123 cycles # 2.082 GHz + 6,095,387,295 instructions # 1.87 insn per cycle + 1.563579525 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2377) (512y: 24) (512z: 2156) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index d169068d52..fdd315eb16 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -36,23 +36,23 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-08-15_08:35:22 +DATE: 2023-10-25_19:10:03 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.823545e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.634071e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.926324e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086808e+00 +- 3.414090e-03 ) GeV^0 -TOTAL : 0.634580 sec - 2,519,256,028 cycles # 2.927 GHz - 3,597,945,906 instructions # 1.43 insn per cycle - 0.919708259 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 9.170150e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.660447e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.933538e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 +TOTAL : 0.602645 sec + 2,479,148,123 cycles # 2.998 GHz + 3,864,785,421 instructions # 1.56 insn per cycle + 0.885563297 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 @@ -61,24 +61,24 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 2.028811e+00 -Avg ME (F77/CUDA) = 2.0288499668240547 -Relative difference = 1.920672948568199e-05 +Avg ME (F77/CUDA) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.047792e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.106404e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.106404e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.957679e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.011741e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.011741e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.205858 sec - 16,074,209,490 cycles # 3.085 GHz - 45,313,436,920 instructions # 2.82 insn per cycle - 5.212589062 seconds time elapsed +TOTAL : 5.439322 sec + 16,519,636,229 cycles # 3.035 GHz + 45,307,914,586 instructions # 2.74 insn per cycle + 5.444150127 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -92,19 +92,19 @@ Relative difference = 6.443528218283898e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.875281e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.250038e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.250038e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.773026e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.136685e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.136685e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.236319 sec - 6,919,411,058 cycles # 3.088 GHz - 17,681,439,898 instructions # 2.56 insn per cycle - 2.242251935 seconds time elapsed +TOTAL : 2.280272 sec + 7,068,660,475 cycles # 3.094 GHz + 17,671,452,966 instructions # 2.50 insn per cycle + 2.285202578 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -118,19 +118,19 @@ Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.834711e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.005706e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.005706e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.805997e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.992625e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.992625e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.271538 sec - 3,732,044,660 cycles # 2.923 GHz - 8,262,000,987 instructions # 2.21 insn per cycle - 1.283281912 seconds time elapsed +TOTAL : 1.272643 sec + 3,731,426,488 cycles # 2.926 GHz + 8,249,195,685 instructions # 2.21 insn per cycle + 1.277241448 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -144,19 +144,19 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.348762e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.071277e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.071277e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.399444e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.076890e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.076890e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.205855 sec - 3,543,052,493 cycles # 2.926 GHz - 7,873,220,879 instructions # 2.22 insn per cycle - 1.217711284 seconds time elapsed +TOTAL : 1.197731 sec + 3,530,731,182 cycles # 2.938 GHz + 7,860,812,005 instructions # 2.23 insn per cycle + 1.202556944 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3193) (512y: 20) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -170,20 +170,20 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.085582e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.832029e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
7.832029e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.091593e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.835046e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.835046e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.564744 sec - 3,257,258,720 cycles # 2.075 GHz - 6,096,841,191 instructions # 1.87 insn per cycle - 1.580476949 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2367) (512y: 24) (512z: 2156) +TOTAL : 1.560736 sec + 3,257,981,590 cycles # 2.083 GHz + 6,095,878,647 instructions # 1.87 insn per cycle + 1.565536774 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2377) (512y: 24) (512z: 2156) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index 6ff4bf4cd5..663a41142c 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-08-15_07:58:51 +DATE: 2023-10-25_18:35:49 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.036556e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.702022e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.979115e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.096390e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.766631e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.047368e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.482274 sec - 2,051,946,623 cycles # 2.890 GHz - 2,621,601,452 instructions # 1.28 insn per cycle - 0.767984472 seconds time elapsed +TOTAL : 0.467013 sec + 2,056,734,142 cycles # 2.997 GHz + 2,992,962,147 instructions # 1.46 insn per cycle + 0.744434014 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -59,24 +59,24 @@ runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 2.028811e+00 -Avg ME (F77/CUDA) = 2.0288499668240547 -Relative difference = 1.920672948568199e-05 +Avg ME (F77/CUDA) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.072485e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.132741e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.132741e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.031007e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.087778e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.087778e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.147272 sec - 15,849,140,606 cycles # 3.077 GHz - 44,489,793,003 instructions # 2.81 insn per cycle - 5.153665912 seconds time elapsed +TOTAL : 5.245108 sec + 16,244,805,475 cycles # 3.095 GHz + 44,484,348,190 instructions # 2.74 insn per cycle + 5.249986656 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 576) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 6.443528218283898e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.721713e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.243717e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.243717e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.358927e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.815806e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.815806e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.918320 sec - 5,932,795,544 cycles # 3.085 GHz - 16,978,686,078 instructions # 2.86 insn per cycle - 1.930651299 seconds time elapsed +TOTAL : 2.039403 sec + 6,083,169,654 cycles # 2.982 GHz + 16,972,342,736 instructions # 2.79 insn per cycle + 2.044363213 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2881) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.428912e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.059732e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.059732e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.400912e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.009313e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.009313e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.715657 sec - 5,020,215,358 cycles # 2.917 GHz - 10,225,262,266 instructions # 2.04 insn per cycle - 1.727545908 seconds time elapsed +TOTAL : 1.720669 sec + 5,008,260,515 cycles # 2.904 GHz + 10,214,809,232 instructions # 2.04 insn per cycle + 1.725527481 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision 
= FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.480209e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.124270e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.124270e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.537295e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.168989e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.168989e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.703473 sec - 4,964,855,233 cycles # 2.906 GHz - 9,948,546,373 instructions # 2.00 insn per cycle - 1.716183447 seconds time elapsed +TOTAL : 1.684974 sec + 4,937,248,513 cycles # 2.923 GHz + 9,938,060,774 instructions # 2.01 insn per cycle + 1.689989340 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3789) (512y: 2) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest.exe @@ -168,19 +168,19 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.068334e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.440911e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.440911e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.077645e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 5.456866e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.456866e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 2.161474 sec - 4,375,796,303 cycles # 2.024 GHz - 8,454,458,995 instructions # 1.93 insn per cycle - 2.173311929 seconds time elapsed +TOTAL : 2.147132 sec + 4,363,010,014 cycles # 2.028 GHz + 8,442,845,303 instructions # 1.94 insn per cycle + 2.152072523 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2882) (512y: 4) (512z: 2751) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index 568b6f41d6..d0aa02b37a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-08-15_08:18:52 +DATE: 2023-10-25_18:54:38 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.616621e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.635958e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.931720e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.072434e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.686668e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.952500e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.485656 sec - 2,051,660,572 cycles # 2.884 GHz - 2,621,055,870 instructions # 1.28 insn per cycle - 0.770512814 seconds time elapsed +TOTAL : 0.467510 sec + 2,069,160,049 cycles # 3.005 GHz + 2,965,842,897 instructions # 1.43 insn per cycle + 0.745986723 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -59,24 +59,24 @@ runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 2.028811e+00 -Avg ME (F77/CUDA) = 2.0288499668240547 -Relative difference = 1.920672948568199e-05 +Avg ME (F77/CUDA) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.632191e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.730309e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.730309e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.486168e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.574733e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.574733e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.071252 sec - 12,383,661,598 cycles # 3.038 GHz - 34,383,527,670 instructions # 2.78 insn per cycle - 4.077255403 seconds time elapsed +TOTAL : 4.300765 sec + 12,613,573,541 cycles # 2.930 GHz + 34,394,223,521 instructions # 2.73 insn per cycle + 4.305708849 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 696) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.524261e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.028231e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.028231e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.423404e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.902037e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.902037e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.984266 sec - 6,019,410,390 cycles # 3.025 GHz - 14,885,531,276 instructions # 2.47 insn per cycle - 1.996173107 seconds time elapsed +TOTAL : 2.018730 sec + 6,098,231,874 cycles # 3.015 GHz + 14,875,099,697 instructions # 2.44 insn per cycle + 2.023701584 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3009) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 1.8746278463897685e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.506887e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.379568e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.379568e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.288210e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.081005e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.081005e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.483333 sec - 4,312,836,331 cycles # 2.899 GHz - 9,052,928,178 instructions # 2.10 insn per cycle - 1.496086705 seconds time elapsed +TOTAL : 1.521473 sec + 4,271,996,539 cycles # 2.800 GHz + 9,042,309,170 instructions # 2.12 insn per cycle + 1.526427437 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4445) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 9.857617164523888e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = 
FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.730445e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.651954e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.651954e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.762492e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.667202e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.667202e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.441275 sec - 4,220,027,025 cycles # 2.918 GHz - 8,688,292,013 instructions # 2.06 insn per cycle - 1.456976050 seconds time elapsed +TOTAL : 1.433726 sec + 4,213,011,776 cycles # 2.930 GHz + 8,676,320,241 instructions # 2.06 insn per cycle + 1.438715591 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4244) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest.exe @@ -168,19 +168,19 @@ Relative difference = 9.857617164523888e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.856381e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.355382e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.355382e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.878645e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 6.382089e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.382089e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.875924 sec - 3,851,740,176 cycles # 2.048 GHz - 7,831,569,152 instructions # 2.03 insn per cycle - 1.891782027 seconds time elapsed +TOTAL : 1.866240 sec + 3,836,736,420 cycles # 2.052 GHz + 7,820,066,058 instructions # 2.04 insn per cycle + 1.871114134 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4420) (512y: 0) (512z: 2556) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index fff20efbc0..de54279b1b 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-08-15_08:19:16 +DATE: 2023-10-25_18:55:01 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.645282e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.665963e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.984825e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.082452e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.759103e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.038632e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.482528 sec - 2,065,134,611 cycles # 2.914 GHz - 2,629,043,759 instructions # 1.27 insn per cycle - 0.768011802 seconds time elapsed +TOTAL : 0.468671 sec + 2,051,049,137 cycles # 2.975 GHz + 2,933,032,180 instructions # 1.43 insn per cycle + 0.745977930 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -59,25 +59,25 @@ runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 2.028811e+00 -Avg ME (F77/CUDA) = 2.0288499668240547 -Relative difference = 1.920672948568199e-05 +Avg ME (F77/CUDA) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.762246e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.873496e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.873496e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.728308e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.832886e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.832886e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 3.883907 sec - 11,714,309,141 cycles # 3.014 GHz - 35,120,310,634 instructions # 3.00 insn per cycle - 3.889825259 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 471) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.926359 sec + 11,754,711,596 cycles # 2.991 GHz + 35,130,335,361 instructions # 2.99 insn per cycle + 3.931207276 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 470) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -90,19 +90,19 @@ Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.556370e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.046775e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.046775e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.711585e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.225013e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.225013e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.972772 sec - 6,124,406,949 cycles # 3.099 GHz - 14,495,501,909 instructions # 2.37 insn per cycle - 1.985116410 seconds time elapsed +TOTAL : 1.918624 sec + 5,946,615,728 cycles # 3.093 GHz + 14,483,958,293 instructions # 2.44 insn per cycle + 1.923457186 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest.exe @@ -116,20 +116,20 @@ Relative difference = 1.7661780742548925e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.918220e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.878370e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.878370e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.855810e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.786782e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.786782e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.408181 sec - 4,130,037,548 cycles # 2.922 GHz - 8,871,524,390 instructions # 2.15 insn per cycle - 1.424438220 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3563) (512y: 0) (512z: 0) +TOTAL : 1.419134 sec + 4,148,821,411 cycles # 2.915 GHz + 8,888,021,481 instructions # 2.14 insn per cycle + 1.424042048 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3576) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -142,19 +142,19 @@ Relative difference = 1.0385521077446488e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.886901e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.839065e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.839065e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.911621e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.860580e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.860580e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.414127 sec - 4,130,988,668 cycles # 2.911 GHz - 8,436,330,650 instructions # 2.04 insn per cycle - 1.425576395 seconds time elapsed +TOTAL : 1.406850 sec + 4,137,327,451 cycles # 2.932 GHz + 8,424,234,551 instructions # 2.04 insn per cycle + 1.411791633 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3320) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest.exe @@ -168,19 +168,19 @@ Relative difference = 1.0385521077446488e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] 
[inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.909459e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.424475e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.424475e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.947023e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.462476e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.462476e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.862780 sec - 3,795,301,905 cycles # 2.035 GHz - 7,724,865,647 instructions # 2.04 insn per cycle - 1.874275599 seconds time elapsed +TOTAL : 1.845349 sec + 3,784,294,920 cycles # 2.046 GHz + 7,713,085,184 instructions # 2.04 insn per cycle + 1.850240418 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3436) (512y: 0) (512z: 2108) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 8baebbd2f3..e5b5571dad 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-08-15_07:59:17 +DATE: 2023-10-25_18:36:14 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.009693e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.164196e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.261064e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.194168e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.177440e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.271526e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.527651 sec - 2,207,019,653 cycles # 2.898 GHz - 2,847,015,082 instructions # 1.29 insn per cycle - 0.821000983 seconds time elapsed +TOTAL : 0.510074 sec + 2,249,628,773 cycles # 3.005 GHz + 3,213,054,699 instructions # 1.43 insn per cycle + 0.807578099 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -64,19 +64,19 @@ Relative difference = 3.241686432649386e-07 OK (relative difference <= 5E-3) 
========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.948815e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.011489e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.011489e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.906485e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.954717e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.954717e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.486964 sec - 16,967,538,345 cycles # 3.091 GHz - 45,670,632,768 instructions # 2.69 insn per cycle - 5.493623126 seconds time elapsed +TOTAL : 5.603351 sec + 17,321,977,891 cycles # 3.090 GHz + 45,555,371,368 instructions # 2.63 insn per cycle + 5.608662187 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow 
summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.425735e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.635769e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.635769e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.376355e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.544078e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.544078e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.167024 sec - 9,830,113,960 cycles # 3.099 GHz - 27,642,674,750 instructions # 2.81 insn per cycle - 3.179154372 seconds time elapsed +TOTAL : 3.208637 sec + 9,923,474,484 cycles # 3.089 GHz + 27,529,097,588 instructions # 2.77 insn per cycle + 3.213810203 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest.exe @@ -116,19 +116,19 @@ Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.356184e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.870584e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
5.870584e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.199043e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.595306e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.595306e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.063479 sec - 6,070,660,065 cycles # 2.936 GHz - 12,536,006,083 instructions # 2.07 insn per cycle - 2.079986594 seconds time elapsed +TOTAL : 2.118667 sec + 5,989,500,217 cycles # 2.821 GHz + 12,420,938,473 instructions # 2.07 insn per cycle + 2.123732572 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2753) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest.exe @@ -142,19 +142,19 @@ Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.722108e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.342034e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.342034e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.938752e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.452534e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.452534e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.938671 sec - 5,596,368,925 cycles # 2.879 GHz - 11,921,657,746 instructions # 2.13 insn per cycle - 
1.951359996 seconds time elapsed +TOTAL : 1.867638 sec + 5,462,355,916 cycles # 2.919 GHz + 11,803,822,809 instructions # 2.16 insn per cycle + 1.872842798 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2503) (512y: 146) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest.exe @@ -168,20 +168,20 @@ Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.872349e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.139324e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.139324e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.872672e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.090666e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.090666e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.814204 sec - 5,702,887,021 cycles # 2.023 GHz - 8,189,649,910 instructions # 1.44 insn per cycle - 2.825947269 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1655) (512y: 126) (512z: 1854) +TOTAL : 2.812736 sec + 5,598,441,704 cycles # 1.988 GHz + 8,083,507,451 instructions # 1.44 insn per cycle + 2.817822099 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1660) (512y: 126) (512z: 1854) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index 4970867e08..be7fa646e4 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -36,22 +36,22 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-08-15_07:59:45 +DATE: 2023-10-25_18:36:41 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.010623e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.171983e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.271089e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.207178e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.183934e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.279144e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.526957 sec - 2,195,732,629 cycles # 2.890 GHz - 2,848,939,282 instructions # 1.30 insn per cycle 
- 0.819885244 seconds time elapsed +TOTAL : 0.508835 sec + 2,247,853,776 cycles # 3.008 GHz + 3,199,999,470 instructions # 1.42 insn per cycle + 0.806333964 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -64,19 +64,19 @@ Relative difference = 3.241686432649386e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.000475e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.066045e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.066045e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.962576e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.013722e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.013722e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.347714 sec - 16,529,515,673 cycles # 3.088 GHz - 44,662,506,175 instructions # 2.70 insn per cycle - 5.354104244 seconds time elapsed +TOTAL : 5.445467 sec + 16,864,922,350 cycles # 3.095 GHz + 44,544,928,625 instructions # 2.64 insn per cycle + 5.450679101 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 574) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest.exe @@ -90,19 +90,19 @@ Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.580475e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.810771e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.810771e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.463332e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.638084e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.638084e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.034928 sec - 9,395,348,615 cycles # 3.090 GHz - 26,286,630,411 instructions # 2.80 insn per cycle - 3.051011696 seconds time elapsed +TOTAL : 3.129878 sec + 9,472,664,981 cycles # 3.022 GHz + 26,172,690,479 instructions # 2.76 insn per cycle + 3.134859663 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2397) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest.exe @@ -116,20 +116,20 @@ Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.626501e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.016802e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.016802e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.769147e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.097427e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.097427e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.374125 sec - 6,801,396,202 cycles # 2.859 GHz - 14,083,469,511 instructions # 2.07 insn per cycle - 2.401099228 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2876) (512y: 0) (512z: 0) +TOTAL : 2.300686 sec + 6,708,376,683 cycles # 2.910 GHz + 13,967,973,168 instructions # 2.08 insn per cycle + 2.306085049 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2875) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -142,20 +142,20 @@ Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.955222e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.399396e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.399396e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.897600e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.248967e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.248967e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.221630 sec - 6,484,638,076 cycles # 2.912 GHz - 13,521,792,259 instructions # 2.09 insn per cycle - 2.228210892 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2516) (512y: 302) (512z: 0) +TOTAL : 2.245188 sec + 6,373,380,149 cycles # 2.833 GHz + 13,408,335,115 instructions # 2.10 insn per cycle + 2.250462198 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2517) (512y: 302) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -168,19 +168,19 @@ Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.847830e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.110433e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.110433e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.921480e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.136739e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.136739e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.830298 sec - 5,669,518,548 cycles # 2.000 GHz - 9,292,775,819 instructions # 1.64 insn per cycle - 2.836480185 seconds time elapsed +TOTAL : 2.776161 sec + 5,570,521,591 cycles # 2.004 GHz + 9,179,596,120 instructions # 1.65 insn per cycle + 2.781332851 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1439) (512y: 212) (512z: 2053) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 97bc98c2b8..ebc965cc92 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -36,38 +36,38 @@ 
CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-08-15_08:00:15 +DATE: 2023-10-25_18:37:09 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.346608e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.920121e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.004289e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.017326e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.054319e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.066811e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.471560 sec - 1,973,950,685 cycles # 2.856 GHz - 2,560,293,237 instructions # 1.30 insn per cycle - 0.748224369 seconds time elapsed +TOTAL : 0.454634 sec + 1,998,901,545 cycles # 2.995 GHz + 2,904,650,993 instructions # 1.45 insn per cycle + 0.724253457 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.109718e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.317244e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.329134e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.124031e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.322289e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.333522e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.606081 sec - 2,471,054,363 cycles # 2.919 GHz - 3,431,530,401 instructions # 1.39 insn per cycle - 0.906910842 seconds time elapsed +TOTAL : 0.587604 sec + 2,471,633,267 cycles # 3.015 GHz + 3,730,138,752 instructions # 1.51 insn per cycle + 0.878825359 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +77,19 @@ Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.643786e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.660673e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.660673e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.582748e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.595260e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.595260e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.221919 sec - 19,243,608,253 cycles # 3.091 GHz - 59,004,769,590 instructions # 3.07 insn per cycle - 6.226949298 seconds time elapsed +TOTAL : 6.365564 sec + 19,741,191,972 cycles # 3.100 GHz + 58,964,992,174 instructions # 2.99 insn per cycle + 6.369485962 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1189) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe @@ -103,19 +103,19 @@ Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.901079e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.958523e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.958523e+04 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 4.832276e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.875225e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.875225e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.366236 sec - 10,452,664,686 cycles # 3.103 GHz - 31,036,674,927 instructions # 2.97 insn per cycle - 3.376930277 seconds time elapsed +TOTAL : 3.412509 sec + 10,571,839,132 cycles # 3.095 GHz + 30,995,598,646 instructions # 2.93 insn per cycle + 3.416791050 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5217) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe @@ -129,19 +129,19 @@ Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.651217e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.875611e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.875611e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.671174e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.843577e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.843577e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.720655 sec - 4,978,164,910 cycles # 2.887 GHz - 11,347,041,260 instructions # 2.28 insn per cycle - 1.725704266 seconds 
time elapsed +TOTAL : 1.715587 sec + 4,991,139,375 cycles # 2.903 GHz + 11,305,706,976 instructions # 2.27 insn per cycle + 1.719836361 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4552) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.070490e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.098163e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.098163e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.100643e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.122719e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.122719e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.554956 sec - 4,439,057,094 cycles # 2.849 GHz - 10,527,304,462 instructions # 2.37 insn per cycle - 1.569562073 seconds time elapsed +TOTAL : 1.509863 sec + 4,400,565,724 cycles # 2.908 GHz + 10,484,557,861 instructions # 2.38 insn per cycle + 1.513887056 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4296) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe @@ -181,20 +181,20 @@ Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.744665e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.887734e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.887734e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.479826e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.587663e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.587663e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.141679 sec - 4,129,307,754 cycles # 1.925 GHz - 5,947,543,583 instructions # 1.44 insn per cycle - 2.156123063 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1544) (512y: 95) (512z: 3573) +TOTAL : 2.214647 sec + 4,100,640,054 cycles # 1.849 GHz + 5,907,026,834 instructions # 1.44 insn per cycle + 2.218934371 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1551) (512y: 95) (512z: 3573) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index 86dff22d67..84eb682463 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -36,26 +36,26 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-08-15_08:29:11 +DATE: 2023-10-25_19:04:11 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.321686e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.599127e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.599127e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.737533e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.009392e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.009392e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.519457 sec - 2,163,157,863 cycles # 2.900 GHz - 2,940,679,116 instructions # 1.36 insn per cycle - 0.803109015 seconds time elapsed +TOTAL : 0.482468 sec + 2,092,072,357 cycles # 3.017 GHz + 3,191,388,192 instructions # 1.53 insn per cycle + 0.750549951 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -69,17 +69,17 @@ WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.396633e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.578542e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.578542e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.824576e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.948428e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.948428e+06 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.871347 sec - 3,319,474,295 cycles # 2.942 GHz - 4,919,963,320 instructions # 1.48 insn per cycle - 1.187456919 seconds time elapsed +TOTAL : 0.804158 sec + 3,165,815,286 cycles # 3.022 GHz + 5,087,211,394 instructions # 1.61 insn per cycle + 1.108277579 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,19 +90,19 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.621127e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.637548e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.637548e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.552798e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.565584e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.565584e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.280218 sec - 19,360,349,430 cycles # 3.081 GHz - 59,009,876,662 instructions # 3.05 insn per cycle - 6.285804933 seconds time elapsed +TOTAL : 6.447784 sec + 19,812,659,516 cycles # 3.072 GHz + 58,973,017,270 instructions # 2.98 insn per cycle + 6.451999180 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1189) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe @@ -117,19 +117,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.863217e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.920233e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.920233e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.843686e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.887129e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.887129e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.399573 sec - 10,479,860,332 cycles # 3.080 GHz - 31,084,657,008 instructions # 2.97 insn per cycle - 3.404850916 seconds time elapsed +TOTAL : 3.410617 sec + 10,606,495,630 cycles # 3.107 GHz + 31,045,364,778 instructions # 2.93 insn per cycle + 3.414818481 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5217) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe @@ -144,19 +144,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.661744e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.884797e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.884797e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.635348e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.810089e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.810089e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.726595 sec - 5,018,022,522 cycles # 2.899 GHz - 11,400,093,423 instructions # 2.27 insn per cycle - 1.732312900 seconds time elapsed +TOTAL : 1.728990 sec + 5,025,463,399 cycles # 2.900 GHz + 11,356,936,586 instructions # 2.26 insn per cycle + 1.733253508 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4552) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe @@ -171,19 +171,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.087002e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.114719e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.114719e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.091196e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.113656e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.113656e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.537550 sec - 4,464,241,884 cycles # 2.896 GHz - 10,576,995,228 instructions # 2.37 insn per cycle - 1.552181935 seconds time elapsed +TOTAL : 1.530073 sec + 4,437,395,928 cycles # 2.894 GHz + 10,533,774,197 instructions # 2.37 insn per cycle + 1.534310467 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4296) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe @@ -198,20 +198,20 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.671174e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.815552e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.815552e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.778593e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.893791e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.893791e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.169001 sec - 4,168,553,925 cycles # 1.921 GHz - 5,987,800,885 instructions # 1.44 insn per cycle - 2.179997665 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1544) (512y: 95) (512z: 3573) +TOTAL : 2.137109 sec + 4,135,271,327 cycles # 1.933 GHz + 5,946,987,935 instructions # 1.44 insn per cycle + 2.141484524 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1551) (512y: 95) (512z: 3573) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index 287719fb4c..5da42e2dfc 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -36,38 +36,38 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-08-15_08:00:44 +DATE: 2023-10-25_18:37:38 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.300130e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.841205e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.963795e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.980838e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.046634e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.059100e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.473190 sec - 1,977,903,116 cycles # 2.866 GHz - 2,527,671,450 instructions # 1.28 insn per cycle - 0.749450924 seconds time elapsed +TOTAL : 0.459445 sec + 2,001,624,062 cycles # 2.971 GHz + 2,897,227,747 instructions # 1.45 insn per cycle + 0.730848040 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.104513e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.309505e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.321059e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.119062e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.315389e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.326570e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.601033 sec - 2,446,341,332 cycles # 2.907 GHz - 3,477,720,369 instructions # 1.42 insn per cycle - 0.900988844 seconds time elapsed +TOTAL : 0.584635 sec + 2,502,191,176 cycles # 3.029 GHz + 3,799,894,385 instructions # 1.52 insn per cycle + 0.885312342 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -77,20 +77,20 @@ Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.621645e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.638256e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.638256e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.578276e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.590963e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.590963e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.273973 sec - 19,401,728,760 cycles # 3.091 GHz - 59,271,474,776 instructions # 3.05 insn per cycle - 6.279292583 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1314) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.377287 sec + 19,776,032,592 cycles # 3.100 GHz + 59,242,647,666 instructions # 3.00 insn per cycle + 6.381344291 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1315) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -103,19 +103,19 @@ Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.897061e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.954733e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.954733e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.838897e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.882635e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.882635e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.369303 sec - 10,321,836,606 cycles # 3.060 GHz - 30,745,027,571 instructions # 2.98 insn per cycle - 3.374742155 seconds time elapsed +TOTAL : 3.407218 sec + 10,428,150,513 cycles # 3.058 GHz + 30,703,821,983 instructions # 2.94 insn per cycle + 3.411368559 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5043) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest.exe @@ -129,19 +129,19 @@ Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 
11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.225882e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.431722e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.431722e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.472201e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.635749e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.635749e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.801331 sec - 5,141,865,663 cycles # 2.849 GHz - 11,828,366,799 instructions # 2.30 insn per cycle - 1.806756237 seconds time elapsed +TOTAL : 1.750914 sec + 5,109,907,048 cycles # 2.913 GHz + 11,785,108,632 instructions # 2.31 insn per cycle + 1.754997634 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4668) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 1.020691e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.045131e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.045131e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.023891e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.043074e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.043074e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.628258 sec - 4,713,345,183 cycles # 2.888 GHz - 11,074,061,148 instructions # 2.35 insn per cycle - 1.638619467 seconds time elapsed +TOTAL : 1.621556 sec + 4,691,054,117 cycles # 2.887 GHz + 11,032,599,545 instructions # 2.35 insn per cycle + 1.625732931 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4331) (512y: 245) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest.exe @@ -181,20 +181,20 @@ Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.733326e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.875000e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.875000e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.596531e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.705229e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.705229e+04 ) sec^-1 MeanMatrixElemValue = ( 
1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.143662 sec - 4,150,381,558 cycles # 1.933 GHz - 6,223,232,573 instructions # 1.50 insn per cycle - 2.158094654 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1458) (512y: 139) (512z: 3673) +TOTAL : 2.180528 sec + 4,124,129,963 cycles # 1.890 GHz + 6,174,744,538 instructions # 1.50 insn per cycle + 2.184771281 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1461) (512y: 139) (512z: 3675) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 2201bc5f3b..758058b159 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -36,60 +36,60 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-08-15_08:01:14 +DATE: 2023-10-25_18:38:06 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.406431e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.311489e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.406809e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.611847e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.385260e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.471189e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.453050 sec - 1,920,468,364 cycles # 2.866 GHz - 2,412,861,677 instructions # 1.26 insn per cycle - 0.727980038 seconds time elapsed +TOTAL : 0.437775 sec + 1,949,752,289 cycles # 2.999 GHz + 2,775,143,872 instructions # 1.42 insn per cycle + 0.707448913 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.336680e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.395129e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.463483e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.418789e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.455087e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.521596e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.498499 sec - 2,112,948,915 cycles # 2.903 GHz - 2,744,565,868 instructions # 1.30 insn per cycle - 0.786132716 seconds time elapsed +TOTAL : 0.482861 sec + 2,123,251,884 cycles # 3.013 GHz + 3,090,198,407 instructions # 1.46 insn per cycle + 0.761733855 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 1.412608e+00 -Avg ME (F77/CUDA) = 1.4132214343518683 -Relative difference = 0.0004342566032956241 +Avg ME (F77/CUDA) = 1.4132214346515752 +Relative difference = 0.00043425681546129636 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] 
[hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.696981e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.711531e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.711531e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.632015e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.645099e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.645099e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.097926 sec - 18,842,737,798 cycles # 3.089 GHz - 59,477,078,656 instructions # 3.16 insn per cycle - 6.103001324 seconds time elapsed +TOTAL : 6.245504 sec + 19,420,521,245 cycles # 3.108 GHz + 59,463,843,270 instructions # 3.06 insn per cycle + 6.249442801 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 961) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe @@ -103,20 +103,20 @@ Relative difference = 2.1728426918172542e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.464668e+04 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 8.611770e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.611770e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.406220e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.547669e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.547669e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.956560 sec - 5,827,138,618 cycles # 2.974 GHz - 16,930,455,676 instructions # 2.91 insn per cycle - 1.966949100 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5857) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.968760 sec + 5,998,257,000 cycles # 3.042 GHz + 16,914,468,455 instructions # 2.82 insn per cycle + 1.972914932 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5858) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -129,19 +129,19 @@ Relative difference = 3.2890090308261873e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.860314e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.928538e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.928538e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.859553e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.925073e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.925073e+05 ) 
sec^-1 MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.902356 sec - 2,622,038,190 cycles # 2.894 GHz - 6,157,146,340 instructions # 2.35 insn per cycle - 0.907479962 seconds time elapsed +TOTAL : 0.900948 sec + 2,632,220,925 cycles # 2.911 GHz + 6,140,096,248 instructions # 2.33 insn per cycle + 0.904996982 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5019) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.060318e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.145170e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.145170e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.072824e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.155063e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.155063e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.816442 sec - 2,380,166,524 cycles # 2.901 GHz - 5,718,523,485 instructions # 2.40 insn per cycle - 0.821587636 seconds time elapsed +TOTAL : 0.809948 sec + 2,370,894,209 cycles # 2.915 GHz + 5,701,521,318 instructions # 2.40 insn per cycle + 0.814071799 seconds time elapsed 
=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4804) (512y: 36) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest.exe @@ -181,20 +181,20 @@ Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.588424e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.639513e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.639513e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.607455e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.657302e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.657302e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.054803 sec - 2,062,209,519 cycles # 1.949 GHz - 3,380,248,799 instructions # 1.64 insn per cycle - 1.059522460 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2130) (512y: 40) (512z: 3776) +TOTAL : 1.040929 sec + 2,057,132,306 cycles # 1.970 GHz + 3,365,579,683 instructions # 1.64 insn per cycle + 1.044863677 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2141) (512y: 39) (512z: 3775) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest.exe [ 
PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index 7f43a19b3d..48beeeb5ad 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -36,26 +36,26 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-08-15_08:29:41 +DATE: 2023-10-25_19:04:39 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.946079e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.201180e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.201180e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.864071e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.240535e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.240535e+07 ) sec^-1 MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 -TOTAL : 0.481798 sec - 2,015,187,560 cycles # 2.853 GHz - 2,654,607,280 instructions # 1.32 insn per cycle - 0.765431808 seconds time elapsed +TOTAL : 0.462582 sec + 1,884,746,111 cycles # 2.811 GHz + 2,786,242,007 instructions # 1.48 insn per cycle + 0.730246160 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -69,40 +69,40 @@ WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.740355e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.667132e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.667132e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.737489e+02 +- 4.776370e+02 ) GeV^-2 -TOTAL : 0.659208 sec - 2,647,206,982 cycles # 2.918 GHz - 3,707,484,646 instructions # 1.40 insn per cycle - 0.964897832 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.695482e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.755030e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.755030e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.737500e+02 +- 4.776370e+02 ) GeV^-2 +TOTAL : 0.635503 sec + 2,447,308,351 cycles # 2.825 GHz + 3,823,535,894 instructions # 1.56 insn per cycle + 0.923575231 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 1.412608e+00 -Avg ME (F77/CUDA) = 1.4132214343518683 -Relative difference = 0.0004342566032956241 +Avg ME (F77/CUDA) = 1.4132214346515752 +Relative difference = 0.00043425681546129636 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.694248e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.708560e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.708560e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.567048e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.579966e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.579966e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.106197 sec - 18,844,738,268 cycles # 3.084 GHz - 59,481,932,456 instructions # 3.16 insn per cycle - 6.116008012 seconds time elapsed +TOTAL : 6.406446 sec + 19,444,692,901 cycles # 3.034 GHz + 59,468,886,107 instructions # 3.06 insn per cycle + 6.410558637 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 961) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe @@ -117,20 +117,20 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.506852e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.654162e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.654162e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.536135e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.681573e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.681573e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.951328 sec - 5,846,261,275 cycles # 2.992 GHz - 16,978,874,386 instructions # 2.90 insn per cycle - 1.956229643 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5857) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.943530 sec + 6,018,572,710 cycles # 3.091 GHz + 16,962,561,293 instructions # 2.82 insn per cycle + 1.947552922 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5858) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -144,19 +144,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.851729e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.920803e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.920803e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.852963e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.918436e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.918436e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.909940 sec - 2,643,321,044 cycles # 2.892 GHz - 6,194,221,153 instructions # 2.34 insn per cycle - 0.915140180 seconds time elapsed +TOTAL : 0.908122 sec + 2,646,262,038 cycles # 2.903 GHz + 6,176,972,836 instructions # 2.33 insn per cycle + 0.912119450 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5019) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe @@ -171,19 +171,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.038921e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.122854e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.122854e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.053883e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.135495e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.135495e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.831632 sec - 2,400,502,783 cycles # 2.878 GHz - 5,756,071,420 instructions # 2.40 insn per cycle - 0.836907229 seconds time elapsed +TOTAL : 0.821672 sec + 2,391,067,663 cycles # 2.897 GHz + 5,738,392,055 instructions # 2.40 insn per cycle + 0.825851117 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4804) (512y: 36) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest.exe @@ -198,20 +198,20 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.517398e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.565711e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.565711e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.562282e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.609907e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.609907e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.108961 sec - 2,086,070,681 cycles # 1.875 GHz - 3,422,372,371 instructions # 1.64 insn per cycle - 1.113925906 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2130) (512y: 40) (512z: 3776) +TOTAL : 1.075433 sec + 2,080,452,431 cycles # 1.928 GHz + 3,407,597,282 instructions # 1.64 insn per cycle + 1.079584991 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2141) (512y: 39) (512z: 3775) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index 1df556d75b..35d51d9f5b 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -36,60 +36,60 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-08-15_08:01:39 +DATE: 2023-10-25_18:38:30 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.422212e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.288151e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.386780e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.558695e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.304995e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.390429e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.448849 sec - 1,941,942,406 cycles # 2.915 GHz - 2,406,681,347 instructions # 1.24 insn per cycle - 0.723909819 seconds time elapsed +TOTAL : 0.441583 sec + 1,913,307,169 cycles # 2.938 GHz + 2,730,410,416 instructions # 1.43 insn per cycle + 0.710289745 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 248 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.352571e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.421119e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.492595e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.431804e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.480539e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.548305e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.502246 sec - 2,107,695,366 cycles # 2.885 GHz - 2,759,759,873 instructions # 1.31 insn per cycle - 0.790343780 seconds time elapsed +TOTAL : 0.484144 sec + 2,122,361,683 cycles # 3.000 GHz + 3,092,428,798 instructions # 1.46 insn per cycle + 0.764929414 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 1.412608e+00 -Avg ME (F77/CUDA) = 1.4132214343518683 -Relative difference = 0.0004342566032956241 +Avg ME (F77/CUDA) = 1.4132214346515752 +Relative difference = 0.00043425681546129636 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] 
[hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.716507e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.731257e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.731257e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.622945e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.636379e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.636379e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.052780 sec - 18,786,762,925 cycles # 3.104 GHz - 59,224,528,935 instructions # 3.15 insn per cycle - 6.057917316 seconds time elapsed +TOTAL : 6.266974 sec + 19,387,707,588 cycles # 3.092 GHz + 59,211,783,711 instructions # 3.05 insn per cycle + 6.270947254 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1027) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest.exe @@ -103,20 +103,20 @@ Relative difference = 2.1728426918172542e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.131862e+04 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.305099e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.305099e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.919855e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.077833e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.077833e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.815057 sec - 5,582,881,371 cycles # 3.069 GHz - 16,724,850,076 instructions # 3.00 insn per cycle - 1.826820128 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5623) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.856181 sec + 5,736,685,730 cycles # 3.085 GHz + 16,708,949,188 instructions # 2.91 insn per cycle + 1.860305013 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5624) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -129,19 +129,19 @@ Relative difference = 3.2890090308261873e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.627559e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.680257e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.680257e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.619777e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.669337e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.669337e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.027563 sec - 2,998,369,464 cycles # 2.909 GHz - 6,824,345,384 instructions # 2.28 insn per cycle - 1.032324331 seconds time elapsed +TOTAL : 1.031321 sec + 3,001,059,822 cycles # 2.901 GHz + 6,807,446,499 instructions # 2.27 insn per cycle + 1.035316846 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5670) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.739116e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.799649e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.799649e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.758092e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.816843e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.816843e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.963274 sec - 2,776,231,077 cycles # 2.872 GHz - 6,371,695,047 instructions # 2.30 insn per cycle - 0.974745543 seconds time elapsed +TOTAL : 0.951721 sec + 2,767,509,162 cycles # 2.897 GHz + 6,354,591,455 instructions # 2.30 insn per cycle + 0.955744845 seconds 
time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5429) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest.exe @@ -181,20 +181,20 @@ Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.448866e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.491409e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.491409e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.458284e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.499118e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.499118e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.154218 sec - 2,240,419,741 cycles # 1.935 GHz - 3,753,173,024 instructions # 1.68 insn per cycle - 1.165467448 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2375) (512y: 30) (512z: 4073) +TOTAL : 1.146088 sec + 2,235,083,857 cycles # 1.946 GHz + 3,731,059,413 instructions # 1.67 insn per cycle + 1.150018435 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2381) (512y: 29) (512z: 4070) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index bb3e881615..392905595e 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -36,38 +36,38 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-08-15_08:02:04 +DATE: 2023-10-25_18:38:54 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.290398e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.845540e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.964726e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.991876e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.048685e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.061466e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.470961 sec - 1,982,748,816 cycles # 2.880 GHz - 2,541,468,026 instructions # 1.28 insn per cycle - 0.747537205 seconds time elapsed +TOTAL : 0.460934 sec + 
1,966,721,120 cycles # 2.924 GHz + 2,827,577,653 instructions # 1.44 insn per cycle + 0.730736586 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.107272e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.315172e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.326836e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.122528e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.320226e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.331429e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.609050 sec - 2,450,461,311 cycles # 2.891 GHz - 3,472,533,054 instructions # 1.42 insn per cycle - 0.909431094 seconds time elapsed +TOTAL : 0.588908 sec + 2,472,359,151 cycles # 3.012 GHz + 3,773,127,523 instructions # 1.53 insn per cycle + 0.879878807 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +77,19 @@ Relative difference = 
4.418889885423659e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.600949e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.617125e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.617125e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.519449e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.531484e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.531484e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.332094 sec - 19,626,072,012 cycles # 3.102 GHz - 60,092,931,892 instructions # 3.06 insn per cycle - 6.337235404 seconds time elapsed +TOTAL : 6.525526 sec + 20,092,492,706 cycles # 3.078 GHz + 60,052,973,297 instructions # 2.99 insn per cycle + 6.529664742 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1224) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest.exe @@ -103,20 +103,20 @@ Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = 
SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.803362e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.859060e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.859060e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.869773e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.913961e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.913961e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.434987 sec - 10,282,391,139 cycles # 2.990 GHz - 30,780,626,926 instructions # 2.99 insn per cycle - 3.440411022 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5353) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.385815 sec + 10,415,517,369 cycles # 3.073 GHz + 30,737,885,914 instructions # 2.95 insn per cycle + 3.390029957 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5351) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -129,20 +129,20 @@ Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.662251e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.887127e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.887127e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.784627e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.958931e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.958931e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.720090 sec - 4,950,116,741 cycles # 2.872 GHz - 11,306,969,988 instructions # 2.28 insn per cycle - 1.725207077 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4684) (512y: 0) (512z: 0) +TOTAL : 1.696342 sec + 4,938,080,705 cycles # 2.905 GHz + 11,263,764,405 instructions # 2.28 insn per cycle + 1.700575900 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4683) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -155,20 +155,20 @@ Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.102470e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.131191e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.131191e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.113465e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.136337e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.136337e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.511102 sec - 4,375,018,658 cycles # 2.889 GHz - 10,478,373,615 instructions # 2.40 insn per cycle - 1.525805694 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4420) (512y: 83) (512z: 0) +TOTAL : 1.492907 sec + 4,341,247,814 cycles # 2.902 GHz + 10,434,510,449 instructions # 2.40 insn per cycle + 1.497014311 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4418) (512y: 83) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -181,20 +181,20 @@ Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.504396e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.639957e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.639957e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.532516e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.636201e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.636201e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.209455 sec - 4,242,345,884 cycles # 1.917 GHz - 6,156,808,300 instructions # 1.45 insn per cycle - 2.214700190 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2060) (512y: 117) (512z: 3648) +TOTAL : 2.198970 sec + 4,210,314,244 cycles # 1.912 GHz + 6,111,580,609 instructions # 1.45 insn per cycle + 2.203311339 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2065) (512y: 117) (512z: 3649) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index d7a6c1c5a3..39bb25c947 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -36,38 +36,38 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-08-15_08:02:33 +DATE: 2023-10-25_18:39:22 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.229649e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.745477e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.863213e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.944009e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.041807e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.053824e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.471873 sec - 1,969,411,167 cycles # 2.855 GHz - 2,527,637,472 instructions # 1.28 insn per cycle - 0.747596794 seconds time elapsed +TOTAL : 0.457165 sec + 2,013,112,285 cycles # 3.009 GHz + 2,933,107,931 instructions # 1.46 insn per cycle + 0.726273814 seconds time elapsed runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.094888e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.298904e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.310235e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.111421e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.304827e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.316338e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.602742 sec - 2,460,497,015 cycles # 2.922 GHz - 3,449,636,442 instructions # 1.40 insn per cycle - 0.902413801 seconds time elapsed +TOTAL : 0.584817 sec + 2,503,239,468 cycles # 3.024 GHz + 3,758,528,305 instructions # 1.50 insn per cycle + 0.886403311 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -77,19 +77,19 @@ Relative difference = 4.418889885423659e-07 OK (relative difference <= 5E-3) 
========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.587494e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.603624e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.603624e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.502057e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.514010e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.514010e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.355894 sec - 19,662,899,968 cycles # 3.093 GHz - 60,301,262,777 instructions # 3.07 insn per cycle - 6.360918487 seconds time elapsed +TOTAL : 6.570415 sec + 20,096,701,303 cycles # 3.057 GHz + 60,261,778,784 instructions # 3.00 insn per cycle + 6.574454844 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1271) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest.exe @@ -103,20 +103,20 @@ Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.814453e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.870401e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.870401e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.950569e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.996463e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.996463e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.426935 sec - 10,157,287,624 cycles # 2.960 GHz - 30,490,441,918 instructions # 3.00 insn per cycle - 3.432394941 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5151) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.330994 sec + 10,302,022,615 cycles # 3.090 GHz + 30,444,386,178 instructions # 2.96 insn per cycle + 3.335148743 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5149) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -129,20 +129,20 @@ Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.381746e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.595958e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.595958e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.413979e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.578528e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.578528e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.770488 sec - 5,122,017,261 cycles # 2.888 GHz - 11,823,506,295 instructions # 2.31 insn per cycle - 1.776023203 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4797) (512y: 0) (512z: 0) +TOTAL : 1.762053 sec + 5,116,204,786 cycles # 2.900 GHz + 11,780,626,112 instructions # 2.30 insn per cycle + 1.766305951 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4795) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -155,20 +155,20 @@ Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.032538e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.058039e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.058039e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.034226e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.053843e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.053843e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.610226 sec - 4,674,616,829 cycles # 2.896 GHz - 11,035,878,763 instructions # 2.36 insn per cycle - 1.615179746 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4427) (512y: 236) (512z: 0) +TOTAL : 1.605857 sec + 4,642,992,473 cycles # 2.885 GHz + 10,992,793,436 instructions # 2.37 insn per cycle + 1.609875653 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4423) (512y: 238) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -181,20 +181,20 @@ Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.513081e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.648869e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.648869e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.613528e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.720366e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.720366e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.205682 sec - 4,258,221,941 cycles # 1.927 GHz - 6,395,609,321 instructions # 1.50 insn per cycle - 2.220421883 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1957) (512y: 163) (512z: 3727) +TOTAL : 2.175272 sec + 4,221,455,153 cycles # 1.938 GHz + 6,349,351,796 instructions # 1.50 insn per cycle + 2.179367593 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1959) (512y: 163) (512z: 3727) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index ee99dff53c..bff9233075 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -36,38 +36,38 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-08-15_08:03:03 +DATE: 2023-10-25_18:39:51 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.453892e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.496986e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.499134e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.468061e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.492682e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.494686e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.532264 sec - 2,202,591,124 cycles # 2.880 GHz - 3,145,234,675 instructions # 1.43 insn per cycle - 0.827585575 seconds time elapsed +TOTAL : 0.518589 sec + 2,245,805,905 cycles # 3.007 GHz + 3,544,979,174 instructions # 1.58 insn per cycle + 0.807315238 seconds time elapsed runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.128592e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.157553e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.158726e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.126268e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.153472e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.154624e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.048277 sec - 10,045,436,491 cycles # 3.027 GHz - 21,587,671,766 instructions # 2.15 insn per cycle - 3.375324480 seconds time elapsed +TOTAL : 3.016484 sec + 10,017,854,023 cycles # 3.069 GHz + 22,587,762,207 instructions # 2.25 insn per cycle + 3.322845777 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,20 +77,20 @@ Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) 
========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.975608e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.976863e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.976863e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.955514e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.956404e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.956404e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.320519 sec - 25,296,824,937 cycles # 3.041 GHz - 78,718,706,014 instructions # 3.11 insn per cycle - 8.325864427 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4809) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.396540 sec + 25,870,300,414 cycles # 3.080 GHz + 78,705,757,349 instructions # 3.04 insn per cycle + 8.400556749 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4800) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,19 +103,19 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.681821e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.686349e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.686349e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.628098e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.631366e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.631366e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.465998 sec - 12,962,442,263 cycles # 2.901 GHz - 39,326,857,637 instructions # 3.03 insn per cycle - 4.471243281 seconds time elapsed +TOTAL : 4.529617 sec + 13,157,831,550 cycles # 2.903 GHz + 39,316,654,466 instructions # 2.99 insn per cycle + 4.533882139 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -129,19 +129,19 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = 
SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.545695e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.567841e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.567841e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.489125e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.506110e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.506110e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.929314 sec - 5,594,587,140 cycles # 2.894 GHz - 13,926,374,724 instructions # 2.49 insn per cycle - 1.934668446 seconds time elapsed +TOTAL : 1.941066 sec + 5,640,899,050 cycles # 2.901 GHz + 13,915,027,017 instructions # 2.47 insn per cycle + 1.945275776 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / 
`nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.267998e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.295121e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.295121e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.632867e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.655023e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.655023e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.779909 sec - 5,160,580,380 cycles # 2.893 GHz - 12,568,523,308 instructions # 2.44 insn per cycle - 1.785283591 seconds time elapsed +TOTAL : 1.711467 sec + 4,969,822,591 cycles # 2.898 GHz + 12,556,829,300 instructions # 2.53 insn per cycle + 1.715640499 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10999) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -181,20 +181,20 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.597716e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.615205e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.615205e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.688750e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.702937e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.702937e+03 ) sec^-1 
MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.168878 sec - 4,135,215,705 cycles # 1.903 GHz - 6,452,419,322 instructions # 1.56 insn per cycle - 2.174332343 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1809) (512y: 102) (512z:10109) +TOTAL : 2.142319 sec + 4,116,162,262 cycles # 1.918 GHz + 6,441,474,951 instructions # 1.56 insn per cycle + 2.146523645 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1816) (512y: 102) (512z:10110) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index 0b2de777dc..19b0ccbfe1 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -36,26 +36,26 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-08-15_08:30:42 +DATE: 2023-10-25_19:05:36 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.014063e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.384512e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.384512e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.145401e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.455796e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.455796e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.537620 sec - 2,245,393,273 cycles # 2.907 GHz - 3,255,628,352 instructions # 1.45 insn per cycle - 0.832611719 seconds time elapsed +TOTAL : 0.511465 sec + 2,211,538,389 cycles # 2.999 GHz + 3,499,461,341 instructions # 1.58 insn per cycle + 0.799495725 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -69,17 +69,17 @@ WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.548113e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.099320e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.099320e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.639414e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.104964e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.104964e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.358261 sec - 11,062,414,783 cycles # 3.042 GHz - 23,490,877,966 instructions # 2.12 insn per cycle - 3.693864709 seconds time elapsed +TOTAL : 3.289425 sec + 10,927,544,611 cycles # 3.076 GHz + 23,831,419,819 instructions # 2.18 insn per cycle + 3.609486496 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,20 +90,20 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.017662e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.019011e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.019011e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.955661e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.956599e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.956599e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.143634 sec - 25,319,073,361 cycles # 3.109 GHz - 78,725,760,258 instructions # 3.11 insn per cycle - 8.148951373 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4809) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.399286 sec + 25,889,610,376 cycles # 3.081 GHz + 78,711,674,763 instructions # 3.04 insn per cycle + 8.403464378 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4800) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -117,19 +117,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.653505e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.658020e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.658020e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.684905e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.688434e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.688434e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.508369 sec - 13,019,069,255 cycles # 2.886 GHz - 39,344,398,700 instructions # 3.02 insn per cycle - 4.513744525 seconds time elapsed +TOTAL : 4.463396 sec + 13,180,558,134 cycles # 2.951 GHz + 39,329,251,791 instructions # 2.98 insn per cycle + 4.467689901 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -144,19 +144,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.544470e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.566776e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.566776e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.312850e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.329128e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.329128e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.935380 sec - 5,614,957,088 cycles # 2.895 GHz - 13,940,977,670 instructions # 2.48 insn per cycle - 1.940728454 seconds time elapsed +TOTAL : 1.985951 sec + 5,671,057,559 cycles # 2.852 GHz + 13,925,731,418 instructions # 2.46 insn per cycle + 1.990267942 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -171,19 +171,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.572880e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.603073e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.603073e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.189591e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.210527e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.210527e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.727110 sec - 5,000,794,718 cycles # 2.888 GHz - 12,577,122,736 instructions # 2.52 insn per cycle - 1.732450309 seconds time elapsed +TOTAL : 1.797800 sec + 4,986,486,293 cycles # 2.768 GHz + 12,566,997,052 instructions # 2.52 insn per cycle + 1.802092456 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10999) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -198,20 +198,20 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.594797e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.613505e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.613505e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.650923e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.665003e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.665003e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.175375 sec - 4,166,851,801 cycles # 1.912 GHz - 6,467,787,989 instructions # 1.55 insn per cycle - 2.180731533 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1809) (512y: 102) (512z:10109) +TOTAL : 2.156621 sec + 4,130,305,981 cycles # 1.912 GHz + 6,453,079,741 instructions # 1.56 insn per cycle + 2.160974147 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1816) (512y: 102) (512z:10110) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index 77780d410d..81203fa77a 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -36,38 +36,38 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-08-15_08:42:14 +DATE: 2023-10-25_19:16:31 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.429732e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.467711e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.469814e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.481338e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.505105e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.507123e+05 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.528918 sec - 2,209,872,003 cycles # 2.894 GHz - 3,176,003,884 instructions # 1.44 insn per cycle - 0.824220410 seconds time elapsed +TOTAL : 0.501686 sec + 2,214,909,881 cycles # 3.031 GHz + 3,458,747,276 instructions # 1.56 insn per cycle + 0.800330270 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.160148e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.193585e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.194939e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.151978e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.180537e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.181762e+05 ) sec^-1 MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 3.139841 sec - 10,329,103,375 cycles # 3.040 GHz - 22,212,942,478 instructions # 2.15 insn per cycle - 3.455656111 seconds time elapsed +TOTAL : 3.117189 sec + 10,377,272,213 cycles # 3.074 GHz + 22,017,651,367 instructions # 2.12 insn per cycle + 3.433688033 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,20 +77,20 @@ Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.979419e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.980640e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.980640e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.951603e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.952527e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.952527e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 8.297424 sec - 25,302,053,764 cycles # 3.050 GHz - 78,718,480,647 instructions # 3.11 insn per cycle - 8.302445273 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4809) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.414289 sec + 25,891,693,281 cycles # 3.076 GHz + 78,705,382,161 instructions # 3.04 insn per cycle + 8.418214136 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4800) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,19 +103,19 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.636065e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.640264e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.640264e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.708393e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.711838e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.711838e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.522525 sec - 12,975,254,269 cycles # 2.867 GHz - 39,327,280,013 instructions # 3.03 insn per cycle - 4.527497502 seconds time elapsed +TOTAL : 4.432833 sec + 13,155,908,783 cycles # 2.966 GHz + 39,315,348,391 instructions # 2.99 insn per cycle + 4.436757068 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -129,19 +129,19 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = 
SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.483373e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.505325e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.505325e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.511504e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.528904e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.528904e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.945229 sec - 5,600,197,846 cycles # 2.876 GHz - 13,925,895,358 instructions # 2.49 insn per cycle - 1.950206271 seconds time elapsed +TOTAL : 1.937344 sec + 5,646,349,471 cycles # 2.910 GHz + 13,913,307,123 instructions # 2.46 insn per cycle + 1.941259569 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP 
threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.569990e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.598824e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.598824e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.572932e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.594635e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.594635e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.725342 sec - 4,994,452,767 cycles # 2.893 GHz - 12,566,891,411 instructions # 2.52 insn per cycle - 1.730492317 seconds time elapsed +TOTAL : 1.724509 sec + 4,978,068,357 cycles # 2.882 GHz + 12,554,500,287 instructions # 2.52 insn per cycle + 1.728498714 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10999) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -181,20 +181,20 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.586191e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.604384e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.604384e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.677554e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.691380e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
7.691380e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.174746 sec - 4,138,656,564 cycles # 1.901 GHz - 6,452,749,279 instructions # 1.56 insn per cycle - 2.179472637 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1809) (512y: 102) (512z:10109) +TOTAL : 2.147049 sec + 4,126,422,075 cycles # 1.919 GHz + 6,439,114,110 instructions # 1.56 insn per cycle + 2.151134180 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1816) (512y: 102) (512z:10110) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt index 686f1e49a1..983ed35921 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt @@ -36,38 +36,38 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-08-15_08:38:58 +DATE: 2023-10-25_19:13:27 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.408896e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.453347e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.455502e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.486758e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.510432e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.512397e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.527305 sec - 2,220,070,983 cycles # 2.916 GHz - 3,151,259,549 instructions # 1.42 insn per cycle - 0.821841341 seconds time elapsed +TOTAL : 0.503841 sec + 2,219,754,850 cycles # 2.994 GHz + 3,493,120,915 instructions # 1.57 insn per cycle + 0.814222419 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.154789e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.188369e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.189721e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.146385e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.174926e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.176115e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.094429 sec - 10,128,470,165 cycles # 3.020 GHz - 23,253,458,467 instructions # 2.30 insn per cycle - 3.411550143 seconds time elapsed +TOTAL : 3.060532 sec + 10,143,930,144 cycles # 3.066 GHz + 23,186,884,860 instructions # 2.29 insn per cycle + 3.364498351 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,20 +77,20 @@ Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.008156e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.009429e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.009429e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.960187e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.961109e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.961109e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.177574 sec - 25,302,901,901 cycles # 3.093 GHz - 78,716,976,187 instructions # 3.11 insn per cycle - 8.182798610 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4809) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.392474 sec + 25,877,587,986 cycles # 3.088 GHz + 78,705,423,071 instructions # 3.04 insn per cycle + 8.396357877 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4800) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,19 +103,19 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.587950e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.592033e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.592033e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.691020e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.694421e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.694421e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.582675 sec - 13,508,007,326 cycles # 2.947 GHz - 39,327,551,179 instructions # 2.91 insn per cycle - 4.587694775 seconds time elapsed +TOTAL : 4.452252 sec + 13,153,001,214 cycles # 2.952 GHz + 39,316,173,049 instructions # 2.99 insn per cycle + 4.456201629 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -129,19 +129,19 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -Process = 
SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.486007e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.508549e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.508549e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.443294e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.459986e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.459986e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.943227 sec - 5,595,681,232 cycles # 2.874 GHz - 13,926,558,643 instructions # 2.49 insn per cycle - 1.948271986 seconds time elapsed +TOTAL : 1.951557 sec + 5,638,519,517 cycles # 2.884 GHz + 13,914,420,326 instructions # 2.47 insn per cycle + 1.955513391 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP 
threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.596256e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.625091e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.625091e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.662759e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.685664e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.685664e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.719247 sec - 4,990,025,907 cycles # 2.896 GHz - 12,568,338,549 instructions # 2.52 insn per cycle - 1.724314858 seconds time elapsed +TOTAL : 1.706815 sec + 4,966,762,812 cycles # 2.905 GHz + 12,556,639,833 instructions # 2.53 insn per cycle + 1.710823467 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10999) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -181,20 +181,20 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.657567e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.675488e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.675488e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.630990e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.645195e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
7.645195e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.151986 sec - 4,134,393,503 cycles # 1.918 GHz - 6,452,537,166 instructions # 1.56 insn per cycle - 2.157197097 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1809) (512y: 102) (512z:10109) +TOTAL : 2.158956 sec + 4,117,585,001 cycles # 1.904 GHz + 6,441,334,233 instructions # 1.56 insn per cycle + 2.163053685 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1816) (512y: 102) (512z:10110) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index 992b9aa27f..089d292aa8 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -36,23 +36,23 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-08-15_08:35:47 +DATE: 2023-10-25_19:10:27 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.072022e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.440583e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.442697e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.218258e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.503307e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.505360e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.533318 sec - 2,211,373,511 cycles # 2.880 GHz - 3,228,795,304 instructions # 1.46 insn per cycle - 0.828284741 seconds time elapsed +TOTAL : 0.506835 sec + 2,221,452,108 cycles # 3.022 GHz + 3,523,570,836 instructions # 1.59 insn per cycle + 0.796752404 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -60,17 +60,17 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.665049e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.181764e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.183102e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.728232e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.175248e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.176437e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.247163 sec - 10,670,290,958 cycles # 3.043 GHz - 21,862,228,593 instructions # 2.05 insn per cycle - 3.563318288 seconds time elapsed +TOTAL : 3.202196 sec + 10,370,823,383 cycles # 3.013 GHz + 22,699,363,327 instructions # 2.19 insn per cycle + 3.506471268 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -80,20 +80,20 @@ Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.004157e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.005423e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.005423e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.964142e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.965067e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.965067e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.193994 sec - 25,318,715,213 cycles # 3.090 GHz - 78,717,630,381 instructions # 3.11 insn per cycle - 8.199107320 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4809) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.359175 sec + 25,872,166,576 cycles # 3.094 GHz + 78,706,432,099 instructions # 3.04 insn per cycle + 8.363176184 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4800) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -106,19 +106,19 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.685853e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.690241e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.690241e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.675901e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.679176e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.679176e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.460276 sec - 12,967,494,098 cycles # 2.906 GHz - 39,327,269,410 instructions # 3.03 insn per cycle - 4.465144824 seconds time elapsed +TOTAL : 4.470613 sec + 13,168,571,852 cycles # 2.943 GHz + 39,316,143,486 instructions # 2.99 insn per cycle + 4.474685106 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -132,19 +132,19 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = 
SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.566997e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.590444e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.590444e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.475559e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.492856e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.492856e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.927198 sec - 5,598,489,142 cycles # 2.901 GHz - 13,926,756,722 instructions # 2.49 insn per cycle - 1.932331133 seconds time elapsed +TOTAL : 1.944338 sec + 5,656,434,910 cycles # 2.905 GHz + 13,914,488,872 instructions # 2.46 insn per cycle + 1.948357306 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -158,19 +158,19 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP 
threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.590912e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.619419e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.619419e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.315923e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.336840e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.336840e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.720802 sec - 4,990,138,053 cycles # 2.894 GHz - 12,570,402,284 instructions # 2.52 insn per cycle - 1.725569173 seconds time elapsed +TOTAL : 1.769971 sec + 4,966,635,750 cycles # 2.801 GHz + 12,556,400,439 instructions # 2.53 insn per cycle + 1.774243834 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10999) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -184,20 +184,20 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.667683e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.685389e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.685389e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.505641e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.519600e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
7.519600e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.150008 sec - 4,128,778,267 cycles # 1.917 GHz - 6,452,453,162 instructions # 1.56 insn per cycle - 2.155042794 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1809) (512y: 102) (512z:10109) +TOTAL : 2.194843 sec + 4,122,096,598 cycles # 1.876 GHz + 6,442,654,429 instructions # 1.56 insn per cycle + 2.198924835 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1816) (512y: 102) (512z:10110) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index 237ca99358..db28556fed 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -36,38 +36,38 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-08-15_08:03:40 +DATE: 2023-10-25_18:40:27 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.411666e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.457808e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.459974e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.480350e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.503709e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.505689e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.530347 sec - 2,215,627,654 cycles # 2.903 GHz - 3,117,473,452 instructions # 1.41 insn per cycle - 0.825930987 seconds time elapsed +TOTAL : 0.515106 sec + 2,241,115,881 cycles # 3.010 GHz + 3,491,500,030 instructions # 1.56 insn per cycle + 0.803761340 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.144774e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.174090e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.175257e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.140925e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.168327e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.169450e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.043950 sec - 10,077,982,824 cycles # 3.048 GHz - 20,884,643,426 instructions # 2.07 insn per cycle - 3.362779620 seconds time elapsed +TOTAL : 3.010736 sec + 10,017,349,944 cycles # 3.076 GHz + 21,234,719,579 instructions # 2.12 insn per cycle + 3.315687380 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -77,20 +77,20 @@ Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.001628e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.002869e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.002869e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.950176e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.951182e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.951182e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.203624 sec - 25,355,069,104 cycles # 3.089 GHz - 78,461,481,347 instructions # 3.09 insn per cycle - 8.208964551 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4141) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.419375 sec + 25,820,844,871 cycles # 3.067 GHz + 78,455,782,361 instructions # 3.04 insn per cycle + 8.423501286 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4147) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -103,20 +103,20 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.649553e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.653948e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.653948e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.695368e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.698841e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.698841e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.504807 sec - 12,940,264,818 cycles # 2.872 GHz - 39,278,818,959 instructions # 3.04 insn per cycle - 4.509858133 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12921) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.446590 sec + 13,096,365,032 cycles # 2.943 GHz + 39,266,931,549 instructions # 3.00 insn per cycle + 4.450776925 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:12925) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -129,20 +129,20 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.391541e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.414022e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.414022e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.473385e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.490359e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.490359e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.964481 sec - 5,630,549,019 cycles # 2.860 GHz - 14,042,768,532 instructions # 2.49 insn per cycle - 1.969790831 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11430) (512y: 0) (512z: 0) +TOTAL : 1.944494 sec + 5,647,694,185 cycles # 2.899 GHz + 14,031,784,985 instructions # 2.48 insn per cycle + 1.948726891 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11428) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -155,20 +155,20 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.707847e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.733033e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.733033e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.439321e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.460393e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.460393e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.894730 sec - 5,044,848,529 cycles # 2.657 GHz - 12,697,937,577 instructions # 2.52 insn per cycle - 1.900080529 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10994) (512y: 240) (512z: 0) +TOTAL : 1.746193 sec + 5,071,268,913 cycles # 2.898 GHz + 12,684,289,306 instructions # 2.50 insn per cycle + 1.750379728 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10992) (512y: 240) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -181,20 +181,20 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.667824e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.686951e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.686951e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.529292e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.543220e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.543220e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.149855 sec - 4,131,471,523 cycles # 1.920 GHz - 6,576,312,702 instructions # 1.59 insn per cycle - 2.154822899 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1638) (512y: 192) (512z:10078) +TOTAL : 2.188060 sec + 4,141,433,761 cycles # 1.890 GHz + 6,563,782,413 instructions # 1.58 insn per cycle + 2.192342750 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1640) (512y: 192) (512z:10068) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index b0cb514025..4c6f36c205 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -36,38 +36,38 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-08-15_08:19:39 +DATE: 2023-10-25_18:55:24 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.186111e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.222258e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.224187e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.222490e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.244878e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.246691e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.534685 sec - 2,221,949,437 cycles # 2.892 GHz - 3,229,701,697 instructions # 1.45 insn per cycle - 0.828035424 seconds time elapsed +TOTAL : 0.528299 sec + 2,270,979,797 cycles # 3.018 GHz + 3,579,093,863 instructions # 1.58 insn per cycle + 0.812313256 seconds time elapsed runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.745971e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.772990e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.774074e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.777355e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.800618e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.801578e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.294695 sec - 10,798,249,474 cycles # 3.040 GHz - 25,165,680,445 instructions # 2.33 insn per cycle - 3.610046788 seconds time elapsed +TOTAL : 3.287863 sec + 10,849,521,679 cycles # 3.070 GHz + 24,134,668,326 instructions # 2.22 insn per cycle + 3.593012410 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fgcheck.exe 2 64 2 @@ -77,20 +77,20 @@ Relative difference = 2.837296513854949e-07 OK (relative difference <= 5E-3) 
========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.440167e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.440795e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.440795e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.444628e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.445098e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.445098e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 36.950676 sec - 113,548,188,280 cycles # 3.073 GHz - 144,974,209,663 instructions # 1.28 insn per cycle - 36.955853236 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:21600) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 36.909027 sec + 113,587,132,048 cycles # 3.078 GHz + 144,964,358,008 instructions # 1.28 insn per cycle + 36.912946290 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:21605) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,19 +103,19 @@ Relative difference = 2.83729918072716e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.329065e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.332654e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.332654e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.256823e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.259432e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.259432e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.936971 sec - 14,692,912,106 cycles # 2.974 GHz - 37,589,294,182 instructions # 2.56 insn per cycle - 4.941927376 seconds time elapsed +TOTAL : 5.044758 sec + 14,717,259,007 cycles # 2.916 GHz + 37,577,668,645 instructions # 2.55 insn per cycle + 5.048857745 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:68118) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest.exe @@ -129,19 +129,19 @@ Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = 
SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.700518e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.719059e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.719059e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.803332e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.817865e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.817865e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.143912 sec - 6,137,805,962 cycles # 2.862 GHz - 13,076,339,536 instructions # 2.13 insn per cycle - 2.149352332 seconds time elapsed +TOTAL : 2.110842 sec + 6,124,055,435 cycles # 2.897 GHz + 13,063,274,169 instructions # 2.13 insn per cycle + 2.114845473 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:46960) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest.exe @@ -155,20 +155,20 @@ Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / 
`nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.391844e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.419528e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.419528e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.453213e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.474776e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.474776e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.757108 sec - 5,072,480,201 cycles # 2.881 GHz - 11,455,808,544 instructions # 2.26 insn per cycle - 1.762035410 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40433) (512y: 285) (512z: 0) +TOTAL : 1.744006 sec + 5,055,001,520 cycles # 2.893 GHz + 11,442,027,490 instructions # 2.26 insn per cycle + 1.747990275 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40434) (512y: 285) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -181,20 +181,20 @@ Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.876076e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.894961e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.894961e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.925155e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.940309e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.940309e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.093367 sec - 3,994,032,080 cycles # 1.905 GHz - 5,957,196,685 instructions # 1.49 insn per cycle - 2.098310322 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2455) (512y: 337) (512z:39409) +TOTAL : 2.078899 sec + 3,977,787,711 cycles # 1.911 GHz + 5,943,488,721 instructions # 1.49 insn per cycle + 2.082985085 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2455) (512y: 337) (512z:39411) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index a6581703dc..6ac5000ce8 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -36,38 +36,38 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-08-15_08:20:48 +DATE: 2023-10-25_18:56:31 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.187705e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.222893e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.224731e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.239290e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.259875e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.261623e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.535653 sec - 2,220,323,389 cycles # 2.893 GHz - 3,156,830,694 instructions # 1.42 insn per cycle - 0.828034854 seconds time elapsed +TOTAL : 0.524624 sec + 2,256,102,466 cycles # 3.003 GHz + 3,565,937,124 instructions # 1.58 insn per cycle + 0.809821745 seconds time elapsed runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.771115e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.798449e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.799565e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.793727e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.817115e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.818113e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.292251 sec - 10,760,515,723 cycles # 3.035 GHz - 25,239,650,000 instructions # 2.35 insn per cycle - 3.607701526 seconds time elapsed +TOTAL : 3.259889 sec + 10,730,925,294 cycles # 3.058 GHz + 24,431,623,702 instructions # 2.28 insn per cycle + 3.565849378 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fgcheck.exe 2 64 2 @@ -77,20 +77,20 @@ Relative difference = 2.837296513854949e-07 OK (relative difference <= 5E-3) 
========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.415087e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.415720e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.415720e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.412471e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.412949e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.412949e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 37.159775 sec - 114,301,534,784 cycles # 3.076 GHz - 145,574,530,561 instructions # 1.27 insn per cycle - 37.164733277 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:22238) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 37.178028 sec + 114,361,849,248 cycles # 3.076 GHz + 145,560,134,005 instructions # 1.27 insn per cycle + 37.182120025 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:22248) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -103,19 +103,19 @@ Relative difference = 2.83729918072716e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.261786e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.265194e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.265194e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.195698e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.198225e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.198225e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.044434 sec - 15,176,552,874 cycles # 3.008 GHz - 37,775,533,795 instructions # 2.49 insn per cycle - 5.049599176 seconds time elapsed +TOTAL : 5.140800 sec + 15,162,495,765 cycles # 2.948 GHz + 37,764,610,972 instructions # 2.49 insn per cycle + 5.144813607 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:68446) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest.exe @@ -129,20 +129,20 @@ Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = 
SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.943060e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.962497e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.962497e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.961138e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.976889e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.976889e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.074892 sec - 6,010,581,709 cycles # 2.891 GHz - 12,911,568,202 instructions # 2.15 insn per cycle - 2.080135608 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:45936) (512y: 0) (512z: 0) +TOTAL : 2.069144 sec + 6,001,988,087 cycles # 2.896 GHz + 12,897,757,655 instructions # 2.15 insn per cycle + 2.073134028 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:45929) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -155,20 +155,20 @@ Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.321078e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.349408e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.349408e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.425265e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.445952e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.445952e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.770348 sec - 5,116,510,829 cycles # 2.884 GHz - 11,462,830,925 instructions # 2.24 insn per cycle - 1.775692855 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40124) (512y: 219) (512z: 0) +TOTAL : 1.748961 sec + 5,100,741,339 cycles # 2.911 GHz + 11,448,531,367 instructions # 2.24 insn per cycle + 1.753002861 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40123) (512y: 219) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -181,20 +181,20 @@ Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.877630e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.896567e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.896567e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.951658e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.967421e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.967421e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.092406 sec - 3,959,116,760 cycles # 1.889 GHz - 5,909,678,629 instructions # 1.49 insn per cycle - 2.097595629 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1971) (512y: 259) (512z:38938) +TOTAL : 2.071513 sec + 3,955,166,061 cycles # 1.907 GHz + 5,898,178,662 instructions # 1.49 insn per cycle + 2.075594399 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1971) (512y: 259) (512z:38937) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 87c14a9fc2..7d5250e643 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -36,61 +36,61 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-08-15_08:04:17 +DATE: 2023-10-25_18:41:03 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.113680e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.206120e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.211586e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.344411e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.393693e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.398639e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.489030 sec - 2,018,556,536 cycles # 2.860 GHz - 2,695,229,333 instructions # 1.34 insn per cycle - 0.764433321 seconds time elapsed +TOTAL : 0.476002 sec + 2,055,020,376 cycles # 2.991 GHz + 3,081,758,808 instructions # 1.50 insn per cycle + 0.745881313 seconds time elapsed runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.501612e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.564106e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.566732e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.554616e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.614004e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.616642e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.742005 sec - 5,900,924,146 cycles # 2.991 GHz - 12,364,294,177 instructions # 2.10 insn per cycle - 2.032841021 seconds time elapsed +TOTAL : 1.707448 sec + 5,919,573,877 cycles # 3.066 GHz + 11,552,481,792 instructions # 1.95 insn per cycle + 1.990304243 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 6.626454e-04 -Avg ME (F77/CUDA) = 6.6262662035525971E-004 -Relative difference = 2.8340413651595734e-05 +Avg ME (F77/CUDA) = 
6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.077935e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.079032e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.079032e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.027055e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.028034e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.028034e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.903687 sec - 24,463,617,809 cycles # 3.095 GHz - 78,139,167,105 instructions # 3.19 insn per cycle - 7.908373975 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3554) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.098867 sec + 25,111,702,681 cycles # 3.100 GHz + 78,142,230,902 instructions # 3.11 insn per cycle + 8.102919065 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3558) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,19 +103,19 @@ Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.252580e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.266716e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.266716e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.175810e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.188746e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.188746e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.270339 sec - 6,331,371,908 cycles # 2.784 GHz - 20,185,378,375 instructions # 3.19 insn per cycle - 2.275323970 seconds time elapsed +TOTAL : 2.292926 sec + 6,573,476,191 cycles # 2.863 GHz + 20,176,795,660 instructions # 3.07 insn per cycle + 2.297103514 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13749) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -129,19 +129,19 @@ Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP 
[gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.605269e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.612091e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.612091e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.680111e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.687118e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.687118e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.031349 sec - 2,857,704,024 cycles # 2.761 GHz - 7,121,520,002 instructions # 2.49 insn per cycle - 1.036403125 seconds time elapsed +TOTAL : 0.983720 sec + 2,861,168,699 cycles # 2.899 GHz + 7,112,434,592 instructions # 2.49 insn per cycle + 0.987814280 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11880) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 1.888192e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.897460e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.897460e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.901709e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.910562e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910562e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.877915 sec - 2,532,938,739 cycles # 2.874 GHz - 6,416,830,601 instructions # 2.53 insn per cycle - 0.882777780 seconds time elapsed +TOTAL : 0.870092 sec + 2,532,384,607 cycles # 2.899 GHz + 6,407,671,698 instructions # 2.53 insn per cycle + 0.874200480 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11552) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -181,20 +181,20 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.499109e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.505240e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.505240e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.544511e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.550364e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.550364e+04 ) sec^-1 MeanMatrixElemValue = ( 
4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.103806 sec - 2,068,951,024 cycles # 1.868 GHz - 3,329,621,366 instructions # 1.61 insn per cycle - 1.109490178 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2396) (512y: 47) (512z:10312) +TOTAL : 1.069933 sec + 2,059,770,627 cycles # 1.919 GHz + 3,321,177,538 instructions # 1.61 insn per cycle + 1.074034173 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2408) (512y: 46) (512z:10312) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index 8638dd3103..866fb524ce 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -36,26 +36,26 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-08-15_08:31:19 +DATE: 2023-10-25_19:06:12 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.531230e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.195294e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.195294e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.649753e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.350178e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.350178e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.491228 sec - 2,027,497,668 cycles # 2.858 GHz - 2,777,232,069 instructions # 1.37 insn per cycle - 0.767453903 seconds time elapsed +TOTAL : 0.465463 sec + 2,047,479,601 cycles # 2.978 GHz + 3,049,363,259 instructions # 1.49 insn per cycle + 0.744895811 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -69,41 +69,41 @@ WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.199928e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.478309e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.478309e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.641709e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.925284 sec - 6,532,368,683 cycles # 3.019 GHz - 13,853,967,442 instructions # 2.12 insn per cycle - 2.224547208 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.287015e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.501118e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.501118e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 +TOTAL : 1.864461 sec + 6,383,949,622 cycles # 3.047 GHz + 13,653,993,577 instructions # 2.14 insn per cycle + 2.154815432 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 6.626454e-04 -Avg ME (F77/CUDA) = 6.6262662035525971E-004 -Relative difference = 2.8340413651595734e-05 +Avg ME (F77/CUDA) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.069434e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.070543e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.070543e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.020017e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.021013e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.021013e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.937844 sec - 24,489,796,577 cycles # 3.084 GHz - 78,143,313,735 instructions # 3.19 insn per cycle - 7.942733555 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3554) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.129725 sec + 25,159,036,941 cycles # 3.094 GHz + 78,146,432,404 instructions # 3.11 insn per cycle + 8.133616973 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3558) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -117,19 +117,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.501137e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.515705e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.515705e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.180823e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.193936e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.193936e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.197820 sec - 6,342,440,687 cycles # 2.882 GHz - 20,194,640,120 instructions # 3.18 insn per cycle - 2.202831490 seconds time elapsed +TOTAL : 2.294060 sec + 6,581,200,311 cycles # 2.865 GHz + 20,186,134,505 instructions # 3.07 insn per cycle + 2.298229949 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13749) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -144,19 +144,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.680755e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.688331e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.688331e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.675906e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.682979e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.682979e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.987293 sec - 2,865,332,330 cycles # 2.890 GHz - 7,131,409,419 instructions # 2.49 insn per cycle - 0.992557144 seconds time elapsed +TOTAL : 0.989013 sec + 2,874,126,906 cycles # 2.896 GHz + 7,122,171,177 instructions # 2.48 insn per cycle + 0.993096654 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11880) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -171,19 +171,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.898899e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.908726e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.908726e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.895474e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.904073e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.904073e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.875197 sec - 2,543,327,231 cycles # 2.892 GHz - 6,426,714,975 instructions # 2.53 insn per cycle - 0.880579478 seconds time elapsed +TOTAL : 0.875592 sec + 2,541,159,330 cycles # 2.891 GHz + 6,417,191,354 instructions # 2.53 insn per cycle + 0.879698021 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11552) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -198,20 +198,20 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.541618e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.547883e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.547883e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.548734e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.554330e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.554330e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.075533 sec - 2,075,747,014 cycles # 1.922 GHz - 3,339,997,983 instructions # 1.61 insn per cycle - 1.080773183 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2396) (512y: 47) (512z:10312) +TOTAL : 1.069331 sec + 2,068,634,398 cycles # 1.928 GHz + 3,331,804,154 instructions # 1.61 insn per cycle + 1.073485896 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2408) (512y: 46) (512z:10312) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index 0ab22d9f59..b125b710bd 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -36,61 +36,61 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-08-15_08:42:51 +DATE: 2023-10-25_19:17:07 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.144326e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.215205e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.220785e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.332225e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.378563e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.386102e+05 ) sec^-1 MeanMatrixElemValue = ( 4.159397e-01 +- 3.238804e-01 ) GeV^-4 -TOTAL : 0.488557 sec - 2,011,133,432 cycles # 2.858 GHz - 2,717,062,488 instructions # 1.35 insn per cycle - 0.763355966 seconds time elapsed +TOTAL : 0.457977 sec + 2,007,013,185 cycles # 3.005 GHz + 3,016,655,563 instructions # 1.50 insn per cycle + 0.725068773 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.586236e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.659086e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.662124e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.573671e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.635105e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.637758e+05 ) sec^-1 MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 -TOTAL : 1.821809 sec - 6,199,621,275 cycles # 3.020 GHz - 12,302,218,981 instructions # 1.98 insn per cycle - 2.113042818 seconds time elapsed +TOTAL : 1.789355 sec + 6,154,390,786 cycles # 3.059 GHz + 11,779,031,447 instructions # 1.91 insn per cycle + 2.068482222 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 6.626454e-04 -Avg ME (F77/CUDA) = 6.6262662035525971E-004 -Relative difference = 2.8340413651595734e-05 +Avg ME (F77/CUDA) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = 
SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.070465e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.071518e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.071518e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.992613e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.993617e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.993617e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 7.936750 sec - 24,501,268,872 cycles # 3.089 GHz - 78,140,977,505 instructions # 3.19 insn per cycle - 7.941817846 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3554) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.239039 sec + 25,101,211,563 cycles # 3.046 GHz + 78,141,605,294 instructions # 3.11 insn per cycle + 8.242885554 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3558) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,19 +103,19 @@ Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.577149e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.592541e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.592541e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.360647e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.374286e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.374286e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 2.174855 sec - 6,336,150,180 cycles # 2.910 GHz - 20,185,517,198 instructions # 3.19 insn per cycle - 2.179654718 seconds time elapsed +TOTAL : 2.237472 sec + 6,571,783,960 cycles # 2.933 GHz + 20,176,847,169 instructions # 3.07 insn per cycle + 2.241174830 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13749) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -129,19 +129,19 @@ Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = 
SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.679095e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.686422e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.686422e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.681723e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.688538e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.688538e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.987011 sec - 2,858,107,376 cycles # 2.886 GHz - 7,120,975,733 instructions # 2.49 insn per cycle - 0.991535494 seconds time elapsed +TOTAL : 0.984343 sec + 2,863,785,178 cycles # 2.900 GHz + 7,111,595,374 instructions # 2.48 insn per cycle + 0.988141267 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11880) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads 
/ `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.895324e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.904535e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.904535e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.906951e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.915838e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.915838e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.875618 sec - 2,534,850,734 cycles # 2.883 GHz - 6,415,151,431 instructions # 2.53 insn per cycle - 0.880122223 seconds time elapsed +TOTAL : 0.868571 sec + 2,534,531,591 cycles # 2.907 GHz + 6,404,093,295 instructions # 2.53 insn per cycle + 0.872424795 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11552) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -181,20 +181,20 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.534730e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.540899e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.540899e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.558486e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.564692e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.564692e+04 ) 
sec^-1 MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 1.081909 sec - 2,066,983,805 cycles # 1.907 GHz - 3,328,304,762 instructions # 1.61 insn per cycle - 1.086461140 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2396) (512y: 47) (512z:10312) +TOTAL : 1.060964 sec + 2,062,134,932 cycles # 1.938 GHz + 3,317,722,223 instructions # 1.61 insn per cycle + 1.064830329 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2408) (512y: 46) (512z:10312) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt index 9f2ef7c120..0197c733f9 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt @@ -36,61 +36,61 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-08-15_08:39:35 +DATE: 2023-10-25_19:14:03 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.082660e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.168017e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.173141e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.335935e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.379594e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.384424e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.485959 sec - 2,007,340,793 cycles # 2.856 GHz - 2,718,317,159 instructions # 1.35 insn per cycle - 0.760972781 seconds time elapsed +TOTAL : 0.458171 sec + 1,997,983,261 cycles # 2.988 GHz + 3,059,794,719 instructions # 1.53 insn per cycle + 0.725811269 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.588938e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.660631e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.663590e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.578157e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.639706e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.642441e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.773538 sec - 6,041,038,525 cycles # 3.010 GHz - 13,199,098,490 instructions # 2.18 insn per cycle - 2.063993421 seconds time elapsed +TOTAL : 1.740586 sec + 5,998,214,102 cycles # 3.054 GHz + 12,259,549,434 instructions # 2.04 insn per cycle + 2.021545903 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 6.626454e-04 -Avg ME (F77/CUDA) = 6.6262662035525971E-004 -Relative difference = 2.8340413651595734e-05 +Avg ME (F77/CUDA) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -Process = 
SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.068156e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.069195e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.069195e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.001178e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.002137e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.002137e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.938597 sec - 24,486,012,837 cycles # 3.083 GHz - 78,138,824,946 instructions # 3.19 insn per cycle - 7.943607441 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3554) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.203084 sec + 25,141,172,926 cycles # 3.064 GHz + 78,142,442,354 instructions # 3.11 insn per cycle + 8.206877433 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3558) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,19 +103,19 @@ Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.545303e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.560195e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.560195e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.194824e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.207542e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.207542e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.187527 sec - 6,329,240,263 cycles # 2.893 GHz - 20,185,499,978 instructions # 3.19 insn per cycle - 2.192263277 seconds time elapsed +TOTAL : 2.287147 sec + 6,571,682,415 cycles # 2.870 GHz + 20,177,851,750 instructions # 3.07 insn per cycle + 2.290998385 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13749) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -129,19 +129,19 @@ Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -Process = 
SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.681389e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.688756e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.688756e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.671381e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.678184e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.678184e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.984275 sec - 2,856,185,163 cycles # 2.891 GHz - 7,121,459,357 instructions # 2.49 insn per cycle - 0.989290512 seconds time elapsed +TOTAL : 0.988852 sec + 2,877,121,035 cycles # 2.900 GHz + 7,112,414,105 instructions # 2.47 insn per cycle + 0.992772923 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11880) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads 
/ `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.909484e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.919062e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.919062e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.905382e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.914336e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.914336e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.867556 sec - 2,532,676,049 cycles # 2.911 GHz - 6,416,893,378 instructions # 2.53 insn per cycle - 0.872102079 seconds time elapsed +TOTAL : 0.868290 sec + 2,533,476,931 cycles # 2.907 GHz + 6,407,633,337 instructions # 2.53 insn per cycle + 0.872075865 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11552) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -181,20 +181,20 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.531203e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.537250e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.537250e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.547508e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.553260e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.553260e+04 ) 
sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.080158 sec - 2,062,887,429 cycles # 1.904 GHz - 3,329,660,109 instructions # 1.61 insn per cycle - 1.085064073 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2396) (512y: 47) (512z:10312) +TOTAL : 1.067544 sec + 2,060,630,355 cycles # 1.925 GHz + 3,320,987,634 instructions # 1.61 insn per cycle + 1.071418753 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2408) (512y: 46) (512z:10312) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index bf788b11b5..52987bd60d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -36,23 +36,23 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-08-15_08:36:24 +DATE: 2023-10-25_19:11:03 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.576733e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.221598e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.226830e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.805977e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.405107e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.410125e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.487901 sec - 2,025,095,734 cycles # 2.872 GHz - 2,738,438,516 instructions # 1.35 insn per cycle - 0.762207032 seconds time elapsed +TOTAL : 0.460864 sec + 2,006,899,759 cycles # 2.993 GHz + 3,017,362,425 instructions # 1.50 insn per cycle + 0.728187465 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -60,40 +60,40 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.456403e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.644388e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.647370e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.641709e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.853771 sec - 6,296,786,374 cycles # 3.016 GHz - 12,875,787,727 instructions # 2.04 insn per cycle - 2.154051650 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.501502e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.624461e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.627283e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 +TOTAL : 1.814750 sec + 6,240,031,804 cycles # 3.062 GHz + 12,062,681,609 instructions # 1.93 insn per cycle + 2.094320858 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 6.626454e-04 -Avg ME (F77/CUDA) = 6.6262662035525971E-004 -Relative difference = 2.8340413651595734e-05 +Avg ME (F77/CUDA) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] 
[hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.058673e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.059792e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.059792e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.001113e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.002085e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.002085e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.975780 sec - 24,487,620,137 cycles # 3.070 GHz - 78,140,745,843 instructions # 3.19 insn per cycle - 7.980846967 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3554) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.203609 sec + 25,118,317,039 cycles # 3.061 GHz + 78,142,981,648 instructions # 3.11 insn per cycle + 8.207429402 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3558) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -106,19 +106,19 @@ Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.553798e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.569045e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.569045e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.323036e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.336228e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.336228e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.179841 sec - 6,328,389,261 cycles # 2.901 GHz - 20,185,367,490 instructions # 3.19 insn per cycle - 2.184524511 seconds time elapsed +TOTAL : 2.246931 sec + 6,579,301,389 cycles # 2.924 GHz + 20,176,586,022 instructions # 3.07 insn per cycle + 2.250705274 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13749) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -132,19 +132,19 @@ Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = 
SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.690124e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.697333e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.697333e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.674657e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.681528e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.681528e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.979140 sec - 2,856,842,686 cycles # 2.911 GHz - 7,121,570,376 instructions # 2.49 insn per cycle - 0.983637729 seconds time elapsed +TOTAL : 0.986857 sec + 2,862,922,647 cycles # 2.892 GHz + 7,112,389,781 instructions # 2.48 insn per cycle + 0.990752111 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11880) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -158,19 +158,19 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads 
/ `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.896699e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.906222e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.906222e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.894478e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.903271e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.903271e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.873736 sec - 2,532,762,027 cycles # 2.888 GHz - 6,416,752,526 instructions # 2.53 insn per cycle - 0.878374086 seconds time elapsed +TOTAL : 0.873227 sec + 2,541,312,245 cycles # 2.899 GHz + 6,407,310,369 instructions # 2.52 insn per cycle + 0.877122463 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11552) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -184,20 +184,20 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.541857e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.548013e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.548013e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.549285e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.555166e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.555166e+04 ) 
sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.072693 sec - 2,060,338,474 cycles # 1.914 GHz - 3,329,512,577 instructions # 1.62 insn per cycle - 1.077219821 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2396) (512y: 47) (512z:10312) +TOTAL : 1.066246 sec + 2,058,021,558 cycles # 1.924 GHz + 3,321,051,164 instructions # 1.61 insn per cycle + 1.070183678 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2408) (512y: 46) (512z:10312) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index d991aa119f..e3d102e7b5 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -36,61 +36,61 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-08-15_08:04:47 +DATE: 2023-10-25_18:41:32 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.128116e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.217668e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.222594e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.347885e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.397852e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.402767e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.487229 sec - 2,026,991,222 cycles # 2.878 GHz - 2,709,678,447 instructions # 1.34 insn per cycle - 0.762361654 seconds time elapsed +TOTAL : 0.473915 sec + 2,057,636,111 cycles # 3.008 GHz + 3,034,789,542 instructions # 1.47 insn per cycle + 0.743124266 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.575581e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.638965e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.642004e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.510288e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.569100e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.571863e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.739337 sec - 5,714,760,202 cycles # 2.894 GHz - 11,499,716,249 instructions # 2.01 insn per cycle - 2.031805952 seconds time elapsed +TOTAL : 1.706491 sec + 5,801,343,951 cycles # 3.003 GHz + 11,478,639,093 instructions # 1.98 insn per cycle + 1.991446406 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 6.626454e-04 -Avg ME (F77/CUDA) = 6.6262662035525971E-004 -Relative difference = 2.8340413651595734e-05 +Avg ME (F77/CUDA) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] 
[inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.990978e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.991940e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.991940e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.032219e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.033197e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.033197e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.247144 sec - 24,315,749,458 cycles # 2.947 GHz - 77,883,463,554 instructions # 3.20 insn per cycle - 8.251967591 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3062) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.077935 sec + 25,002,620,407 cycles # 3.094 GHz + 77,880,023,337 instructions # 3.11 insn per cycle + 8.081833653 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3061) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -103,20 +103,20 @@ Relative difference = 5.65798569465384e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.446333e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.461104e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.461104e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.437992e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.452045e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.452045e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.211601 sec - 6,289,071,271 cycles # 2.840 GHz - 20,152,105,466 instructions # 3.20 insn per cycle - 2.216825625 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13433) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.212380 sec + 6,525,641,551 cycles # 2.945 GHz + 20,144,168,186 instructions # 3.09 insn per cycle + 2.216462164 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13439) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -129,20 +129,20 @@ Relative difference = 2.1853408865157068e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.576783e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.583356e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.583356e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.631112e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.637585e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.637585e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.049646 sec - 2,917,818,703 cycles # 2.769 GHz - 7,260,965,647 instructions # 2.49 insn per cycle - 1.054753836 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:12273) (512y: 0) (512z: 0) +TOTAL : 1.012887 sec + 2,950,530,206 cycles # 2.903 GHz + 7,252,358,943 instructions # 2.46 insn per cycle + 1.016940562 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:12263) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -155,20 +155,20 @@ Relative difference = 5.008331292535666e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.848490e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.856955e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.856955e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.851739e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.860081e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.860081e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.895882 sec - 2,613,663,217 cycles # 2.907 GHz - 6,558,326,054 instructions # 2.51 insn per cycle - 0.900676022 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11966) (512y: 26) (512z: 0) +TOTAL : 0.893117 sec + 2,605,469,056 cycles # 2.906 GHz + 6,549,528,920 instructions # 2.51 insn per cycle + 0.897080094 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11948) (512y: 26) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -181,20 +181,20 @@ Relative difference = 5.008331292535666e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.479912e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.485645e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.485645e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.502391e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.508119e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.508119e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.117454 sec - 2,129,795,413 cycles # 1.900 GHz - 3,489,517,961 instructions # 1.64 insn per cycle - 1.122073937 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2901) (512y: 23) (512z:10269) +TOTAL : 1.099172 sec + 2,123,741,528 cycles # 1.926 GHz + 3,480,482,498 instructions # 1.64 insn per cycle + 1.103291837 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2903) (512y: 22) (512z:10276) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index 313bb42091..31738cc5a1 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -36,61 +36,61 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-08-15_08:21:57 +DATE: 2023-10-25_18:57:39 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.395758e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.460925e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.465060e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.596965e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.633834e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.637880e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.491081 sec - 2,078,405,452 cycles # 2.876 GHz - 2,866,034,185 instructions # 1.38 insn per cycle - 0.779842200 seconds time elapsed +TOTAL : 0.481755 sec + 2,117,394,113 cycles # 2.997 GHz + 3,211,903,752 instructions # 1.52 insn per cycle + 0.763834850 seconds time elapsed runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.825853e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.885521e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.888169e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.702969e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.752015e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.754390e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.848365 sec - 6,206,449,631 cycles # 2.970 GHz - 13,392,412,167 instructions # 2.16 insn per cycle - 2.146713266 seconds time elapsed +TOTAL : 1.845325 sec + 6,364,196,440 cycles # 3.044 GHz + 12,697,584,410 instructions # 2.00 insn per cycle + 2.147798753 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 6.626454e-04 -Avg ME (F77/CUDA) = 6.6262662649554244E-004 -Relative difference = 2.833114733400458e-05 +Avg ME (F77/CUDA) = 
6.6262660579844562E-004 +Relative difference = 2.836238137986709e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.780076e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.780894e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.780894e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.860218e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.861043e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.861043e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 28.383541 sec - 87,166,803,809 cycles # 3.071 GHz - 135,545,164,925 instructions # 1.56 insn per cycle - 28.388103344 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:15458) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 27.993515 sec + 85,967,133,243 cycles # 3.071 GHz + 135,563,627,438 instructions # 1.58 insn per cycle + 27.997422911 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:15486) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,19 +103,19 @@ Relative difference = 4.195614963669944e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.375173e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.389905e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.389905e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.196137e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.208997e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.208997e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.232940 sec - 6,813,994,495 cycles # 3.046 GHz - 19,395,856,674 instructions # 2.85 insn per cycle - 2.238369966 seconds time elapsed +TOTAL : 2.287867 sec + 6,773,769,099 cycles # 2.957 GHz + 19,387,600,160 instructions # 2.86 insn per cycle + 2.291838045 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:69680) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest.exe @@ -129,19 +129,19 @@ Relative difference = 4.0849182767952624e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP 
[gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.498442e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.504286e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.504286e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.513484e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.519092e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.519092e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.103395 sec - 3,175,966,194 cycles # 2.870 GHz - 6,817,821,844 instructions # 2.15 insn per cycle - 1.108102716 seconds time elapsed +TOTAL : 1.091418 sec + 3,173,929,820 cycles # 2.900 GHz + 6,808,660,445 instructions # 2.15 insn per cycle + 1.095213929 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:49077) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 2.3520194007978538e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 1.808044e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.816407e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.816407e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.813250e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.821399e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.821399e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.922171 sec - 2,651,507,093 cycles # 2.875 GHz - 5,996,888,437 instructions # 2.26 insn per cycle - 0.927031811 seconds time elapsed +TOTAL : 0.912193 sec + 2,648,785,634 cycles # 2.893 GHz + 5,986,998,268 instructions # 2.26 insn per cycle + 0.916244855 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:42677) (512y: 11) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest.exe @@ -181,19 +181,19 @@ Relative difference = 2.3520194007978538e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.512862e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.518753e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.518753e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.539260e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.545021e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.545021e+04 ) sec^-1 MeanMatrixElemValue = ( 
4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.093278 sec - 2,078,575,504 cycles # 1.895 GHz - 3,510,282,026 instructions # 1.69 insn per cycle - 1.097953673 seconds time elapsed +TOTAL : 1.073101 sec + 2,071,594,759 cycles # 1.925 GHz + 3,501,390,779 instructions # 1.69 insn per cycle + 1.077005935 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5198) (512y: 3) (512z:44822) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index 8832827e81..cddff811bf 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -36,61 +36,61 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-08-15_08:22:49 +DATE: 2023-10-25_18:58:30 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.366921e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.431014e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.435160e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.566704e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.601747e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.605700e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.492320 sec - 2,070,385,138 cycles # 2.865 GHz - 2,831,765,240 instructions # 1.37 insn per cycle - 0.779919428 seconds time elapsed +TOTAL : 0.479762 sec + 2,129,975,460 cycles # 3.025 GHz + 3,262,724,571 instructions # 1.53 insn per cycle + 0.761351418 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.621752e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.677977e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.680342e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.654164e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.702666e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.704765e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.879104 sec - 6,357,166,988 cycles # 2.996 GHz - 13,486,685,413 instructions # 2.12 insn per cycle - 2.182208907 seconds time elapsed +TOTAL : 1.853079 sec + 6,372,411,967 cycles # 3.059 GHz + 13,261,029,776 instructions # 2.08 insn per cycle + 2.143146478 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 6.626454e-04 -Avg ME (F77/CUDA) = 6.6262662649554244E-004 -Relative difference = 2.833114733400458e-05 +Avg ME (F77/CUDA) = 6.6262660579844562E-004 +Relative difference = 2.836238137986709e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] 
[inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.926941e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.927826e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.927826e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.859455e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.860302e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.860302e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 27.679825 sec - 84,768,166,768 cycles # 3.062 GHz - 136,006,770,863 instructions # 1.60 insn per cycle - 27.684475907 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:15937) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 27.996994 sec + 86,063,307,798 cycles # 3.074 GHz + 135,905,248,930 instructions # 1.58 insn per cycle + 28.000969025 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:15910) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -103,19 +103,19 @@ Relative difference = 4.0361421941458736e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.055087e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.067922e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.067922e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.111595e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.124605e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.124605e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.333434 sec - 6,800,648,023 cycles # 2.910 GHz - 19,447,790,102 instructions # 2.86 insn per cycle - 2.338354760 seconds time elapsed +TOTAL : 2.313738 sec + 6,851,236,852 cycles # 2.957 GHz + 19,439,512,273 instructions # 2.84 insn per cycle + 2.317708744 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:69722) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest.exe @@ -129,19 +129,19 @@ Relative difference = 4.170542995014107e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP 
[gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.542489e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.548518e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.548518e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.540866e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.546584e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.546584e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.071802 sec - 3,111,815,719 cycles # 2.894 GHz - 6,728,747,187 instructions # 2.16 insn per cycle - 1.076475555 seconds time elapsed +TOTAL : 1.071791 sec + 3,105,453,036 cycles # 2.888 GHz + 6,719,669,630 instructions # 2.16 insn per cycle + 1.075697475 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:47667) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 2.4912983202981302e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 1.829500e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.838005e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.838005e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.816139e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.824278e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.824278e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.906923 sec - 2,637,944,229 cycles # 2.901 GHz - 5,979,583,857 instructions # 2.27 insn per cycle - 0.911799920 seconds time elapsed +TOTAL : 0.910723 sec + 2,625,346,963 cycles # 2.872 GHz + 5,970,291,755 instructions # 2.27 insn per cycle + 0.914693861 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:41842) (512y: 13) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest.exe @@ -181,19 +181,19 @@ Relative difference = 2.4912983202981302e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.482750e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.488249e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.488249e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.541127e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.546712e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.546712e+04 ) sec^-1 MeanMatrixElemValue = ( 
4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.115491 sec - 2,082,566,778 cycles # 1.862 GHz - 3,504,063,228 instructions # 1.68 insn per cycle - 1.120169830 seconds time elapsed +TOTAL : 1.071854 sec + 2,074,441,089 cycles # 1.930 GHz + 3,494,899,079 instructions # 1.68 insn per cycle + 1.075757314 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4162) (512y: 4) (512z:44465) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index e7c0a23b5c..7ad6f63659 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -36,38 +36,38 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-08-15_08:05:17 +DATE: 2023-10-25_18:42:00 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.422363e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.468020e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.470345e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.490294e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.513614e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.515507e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.530782 sec - 2,220,044,517 cycles # 2.895 GHz - 3,146,111,901 instructions # 1.42 insn per cycle - 0.827597323 seconds time elapsed +TOTAL : 0.518671 sec + 2,226,766,382 cycles # 2.986 GHz + 3,540,179,326 instructions # 1.59 insn per cycle + 0.807219432 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.136489e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.165443e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.166620e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.120867e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.148001e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.149117e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.042440 sec - 10,003,660,868 cycles # 3.030 GHz - 22,703,917,020 instructions # 2.27 insn per cycle - 3.361771302 seconds time elapsed +TOTAL : 3.019190 sec + 10,060,652,484 cycles # 3.078 GHz + 22,177,348,496 instructions # 2.20 insn per cycle + 3.325766752 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,20 +77,20 @@ Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.976585e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.977873e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.977873e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.954175e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.955102e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.955102e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.309361 sec - 25,674,704,286 cycles # 3.089 GHz - 79,183,385,361 instructions # 3.08 insn per cycle - 8.314358382 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4708) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.401551 sec + 26,112,965,520 cycles # 3.107 GHz + 79,187,055,919 instructions # 3.03 insn per cycle + 8.405522832 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4746) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,19 +103,19 @@ Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.705238e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.709428e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.709428e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.704056e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.707340e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.707340e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.437753 sec - 12,812,708,621 cycles # 2.886 GHz - 38,591,867,024 instructions # 3.01 insn per cycle - 4.442817559 seconds time elapsed +TOTAL : 4.436595 sec + 12,893,512,565 cycles # 2.905 GHz + 38,578,382,892 instructions # 2.99 insn per cycle + 4.440842197 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13136) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest.exe @@ -129,20 +129,20 @@ Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = 
SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.522545e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.545503e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.545503e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.529594e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.548137e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.548137e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.935510 sec - 5,601,392,957 cycles # 2.889 GHz - 13,721,052,527 instructions # 2.45 insn per cycle - 1.940418002 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11246) (512y: 0) (512z: 0) +TOTAL : 1.931885 sec + 5,592,758,933 cycles # 2.891 GHz + 13,704,166,637 instructions # 2.45 insn per cycle + 1.936090809 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11245) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -155,20 +155,20 @@ Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.666679e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.696450e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.696450e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.692246e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.714062e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.714062e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.706777 sec - 4,941,339,381 cycles # 2.888 GHz - 12,361,926,927 instructions # 2.50 insn per cycle - 1.712002744 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10898) (512y: 79) (512z: 0) +TOTAL : 1.701537 sec + 4,935,885,889 cycles # 2.895 GHz + 12,346,516,315 instructions # 2.50 insn per cycle + 1.705790521 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10897) (512y: 79) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -181,20 +181,20 @@ Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.527956e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.546398e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.546398e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.612609e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.626629e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.626629e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.189538 sec - 4,169,817,395 cycles # 1.901 GHz - 6,455,711,002 instructions # 1.55 insn per cycle - 2.194538825 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1796) (512y: 93) (512z:10086) +TOTAL : 2.163571 sec + 4,153,048,865 cycles # 1.917 GHz + 6,440,968,926 instructions # 1.55 insn per cycle + 2.167665946 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1803) (512y: 93) (512z:10092) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index 62cebd797f..f056a45974 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -36,38 +36,38 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-08-15_08:05:54 +DATE: 2023-10-25_18:42:36 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.450693e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.499337e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.501640e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.481563e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.505145e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.507791e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.527663 sec - 2,207,556,580 cycles # 2.897 GHz - 3,167,926,058 instructions # 1.44 insn per cycle - 0.822538371 seconds time elapsed +TOTAL : 0.521573 sec + 2,153,426,616 cycles # 2.862 GHz + 3,368,297,517 instructions # 1.56 insn per cycle + 0.812765365 seconds time elapsed runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.137091e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.166130e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.167323e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.140316e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.167722e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.168848e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.040251 sec - 10,050,103,857 cycles # 3.047 GHz - 22,708,720,443 instructions # 2.26 insn per cycle - 3.354433991 seconds time elapsed +TOTAL : 3.005546 sec + 10,001,668,357 cycles # 3.073 GHz + 22,545,107,075 instructions # 2.25 insn per cycle + 3.311305896 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -77,20 +77,20 @@ Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) 
========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.970205e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.971421e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.971421e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.946429e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.947329e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.947329e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.334167 sec - 25,659,809,564 cycles # 3.078 GHz - 79,207,559,063 instructions # 3.09 insn per cycle - 8.339421888 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4383) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.435831 sec + 26,126,854,101 cycles # 3.097 GHz + 79,204,576,073 instructions # 3.03 insn per cycle + 8.439924281 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4401) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -103,20 +103,20 @@ Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.715781e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.720204e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.720204e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.694235e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.697554e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.697554e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.423645 sec - 12,805,908,593 cycles # 2.892 GHz - 38,546,927,376 instructions # 3.01 insn per cycle - 4.428745496 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12902) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.448208 sec + 12,894,092,255 cycles # 2.897 GHz + 38,538,252,439 instructions # 2.99 insn per cycle + 4.452458550 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:12903) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -129,20 +129,20 @@ Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.552852e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.577102e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.577102e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.136495e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.152364e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.152364e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.928072 sec - 5,590,516,918 cycles # 2.894 GHz - 13,839,830,857 instructions # 2.48 insn per cycle - 1.932926149 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11349) (512y: 0) (512z: 0) +TOTAL : 2.024697 sec + 5,646,666,731 cycles # 2.789 GHz + 13,825,634,230 instructions # 2.45 insn per cycle + 2.029023847 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11327) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -155,20 +155,20 @@ Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.504293e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.533513e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.533513e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.556941e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.579305e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.579305e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.736286 sec - 4,999,124,094 cycles # 2.873 GHz - 12,491,426,446 instructions # 2.50 insn per cycle - 1.741494791 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10894) (512y: 239) (512z: 0) +TOTAL : 1.725082 sec + 4,994,411,865 cycles # 2.889 GHz + 12,477,409,386 instructions # 2.50 insn per cycle + 1.729413379 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10888) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -181,20 +181,20 @@ Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.587755e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.605628e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.605628e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.605322e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.619068e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.619068e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.172271 sec - 4,166,068,862 cycles # 1.914 GHz - 6,558,419,576 instructions # 1.57 insn per cycle - 2.177416583 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1626) (512y: 191) (512z:10049) +TOTAL : 2.165836 sec + 4,156,250,470 cycles # 1.916 GHz + 6,542,526,880 instructions # 1.57 insn per cycle + 2.169952357 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1628) (512y: 191) (512z:10036) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 80fba0b18f..55b25786bd 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -36,38 +36,38 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-08-15_08:08:16 +DATE: 2023-10-25_18:44:51 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.060900e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.062148e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.062247e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.070749e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.071143e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.071250e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.389867 sec - 8,215,695,678 cycles # 3.030 GHz - 17,275,019,503 instructions # 2.10 insn per cycle - 2.770513749 seconds time elapsed +TOTAL : 2.417941 sec + 8,377,415,337 cycles # 3.062 GHz + 18,838,612,351 instructions # 2.25 insn per cycle + 2.794089225 seconds time elapsed 
runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.207928e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.211176e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.211361e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.235176e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.237005e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.237223e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.993094 sec - 13,152,666,523 cycles # 3.046 GHz - 31,702,540,241 instructions # 2.41 insn per cycle - 4.376722174 seconds time elapsed +TOTAL : 3.993841 sec + 13,055,750,655 cycles # 3.026 GHz + 31,160,662,070 instructions # 2.39 insn per cycle + 4.373705613 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,20 +77,20 @@ Relative difference = 3.5164777671934515e-07 OK (relative difference <= 5E-3) 
========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.114407e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.114707e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.114707e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.897548e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.897775e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.897775e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.512842 sec - 19,958,191,083 cycles # 3.063 GHz - 54,053,553,080 instructions # 2.71 insn per cycle - 6.517944789 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32344) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.705050 sec + 19,396,394,057 cycles # 2.892 GHz + 54,051,876,234 instructions # 2.79 insn per cycle + 6.708932383 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32354) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,19 +103,19 @@ Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.646584e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.646697e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.646697e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.653870e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.653969e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.653969e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.212662 sec - 9,945,345,971 cycles # 3.094 GHz - 27,088,220,434 instructions # 2.72 insn per cycle - 3.217491965 seconds time elapsed +TOTAL : 3.197917 sec + 9,907,124,994 cycles # 3.095 GHz + 27,081,765,597 instructions # 2.73 insn per cycle + 3.202038670 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96405) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe @@ -129,19 +129,19 @@ Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = 
SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.529039e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.529559e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.529559e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.542258e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.542698e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.542698e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.502572 sec - 4,344,096,873 cycles # 2.885 GHz - 9,674,263,200 instructions # 2.23 insn per cycle - 1.507666318 seconds time elapsed +TOTAL : 1.497561 sec + 4,341,680,359 cycles # 2.893 GHz + 9,666,416,740 instructions # 2.23 insn per cycle + 1.501545706 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84384) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / 
`nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.992732e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.993396e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.993396e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.866674e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.867185e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.867185e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.328375 sec - 3,847,060,555 cycles # 2.889 GHz - 8,624,394,596 instructions # 2.24 insn per cycle - 1.333304826 seconds time elapsed +TOTAL : 1.370361 sec + 3,840,509,187 cycles # 2.796 GHz + 8,617,030,376 instructions # 2.24 insn per cycle + 1.374450501 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84025) (512y: 89) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe @@ -181,20 +181,20 @@ Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.596524e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.597264e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.597264e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.733060e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.733603e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.733603e+02 ) sec^-1 
MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.473376 sec - 2,714,389,029 cycles # 1.837 GHz - 4,343,683,031 instructions # 1.60 insn per cycle - 1.478445198 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2293) (512y: 103) (512z:83066) +TOTAL : 1.420204 sec + 2,707,945,792 cycles # 1.903 GHz + 4,335,943,514 instructions # 1.60 insn per cycle + 1.424239943 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2300) (512y: 103) (512z:83067) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 1be170b683..94a23e2f12 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -36,26 +36,26 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-08-15_08:31:48 +DATE: 2023-10-25_19:06:41 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! 
Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.060453e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.061571e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.061571e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.066351e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.067339e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.067339e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.369462 sec - 8,134,002,448 cycles # 3.017 GHz - 18,269,507,455 instructions # 2.25 insn per cycle - 2.752099708 seconds time elapsed +TOTAL : 2.354612 sec + 8,162,423,266 cycles # 3.056 GHz + 18,099,441,211 instructions # 2.22 insn per cycle + 2.730850635 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -69,17 +69,17 @@ WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.152234e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.192819e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.192819e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.246162e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.277852e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.277852e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 4.012383 sec - 13,220,105,631 cycles # 3.045 GHz - 31,192,462,945 instructions # 2.36 insn per cycle - 4.397831586 seconds time elapsed +TOTAL : 3.971347 sec + 13,204,436,201 cycles # 3.078 GHz + 30,282,503,163 instructions # 2.29 insn per cycle + 4.348676377 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,20 +90,20 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.311172e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.311488e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.311488e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.244919e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.245139e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.245139e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.356497 sec - 19,159,072,593 cycles # 3.013 GHz - 54,053,597,485 instructions # 2.82 insn per cycle - 6.361488517 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32344) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.408828 sec + 19,448,715,885 cycles # 3.033 GHz + 54,050,853,106 instructions # 2.78 insn per cycle + 6.412664226 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32354) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -117,19 +117,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.618618e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.618732e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.618732e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.650316e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.650406e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.650406e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.276148 sec - 10,008,076,049 cycles # 3.052 GHz - 27,089,522,265 instructions # 2.71 insn per cycle - 3.281084610 seconds time elapsed +TOTAL : 3.204606 sec + 9,890,944,577 cycles # 3.084 GHz + 27,082,213,615 instructions # 2.74 insn per cycle + 3.208413447 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96405) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe @@ -144,19 +144,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.522177e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.522803e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.522803e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.546707e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.547140e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.547140e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.505343 sec - 4,322,216,596 cycles # 2.865 GHz - 9,675,489,699 instructions # 2.24 insn per cycle - 1.510154474 seconds time elapsed +TOTAL : 1.493585 sec + 4,325,320,238 cycles # 2.890 GHz + 9,667,464,688 instructions # 2.24 insn per cycle + 1.497469046 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84384) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe @@ -171,19 +171,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.959978e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.960721e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.960721e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.037834e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.038367e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.038367e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.339988 sec - 3,869,344,477 cycles # 2.880 GHz - 8,625,428,660 instructions # 2.23 insn per cycle - 1.344873313 seconds time elapsed +TOTAL : 1.313129 sec + 3,813,971,496 cycles # 2.897 GHz + 8,617,412,652 instructions # 2.26 insn per cycle + 1.316983127 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84025) (512y: 89) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe @@ -198,20 +198,20 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.665923e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.666615e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.666615e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.743342e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.743918e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.743918e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.445605 sec - 2,710,048,805 cycles # 1.872 GHz - 4,344,691,123 instructions # 1.60 insn per cycle - 1.450245895 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2293) (512y: 103) (512z:83066) +TOTAL : 1.415154 sec + 2,707,174,442 cycles # 1.909 GHz + 4,336,832,605 instructions # 1.60 insn per cycle + 1.419043941 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2300) (512y: 103) (512z:83067) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index b48d707ba0..e3241a7638 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -36,38 +36,38 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-08-15_08:09:20 +DATE: 2023-10-25_18:45:54 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.052449e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.053751e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.053867e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.058972e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.059386e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.059482e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.401792 sec - 8,176,355,448 cycles # 3.002 GHz - 18,453,741,110 instructions # 2.26 insn per cycle - 2.783918975 seconds time elapsed +TOTAL : 2.421465 sec + 8,436,981,615 cycles # 3.067 GHz + 19,028,712,837 instructions # 2.26 insn per cycle + 2.810095670 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.211968e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.215119e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.215326e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.255956e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.257794e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.257987e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.995060 sec - 13,201,969,473 cycles # 3.051 GHz - 29,028,921,322 instructions # 2.20 insn per cycle - 4.382926828 seconds time elapsed +TOTAL : 3.982903 sec + 13,219,371,069 cycles # 3.063 GHz + 29,416,443,528 instructions # 2.23 insn per cycle + 4.375059399 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -77,20 +77,20 @@ Relative difference = 3.5164777671934515e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.385858e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.386156e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.386156e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.015099e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.015315e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.015315e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.303840 sec - 19,464,475,657 cycles # 3.088 GHz - 54,075,582,748 instructions # 2.78 insn per cycle - 6.308485796 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32250) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.593815 sec + 19,095,708,850 cycles # 2.895 GHz + 54,047,292,212 instructions # 2.83 insn per cycle + 6.597605539 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:31965) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -103,20 +103,20 @@ Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.630524e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.630632e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.630632e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.634946e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.635033e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.635033e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.248802 sec - 9,955,685,609 cycles # 3.062 GHz - 27,083,252,200 instructions # 2.72 insn per cycle - 3.253385767 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96261) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.235048 sec + 10,011,189,889 cycles # 3.092 GHz + 27,077,379,591 instructions # 2.70 insn per cycle + 3.239059369 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96257) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -129,19 +129,19 @@ Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.474200e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.474751e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.474751e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.540865e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.541284e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.541284e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.526083 sec - 4,358,815,644 cycles # 2.850 GHz - 9,685,708,561 instructions # 2.22 insn per cycle - 1.530767974 seconds time elapsed +TOTAL : 1.495827 sec + 4,323,882,817 cycles # 2.884 GHz + 9,677,765,192 instructions # 2.24 insn per cycle + 1.499825664 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84456) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = 
SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.953611e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.954307e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.954307e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.986806e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.987336e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.987336e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.341317 sec - 3,852,272,040 cycles # 2.864 GHz - 8,634,436,493 instructions # 2.24 insn per cycle - 1.345985072 seconds time elapsed +TOTAL : 1.330112 sec + 3,818,292,084 cycles # 2.864 GHz + 8,626,392,875 instructions # 2.26 insn per cycle + 1.334108022 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:83903) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest.exe @@ -181,20 +181,20 @@ Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP 
threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.652417e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.653091e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.653091e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.736329e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.736878e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.736878e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.451809 sec - 2,728,793,957 cycles # 1.876 GHz - 4,352,618,842 instructions # 1.60 insn per cycle - 1.456299369 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2175) (512y: 185) (512z:83037) +TOTAL : 1.419053 sec + 2,712,716,906 cycles # 1.907 GHz + 4,344,880,705 instructions # 1.60 insn per cycle + 1.423142145 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2177) (512y: 185) (512z:83030) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index c2f69eff61..6327c32a36 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -36,61 +36,61 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-08-15_08:10:24 +DATE: 2023-10-25_18:46:57 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.817389e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.820205e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.820500e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.757288e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.758127e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.758502e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.617573 sec - 5,689,264,894 cycles # 3.003 GHz - 12,093,638,450 instructions # 2.13 insn per cycle - 1.953515172 seconds time elapsed +TOTAL : 1.655983 sec + 5,859,016,098 cycles # 3.037 GHz + 12,599,305,189 instructions # 2.15 insn per cycle + 1.988634806 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.287920e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.289001e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.289084e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.346728e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.347386e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.347472e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.877475 sec - 6,493,452,356 cycles # 3.010 GHz - 13,536,213,574 instructions # 2.08 insn per cycle - 2.214676058 seconds time elapsed +TOTAL : 1.915477 sec + 6,698,649,731 cycles # 3.061 GHz + 13,457,604,803 instructions # 2.01 insn per cycle + 2.247936467 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 9.849636e-03 -Avg ME (F77/CUDA) = 9.8712405367932642E-003 -Relative difference = 0.002193435046052877 +Avg ME (F77/CUDA) = 9.8712405367667715E-003 +Relative difference = 0.0021934350433631634 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 
11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.933156e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.933445e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.933445e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.909786e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.910066e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.910066e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.930546 sec - 18,238,911,118 cycles # 3.076 GHz - 53,645,677,508 instructions # 2.94 insn per cycle - 5.935454775 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20320) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.942364 sec + 18,295,731,836 cycles # 3.078 GHz + 53,640,525,145 instructions # 2.93 insn per cycle + 5.946250751 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20286) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -98,24 +98,24 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087550399E-003 -Relative difference = 2.119779305548787e-08 +Avg ME (F77/C++) = 9.8479612087551509E-003 +Relative difference = 2.119780432912131e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.592142e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.592603e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.592603e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.560034e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.560473e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.560473e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.477317 sec - 4,525,666,351 cycles # 3.058 GHz - 13,770,742,105 instructions # 3.04 insn per cycle - 1.481927048 seconds time elapsed +TOTAL : 1.488735 sec + 4,616,421,294 cycles # 3.094 GHz + 13,762,957,080 instructions # 2.98 insn per cycle + 1.492690614 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96921) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe @@ -129,19 +129,19 @@ Relative difference = 3.1515505172940424e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.977692e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.979690e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.979690e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.154668e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.156604e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.156604e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.763048 sec - 2,202,514,087 cycles # 2.872 GHz - 4,877,098,573 instructions # 2.21 insn per cycle - 0.767957660 seconds time elapsed +TOTAL : 0.743016 sec + 2,158,936,332 cycles # 2.892 GHz + 4,868,873,872 instructions # 2.26 insn per cycle + 0.746953594 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84898) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.981050e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.983277e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.983277e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.993469e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.995570e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.995570e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.668655 sec - 1,928,988,428 cycles # 2.872 GHz - 4,349,149,408 instructions # 2.25 insn per cycle - 0.673285423 seconds time elapsed +TOTAL : 0.665634 sec + 1,930,674,595 cycles # 2.886 GHz + 4,341,032,805 instructions # 2.25 insn per cycle + 0.669411803 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84581) (512y: 44) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe @@ -181,20 +181,20 @@ Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.478929e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.481681e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.481681e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.422100e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.424326e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.424326e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.713031 sec - 1,364,935,338 cycles # 1.905 GHz - 2,199,428,003 instructions # 1.61 insn per cycle - 0.718033552 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2884) (512y: 48) (512z:83271) +TOTAL : 0.716875 sec + 1,362,810,793 cycles # 1.892 GHz + 2,191,758,925 instructions # 1.61 insn per cycle + 0.720813478 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2896) (512y: 47) (512z:83271) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index 47c696a06e..3a8d1c9eac 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -36,26 +36,26 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-08-15_08:32:52 +DATE: 2023-10-25_19:07:44 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.603720e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.605621e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.605621e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.793927e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.795607e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.795607e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187094e-05 +- 9.825664e-06 ) GeV^-6 -TOTAL : 1.622946 sec - 5,724,029,892 cycles # 2.999 GHz - 12,052,755,071 instructions # 2.11 insn per cycle - 1.966452002 seconds time elapsed +TOTAL : 1.593125 sec + 5,711,497,040 cycles # 3.065 GHz + 11,071,875,711 instructions # 1.94 insn per cycle + 1.922969062 seconds time elapsed runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -69,41 +69,41 @@ WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.303465e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.316648e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.316648e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.332193e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.344979e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.344979e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856441e-04 +- 8.331096e-05 ) GeV^-6 -TOTAL : 1.899173 sec - 6,557,417,589 cycles # 3.011 GHz - 13,516,884,926 instructions # 2.06 insn per cycle - 2.237244968 seconds time elapsed +TOTAL : 1.859000 sec + 6,562,180,386 cycles # 3.077 GHz + 14,027,341,556 instructions # 2.14 insn per cycle + 2.188744808 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg 
ME (C++/CUDA) = 9.849636e-03 -Avg ME (F77/CUDA) = 9.8712405367932642E-003 -Relative difference = 0.002193435046052877 +Avg ME (F77/CUDA) = 9.8712405367667715E-003 +Relative difference = 0.0021934350433631634 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.907375e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.907662e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.907662e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.934162e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.934430e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.934430e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.936791 sec - 18,252,219,781 cycles # 3.074 GHz - 53,645,575,809 instructions # 2.94 insn per cycle - 5.941222879 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20320) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.914443 sec + 18,289,747,509 cycles # 3.091 GHz + 53,640,880,499 instructions # 2.93 insn per cycle + 5.918348520 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20286) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -111,25 +111,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087550399E-003 -Relative difference = 2.119779305548787e-08 +Avg ME (F77/C++) = 9.8479612087551509E-003 +Relative difference = 2.119780432912131e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.604346e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.604809e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.604809e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.554596e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.555024e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.555024e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.471022 sec - 4,520,493,698 cycles # 3.066 GHz - 13,771,561,641 instructions # 3.05 insn per cycle - 1.475822140 seconds time elapsed +TOTAL : 1.490597 sec + 4,617,566,850 cycles # 3.091 GHz + 13,763,927,839 instructions # 2.98 insn per cycle + 1.494414225 seconds time elapsed =Symbols in CPPProcess.o= 
(~sse4:96921) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe @@ -144,19 +144,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.900672e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.902327e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.902327e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.178518e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.180233e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.180233e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.771686 sec - 2,223,639,585 cycles # 2.868 GHz - 4,878,036,591 instructions # 2.19 insn per cycle - 0.776555403 seconds time elapsed +TOTAL : 0.740375 sec + 2,151,024,422 cycles # 2.893 GHz + 4,869,911,860 instructions # 2.26 insn per cycle + 0.744208061 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84898) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe @@ -171,19 +171,19 @@ OK (relative difference <= 5E-3) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.972027e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.974296e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.974296e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.974786e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.976961e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.976961e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.668515 sec - 1,929,712,588 cycles # 2.872 GHz - 4,350,045,769 instructions # 2.25 insn per cycle - 0.673226229 seconds time elapsed +TOTAL : 0.667174 sec + 1,931,553,706 cycles # 2.882 GHz + 4,342,018,470 instructions # 2.25 insn per cycle + 0.670962833 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84581) (512y: 44) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe @@ -198,20 +198,20 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.310786e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.313011e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.313011e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.427390e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.429642e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.429642e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.729035 sec - 1,369,441,194 cycles # 1.870 GHz - 2,200,529,525 instructions # 1.61 insn per cycle - 0.733767903 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2884) (512y: 48) (512z:83271) +TOTAL : 0.716682 sec + 1,362,755,127 cycles # 1.894 GHz + 2,192,432,791 instructions # 1.61 insn per cycle + 0.720592858 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2896) (512y: 47) (512z:83271) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index 9da08c83fd..ea39ad8994 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -36,61 +36,61 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-08-15_08:11:12 +DATE: 2023-10-25_18:47:44 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.623177e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.625959e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.626212e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.770785e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.771818e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.772102e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.645327 sec - 5,745,791,715 cycles # 2.977 GHz - 11,621,961,179 instructions # 2.02 insn per cycle - 1.988370539 seconds time elapsed +TOTAL : 1.644101 sec + 5,902,851,310 cycles # 3.071 GHz + 12,347,610,066 instructions # 2.09 insn per cycle + 1.979279139 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.261103e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.262286e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.262365e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.344670e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.345330e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.345408e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.900522 sec - 6,625,830,421 cycles # 3.021 GHz - 13,870,125,042 instructions # 2.09 insn per cycle - 2.251246688 seconds time elapsed +TOTAL : 1.896839 sec + 6,669,880,941 cycles # 3.074 GHz + 13,819,367,336 instructions # 2.07 insn per cycle + 2.228526429 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 9.849636e-03 -Avg ME (F77/CUDA) = 9.8712405367932608E-003 -Relative difference = 0.0021934350460525243 +Avg ME (F77/CUDA) = 9.8712405367667715E-003 +Relative difference = 0.0021934350433631634 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 
11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.922448e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.922723e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.922723e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.942946e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.943229e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.943229e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.927760 sec - 18,213,792,071 cycles # 3.072 GHz - 53,662,418,994 instructions # 2.95 insn per cycle - 5.932795623 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20477) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.908590 sec + 18,230,304,521 cycles # 3.084 GHz + 53,620,524,232 instructions # 2.94 insn per cycle + 5.912442239 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20241) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -98,24 +98,24 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087571129E-003 -Relative difference = 2.119800355536229e-08 +Avg ME (F77/C++) = 9.8479612087572898E-003 +Relative difference = 2.1198021522715588e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.621315e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.621811e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.621811e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.576079e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.576533e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.576533e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.463851 sec - 4,517,955,155 cycles # 3.080 GHz - 13,763,574,365 instructions # 3.05 insn per cycle - 1.468420531 seconds time elapsed +TOTAL : 1.481262 sec + 4,588,697,172 cycles # 3.091 GHz + 13,755,977,699 instructions # 3.00 insn per cycle + 1.485242600 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96593) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest.exe @@ -129,20 +129,20 @@ Relative difference = 3.151856596628469e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.102450e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.104369e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.104369e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.020825e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.022661e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.022661e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.749632 sec - 2,159,833,683 cycles # 2.868 GHz - 4,885,205,971 instructions # 2.26 insn per cycle - 0.754130887 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:85271) (512y: 0) (512z: 0) +TOTAL : 0.756593 sec + 2,190,032,975 cycles # 2.882 GHz + 4,877,215,136 instructions # 2.23 insn per cycle + 0.760480627 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:85321) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -155,20 +155,20 @@ Relative difference = 1.85880227405429e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.966817e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.969220e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.969220e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.993268e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.995423e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.995423e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.669184 sec - 1,936,191,266 cycles # 2.878 GHz - 4,356,892,283 instructions # 2.25 insn per cycle - 0.673682607 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:85056) (512y: 24) (512z: 0) +TOTAL : 0.665169 sec + 1,931,098,294 cycles # 2.889 GHz + 4,348,628,190 instructions # 2.25 insn per cycle + 0.669029492 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84987) (512y: 24) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -181,20 +181,20 @@ Relative difference = 1.85880227405429e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.196993e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.199545e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.199545e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.452710e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.455265e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.455265e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.740090 sec - 1,384,261,381 cycles # 1.861 GHz - 2,208,765,319 instructions # 1.60 insn per cycle - 0.744974646 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3500) (512y: 33) (512z:83441) +TOTAL : 0.713644 sec + 1,367,244,738 cycles # 1.907 GHz + 2,200,694,530 instructions # 1.61 insn per cycle + 0.717609354 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3502) (512y: 32) (512z:83441) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 5f8ec2e089..7bedc1f54b 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -36,38 +36,38 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-08-15_08:11:59 +DATE: 2023-10-25_18:48:30 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.670105e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.671739e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.671928e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.689047e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.689716e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.689840e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.129095 sec - 7,309,412,823 cycles # 2.979 GHz - 16,121,474,446 instructions # 2.21 insn per cycle - 2.511223208 seconds time elapsed +TOTAL : 2.165623 sec + 7,623,672,543 cycles # 3.068 GHz + 16,620,823,162 instructions # 2.18 insn per cycle + 2.541779519 seconds time elapsed 
runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.108729e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.109170e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.109194e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.116092e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.116360e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.116394e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.397563 sec - 11,348,855,392 cycles # 3.040 GHz - 27,320,618,219 instructions # 2.41 insn per cycle - 3.792342967 seconds time elapsed +TOTAL : 3.384767 sec + 11,295,486,944 cycles # 3.039 GHz + 26,143,309,789 instructions # 2.31 insn per cycle + 3.772489660 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,20 +77,20 @@ Relative difference = 3.1385249252060663e-07 OK (relative difference <= 5E-3) 
========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.366085e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.366386e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.366386e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.338757e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.339014e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.339014e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.326826 sec - 19,384,652,452 cycles # 3.062 GHz - 54,291,405,977 instructions # 2.80 insn per cycle - 6.331822155 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:31979) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.354208 sec + 19,488,655,111 cycles # 3.066 GHz + 54,285,293,279 instructions # 2.79 insn per cycle + 6.358206624 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:31983) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,19 +103,19 @@ Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.611293e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.611405e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.611405e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.580188e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.580272e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.580272e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.287257 sec - 9,520,222,893 cycles # 2.895 GHz - 26,121,547,178 instructions # 2.74 insn per cycle - 3.291758740 seconds time elapsed +TOTAL : 3.350145 sec + 9,541,746,297 cycles # 2.846 GHz + 26,114,002,349 instructions # 2.74 insn per cycle + 3.354075604 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:95979) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest.exe @@ -129,19 +129,19 @@ Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = 
SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.681119e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.681692e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.681692e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.673808e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.674247e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.674247e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.440055 sec - 4,148,952,901 cycles # 2.876 GHz - 9,338,763,918 instructions # 2.25 insn per cycle - 1.445107953 seconds time elapsed +TOTAL : 1.442586 sec + 4,186,759,479 cycles # 2.896 GHz + 9,337,503,071 instructions # 2.23 insn per cycle + 1.446618657 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84147) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / 
`nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.218107e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.218862e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.218862e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.207665e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.208316e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.208316e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.257512 sec - 3,633,626,273 cycles # 2.881 GHz - 8,314,696,967 instructions # 2.29 insn per cycle - 1.262536037 seconds time elapsed +TOTAL : 1.260207 sec + 3,641,116,614 cycles # 2.881 GHz + 8,312,794,650 instructions # 2.28 insn per cycle + 1.264172407 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:83817) (512y: 79) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest.exe @@ -181,20 +181,20 @@ Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.695528e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.696316e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.696316e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.791929e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.792527e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.792527e+02 ) sec^-1 
MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.436681 sec - 2,642,631,321 cycles # 1.836 GHz - 4,240,641,282 instructions # 1.60 insn per cycle - 1.441269837 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2285) (512y: 93) (512z:82779) +TOTAL : 1.397651 sec + 2,653,173,022 cycles # 1.895 GHz + 4,233,021,275 instructions # 1.60 insn per cycle + 1.401768259 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2292) (512y: 93) (512z:82780) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index ab539291a5..0525b2e4c1 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -36,38 +36,38 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-08-15_08:13:01 +DATE: 2023-10-25_18:49:30 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.655737e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.657354e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.657527e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.679608e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.680082e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.680202e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.150810 sec - 7,429,217,381 cycles # 3.003 GHz - 15,053,638,777 instructions # 2.03 insn per cycle - 2.532126361 seconds time elapsed +TOTAL : 2.167186 sec + 7,636,339,894 cycles # 3.072 GHz + 15,813,775,134 instructions # 2.07 insn per cycle + 2.542843200 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.107795e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.108240e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.108265e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.107918e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.108184e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.108215e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.398964 sec - 11,335,112,372 cycles # 3.033 GHz - 26,200,903,178 instructions # 2.31 insn per cycle - 3.794779431 seconds time elapsed +TOTAL : 3.395689 sec + 11,403,686,722 cycles # 3.066 GHz + 26,502,494,448 instructions # 2.32 insn per cycle + 3.775763409 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -77,20 +77,20 @@ Relative difference = 3.1385249252060663e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.205581e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.205881e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.205881e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.358962e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.359192e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.359192e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.441909 sec - 19,418,391,360 cycles # 3.013 GHz - 54,296,934,460 instructions # 2.80 insn per cycle - 6.446485419 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32422) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.333189 sec + 19,421,629,325 cycles # 3.065 GHz + 54,272,919,506 instructions # 2.79 insn per cycle + 6.337193550 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32142) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -103,19 +103,19 @@ Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.605166e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.605275e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.605275e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.573591e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.573676e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.573676e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.298409 sec - 9,439,216,231 cycles # 2.860 GHz - 26,036,756,092 instructions # 2.76 insn per cycle - 3.302974381 seconds time elapsed +TOTAL : 3.359769 sec + 9,491,435,902 cycles # 2.826 GHz + 26,031,969,325 instructions # 2.74 insn per cycle + 3.363778642 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:95858) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest.exe @@ -129,19 +129,19 @@ Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = 
SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.692424e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.693001e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.693001e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.730901e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.731358e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.731358e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.436141 sec - 4,130,114,036 cycles # 2.869 GHz - 9,319,033,220 instructions # 2.26 insn per cycle - 1.440658695 seconds time elapsed +TOTAL : 1.420536 sec + 4,117,938,873 cycles # 2.893 GHz + 9,317,350,688 instructions # 2.26 insn per cycle + 1.424344563 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:83787) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / 
`nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.228544e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.229287e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.229287e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.227741e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.228352e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.228352e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.255330 sec - 3,631,851,616 cycles # 2.885 GHz - 8,310,847,527 instructions # 2.29 insn per cycle - 1.260145344 seconds time elapsed +TOTAL : 1.254191 sec + 3,641,355,182 cycles # 2.896 GHz + 8,309,383,106 instructions # 2.28 insn per cycle + 1.258235043 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:83306) (512y: 229) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest.exe @@ -181,20 +181,20 @@ Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.774847e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.775588e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.775588e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.820448e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.821095e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.821095e+02 ) sec^-1 
MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.406738 sec - 2,637,440,345 cycles # 1.872 GHz - 4,239,370,332 instructions # 1.61 insn per cycle - 1.411242699 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1729) (512y: 175) (512z:82792) +TOTAL : 1.387479 sec + 2,638,179,282 cycles # 1.897 GHz + 4,231,949,116 instructions # 1.60 insn per cycle + 1.391365284 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1731) (512y: 175) (512z:82815) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 7691ab57c4..db66144b99 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -36,38 +36,38 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-08-15_08:06:31 +DATE: 2023-10-25_18:43:12 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.756917e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.386054e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.750556e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.996510e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.551380e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.892828e+07 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.449699 sec - 1,944,728,666 cycles # 2.912 GHz - 2,421,761,171 instructions # 1.25 insn per cycle - 0.725114953 seconds time elapsed +TOTAL : 0.438393 sec + 1,953,535,468 cycles # 3.006 GHz + 2,779,051,405 instructions # 1.42 insn per cycle + 0.706976484 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.656769e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.576035e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.998689e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.793351e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.668509e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.060188e+07 ) sec^-1 MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 -TOTAL : 0.533637 sec - 2,211,814,951 cycles # 2.864 GHz - 2,887,560,241 instructions # 1.31 insn per cycle - 0.830704475 seconds time elapsed +TOTAL : 0.516556 sec + 2,254,798,816 cycles # 3.015 GHz + 3,256,611,216 instructions # 1.44 insn per cycle + 0.804635125 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +77,19 @@ Relative difference = 2.984467216677476e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.170227e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.203378e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.203378e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.142180e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.166897e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.166897e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 1.424484 sec - 4,413,499,773 cycles # 3.094 GHz - 12,854,148,105 instructions # 2.91 insn per cycle - 1.429447827 seconds time elapsed +TOTAL : 1.455956 sec + 4,526,705,701 cycles # 3.102 GHz + 12,813,772,224 instructions # 2.83 insn per cycle + 1.459879572 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 732) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe @@ -103,19 +103,19 @@ Relative difference = 2.9844565299804477e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.073346e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.180896e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.180896e+05 ) 
sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.059229e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.139815e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.139815e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.812922 sec - 2,507,689,277 cycles # 3.072 GHz - 7,235,529,848 instructions # 2.89 insn per cycle - 0.823426737 seconds time elapsed +TOTAL : 0.816563 sec + 2,541,682,069 cycles # 3.100 GHz + 7,194,219,151 instructions # 2.83 insn per cycle + 0.820635450 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3150) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe @@ -129,19 +129,19 @@ Relative difference = 2.9844565299804477e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.636144e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.974308e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.974308e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.555525e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.809689e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.809689e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.474060 sec - 1,392,319,534 cycles # 2.912 GHz - 3,007,129,515 instructions # 2.16 insn per cycle - 0.479278999 
seconds time elapsed +TOTAL : 0.482439 sec + 1,365,016,241 cycles # 2.809 GHz + 2,962,982,028 instructions # 2.17 insn per cycle + 0.486447941 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3017) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 2.9844659193456305e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.954912e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.358621e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.358621e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.042576e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.357609e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.357609e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.437714 sec - 1,282,899,010 cycles # 2.903 GHz - 2,860,715,054 instructions # 2.23 insn per cycle - 0.443030298 seconds time elapsed +TOTAL : 0.426283 sec + 1,250,204,594 cycles # 2.908 GHz + 2,816,555,243 instructions # 2.25 insn per cycle + 0.430386207 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2780) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe @@ -181,20 +181,20 @@ Relative difference = 2.9844659193456305e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.745804e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.939011e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.939011e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.853383e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.013794e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.013794e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.621468 sec - 1,228,938,779 cycles # 1.969 GHz - 1,844,901,377 instructions # 1.50 insn per cycle - 0.626449091 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1375) (512y: 106) (512z: 2270) +TOTAL : 0.597884 sec + 1,199,308,383 cycles # 1.995 GHz + 1,804,468,596 instructions # 1.50 insn per cycle + 0.601975092 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1380) (512y: 106) (512z: 2270) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index 2c0323afc3..55664f3ef7 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -36,26 +36,26 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-08-15_08:30:06 +DATE: 2023-10-25_19:05:04 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.087095e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.261342e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.261342e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.715956e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.423473e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.423473e+07 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.499760 sec - 2,097,668,356 cycles # 2.894 GHz - 2,791,555,788 instructions # 1.33 insn per cycle - 0.782534486 seconds time elapsed +TOTAL : 0.466815 sec + 2,046,266,197 cycles # 2.991 GHz + 3,039,133,939 instructions # 1.49 insn per cycle + 0.742578114 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -69,17 +69,17 @@ WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.832332e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.392534e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.392534e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.437711e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.623194e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.623194e+07 ) sec^-1 MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 -TOTAL : 0.798926 sec - 3,096,775,317 cycles # 2.919 GHz - 4,431,168,284 instructions # 1.43 insn per cycle - 1.123669054 seconds time elapsed +TOTAL : 0.730298 sec + 2,928,685,026 cycles # 3.010 GHz + 4,469,035,726 instructions # 1.53 insn per cycle + 1.030829307 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,19 +90,19 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.165019e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.197795e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.197795e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.130473e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.155364e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.155364e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 1.434384 sec - 4,437,567,993 cycles # 3.085 GHz - 12,859,550,220 instructions # 2.90 insn per cycle - 1.439848105 seconds time elapsed +TOTAL : 1.477047 sec + 4,559,807,823 cycles # 3.080 GHz + 12,820,937,643 instructions # 2.81 insn per cycle + 1.481171657 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 732) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe @@ -117,19 +117,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.075971e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.181885e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.181885e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.037342e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.116140e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.116140e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.818868 sec - 2,544,346,828 cycles # 3.089 GHz - 7,287,580,278 instructions # 2.86 insn per cycle - 0.830252102 seconds time elapsed +TOTAL : 0.831756 sec + 2,576,245,613 cycles # 3.084 GHz + 7,244,217,190 instructions # 2.81 insn per cycle + 0.836091661 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3150) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe @@ -144,19 +144,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.614210e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.950189e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.950189e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.548861e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.803617e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.803617e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.482662 sec - 1,421,601,668 cycles # 2.921 GHz - 3,056,342,411 instructions # 2.15 insn per cycle - 0.497011065 seconds time elapsed +TOTAL : 0.489750 sec + 1,401,953,569 cycles # 2.841 GHz + 3,012,124,484 instructions # 2.15 insn per cycle + 0.494067218 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3017) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe @@ -171,19 +171,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.933148e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.331996e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.331996e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.888848e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.196973e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.196973e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.445752 sec - 1,309,065,815 cycles # 2.907 GHz - 2,907,990,208 instructions # 2.22 insn per cycle - 0.456912060 seconds time elapsed +TOTAL : 0.451008 sec + 1,291,042,478 cycles # 2.840 GHz + 2,867,771,305 instructions # 2.22 insn per cycle + 0.455219321 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2780) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe @@ -198,20 +198,20 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.784613e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.977109e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.977109e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.837117e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.990188e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.990188e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.619630 sec - 1,260,312,878 cycles # 2.020 GHz - 1,884,777,232 instructions # 1.50 insn per cycle - 0.634711205 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1375) (512y: 106) (512z: 2270) +TOTAL : 0.606364 sec + 1,227,732,873 cycles # 2.013 GHz + 1,842,233,991 instructions # 1.50 insn per cycle + 0.610509212 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1380) (512y: 106) (512z: 2270) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index 9b62913c8c..b5138c5dae 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -36,38 +36,38 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-08-15_08:06:48 +DATE: 2023-10-25_18:43:29 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.718463e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.224283e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.561996e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.920168e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.374674e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.702657e+07 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.450989 sec - 1,926,591,901 cycles # 2.878 GHz - 2,417,002,909 instructions # 1.25 insn per cycle - 0.726915120 seconds time elapsed +TOTAL : 0.437720 sec + 1,950,126,017 cycles # 3.006 GHz + 2,773,154,214 instructions # 1.42 insn per cycle + 0.705954337 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.610230e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.459724e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.850101e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.766528e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.558717e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.939267e+07 ) sec^-1 MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 -TOTAL : 0.532076 sec - 2,233,739,257 cycles # 2.906 GHz - 2,874,742,621 instructions # 1.29 insn per cycle - 0.828489500 seconds time elapsed +TOTAL : 0.513041 sec + 2,254,005,524 cycles # 3.033 GHz + 3,263,301,476 instructions # 1.45 insn per cycle + 0.800914962 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -77,19 +77,19 @@ Relative difference = 2.984467216677476e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.181743e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.216053e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.216053e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.153195e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.178574e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.178574e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 1.408622 sec - 4,361,143,255 cycles # 3.089 GHz - 12,732,406,455 instructions # 2.92 insn per cycle - 1.413340753 seconds time elapsed +TOTAL : 1.442048 sec + 4,474,102,808 cycles # 3.096 GHz + 12,693,000,655 instructions # 2.84 insn per cycle + 1.446019473 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 687) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest.exe @@ -103,19 +103,19 @@ Relative difference = 2.9844565299804477e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.123224e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.235875e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.235875e+05 ) 
sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.053414e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.135311e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.135311e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.794392 sec - 2,460,614,760 cycles # 3.084 GHz - 7,101,608,991 instructions # 2.89 insn per cycle - 0.812765815 seconds time elapsed +TOTAL : 0.819184 sec + 2,493,704,947 cycles # 3.032 GHz + 7,048,175,291 instructions # 2.83 insn per cycle + 0.823254604 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2966) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest.exe @@ -129,19 +129,19 @@ Relative difference = 2.9844565299804477e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.368124e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.655189e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.655189e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.165376e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.368230e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.368230e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.508970 sec - 1,486,600,307 cycles # 2.896 GHz - 3,239,541,081 instructions # 2.18 insn per cycle - 0.514300144 
seconds time elapsed +TOTAL : 0.539826 sec + 1,467,619,890 cycles # 2.700 GHz + 3,195,865,906 instructions # 2.18 insn per cycle + 0.544230401 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3078) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 2.9844659193456305e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.515852e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.825133e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.825133e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.602530e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.851517e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.851517e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.489162 sec - 1,429,316,422 cycles # 2.899 GHz - 3,143,556,021 instructions # 2.20 insn per cycle - 0.494066423 seconds time elapsed +TOTAL : 0.475458 sec + 1,396,242,750 cycles # 2.915 GHz + 3,099,467,840 instructions # 2.22 insn per cycle + 0.479442989 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2785) (512y: 257) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest.exe @@ -181,19 +181,19 @@ Relative difference = 2.9844659193456305e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.520314e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.687012e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.687012e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.759805e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.905242e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.905242e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.676320 sec - 1,279,958,194 cycles # 1.881 GHz - 2,113,888,708 instructions # 1.65 insn per cycle - 0.681951384 seconds time elapsed +TOTAL : 0.616827 sec + 1,241,766,049 cycles # 2.002 GHz + 2,069,716,754 instructions # 1.67 insn per cycle + 0.620974760 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1197) (512y: 194) (512z: 2426) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 2ebb36034e..8a44a3dd20 100644 --- 
a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -36,60 +36,60 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-08-15_08:07:06 +DATE: 2023-10-25_18:43:45 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.317870e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.206497e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.327873e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.953088e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.256905e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.378672e+08 ) sec^-1 MeanMatrixElemValue = ( 3.402886e+01 +- 1.677500e+01 ) GeV^-2 -TOTAL : 0.447120 sec - 1,895,627,368 cycles # 2.855 GHz - 2,376,491,406 instructions # 1.25 insn per cycle - 0.721871082 seconds time elapsed +TOTAL : 0.434393 sec + 1,939,780,167 cycles # 3.008 GHz + 2,743,949,203 instructions # 1.41 insn per cycle + 0.703990460 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 168 
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.844312e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.817021e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.945412e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.221116e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.860706e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.982864e+08 ) sec^-1 MeanMatrixElemValue = ( 4.166198e+02 +- 2.517590e+02 ) GeV^-2 -TOTAL : 0.482475 sec - 2,050,887,558 cycles # 2.878 GHz - 2,606,594,300 instructions # 1.27 insn per cycle - 0.769817589 seconds time elapsed +TOTAL : 0.466982 sec + 2,071,697,565 cycles # 3.013 GHz + 2,925,920,597 instructions # 1.41 insn per cycle + 0.745407679 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 5.619520e-01 -Avg ME (F77/CUDA) = 0.56225629188472226 -Relative difference = 0.0005414908830687532 +Avg ME (F77/CUDA) = 0.56225629328206139 +Relative difference = 0.0005414933696496947 OK (relative difference <= 5E-3) ========================================================================= runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.209994e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.239548e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.239548e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.175069e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.202207e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.202207e+05 ) sec^-1 MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 -TOTAL : 1.374925 sec - 4,255,805,112 cycles # 3.086 GHz - 12,772,760,715 instructions # 3.00 insn per cycle - 1.380363694 seconds time elapsed +TOTAL : 1.414014 sec + 4,387,994,343 cycles # 3.096 GHz + 12,757,087,191 instructions # 2.91 insn per cycle + 1.417904644 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 693) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe @@ -103,19 +103,19 @@ Relative difference = 1.714833339642312e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP 
precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.320390e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.557734e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.557734e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.258633e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.477473e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.477473e+05 ) sec^-1 MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 -TOTAL : 0.513592 sec - 1,585,556,515 cycles # 3.064 GHz - 4,249,378,012 instructions # 2.68 insn per cycle - 0.523909176 seconds time elapsed +TOTAL : 0.522116 sec + 1,618,126,198 cycles # 3.079 GHz + 4,232,277,564 instructions # 2.62 insn per cycle + 0.526044496 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3709) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe @@ -129,19 +129,19 @@ Relative difference = 4.180373005172264e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.542570e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.493905e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.493905e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 
6.608070e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.524156e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.524156e+05 ) sec^-1 MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.271116 sec - 797,632,544 cycles # 2.903 GHz - 1,813,869,762 instructions # 2.27 insn per cycle - 0.275808482 seconds time elapsed +TOTAL : 0.267436 sec + 793,046,821 cycles # 2.927 GHz + 1,796,478,483 instructions # 2.27 insn per cycle + 0.271487422 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3614) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 6.001494295464523e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.021575e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.114751e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.114751e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.057162e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.116228e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.116228e+05 ) sec^-1 MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.254142 sec - 752,335,072 cycles # 2.915 GHz - 1,735,351,851 instructions # 2.31 insn per cycle - 0.259107554 seconds time elapsed +TOTAL : 0.251834 
sec + 743,002,845 cycles # 2.913 GHz + 1,717,820,666 instructions # 2.31 insn per cycle + 0.255754900 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3443) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe @@ -181,20 +181,20 @@ Relative difference = 6.001494295464523e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.361233e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.978175e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.978175e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.409485e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.023233e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.023233e+05 ) sec^-1 MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.328715 sec - 686,462,283 cycles # 2.062 GHz - 1,222,689,460 instructions # 1.78 insn per cycle - 0.333921475 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 38) (512z: 2493) +TOTAL : 0.324434 sec + 678,869,673 cycles # 2.072 GHz + 1,206,887,131 instructions # 1.78 insn per cycle + 0.328433910 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2007) (512y: 32) (512z: 2493) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index 41f39a811d..35147cd718 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -36,26 +36,26 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-08-15_08:30:25 +DATE: 2023-10-25_19:05:21 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.591873e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.223950e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.223950e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.575149e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.561751e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.561751e+07 ) sec^-1 MeanMatrixElemValue = ( 3.419752e+01 +- 1.682900e+01 ) GeV^-2 -TOTAL : 0.473901 sec - 2,006,434,611 cycles # 2.909 GHz - 2,607,870,987 instructions # 1.30 insn per cycle - 0.748330623 seconds time elapsed +TOTAL : 0.447090 sec + 2,006,667,406 cycles # 3.008 GHz + 2,921,553,347 instructions # 1.46 insn per cycle + 0.724337546 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -69,40 +69,40 @@ WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.143568e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.858291e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.858291e+07 ) sec^-1 -MeanMatrixElemValue = ( 4.349381e+02 +- 2.541442e+02 ) GeV^-2 -TOTAL : 0.643398 sec - 2,551,284,606 cycles # 2.899 GHz - 3,561,798,895 instructions # 1.40 insn per cycle - 0.939559641 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.387556e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.246561e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.246561e+07 ) sec^-1 +MeanMatrixElemValue = ( 4.349385e+02 +- 2.541442e+02 ) GeV^-2 +TOTAL : 0.606579 sec + 2,519,662,089 cycles # 3.025 GHz + 3,861,788,300 instructions # 1.53 insn per cycle + 0.890689404 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 5.619520e-01 -Avg ME (F77/CUDA) = 0.56225629188472226 -Relative difference = 0.0005414908830687532 +Avg ME (F77/CUDA) = 0.56225629328206139 +Relative difference = 0.0005414933696496947 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.208519e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.237979e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.237979e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.174105e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.201186e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.201186e+05 ) sec^-1 MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 -TOTAL : 1.380608 sec - 4,272,088,122 cycles # 3.086 GHz - 12,777,586,064 instructions # 2.99 insn per cycle - 1.386039886 seconds time elapsed +TOTAL : 1.417963 sec + 4,402,481,360 cycles # 3.098 GHz + 12,761,539,634 instructions # 2.90 insn per cycle + 1.421844145 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 693) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe @@ -117,19 +117,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.316195e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.558201e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.558201e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.229854e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.445494e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.445494e+05 ) sec^-1 MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 -TOTAL : 0.518966 sec - 1,607,458,704 cycles # 3.073 GHz - 4,297,884,927 instructions # 2.67 insn per cycle - 0.529676971 seconds time elapsed +TOTAL : 0.530536 sec + 1,636,187,995 cycles # 3.064 GHz + 4,280,682,276 instructions # 2.62 insn per cycle + 0.534503101 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3709) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe @@ -144,19 +144,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.441566e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.357712e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.357712e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.470080e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.344369e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.344369e+05 ) sec^-1 MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.279512 sec - 817,665,413 cycles # 2.883 GHz - 1,851,099,137 instructions # 2.26 insn per cycle - 0.284756023 seconds time elapsed +TOTAL : 0.277101 sec + 811,784,719 cycles # 2.894 GHz + 1,833,505,900 instructions # 2.26 insn per cycle + 0.281119088 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3614) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe @@ -171,19 +171,19 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.824094e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.872005e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.872005e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.578285e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.554614e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.554614e+05 ) sec^-1 MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.264878 sec - 768,094,098 cycles # 2.860 GHz - 1,772,432,011 instructions # 2.31 insn per cycle - 0.270038989 seconds time elapsed +TOTAL : 0.273667 sec + 760,813,920 cycles # 2.744 GHz + 1,755,015,790 instructions # 2.31 insn per cycle + 0.277772969 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3443) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe @@ -198,20 +198,20 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.316128e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.941733e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.941733e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.356513e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.945123e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.945123e+05 ) sec^-1 MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.335239 sec - 706,774,302 cycles # 2.083 GHz - 1,264,350,644 instructions # 1.79 insn per cycle - 0.340454222 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 38) (512z: 2493) +TOTAL : 0.331761 sec + 698,245,569 cycles # 2.083 GHz + 1,248,346,490 instructions # 1.79 insn per cycle + 0.335801876 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2007) (512y: 32) (512z: 2493) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index 7d66debda8..da1ead0f77 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -36,60 +36,60 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-08-15_08:07:24 +DATE: 2023-10-25_18:44:02 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.315866e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.169151e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.283291e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.781171e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.253056e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.376604e+08 ) sec^-1 MeanMatrixElemValue = ( 3.402886e+01 +- 1.677500e+01 ) GeV^-2 -TOTAL : 0.447750 sec - 1,887,621,838 cycles # 2.847 GHz - 2,363,659,721 instructions # 1.25 insn per cycle - 0.722412452 seconds time elapsed +TOTAL : 0.437609 sec + 1,935,685,013 cycles # 2.990 GHz + 2,706,812,030 instructions # 1.40 insn per cycle + 0.705999265 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 162 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.786680e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.782188e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.899575e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.205131e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.854735e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.976134e+08 ) sec^-1 MeanMatrixElemValue = ( 4.166198e+02 +- 2.517590e+02 ) GeV^-2 -TOTAL : 0.481145 sec - 2,053,728,244 cycles # 2.889 GHz - 2,608,696,340 instructions # 1.27 insn per cycle - 0.768584912 seconds time elapsed +TOTAL : 0.466415 sec + 2,068,328,114 cycles # 3.010 GHz + 2,987,091,963 instructions # 1.44 insn per cycle + 0.744952769 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 5.619520e-01 -Avg ME (F77/CUDA) = 0.56225629188472226 -Relative difference = 0.0005414908830687532 +Avg ME (F77/CUDA) = 0.56225629328206139 +Relative difference = 0.0005414933696496947 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] 
[inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.218792e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.248961e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.248961e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.180324e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.207350e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.207350e+05 ) sec^-1 MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 -TOTAL : 1.365137 sec - 4,226,666,689 cycles # 3.088 GHz - 12,672,231,770 instructions # 3.00 insn per cycle - 1.370122458 seconds time elapsed +TOTAL : 1.407934 sec + 4,364,873,703 cycles # 3.093 GHz + 12,656,518,331 instructions # 2.90 insn per cycle + 1.411910028 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest.exe @@ -103,19 +103,19 @@ Relative difference = 1.714833339642312e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
3.664170e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.958366e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.958366e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.586065e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.857911e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.857911e+05 ) sec^-1 MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 -TOTAL : 0.467403 sec - 1,451,107,170 cycles # 3.079 GHz - 4,137,811,344 instructions # 2.85 insn per cycle - 0.472451500 seconds time elapsed +TOTAL : 0.475849 sec + 1,476,787,317 cycles # 3.082 GHz + 4,120,727,484 instructions # 2.79 insn per cycle + 0.479813364 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3414) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest.exe @@ -129,19 +129,19 @@ Relative difference = 4.180373005172264e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.034343e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.588836e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.588836e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.086145e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.608974e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.608974e+05 ) sec^-1 MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) 
GeV^-2 -TOTAL : 0.346626 sec - 1,014,394,653 cycles # 2.893 GHz - 2,142,123,582 instructions # 2.11 insn per cycle - 0.351820806 seconds time elapsed +TOTAL : 0.341638 sec + 1,007,266,248 cycles # 2.920 GHz + 2,124,817,247 instructions # 2.11 insn per cycle + 0.345623716 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4206) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest.exe @@ -155,19 +155,19 @@ Relative difference = 6.001494295464523e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.191113e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.760427e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.760427e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.281396e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.848165e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.848165e+05 ) sec^-1 MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.336175 sec - 977,059,662 cycles # 2.873 GHz - 2,061,260,048 instructions # 2.11 insn per cycle - 0.340940509 seconds time elapsed +TOTAL : 0.329501 sec + 970,497,596 cycles # 2.916 GHz + 2,043,945,912 instructions # 2.11 insn per cycle + 0.333450865 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4013) (512y: 9) 
(512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest.exe @@ -181,20 +181,20 @@ Relative difference = 6.001494295464523e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.066917e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.412043e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.412043e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.073132e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.415220e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.415220e+05 ) sec^-1 MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.425660 sec - 865,326,882 cycles # 2.013 GHz - 1,591,030,103 instructions # 1.84 insn per cycle - 0.430935126 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2526) (512y: 22) (512z: 2998) +TOTAL : 0.424070 sec + 856,736,633 cycles # 2.004 GHz + 1,573,705,553 instructions # 1.84 insn per cycle + 0.428192842 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2446) (512y: 16) (512z: 2998) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 1572e8a079..a1190251f1 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -36,38 +36,38 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-08-15_08:07:40 +DATE: 2023-10-25_18:44:18 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.773573e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.383104e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.734022e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.018464e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.595321e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.946310e+07 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.454328 sec - 1,916,621,964 cycles # 2.855 GHz - 2,425,052,266 instructions # 1.27 insn per cycle - 0.730279804 seconds time elapsed +TOTAL : 0.439342 sec + 1,951,736,780 cycles # 3.004 GHz + 2,769,340,845 instructions # 1.42 insn per cycle + 0.707751210 seconds time elapsed runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.650032e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.600937e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.004138e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.803845e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.713692e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.109529e+07 ) sec^-1 MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 -TOTAL : 0.530540 sec - 2,232,843,689 cycles # 2.896 GHz - 2,905,011,243 instructions # 1.30 insn per cycle - 0.828486188 seconds time elapsed +TOTAL : 0.513063 sec + 2,243,341,028 cycles # 3.023 GHz + 3,247,351,075 instructions # 1.45 insn per cycle + 0.800783669 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +77,19 @@ Relative difference = 2.782658397826986e-07 OK (relative difference <= 5E-3) 
========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.162715e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.196172e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.196172e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.141045e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.165652e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.165652e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 1.434253 sec - 4,436,956,891 cycles # 3.087 GHz - 12,827,279,350 instructions # 2.89 insn per cycle - 1.439255939 seconds time elapsed +TOTAL : 1.457233 sec + 4,537,826,727 cycles # 3.107 GHz + 12,784,913,374 instructions # 2.82 insn per cycle + 1.461163978 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest.exe @@ -103,19 +103,19 @@ Relative difference = 2.608483884671339e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.052091e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.156668e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.156668e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.061046e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.141573e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.141573e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.821769 sec - 2,501,734,616 cycles # 3.029 GHz - 7,159,831,814 instructions # 2.86 insn per cycle - 0.832634068 seconds time elapsed +TOTAL : 0.815514 sec + 2,537,540,226 cycles # 3.099 GHz + 7,116,439,666 instructions # 2.80 insn per cycle + 0.819589417 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3215) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest.exe @@ -129,20 +129,20 @@ Relative difference = 2.608483884671339e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.665751e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.013101e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.013101e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.732172e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.005113e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.005113e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.470297 sec - 1,374,569,459 cycles # 2.900 GHz - 2,980,850,687 instructions # 2.17 insn per cycle - 0.475201900 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3175) (512y: 0) (512z: 0) +TOTAL : 0.460418 sec + 1,348,361,279 cycles # 2.906 GHz + 2,936,931,800 instructions # 2.18 insn per cycle + 0.464548729 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3174) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -155,19 +155,19 @@ Relative difference = 2.777561258016791e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.059483e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.477223e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.477223e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.149560e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.481402e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.481402e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 
1.677228e+01 ) GeV^-2 -TOTAL : 0.426148 sec - 1,249,053,156 cycles # 2.907 GHz - 2,832,877,164 instructions # 2.27 insn per cycle - 0.430932894 seconds time elapsed +TOTAL : 0.416544 sec + 1,218,162,673 cycles # 2.902 GHz + 2,791,024,677 instructions # 2.29 insn per cycle + 0.420753319 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2938) (512y: 110) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest.exe @@ -181,20 +181,20 @@ Relative difference = 2.777561258016791e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.712610e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.896335e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.896335e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.487755e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.613101e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.613101e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.629252 sec - 1,271,527,587 cycles # 2.008 GHz - 1,873,961,225 instructions # 1.47 insn per cycle - 0.634389126 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1723) (512y: 114) (512z: 2312) +TOTAL : 0.683117 sec + 1,240,069,199 cycles # 1.806 GHz + 1,831,774,203 instructions # 1.48 insn per cycle 
+ 0.687252060 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1728) (512y: 114) (512z: 2312) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index 6d2531dde1..2a285d3003 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -36,38 +36,38 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-08-15_08:07:58 +DATE: 2023-10-25_18:44:34 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.789411e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.303398e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.650520e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.964309e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.404642e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.725368e+07 ) sec^-1 MeanMatrixElemValue = ( 
3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.451196 sec - 1,919,651,498 cycles # 2.869 GHz - 2,413,802,258 instructions # 1.26 insn per cycle - 0.726545933 seconds time elapsed +TOTAL : 0.438377 sec + 1,959,656,771 cycles # 3.015 GHz + 2,794,102,152 instructions # 1.43 insn per cycle + 0.707326409 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.626179e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.465436e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.858156e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.773177e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.592536e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.977031e+07 ) sec^-1 MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 -TOTAL : 0.534363 sec - 2,214,066,326 cycles # 2.862 GHz - 2,892,011,520 instructions # 1.31 insn per cycle - 0.831589584 seconds time elapsed +TOTAL : 0.515907 sec + 2,236,298,354 cycles # 2.994 GHz + 3,229,465,087 instructions # 1.44 insn per cycle + 0.804040631 seconds time elapsed ------------------------------------------------------------------------- cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -77,19 +77,19 @@ Relative difference = 2.782658397826986e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.176974e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.211086e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.211086e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.106563e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.131055e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.131055e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 1.414731 sec - 4,384,977,798 cycles # 3.091 GHz - 12,710,908,577 instructions # 2.90 insn per cycle - 1.419974932 seconds time elapsed +TOTAL : 1.502633 sec + 4,502,084,245 cycles # 2.989 GHz + 12,668,944,796 instructions # 2.81 insn per cycle + 1.506842459 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 659) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest.exe @@ -103,19 +103,19 @@ Relative difference = 2.608483884671339e-07 OK (relative 
difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.092509e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.202126e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.202126e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.101140e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.184323e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.184323e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.805760 sec - 2,454,654,239 cycles # 3.033 GHz - 6,949,292,792 instructions # 2.83 insn per cycle - 0.810702702 seconds time elapsed +TOTAL : 0.800328 sec + 2,487,833,256 cycles # 3.095 GHz + 6,905,789,276 instructions # 2.78 insn per cycle + 0.804489667 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3036) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest.exe @@ -129,20 +129,20 @@ Relative difference = 2.608483884671339e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = 
SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.317533e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.604808e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.604808e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.392821e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.617920e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.617920e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.516874 sec - 1,506,955,385 cycles # 2.891 GHz - 3,212,138,114 instructions # 2.13 insn per cycle - 0.522141241 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3285) (512y: 0) (512z: 0) +TOTAL : 0.504314 sec + 1,480,047,698 cycles # 2.915 GHz + 3,168,067,665 instructions # 2.14 insn per cycle + 0.508419797 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3284) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -155,19 +155,19 @@ Relative difference = 2.777561258016791e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.530873e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.849344e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.849344e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.646089e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.903192e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.903192e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.487148 sec - 1,414,973,528 cycles # 2.884 GHz - 3,084,278,864 instructions # 2.18 insn per cycle - 0.492087646 seconds time elapsed +TOTAL : 0.470441 sec + 1,380,675,105 cycles # 2.913 GHz + 3,040,126,384 instructions # 2.20 insn per cycle + 0.474602540 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2936) (512y: 265) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest.exe @@ -181,19 +181,19 @@ Relative difference = 2.777561258016791e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = 
SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.646198e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.823436e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.823436e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.746832e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.887698e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.887698e+05 ) sec^-1 MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.644560 sec - 1,282,518,009 cycles # 1.977 GHz - 2,048,181,150 instructions # 1.60 insn per cycle - 0.649746004 seconds time elapsed +TOTAL : 0.620483 sec + 1,249,575,366 cycles # 2.003 GHz + 2,003,971,184 instructions # 1.60 insn per cycle + 0.624635502 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1520) (512y: 202) (512z: 2499) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest.exe From 319f035e50ef2facc4a4f477e34d9eeb518a15c7 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 25 Oct 2023 08:40:24 +0200 Subject: [PATCH 002/119] [oct23av] rerun 18 tmad alltees, all ok (itscrd90 Silver4216 el9 VM, after hard reboot including downfall mitigation) New performance baseline (move from Alma8/itscrd80 to Alma9/itscrd90 on Silver4216+V100 and include downfall mitigation) Using codebases as of commit bd255c01fb1cf5377de344c42089765756fd75e1 (Wed Aug 16 15:05:27 2023 +0200) STARTED AT Wed Oct 25 07:17:34 PM CEST 2023 ENDED AT Wed Oct 25 11:35:50 PM CEST 2023 Status=0 24 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt [avalassi@itscrd90 gcc11/usr] /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp> cat 
/etc/redhat-release AlmaLinux release 9.2 (Turquoise Kodkod) [avalassi@itscrd90 gcc11/usr] /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp> grep 'stepping\|model\|microcode' /proc/cpuinfo | sort -u microcode : 0x5003604 model : 85 model name : Intel(R) Xeon(R) Silver 4216 CPU @ 2.10GHz stepping : 7 --- .../log_eemumu_mad_d_inl0_hrd0.txt | 180 ++++++------- .../log_eemumu_mad_f_inl0_hrd0.txt | 174 ++++++------- .../log_eemumu_mad_m_inl0_hrd0.txt | 178 ++++++------- .../log_ggtt_mad_d_inl0_hrd0.txt | 178 ++++++------- .../log_ggtt_mad_f_inl0_hrd0.txt | 176 ++++++------- .../log_ggtt_mad_m_inl0_hrd0.txt | 178 ++++++------- .../log_ggttg_mad_d_inl0_hrd0.txt | 238 +++++++++--------- .../log_ggttg_mad_f_inl0_hrd0.txt | 236 ++++++++--------- .../log_ggttg_mad_m_inl0_hrd0.txt | 234 ++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 210 ++++++++-------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 204 +++++++-------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 206 +++++++-------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 182 +++++++------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 186 +++++++------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 182 +++++++------- .../log_gqttq_mad_d_inl0_hrd0.txt | 42 ++-- .../log_gqttq_mad_f_inl0_hrd0.txt | 42 ++-- .../log_gqttq_mad_m_inl0_hrd0.txt | 46 ++-- 18 files changed, 1536 insertions(+), 1536 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index c4c65c361b..1a4d828546 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -3,29 +3,29 @@ CUDACPP_BUILDDIR='.' 
+ make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 - make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - -make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
@@ -33,9 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-08-14_22:02:26 +DATE: 2023-10-25_19:18:26 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.5965s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5867s - [COUNTERS] Fortran MEs ( 1 ) : 0.0097s for 8192 events => throughput is 8.42E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6275s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6175s + [COUNTERS] Fortran MEs ( 1 ) : 0.0099s for 8192 events => throughput is 8.25E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1737s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1636s - [COUNTERS] Fortran MEs ( 1 ) : 0.0101s for 8192 events => throughput is 8.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1814s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1709s + [COUNTERS] Fortran MEs ( 1 ) : 0.0105s for 8192 events => throughput is 7.82E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] 
ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4422s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3320s - [COUNTERS] Fortran MEs ( 1 ) : 0.1102s for 90112 events => throughput is 8.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4443s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3343s + [COUNTERS] Fortran MEs ( 1 ) : 0.1099s for 90112 events => throughput is 8.20E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1808s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1747s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 8192 events => throughput is 1.37E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1870s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1812s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 8192 events => throughput is 1.41E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4167s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3500s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0667s for 90112 events => throughput is 1.35E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4079s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3422s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0657s for 90112 events => throughput is 1.37E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -180,14 +180,14 @@ OK! 
xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919904813628E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.373911e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.344246e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.370703e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.355045e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1701s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1672s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.83E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1815s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1785s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.72E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3675s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3357s - [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0318s for 90112 events => throughput is 2.84E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3697s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3373s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0324s for 90112 events => throughput is 2.78E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +256,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919904813628E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.632282e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.734240e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.863980e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.880467e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.2151s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2134s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0017s for 8192 events => throughput is 4.80E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1763s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1746s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0016s for 8192 events => throughput is 5.05E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ 
-319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3692s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3516s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0177s for 90112 events => throughput is 5.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3545s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3375s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 90112 events => throughput is 5.28E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +332,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919904813656E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.218014e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.204571e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.647046e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.588414e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1721s - [COUNTERS] Fortran Overhead ( 0 ) : 
0.1705s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0015s for 8192 events => throughput is 5.35E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1798s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1784s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0014s for 8192 events => throughput is 5.76E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3546s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3383s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0164s for 90112 events => throughput is 5.50E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3579s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3417s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0162s for 90112 events => throughput is 5.55E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +408,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919904813656E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.416843e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.597781e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.097824e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.189972e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1695s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1679s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0016s for 8192 events => throughput is 5.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1771s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1755s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0016s for 8192 events => throughput is 5.09E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3567s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3383s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0184s for 90112 events => throughput is 4.91E+06 events/s + 
[COUNTERS] PROGRAM TOTAL : 0.3582s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3400s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0182s for 90112 events => throughput is 4.95E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +484,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919904813656E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.698775e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.738607e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.180879e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.243870e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.6260s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6255s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.65E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6205s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6200s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.63E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < 
/tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7926s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7879s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.92E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7581s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7534s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.93E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -560,43 +560,43 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919904813628E-002 OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.973589e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.604143e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.239953e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.463611e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.963563e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
4.281447e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.778136e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.027562e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.980032e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.209260e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.886705e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.077193e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.879844e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.272993e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.000319e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.998178e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 801f94fd9d..f98575860b 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -2,28 +2,28 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/e CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=avx2 +make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - -make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -33,9 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-08-14_22:02:43 +DATE: 2023-10-25_19:18:43 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6083s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5984s - [COUNTERS] Fortran MEs ( 1 ) : 0.0098s for 8192 events => throughput is 8.32E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8283s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8183s + [COUNTERS] Fortran MEs ( 1 ) : 0.0100s for 8192 events => throughput is 8.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1882s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1782s - [COUNTERS] Fortran MEs ( 1 ) : 0.0100s for 8192 events => throughput is 8.18E+05 events/s + 
[COUNTERS] PROGRAM TOTAL : 0.1908s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1801s + [COUNTERS] Fortran MEs ( 1 ) : 0.0107s for 8192 events => throughput is 7.65E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4524s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3406s - [COUNTERS] Fortran MEs ( 1 ) : 0.1118s for 90112 events => throughput is 8.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4773s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3594s + [COUNTERS] Fortran MEs ( 1 ) : 0.1178s for 90112 events => throughput is 7.65E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166140620297] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1839s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1784s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0056s for 8192 events => throughput is 1.47E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1859s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1802s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0057s for 8192 events => throughput is 1.43E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501907784661565E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4076s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3444s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0632s for 90112 events => 
throughput is 1.43E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4026s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3390s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0636s for 90112 events => throughput is 1.42E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -180,14 +180,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501907784661565E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.400944e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.366198e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.371852e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.396541e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165549479658] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1725s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1710s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0015s for 8192 events => throughput is 5.30E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1780s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1763s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0017s for 8192 events => throughput is 4.75E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' 
./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905692857932E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3565s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3392s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0173s for 90112 events => throughput is 5.20E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3798s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3613s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0185s for 90112 events => throughput is 4.88E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +256,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501905692857932E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.167532e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.893797e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.551324e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.592095e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165569099927] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1716s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1708s - [COUNTERS] CudaCpp MEs 
( 2 ) : 0.0008s for 8192 events => throughput is 9.89E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1811s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1803s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 9.92E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905658047333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3448s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3355s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0093s for 90112 events => throughput is 9.69E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3614s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3514s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0100s for 90112 events => throughput is 8.98E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +332,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501905658047333E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.026521e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.801869e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.165567e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.185551e+07 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165569099927] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1691s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1683s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 1.04E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.1801s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1793s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 1.05E+07 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905658047333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3474s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3387s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 90112 events => throughput is 1.03E+07 events/s + 
[COUNTERS] PROGRAM TOTAL : 0.3471s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3384s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 90112 events => throughput is 1.04E+07 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +408,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501905658047333E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.055886e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.124905e+07 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.199864e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.311988e+07 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166431914253] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1734s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1724s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0010s for 8192 events => throughput is 8.28E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1801s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1791s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 8.83E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < 
/tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501909358591468E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3553s + [COUNTERS] PROGRAM TOTAL : 0.3552s [COUNTERS] Fortran Overhead ( 0 ) : 0.3451s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0102s for 90112 events => throughput is 8.83E+06 events/s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0101s for 90112 events => throughput is 8.91E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +484,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501909358591468E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.602693e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.261918e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.049483e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.082919e+07 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166796068879] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.6224s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6219s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.66E+07 events/s + [COUNTERS] PROGRAM 
TOTAL : 0.5875s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5870s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.79E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,8 +547,8 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501910316213061E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7901s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7856s + [COUNTERS] PROGRAM TOTAL : 0.7572s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7527s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 90112 events => throughput is 2.00E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -560,43 +560,43 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501910316213061E-002 OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.165417e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.090855e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.596974e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.271435e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 
11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.713909e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.835160e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.290267e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.510333e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.637215e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.894374e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.358587e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.802495e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.862456e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.131306e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 
11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.309851e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.434205e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 9da4954542..0d49865b9c 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -2,27 +2,27 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/e CUDACPP_BUILDDIR='.' + make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - -make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' @@ -33,9 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-08-14_22:03:01 +DATE: 2023-10-25_19:18:59 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.5980s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5879s - [COUNTERS] Fortran MEs ( 1 ) : 0.0101s for 8192 events => throughput is 8.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6278s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6178s + [COUNTERS] Fortran MEs ( 1 ) : 0.0100s for 8192 events => throughput is 8.21E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM 
TOTAL : 0.1746s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1648s - [COUNTERS] Fortran MEs ( 1 ) : 0.0098s for 8192 events => throughput is 8.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1782s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1684s + [COUNTERS] Fortran MEs ( 1 ) : 0.0099s for 8192 events => throughput is 8.30E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4498s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3386s - [COUNTERS] Fortran MEs ( 1 ) : 0.1112s for 90112 events => throughput is 8.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4565s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3432s + [COUNTERS] Fortran MEs ( 1 ) : 0.1134s for 90112 events => throughput is 7.95E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211728] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1766s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1706s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 8192 events => throughput is 1.37E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1872s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1810s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 8192 events => throughput is 1.30E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 
1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4060s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3403s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0657s for 90112 events => throughput is 1.37E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4079s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0667s for 90112 events => throughput is 1.35E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -180,14 +180,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919915927155E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.337649e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.341399e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.342498e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.323934e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211728] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1703s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1675s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.94E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1801s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1772s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 
events => throughput is 2.78E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3650s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3336s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0314s for 90112 events => throughput is 2.87E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3694s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3374s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0319s for 90112 events => throughput is 2.82E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +256,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919915927155E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.759760e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.783585e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.898217e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.872248e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] 
Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1680s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1664s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0016s for 8192 events => throughput is 5.11E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1786s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1770s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0016s for 8192 events => throughput is 5.06E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3528s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3361s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0168s for 90112 events => throughput is 5.37E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3561s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3389s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 90112 events => throughput is 5.24E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +332,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919908700741E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.328917e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.356083e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.916707e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.847494e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1661s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1647s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0014s for 8192 events => throughput is 5.72E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1798s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1783s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0014s for 8192 events => throughput is 5.77E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3497s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3335s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0161s for 90112 events => throughput is 5.58E+06 events/s + 
[COUNTERS] PROGRAM TOTAL : 0.3603s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3443s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0160s for 90112 events => throughput is 5.63E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +408,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919908700741E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.630535e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.610629e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.259719e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.797344e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1741s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1723s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.58E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1826s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1808s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.43E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < 
/tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3537s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3355s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0182s for 90112 events => throughput is 4.94E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3604s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3419s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0185s for 90112 events => throughput is 4.88E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +484,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919908700741E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.795787e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.814394e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.204776e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.589840e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169066587257] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.6168s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6163s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => 
throughput is 1.64E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5950s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5945s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.67E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919911173610E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7926s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7879s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.92E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7577s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7529s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.90E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -560,43 +560,43 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919911173610E-002 OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.929606e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.583003e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.152731e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.467396e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.966239e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.252357e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.897511e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.030772e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.980784e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.251034e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.601623e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.079891e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.983432e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.286168e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.992790e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.995935e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 3e1484798a..57c094acdf 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -2,26 +2,26 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - -make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
@@ -33,9 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-08-14_22:03:19 +DATE: 2023-10-25_19:19:16 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3474s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2988s - [COUNTERS] Fortran MEs ( 1 ) : 0.0486s for 8192 events => throughput is 1.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4367s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3846s + [COUNTERS] Fortran MEs ( 1 ) : 0.0521s for 8192 events => throughput is 1.57E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3065s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2575s - [COUNTERS] Fortran MEs ( 1 ) : 0.0490s for 8192 events => throughput is 1.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3291s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2810s + [COUNTERS] Fortran MEs ( 1 ) : 0.0481s for 8192 events => throughput is 1.70E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 
[XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7188s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1877s - [COUNTERS] Fortran MEs ( 1 ) : 0.5311s for 90112 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7885s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2393s + [COUNTERS] Fortran MEs ( 1 ) : 0.5492s for 90112 events => throughput is 1.64E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600102] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3389s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2978s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3572s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3148s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0424s for 8192 events => throughput is 1.93E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775379] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6737s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2197s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4539s for 90112 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6843s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2210s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4633s for 90112 events => throughput is 1.94E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -180,14 +180,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775379) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.982757e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.950448e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.025053e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.964951e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3057s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2825s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0231s for 8192 events => throughput is 3.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3143s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2905s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0239s for 8192 events => throughput is 3.43E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775379] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4510s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1965s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2545s for 90112 events => throughput is 3.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4654s + 
[COUNTERS] Fortran Overhead ( 0 ) : 1.2042s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2612s for 90112 events => throughput is 3.45E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +256,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775379) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.396020e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.422695e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.358361e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.385899e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2883s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2740s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0143s for 8192 events => throughput is 5.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2964s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2820s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0144s for 8192 events => throughput is 5.70E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross 
section = 46.22 [46.223782291775393] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3549s + [COUNTERS] PROGRAM TOTAL : 1.3564s [COUNTERS] Fortran Overhead ( 0 ) : 1.1960s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1589s for 90112 events => throughput is 5.67E+05 events/s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1604s for 90112 events => throughput is 5.62E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +332,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775393) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.448380e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.356829e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.487562e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.465883e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2858s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2725s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0134s for 8192 events => throughput is 6.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2947s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2816s + [COUNTERS] CudaCpp MEs ( 2 ) : 
0.0130s for 8192 events => throughput is 6.29E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775393] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3702s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2215s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1488s for 90112 events => throughput is 6.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3386s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1916s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1469s for 90112 events => throughput is 6.13E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +408,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775393) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.072358e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.955245e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.050643e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.038215e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 
events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3069s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2860s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0209s for 8192 events => throughput is 3.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3076s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2868s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0208s for 8192 events => throughput is 3.94E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775393] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4286s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2036s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2250s for 90112 events => throughput is 4.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4351s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2090s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2261s for 90112 events => throughput is 3.99E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +484,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775393) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.909315e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.616558e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.808572e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.687418e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.7129s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7123s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.30E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6890s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6885s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.45E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6489s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6426s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.43E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6130s + 
[COUNTERS] Fortran Overhead ( 0 ) : 1.6068s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 90112 events => throughput is 1.46E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -560,43 +560,43 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775386) differ b OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.746360e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.179274e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.999297e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.706047e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.128017e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.310976e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.069316e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.080551e+08 ) sec^-1 *** 
EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.139660e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.327429e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.139735e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.152072e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.130154e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.328654e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.008036e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.061951e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 1fb26ee1bd..ac65217070 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -8,21 +8,19 @@ make 
USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - -make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -30,12 +28,14 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2023-08-14_22:03:45 +DATE: 2023-10-25_19:19:41 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3478s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2990s - [COUNTERS] Fortran MEs ( 1 ) : 0.0488s for 8192 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3582s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3097s + [COUNTERS] Fortran MEs ( 1 ) : 0.0485s for 8192 events => throughput is 1.69E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3063s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2575s - [COUNTERS] Fortran MEs ( 1 ) : 0.0487s for 8192 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3161s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2673s + [COUNTERS] Fortran MEs ( 1 ) : 0.0488s for 8192 events => throughput is 1.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] 
ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7209s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1852s - [COUNTERS] Fortran MEs ( 1 ) : 0.5357s for 90112 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7673s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2278s + [COUNTERS] Fortran MEs ( 1 ) : 0.5395s for 90112 events => throughput is 1.67E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690706211693573] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.4250s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3846s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0404s for 8192 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3486s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3073s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0413s for 8192 events => throughput is 1.98E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782418787778] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6608s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2221s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4387s for 90112 events => throughput is 2.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6743s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2245s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4497s for 90112 events => throughput is 2.00E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -180,14 +180,14 @@ OK! 
xsec from fortran (46.223782291775365) and cpp (46.223782418787778) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.030523e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.956288e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.046954e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.982512e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690702562167019] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2907s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2751s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0157s for 8192 events => throughput is 5.22E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2999s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2833s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0166s for 8192 events => throughput is 4.94E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223778631221009] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3855s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2033s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1823s for 90112 
events => throughput is 4.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3965s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2188s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1777s for 90112 events => throughput is 5.07E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +256,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223778631221009) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.061097e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.832476e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.023046e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.796478e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690694055768034] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2732s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2650s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 9.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2883s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2798s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.65E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' 
./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223775988760060] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.2839s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1903s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0936s for 90112 events => throughput is 9.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2853s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1921s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0933s for 90112 events => throughput is 9.66E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +332,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223775988760060) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.200760e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.545083e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.336469e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.445429e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690694055768034] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2737s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2659s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 8192 
events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2874s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2794s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223775988760060] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.2758s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1880s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0879s for 90112 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.2777s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1907s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0870s for 90112 events => throughput is 1.04E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +408,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223775988760060) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.811422e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.937961e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.841596e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.006840e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690698865531559] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2826s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2720s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0107s for 8192 events => throughput is 7.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2925s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2818s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0107s for 8192 events => throughput is 7.67E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223780255562296] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3093s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1899s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1194s for 90112 events => throughput is 7.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3226s + 
[COUNTERS] Fortran Overhead ( 0 ) : 1.2041s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1185s for 90112 events => throughput is 7.60E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +484,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223780255562296) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.953921e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.089065e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.956124e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.293009e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,8 +514,8 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690703397697980] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.7151s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7146s + [COUNTERS] PROGRAM TOTAL : 0.6877s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6872s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.51E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223786763175951] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) 
- [COUNTERS] PROGRAM TOTAL : 1.6521s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6466s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 90112 events => throughput is 1.63E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6161s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6107s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 90112 events => throughput is 1.66E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -560,43 +560,43 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223786763175951) differ b OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.669717e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.382566e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.507079e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.934525e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.217999e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.441859e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] 
[inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.691458e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.769075e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.264071e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.403190e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.753287e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.876277e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.675323e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.887286e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.418637e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.439919e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt 
b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index b50bc7dc9d..c3747a1448 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,29 +1,29 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - -make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -33,9 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-08-14_22:04:10 +DATE: 2023-10-25_19:20:06 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3471s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2981s - [COUNTERS] Fortran MEs ( 1 ) : 0.0490s for 8192 events => throughput is 1.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3588s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3101s + [COUNTERS] Fortran MEs ( 1 ) : 0.0486s for 8192 events => throughput is 1.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3072s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2581s - [COUNTERS] Fortran MEs ( 1 ) : 0.0490s for 8192 events => throughput is 1.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3193s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2707s + [COUNTERS] Fortran MEs ( 1 ) : 0.0487s for 8192 events => throughput is 1.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) 
*** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7154s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1800s - [COUNTERS] Fortran MEs ( 1 ) : 0.5354s for 90112 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7636s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2247s + [COUNTERS] Fortran MEs ( 1 ) : 0.5388s for 90112 events => throughput is 1.67E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3410s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2994s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0416s for 8192 events => throughput is 1.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3537s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3109s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0428s for 8192 events => throughput is 1.91E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6747s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2175s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4572s for 90112 events => throughput is 1.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7009s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2276s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4732s for 90112 events => throughput is 1.90E+05 events/s *** (2-none) Compare MADEVENT_CPP 
x10 xsec to MADEVENT_FORTRAN xsec *** @@ -180,14 +180,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223783635280988) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.908591e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.932354e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.961479e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.907654e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3055s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2824s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0231s for 8192 events => throughput is 3.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3139s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2905s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0234s for 8192 events => throughput is 3.50E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4672s - [COUNTERS] Fortran Overhead ( 
0 ) : 1.2033s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2639s for 90112 events => throughput is 3.41E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4697s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2111s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2585s for 90112 events => throughput is 3.49E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +256,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223783635280988) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.528687e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.342983e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.550638e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.391505e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2870s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2724s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0146s for 8192 events => throughput is 5.61E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3006s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2861s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0145s for 8192 events => throughput is 5.64E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec 
*** @@ -319,8 +319,8 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3452s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1868s + [COUNTERS] PROGRAM TOTAL : 1.3584s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1999s [COUNTERS] CudaCpp MEs ( 2 ) : 0.1584s for 90112 events => throughput is 5.69E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +332,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223783652032040) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.491200e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.413133e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.649330e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.510615e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2843s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2714s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0130s for 8192 events => throughput is 6.32E+05 events/s + [COUNTERS] PROGRAM 
TOTAL : 0.2939s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2806s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0133s for 8192 events => throughput is 6.18E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3388s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1937s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1451s for 90112 events => throughput is 6.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3412s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1968s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1445s for 90112 events => throughput is 6.24E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +408,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223783652032040) differ b OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.047515e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.210141e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.949119e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.018773e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId 
= 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2999s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2799s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0200s for 8192 events => throughput is 4.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3082s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2878s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0204s for 8192 events => throughput is 4.02E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4180s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1980s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2201s for 90112 events => throughput is 4.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5311s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2846s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2465s for 90112 events => throughput is 3.66E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +484,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223783652032040) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.019970e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.423825e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.682426e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.530096e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708266690699] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.7168s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7162s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.30E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6929s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6924s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.49E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782303744791] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6399s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6335s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 90112 events => throughput is 1.41E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6044s + 
[COUNTERS] Fortran Overhead ( 0 ) : 1.5982s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 90112 events => throughput is 1.45E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -560,43 +560,43 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223782303744791) differ b OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.746029e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.130110e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.901621e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.628036e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.118488e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.274030e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.048897e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.059654e+08 ) sec^-1 *** 
EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.143525e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.301020e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.123246e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.126562e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.125934e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.294170e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.051137e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.968784e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 5956b00a07..925cf1dd8b 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ 
Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=avx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' - -make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2023-08-14_22:04:36 +DATE: 2023-10-25_19:20:33 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -57,11 +57,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5466s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2138s - [COUNTERS] Fortran MEs ( 1 ) : 0.3328s for 8192 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5609s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2258s + [COUNTERS] Fortran MEs ( 1 ) : 0.3351s for 8192 events => throughput is 2.44E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -82,11 +82,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5427s - [COUNTERS] 
Fortran Overhead ( 0 ) : 0.2113s - [COUNTERS] Fortran MEs ( 1 ) : 0.3314s for 8192 events => throughput is 2.47E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5566s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2205s + [COUNTERS] Fortran MEs ( 1 ) : 0.3361s for 8192 events => throughput is 2.44E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -107,11 +107,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872844967921E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.1335s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3939s - [COUNTERS] Fortran MEs ( 1 ) : 3.7396s for 90112 events => throughput is 2.41E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.0766s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3919s + [COUNTERS] Fortran MEs ( 1 ) : 3.6847s for 90112 events => throughput is 2.45E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196357922470791E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8780s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5471s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3309s for 8192 events => throughput is 2.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8617s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5333s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3285s for 8192 events => throughput is 2.49E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to 
MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195719386171234E-002) differ by less than 2E-14 (0.0) +OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470791E-002) differ by less than 2E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -165,29 +165,29 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872844967963E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.3825s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7735s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6090s for 90112 events => throughput is 2.50E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2565s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6821s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5744s for 90112 events => throughput is 2.52E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310872844967963E-002) differ by less than 2E-14 (4.440892098500626e-16) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872077655597E-002) differ by less than 2E-14 (4.440892098500626e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.566098e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.585485e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.613796e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.602908e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196357922470777E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5837s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4043s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1794s for 8192 events => throughput is 4.57E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5594s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3875s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1718s for 8192 events => throughput is 4.77E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195719386171234E-002) differ by less than 2E-14 (0.0) +OK! 
xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470777E-002) differ by less than 2E-14 (3.3306690738754696e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -241,29 +241,29 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872844967921E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.6739s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6364s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.0375s for 90112 events => throughput is 4.42E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.4116s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5278s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8838s for 90112 events => throughput is 4.78E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310872844967921E-002) differ by less than 2E-14 (0.0) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872077655555E-002) differ by less than 2E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.711017e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.886187e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.669357e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.863074e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,15 +284,15 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171206E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4257s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3264s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0993s for 8192 events => throughput is 8.25E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3866s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3020s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0845s for 8192 events => throughput is 9.69E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195719386171206E-002) differ by less than 2E-14 (3.3306690738754696e-16) +OK! 
xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470750E-002) differ by less than 2E-14 (5.551115123125783e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -317,29 +317,29 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872844967907E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.5988s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5757s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0232s for 90112 events => throughput is 8.81E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.3806s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4438s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9368s for 90112 events => throughput is 9.62E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310872844967907E-002) differ by less than 2E-14 (2.220446049250313e-16) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872077655541E-002) differ by less than 2E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.326361e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.810175e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.350750e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.814710e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -360,15 +360,15 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171206E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3837s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3040s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0797s for 8192 events => throughput is 1.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3697s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2950s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0747s for 8192 events => throughput is 1.10E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195719386171206E-002) differ by less than 2E-14 (3.3306690738754696e-16) +OK! 
xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470750E-002) differ by less than 2E-14 (5.551115123125783e-16) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,29 +393,29 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872844967907E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.3782s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5121s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8661s for 90112 events => throughput is 1.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2633s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4344s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8288s for 90112 events => throughput is 1.09E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310872844967907E-002) differ by less than 2E-14 (2.220446049250313e-16) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872077655541E-002) differ by less than 2E-14 (2.220446049250313e-16) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.119867e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.111782e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.108578e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.107532e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -436,15 +436,15 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4236s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3188s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1048s for 8192 events => throughput is 7.81E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4288s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3258s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1030s for 8192 events => throughput is 7.95E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195719386171234E-002) differ by less than 2E-14 (0.0) +OK! 
xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470750E-002) differ by less than 2E-14 (5.551115123125783e-16) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -469,29 +469,29 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872844967907E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.6525s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5012s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1513s for 90112 events => throughput is 7.83E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6474s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4850s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1624s for 90112 events => throughput is 7.75E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310872844967907E-002) differ by less than 2E-14 (2.220446049250313e-16) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872077655541E-002) differ by less than 2E-14 (2.220446049250313e-16) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.650795e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.891678e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.685211e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.153326e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -512,15 +512,15 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171220E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6785s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6733s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0052s for 8192 events => throughput is 1.57E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6666s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6612s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.52E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195719386171220E-002) differ by less than 2E-14 (1.1102230246251565e-16) +OK! 
xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470764E-002) differ by less than 2E-14 (4.440892098500626e-16) *** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -545,58 +545,58 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872844967977E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.9197s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8970s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 90112 events => throughput is 3.96E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8130s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7902s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0227s for 90112 events => throughput is 3.96E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310872844967977E-002) differ by less than 2E-14 (6.661338147750939e-16) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872077655597E-002) differ by less than 2E-14 (4.440892098500626e-16) *** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.350016e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.639819e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.194176e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.202614e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.767099e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.989805e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.236435e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.236697e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
-EvtsPerSec[MECalcOnly] (3a) = ( 9.762100e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.956986e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.243071e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.247459e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.770310e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.990764e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.734951e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.731178e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 404b9a91dc..fa99d034ca 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -2,28 +2,28 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
+ make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 +make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' - -make USEBUILDDIR=1 AVX=512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' - -make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -33,9 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-08-14_22:05:19 +DATE: 2023-10-25_19:21:14 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -57,11 +57,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5596s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2224s - [COUNTERS] Fortran MEs ( 1 ) : 0.3372s for 8192 events => throughput is 2.43E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5523s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2203s + [COUNTERS] Fortran MEs ( 1 ) : 0.3320s for 8192 events => throughput is 2.47E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -82,11 +82,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5760s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2200s - [COUNTERS] Fortran MEs ( 1 ) : 0.3561s for 8192 events => throughput is 2.30E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5511s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2185s + 
[COUNTERS] Fortran MEs ( 1 ) : 0.3325s for 8192 events => throughput is 2.46E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -107,11 +107,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872844967921E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.1446s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3992s - [COUNTERS] Fortran MEs ( 1 ) : 3.7455s for 90112 events => throughput is 2.41E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.0773s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3954s + [COUNTERS] Fortran MEs ( 1 ) : 3.6819s for 90112 events => throughput is 2.45E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195711188152623E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196349725192449E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8211s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5128s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3083s for 8192 events => throughput is 2.66E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8861s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5506s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3355s for 8192 events => throughput is 2.44E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195711188152623E-002) differ by less than 4E-4 (8.434546971969326e-08) +OK! 
xsec from fortran (9.7196357922470805E-002) and cpp (9.7196349725192449E-002) differ by less than 4E-4 (8.433729958845504e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -165,29 +165,29 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310861450156910E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310860682799649E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.0994s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6914s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4080s for 90112 events => throughput is 2.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1805s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6708s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5097s for 90112 events => throughput is 2.57E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310861450156910E-002) differ by less than 4E-4 (1.401388352029187e-07) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310860682799649E-002) differ by less than 4E-4 (1.4013938864909647e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.727937e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.644874e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.736999e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.623423e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195695504827997E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196334032667323E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4175s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3201s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0974s for 8192 events => throughput is 8.41E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4066s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3108s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0959s for 8192 events => throughput is 8.55E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195695504827997E-002) differ by less than 4E-4 (2.457036522018896e-07) +OK! 
xsec from fortran (9.7196357922470805E-002) and cpp (9.7196334032667323E-002) differ by less than 4E-4 (2.4578908086603235e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -241,29 +241,29 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310848293145957E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310847525777316E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.6174s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5236s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0938s for 90112 events => throughput is 8.24E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.5043s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4504s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0538s for 90112 events => throughput is 8.55E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310848293145957E-002) differ by less than 4E-4 (3.0195004807609394e-07) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310847525777316E-002) differ by less than 4E-4 (3.0195074296468505e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.303655e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.644259e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.833265e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.595838e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,15 +284,15 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195692323432697E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196330842071521E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.2997s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2564s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0432s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3044s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2603s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0441s for 8192 events => throughput is 1.86E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195692323432697E-002) differ by less than 4E-4 (2.7843549810224744e-07) +OK! 
xsec from fortran (9.7196357922470805E-002) and cpp (9.7196330842071521E-002) differ by less than 4E-4 (2.786153705525152e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -317,29 +317,29 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310848252682449E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310847485320789E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8968s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4218s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4750s for 90112 events => throughput is 1.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9365s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4416s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4949s for 90112 events => throughput is 1.82E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310848252682449E-002) differ by less than 4E-4 (3.0244768767229147e-07) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310847485320789E-002) differ by less than 4E-4 (3.024482967406428e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.832735e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.877444e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.888176e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.866007e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -360,15 +360,15 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195692323432697E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196330842071521E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.2910s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2522s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0388s for 8192 events => throughput is 2.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2968s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2571s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0397s for 8192 events => throughput is 2.06E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195692323432697E-002) differ by less than 4E-4 (2.7843549810224744e-07) +OK! 
xsec from fortran (9.7196357922470805E-002) and cpp (9.7196330842071521E-002) differ by less than 4E-4 (2.786153705525152e-07) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,29 +393,29 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310848252682449E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310847485320789E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8572s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4225s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4347s for 90112 events => throughput is 2.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8329s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3969s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4360s for 90112 events => throughput is 2.07E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310848252682449E-002) differ by less than 4E-4 (3.0244768767229147e-07) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310847485320789E-002) differ by less than 4E-4 (3.024482967406428e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.079597e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.064455e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.099658e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.134523e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -436,15 +436,15 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195705534321677E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196344068381207E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3168s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2662s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0505s for 8192 events => throughput is 1.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3182s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2680s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0503s for 8192 events => throughput is 1.63E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195705534321677E-002) differ by less than 4E-4 (1.4251501656570298e-07) +OK! 
xsec from fortran (9.7196357922470805E-002) and cpp (9.7196344068381207E-002) differ by less than 4E-4 (1.42537126879283e-07) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -469,29 +469,29 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310858570909916E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310857803543385E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.9878s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4386s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5493s for 90112 events => throughput is 1.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9728s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4203s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5524s for 90112 events => throughput is 1.63E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310858570909916E-002) differ by less than 4E-4 (1.7554919173878858e-07) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310857803543385E-002) differ by less than 4E-4 (1.755498595379379e-07) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.542276e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.608228e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.576050e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.603946e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -512,15 +512,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195710869056637E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196349366365994E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6723s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6713s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 8.76E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6467s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6458s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 9.73E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195710869056637E-002) differ by less than 4E-4 (8.762849490473457e-08) +OK! 
xsec from fortran (9.7196357922470805E-002) and cpp (9.7196349366365994E-002) differ by less than 4E-4 (8.802906814597833e-08) *** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -545,58 +545,58 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310865716831132E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310864949473968E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8513s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8418s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0096s for 90112 events => throughput is 9.42E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.7893s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7798s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0095s for 90112 events => throughput is 9.53E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310865716831132E-002) differ by less than 4E-4 (8.766523518222158e-08) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310864949473968E-002) differ by less than 4E-4 (8.766578696306482e-08) *** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.134153e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.347402e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.553684e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.856435e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.760576e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.795868e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.323684e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.305516e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.753000e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.791470e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.433602e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.491438e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.588900e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.627451e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.607695e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.615609e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 7b2d1275fc..8d56c45efe 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -4,28 +4,28 @@ CUDACPP_BUILDDIR='.' 
make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 - make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' - -make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -33,9 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-08-14_22:05:57 +DATE: 2023-10-25_19:21:50 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -57,11 +57,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5485s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2136s - [COUNTERS] Fortran MEs ( 1 ) : 0.3349s for 8192 events => throughput is 2.45E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5526s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2199s + [COUNTERS] Fortran MEs ( 1 ) : 0.3328s for 8192 events => throughput is 2.46E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -82,11 +82,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5447s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2109s - [COUNTERS] Fortran MEs ( 1 ) : 0.3338s for 8192 events => throughput is 2.45E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5518s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2183s + 
[COUNTERS] Fortran MEs ( 1 ) : 0.3335s for 8192 events => throughput is 2.46E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -107,11 +107,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872844967921E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.0587s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3673s - [COUNTERS] Fortran MEs ( 1 ) : 3.6914s for 90112 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.0409s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3789s + [COUNTERS] Fortran MEs ( 1 ) : 3.6620s for 90112 events => throughput is 2.46E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195720226233587E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196358763382007E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8546s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5317s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3229s for 8192 events => throughput is 2.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8757s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5434s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3323s for 8192 events => throughput is 2.46E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195720226233587E-002) differ by less than 2E-4 (8.642997428864874e-09) +OK! 
xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358763382007E-002) differ by less than 2E-4 (8.651674043846924e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -165,29 +165,29 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310873602323142E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872835011053E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.2458s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7009s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.5449s for 90112 events => throughput is 2.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3915s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7212s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6703s for 90112 events => throughput is 2.46E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310873602323142E-002) differ by less than 2E-4 (9.314316651298782e-09) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872835011053E-002) differ by less than 2E-4 (9.31432020401246e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.623250e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.532584e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.622638e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.521877e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195720267415450E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196358804670396E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5530s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3879s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1651s for 8192 events => throughput is 4.96E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5544s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3845s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1699s for 8192 events => throughput is 4.82E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195720267415450E-002) differ by less than 2E-4 (9.066697836956905e-09) +OK! 
xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358804670396E-002) differ by less than 2E-4 (9.076467577529002e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -241,29 +241,29 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310873604102080E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872836789727E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.4511s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5568s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8943s for 90112 events => throughput is 4.76E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.3887s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5301s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8586s for 90112 events => throughput is 4.85E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310873604102080E-002) differ by less than 2E-4 (9.33619492826665e-09) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872836789727E-002) differ by less than 2E-4 (9.336195150311255e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.078320e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.952919e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.731440e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.914380e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,15 +284,15 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195720049465126E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3809s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2975s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0834s for 8192 events => throughput is 9.82E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3876s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3037s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0839s for 8192 events => throughput is 9.76E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195720049465126E-002) differ by less than 2E-4 (6.824311782338555e-09) +OK! 
xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358586501358E-002) differ by less than 2E-4 (6.831845977828266e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -317,29 +317,29 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310873476230255E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.3858s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4666s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9192s for 90112 events => throughput is 9.80E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.3852s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4574s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9277s for 90112 events => throughput is 9.71E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310873476230255E-002) differ by less than 2E-4 (7.76356601228656e-09) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872708918333E-002) differ by less than 2E-4 (7.763571563401683e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.798755e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.889061e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.988246e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.855619e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -360,15 +360,15 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195720049465126E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3620s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2877s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0743s for 8192 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3661s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2915s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0745s for 8192 events => throughput is 1.10E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195720049465126E-002) differ by less than 2E-4 (6.824311782338555e-09) +OK! 
xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358586501358E-002) differ by less than 2E-4 (6.831845977828266e-09) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,29 +393,29 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310873476230255E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.2821s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4635s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8186s for 90112 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2630s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4388s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8242s for 90112 events => throughput is 1.09E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310873476230255E-002) differ by less than 2E-4 (7.76356601228656e-09) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872708918333E-002) differ by less than 2E-4 (7.763571563401683e-09) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.134221e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.127984e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.137832e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.119463e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -436,15 +436,15 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195720220276491E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196358757578441E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4276s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3218s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1058s for 8192 events => throughput is 7.74E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4328s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3253s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1075s for 8192 events => throughput is 7.62E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195720220276491E-002) differ by less than 2E-4 (8.581707788835047e-09) +OK! 
xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358757578441E-002) differ by less than 2E-4 (8.591964251181139e-09) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -469,29 +469,29 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310873571012007E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872803699391E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.6686s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4970s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1715s for 90112 events => throughput is 7.69E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6477s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4725s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1752s for 90112 events => throughput is 7.67E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310873571012007E-002) differ by less than 2E-4 (8.92923734951978e-09) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872803699391E-002) differ by less than 2E-4 (8.929234462939917e-09) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.710860e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.668693e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.803485e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.591243e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -512,15 +512,15 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719566775987E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196358102981245E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6764s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6712s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0052s for 8192 events => throughput is 1.57E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7068s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7013s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.50E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195719566775987E-002) differ by less than 2E-4 (1.858155407319373e-09) +OK! 
xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358102981245E-002) differ by less than 2E-4 (1.8571728599425796e-09) *** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -545,58 +545,58 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872835946929E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872068634174E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8587s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8362s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0225s for 90112 events => throughput is 4.00E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8933s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8700s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0233s for 90112 events => throughput is 3.87E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310872835946929E-002) differ by less than 2E-4 (1.1094447582848943e-10) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872068634174E-002) differ by less than 2E-4 (1.1094924978749532e-10) *** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.363896e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.611008e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.123801e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.220129e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.749420e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.993875e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.230614e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.234445e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
-EvtsPerSec[MECalcOnly] (3a) = ( 9.744788e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.000644e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.240384e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.243443e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.735434e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.965395e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.703353e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.708140e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index a5d20b4ac3..62d0e45c34 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -2,40 +2,40 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 +make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - -make USEBUILDDIR=1 AVX=512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - -make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2023-08-14_22:06:39 +DATE: 2023-10-25_19:22:32 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.4072s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2700s - [COUNTERS] Fortran MEs ( 1 ) : 4.1373s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3806s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2793s + [COUNTERS] Fortran MEs ( 1 ) : 4.1013s for 8192 events => throughput is 2.00E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.4081s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2638s - [COUNTERS] Fortran MEs ( 1 ) : 4.1443s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3901s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2745s + [COUNTERS] Fortran MEs ( 1 ) : 4.1156s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -107,11 +107,11 @@ Executing ' ./madevent_fortran < 
/tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 47.3606s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8570s - [COUNTERS] Fortran MEs ( 1 ) : 45.5035s for 90112 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.2170s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8938s + [COUNTERS] Fortran MEs ( 1 ) : 45.3231s for 90112 events => throughput is 1.99E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.4702s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3194s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1508s for 8192 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.7189s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4329s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.2860s for 8192 events => throughput is 1.91E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -165,29 +165,29 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748610601E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725748421161E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 51.9429s - [COUNTERS] Fortran Overhead ( 0 ) : 5.8995s - [COUNTERS] CudaCpp MEs ( 2 ) : 46.0433s for 90112 events => throughput 
is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 53.2752s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0306s + [COUNTERS] CudaCpp MEs ( 2 ) : 47.2446s for 90112 events => throughput is 1.91E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610601E-004) differ by less than 2E-14 (2.220446049250313e-16) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421161E-004) differ by less than 2E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.029654e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.975932e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.020693e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.969422e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352993E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.5389s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3899s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.1491s for 8192 events => throughput is 3.81E+03 events/s + [COUNTERS] PROGRAM 
TOTAL : 4.7792s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4930s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2862s for 8192 events => throughput is 3.58E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -241,29 +241,29 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748610596E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 27.9466s - [COUNTERS] Fortran Overhead ( 0 ) : 3.9651s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.9815s for 90112 events => throughput is 3.76E+03 events/s + [COUNTERS] PROGRAM TOTAL : 29.2447s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0623s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.1823s for 90112 events => throughput is 3.58E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610596E-004) differ by less than 2E-14 (5.551115123125783e-16) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421158E-004) differ by less than 2E-14 (3.3306690738754696e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.702146e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.719602e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.717916e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.704578e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.1921s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2185s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9736s for 8192 events => throughput is 8.41E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.2095s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2292s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9803s for 8192 events => throughput is 8.36E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -317,29 +317,29 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 13.5517s - 
[COUNTERS] Fortran Overhead ( 0 ) : 2.7978s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.7540s for 90112 events => throughput is 8.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.7366s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8124s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.9242s for 90112 events => throughput is 8.25E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610604E-004) differ by less than 2E-14 (0.0) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421164E-004) differ by less than 2E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.581303e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.553162e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.615558e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.569833e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.9838s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1152s - [COUNTERS] CudaCpp MEs ( 2 ) 
: 0.8686s for 8192 events => throughput is 9.43E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9852s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1172s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8679s for 8192 events => throughput is 9.44E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -393,29 +393,29 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 12.2222s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6903s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.5319s for 90112 events => throughput is 9.45E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.2612s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6975s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.5637s for 90112 events => throughput is 9.42E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610604E-004) differ by less than 2E-14 (0.0) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421164E-004) differ by less than 2E-14 (0.0) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.645534e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.732365e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.653115e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.738434e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.4015s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3265s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0750s for 8192 events => throughput is 7.62E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.3926s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3264s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0661s for 8192 events => throughput is 7.68E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -469,29 +469,29 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 14.7712s - 
[COUNTERS] Fortran Overhead ( 0 ) : 2.9299s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.8413s for 90112 events => throughput is 7.61E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.7899s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9106s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.8793s for 90112 events => throughput is 7.59E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610604E-004) differ by less than 2E-14 (0.0) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421164E-004) differ by less than 2E-14 (0.0) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.645947e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.707688e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.811200e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.746281e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.8303s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7978s - [COUNTERS] CudaCpp MEs ( 2 ) : 
0.0325s for 8192 events => throughput is 2.52E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7947s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7633s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0315s for 8192 events => throughput is 2.60E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -545,58 +545,58 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748610601E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725748421161E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.7324s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3765s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3559s for 90112 events => throughput is 2.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6726s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3253s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3472s for 90112 events => throughput is 2.60E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610601E-004) differ by less than 2E-14 (2.220446049250313e-16) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421161E-004) differ by less than 2E-14 (2.220446049250313e-16) *** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.249477e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.298413e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.470483e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.515584e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.113303e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.121587e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.156445e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.140818e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.104498e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.116718e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.175356e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.158341e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.109699e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.107036e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.428744e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.429475e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index c97e909f30..a11d40fa18 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -3,39 +3,39 @@ CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - -make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2023-08-14_22:10:50 +DATE: 2023-10-25_19:26:45 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.4523s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2691s - [COUNTERS] Fortran MEs ( 1 ) : 4.1832s for 8192 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3808s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2757s + [COUNTERS] Fortran MEs ( 1 ) : 4.1052s for 8192 events => throughput is 2.00E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3874s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2637s - [COUNTERS] Fortran MEs ( 1 ) : 4.1238s for 8192 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3737s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2719s + [COUNTERS] Fortran MEs ( 1 ) : 4.1018s for 8192 events => throughput is 2.00E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -107,11 +107,11 @@ Executing ' ./madevent_fortran < 
/tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 47.3334s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8594s - [COUNTERS] Fortran MEs ( 1 ) : 45.4740s for 90112 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.1736s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9032s + [COUNTERS] Fortran MEs ( 1 ) : 45.2704s for 90112 events => throughput is 1.99E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277396515517582E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.2448s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2068s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.0380s for 8192 events => throughput is 2.03E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.4177s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2835s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.1342s for 8192 events => throughput is 1.98E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -165,29 +165,29 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803774605353658E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803774605164224E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 50.8901s - [COUNTERS] Fortran Overhead ( 0 ) : 5.8012s - [COUNTERS] CudaCpp MEs ( 2 ) : 45.0889s for 90112 events => throughput 
is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 51.5557s + [COUNTERS] Fortran Overhead ( 0 ) : 5.8491s + [COUNTERS] CudaCpp MEs ( 2 ) : 45.7066s for 90112 events => throughput is 1.97E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803774605353658E-004) differ by less than 4E-4 (3.091469937599456e-06) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803774605164224E-004) differ by less than 4E-4 (3.091469938043545e-06) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.028116e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.033185e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.096403e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.041307e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277389113409186E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.2932s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2672s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0259s for 8192 events => throughput is 7.98E+03 events/s + [COUNTERS] PROGRAM 
TOTAL : 2.5057s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3763s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1294s for 8192 events => throughput is 7.25E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -241,29 +241,29 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803771886003655E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803771885814218E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 14.2643s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8596s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.4048s for 90112 events => throughput is 7.90E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.4913s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9536s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.5377s for 90112 events => throughput is 7.19E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803771886003655E-004) differ by less than 4E-4 (2.919399753276153e-06) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803771885814218E-004) differ by less than 4E-4 (2.9193997534981975e-06) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.590778e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.412053e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.644068e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.406103e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277390171873933E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.2437s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7480s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4957s for 8192 events => throughput is 1.65E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.2465s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7541s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4925s for 8192 events => throughput is 1.66E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -317,29 +317,29 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803774410661750E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803774410472313E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.7562s - [COUNTERS] 
Fortran Overhead ( 0 ) : 2.3236s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4326s for 90112 events => throughput is 1.66E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.7561s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3072s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4489s for 90112 events => throughput is 1.65E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803774410661750E-004) differ by less than 4E-4 (3.0791505700733524e-06) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803774410472313E-004) differ by less than 4E-4 (3.0791505700733524e-06) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.703375e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.691454e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.698765e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.694442e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277390171873933E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.1323s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6918s - 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.4405s for 8192 events => throughput is 1.86E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.1341s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6956s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4385s for 8192 events => throughput is 1.87E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -393,29 +393,29 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803774410661750E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803774410472313E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.1665s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2767s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.8897s for 90112 events => throughput is 1.84E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.1002s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2595s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.8407s for 90112 events => throughput is 1.86E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803774410661750E-004) differ by less than 4E-4 (3.0791505700733524e-06) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803774410472313E-004) differ by less than 4E-4 (3.0791505700733524e-06) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.906368e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.917142e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.916171e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.912635e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277396414214383E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.3232s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7925s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5306s for 8192 events => throughput is 1.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.3236s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7938s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5298s for 8192 events => throughput is 1.55E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -469,29 +469,29 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803777740932968E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803777740743528E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 8.2464s - [COUNTERS] 
Fortran Overhead ( 0 ) : 2.3859s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.8605s for 90112 events => throughput is 1.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.3515s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4437s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.9078s for 90112 events => throughput is 1.53E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803777740932968E-004) differ by less than 4E-4 (3.289877538392716e-06) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803777740743528E-004) differ by less than 4E-4 (3.289877538392716e-06) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.551574e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.556068e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.558886e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.554806e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277400478491260E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.7904s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7689s - [COUNTERS] 
CudaCpp MEs ( 2 ) : 0.0215s for 8192 events => throughput is 3.81E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7609s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7395s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.84E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -545,58 +545,58 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803779990344337E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803779990154892E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.5853s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3485s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2368s for 90112 events => throughput is 3.81E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5388s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3037s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2351s for 90112 events => throughput is 3.83E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803779990344337E-004) differ by less than 4E-4 (3.432211783227501e-06) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803779990154892E-004) differ by less than 4E-4 (3.4322117830054566e-06) *** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.491279e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.602401e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.823364e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.943641e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.481420e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.505586e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.623321e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.637854e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.487885e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.505362e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.621916e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.630323e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.465738e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.491202e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.512206e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.522012e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index a16dc1bdcc..344f040590 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -3,27 +3,27 @@ CUDACPP_BUILDDIR='.' 
+ make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 - make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - -make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
@@ -33,9 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-08-14_22:14:08 +DATE: 2023-10-25_19:30:04 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3923s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2687s - [COUNTERS] Fortran MEs ( 1 ) : 4.1236s for 8192 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3869s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2765s + [COUNTERS] Fortran MEs ( 1 ) : 4.1104s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3866s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2648s - [COUNTERS] Fortran MEs ( 1 ) : 4.1218s for 8192 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3884s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2753s + [COUNTERS] Fortran MEs ( 1 ) : 4.1132s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -107,11 +107,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a 
[XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 47.4337s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8789s - [COUNTERS] Fortran MEs ( 1 ) : 45.5547s for 90112 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.2155s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9013s + [COUNTERS] Fortran MEs ( 1 ) : 45.3142s for 90112 events => throughput is 1.99E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277432965013E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.6146s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3733s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.2414s for 8192 events => throughput is 1.93E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.8199s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4948s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3251s for 8192 events => throughput is 1.89E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -165,29 +165,29 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725813215552E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725813026109E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 53.3443s - [COUNTERS] Fortran Overhead ( 0 ) : 6.0267s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.3176s for 90112 events => throughput is 1.90E+03 events/s + [COUNTERS] PROGRAM TOTAL 
: 53.9672s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0490s + [COUNTERS] CudaCpp MEs ( 2 ) : 47.9182s for 90112 events => throughput is 1.88E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725813215552E-004) differ by less than 2E-4 (4.087956861908992e-09) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725813026109E-004) differ by less than 2E-4 (4.087956639864387e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.995920e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.959802e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.995197e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.958905e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277430934464E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.6948s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4569s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2380s for 8192 events => throughput is 3.66E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7279s + [COUNTERS] Fortran Overhead ( 0 
) : 2.4759s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2519s for 8192 events => throughput is 3.64E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -241,29 +241,29 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725816435760E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725816246317E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 28.9372s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0446s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.8926s for 90112 events => throughput is 3.62E+03 events/s + [COUNTERS] PROGRAM TOTAL : 29.0177s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0402s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.9775s for 90112 events => throughput is 3.61E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725816435760E-004) differ by less than 2E-4 (4.291719424287521e-09) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725816246317E-004) differ by less than 2E-4 (4.291719202242916e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.742445e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.730381e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.708779e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.728042e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.2106s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2271s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9835s for 8192 events => throughput is 8.33E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.1830s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2180s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9649s for 8192 events => throughput is 8.49E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -317,29 +317,29 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725810958764E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 13.7290s - 
[COUNTERS] Fortran Overhead ( 0 ) : 2.8212s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.9078s for 90112 events => throughput is 8.26E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.5258s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8006s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.7251s for 90112 events => throughput is 8.40E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725810958764E-004) differ by less than 2E-4 (3.945155757634211e-09) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725810769321E-004) differ by less than 2E-4 (3.945155535589606e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.608179e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.577284e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.601501e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.645443e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.9659s - [COUNTERS] Fortran Overhead ( 0 ) : 
1.1018s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8640s for 8192 events => throughput is 9.48E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9705s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1082s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8624s for 8192 events => throughput is 9.50E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -393,29 +393,29 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725810958764E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 12.2242s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6903s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.5339s for 90112 events => throughput is 9.45E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.1916s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6788s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.5128s for 90112 events => throughput is 9.47E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725810958764E-004) differ by less than 2E-4 (3.945155757634211e-09) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725810769321E-004) differ by less than 2E-4 (3.945155535589606e-09) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.811592e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.762571e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.738069e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.791935e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.4290s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3406s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0884s for 8192 events => throughput is 7.53E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.4166s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3396s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0770s for 8192 events => throughput is 7.61E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -469,29 +469,29 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725810958764E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 14.9340s - 
[COUNTERS] Fortran Overhead ( 0 ) : 2.9439s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.9901s for 90112 events => throughput is 7.52E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.9279s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9270s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.0009s for 90112 events => throughput is 7.51E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725810958764E-004) differ by less than 2E-4 (3.945155757634211e-09) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725810769321E-004) differ by less than 2E-4 (3.945155535589606e-09) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.467301e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.723112e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.309649e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.593326e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277293084707E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.8266s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7942s - 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.0324s for 8192 events => throughput is 2.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7973s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7658s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0315s for 8192 events => throughput is 2.60E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -545,58 +545,58 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725738920476E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725738731039E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.7235s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3711s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3524s for 90112 events => throughput is 2.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6608s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3213s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3395s for 90112 events => throughput is 2.65E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725738920476E-004) differ by less than 2E-4 (6.131546381737962e-10) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725738731039E-004) differ by less than 2E-4 (6.131544161291913e-10) *** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.246790e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.295568e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.470112e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.529340e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.121674e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.107713e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.164710e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.153742e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.113815e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.126841e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.178399e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.176580e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.123159e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.122237e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.448203e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.438612e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index dde22269fd..90411e1b5b 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,27 +1,27 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - -make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
@@ -33,9 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-08-14_22:18:42 +DATE: 2023-10-25_19:34:37 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 166 events) - [COUNTERS] PROGRAM TOTAL : 98.3097s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4392s - [COUNTERS] Fortran MEs ( 1 ) : 97.8704s for 8192 events => throughput is 8.37E+01 events/s + [COUNTERS] PROGRAM TOTAL : 96.8592s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4672s + [COUNTERS] Fortran MEs ( 1 ) : 96.3920s for 8192 events => throughput is 8.50E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 98.2929s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4361s - [COUNTERS] Fortran MEs ( 1 ) : 97.8568s for 8192 events => throughput is 8.37E+01 events/s + [COUNTERS] PROGRAM TOTAL : 97.0100s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4404s + [COUNTERS] Fortran MEs ( 1 ) : 96.5697s for 8192 events => throughput is 8.48E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ 
[XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813976E-007] fbridge_mode=0 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1078.6346s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0665s - [COUNTERS] Fortran MEs ( 1 ) : 1074.5681s for 90112 events => throughput is 8.39E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1062.8511s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0497s + [COUNTERS] Fortran MEs ( 1 ) : 1058.8014s for 90112 events => throughput is 8.51E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435831E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 213.2407s - [COUNTERS] Fortran Overhead ( 0 ) : 97.1161s - [COUNTERS] CudaCpp MEs ( 2 ) : 116.1246s for 8192 events => throughput is 7.05E+01 events/s + [COUNTERS] PROGRAM TOTAL : 221.3790s + [COUNTERS] Fortran Overhead ( 0 ) : 101.7058s + [COUNTERS] CudaCpp MEs ( 2 ) : 119.6732s for 8192 events => throughput is 6.85E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813953E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1382.5585s - [COUNTERS] Fortran Overhead ( 0 ) : 99.9723s - [COUNTERS] CudaCpp MEs ( 2 ) : 1282.5862s for 90112 events => throughput is 7.03E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1450.0547s + [COUNTERS] Fortran Overhead ( 0 ) : 106.7786s + [COUNTERS] CudaCpp MEs ( 2 ) : 1343.2761s for 90112 events => throughput is 6.71E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -180,14 +180,14 @@ OK! 
xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813953E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.434815e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.310154e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.372917e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.288200e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435827E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 109.0524s - [COUNTERS] Fortran Overhead ( 0 ) : 50.3601s - [COUNTERS] CudaCpp MEs ( 2 ) : 58.6923s for 8192 events => throughput is 1.40E+02 events/s + [COUNTERS] PROGRAM TOTAL : 107.4772s + [COUNTERS] Fortran Overhead ( 0 ) : 49.8239s + [COUNTERS] CudaCpp MEs ( 2 ) : 57.6534s for 8192 events => throughput is 1.42E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 702.1622s - [COUNTERS] Fortran Overhead ( 0 ) : 53.8812s - [COUNTERS] 
CudaCpp MEs ( 2 ) : 648.2810s for 90112 events => throughput is 1.39E+02 events/s + [COUNTERS] PROGRAM TOTAL : 689.8975s + [COUNTERS] Fortran Overhead ( 0 ) : 53.4603s + [COUNTERS] CudaCpp MEs ( 2 ) : 636.4372s for 90112 events => throughput is 1.42E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +256,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.635222e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.666029e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.634341e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.661964e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435829E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 51.3602s - [COUNTERS] Fortran Overhead ( 0 ) : 23.4381s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.9221s for 8192 events => throughput is 2.93E+02 events/s + [COUNTERS] PROGRAM TOTAL : 51.5576s + [COUNTERS] Fortran Overhead ( 0 ) : 23.6406s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.9170s for 8192 events => throughput is 2.93E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN 
xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 334.2973s - [COUNTERS] Fortran Overhead ( 0 ) : 27.0940s - [COUNTERS] CudaCpp MEs ( 2 ) : 307.2033s for 90112 events => throughput is 2.93E+02 events/s + [COUNTERS] PROGRAM TOTAL : 332.8698s + [COUNTERS] Fortran Overhead ( 0 ) : 27.2762s + [COUNTERS] CudaCpp MEs ( 2 ) : 305.5936s for 90112 events => throughput is 2.95E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +332,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.562217e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.564287e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.551709e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.571126e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435829E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 46.3422s - [COUNTERS] Fortran 
Overhead ( 0 ) : 21.0085s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.3337s for 8192 events => throughput is 3.23E+02 events/s + [COUNTERS] PROGRAM TOTAL : 45.7634s + [COUNTERS] Fortran Overhead ( 0 ) : 20.8536s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.9098s for 8192 events => throughput is 3.29E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 302.3362s - [COUNTERS] Fortran Overhead ( 0 ) : 24.5306s - [COUNTERS] CudaCpp MEs ( 2 ) : 277.8056s for 90112 events => throughput is 3.24E+02 events/s + [COUNTERS] PROGRAM TOTAL : 298.7296s + [COUNTERS] Fortran Overhead ( 0 ) : 24.5229s + [COUNTERS] CudaCpp MEs ( 2 ) : 274.2068s for 90112 events => throughput is 3.29E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +408,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.014963e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.037302e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.998376e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.011224e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435829E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 46.6138s - [COUNTERS] Fortran Overhead ( 0 ) : 22.6844s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.9295s for 8192 events => throughput is 3.42E+02 events/s + [COUNTERS] PROGRAM TOTAL : 46.0178s + [COUNTERS] Fortran Overhead ( 0 ) : 22.2114s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.8064s for 8192 events => throughput is 3.44E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 291.2529s - [COUNTERS] Fortran Overhead ( 0 ) : 26.1362s - [COUNTERS] CudaCpp MEs ( 2 ) : 265.1167s for 90112 events => throughput is 3.40E+02 events/s + 
[COUNTERS] PROGRAM TOTAL : 285.3431s + [COUNTERS] Fortran Overhead ( 0 ) : 25.8685s + [COUNTERS] CudaCpp MEs ( 2 ) : 259.4745s for 90112 events => throughput is 3.47E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +484,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.726492e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.723231e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.706362e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.765137e+02 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435838E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 4.2266s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1468s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0798s for 8192 events => throughput is 7.59E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.1896s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1063s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0833s for 8192 events => throughput is 7.56E+03 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < 
/tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 18.5987s - [COUNTERS] Fortran Overhead ( 0 ) : 6.6991s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.8997s for 90112 events => throughput is 7.57E+03 events/s + [COUNTERS] PROGRAM TOTAL : 18.5925s + [COUNTERS] Fortran Overhead ( 0 ) : 6.7176s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.8749s for 90112 events => throughput is 7.59E+03 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -560,43 +560,43 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007 OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.498934e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.538676e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.242434e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.266826e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.213771e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.276191e+03 ) 
sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.550067e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.572409e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.212061e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.231100e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.428013e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.464972e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.218853e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.233366e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 
3.251243e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.245300e+03 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index a3f5901507..93e9694d2a 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -3,21 +3,21 @@ CUDACPP_BUILDDIR='.' + make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 - make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - -make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' @@ -27,15 +27,15 @@ make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2023-08-14_23:46:10 +DATE: 2023-10-25_21:02:37 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 166 events) - [COUNTERS] PROGRAM TOTAL : 98.1174s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4596s - [COUNTERS] Fortran MEs ( 1 ) : 97.6578s for 8192 events => throughput is 8.39E+01 events/s + [COUNTERS] PROGRAM TOTAL : 97.0230s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4439s + [COUNTERS] Fortran MEs ( 1 ) : 96.5791s for 8192 events => throughput is 8.48E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 98.2644s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4386s - [COUNTERS] Fortran MEs ( 1 ) : 97.8258s for 8192 events => throughput is 8.37E+01 events/s + [COUNTERS] PROGRAM TOTAL : 98.3689s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4421s + 
[COUNTERS] Fortran MEs ( 1 ) : 97.9268s for 8192 events => throughput is 8.37E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813976E-007] fbridge_mode=0 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1081.8654s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0206s - [COUNTERS] Fortran MEs ( 1 ) : 1077.8447s for 90112 events => throughput is 8.36E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1065.5287s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0935s + [COUNTERS] Fortran MEs ( 1 ) : 1061.4352s for 90112 events => throughput is 8.49E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1694768395608941E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.169e-06 [1.1694768395202781E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 200.3484s - [COUNTERS] Fortran Overhead ( 0 ) : 91.1850s - [COUNTERS] CudaCpp MEs ( 2 ) : 109.1634s for 8192 events => throughput is 7.50E+01 events/s + [COUNTERS] PROGRAM TOTAL : 198.8176s + [COUNTERS] Fortran Overhead ( 0 ) : 92.5000s + [COUNTERS] CudaCpp MEs ( 2 ) : 106.3176s for 8192 events => throughput is 7.71E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1694768395608941E-006) differ by less than 4E-4 (0.0001426011954326345) +OK! 
xsec from fortran (1.1693100945435808E-006) and cpp (1.1694768395202781E-006) differ by less than 4E-4 (0.00014260116069753082) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -165,29 +165,29 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1361436148187123E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.136e-07 [2.1361436140448921E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1298.3529s - [COUNTERS] Fortran Overhead ( 0 ) : 95.3324s - [COUNTERS] CudaCpp MEs ( 2 ) : 1203.0205s for 90112 events => throughput is 7.49E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1266.1327s + [COUNTERS] Fortran Overhead ( 0 ) : 95.7086s + [COUNTERS] CudaCpp MEs ( 2 ) : 1170.4241s for 90112 events => throughput is 7.70E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361436148187123E-007) differ by less than 4E-4 (0.00014045922420713453) +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361436140448921E-007) differ by less than 4E-4 (0.00014045886190539036) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.846416e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.002269e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.911135e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.967415e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694765850076731E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 49.1896s - [COUNTERS] Fortran Overhead ( 0 ) : 23.0434s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.1463s for 8192 events => throughput is 3.13E+02 events/s + [COUNTERS] PROGRAM TOTAL : 49.7619s + [COUNTERS] Fortran Overhead ( 0 ) : 23.4310s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.3309s for 8192 events => throughput is 3.11E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361430662723898E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 313.6106s - [COUNTERS] Fortran Overhead ( 0 ) : 26.5526s - [COUNTERS] CudaCpp MEs ( 2 ) : 287.0580s for 90112 events => throughput is 3.14E+02 events/s + 
[COUNTERS] PROGRAM TOTAL : 316.9053s + [COUNTERS] Fortran Overhead ( 0 ) : 27.1348s + [COUNTERS] CudaCpp MEs ( 2 ) : 289.7704s for 90112 events => throughput is 3.11E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +256,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361430662723898E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.646835e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.586738e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.642303e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.580652e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694764962310603E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 25.9175s - [COUNTERS] Fortran Overhead ( 0 ) : 12.0650s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.8525s for 8192 events => throughput is 5.91E+02 events/s + [COUNTERS] PROGRAM TOTAL : 25.7462s + [COUNTERS] Fortran Overhead ( 0 ) : 11.9729s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.7733s for 8192 events => throughput is 5.95E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < 
/tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361430432807771E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 168.3439s - [COUNTERS] Fortran Overhead ( 0 ) : 15.7176s - [COUNTERS] CudaCpp MEs ( 2 ) : 152.6263s for 90112 events => throughput is 5.90E+02 events/s + [COUNTERS] PROGRAM TOTAL : 168.1713s + [COUNTERS] Fortran Overhead ( 0 ) : 15.4495s + [COUNTERS] CudaCpp MEs ( 2 ) : 152.7218s for 90112 events => throughput is 5.90E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +332,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361430432807771E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.065846e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.197172e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.036228e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.226176e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694764962310603E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 23.3924s - [COUNTERS] Fortran Overhead ( 0 ) : 10.7536s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.6388s for 8192 
events => throughput is 6.48E+02 events/s + [COUNTERS] PROGRAM TOTAL : 22.7901s + [COUNTERS] Fortran Overhead ( 0 ) : 10.6491s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.1410s for 8192 events => throughput is 6.75E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361430432807771E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 156.0365s - [COUNTERS] Fortran Overhead ( 0 ) : 14.2552s - [COUNTERS] CudaCpp MEs ( 2 ) : 141.7813s for 90112 events => throughput is 6.36E+02 events/s + [COUNTERS] PROGRAM TOTAL : 147.9220s + [COUNTERS] Fortran Overhead ( 0 ) : 14.3147s + [COUNTERS] CudaCpp MEs ( 2 ) : 133.6073s for 90112 events => throughput is 6.74E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +408,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361430432807771E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.029563e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.019572e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.048196e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.643340e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694767969588676E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 23.1993s - [COUNTERS] Fortran Overhead ( 0 ) : 11.3204s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.8789s for 8192 events => throughput is 6.90E+02 events/s + [COUNTERS] PROGRAM TOTAL : 23.2769s + [COUNTERS] Fortran Overhead ( 0 ) : 11.3993s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.8776s for 8192 events => throughput is 6.90E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361435931847224E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 146.0712s - [COUNTERS] Fortran Overhead ( 0 ) : 14.9185s - [COUNTERS] CudaCpp MEs ( 2 ) : 131.1527s for 90112 events => throughput is 6.87E+02 events/s + 
[COUNTERS] PROGRAM TOTAL : 144.2454s + [COUNTERS] Fortran Overhead ( 0 ) : 15.1090s + [COUNTERS] CudaCpp MEs ( 2 ) : 129.1364s for 90112 events => throughput is 6.98E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +484,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361435931847224E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.491717e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.574328e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.590335e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.606464e+02 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694770708195000E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 2.4743s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9732s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5011s for 8192 events => throughput is 1.63E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.4561s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9552s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5008s for 8192 events => throughput is 1.64E+04 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < 
/tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361443477565659E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 10.9634s - [COUNTERS] Fortran Overhead ( 0 ) : 5.5332s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4302s for 90112 events => throughput is 1.66E+04 events/s + [COUNTERS] PROGRAM TOTAL : 10.9874s + [COUNTERS] Fortran Overhead ( 0 ) : 5.5585s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4289s for 90112 events => throughput is 1.66E+04 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -560,43 +560,43 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361443477565659E-007 OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.632358e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.636533e+04 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.617727e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.620348e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.305070e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.363235e+04 ) sec^-1 
*** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.369690e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.405136e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.293527e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.330068e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.287585e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.392338e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.303185e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.319690e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 
6.449332e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.432306e+03 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index f09db3bdf5..fd7d860c5e 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,31 +1,31 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=avx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + +make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - -make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -33,9 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-08-15_00:53:08 +DATE: 2023-10-25_22:08:36 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 166 events) - [COUNTERS] PROGRAM TOTAL : 98.0858s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4390s - [COUNTERS] Fortran MEs ( 1 ) : 97.6468s for 8192 events => throughput is 8.39E+01 events/s + [COUNTERS] PROGRAM TOTAL : 97.1556s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4422s + [COUNTERS] Fortran MEs ( 1 ) : 96.7134s for 8192 events => throughput is 8.47E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < 
/tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 98.4138s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4365s - [COUNTERS] Fortran MEs ( 1 ) : 97.9773s for 8192 events => throughput is 8.36E+01 events/s + [COUNTERS] PROGRAM TOTAL : 96.9689s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4446s + [COUNTERS] Fortran MEs ( 1 ) : 96.5243s for 8192 events => throughput is 8.49E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813976E-007] fbridge_mode=0 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1080.0781s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0355s - [COUNTERS] Fortran MEs ( 1 ) : 1076.0426s for 90112 events => throughput is 8.37E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1064.4592s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0566s + [COUNTERS] Fortran MEs ( 1 ) : 1060.4026s for 90112 events => throughput is 8.50E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693101016896846E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 221.5781s - [COUNTERS] Fortran Overhead ( 0 ) : 101.1161s - [COUNTERS] CudaCpp MEs ( 2 ) : 120.4620s for 8192 events => throughput is 6.80E+01 events/s + [COUNTERS] PROGRAM TOTAL : 212.5728s + [COUNTERS] Fortran Overhead ( 0 ) : 98.0572s + [COUNTERS] CudaCpp MEs ( 2 ) : 114.5156s for 8192 events => throughput is 7.15E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to 
MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436275882778E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1388.6113s - [COUNTERS] Fortran Overhead ( 0 ) : 104.3566s - [COUNTERS] CudaCpp MEs ( 2 ) : 1284.2548s for 90112 events => throughput is 7.02E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1418.4218s + [COUNTERS] Fortran Overhead ( 0 ) : 103.9288s + [COUNTERS] CudaCpp MEs ( 2 ) : 1314.4929s for 90112 events => throughput is 6.86E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -180,14 +180,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436275882778E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.131347e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.001258e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.034510e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.939164e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693101020910778E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 
111.7729s - [COUNTERS] Fortran Overhead ( 0 ) : 51.0800s - [COUNTERS] CudaCpp MEs ( 2 ) : 60.6929s for 8192 events => throughput is 1.35E+02 events/s + [COUNTERS] PROGRAM TOTAL : 111.7906s + [COUNTERS] Fortran Overhead ( 0 ) : 51.2589s + [COUNTERS] CudaCpp MEs ( 2 ) : 60.5317s for 8192 events => throughput is 1.35E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436284111598E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 721.2879s - [COUNTERS] Fortran Overhead ( 0 ) : 54.6646s - [COUNTERS] CudaCpp MEs ( 2 ) : 666.6234s for 90112 events => throughput is 1.35E+02 events/s + [COUNTERS] PROGRAM TOTAL : 721.3517s + [COUNTERS] Fortran Overhead ( 0 ) : 54.9246s + [COUNTERS] CudaCpp MEs ( 2 ) : 666.4271s for 90112 events => throughput is 1.35E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +256,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436284111598E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.616275e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.612539e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.615168e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.614353e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693101021831071E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 50.4058s - [COUNTERS] Fortran Overhead ( 0 ) : 23.1896s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.2162s for 8192 events => throughput is 3.01E+02 events/s + [COUNTERS] PROGRAM TOTAL : 49.1637s + [COUNTERS] Fortran Overhead ( 0 ) : 22.7432s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.4205s for 8192 events => throughput is 3.10E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436281462142E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 325.5201s - [COUNTERS] Fortran Overhead ( 0 ) : 26.0840s - [COUNTERS] CudaCpp MEs ( 2 ) : 299.4361s for 90112 events => throughput is 3.01E+02 events/s + 
[COUNTERS] PROGRAM TOTAL : 315.7719s + [COUNTERS] Fortran Overhead ( 0 ) : 26.2544s + [COUNTERS] CudaCpp MEs ( 2 ) : 289.5175s for 90112 events => throughput is 3.11E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +332,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436281462142E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.720314e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.644706e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.681792e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.685868e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693101021831071E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 43.8843s - [COUNTERS] Fortran Overhead ( 0 ) : 19.9763s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.9080s for 8192 events => throughput is 3.43E+02 events/s + [COUNTERS] PROGRAM TOTAL : 44.6193s + [COUNTERS] Fortran Overhead ( 0 ) : 20.5510s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.0684s for 8192 events => throughput is 3.40E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < 
/tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436281462142E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 286.5397s - [COUNTERS] Fortran Overhead ( 0 ) : 23.4338s - [COUNTERS] CudaCpp MEs ( 2 ) : 263.1059s for 90112 events => throughput is 3.42E+02 events/s + [COUNTERS] PROGRAM TOTAL : 281.8182s + [COUNTERS] Fortran Overhead ( 0 ) : 23.6088s + [COUNTERS] CudaCpp MEs ( 2 ) : 258.2094s for 90112 events => throughput is 3.49E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +408,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436281462142E-007 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.222074e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.253034e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.190205e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.210071e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693101021831071E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 45.3746s - [COUNTERS] Fortran Overhead ( 0 ) : 21.8578s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.5169s for 8192 
events => throughput is 3.48E+02 events/s + [COUNTERS] PROGRAM TOTAL : 45.1323s + [COUNTERS] Fortran Overhead ( 0 ) : 21.8275s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.3047s for 8192 events => throughput is 3.52E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436281462142E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 285.2207s - [COUNTERS] Fortran Overhead ( 0 ) : 25.6100s - [COUNTERS] CudaCpp MEs ( 2 ) : 259.6107s for 90112 events => throughput is 3.47E+02 events/s + [COUNTERS] PROGRAM TOTAL : 283.0785s + [COUNTERS] Fortran Overhead ( 0 ) : 25.5465s + [COUNTERS] CudaCpp MEs ( 2 ) : 257.5320s for 90112 events => throughput is 3.50E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +484,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436281462142E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.816775e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.872786e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.799139e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.828458e+02 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100942770687E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 3.5516s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6904s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8612s for 8192 events => throughput is 9.51E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.5931s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7300s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8631s for 8192 events => throughput is 9.49E+03 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436157495368E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 15.7470s - [COUNTERS] Fortran Overhead ( 0 ) : 6.2391s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.5079s for 90112 events => throughput is 9.48E+03 events/s + [COUNTERS] PROGRAM 
TOTAL : 15.7895s + [COUNTERS] Fortran Overhead ( 0 ) : 6.2917s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4978s for 90112 events => throughput is 9.49E+03 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -560,43 +560,43 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436157495368E-007 OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.396159e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.449720e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.073817e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.084973e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.109660e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.109916e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.159281e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 1.161304e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.108050e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.111586e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.113679e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.116031e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.110694e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.106540e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.2.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.646688e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.648699e+03 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 8f0cfc72db..126d0b9ddb 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ 
b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -4,38 +4,38 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' - -make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2023-08-14_22:18:22 +DATE: 2023-10-25_19:34:18 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3049s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2273s - [COUNTERS] Fortran MEs ( 1 ) : 0.0775s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3162s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2400s + [COUNTERS] Fortran MEs ( 1 ) : 0.0762s for 8192 events => throughput is 1.07E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3005s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2222s - [COUNTERS] Fortran MEs ( 1 ) : 0.0783s for 8192 events => throughput is 1.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3078s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2307s + [COUNTERS] Fortran MEs ( 1 ) : 0.0772s for 8192 events => throughput is 1.06E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av 
[XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2310s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3829s - [COUNTERS] Fortran MEs ( 1 ) : 0.8481s for 90112 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2688s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4248s + [COUNTERS] Fortran MEs ( 1 ) : 0.8440s for 90112 events => throughput is 1.07E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.276 [1.2757941949814184] fbridge_mode=1 [UNWEIGHT] Wrote 105 events (found 652 events) - [COUNTERS] PROGRAM TOTAL : 0.3741s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3046s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0695s for 8192 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3877s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3161s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0716s for 8192 events => throughput is 1.14E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 6faeadd1e3..97d8938e38 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -5,27 +5,27 @@ CUDACPP_BUILDDIR='.' 
make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 +make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' - -make USEBUILDDIR=1 AVX=512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' - -make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
@@ -33,9 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-08-14_22:18:29 +DATE: 2023-10-25_19:34:24 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3045s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2259s - [COUNTERS] Fortran MEs ( 1 ) : 0.0786s for 8192 events => throughput is 1.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3127s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2355s + [COUNTERS] Fortran MEs ( 1 ) : 0.0772s for 8192 events => throughput is 1.06E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2981s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2204s - [COUNTERS] Fortran MEs ( 1 ) : 0.0777s for 8192 events => throughput is 1.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3089s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2315s + [COUNTERS] Fortran MEs ( 1 ) : 0.0773s for 8192 events => throughput is 1.06E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 
[XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2653s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3866s - [COUNTERS] Fortran MEs ( 1 ) : 0.8787s for 90112 events => throughput is 1.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2899s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4433s + [COUNTERS] Fortran MEs ( 1 ) : 0.8467s for 90112 events => throughput is 1.06E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.276 [1.2757939713258191] fbridge_mode=1 [UNWEIGHT] Wrote 105 events (found 652 events) - [COUNTERS] PROGRAM TOTAL : 0.3704s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3032s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0672s for 8192 events => throughput is 1.22E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3818s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3130s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0689s for 8192 events => throughput is 1.19E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 6d558f348a..ae1cc6d1c5 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,31 +1,31 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=avx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -33,9 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-08-14_22:18:35 +DATE: 2023-10-25_19:34:30 -On itscrd80.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3034s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2251s - [COUNTERS] Fortran MEs ( 1 ) : 0.0784s for 8192 events => throughput is 1.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3113s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2340s + [COUNTERS] Fortran MEs ( 1 ) : 0.0773s for 8192 events => throughput is 1.06E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2996s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2220s - [COUNTERS] Fortran MEs ( 1 ) : 0.0777s for 8192 events => throughput is 1.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 
0.3077s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2312s + [COUNTERS] Fortran MEs ( 1 ) : 0.0765s for 8192 events => throughput is 1.07E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2282s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3831s - [COUNTERS] Fortran MEs ( 1 ) : 0.8450s for 90112 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2619s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4224s + [COUNTERS] Fortran MEs ( 1 ) : 0.8395s for 90112 events => throughput is 1.07E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.276 [1.2757941960880730] fbridge_mode=1 [UNWEIGHT] Wrote 105 events (found 652 events) - [COUNTERS] PROGRAM TOTAL : 0.3757s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3061s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0696s for 8192 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3925s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3207s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0718s for 8192 events => throughput is 1.14E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** From ed1cd751af10b5f4e03e189f82f95ed051821b22 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 11:08:50 +0200 Subject: [PATCH 003/119] [oct23av] (TEMPORARY TESTS ON PLATINUM) rerun 78 tput alltees, all ok but no GPU (olgpu-03 Platinum8362 el8 including downfall mitigation) NB: 512z mode is faster than 512y on this Platinum node (2 FMA units) STARTED AT Wed Oct 25 18:03:19 CEST 2023 ./tput/teeThroughputX.sh 
-mix -hrd -makej -eemumu -ggtt -ggttg -ggttgg -gqttq -ggttggg -makeclean ENDED(1) AT Wed Oct 25 18:26:16 CEST 2023 [Status=0] ./tput/teeThroughputX.sh -flt -hrd -makej -eemumu -ggtt -ggttgg -inlonly -makeclean ENDED(2) AT Wed Oct 25 18:36:50 CEST 2023 [Status=0] ./tput/teeThroughputX.sh -makej -eemumu -ggtt -ggttg -gqttq -ggttgg -ggttggg -flt -bridge -makeclean ENDED(3) AT Wed Oct 25 18:45:05 CEST 2023 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -rmbhst ENDED(4) AT Wed Oct 25 18:46:35 CEST 2023 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -curhst ENDED(5) AT Wed Oct 25 18:46:58 CEST 2023 [Status=0] [avalassi@olgpu-03 gcc11.2/cvmfs] /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp> cat /etc/redhat-release Red Hat Enterprise Linux release 8.8 (Ootpa) [avalassi@olgpu-03 gcc11.2/cvmfs] /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp> grep 'stepping\|model\|microcode' /proc/cpuinfo | sort -u microcode : 0xd0003a5 model : 106 model name : Intel(R) Xeon(R) Platinum 8362 CPU @ 2.80GHz stepping : 6 --- .../log_eemumu_mad_d_inl0_hrd0.txt | 147 ++++++------ .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 155 ++++++------- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 127 +++++------ .../log_eemumu_mad_d_inl0_hrd0_curhst.txt | 122 +++------- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 149 ++++++------- .../log_eemumu_mad_d_inl0_hrd1.txt | 147 ++++++------ .../log_eemumu_mad_d_inl1_hrd0.txt | 147 ++++++------ .../log_eemumu_mad_d_inl1_hrd1.txt | 147 ++++++------ .../log_eemumu_mad_f_inl0_hrd0.txt | 151 ++++++------- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 210 +++++++----------- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 127 +++++------ .../log_eemumu_mad_f_inl0_hrd0_curhst.txt | 122 +++------- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 153 ++++++------- .../log_eemumu_mad_f_inl0_hrd1.txt | 151 ++++++------- .../log_eemumu_mad_f_inl1_hrd0.txt | 151 ++++++------- .../log_eemumu_mad_f_inl1_hrd1.txt | 151 ++++++------- 
.../log_eemumu_mad_m_inl0_hrd0.txt | 147 ++++++------ .../log_eemumu_mad_m_inl0_hrd1.txt | 147 ++++++------ .../log_ggtt_mad_d_inl0_hrd0.txt | 149 ++++++------- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 157 ++++++------- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 129 +++++------ .../log_ggtt_mad_d_inl0_hrd0_curhst.txt | 124 +++-------- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 151 ++++++------- .../log_ggtt_mad_d_inl0_hrd1.txt | 147 ++++++------ .../log_ggtt_mad_d_inl1_hrd0.txt | 149 ++++++------- .../log_ggtt_mad_d_inl1_hrd1.txt | 151 ++++++------- .../log_ggtt_mad_f_inl0_hrd0.txt | 149 ++++++------- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 157 ++++++------- .../log_ggtt_mad_f_inl0_hrd0_common.txt | 129 +++++------ .../log_ggtt_mad_f_inl0_hrd0_curhst.txt | 124 +++-------- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 151 ++++++------- .../log_ggtt_mad_f_inl0_hrd1.txt | 147 ++++++------ .../log_ggtt_mad_f_inl1_hrd0.txt | 147 ++++++------ .../log_ggtt_mad_f_inl1_hrd1.txt | 151 ++++++------- .../log_ggtt_mad_m_inl0_hrd0.txt | 149 ++++++------- .../log_ggtt_mad_m_inl0_hrd1.txt | 151 ++++++------- .../log_ggttg_mad_d_inl0_hrd0.txt | 162 ++++++-------- .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 174 ++++++--------- .../log_ggttg_mad_d_inl0_hrd1.txt | 164 ++++++-------- .../log_ggttg_mad_f_inl0_hrd0.txt | 164 ++++++-------- .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 176 ++++++--------- .../log_ggttg_mad_f_inl0_hrd1.txt | 164 ++++++-------- .../log_ggttg_mad_m_inl0_hrd0.txt | 168 ++++++-------- .../log_ggttg_mad_m_inl0_hrd1.txt | 168 ++++++-------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 164 ++++++-------- .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 176 ++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 144 +++++------- .../log_ggttgg_mad_d_inl0_hrd0_curhst.txt | 139 +++--------- .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 167 ++++++-------- .../log_ggttgg_mad_d_inl0_hrd1.txt | 170 ++++++-------- .../log_ggttgg_mad_d_inl1_hrd0.txt | 166 ++++++-------- 
.../log_ggttgg_mad_d_inl1_hrd1.txt | 168 ++++++-------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 164 ++++++-------- .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 176 ++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 144 +++++------- .../log_ggttgg_mad_f_inl0_hrd0_curhst.txt | 139 +++--------- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 167 ++++++-------- .../log_ggttgg_mad_f_inl0_hrd1.txt | 170 ++++++-------- .../log_ggttgg_mad_f_inl1_hrd0.txt | 162 ++++++-------- .../log_ggttgg_mad_f_inl1_hrd1.txt | 162 ++++++-------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 168 ++++++-------- .../log_ggttgg_mad_m_inl0_hrd1.txt | 170 ++++++-------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 164 ++++++-------- .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 176 ++++++--------- .../log_ggttggg_mad_d_inl0_hrd1.txt | 166 ++++++-------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 168 ++++++-------- .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 180 ++++++--------- .../log_ggttggg_mad_f_inl0_hrd1.txt | 172 ++++++-------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 164 ++++++-------- .../log_ggttggg_mad_m_inl0_hrd1.txt | 164 ++++++-------- .../log_gqttq_mad_d_inl0_hrd0.txt | 162 ++++++-------- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 174 ++++++--------- .../log_gqttq_mad_d_inl0_hrd1.txt | 160 ++++++------- .../log_gqttq_mad_f_inl0_hrd0.txt | 162 ++++++-------- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 174 ++++++--------- .../log_gqttq_mad_f_inl0_hrd1.txt | 162 ++++++-------- .../log_gqttq_mad_m_inl0_hrd0.txt | 164 ++++++-------- .../log_gqttq_mad_m_inl0_hrd1.txt | 162 ++++++-------- epochX/cudacpp/tput/throughputX.sh | 2 +- 79 files changed, 4794 insertions(+), 7401 deletions(-) diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index fb3e759147..150846880a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ 
-36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:31:49 +DATE: 2023-10-25_18:17:58 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.992610e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.677560e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.800497e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.651038 sec - 2,636,526,277 cycles # 3.033 GHz - 4,084,504,000 instructions # 1.55 insn per cycle - 0.937514788 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.282804e-02 -Avg ME (F77/CUDA) = 1.2828039868165201E-002 
-Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.223824e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.458038e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.458038e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.535744 sec - 17,174,922,423 cycles # 3.101 GHz - 40,422,775,862 instructions # 2.35 insn per cycle - 5.540757574 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.465359e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.778769e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.778769e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.930496 sec + 16,959,876,668 cycles:u # 3.438 GHz + 41,097,155,657 instructions:u # 2.42 insn per cycle + 4.933925999 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.137087e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.061371e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.061371e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.338481 sec - 10,270,021,527 cycles # 3.072 GHz - 24,681,672,230 instructions # 2.40 insn per cycle - 3.343524574 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.710298e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.142425e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.142425e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.938305 sec + 10,014,794,390 cycles:u # 3.406 GHz + 25,264,551,946 instructions:u # 2.52 insn per cycle + 2.941362103 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1284) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.319309e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.049977e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.049977e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.283025 sec - 6,897,531,665 cycles # 3.016 GHz - 13,676,914,709 instructions # 1.98 insn per cycle - 2.287967204 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.041303e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.391151e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.391151e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 +TOTAL : 2.141766 sec + 7,165,504,049 cycles:u # 3.342 GHz + 14,162,644,516 instructions:u # 1.98 insn per cycle + 2.144827480 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1057) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.461379e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.455419e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.455419e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.201231 sec - 6,644,736,732 cycles # 3.013 GHz - 13,369,268,411 instructions # 2.01 insn per cycle - 2.206080825 seconds time 
elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.084460e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.523028e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.523028e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.139335 sec + 7,155,441,053 cycles:u # 3.341 GHz + 13,668,764,880 instructions:u # 1.91 insn per cycle + 2.142319320 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1009) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.225436e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.708340e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.708340e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.342191 sec - 5,886,532,421 cycles # 2.509 GHz - 10,160,262,547 instructions # 1.73 insn per cycle - 2.347341313 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.529414e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.028573e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.028573e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 1.993617 sec + 6,291,828,835 cycles:u # 3.152 GHz + 10,451,434,542 instructions:u # 1.66 insn per cycle + 1.996693269 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 268) (512y: 0) (512z: 683) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index ff2ab6ab12..e8608b0c4d 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -36,60 +36,31 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_19:02:17 +DATE: 2023-10-25_18:42:23 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.965333e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.255447e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.255447e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.135108 sec - 7,250,256,577 cycles # 3.055 GHz - 13,026,966,701 instructions # 1.80 insn per cycle - 2.429348584 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.282804e-02 -Avg ME (F77/CUDA) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.180043e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.393531e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.393531e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.930110 sec - 18,404,007,216 cycles # 3.102 GHz - 40,649,787,986 instructions # 2.21 insn per cycle - 5.936245491 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.435235e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.733702e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.733702e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.124024 sec + 17,562,389,414 cycles:u # 3.425 GHz + 41,393,540,254 instructions:u # 2.36 insn per cycle + 5.128507187 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -100,23 +71,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.001560e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.785736e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.785736e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.743997 sec - 11,597,299,620 cycles # 3.094 GHz - 25,525,941,371 instructions # 2.20 insn per cycle - 3.750076018 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.532861e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.728508e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.728508e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.292565 sec + 11,175,097,657 cycles:u # 3.390 GHz + 26,098,331,509 instructions:u # 2.34 insn per cycle + 3.296593444 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1284) (avx2: 0) (512y: 0) 
(512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -127,23 +98,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.959223e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.997724e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.997724e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.737253 sec - 8,251,537,282 cycles # 3.009 GHz - 15,038,208,979 instructions # 1.82 insn per cycle - 2.743376591 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.626941e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.863862e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.863862e+06 ) sec^-1 
+MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.513698 sec + 8,381,768,733 cycles:u # 3.330 GHz + 15,514,162,728 instructions:u # 1.85 insn per cycle + 2.517729903 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1057) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -154,23 +125,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.076188e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.294642e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.294642e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.656399 sec - 7,925,615,204 cycles # 2.978 GHz - 14,731,067,513 instructions # 1.86 insn per cycle - 2.662615813 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.657205e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.957560e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.957560e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.521176 sec + 8,397,382,077 cycles:u # 3.326 GHz + 15,020,280,229 instructions:u # 1.79 insn per cycle + 2.525268729 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1009) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -181,23 +152,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.899896e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.732839e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.732839e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.778907 sec - 7,285,067,282 cycles # 2.617 GHz - 11,305,402,811 instructions # 1.55 insn per cycle - 2.785056032 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.966511e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.028469e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.028469e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.389409 sec + 7,589,911,715 cycles:u # 3.172 GHz + 11,588,319,674 instructions:u # 1.53 insn per cycle + 2.393461319 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 268) (512y: 0) (512z: 
683) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index ee209006c3..8ff0f3eecf 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_19:14:42 +DATE: 2023-10-25_18:47:03 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.734802e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.548595e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.695477e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.288103 sec - 4,585,947,033 cycles # 3.031 GHz - 7,017,628,406 instructions # 1.53 insn per cycle - 1.570944429 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.282804e-02 -Avg ME (F77/CUDA) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) +Not 
found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.218596e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.449391e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.449391e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.468978e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.783500e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.783500e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.906919 sec - 18,244,110,475 cycles # 3.087 GHz - 40,525,216,964 instructions # 2.22 insn per cycle - 5.912008529 seconds time elapsed +TOTAL : 4.920383 sec + 16,926,525,144 cycles:u # 3.438 GHz + 41,097,159,310 instructions:u # 2.43 insn per cycle + 4.923595989 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.094791e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.999575e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.999575e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.715228e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.143322e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.143322e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.757966 sec - 11,362,621,772 cycles # 3.020 GHz - 24,684,545,006 instructions # 2.17 insn per cycle - 3.762920389 seconds time elapsed +TOTAL : 2.931671 sec + 9,999,246,021 cycles:u # 3.408 GHz + 25,264,551,636 instructions:u # 2.53 insn per cycle + 2.934668341 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1284) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.221039e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.851480e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.851480e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.027283e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.399098e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.399098e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.709730 sec - 7,962,642,894 cycles # 2.935 GHz - 13,579,072,475 instructions # 1.71 insn per cycle - 2.714759310 seconds time elapsed +TOTAL : 2.150824 sec + 7,191,967,549 cycles:u # 3.340 GHz + 14,162,645,321 instructions:u # 1.97 insn per cycle + 2.153873675 seconds time elapsed =Symbols in CPPProcess.o= 
(~sse4: 0) (avx2: 1057) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.422533e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.356319e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.356319e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.105423e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.575412e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.575412e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.582578 sec - 7,748,421,467 cycles # 2.996 GHz - 13,080,984,196 instructions # 1.69 insn per cycle - 2.587647023 seconds time elapsed +TOTAL : 2.132134 sec + 7,128,839,196 cycles:u # 
3.340 GHz + 13,668,765,891 instructions:u # 1.92 insn per cycle + 2.135194157 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1009) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.234611e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.715248e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.715248e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.551154e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.033926e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.033926e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.694072 sec - 7,028,142,015 cycles # 2.605 GHz - 
9,860,263,834 instructions # 1.40 insn per cycle - 2.699027311 seconds time elapsed +TOTAL : 1.984779 sec + 6,263,623,021 cycles:u # 3.152 GHz + 10,451,436,460 instructions:u # 1.67 insn per cycle + 1.987850535 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 268) (512y: 0) (512z: 683) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt index 8ad22bdaab..409858a0af 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt @@ -36,51 +36,21 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_19:11:43 +DATE: 2023-10-25_18:46:40 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.751984e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.562686e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.712190e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.934603 sec - 3,531,370,849 cycles # 3.027 GHz - 7,056,706,138 instructions # 2.00 insn per cycle - 1.223131814 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst -==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.282804e-02 -Avg ME (F77/CUDA) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) +Not 
found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.229006e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.461428e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.461428e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.514347 sec - 17,134,759,816 cycles # 3.105 GHz - 40,421,384,674 instructions # 2.36 insn per cycle - 5.519235575 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe: Aborted + 2,373,081 cycles:u # 1.483 GHz + 3,106,807 instructions:u # 1.31 insn per cycle + 0.115533650 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -90,23 +60,14 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.136550e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.078809e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.078809e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.338536 sec - 10,358,915,958 cycles # 3.099 GHz - 24,681,209,780 instructions # 2.38 insn per cycle - 3.343472671 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe: Aborted + 2,018,225 cycles:u # 1.406 GHz + 3,111,473 instructions:u # 1.54 insn per cycle + 0.126137642 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1284) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -116,23 +77,14 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.254346e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.883713e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.883713e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.325759 sec - 6,909,272,329 cycles # 2.965 GHz - 13,676,492,702 instructions # 1.98 insn per cycle - 2.330695390 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe: Aborted + 2,178,162 cycles:u # 1.375 GHz + 3,112,400 instructions:u # 1.43 insn per cycle + 0.118065407 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1057) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -142,23 +94,14 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.418862e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.369541e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.369541e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.228021 sec - 6,650,960,186 cycles # 2.980 GHz - 13,380,296,124 instructions # 2.01 insn per cycle - 2.233066593 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe: Aborted + 1,944,370 cycles:u # 1.347 GHz + 3,113,238 instructions:u # 1.60 insn per cycle + 0.117323854 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1009) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -168,23 +111,14 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.184006e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.596559e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.596559e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.371480 sec - 5,899,234,125 cycles # 2.483 GHz - 10,159,638,956 instructions # 1.72 insn per cycle - 2.376624580 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe: Aborted + 1,942,030 cycles:u # 1.321 GHz + 3,112,651 instructions:u # 1.60 insn per cycle + 0.114930364 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 268) (512y: 0) (512z: 683) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 683b401a34..89e20421c7 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -36,53 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_19:08:41 +DATE: 2023-10-25_18:45:11 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.450047e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.536531e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.695667e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.791948 sec - 6,196,047,830 cycles # 3.061 GHz - 11,389,938,256 instructions # 1.84 insn per cycle - 2.082150692 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.282804e-02 -Avg ME (F77/CUDA) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.225914e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.460719e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.460719e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.523506 sec - 17,169,419,080 cycles # 3.106 GHz - 40,421,624,196 instructions # 2.35 insn per cycle - 5.528429327 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.465638e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.778986e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.778986e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.926167 sec + 16,955,984,921 cycles:u # 3.440 GHz + 41,097,153,427 instructions:u # 2.42 insn per cycle + 4.929406651 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -92,23 +69,23 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.159688e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.087862e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.087862e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.303087 sec - 10,283,099,650 cycles # 3.109 GHz - 24,681,354,623 instructions # 2.40 insn per cycle - 3.308198240 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.714818e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.131227e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.131227e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.931381 sec + 9,999,486,631 cycles:u # 3.408 GHz + 25,264,555,991 instructions:u # 2.53 insn per cycle + 2.934404666 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1284) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -118,23 +95,23 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.299969e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.994161e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.994161e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.296796 sec - 6,872,706,594 cycles # 2.987 GHz - 13,676,412,365 instructions # 1.99 insn per cycle - 2.301612504 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.048019e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.434909e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.434909e+06 ) sec^-1 +MeanMatrixElemValue = ( 
1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.138090 sec + 7,154,732,529 cycles:u # 3.343 GHz + 14,162,644,579 instructions:u # 1.98 insn per cycle + 2.141122902 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1057) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -144,23 +121,23 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.398777e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.314475e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.314475e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.238868 sec - 6,648,921,003 cycles # 2.964 GHz - 13,380,372,785 instructions # 2.01 insn per cycle - 
2.243705065 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.105047e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.546899e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.546899e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.129882 sec + 7,126,263,707 cycles:u # 3.342 GHz + 13,668,763,857 instructions:u # 1.92 insn per cycle + 2.132904165 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1009) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -170,23 +147,23 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.257611e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 5.762023e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.762023e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.324047 sec - 5,897,358,226 cycles # 2.533 GHz - 10,159,451,394 instructions # 1.72 insn per cycle - 2.329040547 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.519982e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.032103e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.032103e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 1.989870 sec + 6,289,368,749 cycles:u # 3.157 GHz + 10,451,432,330 instructions:u # 1.66 insn per cycle + 1.992889538 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 268) (512y: 0) (512z: 683) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index 8f12496d4e..1a5b54f2e5 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:32:17 +DATE: 2023-10-25_18:18:15 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.135499e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.480702e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.088764e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.649915 sec - 2,521,027,262 cycles # 2.870 GHz - 3,932,562,496 instructions # 1.56 insn per cycle - 0.935284453 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 118 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.282804e-02 -Avg ME (F77/CUDA) = 1.2828039868165206E-002 -Relative difference = 1.027708011645137e-08 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.221519e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.452240e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.452240e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.545860 sec - 17,128,162,100 cycles # 3.086 GHz - 40,370,576,437 instructions # 2.36 insn per cycle - 5.550872301 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.470396e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.785699e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.785699e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.912905 sec + 16,908,371,061 cycles:u # 3.440 GHz + 41,046,076,657 instructions:u # 2.43 insn per cycle + 4.916092188 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 362) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 1.0277102294013186e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.147682e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.063332e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.063332e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.321610 sec - 10,270,541,691 cycles # 3.088 GHz - 24,643,021,754 instructions # 2.40 insn per cycle - 3.326647423 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.719215e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.130723e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.130723e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.930018 sec + 9,990,030,213 cycles:u # 3.408 GHz + 25,226,057,535 instructions:u # 2.53 insn per cycle + 2.933121393 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1271) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 1.0277102294013186e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.302607e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.009317e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.009317e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.292563 sec - 6,895,121,758 cycles # 3.004 GHz - 13,651,253,610 instructions # 1.98 insn per cycle - 2.297509965 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.041490e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.422170e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.422170e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 +TOTAL : 2.142736 sec + 7,169,921,245 cycles:u # 3.342 GHz + 14,136,733,258 instructions:u # 1.97 insn per cycle + 2.145802070 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1037) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 1.0277088906338675e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.466481e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.436410e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.436410e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.199117 sec - 6,634,520,360 cycles # 3.011 GHz - 13,355,581,160 instructions # 2.01 insn per cycle - 2.204141246 seconds time 
elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.094436e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.601434e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.601434e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.132621 sec + 7,142,031,842 cycles:u # 3.345 GHz + 13,655,436,461 instructions:u # 1.91 insn per cycle + 2.135691611 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 989) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 1.0277088906338675e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.383490e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.139882e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 6.139882e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.249115 sec - 5,748,008,303 cycles # 2.551 GHz - 10,038,931,524 instructions # 1.75 insn per cycle - 2.254226968 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.577060e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.059889e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.059889e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 1.978556 sec + 6,253,669,114 cycles:u # 3.156 GHz + 10,331,152,687 instructions:u # 1.65 insn per cycle + 1.981645781 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 246) (512y: 0) (512z: 663) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index bad17671a9..ec4d769f1e 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:52:22 +DATE: 2023-10-25_18:32:38 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.866442e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.650091e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.818114e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.673872 sec - 2,565,505,221 cycles # 2.855 GHz - 3,938,465,714 instructions # 1.54 insn per cycle - 0.962945607 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.282804e-02 -Avg ME (F77/CUDA) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.788792e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.362239e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.362239e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.649101 sec - 8,210,295,285 cycles # 3.094 GHz - 17,459,406,832 instructions # 2.13 insn per cycle - 2.654141725 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.468824e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.990053e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.990053e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.450366 sec + 8,316,087,808 cycles:u # 3.390 GHz + 18,145,683,917 instructions:u # 2.18 insn per cycle + 2.453752491 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 125) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.776841e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.067159e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.067159e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.060741 sec - 6,367,782,565 cycles # 3.084 GHz - 12,773,139,369 instructions # 2.01 insn per cycle - 2.065923417 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.799990e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.213598e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.213598e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 1.927940 sec + 6,492,858,558 cycles:u # 3.363 GHz + 13,360,995,036 instructions:u # 2.06 insn per cycle + 1.931036236 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 810) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.730005e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.360397e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.360397e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.721074 sec - 5,169,510,873 cycles # 2.996 GHz - 9,371,577,717 instructions # 1.81 insn per cycle - 1.726231344 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 5.361282e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.752634e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.752634e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 +TOTAL : 1.762418 sec + 5,888,113,670 cycles:u # 3.336 GHz + 9,859,240,557 instructions:u # 1.67 insn per cycle + 1.765631929 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 720) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.006682e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.519447e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.519447e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.646907 sec - 4,985,951,497 cycles # 3.019 GHz - 9,229,216,123 instructions # 1.85 insn per cycle - 1.652062250 seconds time elapsed 
+OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 5.583635e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.930445e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.930445e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 1.731804 sec + 5,793,967,089 cycles:u # 3.341 GHz + 9,519,499,694 instructions:u # 1.64 insn per cycle + 1.734762424 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 641) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.264702e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.000270e+07 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.000270e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.871689 sec - 5,009,273,536 cycles # 2.672 GHz - 8,693,527,346 instructions # 1.74 insn per cycle - 1.876855811 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 5.455559e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.657034e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.657034e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 1.751943 sec + 5,619,161,076 cycles:u # 3.203 GHz + 8,987,081,027 instructions:u # 1.60 insn per cycle + 1.754996822 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 200) (512y: 0) (512z: 276) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index e9aad49fe2..01bdb12502 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:52:44 +DATE: 2023-10-25_18:32:50 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.999685e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.416020e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.082814e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.643446 sec - 2,620,726,592 cycles # 3.012 GHz - 4,087,111,468 instructions # 1.56 insn per cycle - 0.931265549 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 118 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.282804e-02 -Avg ME (F77/CUDA) = 1.2828039868165206E-002 -Relative difference = 1.027708011645137e-08 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.490278e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.520777e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.520777e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.208507 sec - 6,644,704,561 cycles # 3.003 GHz - 14,230,584,763 instructions # 2.14 insn per cycle - 2.213619219 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.270821e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.789105e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.789105e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.114220 sec + 7,121,938,402 cycles:u # 3.364 GHz + 14,917,387,608 instructions:u # 2.09 insn per cycle + 2.117449531 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 122) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 1.0277102294013186e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.496209e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.203520e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.203520e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.793256 sec - 5,547,608,796 cycles # 3.086 GHz - 10,773,719,188 instructions # 1.94 insn per cycle - 1.798601488 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 5.489323e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.786799e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.786799e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 1.761799 sec + 5,917,699,661 cycles:u # 3.354 GHz + 11,362,692,418 instructions:u # 1.92 insn per cycle + 1.764919458 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 610) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 1.0277102294013186e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.103748e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.641142e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.641142e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.621275 sec - 4,941,467,155 cycles # 3.041 GHz - 8,728,712,502 instructions # 1.77 insn per cycle - 1.626388139 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 5.607499e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.090410e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.090410e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 +TOTAL : 1.713581 sec + 5,730,127,623 cycles:u # 3.341 GHz + 9,216,760,607 instructions:u # 1.61 insn per cycle + 1.716701054 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 552) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 1.0277088906338675e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.063835e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.792404e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.792404e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.652560 sec - 4,758,216,156 cycles # 2.885 GHz - 8,734,044,090 instructions # 1.84 insn per cycle - 1.657554337 seconds time elapsed 
+OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 5.926501e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.437680e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.437680e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 1.660901 sec + 5,558,612,040 cycles:u # 3.341 GHz + 9,031,161,270 instructions:u # 1.62 insn per cycle + 1.664029633 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 519) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 1.0277088906338675e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.451379e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.103695e+07 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.103695e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.809541 sec - 4,867,548,247 cycles # 2.684 GHz - 8,406,231,727 instructions # 1.73 insn per cycle - 1.814605280 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 5.832161e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.075253e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.075253e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 1.678555 sec + 5,588,592,180 cycles:u # 3.324 GHz + 8,700,071,941 instructions:u # 1.56 insn per cycle + 1.681676462 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 168) (512y: 0) (512z: 227) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index a96360aa15..35753c8692 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:32:45 +DATE: 2023-10-25_18:18:31 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=2, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.622469e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.481172e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.826768e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 -TOTAL : 0.551831 sec - 2,329,387,313 cycles # 3.021 GHz - 3,650,775,745 instructions # 1.57 insn per cycle - 0.828584114 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112108763889E-002 -Relative difference = 7.180279099086847e-06 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.243845e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.497749e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.497749e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 5.410831 sec - 16,844,648,065 cycles # 3.111 GHz - 40,088,965,912 instructions # 2.38 insn per cycle - 5.415719530 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.729460e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.110223e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.110223e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 +TOTAL : 4.255264 sec + 14,615,789,313 cycles:u # 3.433 GHz + 40,190,391,890 instructions:u # 2.75 insn per cycle + 4.258334734 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 1.500049293219082e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.198454e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.967626e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.967626e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.324919 sec - 7,138,346,939 cycles # 3.065 GHz - 16,729,497,470 instructions # 2.34 insn per cycle - 2.329783883 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.289620e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.040819e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.040819e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 +TOTAL : 2.069545 sec + 7,002,182,131 cycles:u # 3.379 GHz + 16,748,198,675 instructions:u # 2.39 insn per cycle + 2.072477252 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 
1360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 3.8113554068418534e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.643489e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.234035e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.234035e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.702583 sec - 5,155,065,818 cycles # 3.021 GHz - 10,628,955,239 instructions # 2.06 insn per cycle - 1.707446133 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.390893e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.733751e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.733751e+07 ) sec^-1 
+MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 +TOTAL : 1.552888 sec + 5,156,863,877 cycles:u # 3.316 GHz + 10,480,013,170 instructions:u # 2.03 insn per cycle + 1.555841409 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1122) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.817440e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.344869e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.344869e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.653471 sec - 5,044,234,982 cycles # 3.043 GHz - 10,475,715,128 instructions # 2.08 insn 
per cycle - 1.658543423 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.555638e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.817995e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.817995e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 +TOTAL : 1.546525 sec + 5,133,706,165 cycles:u # 3.314 GHz + 10,132,408,956 instructions:u # 1.97 insn per cycle + 1.549473269 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1074) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.608343e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 1.175337e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.175337e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 1.712484 sec - 4,683,701,478 cycles # 2.728 GHz - 8,926,870,251 instructions # 1.91 insn per cycle - 1.717292179 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.863473e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.164421e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.164421e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371884e-02 +- 3.270111e-06 ) GeV^0 +TOTAL : 1.499357 sec + 4,803,893,523 cycles:u # 3.199 GHz + 8,580,368,826 instructions:u # 1.79 insn per cycle + 1.502292571 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 343) (512y: 0) (512z: 710) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index efa36acd38..eb9e9ef59f 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -36,73 +36,35 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_19:02:51 +DATE: 2023-10-25_18:42:42 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! flagging abnormal ME for ievt=71728 -WARNING! flagging abnormal ME for ievt=152898 -WARNING! flagging abnormal ME for ievt=496545 -WARNING! flagging abnormal ME for ievt=66427 -WARNING! flagging abnormal ME for ievt=465318 -WARNING! flagging abnormal ME for ievt=458848 -WARNING! 
flagging abnormal ME for ievt=247522 -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=7, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.717117e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.761186e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.761186e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371709e-02 +- 3.270385e-06 ) GeV^0 -TOTAL : 1.602340 sec - 5,595,842,121 cycles # 3.065 GHz - 10,162,817,469 instructions # 1.82 insn per cycle - 1.883301086 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112108763889E-002 -Relative difference = 7.180279099086847e-06 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -WARNING! flagging abnormal ME for ievt=53874 -WARNING! flagging abnormal ME for ievt=71728 -WARNING! flagging abnormal ME for ievt=152898 -WARNING! flagging abnormal ME for ievt=66427 -WARNING! flagging abnormal ME for ievt=164749 -WARNING! flagging abnormal ME for ievt=247522 -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +WARNING! flagging abnormal ME for ievt=334333 +WARNING! flagging abnormal ME for ievt=355930 +WARNING! flagging abnormal ME for ievt=450372 +WARNING! 
flagging abnormal ME for ievt=111162 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.194651e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.427134e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.427134e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 5.727507 sec - 17,468,478,153 cycles # 3.048 GHz - 40,238,549,213 instructions # 2.30 insn per cycle - 5.733108996 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.707220e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.075285e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.075285e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 +TOTAL : 4.347128 sec + 14,903,367,368 cycles:u # 3.426 GHz + 40,361,164,372 instructions:u # 2.71 insn per cycle + 4.351029431 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -113,29 +75,27 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -WARNING! flagging abnormal ME for ievt=53874 -WARNING! flagging abnormal ME for ievt=71728 -WARNING! flagging abnormal ME for ievt=152898 -WARNING! flagging abnormal ME for ievt=66427 -WARNING! flagging abnormal ME for ievt=164749 -WARNING! flagging abnormal ME for ievt=247522 -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +WARNING! flagging abnormal ME for ievt=334333 +WARNING! flagging abnormal ME for ievt=355930 +WARNING! flagging abnormal ME for ievt=450372 +WARNING! 
flagging abnormal ME for ievt=111162 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.015918e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.363005e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.363005e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.562966 sec - 7,929,821,727 cycles # 3.088 GHz - 18,064,430,946 instructions # 2.28 insn per cycle - 2.568572939 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.034302e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.225581e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.225581e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 +TOTAL : 2.272239 sec + 7,665,382,719 cycles:u # 3.369 GHz + 18,085,512,951 instructions:u # 2.36 insn per cycle + 2.275998631 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -146,27 +106,27 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -WARNING! flagging abnormal ME for ievt=53874 -WARNING! flagging abnormal ME for ievt=66427 -WARNING! flagging abnormal ME for ievt=164749 -WARNING! flagging abnormal ME for ievt=247522 -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +WARNING! flagging abnormal ME for ievt=355930 +WARNING! flagging abnormal ME for ievt=450372 +WARNING! flagging abnormal ME for ievt=186978 +WARNING! 
flagging abnormal ME for ievt=111162 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.272473e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.009652e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.009652e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.944574 sec - 5,929,261,497 cycles # 3.042 GHz - 11,749,715,523 instructions # 1.98 insn per cycle - 1.950245306 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 5.739168e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.371751e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.371751e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 +TOTAL : 1.777776 sec + 5,884,369,562 cycles:u # 3.304 GHz + 11,602,702,718 instructions:u # 1.97 insn per cycle + 1.781438494 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1122) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -177,27 +137,27 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -WARNING! flagging abnormal ME for ievt=53874 -WARNING! flagging abnormal ME for ievt=66427 -WARNING! flagging abnormal ME for ievt=164749 -WARNING! flagging abnormal ME for ievt=247522 -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +WARNING! flagging abnormal ME for ievt=355930 +WARNING! flagging abnormal ME for ievt=450372 +WARNING! flagging abnormal ME for ievt=186978 +WARNING! 
flagging abnormal ME for ievt=111162 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.364659e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.078675e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.078675e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.914043 sec - 5,843,415,161 cycles # 3.045 GHz - 11,595,784,393 instructions # 1.98 insn per cycle - 1.919813375 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 5.833662e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.427872e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.427872e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 +TOTAL : 1.774478 sec + 5,870,489,674 cycles:u # 3.304 GHz + 11,255,095,620 instructions:u # 1.92 insn per cycle + 1.778380730 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1074) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -208,27 +168,27 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -WARNING! flagging abnormal ME for ievt=53874 -WARNING! flagging abnormal ME for ievt=66427 -WARNING! flagging abnormal ME for ievt=164749 -WARNING! flagging abnormal ME for ievt=247522 -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +WARNING! flagging abnormal ME for ievt=355930 +WARNING! flagging abnormal ME for ievt=450372 +WARNING! flagging abnormal ME for ievt=186978 +WARNING! 
flagging abnormal ME for ievt=111162 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.130829e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.360401e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.360401e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 2.001099 sec - 5,491,473,730 cycles # 2.738 GHz - 10,134,991,267 instructions # 1.85 insn per cycle - 2.006879900 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.068120e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.574984e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.574984e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371884e-02 +- 3.270111e-06 ) GeV^0 +TOTAL : 1.735354 sec + 5,572,441,027 cycles:u # 3.205 GHz + 9,789,888,606 instructions:u # 1.76 insn per cycle + 1.739135348 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 343) (512y: 0) (512z: 710) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index 3649e05b61..4edddfd03c 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_19:15:13 +DATE: 2023-10-25_18:47:20 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.566300e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.421516e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.742992e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 -TOTAL : 1.134749 sec - 4,098,993,410 cycles # 3.029 GHz - 6,605,372,981 instructions # 1.61 insn per cycle - 1.409722045 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112108763889E-002 -Relative difference = 7.180279099086847e-06 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.244098e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.493114e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.493114e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.731049e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.110306e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.110306e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 
-TOTAL : 5.726200 sec - 17,811,812,788 cycles # 3.109 GHz - 40,270,712,628 instructions # 2.26 insn per cycle - 5.730949174 seconds time elapsed +TOTAL : 4.250142 sec + 14,600,038,747 cycles:u # 3.434 GHz + 40,190,394,588 instructions:u # 2.75 insn per cycle + 4.253095521 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 1.500049293219082e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.215182e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.000060e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.000060e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.292044e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.040786e+06 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.040786e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 -TOTAL : 2.623227 sec - 8,152,502,492 cycles # 3.103 GHz - 16,810,279,631 instructions # 2.06 insn per cycle - 2.628033716 seconds time elapsed +TOTAL : 2.067760 sec + 6,998,868,955 cycles:u # 3.381 GHz + 16,748,198,517 instructions:u # 2.39 insn per cycle + 2.070661637 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 3.8113554068418534e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.607786e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.225348e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.225348e+07 ) sec^-1 +OMP threads / 
`nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.499962e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.738226e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.738226e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 -TOTAL : 2.027947 sec - 6,177,720,088 cycles # 3.041 GHz - 10,540,553,410 instructions # 1.71 insn per cycle - 2.032791206 seconds time elapsed +TOTAL : 1.537148 sec + 5,102,238,830 cycles:u # 3.314 GHz + 10,480,017,946 instructions:u # 2.05 insn per cycle + 1.540028852 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1122) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.739337e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.329843e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.329843e+07 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.560211e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.820987e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.820987e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 -TOTAL : 1.998605 sec - 6,104,707,264 cycles # 3.049 GHz - 10,185,815,617 instructions # 1.67 insn per cycle - 2.003437292 seconds time elapsed +TOTAL : 1.544441 sec + 5,129,408,965 cycles:u # 3.316 GHz + 10,132,412,241 instructions:u # 1.98 insn per cycle + 1.547345562 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1074) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] 
('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.461301e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.134325e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.134325e+07 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.802348e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.153811e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.153811e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371884e-02 +- 3.270111e-06 ) GeV^0 -TOTAL : 2.092643 sec - 5,695,345,035 cycles # 2.717 GHz - 8,637,352,141 instructions # 1.52 insn per cycle - 2.097505728 seconds time elapsed +TOTAL : 1.505652 sec + 4,829,542,766 cycles:u # 3.202 GHz + 8,580,367,611 instructions:u # 1.78 insn per cycle + 1.508693688 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 343) (512y: 0) (512z: 710) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt index 482f335b2b..4c77ada440 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt @@ -36,51 +36,21 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_19:12:11 +DATE: 2023-10-25_18:46:43 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=2, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.572199e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.443581e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.789713e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 -TOTAL : 0.852228 sec - 3,102,837,181 cycles # 2.900 GHz - 6,399,466,575 instructions # 2.06 insn per cycle - 1.127081538 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst -==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112108763889E-002 -Relative difference = 7.180279099086847e-06 -OK (relative difference <= 5E-3) +Not 
found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.242033e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.490396e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.490396e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 5.420944 sec - 16,812,615,332 cycles # 3.099 GHz - 40,088,432,090 instructions # 2.38 insn per cycle - 5.425745402 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe: Aborted + 2,237,732 cycles:u # 1.452 GHz + 3,112,523 instructions:u # 1.39 insn per cycle + 0.113282053 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -90,23 +60,14 @@ Relative difference = 1.500049293219082e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.220212e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.000588e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.000588e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.308004 sec - 7,155,170,314 cycles # 3.095 GHz - 16,729,446,787 instructions # 2.34 insn per cycle - 2.312771061 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe: Aborted + 1,996,617 cycles:u # 1.391 GHz + 3,116,775 instructions:u # 1.56 insn per cycle + 0.108515474 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -116,23 +77,14 @@ Relative difference = 3.8113554068418534e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.640154e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.225902e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.225902e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.703655 sec - 5,164,709,474 cycles # 3.025 GHz - 10,629,819,542 instructions # 2.06 insn per cycle - 1.708526413 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe: Aborted + 1,946,925 cycles:u # 1.394 GHz + 3,117,578 instructions:u # 1.60 insn per cycle + 0.132825035 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1122) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -142,23 +94,14 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.770352e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.326396e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.326396e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.669695 sec - 5,072,425,715 cycles # 3.031 GHz - 10,481,476,531 instructions # 2.07 insn per cycle - 1.674496390 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe: Aborted + 1,958,449 cycles:u # 1.383 GHz + 3,117,235 instructions:u # 1.59 insn per cycle + 0.123003666 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1074) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -168,23 +111,14 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.608761e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.188893e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.188893e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 1.713521 sec - 4,682,934,639 cycles # 2.727 GHz - 8,926,936,750 instructions # 1.91 insn per cycle - 1.718311206 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe: Aborted + 1,946,905 cycles:u # 1.373 GHz + 3,116,503 instructions:u # 1.60 insn per cycle + 0.117625696 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 343) (512y: 0) (512z: 710) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index efc5436b49..b449440083 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -36,53 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_19:09:10 +DATE: 2023-10-25_18:45:27 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=7, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.218117e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.394390e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.636278e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371709e-02 +- 3.270385e-06 ) GeV^0 -TOTAL : 1.461263 sec - 4,982,734,203 cycles # 2.962 GHz - 9,103,785,620 instructions # 1.83 insn per cycle - 1.744448034 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112108763889E-002 -Relative difference = 7.180279099086847e-06 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.245708e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.494542e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.494542e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 5.404955 sec - 16,819,106,248 cycles # 3.110 GHz - 40,088,802,052 instructions # 2.38 insn per cycle - 5.409792938 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.728103e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.104696e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.104696e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 +TOTAL : 4.256254 sec + 14,624,053,552 cycles:u # 3.434 GHz + 40,190,390,955 instructions:u # 2.75 insn per cycle + 4.259246946 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -92,23 +69,23 @@ Relative difference = 1.500049293219082e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.222846e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.010307e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.010307e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.309298 sec - 7,155,727,700 cycles # 3.093 GHz - 16,729,709,933 instructions # 2.34 insn per cycle - 2.314175580 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.295978e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.044937e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.044937e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 +TOTAL : 2.066718 sec + 6,990,916,531 cycles:u # 3.379 GHz + 16,748,196,956 instructions:u # 2.40 insn per cycle + 2.069597625 seconds time elapsed =Symbols in 
CPPProcess.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -118,23 +95,23 @@ Relative difference = 3.8113554068418534e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.653137e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.236145e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.236145e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.698626 sec - 5,157,602,141 cycles # 3.030 GHz - 10,629,000,968 instructions # 2.06 insn per cycle - 1.703452950 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.363434e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.733774e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) 
= ( 1.733774e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 +TOTAL : 1.558394 sec + 5,178,315,742 cycles:u # 3.317 GHz + 10,480,013,528 instructions:u # 2.02 insn per cycle + 1.561451840 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1122) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -144,23 +121,23 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.773001e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.326358e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.326358e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.667901 sec - 5,061,515,411 cycles # 3.028 GHz - 
10,480,815,680 instructions # 2.07 insn per cycle - 1.672594775 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.553056e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.818363e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.818363e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 +TOTAL : 1.542303 sec + 5,128,149,799 cycles:u # 3.319 GHz + 10,132,413,337 instructions:u # 1.98 insn per cycle + 1.545214987 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1074) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -170,23 +147,23 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
4.555200e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.166909e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.166909e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 1.731879 sec - 4,689,690,631 cycles # 2.701 GHz - 8,927,107,008 instructions # 1.90 insn per cycle - 1.736672094 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.784579e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.155866e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.155866e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371884e-02 +- 3.270111e-06 ) GeV^0 +TOTAL : 1.509175 sec + 4,838,626,714 cycles:u # 3.201 GHz + 8,580,370,978 instructions:u # 1.77 insn per cycle + 1.512104777 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 343) (512y: 0) (512z: 710) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index 5dbfdd3213..447e304729 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:33:09 +DATE: 2023-10-25_18:18:44 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=2, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.625913e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.503566e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.918133e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 -TOTAL : 0.552329 sec - 2,328,888,712 cycles # 3.020 GHz - 3,648,506,478 instructions # 1.57 insn per cycle - 0.830116688 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 80 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112108763889E-002 -Relative difference = 7.180279099086847e-06 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.244101e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.497364e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.497364e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 5.479428 sec - 17,033,310,610 cycles # 3.106 GHz - 40,038,122,508 instructions # 2.35 insn per cycle - 5.484429062 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.736162e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.117571e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.117571e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 +TOTAL : 4.238341 sec + 14,561,502,974 cycles:u # 3.434 GHz + 40,139,315,186 instructions:u # 2.76 insn per cycle + 4.241406501 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 347) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 1.500049293219082e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.023210e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.906828e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.906828e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.441591 sec - 7,417,759,581 cycles # 3.033 GHz - 16,653,923,334 instructions # 2.25 insn per cycle - 2.446386116 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.280509e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.015334e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.015334e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 +TOTAL : 2.072032 sec + 7,005,174,204 cycles:u # 3.377 GHz + 16,671,958,233 instructions:u # 2.38 insn per cycle + 2.075028280 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 
1335) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 3.8113554068418534e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.697456e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.249241e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.249241e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.685112 sec - 5,138,959,489 cycles # 3.042 GHz - 10,615,393,712 instructions # 2.07 insn per cycle - 1.690001992 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.229686e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.714141e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.714141e+07 ) sec^-1 
+MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 +TOTAL : 1.586341 sec + 5,266,021,866 cycles:u # 3.313 GHz + 10,466,688,261 instructions:u # 1.99 insn per cycle + 1.589935074 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1092) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.752164e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.307340e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.307340e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.671544 sec - 5,088,363,074 cycles # 3.037 GHz - 10,468,790,591 instructions # 2.06 insn 
per cycle - 1.676598026 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.531291e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.816551e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.816551e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 +TOTAL : 1.550816 sec + 5,146,977,582 cycles:u # 3.313 GHz + 10,125,376,383 instructions:u # 1.97 insn per cycle + 1.553865343 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1044) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.538235e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 1.204282e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.204282e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 1.747955 sec - 4,622,637,035 cycles # 2.639 GHz - 8,857,108,339 instructions # 1.92 insn per cycle - 1.752985428 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.960002e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.258537e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.258537e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371884e-02 +- 3.270111e-06 ) GeV^0 +TOTAL : 1.487427 sec + 4,768,127,222 cycles:u # 3.200 GHz + 8,509,628,646 instructions:u # 1.78 insn per cycle + 1.490431635 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 312) (512y: 0) (512z: 678) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index 7778235778..3a783df15c 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:53:05 +DATE: 2023-10-25_18:33:01 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=2, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.591002e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.452620e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.766126e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 -TOTAL : 0.556490 sec - 2,367,622,596 cycles # 3.019 GHz - 3,659,745,020 instructions # 1.55 insn per cycle - 0.843534071 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112108763889E-002 -Relative difference = 7.180279099086847e-06 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.920065e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.817963e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.817963e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.512043 sec - 7,713,453,992 cycles # 3.066 GHz - 17,403,928,818 instructions # 2.26 insn per cycle - 2.516888756 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.913105e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.593311e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.593311e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 +TOTAL : 2.221837 sec + 7,530,888,619 cycles:u # 3.385 GHz + 17,515,746,714 instructions:u # 2.33 insn per cycle + 2.224960973 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 1.4858695011109669e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.681101e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.456058e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.456058e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 1.700110 sec - 5,232,564,400 cycles # 3.070 GHz - 10,761,247,884 instructions # 2.06 insn per cycle - 1.704947526 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.376301e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.062960e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.062960e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 +TOTAL : 1.587805 sec + 5,325,014,500 cycles:u # 3.348 GHz + 10,782,260,862 instructions:u # 2.02 insn per cycle + 1.590744824 seconds time elapsed =Symbols in CPPProcess.o= 
(~sse4: 941) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 3.924793743706775e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.136284e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.424889e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.424889e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.396345 sec - 4,222,666,465 cycles # 3.047 GHz - 8,344,159,796 instructions # 1.98 insn per cycle - 1.401275041 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 8.415063e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.725017e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.725017e+07 ) sec^-1 
+MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 +TOTAL : 1.315985 sec + 4,356,664,668 cycles:u # 3.304 GHz + 8,188,326,455 instructions:u # 1.88 insn per cycle + 1.319108781 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 855) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 2.5235104658031306e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.252461e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.832504e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.832504e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.356528 sec - 4,156,578,728 cycles # 3.055 GHz - 8,308,294,757 instructions # 2.00 insn per 
cycle - 1.361324117 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 8.553543e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.471561e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.471561e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 +TOTAL : 1.322016 sec + 4,377,960,359 cycles:u # 3.305 GHz + 7,966,549,257 instructions:u # 1.82 insn per cycle + 1.325025435 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 779) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 2.5235104658031306e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.632818e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.213085e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.213085e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 1.468207 sec - 4,189,515,183 cycles # 2.846 GHz - 8,197,193,406 instructions # 1.96 insn per cycle - 1.473030833 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 8.018327e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.858996e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.858996e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371884e-02 +- 3.270111e-06 ) GeV^0 +TOTAL : 1.368267 sec + 4,434,622,037 cycles:u # 3.235 GHz + 7,851,335,699 instructions:u # 1.77 insn per cycle + 1.371245457 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 280) (512y: 0) (512z: 301) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index 0d46a7bcf5..f7ac3f3149 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:53:26 +DATE: 2023-10-25_18:33:12 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=2, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.594388e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.488622e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.908806e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 -TOTAL : 0.555353 sec - 2,369,008,611 cycles # 3.015 GHz - 3,693,428,004 instructions # 1.56 insn per cycle - 0.844152711 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 80 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112108763889E-002 -Relative difference = 7.180279099086847e-06 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.824530e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.823115e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.823115e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.002512 sec - 6,203,899,879 cycles # 3.095 GHz - 14,161,126,790 instructions # 2.28 insn per cycle - 2.007246903 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 5.021947e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.048940e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.048940e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 +TOTAL : 1.869449 sec + 6,296,012,904 cycles:u # 3.363 GHz + 14,274,872,024 instructions:u # 2.27 insn per cycle + 1.872524206 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 133) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 1.3015322037054697e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.299369e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.234812e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.234812e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 1.542994 sec - 4,763,831,720 cycles # 3.079 GHz - 9,566,058,895 instructions # 2.01 insn per cycle - 1.547857940 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 7.154287e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.223241e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.223241e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 +TOTAL : 1.480330 sec + 4,952,907,742 cycles:u # 3.340 GHz + 9,587,702,713 instructions:u # 1.94 insn per cycle + 1.483393390 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 
663) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 3.8113554068418534e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.326543e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.012502e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.012502e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.344736 sec - 4,139,920,255 cycles # 3.069 GHz - 8,120,823,500 instructions # 1.96 insn per cycle - 1.349591931 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 8.697649e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.520070e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.520070e+07 ) sec^-1 
+MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 +TOTAL : 1.291464 sec + 4,275,495,416 cycles:u # 3.304 GHz + 7,972,879,812 instructions:u # 1.86 insn per cycle + 1.294455433 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 623) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.344788e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.506915e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.506915e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.344145 sec - 4,090,602,024 cycles # 3.034 GHz - 8,121,231,635 instructions # 1.99 insn per 
cycle - 1.349056597 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 8.621966e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.806028e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.806028e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 +TOTAL : 1.312685 sec + 4,356,323,719 cycles:u # 3.312 GHz + 7,779,421,294 instructions:u # 1.79 insn per cycle + 1.315625377 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 590) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.650060e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.477330e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.477330e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 1.474656 sec - 4,125,251,957 cycles # 2.790 GHz - 8,033,155,651 instructions # 1.95 insn per cycle - 1.479796548 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 8.250034e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.675253e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.675253e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371884e-02 +- 3.270111e-06 ) GeV^0 +TOTAL : 1.350207 sec + 4,461,812,073 cycles:u # 3.298 GHz + 7,687,010,388 instructions:u # 1.72 insn per cycle + 1.353247958 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 238) (512y: 0) (512z: 234) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 42cb535764..56085f8e1f 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:33:34 +DATE: 2023-10-25_18:18:58 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.989272e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.677595e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.794677e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.639629 sec - 2,624,746,782 cycles # 3.027 GHz - 4,095,202,506 instructions # 1.56 insn per cycle - 0.927049463 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.282804e-02 -Avg ME (F77/CUDA) = 1.2828039901590279E-002 -Relative difference = 7.671454200650844e-09 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.199685e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.421947e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.421947e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.639670 sec - 17,406,394,399 cycles # 3.084 GHz - 40,598,366,537 instructions # 2.33 insn per cycle - 5.644674626 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.431799e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.740777e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.740777e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.032135 sec + 17,315,960,908 cycles:u # 3.439 GHz + 41,273,314,342 instructions:u # 2.38 insn per cycle + 5.035304429 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 377) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.181362e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.136383e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.136383e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.277504 sec - 10,153,584,525 cycles # 3.094 GHz - 24,841,830,142 instructions # 2.45 insn per cycle - 3.282502956 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.715741e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.129042e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.129042e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.932644 sec + 9,999,498,591 cycles:u # 3.407 GHz + 25,424,986,719 instructions:u # 2.54 insn per cycle + 2.935821772 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1318) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.329721e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.123732e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.123732e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.277389 sec - 6,843,760,468 cycles # 3.000 GHz - 13,635,441,327 instructions # 1.99 insn per cycle - 2.282659453 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.104767e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.732661e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.732661e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 +TOTAL : 2.120965 sec + 7,089,718,272 cycles:u # 3.339 GHz + 14,120,965,347 instructions:u # 1.99 insn per cycle + 2.124110385 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1211) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.425597e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.470588e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.470588e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.232215 sec - 6,546,202,423 cycles # 2.927 GHz - 13,316,237,781 instructions # 2.03 insn per cycle - 2.237429944 seconds time elapsed 
+OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.202279e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.057529e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.057529e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.098690 sec + 7,015,864,901 cycles:u # 3.339 GHz + 13,615,283,917 instructions:u # 1.94 insn per cycle + 2.101710645 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1141) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.286536e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.835267e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.835267e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.304970 sec - 5,866,744,695 cycles # 2.541 GHz - 10,212,406,703 instructions # 1.74 insn per cycle - 2.310030357 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.636507e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.078185e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.078185e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 1.956416 sec + 6,188,582,377 cycles:u # 3.159 GHz + 10,504,124,053 instructions:u # 1.70 insn per cycle + 1.959442647 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 406) (512y: 0) (512z: 707) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index 84b58b8eae..c23fd8bd58 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:34:02 +DATE: 2023-10-25_18:19:14 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.129836e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.454495e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.086141e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.640256 sec - 2,612,587,104 cycles # 3.007 GHz - 4,030,428,105 instructions # 1.54 insn per cycle - 0.928744648 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 118 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.282804e-02 -Avg ME (F77/CUDA) = 1.2828039901590279E-002 -Relative difference = 7.671454200650844e-09 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.208951e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.433392e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.433392e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.597792 sec - 17,342,645,679 cycles # 3.096 GHz - 40,546,867,973 instructions # 2.34 insn per cycle - 5.602682073 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.444987e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.749496e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.749496e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.989879 sec + 17,179,248,922 cycles:u # 3.441 GHz + 41,222,242,033 instructions:u # 2.40 insn per cycle + 4.993075094 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.133508e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.059933e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.059933e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.349360 sec - 10,185,466,163 cycles # 3.037 GHz - 24,803,480,189 instructions # 2.44 insn per cycle - 3.354498074 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.703014e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.099777e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.099777e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.943641 sec + 10,040,467,895 cycles:u # 3.408 GHz + 25,386,490,837 instructions:u # 2.53 insn per cycle + 2.946682776 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1305) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.351442e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.169056e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.169056e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.263569 sec - 6,798,954,008 cycles # 2.998 GHz - 13,608,714,241 instructions # 2.00 insn per cycle - 2.268496012 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.086719e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.629429e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.629429e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 
3.270020e-06 ) GeV^0 +TOTAL : 2.123762 sec + 7,106,505,553 cycles:u # 3.342 GHz + 14,095,051,515 instructions:u # 1.98 insn per cycle + 2.127962021 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1191) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.513470e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.629747e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.629747e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.175520 sec - 6,534,951,743 cycles # 2.998 GHz - 13,313,454,459 instructions # 2.04 insn per cycle - 2.180544895 seconds time elapsed 
+OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.209543e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.058896e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.058896e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.092663 sec + 6,999,323,320 cycles:u # 3.341 GHz + 13,601,956,134 instructions:u # 1.94 insn per cycle + 2.095718322 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1121) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.364985e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.081130e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 6.081130e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.260031 sec - 5,775,308,811 cycles # 2.550 GHz - 10,091,603,442 instructions # 1.75 insn per cycle - 2.265220222 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.686227e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.100329e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.100329e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 1.942782 sec + 6,146,902,959 cycles:u # 3.160 GHz + 10,383,844,685 instructions:u # 1.69 insn per cycle + 1.945768129 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 386) (512y: 0) (512z: 688) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index fc7d3d5581..83f9f3a9e7 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:34:30 +DATE: 2023-10-25_18:19:31 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.189330e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.175818e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270057e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.512252 sec - 2,155,173,458 cycles # 2.909 GHz - 3,041,305,881 instructions # 1.41 insn per cycle - 0.798678690 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 2.028807e+00 -Avg ME (F77/CUDA) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.926195e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.975396e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.975396e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.545999 sec - 17,154,877,090 cycles # 3.091 GHz - 45,384,595,667 instructions # 2.65 insn per cycle - 5.551026412 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.430955e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.504273e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.504273e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.448983 sec + 15,409,189,933 cycles:u # 3.461 GHz + 45,453,870,566 instructions:u # 2.95 insn per cycle + 4.452178333 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.351243e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.515324e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.515324e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.231698 sec - 10,007,778,960 cycles # 3.093 GHz - 27,771,257,423 instructions # 2.77 insn per cycle - 3.236933218 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.303881e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.554553e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.554553e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.568460 sec + 8,853,028,358 cycles:u # 3.443 GHz + 27,825,917,583 instructions:u # 3.14 insn per cycle + 2.571602691 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2543) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.340358e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.757627e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.757627e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.065747 sec - 6,039,122,425 cycles # 2.917 GHz - 12,507,446,858 instructions # 2.07 insn per cycle - 2.070933576 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 7.074870e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.787571e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.787571e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.609723 
sec + 5,376,226,080 cycles:u # 3.334 GHz + 12,550,768,124 instructions:u # 2.33 insn per cycle + 1.612901337 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.874280e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.375597e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.375597e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.886900 sec - 5,532,181,520 cycles # 2.925 GHz - 11,883,413,800 instructions # 2.15 insn per cycle - 1.892172826 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 
7.560817e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.369436e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.369436e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.517357 sec + 5,062,020,112 cycles:u # 3.330 GHz + 11,896,187,886 instructions:u # 2.35 insn per cycle + 1.520597674 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2414) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.697439e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.891301e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.891301e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 
-TOTAL : 2.940719 sec - 5,705,029,690 cycles # 1.938 GHz - 8,291,496,940 instructions # 1.45 insn per cycle - 2.945995320 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1451) (512y: 122) (512z: 1797) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.683379e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.286974e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.286974e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.697824 sec + 4,815,368,425 cycles:u # 2.831 GHz + 8,288,311,753 instructions:u # 1.72 insn per cycle + 1.701300285 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1797) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index bde7cbdb09..7237782240 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -36,60 +36,31 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_19:03:18 +DATE: 2023-10-25_18:42:56 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.773797e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.294173e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.294173e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.785416 sec - 3,063,164,499 cycles # 3.000 GHz - 4,792,639,654 instructions # 1.56 insn per cycle - 1.079850324 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 2.028807e+00 -Avg ME (F77/CUDA) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.892855e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.940587e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.940587e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.719758 sec - 17,497,619,767 cycles # 3.056 GHz - 45,446,099,914 instructions # 2.60 insn per cycle - 5.726041885 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.422258e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.495267e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.495267e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.513529 sec + 15,562,098,392 cycles:u # 3.445 GHz + 45,512,672,442 instructions:u # 2.92 insn per cycle + 4.517738609 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -100,23 +71,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.325585e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.487595e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.487595e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.333037 sec - 10,354,211,298 cycles # 3.101 GHz - 27,955,092,209 instructions # 2.70 insn per cycle - 3.339341303 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.260972e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.503925e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.503925e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.660497 sec + 9,101,452,402 cycles:u # 3.416 GHz + 27,992,194,752 instructions:u # 3.08 insn per cycle + 2.664855606 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2543) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -127,23 +98,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.229345e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.633133e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.633133e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.186507 sec - 6,395,865,888 cycles # 2.918 GHz - 12,794,721,791 instructions # 2.00 insn per cycle - 2.192626317 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.937840e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.630556e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.630556e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL 
: 1.704387 sec + 5,636,463,258 cycles:u # 3.300 GHz + 12,820,595,692 instructions:u # 2.27 insn per cycle + 1.708515081 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -154,23 +125,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.718956e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.198053e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.198053e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.015312 sec - 5,880,292,764 cycles # 2.910 GHz - 12,172,549,562 instructions # 2.07 insn per cycle - 2.021489543 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 7.379272e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.150299e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.150299e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.616624 sec + 5,338,130,644 cycles:u # 3.294 GHz + 12,166,018,598 instructions:u # 2.28 insn per cycle + 1.620766506 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2414) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -181,23 +152,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.795700e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.000887e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.000887e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.946657 sec - 6,072,408,903 cycles # 2.057 GHz - 8,534,252,358 instructions # 1.41 insn per cycle - 2.952768361 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1451) (512y: 122) (512z: 1797) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.563055e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.143112e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.143112e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.793756 sec + 5,078,809,580 cycles:u # 2.825 GHz + 8,515,208,662 instructions:u # 1.68 insn per cycle + 1.797847006 seconds time elapsed +=Symbols in 
CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1797) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index 531c093860..de868da2b8 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_19:15:40 +DATE: 2023-10-25_18:47:33 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.085961e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.172637e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270805e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 0.603556 sec - 2,510,793,087 cycles # 3.030 GHz - 3,673,074,819 instructions # 1.46 insn per cycle - 0.885869264 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 2.028807e+00 -Avg ME (F77/CUDA) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.938086e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.988073e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.988073e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.415561e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.488156e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.488156e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 5.572089 sec - 17,339,920,977 cycles # 3.110 GHz - 45,401,031,280 instructions # 2.62 insn per cycle - 5.577099493 seconds time elapsed +TOTAL : 4.477845 sec + 15,510,356,917 cycles:u # 3.462 GHz + 45,453,872,830 instructions:u # 2.93 insn per cycle + 4.481018259 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.366267e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.533818e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.533818e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.310045e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.559828e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.559828e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.275090 sec - 10,190,622,487 cycles # 3.108 GHz - 27,770,717,333 instructions # 2.73 insn per cycle - 3.280028443 seconds time elapsed +TOTAL : 2.565742 sec + 8,841,179,622 cycles:u # 3.443 GHz + 27,825,913,835 instructions:u # 3.15 insn per cycle + 2.568838506 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2543) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.358680e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.785880e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.785880e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 7.087497e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.812354e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.812354e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.118513 sec - 6,227,899,517 cycles # 2.934 GHz - 12,490,736,505 instructions # 2.01 insn per cycle - 2.123796775 seconds time elapsed +TOTAL : 1.607083 sec + 5,368,175,734 cycles:u # 3.335 GHz + 12,550,768,980 instructions:u # 2.34 insn per cycle + 1.610189658 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.791831e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.300158e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.300158e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 7.536057e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.337529e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.337529e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.975789 sec - 5,740,046,861 cycles # 2.900 GHz - 11,834,515,828 instructions # 2.06 insn per cycle - 1.980847261 seconds time elapsed +TOTAL : 1.521050 sec + 5,076,727,472 cycles:u # 3.332 GHz + 11,896,188,861 instructions:u # 2.34 insn per cycle + 1.524133393 seconds 
time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2414) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.818538e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.030811e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.030811e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.696929e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.301242e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.301242e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.912399 sec - 5,874,294,388 cycles # 2.014 GHz - 8,239,488,482 instructions # 1.40 insn per cycle - 2.917538602 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1451) 
(512y: 122) (512z: 1797) +TOTAL : 1.695428 sec + 4,808,224,755 cycles:u # 2.831 GHz + 8,288,307,748 instructions:u # 1.72 insn per cycle + 1.698538765 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1797) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt index ee80d49776..f86eae16b9 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt @@ -36,51 +36,21 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_19:12:36 +DATE: 2023-10-25_18:46:46 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.085112e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.169879e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.269257e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.542204 sec - 2,298,212,603 cycles # 3.002 GHz - 3,616,714,256 instructions # 1.57 insn per cycle - 0.823159797 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 2.028807e+00 -Avg ME (F77/CUDA) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.902640e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.950734e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.950734e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.613770 sec - 17,167,591,716 cycles # 3.056 GHz - 45,385,422,779 instructions # 2.64 insn per cycle - 5.618850316 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe: Aborted + 2,192,346 cycles:u # 1.444 GHz + 3,107,719 instructions:u # 1.42 insn per cycle + 0.107597341 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -90,23 +60,14 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.356859e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.522784e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.522784e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.227055 sec - 10,007,434,150 cycles # 3.097 GHz - 27,771,321,943 instructions # 2.78 insn per cycle - 3.232084319 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe: Aborted + 2,118,021 cycles:u # 1.381 GHz + 3,111,083 instructions:u # 1.47 insn per cycle + 0.118204330 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2543) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -116,23 +77,14 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.339247e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.757472e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.757472e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.067924 sec - 6,064,030,334 cycles # 2.927 GHz - 12,508,006,764 instructions # 2.06 insn per cycle - 2.072907317 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe: Aborted + 2,004,671 cycles:u # 1.319 GHz + 3,111,988 instructions:u # 1.55 insn per cycle + 0.105862672 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -142,23 +94,14 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.874688e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.380385e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.380385e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.887776 sec - 5,540,691,746 cycles # 2.929 GHz - 11,883,645,896 instructions # 2.14 insn per cycle - 1.892908998 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe: Aborted + 1,925,242 cycles:u # 1.333 GHz + 3,112,848 instructions:u # 1.62 insn per cycle + 0.108456654 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2414) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -168,23 +111,14 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.817386e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.022798e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.022798e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.850472 sec - 5,706,017,461 cycles # 1.999 GHz - 8,290,142,366 instructions # 1.45 insn per cycle - 2.855563486 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1451) (512y: 122) (512z: 1797) +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe: Aborted + 1,927,580 cycles:u # 1.304 GHz + 3,112,262 instructions:u # 1.61 insn per cycle + 0.105664290 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1797) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index 1b9c9ee7df..cd7befa7e8 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -36,53 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_19:09:35 +DATE: 2023-10-25_18:45:40 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.145651e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.173920e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.273175e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.691895 sec - 2,781,064,477 cycles # 3.018 GHz - 4,402,279,507 instructions # 1.58 insn per cycle - 0.980225226 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 2.028807e+00 -Avg ME (F77/CUDA) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.929230e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.978722e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.978722e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.538774 sec - 17,159,821,844 cycles # 3.096 GHz - 45,385,185,100 instructions # 2.64 insn per cycle - 5.543842374 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.413682e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.487845e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.487845e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.480388 sec + 15,521,061,945 cycles:u # 3.462 GHz + 45,453,872,331 instructions:u # 2.93 insn per cycle + 4.483617521 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -92,23 +69,23 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.287540e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.449504e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.449504e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.292731 sec - 10,016,590,673 cycles # 3.038 GHz - 27,771,485,458 instructions # 2.77 insn per cycle - 3.297740861 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.312494e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.561007e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.561007e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.564128 sec + 8,837,322,306 cycles:u # 3.443 GHz + 27,825,917,366 instructions:u # 3.15 insn per cycle + 2.567231215 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2543) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -118,23 +95,23 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.325678e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.741826e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.741826e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.072513 sec - 6,058,236,283 cycles # 2.917 GHz - 12,507,380,733 instructions # 2.06 insn per cycle - 2.077678265 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 7.108388e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.828424e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.828424e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 
1.602773 sec + 5,351,831,079 cycles:u # 3.333 GHz + 12,550,772,615 instructions:u # 2.35 insn per cycle + 1.605940866 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -144,23 +121,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.871793e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.376853e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.376853e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.888593 sec - 5,531,298,122 cycles # 2.922 GHz - 11,883,369,769 instructions # 2.15 insn per cycle - 1.893708699 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] 
(23) = ( 7.550944e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.358867e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.358867e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.518337 sec + 5,059,813,173 cycles:u # 3.326 GHz + 11,896,190,671 instructions:u # 2.35 insn per cycle + 1.521540673 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2414) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -170,23 +147,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.825270e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.031884e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.031884e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 
3.413217e-03 ) GeV^0 -TOTAL : 2.844855 sec - 5,686,333,748 cycles # 1.997 GHz - 8,290,317,138 instructions # 1.46 insn per cycle - 2.849943056 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1451) (512y: 122) (512z: 1797) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.689284e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.294108e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.294108e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.697971 sec + 4,812,488,030 cycles:u # 2.829 GHz + 8,288,308,200 instructions:u # 1.72 insn per cycle + 1.701195822 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1797) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index 11300e6895..a5c9453825 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:34:57 +DATE: 2023-10-25_18:19:45 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.181607e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.171006e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.264343e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.507998 sec - 2,247,373,928 cycles # 3.001 GHz - 3,245,400,581 instructions # 1.44 insn per cycle - 0.806174140 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 2.028807e+00 -Avg ME (F77/CUDA) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.976266e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.029293e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.029293e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.409225 sec - 16,681,628,869 cycles # 3.082 GHz - 44,378,235,380 instructions # 2.66 insn per cycle - 5.414507921 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.476229e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.552300e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.552300e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.370568 sec + 15,136,818,641 cycles:u # 3.461 GHz + 44,446,477,812 instructions:u # 2.94 insn per cycle + 4.373795655 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 576) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.514456e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.696441e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.696441e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.086332 sec - 9,510,708,832 cycles # 3.078 GHz - 26,620,808,250 instructions # 2.80 insn per cycle - 3.091424217 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.436275e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.714377e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.714377e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.497946 sec + 8,541,476,152 cycles:u # 3.415 GHz + 26,675,915,603 instructions:u # 3.12 insn per cycle + 2.501489423 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2339) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.735122e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.074181e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.074181e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.319139 sec - 6,584,764,475 cycles # 2.834 GHz - 14,057,249,658 instructions # 2.13 insn per cycle - 2.324235278 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.405128e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.981595e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.981595e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.764525 
sec + 5,900,836,520 cycles:u # 3.339 GHz + 14,099,110,491 instructions:u # 2.39 insn per cycle + 1.767682384 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2753) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.117258e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.494373e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.494373e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.150419 sec - 6,273,792,377 cycles # 2.911 GHz - 13,574,431,184 instructions # 2.16 insn per cycle - 2.155695099 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 
6.693845e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.323982e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.323982e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.697866 sec + 5,671,023,494 cycles:u # 3.335 GHz + 13,586,138,958 instructions:u # 2.40 insn per cycle + 1.701096581 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2405) (512y: 296) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.685746e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.878776e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.878776e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 
-TOTAL : 2.947596 sec - 5,924,739,734 cycles # 2.007 GHz - 10,074,038,054 instructions # 1.70 insn per cycle - 2.952990655 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.602718e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.194142e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.194142e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.718156 sec + 4,868,702,807 cycles:u # 2.829 GHz + 10,081,719,325 instructions:u # 2.07 insn per cycle + 1.721394711 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1324) (512y: 208) (512z: 1980) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 72e4f7ff9f..283f3eb87c 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:53:45 +DATE: 2023-10-25_18:33:21 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.139570e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.179113e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.275835e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.507615 sec - 2,255,394,284 cycles # 3.018 GHz - 3,233,142,981 instructions # 1.43 insn per cycle - 0.804645133 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 2.028807e+00 -Avg ME (F77/CUDA) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.504791e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.591296e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.591296e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.290630 sec - 13,015,421,868 cycles # 3.031 GHz - 34,406,707,609 instructions # 2.64 insn per cycle - 4.295722046 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 686) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.175373e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.302363e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.302363e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.437181 sec + 11,884,986,629 cycles:u # 3.455 GHz + 34,405,895,161 instructions:u # 2.89 insn per cycle + 3.440255501 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 680) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.162383e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.308946e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.308946e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.418889 sec - 10,608,875,216 cycles # 3.099 GHz - 24,023,081,327 instructions # 2.26 insn per cycle - 3.424022271 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.697270e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.878577e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.878577e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.968633 sec + 10,248,083,842 cycles:u # 3.449 GHz + 24,077,637,055 instructions:u # 2.35 insn per cycle + 2.971828444 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.707866e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.031974e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.031974e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.329926 sec - 6,692,684,984 cycles # 2.867 GHz - 12,415,083,748 instructions # 1.86 insn per cycle - 2.335033574 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.113927e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.642581e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.642581e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.843940 
sec + 6,167,857,289 cycles:u # 3.340 GHz + 12,457,536,472 instructions:u # 2.02 insn per cycle + 1.847176990 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3156) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 3.2588037208240405e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.126119e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.508462e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.508462e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.148404 sec - 6,244,000,039 cycles # 2.900 GHz - 11,586,646,765 instructions # 1.86 insn per cycle - 2.153653502 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 
6.562158e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.158272e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.158272e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.727660 sec + 5,775,824,126 cycles:u # 3.338 GHz + 11,598,742,571 instructions:u # 2.01 insn per cycle + 1.730844187 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2692) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 3.2588037208240405e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.121660e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.365055e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.365055e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 
-TOTAL : 2.647363 sec - 5,336,886,997 cycles # 2.013 GHz - 9,309,895,095 instructions # 1.74 insn per cycle - 2.652547834 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.715867e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.330159e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.330159e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.692034 sec + 4,795,227,737 cycles:u # 2.830 GHz + 9,318,641,605 instructions:u # 1.94 insn per cycle + 1.695200191 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2116) (512y: 282) (512z: 1958) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index b8d2933568..9ef1271e0d 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:54:12 +DATE: 2023-10-25_18:33:35 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.136826e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.174839e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270632e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.507009 sec - 2,214,441,092 cycles # 3.000 GHz - 3,174,554,342 instructions # 1.43 insn per cycle - 0.795728101 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 2.028807e+00 -Avg ME (F77/CUDA) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.647788e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.745237e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.745237e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.064620 sec - 12,384,265,473 cycles # 3.044 GHz - 35,059,405,316 instructions # 2.83 insn per cycle - 4.069669316 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 457) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.388519e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.532825e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.532825e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.230821 sec + 11,161,749,522 cycles:u # 3.452 GHz + 35,109,807,854 instructions:u # 3.15 insn per cycle + 3.234089061 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 456) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.138033e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.282936e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.282936e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.444702 sec - 10,688,625,544 cycles # 3.099 GHz - 23,099,820,217 instructions # 2.16 insn per cycle - 3.449755846 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.637200e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.812939e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.812939e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.016744 sec + 10,412,913,669 cycles:u # 3.449 GHz + 23,154,129,157 instructions:u # 2.22 insn per cycle + 3.019990954 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2363) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.237103e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.643499e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.643499e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.105153 sec - 6,160,181,225 cycles # 2.920 GHz - 11,969,984,936 instructions # 1.94 insn per cycle - 2.110284671 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.548140e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.157289e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.157289e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.730430 
sec + 5,783,016,311 cycles:u # 3.337 GHz + 12,013,376,708 instructions:u # 2.08 insn per cycle + 1.733620799 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2511) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.378177e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.801357e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.801357e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.052827 sec - 6,017,899,320 cycles # 2.925 GHz - 11,142,057,093 instructions # 1.85 insn per cycle - 2.058039153 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 
6.901259e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.569168e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.569168e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.649565 sec + 5,509,653,613 cycles:u # 3.334 GHz + 11,153,994,408 instructions:u # 2.02 insn per cycle + 1.652856459 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2128) (512y: 174) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.233582e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.488303e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.488303e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 
-TOTAL : 2.581242 sec - 5,224,244,555 cycles # 2.021 GHz - 9,033,433,625 instructions # 1.73 insn per cycle - 2.586440370 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1651) (512y: 208) (512z: 1567) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.946866e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.604676e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.604676e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.639153 sec + 4,647,988,628 cycles:u # 2.831 GHz + 9,041,068,616 instructions:u # 1.95 insn per cycle + 1.642368229 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1651) (512y: 208) (512z: 1566) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 24b477c6c2..afff76e16b 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:35:25 +DATE: 2023-10-25_18:19:59 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.085170e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.712610e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.977210e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.469467 sec - 2,068,301,293 cycles # 3.004 GHz - 3,012,364,622 instructions # 1.46 insn per cycle - 0.747476379 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 2.028811e+00 -Avg ME (F77/CUDA) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.956122e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.010570e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.010570e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.445413 sec - 16,510,915,611 cycles # 3.030 GHz - 45,308,404,518 instructions # 2.74 insn per cycle - 5.450456954 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.690029e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.768739e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.768739e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 4.018311 sec + 13,934,247,146 cycles:u # 3.466 GHz + 45,275,831,077 instructions:u # 3.25 insn per cycle + 4.021247677 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 6.443528218283898e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.773825e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.129881e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.129881e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.278501 sec - 7,055,633,229 cycles # 3.091 GHz - 17,671,724,757 instructions # 2.50 insn per cycle - 2.283347357 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.035380e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.463877e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.463877e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 1.854868 sec + 6,394,320,124 cycles:u # 3.443 GHz + 17,637,598,795 instructions:u # 2.76 insn per cycle + 1.857812507 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.823101e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.001328e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.001328e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.269969 sec - 3,728,138,097 cycles # 2.926 GHz - 8,250,735,018 instructions # 2.21 insn per cycle - 1.274926428 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.164960e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.322020e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.322020e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.012580 
sec + 3,376,070,689 cycles:u # 3.326 GHz + 8,192,979,761 instructions:u # 2.43 insn per cycle + 1.015651914 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.356043e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.069996e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.069996e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.201889 sec - 3,525,312,934 cycles # 2.923 GHz - 7,861,079,341 instructions # 2.23 insn per cycle - 1.206782783 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.224555e+06 
) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.400507e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.400507e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 0.970770 sec + 3,236,501,289 cycles:u # 3.325 GHz + 7,771,054,439 instructions:u # 2.40 insn per cycle + 0.973791756 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3193) (512y: 20) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.081368e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.847086e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.847086e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.564981 sec 
- 3,252,144,204 cycles # 2.073 GHz - 6,095,772,749 instructions # 1.87 insn per cycle - 1.569858235 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2377) (512y: 24) (512z: 2156) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.192270e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.359162e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.359162e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 0.993442 sec + 2,851,654,073 cycles:u # 2.863 GHz + 5,993,393,284 instructions:u # 2.10 insn per cycle + 0.996475502 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2367) (512y: 24) (512z: 2156) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index c228b2c37b..fbd40bbc03 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -36,60 +36,31 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_19:03:47 +DATE: 2023-10-25_18:43:11 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.513457e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.340252e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.340252e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.650789 sec - 2,641,546,515 cycles # 3.016 GHz - 4,117,903,371 instructions # 1.56 insn per cycle - 0.935043223 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 2.028811e+00 -Avg ME (F77/CUDA) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.972248e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.026239e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.026239e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.440996 sec - 16,702,681,594 cycles # 3.067 GHz - 45,351,045,297 instructions # 2.72 insn per cycle - 5.446683603 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.685476e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.763690e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.763690e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 4.049978 sec + 14,009,689,743 cycles:u # 3.456 GHz + 45,309,289,127 instructions:u # 3.23 insn per cycle + 4.053957938 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -100,23 +71,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.605636e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.935791e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.935791e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.405458 sec - 7,261,686,615 cycles # 3.014 GHz - 17,953,553,750 instructions # 2.47 insn per cycle - 2.411441099 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.034966e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.463217e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.463217e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 1.892109 sec + 6,484,498,984 cycles:u # 3.422 GHz + 17,904,363,518 instructions:u # 2.76 insn per cycle + 1.895767588 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -127,23 +98,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.560330e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.721128e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.721128e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.356872 sec - 3,928,188,681 cycles # 2.884 GHz - 8,488,830,304 instructions # 2.16 insn per cycle - 1.362856063 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.150200e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.303661e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.303661e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 
1.060115 sec + 3,504,074,762 cycles:u # 3.295 GHz + 8,416,815,935 instructions:u # 2.40 insn per cycle + 1.063970256 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -154,23 +125,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.116761e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.040482e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.040482e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.278534 sec - 3,740,578,395 cycles # 2.919 GHz - 8,100,523,605 instructions # 2.17 insn per cycle - 1.284258782 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.202115e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.371685e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.371685e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.023905 sec + 3,381,091,057 cycles:u # 3.291 GHz + 7,994,892,117 instructions:u # 2.36 insn per cycle + 1.027748594 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3193) (512y: 20) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -181,23 +152,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.953315e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.671270e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.671270e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.636801 sec - 3,469,634,780 cycles # 2.114 GHz - 6,351,136,410 instructions # 1.83 insn per cycle - 1.642694122 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2377) (512y: 24) (512z: 2156) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.170635e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.331138e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.331138e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.047330 sec + 2,998,363,263 cycles:u # 2.854 GHz + 6,234,599,723 instructions:u # 2.08 insn per cycle + 1.051142008 seconds time elapsed +=Symbols in 
CPPProcess.o= (~sse4: 0) (avx2: 2367) (512y: 24) (512z: 2156) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index 5dc74dfed7..20f261936e 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_19:16:08 +DATE: 2023-10-25_18:47:47 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.063632e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.693440e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.968571e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 -TOTAL : 0.553622 sec - 2,317,649,288 cycles # 3.011 GHz - 3,439,584,300 instructions # 1.48 insn per cycle - 0.828678237 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 2.028811e+00 -Avg ME (F77/CUDA) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.989342e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.044248e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.044248e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.690186e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.768818e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.768818e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 5.407392 sec - 16,682,462,365 cycles # 3.083 GHz - 45,337,082,640 instructions # 2.72 insn per cycle - 5.412277054 seconds time elapsed +TOTAL : 4.017811 sec + 13,932,869,761 cycles:u # 3.466 GHz + 45,275,831,531 instructions:u # 3.25 insn per cycle + 4.020754686 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 6.443528218283898e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.799956e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.158589e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.158589e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.076713e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.512062e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.512062e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.320759 sec - 7,223,788,046 cycles # 3.108 GHz - 17,685,035,831 instructions # 2.45 insn per cycle - 2.325560432 seconds time elapsed +TOTAL : 1.844577 sec + 6,351,344,975 cycles:u # 3.438 GHz + 17,637,598,874 instructions:u # 2.78 insn per cycle + 1.847667927 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.822474e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.004978e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.004978e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.167837e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.326502e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.326502e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.324170 sec - 3,914,841,287 cycles # 2.948 GHz - 8,235,477,108 instructions # 2.10 insn per cycle - 1.328966517 seconds time elapsed +TOTAL : 1.011648 sec + 3,368,215,541 cycles:u # 3.324 GHz + 8,192,980,425 instructions:u # 2.43 insn per cycle + 1.014577547 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.391242e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.078618e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.078618e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.225145e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.401523e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.401523e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.253021 sec - 3,707,252,167 cycles # 2.949 GHz - 7,811,825,096 instructions # 2.11 insn per cycle - 1.257855017 seconds time elapsed +TOTAL : 0.971297 sec + 3,236,208,077 cycles:u # 3.323 GHz + 7,771,055,176 instructions:u # 2.40 insn per cycle + 0.974309255 seconds 
time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3193) (512y: 20) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.106245e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.850378e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.850378e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.195467e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.362661e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.362661e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.611554 sec - 3,420,500,042 cycles # 2.117 GHz - 6,046,541,541 instructions # 1.77 insn per cycle - 1.616370057 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2377) 
(512y: 24) (512z: 2156) +TOTAL : 0.991718 sec + 2,846,173,659 cycles:u # 2.862 GHz + 5,993,394,886 instructions:u # 2.11 insn per cycle + 0.994890275 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2367) (512y: 24) (512z: 2156) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt index 7b90f03855..1e46c6ef40 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt @@ -36,51 +36,21 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_19:13:04 +DATE: 2023-10-25_18:46:49 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.065322e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.700283e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.974533e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.498915 sec - 2,192,440,414 cycles # 3.004 GHz - 3,416,060,899 instructions # 1.56 insn per cycle - 0.787716102 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst -==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 2.028811e+00 -Avg ME (F77/CUDA) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.966466e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.020648e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.020648e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.415648 sec - 16,529,341,853 cycles # 3.050 GHz - 45,309,866,535 instructions # 2.74 insn per cycle - 5.420402206 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe: Aborted + 2,256,667 cycles:u # 1.457 GHz + 3,112,424 instructions:u # 1.38 insn per cycle + 0.113157713 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -90,23 +60,14 @@ Relative difference = 6.443528218283898e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.693978e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.035776e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.035776e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.316382 sec - 7,054,105,370 cycles # 3.040 GHz - 17,671,721,016 instructions # 2.51 insn per cycle - 2.321167806 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe: Aborted + 2,089,876 cycles:u # 1.363 GHz + 3,116,091 instructions:u # 1.49 insn per cycle + 0.110633377 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -116,23 +77,14 @@ Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.830241e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.005280e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.005280e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.269396 sec - 3,738,194,435 cycles # 2.936 GHz - 8,251,074,147 instructions # 2.21 insn per cycle - 1.274249735 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe: Aborted + 1,922,821 cycles:u # 1.310 GHz + 3,116,914 instructions:u # 1.62 insn per cycle + 0.115774978 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -142,23 +94,14 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.392639e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.077705e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.077705e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.198333 sec - 3,534,628,897 cycles # 2.940 GHz - 7,862,127,936 instructions # 2.22 insn per cycle - 1.203142647 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe: Aborted + 2,134,397 cycles:u # 1.376 GHz + 3,116,527 instructions:u # 1.46 insn per cycle + 0.119358887 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3193) (512y: 20) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -168,23 +111,14 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.097193e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.836994e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.836994e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.558825 sec - 3,254,164,123 cycles # 2.082 GHz - 6,095,387,295 instructions # 1.87 insn per cycle - 1.563579525 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2377) (512y: 24) (512z: 2156) +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe: Aborted + 1,911,214 cycles:u # 1.359 GHz + 3,116,839 instructions:u # 1.63 insn per cycle + 0.119822951 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2367) (512y: 24) (512z: 2156) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index fdd315eb16..51cad3bfd5 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -36,53 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_19:10:03 +DATE: 2023-10-25_18:45:55 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.170150e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.660447e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.933538e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.602645 sec - 2,479,148,123 cycles # 2.998 GHz - 3,864,785,421 instructions # 1.56 insn per cycle - 0.885563297 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 2.028811e+00 -Avg ME (F77/CUDA) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.957679e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.011741e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.011741e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.439322 sec - 16,519,636,229 cycles # 3.035 GHz - 45,307,914,586 instructions # 2.74 insn per cycle - 5.444150127 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.690132e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.768774e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.768774e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 4.017782 sec + 13,937,233,338 cycles:u # 3.467 GHz + 45,275,831,337 instructions:u # 3.25 insn per cycle + 4.020843807 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -92,23 +69,23 @@ Relative difference = 6.443528218283898e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.773026e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.136685e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.136685e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.280272 sec - 7,068,660,475 cycles # 3.094 GHz - 17,671,452,966 instructions # 2.50 insn per cycle - 2.285202578 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.090999e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.526280e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.526280e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 1.839059 sec + 6,339,758,318 cycles:u # 3.443 GHz + 17,637,599,906 instructions:u # 2.78 insn per cycle + 1.842053865 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -118,23 +95,23 @@ Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.805997e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.992625e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.992625e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.272643 sec - 3,731,426,488 cycles # 2.926 GHz - 8,249,195,685 instructions # 2.21 insn per cycle - 1.277241448 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.166572e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.328414e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.328414e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 
1.011768 sec + 3,373,820,227 cycles:u # 3.326 GHz + 8,192,980,180 instructions:u # 2.43 insn per cycle + 1.014803347 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -144,23 +121,23 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.399444e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.076890e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.076890e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.197731 sec - 3,530,731,182 cycles # 2.938 GHz - 7,860,812,005 instructions # 2.23 insn per cycle - 1.202556944 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] 
(23) = ( 1.227633e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.404021e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.404021e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 0.969289 sec + 3,230,822,555 cycles:u # 3.324 GHz + 7,771,053,918 instructions:u # 2.41 insn per cycle + 0.972355237 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3193) (512y: 20) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -170,23 +147,23 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.091593e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.835046e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.835046e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 
3.414231e-03 ) GeV^0 -TOTAL : 1.560736 sec - 3,257,981,590 cycles # 2.083 GHz - 6,095,878,647 instructions # 1.87 insn per cycle - 1.565536774 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2377) (512y: 24) (512z: 2156) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.189028e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.358334e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.358334e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 0.996460 sec + 2,860,549,247 cycles:u # 2.863 GHz + 5,993,395,533 instructions:u # 2.10 insn per cycle + 0.999433875 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2367) (512y: 24) (512z: 2156) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index 663a41142c..d0560c7753 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:35:49 +DATE: 2023-10-25_18:20:10 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.096390e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.766631e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.047368e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.467013 sec - 2,056,734,142 cycles # 2.997 GHz - 2,992,962,147 instructions # 1.46 insn per cycle - 0.744434014 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 2.028811e+00 -Avg ME (F77/CUDA) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.031007e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.087778e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.087778e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.245108 sec - 16,244,805,475 cycles # 3.095 GHz - 44,484,348,190 instructions # 2.74 insn per cycle - 5.249986656 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.744316e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.826216e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.826216e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.941233 sec + 13,671,955,127 cycles:u # 3.467 GHz + 44,452,988,618 instructions:u # 3.25 insn per cycle + 3.944344997 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 576) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 6.443528218283898e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.358927e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.815806e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.815806e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.039403 sec - 6,083,169,654 cycles # 2.982 GHz - 16,972,342,736 instructions # 2.79 insn per cycle - 2.044363213 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 7.366823e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.013794e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.013794e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 1.540894 sec + 5,300,179,865 cycles:u # 3.434 GHz + 16,936,396,176 instructions:u # 3.20 insn per cycle + 1.543868299 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2881) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.400912e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.009313e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.009313e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.720669 sec - 5,008,260,515 cycles # 2.904 GHz - 10,214,809,232 instructions # 2.04 insn per cycle - 1.725527481 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 8.737785e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.599720e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.599720e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.312380 
sec + 4,392,970,186 cycles:u # 3.341 GHz + 10,155,452,879 instructions:u # 2.31 insn per cycle + 1.315393351 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.537295e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.168989e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.168989e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.684974 sec - 4,937,248,513 cycles # 2.923 GHz - 9,938,060,774 instructions # 2.01 insn per cycle - 1.689989340 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 
8.830088e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.708995e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.708995e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.302046 sec + 4,359,069,122 cycles:u # 3.341 GHz + 9,845,204,780 instructions:u # 2.26 insn per cycle + 1.305123490 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3789) (512y: 2) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.077645e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.456866e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.456866e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 
2.147132 sec - 4,363,010,014 cycles # 2.028 GHz - 8,442,845,303 instructions # 1.94 insn per cycle - 2.152072523 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 8.229108e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.998758e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.998758e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.388608 sec + 3,950,373,718 cycles:u # 2.839 GHz + 8,348,834,891 instructions:u # 2.11 insn per cycle + 1.391676114 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2882) (512y: 4) (512z: 2751) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index d0aa02b37a..acbb85ecfe 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:54:38 +DATE: 2023-10-25_18:33:49 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.072434e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.686668e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.952500e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.467510 sec - 2,069,160,049 cycles # 3.005 GHz - 2,965,842,897 instructions # 1.43 insn per cycle - 0.745986723 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 2.028811e+00 -Avg ME (F77/CUDA) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.486168e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.574733e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.574733e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.300765 sec - 12,613,573,541 cycles # 2.930 GHz - 34,394,223,521 instructions # 2.73 insn per cycle - 4.305708849 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.320694e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.441368e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.441368e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.278201 sec + 11,344,861,246 cycles:u # 3.458 GHz + 34,349,491,078 instructions:u # 3.03 insn per cycle + 3.281385138 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 696) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.423404e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.902037e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.902037e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.018730 sec - 6,098,231,874 cycles # 3.015 GHz - 14,875,099,697 instructions # 2.44 insn per cycle - 2.023701584 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 7.028214e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.614990e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.614990e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 1.610057 sec + 5,538,663,516 cycles:u # 3.434 GHz + 14,842,604,118 instructions:u # 2.68 insn per cycle + 1.613149254 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3009) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 1.8746278463897685e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.288210e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.081005e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.081005e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.521473 sec - 4,271,996,539 cycles # 2.800 GHz - 9,042,309,170 instructions # 2.12 insn per cycle - 1.526427437 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 9.416844e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.042848e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.042848e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.226475 
sec + 4,099,723,599 cycles:u # 3.335 GHz + 8,983,212,555 instructions:u # 2.19 insn per cycle + 1.229512528 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4445) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 9.857617164523888e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.762492e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.667202e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.667202e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.433726 sec - 4,213,011,776 cycles # 2.930 GHz - 8,676,320,241 instructions # 2.06 insn per cycle - 1.438715591 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 9.664502e+05 
) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.072237e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.072237e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.202455 sec + 4,012,953,236 cycles:u # 3.334 GHz + 8,585,269,980 instructions:u # 2.14 insn per cycle + 1.205618514 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4244) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 9.857617164523888e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.878645e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.382089e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.382089e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.866240 sec 
- 3,836,736,420 cycles # 2.052 GHz - 7,820,066,058 instructions # 2.04 insn per cycle - 1.871114134 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 9.197420e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.016756e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.016756e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.254725 sec + 3,577,442,121 cycles:u # 2.845 GHz + 7,727,026,122 instructions:u # 2.16 insn per cycle + 1.257759402 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4420) (512y: 0) (512z: 2556) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index de54279b1b..ca3ab1b14a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:55:01 +DATE: 2023-10-25_18:34:00 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.082452e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.759103e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.038632e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.468671 sec - 2,051,049,137 cycles # 2.975 GHz - 2,933,032,180 instructions # 1.43 insn per cycle - 0.745977930 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 2.028811e+00 -Avg ME (F77/CUDA) = 2.0288499749731272 -Relative difference = 1.9210746159747678e-05 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.728308e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.832886e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.832886e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 3.926359 sec - 11,754,711,596 cycles # 2.991 GHz - 35,130,335,361 instructions # 2.99 insn per cycle - 3.931207276 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 470) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.559167e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.698111e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.698111e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.066504 sec + 10,619,079,620 cycles:u # 3.460 GHz + 35,086,958,769 instructions:u # 3.30 insn per cycle + 3.069755465 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 471) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.711585e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.225013e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.225013e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.918624 sec - 5,946,615,728 cycles # 3.093 GHz - 14,483,958,293 instructions # 2.44 insn per cycle - 1.923457186 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 7.310334e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.946615e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.946615e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 1.551856 sec + 5,338,107,838 cycles:u # 3.434 GHz + 14,452,486,870 instructions:u # 2.71 insn per cycle + 1.554878279 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 1.7661780742548925e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.855810e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.786782e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.786782e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.419134 sec - 4,148,821,411 cycles # 2.915 GHz - 8,888,021,481 instructions # 2.14 insn per cycle - 1.424042048 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3576) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.030423e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.152738e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.152738e+06 ) sec^-1 
+MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.130833 sec + 3,775,658,954 cycles:u # 3.331 GHz + 8,802,115,602 instructions:u # 2.33 insn per cycle + 1.133928357 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3563) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 1.0385521077446488e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.911621e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.860580e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.860580e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.406850 sec - 4,137,327,451 cycles # 2.932 GHz - 8,424,234,551 instructions # 2.04 insn per cycle - 1.411791633 seconds time 
elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 9.649559e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.070576e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.070576e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.201389 sec + 4,017,585,046 cycles:u # 3.337 GHz + 8,333,398,185 instructions:u # 2.07 insn per cycle + 1.204453170 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3320) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 1.0385521077446488e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.947023e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.462476e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.462476e+05 ) 
sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.845349 sec - 3,784,294,920 cycles # 2.046 GHz - 7,713,085,184 instructions # 2.04 insn per cycle - 1.850240418 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 9.359258e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.036707e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.036707e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.235370 sec + 3,525,732,454 cycles:u # 2.848 GHz + 7,620,121,471 instructions:u # 2.16 insn per cycle + 1.238483305 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3436) (512y: 0) (512z: 2108) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index e5b5571dad..0a44353603 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:36:14 +DATE: 2023-10-25_18:20:22 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.194168e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.177440e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.271526e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.510074 sec - 2,249,628,773 cycles # 3.005 GHz - 3,213,054,699 instructions # 1.43 insn per cycle - 0.807578099 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 2.028807e+00 -Avg ME (F77/CUDA) = 2.0288063423243874 -Relative difference = 3.241686432649386e-07 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.906485e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.954717e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.954717e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.603351 sec - 17,321,977,891 cycles # 3.090 GHz - 45,555,371,368 instructions # 2.63 insn per cycle - 5.608662187 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.391314e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.462263e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.462263e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.523260 sec + 15,663,125,785 cycles:u # 3.462 GHz + 45,621,646,860 instructions:u # 2.91 insn per cycle + 4.526424672 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.376355e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.544078e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.544078e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.208637 sec - 9,923,474,484 cycles # 3.089 GHz - 27,529,097,588 instructions # 2.77 insn per cycle - 3.213810203 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.317442e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.568260e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.568260e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.561526 sec + 8,828,231,399 cycles:u # 3.443 GHz + 27,584,476,226 instructions:u # 3.12 insn per cycle + 2.564640940 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2591) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.199043e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.595306e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.595306e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.118667 sec - 5,989,500,217 cycles # 2.821 GHz - 12,420,938,473 instructions # 2.07 insn per cycle - 2.123732572 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 7.146962e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.885928e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.885928e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.595253 
sec + 5,326,841,835 cycles:u # 3.334 GHz + 12,464,651,168 instructions:u # 2.34 insn per cycle + 1.598410034 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2753) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.938752e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.452534e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.452534e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.867638 sec - 5,462,355,916 cycles # 2.919 GHz - 11,803,822,809 instructions # 2.16 insn per cycle - 1.872842798 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 
7.565952e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.372890e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.372890e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.515630 sec + 5,057,629,411 cycles:u # 3.331 GHz + 11,816,622,903 instructions:u # 2.34 insn per cycle + 1.518780363 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2503) (512y: 146) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.872672e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.090666e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.090666e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL 
: 2.812736 sec - 5,598,441,704 cycles # 1.988 GHz - 8,083,507,451 instructions # 1.44 insn per cycle - 2.817822099 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1660) (512y: 126) (512z: 1854) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.738673e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.362118e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.362118e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.686665 sec + 4,783,728,106 cycles:u # 2.832 GHz + 8,082,127,517 instructions:u # 1.69 insn per cycle + 1.689804959 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1655) (512y: 126) (512z: 1854) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index be7fa646e4..c8b63d4082 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -36,51 +36,30 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:36:41 +DATE: 2023-10-25_18:20:37 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.207178e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.183934e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.279144e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.508835 sec - 2,247,853,776 cycles # 3.008 GHz - 3,199,999,470 instructions # 1.42 insn per cycle - 0.806333964 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 2.028807e+00 -Avg ME (F77/CUDA) = 2.0288063423243874 -Relative difference = 3.241686432649386e-07 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.962576e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.013722e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.013722e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.445467 sec - 16,864,922,350 cycles # 3.095 GHz - 44,544,928,625 instructions # 2.64 insn per cycle - 5.450679101 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.452190e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.528965e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.528965e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.412606 sec + 15,282,987,184 cycles:u # 3.461 GHz + 44,614,253,384 instructions:u # 2.92 insn per cycle + 4.415862858 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 574) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -90,23 +69,23 @@ Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.463332e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.638084e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.638084e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.129878 sec - 9,472,664,981 cycles # 3.022 GHz - 26,172,690,479 instructions # 2.76 insn per cycle - 3.134859663 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.463995e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.731689e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.731689e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.481891 sec + 8,549,716,561 cycles:u # 3.441 GHz + 26,228,951,283 instructions:u # 3.07 insn per cycle + 2.485008452 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2397) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -116,23 +95,23 @@ Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.769147e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.097427e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.097427e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.300686 sec - 6,708,376,683 cycles # 2.910 GHz - 13,967,973,168 instructions # 2.08 insn per cycle - 2.306085049 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2875) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.358020e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.928350e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.928350e+05 ) sec^-1 
+MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.778069 sec + 5,943,404,216 cycles:u # 3.337 GHz + 14,010,901,248 instructions:u # 2.36 insn per cycle + 1.781311393 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2876) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -142,23 +121,23 @@ Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.897600e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.248967e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.248967e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.245188 sec - 6,373,380,149 cycles # 2.833 GHz - 13,408,335,115 instructions # 2.10 insn per cycle - 2.250462198 seconds time 
elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2517) (512y: 302) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.603546e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.207873e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.207873e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.717139 sec + 5,740,584,949 cycles:u # 3.338 GHz + 13,416,395,069 instructions:u # 2.34 insn per cycle + 1.720286760 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2516) (512y: 302) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -168,23 +147,23 @@ Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.921480e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 4.136739e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.136739e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.776161 sec - 5,570,521,591 cycles # 2.004 GHz - 9,179,596,120 instructions # 1.65 insn per cycle - 2.781332851 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 7.005999e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.675205e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.675205e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.626320 sec + 4,611,437,808 cycles:u # 2.830 GHz + 9,189,208,901 instructions:u # 1.99 insn per cycle + 1.629854221 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1439) (512y: 212) (512z: 2053) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index ebc965cc92..7987be9602 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_18:37:09 +DATE: 2023-10-25_18:20:51 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.017326e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.054319e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.066811e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.454634 sec - 1,998,901,545 cycles # 2.995 GHz - 2,904,650,993 instructions # 1.45 insn per cycle - 0.724253457 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.124031e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.322289e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.333522e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.587604 sec - 2,471,633,267 cycles # 3.015 GHz - 3,730,138,752 instructions # 1.51 insn per cycle - 0.878825359 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.413122e+00 -Avg ME (F77/CUDA) = 1.4131213684418649 -Relative difference = 4.469239988637851e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / 
`nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.582748e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.595260e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.595260e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.365564 sec - 19,741,191,972 cycles # 3.100 GHz - 58,964,992,174 instructions # 2.99 insn per cycle - 6.369485962 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.240138e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.260111e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.260111e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 5.082762 sec + 17,702,531,037 cycles:u # 3.482 GHz + 58,956,924,456 instructions:u # 3.33 insn per cycle + 5.085034727 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1189) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.832276e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.875225e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.875225e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.412509 sec - 10,571,839,132 cycles # 3.095 GHz - 30,995,598,646 instructions # 2.93 insn per cycle - 3.416791050 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.124708e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.194947e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.194947e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.699702 sec + 9,406,628,400 cycles:u # 3.482 GHz + 30,994,148,991 instructions:u # 3.29 insn per cycle + 2.701936607 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5217) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.671174e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.843577e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.843577e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.715587 sec - 4,991,139,375 cycles # 2.903 GHz - 11,305,706,976 instructions # 2.27 insn per cycle - 1.719836361 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.277320e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.307020e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.307020e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 
1.307271 sec + 4,422,416,567 cycles:u # 3.379 GHz + 11,308,048,756 instructions:u # 2.56 insn per cycle + 1.309311554 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4552) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.100643e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.122719e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.122719e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.509863 sec - 4,400,565,724 cycles # 2.908 GHz - 10,484,557,861 instructions # 2.38 insn per cycle - 1.513887056 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] 
(23) = ( 1.411319e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.447250e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.447250e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.185443 sec + 4,012,065,391 cycles:u # 3.380 GHz + 10,479,719,967 instructions:u # 2.61 insn per cycle + 1.187626216 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4296) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.479826e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.587663e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.587663e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 
5.001681e+01 ) GeV^-2 -TOTAL : 2.214647 sec - 4,100,640,054 cycles # 1.849 GHz - 5,907,026,834 instructions # 1.44 insn per cycle - 2.218934371 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1551) (512y: 95) (512z: 3573) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.593142e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.639018e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.639018e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.054999 sec + 2,965,259,928 cycles:u # 2.806 GHz + 5,897,351,428 instructions:u # 1.99 insn per cycle + 1.057037566 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1544) (512y: 95) (512z: 3573) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index 84eb682463..f6fba56746 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -36,77 +36,31 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_19:04:11 +DATE: 2023-10-25_18:43:22 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.737533e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.009392e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.009392e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.482468 sec - 2,092,072,357 cycles # 3.017 GHz - 3,191,388,192 instructions # 1.53 insn per cycle - 0.750549951 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.824576e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.948428e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.948428e+06 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.804158 sec - 3,165,815,286 cycles # 3.022 GHz - 5,087,211,394 instructions # 1.61 insn per cycle - 1.108277579 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.413122e+00 -Avg ME (F77/CUDA) = 1.4131213684418649 -Relative difference = 4.469239988637851e-07 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.552798e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.565584e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.565584e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.447784 sec - 19,812,659,516 cycles # 3.072 GHz - 58,973,017,270 instructions # 2.98 insn per cycle - 6.451999180 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.240242e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.260203e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.260203e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 5.086432 sec + 17,723,527,014 cycles:u # 3.483 GHz + 58,965,934,898 instructions:u # 3.33 insn per cycle + 5.088899080 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1189) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -117,23 +71,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.843686e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.887129e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.887129e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.410617 sec - 10,606,495,630 cycles # 3.107 GHz - 31,045,364,778 instructions # 2.93 insn per cycle - 3.414818481 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.119391e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.188626e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.188626e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.706995 sec + 9,423,619,443 cycles:u # 3.479 GHz + 31,038,511,200 instructions:u # 3.29 insn per cycle + 2.709153553 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5217) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -144,23 +98,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.635348e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.810089e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.810089e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.728990 sec - 5,025,463,399 cycles # 2.900 GHz - 11,356,936,586 instructions # 2.26 insn per cycle - 1.733253508 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.274205e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.303591e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.303591e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) 
GeV^-2 +TOTAL : 1.314617 sec + 4,445,320,775 cycles:u # 3.377 GHz + 11,353,445,840 instructions:u # 2.55 insn per cycle + 1.316821327 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4552) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -171,23 +125,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.091196e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.113656e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.113656e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.530073 sec - 4,437,395,928 cycles # 2.894 GHz - 10,533,774,197 instructions # 2.37 insn per cycle - 1.534310467 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.407034e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.442978e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.442978e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.193915 sec + 4,035,505,893 cycles:u # 3.375 GHz + 10,525,116,592 instructions:u # 2.61 insn per cycle + 1.196189763 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4296) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -198,23 +152,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.778593e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.893791e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.893791e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.137109 sec - 4,135,271,327 cycles # 1.933 GHz - 5,946,987,935 instructions # 1.44 insn per cycle - 2.141484524 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1551) (512y: 95) (512z: 3573) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.589793e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.635308e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.635308e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.061298 sec + 2,984,133,149 cycles:u # 2.807 GHz + 5,931,522,413 instructions:u # 1.99 insn per cycle + 1.063389582 seconds time elapsed +=Symbols in 
CPPProcess.o= (~sse4: 0) (avx2: 1544) (512y: 95) (512z: 3573) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index 5da42e2dfc..e5a4c81942 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_18:37:38 +DATE: 2023-10-25_18:21:05 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.980838e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.046634e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.059100e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.459445 sec - 2,001,624,062 cycles # 2.971 GHz - 2,897,227,747 instructions # 1.45 insn per cycle - 0.730848040 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.119062e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.315389e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.326570e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.584635 sec - 2,502,191,176 cycles # 3.029 GHz - 3,799,894,385 instructions # 1.52 insn per cycle - 0.885312342 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.413122e+00 -Avg ME (F77/CUDA) = 1.4131213684418649 -Relative difference = 4.469239988637851e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / 
`nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.578276e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.590963e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.590963e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.377287 sec - 19,776,032,592 cycles # 3.100 GHz - 59,242,647,666 instructions # 3.00 insn per cycle - 6.381344291 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1315) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.235368e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.255294e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.255294e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 5.090662 sec + 17,743,101,818 cycles:u # 3.485 GHz + 59,223,602,918 instructions:u # 3.34 insn per cycle + 5.092817303 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1314) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.838897e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.882635e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.882635e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.407218 sec - 10,428,150,513 cycles # 3.058 GHz - 30,703,821,983 instructions # 2.94 insn per cycle - 3.411368559 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.179120e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.249951e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.249951e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.675866 sec + 9,324,042,232 cycles:u # 3.483 GHz + 30,702,394,700 instructions:u # 3.29 insn per cycle + 2.677861609 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5043) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.472201e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.635749e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.635749e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.750914 sec - 5,109,907,048 cycles # 2.913 GHz - 11,785,108,632 instructions # 2.31 insn per cycle - 1.754997634 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.244479e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.272491e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.272491e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 
1.340778 sec + 4,537,512,394 cycles:u # 3.380 GHz + 11,787,142,879 instructions:u # 2.60 insn per cycle + 1.342784392 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4668) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.023891e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.043074e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.043074e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.621556 sec - 4,691,054,117 cycles # 2.887 GHz - 11,032,599,545 instructions # 2.35 insn per cycle - 1.625732931 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] 
(23) = ( 1.338943e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.371481e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.371481e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.248173 sec + 4,224,381,209 cycles:u # 3.380 GHz + 11,027,417,491 instructions:u # 2.61 insn per cycle + 1.250158596 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4331) (512y: 245) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.596531e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.705229e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.705229e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 
5.001681e+01 ) GeV^-2 -TOTAL : 2.180528 sec - 4,124,129,963 cycles # 1.890 GHz - 6,174,744,538 instructions # 1.50 insn per cycle - 2.184771281 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1461) (512y: 139) (512z: 3675) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.589075e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.635288e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.635288e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.057347 sec + 2,971,264,960 cycles:u # 2.806 GHz + 6,175,004,911 instructions:u # 2.08 insn per cycle + 1.059302450 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1458) (512y: 139) (512z: 3673) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 758058b159..6d002fc9e5 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_18:38:06 +DATE: 2023-10-25_18:21:19 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.611847e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.385260e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.471189e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.437775 sec - 1,949,752,289 cycles # 2.999 GHz - 2,775,143,872 instructions # 1.42 insn per cycle - 0.707448913 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.418789e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.455087e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.521596e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.482861 sec - 2,123,251,884 cycles # 3.013 GHz - 3,090,198,407 instructions # 1.46 insn per cycle - 0.761733855 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.412608e+00 -Avg ME (F77/CUDA) = 1.4132214346515752 -Relative difference = 0.00043425681546129636 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / 
`nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.632015e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.645099e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.645099e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.245504 sec - 19,420,521,245 cycles # 3.108 GHz - 59,463,843,270 instructions # 3.06 insn per cycle - 6.249442801 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.297056e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.314416e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.314416e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.724761e+02 +- 2.665338e+02 ) GeV^-2 +TOTAL : 4.993685 sec + 17,409,943,416 cycles:u # 3.485 GHz + 59,434,839,863 instructions:u # 3.41 insn per cycle + 4.995701548 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 961) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 2.1728426918172542e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.406220e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.547669e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.547669e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.968760 sec - 5,998,257,000 cycles # 3.042 GHz - 16,914,468,455 instructions # 2.82 insn per cycle - 1.972914932 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5858) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.094212e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.113675e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.113675e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.724754e+02 +- 2.665334e+02 ) GeV^-2 +TOTAL : 1.520331 sec + 5,295,494,774 cycles:u # 3.480 GHz + 16,897,912,284 instructions:u # 3.19 insn per cycle + 1.522266871 seconds time elapsed +=Symbols in CPPProcess.o= 
(~sse4: 5857) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 3.2890090308261873e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.859553e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.925073e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.925073e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.900948 sec - 2,632,220,925 cycles # 2.911 GHz - 6,140,096,248 instructions # 2.33 insn per cycle - 0.904996982 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.452720e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.551675e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.551675e+05 ) sec^-1 +MeanMatrixElemValue = ( 
4.743692e+02 +- 2.676600e+02 ) GeV^-2 +TOTAL : 0.692108 sec + 2,341,513,988 cycles:u # 3.376 GHz + 6,121,248,476 instructions:u # 2.61 insn per cycle + 0.694004634 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5019) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.072824e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.155063e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.155063e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.809948 sec - 2,370,894,209 cycles # 2.915 GHz - 5,701,521,318 instructions # 2.40 insn per cycle - 0.814071799 seconds time elapsed +OMP threads 
/ `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.710393e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.822562e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.822562e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743692e+02 +- 2.676600e+02 ) GeV^-2 +TOTAL : 0.629663 sec + 2,122,516,384 cycles:u # 3.370 GHz + 5,675,155,914 instructions:u # 2.67 insn per cycle + 0.631514063 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4804) (512y: 36) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.607455e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.657302e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.657302e+05 ) sec^-1 
-MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.040929 sec - 2,057,132,306 cycles # 1.970 GHz - 3,365,579,683 instructions # 1.64 insn per cycle - 1.044863677 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2141) (512y: 39) (512z: 3775) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.268970e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.437003e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.437003e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743730e+02 +- 2.676609e+02 ) GeV^-2 +TOTAL : 0.526047 sec + 1,487,926,291 cycles:u # 2.820 GHz + 3,336,026,026 instructions:u # 2.24 insn per cycle + 0.527951018 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2130) (512y: 40) (512z: 3776) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index 48beeeb5ad..e480dbbed6 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -36,77 +36,31 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_19:04:39 +DATE: 2023-10-25_18:43:36 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.864071e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.240535e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.240535e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 -TOTAL : 0.462582 sec - 1,884,746,111 cycles # 2.811 GHz - 2,786,242,007 instructions # 1.48 insn per cycle - 0.730246160 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.695482e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.755030e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.755030e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.737500e+02 +- 4.776370e+02 ) GeV^-2 -TOTAL : 0.635503 sec - 2,447,308,351 cycles # 2.825 GHz - 3,823,535,894 instructions # 1.56 insn per cycle - 0.923575231 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.412608e+00 -Avg ME (F77/CUDA) = 1.4132214346515752 -Relative difference = 0.00043425681546129636 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.567048e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.579966e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.579966e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.406446 sec - 19,444,692,901 cycles # 3.034 GHz - 59,468,886,107 instructions # 3.06 insn per cycle - 6.410558637 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.292354e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.310416e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.310416e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.724761e+02 +- 2.665338e+02 ) GeV^-2 +TOTAL : 5.003443 sec + 17,430,376,903 cycles:u # 3.483 GHz + 59,440,189,671 instructions:u # 3.41 insn per cycle + 5.005751709 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 961) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -117,23 +71,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.536135e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.681573e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.681573e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.943530 sec - 6,018,572,710 cycles # 3.091 GHz - 16,962,561,293 instructions # 2.82 insn per cycle - 1.947552922 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5858) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.086780e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.105961e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.105961e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.724754e+02 +- 2.665334e+02 ) GeV^-2 +TOTAL : 1.533556 sec + 5,339,863,398 cycles:u # 3.478 GHz + 16,943,311,888 instructions:u # 3.17 insn per cycle + 1.535527412 seconds time elapsed +=Symbols in 
CPPProcess.o= (~sse4: 5857) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -144,23 +98,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.852963e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.918436e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.918436e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.908122 sec - 2,646,262,038 cycles # 2.903 GHz - 6,176,972,836 instructions # 2.33 insn per cycle - 0.912119450 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.453334e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.552921e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.552921e+05 ) sec^-1 
+MeanMatrixElemValue = ( 4.743692e+02 +- 2.676600e+02 ) GeV^-2 +TOTAL : 0.694570 sec + 2,347,934,356 cycles:u # 3.372 GHz + 6,155,421,859 instructions:u # 2.62 insn per cycle + 0.696575325 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5019) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -171,23 +125,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.053883e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.135495e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.135495e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.821672 sec - 2,391,067,663 cycles # 2.897 GHz - 5,738,392,055 instructions # 2.40 insn per cycle - 0.825851117 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.704131e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.815547e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.815547e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743692e+02 +- 2.676600e+02 ) GeV^-2 +TOTAL : 0.631667 sec + 2,134,776,064 cycles:u # 3.371 GHz + 5,709,329,585 instructions:u # 2.67 insn per cycle + 0.633691443 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4804) (512y: 36) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -198,23 +152,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.562282e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.609907e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.609907e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.075433 sec - 2,080,452,431 cycles # 1.928 GHz - 3,407,597,282 instructions # 1.64 insn per cycle - 1.079584991 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2141) (512y: 39) (512z: 3775) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.268724e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.434771e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.434771e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743730e+02 +- 2.676609e+02 ) GeV^-2 +TOTAL : 0.528841 sec + 1,497,365,513 cycles:u # 2.823 GHz + 3,374,800,525 instructions:u # 2.25 insn per cycle + 0.530812857 seconds time elapsed +=Symbols in 
CPPProcess.o= (~sse4: 0) (avx2: 2130) (512y: 40) (512z: 3776) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index 35d51d9f5b..8118424b1a 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_18:38:30 +DATE: 2023-10-25_18:21:29 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.558695e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.304995e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.390429e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.441583 sec - 1,913,307,169 cycles # 2.938 GHz - 2,730,410,416 instructions # 1.43 insn per cycle - 0.710289745 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 248 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.431804e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.480539e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.548305e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.484144 sec - 2,122,361,683 cycles # 3.000 GHz - 3,092,428,798 instructions # 1.46 insn per cycle - 0.764929414 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.412608e+00 -Avg ME (F77/CUDA) = 1.4132214346515752 -Relative difference = 0.00043425681546129636 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / 
`nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.622945e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.636379e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.636379e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.266974 sec - 19,387,707,588 cycles # 3.092 GHz - 59,211,783,711 instructions # 3.05 insn per cycle - 6.270947254 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.324498e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.342154e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.342154e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.724761e+02 +- 2.665338e+02 ) GeV^-2 +TOTAL : 4.952652 sec + 17,264,726,428 cycles:u # 3.485 GHz + 59,182,405,457 instructions:u # 3.43 insn per cycle + 4.954691408 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1027) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 2.1728426918172542e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.919855e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.077833e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.077833e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.856181 sec - 5,736,685,730 cycles # 3.085 GHz - 16,708,949,188 instructions # 2.91 insn per cycle - 1.860305013 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5624) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.160023e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.181788e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.181788e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.724754e+02 +- 2.665334e+02 ) GeV^-2 +TOTAL : 1.435031 sec + 4,998,214,324 cycles:u # 3.479 GHz + 16,692,678,560 instructions:u # 3.34 insn per cycle + 1.436889827 seconds time elapsed +=Symbols in CPPProcess.o= 
(~sse4: 5623) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 3.2890090308261873e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.619777e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.669337e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.669337e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.031321 sec - 3,001,059,822 cycles # 2.901 GHz - 6,807,446,499 instructions # 2.27 insn per cycle - 1.035316846 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.156379e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.233113e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.233113e+05 ) sec^-1 +MeanMatrixElemValue = ( 
4.743692e+02 +- 2.676600e+02 ) GeV^-2 +TOTAL : 0.783752 sec + 2,651,680,008 cycles:u # 3.377 GHz + 6,788,110,157 instructions:u # 2.56 insn per cycle + 0.785591013 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5670) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.758092e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.816843e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.816843e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.951721 sec - 2,767,509,162 cycles # 2.897 GHz - 6,354,591,455 instructions # 2.30 insn per cycle - 0.955744845 seconds time elapsed +OMP threads 
/ `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.329332e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.411541e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.411541e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743692e+02 +- 2.676600e+02 ) GeV^-2 +TOTAL : 0.726461 sec + 2,457,143,560 cycles:u # 3.375 GHz + 6,327,821,288 instructions:u # 2.58 insn per cycle + 0.728338919 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5429) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.458284e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.499118e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.499118e+05 ) sec^-1 
-MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.146088 sec - 2,235,083,857 cycles # 1.946 GHz - 3,731,059,413 instructions # 1.67 insn per cycle - 1.150018435 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2381) (512y: 29) (512z: 4070) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.935766e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.070635e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.070635e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743730e+02 +- 2.676609e+02 ) GeV^-2 +TOTAL : 0.582695 sec + 1,645,265,503 cycles:u # 2.816 GHz + 3,708,730,324 instructions:u # 2.25 insn per cycle + 0.584530143 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2375) (512y: 30) (512z: 4073) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 392905595e..9fbb059e3e 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_18:38:54 +DATE: 2023-10-25_18:21:40 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.991876e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.048685e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.061466e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.460934 sec - 1,966,721,120 cycles # 2.924 GHz - 2,827,577,653 instructions # 1.44 insn per cycle - 0.730736586 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.122528e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.320226e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.331429e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.588908 sec - 2,472,359,151 cycles # 3.012 GHz - 3,773,127,523 instructions # 1.53 insn per cycle - 0.879878807 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.413122e+00 -Avg ME (F77/CUDA) = 1.4131213755569487 -Relative difference = 4.418889885423659e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / 
`nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.519449e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.531484e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.531484e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.525526 sec - 20,092,492,706 cycles # 3.078 GHz - 60,052,973,297 instructions # 2.99 insn per cycle - 6.529664742 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.209963e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.229586e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.229586e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 5.130410 sec + 17,885,191,099 cycles:u # 3.485 GHz + 60,043,949,323 instructions:u # 3.36 insn per cycle + 5.132575599 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1224) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.869773e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.913961e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.913961e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.385815 sec - 10,415,517,369 cycles # 3.073 GHz - 30,737,885,914 instructions # 2.95 insn per cycle - 3.390029957 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5351) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.173070e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.243675e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.243675e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.678806 sec + 9,333,953,065 cycles:u # 3.482 GHz + 30,738,044,301 instructions:u # 3.29 insn per cycle + 2.680794166 seconds time elapsed +=Symbols in CPPProcess.o= 
(~sse4: 5353) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.784627e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.958931e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.958931e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.696342 sec - 4,938,080,705 cycles # 2.905 GHz - 11,263,764,405 instructions # 2.28 insn per cycle - 1.700575900 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4683) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.294459e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.325124e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.325124e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.290177 sec + 4,364,783,621 cycles:u # 3.379 GHz + 11,265,938,496 instructions:u # 2.58 insn per cycle + 1.292182431 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4684) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.113465e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.136337e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.136337e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.492907 sec - 4,341,247,814 cycles # 2.902 GHz - 10,434,510,449 
instructions # 2.40 insn per cycle - 1.497014311 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4418) (512y: 83) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.431372e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.468767e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.468767e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.169549 sec + 3,950,178,208 cycles:u # 3.373 GHz + 10,430,174,280 instructions:u # 2.64 insn per cycle + 1.171659305 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4420) (512y: 83) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 7.532516e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.636201e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.636201e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.198970 sec - 4,210,314,244 cycles # 1.912 GHz - 6,111,580,609 instructions # 1.45 insn per cycle - 2.203311339 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2065) (512y: 117) (512z: 3649) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.524309e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.566450e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.566450e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.101455 sec + 3,091,606,846 cycles:u # 2.803 GHz + 6,106,393,268 instructions:u # 1.98 insn per cycle + 1.103586338 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2060) (512y: 117) (512z: 3648) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index 39bb25c947..3b0432fb23 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_18:39:22 +DATE: 2023-10-25_18:21:54 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.944009e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.041807e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.053824e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.457165 sec - 2,013,112,285 cycles # 3.009 GHz - 2,933,107,931 instructions # 1.46 insn per cycle - 0.726273814 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.111421e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.304827e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.316338e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.584817 sec - 2,503,239,468 cycles # 3.024 GHz - 3,758,528,305 instructions # 1.50 insn per cycle - 0.886403311 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.413122e+00 -Avg ME (F77/CUDA) = 1.4131213755569487 -Relative difference = 4.418889885423659e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / 
`nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.502057e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.514010e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.514010e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.570415 sec - 20,096,701,303 cycles # 3.057 GHz - 60,261,778,784 instructions # 3.00 insn per cycle - 6.574454844 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.191064e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.210461e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.210461e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 5.160089 sec + 17,987,750,192 cycles:u # 3.485 GHz + 60,252,944,387 instructions:u # 3.35 insn per cycle + 5.162228085 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1271) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.950569e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.996463e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.996463e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.330994 sec - 10,302,022,615 cycles # 3.090 GHz - 30,444,386,178 instructions # 2.96 insn per cycle - 3.335148743 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5149) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.300357e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.373693e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.373693e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.624914 sec + 9,146,018,588 cycles:u # 3.482 GHz + 30,447,600,201 instructions:u # 3.33 insn per cycle + 2.626891118 seconds time elapsed +=Symbols in CPPProcess.o= 
(~sse4: 5151) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.413979e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.578528e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.578528e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.762053 sec - 5,116,204,786 cycles # 2.900 GHz - 11,780,626,112 instructions # 2.30 insn per cycle - 1.766305951 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4795) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.254068e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.282604e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.282604e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.330736 sec + 4,503,482,582 cycles:u # 3.380 GHz + 11,782,394,378 instructions:u # 2.62 insn per cycle + 1.332765318 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4797) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.034226e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.053843e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.053843e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.605857 sec - 4,642,992,473 cycles # 2.885 GHz - 10,992,793,436 
instructions # 2.37 insn per cycle - 1.609875653 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4423) (512y: 238) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.352471e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.385869e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.385869e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.236434 sec + 4,182,708,332 cycles:u # 3.378 GHz + 10,987,479,789 instructions:u # 2.63 insn per cycle + 1.238494525 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4427) (512y: 236) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 7.613528e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.720366e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.720366e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.175272 sec - 4,221,455,153 cycles # 1.938 GHz - 6,349,351,796 instructions # 1.50 insn per cycle - 2.179367593 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1959) (512y: 163) (512z: 3727) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.534076e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.577234e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.577234e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.094306 sec + 3,073,764,406 cycles:u # 2.805 GHz + 6,347,125,131 instructions:u # 2.06 insn per cycle + 1.096280225 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1957) (512y: 163) (512z: 3727) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index bff9233075..7bcc439d18 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:39:51 +DATE: 2023-10-25_18:22:08 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.468061e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.492682e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.494686e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.518589 sec - 2,245,805,905 cycles # 3.007 GHz - 3,544,979,174 instructions # 1.58 insn per cycle - 0.807315238 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.126268e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.153472e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.154624e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.016484 sec - 10,017,854,023 cycles # 3.069 GHz - 22,587,762,207 instructions # 2.25 insn per cycle - 3.322845777 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 6.626675e-04 -Avg ME (F77/CUDA) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no 
SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.955514e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.956404e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.956404e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.396540 sec - 25,870,300,414 cycles # 3.080 GHz - 78,705,757,349 instructions # 3.04 insn per cycle - 8.400556749 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4800) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.482693e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.484242e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.484242e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.613714 sec + 23,057,364,787 cycles:u # 3.486 GHz + 78,665,847,357 instructions:u # 3.41 insn per cycle + 6.615785324 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4809) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.628098e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.631366e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.631366e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.529617 sec - 13,157,831,550 cycles # 2.903 GHz - 39,316,654,466 instructions # 2.99 insn per cycle - 4.533882139 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.896420e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.902176e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.902176e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.357080 sec + 11,699,523,282 cycles:u # 3.483 GHz + 39,287,043,182 instructions:u # 3.36 insn per cycle + 3.359085316 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13159) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.489125e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.506110e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.506110e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.941066 sec - 5,640,899,050 cycles # 2.901 GHz - 13,915,027,017 instructions # 2.47 insn per cycle - 1.945275776 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.154371e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.157326e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.157326e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) 
GeV^-4 +TOTAL : 1.427741 sec + 4,829,942,613 cycles:u # 3.379 GHz + 13,894,278,241 instructions:u # 2.88 insn per cycle + 1.429627727 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.632867e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.655023e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.655023e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.711467 sec - 4,969,822,591 cycles # 2.898 GHz - 12,556,829,300 instructions # 2.53 insn per cycle - 1.715640499 seconds time elapsed +OMP threads / `nproc --all` = 1 
/ 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.302434e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.306375e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.306375e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.266463 sec + 4,283,197,115 cycles:u # 3.378 GHz + 12,535,901,357 instructions:u # 2.93 insn per cycle + 1.268383272 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10999) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.688750e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.702937e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.702937e+03 ) sec^-1 
-MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.142319 sec - 4,116,162,262 cycles # 1.918 GHz - 6,441,474,951 instructions # 1.56 insn per cycle - 2.146523645 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1816) (512y: 102) (512z:10110) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.682573e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.689418e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.689418e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.982400 sec + 2,738,419,974 cycles:u # 2.783 GHz + 6,418,503,086 instructions:u # 2.34 insn per cycle + 0.984400733 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1809) (512y: 102) (512z:10109) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index 19b0ccbfe1..fca93b3763 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -36,77 +36,31 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_19:05:36 +DATE: 2023-10-25_18:43:57 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.145401e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.455796e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.455796e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.511465 sec - 2,211,538,389 cycles # 2.999 GHz - 3,499,461,341 instructions # 1.58 insn per cycle - 0.799495725 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.639414e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.104964e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.104964e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.289425 sec - 10,927,544,611 cycles # 3.076 GHz - 23,831,419,819 instructions # 2.18 insn per cycle - 3.609486496 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 6.626675e-04 -Avg ME (F77/CUDA) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.955661e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.956599e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.956599e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.399286 sec - 25,889,610,376 cycles # 3.081 GHz - 78,711,674,763 instructions # 3.04 insn per cycle - 8.403464378 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4800) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.482180e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.483695e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.483695e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.617936 sec + 23,065,153,659 cycles:u # 3.484 GHz + 78,667,897,687 instructions:u # 3.41 insn per cycle + 6.620406229 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4809) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. 
+[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -117,23 +71,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.684905e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.688434e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.688434e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.463396 sec - 13,180,558,134 cycles # 2.951 GHz - 39,329,251,791 instructions # 2.98 insn per cycle - 4.467689901 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.898091e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.903902e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.903902e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.359416 sec + 11,696,429,718 cycles:u # 3.480 GHz + 39,295,562,023 instructions:u # 3.36 insn per cycle + 3.362004153 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13159) (avx2: 0) (512y: 0) 
(512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -144,23 +98,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.312850e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.329128e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.329128e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.985951 sec - 5,671,057,559 cycles # 2.852 GHz - 13,925,731,418 instructions # 2.46 insn per cycle - 1.990267942 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.149196e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.152247e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.152247e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 
3.250467e-01 ) GeV^-4 +TOTAL : 1.437338 sec + 4,852,315,777 cycles:u # 3.371 GHz + 13,900,258,972 instructions:u # 2.86 insn per cycle + 1.439595273 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -171,23 +125,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.189591e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.210527e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.210527e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.797800 sec - 4,986,486,293 cycles # 2.768 GHz - 12,566,997,052 instructions # 2.52 insn per cycle - 1.802092456 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.301397e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.305354e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.305354e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.270948 sec + 4,288,304,124 cycles:u # 3.369 GHz + 12,541,884,276 instructions:u # 2.92 insn per cycle + 1.273284777 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10999) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -198,23 +152,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.650923e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.665003e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.665003e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.156621 sec - 4,130,305,981 cycles # 1.912 GHz - 6,453,079,741 instructions # 1.56 insn per cycle - 2.160974147 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1816) (512y: 102) (512z:10110) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.683733e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.690312e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.690312e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.984644 sec + 2,740,270,423 cycles:u # 2.778 GHz + 6,425,075,350 instructions:u # 2.34 insn per cycle + 0.986900097 seconds time elapsed +=Symbols 
in CPPProcess.o= (~sse4: 0) (avx2: 1809) (512y: 102) (512z:10109) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index 81203fa77a..ad7708b47f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_19:16:31 +DATE: 2023-10-25_18:47:58 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.481338e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.505105e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.507123e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.501686 sec - 2,214,909,881 cycles # 3.031 GHz - 3,458,747,276 instructions # 1.56 insn per cycle - 0.800330270 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.151978e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.180537e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.181762e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 3.117189 sec - 10,377,272,213 cycles # 3.074 GHz - 22,017,651,367 instructions # 2.12 insn per cycle - 3.433688033 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 6.626675e-04 -Avg ME (F77/CUDA) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 1.951603e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.952527e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.952527e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.480693e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.482227e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.482227e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 8.414289 sec - 25,891,693,281 cycles # 3.076 GHz - 78,705,382,161 instructions # 3.04 insn per cycle - 8.418214136 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4800) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.619024 sec + 23,070,733,554 cycles:u # 3.485 GHz + 78,665,846,816 instructions:u # 3.41 insn per cycle + 6.621165760 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4809) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.708393e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.711838e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.711838e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.890759e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.896486e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.896486e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.432833 sec - 13,155,908,783 cycles # 2.966 GHz - 39,315,348,391 instructions # 2.99 insn per cycle - 4.436757068 seconds time elapsed +TOTAL : 3.360853 sec + 11,710,087,927 cycles:u # 3.483 GHz + 39,287,041,932 instructions:u # 3.35 insn per cycle + 3.362859101 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.511504e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.528904e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.528904e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.152802e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.155855e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.155855e+04 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.937344 sec - 5,646,349,471 cycles # 2.910 GHz - 13,913,307,123 instructions # 2.46 insn per cycle - 1.941259569 seconds time elapsed +TOTAL : 1.429922 sec + 4,836,711,653 cycles:u # 3.379 GHz + 13,894,277,998 instructions:u # 2.87 insn per cycle + 1.431987434 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11359) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.572932e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.594635e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.594635e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.303126e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.307117e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.307117e+04 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.724509 sec - 4,978,068,357 cycles # 2.882 GHz - 12,554,500,287 instructions # 2.52 insn per cycle - 1.728498714 seconds time elapsed +TOTAL : 1.265800 sec + 4,280,728,559 cycles:u # 3.378 GHz + 12,535,902,057 instructions:u # 
2.93 insn per cycle + 1.267765042 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10999) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.677554e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.691380e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.691380e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.685218e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.691889e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.691889e+04 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.147049 sec - 4,126,422,075 cycles # 1.919 GHz - 6,439,114,110 instructions # 1.56 insn per cycle - 2.151134180 seconds time 
elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1816) (512y: 102) (512z:10110) +TOTAL : 0.980549 sec + 2,733,827,610 cycles:u # 2.783 GHz + 6,418,505,164 instructions:u # 2.35 insn per cycle + 0.982514108 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1809) (512y: 102) (512z:10109) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt index 983ed35921..dadc4798bd 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt @@ -36,64 +36,21 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_19:13:27 +DATE: 2023-10-25_18:46:52 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.486758e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.510432e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.512397e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.503841 sec - 2,219,754,850 cycles # 2.994 GHz - 3,493,120,915 instructions # 1.57 insn per cycle - 0.814222419 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.146385e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.174926e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.176115e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.060532 sec - 10,143,930,144 cycles # 3.066 GHz - 23,186,884,860 instructions # 2.29 insn per cycle - 3.364498351 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 6.626675e-04 -Avg ME (F77/CUDA) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.960187e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
1.961109e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.961109e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.392474 sec - 25,877,587,986 cycles # 3.088 GHz - 78,705,423,071 instructions # 3.04 insn per cycle - 8.396357877 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4800) (avx2: 0) (512y: 0) (512z: 0) +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe: Aborted + 2,347,012 cycles:u # 1.465 GHz + 3,109,741 instructions:u # 1.32 insn per cycle + 0.155906178 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4809) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -103,23 +60,14 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.691020e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 3.694421e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.694421e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.452252 sec - 13,153,001,214 cycles # 2.952 GHz - 39,316,173,049 instructions # 2.99 insn per cycle - 4.456201629 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe: Aborted + 1,966,509 cycles:u # 1.396 GHz + 3,114,406 instructions:u # 1.58 insn per cycle + 0.114047165 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -129,23 +77,14 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.443294e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.459986e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.459986e+03 ) 
sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.951557 sec - 5,638,519,517 cycles # 2.884 GHz - 13,914,420,326 instructions # 2.47 insn per cycle - 1.955513391 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe: Aborted + 1,955,937 cycles:u # 1.373 GHz + 3,115,607 instructions:u # 1.59 insn per cycle + 0.114119263 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -155,23 +94,14 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.662759e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.685664e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.685664e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 
-TOTAL : 1.706815 sec - 4,966,762,812 cycles # 2.905 GHz - 12,556,639,833 instructions # 2.53 insn per cycle - 1.710823467 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe: Aborted + 1,937,923 cycles:u # 1.379 GHz + 3,115,506 instructions:u # 1.61 insn per cycle + 0.126877087 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10999) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -181,23 +111,14 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.630990e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.645195e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.645195e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.158956 sec - 4,117,585,001 cycles # 1.904 GHz - 
6,441,334,233 instructions # 1.56 insn per cycle - 2.163053685 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1816) (512y: 102) (512z:10110) +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe: Aborted + 1,973,927 cycles:u # 1.378 GHz + 3,115,404 instructions:u # 1.58 insn per cycle + 0.115762547 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1809) (512y: 102) (512z:10109) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index 089d292aa8..79b738f1a7 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -36,67 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_19:10:27 +DATE: 2023-10-25_18:46:06 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.218258e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.503307e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.505360e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.506835 sec - 2,221,452,108 cycles # 3.022 GHz - 3,523,570,836 instructions # 1.59 insn per cycle - 0.796752404 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.728232e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.175248e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.176437e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.202196 sec - 10,370,823,383 cycles # 3.013 GHz - 22,699,363,327 instructions # 2.19 insn per cycle - 3.506471268 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 6.626675e-04 -Avg ME (F77/CUDA) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
1.964142e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.965067e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.965067e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.359175 sec - 25,872,166,576 cycles # 3.094 GHz - 78,706,432,099 instructions # 3.04 insn per cycle - 8.363176184 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4800) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.481274e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.482802e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.482802e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.617512 sec + 23,072,069,799 cycles:u # 3.486 GHz + 78,665,849,532 instructions:u # 3.41 insn per cycle + 6.619627854 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4809) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -106,23 +69,23 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.675901e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.679176e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.679176e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.470613 sec - 13,168,571,852 cycles # 2.943 GHz - 39,316,143,486 instructions # 2.99 insn per cycle - 4.474685106 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.900904e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.906480e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.906480e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.353509 sec + 11,687,455,700 cycles:u # 3.484 GHz + 39,287,041,736 instructions:u # 3.36 insn per cycle + 3.355437633 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13159) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -132,23 +95,23 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.475559e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.492856e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.492856e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.944338 sec - 5,656,434,910 cycles # 2.905 GHz - 13,914,488,872 instructions # 2.46 insn per cycle - 1.948357306 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.154470e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.157552e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.157552e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 
3.250467e-01 ) GeV^-4 +TOTAL : 1.427999 sec + 4,829,254,268 cycles:u # 3.378 GHz + 13,894,278,299 instructions:u # 2.88 insn per cycle + 1.430053175 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -158,23 +121,23 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.315923e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.336840e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.336840e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.769971 sec - 4,966,635,750 cycles # 2.801 GHz - 12,556,400,439 instructions # 2.53 insn per cycle - 1.774243834 seconds time elapsed +OMP 
threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.302434e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.306408e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.306408e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.266829 sec + 4,282,871,958 cycles:u # 3.376 GHz + 12,535,901,370 instructions:u # 2.93 insn per cycle + 1.268787686 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10999) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -184,23 +147,23 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.505641e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.519600e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
7.519600e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.194843 sec - 4,122,096,598 cycles # 1.876 GHz - 6,442,654,429 instructions # 1.56 insn per cycle - 2.198924835 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1816) (512y: 102) (512z:10110) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.685109e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.691611e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.691611e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.980976 sec + 2,733,292,461 cycles:u # 2.783 GHz + 6,418,502,985 instructions:u # 2.35 insn per cycle + 0.982884138 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1809) (512y: 102) (512z:10109) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index db28556fed..b3631b047d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:40:27 +DATE: 2023-10-25_18:22:25 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.480350e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.503709e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.505689e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.515106 sec - 2,241,115,881 cycles # 3.010 GHz - 3,491,500,030 instructions # 1.56 insn per cycle - 0.803761340 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.140925e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.168327e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.169450e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.010736 sec - 10,017,349,944 cycles # 3.076 GHz - 21,234,719,579 instructions # 2.12 insn per cycle - 3.315687380 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 6.626675e-04 -Avg ME (F77/CUDA) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no 
SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.950176e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.951182e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.951182e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.419375 sec - 25,820,844,871 cycles # 3.067 GHz - 78,455,782,361 instructions # 3.04 insn per cycle - 8.423501286 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4147) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.489325e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.490886e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.490886e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.597808 sec + 22,996,862,806 cycles:u # 3.485 GHz + 78,410,058,827 instructions:u # 3.41 insn per cycle + 6.599999372 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.695368e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.698841e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.698841e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.446590 sec - 13,096,365,032 cycles # 2.943 GHz - 39,266,931,549 instructions # 3.00 insn per cycle - 4.450776925 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12925) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.869615e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.875394e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.875394e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.375402 sec + 11,763,307,713 cycles:u # 3.483 GHz + 39,236,968,244 instructions:u # 3.34 insn per cycle + 3.377385849 seconds time elapsed +=Symbols in 
CPPProcess.o= (~sse4:12921) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.473385e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.490359e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.490359e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.944494 sec - 5,647,694,185 cycles # 2.899 GHz - 14,031,784,985 instructions # 2.48 insn per cycle - 1.948726891 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11428) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.162496e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.165531e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.165531e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.417701 sec + 4,795,627,688 cycles:u # 3.379 GHz + 14,010,552,440 instructions:u # 2.92 insn per cycle + 1.419630360 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11430) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.439321e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.460393e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.460393e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.746193 sec - 5,071,268,913 
cycles # 2.898 GHz - 12,684,289,306 instructions # 2.50 insn per cycle - 1.750379728 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10992) (512y: 240) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.285048e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.288823e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.288823e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.283251 sec + 4,340,411,229 cycles:u # 3.378 GHz + 12,663,010,683 instructions:u # 2.92 insn per cycle + 1.285187524 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10994) (512y: 240) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] 
-OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.529292e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.543220e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.543220e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.188060 sec - 4,141,433,761 cycles # 1.890 GHz - 6,563,782,413 instructions # 1.58 insn per cycle - 2.192342750 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1640) (512y: 192) (512z:10068) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.672615e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.679206e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.679206e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.988073 sec + 2,754,136,726 cycles:u # 2.783 GHz + 6,540,580,960 instructions:u # 2.37 insn per cycle + 0.990085917 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1638) (512y: 192) (512z:10078) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index 4c6f36c205..815102b1d0 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:55:24 +DATE: 2023-10-25_18:34:11 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.222490e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.244878e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.246691e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.528299 sec - 2,270,979,797 cycles # 3.018 GHz - 3,579,093,863 instructions # 1.58 insn per cycle - 0.812313256 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.777355e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.800618e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.801578e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.287863 sec - 10,849,521,679 cycles # 3.070 GHz - 24,134,668,326 instructions # 2.22 insn per cycle - 3.593012410 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 6.626675e-04 -Avg ME (F77/CUDA) = 6.6266731198158122E-004 -Relative difference = 2.837296513854949e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = 
SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.444628e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.445098e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.445098e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 36.909027 sec - 113,587,132,048 cycles # 3.078 GHz - 144,964,358,008 instructions # 1.28 insn per cycle - 36.912946290 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:21605) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 5.019079e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.019719e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.019719e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 32.683479 sec + 113,917,594,376 cycles:u # 3.485 GHz + 144,831,023,491 instructions:u # 1.27 insn per cycle + 32.685587809 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:21600) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 2.83729918072716e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.256823e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.259432e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.259432e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.044758 sec - 14,717,259,007 cycles # 2.916 GHz - 37,577,668,645 instructions # 2.55 insn per cycle - 5.048857745 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.953502e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.957154e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.957154e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 4.155616 sec + 14,485,066,140 cycles:u # 3.484 GHz + 37,546,432,009 instructions:u # 2.59 insn per cycle + 4.157607785 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:68118) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.803332e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.817865e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.817865e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.110842 sec - 6,124,055,435 cycles # 2.897 GHz - 13,063,274,169 instructions # 2.13 insn per cycle - 2.114845473 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 9.286832e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.306233e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.306233e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) 
GeV^-4 +TOTAL : 1.773343 sec + 5,991,122,249 cycles:u # 3.375 GHz + 13,042,658,926 instructions:u # 2.18 insn per cycle + 1.775462817 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:46960) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.453213e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.474776e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.474776e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.744006 sec - 5,055,001,520 cycles # 2.893 GHz - 11,442,027,490 instructions # 2.26 insn per cycle - 1.747990275 seconds time elapsed -=Symbols in CPPProcess.o= 
(~sse4: 0) (avx2:40434) (512y: 285) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.128073e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.130991e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.130991e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.461200 sec + 4,938,278,108 cycles:u # 3.376 GHz + 11,421,264,080 instructions:u # 2.31 insn per cycle + 1.463298755 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40433) (512y: 285) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.925155e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.940309e+03 
) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.940309e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.078899 sec - 3,977,787,711 cycles # 1.911 GHz - 5,943,488,721 instructions # 1.49 insn per cycle - 2.082985085 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2455) (512y: 337) (512z:39411) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.686006e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.692604e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.692604e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.979993 sec + 2,731,684,802 cycles:u # 2.783 GHz + 5,921,426,227 instructions:u # 2.17 insn per cycle + 0.981931900 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2455) (512y: 337) (512z:39409) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index 6ac5000ce8..ae26013984 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:56:31 +DATE: 2023-10-25_18:34:56 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.239290e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.259875e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.261623e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.524624 sec - 2,256,102,466 cycles # 3.003 GHz - 3,565,937,124 instructions # 1.58 insn per cycle - 0.809821745 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.793727e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.817115e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.818113e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.259889 sec - 10,730,925,294 cycles # 3.058 GHz - 24,431,623,702 instructions # 2.28 insn per cycle - 3.565849378 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 6.626675e-04 -Avg ME (F77/CUDA) = 6.6266731198158122E-004 -Relative difference = 2.837296513854949e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no 
SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.412471e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.412949e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.412949e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 37.178028 sec - 114,361,849,248 cycles # 3.076 GHz - 145,560,134,005 instructions # 1.27 insn per cycle - 37.182120025 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:22248) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.957816e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.958427e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.958427e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 33.088882 sec + 115,320,361,517 cycles:u # 3.485 GHz + 145,426,453,230 instructions:u # 1.26 insn per cycle + 33.091126344 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:22238) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 2.83729918072716e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.195698e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.198225e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.198225e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.140800 sec - 15,162,495,765 cycles # 2.948 GHz - 37,764,610,972 instructions # 2.49 insn per cycle - 5.144813607 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.837495e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.841114e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.841114e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 4.281663 sec + 14,920,588,737 cycles:u # 3.483 GHz + 37,733,048,466 instructions:u # 2.53 insn per cycle + 4.283825491 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:68446) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.961138e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.976889e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.976889e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.069144 sec - 6,001,988,087 cycles # 2.896 GHz - 12,897,757,655 instructions # 2.15 insn per cycle - 2.073134028 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:45929) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 9.438589e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.458920e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
9.458920e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.744882 sec + 5,902,070,005 cycles:u # 3.379 GHz + 12,878,882,681 instructions:u # 2.18 insn per cycle + 1.746888270 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:45936) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.425265e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.445952e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.445952e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.748961 sec - 5,100,741,339 cycles # 2.911 GHz - 11,448,531,367 instructions # 2.24 
insn per cycle - 1.753002861 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40123) (512y: 219) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.123541e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.126510e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.126510e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.467483 sec + 4,961,726,626 cycles:u # 3.377 GHz + 11,428,219,433 instructions:u # 2.30 insn per cycle + 1.469644474 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40124) (512y: 219) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] 
(23) = ( 7.951658e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.967421e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.967421e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.071513 sec - 3,955,166,061 cycles # 1.907 GHz - 5,898,178,662 instructions # 1.49 insn per cycle - 2.075594399 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1971) (512y: 259) (512z:38937) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.699696e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.706624e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.706624e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.972545 sec + 2,710,296,061 cycles:u # 2.782 GHz + 5,875,812,314 instructions:u # 2.17 insn per cycle + 0.974620416 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1971) (512y: 259) (512z:38938) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 7d5250e643..3767dfd6cf 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:41:03 +DATE: 2023-10-25_18:22:41 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.344411e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.393693e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.398639e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.476002 sec - 2,055,020,376 cycles # 2.991 GHz - 3,081,758,808 instructions # 1.50 insn per cycle - 0.745881313 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.554616e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.614004e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.616642e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.707448 sec - 5,919,573,877 cycles # 3.066 GHz - 11,552,481,792 instructions # 1.95 insn per cycle - 1.990304243 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 6.626454e-04 -Avg ME (F77/CUDA) = 6.6262659968156085E-004 -Relative difference = 2.8371612387547027e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.027055e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.028034e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.028034e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.098867 sec - 25,111,702,681 cycles # 3.100 GHz - 78,142,230,902 instructions # 3.11 insn per cycle - 8.102919065 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3558) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.528823e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.530057e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.530057e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 6.491859 sec + 22,636,199,045 cycles:u # 3.486 GHz + 78,090,213,887 instructions:u # 3.45 insn per cycle + 6.493924913 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3554) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.175810e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.188746e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.188746e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.292926 sec - 6,573,476,191 cycles # 2.863 GHz - 20,176,795,660 instructions # 3.07 insn per cycle - 2.297103514 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.007809e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.009854e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.009854e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 +TOTAL : 1.633870 sec + 5,690,420,377 cycles:u # 3.479 GHz + 20,154,491,016 instructions:u # 3.54 insn per cycle + 1.635895783 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13749) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.680111e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.687118e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.687118e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.983720 sec - 2,861,168,699 cycles # 2.899 GHz - 7,112,434,592 instructions # 2.49 insn per cycle - 0.987814280 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.255026e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.265028e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.265028e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 
+TOTAL : 0.733683 sec + 2,475,329,310 cycles:u # 3.366 GHz + 7,093,765,414 instructions:u # 2.87 insn per cycle + 0.735632372 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11880) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.901709e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.910562e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.910562e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.870092 sec - 2,532,384,607 cycles # 2.899 GHz - 6,407,671,698 instructions # 2.53 insn per cycle - 0.874200480 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 
+EvtsPerSec[Rmb+ME] (23) = ( 2.546225e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.559129e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.559129e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 0.650668 sec + 2,194,357,147 cycles:u # 3.364 GHz + 6,388,434,702 instructions:u # 2.91 insn per cycle + 0.652709293 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11552) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.544511e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.550364e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.550364e+04 ) sec^-1 -MeanMatrixElemValue 
= ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.069933 sec - 2,059,770,627 cycles # 1.919 GHz - 3,321,177,538 instructions # 1.61 insn per cycle - 1.074034173 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2408) (512y: 46) (512z:10312) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.421815e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.446257e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.446257e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.485752 sec + 1,353,981,550 cycles:u # 2.779 GHz + 3,300,538,033 instructions:u # 2.44 insn per cycle + 0.487645256 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2396) (512y: 47) (512z:10312) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index 866fb524ce..69063309c8 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -36,77 +36,31 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_19:06:12 +DATE: 2023-10-25_18:44:13 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.649753e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.350178e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.350178e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.465463 sec - 2,047,479,601 cycles # 2.978 GHz - 3,049,363,259 instructions # 1.49 insn per cycle - 0.744895811 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.287015e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.501118e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.501118e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.864461 sec - 6,383,949,622 cycles # 3.047 GHz - 13,653,993,577 instructions # 2.14 insn per cycle - 2.154815432 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 6.626454e-04 -Avg ME (F77/CUDA) = 6.6262659968156085E-004 -Relative difference = 2.8371612387547027e-05 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.020017e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.021013e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.021013e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.129725 sec - 25,159,036,941 cycles # 3.094 GHz - 78,146,432,404 instructions # 3.11 insn per cycle - 8.133616973 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3558) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.529243e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.530758e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.530758e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 6.493017 sec + 22,628,505,293 cycles:u # 3.484 GHz + 78,091,639,574 instructions:u # 3.45 insn per cycle + 6.495239879 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3554) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -117,23 +71,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.180823e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.193936e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.193936e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.294060 sec - 6,581,200,311 cycles # 2.865 GHz - 20,186,134,505 instructions # 3.07 insn per cycle - 2.298229949 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.008271e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.010280e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.010280e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 +TOTAL : 1.634947 sec + 5,691,193,232 cycles:u # 3.477 GHz + 20,160,476,618 instructions:u # 3.54 insn per cycle + 1.636942657 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13749) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -144,23 +98,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.675906e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.682979e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.682979e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.989013 sec - 2,874,126,906 cycles # 2.896 GHz - 7,122,171,177 instructions # 2.48 insn per cycle - 0.993096654 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.258479e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.268219e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.268219e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 
3.255521e-01 ) GeV^-4 +TOTAL : 0.734050 sec + 2,478,113,785 cycles:u # 3.368 GHz + 7,100,336,667 instructions:u # 2.87 insn per cycle + 0.736072875 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11880) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -171,23 +125,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.895474e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.904073e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.904073e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.875592 sec - 2,541,159,330 cycles # 2.891 GHz - 6,417,191,354 instructions # 2.53 insn per cycle - 0.879698021 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.547358e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.560752e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.560752e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 0.651695 sec + 2,198,993,035 cycles:u # 3.366 GHz + 6,395,008,325 instructions:u # 2.91 insn per cycle + 0.653635062 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11552) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -198,23 +152,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.548734e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.554330e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.554330e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.069331 sec - 2,068,634,398 cycles # 1.928 GHz - 3,331,804,154 instructions # 1.61 insn per cycle - 1.073485896 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2408) (512y: 46) (512z:10312) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.407460e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.431684e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.431684e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.489364 sec + 1,363,268,056 cycles:u # 2.776 GHz + 3,307,612,222 instructions:u # 2.43 insn per cycle + 0.491352009 seconds time elapsed +=Symbols 
in CPPProcess.o= (~sse4: 0) (avx2: 2396) (512y: 47) (512z:10312) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index b125b710bd..1e1a0613d7 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_19:17:07 +DATE: 2023-10-25_18:48:15 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.332225e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.378563e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.386102e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.159397e-01 +- 3.238804e-01 ) GeV^-4 -TOTAL : 0.457977 sec - 2,007,013,185 cycles # 3.005 GHz - 3,016,655,563 instructions # 1.50 insn per cycle - 0.725068773 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.573671e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.635105e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.637758e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 -TOTAL : 1.789355 sec - 6,154,390,786 cycles # 3.059 GHz - 11,779,031,447 instructions # 1.91 insn per cycle - 2.068482222 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 6.626454e-04 -Avg ME (F77/CUDA) = 6.6262659968156085E-004 -Relative difference = 2.8371612387547027e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 1.992613e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.993617e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.993617e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.529664e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.530894e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.530894e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 8.239039 sec - 25,101,211,563 cycles # 3.046 GHz - 78,141,605,294 instructions # 3.11 insn per cycle - 8.242885554 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3558) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.489789 sec + 22,628,574,625 cycles:u # 3.486 GHz + 78,090,215,991 instructions:u # 3.45 insn per cycle + 6.491742658 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3554) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.360647e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.374286e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.374286e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.007608e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.009697e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.009697e+04 ) sec^-1 MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 2.237472 sec - 6,571,783,960 cycles # 2.933 GHz - 20,176,847,169 instructions # 3.07 insn per cycle - 2.241174830 seconds time elapsed +TOTAL : 1.634125 sec + 5,692,134,926 cycles:u # 3.480 GHz + 20,154,489,840 instructions:u # 3.54 insn per cycle + 1.635961783 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13749) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.681723e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.688538e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.688538e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.255149e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.265738e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.265738e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.984343 sec - 2,863,785,178 cycles # 2.900 GHz - 7,111,595,374 instructions # 2.48 insn per cycle - 0.988141267 seconds time elapsed +TOTAL : 0.733638 sec + 2,479,108,717 cycles:u # 3.372 GHz + 7,093,769,493 instructions:u # 2.86 insn per cycle + 0.735515120 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11880) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.906951e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.915838e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.915838e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.547091e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.560890e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.560890e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.868571 sec - 2,534,531,591 cycles # 2.907 GHz - 6,404,093,295 instructions # 2.53 insn per cycle - 0.872424795 seconds time elapsed +TOTAL : 0.650404 sec + 2,196,802,965 cycles:u # 3.369 GHz + 6,388,437,300 instructions:u # 2.91 
insn per cycle + 0.652384117 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11552) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.558486e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.564692e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.564692e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.409304e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.432131e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.432131e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 1.060964 sec - 2,062,134,932 cycles # 1.938 GHz - 3,317,722,223 instructions # 1.61 insn per cycle - 1.064830329 seconds time 
elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2408) (512y: 46) (512z:10312) +TOTAL : 0.487306 sec + 1,358,849,860 cycles:u # 2.780 GHz + 3,300,537,525 instructions:u # 2.43 insn per cycle + 0.489109845 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2396) (512y: 47) (512z:10312) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt index 0197c733f9..caa1d052ce 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt @@ -36,64 +36,21 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_19:14:03 +DATE: 2023-10-25_18:46:55 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.335935e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.379594e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.384424e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.458171 sec - 1,997,983,261 cycles # 2.988 GHz - 3,059,794,719 instructions # 1.53 insn per cycle - 0.725811269 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.578157e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.639706e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.642441e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.740586 sec - 5,998,214,102 cycles # 3.054 GHz - 12,259,549,434 instructions # 2.04 insn per cycle - 2.021545903 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 6.626454e-04 -Avg ME (F77/CUDA) = 6.6262659968156085E-004 -Relative difference = 2.8371612387547027e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.001178e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.002137e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.002137e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.203084 sec - 25,141,172,926 cycles # 3.064 GHz - 78,142,442,354 instructions # 3.11 insn per cycle - 8.206877433 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3558) (avx2: 0) (512y: 0) (512z: 0) +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe: Aborted + 2,218,459 cycles:u # 1.490 GHz + 3,114,636 instructions:u # 1.40 insn per cycle + 0.112107092 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3554) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -103,23 +60,14 @@ Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.194824e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
7.207542e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.207542e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.287147 sec - 6,571,682,415 cycles # 2.870 GHz - 20,177,851,750 instructions # 3.07 insn per cycle - 2.290998385 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe: Aborted + 2,112,380 cycles:u # 1.362 GHz + 3,112,947 instructions:u # 1.47 insn per cycle + 0.138312157 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13749) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -129,23 +77,14 @@ Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.671381e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.678184e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.678184e+04 ) 
sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.988852 sec - 2,877,121,035 cycles # 2.900 GHz - 7,112,414,105 instructions # 2.47 insn per cycle - 0.992772923 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe: Aborted + 1,939,487 cycles:u # 1.317 GHz + 3,113,829 instructions:u # 1.61 insn per cycle + 0.105725842 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11880) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -155,23 +94,14 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.905382e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.914336e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.914336e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 
-TOTAL : 0.868290 sec - 2,533,476,931 cycles # 2.907 GHz - 6,407,633,337 instructions # 2.53 insn per cycle - 0.872075865 seconds time elapsed +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe: Aborted + 1,941,821 cycles:u # 1.336 GHz + 3,114,008 instructions:u # 1.60 insn per cycle + 0.110652085 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11552) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -181,23 +111,14 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.547508e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.553260e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.553260e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.067544 sec - 2,060,630,355 cycles # 1.925 GHz - 
3,320,987,634 instructions # 1.61 insn per cycle - 1.071418753 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2408) (512y: 46) (512z:10312) +/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe: Aborted + 1,910,010 cycles:u # 1.323 GHz + 3,115,075 instructions:u # 1.63 insn per cycle + 0.118536964 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2396) (512y: 47) (512z:10312) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index 52987bd60d..2470ae94b5 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -36,67 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_19:11:03 +DATE: 2023-10-25_18:46:22 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.805977e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.405107e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.410125e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.460864 sec - 2,006,899,759 cycles # 2.993 GHz - 3,017,362,425 instructions # 1.50 insn per cycle - 0.728187465 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.501502e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.624461e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.627283e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.814750 sec - 6,240,031,804 cycles # 3.062 GHz - 12,062,681,609 instructions # 1.93 insn per cycle - 2.094320858 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 6.626454e-04 -Avg ME (F77/CUDA) = 6.6262659968156085E-004 -Relative difference = 2.8371612387547027e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
2.001113e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.002085e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.002085e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.203609 sec - 25,118,317,039 cycles # 3.061 GHz - 78,142,981,648 instructions # 3.11 insn per cycle - 8.207429402 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3558) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.528159e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.529391e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.529391e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 6.497742 sec + 22,641,679,611 cycles:u # 3.486 GHz + 78,090,217,675 instructions:u # 3.45 insn per cycle + 6.499738583 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3554) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -106,23 +69,23 @@ Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.323036e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.336228e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.336228e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.246931 sec - 6,579,301,389 cycles # 2.924 GHz - 20,176,586,022 instructions # 3.07 insn per cycle - 2.250705274 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.008068e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.010082e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.010082e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 +TOTAL : 1.633426 sec + 5,689,992,223 cycles:u # 3.480 GHz + 20,154,489,389 instructions:u # 3.54 insn per cycle + 1.635266445 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13749) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -132,23 +95,23 @@ Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.674657e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.681528e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.681528e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.986857 sec - 2,862,922,647 cycles # 2.892 GHz - 7,112,389,781 instructions # 2.48 insn per cycle - 0.990752111 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.258512e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.268856e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.268856e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 
) GeV^-4 +TOTAL : 0.732443 sec + 2,475,265,704 cycles:u # 3.372 GHz + 7,093,763,921 instructions:u # 2.87 insn per cycle + 0.734306872 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11880) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -158,23 +121,23 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.894478e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.903271e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.903271e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.873227 sec - 2,541,312,245 cycles # 2.899 GHz - 6,407,310,369 instructions # 2.52 insn per cycle - 0.877122463 seconds time elapsed +OMP threads / `nproc 
--all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.548560e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.561816e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.561816e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 0.649916 sec + 2,195,752,741 cycles:u # 3.371 GHz + 6,388,434,398 instructions:u # 2.91 insn per cycle + 0.651754209 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11552) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -184,23 +147,23 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.549285e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.555166e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.555166e+04 ) 
sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.066246 sec - 2,058,021,558 cycles # 1.924 GHz - 3,321,051,164 instructions # 1.61 insn per cycle - 1.070183678 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2408) (512y: 46) (512z:10312) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.424471e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.447344e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.447344e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.485893 sec + 1,353,311,053 cycles:u # 2.778 GHz + 3,300,539,344 instructions:u # 2.44 insn per cycle + 0.487750906 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2396) (512y: 47) (512z:10312) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index e3d102e7b5..4f9674331f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:41:32 +DATE: 2023-10-25_18:22:54 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.347885e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.397852e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.402767e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.473915 sec - 2,057,636,111 cycles # 3.008 GHz - 3,034,789,542 instructions # 1.47 insn per cycle - 0.743124266 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.510288e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.569100e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.571863e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.706491 sec - 5,801,343,951 cycles # 3.003 GHz - 11,478,639,093 instructions # 1.98 insn per cycle - 1.991446406 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 6.626454e-04 -Avg ME (F77/CUDA) = 6.6262659968156085E-004 -Relative difference = 2.8371612387547027e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.032219e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.033197e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.033197e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.077935 sec - 25,002,620,407 cycles # 3.094 GHz - 77,880,023,337 instructions # 3.11 insn per cycle - 8.081833653 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3061) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.550733e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.551990e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.551990e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 6.436120 sec + 22,440,295,397 cycles:u # 3.486 GHz + 77,833,366,637 instructions:u # 3.47 insn per cycle + 6.438186543 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3062) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 5.65798569465384e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.437992e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.452045e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.452045e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.212380 sec - 6,525,641,551 cycles # 2.945 GHz - 20,144,168,186 instructions # 3.09 insn per cycle - 2.216462164 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13439) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.014422e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.016466e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.016466e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 +TOTAL : 1.623058 sec + 5,653,015,371 cycles:u # 3.480 GHz + 20,121,469,831 instructions:u # 3.56 insn per cycle + 1.624945090 seconds time elapsed +=Symbols in CPPProcess.o= 
(~sse4:13433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 2.1853408865157068e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.631112e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.637585e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.637585e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.012887 sec - 2,950,530,206 cycles # 2.903 GHz - 7,252,358,943 instructions # 2.46 insn per cycle - 1.016940562 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:12263) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.183984e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.193251e+04 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.193251e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 0.757019 sec + 2,558,948,847 cycles:u # 3.374 GHz + 7,233,160,969 instructions:u # 2.83 insn per cycle + 0.758869976 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:12273) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 5.008331292535666e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.851739e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.860081e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.860081e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.893117 sec - 2,605,469,056 cycles # 2.906 GHz - 
6,549,528,920 instructions # 2.51 insn per cycle - 0.897080094 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11948) (512y: 26) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.477725e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.489552e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.489552e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 0.668014 sec + 2,257,387,875 cycles:u # 3.372 GHz + 6,529,983,861 instructions:u # 2.89 insn per cycle + 0.669967567 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11966) (512y: 26) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 5.008331292535666e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.502391e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.508119e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.508119e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.099172 sec - 2,123,741,528 cycles # 1.926 GHz - 3,480,482,498 instructions # 1.64 insn per cycle - 1.103291837 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2903) (512y: 22) (512z:10276) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 3.218040e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.238304e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.238304e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.515820 sec + 1,438,608,312 cycles:u # 2.781 GHz + 3,460,427,031 instructions:u # 2.41 insn per cycle + 0.517668261 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2901) (512y: 23) (512z:10269) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index 31738cc5a1..32697efd77 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:57:39 +DATE: 2023-10-25_18:35:42 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.596965e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.633834e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.637880e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.481755 sec - 2,117,394,113 cycles # 2.997 GHz - 3,211,903,752 instructions # 1.52 insn per cycle - 0.763834850 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.702969e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.752015e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.754390e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.845325 sec - 6,364,196,440 cycles # 3.044 GHz - 12,697,584,410 instructions # 2.00 insn per cycle - 2.147798753 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 6.626454e-04 -Avg ME (F77/CUDA) = 6.6262660579844562E-004 -Relative difference = 2.836238137986709e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.860218e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.861043e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.861043e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 27.993515 sec - 85,967,133,243 cycles # 3.071 GHz - 135,563,627,438 instructions # 1.58 insn per cycle - 27.997422911 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:15486) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.125617e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.126338e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.126338e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.204932e-01 +- 3.252405e-01 ) GeV^-4 +TOTAL : 26.780468 sec + 93,313,216,496 cycles:u # 3.484 GHz + 135,431,060,141 instructions:u # 1.45 insn per cycle + 26.782615953 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:15458) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 4.195614963669944e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.196137e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.208997e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.208997e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.287867 sec - 6,773,769,099 cycles # 2.957 GHz - 19,387,600,160 instructions # 2.86 insn per cycle - 2.291838045 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 8.768700e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.783995e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.783995e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 +TOTAL : 1.876922 sec + 6,538,775,041 cycles:u # 3.481 GHz + 19,364,912,946 instructions:u # 2.96 insn per cycle + 1.878795640 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:69680) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 4.0849182767952624e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.513484e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.519092e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.519092e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.091418 sec - 3,173,929,820 cycles # 2.900 GHz - 6,808,660,445 instructions # 2.15 insn per cycle - 1.095213929 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.797914e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.804285e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.804285e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 
+TOTAL : 0.918896 sec + 3,105,696,597 cycles:u # 3.374 GHz + 6,789,508,332 instructions:u # 2.19 insn per cycle + 0.920983211 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:49077) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 2.3520194007978538e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.813250e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.821399e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.821399e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.912193 sec - 2,648,785,634 cycles # 2.893 GHz - 5,986,998,268 instructions # 2.26 insn per cycle - 0.916244855 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 
+EvtsPerSec[Rmb+ME] (23) = ( 2.177052e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.186751e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.186751e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 +TOTAL : 0.759834 sec + 2,567,390,833 cycles:u # 3.372 GHz + 5,967,608,527 instructions:u # 2.32 insn per cycle + 0.761735929 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:42677) (512y: 11) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 2.3520194007978538e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.539260e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.545021e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.545021e+04 ) sec^-1 -MeanMatrixElemValue 
= ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.073101 sec - 2,071,594,759 cycles # 1.925 GHz - 3,501,390,779 instructions # 1.69 insn per cycle - 1.077005935 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.796510e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.813005e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.813005e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.211848e-01 +- 3.254639e-01 ) GeV^-4 +TOTAL : 0.592992 sec + 1,653,029,441 cycles:u # 2.780 GHz + 3,481,255,390 instructions:u # 2.11 insn per cycle + 0.594900850 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5198) (512y: 3) (512z:44822) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index cddff811bf..17e4a9c48c 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:58:30 +DATE: 2023-10-25_18:36:17 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.566704e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.601747e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.605700e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.479762 sec - 2,129,975,460 cycles # 3.025 GHz - 3,262,724,571 instructions # 1.53 insn per cycle - 0.761351418 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.654164e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.702666e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.704765e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.853079 sec - 6,372,411,967 cycles # 3.059 GHz - 13,261,029,776 instructions # 2.08 insn per cycle - 2.143146478 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 6.626454e-04 -Avg ME (F77/CUDA) = 6.6262660579844562E-004 -Relative difference = 2.836238137986709e-05 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.859455e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.860302e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.860302e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 27.996994 sec - 86,063,307,798 cycles # 3.074 GHz - 135,905,248,930 instructions # 1.58 insn per cycle - 28.000969025 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:15910) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.397979e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.398767e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.398767e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.204932e-01 +- 3.252405e-01 ) GeV^-4 +TOTAL : 25.641745 sec + 89,368,300,325 cycles:u # 3.485 GHz + 135,893,183,046 instructions:u # 1.52 insn per cycle + 25.643755173 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:15937) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 4.0361421941458736e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.111595e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.124605e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.124605e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.313738 sec - 6,851,236,852 cycles # 2.957 GHz - 19,439,512,273 instructions # 2.84 insn per cycle - 2.317708744 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 8.661453e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.676391e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.676391e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 +TOTAL : 1.899849 sec + 6,619,325,215 cycles:u # 3.481 GHz + 19,416,769,897 instructions:u # 2.93 insn per cycle + 1.901796273 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:69722) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 4.170542995014107e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.540866e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.546584e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.546584e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.071791 sec - 3,105,453,036 cycles # 2.888 GHz - 6,719,669,630 instructions # 2.16 insn per cycle - 1.075697475 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.844626e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.851170e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.851170e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 
+TOTAL : 0.895271 sec + 3,027,082,745 cycles:u # 3.375 GHz + 6,700,693,510 instructions:u # 2.21 insn per cycle + 0.897161186 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:47667) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 2.4912983202981302e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.816139e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.824278e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.824278e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.910723 sec - 2,625,346,963 cycles # 2.872 GHz - 5,970,291,755 instructions # 2.27 insn per cycle - 0.914693861 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 
+EvtsPerSec[Rmb+ME] (23) = ( 2.190615e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.199886e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.199886e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 +TOTAL : 0.754903 sec + 2,551,683,836 cycles:u # 3.373 GHz + 5,950,861,283 instructions:u # 2.33 insn per cycle + 0.756777544 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:41842) (512y: 13) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 2.4912983202981302e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.541127e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.546712e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.546712e+04 ) sec^-1 -MeanMatrixElemValue 
= ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.071854 sec - 2,074,441,089 cycles # 1.930 GHz - 3,494,899,079 instructions # 1.68 insn per cycle - 1.075757314 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.818886e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.835239e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.835239e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.211848e-01 +- 3.254639e-01 ) GeV^-4 +TOTAL : 0.587968 sec + 1,639,593,907 cycles:u # 2.782 GHz + 3,474,694,951 instructions:u # 2.12 insn per cycle + 0.589824720 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4162) (512y: 4) (512z:44465) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 7ad6f63659..5479c4ff17 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:42:00 +DATE: 2023-10-25_18:23:07 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.490294e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.513614e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.515507e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.518671 sec - 2,226,766,382 cycles # 2.986 GHz - 3,540,179,326 instructions # 1.59 insn per cycle - 0.807219432 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.120867e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.148001e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.149117e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.019190 sec - 10,060,652,484 cycles # 3.078 GHz - 22,177,348,496 instructions # 2.20 insn per cycle - 3.325766752 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 6.626675e-04 -Avg ME (F77/CUDA) = 6.6266732376103494E-004 -Relative difference = 2.659538381540814e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.954175e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.955102e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.955102e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.401551 sec - 26,112,965,520 cycles # 3.107 GHz - 79,187,055,919 instructions # 3.03 insn per cycle - 8.405522832 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4746) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.443771e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.445279e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.445279e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.718856 sec + 23,426,099,550 cycles:u # 3.486 GHz + 79,131,145,870 instructions:u # 3.38 insn per cycle + 6.721027556 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4708) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.704056e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.707340e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.707340e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.436595 sec - 12,893,512,565 cycles # 2.905 GHz - 38,578,382,892 instructions # 2.99 insn per cycle - 4.440842197 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.929673e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.935374e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.935374e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.334498 sec + 11,619,248,352 cycles:u # 3.483 GHz + 38,549,380,029 instructions:u # 3.32 insn per cycle + 3.336425977 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13136) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.529594e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.548137e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.548137e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.931885 sec - 5,592,758,933 cycles # 2.891 GHz - 13,704,166,637 instructions # 2.45 insn per cycle - 1.936090809 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11245) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.153054e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.156058e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.156058e+04 ) 
sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.429560 sec + 4,835,341,307 cycles:u # 3.379 GHz + 13,686,717,164 instructions:u # 2.83 insn per cycle + 1.431504175 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11246) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.692246e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.714062e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.714062e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.701537 sec - 4,935,885,889 cycles # 2.895 GHz - 12,346,516,315 instructions # 2.50 insn per cycle - 
1.705790521 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10897) (512y: 79) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.294470e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.298365e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.298365e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.274579 sec + 4,309,533,363 cycles:u # 3.377 GHz + 12,329,522,681 instructions:u # 2.86 insn per cycle + 1.276566120 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10898) (512y: 79) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
7.612609e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.626629e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.626629e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.163571 sec - 4,153,048,865 cycles # 1.917 GHz - 6,440,968,926 instructions # 1.55 insn per cycle - 2.167665946 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1803) (512y: 93) (512z:10092) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.650976e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.657209e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.657209e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.000488 sec + 2,789,706,585 cycles:u # 2.784 GHz + 6,419,813,194 instructions:u # 2.30 insn per cycle + 1.002439628 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1796) (512y: 93) (512z:10086) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index f056a45974..931a425468 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:42:36 +DATE: 2023-10-25_18:23:23 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.481563e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.505145e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.507791e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.521573 sec - 2,153,426,616 cycles # 2.862 GHz - 3,368,297,517 instructions # 1.56 insn per cycle - 0.812765365 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.140316e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.167722e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.168848e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.005546 sec - 10,001,668,357 cycles # 3.073 GHz - 22,545,107,075 instructions # 2.25 insn per cycle - 3.311305896 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 6.626675e-04 -Avg ME (F77/CUDA) = 6.6266732376103494E-004 -Relative difference = 2.659538381540814e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.946429e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.947329e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.947329e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.435831 sec - 26,126,854,101 cycles # 3.097 GHz - 79,204,576,073 instructions # 3.03 insn per cycle - 8.439924281 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4401) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.449739e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.451219e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.451219e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.701998 sec + 23,368,009,528 cycles:u # 3.486 GHz + 79,155,600,974 instructions:u # 3.39 insn per cycle + 6.704105951 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4383) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.694235e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.697554e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.697554e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.448208 sec - 12,894,092,255 cycles # 2.897 GHz - 38,538,252,439 instructions # 2.99 insn per cycle - 4.452458550 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12903) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.803303e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.808772e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.808772e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.423406 sec + 11,924,323,531 cycles:u # 3.483 GHz + 38,507,173,931 instructions:u # 3.23 insn per cycle + 3.425385769 seconds time elapsed +=Symbols in 
CPPProcess.o= (~sse4:12902) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.136495e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.152364e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.152364e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.024697 sec - 5,646,666,731 cycles # 2.789 GHz - 13,825,634,230 instructions # 2.45 insn per cycle - 2.029023847 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11327) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.145562e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.148491e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.148491e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.438621 sec + 4,866,589,673 cycles:u # 3.379 GHz + 13,805,745,696 instructions:u # 2.84 insn per cycle + 1.440536290 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11349) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.556941e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.579305e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.579305e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.725082 sec - 4,994,411,865 
cycles # 2.889 GHz - 12,477,409,386 instructions # 2.50 insn per cycle - 1.729413379 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10888) (512y: 239) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.286025e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.289820e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.289820e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.282326 sec + 4,336,961,185 cycles:u # 3.378 GHz + 12,458,784,902 instructions:u # 2.87 insn per cycle + 1.284257691 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10894) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] 
-OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.605322e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.619068e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.619068e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.165836 sec - 4,156,250,470 cycles # 1.916 GHz - 6,542,526,880 instructions # 1.57 insn per cycle - 2.169952357 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1628) (512y: 191) (512z:10036) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.650021e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.656400e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.656400e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.001452 sec + 2,791,774,858 cycles:u # 2.783 GHz + 6,522,516,847 instructions:u # 2.34 insn per cycle + 1.003472742 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1626) (512y: 191) (512z:10049) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 55b25786bd..ba3d118b39 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_18:44:51 +DATE: 2023-10-25_18:24:10 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.070749e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.071143e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.071250e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.417941 sec - 8,377,415,337 cycles # 3.062 GHz - 18,838,612,351 instructions # 2.25 insn per cycle - 2.794089225 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.235176e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.237005e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.237223e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.993841 sec - 13,055,750,655 cycles # 3.026 GHz - 31,160,662,070 instructions # 2.39 insn per cycle - 4.373705613 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 9.872263e-03 -Avg ME (F77/CUDA) = 9.8722595284406640E-003 -Relative difference = 3.5164777671934515e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = 
SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.897548e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.897775e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.897775e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.705050 sec - 19,396,394,057 cycles # 2.892 GHz - 54,051,876,234 instructions # 2.79 insn per cycle - 6.708932383 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32354) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 9.680592e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.680896e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.680896e+01 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 5.456227 sec + 19,018,581,238 cycles:u # 3.485 GHz + 54,010,805,402 instructions:u # 2.84 insn per cycle + 5.458105500 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32344) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.653870e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.653969e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.653969e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.197917 sec - 9,907,124,994 cycles # 3.095 GHz - 27,081,765,597 instructions # 2.73 insn per cycle - 3.202038670 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.877293e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.877401e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.877401e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.814507 sec + 9,809,689,328 cycles:u # 3.484 GHz + 27,056,297,784 instructions:u # 2.76 insn per cycle + 2.816303349 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96405) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.542258e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.542698e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.542698e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.497561 sec - 4,341,680,359 cycles # 2.893 GHz - 9,666,416,740 instructions # 2.23 insn per cycle - 1.501545706 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.298191e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.298726e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.298726e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 
4.930917e-03 ) GeV^-6 +TOTAL : 1.230507 sec + 4,164,918,618 cycles:u # 3.381 GHz + 9,647,913,152 instructions:u # 2.32 insn per cycle + 1.232250727 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84384) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.866674e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.867185e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.867185e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.370361 sec - 3,840,509,187 cycles # 2.796 GHz - 8,617,030,376 instructions # 2.24 insn per cycle - 1.374450501 seconds time elapsed +OMP 
threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.852853e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.853549e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.853549e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.090366 sec + 3,688,799,345 cycles:u # 3.379 GHz + 8,598,576,162 instructions:u # 2.33 insn per cycle + 1.092175101 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84025) (512y: 89) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.733060e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.733603e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
3.733603e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.420204 sec - 2,707,945,792 cycles # 1.903 GHz - 4,335,943,514 instructions # 1.60 insn per cycle - 1.424239943 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2300) (512y: 103) (512z:83067) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.955570e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.957165e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.957165e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 0.761329 sec + 2,120,569,640 cycles:u # 2.780 GHz + 4,317,361,789 instructions:u # 2.04 insn per cycle + 0.763089467 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2293) (512y: 103) (512z:83066) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 94a23e2f12..fbe366c789 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -36,77 +36,31 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_19:06:41 +DATE: 2023-10-25_18:44:26 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 2 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.066351e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.067339e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.067339e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.354612 sec - 8,162,423,266 cycles # 3.056 GHz - 18,099,441,211 instructions # 2.22 insn per cycle - 2.730850635 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.246162e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.277852e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.277852e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.971347 sec - 13,204,436,201 cycles # 3.078 GHz - 30,282,503,163 instructions # 2.29 insn per cycle - 4.348676377 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 9.872263e-03 -Avg ME (F77/CUDA) = 9.8722595284406640E-003 -Relative difference = 3.5164777671934515e-07 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.244919e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.245139e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.245139e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.408828 sec - 19,448,715,885 cycles # 3.033 GHz - 54,050,853,106 instructions # 2.78 insn per cycle - 6.412664226 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32354) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 9.694870e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.695205e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.695205e+01 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 5.449031 sec + 18,992,419,830 cycles:u # 3.485 GHz + 54,011,515,215 instructions:u # 2.84 insn per cycle + 5.450993128 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32344) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. 
+[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -117,23 +71,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.650316e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.650406e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.650406e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.204606 sec - 9,890,944,577 cycles # 3.084 GHz - 27,082,213,615 instructions # 2.74 insn per cycle - 3.208413447 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.878478e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.878594e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.878594e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.813007 sec + 9,802,151,523 cycles:u # 3.483 GHz + 27,057,251,738 instructions:u # 2.76 insn per cycle + 2.814919783 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96405) (avx2: 0) (512y: 
0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -144,23 +98,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.546707e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.547140e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.547140e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.493585 sec - 4,325,320,238 cycles # 2.890 GHz - 9,667,464,688 instructions # 2.24 insn per cycle - 1.497469046 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.308480e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.309039e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.309039e+02 ) sec^-1 +MeanMatrixElemValue = ( 
4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.227874 sec + 4,154,924,285 cycles:u # 3.380 GHz + 9,648,778,691 instructions:u # 2.32 insn per cycle + 1.229660091 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84384) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -171,23 +125,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.037834e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.038367e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.038367e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.313129 sec - 3,813,971,496 cycles # 2.897 GHz - 8,617,412,652 instructions # 2.26 insn per cycle - 1.316983127 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.858370e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.859116e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.859116e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.089216 sec + 3,684,912,316 cycles:u # 3.379 GHz + 8,599,444,306 instructions:u # 2.33 insn per cycle + 1.090969381 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84025) (512y: 89) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -198,23 +152,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.743342e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.743918e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.743918e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.415154 sec - 2,707,174,442 cycles # 1.909 GHz - 4,336,832,605 instructions # 1.60 insn per cycle - 1.419043941 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2300) (512y: 103) (512z:83067) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 7.016416e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.018101e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.018101e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 0.754833 sec + 2,102,419,627 cycles:u # 2.780 GHz + 4,318,249,384 instructions:u # 2.05 insn per cycle + 0.756579131 seconds time elapsed 
+=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2293) (512y: 103) (512z:83066) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index e3241a7638..bac19cdccb 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_18:45:54 +DATE: 2023-10-25_18:24:33 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.058972e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.059386e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.059482e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.421465 sec - 8,436,981,615 cycles # 3.067 GHz - 19,028,712,837 instructions # 2.26 insn per cycle - 2.810095670 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.255956e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.257794e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.257987e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.982903 sec - 13,219,371,069 cycles # 3.063 GHz - 29,416,443,528 instructions # 2.23 insn per cycle - 4.375059399 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 9.872263e-03 -Avg ME (F77/CUDA) = 9.8722595284406640E-003 -Relative difference = 3.5164777671934515e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.015099e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.015315e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.015315e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.593815 sec - 19,095,708,850 cycles # 2.895 GHz - 54,047,292,212 instructions # 2.83 insn per cycle - 6.597605539 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:31965) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 9.638383e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.638684e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.638684e+01 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 5.481978 sec + 19,103,066,208 cycles:u # 3.485 GHz + 54,034,034,277 instructions:u # 2.83 insn per cycle + 5.483935387 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32250) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.634946e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.635033e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.635033e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.235048 sec - 10,011,189,889 cycles # 3.092 GHz - 27,077,379,591 instructions # 2.70 insn per cycle - 3.239059369 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96257) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.863455e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.863561e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.863561e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.835216 sec + 9,881,395,758 cycles:u # 3.484 GHz + 27,051,463,465 instructions:u # 2.74 insn per cycle + 2.836984541 seconds time elapsed +=Symbols in 
CPPProcess.o= (~sse4:96261) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.540865e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.541284e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.541284e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.495827 sec - 4,323,882,817 cycles # 2.884 GHz - 9,677,765,192 instructions # 2.24 insn per cycle - 1.499825664 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.293792e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.294411e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.294411e+02 ) 
sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.231602 sec + 4,168,406,135 cycles:u # 3.381 GHz + 9,659,246,967 instructions:u # 2.32 insn per cycle + 1.233398649 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84456) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.986806e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.987336e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.987336e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.330112 sec - 3,818,292,084 cycles # 2.864 GHz - 8,626,392,875 instructions # 2.26 insn per 
cycle - 1.334108022 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.889186e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.889950e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.889950e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.082064 sec + 3,655,678,046 cycles:u # 3.373 GHz + 8,608,539,465 instructions:u # 2.35 insn per cycle + 1.084116660 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:83903) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.736329e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
3.736878e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.736878e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.419053 sec - 2,712,716,906 cycles # 1.907 GHz - 4,344,880,705 instructions # 1.60 insn per cycle - 1.423142145 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2177) (512y: 185) (512z:83030) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 7.057749e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.059454e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.059454e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 0.750584 sec + 2,091,362,984 cycles:u # 2.781 GHz + 4,326,512,544 instructions:u # 2.07 insn per cycle + 0.752511146 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2175) (512y: 185) (512z:83037) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 6327c32a36..b834029070 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -36,90 +36,56 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_18:46:57 +DATE: 2023-10-25_18:24:56 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.757288e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.758127e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.758502e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.655983 sec - 5,859,016,098 cycles # 3.037 GHz - 12,599,305,189 instructions # 2.15 insn per cycle - 1.988634806 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.346728e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.347386e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.347472e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.915477 sec - 6,698,649,731 cycles # 3.061 GHz - 13,457,604,803 instructions # 2.01 insn per cycle - 2.247936467 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 9.849636e-03 -Avg ME (F77/CUDA) = 9.8712405367667715E-003 -Relative difference = 0.0021934350433631634 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], 
no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.909786e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.910066e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.910066e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.942364 sec - 18,295,731,836 cycles # 3.078 GHz - 53,640,525,145 instructions # 2.93 insn per cycle - 5.946250751 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20286) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.021950e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.021979e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.021979e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 +TOTAL : 5.168809 sec + 18,013,411,233 cycles:u # 3.484 GHz + 53,603,517,178 instructions:u # 2.98 insn per cycle + 5.170747581 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20320) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087551509E-003 -Relative difference = 2.119780432912131e-08 +Avg ME (F77/C++) = 9.8479612087550399E-003 +Relative difference = 2.119779305548787e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.560034e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.560473e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.560473e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.488735 sec - 4,616,421,294 cycles # 3.094 GHz - 13,762,957,080 instructions # 2.98 insn per cycle - 1.492690614 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.210651e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.211125e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.211125e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.924321e-03 +- 4.918774e-03 ) GeV^-6 +TOTAL : 1.256048 sec + 4,376,251,084 cycles:u # 3.480 GHz + 13,744,562,962 instructions:u # 
3.14 insn per cycle + 1.257791058 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96921) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 3.1515505172940424e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.154668e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.156604e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.156604e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.743016 sec - 2,158,936,332 cycles # 2.892 GHz - 4,868,873,872 instructions # 2.26 insn per cycle - 0.746953594 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 8.588687e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
8.590709e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.590709e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 +TOTAL : 0.616847 sec + 2,086,353,348 cycles:u # 3.374 GHz + 4,853,210,335 instructions:u # 2.33 insn per cycle + 0.618687287 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84898) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.993469e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.995570e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.995570e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.665634 sec - 
1,930,674,595 cycles # 2.886 GHz - 4,341,032,805 instructions # 2.25 insn per cycle - 0.669411803 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 9.689277e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.691596e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.691596e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 +TOTAL : 0.547165 sec + 1,849,659,683 cycles:u # 3.372 GHz + 4,325,575,589 instructions:u # 2.34 insn per cycle + 0.548927061 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84581) (512y: 44) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 7.422100e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.424326e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.424326e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.716875 sec - 1,362,810,793 cycles # 1.892 GHz - 2,191,758,925 instructions # 1.61 insn per cycle - 0.720813478 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2896) (512y: 47) (512z:83271) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.415883e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.416425e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.416425e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.946834e-03 +- 4.941266e-03 ) GeV^-6 +TOTAL : 0.375089 sec + 1,044,605,334 cycles:u # 2.775 GHz + 2,175,642,242 instructions:u # 2.08 insn per cycle + 0.376788208 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2884) (512y: 48) (512z:83271) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index 3a8d1c9eac..c3fa78732f 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -36,104 +36,58 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_19:07:44 +DATE: 2023-10-25_18:44:49 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 2 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.793927e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.795607e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.795607e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187094e-05 +- 9.825664e-06 ) GeV^-6 -TOTAL : 1.593125 sec - 5,711,497,040 cycles # 3.065 GHz - 11,071,875,711 instructions # 1.94 insn per cycle - 1.922969062 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.332193e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.344979e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.344979e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856441e-04 +- 8.331096e-05 ) GeV^-6 -TOTAL : 1.859000 sec - 6,562,180,386 cycles # 3.077 GHz - 14,027,341,556 instructions # 2.14 insn per cycle - 2.188744808 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 9.849636e-03 -Avg ME (F77/CUDA) = 9.8712405367667715E-003 -Relative difference = 0.0021934350433631634 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.934162e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.934430e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.934430e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.914443 sec - 18,289,747,509 cycles # 3.091 GHz - 53,640,880,499 instructions # 2.93 insn per cycle - 5.918348520 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20286) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.020893e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.020922e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.020922e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 +TOTAL : 5.175832 sec + 18,016,139,817 cycles:u # 3.481 GHz + 53,604,226,868 instructions:u # 2.98 insn per cycle + 5.177799324 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20320) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087551509E-003 -Relative difference = 2.119780432912131e-08 +Avg ME (F77/C++) = 9.8479612087550399E-003 +Relative difference = 2.119779305548787e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.554596e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.555024e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.555024e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.490597 sec - 4,617,566,850 cycles # 3.091 GHz - 13,763,927,839 instructions # 2.98 insn per cycle - 1.494414225 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.213112e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.213591e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.213591e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.924321e-03 +- 4.918774e-03 ) GeV^-6 +TOTAL : 1.255475 sec + 4,373,170,498 
cycles:u # 3.479 GHz + 13,745,433,050 instructions:u # 3.14 insn per cycle + 1.257250474 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96921) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -144,23 +98,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.178518e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.180233e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.180233e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.740375 sec - 2,151,024,422 cycles # 2.893 GHz - 4,869,911,860 instructions # 2.26 insn per cycle - 0.744208061 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 8.581573e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.583573e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.583573e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 +TOTAL : 0.617677 sec + 2,088,828,384 cycles:u # 3.374 GHz + 4,854,095,100 instructions:u # 2.32 insn per cycle + 0.619460453 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84898) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -171,23 +125,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.974786e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.976961e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.976961e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.667174 sec - 1,931,553,706 cycles # 2.882 GHz - 4,342,018,470 instructions # 2.25 insn per cycle - 0.670962833 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 9.694742e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.697307e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.697307e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 +TOTAL : 0.547014 sec + 1,849,515,073 cycles:u # 3.371 GHz + 4,326,457,350 instructions:u # 2.34 insn per cycle + 0.548974684 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84581) (512y: 44) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -198,23 +152,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.427390e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.429642e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.429642e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.716682 sec - 1,362,755,127 cycles # 1.894 GHz - 2,192,432,791 instructions # 1.61 insn per cycle - 0.720592858 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2896) (512y: 47) (512z:83271) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.405098e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.405660e+03 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.405660e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.946834e-03 +- 4.941266e-03 ) GeV^-6 +TOTAL : 0.378139 sec + 1,052,603,493 cycles:u # 2.773 GHz + 2,176,541,915 instructions:u # 2.07 insn per cycle + 0.379883885 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2884) (512y: 48) (512z:83271) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index ea39ad8994..bff929697f 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -36,90 +36,56 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_18:47:44 +DATE: 2023-10-25_18:25:13 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.770785e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.771818e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.772102e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.644101 sec - 5,902,851,310 cycles # 3.071 GHz - 12,347,610,066 instructions # 2.09 insn per cycle - 1.979279139 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.344670e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.345330e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.345408e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.896839 sec - 6,669,880,941 cycles # 3.074 GHz - 13,819,367,336 instructions # 2.07 insn per cycle - 2.228526429 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 9.849636e-03 -Avg ME (F77/CUDA) = 9.8712405367667715E-003 -Relative difference = 0.0021934350433631634 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], 
no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.942946e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.943229e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.943229e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.908590 sec - 18,230,304,521 cycles # 3.084 GHz - 53,620,524,232 instructions # 2.94 insn per cycle - 5.912442239 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20241) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.013491e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.013519e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.013519e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 +TOTAL : 5.211690 sec + 18,167,100,484 cycles:u # 3.485 GHz + 53,621,489,297 instructions:u # 2.95 insn per cycle + 5.213585137 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20477) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087572898E-003 -Relative difference = 2.1198021522715588e-08 +Avg ME (F77/C++) = 9.8479612087571129E-003 +Relative difference = 2.119800355536229e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.576079e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.576533e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.576533e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.481262 sec - 4,588,697,172 cycles # 3.091 GHz - 13,755,977,699 instructions # 3.00 insn per cycle - 1.485242600 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.211809e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.212289e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.212289e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.924321e-03 +- 4.918774e-03 ) GeV^-6 +TOTAL : 1.255504 sec + 4,374,748,922 cycles:u # 3.481 GHz + 13,737,496,340 instructions:u 
# 3.14 insn per cycle + 1.257243369 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96593) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 3.151856596628469e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.020825e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.022661e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.022661e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.756593 sec - 2,190,032,975 cycles # 2.882 GHz - 4,877,215,136 instructions # 2.23 insn per cycle - 0.760480627 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:85321) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 
+EvtsPerSec[Rmb+ME] (23) = ( 8.513547e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.515596e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.515596e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 +TOTAL : 0.622174 sec + 2,104,535,793 cycles:u # 3.375 GHz + 4,861,455,020 instructions:u # 2.31 insn per cycle + 0.623987486 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:85271) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 1.85880227405429e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.993268e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.995423e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.995423e+02 ) sec^-1 
-MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.665169 sec - 1,931,098,294 cycles # 2.889 GHz - 4,348,628,190 instructions # 2.25 insn per cycle - 0.669029492 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84987) (512y: 24) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 9.712446e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.714857e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.714857e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 +TOTAL : 0.545700 sec + 1,845,269,421 cycles:u # 3.373 GHz + 4,333,408,828 instructions:u # 2.35 insn per cycle + 0.547415858 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:85056) (512y: 24) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 1.85880227405429e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.452710e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.455265e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.455265e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.713644 sec - 1,367,244,738 cycles # 1.907 GHz - 2,200,694,530 instructions # 1.61 insn per cycle - 0.717609354 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3502) (512y: 32) (512z:83441) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.401340e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.401916e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.401916e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.946834e-03 +- 4.941266e-03 ) GeV^-6 +TOTAL : 0.378898 sec + 1,055,214,488 cycles:u # 2.774 GHz + 2,184,910,519 instructions:u # 2.07 insn per cycle + 0.380846680 seconds time elapsed +=Symbols in 
CPPProcess.o= (~sse4: 0) (avx2: 3500) (512y: 33) (512z:83441) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 7bedc1f54b..9b16beec79 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_18:48:30 +DATE: 2023-10-25_18:25:30 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.689047e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.689716e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.689840e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.165623 sec - 7,623,672,543 cycles # 3.068 GHz - 16,620,823,162 instructions # 2.18 insn per cycle - 2.541779519 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.116092e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.116360e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.116394e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.384767 sec - 11,295,486,944 cycles # 3.039 GHz - 26,143,309,789 instructions # 2.31 insn per cycle - 3.772489660 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 9.872263e-03 -Avg ME (F77/CUDA) = 9.8722599015656498E-003 -Relative difference = 3.1385249252060663e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.338757e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.339014e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.339014e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.354208 sec - 19,488,655,111 cycles # 3.066 GHz - 54,285,293,279 instructions # 2.79 insn per cycle - 6.358206624 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:31983) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 8.641941e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.642182e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.642182e+01 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 6.112149 sec + 21,300,997,014 cycles:u # 3.484 GHz + 54,249,790,190 instructions:u # 2.55 insn per cycle + 6.114042159 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:31979) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.580188e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.580272e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.580272e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.350145 sec - 9,541,746,297 cycles # 2.846 GHz - 26,114,002,349 instructions # 2.74 insn per cycle - 3.354075604 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.952820e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.952935e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.952935e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.705908 sec + 9,430,700,860 cycles:u # 3.483 GHz + 26,089,170,604 instructions:u # 2.77 insn per cycle + 2.707618648 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:95979) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.673808e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.674247e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.674247e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.442586 sec - 4,186,759,479 cycles # 2.896 GHz - 9,337,503,071 instructions # 2.23 insn per cycle - 1.446618657 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.439277e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.439874e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.439874e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 
) GeV^-6 +TOTAL : 1.191765 sec + 4,032,660,347 cycles:u # 3.380 GHz + 9,312,691,267 instructions:u # 2.31 insn per cycle + 1.193592027 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84147) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.207665e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.208316e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.208316e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.260207 sec - 3,641,116,614 cycles # 2.881 GHz - 8,312,794,650 instructions # 2.28 insn per cycle - 1.264172407 seconds time elapsed +OMP threads / `nproc 
--all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 5.042412e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.043185e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.043185e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.049435 sec + 3,549,337,707 cycles:u # 3.378 GHz + 8,289,077,085 instructions:u # 2.34 insn per cycle + 1.051272776 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:83817) (512y: 79) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.791929e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.792527e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.792527e+02 ) 
sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.397651 sec - 2,653,173,022 cycles # 1.895 GHz - 4,233,021,275 instructions # 1.60 insn per cycle - 1.401768259 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2292) (512y: 93) (512z:82780) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 7.254639e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.256437e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.256437e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 0.730440 sec + 2,032,807,695 cycles:u # 2.777 GHz + 4,214,224,961 instructions:u # 2.07 insn per cycle + 0.732426822 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2285) (512y: 93) (512z:82779) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index 0525b2e4c1..ef9c5416a6 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_18:49:30 +DATE: 2023-10-25_18:25:54 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.679608e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.680082e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.680202e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.167186 sec - 7,636,339,894 cycles # 3.072 GHz - 15,813,775,134 instructions # 2.07 insn per cycle - 2.542843200 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.107918e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.108184e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.108215e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.395689 sec - 11,403,686,722 cycles # 3.066 GHz - 26,502,494,448 instructions # 2.32 insn per cycle - 3.775763409 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 9.872263e-03 -Avg ME (F77/CUDA) = 9.8722599015656498E-003 -Relative difference = 3.1385249252060663e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.358962e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.359192e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.359192e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.333189 sec - 19,421,629,325 cycles # 3.065 GHz - 54,272,919,506 instructions # 2.79 insn per cycle - 6.337193550 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32142) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 9.597352e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.597647e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.597647e+01 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 5.503438 sec + 19,183,730,675 cycles:u # 3.485 GHz + 54,254,374,083 instructions:u # 2.83 insn per cycle + 5.505347088 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32422) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.573591e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.573676e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.573676e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.359769 sec - 9,491,435,902 cycles # 2.826 GHz - 26,031,969,325 instructions # 2.74 insn per cycle - 3.363778642 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.964366e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.964483e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.964483e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.689963 sec + 9,374,659,056 cycles:u # 3.483 GHz + 26,004,286,936 instructions:u # 2.77 insn per cycle + 2.691715090 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:95858) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.730901e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.731358e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.731358e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.420536 sec - 4,117,938,873 cycles # 2.893 GHz - 9,317,350,688 instructions # 2.26 insn per cycle - 1.424344563 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.493450e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.494038e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.494038e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 
) GeV^-6 +TOTAL : 1.177107 sec + 3,983,648,475 cycles:u # 3.380 GHz + 9,292,970,405 instructions:u # 2.33 insn per cycle + 1.178850204 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:83787) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.227741e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.228352e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.228352e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.254191 sec - 3,641,355,182 cycles # 2.896 GHz - 8,309,383,106 instructions # 2.28 insn per cycle - 1.258235043 seconds time elapsed +OMP threads / `nproc 
--all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 5.091865e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.092631e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.092631e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.038968 sec + 3,515,865,981 cycles:u # 3.380 GHz + 8,285,400,038 instructions:u # 2.36 insn per cycle + 1.040689874 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:83306) (512y: 229) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.820448e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.821095e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.821095e+02 ) 
sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.387479 sec - 2,638,179,282 cycles # 1.897 GHz - 4,231,949,116 instructions # 1.60 insn per cycle - 1.391365284 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1731) (512y: 175) (512z:82815) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 7.348142e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.349885e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.349885e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 0.720620 sec + 2,007,777,807 cycles:u # 2.781 GHz + 4,213,120,277 instructions:u # 2.10 insn per cycle + 0.722319298 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1729) (512y: 175) (512z:82792) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index db66144b99..c78ea1b1f4 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_18:43:12 +DATE: 2023-10-25_18:23:40 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.996510e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.551380e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.892828e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.438393 sec - 1,953,535,468 cycles # 3.006 GHz - 2,779,051,405 instructions # 1.42 insn per cycle - 0.706976484 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.793351e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.668509e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.060188e+07 ) sec^-1 -MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 -TOTAL : 0.516556 sec - 2,254,798,816 cycles # 3.015 GHz - 3,256,611,216 instructions # 1.44 insn per cycle - 0.804635125 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.622436e-01 -Avg ME (F77/CUDA) = 0.56224343220024076 -Relative difference = 2.984467216677476e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.142180e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.166897e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.166897e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 1.455956 sec - 4,526,705,701 cycles # 3.102 GHz - 12,813,772,224 instructions # 2.83 insn per cycle - 1.459879572 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.437753e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.477972e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.477972e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 1.165368 sec + 4,054,699,564 cycles:u # 3.474 GHz + 12,823,868,306 instructions:u # 3.16 insn per cycle + 1.167587268 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 732) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 2.9844565299804477e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.059229e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.139815e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.139815e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.816563 sec - 2,541,682,069 cycles # 3.100 GHz - 7,194,219,151 instructions # 2.83 insn per cycle - 0.820635450 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.604820e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.735660e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.735660e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 0.653112 sec + 2,269,463,429 cycles:u # 3.467 GHz + 7,203,431,560 instructions:u # 3.17 insn per cycle + 0.655053107 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3150) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 2.9844565299804477e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.555525e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.809689e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.809689e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.482439 sec - 1,365,016,241 cycles # 2.809 GHz - 2,962,982,028 instructions # 2.17 insn per cycle - 0.486447941 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.864183e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.322274e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.322274e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) 
GeV^-2 +TOTAL : 0.361048 sec + 1,218,175,859 cycles:u # 3.358 GHz + 2,970,266,811 instructions:u # 2.44 insn per cycle + 0.363052441 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3017) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 2.9844659193456305e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.042576e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.357609e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.357609e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.426283 sec - 1,250,204,594 cycles # 2.908 GHz - 2,816,555,243 instructions # 2.25 insn per cycle - 0.430386207 seconds time elapsed +OMP threads / `nproc --all` = 
1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 5.217957e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.759741e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.759741e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 0.338661 sec + 1,142,726,892 cycles:u # 3.358 GHz + 2,816,190,155 instructions:u # 2.46 insn per cycle + 0.340687650 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2780) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 2.9844659193456305e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.853383e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.013794e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.013794e+05 ) sec^-1 
-MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.597884 sec - 1,199,308,383 cycles # 1.995 GHz - 1,804,468,596 instructions # 1.50 insn per cycle - 0.601975092 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1380) (512y: 106) (512z: 2270) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 5.137635e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.646009e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.646009e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 0.345214 sec + 986,195,561 cycles:u # 2.843 GHz + 1,801,659,388 instructions:u # 1.83 insn per cycle + 0.347223761 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1375) (512y: 106) (512z: 2270) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index 55664f3ef7..056b4b9596 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -36,77 +36,31 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_19:05:04 +DATE: 2023-10-25_18:43:47 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.715956e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.423473e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.423473e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.466815 sec - 2,046,266,197 cycles # 2.991 GHz - 3,039,133,939 instructions # 1.49 insn per cycle - 0.742578114 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.437711e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.623194e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.623194e+07 ) sec^-1 -MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 -TOTAL : 0.730298 sec - 2,928,685,026 cycles # 3.010 GHz - 4,469,035,726 instructions # 1.53 insn per cycle - 1.030829307 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.622436e-01 -Avg ME (F77/CUDA) = 0.56224343220024076 -Relative difference = 2.984467216677476e-07 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.130473e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.155364e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.155364e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 1.477047 sec - 4,559,807,823 cycles # 3.080 GHz - 12,820,937,643 instructions # 2.81 insn per cycle - 1.481171657 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.440207e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.480666e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.480666e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 1.166780 sec + 4,056,392,494 cycles:u # 3.471 GHz + 12,832,877,645 instructions:u # 3.16 insn per cycle + 1.169040681 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 732) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -117,23 +71,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.037342e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.116140e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.116140e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.831756 sec - 2,576,245,613 cycles # 3.084 GHz - 7,244,217,190 instructions # 2.81 insn per cycle - 0.836091661 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.598283e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.727901e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.727901e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 0.659249 sec + 2,286,538,153 cycles:u # 3.459 GHz + 7,247,797,470 instructions:u # 3.17 insn per cycle + 0.661412134 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3150) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -144,23 +98,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.548861e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.803617e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.803617e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.489750 sec - 1,401,953,569 cycles # 2.841 GHz - 3,012,124,484 instructions # 2.15 insn per cycle - 0.494067218 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.828667e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.280660e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.280660e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 
6.551194e+04 ) GeV^-2 +TOTAL : 0.368356 sec + 1,239,463,042 cycles:u # 3.348 GHz + 3,015,665,250 instructions:u # 2.43 insn per cycle + 0.370564776 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3017) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -171,23 +125,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.888848e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.196973e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.196973e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.451008 sec - 1,291,042,478 cycles # 2.840 GHz - 2,867,771,305 instructions # 2.22 insn per cycle - 0.455219321 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 5.179908e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.703056e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.703056e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 0.345463 sec + 1,160,847,625 cycles:u # 3.342 GHz + 2,861,588,426 instructions:u # 2.47 insn per cycle + 0.347703208 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2780) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -198,23 +152,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.837117e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.990188e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.990188e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.606364 sec - 1,227,732,873 cycles # 2.013 GHz - 1,842,233,991 instructions # 1.50 insn per cycle - 0.610509212 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1380) (512y: 106) (512z: 2270) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 5.103280e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.606771e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.606771e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 0.352148 sec + 1,005,187,952 cycles:u # 2.839 GHz + 1,835,830,680 instructions:u # 1.83 insn per cycle + 0.354435218 seconds time elapsed 
+=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1375) (512y: 106) (512z: 2270) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index b5138c5dae..2f0acc46f1 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_18:43:29 +DATE: 2023-10-25_18:23:45 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.920168e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.374674e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.702657e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.437720 sec - 1,950,126,017 cycles # 3.006 GHz - 2,773,154,214 instructions # 1.42 insn per cycle - 0.705954337 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.766528e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.558717e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.939267e+07 ) sec^-1 -MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 -TOTAL : 0.513041 sec - 2,254,005,524 cycles # 3.033 GHz - 3,263,301,476 instructions # 1.45 insn per cycle - 0.800914962 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.622436e-01 -Avg ME (F77/CUDA) = 0.56224343220024076 -Relative difference = 2.984467216677476e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.153195e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.178574e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.178574e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 1.442048 sec - 4,474,102,808 cycles # 3.096 GHz - 12,693,000,655 instructions # 2.84 insn per cycle - 1.446019473 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.459023e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.500620e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.500620e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 1.148633 sec + 3,996,615,161 cycles:u # 3.474 GHz + 12,702,510,463 instructions:u # 3.18 insn per cycle + 1.150829827 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 687) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 2.9844565299804477e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.053414e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.135311e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.135311e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.819184 sec - 2,493,704,947 cycles # 3.032 GHz - 7,048,175,291 instructions # 2.83 insn per cycle - 0.823254604 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.646082e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.781072e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.781072e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 0.643530 sec + 2,235,534,308 cycles:u # 3.465 GHz + 7,067,656,593 instructions:u # 3.16 insn per cycle + 0.645546900 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2966) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 2.9844565299804477e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.165376e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.368230e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.368230e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.539826 sec - 1,467,619,890 cycles # 2.700 GHz - 3,195,865,906 instructions # 2.18 insn per cycle - 0.544230401 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.483018e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.870529e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.870529e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) 
GeV^-2 +TOTAL : 0.389493 sec + 1,314,840,088 cycles:u # 3.361 GHz + 3,202,694,345 instructions:u # 2.44 insn per cycle + 0.391531874 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3078) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 2.9844659193456305e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.602530e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.851517e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.851517e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.475458 sec - 1,396,242,750 cycles # 2.915 GHz - 3,099,467,840 instructions # 2.22 insn per cycle - 0.479442989 seconds time elapsed +OMP threads / `nproc --all` = 
1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.661378e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.079783e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.079783e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 0.376002 sec + 1,269,191,597 cycles:u # 3.360 GHz + 3,098,873,295 instructions:u # 2.44 insn per cycle + 0.378031092 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2785) (512y: 257) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 2.9844659193456305e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.759805e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.905242e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.905242e+05 ) sec^-1 
-MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.616827 sec - 1,241,766,049 cycles # 2.002 GHz - 2,069,716,754 instructions # 1.67 insn per cycle - 0.620974760 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 5.011584e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.504113e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.504113e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 0.352637 sec + 1,005,434,854 cycles:u # 2.839 GHz + 2,068,533,599 instructions:u # 2.06 insn per cycle + 0.354543034 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1197) (512y: 194) (512z: 2426) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 8a44a3dd20..0038e621e4 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_18:43:45 +DATE: 2023-10-25_18:23:51 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.953088e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.256905e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.378672e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.402886e+01 +- 1.677500e+01 ) GeV^-2 -TOTAL : 0.434393 sec - 1,939,780,167 cycles # 3.008 GHz - 2,743,949,203 instructions # 1.41 insn per cycle - 0.703990460 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 168 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.221116e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.860706e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.982864e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.166198e+02 +- 2.517590e+02 ) GeV^-2 -TOTAL : 0.466982 sec - 2,071,697,565 cycles # 3.013 GHz - 2,925,920,597 instructions # 1.41 insn per cycle - 0.745407679 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.619520e-01 -Avg ME (F77/CUDA) = 0.56225629328206139 -Relative difference = 0.0005414933696496947 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.175069e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.202207e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.202207e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 -TOTAL : 1.414014 sec - 4,387,994,343 cycles # 3.096 GHz - 12,757,087,191 instructions # 2.91 insn per cycle - 1.417904644 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.514673e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.552023e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.552023e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.678308e+04 +- 6.638991e+04 ) GeV^-2 +TOTAL : 1.106158 sec + 3,843,430,074 cycles:u # 3.469 GHz + 12,745,964,839 instructions:u # 3.32 insn per cycle + 1.108356847 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 693) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 1.714833339642312e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.258633e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.477473e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.477473e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 -TOTAL : 0.522116 sec - 1,618,126,198 cycles # 3.079 GHz - 4,232,277,564 instructions # 2.62 insn per cycle - 0.526044496 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.067589e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.379074e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.379074e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.678307e+04 +- 6.638990e+04 ) GeV^-2 +TOTAL : 0.425688 sec + 1,477,070,298 cycles:u # 3.457 GHz + 4,221,584,785 instructions:u # 2.86 insn per cycle + 0.427604681 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3709) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 4.180373005172264e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.608070e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.524156e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.524156e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.267436 sec - 793,046,821 cycles # 2.927 GHz - 1,796,478,483 instructions # 2.27 insn per cycle - 0.271487422 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 8.565999e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.964695e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.964695e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.763173e+04 +- 6.724066e+04 ) GeV^-2 
+TOTAL : 0.215287 sec + 725,668,773 cycles:u # 3.348 GHz + 1,780,142,837 instructions:u # 2.45 insn per cycle + 0.217151599 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3614) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 6.001494295464523e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.057162e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.116228e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.116228e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.251834 sec - 743,002,845 cycles # 2.913 GHz - 1,717,820,666 instructions # 2.31 insn per cycle - 0.255754900 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 
+EvtsPerSec[Rmb+ME] (23) = ( 9.322151e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.079867e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.079867e+06 ) sec^-1 +MeanMatrixElemValue = ( 6.763173e+04 +- 6.724066e+04 ) GeV^-2 +TOTAL : 0.198919 sec + 669,793,070 cycles:u # 3.341 GHz + 1,693,786,020 instructions:u # 2.53 insn per cycle + 0.200815058 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3443) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 6.001494295464523e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.409485e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.023233e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.023233e+05 ) sec^-1 -MeanMatrixElemValue 
= ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.324434 sec - 678,869,673 cycles # 2.072 GHz - 1,206,887,131 instructions # 1.78 insn per cycle - 0.328433910 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2007) (512y: 32) (512z: 2493) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 9.742845e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.143134e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.143134e+06 ) sec^-1 +MeanMatrixElemValue = ( 6.763172e+04 +- 6.724065e+04 ) GeV^-2 +TOTAL : 0.192952 sec + 558,318,676 cycles:u # 2.871 GHz + 1,180,879,475 instructions:u # 2.12 insn per cycle + 0.194841259 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 38) (512z: 2493) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index 35147cd718..801d12d22e 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -36,77 +36,31 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_19:05:21 +DATE: 2023-10-25_18:43:52 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.575149e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.561751e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.561751e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.419752e+01 +- 1.682900e+01 ) GeV^-2 -TOTAL : 0.447090 sec - 2,006,667,406 cycles # 3.008 GHz - 2,921,553,347 instructions # 1.46 insn per cycle - 0.724337546 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 168 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.387556e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.246561e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.246561e+07 ) sec^-1 -MeanMatrixElemValue = ( 4.349385e+02 +- 2.541442e+02 ) GeV^-2 -TOTAL : 0.606579 sec - 2,519,662,089 cycles # 3.025 GHz - 3,861,788,300 instructions # 1.53 insn per cycle - 0.890689404 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.619520e-01 -Avg ME (F77/CUDA) = 0.56225629328206139 -Relative difference = 0.0005414933696496947 -OK (relative difference <= 5E-3) +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.174105e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.201186e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.201186e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 -TOTAL : 1.417963 sec - 4,402,481,360 cycles # 3.098 GHz - 12,761,539,634 instructions # 2.90 insn per cycle - 1.421844145 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.514995e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.552275e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.552275e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.678308e+04 +- 6.638991e+04 ) GeV^-2 +TOTAL : 1.109293 sec + 3,853,846,022 cycles:u # 3.472 GHz + 12,751,320,522 instructions:u # 3.31 insn per cycle + 1.111546754 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 693) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -117,23 +71,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.229854e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.445494e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.445494e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 -TOTAL : 0.530536 sec - 1,636,187,995 cycles # 3.064 GHz - 4,280,682,276 instructions # 2.62 insn per cycle - 0.534503101 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.079923e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.363231e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.363231e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.678307e+04 +- 6.638990e+04 ) GeV^-2 +TOTAL : 0.427203 sec + 1,481,245,124 cycles:u # 3.454 GHz + 4,266,979,277 instructions:u # 2.88 insn per cycle + 0.429156945 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3709) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -144,23 +98,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.470080e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.344369e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.344369e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.277101 sec - 811,784,719 cycles # 2.894 GHz - 1,833,505,900 instructions # 2.26 insn per cycle - 0.281119088 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 8.556684e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.905955e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.905955e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.763173e+04 +- 
6.724066e+04 ) GeV^-2 +TOTAL : 0.218127 sec + 733,830,052 cycles:u # 3.339 GHz + 1,814,316,830 instructions:u # 2.47 insn per cycle + 0.220133581 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3614) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -171,23 +125,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.578285e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.554614e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.554614e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.273667 sec - 760,813,920 cycles # 2.744 GHz - 1,755,015,790 instructions # 2.31 insn per cycle - 0.277772969 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 9.250743e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.069445e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.069445e+06 ) sec^-1 +MeanMatrixElemValue = ( 6.763173e+04 +- 6.724066e+04 ) GeV^-2 +TOTAL : 0.202951 sec + 682,021,599 cycles:u # 3.334 GHz + 1,727,956,692 instructions:u # 2.53 insn per cycle + 0.204910557 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3443) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -198,23 +152,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.356513e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.945123e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.945123e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.331761 sec - 698,245,569 cycles # 2.083 GHz - 1,248,346,490 instructions # 1.79 insn per cycle - 0.335801876 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2007) (512y: 32) (512z: 2493) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 9.660570e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.127875e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.127875e+06 ) sec^-1 +MeanMatrixElemValue = ( 6.763172e+04 +- 6.724065e+04 ) GeV^-2 +TOTAL : 0.197120 sec + 571,160,372 cycles:u # 2.873 GHz + 1,219,652,248 instructions:u # 2.14 insn per cycle + 0.199110775 seconds time elapsed +=Symbols 
in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 38) (512z: 2493) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index da1ead0f77..a9290b7881 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_18:44:02 +DATE: 2023-10-25_18:23:55 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.781171e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.253056e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.376604e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.402886e+01 +- 1.677500e+01 ) GeV^-2 -TOTAL : 0.437609 sec - 1,935,685,013 cycles # 2.990 GHz - 2,706,812,030 instructions # 1.40 insn per cycle - 0.705999265 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 162 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.205131e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.854735e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.976134e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.166198e+02 +- 2.517590e+02 ) GeV^-2 -TOTAL : 0.466415 sec - 2,068,328,114 cycles # 3.010 GHz - 2,987,091,963 instructions # 1.44 insn per cycle - 0.744952769 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.619520e-01 -Avg ME (F77/CUDA) = 0.56225629328206139 -Relative difference = 0.0005414933696496947 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.180324e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.207350e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.207350e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 -TOTAL : 1.407934 sec - 4,364,873,703 cycles # 3.093 GHz - 12,656,518,331 instructions # 2.90 insn per cycle - 1.411910028 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.531080e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.569239e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.569239e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.678308e+04 +- 6.638991e+04 ) GeV^-2 +TOTAL : 1.094124 sec + 3,808,409,057 cycles:u # 3.475 GHz + 12,645,578,171 instructions:u # 3.32 insn per cycle + 1.096210625 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 1.714833339642312e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.586065e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.857911e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.857911e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 -TOTAL : 0.475849 sec - 1,476,787,317 cycles # 3.082 GHz - 4,120,727,484 instructions # 2.79 insn per cycle - 0.479813364 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.560755e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.917509e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.917509e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.678307e+04 +- 6.638990e+04 ) GeV^-2 +TOTAL : 0.381890 sec + 1,325,439,293 cycles:u # 3.457 GHz + 4,110,059,235 instructions:u # 3.10 insn per cycle + 0.383809713 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3414) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 4.180373005172264e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.086145e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.608974e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.608974e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.341638 sec - 1,007,266,248 cycles # 2.920 GHz - 2,124,817,247 instructions # 2.11 insn per cycle - 0.345623716 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 6.684337e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.501881e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.501881e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.763173e+04 +- 6.724066e+04 ) 
GeV^-2 +TOTAL : 0.269044 sec + 908,090,394 cycles:u # 3.356 GHz + 2,108,200,666 instructions:u # 2.32 insn per cycle + 0.270961294 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4206) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 6.001494295464523e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.281396e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.848165e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.848165e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.329501 sec - 970,497,596 cycles # 2.916 GHz - 2,043,945,912 instructions # 2.11 insn per cycle - 0.333450865 seconds time elapsed +OMP threads / `nproc --all` = 1 / 
64 +EvtsPerSec[Rmb+ME] (23) = ( 7.055180e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.882580e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.882580e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.763173e+04 +- 6.724066e+04 ) GeV^-2 +TOTAL : 0.255242 sec + 861,120,176 cycles:u # 3.354 GHz + 2,019,447,283 instructions:u # 2.35 insn per cycle + 0.257129923 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4013) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 6.001494295464523e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.073132e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.415220e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.415220e+05 ) sec^-1 
-MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.424070 sec - 856,736,633 cycles # 2.004 GHz - 1,573,705,553 instructions # 1.84 insn per cycle - 0.428192842 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2446) (512y: 16) (512z: 2998) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 7.012233e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.853713e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.853713e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.763172e+04 +- 6.724065e+04 ) GeV^-2 +TOTAL : 0.258506 sec + 740,460,053 cycles:u # 2.847 GHz + 1,548,901,955 instructions:u # 2.09 insn per cycle + 0.260458676 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2526) (512y: 22) (512z: 2998) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index a1190251f1..7270b50cac 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_18:44:18 +DATE: 2023-10-25_18:24:00 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.018464e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.595321e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.946310e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.439342 sec - 1,951,736,780 cycles # 3.004 GHz - 2,769,340,845 instructions # 1.42 insn per cycle - 0.707751210 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.803845e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.713692e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.109529e+07 ) sec^-1 -MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 -TOTAL : 0.513063 sec - 2,243,341,028 cycles # 3.023 GHz - 3,247,351,075 instructions # 1.45 insn per cycle - 0.800783669 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.622436e-01 -Avg ME (F77/CUDA) = 0.56224344354681244 -Relative difference = 2.782658397826986e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.141045e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.165652e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.165652e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 1.457233 sec - 4,537,826,727 cycles # 3.107 GHz - 12,784,913,374 instructions # 2.82 insn per cycle - 1.461163978 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.426490e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.466121e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.466121e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 1.174486 sec + 4,086,390,900 cycles:u # 3.474 GHz + 12,795,023,810 instructions:u # 3.13 insn per cycle + 1.176666020 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 2.608483884671339e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.061046e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.141573e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.141573e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.815514 sec - 2,537,540,226 cycles # 3.099 GHz - 7,116,439,666 instructions # 2.80 insn per cycle - 0.819589417 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.610669e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.741846e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.741846e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 0.651639 sec + 2,264,533,183 cycles:u # 3.467 GHz + 7,125,840,013 instructions:u # 3.15 insn per cycle + 0.653615825 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3215) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 2.608483884671339e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.732172e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.005113e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.005113e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.460418 sec - 1,348,361,279 cycles # 2.906 GHz - 2,936,931,800 instructions # 2.18 insn per cycle - 0.464548729 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3174) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.967395e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.448723e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
5.448723e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 0.354118 sec + 1,194,563,246 cycles:u # 3.358 GHz + 2,944,091,699 instructions:u # 2.46 insn per cycle + 0.356096854 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3175) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 2.777561258016791e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.149560e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.481402e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.481402e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.416544 sec - 1,218,162,673 cycles # 2.902 GHz - 2,791,024,677 instructions # 2.29 
insn per cycle - 0.420753319 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 5.337763e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.892556e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.892556e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 0.331115 sec + 1,117,959,861 cycles:u # 3.360 GHz + 2,790,321,482 instructions:u # 2.50 insn per cycle + 0.333046991 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2938) (512y: 110) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 2.777561258016791e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.487755e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.613101e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.613101e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.683117 sec - 1,240,069,199 cycles # 1.806 GHz - 1,831,774,203 instructions # 1.48 insn per cycle - 0.687252060 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1728) (512y: 114) (512z: 2312) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.915105e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.382882e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.382882e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 0.359750 sec + 1,026,522,523 cycles:u # 2.841 GHz + 1,828,751,628 instructions:u # 1.78 insn per cycle + 0.361734455 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1723) (512y: 114) (512z: 2312) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index 2a285d3003..f252d093ae 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -36,64 +36,30 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_18:44:34 +DATE: 2023-10-25_18:24:05 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.964309e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.404642e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.725368e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.438377 sec - 1,959,656,771 cycles # 3.015 GHz - 2,794,102,152 instructions # 1.43 insn per cycle - 0.707326409 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.773177e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.592536e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.977031e+07 ) sec^-1 -MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 -TOTAL : 0.515907 sec - 2,236,298,354 cycles # 2.994 GHz - 3,229,465,087 instructions # 1.44 insn per cycle - 0.804040631 seconds time elapsed -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.622436e-01 -Avg ME (F77/CUDA) = 0.56224344354681244 -Relative difference = 2.782658397826986e-07 -OK (relative difference <= 5E-3) +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.106563e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.131055e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.131055e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 1.502633 sec - 4,502,084,245 cycles # 2.989 GHz - 12,668,944,796 instructions # 2.81 insn per cycle - 1.506842459 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 1.452248e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.493296e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.493296e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 1.153491 sec + 4,014,500,686 cycles:u # 3.475 GHz + 12,678,908,030 instructions:u # 3.16 insn per cycle + 1.155611467 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 659) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -103,23 +69,23 @@ Relative difference = 2.608483884671339e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.101140e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.184323e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.184323e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.800328 sec - 2,487,833,256 cycles # 3.095 GHz - 6,905,789,276 instructions # 2.78 insn per cycle - 0.804489667 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 2.643079e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.777685e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.777685e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 0.644384 sec + 2,237,562,646 cycles:u # 3.463 GHz + 6,915,339,109 instructions:u # 3.09 insn per cycle + 0.646514361 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3036) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -129,23 +95,23 @@ Relative difference = 2.608483884671339e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.392821e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.617920e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.617920e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.504314 sec - 1,480,047,698 cycles # 2.915 GHz - 3,168,067,665 instructions # 2.14 insn per cycle - 0.508419797 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3284) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.501874e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.892092e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
4.892092e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 0.387750 sec + 1,309,458,986 cycles:u # 3.363 GHz + 3,175,203,936 instructions:u # 2.42 insn per cycle + 0.389703740 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3285) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -155,23 +121,23 @@ Relative difference = 2.777561258016791e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.646089e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.903192e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.903192e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.470441 sec - 1,380,675,105 cycles # 2.913 GHz - 3,040,126,384 instructions # 2.20 
insn per cycle - 0.474602540 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.706458e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.134392e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.134392e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 0.372552 sec + 1,257,581,323 cycles:u # 3.361 GHz + 3,039,574,412 instructions:u # 2.42 insn per cycle + 0.374531872 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2936) (512y: 265) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -181,23 +147,23 @@ Relative difference = 2.777561258016791e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.746832e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.887698e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.887698e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.620483 sec - 1,249,575,366 cycles # 2.003 GHz - 2,003,971,184 instructions # 1.60 insn per cycle - 0.624635502 seconds time elapsed +OMP threads / `nproc --all` = 1 / 64 +EvtsPerSec[Rmb+ME] (23) = ( 4.923796e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.399988e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.399988e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 +TOTAL : 0.358523 sec + 1,021,718,285 cycles:u # 2.838 GHz + 2,002,976,357 instructions:u # 1.96 insn per cycle + 0.360403909 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1520) (512y: 202) (512z: 2499) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. +[ PASSED ] 3 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/throughputX.sh b/epochX/cudacpp/tput/throughputX.sh index bd656c5b93..897feb1c09 100755 --- a/epochX/cudacpp/tput/throughputX.sh +++ b/epochX/cudacpp/tput/throughputX.sh @@ -565,7 +565,7 @@ function runNcuReq() { set +x } -if nvidia-smi -L > /dev/null 2>&1; then gpuTxt="$(nvidia-smi -L | wc -l)x $(nvidia-smi -L | awk '{print $3,$4}' | sort -u)"; else gpuTxt=none; fi +#if nvidia-smi -L > /dev/null 2>&1; then gpuTxt="$(nvidia-smi -L | wc -l)x $(nvidia-smi -L | awk '{print $3,$4}' | sort -u)"; else gpuTxt=none; fi if [ "${unames}" == "Darwin" ]; then cpuTxt=$(sysctl -h machdep.cpu.brand_string) cpuTxt=${cpuTxt/machdep.cpu.brand_string: } From 93f29784929a9ab4b96446a5e60eee9b639b5361 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 11:08:49 +0200 Subject: [PATCH 004/119] [oct23av] (TEMPORARY TESTS ON PLATINUM) rerun 18 tmad alltees, all ok but no GPU (olgpu-03 Platinum8362 el8 including downfall mitigation) NB: 512z mode is faster than 512y on this Platinum node (2 FMA units) STARTED AT Wed Oct 25 18:48:27 CEST 2023 ENDED AT Wed Oct 25 22:37:43 CEST 2023 Status=0 20 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt 20 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt 20 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt 20 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt 20 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt 20 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt 20 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt 20 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt 20 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt 20 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt 20 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt 20 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt 20 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt 20 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt 20 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt [avalassi@olgpu-03 gcc11.2/cvmfs] /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp> cat /etc/redhat-release Red Hat Enterprise Linux release 8.8 (Ootpa) [avalassi@olgpu-03 gcc11.2/cvmfs] /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp> grep 'stepping\|model\|microco$ microcode : 0xd0003a5 model : 106 model name : Intel(R) Xeon(R) Platinum 8362 CPU @ 2.80GHz stepping : 6 --- .../log_eemumu_mad_d_inl0_hrd0.txt | 276 +++++---------- .../log_eemumu_mad_f_inl0_hrd0.txt | 272 +++++---------- 
.../log_eemumu_mad_m_inl0_hrd0.txt | 278 +++++---------- .../log_ggtt_mad_d_inl0_hrd0.txt | 276 +++++---------- .../log_ggtt_mad_f_inl0_hrd0.txt | 276 +++++---------- .../log_ggtt_mad_m_inl0_hrd0.txt | 276 +++++---------- .../log_ggttg_mad_d_inl0_hrd0.txt | 324 +++++++----------- .../log_ggttg_mad_f_inl0_hrd0.txt | 316 ++++++----------- .../log_ggttg_mad_m_inl0_hrd0.txt | 324 +++++++----------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 296 ++++++---------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 298 ++++++---------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 300 ++++++---------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 276 +++++---------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 282 +++++---------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 272 +++++---------- .../log_gqttq_mad_d_inl0_hrd0.txt | 53 +-- .../log_gqttq_mad_f_inl0_hrd0.txt | 45 +-- .../log_gqttq_mad_m_inl0_hrd0.txt | 49 +-- 18 files changed, 1556 insertions(+), 2933 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 1a4d828546..a0afbfa04f 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -2,13 +2,13 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/e CUDACPP_BUILDDIR='.' 
- - make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + + + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y - make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' @@ -16,14 +16,14 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
@@ -33,9 +33,10 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:18:26 +DATE: 2023-10-25_18:51:18 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB +NVIDIA L4]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -51,7 +52,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -59,9 +60,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6275s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6175s - [COUNTERS] Fortran MEs ( 1 ) : 0.0099s for 8192 events => throughput is 8.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4551s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4471s + [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -76,7 +77,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -84,9 +85,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1814s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1709s - [COUNTERS] Fortran MEs ( 1 ) : 0.0105s for 8192 events => throughput is 7.82E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1247s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1167s + [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -101,7 +102,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -109,9 +110,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4443s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3343s - [COUNTERS] Fortran MEs ( 1 ) : 0.1099s for 90112 events => throughput is 8.20E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3279s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2421s + [COUNTERS] Fortran MEs ( 1 ) : 0.0857s for 90112 events => throughput is 1.05E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,7 +127,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,9 +135,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1870s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1812s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 8192 events => throughput is 1.41E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1272s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1223s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 8192 events => throughput is 1.67E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -159,7 +160,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -167,9 +168,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4079s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3422s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0657s for 90112 events => throughput is 1.37E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3014s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2486s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0528s for 90112 events => throughput is 1.71E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -180,14 +181,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919904813628E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.344246e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.772470e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.355045e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.794639e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -202,7 +203,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -210,9 +211,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1815s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1785s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.72E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1216s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1194s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.70E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -235,7 +236,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -243,9 +244,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3697s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3373s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0324s for 90112 events => throughput is 2.78E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2702s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2458s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 90112 events => throughput is 3.69E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +257,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919904813628E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.734240e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.892983e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.880467e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.144258e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -278,7 +279,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -286,9 +287,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1763s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1746s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0016s for 8192 events => throughput is 5.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1194s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1183s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0012s for 8192 events => throughput is 7.11E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -311,7 +312,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -319,9 +320,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3545s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3375s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 90112 events => throughput is 5.28E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2581s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2456s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 90112 events => throughput is 7.21E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +333,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919904813656E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.204571e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.860461e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.588414e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.542428e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -354,7 +355,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -362,9 +363,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1798s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1784s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0014s for 8192 events => throughput is 5.76E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1194s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1183s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0011s for 8192 events => throughput is 7.29E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -387,7 +388,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -395,9 +396,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3579s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3417s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0162s for 90112 events => throughput is 5.55E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2586s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2464s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0122s for 90112 events => throughput is 7.37E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +409,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919904813656E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.597781e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.021386e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.189972e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.711678e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -430,7 +431,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -438,9 +439,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1771s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1755s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0016s for 8192 events => throughput is 5.09E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1191s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1181s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 8.67E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -463,7 +464,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -471,9 +472,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3582s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3400s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0182s for 90112 events => throughput is 4.95E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2580s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2474s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 90112 events => throughput is 8.49E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +485,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919904813656E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.738607e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.789628e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.243870e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.056403e+07 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -506,97 +507,4 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 - [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.6205s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6200s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.63E+07 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.21747169064681776) and cpp (0.21747169064681776) differ by less than 2E-14 (0.0) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7581s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7534s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.93E+07 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919904813628E-002) differ by less than 2E-14 (3.3306690738754696e-16) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.604143e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.463611e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.281447e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.027562e+09 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.209260e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.077193e+09 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.272993e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** 
-Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.998178e+08 ) sec^-1 - -TEST COMPLETED +ERROR! ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' failed diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index f98575860b..2bb17552f5 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,29 +1,29 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y - make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -33,9 +33,10 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:18:43 +DATE: 2023-10-25_18:52:16 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB +NVIDIA L4]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -51,7 +52,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -59,9 +60,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.8283s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8183s - [COUNTERS] Fortran MEs ( 1 ) : 0.0100s for 8192 events => throughput is 8.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4323s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4243s + [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.03E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -76,7 +77,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -84,9 +85,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1908s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1801s - [COUNTERS] Fortran MEs ( 1 ) : 0.0107s for 8192 events => throughput is 7.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1246s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1166s + [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.03E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -101,7 +102,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -109,9 +110,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4773s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3594s - [COUNTERS] Fortran MEs ( 1 ) : 0.1178s for 90112 events => throughput is 7.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3273s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2416s + [COUNTERS] Fortran MEs ( 1 ) : 0.0857s for 90112 events => throughput is 1.05E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,7 +127,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,9 +135,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166140620297] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1859s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1802s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0057s for 8192 events => throughput is 1.43E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1255s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1212s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0042s for 8192 events => throughput is 1.94E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -159,7 +160,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -167,9 +168,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501907784661565E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4026s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3390s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0636s for 90112 events => throughput is 1.42E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2932s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2469s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0462s for 90112 events => throughput is 1.95E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -180,14 +181,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501907784661565E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.366198e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.077247e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.396541e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.101215e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -202,7 +203,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -210,9 +211,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165549479658] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1780s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1763s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0017s for 8192 events => throughput is 4.75E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1196s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1184s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0012s for 8192 events => throughput is 6.78E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -235,7 +236,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -243,9 +244,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905692857932E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3798s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3613s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0185s for 90112 events => throughput is 4.88E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2579s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2447s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0132s for 90112 events => throughput is 6.84E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +257,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501905692857932E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.893797e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.546371e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.592095e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.032570e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -278,7 +279,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -286,9 +287,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165569099927] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1811s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1803s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 9.92E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1183s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1177s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.32E+07 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -311,7 +312,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -319,9 +320,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905658047333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3614s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3514s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0100s for 90112 events => throughput is 8.98E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2525s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2456s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 90112 events => throughput is 1.30E+07 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +333,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501905658047333E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.801869e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.534617e+07 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.185551e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.574301e+07 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -354,7 +355,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -362,9 +363,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165569099927] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1801s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1793s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 1.05E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.1201s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1196s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.38E+07 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -387,7 +388,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -395,9 +396,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905658047333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3471s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3384s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 90112 events => throughput is 1.04E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.2524s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2458s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.38E+07 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +409,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501905658047333E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.124905e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.610171e+07 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.311988e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.666504e+07 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -430,7 +431,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -438,9 +439,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166431914253] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1801s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1791s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 8.83E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1184s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1179s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.52E+07 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -463,7 +464,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -471,9 +472,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501909358591468E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3552s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3451s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0101s for 90112 events => throughput is 8.91E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2531s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2466s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 90112 events => throughput is 1.39E+07 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +485,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501909358591468E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.261918e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.850271e+07 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.082919e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.039998e+07 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -506,97 +507,4 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2175 [0.21747166796068879] fbridge_mode=1 - [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5875s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5870s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.79E+07 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.21747169064681776) and cpp (0.21747166796068879) differ by less than 4E-4 (1.043176189874373e-07) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0915 [9.1501910316213061E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7572s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7527s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 90112 events => throughput is 2.00E+07 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501910316213061E-002) differ by less than 4E-4 (1.0479125034379422e-07) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.090855e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.271435e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.835160e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.510333e+09 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.894374e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.802495e+09 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.131306e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** 
-Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.434205e+08 ) sec^-1 - -TEST COMPLETED +ERROR! ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' failed diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 0d49865b9c..54f2157d62 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,29 +1,29 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum CUDACPP_BUILDDIR='.' - - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 + + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y - make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0'CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' + +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 
'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -33,9 +33,10 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:18:59 +DATE: 2023-10-25_18:53:14 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB +NVIDIA L4]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -51,7 +52,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -59,9 +60,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6278s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6178s - [COUNTERS] Fortran MEs ( 1 ) : 0.0100s for 8192 events => throughput is 8.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4418s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4339s + [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.03E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -76,7 +77,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -84,9 +85,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1782s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1684s - [COUNTERS] Fortran MEs ( 1 ) : 0.0099s for 8192 events => throughput is 8.30E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1240s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1160s + [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.03E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -101,7 +102,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -109,9 +110,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4565s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3432s - [COUNTERS] Fortran MEs ( 1 ) : 0.1134s for 90112 events => throughput is 7.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3334s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2477s + [COUNTERS] Fortran MEs ( 1 ) : 0.0856s for 90112 events => throughput is 1.05E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,7 +127,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,9 +135,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211728] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1872s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1810s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 8192 events => throughput is 1.30E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1278s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1228s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 8192 events => throughput is 1.64E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -159,7 +160,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -167,9 +168,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4079s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0667s for 90112 events => throughput is 1.35E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3026s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2480s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0547s for 90112 events => throughput is 1.65E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -180,14 +181,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919915927155E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.341399e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.708796e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.323934e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.771441e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -202,7 +203,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -210,9 +211,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211728] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1801s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1772s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.78E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1217s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1194s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.63E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -235,7 +236,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -243,9 +244,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3694s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3374s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0319s for 90112 events => throughput is 2.82E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2701s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2458s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 90112 events => throughput is 3.70E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +257,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919915927155E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.783585e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.920617e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.872248e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.132238e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -278,7 +279,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -286,9 +287,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1786s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1770s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0016s for 8192 events => throughput is 5.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1196s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1185s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0011s for 8192 events => throughput is 7.31E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -311,7 +312,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -319,9 +320,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3561s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3389s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 90112 events => throughput is 5.24E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2583s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2459s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0124s for 90112 events => throughput is 7.26E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +333,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919908700741E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.356083e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.904879e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.847494e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.704430e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -354,7 +355,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -362,9 +363,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1798s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1783s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0014s for 8192 events => throughput is 5.77E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1192s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1182s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0011s for 8192 events => throughput is 7.49E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -387,7 +388,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -395,9 +396,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3603s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3443s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0160s for 90112 events => throughput is 5.63E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2585s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2466s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0119s for 90112 events => throughput is 7.59E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +409,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919908700741E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.610629e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.065826e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.797344e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.072113e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -430,7 +431,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -438,9 +439,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1826s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1808s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.43E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1190s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1180s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 8.66E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -463,7 +464,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -471,9 +472,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3604s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3419s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0185s for 90112 events => throughput is 4.88E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2582s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2475s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0107s for 90112 events => throughput is 8.42E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +485,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919908700741E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.814394e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.780839e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.589840e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.057833e+07 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -506,97 +507,4 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2175 [0.21747169066587257] fbridge_mode=1 - [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5950s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5945s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.67E+07 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.21747169064681776) and cpp (0.21747169066587257) differ by less than 2E-4 (8.761968928183705e-11) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0915 [9.1501919911173610E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7577s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7529s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.90E+07 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919911173610E-002) differ by less than 2E-4 (6.95061785904727e-11) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.583003e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.467396e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.252357e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.030772e+09 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.251034e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.079891e+09 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.286168e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** 
-Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.995935e+08 ) sec^-1 - -TEST COMPLETED +ERROR! ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' failed diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 57c094acdf..6c8f068bce 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -5,22 +5,20 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 - make USEBUILDDIR=1 AVX=avx2 -make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -30,12 +28,15 @@ make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2023-10-25_19:19:16 +DATE: 2023-10-25_18:54:13 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB +NVIDIA L4]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -51,7 +52,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -59,9 +60,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.4367s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3846s - [COUNTERS] Fortran MEs ( 1 ) : 0.0521s for 8192 events => throughput is 1.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2539s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2152s + [COUNTERS] Fortran MEs ( 1 ) : 0.0387s for 8192 events => throughput is 2.12E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -76,7 +77,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -84,9 +85,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3291s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2810s - [COUNTERS] Fortran MEs ( 1 ) : 0.0481s for 8192 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2234s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1846s + [COUNTERS] Fortran MEs ( 1 ) : 0.0388s for 8192 events => throughput is 2.11E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -101,7 +102,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -109,9 +110,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7885s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2393s - [COUNTERS] Fortran MEs ( 1 ) : 0.5492s for 90112 events => throughput is 1.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2973s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8722s + [COUNTERS] Fortran MEs ( 1 ) : 0.4251s for 90112 events => throughput is 2.12E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,7 +127,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,9 +135,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600102] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3572s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3148s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0424s for 8192 events => throughput is 1.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2530s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2191s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0338s for 8192 events => throughput is 2.42E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -159,7 +160,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -167,9 +168,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775379] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6843s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2210s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4633s for 90112 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2715s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9026s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3689s for 90112 events => throughput is 2.44E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -180,14 +181,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775379) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.950448e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.481533e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.964951e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.488395e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -202,7 +203,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -210,9 +211,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3143s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2905s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0239s for 8192 events => throughput is 3.43E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2233s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2049s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0184s for 8192 events => throughput is 4.44E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -235,7 +236,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -243,9 +244,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775379] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4654s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2042s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2612s for 90112 events => throughput is 3.45E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.0887s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8858s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2029s for 90112 events => throughput is 4.44E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +257,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775379) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.422695e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.514435e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.385899e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.535739e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -278,7 +279,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -286,9 +287,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2964s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2820s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0144s for 8192 events => throughput is 5.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2084s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1977s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0108s for 8192 events => throughput is 7.62E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -311,7 +312,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -319,9 +320,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775393] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3564s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1960s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1604s for 90112 events => throughput is 5.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9993s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8804s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1189s for 90112 events => throughput is 7.58E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +333,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775393) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.356829e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.734700e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.465883e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.812072e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -354,7 +355,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -362,9 +363,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2947s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2816s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0130s for 8192 events => throughput is 6.29E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2075s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1973s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0101s for 8192 events => throughput is 8.08E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -387,7 +388,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -395,9 +396,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775393] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3386s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1916s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1469s for 90112 events => throughput is 6.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9909s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8792s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1117s for 90112 events => throughput is 8.07E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +409,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775393) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.955245e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.269115e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.038215e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.344819e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -430,7 +431,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -438,9 +439,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3076s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2868s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0208s for 8192 events => throughput is 3.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2105s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1990s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0115s for 8192 events => throughput is 7.15E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -463,7 +464,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -471,9 +472,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775393] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4351s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2090s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2261s for 90112 events => throughput is 3.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.0100s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8830s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1270s for 90112 events => throughput is 7.10E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +485,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775393) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.616558e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.266848e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.687418e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.348158e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -506,97 +507,4 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 - [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6890s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6885s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.45E+07 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (47.690708277600116) and cpp (47.690708277600109) differ by less than 2E-14 (1.1102230246251565e-16) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 - [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6130s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6068s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 90112 events => throughput is 1.46E+07 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775386) differ by less than 2E-14 (4.440892098500626e-16) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.179274e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.706047e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.310976e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.080551e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.327429e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.152072e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.328654e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.061951e+07 ) sec^-1 - -TEST COMPLETED +ERROR! ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' failed diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index ac65217070..7d8234ac8f 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,31 +1,31 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx CUDACPP_BUILDDIR='.' +make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y - make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -33,9 +33,10 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:19:41 +DATE: 2023-10-25_18:55:16 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB +NVIDIA L4]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -51,7 +52,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -59,9 +60,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3582s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3097s - [COUNTERS] Fortran MEs ( 1 ) : 0.0485s for 8192 events => throughput is 1.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2557s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2169s + [COUNTERS] Fortran MEs ( 1 ) : 0.0388s for 8192 events => throughput is 2.11E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -76,7 +77,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -84,9 +85,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3161s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2673s - [COUNTERS] Fortran MEs ( 1 ) : 0.0488s for 8192 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2232s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1844s + [COUNTERS] Fortran MEs ( 1 ) : 0.0387s for 8192 events => throughput is 2.12E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -101,7 +102,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -109,9 +110,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7673s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2278s - [COUNTERS] Fortran MEs ( 1 ) : 0.5395s for 90112 events => throughput is 1.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2938s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8687s + [COUNTERS] Fortran MEs ( 1 ) : 0.4251s for 90112 events => throughput is 2.12E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,7 +127,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,9 +135,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690706211693573] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3486s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3073s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0413s for 8192 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2471s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2163s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0308s for 8192 events => throughput is 2.66E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -159,7 +160,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -167,9 +168,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782418787778] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6743s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2245s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4497s for 90112 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2373s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8994s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3379s for 90112 events => throughput is 2.67E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -180,14 +181,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223782418787778) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.956288e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.763907e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.982512e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.767633e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -202,7 +203,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -210,9 +211,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690702562167019] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2999s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2833s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0166s for 8192 events => throughput is 4.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2121s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1993s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0128s for 8192 events => throughput is 6.41E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -235,7 +236,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -243,9 +244,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223778631221009] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3965s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2188s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1777s for 90112 events => throughput is 5.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.0243s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8838s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1405s for 90112 events => throughput is 6.42E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +257,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223778631221009) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.832476e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.403818e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.796478e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.434601e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -278,7 +279,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -286,9 +287,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690694055768034] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2883s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2798s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1987s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1923s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 8192 events => throughput is 1.28E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -311,7 +312,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -319,9 +320,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223775988760060] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.2853s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1921s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0933s for 90112 events => throughput is 9.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9489s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8775s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0714s for 90112 events => throughput is 1.26E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +333,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223775988760060) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.545083e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.312849e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.445429e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.330161e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -354,7 +355,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -362,9 +363,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690694055768034] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2874s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2794s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1998s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1937s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0061s for 8192 events => throughput is 1.34E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -387,7 +388,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -395,9 +396,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223775988760060] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.2777s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1907s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0870s for 90112 events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.9433s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8755s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0678s for 90112 events => throughput is 1.33E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +409,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223775988760060) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.937961e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.381207e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.006840e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.402611e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -430,7 +431,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -438,9 +439,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690698865531559] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2925s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2818s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0107s for 8192 events => throughput is 7.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1999s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1936s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 8192 events => throughput is 1.30E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -463,7 +464,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -471,9 +472,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223780255562296] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3226s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2041s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1185s for 90112 events => throughput is 7.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9471s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8766s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0705s for 90112 events => throughput is 1.28E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +485,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223780255562296) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.089065e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.345186e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.293009e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.350507e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -506,97 +507,4 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.69 [47.690703397697980] fbridge_mode=1 - [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6877s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6872s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.51E+07 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (47.690708277600116) and cpp (47.690703397697980) differ by less than 4E-4 (1.0232396019382861e-07) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 46.22 [46.223786763175951] fbridge_mode=1 - [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6161s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6107s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 90112 events => throughput is 1.66E+07 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (46.223782291775365) and cpp (46.223786763175951) differ by less than 4E-4 (9.673376699659286e-08) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.382566e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.934525e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.441859e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.769075e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.403190e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.876277e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.887286e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.439919e+07 ) sec^-1 - -TEST COMPLETED +ERROR! ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' failed diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index c3747a1448..8e4e8205ac 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -4,26 +4,26 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none - make USEBUILDDIR=1 AVX=sse4 + + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y - make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -33,9 +33,10 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:20:06 +DATE: 2023-10-25_18:56:20 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB +NVIDIA L4]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -51,7 +52,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -59,9 +60,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3588s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3101s - [COUNTERS] Fortran MEs ( 1 ) : 0.0486s for 8192 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2541s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2154s + [COUNTERS] Fortran MEs ( 1 ) : 0.0387s for 8192 events => throughput is 2.12E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -76,7 +77,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -84,9 +85,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3193s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2707s - [COUNTERS] Fortran MEs ( 1 ) : 0.0487s for 8192 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2233s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1846s + [COUNTERS] Fortran MEs ( 1 ) : 0.0387s for 8192 events => throughput is 2.11E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -101,7 +102,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -109,9 +110,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7636s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2247s - [COUNTERS] Fortran MEs ( 1 ) : 0.5388s for 90112 events => throughput is 1.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2941s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8688s + [COUNTERS] Fortran MEs ( 1 ) : 0.4252s for 90112 events => throughput is 2.12E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,7 +127,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,9 +135,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3537s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3109s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0428s for 8192 events => throughput is 1.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2550s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2206s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0344s for 8192 events => throughput is 2.38E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -159,7 +160,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -167,9 +168,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7009s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2276s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4732s for 90112 events => throughput is 1.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2838s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9056s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3782s for 90112 events => throughput is 2.38E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -180,14 +181,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223783635280988) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.932354e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.460313e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.907654e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.444312e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -202,7 +203,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -210,9 +211,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3139s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2905s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0234s for 8192 events => throughput is 3.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2228s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2045s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0183s for 8192 events => throughput is 4.48E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -235,7 +236,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -243,9 +244,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4697s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2111s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2585s for 90112 events => throughput is 3.49E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.0883s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8871s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2012s for 90112 events => throughput is 4.48E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +257,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223783635280988) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.342983e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.534476e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.391505e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.564668e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -278,7 +279,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -286,9 +287,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3006s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2861s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0145s for 8192 events => throughput is 5.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2081s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1974s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0107s for 8192 events => throughput is 7.66E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -311,7 +312,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -319,9 +320,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3584s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1999s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1584s for 90112 events => throughput is 5.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9969s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8790s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1179s for 90112 events => throughput is 7.65E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +333,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223783652032040) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.413133e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.841260e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.510615e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.885682e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -354,7 +355,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -362,9 +363,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2939s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2806s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0133s for 8192 events => throughput is 6.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2062s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1961s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0101s for 8192 events => throughput is 8.13E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -387,7 +388,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -395,9 +396,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3412s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1968s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1445s for 90112 events => throughput is 6.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9929s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8815s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1113s for 90112 events => throughput is 8.10E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +409,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223783652032040) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.210141e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.357077e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.018773e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.361362e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -430,7 +431,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -438,9 +439,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3082s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2878s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0204s for 8192 events => throughput is 4.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2093s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1980s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0113s for 8192 events => throughput is 7.28E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -463,7 +464,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -471,9 +472,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.5311s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2846s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2465s for 90112 events => throughput is 3.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.0075s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8827s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1248s for 90112 events => throughput is 7.22E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +485,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223783652032040) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.423825e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.373677e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.530096e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.436880e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -506,97 +507,4 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.69 [47.690708266690699] fbridge_mode=1 - [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6929s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6924s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.49E+07 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (47.690708277600116) and cpp (47.690708266690699) differ by less than 2E-4 (2.2875357164053867e-10) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 46.22 [46.223782303744791] fbridge_mode=1 - [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6044s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5982s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 90112 events => throughput is 1.45E+07 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (46.223782291775365) and cpp (46.223782303744791) differ by less than 2E-4 (2.5894508759449764e-10) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.130110e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.628036e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.274030e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.059654e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.301020e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.126562e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.294170e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.968784e+07 ) sec^-1 - -TEST COMPLETED +ERROR! ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' failed diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 925cf1dd8b..708cc25f0b 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,31 +1,31 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg CUDACPP_BUILDDIR='.' - - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=avx2 -make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512y +make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. @@ -33,9 +33,10 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:20:33 +DATE: 2023-10-25_18:57:24 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB +NVIDIA L4]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -51,17 +52,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5609s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2258s - [COUNTERS] Fortran MEs ( 1 ) : 0.3351s for 8192 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4284s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1564s + [COUNTERS] Fortran MEs ( 1 ) : 0.2720s for 8192 events => throughput is 3.01E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -76,17 +77,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5566s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2205s - [COUNTERS] Fortran MEs ( 1 ) : 0.3361s for 8192 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4259s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1538s + [COUNTERS] Fortran MEs ( 1 ) : 0.2721s for 8192 events => throughput is 3.01E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -101,17 +102,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.08131 [8.1310872844967921E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.0766s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3919s - [COUNTERS] Fortran MEs ( 1 ) : 3.6847s for 90112 events => throughput is 2.45E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.9925s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0006s + [COUNTERS] Fortran MEs ( 1 ) : 2.9919s for 90112 events => throughput is 3.01E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,21 +127,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196357922470791E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8617s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5333s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3285s for 8192 events => throughput is 2.49E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6643s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4056s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2588s for 8192 events => throughput is 3.17E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470791E-002) differ by less than 2E-14 (1.1102230246251565e-16) +OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195719386171234E-002) differ by less than 2E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,35 +160,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872844967963E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.2565s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6821s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.5744s for 90112 events => throughput is 2.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.1008s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2548s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.8459s for 90112 events => throughput is 3.17E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872077655597E-002) differ by less than 2E-14 (4.440892098500626e-16) +OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310872844967963E-002) differ by less than 2E-14 (4.440892098500626e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.585485e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.260360e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.602908e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.262382e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -202,21 +203,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196357922470777E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5594s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3875s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1718s for 8192 events => throughput is 4.77E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4250s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2888s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1362s for 8192 events => throughput is 6.01E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470777E-002) differ by less than 2E-14 (3.3306690738754696e-16) +OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195719386171234E-002) differ by less than 2E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -235,35 +236,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872844967921E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.4116s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5278s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8838s for 90112 events => throughput is 4.78E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6317s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1335s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.4982s for 90112 events => throughput is 6.01E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872077655555E-002) differ by less than 2E-14 (0.0) +OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310872844967921E-002) differ by less than 2E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.886187e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.185953e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.863074e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.196023e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -278,21 +279,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7195719386171206E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3866s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3020s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0845s for 8192 events => throughput is 9.69E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.2816s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2175s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0641s for 8192 events => throughput is 1.28E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470750E-002) differ by less than 2E-14 (5.551115123125783e-16) +OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195719386171206E-002) differ by less than 2E-14 (3.3306690738754696e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -311,35 +312,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872844967907E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.3806s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4438s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9368s for 90112 events => throughput is 9.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.7748s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0699s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7049s for 90112 events => throughput is 1.28E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872077655541E-002) differ by less than 2E-14 (2.220446049250313e-16) +OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310872844967907E-002) differ by less than 2E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.810175e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.303083e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.814710e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.306009e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -354,21 +355,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7195719386171206E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3697s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2950s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0747s for 8192 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2708s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2125s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0582s for 8192 events => throughput is 1.41E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470750E-002) differ by less than 2E-14 (5.551115123125783e-16) +OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195719386171206E-002) differ by less than 2E-14 (3.3306690738754696e-16) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -387,35 +388,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872844967907E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.2633s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4344s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8288s for 90112 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7043s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0630s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6413s for 90112 events => throughput is 1.41E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872077655541E-002) differ by less than 2E-14 (2.220446049250313e-16) +OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310872844967907E-002) differ by less than 2E-14 (2.220446049250313e-16) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.111782e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.443855e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.107532e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.447065e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -430,21 +431,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4288s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3258s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1030s for 8192 events => throughput is 7.95E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.2580s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2068s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0512s for 8192 events => throughput is 1.60E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470750E-002) differ by less than 2E-14 (5.551115123125783e-16) +OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195719386171234E-002) differ by less than 2E-14 (0.0) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -463,35 +464,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872844967907E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.6474s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4850s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1624s for 90112 events => throughput is 7.75E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.6189s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0547s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5642s for 90112 events => throughput is 1.60E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872077655541E-002) differ by less than 2E-14 (2.220446049250313e-16) +OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310872844967907E-002) differ by less than 2E-14 (2.220446049250313e-16) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.891678e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.631688e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.153326e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.638264e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -506,97 +507,4 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6666s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6612s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.52E+06 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470764E-002) differ by less than 2E-14 (4.440892098500626e-16) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8130s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7902s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0227s for 90112 events => throughput is 3.96E+06 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872077655597E-002) differ by less than 2E-14 (4.440892098500626e-16) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.639819e+06 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.202614e+06 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.989805e+06 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.236697e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.956986e+06 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.247459e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.990764e+06 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.731178e+06 ) sec^-1 - -TEST COMPLETED +ERROR! ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' failed diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index fa99d034ca..abeef091e4 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -2,24 +2,24 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 -make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -33,9 +33,10 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:21:14 +DATE: 2023-10-25_18:58:38 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB +NVIDIA L4]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -51,17 +52,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5523s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2203s - [COUNTERS] Fortran MEs ( 1 ) : 0.3320s for 8192 events => throughput is 2.47E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4284s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1564s + [COUNTERS] Fortran MEs ( 1 ) : 0.2720s for 8192 events => throughput is 3.01E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -76,17 +77,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5511s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2185s - [COUNTERS] Fortran MEs ( 1 ) : 0.3325s for 8192 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4258s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1538s + [COUNTERS] Fortran MEs ( 1 ) : 0.2720s for 8192 events => throughput is 3.01E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -101,17 +102,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.08131 [8.1310872844967921E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.0773s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3954s - [COUNTERS] Fortran MEs ( 1 ) : 3.6819s for 90112 events => throughput is 2.45E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.9922s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9998s + [COUNTERS] Fortran MEs ( 1 ) : 2.9925s for 90112 events => throughput is 3.01E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,21 +127,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196349725192449E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7195711188152623E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8861s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5506s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3355s for 8192 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6577s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4026s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2551s for 8192 events => throughput is 3.21E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196349725192449E-002) differ by less than 4E-4 (8.433729958845504e-08) +OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195711188152623E-002) differ by less than 4E-4 (8.434546971969326e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,35 +160,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310860682799649E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310861450156910E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.1805s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6708s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.5097s for 90112 events => throughput is 2.57E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.0573s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2500s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.8074s for 90112 events => throughput is 3.21E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310860682799649E-002) differ by less than 4E-4 (1.4013938864909647e-07) +OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310861450156910E-002) differ by less than 4E-4 (1.401388352029187e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.644874e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.313409e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.623423e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.314649e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -202,21 +203,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196334032667323E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7195695504827997E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4066s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3108s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0959s for 8192 events => throughput is 8.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3039s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2288s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0751s for 8192 events => throughput is 1.09E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196334032667323E-002) differ by less than 4E-4 (2.4578908086603235e-07) +OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195695504827997E-002) differ by less than 4E-4 (2.457036522018896e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -235,35 +236,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310847525777316E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310848293145957E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.5043s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4504s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0538s for 90112 events => throughput is 8.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.9056s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0799s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8257s for 90112 events => throughput is 1.09E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310847525777316E-002) differ by less than 4E-4 (3.0195074296468505e-07) +OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310848293145957E-002) differ by less than 4E-4 (3.0195004807609394e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.644259e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.111598e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.595838e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.106035e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -278,21 +279,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196330842071521E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7195692323432697E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3044s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2603s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0441s for 8192 events => throughput is 1.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2195s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1865s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0329s for 8192 events => throughput is 2.49E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196330842071521E-002) differ by less than 4E-4 (2.786153705525152e-07) +OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195692323432697E-002) differ by less than 4E-4 (2.7843549810224744e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -311,35 +312,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310847485320789E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310848252682449E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.9365s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4416s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4949s for 90112 events => throughput is 1.82E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4009s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0379s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3630s for 90112 events => throughput is 2.48E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310847485320789E-002) differ by less than 4E-4 (3.024482967406428e-07) +OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310848252682449E-002) differ by less than 4E-4 (3.0244768767229147e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.877444e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.547162e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.866007e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.553448e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -354,21 +355,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196330842071521E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7195692323432697E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.2968s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2571s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0397s for 8192 events => throughput is 2.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2144s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1844s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0300s for 8192 events => throughput is 2.73E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196330842071521E-002) differ by less than 4E-4 (2.786153705525152e-07) +OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195692323432697E-002) differ by less than 4E-4 (2.7843549810224744e-07) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -387,35 +388,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310847485320789E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310848252682449E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8329s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3969s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4360s for 90112 events => throughput is 2.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3630s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0322s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3307s for 90112 events => throughput is 2.72E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310847485320789E-002) differ by less than 4E-4 (3.024482967406428e-07) +OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310848252682449E-002) differ by less than 4E-4 (3.0244768767229147e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.064455e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.811806e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.134523e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.819395e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -430,21 +431,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196344068381207E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7195705534321677E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3182s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2680s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0503s for 8192 events => throughput is 1.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2030s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1786s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0245s for 8192 events => throughput is 3.35E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196344068381207E-002) differ by less than 4E-4 (1.42537126879283e-07) +OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195705534321677E-002) differ by less than 4E-4 (1.4251501656570298e-07) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -463,35 +464,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310857803543385E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310858570909916E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.9728s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4203s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5524s for 90112 events => throughput is 1.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2974s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0267s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2708s for 90112 events => throughput is 3.33E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310857803543385E-002) differ by less than 4E-4 (1.755498595379379e-07) +OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310858570909916E-002) differ by less than 4E-4 (1.7554919173878858e-07) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.608228e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.430004e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.603946e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.443942e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -506,97 +507,4 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196349366365994E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6467s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6458s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 9.73E+06 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (9.7196357922470805E-002) and cpp (9.7196349366365994E-002) differ by less than 4E-4 (8.802906814597833e-08) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310864949473968E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.7893s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7798s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0095s for 90112 events => throughput is 9.53E+06 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310864949473968E-002) differ by less than 4E-4 (8.766578696306482e-08) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.347402e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.856435e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.795868e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.305516e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.791470e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.491438e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.627451e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.615609e+07 ) sec^-1 - -TEST COMPLETED +ERROR! ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' failed diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 8d56c45efe..1cd53ced5d 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,29 +1,29 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg CUDACPP_BUILDDIR='.' - - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 -make USEBUILDDIR=1 AVX=512y +make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0'CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' + make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. @@ -33,9 +33,10 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:21:50 +DATE: 2023-10-25_18:59:51 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB +NVIDIA L4]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -51,17 +52,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5526s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2199s - [COUNTERS] Fortran MEs ( 1 ) : 0.3328s for 8192 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4299s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1578s + [COUNTERS] Fortran MEs ( 1 ) : 0.2721s for 8192 events => throughput is 3.01E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -76,17 +77,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5518s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2183s - [COUNTERS] Fortran MEs ( 1 ) : 0.3335s for 8192 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4263s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1543s + [COUNTERS] Fortran MEs ( 1 ) : 0.2720s for 8192 events => throughput is 3.01E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -101,17 +102,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.08131 [8.1310872844967921E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.0409s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3789s - [COUNTERS] Fortran MEs ( 1 ) : 3.6620s for 90112 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.9932s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0020s + [COUNTERS] Fortran MEs ( 1 ) : 2.9912s for 90112 events => throughput is 3.01E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,21 +127,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196358763382007E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7195720226233587E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8757s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5434s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3323s for 8192 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6743s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4117s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2627s for 8192 events => throughput is 3.12E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358763382007E-002) differ by less than 2E-4 (8.651674043846924e-09) +OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195720226233587E-002) differ by less than 2E-4 (8.642997428864874e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,35 +160,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872835011053E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310873602323142E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.3915s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7212s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6703s for 90112 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.1467s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2599s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.8867s for 90112 events => throughput is 3.12E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872835011053E-002) differ by less than 2E-4 (9.31432020401246e-09) +OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310873602323142E-002) differ by less than 2E-4 (9.314316651298782e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.532584e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.227112e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.521877e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.229042e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -202,21 +203,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196358804670396E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7195720267415450E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5544s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3845s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1699s for 8192 events => throughput is 4.82E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4216s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2876s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1340s for 8192 events => throughput is 6.12E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358804670396E-002) differ by less than 2E-4 (9.076467577529002e-09) +OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195720267415450E-002) differ by less than 2E-4 (9.066697836956905e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -235,35 +236,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872836789727E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310873604102080E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.3887s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5301s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8586s for 90112 events => throughput is 4.85E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6087s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1353s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.4734s for 90112 events => throughput is 6.12E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872836789727E-002) differ by less than 2E-4 (9.336195150311255e-09) +OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310873604102080E-002) differ by less than 2E-4 (9.33619492826665e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.952919e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.232300e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.914380e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.239527e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -278,21 +279,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7195720049465126E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3876s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3037s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0839s for 8192 events => throughput is 9.76E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.2815s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2183s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0632s for 8192 events => throughput is 1.30E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358586501358E-002) differ by less than 2E-4 (6.831845977828266e-09) +OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195720049465126E-002) differ by less than 2E-4 (6.824311782338555e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -311,35 +312,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310873476230255E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.3852s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4574s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9277s for 90112 events => throughput is 9.71E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.7612s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0666s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6946s for 90112 events => throughput is 1.30E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872708918333E-002) differ by less than 2E-4 (7.763571563401683e-09) +OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310873476230255E-002) differ by less than 2E-4 (7.76356601228656e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.889061e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.321387e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.855619e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.326541e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -354,21 +355,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7195720049465126E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3661s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2915s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0745s for 8192 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2698s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2123s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0575s for 8192 events => throughput is 1.43E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358586501358E-002) differ by less than 2E-4 (6.831845977828266e-09) +OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195720049465126E-002) differ by less than 2E-4 (6.824311782338555e-09) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -387,35 +388,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310873476230255E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.2630s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4388s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8242s for 90112 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6909s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0585s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6324s for 90112 events => throughput is 1.42E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872708918333E-002) differ by less than 2E-4 (7.763571563401683e-09) +OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310873476230255E-002) differ by less than 2E-4 (7.76356601228656e-09) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.127984e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.468207e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.119463e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.470735e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -430,21 +431,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196358757578441E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7195720220276491E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4328s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3253s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1075s for 8192 events => throughput is 7.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.2605s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2070s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0535s for 8192 events => throughput is 1.53E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358757578441E-002) differ by less than 2E-4 (8.591964251181139e-09) +OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195720220276491E-002) differ by less than 2E-4 (8.581707788835047e-09) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -463,35 +464,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872803699391E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310873571012007E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.6477s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4725s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1752s for 90112 events => throughput is 7.67E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.6480s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0595s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5885s for 90112 events => throughput is 1.53E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872803699391E-002) differ by less than 2E-4 (8.929234462939917e-09) +OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310873571012007E-002) differ by less than 2E-4 (8.92923734951978e-09) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.668693e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.565068e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.591243e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.569018e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -506,97 +507,4 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196358102981245E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.7068s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7013s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.50E+06 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358102981245E-002) differ by less than 2E-4 (1.8571728599425796e-09) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872068634174E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8933s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8700s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0233s for 90112 events => throughput is 3.87E+06 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872068634174E-002) differ by less than 2E-4 (1.1094924978749532e-10) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.611008e+06 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.220129e+06 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.993875e+06 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.234445e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.000644e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.243443e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.965395e+06 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.708140e+06 ) sec^-1 - -TEST COMPLETED +ERROR! ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' failed diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 62d0e45c34..ce7e7609a4 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -2,28 +2,28 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 -make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -33,9 +33,10 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:22:32 +DATE: 2023-10-25_19:01:06 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB +NVIDIA L4]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -51,7 +52,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -59,9 +60,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3806s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2793s - [COUNTERS] Fortran MEs ( 1 ) : 4.1013s for 8192 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.6457s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1953s + [COUNTERS] Fortran MEs ( 1 ) : 3.4504s for 8192 events => throughput is 2.37E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -76,7 +77,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -84,9 +85,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3901s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2745s - [COUNTERS] Fortran MEs ( 1 ) : 4.1156s for 8192 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.6434s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1917s + [COUNTERS] Fortran MEs ( 1 ) : 3.4518s for 8192 events => throughput is 2.37E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -101,17 +102,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 47.2170s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8938s - [COUNTERS] Fortran MEs ( 1 ) : 45.3231s for 90112 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 39.3826s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3592s + [COUNTERS] Fortran MEs ( 1 ) : 38.0233s for 90112 events => throughput is 2.37E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,7 +127,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,9 +135,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.7189s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4329s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.2860s for 8192 events => throughput is 1.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 6.9417s + [COUNTERS] Fortran Overhead ( 0 ) : 3.5036s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.4380s for 8192 events => throughput is 2.38E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -159,35 +160,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748421161E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725748610601E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 53.2752s - [COUNTERS] Fortran Overhead ( 0 ) : 6.0306s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.2446s for 90112 events => throughput is 1.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 42.4644s + [COUNTERS] Fortran Overhead ( 0 ) : 4.6673s + [COUNTERS] CudaCpp MEs ( 2 ) : 37.7971s for 90112 events => throughput is 2.38E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421161E-004) differ by less than 2E-14 (2.220446049250313e-16) +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610601E-004) differ by less than 2E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.975932e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.484228e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.969422e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.484587e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -202,7 +203,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -210,9 +211,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352993E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.7792s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4930s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2862s for 8192 events => throughput is 3.58E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.6038s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8665s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.7373s for 8192 events => throughput is 4.72E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -235,35 +236,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725748610596E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 29.2447s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0623s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.1823s for 90112 events => throughput is 3.58E+03 events/s + [COUNTERS] PROGRAM TOTAL : 22.1554s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0347s + [COUNTERS] CudaCpp MEs ( 2 ) : 19.1207s for 90112 events => throughput is 4.71E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421158E-004) differ by less than 2E-14 (3.3306690738754696e-16) +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610596E-004) differ by less than 2E-14 (5.551115123125783e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.719602e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.903888e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.704578e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.902506e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -278,7 +279,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -286,9 +287,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.2095s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2292s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9803s for 8192 events => throughput is 8.36E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.6286s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9055s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7231s for 8192 events => throughput is 1.13E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -311,35 +312,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 13.7366s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8124s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.9242s for 90112 events => throughput is 8.25E+03 events/s + [COUNTERS] PROGRAM TOTAL : 10.0393s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0688s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.9705s for 90112 events => throughput is 1.13E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421164E-004) differ by less than 2E-14 (0.0) +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610604E-004) differ by less than 2E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.553162e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.154246e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.569833e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.157006e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -354,7 +355,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -362,9 +363,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.9852s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1172s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8679s for 8192 events => throughput is 9.44E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.4687s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8238s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6448s for 8192 events => throughput is 1.27E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -387,35 +388,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 12.2612s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6975s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.5637s for 90112 events => throughput is 9.42E+03 events/s + [COUNTERS] PROGRAM TOTAL : 9.0853s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9863s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.0990s for 90112 events => throughput is 1.27E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421164E-004) differ by less than 2E-14 (0.0) +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610604E-004) differ by less than 2E-14 (0.0) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.732365e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.306239e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.738434e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.306622e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -430,7 +431,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -438,9 +439,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.3926s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3264s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0661s for 8192 events => throughput is 7.68E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.1802s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6814s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4988s for 8192 events => throughput is 1.64E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -463,35 +464,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 14.7899s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9106s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.8793s for 90112 events => throughput is 7.59E+03 events/s + [COUNTERS] PROGRAM TOTAL : 7.3177s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8482s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4695s for 90112 events => throughput is 1.65E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421164E-004) differ by less than 2E-14 (0.0) +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610604E-004) differ by less than 2E-14 (0.0) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.707688e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.692604e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.746281e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.691372e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -506,97 +507,4 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.7947s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7633s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0315s for 8192 events => throughput is 2.60E+05 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277311352998E-004) differ by less than 2E-14 (4.440892098500626e-16) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748421161E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.6726s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3253s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3472s for 90112 events => throughput is 2.60E+05 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421161E-004) differ by less than 2E-14 (2.220446049250313e-16) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.298413e+05 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.515584e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.121587e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.140818e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.116718e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.158341e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.107036e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = 
SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.429475e+05 ) sec^-1 - -TEST COMPLETED +ERROR! ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' failed diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index a11d40fa18..7decb75777 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,24 +1,25 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg CUDACPP_BUILDDIR='.' +make USEBUILDDIR=1 AVX=none - -make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y - make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -27,15 +28,15 @@ make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2023-10-25_19:26:45 +DATE: 2023-10-25_19:04:47 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB +NVIDIA L4]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -51,7 +52,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -59,9 +60,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3808s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2757s - [COUNTERS] Fortran MEs ( 1 ) : 4.1052s for 8192 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.6461s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1956s + [COUNTERS] Fortran MEs ( 1 ) : 3.4505s for 8192 events => throughput is 2.37E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -76,7 +77,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -84,9 +85,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3737s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2719s - [COUNTERS] Fortran MEs ( 1 ) : 4.1018s for 8192 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.6438s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1927s + [COUNTERS] Fortran MEs ( 1 ) : 3.4511s for 8192 events => throughput is 2.37E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -101,17 +102,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 47.1736s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9032s - [COUNTERS] Fortran MEs ( 1 ) : 45.2704s for 90112 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 39.3691s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3694s + [COUNTERS] Fortran MEs ( 1 ) : 37.9997s for 90112 events => throughput is 2.37E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,7 +127,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,9 +135,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277396515517582E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.4177s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2835s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1342s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 6.7930s + [COUNTERS] Fortran Overhead ( 0 ) : 3.4371s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.3559s for 8192 events => throughput is 2.44E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -159,35 +160,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803774605164224E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803774605353658E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 51.5557s - [COUNTERS] Fortran Overhead ( 0 ) : 5.8491s - [COUNTERS] CudaCpp MEs ( 2 ) : 45.7066s for 90112 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 41.5679s + [COUNTERS] Fortran Overhead ( 0 ) : 4.6056s + [COUNTERS] CudaCpp MEs ( 2 ) : 36.9624s for 90112 events => throughput is 2.44E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803774605164224E-004) differ by less than 4E-4 (3.091469938043545e-06) +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803774605353658E-004) differ by less than 4E-4 (3.091469937599456e-06) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.033185e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.530604e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.041307e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.530762e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -202,7 +203,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -210,9 +211,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277389113409186E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.5057s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3763s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1294s for 8192 events => throughput is 7.25E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.8393s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0062s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8331s for 8192 events => throughput is 9.83E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -235,35 +236,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803771885814218E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803771886003655E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 15.4913s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9536s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.5377s for 90112 events => throughput is 7.19E+03 events/s + [COUNTERS] PROGRAM TOTAL : 11.3573s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1735s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.1839s for 90112 events => throughput is 9.81E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803771885814218E-004) differ by less than 4E-4 (2.9193997534981975e-06) +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803771886003655E-004) differ by less than 4E-4 (2.919399753276153e-06) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.412053e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.008580e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.406103e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.010372e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -278,7 +279,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -286,9 +287,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277390171873933E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.2465s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7541s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4925s for 8192 events => throughput is 1.66E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9273s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5575s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3698s for 8192 events => throughput is 2.21E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -311,35 +312,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803774410472313E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803774410661750E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.7561s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3072s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4489s for 90112 events => throughput is 1.65E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.8038s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7240s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.0798s for 90112 events => throughput is 2.21E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803774410472313E-004) differ by less than 4E-4 (3.0791505700733524e-06) +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803774410661750E-004) differ by less than 4E-4 (3.0791505700733524e-06) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.691454e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.267011e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.694442e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.267552e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -354,7 +355,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -362,9 +363,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277390171873933E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.1341s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6956s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4385s for 8192 events => throughput is 1.87E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8434s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5139s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3295s for 8192 events => throughput is 2.49E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -387,35 +388,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803774410472313E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803774410661750E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.1002s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2595s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.8407s for 90112 events => throughput is 1.86E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3196s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6858s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6338s for 90112 events => throughput is 2.48E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803774410472313E-004) differ by less than 4E-4 (3.0791505700733524e-06) +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803774410661750E-004) differ by less than 4E-4 (3.0791505700733524e-06) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.917142e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.554710e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.912635e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.563044e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -430,7 +431,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -438,9 +439,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277396414214383E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.3236s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7938s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5298s for 8192 events => throughput is 1.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6806s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4346s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2460s for 8192 events => throughput is 3.33E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -463,35 +464,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803777740743528E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803777740932968E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 8.3515s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4437s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.9078s for 90112 events => throughput is 1.53E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.2973s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6049s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.6924s for 90112 events => throughput is 3.35E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803777740743528E-004) differ by less than 4E-4 (3.289877538392716e-06) +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803777740932968E-004) differ by less than 4E-4 (3.289877538392716e-06) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.556068e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.442736e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.554806e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.446826e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -506,97 +507,4 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0003628 [3.6277400478491260E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.7609s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7395s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.84E+05 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (3.6277277311352982E-004) and cpp (3.6277400478491260E-004) differ by less than 4E-4 (3.3951593780834344e-06) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803779990154892E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.5388s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3037s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2351s for 90112 events => throughput is 3.83E+05 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803779990154892E-004) differ by less than 4E-4 (3.4322117830054566e-06) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.602401e+05 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.943641e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.505586e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.637854e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.505362e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.630323e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.491202e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = 
SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.522012e+05 ) sec^-1 - -TEST COMPLETED +ERROR! ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' failed diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 344f040590..c4e4a1a740 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -2,13 +2,13 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' - - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 -make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' @@ -16,14 +16,14 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +make[1]: Nothing to be done for 'all'. make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -33,9 +33,10 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:30:04 +DATE: 2023-10-25_19:07:59 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB +NVIDIA L4]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -51,7 +52,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -59,9 +60,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3869s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2765s - [COUNTERS] Fortran MEs ( 1 ) : 4.1104s for 8192 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.6443s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1951s + [COUNTERS] Fortran MEs ( 1 ) : 3.4492s for 8192 events => throughput is 2.38E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -76,7 +77,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -84,9 +85,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3884s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2753s - [COUNTERS] Fortran MEs ( 1 ) : 4.1132s for 8192 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.6433s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1919s + [COUNTERS] Fortran MEs ( 1 ) : 3.4513s for 8192 events => throughput is 2.37E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -101,17 +102,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 47.2155s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9013s - [COUNTERS] Fortran MEs ( 1 ) : 45.3142s for 90112 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 39.3762s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3593s + [COUNTERS] Fortran MEs ( 1 ) : 38.0168s for 90112 events => throughput is 2.37E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,7 +127,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,9 +135,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277432965013E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.8199s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4948s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3251s for 8192 events => throughput is 1.89E+03 events/s + [COUNTERS] PROGRAM TOTAL : 7.0366s + [COUNTERS] Fortran Overhead ( 0 ) : 3.5537s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.4829s for 8192 events => throughput is 2.35E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -159,35 +160,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725813026109E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725813215552E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 53.9672s - [COUNTERS] Fortran Overhead ( 0 ) : 6.0490s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.9182s for 90112 events => throughput is 1.88E+03 events/s + [COUNTERS] PROGRAM TOTAL : 43.0534s + [COUNTERS] Fortran Overhead ( 0 ) : 4.7304s + [COUNTERS] CudaCpp MEs ( 2 ) : 38.3231s for 90112 events => throughput is 2.35E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725813026109E-004) differ by less than 2E-4 (4.087956639864387e-09) +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725813215552E-004) differ by less than 2E-4 (4.087956861908992e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.959802e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.445667e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.958905e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.446428e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -202,7 +203,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -210,9 +211,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277430934464E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.7279s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4759s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2519s for 8192 events => throughput is 3.64E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.5785s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8571s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.7215s for 8192 events => throughput is 4.76E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -235,35 +236,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725816246317E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725816435760E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 29.0177s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0402s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.9775s for 90112 events => throughput is 3.61E+03 events/s + [COUNTERS] PROGRAM TOTAL : 22.5227s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0477s + [COUNTERS] CudaCpp MEs ( 2 ) : 19.4750s for 90112 events => throughput is 4.63E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725816246317E-004) differ by less than 2E-4 (4.291719202242916e-09) +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725816435760E-004) differ by less than 2E-4 (4.291719424287521e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.730381e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.936536e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.728042e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.937965e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -278,7 +279,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -286,9 +287,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.1830s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2180s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9649s for 8192 events => throughput is 8.49E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.6313s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9049s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7264s for 8192 events => throughput is 1.13E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -311,35 +312,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725810958764E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 13.5258s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8006s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.7251s for 90112 events => throughput is 8.40E+03 events/s + [COUNTERS] PROGRAM TOTAL : 10.0798s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0800s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.9998s for 90112 events => throughput is 1.13E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725810769321E-004) differ by less than 2E-4 (3.945155535589606e-09) +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725810958764E-004) differ by less than 2E-4 (3.945155757634211e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.577284e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.155342e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.645443e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.155498e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -354,7 +355,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -362,9 +363,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.9705s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1082s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8624s for 8192 events => throughput is 9.50E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.4812s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8286s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6526s for 8192 events => throughput is 1.26E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -387,35 +388,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725810958764E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 12.1916s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6788s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.5128s for 90112 events => throughput is 9.47E+03 events/s + [COUNTERS] PROGRAM TOTAL : 9.1555s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9926s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.1629s for 90112 events => throughput is 1.26E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725810769321E-004) differ by less than 2E-4 (3.945155535589606e-09) +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725810958764E-004) differ by less than 2E-4 (3.945155757634211e-09) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.762571e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.298345e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.791935e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.297893e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -430,7 +431,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -438,9 +439,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.4166s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3396s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0770s for 8192 events => throughput is 7.61E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.1957s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6900s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5057s for 8192 events => throughput is 1.62E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -463,35 +464,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725810958764E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 14.9279s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9270s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.0009s for 90112 events => throughput is 7.51E+03 events/s + [COUNTERS] PROGRAM TOTAL : 7.4628s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8592s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.6036s for 90112 events => throughput is 1.61E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725810769321E-004) differ by less than 2E-4 (3.945155535589606e-09) +OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725810958764E-004) differ by less than 2E-4 (3.945155757634211e-09) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.723112e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.658725e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.593326e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.657267e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -506,97 +507,4 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0003628 [3.6277277293084707E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.7973s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7658s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0315s for 8192 events => throughput is 2.60E+05 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277293084707E-004) differ by less than 2E-4 (5.035735162195465e-10) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725738731039E-004] fbridge_mode=1 - [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.6608s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3213s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3395s for 90112 events => throughput is 2.65E+05 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725738731039E-004) differ by less than 2E-4 (6.131544161291913e-10) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.295568e+05 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.529340e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.107713e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.153742e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.126841e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.176580e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.122237e+05 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = 
SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.438612e+05 ) sec^-1 - -TEST COMPLETED +ERROR! ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' failed diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 90411e1b5b..c67f0da27f 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,27 +1,27 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg CUDACPP_BUILDDIR='.' - - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 + + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y - make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -33,9 +33,10 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:34:37 +DATE: 2023-10-25_19:14:32 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB +NVIDIA L4]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -51,7 +52,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -59,9 +60,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 166 events) - [COUNTERS] PROGRAM TOTAL : 96.8592s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4672s - [COUNTERS] Fortran MEs ( 1 ) : 96.3920s for 8192 events => throughput is 8.50E+01 events/s + [COUNTERS] PROGRAM TOTAL : 85.6732s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3092s + [COUNTERS] Fortran MEs ( 1 ) : 85.3640s for 8192 events => throughput is 9.60E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -76,7 +77,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -84,9 +85,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 97.0100s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4404s - [COUNTERS] Fortran MEs ( 1 ) : 96.5697s for 8192 events => throughput is 8.48E+01 events/s + [COUNTERS] PROGRAM TOTAL : 85.2231s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3098s + [COUNTERS] Fortran MEs ( 1 ) : 84.9133s for 8192 events => throughput is 9.65E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -101,7 +102,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -109,9 +110,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813976E-007] fbridge_mode=0 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1062.8511s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0497s - [COUNTERS] Fortran MEs ( 1 ) : 1058.8014s for 90112 events => throughput is 8.51E+01 events/s + [COUNTERS] PROGRAM TOTAL : 941.3988s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8350s + [COUNTERS] Fortran MEs ( 1 ) : 938.5638s for 90112 events => throughput is 9.60E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,7 +127,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,9 +135,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435831E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 221.3790s - [COUNTERS] Fortran Overhead ( 0 ) : 101.7058s - [COUNTERS] CudaCpp MEs ( 2 ) : 119.6732s for 8192 events => throughput is 6.85E+01 events/s + [COUNTERS] PROGRAM TOTAL : 184.3438s + [COUNTERS] Fortran Overhead ( 0 ) : 85.0990s + [COUNTERS] CudaCpp MEs ( 2 ) : 99.2448s for 8192 events => throughput is 8.25E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -159,7 +160,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -167,9 +168,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813953E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1450.0547s - [COUNTERS] Fortran Overhead ( 0 ) : 106.7786s - [COUNTERS] CudaCpp MEs ( 2 ) : 1343.2761s for 90112 events => throughput is 6.71E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1178.9740s + [COUNTERS] Fortran Overhead ( 0 ) : 87.5978s + [COUNTERS] CudaCpp MEs ( 2 ) : 1091.3762s for 90112 events => throughput is 8.26E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -180,14 +181,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813953E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.310154e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.695340e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.288200e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.690082e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -202,7 +203,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -210,9 +211,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435827E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 107.4772s - [COUNTERS] Fortran Overhead ( 0 ) : 49.8239s - [COUNTERS] CudaCpp MEs ( 2 ) : 57.6534s for 8192 events => throughput is 1.42E+02 events/s + [COUNTERS] PROGRAM TOTAL : 94.4928s + [COUNTERS] Fortran Overhead ( 0 ) : 43.9870s + [COUNTERS] CudaCpp MEs ( 2 ) : 50.5057s for 8192 events => throughput is 1.62E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -235,7 +236,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -243,9 +244,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 689.8975s - [COUNTERS] Fortran Overhead ( 0 ) : 53.4603s - [COUNTERS] CudaCpp MEs ( 2 ) : 636.4372s for 90112 events => throughput is 1.42E+02 events/s + [COUNTERS] PROGRAM TOTAL : 604.8039s + [COUNTERS] Fortran Overhead ( 0 ) : 46.6648s + [COUNTERS] CudaCpp MEs ( 2 ) : 558.1391s for 90112 events => throughput is 1.61E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +257,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.666029e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.871670e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.661964e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.870811e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -278,7 +279,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -286,9 +287,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435829E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 51.5576s - [COUNTERS] Fortran Overhead ( 0 ) : 23.6406s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.9170s for 8192 events => throughput is 2.93E+02 events/s + [COUNTERS] PROGRAM TOTAL : 42.3765s + [COUNTERS] Fortran Overhead ( 0 ) : 19.3321s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.0444s for 8192 events => throughput is 3.55E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -311,7 +312,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -319,9 +320,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 332.8698s - [COUNTERS] Fortran Overhead ( 0 ) : 27.2762s - [COUNTERS] CudaCpp MEs ( 2 ) : 305.5936s for 90112 events => throughput is 2.95E+02 events/s + [COUNTERS] PROGRAM TOTAL : 274.6168s + [COUNTERS] Fortran Overhead ( 0 ) : 21.8566s + [COUNTERS] CudaCpp MEs ( 2 ) : 252.7602s for 90112 events => throughput is 3.57E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +333,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.564287e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.309503e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.571126e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.313934e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -354,7 +355,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -362,9 +363,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435829E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 45.7634s - [COUNTERS] Fortran Overhead ( 0 ) : 20.8536s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.9098s for 8192 events => throughput is 3.29E+02 events/s + [COUNTERS] PROGRAM TOTAL : 37.8931s + [COUNTERS] Fortran Overhead ( 0 ) : 17.1832s + [COUNTERS] CudaCpp MEs ( 2 ) : 20.7099s for 8192 events => throughput is 3.96E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -387,7 +388,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -395,9 +396,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 298.7296s - [COUNTERS] Fortran Overhead ( 0 ) : 24.5229s - [COUNTERS] CudaCpp MEs ( 2 ) : 274.2068s for 90112 events => throughput is 3.29E+02 events/s + [COUNTERS] PROGRAM TOTAL : 247.2023s + [COUNTERS] Fortran Overhead ( 0 ) : 19.7302s + [COUNTERS] CudaCpp MEs ( 2 ) : 227.4722s for 90112 events => throughput is 3.96E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +409,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.037302e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.872948e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.011224e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.859207e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -430,7 +431,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -438,9 +439,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435829E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 46.0178s - [COUNTERS] Fortran Overhead ( 0 ) : 22.2114s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.8064s for 8192 events => throughput is 3.44E+02 events/s + [COUNTERS] PROGRAM TOTAL : 25.8670s + [COUNTERS] Fortran Overhead ( 0 ) : 11.9877s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.8792s for 8192 events => throughput is 5.90E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -463,7 +464,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -471,9 +472,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 285.3431s - [COUNTERS] Fortran Overhead ( 0 ) : 25.8685s - [COUNTERS] CudaCpp MEs ( 2 ) : 259.4745s for 90112 events => throughput is 3.47E+02 events/s + [COUNTERS] PROGRAM TOTAL : 168.3618s + [COUNTERS] Fortran Overhead ( 0 ) : 14.5580s + [COUNTERS] CudaCpp MEs ( 2 ) : 153.8038s for 90112 events => throughput is 5.86E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +485,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.723231e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.031931e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.765137e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.008943e+02 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -506,97 +507,4 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1693100945435838E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 4.1896s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1063s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0833s for 8192 events => throughput is 7.56E+03 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (1.1693100945435808E-006) and cpp (1.1693100945435838E-006) differ by less than 2E-14 (2.4424906541753444e-15) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 18.5925s - [COUNTERS] Fortran Overhead ( 0 ) : 6.7176s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.8749s for 90112 events => throughput is 7.59E+03 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007) differ by less than 2E-14 (8.881784197001252e-16) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.538676e+03 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.266826e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.276191e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.572409e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.231100e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.464972e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.233366e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = 
SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.245300e+03 ) sec^-1 - -TEST COMPLETED +ERROR! ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' failed diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 93e9694d2a..7af9e47c73 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,24 +1,25 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg CUDACPP_BUILDDIR='.' +make USEBUILDDIR=1 AVX=none - -make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 -make USEBUILDDIR=1 AVX=512y +make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -27,15 +28,15 @@ make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2023-10-25_21:02:37 +DATE: 2023-10-25_20:27:32 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB +NVIDIA L4]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -51,7 +52,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -59,9 +60,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 166 events) - [COUNTERS] PROGRAM TOTAL : 97.0230s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4439s - [COUNTERS] Fortran MEs ( 1 ) : 96.5791s for 8192 events => throughput is 8.48E+01 events/s + [COUNTERS] PROGRAM TOTAL : 84.9514s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3096s + [COUNTERS] Fortran MEs ( 1 ) : 84.6418s for 8192 events => throughput is 9.68E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -76,7 +77,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -84,9 +85,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 98.3689s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4421s - [COUNTERS] Fortran MEs ( 1 ) : 97.9268s for 8192 events => throughput is 8.37E+01 events/s + [COUNTERS] PROGRAM TOTAL : 85.7277s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3103s + [COUNTERS] Fortran MEs ( 1 ) : 85.4174s for 8192 events => throughput is 9.59E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -101,7 +102,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -109,9 +110,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813976E-007] fbridge_mode=0 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1065.5287s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0935s - [COUNTERS] Fortran MEs ( 1 ) : 1061.4352s for 90112 events => throughput is 8.49E+01 events/s + [COUNTERS] PROGRAM TOTAL : 940.7097s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8292s + [COUNTERS] Fortran MEs ( 1 ) : 937.8805s for 90112 events => throughput is 9.61E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,21 +127,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1694768395202781E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.169e-06 [1.1694768395608941E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 198.8176s - [COUNTERS] Fortran Overhead ( 0 ) : 92.5000s - [COUNTERS] CudaCpp MEs ( 2 ) : 106.3176s for 8192 events => throughput is 7.71E+01 events/s + [COUNTERS] PROGRAM TOTAL : 176.4062s + [COUNTERS] Fortran Overhead ( 0 ) : 80.9717s + [COUNTERS] CudaCpp MEs ( 2 ) : 95.4345s for 8192 events => throughput is 8.58E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1694768395202781E-006) differ by less than 4E-4 (0.00014260116069753082) +OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1694768395608941E-006) differ by less than 4E-4 (0.0001426011954326345) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,35 +160,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1361436140448921E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.136e-07 [2.1361436148187123E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1266.1327s - [COUNTERS] Fortran Overhead ( 0 ) : 95.7086s - [COUNTERS] CudaCpp MEs ( 2 ) : 1170.4241s for 90112 events => throughput is 7.70E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1129.5632s + [COUNTERS] Fortran Overhead ( 0 ) : 83.1146s + [COUNTERS] CudaCpp MEs ( 2 ) : 1046.4486s for 90112 events => throughput is 8.61E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361436140448921E-007) differ by less than 4E-4 (0.00014045886190539036) +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361436148187123E-007) differ by less than 4E-4 (0.00014045922420713453) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.002269e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.022409e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.967415e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.021876e+02 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -202,7 +203,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -210,9 +211,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694765850076731E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 49.7619s - [COUNTERS] Fortran Overhead ( 0 ) : 23.4310s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.3309s for 8192 events => throughput is 3.11E+02 events/s + [COUNTERS] PROGRAM TOTAL : 42.7416s + [COUNTERS] Fortran Overhead ( 0 ) : 19.7901s + [COUNTERS] CudaCpp MEs ( 2 ) : 22.9515s for 8192 events => throughput is 3.57E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -235,7 +236,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -243,9 +244,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361430662723898E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 316.9053s - [COUNTERS] Fortran Overhead ( 0 ) : 27.1348s - [COUNTERS] CudaCpp MEs ( 2 ) : 289.7704s for 90112 events => throughput is 3.11E+02 events/s + [COUNTERS] PROGRAM TOTAL : 275.8688s + [COUNTERS] Fortran Overhead ( 0 ) : 22.3496s + [COUNTERS] CudaCpp MEs ( 2 ) : 253.5192s for 90112 events => throughput is 3.55E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +257,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361430662723898E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.586738e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.215690e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.580652e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.209963e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -278,7 +279,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -286,9 +287,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694764962310603E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 25.7462s - [COUNTERS] Fortran Overhead ( 0 ) : 11.9729s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.7733s for 8192 events => throughput is 5.95E+02 events/s + [COUNTERS] PROGRAM TOTAL : 21.3650s + [COUNTERS] Fortran Overhead ( 0 ) : 9.8744s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.4906s for 8192 events => throughput is 7.13E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -311,7 +312,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -319,9 +320,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361430432807771E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 168.1713s - [COUNTERS] Fortran Overhead ( 0 ) : 15.4495s - [COUNTERS] CudaCpp MEs ( 2 ) : 152.7218s for 90112 events => throughput is 5.90E+02 events/s + [COUNTERS] PROGRAM TOTAL : 139.3548s + [COUNTERS] Fortran Overhead ( 0 ) : 12.3610s + [COUNTERS] CudaCpp MEs ( 2 ) : 126.9938s for 90112 events => throughput is 7.10E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +333,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361430432807771E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.197172e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.561703e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.226176e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.587034e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -354,7 +355,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -362,9 +363,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694764962310603E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 22.7901s - [COUNTERS] Fortran Overhead ( 0 ) : 10.6491s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.1410s for 8192 events => throughput is 6.75E+02 events/s + [COUNTERS] PROGRAM TOTAL : 19.1741s + [COUNTERS] Fortran Overhead ( 0 ) : 8.7841s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.3900s for 8192 events => throughput is 7.88E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -387,7 +388,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -395,9 +396,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361430432807771E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 147.9220s - [COUNTERS] Fortran Overhead ( 0 ) : 14.3147s - [COUNTERS] CudaCpp MEs ( 2 ) : 133.6073s for 90112 events => throughput is 6.74E+02 events/s + [COUNTERS] PROGRAM TOTAL : 125.8037s + [COUNTERS] Fortran Overhead ( 0 ) : 11.2828s + [COUNTERS] CudaCpp MEs ( 2 ) : 114.5209s for 90112 events => throughput is 7.87E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +409,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361430432807771E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.019572e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.656474e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.643340e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.674300e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -430,7 +431,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -438,9 +439,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694767969588676E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 23.2769s - [COUNTERS] Fortran Overhead ( 0 ) : 11.3993s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.8776s for 8192 events => throughput is 6.90E+02 events/s + [COUNTERS] PROGRAM TOTAL : 13.3091s + [COUNTERS] Fortran Overhead ( 0 ) : 6.1154s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.1937s for 8192 events => throughput is 1.14E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -463,7 +464,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -471,9 +472,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361435931847224E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 144.2454s - [COUNTERS] Fortran Overhead ( 0 ) : 15.1090s - [COUNTERS] CudaCpp MEs ( 2 ) : 129.1364s for 90112 events => throughput is 6.98E+02 events/s + [COUNTERS] PROGRAM TOTAL : 88.7101s + [COUNTERS] Fortran Overhead ( 0 ) : 8.6312s + [COUNTERS] CudaCpp MEs ( 2 ) : 80.0788s for 90112 events => throughput is 1.13E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +485,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361435931847224E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.574328e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.413638e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.606464e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.410813e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -506,97 +507,4 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1694770708195000E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 2.4561s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9552s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5008s for 8192 events => throughput is 1.64E+04 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (1.1693100945435808E-006) and cpp (1.1694770708195000E-006) differ by less than 4E-4 (0.00014279896898039546) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1361443477565659E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 10.9874s - [COUNTERS] Fortran Overhead ( 0 ) : 5.5585s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4289s for 90112 events => throughput is 1.66E+04 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361443477565659E-007) differ by less than 4E-4 (0.0001408023850304474) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.636533e+04 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.620348e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.363235e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.405136e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.330068e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.392338e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.319690e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = 
SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.432306e+03 ) sec^-1 - -TEST COMPLETED +ERROR! ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' failed diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index fd7d860c5e..4502fc4cc0 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,27 +1,27 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg CUDACPP_BUILDDIR='.' + make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=avx2 -make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y - make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -33,9 +33,10 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_22:08:36 +DATE: 2023-10-25_21:24:58 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB +NVIDIA L4]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -51,7 +52,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -59,9 +60,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 166 events) - [COUNTERS] PROGRAM TOTAL : 97.1556s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4422s - [COUNTERS] Fortran MEs ( 1 ) : 96.7134s for 8192 events => throughput is 8.47E+01 events/s + [COUNTERS] PROGRAM TOTAL : 84.9824s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3100s + [COUNTERS] Fortran MEs ( 1 ) : 84.6723s for 8192 events => throughput is 9.67E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -76,7 +77,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -84,9 +85,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 96.9689s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4446s - [COUNTERS] Fortran MEs ( 1 ) : 96.5243s for 8192 events => throughput is 8.49E+01 events/s + [COUNTERS] PROGRAM TOTAL : 85.2775s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3102s + [COUNTERS] Fortran MEs ( 1 ) : 84.9673s for 8192 events => throughput is 9.64E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -101,7 +102,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -109,9 +110,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813976E-007] fbridge_mode=0 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1064.4592s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0566s - [COUNTERS] Fortran MEs ( 1 ) : 1060.4026s for 90112 events => throughput is 8.50E+01 events/s + [COUNTERS] PROGRAM TOTAL : 941.4544s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8344s + [COUNTERS] Fortran MEs ( 1 ) : 938.6200s for 90112 events => throughput is 9.60E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,7 +127,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,9 +135,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693101016896846E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 212.5728s - [COUNTERS] Fortran Overhead ( 0 ) : 98.0572s - [COUNTERS] CudaCpp MEs ( 2 ) : 114.5156s for 8192 events => throughput is 7.15E+01 events/s + [COUNTERS] PROGRAM TOTAL : 187.4086s + [COUNTERS] Fortran Overhead ( 0 ) : 86.0928s + [COUNTERS] CudaCpp MEs ( 2 ) : 101.3159s for 8192 events => throughput is 8.09E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -159,7 +160,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -167,9 +168,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436275882778E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1418.4218s - [COUNTERS] Fortran Overhead ( 0 ) : 103.9288s - [COUNTERS] CudaCpp MEs ( 2 ) : 1314.4929s for 90112 events => throughput is 6.86E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1204.1464s + [COUNTERS] Fortran Overhead ( 0 ) : 88.5909s + [COUNTERS] CudaCpp MEs ( 2 ) : 1115.5554s for 90112 events => throughput is 8.08E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -180,14 +181,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436275882778E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.001258e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.570846e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.939164e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.571217e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -202,7 +203,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -210,9 +211,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693101020910778E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 111.7906s - [COUNTERS] Fortran Overhead ( 0 ) : 51.2589s - [COUNTERS] CudaCpp MEs ( 2 ) : 60.5317s for 8192 events => throughput is 1.35E+02 events/s + [COUNTERS] PROGRAM TOTAL : 92.0468s + [COUNTERS] Fortran Overhead ( 0 ) : 42.4424s + [COUNTERS] CudaCpp MEs ( 2 ) : 49.6044s for 8192 events => throughput is 1.65E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -235,7 +236,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -243,9 +244,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436284111598E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 721.3517s - [COUNTERS] Fortran Overhead ( 0 ) : 54.9246s - [COUNTERS] CudaCpp MEs ( 2 ) : 666.4271s for 90112 events => throughput is 1.35E+02 events/s + [COUNTERS] PROGRAM TOTAL : 590.4569s + [COUNTERS] Fortran Overhead ( 0 ) : 44.8763s + [COUNTERS] CudaCpp MEs ( 2 ) : 545.5806s for 90112 events => throughput is 1.65E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +257,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436284111598E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.612539e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.951579e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.614353e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.952871e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -278,7 +279,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -286,9 +287,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693101021831071E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 49.1637s - [COUNTERS] Fortran Overhead ( 0 ) : 22.7432s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.4205s for 8192 events => throughput is 3.10E+02 events/s + [COUNTERS] PROGRAM TOTAL : 41.1033s + [COUNTERS] Fortran Overhead ( 0 ) : 18.8049s + [COUNTERS] CudaCpp MEs ( 2 ) : 22.2985s for 8192 events => throughput is 3.67E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -311,7 +312,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -319,9 +320,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436281462142E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 315.7719s - [COUNTERS] Fortran Overhead ( 0 ) : 26.2544s - [COUNTERS] CudaCpp MEs ( 2 ) : 289.5175s for 90112 events => throughput is 3.11E+02 events/s + [COUNTERS] PROGRAM TOTAL : 266.9925s + [COUNTERS] Fortran Overhead ( 0 ) : 21.2783s + [COUNTERS] CudaCpp MEs ( 2 ) : 245.7142s for 90112 events => throughput is 3.67E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +333,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436281462142E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.644706e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.437775e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.685868e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.452252e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -354,7 +355,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -362,9 +363,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693101021831071E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 44.6193s - [COUNTERS] Fortran Overhead ( 0 ) : 20.5510s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.0684s for 8192 events => throughput is 3.40E+02 events/s + [COUNTERS] PROGRAM TOTAL : 36.5317s + [COUNTERS] Fortran Overhead ( 0 ) : 16.5682s + [COUNTERS] CudaCpp MEs ( 2 ) : 19.9634s for 8192 events => throughput is 4.10E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -387,7 +388,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -395,9 +396,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436281462142E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 281.8182s - [COUNTERS] Fortran Overhead ( 0 ) : 23.6088s - [COUNTERS] CudaCpp MEs ( 2 ) : 258.2094s for 90112 events => throughput is 3.49E+02 events/s + [COUNTERS] PROGRAM TOTAL : 239.5143s + [COUNTERS] Fortran Overhead ( 0 ) : 19.0951s + [COUNTERS] CudaCpp MEs ( 2 ) : 220.4191s for 90112 events => throughput is 4.09E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +409,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436281462142E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.253034e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.043303e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.210071e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.056876e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -430,7 +431,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -438,9 +439,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693101021831071E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 45.1323s - [COUNTERS] Fortran Overhead ( 0 ) : 21.8275s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.3047s for 8192 events => throughput is 3.52E+02 events/s + [COUNTERS] PROGRAM TOTAL : 25.4199s + [COUNTERS] Fortran Overhead ( 0 ) : 11.5417s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.8782s for 8192 events => throughput is 5.90E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -463,7 +464,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -471,9 +472,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436281462142E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 283.0785s - [COUNTERS] Fortran Overhead ( 0 ) : 25.5465s - [COUNTERS] CudaCpp MEs ( 2 ) : 257.5320s for 90112 events => throughput is 3.50E+02 events/s + [COUNTERS] PROGRAM TOTAL : 166.7152s + [COUNTERS] Fortran Overhead ( 0 ) : 14.0683s + [COUNTERS] CudaCpp MEs ( 2 ) : 152.6469s for 90112 events => throughput is 5.90E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +485,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436281462142E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.872786e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.297717e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.828458e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.313776e+02 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -506,97 +507,4 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1693100942770687E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 3.5931s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7300s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8631s for 8192 events => throughput is 9.49E+03 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (1.1693100945435808E-006) and cpp (1.1693100942770687E-006) differ by less than 2E-4 (2.2792256970660674e-10) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1358436157495368E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 15.7895s - [COUNTERS] Fortran Overhead ( 0 ) : 6.2917s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.4978s for 90112 events => throughput is 9.49E+03 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436157495368E-007) differ by less than 2E-4 (6.173705990875078e-11) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.449720e+03 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.084973e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.109916e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.161304e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.111586e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.116031e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.106540e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = 
SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.648699e+03 ) sec^-1 - -TEST COMPLETED +ERROR! ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' failed diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 126d0b9ddb..18810c7539 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -2,40 +2,41 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none - make USEBUILDDIR=1 AVX=sse4 + + + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y - make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2023-10-25_19:34:18 +DATE: 2023-10-25_19:11:42 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB +NVIDIA L4]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -51,7 +52,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -59,9 +60,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3162s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2400s - [COUNTERS] Fortran MEs ( 1 ) : 0.0762s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2257s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1646s + [COUNTERS] Fortran MEs ( 1 ) : 0.0611s for 8192 events => throughput is 1.34E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -76,7 +77,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -84,9 +85,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3078s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2307s - [COUNTERS] Fortran MEs ( 1 ) : 0.0772s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2216s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1605s + [COUNTERS] Fortran MEs ( 1 ) : 0.0611s for 8192 events => throughput is 1.34E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -101,7 +102,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -109,9 +110,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2688s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4248s - [COUNTERS] Fortran MEs ( 1 ) : 0.8440s for 90112 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6855s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0179s + [COUNTERS] Fortran MEs ( 1 ) : 0.6677s for 90112 events => throughput is 1.35E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,7 +127,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,9 +135,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.276 [1.2757941949814184] fbridge_mode=1 [UNWEIGHT] Wrote 105 events (found 652 events) - [COUNTERS] PROGRAM TOTAL : 0.3877s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3161s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0716s for 8192 events => throughput is 1.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2844s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2272s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0571s for 8192 events => throughput is 1.43E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 97d8938e38..05038e6686 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,14 +1,14 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y - make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' @@ -16,12 +16,12 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -33,9 +33,10 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:34:24 +DATE: 2023-10-25_19:12:39 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB +NVIDIA L4]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -51,7 +52,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -59,9 +60,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3127s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2355s - [COUNTERS] Fortran MEs ( 1 ) : 0.0772s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2265s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1654s + [COUNTERS] Fortran MEs ( 1 ) : 0.0611s for 8192 events => throughput is 1.34E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -76,7 +77,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -84,9 +85,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3089s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2315s - [COUNTERS] Fortran MEs ( 1 ) : 0.0773s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2226s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1615s + [COUNTERS] Fortran MEs ( 1 ) : 0.0611s for 8192 events => throughput is 1.34E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -101,7 +102,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -109,9 +110,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2899s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4433s - [COUNTERS] Fortran MEs ( 1 ) : 0.8467s for 90112 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6904s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0219s + [COUNTERS] Fortran MEs ( 1 ) : 0.6685s for 90112 events => throughput is 1.35E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,7 +127,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,9 +135,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.276 [1.2757939713258191] fbridge_mode=1 [UNWEIGHT] Wrote 105 events (found 652 events) - [COUNTERS] PROGRAM TOTAL : 0.3818s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3130s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0689s for 8192 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2775s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2237s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0539s for 8192 events => throughput is 1.52E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index ae1cc6d1c5..c740fcb04e 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -2,30 +2,30 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 -make USEBUILDDIR=1 AVX=512y + +make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
@@ -33,9 +33,10 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:34:30 +DATE: 2023-10-25_19:13:36 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB +NVIDIA L4]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -51,7 +52,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -59,9 +60,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3113s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2340s - [COUNTERS] Fortran MEs ( 1 ) : 0.0773s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2237s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1626s + [COUNTERS] Fortran MEs ( 1 ) : 0.0611s for 8192 events => throughput is 1.34E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -76,7 +77,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -84,9 +85,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3077s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2312s - [COUNTERS] Fortran MEs ( 1 ) : 0.0765s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2210s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1599s + [COUNTERS] Fortran MEs ( 1 ) : 0.0611s for 8192 events => throughput is 1.34E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -101,7 +102,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -109,9 +110,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2619s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4224s - [COUNTERS] Fortran MEs ( 1 ) : 0.8395s for 90112 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6878s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0196s + [COUNTERS] Fortran MEs ( 1 ) : 0.6682s for 90112 events => throughput is 1.35E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,7 +127,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/64 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,9 +135,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.276 [1.2757941960880730] fbridge_mode=1 [UNWEIGHT] Wrote 105 events (found 652 events) - [COUNTERS] PROGRAM TOTAL : 0.3925s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3207s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0718s for 8192 events => throughput is 1.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2850s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2282s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0568s for 8192 events => throughput is 1.44E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** From ca18b7622f8ff1bb83b72a22423fc1e74ae3ecac Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 11:33:20 +0200 Subject: [PATCH 005/119] [oct23av] go back to performance baseline logs on itscrd90 (REVERT TEMPORARY TESTS ON PLATINUM) Revert "[oct23av] (TEMPORARY TESTS ON PLATINUM) rerun 18 tmad alltees, all ok but no GPU (olgpu-03 Platinum8362 el8 including downfall mitigation)" This reverts commit 93f29784929a9ab4b96446a5e60eee9b639b5361. Revert "[oct23av] (TEMPORARY TESTS ON PLATINUM) rerun 78 tput alltees, all ok but no GPU (olgpu-03 Platinum8362 el8 including downfall mitigation)" This reverts commit ed1cd751af10b5f4e03e189f82f95ed051821b22. 
--- .../log_eemumu_mad_d_inl0_hrd0.txt | 276 ++++++++++----- .../log_eemumu_mad_f_inl0_hrd0.txt | 272 ++++++++++----- .../log_eemumu_mad_m_inl0_hrd0.txt | 278 ++++++++++----- .../log_ggtt_mad_d_inl0_hrd0.txt | 276 ++++++++++----- .../log_ggtt_mad_f_inl0_hrd0.txt | 276 ++++++++++----- .../log_ggtt_mad_m_inl0_hrd0.txt | 276 ++++++++++----- .../log_ggttg_mad_d_inl0_hrd0.txt | 324 +++++++++++------- .../log_ggttg_mad_f_inl0_hrd0.txt | 316 +++++++++++------ .../log_ggttg_mad_m_inl0_hrd0.txt | 324 +++++++++++------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 296 ++++++++++------ .../log_ggttgg_mad_f_inl0_hrd0.txt | 298 ++++++++++------ .../log_ggttgg_mad_m_inl0_hrd0.txt | 300 ++++++++++------ .../log_ggttggg_mad_d_inl0_hrd0.txt | 276 ++++++++++----- .../log_ggttggg_mad_f_inl0_hrd0.txt | 282 ++++++++++----- .../log_ggttggg_mad_m_inl0_hrd0.txt | 272 ++++++++++----- .../log_gqttq_mad_d_inl0_hrd0.txt | 53 ++- .../log_gqttq_mad_f_inl0_hrd0.txt | 45 ++- .../log_gqttq_mad_m_inl0_hrd0.txt | 49 ++- .../log_eemumu_mad_d_inl0_hrd0.txt | 147 ++++---- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 155 +++++---- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 127 ++++--- .../log_eemumu_mad_d_inl0_hrd0_curhst.txt | 122 +++++-- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 149 ++++---- .../log_eemumu_mad_d_inl0_hrd1.txt | 147 ++++---- .../log_eemumu_mad_d_inl1_hrd0.txt | 147 ++++---- .../log_eemumu_mad_d_inl1_hrd1.txt | 147 ++++---- .../log_eemumu_mad_f_inl0_hrd0.txt | 151 ++++---- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 210 +++++++----- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 127 ++++--- .../log_eemumu_mad_f_inl0_hrd0_curhst.txt | 122 +++++-- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 153 +++++---- .../log_eemumu_mad_f_inl0_hrd1.txt | 151 ++++---- .../log_eemumu_mad_f_inl1_hrd0.txt | 151 ++++---- .../log_eemumu_mad_f_inl1_hrd1.txt | 151 ++++---- .../log_eemumu_mad_m_inl0_hrd0.txt | 147 ++++---- .../log_eemumu_mad_m_inl0_hrd1.txt | 147 ++++---- .../log_ggtt_mad_d_inl0_hrd0.txt | 149 ++++---- 
.../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 157 +++++---- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 129 ++++--- .../log_ggtt_mad_d_inl0_hrd0_curhst.txt | 124 +++++-- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 151 ++++---- .../log_ggtt_mad_d_inl0_hrd1.txt | 147 ++++---- .../log_ggtt_mad_d_inl1_hrd0.txt | 149 ++++---- .../log_ggtt_mad_d_inl1_hrd1.txt | 151 ++++---- .../log_ggtt_mad_f_inl0_hrd0.txt | 149 ++++---- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 157 +++++---- .../log_ggtt_mad_f_inl0_hrd0_common.txt | 129 ++++--- .../log_ggtt_mad_f_inl0_hrd0_curhst.txt | 124 +++++-- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 151 ++++---- .../log_ggtt_mad_f_inl0_hrd1.txt | 147 ++++---- .../log_ggtt_mad_f_inl1_hrd0.txt | 147 ++++---- .../log_ggtt_mad_f_inl1_hrd1.txt | 151 ++++---- .../log_ggtt_mad_m_inl0_hrd0.txt | 149 ++++---- .../log_ggtt_mad_m_inl0_hrd1.txt | 151 ++++---- .../log_ggttg_mad_d_inl0_hrd0.txt | 162 +++++---- .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 174 ++++++---- .../log_ggttg_mad_d_inl0_hrd1.txt | 164 +++++---- .../log_ggttg_mad_f_inl0_hrd0.txt | 164 +++++---- .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 176 ++++++---- .../log_ggttg_mad_f_inl0_hrd1.txt | 164 +++++---- .../log_ggttg_mad_m_inl0_hrd0.txt | 168 +++++---- .../log_ggttg_mad_m_inl0_hrd1.txt | 168 +++++---- .../log_ggttgg_mad_d_inl0_hrd0.txt | 164 +++++---- .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 176 ++++++---- .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 144 +++++--- .../log_ggttgg_mad_d_inl0_hrd0_curhst.txt | 139 ++++++-- .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 167 +++++---- .../log_ggttgg_mad_d_inl0_hrd1.txt | 170 +++++---- .../log_ggttgg_mad_d_inl1_hrd0.txt | 166 +++++---- .../log_ggttgg_mad_d_inl1_hrd1.txt | 168 +++++---- .../log_ggttgg_mad_f_inl0_hrd0.txt | 164 +++++---- .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 176 ++++++---- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 144 +++++--- .../log_ggttgg_mad_f_inl0_hrd0_curhst.txt | 139 ++++++-- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 167 
+++++---- .../log_ggttgg_mad_f_inl0_hrd1.txt | 170 +++++---- .../log_ggttgg_mad_f_inl1_hrd0.txt | 162 +++++---- .../log_ggttgg_mad_f_inl1_hrd1.txt | 162 +++++---- .../log_ggttgg_mad_m_inl0_hrd0.txt | 168 +++++---- .../log_ggttgg_mad_m_inl0_hrd1.txt | 170 +++++---- .../log_ggttggg_mad_d_inl0_hrd0.txt | 164 +++++---- .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 176 ++++++---- .../log_ggttggg_mad_d_inl0_hrd1.txt | 166 +++++---- .../log_ggttggg_mad_f_inl0_hrd0.txt | 168 +++++---- .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 180 ++++++---- .../log_ggttggg_mad_f_inl0_hrd1.txt | 172 ++++++---- .../log_ggttggg_mad_m_inl0_hrd0.txt | 164 +++++---- .../log_ggttggg_mad_m_inl0_hrd1.txt | 164 +++++---- .../log_gqttq_mad_d_inl0_hrd0.txt | 162 +++++---- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 174 ++++++---- .../log_gqttq_mad_d_inl0_hrd1.txt | 160 +++++---- .../log_gqttq_mad_f_inl0_hrd0.txt | 162 +++++---- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 174 ++++++---- .../log_gqttq_mad_f_inl0_hrd1.txt | 162 +++++---- .../log_gqttq_mad_m_inl0_hrd0.txt | 164 +++++---- .../log_gqttq_mad_m_inl0_hrd1.txt | 162 +++++---- epochX/cudacpp/tput/throughputX.sh | 2 +- 97 files changed, 10334 insertions(+), 6350 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index a0afbfa04f..1a4d828546 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -2,13 +2,13 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/e CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 - +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' @@ -16,14 +16,14 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
@@ -33,10 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_18:51:18 +DATE: 2023-10-25_19:18:26 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB -NVIDIA L4]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -52,7 +51,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -60,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.4551s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4471s - [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6275s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6175s + [COUNTERS] Fortran MEs ( 1 ) : 0.0099s for 8192 events => throughput is 8.25E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -77,7 +76,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -85,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1247s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1167s - [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1814s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1709s + [COUNTERS] Fortran MEs ( 1 ) : 0.0105s for 8192 events => throughput is 7.82E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -102,7 +101,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -110,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3279s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2421s - [COUNTERS] Fortran MEs ( 1 ) : 0.0857s for 90112 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4443s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3343s + [COUNTERS] Fortran MEs ( 1 ) : 0.1099s for 90112 events => throughput is 8.20E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -127,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -135,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1272s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1223s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 8192 events => throughput is 1.67E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1870s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1812s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 8192 events => throughput is 1.41E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -160,7 +159,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -168,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3014s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2486s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0528s for 90112 events => throughput is 1.71E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4079s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3422s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0657s for 90112 events => throughput is 1.37E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,14 +180,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919904813628E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.772470e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.344246e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.794639e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.355045e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,7 +202,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -211,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1216s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1194s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.70E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1815s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1785s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.72E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +235,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.2702s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2458s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 90112 events => throughput is 3.69E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3697s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3373s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0324s for 90112 events => throughput is 2.78E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,14 +256,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919904813628E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.892983e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.734240e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.144258e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.880467e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -279,7 +278,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -287,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1194s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1183s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0012s for 8192 events => throughput is 7.11E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1763s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1746s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0016s for 8192 events => throughput is 5.05E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -312,7 +311,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -320,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.2581s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2456s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 90112 events => throughput is 7.21E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3545s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3375s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 90112 events => throughput is 5.28E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,14 +332,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919904813656E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.860461e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.204571e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.542428e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.588414e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -355,7 +354,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -363,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1194s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1183s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0011s for 8192 events => throughput is 7.29E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1798s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1784s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0014s for 8192 events => throughput is 5.76E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -388,7 +387,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -396,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.2586s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2464s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0122s for 90112 events => throughput is 7.37E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3579s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3417s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0162s for 90112 events => throughput is 5.55E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,14 +408,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919904813656E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.021386e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.597781e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.711678e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.189972e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -431,7 +430,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -439,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1191s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1181s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 8.67E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1771s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1755s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0016s for 8192 events => throughput is 5.09E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -464,7 +463,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -472,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.2580s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2474s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 90112 events => throughput is 8.49E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3582s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3400s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0182s for 90112 events => throughput is 4.95E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,14 +484,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919904813656E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.789628e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.738607e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.056403e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.243870e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -507,4 +506,97 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' -ERROR! 
' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' failed + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 + [UNWEIGHT] Wrote 1611 events (found 1616 events) + [COUNTERS] PROGRAM TOTAL : 0.6205s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6200s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.63E+07 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21747169064681776) and cpp (0.21747169064681776) differ by less than 2E-14 (0.0) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.7581s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7534s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.93E+07 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919904813628E-002) differ by less than 2E-14 (3.3306690738754696e-16) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.604143e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.463611e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.281447e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.027562e+09 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.209260e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.077193e+09 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.272993e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** 
+Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.998178e+08 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 2bb17552f5..f98575860b 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,29 +1,29 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 - make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -33,10 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_18:52:16 +DATE: 2023-10-25_19:18:43 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB -NVIDIA L4]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -52,7 +51,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -60,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.4323s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4243s - [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8283s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8183s + [COUNTERS] Fortran MEs ( 1 ) : 0.0100s for 8192 events => throughput is 8.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -77,7 +76,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -85,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1246s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1166s - [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1908s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1801s + [COUNTERS] Fortran MEs ( 1 ) : 0.0107s for 8192 events => throughput is 7.65E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -102,7 +101,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -110,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3273s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2416s - [COUNTERS] Fortran MEs ( 1 ) : 0.0857s for 90112 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4773s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3594s + [COUNTERS] Fortran MEs ( 1 ) : 0.1178s for 90112 events => throughput is 7.65E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -127,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -135,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166140620297] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1255s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1212s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0042s for 8192 events => throughput is 1.94E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1859s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1802s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0057s for 8192 events => throughput is 1.43E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -160,7 +159,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -168,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501907784661565E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.2932s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2469s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0462s for 90112 events => throughput is 1.95E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4026s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3390s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0636s for 90112 events => throughput is 1.42E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,14 +180,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501907784661565E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.077247e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.366198e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.101215e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.396541e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,7 +202,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -211,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165549479658] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1196s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1184s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0012s for 8192 events => throughput is 6.78E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1780s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1763s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0017s for 8192 events => throughput is 4.75E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +235,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905692857932E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.2579s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2447s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0132s for 90112 events => throughput is 6.84E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3798s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3613s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0185s for 90112 events => throughput is 4.88E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,14 +256,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501905692857932E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.546371e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.893797e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.032570e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.592095e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -279,7 +278,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -287,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165569099927] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1183s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1177s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.32E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.1811s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1803s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 9.92E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -312,7 +311,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -320,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905658047333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.2525s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2456s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 90112 events => throughput is 1.30E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.3614s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3514s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0100s for 90112 events => throughput is 8.98E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,14 +332,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501905658047333E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.534617e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.801869e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.574301e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.185551e+07 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -355,7 +354,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -363,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165569099927] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1201s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1196s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.38E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.1801s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1793s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 1.05E+07 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -388,7 +387,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -396,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905658047333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.2524s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2458s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.38E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.3471s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3384s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 90112 events => throughput is 1.04E+07 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,14 +408,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501905658047333E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.610171e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.124905e+07 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.666504e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.311988e+07 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -431,7 +430,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -439,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166431914253] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1184s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1179s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.52E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.1801s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1791s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 8.83E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -464,7 +463,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -472,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501909358591468E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.2531s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2466s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 90112 events => throughput is 1.39E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.3552s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3451s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0101s for 90112 events => throughput is 8.91E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,14 +484,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501909358591468E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.850271e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.261918e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.039998e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.082919e+07 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -507,4 +506,97 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' -ERROR! 
' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' failed + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2175 [0.21747166796068879] fbridge_mode=1 + [UNWEIGHT] Wrote 1611 events (found 1616 events) + [COUNTERS] PROGRAM TOTAL : 0.5875s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5870s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.79E+07 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21747169064681776) and cpp (0.21747166796068879) differ by less than 4E-4 (1.043176189874373e-07) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.0915 [9.1501910316213061E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.7572s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7527s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 90112 events => throughput is 2.00E+07 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501910316213061E-002) differ by less than 4E-4 (1.0479125034379422e-07) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.090855e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.271435e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.835160e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.510333e+09 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.894374e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.802495e+09 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.131306e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** 
+Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.434205e+08 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 54f2157d62..0d49865b9c 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,29 +1,29 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none - -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0'CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' - CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -33,10 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_18:53:14 +DATE: 2023-10-25_19:18:59 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB -NVIDIA L4]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -52,7 +51,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -60,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.4418s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4339s - [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6278s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6178s + [COUNTERS] Fortran MEs ( 1 ) : 0.0100s for 8192 events => throughput is 8.21E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -77,7 +76,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -85,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1240s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1160s - [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1782s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1684s + [COUNTERS] Fortran MEs ( 1 ) : 0.0099s for 8192 events => throughput is 8.30E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -102,7 +101,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -110,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3334s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2477s - [COUNTERS] Fortran MEs ( 1 ) : 0.0856s for 90112 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4565s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3432s + [COUNTERS] Fortran MEs ( 1 ) : 0.1134s for 90112 events => throughput is 7.95E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -127,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -135,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211728] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1278s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1228s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 8192 events => throughput is 1.64E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1872s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1810s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 8192 events => throughput is 1.30E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -160,7 +159,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -168,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3026s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2480s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0547s for 90112 events => throughput is 1.65E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4079s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0667s for 90112 events => throughput is 1.35E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,14 +180,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919915927155E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.708796e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.341399e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.771441e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.323934e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,7 +202,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -211,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211728] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1217s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1194s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.63E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1801s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1772s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.78E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +235,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.2701s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2458s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 90112 events => throughput is 3.70E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3694s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3374s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0319s for 90112 events => throughput is 2.82E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,14 +256,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919915927155E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.920617e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.783585e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.132238e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.872248e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -279,7 +278,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -287,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1196s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1185s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0011s for 8192 events => throughput is 7.31E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1786s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1770s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0016s for 8192 events => throughput is 5.06E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -312,7 +311,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -320,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.2583s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2459s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0124s for 90112 events => throughput is 7.26E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3561s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3389s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 90112 events => throughput is 5.24E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,14 +332,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919908700741E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.904879e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.356083e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.704430e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.847494e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -355,7 +354,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -363,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1192s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1182s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0011s for 8192 events => throughput is 7.49E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1798s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1783s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0014s for 8192 events => throughput is 5.77E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -388,7 +387,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -396,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.2585s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2466s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0119s for 90112 events => throughput is 7.59E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3603s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3443s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0160s for 90112 events => throughput is 5.63E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,14 +408,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919908700741E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.065826e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.610629e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.072113e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.797344e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -431,7 +430,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -439,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1190s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1180s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 8.66E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1826s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1808s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.43E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -464,7 +463,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -472,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.2582s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2475s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0107s for 90112 events => throughput is 8.42E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3604s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3419s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0185s for 90112 events => throughput is 4.88E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,14 +484,14 @@ OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919908700741E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.780839e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.814394e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.057833e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.589840e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -507,4 +506,97 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' -ERROR! 
' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' failed + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2175 [0.21747169066587257] fbridge_mode=1 + [UNWEIGHT] Wrote 1611 events (found 1616 events) + [COUNTERS] PROGRAM TOTAL : 0.5950s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5945s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.67E+07 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21747169064681776) and cpp (0.21747169066587257) differ by less than 2E-4 (8.761968928183705e-11) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.0915 [9.1501919911173610E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.7577s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7529s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.90E+07 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919911173610E-002) differ by less than 2E-4 (6.95061785904727e-11) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.583003e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.467396e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.252357e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.030772e+09 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.251034e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.079891e+09 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.286168e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** 
+Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.995935e+08 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 6c8f068bce..57c094acdf 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -5,23 +5,23 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=avx2 - +make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -33,10 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_18:54:13 +DATE: 2023-10-25_19:19:16 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB -NVIDIA L4]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -52,7 +51,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -60,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.2539s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2152s - [COUNTERS] Fortran MEs ( 1 ) : 0.0387s for 8192 events => throughput is 2.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4367s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3846s + [COUNTERS] Fortran MEs ( 1 ) : 0.0521s for 8192 events => throughput is 1.57E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -77,7 +76,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -85,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2234s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1846s - [COUNTERS] Fortran MEs ( 1 ) : 0.0388s for 8192 events => throughput is 2.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3291s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2810s + [COUNTERS] Fortran MEs ( 1 ) : 0.0481s for 8192 events => throughput is 1.70E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -102,7 +101,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -110,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.2973s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8722s - [COUNTERS] Fortran MEs ( 1 ) : 0.4251s for 90112 events => throughput is 2.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7885s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2393s + [COUNTERS] Fortran MEs ( 1 ) : 0.5492s for 90112 events => throughput is 1.64E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -127,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -135,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600102] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2530s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2191s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0338s for 8192 events => throughput is 2.42E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3572s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3148s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0424s for 8192 events => throughput is 1.93E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -160,7 +159,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -168,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775379] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.2715s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9026s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3689s for 90112 events => throughput is 2.44E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6843s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2210s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4633s for 90112 events => throughput is 1.94E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,14 +180,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775379) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.481533e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.950448e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.488395e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.964951e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,7 +202,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -211,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2233s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2049s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0184s for 8192 events => throughput is 4.44E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3143s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2905s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0239s for 8192 events => throughput is 3.43E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +235,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775379] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.0887s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8858s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2029s for 90112 events => throughput is 4.44E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4654s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2042s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2612s for 90112 events => throughput is 3.45E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,14 +256,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775379) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.514435e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.422695e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.535739e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.385899e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -279,7 +278,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -287,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2084s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1977s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0108s for 8192 events => throughput is 7.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2964s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2820s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0144s for 8192 events => throughput is 5.70E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -312,7 +311,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -320,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775393] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 0.9993s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8804s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1189s for 90112 events => throughput is 7.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3564s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1960s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1604s for 90112 events => throughput is 5.62E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,14 +332,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775393) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.734700e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.356829e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.812072e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.465883e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -355,7 +354,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -363,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2075s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1973s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0101s for 8192 events => throughput is 8.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2947s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2816s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0130s for 8192 events => throughput is 6.29E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -388,7 +387,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -396,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775393] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 0.9909s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8792s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1117s for 90112 events => throughput is 8.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3386s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1916s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1469s for 90112 events => throughput is 6.13E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,14 +408,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775393) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.269115e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.955245e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.344819e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.038215e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -431,7 +430,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -439,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2105s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1990s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0115s for 8192 events => throughput is 7.15E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3076s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2868s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0208s for 8192 events => throughput is 3.94E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -464,7 +463,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -472,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775393] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.0100s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8830s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1270s for 90112 events => throughput is 7.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4351s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2090s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2261s for 90112 events => throughput is 3.99E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,14 +484,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775393) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.266848e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.616558e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.348158e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.687418e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -507,4 +506,97 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' -ERROR! 
' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' failed + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 + [UNWEIGHT] Wrote 434 events (found 1125 events) + [COUNTERS] PROGRAM TOTAL : 0.6890s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6885s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.45E+07 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.690708277600116) and cpp (47.690708277600109) differ by less than 2E-14 (1.1102230246251565e-16) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 + [UNWEIGHT] Wrote 1727 events (found 1732 events) + [COUNTERS] PROGRAM TOTAL : 1.6130s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6068s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 90112 events => throughput is 1.46E+07 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775386) differ by less than 2E-14 (4.440892098500626e-16) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.179274e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.706047e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.310976e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.080551e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.327429e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.152072e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.328654e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.061951e+07 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 7d8234ac8f..ac65217070 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,31 +1,31 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 - make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
@@ -33,10 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_18:55:16 +DATE: 2023-10-25_19:19:41 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB -NVIDIA L4]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -52,7 +51,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -60,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.2557s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2169s - [COUNTERS] Fortran MEs ( 1 ) : 0.0388s for 8192 events => throughput is 2.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3582s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3097s + [COUNTERS] Fortran MEs ( 1 ) : 0.0485s for 8192 events => throughput is 1.69E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -77,7 +76,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -85,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2232s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1844s - [COUNTERS] Fortran MEs ( 1 ) : 0.0387s for 8192 events => throughput is 2.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3161s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2673s + [COUNTERS] Fortran MEs ( 1 ) : 0.0488s for 8192 events => throughput is 1.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -102,7 +101,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -110,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.2938s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8687s - [COUNTERS] Fortran MEs ( 1 ) : 0.4251s for 90112 events => throughput is 2.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7673s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2278s + [COUNTERS] Fortran MEs ( 1 ) : 0.5395s for 90112 events => throughput is 1.67E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -127,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -135,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690706211693573] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2471s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2163s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0308s for 8192 events => throughput is 2.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3486s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3073s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0413s for 8192 events => throughput is 1.98E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -160,7 +159,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -168,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782418787778] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.2373s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8994s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3379s for 90112 events => throughput is 2.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6743s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2245s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4497s for 90112 events => throughput is 2.00E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,14 +180,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223782418787778) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.763907e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.956288e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.767633e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.982512e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,7 +202,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -211,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690702562167019] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2121s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1993s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0128s for 8192 events => throughput is 6.41E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2999s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2833s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0166s for 8192 events => throughput is 4.94E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +235,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223778631221009] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.0243s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8838s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1405s for 90112 events => throughput is 6.42E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3965s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2188s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1777s for 90112 events => throughput is 5.07E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,14 +256,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223778631221009) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.403818e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.832476e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.434601e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.796478e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -279,7 +278,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -287,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690694055768034] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.1987s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1923s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 8192 events => throughput is 1.28E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2883s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2798s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.65E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -312,7 +311,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -320,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223775988760060] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 0.9489s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8775s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0714s for 90112 events => throughput is 1.26E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.2853s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1921s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0933s for 90112 events => throughput is 9.66E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,14 +332,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223775988760060) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.312849e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.545083e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.330161e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.445429e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -355,7 +354,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -363,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690694055768034] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.1998s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1937s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0061s for 8192 events => throughput is 1.34E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2874s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2794s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -388,7 +387,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -396,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223775988760060] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 0.9433s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8755s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0678s for 90112 events => throughput is 1.33E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.2777s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1907s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0870s for 90112 events => throughput is 1.04E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,14 +408,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223775988760060) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.381207e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.937961e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.402611e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.006840e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -431,7 +430,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -439,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690698865531559] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.1999s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1936s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 8192 events => throughput is 1.30E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2925s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2818s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0107s for 8192 events => throughput is 7.67E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -464,7 +463,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -472,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223780255562296] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 0.9471s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8766s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0705s for 90112 events => throughput is 1.28E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3226s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2041s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1185s for 90112 events => throughput is 7.60E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,14 +484,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223780255562296) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.345186e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.089065e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.350507e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.293009e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -507,4 +506,97 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' -ERROR! 
' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' failed + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.69 [47.690703397697980] fbridge_mode=1 + [UNWEIGHT] Wrote 434 events (found 1125 events) + [COUNTERS] PROGRAM TOTAL : 0.6877s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6872s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.51E+07 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.690708277600116) and cpp (47.690703397697980) differ by less than 4E-4 (1.0232396019382861e-07) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 46.22 [46.223786763175951] fbridge_mode=1 + [UNWEIGHT] Wrote 1727 events (found 1732 events) + [COUNTERS] PROGRAM TOTAL : 1.6161s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6107s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 90112 events => throughput is 1.66E+07 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (46.223782291775365) and cpp (46.223786763175951) differ by less than 4E-4 (9.673376699659286e-08) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.382566e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.934525e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.441859e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.769075e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.403190e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.876277e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.887286e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.439919e+07 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 8e4e8205ac..c3747a1448 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -4,26 +4,26 @@ CUDACPP_BUILDDIR='.' 
make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 - +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
@@ -33,10 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_18:56:20 +DATE: 2023-10-25_19:20:06 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB -NVIDIA L4]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -52,7 +51,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -60,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.2541s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2154s - [COUNTERS] Fortran MEs ( 1 ) : 0.0387s for 8192 events => throughput is 2.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3588s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3101s + [COUNTERS] Fortran MEs ( 1 ) : 0.0486s for 8192 events => throughput is 1.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -77,7 +76,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -85,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2233s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1846s - [COUNTERS] Fortran MEs ( 1 ) : 0.0387s for 8192 events => throughput is 2.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3193s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2707s + [COUNTERS] Fortran MEs ( 1 ) : 0.0487s for 8192 events => throughput is 1.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -102,7 +101,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -110,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.2941s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8688s - [COUNTERS] Fortran MEs ( 1 ) : 0.4252s for 90112 events => throughput is 2.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7636s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2247s + [COUNTERS] Fortran MEs ( 1 ) : 0.5388s for 90112 events => throughput is 1.67E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -127,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -135,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2550s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2206s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0344s for 8192 events => throughput is 2.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3537s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3109s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0428s for 8192 events => throughput is 1.91E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -160,7 +159,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -168,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.2838s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9056s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3782s for 90112 events => throughput is 2.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7009s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2276s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4732s for 90112 events => throughput is 1.90E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,14 +180,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223783635280988) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.460313e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.932354e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.444312e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.907654e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,7 +202,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -211,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2228s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2045s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0183s for 8192 events => throughput is 4.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3139s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2905s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0234s for 8192 events => throughput is 3.50E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +235,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.0883s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8871s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2012s for 90112 events => throughput is 4.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4697s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2111s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2585s for 90112 events => throughput is 3.49E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,14 +256,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223783635280988) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.534476e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.342983e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.564668e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.391505e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -279,7 +278,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -287,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2081s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1974s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0107s for 8192 events => throughput is 7.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3006s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2861s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0145s for 8192 events => throughput is 5.64E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -312,7 +311,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -320,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 0.9969s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8790s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1179s for 90112 events => throughput is 7.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3584s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1999s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1584s for 90112 events => throughput is 5.69E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,14 +332,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223783652032040) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.841260e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.413133e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.885682e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.510615e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -355,7 +354,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -363,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2062s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1961s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0101s for 8192 events => throughput is 8.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2939s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2806s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0133s for 8192 events => throughput is 6.18E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -388,7 +387,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -396,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 0.9929s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8815s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1113s for 90112 events => throughput is 8.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3412s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1968s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1445s for 90112 events => throughput is 6.24E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,14 +408,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223783652032040) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.357077e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.210141e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.361362e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.018773e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -431,7 +430,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -439,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2093s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1980s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0113s for 8192 events => throughput is 7.28E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3082s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2878s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0204s for 8192 events => throughput is 4.02E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -464,7 +463,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -472,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.0075s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8827s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1248s for 90112 events => throughput is 7.22E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5311s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2846s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2465s for 90112 events => throughput is 3.66E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,14 +484,14 @@ OK! xsec from fortran (46.223782291775365) and cpp (46.223783652032040) differ b OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.373677e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.423825e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.436880e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.530096e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -507,4 +506,97 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' -ERROR! 
' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' failed + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.69 [47.690708266690699] fbridge_mode=1 + [UNWEIGHT] Wrote 434 events (found 1125 events) + [COUNTERS] PROGRAM TOTAL : 0.6929s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6924s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.49E+07 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.690708277600116) and cpp (47.690708266690699) differ by less than 2E-4 (2.2875357164053867e-10) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 46.22 [46.223782303744791] fbridge_mode=1 + [UNWEIGHT] Wrote 1727 events (found 1732 events) + [COUNTERS] PROGRAM TOTAL : 1.6044s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5982s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 90112 events => throughput is 1.45E+07 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (46.223782291775365) and cpp (46.223782303744791) differ by less than 2E-4 (2.5894508759449764e-10) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.130110e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.628036e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.274030e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.059654e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.301020e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.126562e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.294170e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.968784e+07 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 708cc25f0b..925cf1dd8b 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,31 +1,31 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none - -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=avx2 +make USEBUILDDIR=1 AVX=512y + make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -33,10 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_18:57:24 +DATE: 2023-10-25_19:20:33 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB -NVIDIA L4]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -52,17 +51,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.4284s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1564s - [COUNTERS] Fortran MEs ( 1 ) : 0.2720s for 8192 events => throughput is 3.01E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5609s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2258s + [COUNTERS] Fortran MEs ( 1 ) : 0.3351s for 8192 events => throughput is 2.44E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -77,17 +76,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4259s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1538s - [COUNTERS] Fortran MEs ( 1 ) : 0.2721s for 8192 events => throughput is 3.01E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5566s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2205s + [COUNTERS] Fortran MEs ( 1 ) : 0.3361s for 8192 events => throughput is 2.44E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -102,17 +101,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872844967921E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.9925s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0006s - [COUNTERS] Fortran MEs ( 1 ) : 2.9919s for 90112 events => throughput is 3.01E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.0766s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3919s + [COUNTERS] Fortran MEs ( 1 ) : 3.6847s for 90112 events => throughput is 2.45E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -127,21 +126,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196357922470791E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6643s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4056s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2588s for 8192 events => throughput is 3.17E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8617s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5333s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3285s for 8192 events => throughput is 2.49E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195719386171234E-002) differ by less than 2E-14 (0.0) +OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470791E-002) differ by less than 2E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,35 +159,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872844967963E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 4.1008s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2548s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.8459s for 90112 events => throughput is 3.17E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2565s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6821s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5744s for 90112 events => throughput is 2.52E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310872844967963E-002) differ by less than 2E-14 (4.440892098500626e-16) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872077655597E-002) differ by less than 2E-14 (4.440892098500626e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.260360e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.585485e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.262382e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.602908e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,21 +202,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196357922470777E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4250s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2888s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1362s for 8192 events => throughput is 6.01E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5594s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3875s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1718s for 8192 events => throughput is 4.77E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195719386171234E-002) differ by less than 2E-14 (0.0) +OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470777E-002) differ by less than 2E-14 (3.3306690738754696e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -236,35 +235,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872844967921E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.6317s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1335s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.4982s for 90112 events => throughput is 6.01E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.4116s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5278s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8838s for 90112 events => throughput is 4.78E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310872844967921E-002) differ by less than 2E-14 (0.0) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872077655555E-002) differ by less than 2E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.185953e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.886187e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.196023e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.863074e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -279,21 +278,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171206E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.2816s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2175s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0641s for 8192 events => throughput is 1.28E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3866s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3020s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0845s for 8192 events => throughput is 9.69E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195719386171206E-002) differ by less than 2E-14 (3.3306690738754696e-16) +OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470750E-002) differ by less than 2E-14 (5.551115123125783e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -312,35 +311,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872844967907E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.7748s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0699s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7049s for 90112 events => throughput is 1.28E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3806s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4438s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9368s for 90112 events => throughput is 9.62E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310872844967907E-002) differ by less than 2E-14 (2.220446049250313e-16) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872077655541E-002) differ by less than 2E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.303083e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.810175e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.306009e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.814710e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -355,21 +354,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171206E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.2708s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2125s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0582s for 8192 events => throughput is 1.41E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3697s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2950s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0747s for 8192 events => throughput is 1.10E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195719386171206E-002) differ by less than 2E-14 (3.3306690738754696e-16) +OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470750E-002) differ by less than 2E-14 (5.551115123125783e-16) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -388,35 +387,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872844967907E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.7043s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0630s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6413s for 90112 events => throughput is 1.41E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2633s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4344s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8288s for 90112 events => throughput is 1.09E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310872844967907E-002) differ by less than 2E-14 (2.220446049250313e-16) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872077655541E-002) differ by less than 2E-14 (2.220446049250313e-16) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.443855e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.111782e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.447065e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.107532e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -431,21 +430,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.2580s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2068s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0512s for 8192 events => throughput is 1.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4288s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3258s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1030s for 8192 events => throughput is 7.95E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195719386171234E-002) differ by less than 2E-14 (0.0) +OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470750E-002) differ by less than 2E-14 (5.551115123125783e-16) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -464,35 +463,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872844967907E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.6189s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0547s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5642s for 90112 events => throughput is 1.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6474s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4850s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1624s for 90112 events => throughput is 7.75E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310872844967907E-002) differ by less than 2E-14 (2.220446049250313e-16) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872077655541E-002) differ by less than 2E-14 (2.220446049250313e-16) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.631688e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.891678e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.638264e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.153326e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -507,4 +506,97 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' -ERROR! 
' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' failed + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 41 events (found 467 events) + [COUNTERS] PROGRAM TOTAL : 0.6666s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6612s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.52E+06 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470764E-002) differ by less than 2E-14 (4.440892098500626e-16) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 679 events (found 1787 events) + [COUNTERS] PROGRAM TOTAL : 1.8130s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7902s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0227s for 90112 events => throughput is 3.96E+06 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872077655597E-002) differ by less than 2E-14 (4.440892098500626e-16) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.639819e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.202614e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.989805e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.236697e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.956986e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.247459e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.990764e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.731178e+06 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index abeef091e4..fa99d034ca 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -2,24 +2,24 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' + make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 - - make USEBUILDDIR=1 AVX=512y + make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -33,10 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_18:58:38 +DATE: 2023-10-25_19:21:14 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB -NVIDIA L4]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -52,17 +51,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.4284s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1564s - [COUNTERS] Fortran MEs ( 1 ) : 0.2720s for 8192 events => throughput is 3.01E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5523s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2203s + [COUNTERS] Fortran MEs ( 1 ) : 0.3320s for 8192 events => throughput is 2.47E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -77,17 +76,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4258s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1538s - [COUNTERS] Fortran MEs ( 1 ) : 0.2720s for 8192 events => throughput is 3.01E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5511s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2185s + [COUNTERS] Fortran MEs ( 1 ) : 0.3325s for 8192 events => throughput is 2.46E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -102,17 +101,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872844967921E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.9922s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9998s - [COUNTERS] Fortran MEs ( 1 ) : 2.9925s for 90112 events => throughput is 3.01E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.0773s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3954s + [COUNTERS] Fortran MEs ( 1 ) : 3.6819s for 90112 events => throughput is 2.45E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -127,21 +126,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195711188152623E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196349725192449E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6577s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4026s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2551s for 8192 events => throughput is 3.21E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8861s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5506s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3355s for 8192 events => throughput is 2.44E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195711188152623E-002) differ by less than 4E-4 (8.434546971969326e-08) +OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196349725192449E-002) differ by less than 4E-4 (8.433729958845504e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,35 +159,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310861450156910E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310860682799649E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 4.0573s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2500s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.8074s for 90112 events => throughput is 3.21E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1805s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6708s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5097s for 90112 events => throughput is 2.57E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310861450156910E-002) differ by less than 4E-4 (1.401388352029187e-07) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310860682799649E-002) differ by less than 4E-4 (1.4013938864909647e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.313409e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.644874e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.314649e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.623423e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,21 +202,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195695504827997E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196334032667323E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3039s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2288s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0751s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4066s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3108s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0959s for 8192 events => throughput is 8.55E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195695504827997E-002) differ by less than 4E-4 (2.457036522018896e-07) +OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196334032667323E-002) differ by less than 4E-4 (2.4578908086603235e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -236,35 +235,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310848293145957E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310847525777316E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.9056s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0799s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8257s for 90112 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5043s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4504s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0538s for 90112 events => throughput is 8.55E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310848293145957E-002) differ by less than 4E-4 (3.0195004807609394e-07) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310847525777316E-002) differ by less than 4E-4 (3.0195074296468505e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.111598e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.644259e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.106035e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.595838e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -279,21 +278,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195692323432697E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196330842071521E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.2195s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1865s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0329s for 8192 events => throughput is 2.49E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3044s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2603s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0441s for 8192 events => throughput is 1.86E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195692323432697E-002) differ by less than 4E-4 (2.7843549810224744e-07) +OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196330842071521E-002) differ by less than 4E-4 (2.786153705525152e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -312,35 +311,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310848252682449E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310847485320789E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.4009s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0379s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3630s for 90112 events => throughput is 2.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9365s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4416s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4949s for 90112 events => throughput is 1.82E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310848252682449E-002) differ by less than 4E-4 (3.0244768767229147e-07) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310847485320789E-002) differ by less than 4E-4 (3.024482967406428e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.547162e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.877444e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.553448e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.866007e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -355,21 +354,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195692323432697E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196330842071521E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.2144s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1844s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0300s for 8192 events => throughput is 2.73E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2968s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2571s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0397s for 8192 events => throughput is 2.06E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195692323432697E-002) differ by less than 4E-4 (2.7843549810224744e-07) +OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196330842071521E-002) differ by less than 4E-4 (2.786153705525152e-07) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -388,35 +387,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310848252682449E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310847485320789E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.3630s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0322s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3307s for 90112 events => throughput is 2.72E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8329s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3969s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4360s for 90112 events => throughput is 2.07E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310848252682449E-002) differ by less than 4E-4 (3.0244768767229147e-07) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310847485320789E-002) differ by less than 4E-4 (3.024482967406428e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.811806e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.064455e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.819395e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.134523e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -431,21 +430,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195705534321677E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196344068381207E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.2030s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1786s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0245s for 8192 events => throughput is 3.35E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3182s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2680s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0503s for 8192 events => throughput is 1.63E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195705534321677E-002) differ by less than 4E-4 (1.4251501656570298e-07) +OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196344068381207E-002) differ by less than 4E-4 (1.42537126879283e-07) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -464,35 +463,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310858570909916E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310857803543385E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.2974s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0267s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2708s for 90112 events => throughput is 3.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9728s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4203s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5524s for 90112 events => throughput is 1.63E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310858570909916E-002) differ by less than 4E-4 (1.7554919173878858e-07) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310857803543385E-002) differ by less than 4E-4 (1.755498595379379e-07) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.430004e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.608228e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.443942e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.603946e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -507,4 +506,97 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' -ERROR! 
' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' failed + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.0972 [9.7196349366365994E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 41 events (found 467 events) + [COUNTERS] PROGRAM TOTAL : 0.6467s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6458s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 9.73E+06 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196349366365994E-002) differ by less than 4E-4 (8.802906814597833e-08) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.08131 [8.1310864949473968E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 679 events (found 1787 events) + [COUNTERS] PROGRAM TOTAL : 1.7893s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7798s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0095s for 90112 events => throughput is 9.53E+06 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310864949473968E-002) differ by less than 4E-4 (8.766578696306482e-08) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.347402e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.856435e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.795868e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.305516e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.791470e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.491438e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.627451e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.615609e+07 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 1cd53ced5d..8d56c45efe 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,29 +1,29 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 - make USEBUILDDIR=1 AVX=512y + make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0'CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' - +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. @@ -33,10 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_18:59:51 +DATE: 2023-10-25_19:21:50 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB -NVIDIA L4]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -52,17 +51,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.4299s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1578s - [COUNTERS] Fortran MEs ( 1 ) : 0.2721s for 8192 events => throughput is 3.01E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5526s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2199s + [COUNTERS] Fortran MEs ( 1 ) : 0.3328s for 8192 events => throughput is 2.46E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -77,17 +76,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195719386171234E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4263s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1543s - [COUNTERS] Fortran MEs ( 1 ) : 0.2720s for 8192 events => throughput is 3.01E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5518s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2183s + [COUNTERS] Fortran MEs ( 1 ) : 0.3335s for 8192 events => throughput is 2.46E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -102,17 +101,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310872844967921E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.9932s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0020s - [COUNTERS] Fortran MEs ( 1 ) : 2.9912s for 90112 events => throughput is 3.01E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.0409s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3789s + [COUNTERS] Fortran MEs ( 1 ) : 3.6620s for 90112 events => throughput is 2.46E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -127,21 +126,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195720226233587E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196358763382007E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6743s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4117s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2627s for 8192 events => throughput is 3.12E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8757s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5434s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3323s for 8192 events => throughput is 2.46E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195720226233587E-002) differ by less than 2E-4 (8.642997428864874e-09) +OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358763382007E-002) differ by less than 2E-4 (8.651674043846924e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,35 +159,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310873602323142E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872835011053E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 4.1467s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2599s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.8867s for 90112 events => throughput is 3.12E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3915s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7212s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6703s for 90112 events => throughput is 2.46E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310873602323142E-002) differ by less than 2E-4 (9.314316651298782e-09) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872835011053E-002) differ by less than 2E-4 (9.31432020401246e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.227112e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.532584e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.229042e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.521877e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,21 +202,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195720267415450E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196358804670396E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4216s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2876s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1340s for 8192 events => throughput is 6.12E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5544s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3845s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1699s for 8192 events => throughput is 4.82E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195720267415450E-002) differ by less than 2E-4 (9.066697836956905e-09) +OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358804670396E-002) differ by less than 2E-4 (9.076467577529002e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -236,35 +235,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310873604102080E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872836789727E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.6087s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1353s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.4734s for 90112 events => throughput is 6.12E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.3887s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5301s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8586s for 90112 events => throughput is 4.85E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310873604102080E-002) differ by less than 2E-4 (9.33619492826665e-09) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872836789727E-002) differ by less than 2E-4 (9.336195150311255e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.232300e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.952919e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.239527e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.914380e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -279,21 +278,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195720049465126E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.2815s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2183s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0632s for 8192 events => throughput is 1.30E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3876s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3037s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0839s for 8192 events => throughput is 9.76E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195720049465126E-002) differ by less than 2E-4 (6.824311782338555e-09) +OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358586501358E-002) differ by less than 2E-4 (6.831845977828266e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -312,35 +311,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310873476230255E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.7612s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0666s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6946s for 90112 events => throughput is 1.30E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3852s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4574s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9277s for 90112 events => throughput is 9.71E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310873476230255E-002) differ by less than 2E-4 (7.76356601228656e-09) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872708918333E-002) differ by less than 2E-4 (7.763571563401683e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.321387e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.889061e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.326541e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.855619e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -355,21 +354,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195720049465126E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.2698s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2123s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0575s for 8192 events => throughput is 1.43E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3661s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2915s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0745s for 8192 events => throughput is 1.10E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195720049465126E-002) differ by less than 2E-4 (6.824311782338555e-09) +OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358586501358E-002) differ by less than 2E-4 (6.831845977828266e-09) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -388,35 +387,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310873476230255E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.6909s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0585s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6324s for 90112 events => throughput is 1.42E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2630s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4388s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8242s for 90112 events => throughput is 1.09E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310873476230255E-002) differ by less than 2E-4 (7.76356601228656e-09) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872708918333E-002) differ by less than 2E-4 (7.763571563401683e-09) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.468207e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.127984e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.470735e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.119463e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -431,21 +430,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7195720220276491E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196358757578441E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.2605s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2070s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0535s for 8192 events => throughput is 1.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4328s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3253s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1075s for 8192 events => throughput is 7.62E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7195719386171234E-002) and cpp (9.7195720220276491E-002) differ by less than 2E-4 (8.581707788835047e-09) +OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358757578441E-002) differ by less than 2E-4 (8.591964251181139e-09) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -464,35 +463,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310873571012007E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310872803699391E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.6480s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0595s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5885s for 90112 events => throughput is 1.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6477s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4725s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1752s for 90112 events => throughput is 7.67E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872844967921E-002) and cpp (8.1310873571012007E-002) differ by less than 2E-4 (8.92923734951978e-09) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872803699391E-002) differ by less than 2E-4 (8.929234462939917e-09) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.565068e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.668693e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.569018e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.591243e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -507,4 +506,97 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' -ERROR! 
' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' failed + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.0972 [9.7196358102981245E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 41 events (found 467 events) + [COUNTERS] PROGRAM TOTAL : 0.7068s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7013s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.50E+06 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358102981245E-002) differ by less than 2E-4 (1.8571728599425796e-09) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.08131 [8.1310872068634174E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 679 events (found 1787 events) + [COUNTERS] PROGRAM TOTAL : 1.8933s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8700s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0233s for 90112 events => throughput is 3.87E+06 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310872068634174E-002) differ by less than 2E-4 (1.1094924978749532e-10) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.611008e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.220129e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.993875e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.234445e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.000644e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.243443e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.965395e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTXG_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.708140e+06 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index ce7e7609a4..62d0e45c34 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -2,28 +2,28 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=avx2 - +make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -33,10 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:01:06 +DATE: 2023-10-25_19:22:32 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB -NVIDIA L4]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -52,7 +51,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -60,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 3.6457s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1953s - [COUNTERS] Fortran MEs ( 1 ) : 3.4504s for 8192 events => throughput is 2.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3806s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2793s + [COUNTERS] Fortran MEs ( 1 ) : 4.1013s for 8192 events => throughput is 2.00E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -77,7 +76,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -85,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 3.6434s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1917s - [COUNTERS] Fortran MEs ( 1 ) : 3.4518s for 8192 events => throughput is 2.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3901s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2745s + [COUNTERS] Fortran MEs ( 1 ) : 4.1156s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -102,17 +101,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 39.3826s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3592s - [COUNTERS] Fortran MEs ( 1 ) : 38.0233s for 90112 events => throughput is 2.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.2170s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8938s + [COUNTERS] Fortran MEs ( 1 ) : 45.3231s for 90112 events => throughput is 1.99E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -127,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -135,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 6.9417s - [COUNTERS] Fortran Overhead ( 0 ) : 3.5036s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4380s for 8192 events => throughput is 2.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.7189s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4329s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.2860s for 8192 events => throughput is 1.91E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -160,35 +159,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748610601E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725748421161E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 42.4644s - [COUNTERS] Fortran Overhead ( 0 ) : 4.6673s - [COUNTERS] CudaCpp MEs ( 2 ) : 37.7971s for 90112 events => throughput is 2.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 53.2752s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0306s + [COUNTERS] CudaCpp MEs ( 2 ) : 47.2446s for 90112 events => throughput is 1.91E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610601E-004) differ by less than 2E-14 (2.220446049250313e-16) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421161E-004) differ by less than 2E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.484228e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.975932e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.484587e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.969422e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,7 +202,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -211,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352993E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 3.6038s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8665s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.7373s for 8192 events => throughput is 4.72E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7792s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4930s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2862s for 8192 events => throughput is 3.58E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,35 +235,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748610596E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 22.1554s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0347s - [COUNTERS] CudaCpp MEs ( 2 ) : 19.1207s for 90112 events => throughput is 4.71E+03 events/s + [COUNTERS] PROGRAM TOTAL : 29.2447s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0623s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.1823s for 90112 events => throughput is 3.58E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610596E-004) differ by less than 2E-14 (5.551115123125783e-16) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421158E-004) differ by less than 2E-14 (3.3306690738754696e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.903888e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.719602e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.902506e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.704578e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -279,7 +278,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -287,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.6286s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9055s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7231s for 8192 events => throughput is 1.13E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.2095s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2292s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9803s for 8192 events => throughput is 8.36E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -312,35 +311,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 10.0393s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0688s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.9705s for 90112 events => throughput is 1.13E+04 events/s + [COUNTERS] PROGRAM TOTAL : 13.7366s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8124s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.9242s for 90112 events => throughput is 8.25E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610604E-004) differ by less than 2E-14 (0.0) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421164E-004) differ by less than 2E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.154246e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.553162e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.157006e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.569833e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -355,7 +354,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -363,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.4687s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8238s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6448s for 8192 events => throughput is 1.27E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.9852s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1172s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8679s for 8192 events => throughput is 9.44E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -388,35 +387,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 9.0853s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9863s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.0990s for 90112 events => throughput is 1.27E+04 events/s + [COUNTERS] PROGRAM TOTAL : 12.2612s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6975s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.5637s for 90112 events => throughput is 9.42E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610604E-004) differ by less than 2E-14 (0.0) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421164E-004) differ by less than 2E-14 (0.0) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.306239e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.732365e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.306622e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.738434e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -431,7 +430,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -439,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.1802s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6814s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4988s for 8192 events => throughput is 1.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.3926s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3264s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0661s for 8192 events => throughput is 7.68E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -464,35 +463,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.3177s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8482s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4695s for 90112 events => throughput is 1.65E+04 events/s + [COUNTERS] PROGRAM TOTAL : 14.7899s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9106s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.8793s for 90112 events => throughput is 7.59E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725748610604E-004) differ by less than 2E-14 (0.0) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421164E-004) differ by less than 2E-14 (0.0) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.692604e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.707688e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.691372e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.746281e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -507,4 +506,97 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' -ERROR! 
' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' failed + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 59 events (found 420 events) + [COUNTERS] PROGRAM TOTAL : 0.7947s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7633s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0315s for 8192 events => throughput is 2.60E+05 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277311352998E-004) differ by less than 2E-14 (4.440892098500626e-16) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803725748421161E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 207 events (found 1235 events) + [COUNTERS] PROGRAM TOTAL : 2.6726s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3253s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3472s for 90112 events => throughput is 2.60E+05 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421161E-004) differ by less than 2E-14 (2.220446049250313e-16) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.298413e+05 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.515584e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.121587e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.140818e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.116718e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.158341e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.107036e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = 
SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.429475e+05 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 7decb75777..a11d40fa18 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,25 +1,24 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none - -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -28,15 +27,15 @@ make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2023-10-25_19:04:47 +DATE: 2023-10-25_19:26:45 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB -NVIDIA L4]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -52,7 +51,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -60,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 3.6461s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1956s - [COUNTERS] Fortran MEs ( 1 ) : 3.4505s for 8192 events => throughput is 2.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3808s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2757s + [COUNTERS] Fortran MEs ( 1 ) : 4.1052s for 8192 events => throughput is 2.00E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -77,7 +76,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -85,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 3.6438s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1927s - [COUNTERS] Fortran MEs ( 1 ) : 3.4511s for 8192 events => throughput is 2.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3737s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2719s + [COUNTERS] Fortran MEs ( 1 ) : 4.1018s for 8192 events => throughput is 2.00E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -102,17 +101,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 39.3691s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3694s - [COUNTERS] Fortran MEs ( 1 ) : 37.9997s for 90112 events => throughput is 2.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.1736s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9032s + [COUNTERS] Fortran MEs ( 1 ) : 45.2704s for 90112 events => throughput is 1.99E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -127,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -135,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277396515517582E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 6.7930s - [COUNTERS] Fortran Overhead ( 0 ) : 3.4371s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.3559s for 8192 events => throughput is 2.44E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.4177s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2835s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.1342s for 8192 events => throughput is 1.98E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -160,35 +159,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803774605353658E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803774605164224E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 41.5679s - [COUNTERS] Fortran Overhead ( 0 ) : 4.6056s - [COUNTERS] CudaCpp MEs ( 2 ) : 36.9624s for 90112 events => throughput is 2.44E+03 events/s + [COUNTERS] PROGRAM TOTAL : 51.5557s + [COUNTERS] Fortran Overhead ( 0 ) : 5.8491s + [COUNTERS] CudaCpp MEs ( 2 ) : 45.7066s for 90112 events => throughput is 1.97E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803774605353658E-004) differ by less than 4E-4 (3.091469937599456e-06) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803774605164224E-004) differ by less than 4E-4 (3.091469938043545e-06) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.530604e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.033185e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.530762e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.041307e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,7 +202,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -211,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277389113409186E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.8393s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0062s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8331s for 8192 events => throughput is 9.83E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5057s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3763s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1294s for 8192 events => throughput is 7.25E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,35 +235,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803771886003655E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803771885814218E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 11.3573s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1735s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.1839s for 90112 events => throughput is 9.81E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.4913s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9536s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.5377s for 90112 events => throughput is 7.19E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803771886003655E-004) differ by less than 4E-4 (2.919399753276153e-06) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803771885814218E-004) differ by less than 4E-4 (2.9193997534981975e-06) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.008580e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.412053e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.010372e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.406103e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -279,7 +278,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -287,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277390171873933E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.9273s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5575s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3698s for 8192 events => throughput is 2.21E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.2465s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7541s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4925s for 8192 events => throughput is 1.66E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -312,35 +311,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803774410661750E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803774410472313E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 5.8038s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7240s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.0798s for 90112 events => throughput is 2.21E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.7561s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3072s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4489s for 90112 events => throughput is 1.65E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803774410661750E-004) differ by less than 4E-4 (3.0791505700733524e-06) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803774410472313E-004) differ by less than 4E-4 (3.0791505700733524e-06) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.267011e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.691454e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.267552e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.694442e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -355,7 +354,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -363,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277390171873933E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.8434s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5139s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3295s for 8192 events => throughput is 2.49E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.1341s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6956s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4385s for 8192 events => throughput is 1.87E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -388,35 +387,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803774410661750E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803774410472313E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 5.3196s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6858s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6338s for 90112 events => throughput is 2.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.1002s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2595s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.8407s for 90112 events => throughput is 1.86E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803774410661750E-004) differ by less than 4E-4 (3.0791505700733524e-06) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803774410472313E-004) differ by less than 4E-4 (3.0791505700733524e-06) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.554710e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.917142e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.563044e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.912635e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -431,7 +430,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -439,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277396414214383E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.6806s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4346s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2460s for 8192 events => throughput is 3.33E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.3236s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7938s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5298s for 8192 events => throughput is 1.55E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -464,35 +463,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803777740932968E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803777740743528E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 4.2973s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6049s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.6924s for 90112 events => throughput is 3.35E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.3515s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4437s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.9078s for 90112 events => throughput is 1.53E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803777740932968E-004) differ by less than 4E-4 (3.289877538392716e-06) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803777740743528E-004) differ by less than 4E-4 (3.289877538392716e-06) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.442736e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.556068e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.446826e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.554806e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -507,4 +506,97 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' -ERROR! 
' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' failed + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277400478491260E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 59 events (found 420 events) + [COUNTERS] PROGRAM TOTAL : 0.7609s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7395s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.84E+05 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277400478491260E-004) differ by less than 4E-4 (3.3951593780834344e-06) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803779990154892E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 207 events (found 1235 events) + [COUNTERS] PROGRAM TOTAL : 2.5388s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3037s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2351s for 90112 events => throughput is 3.83E+05 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803779990154892E-004) differ by less than 4E-4 (3.4322117830054566e-06) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.602401e+05 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.943641e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.505586e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.637854e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.505362e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.630323e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.491202e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = 
SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.522012e+05 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index c4e4a1a740..344f040590 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -2,13 +2,13 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none + +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 - - make USEBUILDDIR=1 AVX=512y + make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' @@ -16,13 +16,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -make[1]: Nothing to be done for 'all'. make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' @@ -33,10 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:07:59 +DATE: 2023-10-25_19:30:04 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB -NVIDIA L4]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -52,7 +51,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -60,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 3.6443s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1951s - [COUNTERS] Fortran MEs ( 1 ) : 3.4492s for 8192 events => throughput is 2.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3869s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2765s + [COUNTERS] Fortran MEs ( 1 ) : 4.1104s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -77,7 +76,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -85,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 3.6433s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1919s - [COUNTERS] Fortran MEs ( 1 ) : 3.4513s for 8192 events => throughput is 2.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3884s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2753s + [COUNTERS] Fortran MEs ( 1 ) : 4.1132s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -102,17 +101,17 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748610604E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 39.3762s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3593s - [COUNTERS] Fortran MEs ( 1 ) : 38.0168s for 90112 events => throughput is 2.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.2155s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9013s + [COUNTERS] Fortran MEs ( 1 ) : 45.3142s for 90112 events => throughput is 1.99E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -127,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -135,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277432965013E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 7.0366s - [COUNTERS] Fortran Overhead ( 0 ) : 3.5537s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4829s for 8192 events => throughput is 2.35E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.8199s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4948s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3251s for 8192 events => throughput is 1.89E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -160,35 +159,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725813215552E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725813026109E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 43.0534s - [COUNTERS] Fortran Overhead ( 0 ) : 4.7304s - [COUNTERS] CudaCpp MEs ( 2 ) : 38.3231s for 90112 events => throughput is 2.35E+03 events/s + [COUNTERS] PROGRAM TOTAL : 53.9672s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0490s + [COUNTERS] CudaCpp MEs ( 2 ) : 47.9182s for 90112 events => throughput is 1.88E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725813215552E-004) differ by less than 2E-4 (4.087956861908992e-09) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725813026109E-004) differ by less than 2E-4 (4.087956639864387e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.445667e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.959802e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.446428e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.958905e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,7 +202,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -211,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277430934464E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 3.5785s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8571s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.7215s for 8192 events => throughput is 4.76E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7279s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4759s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2519s for 8192 events => throughput is 3.64E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,35 +235,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725816435760E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725816246317E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 22.5227s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0477s - [COUNTERS] CudaCpp MEs ( 2 ) : 19.4750s for 90112 events => throughput is 4.63E+03 events/s + [COUNTERS] PROGRAM TOTAL : 29.0177s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0402s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.9775s for 90112 events => throughput is 3.61E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725816435760E-004) differ by less than 2E-4 (4.291719424287521e-09) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725816246317E-004) differ by less than 2E-4 (4.291719202242916e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.936536e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.730381e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.937965e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.728042e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -279,7 +278,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -287,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.6313s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9049s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7264s for 8192 events => throughput is 1.13E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.1830s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2180s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9649s for 8192 events => throughput is 8.49E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -312,35 +311,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725810958764E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 10.0798s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0800s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.9998s for 90112 events => throughput is 1.13E+04 events/s + [COUNTERS] PROGRAM TOTAL : 13.5258s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8006s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.7251s for 90112 events => throughput is 8.40E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725810958764E-004) differ by less than 2E-4 (3.945155757634211e-09) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725810769321E-004) differ by less than 2E-4 (3.945155535589606e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.155342e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.577284e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.155498e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.645443e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -355,7 +354,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -363,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.4812s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8286s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6526s for 8192 events => throughput is 1.26E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.9705s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1082s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8624s for 8192 events => throughput is 9.50E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -388,35 +387,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725810958764E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 9.1555s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9926s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.1629s for 90112 events => throughput is 1.26E+04 events/s + [COUNTERS] PROGRAM TOTAL : 12.1916s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6788s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.5128s for 90112 events => throughput is 9.47E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725810958764E-004) differ by less than 2E-4 (3.945155757634211e-09) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725810769321E-004) differ by less than 2E-4 (3.945155535589606e-09) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.298345e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.762571e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.297893e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.791935e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -431,7 +430,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -439,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.1957s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6900s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5057s for 8192 events => throughput is 1.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.4166s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3396s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0770s for 8192 events => throughput is 7.61E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -464,35 +463,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725810958764E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.4628s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8592s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.6036s for 90112 events => throughput is 1.61E+04 events/s + [COUNTERS] PROGRAM TOTAL : 14.9279s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9270s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.0009s for 90112 events => throughput is 7.51E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748610604E-004) and cpp (1.5803725810958764E-004) differ by less than 2E-4 (3.945155757634211e-09) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725810769321E-004) differ by less than 2E-4 (3.945155535589606e-09) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.658725e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.723112e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.657267e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.593326e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -507,4 +506,97 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' -ERROR! 
' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' failed + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.0003628 [3.6277277293084707E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 59 events (found 420 events) + [COUNTERS] PROGRAM TOTAL : 0.7973s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7658s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0315s for 8192 events => throughput is 2.60E+05 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277293084707E-004) differ by less than 2E-4 (5.035735162195465e-10) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 2 + [XSECTION] Cross section = 0.000158 [1.5803725738731039E-004] fbridge_mode=1 + [UNWEIGHT] Wrote 207 events (found 1235 events) + [COUNTERS] PROGRAM TOTAL : 2.6608s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3213s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3395s for 90112 events => throughput is 2.65E+05 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725738731039E-004) differ by less than 2E-4 (6.131544161291913e-10) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.295568e+05 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.529340e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.107713e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.153742e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.126841e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.176580e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.122237e+05 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = 
SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.438612e+05 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index c67f0da27f..90411e1b5b 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,27 +1,27 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none - -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -33,10 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:14:32 +DATE: 2023-10-25_19:34:37 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB -NVIDIA L4]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -52,7 +51,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -60,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 166 events) - [COUNTERS] PROGRAM TOTAL : 85.6732s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3092s - [COUNTERS] Fortran MEs ( 1 ) : 85.3640s for 8192 events => throughput is 9.60E+01 events/s + [COUNTERS] PROGRAM TOTAL : 96.8592s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4672s + [COUNTERS] Fortran MEs ( 1 ) : 96.3920s for 8192 events => throughput is 8.50E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -77,7 +76,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -85,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 85.2231s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3098s - [COUNTERS] Fortran MEs ( 1 ) : 84.9133s for 8192 events => throughput is 9.65E+01 events/s + [COUNTERS] PROGRAM TOTAL : 97.0100s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4404s + [COUNTERS] Fortran MEs ( 1 ) : 96.5697s for 8192 events => throughput is 8.48E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -102,7 +101,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -110,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813976E-007] fbridge_mode=0 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 941.3988s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8350s - [COUNTERS] Fortran MEs ( 1 ) : 938.5638s for 90112 events => throughput is 9.60E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1062.8511s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0497s + [COUNTERS] Fortran MEs ( 1 ) : 1058.8014s for 90112 events => throughput is 8.51E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -127,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -135,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435831E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 184.3438s - [COUNTERS] Fortran Overhead ( 0 ) : 85.0990s - [COUNTERS] CudaCpp MEs ( 2 ) : 99.2448s for 8192 events => throughput is 8.25E+01 events/s + [COUNTERS] PROGRAM TOTAL : 221.3790s + [COUNTERS] Fortran Overhead ( 0 ) : 101.7058s + [COUNTERS] CudaCpp MEs ( 2 ) : 119.6732s for 8192 events => throughput is 6.85E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -160,7 +159,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -168,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813953E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1178.9740s - [COUNTERS] Fortran Overhead ( 0 ) : 87.5978s - [COUNTERS] CudaCpp MEs ( 2 ) : 1091.3762s for 90112 events => throughput is 8.26E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1450.0547s + [COUNTERS] Fortran Overhead ( 0 ) : 106.7786s + [COUNTERS] CudaCpp MEs ( 2 ) : 1343.2761s for 90112 events => throughput is 6.71E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,14 +180,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813953E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.695340e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.310154e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.690082e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.288200e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,7 +202,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -211,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435827E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 94.4928s - [COUNTERS] Fortran Overhead ( 0 ) : 43.9870s - [COUNTERS] CudaCpp MEs ( 2 ) : 50.5057s for 8192 events => throughput is 1.62E+02 events/s + [COUNTERS] PROGRAM TOTAL : 107.4772s + [COUNTERS] Fortran Overhead ( 0 ) : 49.8239s + [COUNTERS] CudaCpp MEs ( 2 ) : 57.6534s for 8192 events => throughput is 1.42E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +235,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 604.8039s - [COUNTERS] Fortran Overhead ( 0 ) : 46.6648s - [COUNTERS] CudaCpp MEs ( 2 ) : 558.1391s for 90112 events => throughput is 1.61E+02 events/s + [COUNTERS] PROGRAM TOTAL : 689.8975s + [COUNTERS] Fortran Overhead ( 0 ) : 53.4603s + [COUNTERS] CudaCpp MEs ( 2 ) : 636.4372s for 90112 events => throughput is 1.42E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,14 +256,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.871670e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.666029e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.870811e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.661964e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -279,7 +278,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -287,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435829E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 42.3765s - [COUNTERS] Fortran Overhead ( 0 ) : 19.3321s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.0444s for 8192 events => throughput is 3.55E+02 events/s + [COUNTERS] PROGRAM TOTAL : 51.5576s + [COUNTERS] Fortran Overhead ( 0 ) : 23.6406s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.9170s for 8192 events => throughput is 2.93E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -312,7 +311,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -320,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 274.6168s - [COUNTERS] Fortran Overhead ( 0 ) : 21.8566s - [COUNTERS] CudaCpp MEs ( 2 ) : 252.7602s for 90112 events => throughput is 3.57E+02 events/s + [COUNTERS] PROGRAM TOTAL : 332.8698s + [COUNTERS] Fortran Overhead ( 0 ) : 27.2762s + [COUNTERS] CudaCpp MEs ( 2 ) : 305.5936s for 90112 events => throughput is 2.95E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,14 +332,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.309503e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.564287e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.313934e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.571126e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -355,7 +354,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -363,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435829E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 37.8931s - [COUNTERS] Fortran Overhead ( 0 ) : 17.1832s - [COUNTERS] CudaCpp MEs ( 2 ) : 20.7099s for 8192 events => throughput is 3.96E+02 events/s + [COUNTERS] PROGRAM TOTAL : 45.7634s + [COUNTERS] Fortran Overhead ( 0 ) : 20.8536s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.9098s for 8192 events => throughput is 3.29E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -388,7 +387,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -396,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 247.2023s - [COUNTERS] Fortran Overhead ( 0 ) : 19.7302s - [COUNTERS] CudaCpp MEs ( 2 ) : 227.4722s for 90112 events => throughput is 3.96E+02 events/s + [COUNTERS] PROGRAM TOTAL : 298.7296s + [COUNTERS] Fortran Overhead ( 0 ) : 24.5229s + [COUNTERS] CudaCpp MEs ( 2 ) : 274.2068s for 90112 events => throughput is 3.29E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,14 +408,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.872948e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.037302e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.859207e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.011224e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -431,7 +430,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -439,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435829E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 25.8670s - [COUNTERS] Fortran Overhead ( 0 ) : 11.9877s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.8792s for 8192 events => throughput is 5.90E+02 events/s + [COUNTERS] PROGRAM TOTAL : 46.0178s + [COUNTERS] Fortran Overhead ( 0 ) : 22.2114s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.8064s for 8192 events => throughput is 3.44E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -464,7 +463,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -472,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 168.3618s - [COUNTERS] Fortran Overhead ( 0 ) : 14.5580s - [COUNTERS] CudaCpp MEs ( 2 ) : 153.8038s for 90112 events => throughput is 5.86E+02 events/s + [COUNTERS] PROGRAM TOTAL : 285.3431s + [COUNTERS] Fortran Overhead ( 0 ) : 25.8685s + [COUNTERS] CudaCpp MEs ( 2 ) : 259.4745s for 90112 events => throughput is 3.47E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,14 +484,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.031931e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.723231e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.008943e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.765137e+02 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -507,4 +506,97 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -ERROR! 
' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' failed + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1693100945435838E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 4.1896s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1063s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0833s for 8192 events => throughput is 7.56E+03 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1693100945435838E-006) differ by less than 2E-14 (2.4424906541753444e-15) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 18.5925s + [COUNTERS] Fortran Overhead ( 0 ) : 6.7176s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.8749s for 90112 events => throughput is 7.59E+03 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007) differ by less than 2E-14 (8.881784197001252e-16) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.538676e+03 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.266826e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.276191e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.572409e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.231100e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.464972e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.233366e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** +Process = 
SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.245300e+03 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 7af9e47c73..93e9694d2a 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,25 +1,24 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 - make USEBUILDDIR=1 AVX=512y + make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
@@ -28,15 +27,15 @@ make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2023-10-25_20:27:32 +DATE: 2023-10-25_21:02:37 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB -NVIDIA L4]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -52,7 +51,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -60,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 166 events) - [COUNTERS] PROGRAM TOTAL : 84.9514s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3096s - [COUNTERS] Fortran MEs ( 1 ) : 84.6418s for 8192 events => throughput is 9.68E+01 events/s + [COUNTERS] PROGRAM TOTAL : 97.0230s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4439s + [COUNTERS] Fortran MEs ( 1 ) : 96.5791s for 8192 events => throughput is 8.48E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -77,7 +76,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -85,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 85.7277s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3103s - [COUNTERS] Fortran MEs ( 1 ) : 85.4174s for 8192 events => throughput is 9.59E+01 events/s + [COUNTERS] PROGRAM TOTAL : 98.3689s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4421s + [COUNTERS] Fortran MEs ( 1 ) : 97.9268s for 8192 events => throughput is 8.37E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -102,7 +101,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -110,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813976E-007] fbridge_mode=0 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 940.7097s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8292s - [COUNTERS] Fortran MEs ( 1 ) : 937.8805s for 90112 events => throughput is 9.61E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1065.5287s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0935s + [COUNTERS] Fortran MEs ( 1 ) : 1061.4352s for 90112 events => throughput is 8.49E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -127,21 +126,21 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1694768395608941E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.169e-06 [1.1694768395202781E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 176.4062s - [COUNTERS] Fortran Overhead ( 0 ) : 80.9717s - [COUNTERS] CudaCpp MEs ( 2 ) : 95.4345s for 8192 events => throughput is 8.58E+01 events/s + [COUNTERS] PROGRAM TOTAL : 198.8176s + [COUNTERS] Fortran Overhead ( 0 ) : 92.5000s + [COUNTERS] CudaCpp MEs ( 2 ) : 106.3176s for 8192 events => throughput is 7.71E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1694768395608941E-006) differ by less than 4E-4 (0.0001426011954326345) +OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1694768395202781E-006) differ by less than 4E-4 (0.00014260116069753082) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,35 +159,35 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1361436148187123E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.136e-07 [2.1361436140448921E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1129.5632s - [COUNTERS] Fortran Overhead ( 0 ) : 83.1146s - [COUNTERS] CudaCpp MEs ( 2 ) : 1046.4486s for 90112 events => throughput is 8.61E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1266.1327s + [COUNTERS] Fortran Overhead ( 0 ) : 95.7086s + [COUNTERS] CudaCpp MEs ( 2 ) : 1170.4241s for 90112 events => throughput is 7.70E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361436148187123E-007) differ by less than 4E-4 (0.00014045922420713453) +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361436140448921E-007) differ by less than 4E-4 (0.00014045886190539036) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.022409e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.002269e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.021876e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.967415e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,7 +202,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -211,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694765850076731E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 42.7416s - [COUNTERS] Fortran Overhead ( 0 ) : 19.7901s - [COUNTERS] CudaCpp MEs ( 2 ) : 22.9515s for 8192 events => throughput is 3.57E+02 events/s + [COUNTERS] PROGRAM TOTAL : 49.7619s + [COUNTERS] Fortran Overhead ( 0 ) : 23.4310s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.3309s for 8192 events => throughput is 3.11E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +235,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361430662723898E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 275.8688s - [COUNTERS] Fortran Overhead ( 0 ) : 22.3496s - [COUNTERS] CudaCpp MEs ( 2 ) : 253.5192s for 90112 events => throughput is 3.55E+02 events/s + [COUNTERS] PROGRAM TOTAL : 316.9053s + [COUNTERS] Fortran Overhead ( 0 ) : 27.1348s + [COUNTERS] CudaCpp MEs ( 2 ) : 289.7704s for 90112 events => throughput is 3.11E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,14 +256,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361430662723898E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.215690e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.586738e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.209963e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.580652e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -279,7 +278,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -287,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694764962310603E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 21.3650s - [COUNTERS] Fortran Overhead ( 0 ) : 9.8744s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.4906s for 8192 events => throughput is 7.13E+02 events/s + [COUNTERS] PROGRAM TOTAL : 25.7462s + [COUNTERS] Fortran Overhead ( 0 ) : 11.9729s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.7733s for 8192 events => throughput is 5.95E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -312,7 +311,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -320,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361430432807771E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 139.3548s - [COUNTERS] Fortran Overhead ( 0 ) : 12.3610s - [COUNTERS] CudaCpp MEs ( 2 ) : 126.9938s for 90112 events => throughput is 7.10E+02 events/s + [COUNTERS] PROGRAM TOTAL : 168.1713s + [COUNTERS] Fortran Overhead ( 0 ) : 15.4495s + [COUNTERS] CudaCpp MEs ( 2 ) : 152.7218s for 90112 events => throughput is 5.90E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,14 +332,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361430432807771E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.561703e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.197172e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.587034e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.226176e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -355,7 +354,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -363,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694764962310603E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 19.1741s - [COUNTERS] Fortran Overhead ( 0 ) : 8.7841s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.3900s for 8192 events => throughput is 7.88E+02 events/s + [COUNTERS] PROGRAM TOTAL : 22.7901s + [COUNTERS] Fortran Overhead ( 0 ) : 10.6491s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.1410s for 8192 events => throughput is 6.75E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -388,7 +387,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -396,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361430432807771E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 125.8037s - [COUNTERS] Fortran Overhead ( 0 ) : 11.2828s - [COUNTERS] CudaCpp MEs ( 2 ) : 114.5209s for 90112 events => throughput is 7.87E+02 events/s + [COUNTERS] PROGRAM TOTAL : 147.9220s + [COUNTERS] Fortran Overhead ( 0 ) : 14.3147s + [COUNTERS] CudaCpp MEs ( 2 ) : 133.6073s for 90112 events => throughput is 6.74E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,14 +408,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361430432807771E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.656474e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.019572e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.674300e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.643340e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -431,7 +430,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -439,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1694767969588676E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 13.3091s - [COUNTERS] Fortran Overhead ( 0 ) : 6.1154s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.1937s for 8192 events => throughput is 1.14E+03 events/s + [COUNTERS] PROGRAM TOTAL : 23.2769s + [COUNTERS] Fortran Overhead ( 0 ) : 11.3993s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.8776s for 8192 events => throughput is 6.90E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -464,7 +463,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -472,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1361435931847224E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 88.7101s - [COUNTERS] Fortran Overhead ( 0 ) : 8.6312s - [COUNTERS] CudaCpp MEs ( 2 ) : 80.0788s for 90112 events => throughput is 1.13E+03 events/s + [COUNTERS] PROGRAM TOTAL : 144.2454s + [COUNTERS] Fortran Overhead ( 0 ) : 15.1090s + [COUNTERS] CudaCpp MEs ( 2 ) : 129.1364s for 90112 events => throughput is 6.98E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,14 +484,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361435931847224E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.413638e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.574328e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.410813e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.606464e+02 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -507,4 +506,97 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -ERROR! 
' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' failed + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1694770708195000E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 2.4561s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9552s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5008s for 8192 events => throughput is 1.64E+04 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1694770708195000E-006) differ by less than 4E-4 (0.00014279896898039546) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1361443477565659E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 10.9874s + [COUNTERS] Fortran Overhead ( 0 ) : 5.5585s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4289s for 90112 events => throughput is 1.66E+04 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361443477565659E-007) differ by less than 4E-4 (0.0001408023850304474) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.636533e+04 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.620348e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.363235e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.405136e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.330068e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.392338e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.319690e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** +Process = 
SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.432306e+03 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 4502fc4cc0..fd7d860c5e 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,27 +1,27 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=avx2 +make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -33,10 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_21:24:58 +DATE: 2023-10-25_22:08:36 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB -NVIDIA L4]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -52,7 +51,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -60,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 166 events) - [COUNTERS] PROGRAM TOTAL : 84.9824s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3100s - [COUNTERS] Fortran MEs ( 1 ) : 84.6723s for 8192 events => throughput is 9.67E+01 events/s + [COUNTERS] PROGRAM TOTAL : 97.1556s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4422s + [COUNTERS] Fortran MEs ( 1 ) : 96.7134s for 8192 events => throughput is 8.47E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -77,7 +76,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -85,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 85.2775s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3102s - [COUNTERS] Fortran MEs ( 1 ) : 84.9673s for 8192 events => throughput is 9.64E+01 events/s + [COUNTERS] PROGRAM TOTAL : 96.9689s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4446s + [COUNTERS] Fortran MEs ( 1 ) : 96.5243s for 8192 events => throughput is 8.49E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -102,7 +101,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -110,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436158813976E-007] fbridge_mode=0 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 941.4544s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8344s - [COUNTERS] Fortran MEs ( 1 ) : 938.6200s for 90112 events => throughput is 9.60E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1064.4592s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0566s + [COUNTERS] Fortran MEs ( 1 ) : 1060.4026s for 90112 events => throughput is 8.50E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -127,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -135,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693101016896846E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 187.4086s - [COUNTERS] Fortran Overhead ( 0 ) : 86.0928s - [COUNTERS] CudaCpp MEs ( 2 ) : 101.3159s for 8192 events => throughput is 8.09E+01 events/s + [COUNTERS] PROGRAM TOTAL : 212.5728s + [COUNTERS] Fortran Overhead ( 0 ) : 98.0572s + [COUNTERS] CudaCpp MEs ( 2 ) : 114.5156s for 8192 events => throughput is 7.15E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -160,7 +159,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -168,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436275882778E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1204.1464s - [COUNTERS] Fortran Overhead ( 0 ) : 88.5909s - [COUNTERS] CudaCpp MEs ( 2 ) : 1115.5554s for 90112 events => throughput is 8.08E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1418.4218s + [COUNTERS] Fortran Overhead ( 0 ) : 103.9288s + [COUNTERS] CudaCpp MEs ( 2 ) : 1314.4929s for 90112 events => throughput is 6.86E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,14 +180,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436275882778E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.570846e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.001258e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.571217e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.939164e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,7 +202,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -211,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693101020910778E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 92.0468s - [COUNTERS] Fortran Overhead ( 0 ) : 42.4424s - [COUNTERS] CudaCpp MEs ( 2 ) : 49.6044s for 8192 events => throughput is 1.65E+02 events/s + [COUNTERS] PROGRAM TOTAL : 111.7906s + [COUNTERS] Fortran Overhead ( 0 ) : 51.2589s + [COUNTERS] CudaCpp MEs ( 2 ) : 60.5317s for 8192 events => throughput is 1.35E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -236,7 +235,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -244,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436284111598E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 590.4569s - [COUNTERS] Fortran Overhead ( 0 ) : 44.8763s - [COUNTERS] CudaCpp MEs ( 2 ) : 545.5806s for 90112 events => throughput is 1.65E+02 events/s + [COUNTERS] PROGRAM TOTAL : 721.3517s + [COUNTERS] Fortran Overhead ( 0 ) : 54.9246s + [COUNTERS] CudaCpp MEs ( 2 ) : 666.4271s for 90112 events => throughput is 1.35E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,14 +256,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436284111598E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.951579e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.612539e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.952871e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.614353e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -279,7 +278,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -287,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693101021831071E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 41.1033s - [COUNTERS] Fortran Overhead ( 0 ) : 18.8049s - [COUNTERS] CudaCpp MEs ( 2 ) : 22.2985s for 8192 events => throughput is 3.67E+02 events/s + [COUNTERS] PROGRAM TOTAL : 49.1637s + [COUNTERS] Fortran Overhead ( 0 ) : 22.7432s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.4205s for 8192 events => throughput is 3.10E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -312,7 +311,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -320,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436281462142E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 266.9925s - [COUNTERS] Fortran Overhead ( 0 ) : 21.2783s - [COUNTERS] CudaCpp MEs ( 2 ) : 245.7142s for 90112 events => throughput is 3.67E+02 events/s + [COUNTERS] PROGRAM TOTAL : 315.7719s + [COUNTERS] Fortran Overhead ( 0 ) : 26.2544s + [COUNTERS] CudaCpp MEs ( 2 ) : 289.5175s for 90112 events => throughput is 3.11E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,14 +332,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436281462142E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.437775e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.644706e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.452252e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.685868e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -355,7 +354,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -363,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693101021831071E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 36.5317s - [COUNTERS] Fortran Overhead ( 0 ) : 16.5682s - [COUNTERS] CudaCpp MEs ( 2 ) : 19.9634s for 8192 events => throughput is 4.10E+02 events/s + [COUNTERS] PROGRAM TOTAL : 44.6193s + [COUNTERS] Fortran Overhead ( 0 ) : 20.5510s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.0684s for 8192 events => throughput is 3.40E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -388,7 +387,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -396,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436281462142E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 239.5143s - [COUNTERS] Fortran Overhead ( 0 ) : 19.0951s - [COUNTERS] CudaCpp MEs ( 2 ) : 220.4191s for 90112 events => throughput is 4.09E+02 events/s + [COUNTERS] PROGRAM TOTAL : 281.8182s + [COUNTERS] Fortran Overhead ( 0 ) : 23.6088s + [COUNTERS] CudaCpp MEs ( 2 ) : 258.2094s for 90112 events => throughput is 3.49E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,14 +408,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436281462142E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.043303e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.253034e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.056876e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.210071e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -431,7 +430,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -439,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.169e-06 [1.1693101021831071E-006] fbridge_mode=1 [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 25.4199s - [COUNTERS] Fortran Overhead ( 0 ) : 11.5417s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.8782s for 8192 events => throughput is 5.90E+02 events/s + [COUNTERS] PROGRAM TOTAL : 45.1323s + [COUNTERS] Fortran Overhead ( 0 ) : 21.8275s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.3047s for 8192 events => throughput is 3.52E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -464,7 +463,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -472,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.136e-07 [2.1358436281462142E-007] fbridge_mode=1 [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 166.7152s - [COUNTERS] Fortran Overhead ( 0 ) : 14.0683s - [COUNTERS] CudaCpp MEs ( 2 ) : 152.6469s for 90112 events => throughput is 5.90E+02 events/s + [COUNTERS] PROGRAM TOTAL : 283.0785s + [COUNTERS] Fortran Overhead ( 0 ) : 25.5465s + [COUNTERS] CudaCpp MEs ( 2 ) : 257.5320s for 90112 events => throughput is 3.50E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,14 +484,14 @@ OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436281462142E-007 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.297717e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.872786e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.313776e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.828458e+02 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -507,4 +506,97 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -ERROR! 
' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' failed + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1693100942770687E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 3.5931s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7300s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8631s for 8192 events => throughput is 9.49E+03 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1693100942770687E-006) differ by less than 2E-4 (2.2792256970660674e-10) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1358436157495368E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 15.7895s + [COUNTERS] Fortran Overhead ( 0 ) : 6.2917s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4978s for 90112 events => throughput is 9.49E+03 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436157495368E-007) differ by less than 2E-4 (6.173705990875078e-11) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.449720e+03 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.084973e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.109916e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.161304e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.111586e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.116031e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.106540e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** +Process = 
SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.648699e+03 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 18810c7539..126d0b9ddb 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -2,41 +2,40 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 - +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2023-10-25_19:11:42 +DATE: 2023-10-25_19:34:18 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB -NVIDIA L4]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -52,7 +51,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -60,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.2257s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1646s - [COUNTERS] Fortran MEs ( 1 ) : 0.0611s for 8192 events => throughput is 1.34E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3162s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2400s + [COUNTERS] Fortran MEs ( 1 ) : 0.0762s for 8192 events => throughput is 1.07E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -77,7 +76,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -85,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2216s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1605s - [COUNTERS] Fortran MEs ( 1 ) : 0.0611s for 8192 events => throughput is 1.34E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3078s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2307s + [COUNTERS] Fortran MEs ( 1 ) : 0.0772s for 8192 events => throughput is 1.06E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -102,7 +101,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -110,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6855s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0179s - [COUNTERS] Fortran MEs ( 1 ) : 0.6677s for 90112 events => throughput is 1.35E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2688s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4248s + [COUNTERS] Fortran MEs ( 1 ) : 0.8440s for 90112 events => throughput is 1.07E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -127,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -135,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.276 [1.2757941949814184] fbridge_mode=1 [UNWEIGHT] Wrote 105 events (found 652 events) - [COUNTERS] PROGRAM TOTAL : 0.2844s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2272s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0571s for 8192 events => throughput is 1.43E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3877s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3161s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0716s for 8192 events => throughput is 1.14E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 05038e6686..97d8938e38 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,14 +1,14 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 - make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' @@ -16,12 +16,12 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -33,10 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:12:39 +DATE: 2023-10-25_19:34:24 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB -NVIDIA L4]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -52,7 +51,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! 
Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -60,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.2265s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1654s - [COUNTERS] Fortran MEs ( 1 ) : 0.0611s for 8192 events => throughput is 1.34E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3127s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2355s + [COUNTERS] Fortran MEs ( 1 ) : 0.0772s for 8192 events => throughput is 1.06E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -77,7 +76,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -85,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2226s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1615s - [COUNTERS] Fortran MEs ( 1 ) : 0.0611s for 8192 events => throughput is 1.34E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3089s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2315s + [COUNTERS] Fortran MEs ( 1 ) : 0.0773s for 8192 events => throughput is 1.06E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -102,7 +101,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -110,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6904s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0219s - [COUNTERS] Fortran MEs ( 1 ) : 0.6685s for 90112 events => throughput is 1.35E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2899s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4433s + [COUNTERS] Fortran MEs ( 1 ) : 0.8467s for 90112 events => throughput is 1.06E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -127,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -135,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.276 [1.2757939713258191] fbridge_mode=1 [UNWEIGHT] Wrote 105 events (found 652 events) - [COUNTERS] PROGRAM TOTAL : 0.2775s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2237s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0539s for 8192 events => throughput is 1.52E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3818s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3130s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0689s for 8192 events => throughput is 1.19E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index c740fcb04e..ae1cc6d1c5 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -2,30 +2,30 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=avx2 - +make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y + make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
@@ -33,10 +33,9 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:13:36 +DATE: 2023-10-25_19:34:30 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: 4x NVIDIA A100-PCIE-40GB -NVIDIA L4]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** @@ -52,7 +51,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -60,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.2237s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1626s - [COUNTERS] Fortran MEs ( 1 ) : 0.0611s for 8192 events => throughput is 1.34E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3113s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2340s + [COUNTERS] Fortran MEs ( 1 ) : 0.0773s for 8192 events => throughput is 1.06E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -77,7 +76,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -85,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2210s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1599s - [COUNTERS] Fortran MEs ( 1 ) : 0.0611s for 8192 events => throughput is 1.34E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3077s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2312s + [COUNTERS] Fortran MEs ( 1 ) : 0.0765s for 8192 events => throughput is 1.07E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -102,7 +101,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -110,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6878s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0196s - [COUNTERS] Fortran MEs ( 1 ) : 0.6682s for 90112 events => throughput is 1.35E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2619s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4224s + [COUNTERS] Fortran MEs ( 1 ) : 0.8395s for 90112 events => throughput is 1.07E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -127,7 +126,7 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/64 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -135,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.276 [1.2757941960880730] fbridge_mode=1 [UNWEIGHT] Wrote 105 events (found 652 events) - [COUNTERS] PROGRAM TOTAL : 0.2850s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2282s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0568s for 8192 events => throughput is 1.44E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3925s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3207s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0718s for 8192 events => throughput is 1.14E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 150846880a..fb3e759147 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:17:58 +DATE: 2023-10-25_18:31:49 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.992610e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.677560e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.800497e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.651038 sec + 2,636,526,277 cycles # 3.033 GHz + 4,084,504,000 instructions # 1.55 insn per cycle + 0.937514788 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.282804e-02 +Avg ME 
(F77/CUDA) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.465359e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.778769e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.778769e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.930496 sec - 16,959,876,668 cycles:u # 3.438 GHz - 41,097,155,657 instructions:u # 2.42 insn per cycle - 4.933925999 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.223824e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.458038e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.458038e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 5.535744 sec + 17,174,922,423 cycles # 3.101 GHz + 40,422,775,862 instructions # 2.35 insn per cycle + 5.540757574 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.710298e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.142425e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.142425e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.938305 sec - 10,014,794,390 cycles:u # 3.406 GHz - 25,264,551,946 instructions:u # 2.52 insn per cycle - 2.941362103 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.137087e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.061371e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.061371e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.338481 sec + 10,270,021,527 cycles # 3.072 GHz + 24,681,672,230 instructions # 2.40 insn per cycle + 3.343524574 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1284) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.041303e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.391151e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.391151e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.141766 sec - 7,165,504,049 cycles:u # 3.342 GHz - 14,162,644,516 instructions:u # 1.98 insn per cycle - 2.144827480 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.319309e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.049977e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.049977e+06 ) sec^-1 +MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.283025 sec + 6,897,531,665 cycles # 3.016 GHz + 13,676,914,709 instructions # 1.98 insn per cycle + 2.287967204 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1057) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.084460e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.523028e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.523028e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.139335 sec - 7,155,441,053 cycles:u # 3.341 GHz - 13,668,764,880 instructions:u # 1.91 insn per cycle - 2.142319320 
seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.461379e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.455419e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.455419e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.201231 sec + 6,644,736,732 cycles # 3.013 GHz + 13,369,268,411 instructions # 2.01 insn per cycle + 2.206080825 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1009) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.529414e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.028573e+07 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.028573e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.993617 sec - 6,291,828,835 cycles:u # 3.152 GHz - 10,451,434,542 instructions:u # 1.66 insn per cycle - 1.996693269 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.225436e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.708340e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.708340e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.342191 sec + 5,886,532,421 cycles # 2.509 GHz + 10,160,262,547 instructions # 1.73 insn per cycle + 2.347341313 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 268) (512y: 0) (512z: 683) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index e8608b0c4d..ff2ab6ab12 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -36,31 +36,60 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:42:23 +DATE: 2023-10-25_19:02:17 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.965333e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.255447e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.255447e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.135108 sec + 7,250,256,577 cycles # 3.055 GHz + 13,026,966,701 instructions # 1.80 insn per cycle + 2.429348584 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.282804e-02 +Avg ME (F77/CUDA) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.435235e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.733702e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.733702e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.124024 sec - 17,562,389,414 cycles:u # 3.425 GHz - 41,393,540,254 instructions:u # 2.36 insn per cycle - 5.128507187 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.180043e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.393531e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.393531e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 5.930110 sec + 18,404,007,216 cycles # 3.102 GHz + 40,649,787,986 instructions # 2.21 insn per cycle + 5.936245491 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -71,23 +100,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.532861e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.728508e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.728508e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.292565 sec - 11,175,097,657 cycles:u # 3.390 GHz - 26,098,331,509 instructions:u # 2.34 insn per cycle - 3.296593444 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.001560e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.785736e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.785736e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.743997 sec + 11,597,299,620 cycles # 3.094 GHz + 25,525,941,371 instructions # 2.20 insn per cycle + 3.750076018 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1284) (avx2: 0) (512y: 0) 
(512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -98,23 +127,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.626941e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.863862e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.863862e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.513698 sec - 8,381,768,733 cycles:u # 3.330 GHz - 15,514,162,728 instructions:u # 1.85 insn per cycle - 2.517729903 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.959223e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.997724e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.997724e+06 ) sec^-1 
+MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.737253 sec + 8,251,537,282 cycles # 3.009 GHz + 15,038,208,979 instructions # 1.82 insn per cycle + 2.743376591 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1057) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -125,23 +154,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.657205e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.957560e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.957560e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.521176 sec - 8,397,382,077 cycles:u # 3.326 GHz - 15,020,280,229 instructions:u # 1.79 insn per cycle - 2.525268729 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.076188e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.294642e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.294642e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.656399 sec + 7,925,615,204 cycles # 2.978 GHz + 14,731,067,513 instructions # 1.86 insn per cycle + 2.662615813 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1009) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -152,23 +181,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.966511e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.028469e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.028469e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.389409 sec - 7,589,911,715 cycles:u # 3.172 GHz - 11,588,319,674 instructions:u # 1.53 insn per cycle - 2.393461319 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.899896e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.732839e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.732839e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.778907 sec + 7,285,067,282 cycles # 2.617 GHz + 11,305,402,811 instructions # 1.55 insn per cycle + 2.785056032 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 268) (512y: 0) (512z: 
683) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index 8ff0f3eecf..ee209006c3 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:47:03 +DATE: 2023-10-25_19:14:42 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --common OMP= +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.734802e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.548595e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.695477e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 1.288103 sec + 4,585,947,033 cycles # 3.031 GHz + 7,017,628,406 instructions # 1.53 insn per cycle + 1.570944429 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 
1.282804e-02 +Avg ME (F77/CUDA) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.468978e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.783500e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.783500e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.218596e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.449391e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.449391e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.920383 sec - 16,926,525,144 cycles:u # 3.438 GHz - 41,097,159,310 instructions:u # 2.43 insn per cycle - 4.923595989 seconds time elapsed +TOTAL : 5.906919 sec + 18,244,110,475 cycles # 3.087 GHz + 40,525,216,964 instructions # 2.22 insn per cycle + 5.912008529 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.715228e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.143322e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.143322e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.094791e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.999575e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.999575e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.931671 sec - 9,999,246,021 cycles:u # 3.408 GHz - 25,264,551,636 instructions:u # 2.53 insn per cycle - 2.934668341 seconds time elapsed +TOTAL : 3.757966 sec + 11,362,621,772 cycles # 3.020 GHz + 24,684,545,006 instructions # 2.17 insn per cycle + 3.762920389 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1284) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.027283e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.399098e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.399098e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.221039e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.851480e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.851480e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.150824 sec - 7,191,967,549 cycles:u # 3.340 GHz - 14,162,645,321 instructions:u # 1.97 insn per cycle - 2.153873675 seconds time elapsed +TOTAL : 2.709730 sec + 7,962,642,894 cycles # 2.935 GHz + 13,579,072,475 instructions # 1.71 insn per cycle + 2.714759310 seconds time elapsed =Symbols in CPPProcess.o= 
(~sse4: 0) (avx2: 1057) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.105423e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.575412e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.575412e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.422533e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.356319e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.356319e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.132134 sec - 7,128,839,196 cycles:u # 3.340 GHz - 13,668,765,891 instructions:u # 1.92 insn per cycle - 2.135194157 seconds time elapsed +TOTAL : 2.582578 sec + 7,748,421,467 cycles # 
2.996 GHz + 13,080,984,196 instructions # 1.69 insn per cycle + 2.587647023 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1009) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.551154e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.033926e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.033926e+07 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.234611e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.715248e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.715248e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.984779 sec - 6,263,623,021 cycles:u # 3.152 GHz - 
10,451,436,460 instructions:u # 1.67 insn per cycle - 1.987850535 seconds time elapsed +TOTAL : 2.694072 sec + 7,028,142,015 cycles # 2.605 GHz + 9,860,263,834 instructions # 1.40 insn per cycle + 2.699027311 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 268) (512y: 0) (512z: 683) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt index 409858a0af..8ad22bdaab 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt @@ -36,21 +36,51 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:46:40 +DATE: 2023-10-25_19:11:43 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --curhst OMP= +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.751984e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.562686e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.712190e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.934603 sec + 3,531,370,849 cycles # 3.027 GHz + 7,056,706,138 instructions # 2.00 insn per cycle + 1.223131814 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst +==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 
1.282804e-02 +Avg ME (F77/CUDA) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe: Aborted - 2,373,081 cycles:u # 1.483 GHz - 3,106,807 instructions:u # 1.31 insn per cycle - 0.115533650 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.229006e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.461428e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.461428e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 5.514347 sec + 17,134,759,816 cycles # 3.105 GHz + 40,421,384,674 instructions # 2.36 insn per cycle + 5.519235575 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -60,14 +90,23 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe: Aborted - 2,018,225 cycles:u # 1.406 GHz - 3,111,473 instructions:u # 1.54 insn per cycle - 0.126137642 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.136550e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.078809e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.078809e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.338536 sec + 10,358,915,958 cycles # 3.099 GHz + 24,681,209,780 instructions # 2.38 insn per cycle + 3.343472671 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1284) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -77,14 +116,23 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe: Aborted - 2,178,162 cycles:u # 1.375 GHz - 3,112,400 instructions:u # 1.43 insn per cycle - 0.118065407 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.254346e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.883713e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.883713e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.325759 sec + 6,909,272,329 cycles # 2.965 GHz + 13,676,492,702 instructions # 1.98 insn per cycle + 2.330695390 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1057) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -94,14 +142,23 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe: Aborted - 1,944,370 cycles:u # 1.347 GHz - 3,113,238 instructions:u # 1.60 insn per cycle - 0.117323854 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.418862e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.369541e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.369541e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.228021 sec + 6,650,960,186 cycles # 2.980 GHz + 13,380,296,124 instructions # 2.01 insn per cycle + 2.233066593 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1009) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -111,14 +168,23 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe: Aborted - 1,942,030 cycles:u # 1.321 GHz - 3,112,651 instructions:u # 1.60 insn per cycle - 0.114930364 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.184006e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.596559e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.596559e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.371480 sec + 5,899,234,125 cycles # 2.483 GHz + 10,159,638,956 instructions # 1.72 insn per cycle + 2.376624580 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 268) (512y: 0) (512z: 683) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 89e20421c7..683b401a34 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -36,30 +36,53 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:45:11 +DATE: 2023-10-25_19:08:41 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.450047e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.536531e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.695667e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 1.791948 sec + 6,196,047,830 cycles # 3.061 GHz + 11,389,938,256 instructions # 1.84 insn per cycle + 2.082150692 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.282804e-02 +Avg ME (F77/CUDA) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.465638e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.778986e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.778986e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.926167 sec - 16,955,984,921 cycles:u # 3.440 GHz - 41,097,153,427 instructions:u # 2.42 insn per cycle - 4.929406651 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.225914e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.460719e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.460719e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 5.523506 sec + 17,169,419,080 cycles # 3.106 GHz + 40,421,624,196 instructions # 2.35 insn per cycle + 5.528429327 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +92,23 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.714818e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.131227e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.131227e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.931381 sec - 9,999,486,631 cycles:u # 3.408 GHz - 25,264,555,991 instructions:u # 2.53 insn per cycle - 2.934404666 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.159688e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.087862e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.087862e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.303087 sec + 10,283,099,650 cycles # 3.109 GHz + 24,681,354,623 instructions # 2.40 insn per cycle + 3.308198240 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1284) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +118,23 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.048019e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.434909e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.434909e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.138090 sec - 7,154,732,529 cycles:u # 3.343 GHz - 14,162,644,579 instructions:u # 1.98 insn per cycle - 2.141122902 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.299969e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.994161e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.994161e+06 ) sec^-1 +MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.296796 sec + 6,872,706,594 cycles # 2.987 GHz + 13,676,412,365 instructions # 1.99 insn per cycle + 2.301612504 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1057) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +144,23 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.105047e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.546899e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.546899e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.129882 sec - 7,126,263,707 cycles:u # 3.342 GHz - 13,668,763,857 instructions:u # 1.92 insn per cycle - 
2.132904165 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.398777e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.314475e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.314475e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.238868 sec + 6,648,921,003 cycles # 2.964 GHz + 13,380,372,785 instructions # 2.01 insn per cycle + 2.243705065 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1009) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +170,23 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.519982e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
1.032103e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.032103e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.989870 sec - 6,289,368,749 cycles:u # 3.157 GHz - 10,451,432,330 instructions:u # 1.66 insn per cycle - 1.992889538 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.257611e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.762023e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.762023e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.324047 sec + 5,897,358,226 cycles # 2.533 GHz + 10,159,451,394 instructions # 1.72 insn per cycle + 2.329040547 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 268) (512y: 0) (512z: 683) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index 1a5b54f2e5..8f12496d4e 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:18:15 +DATE: 2023-10-25_18:32:17 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.135499e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.480702e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.088764e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.649915 sec + 2,521,027,262 cycles # 2.870 GHz + 3,932,562,496 instructions # 1.56 insn per cycle + 0.935284453 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 118 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.282804e-02 +Avg ME 
(F77/CUDA) = 1.2828039868165206E-002 +Relative difference = 1.027708011645137e-08 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.470396e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.785699e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.785699e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.912905 sec - 16,908,371,061 cycles:u # 3.440 GHz - 41,046,076,657 instructions:u # 2.43 insn per cycle - 4.916092188 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.221519e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.452240e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.452240e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 5.545860 sec + 17,128,162,100 cycles # 3.086 GHz + 40,370,576,437 instructions # 2.36 insn per cycle + 5.550872301 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 362) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 1.0277102294013186e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.719215e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.130723e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.130723e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.930018 sec - 9,990,030,213 cycles:u # 3.408 GHz - 25,226,057,535 instructions:u # 2.53 insn per cycle - 2.933121393 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.147682e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.063332e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.063332e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.321610 sec + 10,270,541,691 cycles # 3.088 GHz + 24,643,021,754 instructions # 2.40 insn per cycle + 3.326647423 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1271) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 1.0277102294013186e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.041490e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.422170e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.422170e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.142736 sec - 7,169,921,245 cycles:u # 3.342 GHz - 14,136,733,258 instructions:u # 1.97 insn per cycle - 2.145802070 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.302607e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.009317e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.009317e+06 ) sec^-1 +MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.292563 sec + 6,895,121,758 cycles # 3.004 GHz + 13,651,253,610 instructions # 1.98 insn per cycle + 2.297509965 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1037) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 1.0277088906338675e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.094436e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.601434e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.601434e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.132621 sec - 7,142,031,842 cycles:u # 3.345 GHz - 13,655,436,461 instructions:u # 1.91 insn per cycle - 2.135691611 
seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.466481e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.436410e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.436410e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.199117 sec + 6,634,520,360 cycles # 3.011 GHz + 13,355,581,160 instructions # 2.01 insn per cycle + 2.204141246 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 989) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 1.0277088906338675e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.577060e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.059889e+07 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.059889e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.978556 sec - 6,253,669,114 cycles:u # 3.156 GHz - 10,331,152,687 instructions:u # 1.65 insn per cycle - 1.981645781 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.383490e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.139882e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.139882e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.249115 sec + 5,748,008,303 cycles # 2.551 GHz + 10,038,931,524 instructions # 1.75 insn per cycle + 2.254226968 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 246) (512y: 0) (512z: 663) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index ec4d769f1e..bad17671a9 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:32:38 +DATE: 2023-10-25_18:52:22 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 12 OMP= +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.866442e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.650091e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.818114e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.673872 sec + 2,565,505,221 cycles # 2.855 GHz + 3,938,465,714 instructions # 1.54 insn per cycle + 0.962945607 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.282804e-02 +Avg ME 
(F77/CUDA) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.468824e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.990053e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.990053e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.450366 sec - 8,316,087,808 cycles:u # 3.390 GHz - 18,145,683,917 instructions:u # 2.18 insn per cycle - 2.453752491 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.788792e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.362239e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.362239e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.649101 sec + 8,210,295,285 cycles # 3.094 GHz + 17,459,406,832 instructions # 2.13 insn per cycle + 2.654141725 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 125) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.799990e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.213598e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.213598e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.927940 sec - 6,492,858,558 cycles:u # 3.363 GHz - 13,360,995,036 instructions:u # 2.06 insn per cycle - 1.931036236 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.776841e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.067159e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.067159e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.060741 sec + 6,367,782,565 cycles # 3.084 GHz + 12,773,139,369 instructions # 2.01 insn per cycle + 2.065923417 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 810) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 5.361282e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.752634e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.752634e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.762418 sec - 5,888,113,670 cycles:u # 3.336 GHz - 9,859,240,557 instructions:u # 1.67 insn per cycle - 1.765631929 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.730005e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.360397e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.360397e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 
+- 3.270315e-06 ) GeV^0 +TOTAL : 1.721074 sec + 5,169,510,873 cycles # 2.996 GHz + 9,371,577,717 instructions # 1.81 insn per cycle + 1.726231344 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 720) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 5.583635e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.930445e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.930445e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.731804 sec - 5,793,967,089 cycles:u # 3.341 GHz - 9,519,499,694 instructions:u # 1.64 insn per cycle - 1.734762424 seconds time 
elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.006682e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.519447e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.519447e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 1.646907 sec + 4,985,951,497 cycles # 3.019 GHz + 9,229,216,123 instructions # 1.85 insn per cycle + 1.652062250 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 641) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 5.455559e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.657034e+07 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.657034e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.751943 sec - 5,619,161,076 cycles:u # 3.203 GHz - 8,987,081,027 instructions:u # 1.60 insn per cycle - 1.754996822 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.264702e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.000270e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.000270e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 1.871689 sec + 5,009,273,536 cycles # 2.672 GHz + 8,693,527,346 instructions # 1.74 insn per cycle + 1.876855811 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 200) (512y: 0) (512z: 276) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index 01bdb12502..e9aad49fe2 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:32:50 +DATE: 2023-10-25_18:52:44 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 12 OMP= +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.999685e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.416020e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.082814e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.643446 sec + 2,620,726,592 cycles # 3.012 GHz + 4,087,111,468 instructions # 1.56 insn per cycle + 0.931265549 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 118 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.282804e-02 +Avg ME 
(F77/CUDA) = 1.2828039868165206E-002 +Relative difference = 1.027708011645137e-08 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.270821e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.789105e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.789105e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.114220 sec - 7,121,938,402 cycles:u # 3.364 GHz - 14,917,387,608 instructions:u # 2.09 insn per cycle - 2.117449531 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.490278e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.520777e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.520777e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.208507 sec + 6,644,704,561 cycles # 3.003 GHz + 14,230,584,763 instructions # 2.14 insn per cycle + 2.213619219 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 122) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 1.0277102294013186e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 5.489323e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.786799e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.786799e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.761799 sec - 5,917,699,661 cycles:u # 3.354 GHz - 11,362,692,418 instructions:u # 1.92 insn per cycle - 1.764919458 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.496209e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.203520e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.203520e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 1.793256 sec + 5,547,608,796 cycles # 3.086 GHz + 10,773,719,188 instructions # 1.94 insn per cycle + 1.798601488 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 610) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 1.0277102294013186e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 5.607499e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.090410e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.090410e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.713581 sec - 5,730,127,623 cycles:u # 3.341 GHz - 9,216,760,607 instructions:u # 1.61 insn per cycle - 1.716701054 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.103748e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.641142e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.641142e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 
+- 3.270315e-06 ) GeV^0 +TOTAL : 1.621275 sec + 4,941,467,155 cycles # 3.041 GHz + 8,728,712,502 instructions # 1.77 insn per cycle + 1.626388139 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 552) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 1.0277088906338675e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 5.926501e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.437680e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.437680e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.660901 sec - 5,558,612,040 cycles:u # 3.341 GHz - 9,031,161,270 instructions:u # 1.62 insn per cycle - 1.664029633 seconds time 
elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.063835e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.792404e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.792404e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 1.652560 sec + 4,758,216,156 cycles # 2.885 GHz + 8,734,044,090 instructions # 1.84 insn per cycle + 1.657554337 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 519) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 1.0277088906338675e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 5.832161e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.075253e+07 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.075253e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.678555 sec - 5,588,592,180 cycles:u # 3.324 GHz - 8,700,071,941 instructions:u # 1.56 insn per cycle - 1.681676462 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.451379e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.103695e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.103695e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 1.809541 sec + 4,867,548,247 cycles # 2.684 GHz + 8,406,231,727 instructions # 1.73 insn per cycle + 1.814605280 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 168) (512y: 0) (512z: 227) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 35753c8692..a96360aa15 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:18:31 +DATE: 2023-10-25_18:32:45 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=2, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.622469e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.481172e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.826768e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 +TOTAL : 0.551831 sec + 2,329,387,313 cycles # 3.021 GHz + 3,650,775,745 instructions # 1.57 insn per cycle + 0.828584114 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.282802e-02 +Avg ME 
(F77/CUDA) = 1.2828112108763889E-002 +Relative difference = 7.180279099086847e-06 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.729460e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.110223e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.110223e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 -TOTAL : 4.255264 sec - 14,615,789,313 cycles:u # 3.433 GHz - 40,190,391,890 instructions:u # 2.75 insn per cycle - 4.258334734 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.243845e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.497749e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.497749e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 +TOTAL : 5.410831 sec + 16,844,648,065 cycles # 3.111 GHz + 40,088,965,912 instructions # 2.38 insn per cycle + 5.415719530 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 1.500049293219082e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.289620e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.040819e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.040819e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 -TOTAL : 2.069545 sec - 7,002,182,131 cycles:u # 3.379 GHz - 16,748,198,675 instructions:u # 2.39 insn per cycle - 2.072477252 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.198454e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.967626e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.967626e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 +TOTAL : 2.324919 sec + 7,138,346,939 cycles # 3.065 GHz + 16,729,497,470 instructions # 2.34 insn per cycle + 2.329783883 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 
1360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 3.8113554068418534e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.390893e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.733751e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.733751e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 -TOTAL : 1.552888 sec - 5,156,863,877 cycles:u # 3.316 GHz - 10,480,013,170 instructions:u # 2.03 insn per cycle - 1.555841409 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.643489e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.234035e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.234035e+07 ) sec^-1 
+MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 +TOTAL : 1.702583 sec + 5,155,065,818 cycles # 3.021 GHz + 10,628,955,239 instructions # 2.06 insn per cycle + 1.707446133 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1122) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.555638e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.817995e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.817995e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 -TOTAL : 1.546525 sec - 5,133,706,165 cycles:u # 3.314 GHz - 10,132,408,956 instructions:u # 1.97 insn 
per cycle - 1.549473269 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.817440e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.344869e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.344869e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 +TOTAL : 1.653471 sec + 5,044,234,982 cycles # 3.043 GHz + 10,475,715,128 instructions # 2.08 insn per cycle + 1.658543423 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1074) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.863473e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.164421e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.164421e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371884e-02 +- 3.270111e-06 ) GeV^0 -TOTAL : 1.499357 sec - 4,803,893,523 cycles:u # 3.199 GHz - 8,580,368,826 instructions:u # 1.79 insn per cycle - 1.502292571 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.608343e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.175337e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.175337e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 +TOTAL : 1.712484 sec + 4,683,701,478 cycles # 2.728 GHz + 8,926,870,251 instructions # 1.91 insn per cycle + 1.717292179 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 343) (512y: 0) (512z: 710) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index eb9e9ef59f..efa36acd38 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -36,35 +36,73 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:42:42 +DATE: 2023-10-25_19:02:51 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! flagging abnormal ME for ievt=71728 +WARNING! flagging abnormal ME for ievt=152898 +WARNING! flagging abnormal ME for ievt=496545 +WARNING! flagging abnormal ME for ievt=66427 +WARNING! flagging abnormal ME for ievt=465318 +WARNING! flagging abnormal ME for ievt=458848 +WARNING! 
flagging abnormal ME for ievt=247522 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=7, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.717117e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.761186e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.761186e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371709e-02 +- 3.270385e-06 ) GeV^0 +TOTAL : 1.602340 sec + 5,595,842,121 cycles # 3.065 GHz + 10,162,817,469 instructions # 1.82 insn per cycle + 1.883301086 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.282802e-02 +Avg ME (F77/CUDA) = 1.2828112108763889E-002 +Relative difference = 7.180279099086847e-06 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -WARNING! flagging abnormal ME for ievt=334333 -WARNING! flagging abnormal ME for ievt=355930 -WARNING! flagging abnormal ME for ievt=450372 -WARNING! flagging abnormal ME for ievt=111162 -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +WARNING! flagging abnormal ME for ievt=53874 +WARNING! flagging abnormal ME for ievt=71728 +WARNING! flagging abnormal ME for ievt=152898 +WARNING! flagging abnormal ME for ievt=66427 +WARNING! flagging abnormal ME for ievt=164749 +WARNING! 
flagging abnormal ME for ievt=247522 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.707220e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.075285e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.075285e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 -TOTAL : 4.347128 sec - 14,903,367,368 cycles:u # 3.426 GHz - 40,361,164,372 instructions:u # 2.71 insn per cycle - 4.351029431 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.194651e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.427134e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.427134e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 +TOTAL : 5.727507 sec + 17,468,478,153 cycles # 3.048 GHz + 40,238,549,213 instructions # 2.30 insn per cycle + 5.733108996 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -75,27 +113,29 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -WARNING! flagging abnormal ME for ievt=334333 -WARNING! flagging abnormal ME for ievt=355930 -WARNING! flagging abnormal ME for ievt=450372 -WARNING! flagging abnormal ME for ievt=111162 -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +WARNING! flagging abnormal ME for ievt=53874 +WARNING! flagging abnormal ME for ievt=71728 +WARNING! flagging abnormal ME for ievt=152898 +WARNING! flagging abnormal ME for ievt=66427 +WARNING! flagging abnormal ME for ievt=164749 +WARNING! 
flagging abnormal ME for ievt=247522 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.034302e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.225581e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.225581e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 -TOTAL : 2.272239 sec - 7,665,382,719 cycles:u # 3.369 GHz - 18,085,512,951 instructions:u # 2.36 insn per cycle - 2.275998631 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.015918e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.363005e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.363005e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 +TOTAL : 2.562966 sec + 7,929,821,727 cycles # 3.088 GHz + 18,064,430,946 instructions # 2.28 insn per cycle + 2.568572939 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -106,27 +146,27 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -WARNING! flagging abnormal ME for ievt=355930 -WARNING! flagging abnormal ME for ievt=450372 -WARNING! flagging abnormal ME for ievt=186978 -WARNING! flagging abnormal ME for ievt=111162 -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +WARNING! flagging abnormal ME for ievt=53874 +WARNING! flagging abnormal ME for ievt=66427 +WARNING! flagging abnormal ME for ievt=164749 +WARNING! 
flagging abnormal ME for ievt=247522 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 5.739168e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.371751e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.371751e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 -TOTAL : 1.777776 sec - 5,884,369,562 cycles:u # 3.304 GHz - 11,602,702,718 instructions:u # 1.97 insn per cycle - 1.781438494 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.272473e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.009652e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.009652e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 +TOTAL : 1.944574 sec + 5,929,261,497 cycles # 3.042 GHz + 11,749,715,523 instructions # 1.98 insn per cycle + 1.950245306 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1122) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -137,27 +177,27 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -WARNING! flagging abnormal ME for ievt=355930 -WARNING! flagging abnormal ME for ievt=450372 -WARNING! flagging abnormal ME for ievt=186978 -WARNING! flagging abnormal ME for ievt=111162 -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +WARNING! flagging abnormal ME for ievt=53874 +WARNING! flagging abnormal ME for ievt=66427 +WARNING! flagging abnormal ME for ievt=164749 +WARNING! 
flagging abnormal ME for ievt=247522 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 5.833662e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.427872e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.427872e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 -TOTAL : 1.774478 sec - 5,870,489,674 cycles:u # 3.304 GHz - 11,255,095,620 instructions:u # 1.92 insn per cycle - 1.778380730 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.364659e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.078675e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.078675e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 +TOTAL : 1.914043 sec + 5,843,415,161 cycles # 3.045 GHz + 11,595,784,393 instructions # 1.98 insn per cycle + 1.919813375 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1074) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -168,27 +208,27 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -WARNING! flagging abnormal ME for ievt=355930 -WARNING! flagging abnormal ME for ievt=450372 -WARNING! flagging abnormal ME for ievt=186978 -WARNING! flagging abnormal ME for ievt=111162 -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +WARNING! flagging abnormal ME for ievt=53874 +WARNING! flagging abnormal ME for ievt=66427 +WARNING! flagging abnormal ME for ievt=164749 +WARNING! 
flagging abnormal ME for ievt=247522 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.068120e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.574984e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.574984e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371884e-02 +- 3.270111e-06 ) GeV^0 -TOTAL : 1.735354 sec - 5,572,441,027 cycles:u # 3.205 GHz - 9,789,888,606 instructions:u # 1.76 insn per cycle - 1.739135348 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.130829e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.360401e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.360401e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 +TOTAL : 2.001099 sec + 5,491,473,730 cycles # 2.738 GHz + 10,134,991,267 instructions # 1.85 insn per cycle + 2.006879900 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 343) (512y: 0) (512z: 710) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index 4edddfd03c..3649e05b61 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:47:20 +DATE: 2023-10-25_19:15:13 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --common OMP= +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.566300e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.421516e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.742992e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 +TOTAL : 1.134749 sec + 
4,098,993,410 cycles # 3.029 GHz + 6,605,372,981 instructions # 1.61 insn per cycle + 1.409722045 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.282802e-02 +Avg ME (F77/CUDA) = 1.2828112108763889E-002 +Relative difference = 7.180279099086847e-06 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.731049e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.110306e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.110306e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.244098e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.493114e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.493114e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 -TOTAL : 
4.250142 sec - 14,600,038,747 cycles:u # 3.434 GHz - 40,190,394,588 instructions:u # 2.75 insn per cycle - 4.253095521 seconds time elapsed +TOTAL : 5.726200 sec + 17,811,812,788 cycles # 3.109 GHz + 40,270,712,628 instructions # 2.26 insn per cycle + 5.730949174 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 1.500049293219082e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.292044e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.040786e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.040786e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.215182e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.000060e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 6.000060e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 -TOTAL : 2.067760 sec - 6,998,868,955 cycles:u # 3.381 GHz - 16,748,198,517 instructions:u # 2.39 insn per cycle - 2.070661637 seconds time elapsed +TOTAL : 2.623227 sec + 8,152,502,492 cycles # 3.103 GHz + 16,810,279,631 instructions # 2.06 insn per cycle + 2.628033716 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 3.8113554068418534e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.499962e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.738226e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.738226e+07 ) sec^-1 +OMP threads / `nproc 
--all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.607786e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.225348e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.225348e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 -TOTAL : 1.537148 sec - 5,102,238,830 cycles:u # 3.314 GHz - 10,480,017,946 instructions:u # 2.05 insn per cycle - 1.540028852 seconds time elapsed +TOTAL : 2.027947 sec + 6,177,720,088 cycles # 3.041 GHz + 10,540,553,410 instructions # 1.71 insn per cycle + 2.032791206 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1122) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.560211e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.820987e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.820987e+07 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.739337e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.329843e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.329843e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 -TOTAL : 1.544441 sec - 5,129,408,965 cycles:u # 3.316 GHz - 10,132,412,241 instructions:u # 1.98 insn per cycle - 1.547345562 seconds time elapsed +TOTAL : 1.998605 sec + 6,104,707,264 cycles # 3.049 GHz + 10,185,815,617 instructions # 1.67 insn per cycle + 2.003437292 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1074) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': 
AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.802348e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.153811e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.153811e+07 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.461301e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.134325e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.134325e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371884e-02 +- 3.270111e-06 ) GeV^0 -TOTAL : 1.505652 sec - 4,829,542,766 cycles:u # 3.202 GHz - 8,580,367,611 instructions:u # 1.78 insn per cycle - 1.508693688 seconds time elapsed +TOTAL : 2.092643 sec + 5,695,345,035 cycles # 2.717 GHz + 8,637,352,141 instructions # 1.52 insn per cycle + 2.097505728 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 343) (512y: 0) (512z: 710) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt index 4c77ada440..482f335b2b 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt @@ -36,21 +36,51 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:46:43 +DATE: 2023-10-25_19:12:11 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --curhst OMP= +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=2, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.572199e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.443581e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.789713e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 +TOTAL : 0.852228 sec + 3,102,837,181 cycles # 2.900 GHz + 6,399,466,575 instructions # 2.06 insn per cycle + 1.127081538 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst +==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 
1.282802e-02 +Avg ME (F77/CUDA) = 1.2828112108763889E-002 +Relative difference = 7.180279099086847e-06 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe: Aborted - 2,237,732 cycles:u # 1.452 GHz - 3,112,523 instructions:u # 1.39 insn per cycle - 0.113282053 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=6, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.242033e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.490396e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.490396e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 +TOTAL : 5.420944 sec + 16,812,615,332 cycles # 3.099 GHz + 40,088,432,090 instructions # 2.38 insn per cycle + 5.425745402 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -60,14 +90,23 @@ Relative difference = 1.500049293219082e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe: Aborted - 1,996,617 cycles:u # 1.391 GHz - 3,116,775 instructions:u # 1.56 insn per cycle - 0.108515474 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=6, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.220212e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.000588e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.000588e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 +TOTAL : 2.308004 sec + 7,155,170,314 cycles # 3.095 GHz + 16,729,446,787 instructions # 2.34 insn per cycle + 2.312771061 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -77,14 +116,23 @@ Relative difference = 3.8113554068418534e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe: Aborted - 1,946,925 cycles:u # 1.394 GHz - 3,117,578 instructions:u # 1.60 insn per cycle - 0.132825035 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=4, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.640154e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.225902e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.225902e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 +TOTAL : 1.703655 sec + 5,164,709,474 cycles # 3.025 GHz + 10,629,819,542 instructions # 2.06 insn per cycle + 1.708526413 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1122) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -94,14 +142,23 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe: Aborted - 1,958,449 cycles:u # 1.383 GHz - 3,117,235 instructions:u # 1.59 insn per cycle - 0.123003666 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=4, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.770352e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.326396e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.326396e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 +TOTAL : 1.669695 sec + 5,072,425,715 cycles # 3.031 GHz + 10,481,476,531 instructions # 2.07 insn per cycle + 1.674496390 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1074) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -111,14 +168,23 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe: Aborted - 1,946,905 cycles:u # 1.373 GHz - 3,116,503 instructions:u # 1.60 insn per cycle - 0.117625696 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=4, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.608761e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.188893e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.188893e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 +TOTAL : 1.713521 sec + 4,682,934,639 cycles # 2.727 GHz + 8,926,936,750 instructions # 1.91 insn per cycle + 1.718311206 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 343) (512y: 0) (512z: 710) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index b449440083..efc5436b49 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -36,30 +36,53 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:45:27 +DATE: 2023-10-25_19:09:10 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=7, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.218117e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.394390e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.636278e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371709e-02 +- 3.270385e-06 ) GeV^0 +TOTAL : 1.461263 sec + 4,982,734,203 cycles # 2.962 GHz + 9,103,785,620 instructions # 1.83 insn per cycle + 1.744448034 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.282802e-02 +Avg ME (F77/CUDA) = 1.2828112108763889E-002 +Relative difference = 7.180279099086847e-06 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.728103e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.104696e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.104696e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 -TOTAL : 4.256254 sec - 14,624,053,552 cycles:u # 3.434 GHz - 40,190,390,955 instructions:u # 2.75 insn per cycle - 4.259246946 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.245708e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.494542e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.494542e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 +TOTAL : 5.404955 sec + 16,819,106,248 cycles # 3.110 GHz + 40,088,802,052 instructions # 2.38 insn per cycle + 5.409792938 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +92,23 @@ Relative difference = 1.500049293219082e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.295978e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.044937e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.044937e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 -TOTAL : 2.066718 sec - 6,990,916,531 cycles:u # 3.379 GHz - 16,748,196,956 instructions:u # 2.40 insn per cycle - 2.069597625 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.222846e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.010307e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.010307e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 +TOTAL : 2.309298 sec + 7,155,727,700 cycles # 3.093 GHz + 16,729,709,933 instructions # 2.34 insn per cycle + 2.314175580 seconds time elapsed =Symbols in 
CPPProcess.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +118,23 @@ Relative difference = 3.8113554068418534e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.363434e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.733774e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.733774e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 -TOTAL : 1.558394 sec - 5,178,315,742 cycles:u # 3.317 GHz - 10,480,013,528 instructions:u # 2.02 insn per cycle - 1.561451840 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.653137e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.236145e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 1.236145e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 +TOTAL : 1.698626 sec + 5,157,602,141 cycles # 3.030 GHz + 10,629,000,968 instructions # 2.06 insn per cycle + 1.703452950 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1122) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +144,23 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.553056e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.818363e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.818363e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 -TOTAL : 1.542303 sec - 5,128,149,799 cycles:u # 3.319 GHz - 
10,132,413,337 instructions:u # 1.98 insn per cycle - 1.545214987 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.773001e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.326358e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.326358e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 +TOTAL : 1.667901 sec + 5,061,515,411 cycles # 3.028 GHz + 10,480,815,680 instructions # 2.07 insn per cycle + 1.672594775 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1074) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +170,23 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 
6.784579e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.155866e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.155866e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371884e-02 +- 3.270111e-06 ) GeV^0 -TOTAL : 1.509175 sec - 4,838,626,714 cycles:u # 3.201 GHz - 8,580,370,978 instructions:u # 1.77 insn per cycle - 1.512104777 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.555200e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.166909e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.166909e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 +TOTAL : 1.731879 sec + 4,689,690,631 cycles # 2.701 GHz + 8,927,107,008 instructions # 1.90 insn per cycle + 1.736672094 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 343) (512y: 0) (512z: 710) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index 447e304729..5dbfdd3213 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:18:44 +DATE: 2023-10-25_18:33:09 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=2, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.625913e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.503566e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.918133e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 +TOTAL : 0.552329 sec + 2,328,888,712 cycles # 3.020 GHz + 3,648,506,478 instructions # 1.57 insn per cycle + 0.830116688 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 80 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.282802e-02 +Avg ME 
(F77/CUDA) = 1.2828112108763889E-002 +Relative difference = 7.180279099086847e-06 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.736162e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.117571e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.117571e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 -TOTAL : 4.238341 sec - 14,561,502,974 cycles:u # 3.434 GHz - 40,139,315,186 instructions:u # 2.76 insn per cycle - 4.241406501 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.244101e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.497364e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.497364e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 +TOTAL : 5.479428 sec + 17,033,310,610 cycles # 3.106 GHz + 40,038,122,508 instructions # 2.35 insn per cycle + 5.484429062 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 347) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 1.500049293219082e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.280509e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.015334e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.015334e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 -TOTAL : 2.072032 sec - 7,005,174,204 cycles:u # 3.377 GHz - 16,671,958,233 instructions:u # 2.38 insn per cycle - 2.075028280 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.023210e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.906828e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.906828e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 +TOTAL : 2.441591 sec + 7,417,759,581 cycles # 3.033 GHz + 16,653,923,334 instructions # 2.25 insn per cycle + 2.446386116 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 
1335) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 3.8113554068418534e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.229686e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.714141e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.714141e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 -TOTAL : 1.586341 sec - 5,266,021,866 cycles:u # 3.313 GHz - 10,466,688,261 instructions:u # 1.99 insn per cycle - 1.589935074 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.697456e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.249241e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.249241e+07 ) sec^-1 
+MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 +TOTAL : 1.685112 sec + 5,138,959,489 cycles # 3.042 GHz + 10,615,393,712 instructions # 2.07 insn per cycle + 1.690001992 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1092) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.531291e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.816551e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.816551e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 -TOTAL : 1.550816 sec - 5,146,977,582 cycles:u # 3.313 GHz - 10,125,376,383 instructions:u # 1.97 insn 
per cycle - 1.553865343 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.752164e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.307340e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.307340e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 +TOTAL : 1.671544 sec + 5,088,363,074 cycles # 3.037 GHz + 10,468,790,591 instructions # 2.06 insn per cycle + 1.676598026 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1044) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 2.5306003563303186e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.960002e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.258537e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.258537e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371884e-02 +- 3.270111e-06 ) GeV^0 -TOTAL : 1.487427 sec - 4,768,127,222 cycles:u # 3.200 GHz - 8,509,628,646 instructions:u # 1.78 insn per cycle - 1.490431635 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.538235e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.204282e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.204282e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 +TOTAL : 1.747955 sec + 4,622,637,035 cycles # 2.639 GHz + 8,857,108,339 instructions # 1.92 insn per cycle + 1.752985428 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 312) (512y: 0) (512z: 678) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index 3a783df15c..7778235778 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:33:01 +DATE: 2023-10-25_18:53:05 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 12 OMP= +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=2, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.591002e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.452620e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.766126e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 +TOTAL : 0.556490 sec + 2,367,622,596 cycles # 3.019 GHz + 3,659,745,020 instructions # 1.55 insn per cycle + 0.843534071 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.282802e-02 +Avg ME 
(F77/CUDA) = 1.2828112108763889E-002 +Relative difference = 7.180279099086847e-06 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.913105e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.593311e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.593311e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 -TOTAL : 2.221837 sec - 7,530,888,619 cycles:u # 3.385 GHz - 17,515,746,714 instructions:u # 2.33 insn per cycle - 2.224960973 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.920065e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.817963e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.817963e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 +TOTAL : 2.512043 sec + 7,713,453,992 cycles # 3.066 GHz + 17,403,928,818 instructions # 2.26 insn per cycle + 2.516888756 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 1.4858695011109669e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.376301e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.062960e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.062960e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 -TOTAL : 1.587805 sec - 5,325,014,500 cycles:u # 3.348 GHz - 10,782,260,862 instructions:u # 2.02 insn per cycle - 1.590744824 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.681101e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.456058e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.456058e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 +TOTAL : 1.700110 sec + 5,232,564,400 cycles # 3.070 GHz + 10,761,247,884 instructions # 2.06 insn per cycle + 1.704947526 seconds time elapsed =Symbols in CPPProcess.o= 
(~sse4: 941) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 3.924793743706775e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 8.415063e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.725017e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.725017e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 -TOTAL : 1.315985 sec - 4,356,664,668 cycles:u # 3.304 GHz - 8,188,326,455 instructions:u # 1.88 insn per cycle - 1.319108781 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.136284e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.424889e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.424889e+07 ) 
sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 +TOTAL : 1.396345 sec + 4,222,666,465 cycles # 3.047 GHz + 8,344,159,796 instructions # 1.98 insn per cycle + 1.401275041 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 855) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 2.5235104658031306e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 8.553543e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.471561e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.471561e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 -TOTAL : 1.322016 sec - 4,377,960,359 cycles:u # 3.305 GHz - 7,966,549,257 instructions:u # 1.82 
insn per cycle - 1.325025435 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.252461e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.832504e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.832504e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 +TOTAL : 1.356528 sec + 4,156,578,728 cycles # 3.055 GHz + 8,308,294,757 instructions # 2.00 insn per cycle + 1.361324117 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 779) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 2.5235104658031306e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 8.018327e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 3.858996e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.858996e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371884e-02 +- 3.270111e-06 ) GeV^0 -TOTAL : 1.368267 sec - 4,434,622,037 cycles:u # 3.235 GHz - 7,851,335,699 instructions:u # 1.77 insn per cycle - 1.371245457 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.632818e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.213085e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.213085e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 +TOTAL : 1.468207 sec + 4,189,515,183 cycles # 2.846 GHz + 8,197,193,406 instructions # 1.96 insn per cycle + 1.473030833 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 280) (512y: 0) (512z: 301) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index f7ac3f3149..0d46a7bcf5 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:33:12 +DATE: 2023-10-25_18:53:26 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 12 OMP= +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=2, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.594388e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.488622e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.908806e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 +TOTAL : 0.555353 sec + 2,369,008,611 cycles # 3.015 GHz + 3,693,428,004 instructions # 1.56 insn per cycle + 0.844152711 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 80 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.282802e-02 +Avg ME 
(F77/CUDA) = 1.2828112108763889E-002 +Relative difference = 7.180279099086847e-06 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 5.021947e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.048940e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.048940e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 -TOTAL : 1.869449 sec - 6,296,012,904 cycles:u # 3.363 GHz - 14,274,872,024 instructions:u # 2.27 insn per cycle - 1.872524206 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.824530e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.823115e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.823115e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 +TOTAL : 2.002512 sec + 6,203,899,879 cycles # 3.095 GHz + 14,161,126,790 instructions # 2.28 insn per cycle + 2.007246903 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 133) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 1.3015322037054697e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=6, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 7.154287e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.223241e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.223241e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 -TOTAL : 1.480330 sec - 4,952,907,742 cycles:u # 3.340 GHz - 9,587,702,713 instructions:u # 1.94 insn per cycle - 1.483393390 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.299369e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.234812e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.234812e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 +TOTAL : 1.542994 sec + 4,763,831,720 cycles # 3.079 GHz + 9,566,058,895 instructions # 2.01 insn per cycle + 1.547857940 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 
663) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 3.8113554068418534e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 8.697649e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.520070e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.520070e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 -TOTAL : 1.291464 sec - 4,275,495,416 cycles:u # 3.304 GHz - 7,972,879,812 instructions:u # 1.86 insn per cycle - 1.294455433 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.326543e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.012502e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.012502e+07 ) sec^-1 
+MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 +TOTAL : 1.344736 sec + 4,139,920,255 cycles # 3.069 GHz + 8,120,823,500 instructions # 1.96 insn per cycle + 1.349591931 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 623) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 8.621966e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.806028e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.806028e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 -TOTAL : 1.312685 sec - 4,356,323,719 cycles:u # 3.312 GHz - 7,779,421,294 instructions:u # 1.79 insn per 
cycle - 1.315625377 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.344788e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.506915e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.506915e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 +TOTAL : 1.344145 sec + 4,090,602,024 cycles # 3.034 GHz + 8,121,231,635 instructions # 1.99 insn per cycle + 1.349056597 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 590) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=4, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 8.250034e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
4.675253e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.675253e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371884e-02 +- 3.270111e-06 ) GeV^0 -TOTAL : 1.350207 sec - 4,461,812,073 cycles:u # 3.298 GHz - 7,687,010,388 instructions:u # 1.72 insn per cycle - 1.353247958 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.650060e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.477330e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.477330e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 +TOTAL : 1.474656 sec + 4,125,251,957 cycles # 2.790 GHz + 8,033,155,651 instructions # 1.95 insn per cycle + 1.479796548 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 238) (512y: 0) (512z: 234) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 56085f8e1f..42cb535764 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:18:58 +DATE: 2023-10-25_18:33:34 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.989272e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.677595e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.794677e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.639629 sec + 2,624,746,782 cycles # 3.027 GHz + 4,095,202,506 instructions # 1.56 insn per cycle + 0.927049463 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.282804e-02 +Avg ME 
(F77/CUDA) = 1.2828039901590279E-002 +Relative difference = 7.671454200650844e-09 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.431799e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.740777e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.740777e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.032135 sec - 17,315,960,908 cycles:u # 3.439 GHz - 41,273,314,342 instructions:u # 2.38 insn per cycle - 5.035304429 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.199685e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.421947e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.421947e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 5.639670 sec + 17,406,394,399 cycles # 3.084 GHz + 40,598,366,537 instructions # 2.33 insn per cycle + 5.644674626 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 377) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.715741e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.129042e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.129042e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.932644 sec - 9,999,498,591 cycles:u # 3.407 GHz - 25,424,986,719 instructions:u # 2.54 insn per cycle - 2.935821772 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.181362e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.136383e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.136383e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.277504 sec + 10,153,584,525 cycles # 3.094 GHz + 24,841,830,142 instructions # 2.45 insn per cycle + 3.282502956 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1318) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.104767e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.732661e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.732661e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.120965 sec - 7,089,718,272 cycles:u # 3.339 GHz - 14,120,965,347 instructions:u # 1.99 insn per cycle - 2.124110385 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.329721e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.123732e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.123732e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 
+- 3.270315e-06 ) GeV^0 +TOTAL : 2.277389 sec + 6,843,760,468 cycles # 3.000 GHz + 13,635,441,327 instructions # 1.99 insn per cycle + 2.282659453 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1211) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.202279e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.057529e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.057529e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.098690 sec - 7,015,864,901 cycles:u # 3.339 GHz - 13,615,283,917 instructions:u # 1.94 insn per cycle - 2.101710645 seconds time 
elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.425597e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.470588e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.470588e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.232215 sec + 6,546,202,423 cycles # 2.927 GHz + 13,316,237,781 instructions # 2.03 insn per cycle + 2.237429944 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1141) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.636507e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.078185e+07 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.078185e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.956416 sec - 6,188,582,377 cycles:u # 3.159 GHz - 10,504,124,053 instructions:u # 1.70 insn per cycle - 1.959442647 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.286536e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.835267e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.835267e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.304970 sec + 5,866,744,695 cycles # 2.541 GHz + 10,212,406,703 instructions # 1.74 insn per cycle + 2.310030357 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 406) (512y: 0) (512z: 707) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index c23fd8bd58..84b58b8eae 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:19:14 +DATE: 2023-10-25_18:34:02 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.129836e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.454495e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.086141e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.640256 sec + 2,612,587,104 cycles # 3.007 GHz + 4,030,428,105 instructions # 1.54 insn per cycle + 0.928744648 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 118 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.282804e-02 +Avg ME 
(F77/CUDA) = 1.2828039901590279E-002 +Relative difference = 7.671454200650844e-09 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.444987e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.749496e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.749496e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.989879 sec - 17,179,248,922 cycles:u # 3.441 GHz - 41,222,242,033 instructions:u # 2.40 insn per cycle - 4.993075094 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.208951e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.433392e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.433392e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 5.597792 sec + 17,342,645,679 cycles # 3.096 GHz + 40,546,867,973 instructions # 2.34 insn per cycle + 5.602682073 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.703014e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.099777e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.099777e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.943641 sec - 10,040,467,895 cycles:u # 3.408 GHz - 25,386,490,837 instructions:u # 2.53 insn per cycle - 2.946682776 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.133508e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.059933e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.059933e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.349360 sec + 10,185,466,163 cycles # 3.037 GHz + 24,803,480,189 instructions # 2.44 insn per cycle + 3.354498074 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1305) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.086719e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.629429e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.629429e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.123762 sec - 7,106,505,553 cycles:u # 3.342 GHz - 14,095,051,515 instructions:u # 1.98 insn per cycle - 2.127962021 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.351442e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.169056e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.169056e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 
+- 3.270315e-06 ) GeV^0 +TOTAL : 2.263569 sec + 6,798,954,008 cycles # 2.998 GHz + 13,608,714,241 instructions # 2.00 insn per cycle + 2.268496012 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1191) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.209543e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.058896e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.058896e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.092663 sec - 6,999,323,320 cycles:u # 3.341 GHz - 13,601,956,134 instructions:u # 1.94 insn per cycle - 2.095718322 seconds time 
elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.513470e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.629747e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.629747e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.175520 sec + 6,534,951,743 cycles # 2.998 GHz + 13,313,454,459 instructions # 2.04 insn per cycle + 2.180544895 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1121) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.686227e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.100329e+07 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.100329e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.942782 sec - 6,146,902,959 cycles:u # 3.160 GHz - 10,383,844,685 instructions:u # 1.69 insn per cycle - 1.945768129 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.364985e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.081130e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.081130e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.260031 sec + 5,775,308,811 cycles # 2.550 GHz + 10,091,603,442 instructions # 1.75 insn per cycle + 2.265220222 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 386) (512y: 0) (512z: 688) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 83f9f3a9e7..fc7d3d5581 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:19:31 +DATE: 2023-10-25_18:34:30 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.189330e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.175818e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270057e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.512252 sec + 2,155,173,458 cycles # 2.909 GHz + 3,041,305,881 instructions # 1.41 insn per cycle + 0.798678690 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 2.028807e+00 +Avg ME (F77/CUDA) = 2.0288063388516822 +Relative 
difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.430955e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.504273e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.504273e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.448983 sec - 15,409,189,933 cycles:u # 3.461 GHz - 45,453,870,566 instructions:u # 2.95 insn per cycle - 4.452178333 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.926195e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.975396e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.975396e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.545999 sec + 17,154,877,090 cycles # 3.091 GHz + 45,384,595,667 instructions # 2.65 insn per cycle + 5.551026412 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.303881e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.554553e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.554553e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.568460 sec - 8,853,028,358 cycles:u # 3.443 GHz - 27,825,917,583 instructions:u # 3.14 insn per cycle - 2.571602691 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.351243e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.515324e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.515324e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.231698 sec + 10,007,778,960 cycles # 3.093 GHz + 27,771,257,423 instructions # 2.77 insn per cycle + 3.236933218 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2543) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 7.074870e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.787571e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.787571e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.609723 sec - 5,376,226,080 cycles:u # 3.334 GHz - 12,550,768,124 instructions:u # 2.33 insn per cycle - 1.612901337 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.340358e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.757627e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.757627e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 
2.065747 sec + 6,039,122,425 cycles # 2.917 GHz + 12,507,446,858 instructions # 2.07 insn per cycle + 2.070933576 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 7.560817e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.369436e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.369436e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.517357 sec - 5,062,020,112 cycles:u # 3.330 GHz - 11,896,187,886 instructions:u # 2.35 insn per cycle - 1.520597674 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 
5.874280e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.375597e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.375597e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.886900 sec + 5,532,181,520 cycles # 2.925 GHz + 11,883,413,800 instructions # 2.15 insn per cycle + 1.892172826 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2414) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.683379e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.286974e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.286974e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 
1.697824 sec - 4,815,368,425 cycles:u # 2.831 GHz - 8,288,311,753 instructions:u # 1.72 insn per cycle - 1.701300285 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1797) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.697439e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.891301e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.891301e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.940719 sec + 5,705,029,690 cycles # 1.938 GHz + 8,291,496,940 instructions # 1.45 insn per cycle + 2.945995320 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1451) (512y: 122) (512z: 1797) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index 7237782240..bde7cbdb09 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -36,31 +36,60 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:42:56 +DATE: 2023-10-25_19:03:18 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.773797e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.294173e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.294173e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.785416 sec + 3,063,164,499 cycles # 3.000 GHz + 4,792,639,654 instructions # 1.56 insn per cycle + 1.079850324 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 2.028807e+00 +Avg ME (F77/CUDA) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.422258e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.495267e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.495267e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.513529 sec - 15,562,098,392 cycles:u # 3.445 GHz - 45,512,672,442 instructions:u # 2.92 insn per cycle - 4.517738609 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.892855e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.940587e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.940587e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.719758 sec + 17,497,619,767 cycles # 3.056 GHz + 45,446,099,914 instructions # 2.60 insn per cycle + 5.726041885 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -71,23 +100,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.260972e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.503925e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.503925e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.660497 sec - 9,101,452,402 cycles:u # 3.416 GHz - 27,992,194,752 instructions:u # 3.08 insn per cycle - 2.664855606 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.325585e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.487595e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.487595e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.333037 sec + 10,354,211,298 cycles # 3.101 GHz + 27,955,092,209 instructions # 2.70 insn per cycle + 3.339341303 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2543) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -98,23 +127,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.937840e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.630556e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.630556e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.704387 sec - 5,636,463,258 cycles:u # 3.300 GHz - 12,820,595,692 instructions:u # 2.27 insn per cycle - 1.708515081 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.229345e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.633133e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.633133e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 
+TOTAL : 2.186507 sec + 6,395,865,888 cycles # 2.918 GHz + 12,794,721,791 instructions # 2.00 insn per cycle + 2.192626317 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -125,23 +154,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 7.379272e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.150299e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.150299e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.616624 sec - 5,338,130,644 cycles:u # 3.294 GHz - 12,166,018,598 instructions:u # 2.28 insn per cycle - 1.620766506 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.718956e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.198053e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.198053e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.015312 sec + 5,880,292,764 cycles # 2.910 GHz + 12,172,549,562 instructions # 2.07 insn per cycle + 2.021489543 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2414) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -152,23 +181,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.563055e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.143112e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.143112e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.793756 sec - 5,078,809,580 cycles:u # 2.825 GHz - 8,515,208,662 instructions:u # 1.68 insn per cycle - 1.797847006 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1797) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.795700e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.000887e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.000887e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.946657 sec + 6,072,408,903 cycles # 2.057 GHz + 8,534,252,358 instructions # 1.41 insn per cycle + 2.952768361 seconds time elapsed +=Symbols in 
CPPProcess.o= (~sse4: 0) (avx2: 1451) (512y: 122) (512z: 1797) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index de868da2b8..531c093860 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:47:33 +DATE: 2023-10-25_19:15:40 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --common OMP= +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.085961e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.172637e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270805e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 0.603556 sec + 2,510,793,087 cycles # 3.030 GHz + 3,673,074,819 instructions # 1.46 insn per cycle + 0.885869264 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 2.028807e+00 +Avg ME (F77/CUDA) = 2.0288063388516822 
+Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.415561e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.488156e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.488156e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.938086e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.988073e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.988073e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.477845 sec - 15,510,356,917 cycles:u # 3.462 GHz - 45,453,872,830 instructions:u # 2.93 insn per cycle - 4.481018259 seconds time elapsed +TOTAL : 5.572089 sec + 17,339,920,977 cycles # 3.110 GHz + 45,401,031,280 instructions # 2.62 insn per cycle + 5.577099493 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.310045e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.559828e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.559828e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.366267e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.533818e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.533818e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.565742 sec - 8,841,179,622 cycles:u # 3.443 GHz - 27,825,913,835 instructions:u # 3.15 insn per cycle - 2.568838506 seconds time elapsed +TOTAL : 3.275090 sec + 10,190,622,487 cycles # 3.108 GHz + 27,770,717,333 instructions # 2.73 insn per cycle + 3.280028443 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2543) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 7.087497e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.812354e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.812354e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.358680e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.785880e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.785880e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.607083 sec - 5,368,175,734 cycles:u # 3.335 GHz - 12,550,768,980 instructions:u # 2.34 insn per cycle - 1.610189658 seconds time elapsed +TOTAL : 2.118513 sec + 6,227,899,517 cycles # 2.934 GHz + 12,490,736,505 instructions # 2.01 insn per cycle + 2.123796775 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 7.536057e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.337529e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.337529e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.791831e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.300158e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.300158e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.521050 sec - 5,076,727,472 cycles:u # 3.332 GHz - 11,896,188,861 instructions:u # 2.34 insn per cycle - 1.524133393 seconds time elapsed +TOTAL : 1.975789 sec + 5,740,046,861 cycles # 2.900 GHz + 11,834,515,828 instructions # 2.06 insn per cycle + 1.980847261 seconds 
time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2414) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.696929e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.301242e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.301242e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.818538e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.030811e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.030811e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.695428 sec - 4,808,224,755 cycles:u # 2.831 GHz - 8,288,307,748 instructions:u # 1.72 insn per cycle - 1.698538765 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 
1446) (512y: 122) (512z: 1797) +TOTAL : 2.912399 sec + 5,874,294,388 cycles # 2.014 GHz + 8,239,488,482 instructions # 1.40 insn per cycle + 2.917538602 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1451) (512y: 122) (512z: 1797) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt index f86eae16b9..ee80d49776 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt @@ -36,21 +36,51 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:46:46 +DATE: 2023-10-25_19:12:36 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --curhst OMP= +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.085112e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.169879e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.269257e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.542204 sec + 2,298,212,603 cycles # 3.002 GHz + 3,616,714,256 instructions # 1.57 insn per cycle + 0.823159797 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 2.028807e+00 +Avg ME (F77/CUDA) = 2.0288063388516822 
+Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe: Aborted - 2,192,346 cycles:u # 1.444 GHz - 3,107,719 instructions:u # 1.42 insn per cycle - 0.107597341 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.902640e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.950734e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.950734e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.613770 sec + 17,167,591,716 cycles # 3.056 GHz + 45,385,422,779 instructions # 2.64 insn per cycle + 5.618850316 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -60,14 +90,23 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe: Aborted - 2,118,021 cycles:u # 1.381 GHz - 3,111,083 instructions:u # 1.47 insn per cycle - 0.118204330 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.356859e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.522784e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.522784e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.227055 sec + 10,007,434,150 cycles # 3.097 GHz + 27,771,321,943 instructions # 2.78 insn per cycle + 3.232084319 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2543) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -77,14 +116,23 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe: Aborted - 2,004,671 cycles:u # 1.319 GHz - 3,111,988 instructions:u # 1.55 insn per cycle - 0.105862672 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.339247e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.757472e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.757472e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.067924 sec + 6,064,030,334 cycles # 2.927 GHz + 12,508,006,764 instructions # 2.06 insn per cycle + 2.072907317 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -94,14 +142,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe: Aborted - 1,925,242 cycles:u # 1.333 GHz - 3,112,848 instructions:u # 1.62 insn per cycle - 0.108456654 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.874688e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.380385e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.380385e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.887776 sec + 5,540,691,746 cycles # 2.929 GHz + 11,883,645,896 instructions # 2.14 insn per cycle + 1.892908998 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2414) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -111,14 +168,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe: Aborted - 1,927,580 cycles:u # 1.304 GHz - 3,112,262 instructions:u # 1.61 insn per cycle - 0.105664290 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1797) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.817386e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.022798e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.022798e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.850472 sec + 5,706,017,461 cycles # 1.999 GHz + 8,290,142,366 instructions # 1.45 insn per cycle + 2.855563486 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1451) (512y: 122) (512z: 1797) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index cd7befa7e8..1b9c9ee7df 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -36,30 +36,53 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:45:40 +DATE: 2023-10-25_19:09:35 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.145651e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.173920e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.273175e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.691895 sec + 2,781,064,477 cycles # 3.018 GHz + 4,402,279,507 instructions # 1.58 insn per cycle + 0.980225226 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 2.028807e+00 +Avg ME (F77/CUDA) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.413682e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.487845e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.487845e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.480388 sec - 15,521,061,945 cycles:u # 3.462 GHz - 45,453,872,331 instructions:u # 2.93 insn per cycle - 4.483617521 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.929230e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.978722e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.978722e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.538774 sec + 17,159,821,844 cycles # 3.096 GHz + 45,385,185,100 instructions # 2.64 insn per cycle + 5.543842374 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +92,23 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.312494e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.561007e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.561007e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.564128 sec - 8,837,322,306 cycles:u # 3.443 GHz - 27,825,917,366 instructions:u # 3.15 insn per cycle - 2.567231215 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.287540e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.449504e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.449504e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.292731 sec + 10,016,590,673 cycles # 3.038 GHz + 27,771,485,458 instructions # 2.77 insn per cycle + 3.297740861 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2543) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +118,23 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 7.108388e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.828424e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.828424e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.602773 sec - 5,351,831,079 cycles:u # 3.333 GHz - 12,550,772,615 instructions:u # 2.35 insn per cycle - 1.605940866 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.325678e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.741826e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.741826e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 
+TOTAL : 2.072513 sec + 6,058,236,283 cycles # 2.917 GHz + 12,507,380,733 instructions # 2.06 insn per cycle + 2.077678265 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +144,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 7.550944e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.358867e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.358867e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.518337 sec - 5,059,813,173 cycles:u # 3.326 GHz - 11,896,190,671 instructions:u # 2.35 insn per cycle - 1.521540673 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 5.871793e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.376853e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.376853e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.888593 sec + 5,531,298,122 cycles # 2.922 GHz + 11,883,369,769 instructions # 2.15 insn per cycle + 1.893708699 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2414) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +170,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.689284e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.294108e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.294108e+05 ) sec^-1 -MeanMatrixElemValue = ( 
2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.697971 sec - 4,812,488,030 cycles:u # 2.829 GHz - 8,288,308,200 instructions:u # 1.72 insn per cycle - 1.701195822 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1797) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.825270e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.031884e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.031884e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.844855 sec + 5,686,333,748 cycles # 1.997 GHz + 8,290,317,138 instructions # 1.46 insn per cycle + 2.849943056 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1451) (512y: 122) (512z: 1797) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index a5c9453825..11300e6895 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:19:45 +DATE: 2023-10-25_18:34:57 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.181607e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.171006e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.264343e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.507998 sec + 2,247,373,928 cycles # 3.001 GHz + 3,245,400,581 instructions # 1.44 insn per cycle + 0.806174140 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 2.028807e+00 +Avg ME (F77/CUDA) = 2.0288063388516822 +Relative 
difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.476229e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.552300e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.552300e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.370568 sec - 15,136,818,641 cycles:u # 3.461 GHz - 44,446,477,812 instructions:u # 2.94 insn per cycle - 4.373795655 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.976266e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.029293e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.029293e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.409225 sec + 16,681,628,869 cycles # 3.082 GHz + 44,378,235,380 instructions # 2.66 insn per cycle + 5.414507921 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 576) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.436275e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.714377e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.714377e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.497946 sec - 8,541,476,152 cycles:u # 3.415 GHz - 26,675,915,603 instructions:u # 3.12 insn per cycle - 2.501489423 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.514456e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.696441e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.696441e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.086332 sec + 9,510,708,832 cycles # 3.078 GHz + 26,620,808,250 instructions # 2.80 insn per cycle + 3.091424217 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2339) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.405128e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.981595e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.981595e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.764525 sec - 5,900,836,520 cycles:u # 3.339 GHz - 14,099,110,491 instructions:u # 2.39 insn per cycle - 1.767682384 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.735122e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.074181e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.074181e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 
2.319139 sec + 6,584,764,475 cycles # 2.834 GHz + 14,057,249,658 instructions # 2.13 insn per cycle + 2.324235278 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2753) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.693845e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.323982e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.323982e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.697866 sec - 5,671,023,494 cycles:u # 3.335 GHz - 13,586,138,958 instructions:u # 2.40 insn per cycle - 1.701096581 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 
5.117258e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.494373e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.494373e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.150419 sec + 6,273,792,377 cycles # 2.911 GHz + 13,574,431,184 instructions # 2.16 insn per cycle + 2.155695099 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2405) (512y: 296) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.602718e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.194142e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.194142e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 
1.718156 sec - 4,868,702,807 cycles:u # 2.829 GHz - 10,081,719,325 instructions:u # 2.07 insn per cycle - 1.721394711 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.685746e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.878776e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.878776e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.947596 sec + 5,924,739,734 cycles # 2.007 GHz + 10,074,038,054 instructions # 1.70 insn per cycle + 2.952990655 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1324) (512y: 208) (512z: 1980) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 283f3eb87c..72e4f7ff9f 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:33:21 +DATE: 2023-10-25_18:53:45 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 2 OMP= +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.139570e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.179113e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.275835e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.507615 sec + 2,255,394,284 cycles # 3.018 GHz + 3,233,142,981 instructions # 1.43 insn per cycle + 0.804645133 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 2.028807e+00 +Avg ME (F77/CUDA) = 2.0288063388516822 +Relative 
difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.175373e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.302363e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.302363e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.437181 sec - 11,884,986,629 cycles:u # 3.455 GHz - 34,405,895,161 instructions:u # 2.89 insn per cycle - 3.440255501 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 680) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.504791e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.591296e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.591296e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 4.290630 sec + 13,015,421,868 cycles # 3.031 GHz + 34,406,707,609 instructions # 2.64 insn per cycle + 4.295722046 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 686) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.697270e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.878577e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.878577e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.968633 sec - 10,248,083,842 cycles:u # 3.449 GHz - 24,077,637,055 instructions:u # 2.35 insn per cycle - 2.971828444 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.162383e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.308946e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.308946e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.418889 sec + 10,608,875,216 cycles # 3.099 GHz + 24,023,081,327 instructions # 2.26 insn per cycle + 3.424022271 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.113927e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.642581e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.642581e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.843940 sec - 6,167,857,289 cycles:u # 3.340 GHz - 12,457,536,472 instructions:u # 2.02 insn per cycle - 1.847176990 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.707866e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.031974e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.031974e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 
2.329926 sec + 6,692,684,984 cycles # 2.867 GHz + 12,415,083,748 instructions # 1.86 insn per cycle + 2.335033574 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3156) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 3.2588037208240405e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.562158e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.158272e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.158272e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.727660 sec - 5,775,824,126 cycles:u # 3.338 GHz - 11,598,742,571 instructions:u # 2.01 insn per cycle - 1.730844187 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 
5.126119e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.508462e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.508462e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.148404 sec + 6,244,000,039 cycles # 2.900 GHz + 11,586,646,765 instructions # 1.86 insn per cycle + 2.153653502 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2692) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 3.2588037208240405e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.715867e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.330159e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.330159e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 
1.692034 sec - 4,795,227,737 cycles:u # 2.830 GHz - 9,318,641,605 instructions:u # 1.94 insn per cycle - 1.695200191 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.121660e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.365055e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.365055e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.647363 sec + 5,336,886,997 cycles # 2.013 GHz + 9,309,895,095 instructions # 1.74 insn per cycle + 2.652547834 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2116) (512y: 282) (512z: 1958) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index 9ef1271e0d..b8d2933568 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:33:35 +DATE: 2023-10-25_18:54:12 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 2 OMP= +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.136826e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.174839e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270632e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.507009 sec + 2,214,441,092 cycles # 3.000 GHz + 3,174,554,342 instructions # 1.43 insn per cycle + 0.795728101 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 2.028807e+00 +Avg ME (F77/CUDA) = 2.0288063388516822 +Relative 
difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.388519e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.532825e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.532825e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.230821 sec - 11,161,749,522 cycles:u # 3.452 GHz - 35,109,807,854 instructions:u # 3.15 insn per cycle - 3.234089061 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 456) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.647788e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.745237e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.745237e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 4.064620 sec + 12,384,265,473 cycles # 3.044 GHz + 35,059,405,316 instructions # 2.83 insn per cycle + 4.069669316 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 457) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.637200e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.812939e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.812939e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.016744 sec - 10,412,913,669 cycles:u # 3.449 GHz - 23,154,129,157 instructions:u # 2.22 insn per cycle - 3.019990954 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.138033e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.282936e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.282936e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.444702 sec + 10,688,625,544 cycles # 3.099 GHz + 23,099,820,217 instructions # 2.16 insn per cycle + 3.449755846 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2363) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.548140e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.157289e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.157289e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.730430 sec - 5,783,016,311 cycles:u # 3.337 GHz - 12,013,376,708 instructions:u # 2.08 insn per cycle - 1.733620799 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.237103e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.643499e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.643499e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 
2.105153 sec + 6,160,181,225 cycles # 2.920 GHz + 11,969,984,936 instructions # 1.94 insn per cycle + 2.110284671 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2511) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.901259e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.569168e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.569168e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.649565 sec - 5,509,653,613 cycles:u # 3.334 GHz - 11,153,994,408 instructions:u # 2.02 insn per cycle - 1.652856459 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 
5.378177e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.801357e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.801357e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.052827 sec + 6,017,899,320 cycles # 2.925 GHz + 11,142,057,093 instructions # 1.85 insn per cycle + 2.058039153 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2128) (512y: 174) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.946866e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.604676e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.604676e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 
1.639153 sec - 4,647,988,628 cycles:u # 2.831 GHz - 9,041,068,616 instructions:u # 1.95 insn per cycle - 1.642368229 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1651) (512y: 208) (512z: 1566) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.233582e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.488303e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.488303e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.581242 sec + 5,224,244,555 cycles # 2.021 GHz + 9,033,433,625 instructions # 1.73 insn per cycle + 2.586440370 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1651) (512y: 208) (512z: 1567) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index afff76e16b..24b477c6c2 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:19:59 +DATE: 2023-10-25_18:35:25 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.085170e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.712610e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.977210e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 0.469467 sec + 2,068,301,293 cycles # 3.004 GHz + 3,012,364,622 instructions # 1.46 insn per cycle + 0.747476379 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 2.028811e+00 +Avg ME (F77/CUDA) = 2.0288499749731272 +Relative 
difference = 1.9210746159747678e-05 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.690029e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.768739e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.768739e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 4.018311 sec - 13,934,247,146 cycles:u # 3.466 GHz - 45,275,831,077 instructions:u # 3.25 insn per cycle - 4.021247677 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.956122e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.010570e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.010570e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.445413 sec + 16,510,915,611 cycles # 3.030 GHz + 45,308,404,518 instructions # 2.74 insn per cycle + 5.450456954 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 6.443528218283898e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.035380e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.463877e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.463877e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.854868 sec - 6,394,320,124 cycles:u # 3.443 GHz - 17,637,598,795 instructions:u # 2.76 insn per cycle - 1.857812507 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.773825e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.129881e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.129881e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.278501 sec + 7,055,633,229 cycles # 3.091 GHz + 17,671,724,757 instructions # 2.50 insn per cycle + 2.283347357 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.164960e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.322020e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.322020e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.012580 sec - 3,376,070,689 cycles:u # 3.326 GHz - 8,192,979,761 instructions:u # 2.43 insn per cycle - 1.015651914 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.823101e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.001328e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.001328e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 
1.269969 sec + 3,728,138,097 cycles # 2.926 GHz + 8,250,735,018 instructions # 2.21 insn per cycle + 1.274926428 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.224555e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.400507e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.400507e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 0.970770 sec - 3,236,501,289 cycles:u # 3.325 GHz - 7,771,054,439 instructions:u # 2.40 insn per cycle - 0.973791756 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 
9.356043e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.069996e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.069996e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.201889 sec + 3,525,312,934 cycles # 2.923 GHz + 7,861,079,341 instructions # 2.23 insn per cycle + 1.206782783 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3193) (512y: 20) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.192270e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.359162e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.359162e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 
0.993442 sec - 2,851,654,073 cycles:u # 2.863 GHz - 5,993,393,284 instructions:u # 2.10 insn per cycle - 0.996475502 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2367) (512y: 24) (512z: 2156) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.081368e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.847086e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.847086e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.564981 sec + 3,252,144,204 cycles # 2.073 GHz + 6,095,772,749 instructions # 1.87 insn per cycle + 1.569858235 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2377) (512y: 24) (512z: 2156) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index fbd40bbc03..c228b2c37b 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -36,31 +36,60 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:43:11 +DATE: 2023-10-25_19:03:47 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.513457e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.340252e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.340252e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 +TOTAL : 0.650789 sec + 2,641,546,515 cycles # 3.016 GHz + 4,117,903,371 instructions # 1.56 insn per cycle + 0.935043223 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 2.028811e+00 +Avg ME (F77/CUDA) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.685476e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.763690e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.763690e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 4.049978 sec - 14,009,689,743 cycles:u # 3.456 GHz - 45,309,289,127 instructions:u # 3.23 insn per cycle - 4.053957938 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.972248e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.026239e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.026239e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.440996 sec + 16,702,681,594 cycles # 3.067 GHz + 45,351,045,297 instructions # 2.72 insn per cycle + 5.446683603 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -71,23 +100,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.034966e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.463217e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.463217e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.892109 sec - 6,484,498,984 cycles:u # 3.422 GHz - 17,904,363,518 instructions:u # 2.76 insn per cycle - 1.895767588 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.605636e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.935791e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.935791e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.405458 sec + 7,261,686,615 cycles # 3.014 GHz + 17,953,553,750 instructions # 2.47 insn per cycle + 2.411441099 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -98,23 +127,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.150200e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.303661e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.303661e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.060115 sec - 3,504,074,762 cycles:u # 3.295 GHz - 8,416,815,935 instructions:u # 2.40 insn per cycle - 1.063970256 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.560330e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.721128e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.721128e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 
+TOTAL : 1.356872 sec + 3,928,188,681 cycles # 2.884 GHz + 8,488,830,304 instructions # 2.16 insn per cycle + 1.362856063 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -125,23 +154,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.202115e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.371685e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.371685e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.023905 sec - 3,381,091,057 cycles:u # 3.291 GHz - 7,994,892,117 instructions:u # 2.36 insn per cycle - 1.027748594 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.116761e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.040482e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.040482e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.278534 sec + 3,740,578,395 cycles # 2.919 GHz + 8,100,523,605 instructions # 2.17 insn per cycle + 1.284258782 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3193) (512y: 20) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -152,23 +181,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.170635e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.331138e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.331138e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.047330 sec - 2,998,363,263 cycles:u # 2.854 GHz - 6,234,599,723 instructions:u # 2.08 insn per cycle - 1.051142008 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2367) (512y: 24) (512z: 2156) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.953315e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.671270e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.671270e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.636801 sec + 3,469,634,780 cycles # 2.114 GHz + 6,351,136,410 instructions # 1.83 insn per cycle + 1.642694122 seconds time elapsed +=Symbols in 
CPPProcess.o= (~sse4: 0) (avx2: 2377) (512y: 24) (512z: 2156) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index 20f261936e..5dc74dfed7 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:47:47 +DATE: 2023-10-25_19:16:08 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --common OMP= +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.063632e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.693440e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.968571e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 +TOTAL : 0.553622 sec + 2,317,649,288 cycles # 3.011 GHz + 3,439,584,300 instructions # 1.48 insn per cycle + 0.828678237 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 2.028811e+00 +Avg ME (F77/CUDA) = 2.0288499749731272 
+Relative difference = 1.9210746159747678e-05 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.690186e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.768818e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.768818e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.989342e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.044248e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.044248e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 4.017811 sec - 13,932,869,761 cycles:u # 3.466 GHz - 45,275,831,531 instructions:u # 3.25 insn per cycle - 4.020754686 seconds time elapsed +TOTAL : 5.407392 sec + 16,682,462,365 cycles # 3.083 GHz + 45,337,082,640 instructions # 2.72 insn per cycle + 5.412277054 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 6.443528218283898e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.076713e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.512062e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.512062e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.799956e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.158589e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.158589e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.844577 sec - 6,351,344,975 cycles:u # 3.438 GHz - 17,637,598,874 instructions:u # 2.78 insn per cycle - 1.847667927 seconds time elapsed +TOTAL : 2.320759 sec + 7,223,788,046 cycles # 3.108 GHz + 17,685,035,831 instructions # 2.45 insn per cycle + 2.325560432 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.167837e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.326502e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.326502e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.822474e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.004978e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.004978e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.011648 sec - 3,368,215,541 cycles:u # 3.324 GHz - 8,192,980,425 instructions:u # 2.43 insn per cycle - 1.014577547 seconds time elapsed +TOTAL : 1.324170 sec + 3,914,841,287 cycles # 2.948 GHz + 8,235,477,108 instructions # 2.10 insn per cycle + 1.328966517 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.225145e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.401523e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.401523e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.391242e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.078618e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.078618e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 0.971297 sec - 3,236,208,077 cycles:u # 3.323 GHz - 7,771,055,176 instructions:u # 2.40 insn per cycle - 0.974309255 seconds time elapsed +TOTAL : 1.253021 sec + 3,707,252,167 cycles # 2.949 GHz + 7,811,825,096 instructions # 2.11 insn per cycle + 1.257855017 seconds 
time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3193) (512y: 20) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.195467e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.362661e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.362661e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.106245e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.850378e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.850378e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 0.991718 sec - 2,846,173,659 cycles:u # 2.862 GHz - 5,993,394,886 instructions:u # 2.11 insn per cycle - 0.994890275 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 
2367) (512y: 24) (512z: 2156) +TOTAL : 1.611554 sec + 3,420,500,042 cycles # 2.117 GHz + 6,046,541,541 instructions # 1.77 insn per cycle + 1.616370057 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2377) (512y: 24) (512z: 2156) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt index 1e46c6ef40..7b90f03855 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt @@ -36,21 +36,51 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:46:49 +DATE: 2023-10-25_19:13:04 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --curhst OMP= +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.065322e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.700283e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.974533e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 0.498915 sec + 2,192,440,414 cycles # 3.004 GHz + 3,416,060,899 instructions # 1.56 insn per cycle + 0.787716102 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 2.028811e+00 +Avg ME (F77/CUDA) = 2.0288499749731272 
+Relative difference = 1.9210746159747678e-05 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe: Aborted - 2,256,667 cycles:u # 1.457 GHz - 3,112,424 instructions:u # 1.38 insn per cycle - 0.113157713 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.966466e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.020648e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.020648e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.415648 sec + 16,529,341,853 cycles # 3.050 GHz + 45,309,866,535 instructions # 2.74 insn per cycle + 5.420402206 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -60,14 +90,23 @@ Relative difference = 6.443528218283898e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe: Aborted - 2,089,876 cycles:u # 1.363 GHz - 3,116,091 instructions:u # 1.49 insn per cycle - 0.110633377 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.693978e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.035776e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.035776e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.316382 sec + 7,054,105,370 cycles # 3.040 GHz + 17,671,721,016 instructions # 2.51 insn per cycle + 2.321167806 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -77,14 +116,23 @@ Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe: Aborted - 1,922,821 cycles:u # 1.310 GHz - 3,116,914 instructions:u # 1.62 insn per cycle - 0.115774978 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.830241e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.005280e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.005280e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.269396 sec + 3,738,194,435 cycles # 2.936 GHz + 8,251,074,147 instructions # 2.21 insn per cycle + 1.274249735 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -94,14 +142,23 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe: Aborted - 2,134,397 cycles:u # 1.376 GHz - 3,116,527 instructions:u # 1.46 insn per cycle - 0.119358887 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.392639e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.077705e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.077705e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.198333 sec + 3,534,628,897 cycles # 2.940 GHz + 7,862,127,936 instructions # 2.22 insn per cycle + 1.203142647 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3193) (512y: 20) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -111,14 +168,23 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe: Aborted - 1,911,214 cycles:u # 1.359 GHz - 3,116,839 instructions:u # 1.63 insn per cycle - 0.119822951 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2367) (512y: 24) (512z: 2156) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.097193e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.836994e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.836994e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.558825 sec + 3,254,164,123 cycles # 2.082 GHz + 6,095,387,295 instructions # 1.87 insn per cycle + 1.563579525 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2377) (512y: 24) (512z: 2156) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index 51cad3bfd5..fdd315eb16 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -36,30 +36,53 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:45:55 +DATE: 2023-10-25_19:10:03 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.170150e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.660447e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.933538e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 +TOTAL : 0.602645 sec + 2,479,148,123 cycles # 2.998 GHz + 3,864,785,421 instructions # 1.56 insn per cycle + 0.885563297 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 2.028811e+00 +Avg ME (F77/CUDA) = 2.0288499749731272 +Relative difference = 1.9210746159747678e-05 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.690132e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.768774e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.768774e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 4.017782 sec - 13,937,233,338 cycles:u # 3.467 GHz - 45,275,831,337 instructions:u # 3.25 insn per cycle - 4.020843807 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.957679e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.011741e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.011741e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.439322 sec + 16,519,636,229 cycles # 3.035 GHz + 45,307,914,586 instructions # 2.74 insn per cycle + 5.444150127 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +92,23 @@ Relative difference = 6.443528218283898e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.090999e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.526280e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.526280e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.839059 sec - 6,339,758,318 cycles:u # 3.443 GHz - 17,637,599,906 instructions:u # 2.78 insn per cycle - 1.842053865 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.773026e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.136685e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.136685e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.280272 sec + 7,068,660,475 cycles # 3.094 GHz + 17,671,452,966 instructions # 2.50 insn per cycle + 2.285202578 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +118,23 @@ Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.166572e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.328414e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.328414e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.011768 sec - 3,373,820,227 cycles:u # 3.326 GHz - 8,192,980,180 instructions:u # 2.43 insn per cycle - 1.014803347 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.805997e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.992625e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.992625e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL 
: 1.272643 sec + 3,731,426,488 cycles # 2.926 GHz + 8,249,195,685 instructions # 2.21 insn per cycle + 1.277241448 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +144,23 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.227633e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.404021e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.404021e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 0.969289 sec - 3,230,822,555 cycles:u # 3.324 GHz - 7,771,053,918 instructions:u # 2.41 insn per cycle - 0.972355237 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] 
(23) = ( 9.399444e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.076890e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.076890e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.197731 sec + 3,530,731,182 cycles # 2.938 GHz + 7,860,812,005 instructions # 2.23 insn per cycle + 1.202556944 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3193) (512y: 20) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +170,23 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.189028e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.358334e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.358334e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) 
GeV^0 -TOTAL : 0.996460 sec - 2,860,549,247 cycles:u # 2.863 GHz - 5,993,395,533 instructions:u # 2.10 insn per cycle - 0.999433875 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2367) (512y: 24) (512z: 2156) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.091593e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.835046e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.835046e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.560736 sec + 3,257,981,590 cycles # 2.083 GHz + 6,095,878,647 instructions # 1.87 insn per cycle + 1.565536774 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2377) (512y: 24) (512z: 2156) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index d0560c7753..663a41142c 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:20:10 +DATE: 2023-10-25_18:35:49 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.096390e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.766631e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.047368e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 0.467013 sec + 2,056,734,142 cycles # 2.997 GHz + 2,992,962,147 instructions # 1.46 insn per cycle + 0.744434014 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 2.028811e+00 +Avg ME (F77/CUDA) = 2.0288499749731272 +Relative 
difference = 1.9210746159747678e-05 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.744316e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.826216e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.826216e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.941233 sec - 13,671,955,127 cycles:u # 3.467 GHz - 44,452,988,618 instructions:u # 3.25 insn per cycle - 3.944344997 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.031007e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.087778e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.087778e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.245108 sec + 16,244,805,475 cycles # 3.095 GHz + 44,484,348,190 instructions # 2.74 insn per cycle + 5.249986656 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 576) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 6.443528218283898e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 7.366823e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.013794e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.013794e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.540894 sec - 5,300,179,865 cycles:u # 3.434 GHz - 16,936,396,176 instructions:u # 3.20 insn per cycle - 1.543868299 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.358927e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.815806e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.815806e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.039403 sec + 6,083,169,654 cycles # 2.982 GHz + 16,972,342,736 instructions # 2.79 insn per cycle + 2.044363213 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2881) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 8.737785e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.599720e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.599720e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.312380 sec - 4,392,970,186 cycles:u # 3.341 GHz - 10,155,452,879 instructions:u # 2.31 insn per cycle - 1.315393351 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.400912e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.009313e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.009313e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 
1.720669 sec + 5,008,260,515 cycles # 2.904 GHz + 10,214,809,232 instructions # 2.04 insn per cycle + 1.725527481 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 8.830088e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.708995e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.708995e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.302046 sec - 4,359,069,122 cycles:u # 3.341 GHz - 9,845,204,780 instructions:u # 2.26 insn per cycle - 1.305123490 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 
6.537295e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.168989e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.168989e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.684974 sec + 4,937,248,513 cycles # 2.923 GHz + 9,938,060,774 instructions # 2.01 insn per cycle + 1.689989340 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3789) (512y: 2) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 8.229108e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.998758e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.998758e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 
1.388608 sec - 3,950,373,718 cycles:u # 2.839 GHz - 8,348,834,891 instructions:u # 2.11 insn per cycle - 1.391676114 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.077645e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.456866e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.456866e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 2.147132 sec + 4,363,010,014 cycles # 2.028 GHz + 8,442,845,303 instructions # 1.94 insn per cycle + 2.152072523 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2882) (512y: 4) (512z: 2751) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index acbb85ecfe..d0aa02b37a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:33:49 +DATE: 2023-10-25_18:54:38 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 2 OMP= +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.072434e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.686668e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.952500e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 0.467510 sec + 2,069,160,049 cycles # 3.005 GHz + 2,965,842,897 instructions # 1.43 insn per cycle + 0.745986723 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 2.028811e+00 +Avg ME (F77/CUDA) = 2.0288499749731272 +Relative 
difference = 1.9210746159747678e-05 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.320694e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.441368e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.441368e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.278201 sec - 11,344,861,246 cycles:u # 3.458 GHz - 34,349,491,078 instructions:u # 3.03 insn per cycle - 3.281385138 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.486168e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.574733e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.574733e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.300765 sec + 12,613,573,541 cycles # 2.930 GHz + 34,394,223,521 instructions # 2.73 insn per cycle + 4.305708849 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 696) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 7.028214e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.614990e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.614990e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.610057 sec - 5,538,663,516 cycles:u # 3.434 GHz - 14,842,604,118 instructions:u # 2.68 insn per cycle - 1.613149254 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.423404e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.902037e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.902037e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.018730 sec + 6,098,231,874 cycles # 3.015 GHz + 14,875,099,697 instructions # 2.44 insn per cycle + 2.023701584 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3009) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 1.8746278463897685e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 9.416844e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.042848e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.042848e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.226475 sec - 4,099,723,599 cycles:u # 3.335 GHz - 8,983,212,555 instructions:u # 2.19 insn per cycle - 1.229512528 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.288210e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.081005e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.081005e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 
1.521473 sec + 4,271,996,539 cycles # 2.800 GHz + 9,042,309,170 instructions # 2.12 insn per cycle + 1.526427437 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4445) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 9.857617164523888e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 9.664502e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.072237e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.072237e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.202455 sec - 4,012,953,236 cycles:u # 3.334 GHz - 8,585,269,980 instructions:u # 2.14 insn per cycle - 1.205618514 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 
7.762492e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.667202e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.667202e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.433726 sec + 4,213,011,776 cycles # 2.930 GHz + 8,676,320,241 instructions # 2.06 insn per cycle + 1.438715591 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4244) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 9.857617164523888e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 9.197420e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.016756e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.016756e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 
1.254725 sec - 3,577,442,121 cycles:u # 2.845 GHz - 7,727,026,122 instructions:u # 2.16 insn per cycle - 1.257759402 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.878645e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.382089e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.382089e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.866240 sec + 3,836,736,420 cycles # 2.052 GHz + 7,820,066,058 instructions # 2.04 insn per cycle + 1.871114134 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4420) (512y: 0) (512z: 2556) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index ca3ab1b14a..de54279b1b 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:34:00 +DATE: 2023-10-25_18:55:01 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 2 OMP= +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.082452e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.759103e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.038632e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 0.468671 sec + 2,051,049,137 cycles # 2.975 GHz + 2,933,032,180 instructions # 1.43 insn per cycle + 0.745977930 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 2.028811e+00 +Avg ME (F77/CUDA) = 2.0288499749731272 +Relative 
difference = 1.9210746159747678e-05 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.559167e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.698111e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.698111e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.066504 sec - 10,619,079,620 cycles:u # 3.460 GHz - 35,086,958,769 instructions:u # 3.30 insn per cycle - 3.069755465 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 471) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.728308e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.832886e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.832886e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 3.926359 sec + 11,754,711,596 cycles # 2.991 GHz + 35,130,335,361 instructions # 2.99 insn per cycle + 3.931207276 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 470) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 7.310334e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.946615e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.946615e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 1.551856 sec - 5,338,107,838 cycles:u # 3.434 GHz - 14,452,486,870 instructions:u # 2.71 insn per cycle - 1.554878279 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.711585e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.225013e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.225013e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 1.918624 sec + 5,946,615,728 cycles # 3.093 GHz + 14,483,958,293 instructions # 2.44 insn per cycle + 1.923457186 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 1.7661780742548925e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.030423e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.152738e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.152738e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.130833 sec - 3,775,658,954 cycles:u # 3.331 GHz - 8,802,115,602 instructions:u # 2.33 insn per cycle - 1.133928357 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3563) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.855810e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.786782e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.786782e+05 ) sec^-1 
+MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.419134 sec + 4,148,821,411 cycles # 2.915 GHz + 8,888,021,481 instructions # 2.14 insn per cycle + 1.424042048 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3576) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 1.0385521077446488e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 9.649559e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.070576e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.070576e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.201389 sec - 4,017,585,046 cycles:u # 3.337 GHz - 8,333,398,185 instructions:u # 2.07 insn per cycle - 1.204453170 seconds time 
elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.911621e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.860580e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.860580e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.406850 sec + 4,137,327,451 cycles # 2.932 GHz + 8,424,234,551 instructions # 2.04 insn per cycle + 1.411791633 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3320) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 1.0385521077446488e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 9.359258e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.036707e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.036707e+06 ) sec^-1 
-MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.235370 sec - 3,525,732,454 cycles:u # 2.848 GHz - 7,620,121,471 instructions:u # 2.16 insn per cycle - 1.238483305 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.947023e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.462476e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.462476e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.845349 sec + 3,784,294,920 cycles # 2.046 GHz + 7,713,085,184 instructions # 2.04 insn per cycle + 1.850240418 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3436) (512y: 0) (512z: 2108) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 0a44353603..e5b5571dad 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:20:22 +DATE: 2023-10-25_18:36:14 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.194168e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.177440e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.271526e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.510074 sec + 2,249,628,773 cycles # 3.005 GHz + 3,213,054,699 instructions # 1.43 insn per cycle + 0.807578099 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 2.028807e+00 +Avg ME (F77/CUDA) = 2.0288063423243874 +Relative 
difference = 3.241686432649386e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.391314e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.462263e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.462263e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.523260 sec - 15,663,125,785 cycles:u # 3.462 GHz - 45,621,646,860 instructions:u # 2.91 insn per cycle - 4.526424672 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.906485e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.954717e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.954717e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.603351 sec + 17,321,977,891 cycles # 3.090 GHz + 45,555,371,368 instructions # 2.63 insn per cycle + 5.608662187 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.317442e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.568260e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.568260e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.561526 sec - 8,828,231,399 cycles:u # 3.443 GHz - 27,584,476,226 instructions:u # 3.12 insn per cycle - 2.564640940 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.376355e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.544078e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.544078e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.208637 sec + 9,923,474,484 cycles # 3.089 GHz + 27,529,097,588 instructions # 2.77 insn per cycle + 3.213810203 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2591) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 7.146962e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.885928e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.885928e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.595253 sec - 5,326,841,835 cycles:u # 3.334 GHz - 12,464,651,168 instructions:u # 2.34 insn per cycle - 1.598410034 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.199043e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.595306e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.595306e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 
2.118667 sec + 5,989,500,217 cycles # 2.821 GHz + 12,420,938,473 instructions # 2.07 insn per cycle + 2.123732572 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2753) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 7.565952e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.372890e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.372890e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.515630 sec - 5,057,629,411 cycles:u # 3.331 GHz - 11,816,622,903 instructions:u # 2.34 insn per cycle - 1.518780363 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 
5.938752e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.452534e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.452534e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.867638 sec + 5,462,355,916 cycles # 2.919 GHz + 11,803,822,809 instructions # 2.16 insn per cycle + 1.872842798 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2503) (512y: 146) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.738673e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.362118e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.362118e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 
1.686665 sec - 4,783,728,106 cycles:u # 2.832 GHz - 8,082,127,517 instructions:u # 1.69 insn per cycle - 1.689804959 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1655) (512y: 126) (512z: 1854) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.872672e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.090666e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.090666e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.812736 sec + 5,598,441,704 cycles # 1.988 GHz + 8,083,507,451 instructions # 1.44 insn per cycle + 2.817822099 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1660) (512y: 126) (512z: 1854) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index c8b63d4082..be7fa646e4 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -36,30 +36,51 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:20:37 +DATE: 2023-10-25_18:36:41 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.207178e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.183934e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.279144e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.508835 sec + 2,247,853,776 cycles # 3.008 GHz + 3,199,999,470 instructions # 1.42 insn per cycle + 0.806333964 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 2.028807e+00 +Avg ME (F77/CUDA) = 2.0288063423243874 +Relative 
difference = 3.241686432649386e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.452190e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.528965e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.528965e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.412606 sec - 15,282,987,184 cycles:u # 3.461 GHz - 44,614,253,384 instructions:u # 2.92 insn per cycle - 4.415862858 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.962576e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.013722e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.013722e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.445467 sec + 16,864,922,350 cycles # 3.095 GHz + 44,544,928,625 instructions # 2.64 insn per cycle + 5.450679101 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 574) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -69,23 +90,23 @@ Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.463995e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.731689e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.731689e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.481891 sec - 8,549,716,561 cycles:u # 3.441 GHz - 26,228,951,283 instructions:u # 3.07 insn per cycle - 2.485008452 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.463332e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.638084e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.638084e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.129878 sec + 9,472,664,981 cycles # 3.022 GHz + 26,172,690,479 instructions # 2.76 insn per cycle + 3.134859663 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2397) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -95,23 +116,23 @@ Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.358020e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.928350e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.928350e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.778069 sec - 5,943,404,216 cycles:u # 3.337 GHz - 14,010,901,248 instructions:u # 2.36 insn per cycle - 1.781311393 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2876) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.769147e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.097427e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.097427e+05 ) sec^-1 
+MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.300686 sec + 6,708,376,683 cycles # 2.910 GHz + 13,967,973,168 instructions # 2.08 insn per cycle + 2.306085049 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2875) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -121,23 +142,23 @@ Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.603546e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.207873e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.207873e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.717139 sec - 5,740,584,949 cycles:u # 3.338 GHz - 13,416,395,069 instructions:u # 2.34 insn per cycle - 1.720286760 seconds time 
elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2516) (512y: 302) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.897600e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.248967e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.248967e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.245188 sec + 6,373,380,149 cycles # 2.833 GHz + 13,408,335,115 instructions # 2.10 insn per cycle + 2.250462198 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2517) (512y: 302) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -147,23 +168,23 @@ Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 7.005999e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
7.675205e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.675205e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.626320 sec - 4,611,437,808 cycles:u # 2.830 GHz - 9,189,208,901 instructions:u # 1.99 insn per cycle - 1.629854221 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.921480e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.136739e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.136739e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.776161 sec + 5,570,521,591 cycles # 2.004 GHz + 9,179,596,120 instructions # 1.65 insn per cycle + 2.781332851 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1439) (512y: 212) (512z: 2053) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 7987be9602..ebc965cc92 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_18:20:51 +DATE: 2023-10-25_18:37:09 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.017326e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.054319e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.066811e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.454634 sec + 1,998,901,545 cycles # 2.995 GHz + 2,904,650,993 instructions # 1.45 insn per cycle + 0.724253457 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.124031e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.322289e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.333522e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.587604 sec + 2,471,633,267 cycles # 3.015 GHz + 3,730,138,752 instructions # 1.51 insn per cycle + 0.878825359 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.413122e+00 +Avg ME (F77/CUDA) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.240138e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.260111e+04 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.260111e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.082762 sec - 17,702,531,037 cycles:u # 3.482 GHz - 58,956,924,456 instructions:u # 3.33 insn per cycle - 5.085034727 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.582748e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.595260e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.595260e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.365564 sec + 19,741,191,972 cycles # 3.100 GHz + 58,964,992,174 instructions # 2.99 insn per cycle + 6.369485962 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1189) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.124708e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.194947e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.194947e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.699702 sec - 9,406,628,400 cycles:u # 3.482 GHz - 30,994,148,991 instructions:u # 3.29 insn per cycle - 2.701936607 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.832276e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.875225e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.875225e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.412509 sec + 10,571,839,132 cycles # 3.095 GHz + 30,995,598,646 instructions # 2.93 insn per cycle + 3.416791050 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5217) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.277320e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.307020e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.307020e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.307271 sec - 4,422,416,567 cycles:u # 3.379 GHz - 11,308,048,756 instructions:u # 2.56 insn per cycle - 1.309311554 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.671174e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.843577e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.843577e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.715587 sec + 4,991,139,375 cycles # 2.903 GHz + 11,305,706,976 instructions # 2.27 insn per cycle + 1.719836361 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4552) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.411319e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.447250e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.447250e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.185443 sec - 4,012,065,391 cycles:u # 3.380 GHz - 10,479,719,967 instructions:u # 2.61 insn per cycle - 1.187626216 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.100643e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.122719e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.122719e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 
+TOTAL : 1.509863 sec + 4,400,565,724 cycles # 2.908 GHz + 10,484,557,861 instructions # 2.38 insn per cycle + 1.513887056 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4296) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.593142e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.639018e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.639018e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.054999 sec - 2,965,259,928 cycles:u # 2.806 GHz - 5,897,351,428 instructions:u # 1.99 insn per cycle - 1.057037566 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 
1544) (512y: 95) (512z: 3573) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.479826e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.587663e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.587663e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.214647 sec + 4,100,640,054 cycles # 1.849 GHz + 5,907,026,834 instructions # 1.44 insn per cycle + 2.218934371 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1551) (512y: 95) (512z: 3573) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index f6fba56746..84eb682463 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -36,31 +36,77 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_18:43:22 +DATE: 2023-10-25_19:04:11 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.737533e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.009392e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.009392e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.482468 sec + 2,092,072,357 cycles # 3.017 GHz + 3,191,388,192 instructions # 1.53 insn per cycle + 0.750549951 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.824576e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.948428e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.948428e+06 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.804158 sec + 3,165,815,286 cycles # 3.022 GHz + 5,087,211,394 instructions # 1.61 insn per cycle + 1.108277579 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.413122e+00 +Avg ME (F77/CUDA) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 +OK 
(relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.240242e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.260203e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.260203e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.086432 sec - 17,723,527,014 cycles:u # 3.483 GHz - 58,965,934,898 instructions:u # 3.33 insn per cycle - 5.088899080 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.552798e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.565584e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.565584e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.447784 sec + 19,812,659,516 cycles # 3.072 GHz + 58,973,017,270 instructions # 2.98 insn per cycle + 6.451999180 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1189) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -71,23 +117,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.119391e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.188626e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.188626e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.706995 sec - 9,423,619,443 cycles:u # 3.479 GHz - 31,038,511,200 instructions:u # 3.29 insn per cycle - 2.709153553 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.843686e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.887129e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.887129e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.410617 sec + 10,606,495,630 cycles # 3.107 GHz + 31,045,364,778 instructions # 2.93 insn per cycle + 3.414818481 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5217) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -98,23 +144,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.274205e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.303591e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.303591e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.314617 sec - 4,445,320,775 cycles:u # 3.377 GHz - 11,353,445,840 instructions:u # 2.55 insn per cycle - 1.316821327 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.635348e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.810089e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.810089e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) 
GeV^-2 +TOTAL : 1.728990 sec + 5,025,463,399 cycles # 2.900 GHz + 11,356,936,586 instructions # 2.26 insn per cycle + 1.733253508 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4552) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -125,23 +171,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.407034e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.442978e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.442978e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.193915 sec - 4,035,505,893 cycles:u # 3.375 GHz - 10,525,116,592 instructions:u # 2.61 insn per cycle - 1.196189763 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.091196e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.113656e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.113656e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.530073 sec + 4,437,395,928 cycles # 2.894 GHz + 10,533,774,197 instructions # 2.37 insn per cycle + 1.534310467 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4296) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -152,23 +198,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.589793e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.635308e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.635308e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.061298 sec - 2,984,133,149 cycles:u # 2.807 GHz - 5,931,522,413 instructions:u # 1.99 insn per cycle - 1.063389582 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1544) (512y: 95) (512z: 3573) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.778593e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.893791e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.893791e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.137109 sec + 4,135,271,327 cycles # 1.933 GHz + 5,946,987,935 instructions # 1.44 insn per cycle + 2.141484524 seconds time elapsed +=Symbols in 
CPPProcess.o= (~sse4: 0) (avx2: 1551) (512y: 95) (512z: 3573) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index e5a4c81942..5da42e2dfc 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_18:21:05 +DATE: 2023-10-25_18:37:38 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.980838e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.046634e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.059100e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.459445 sec + 2,001,624,062 cycles # 2.971 GHz + 2,897,227,747 instructions # 1.45 insn per cycle + 0.730848040 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.119062e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.315389e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.326570e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.584635 sec + 2,502,191,176 cycles # 3.029 GHz + 3,799,894,385 instructions # 1.52 insn per cycle + 0.885312342 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.413122e+00 +Avg ME (F77/CUDA) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.235368e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.255294e+04 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.255294e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.090662 sec - 17,743,101,818 cycles:u # 3.485 GHz - 59,223,602,918 instructions:u # 3.34 insn per cycle - 5.092817303 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1314) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.578276e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.590963e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.590963e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.377287 sec + 19,776,032,592 cycles # 3.100 GHz + 59,242,647,666 instructions # 3.00 insn per cycle + 6.381344291 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1315) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.179120e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.249951e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.249951e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.675866 sec - 9,324,042,232 cycles:u # 3.483 GHz - 30,702,394,700 instructions:u # 3.29 insn per cycle - 2.677861609 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.838897e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.882635e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.882635e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.407218 sec + 10,428,150,513 cycles # 3.058 GHz + 30,703,821,983 instructions # 2.94 insn per cycle + 3.411368559 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5043) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.244479e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.272491e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.272491e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.340778 sec - 4,537,512,394 cycles:u # 3.380 GHz - 11,787,142,879 instructions:u # 2.60 insn per cycle - 1.342784392 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.472201e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.635749e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.635749e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 
+TOTAL : 1.750914 sec + 5,109,907,048 cycles # 2.913 GHz + 11,785,108,632 instructions # 2.31 insn per cycle + 1.754997634 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4668) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.338943e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.371481e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.371481e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.248173 sec - 4,224,381,209 cycles:u # 3.380 GHz - 11,027,417,491 instructions:u # 2.61 insn per cycle - 1.250158596 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 1.023891e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.043074e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.043074e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.621556 sec + 4,691,054,117 cycles # 2.887 GHz + 11,032,599,545 instructions # 2.35 insn per cycle + 1.625732931 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4331) (512y: 245) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.589075e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.635288e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.635288e+05 ) sec^-1 -MeanMatrixElemValue = ( 
4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.057347 sec - 2,971,264,960 cycles:u # 2.806 GHz - 6,175,004,911 instructions:u # 2.08 insn per cycle - 1.059302450 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1458) (512y: 139) (512z: 3673) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.596531e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.705229e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.705229e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.180528 sec + 4,124,129,963 cycles # 1.890 GHz + 6,174,744,538 instructions # 1.50 insn per cycle + 2.184771281 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1461) (512y: 139) (512z: 3675) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 6d002fc9e5..758058b159 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_18:21:19 +DATE: 2023-10-25_18:38:06 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.611847e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.385260e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.471189e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 +TOTAL : 0.437775 sec + 1,949,752,289 cycles # 2.999 GHz + 2,775,143,872 instructions # 1.42 insn per cycle + 0.707448913 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.418789e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.455087e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.521596e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 +TOTAL : 0.482861 sec + 2,123,251,884 cycles # 3.013 GHz + 3,090,198,407 instructions # 1.46 insn per cycle + 0.761733855 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.412608e+00 +Avg ME (F77/CUDA) = 1.4132214346515752 +Relative difference = 0.00043425681546129636 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.297056e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.314416e+04 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.314416e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724761e+02 +- 2.665338e+02 ) GeV^-2 -TOTAL : 4.993685 sec - 17,409,943,416 cycles:u # 3.485 GHz - 59,434,839,863 instructions:u # 3.41 insn per cycle - 4.995701548 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.632015e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.645099e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.645099e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.245504 sec + 19,420,521,245 cycles # 3.108 GHz + 59,463,843,270 instructions # 3.06 insn per cycle + 6.249442801 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 961) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 2.1728426918172542e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.094212e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.113675e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.113675e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.724754e+02 +- 2.665334e+02 ) GeV^-2 -TOTAL : 1.520331 sec - 5,295,494,774 cycles:u # 3.480 GHz - 16,897,912,284 instructions:u # 3.19 insn per cycle - 1.522266871 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5857) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.406220e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.547669e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.547669e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 1.968760 sec + 5,998,257,000 cycles # 3.042 GHz + 16,914,468,455 instructions # 2.82 insn per cycle + 1.972914932 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5858) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 3.2890090308261873e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.452720e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.551675e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.551675e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743692e+02 +- 2.676600e+02 ) GeV^-2 -TOTAL : 0.692108 sec - 2,341,513,988 cycles:u # 3.376 GHz - 6,121,248,476 instructions:u # 2.61 insn per cycle - 0.694004634 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.859553e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.925073e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.925073e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 0.900948 sec + 2,632,220,925 cycles # 2.911 GHz + 6,140,096,248 instructions # 2.33 insn per cycle + 0.904996982 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5019) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.710393e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.822562e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.822562e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743692e+02 +- 2.676600e+02 ) GeV^-2 -TOTAL : 0.629663 sec - 2,122,516,384 cycles:u # 3.370 GHz - 5,675,155,914 instructions:u # 2.67 insn per cycle - 0.631514063 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.072824e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.155063e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.155063e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 
+TOTAL : 0.809948 sec + 2,370,894,209 cycles # 2.915 GHz + 5,701,521,318 instructions # 2.40 insn per cycle + 0.814071799 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4804) (512y: 36) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.268970e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.437003e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.437003e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743730e+02 +- 2.676609e+02 ) GeV^-2 -TOTAL : 0.526047 sec - 1,487,926,291 cycles:u # 2.820 GHz - 3,336,026,026 instructions:u # 2.24 insn per cycle - 0.527951018 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 
2130) (512y: 40) (512z: 3776) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.607455e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.657302e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.657302e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.040929 sec + 2,057,132,306 cycles # 1.970 GHz + 3,365,579,683 instructions # 1.64 insn per cycle + 1.044863677 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2141) (512y: 39) (512z: 3775) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index e480dbbed6..48beeeb5ad 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -36,31 +36,77 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_18:43:36 +DATE: 2023-10-25_19:04:39 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.864071e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.240535e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.240535e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 +TOTAL : 0.462582 sec + 1,884,746,111 cycles # 2.811 GHz + 2,786,242,007 instructions # 1.48 insn per cycle + 0.730246160 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.695482e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.755030e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.755030e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.737500e+02 +- 4.776370e+02 ) GeV^-2 +TOTAL : 0.635503 sec + 2,447,308,351 cycles # 2.825 GHz + 3,823,535,894 instructions # 1.56 insn per cycle + 0.923575231 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.412608e+00 +Avg ME (F77/CUDA) = 1.4132214346515752 +Relative difference = 0.00043425681546129636 +OK 
(relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.292354e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.310416e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.310416e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724761e+02 +- 2.665338e+02 ) GeV^-2 -TOTAL : 5.003443 sec - 17,430,376,903 cycles:u # 3.483 GHz - 59,440,189,671 instructions:u # 3.41 insn per cycle - 5.005751709 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.567048e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.579966e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.579966e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.406446 sec + 19,444,692,901 cycles # 3.034 GHz + 59,468,886,107 instructions # 3.06 insn per cycle + 6.410558637 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 961) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -71,23 +117,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.086780e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.105961e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.105961e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.724754e+02 +- 2.665334e+02 ) GeV^-2 -TOTAL : 1.533556 sec - 5,339,863,398 cycles:u # 3.478 GHz - 16,943,311,888 instructions:u # 3.17 insn per cycle - 1.535527412 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5857) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.536135e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.681573e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.681573e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 1.943530 sec + 6,018,572,710 cycles # 3.091 GHz + 16,962,561,293 instructions # 2.82 insn per cycle + 1.947552922 seconds time elapsed +=Symbols in 
CPPProcess.o= (~sse4: 5858) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -98,23 +144,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.453334e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.552921e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.552921e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743692e+02 +- 2.676600e+02 ) GeV^-2 -TOTAL : 0.694570 sec - 2,347,934,356 cycles:u # 3.372 GHz - 6,155,421,859 instructions:u # 2.62 insn per cycle - 0.696575325 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.852963e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.918436e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.918436e+05 ) sec^-1 
+MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 0.908122 sec + 2,646,262,038 cycles # 2.903 GHz + 6,176,972,836 instructions # 2.33 insn per cycle + 0.912119450 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5019) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -125,23 +171,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.704131e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.815547e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.815547e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743692e+02 +- 2.676600e+02 ) GeV^-2 -TOTAL : 0.631667 sec - 2,134,776,064 cycles:u # 3.371 GHz - 5,709,329,585 instructions:u # 2.67 insn per cycle - 0.633691443 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.053883e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.135495e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.135495e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 0.821672 sec + 2,391,067,663 cycles # 2.897 GHz + 5,738,392,055 instructions # 2.40 insn per cycle + 0.825851117 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4804) (512y: 36) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -152,23 +198,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.268724e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.434771e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.434771e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743730e+02 +- 2.676609e+02 ) GeV^-2 -TOTAL : 0.528841 sec - 1,497,365,513 cycles:u # 2.823 GHz - 3,374,800,525 instructions:u # 2.25 insn per cycle - 0.530812857 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2130) (512y: 40) (512z: 3776) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.562282e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.609907e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.609907e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.075433 sec + 2,080,452,431 cycles # 1.928 GHz + 3,407,597,282 instructions # 1.64 insn per cycle + 1.079584991 seconds time elapsed +=Symbols in 
CPPProcess.o= (~sse4: 0) (avx2: 2141) (512y: 39) (512z: 3775) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index 8118424b1a..35d51d9f5b 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_18:21:29 +DATE: 2023-10-25_18:38:30 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.558695e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.304995e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.390429e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 +TOTAL : 0.441583 sec + 1,913,307,169 cycles # 2.938 GHz + 2,730,410,416 instructions # 1.43 insn per cycle + 0.710289745 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 248 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.431804e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.480539e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.548305e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 +TOTAL : 0.484144 sec + 2,122,361,683 cycles # 3.000 GHz + 3,092,428,798 instructions # 1.46 insn per cycle + 0.764929414 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.412608e+00 +Avg ME (F77/CUDA) = 1.4132214346515752 +Relative difference = 0.00043425681546129636 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.324498e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.342154e+04 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.342154e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724761e+02 +- 2.665338e+02 ) GeV^-2 -TOTAL : 4.952652 sec - 17,264,726,428 cycles:u # 3.485 GHz - 59,182,405,457 instructions:u # 3.43 insn per cycle - 4.954691408 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.622945e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.636379e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.636379e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.266974 sec + 19,387,707,588 cycles # 3.092 GHz + 59,211,783,711 instructions # 3.05 insn per cycle + 6.270947254 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1027) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 2.1728426918172542e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.160023e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.181788e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.181788e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.724754e+02 +- 2.665334e+02 ) GeV^-2 -TOTAL : 1.435031 sec - 4,998,214,324 cycles:u # 3.479 GHz - 16,692,678,560 instructions:u # 3.34 insn per cycle - 1.436889827 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5623) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.919855e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.077833e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.077833e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 1.856181 sec + 5,736,685,730 cycles # 3.085 GHz + 16,708,949,188 instructions # 2.91 insn per cycle + 1.860305013 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5624) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 3.2890090308261873e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.156379e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.233113e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.233113e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743692e+02 +- 2.676600e+02 ) GeV^-2 -TOTAL : 0.783752 sec - 2,651,680,008 cycles:u # 3.377 GHz - 6,788,110,157 instructions:u # 2.56 insn per cycle - 0.785591013 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.619777e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.669337e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.669337e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.031321 sec + 3,001,059,822 cycles # 2.901 GHz + 6,807,446,499 instructions # 2.27 insn per cycle + 1.035316846 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5670) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.329332e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.411541e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.411541e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743692e+02 +- 2.676600e+02 ) GeV^-2 -TOTAL : 0.726461 sec - 2,457,143,560 cycles:u # 3.375 GHz - 6,327,821,288 instructions:u # 2.58 insn per cycle - 0.728338919 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.758092e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.816843e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.816843e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 
+TOTAL : 0.951721 sec + 2,767,509,162 cycles # 2.897 GHz + 6,354,591,455 instructions # 2.30 insn per cycle + 0.955744845 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5429) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.935766e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.070635e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.070635e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743730e+02 +- 2.676609e+02 ) GeV^-2 -TOTAL : 0.582695 sec - 1,645,265,503 cycles:u # 2.816 GHz - 3,708,730,324 instructions:u # 2.25 insn per cycle - 0.584530143 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 
2375) (512y: 30) (512z: 4073) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.458284e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.499118e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.499118e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.146088 sec + 2,235,083,857 cycles # 1.946 GHz + 3,731,059,413 instructions # 1.67 insn per cycle + 1.150018435 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2381) (512y: 29) (512z: 4070) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 9fbb059e3e..392905595e 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_18:21:40 +DATE: 2023-10-25_18:38:54 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.991876e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.048685e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.061466e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.460934 sec + 1,966,721,120 cycles # 2.924 GHz + 2,827,577,653 instructions # 1.44 insn per cycle + 0.730736586 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.122528e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.320226e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.331429e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.588908 sec + 2,472,359,151 cycles # 3.012 GHz + 3,773,127,523 instructions # 1.53 insn per cycle + 0.879878807 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.413122e+00 +Avg ME (F77/CUDA) = 1.4131213755569487 +Relative difference = 4.418889885423659e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.209963e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.229586e+04 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.229586e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.130410 sec - 17,885,191,099 cycles:u # 3.485 GHz - 60,043,949,323 instructions:u # 3.36 insn per cycle - 5.132575599 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.519449e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.531484e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.531484e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.525526 sec + 20,092,492,706 cycles # 3.078 GHz + 60,052,973,297 instructions # 2.99 insn per cycle + 6.529664742 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1224) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.173070e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.243675e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.243675e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.678806 sec - 9,333,953,065 cycles:u # 3.482 GHz - 30,738,044,301 instructions:u # 3.29 insn per cycle - 2.680794166 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5353) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.869773e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.913961e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.913961e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.385815 sec + 10,415,517,369 cycles # 3.073 GHz + 30,737,885,914 instructions # 2.95 insn per cycle + 3.390029957 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5351) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.294459e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.325124e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.325124e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.290177 sec - 4,364,783,621 cycles:u # 3.379 GHz - 11,265,938,496 instructions:u # 2.58 insn per cycle - 1.292182431 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4684) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.784627e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.958931e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.958931e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.696342 sec + 4,938,080,705 cycles # 2.905 GHz + 11,263,764,405 instructions # 2.28 insn per cycle + 1.700575900 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 
0) (avx2: 4683) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.431372e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.468767e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.468767e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.169549 sec - 3,950,178,208 cycles:u # 3.373 GHz - 10,430,174,280 instructions:u # 2.64 insn per cycle - 1.171659305 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4420) (512y: 83) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.113465e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.136337e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.136337e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.492907 sec + 4,341,247,814 cycles # 2.902 GHz + 10,434,510,449 instructions # 2.40 insn per cycle + 1.497014311 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4418) (512y: 83) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.524309e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.566450e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.566450e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.101455 sec - 3,091,606,846 cycles:u # 2.803 GHz - 6,106,393,268 
instructions:u # 1.98 insn per cycle - 1.103586338 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2060) (512y: 117) (512z: 3648) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.532516e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.636201e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.636201e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.198970 sec + 4,210,314,244 cycles # 1.912 GHz + 6,111,580,609 instructions # 1.45 insn per cycle + 2.203311339 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2065) (512y: 117) (512z: 3649) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index 3b0432fb23..39bb25c947 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_18:21:54 +DATE: 2023-10-25_18:39:22 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.944009e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.041807e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.053824e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.457165 sec + 2,013,112,285 cycles # 3.009 GHz + 2,933,107,931 instructions # 1.46 insn per cycle + 0.726273814 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.111421e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.304827e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.316338e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.584817 sec + 2,503,239,468 cycles # 3.024 GHz + 3,758,528,305 instructions # 1.50 insn per cycle + 0.886403311 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 1.413122e+00 +Avg ME (F77/CUDA) = 1.4131213755569487 +Relative difference = 4.418889885423659e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.191064e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.210461e+04 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.210461e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 5.160089 sec - 17,987,750,192 cycles:u # 3.485 GHz - 60,252,944,387 instructions:u # 3.35 insn per cycle - 5.162228085 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.502057e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.514010e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.514010e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.570415 sec + 20,096,701,303 cycles # 3.057 GHz + 60,261,778,784 instructions # 3.00 insn per cycle + 6.574454844 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1271) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.300357e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.373693e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.373693e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.624914 sec - 9,146,018,588 cycles:u # 3.482 GHz - 30,447,600,201 instructions:u # 3.33 insn per cycle - 2.626891118 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5151) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.950569e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.996463e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.996463e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.330994 sec + 10,302,022,615 cycles # 3.090 GHz + 30,444,386,178 instructions # 2.96 insn per cycle + 3.335148743 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5149) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.254068e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.282604e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.282604e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.330736 sec - 4,503,482,582 cycles:u # 3.380 GHz - 11,782,394,378 instructions:u # 2.62 insn per cycle - 1.332765318 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4797) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.413979e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.578528e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.578528e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.762053 sec + 5,116,204,786 cycles # 2.900 GHz + 11,780,626,112 instructions # 2.30 insn per cycle + 1.766305951 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 
0) (avx2: 4795) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.352471e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.385869e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.385869e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.236434 sec - 4,182,708,332 cycles:u # 3.378 GHz - 10,987,479,789 instructions:u # 2.63 insn per cycle - 1.238494525 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4427) (512y: 236) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.034226e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.053843e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.053843e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.605857 sec + 4,642,992,473 cycles # 2.885 GHz + 10,992,793,436 instructions # 2.37 insn per cycle + 1.609875653 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4423) (512y: 238) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.534076e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.577234e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.577234e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.094306 sec - 3,073,764,406 cycles:u # 2.805 GHz - 6,347,125,131 
instructions:u # 2.06 insn per cycle - 1.096280225 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1957) (512y: 163) (512z: 3727) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.613528e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.720366e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.720366e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.175272 sec + 4,221,455,153 cycles # 1.938 GHz + 6,349,351,796 instructions # 1.50 insn per cycle + 2.179367593 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1959) (512y: 163) (512z: 3727) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 7bcc439d18..bff9233075 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:22:08 +DATE: 2023-10-25_18:39:51 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.468061e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.492682e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.494686e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.518589 sec + 2,245,805,905 cycles # 3.007 GHz + 3,544,979,174 instructions # 1.58 insn per cycle + 0.807315238 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.126268e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.153472e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.154624e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.016484 sec + 10,017,854,023 cycles # 3.069 GHz + 22,587,762,207 instructions # 2.25 insn per cycle + 3.322845777 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 6.626675e-04 +Avg ME (F77/CUDA) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.482693e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.484242e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.484242e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.613714 sec - 23,057,364,787 cycles:u # 3.486 GHz - 78,665,847,357 instructions:u # 3.41 insn per cycle - 6.615785324 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4809) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.955514e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.956404e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.956404e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.396540 sec + 25,870,300,414 cycles # 3.080 GHz + 78,705,757,349 instructions # 3.04 insn per cycle + 8.400556749 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4800) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.896420e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.902176e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.902176e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.357080 sec - 11,699,523,282 cycles:u # 3.483 GHz - 39,287,043,182 instructions:u # 3.36 insn per cycle - 3.359085316 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.628098e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.631366e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.631366e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.529617 sec + 13,157,831,550 cycles # 2.903 GHz + 39,316,654,466 instructions # 2.99 insn per cycle + 4.533882139 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13159) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.154371e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.157326e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.157326e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.427741 sec - 4,829,942,613 cycles:u # 3.379 GHz - 13,894,278,241 instructions:u # 2.88 insn per cycle - 1.429627727 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.489125e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.506110e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.506110e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) 
GeV^-4 +TOTAL : 1.941066 sec + 5,640,899,050 cycles # 2.901 GHz + 13,915,027,017 instructions # 2.47 insn per cycle + 1.945275776 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.302434e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.306375e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.306375e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.266463 sec - 4,283,197,115 cycles:u # 3.378 GHz - 12,535,901,357 instructions:u # 2.93 insn per cycle - 1.268383272 seconds time elapsed +OMP threads / `nproc --all` = 
1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.632867e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.655023e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.655023e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.711467 sec + 4,969,822,591 cycles # 2.898 GHz + 12,556,829,300 instructions # 2.53 insn per cycle + 1.715640499 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10999) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.682573e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.689418e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.689418e+04 ) sec^-1 
-MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.982400 sec - 2,738,419,974 cycles:u # 2.783 GHz - 6,418,503,086 instructions:u # 2.34 insn per cycle - 0.984400733 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1809) (512y: 102) (512z:10109) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.688750e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.702937e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.702937e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.142319 sec + 4,116,162,262 cycles # 1.918 GHz + 6,441,474,951 instructions # 1.56 insn per cycle + 2.146523645 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1816) (512y: 102) (512z:10110) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index fca93b3763..19b0ccbfe1 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -36,31 +36,77 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:43:57 +DATE: 2023-10-25_19:05:36 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.145401e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.455796e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.455796e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.511465 sec + 2,211,538,389 cycles # 2.999 GHz + 3,499,461,341 instructions # 1.58 insn per cycle + 0.799495725 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.639414e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.104964e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.104964e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.289425 sec + 10,927,544,611 cycles # 3.076 GHz + 23,831,419,819 instructions # 2.18 insn per cycle + 3.609486496 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 6.626675e-04 +Avg ME (F77/CUDA) = 6.6266731198158133E-004 +Relative difference = 
2.837296512218831e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.482180e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.483695e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.483695e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.617936 sec - 23,065,153,659 cycles:u # 3.484 GHz - 78,667,897,687 instructions:u # 3.41 insn per cycle - 6.620406229 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4809) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.955661e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.956599e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.956599e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.399286 sec + 25,889,610,376 cycles # 3.081 GHz + 78,711,674,763 instructions # 3.04 insn per cycle + 8.403464378 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4800) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -71,23 +117,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.898091e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.903902e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.903902e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.359416 sec - 11,696,429,718 cycles:u # 3.480 GHz - 39,295,562,023 instructions:u # 3.36 insn per cycle - 3.362004153 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.684905e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.688434e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.688434e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.463396 sec + 13,180,558,134 cycles # 2.951 GHz + 39,329,251,791 instructions # 2.98 insn per cycle + 4.467689901 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13159) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -98,23 +144,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.149196e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.152247e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.152247e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.437338 sec - 4,852,315,777 cycles:u # 3.371 GHz - 13,900,258,972 instructions:u # 2.86 insn per cycle - 1.439595273 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.312850e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.329128e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.329128e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 
2.368970e+00 ) GeV^-4 +TOTAL : 1.985951 sec + 5,671,057,559 cycles # 2.852 GHz + 13,925,731,418 instructions # 2.46 insn per cycle + 1.990267942 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -125,23 +171,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.301397e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.305354e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.305354e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.270948 sec - 4,288,304,124 cycles:u # 3.369 GHz - 12,541,884,276 instructions:u # 2.92 insn per cycle - 1.273284777 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.189591e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.210527e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.210527e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.797800 sec + 4,986,486,293 cycles # 2.768 GHz + 12,566,997,052 instructions # 2.52 insn per cycle + 1.802092456 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10999) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -152,23 +198,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.683733e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.690312e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.690312e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.984644 sec - 2,740,270,423 cycles:u # 2.778 GHz - 6,425,075,350 instructions:u # 2.34 insn per cycle - 0.986900097 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1809) (512y: 102) (512z:10109) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.650923e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.665003e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.665003e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.156621 sec + 4,130,305,981 cycles # 1.912 GHz + 6,453,079,741 instructions # 1.56 insn per cycle + 2.160974147 seconds time elapsed +=Symbols 
in CPPProcess.o= (~sse4: 0) (avx2: 1816) (512y: 102) (512z:10110) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index ad7708b47f..81203fa77a 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:47:58 +DATE: 2023-10-25_19:16:31 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --common OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.481338e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.505105e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.507123e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.501686 sec + 2,214,909,881 cycles # 3.031 GHz + 3,458,747,276 instructions # 1.56 insn per cycle + 0.800330270 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.151978e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.180537e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.181762e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 +TOTAL : 3.117189 sec + 10,377,272,213 cycles # 3.074 GHz + 22,017,651,367 instructions # 2.12 insn per cycle + 3.433688033 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 6.626675e-04 +Avg ME (F77/CUDA) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.480693e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.482227e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.482227e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.951603e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.952527e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.952527e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.619024 sec - 23,070,733,554 cycles:u # 3.485 GHz - 78,665,846,816 instructions:u # 3.41 insn per cycle - 6.621165760 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4809) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.414289 sec + 25,891,693,281 cycles # 3.076 GHz + 78,705,382,161 instructions # 3.04 insn per cycle + 8.418214136 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4800) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] 
-OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.890759e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.896486e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.896486e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.708393e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.711838e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.711838e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.360853 sec - 11,710,087,927 cycles:u # 3.483 GHz - 39,287,041,932 instructions:u # 3.35 insn per cycle - 3.362859101 seconds time elapsed +TOTAL : 4.432833 sec + 13,155,908,783 cycles # 2.966 GHz + 39,315,348,391 instructions # 2.99 insn per cycle + 4.436757068 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.152802e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.155855e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.155855e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.511504e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.528904e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.528904e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.429922 sec - 4,836,711,653 cycles:u # 3.379 GHz - 13,894,277,998 instructions:u # 2.87 insn per cycle - 1.431987434 seconds time elapsed +TOTAL : 1.937344 sec + 5,646,349,471 cycles # 2.910 GHz + 13,913,307,123 instructions # 2.46 insn per cycle + 1.941259569 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.303126e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.307117e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.307117e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.572932e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.594635e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.594635e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.265800 sec - 4,280,728,559 cycles:u # 3.378 GHz - 12,535,902,057 instructions:u # 2.93 insn per cycle - 1.267765042 seconds time elapsed +TOTAL : 1.724509 sec + 4,978,068,357 cycles # 2.882 GHz + 12,554,500,287 instructions # 2.52 insn per cycle + 1.728498714 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10999) 
(512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.685218e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.691889e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.691889e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.677554e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.691380e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.691380e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.980549 sec - 2,733,827,610 cycles:u # 2.783 GHz - 6,418,505,164 instructions:u # 2.35 insn per cycle - 0.982514108 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1809) (512y: 102) (512z:10109) +TOTAL : 
2.147049 sec + 4,126,422,075 cycles # 1.919 GHz + 6,439,114,110 instructions # 1.56 insn per cycle + 2.151134180 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1816) (512y: 102) (512z:10110) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt index dadc4798bd..983ed35921 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt @@ -36,21 +36,64 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:46:52 +DATE: 2023-10-25_19:13:27 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.486758e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.510432e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.512397e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.503841 sec + 2,219,754,850 cycles # 2.994 GHz + 3,493,120,915 instructions # 1.57 insn per cycle + 0.814222419 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.146385e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.174926e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.176115e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.060532 sec + 10,143,930,144 cycles # 3.066 GHz + 23,186,884,860 instructions # 2.29 insn per cycle + 3.364498351 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 6.626675e-04 +Avg ME (F77/CUDA) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe: Aborted - 2,347,012 cycles:u # 1.465 GHz - 3,109,741 instructions:u # 1.32 insn per cycle - 0.155906178 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4809) (avx2: 0) (512y: 0) (512z: 0) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) 
+Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.960187e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.961109e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.961109e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.392474 sec + 25,877,587,986 cycles # 3.088 GHz + 78,705,423,071 instructions # 3.04 insn per cycle + 8.396357877 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4800) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -60,14 +103,23 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe: Aborted - 1,966,509 cycles:u # 1.396 GHz - 3,114,406 instructions:u # 1.58 insn per cycle - 0.114047165 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] 
+OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.691020e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.694421e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.694421e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.452252 sec + 13,153,001,214 cycles # 2.952 GHz + 39,316,173,049 instructions # 2.99 insn per cycle + 4.456201629 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13159) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -77,14 +129,23 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe: Aborted - 1,955,937 cycles:u # 1.373 GHz - 3,115,607 instructions:u # 1.59 insn per cycle - 0.114119263 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 
8.443294e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.459986e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.459986e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.951557 sec + 5,638,519,517 cycles # 2.884 GHz + 13,914,420,326 instructions # 2.47 insn per cycle + 1.955513391 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -94,14 +155,23 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe: Aborted - 1,937,923 cycles:u # 1.379 GHz - 3,115,506 instructions:u # 1.61 insn per cycle - 0.126877087 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.662759e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.685664e+03 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.685664e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.706815 sec + 4,966,762,812 cycles # 2.905 GHz + 12,556,639,833 instructions # 2.53 insn per cycle + 1.710823467 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10999) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -111,14 +181,23 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe: Aborted - 1,973,927 cycles:u # 1.378 GHz - 3,115,404 instructions:u # 1.58 insn per cycle - 0.115762547 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1809) (512y: 102) (512z:10109) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.630990e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
7.645195e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.645195e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.158956 sec + 4,117,585,001 cycles # 1.904 GHz + 6,441,334,233 instructions # 1.56 insn per cycle + 2.163053685 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1816) (512y: 102) (512z:10110) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index 79b738f1a7..089d292aa8 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -36,30 +36,67 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:46:06 +DATE: 2023-10-25_19:10:27 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.218258e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.503307e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.505360e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.506835 sec + 2,221,452,108 cycles # 3.022 GHz + 3,523,570,836 instructions # 1.59 insn per cycle + 0.796752404 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.728232e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.175248e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.176437e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.202196 sec + 10,370,823,383 cycles # 3.013 GHz + 22,699,363,327 instructions # 2.19 insn per cycle + 3.506471268 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 6.626675e-04 +Avg ME (F77/CUDA) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.481274e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.482802e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.482802e+03 ) sec^-1 
-MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.617512 sec - 23,072,069,799 cycles:u # 3.486 GHz - 78,665,849,532 instructions:u # 3.41 insn per cycle - 6.619627854 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4809) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.964142e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.965067e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.965067e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.359175 sec + 25,872,166,576 cycles # 3.094 GHz + 78,706,432,099 instructions # 3.04 insn per cycle + 8.363176184 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4800) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +106,23 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.900904e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.906480e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.906480e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.353509 sec - 11,687,455,700 cycles:u # 3.484 GHz - 39,287,041,736 instructions:u # 3.36 insn per cycle - 3.355437633 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.675901e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.679176e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.679176e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.470613 sec + 13,168,571,852 cycles # 2.943 GHz + 39,316,143,486 instructions # 2.99 insn per cycle + 4.474685106 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13159) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +132,23 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.154470e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.157552e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.157552e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.427999 sec - 4,829,254,268 cycles:u # 3.378 GHz - 13,894,278,299 instructions:u # 2.88 insn per cycle - 1.430053175 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.475559e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.492856e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.492856e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 
2.368970e+00 ) GeV^-4 +TOTAL : 1.944338 sec + 5,656,434,910 cycles # 2.905 GHz + 13,914,488,872 instructions # 2.46 insn per cycle + 1.948357306 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +158,23 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.302434e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.306408e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.306408e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.266829 sec - 4,282,871,958 cycles:u # 3.376 GHz - 12,535,901,370 instructions:u # 2.93 insn per cycle - 1.268787686 seconds time elapsed +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.315923e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.336840e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.336840e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.769971 sec + 4,966,635,750 cycles # 2.801 GHz + 12,556,400,439 instructions # 2.53 insn per cycle + 1.774243834 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10999) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +184,23 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.685109e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.691611e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.691611e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.980976 sec - 2,733,292,461 cycles:u # 2.783 GHz - 6,418,502,985 instructions:u # 2.35 insn per cycle - 0.982884138 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1809) (512y: 102) (512z:10109) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.505641e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.519600e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.519600e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.194843 sec + 4,122,096,598 cycles # 1.876 GHz + 6,442,654,429 instructions # 1.56 insn per cycle + 2.198924835 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1816) (512y: 102) (512z:10110) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index b3631b047d..db28556fed 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:22:25 +DATE: 2023-10-25_18:40:27 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.480350e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.503709e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.505689e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.515106 sec + 2,241,115,881 cycles # 3.010 GHz + 3,491,500,030 instructions # 1.56 insn per cycle + 0.803761340 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.140925e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.168327e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.169450e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.010736 sec + 10,017,349,944 cycles # 3.076 GHz + 21,234,719,579 instructions # 2.12 insn per cycle + 3.315687380 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 6.626675e-04 +Avg ME (F77/CUDA) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.489325e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.490886e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.490886e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.597808 sec - 22,996,862,806 cycles:u # 3.485 GHz - 78,410,058,827 instructions:u # 3.41 insn per cycle - 6.599999372 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4141) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.950176e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.951182e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.951182e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.419375 sec + 25,820,844,871 cycles # 3.067 GHz + 78,455,782,361 instructions # 3.04 insn per cycle + 8.423501286 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4147) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.869615e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.875394e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.875394e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.375402 sec - 11,763,307,713 cycles:u # 3.483 GHz - 39,236,968,244 instructions:u # 3.34 insn per cycle - 3.377385849 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12921) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.695368e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.698841e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.698841e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.446590 sec + 13,096,365,032 cycles # 2.943 GHz + 39,266,931,549 instructions # 3.00 insn per cycle + 4.450776925 seconds time elapsed +=Symbols in 
CPPProcess.o= (~sse4:12925) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.162496e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.165531e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.165531e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.417701 sec - 4,795,627,688 cycles:u # 3.379 GHz - 14,010,552,440 instructions:u # 2.92 insn per cycle - 1.419630360 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11430) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.473385e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
8.490359e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.490359e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.944494 sec + 5,647,694,185 cycles # 2.899 GHz + 14,031,784,985 instructions # 2.48 insn per cycle + 1.948726891 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11428) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.285048e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.288823e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.288823e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.283251 sec - 4,340,411,229 
cycles:u # 3.378 GHz - 12,663,010,683 instructions:u # 2.92 insn per cycle - 1.285187524 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10994) (512y: 240) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.439321e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.460393e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.460393e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.746193 sec + 5,071,268,913 cycles # 2.898 GHz + 12,684,289,306 instructions # 2.50 insn per cycle + 1.750379728 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10992) (512y: 240) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] 
-OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.672615e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.679206e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.679206e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.988073 sec - 2,754,136,726 cycles:u # 2.783 GHz - 6,540,580,960 instructions:u # 2.37 insn per cycle - 0.990085917 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1638) (512y: 192) (512z:10078) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.529292e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.543220e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.543220e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.188060 sec + 4,141,433,761 cycles # 1.890 GHz + 6,563,782,413 instructions # 1.58 insn per cycle + 2.192342750 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1640) (512y: 192) (512z:10068) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index 815102b1d0..4c6f36c205 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:34:11 +DATE: 2023-10-25_18:55:24 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe -p 64 256 1 OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.222490e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.244878e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.246691e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.528299 sec + 2,270,979,797 cycles # 3.018 GHz + 3,579,093,863 
instructions # 1.58 insn per cycle + 0.812313256 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.777355e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.800618e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.801578e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.287863 sec + 10,849,521,679 cycles # 3.070 GHz + 24,134,668,326 instructions # 2.22 insn per cycle + 3.593012410 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 6.626675e-04 +Avg ME (F77/CUDA) = 6.6266731198158122E-004 +Relative difference = 2.837296513854949e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow 
summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 5.019079e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.019719e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.019719e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 32.683479 sec - 113,917,594,376 cycles:u # 3.485 GHz - 144,831,023,491 instructions:u # 1.27 insn per cycle - 32.685587809 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:21600) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.444628e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.445098e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.445098e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 36.909027 sec + 113,587,132,048 cycles # 3.078 GHz + 144,964,358,008 instructions # 1.28 insn per cycle + 36.912946290 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:21605) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 2.83729918072716e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.953502e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.957154e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.957154e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.155616 sec - 14,485,066,140 cycles:u # 3.484 GHz - 37,546,432,009 instructions:u # 2.59 insn per cycle - 4.157607785 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.256823e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.259432e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.259432e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 5.044758 sec + 14,717,259,007 cycles # 2.916 GHz + 37,577,668,645 instructions # 2.55 insn per cycle + 5.048857745 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:68118) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 9.286832e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.306233e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.306233e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.773343 sec - 5,991,122,249 cycles:u # 3.375 GHz - 13,042,658,926 instructions:u # 2.18 insn per cycle - 1.775462817 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.803332e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.817865e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.817865e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) 
GeV^-4 +TOTAL : 2.110842 sec + 6,124,055,435 cycles # 2.897 GHz + 13,063,274,169 instructions # 2.13 insn per cycle + 2.114845473 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:46960) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.128073e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.130991e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.130991e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.461200 sec - 4,938,278,108 cycles:u # 3.376 GHz - 11,421,264,080 instructions:u # 2.31 insn per cycle - 1.463298755 seconds time elapsed -=Symbols in CPPProcess.o= 
(~sse4: 0) (avx2:40433) (512y: 285) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.453213e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.474776e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.474776e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.744006 sec + 5,055,001,520 cycles # 2.893 GHz + 11,442,027,490 instructions # 2.26 insn per cycle + 1.747990275 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40434) (512y: 285) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.686006e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.692604e+04 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.692604e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.979993 sec - 2,731,684,802 cycles:u # 2.783 GHz - 5,921,426,227 instructions:u # 2.17 insn per cycle - 0.981931900 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2455) (512y: 337) (512z:39409) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.925155e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.940309e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.940309e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.078899 sec + 3,977,787,711 cycles # 1.911 GHz + 5,943,488,721 instructions # 1.49 insn per cycle + 2.082985085 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2455) (512y: 337) (512z:39411) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index ae26013984..6ac5000ce8 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:34:56 +DATE: 2023-10-25_18:56:31 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe -p 64 256 1 OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.239290e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.259875e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.261623e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.524624 sec + 2,256,102,466 cycles # 3.003 GHz + 3,565,937,124 instructions # 1.58 insn per cycle + 0.809821745 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.793727e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.817115e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.818113e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.259889 sec + 10,730,925,294 cycles # 3.058 GHz + 24,431,623,702 instructions # 2.28 insn per cycle + 3.565849378 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 6.626675e-04 +Avg ME (F77/CUDA) = 6.6266731198158122E-004 +Relative difference = 2.837296513854949e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.957816e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
4.958427e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.958427e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 33.088882 sec - 115,320,361,517 cycles:u # 3.485 GHz - 145,426,453,230 instructions:u # 1.26 insn per cycle - 33.091126344 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:22238) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.412471e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.412949e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.412949e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 37.178028 sec + 114,361,849,248 cycles # 3.076 GHz + 145,560,134,005 instructions # 1.27 insn per cycle + 37.182120025 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:22248) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 2.83729918072716e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.837495e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.841114e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.841114e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.281663 sec - 14,920,588,737 cycles:u # 3.483 GHz - 37,733,048,466 instructions:u # 2.53 insn per cycle - 4.283825491 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.195698e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.198225e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.198225e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 5.140800 sec + 15,162,495,765 cycles # 2.948 GHz + 37,764,610,972 instructions # 2.49 insn per cycle + 5.144813607 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:68446) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 9.438589e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.458920e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.458920e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.744882 sec - 5,902,070,005 cycles:u # 3.379 GHz - 12,878,882,681 instructions:u # 2.18 insn per cycle - 1.746888270 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:45936) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.961138e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.976889e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
7.976889e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.069144 sec + 6,001,988,087 cycles # 2.896 GHz + 12,897,757,655 instructions # 2.15 insn per cycle + 2.073134028 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:45929) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.123541e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.126510e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.126510e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.467483 sec - 4,961,726,626 cycles:u # 3.377 GHz - 11,428,219,433 instructions:u # 2.30 
insn per cycle - 1.469644474 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40124) (512y: 219) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.425265e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.445952e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.445952e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.748961 sec + 5,100,741,339 cycles # 2.911 GHz + 11,448,531,367 instructions # 2.24 insn per cycle + 1.753002861 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40123) (512y: 219) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] 
(23) = ( 1.699696e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.706624e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.706624e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.972545 sec - 2,710,296,061 cycles:u # 2.782 GHz - 5,875,812,314 instructions:u # 2.17 insn per cycle - 0.974620416 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1971) (512y: 259) (512z:38938) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.951658e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.967421e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.967421e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.071513 sec + 3,955,166,061 cycles # 1.907 GHz + 5,898,178,662 instructions # 1.49 insn per cycle + 2.075594399 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1971) (512y: 259) (512z:38937) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 3767dfd6cf..7d5250e643 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:22:41 +DATE: 2023-10-25_18:41:03 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.344411e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.393693e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.398639e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.476002 sec + 2,055,020,376 cycles # 2.991 GHz + 3,081,758,808 instructions # 1.50 insn per cycle + 0.745881313 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.554616e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.614004e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.616642e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.707448 sec + 5,919,573,877 cycles # 3.066 GHz + 11,552,481,792 instructions # 1.95 insn per cycle + 1.990304243 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 6.626454e-04 +Avg ME (F77/CUDA) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.528823e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.530057e+03 
) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.530057e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.491859 sec - 22,636,199,045 cycles:u # 3.486 GHz - 78,090,213,887 instructions:u # 3.45 insn per cycle - 6.493924913 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3554) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.027055e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.028034e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.028034e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.098867 sec + 25,111,702,681 cycles # 3.100 GHz + 78,142,230,902 instructions # 3.11 insn per cycle + 8.102919065 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3558) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.007809e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.009854e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.009854e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 1.633870 sec - 5,690,420,377 cycles:u # 3.479 GHz - 20,154,491,016 instructions:u # 3.54 insn per cycle - 1.635895783 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.175810e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.188746e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.188746e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.292926 sec + 6,573,476,191 cycles # 2.863 GHz + 20,176,795,660 instructions # 3.07 insn per cycle + 2.297103514 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13749) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.255026e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.265028e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.265028e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.733683 sec - 2,475,329,310 cycles:u # 3.366 GHz - 7,093,765,414 instructions:u # 2.87 insn per cycle - 0.735632372 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.680111e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.687118e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.687118e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) 
GeV^-4 +TOTAL : 0.983720 sec + 2,861,168,699 cycles # 2.899 GHz + 7,112,434,592 instructions # 2.49 insn per cycle + 0.987814280 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11880) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.546225e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.559129e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.559129e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.650668 sec - 2,194,357,147 cycles:u # 3.364 GHz - 6,388,434,702 instructions:u # 2.91 insn per cycle - 0.652709293 seconds time elapsed +OMP threads / `nproc --all` = 1 
/ 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.901709e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.910562e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910562e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.870092 sec + 2,532,384,607 cycles # 2.899 GHz + 6,407,671,698 instructions # 2.53 insn per cycle + 0.874200480 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11552) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.421815e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.446257e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.446257e+04 ) sec^-1 -MeanMatrixElemValue 
= ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.485752 sec - 1,353,981,550 cycles:u # 2.779 GHz - 3,300,538,033 instructions:u # 2.44 insn per cycle - 0.487645256 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2396) (512y: 47) (512z:10312) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.544511e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.550364e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.550364e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.069933 sec + 2,059,770,627 cycles # 1.919 GHz + 3,321,177,538 instructions # 1.61 insn per cycle + 1.074034173 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2408) (512y: 46) (512z:10312) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index 69063309c8..866fb524ce 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -36,31 +36,77 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:44:13 +DATE: 2023-10-25_19:06:12 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.649753e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.350178e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.350178e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 +TOTAL : 0.465463 sec + 2,047,479,601 cycles # 2.978 GHz + 3,049,363,259 instructions # 1.49 insn per cycle + 0.744895811 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.287015e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.501118e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.501118e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 +TOTAL : 1.864461 sec + 6,383,949,622 cycles # 3.047 GHz + 13,653,993,577 instructions # 2.14 insn per cycle + 2.154815432 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 6.626454e-04 +Avg ME (F77/CUDA) = 6.6262659968156085E-004 +Relative difference = 
2.8371612387547027e-05 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.529243e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.530758e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.530758e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.493017 sec - 22,628,505,293 cycles:u # 3.484 GHz - 78,091,639,574 instructions:u # 3.45 insn per cycle - 6.495239879 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3554) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.020017e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.021013e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.021013e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.129725 sec + 25,159,036,941 cycles # 3.094 GHz + 78,146,432,404 instructions # 3.11 insn per cycle + 8.133616973 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3558) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -71,23 +117,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.008271e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.010280e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.010280e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 1.634947 sec - 5,691,193,232 cycles:u # 3.477 GHz - 20,160,476,618 instructions:u # 3.54 insn per cycle - 1.636942657 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.180823e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.193936e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.193936e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.294060 sec + 6,581,200,311 cycles # 2.865 GHz + 20,186,134,505 instructions # 3.07 insn per cycle + 2.298229949 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13749) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -98,23 +144,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.258479e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.268219e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.268219e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.734050 sec - 2,478,113,785 cycles:u # 3.368 GHz - 7,100,336,667 instructions:u # 2.87 insn per cycle - 0.736072875 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.675906e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.682979e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.682979e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 
2.367611e+00 ) GeV^-4 +TOTAL : 0.989013 sec + 2,874,126,906 cycles # 2.896 GHz + 7,122,171,177 instructions # 2.48 insn per cycle + 0.993096654 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11880) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -125,23 +171,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.547358e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.560752e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.560752e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.651695 sec - 2,198,993,035 cycles:u # 3.366 GHz - 6,395,008,325 instructions:u # 2.91 insn per cycle - 0.653635062 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.895474e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.904073e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.904073e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.875592 sec + 2,541,159,330 cycles # 2.891 GHz + 6,417,191,354 instructions # 2.53 insn per cycle + 0.879698021 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11552) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -152,23 +198,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.407460e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.431684e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.431684e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.489364 sec - 1,363,268,056 cycles:u # 2.776 GHz - 3,307,612,222 instructions:u # 2.43 insn per cycle - 0.491352009 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2396) (512y: 47) (512z:10312) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.548734e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.554330e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.554330e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.069331 sec + 2,068,634,398 cycles # 1.928 GHz + 3,331,804,154 instructions # 1.61 insn per cycle + 1.073485896 seconds time elapsed +=Symbols 
in CPPProcess.o= (~sse4: 0) (avx2: 2408) (512y: 46) (512z:10312) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index 1e1a0613d7..b125b710bd 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:48:15 +DATE: 2023-10-25_19:17:07 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --common OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.332225e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.378563e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.386102e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.159397e-01 +- 3.238804e-01 ) GeV^-4 +TOTAL : 0.457977 sec + 2,007,013,185 cycles # 3.005 GHz + 3,016,655,563 instructions # 1.50 insn per cycle + 0.725068773 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.573671e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.635105e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.637758e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 +TOTAL : 1.789355 sec + 6,154,390,786 cycles # 3.059 GHz + 11,779,031,447 instructions # 1.91 insn per cycle + 2.068482222 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 6.626454e-04 +Avg ME (F77/CUDA) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.529664e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.530894e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.530894e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.992613e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.993617e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.993617e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.489789 sec - 22,628,574,625 cycles:u # 3.486 GHz - 78,090,215,991 instructions:u # 3.45 insn per cycle - 6.491742658 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3554) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.239039 sec + 25,101,211,563 cycles # 3.046 GHz + 78,141,605,294 instructions # 3.11 insn per cycle + 8.242885554 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3558) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] 
-OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.007608e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.009697e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.009697e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.360647e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.374286e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.374286e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 1.634125 sec - 5,692,134,926 cycles:u # 3.480 GHz - 20,154,489,840 instructions:u # 3.54 insn per cycle - 1.635961783 seconds time elapsed +TOTAL : 2.237472 sec + 6,571,783,960 cycles # 2.933 GHz + 20,176,847,169 instructions # 3.07 insn per cycle + 2.241174830 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13749) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.255149e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.265738e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.265738e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.681723e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.688538e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.688538e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.733638 sec - 2,479,108,717 cycles:u # 3.372 GHz - 7,093,769,493 instructions:u # 2.86 insn per cycle - 0.735515120 seconds time elapsed +TOTAL : 0.984343 sec + 2,863,785,178 cycles # 2.900 GHz + 7,111,595,374 instructions # 2.48 insn per cycle + 0.988141267 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11880) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.547091e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.560890e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.560890e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.906951e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.915838e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.915838e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.650404 sec - 2,196,802,965 cycles:u # 3.369 GHz - 6,388,437,300 instructions:u # 2.91 insn per cycle - 0.652384117 seconds time elapsed +TOTAL : 0.868571 sec + 2,534,531,591 cycles # 2.907 GHz + 6,404,093,295 instructions # 2.53 insn per cycle + 0.872424795 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11552) 
(512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.409304e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.432131e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.432131e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.558486e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.564692e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.564692e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.487306 sec - 1,358,849,860 cycles:u # 2.780 GHz - 3,300,537,525 instructions:u # 2.43 insn per cycle - 0.489109845 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2396) (512y: 47) (512z:10312) +TOTAL : 
1.060964 sec + 2,062,134,932 cycles # 1.938 GHz + 3,317,722,223 instructions # 1.61 insn per cycle + 1.064830329 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2408) (512y: 46) (512z:10312) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt index caa1d052ce..0197c733f9 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt @@ -36,21 +36,64 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:46:55 +DATE: 2023-10-25_19:14:03 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.335935e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.379594e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.384424e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.458171 sec + 1,997,983,261 cycles # 2.988 GHz + 3,059,794,719 instructions # 1.53 insn per cycle + 0.725811269 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.578157e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.639706e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.642441e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.740586 sec + 5,998,214,102 cycles # 3.054 GHz + 12,259,549,434 instructions # 2.04 insn per cycle + 2.021545903 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 6.626454e-04 +Avg ME (F77/CUDA) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe: Aborted - 2,218,459 cycles:u # 1.490 GHz - 3,114,636 instructions:u # 1.40 insn per cycle - 0.112107092 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3554) (avx2: 0) (512y: 0) (512z: 0) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) 
+Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.001178e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.002137e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.002137e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.203084 sec + 25,141,172,926 cycles # 3.064 GHz + 78,142,442,354 instructions # 3.11 insn per cycle + 8.206877433 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3558) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -60,14 +103,23 @@ Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe: Aborted - 2,112,380 cycles:u # 1.362 GHz - 3,112,947 instructions:u # 1.47 insn per cycle - 0.138312157 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.194824e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.207542e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.207542e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.287147 sec + 6,571,682,415 cycles # 2.870 GHz + 20,177,851,750 instructions # 3.07 insn per cycle + 2.290998385 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13749) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -77,14 +129,23 @@ Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe: Aborted - 1,939,487 cycles:u # 1.317 GHz - 3,113,829 instructions:u # 1.61 insn per cycle - 0.105725842 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.671381e+04 
) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.678184e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.678184e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.988852 sec + 2,877,121,035 cycles # 2.900 GHz + 7,112,414,105 instructions # 2.47 insn per cycle + 0.992772923 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11880) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -94,14 +155,23 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe: Aborted - 1,941,821 cycles:u # 1.336 GHz - 3,114,008 instructions:u # 1.60 insn per cycle - 0.110652085 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.905382e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.914336e+04 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.914336e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.868290 sec + 2,533,476,931 cycles # 2.907 GHz + 6,407,633,337 instructions # 2.53 insn per cycle + 0.872075865 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11552) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -111,14 +181,23 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= -/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe: Aborted - 1,910,010 cycles:u # 1.323 GHz - 3,115,075 instructions:u # 1.63 insn per cycle - 0.118536964 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2396) (512y: 47) (512z:10312) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.547508e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.553260e+04 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.553260e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.067544 sec + 2,060,630,355 cycles # 1.925 GHz + 3,320,987,634 instructions # 1.61 insn per cycle + 1.071418753 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2408) (512y: 46) (512z:10312) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index 2470ae94b5..52987bd60d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -36,30 +36,67 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:46:22 +DATE: 2023-10-25_19:11:03 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.805977e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.405107e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.410125e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 +TOTAL : 0.460864 sec + 2,006,899,759 cycles # 2.993 GHz + 3,017,362,425 instructions # 1.50 insn per cycle + 0.728187465 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.501502e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.624461e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.627283e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 +TOTAL : 1.814750 sec + 6,240,031,804 cycles # 3.062 GHz + 12,062,681,609 instructions # 1.93 insn per cycle + 2.094320858 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 6.626454e-04 +Avg ME (F77/CUDA) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.528159e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.529391e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.529391e+03 ) sec^-1 
-MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.497742 sec - 22,641,679,611 cycles:u # 3.486 GHz - 78,090,217,675 instructions:u # 3.45 insn per cycle - 6.499738583 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3554) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.001113e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.002085e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.002085e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.203609 sec + 25,118,317,039 cycles # 3.061 GHz + 78,142,981,648 instructions # 3.11 insn per cycle + 8.207429402 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3558) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +106,23 @@ Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.008068e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.010082e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.010082e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 1.633426 sec - 5,689,992,223 cycles:u # 3.480 GHz - 20,154,489,389 instructions:u # 3.54 insn per cycle - 1.635266445 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.323036e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.336228e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.336228e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.246931 sec + 6,579,301,389 cycles # 2.924 GHz + 20,176,586,022 instructions # 3.07 insn per cycle + 2.250705274 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13749) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +132,23 @@ Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.258512e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.268856e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.268856e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.732443 sec - 2,475,265,704 cycles:u # 3.372 GHz - 7,093,763,921 instructions:u # 2.87 insn per cycle - 0.734306872 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.674657e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.681528e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.681528e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 
2.367611e+00 ) GeV^-4 +TOTAL : 0.986857 sec + 2,862,922,647 cycles # 2.892 GHz + 7,112,389,781 instructions # 2.48 insn per cycle + 0.990752111 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11880) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +158,23 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.548560e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.561816e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.561816e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.649916 sec - 2,195,752,741 cycles:u # 3.371 GHz - 6,388,434,398 instructions:u # 2.91 insn per cycle - 0.651754209 seconds time elapsed +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.894478e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.903271e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.903271e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.873227 sec + 2,541,312,245 cycles # 2.899 GHz + 6,407,310,369 instructions # 2.52 insn per cycle + 0.877122463 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11552) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +184,23 @@ Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.424471e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.447344e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
3.447344e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.485893 sec - 1,353,311,053 cycles:u # 2.778 GHz - 3,300,539,344 instructions:u # 2.44 insn per cycle - 0.487750906 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2396) (512y: 47) (512z:10312) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.549285e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.555166e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.555166e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.066246 sec + 2,058,021,558 cycles # 1.924 GHz + 3,321,051,164 instructions # 1.61 insn per cycle + 1.070183678 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2408) (512y: 46) (512z:10312) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index 4f9674331f..e3d102e7b5 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:22:54 +DATE: 2023-10-25_18:41:32 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.347885e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.397852e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.402767e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.473915 sec + 2,057,636,111 cycles # 3.008 GHz + 3,034,789,542 instructions # 1.47 insn per cycle + 0.743124266 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.510288e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.569100e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.571863e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.706491 sec + 5,801,343,951 cycles # 3.003 GHz + 11,478,639,093 instructions # 1.98 insn per cycle + 1.991446406 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 6.626454e-04 +Avg ME (F77/CUDA) = 6.6262659968156085E-004 +Relative difference = 2.8371612387547027e-05 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.550733e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.551990e+03 
) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.551990e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.436120 sec - 22,440,295,397 cycles:u # 3.486 GHz - 77,833,366,637 instructions:u # 3.47 insn per cycle - 6.438186543 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3062) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.032219e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.033197e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.033197e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.077935 sec + 25,002,620,407 cycles # 3.094 GHz + 77,880,023,337 instructions # 3.11 insn per cycle + 8.081833653 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3061) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 5.65798569465384e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.014422e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.016466e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.016466e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 1.623058 sec - 5,653,015,371 cycles:u # 3.480 GHz - 20,121,469,831 instructions:u # 3.56 insn per cycle - 1.624945090 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13433) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.437992e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.452045e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.452045e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.212380 sec + 6,525,641,551 cycles # 2.945 GHz + 20,144,168,186 instructions # 3.09 insn per cycle + 2.216462164 seconds time elapsed +=Symbols in CPPProcess.o= 
(~sse4:13439) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 2.1853408865157068e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.183984e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.193251e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.193251e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.757019 sec - 2,558,948,847 cycles:u # 3.374 GHz - 7,233,160,969 instructions:u # 2.83 insn per cycle - 0.758869976 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:12273) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.631112e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.637585e+04 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.637585e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.012887 sec + 2,950,530,206 cycles # 2.903 GHz + 7,252,358,943 instructions # 2.46 insn per cycle + 1.016940562 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:12263) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 5.008331292535666e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.477725e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.489552e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.489552e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.668014 sec - 2,257,387,875 cycles:u # 3.372 GHz - 
6,529,983,861 instructions:u # 2.89 insn per cycle - 0.669967567 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11966) (512y: 26) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.851739e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.860081e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.860081e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.893117 sec + 2,605,469,056 cycles # 2.906 GHz + 6,549,528,920 instructions # 2.51 insn per cycle + 0.897080094 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11948) (512y: 26) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 5.008331292535666e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` 
= 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 3.218040e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.238304e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.238304e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.515820 sec - 1,438,608,312 cycles:u # 2.781 GHz - 3,460,427,031 instructions:u # 2.41 insn per cycle - 0.517668261 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2901) (512y: 23) (512z:10269) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.502391e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.508119e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.508119e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.099172 sec + 2,123,741,528 cycles # 1.926 GHz + 3,480,482,498 instructions # 1.64 insn per cycle + 1.103291837 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2903) (512y: 22) (512z:10276) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index 32697efd77..31738cc5a1 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:35:42 +DATE: 2023-10-25_18:57:39 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe -p 64 256 1 OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.596965e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.633834e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.637880e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.481755 sec + 2,117,394,113 cycles # 2.997 GHz + 3,211,903,752 instructions # 1.52 insn per cycle + 0.763834850 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.702969e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.752015e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.754390e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.845325 sec + 6,364,196,440 cycles # 3.044 GHz + 12,697,584,410 instructions # 2.00 insn per cycle + 2.147798753 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 6.626454e-04 +Avg ME (F77/CUDA) = 6.6262660579844562E-004 +Relative difference = 2.836238137986709e-05 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.125617e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.126338e+02 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.126338e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.204932e-01 +- 3.252405e-01 ) GeV^-4 -TOTAL : 26.780468 sec - 93,313,216,496 cycles:u # 3.484 GHz - 135,431,060,141 instructions:u # 1.45 insn per cycle - 26.782615953 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:15458) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.860218e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.861043e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.861043e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 +TOTAL : 27.993515 sec + 85,967,133,243 cycles # 3.071 GHz + 135,563,627,438 instructions # 1.58 insn per cycle + 27.997422911 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:15486) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 4.195614963669944e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 8.768700e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.783995e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.783995e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 -TOTAL : 1.876922 sec - 6,538,775,041 cycles:u # 3.481 GHz - 19,364,912,946 instructions:u # 2.96 insn per cycle - 1.878795640 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.196137e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.208997e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.208997e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 +TOTAL : 2.287867 sec + 6,773,769,099 cycles # 2.957 GHz + 19,387,600,160 instructions # 2.86 insn per cycle + 2.291838045 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:69680) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 4.0849182767952624e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.797914e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.804285e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.804285e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 -TOTAL : 0.918896 sec - 3,105,696,597 cycles:u # 3.374 GHz - 6,789,508,332 instructions:u # 2.19 insn per cycle - 0.920983211 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.513484e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.519092e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.519092e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) 
GeV^-4 +TOTAL : 1.091418 sec + 3,173,929,820 cycles # 2.900 GHz + 6,808,660,445 instructions # 2.15 insn per cycle + 1.095213929 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:49077) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 2.3520194007978538e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.177052e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.186751e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.186751e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 -TOTAL : 0.759834 sec - 2,567,390,833 cycles:u # 3.372 GHz - 5,967,608,527 instructions:u # 2.32 insn per cycle - 0.761735929 seconds time elapsed +OMP threads / `nproc --all` = 1 
/ 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.813250e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.821399e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.821399e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 +TOTAL : 0.912193 sec + 2,648,785,634 cycles # 2.893 GHz + 5,986,998,268 instructions # 2.26 insn per cycle + 0.916244855 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:42677) (512y: 11) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 2.3520194007978538e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.796510e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.813005e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.813005e+04 ) sec^-1 -MeanMatrixElemValue 
= ( 4.211848e-01 +- 3.254639e-01 ) GeV^-4 -TOTAL : 0.592992 sec - 1,653,029,441 cycles:u # 2.780 GHz - 3,481,255,390 instructions:u # 2.11 insn per cycle - 0.594900850 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.539260e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.545021e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.545021e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.073101 sec + 2,071,594,759 cycles # 1.925 GHz + 3,501,390,779 instructions # 1.69 insn per cycle + 1.077005935 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5198) (512y: 3) (512z:44822) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index 17e4a9c48c..cddff811bf 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:36:17 +DATE: 2023-10-25_18:58:30 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe -p 64 256 1 OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.566704e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.601747e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.605700e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.479762 sec + 2,129,975,460 cycles # 3.025 GHz + 3,262,724,571 instructions # 1.53 insn per cycle + 0.761351418 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.654164e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.702666e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.704765e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.853079 sec + 6,372,411,967 cycles # 3.059 GHz + 13,261,029,776 instructions # 2.08 insn per cycle + 2.143146478 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 6.626454e-04 +Avg ME (F77/CUDA) = 6.6262660579844562E-004 +Relative difference = 2.836238137986709e-05 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.397979e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.398767e+02 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.398767e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.204932e-01 +- 3.252405e-01 ) GeV^-4 -TOTAL : 25.641745 sec - 89,368,300,325 cycles:u # 3.485 GHz - 135,893,183,046 instructions:u # 1.52 insn per cycle - 25.643755173 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:15937) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.859455e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.860302e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.860302e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 +TOTAL : 27.996994 sec + 86,063,307,798 cycles # 3.074 GHz + 135,905,248,930 instructions # 1.58 insn per cycle + 28.000969025 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:15910) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 4.0361421941458736e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 8.661453e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.676391e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.676391e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 -TOTAL : 1.899849 sec - 6,619,325,215 cycles:u # 3.481 GHz - 19,416,769,897 instructions:u # 2.93 insn per cycle - 1.901796273 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.111595e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.124605e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.124605e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 +TOTAL : 2.313738 sec + 6,851,236,852 cycles # 2.957 GHz + 19,439,512,273 instructions # 2.84 insn per cycle + 2.317708744 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:69722) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 4.170542995014107e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.844626e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.851170e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.851170e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 -TOTAL : 0.895271 sec - 3,027,082,745 cycles:u # 3.375 GHz - 6,700,693,510 instructions:u # 2.21 insn per cycle - 0.897161186 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.540866e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.546584e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.546584e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) 
GeV^-4 +TOTAL : 1.071791 sec + 3,105,453,036 cycles # 2.888 GHz + 6,719,669,630 instructions # 2.16 insn per cycle + 1.075697475 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:47667) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 2.4912983202981302e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.190615e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.199886e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.199886e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 -TOTAL : 0.754903 sec - 2,551,683,836 cycles:u # 3.373 GHz - 5,950,861,283 instructions:u # 2.33 insn per cycle - 0.756777544 seconds time elapsed +OMP threads / `nproc --all` = 1 
/ 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.816139e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.824278e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.824278e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 +TOTAL : 0.910723 sec + 2,625,346,963 cycles # 2.872 GHz + 5,970,291,755 instructions # 2.27 insn per cycle + 0.914693861 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:41842) (512y: 13) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 2.4912983202981302e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.818886e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.835239e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.835239e+04 ) sec^-1 -MeanMatrixElemValue 
= ( 4.211848e-01 +- 3.254639e-01 ) GeV^-4 -TOTAL : 0.587968 sec - 1,639,593,907 cycles:u # 2.782 GHz - 3,474,694,951 instructions:u # 2.12 insn per cycle - 0.589824720 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.541127e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.546712e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.546712e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.071854 sec + 2,074,441,089 cycles # 1.930 GHz + 3,494,899,079 instructions # 1.68 insn per cycle + 1.075757314 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4162) (512y: 4) (512z:44465) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 5479c4ff17..7ad6f63659 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:23:07 +DATE: 2023-10-25_18:42:00 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.490294e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.513614e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.515507e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.518671 sec + 2,226,766,382 cycles # 2.986 GHz + 3,540,179,326 instructions # 1.59 insn per cycle + 0.807219432 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.120867e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.148001e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.149117e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.019190 sec + 10,060,652,484 cycles # 3.078 GHz + 22,177,348,496 instructions # 2.20 insn per cycle + 3.325766752 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 6.626675e-04 +Avg ME (F77/CUDA) = 6.6266732376103494E-004 +Relative difference = 2.659538381540814e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.443771e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.445279e+03 
) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.445279e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.718856 sec - 23,426,099,550 cycles:u # 3.486 GHz - 79,131,145,870 instructions:u # 3.38 insn per cycle - 6.721027556 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4708) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.954175e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.955102e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.955102e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.401551 sec + 26,112,965,520 cycles # 3.107 GHz + 79,187,055,919 instructions # 3.03 insn per cycle + 8.405522832 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4746) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.929673e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.935374e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.935374e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.334498 sec - 11,619,248,352 cycles:u # 3.483 GHz - 38,549,380,029 instructions:u # 3.32 insn per cycle - 3.336425977 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.704056e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.707340e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.707340e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.436595 sec + 12,893,512,565 cycles # 2.905 GHz + 38,578,382,892 instructions # 2.99 insn per cycle + 4.440842197 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13136) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.153054e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.156058e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.156058e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.429560 sec - 4,835,341,307 cycles:u # 3.379 GHz - 13,686,717,164 instructions:u # 2.83 insn per cycle - 1.431504175 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11246) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.529594e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.548137e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
8.548137e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.931885 sec + 5,592,758,933 cycles # 2.891 GHz + 13,704,166,637 instructions # 2.45 insn per cycle + 1.936090809 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11245) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.294470e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.298365e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.298365e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.274579 sec - 4,309,533,363 cycles:u # 3.377 GHz - 12,329,522,681 instructions:u # 2.86 
insn per cycle - 1.276566120 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10898) (512y: 79) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.692246e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.714062e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.714062e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.701537 sec + 4,935,885,889 cycles # 2.895 GHz + 12,346,516,315 instructions # 2.50 insn per cycle + 1.705790521 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10897) (512y: 79) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = 
( 1.650976e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.657209e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.657209e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.000488 sec - 2,789,706,585 cycles:u # 2.784 GHz - 6,419,813,194 instructions:u # 2.30 insn per cycle - 1.002439628 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1796) (512y: 93) (512z:10086) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.612609e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.626629e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.626629e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.163571 sec + 4,153,048,865 cycles # 1.917 GHz + 6,440,968,926 instructions # 1.55 insn per cycle + 2.167665946 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1803) (512y: 93) (512z:10092) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index 931a425468..f056a45974 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:23:23 +DATE: 2023-10-25_18:42:36 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.481563e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.505145e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.507791e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.521573 sec + 2,153,426,616 cycles # 2.862 GHz + 3,368,297,517 instructions # 1.56 insn per cycle + 0.812765365 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.140316e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.167722e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.168848e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.005546 sec + 10,001,668,357 cycles # 3.073 GHz + 22,545,107,075 instructions # 2.25 insn per cycle + 3.311305896 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 6.626675e-04 +Avg ME (F77/CUDA) = 6.6266732376103494E-004 +Relative difference = 2.659538381540814e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.449739e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.451219e+03 
) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.451219e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.701998 sec - 23,368,009,528 cycles:u # 3.486 GHz - 79,155,600,974 instructions:u # 3.39 insn per cycle - 6.704105951 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4383) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.946429e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.947329e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.947329e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.435831 sec + 26,126,854,101 cycles # 3.097 GHz + 79,204,576,073 instructions # 3.03 insn per cycle + 8.439924281 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4401) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.803303e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.808772e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.808772e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.423406 sec - 11,924,323,531 cycles:u # 3.483 GHz - 38,507,173,931 instructions:u # 3.23 insn per cycle - 3.425385769 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12902) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.694235e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.697554e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.697554e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.448208 sec + 12,894,092,255 cycles # 2.897 GHz + 38,538,252,439 instructions # 2.99 insn per cycle + 4.452458550 seconds time elapsed +=Symbols in 
CPPProcess.o= (~sse4:12903) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.145562e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.148491e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.148491e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.438621 sec - 4,866,589,673 cycles:u # 3.379 GHz - 13,805,745,696 instructions:u # 2.84 insn per cycle - 1.440536290 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11349) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.136495e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
8.152364e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.152364e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.024697 sec + 5,646,666,731 cycles # 2.789 GHz + 13,825,634,230 instructions # 2.45 insn per cycle + 2.029023847 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11327) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.286025e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.289820e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.289820e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.282326 sec - 4,336,961,185 cycles:u 
# 3.378 GHz - 12,458,784,902 instructions:u # 2.87 insn per cycle - 1.284257691 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10894) (512y: 239) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.556941e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.579305e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.579305e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.725082 sec + 4,994,411,865 cycles # 2.889 GHz + 12,477,409,386 instructions # 2.50 insn per cycle + 1.729413379 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10888) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP 
threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.650021e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.656400e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.656400e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.001452 sec - 2,791,774,858 cycles:u # 2.783 GHz - 6,522,516,847 instructions:u # 2.34 insn per cycle - 1.003472742 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1626) (512y: 191) (512z:10049) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.605322e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.619068e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.619068e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.165836 sec + 4,156,250,470 cycles # 1.916 GHz + 6,542,526,880 instructions # 1.57 insn per cycle + 2.169952357 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1628) (512y: 191) (512z:10036) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index ba3d118b39..55b25786bd 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_18:24:10 +DATE: 2023-10-25_18:44:51 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.070749e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.071143e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.071250e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.417941 sec + 8,377,415,337 cycles # 3.062 GHz + 
18,838,612,351 instructions # 2.25 insn per cycle + 2.794089225 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.235176e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.237005e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.237223e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.993841 sec + 13,055,750,655 cycles # 3.026 GHz + 31,160,662,070 instructions # 2.39 insn per cycle + 4.373705613 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 9.872263e-03 +Avg ME (F77/CUDA) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 9.680592e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.680896e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.680896e+01 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.456227 sec - 19,018,581,238 cycles:u # 3.485 GHz - 54,010,805,402 instructions:u # 2.84 insn per cycle - 5.458105500 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32344) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.897548e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.897775e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.897775e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.705050 sec + 19,396,394,057 cycles # 2.892 GHz + 54,051,876,234 instructions # 2.79 insn per cycle + 6.708932383 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32354) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.877293e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.877401e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.877401e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.814507 sec - 9,809,689,328 cycles:u # 3.484 GHz - 27,056,297,784 instructions:u # 2.76 insn per cycle - 2.816303349 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.653870e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.653969e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.653969e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.197917 sec + 9,907,124,994 cycles # 3.095 GHz + 27,081,765,597 instructions # 2.73 insn per cycle + 3.202038670 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96405) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.298191e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.298726e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.298726e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.230507 sec - 4,164,918,618 cycles:u # 3.381 GHz - 9,647,913,152 instructions:u # 2.32 insn per cycle - 1.232250727 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.542258e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.542698e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.542698e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 
9.825549e-06 ) GeV^-6 +TOTAL : 1.497561 sec + 4,341,680,359 cycles # 2.893 GHz + 9,666,416,740 instructions # 2.23 insn per cycle + 1.501545706 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84384) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.852853e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.853549e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.853549e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.090366 sec - 3,688,799,345 cycles:u # 3.379 GHz - 8,598,576,162 instructions:u # 2.33 insn per cycle - 1.092175101 seconds time elapsed +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.866674e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.867185e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.867185e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.370361 sec + 3,840,509,187 cycles # 2.796 GHz + 8,617,030,376 instructions # 2.24 insn per cycle + 1.374450501 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84025) (512y: 89) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.955570e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.957165e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
6.957165e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 0.761329 sec - 2,120,569,640 cycles:u # 2.780 GHz - 4,317,361,789 instructions:u # 2.04 insn per cycle - 0.763089467 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2293) (512y: 103) (512z:83066) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.733060e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.733603e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.733603e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.420204 sec + 2,707,945,792 cycles # 1.903 GHz + 4,335,943,514 instructions # 1.60 insn per cycle + 1.424239943 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2300) (512y: 103) (512z:83067) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index fbe366c789..94a23e2f12 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -36,31 +36,77 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_18:44:26 +DATE: 2023-10-25_19:06:41 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 2 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.066351e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.067339e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.067339e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.354612 sec + 8,162,423,266 cycles # 3.056 GHz + 18,099,441,211 instructions # 2.22 insn per cycle + 2.730850635 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.246162e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.277852e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.277852e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.971347 sec + 13,204,436,201 cycles # 3.078 GHz + 30,282,503,163 instructions # 2.29 insn per cycle + 4.348676377 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 9.872263e-03 +Avg ME (F77/CUDA) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 
+OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 9.694870e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.695205e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.695205e+01 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.449031 sec - 18,992,419,830 cycles:u # 3.485 GHz - 54,011,515,215 instructions:u # 2.84 insn per cycle - 5.450993128 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32344) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.244919e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.245139e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.245139e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.408828 sec + 19,448,715,885 cycles # 3.033 GHz + 54,050,853,106 instructions # 2.78 insn per cycle + 6.412664226 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32354) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -71,23 +117,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.878478e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.878594e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.878594e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.813007 sec - 9,802,151,523 cycles:u # 3.483 GHz - 27,057,251,738 instructions:u # 2.76 insn per cycle - 2.814919783 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.650316e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.650406e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.650406e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.204606 sec + 9,890,944,577 cycles # 3.084 GHz + 27,082,213,615 instructions # 2.74 insn per cycle + 3.208413447 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96405) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -98,23 +144,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.308480e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.309039e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.309039e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.227874 sec - 4,154,924,285 cycles:u # 3.380 GHz - 9,648,778,691 instructions:u # 2.32 insn per cycle - 1.229660091 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.546707e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.547140e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.547140e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 
9.825549e-06 ) GeV^-6 +TOTAL : 1.493585 sec + 4,325,320,238 cycles # 2.890 GHz + 9,667,464,688 instructions # 2.24 insn per cycle + 1.497469046 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84384) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -125,23 +171,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.858370e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.859116e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.859116e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.089216 sec - 3,684,912,316 cycles:u # 3.379 GHz - 8,599,444,306 instructions:u # 2.33 insn per cycle - 1.090969381 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.037834e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.038367e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.038367e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.313129 sec + 3,813,971,496 cycles # 2.897 GHz + 8,617,412,652 instructions # 2.26 insn per cycle + 1.316983127 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84025) (512y: 89) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -152,23 +198,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 7.016416e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.018101e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.018101e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 0.754833 sec - 2,102,419,627 cycles:u # 2.780 GHz - 4,318,249,384 instructions:u # 2.05 insn per cycle - 0.756579131 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2293) (512y: 103) (512z:83066) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.743342e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.743918e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.743918e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.415154 sec + 2,707,174,442 cycles # 1.909 GHz + 4,336,832,605 instructions # 1.60 insn per cycle + 1.419043941 seconds time elapsed 
+=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2300) (512y: 103) (512z:83067) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index bac19cdccb..e3241a7638 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_18:24:33 +DATE: 2023-10-25_18:45:54 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.058972e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.059386e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.059482e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.421465 sec + 8,436,981,615 cycles # 3.067 GHz + 19,028,712,837 instructions # 2.26 insn per cycle + 2.810095670 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.255956e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.257794e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.257987e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.982903 sec + 13,219,371,069 cycles # 3.063 GHz + 29,416,443,528 instructions # 2.23 insn per cycle + 4.375059399 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 9.872263e-03 +Avg ME (F77/CUDA) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 9.638383e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
9.638684e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.638684e+01 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.481978 sec - 19,103,066,208 cycles:u # 3.485 GHz - 54,034,034,277 instructions:u # 2.83 insn per cycle - 5.483935387 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32250) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.015099e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.015315e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.015315e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.593815 sec + 19,095,708,850 cycles # 2.895 GHz + 54,047,292,212 instructions # 2.83 insn per cycle + 6.597605539 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:31965) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.863455e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.863561e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.863561e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.835216 sec - 9,881,395,758 cycles:u # 3.484 GHz - 27,051,463,465 instructions:u # 2.74 insn per cycle - 2.836984541 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96261) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.634946e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.635033e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.635033e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.235048 sec + 10,011,189,889 cycles # 3.092 GHz + 27,077,379,591 instructions # 2.70 insn per cycle + 3.239059369 seconds time elapsed +=Symbols in 
CPPProcess.o= (~sse4:96257) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.293792e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.294411e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.294411e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.231602 sec - 4,168,406,135 cycles:u # 3.381 GHz - 9,659,246,967 instructions:u # 2.32 insn per cycle - 1.233398649 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.540865e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.541284e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.541284e+02 
) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.495827 sec + 4,323,882,817 cycles # 2.884 GHz + 9,677,765,192 instructions # 2.24 insn per cycle + 1.499825664 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84456) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.889186e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.889950e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.889950e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.082064 sec - 3,655,678,046 cycles:u # 3.373 GHz - 8,608,539,465 instructions:u # 2.35 insn per 
cycle - 1.084116660 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.986806e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.987336e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.987336e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.330112 sec + 3,818,292,084 cycles # 2.864 GHz + 8,626,392,875 instructions # 2.26 insn per cycle + 1.334108022 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:83903) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 7.057749e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.059454e+02 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.059454e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 0.750584 sec - 2,091,362,984 cycles:u # 2.781 GHz - 4,326,512,544 instructions:u # 2.07 insn per cycle - 0.752511146 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2175) (512y: 185) (512z:83037) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.736329e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.736878e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.736878e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.419053 sec + 2,712,716,906 cycles # 1.907 GHz + 4,344,880,705 instructions # 1.60 insn per cycle + 1.423142145 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2177) (512y: 185) (512z:83030) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index b834029070..6327c32a36 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -36,56 +36,90 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_18:24:56 +DATE: 2023-10-25_18:46:57 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.757288e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.758127e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.758502e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 +TOTAL : 1.655983 sec + 5,859,016,098 cycles # 3.037 GHz + 12,599,305,189 instructions # 2.15 insn per cycle + 1.988634806 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.346728e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.347386e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.347472e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 +TOTAL : 1.915477 sec + 6,698,649,731 cycles # 3.061 GHz + 13,457,604,803 instructions # 2.01 insn per cycle + 2.247936467 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 9.849636e-03 +Avg ME (F77/CUDA) = 9.8712405367667715E-003 +Relative difference = 0.0021934350433631634 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.021950e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
1.021979e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.021979e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 -TOTAL : 5.168809 sec - 18,013,411,233 cycles:u # 3.484 GHz - 53,603,517,178 instructions:u # 2.98 insn per cycle - 5.170747581 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20320) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.909786e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.910066e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.910066e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 5.942364 sec + 18,295,731,836 cycles # 3.078 GHz + 53,640,525,145 instructions # 2.93 insn per cycle + 5.946250751 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20286) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087550399E-003 -Relative difference = 2.119779305548787e-08 +Avg ME (F77/C++) = 9.8479612087551509E-003 +Relative difference = 2.119780432912131e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.210651e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.211125e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.211125e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924321e-03 +- 4.918774e-03 ) GeV^-6 -TOTAL : 1.256048 sec - 4,376,251,084 cycles:u # 3.480 GHz - 13,744,562,962 instructions:u # 3.14 insn per cycle - 1.257791058 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.560034e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.560473e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.560473e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.488735 sec + 4,616,421,294 cycles # 3.094 GHz + 13,762,957,080 instructions # 
2.98 insn per cycle + 1.492690614 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96921) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 3.1515505172940424e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 8.588687e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.590709e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.590709e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL : 0.616847 sec - 2,086,353,348 cycles:u # 3.374 GHz - 4,853,210,335 instructions:u # 2.33 insn per cycle - 0.618687287 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.154668e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
7.156604e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.156604e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.743016 sec + 2,158,936,332 cycles # 2.892 GHz + 4,868,873,872 instructions # 2.26 insn per cycle + 0.746953594 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84898) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 9.689277e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.691596e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.691596e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL : 0.547165 sec - 1,849,659,683 
cycles:u # 3.372 GHz - 4,325,575,589 instructions:u # 2.34 insn per cycle - 0.548927061 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.993469e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.995570e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.995570e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.665634 sec + 1,930,674,595 cycles # 2.886 GHz + 4,341,032,805 instructions # 2.25 insn per cycle + 0.669411803 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84581) (512y: 44) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 
1.415883e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.416425e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.416425e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.946834e-03 +- 4.941266e-03 ) GeV^-6 -TOTAL : 0.375089 sec - 1,044,605,334 cycles:u # 2.775 GHz - 2,175,642,242 instructions:u # 2.08 insn per cycle - 0.376788208 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2884) (512y: 48) (512z:83271) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.422100e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.424326e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.424326e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.716875 sec + 1,362,810,793 cycles # 1.892 GHz + 2,191,758,925 instructions # 1.61 insn per cycle + 0.720813478 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2896) (512y: 47) (512z:83271) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index c3fa78732f..3a8d1c9eac 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -36,58 +36,104 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_18:44:49 +DATE: 2023-10-25_19:07:44 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 2 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.793927e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.795607e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.795607e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187094e-05 +- 9.825664e-06 ) GeV^-6 +TOTAL : 1.593125 sec + 5,711,497,040 cycles # 3.065 GHz + 11,071,875,711 instructions # 1.94 insn per cycle + 1.922969062 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.332193e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.344979e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.344979e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856441e-04 +- 8.331096e-05 ) GeV^-6 +TOTAL : 1.859000 sec + 6,562,180,386 cycles # 3.077 GHz + 14,027,341,556 instructions # 2.14 insn per cycle + 2.188744808 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 9.849636e-03 +Avg ME (F77/CUDA) = 9.8712405367667715E-003 +Relative difference = 0.0021934350433631634 +OK 
(relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.020893e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.020922e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.020922e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 -TOTAL : 5.175832 sec - 18,016,139,817 cycles:u # 3.481 GHz - 53,604,226,868 instructions:u # 2.98 insn per cycle - 5.177799324 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20320) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.934162e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.934430e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.934430e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 5.914443 sec + 18,289,747,509 cycles # 3.091 GHz + 53,640,880,499 instructions # 2.93 insn per cycle + 5.918348520 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20286) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087550399E-003 -Relative difference = 2.119779305548787e-08 +Avg ME (F77/C++) = 9.8479612087551509E-003 +Relative difference = 2.119780432912131e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.213112e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.213591e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.213591e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924321e-03 +- 4.918774e-03 ) GeV^-6 -TOTAL : 1.255475 sec - 4,373,170,498 cycles:u # 3.479 GHz - 13,745,433,050 instructions:u # 3.14 insn per cycle - 1.257250474 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.554596e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.555024e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.555024e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.490597 sec + 
4,617,566,850 cycles # 3.091 GHz + 13,763,927,839 instructions # 2.98 insn per cycle + 1.494414225 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96921) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -98,23 +144,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 8.581573e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.583573e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.583573e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL : 0.617677 sec - 2,088,828,384 cycles:u # 3.374 GHz - 4,854,095,100 instructions:u # 2.32 insn per cycle - 0.619460453 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.178518e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.180233e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.180233e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.740375 sec + 2,151,024,422 cycles # 2.893 GHz + 4,869,911,860 instructions # 2.26 insn per cycle + 0.744208061 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84898) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -125,23 +171,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 9.694742e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.697307e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.697307e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL : 0.547014 sec - 1,849,515,073 cycles:u # 3.371 GHz - 4,326,457,350 instructions:u # 2.34 insn per cycle - 0.548974684 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.974786e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.976961e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.976961e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.667174 sec + 1,931,553,706 cycles # 2.882 GHz + 4,342,018,470 instructions # 2.25 insn per cycle + 0.670962833 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84581) (512y: 44) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -152,23 +198,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.405098e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.405660e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.405660e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.946834e-03 +- 4.941266e-03 ) GeV^-6 -TOTAL : 0.378139 sec - 1,052,603,493 cycles:u # 2.773 GHz - 2,176,541,915 instructions:u # 2.07 insn per cycle - 0.379883885 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2884) (512y: 48) (512z:83271) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.427390e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.429642e+02 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 7.429642e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.716682 sec + 1,362,755,127 cycles # 1.894 GHz + 2,192,432,791 instructions # 1.61 insn per cycle + 0.720592858 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2896) (512y: 47) (512z:83271) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index bff929697f..ea39ad8994 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -36,56 +36,90 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_18:25:13 +DATE: 2023-10-25_18:47:44 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.770785e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.771818e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.772102e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 +TOTAL : 1.644101 sec + 5,902,851,310 cycles # 3.071 GHz + 12,347,610,066 instructions # 2.09 insn per cycle + 1.979279139 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.344670e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.345330e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.345408e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 +TOTAL : 1.896839 sec + 6,669,880,941 cycles # 3.074 GHz + 13,819,367,336 instructions # 2.07 insn per cycle + 2.228526429 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 9.849636e-03 +Avg ME (F77/CUDA) = 9.8712405367667715E-003 +Relative difference = 0.0021934350433631634 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.013491e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
1.013519e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.013519e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924324e-03 +- 4.918778e-03 ) GeV^-6 -TOTAL : 5.211690 sec - 18,167,100,484 cycles:u # 3.485 GHz - 53,621,489,297 instructions:u # 2.95 insn per cycle - 5.213585137 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20477) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.942946e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.943229e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.943229e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 5.908590 sec + 18,230,304,521 cycles # 3.084 GHz + 53,620,524,232 instructions # 2.94 insn per cycle + 5.912442239 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20241) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087571129E-003 -Relative difference = 2.119800355536229e-08 +Avg ME (F77/C++) = 9.8479612087572898E-003 +Relative difference = 2.1198021522715588e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.211809e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.212289e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.212289e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.924321e-03 +- 4.918774e-03 ) GeV^-6 -TOTAL : 1.255504 sec - 4,374,748,922 cycles:u # 3.481 GHz - 13,737,496,340 instructions:u # 3.14 insn per cycle - 1.257243369 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.576079e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.576533e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.576533e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.481262 sec + 4,588,697,172 cycles # 3.091 GHz + 13,755,977,699 instructions 
# 3.00 insn per cycle + 1.485242600 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96593) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 3.151856596628469e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 8.513547e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.515596e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.515596e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL : 0.622174 sec - 2,104,535,793 cycles:u # 3.375 GHz - 4,861,455,020 instructions:u # 2.31 insn per cycle - 0.623987486 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:85271) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 
+EvtsPerSec[Rmb+ME] (23) = ( 7.020825e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.022661e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.022661e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.756593 sec + 2,190,032,975 cycles # 2.882 GHz + 4,877,215,136 instructions # 2.23 insn per cycle + 0.760480627 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:85321) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 1.85880227405429e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 9.712446e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.714857e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.714857e+02 ) sec^-1 
-MeanMatrixElemValue = ( 4.946830e-03 +- 4.941261e-03 ) GeV^-6 -TOTAL : 0.545700 sec - 1,845,269,421 cycles:u # 3.373 GHz - 4,333,408,828 instructions:u # 2.35 insn per cycle - 0.547415858 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:85056) (512y: 24) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.993268e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.995423e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.995423e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.665169 sec + 1,931,098,294 cycles # 2.889 GHz + 4,348,628,190 instructions # 2.25 insn per cycle + 0.669029492 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84987) (512y: 24) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 1.85880227405429e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.401340e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.401916e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.401916e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.946834e-03 +- 4.941266e-03 ) GeV^-6 -TOTAL : 0.378898 sec - 1,055,214,488 cycles:u # 2.774 GHz - 2,184,910,519 instructions:u # 2.07 insn per cycle - 0.380846680 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3500) (512y: 33) (512z:83441) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.452710e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.455265e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.455265e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.713644 sec + 1,367,244,738 cycles # 1.907 GHz + 2,200,694,530 instructions # 1.61 insn per cycle + 0.717609354 seconds time elapsed +=Symbols in 
CPPProcess.o= (~sse4: 0) (avx2: 3502) (512y: 32) (512z:83441) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 9b16beec79..7bedc1f54b 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_18:25:30 +DATE: 2023-10-25_18:48:30 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.689047e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.689716e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.689840e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 2.165623 sec + 7,623,672,543 cycles # 3.068 GHz + 16,620,823,162 instructions # 2.18 insn per cycle + 2.541779519 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.116092e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.116360e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.116394e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.384767 sec + 11,295,486,944 cycles # 3.039 GHz + 26,143,309,789 instructions # 2.31 insn per cycle + 3.772489660 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 9.872263e-03 +Avg ME (F77/CUDA) = 9.8722599015656498E-003 +Relative difference = 3.1385249252060663e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 8.641941e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
8.642182e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.642182e+01 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 6.112149 sec - 21,300,997,014 cycles:u # 3.484 GHz - 54,249,790,190 instructions:u # 2.55 insn per cycle - 6.114042159 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:31979) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.338757e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.339014e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.339014e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.354208 sec + 19,488,655,111 cycles # 3.066 GHz + 54,285,293,279 instructions # 2.79 insn per cycle + 6.358206624 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:31983) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.952820e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.952935e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.952935e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.705908 sec - 9,430,700,860 cycles:u # 3.483 GHz - 26,089,170,604 instructions:u # 2.77 insn per cycle - 2.707618648 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.580188e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.580272e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.580272e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 3.350145 sec + 9,541,746,297 cycles # 2.846 GHz + 26,114,002,349 instructions # 2.74 insn per cycle + 3.354075604 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:95979) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.439277e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.439874e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.439874e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.191765 sec - 4,032,660,347 cycles:u # 3.380 GHz - 9,312,691,267 instructions:u # 2.31 insn per cycle - 1.193592027 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.673808e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.674247e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.674247e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 
9.825548e-06 ) GeV^-6 +TOTAL : 1.442586 sec + 4,186,759,479 cycles # 2.896 GHz + 9,337,503,071 instructions # 2.23 insn per cycle + 1.446618657 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84147) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 5.042412e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.043185e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.043185e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.049435 sec - 3,549,337,707 cycles:u # 3.378 GHz - 8,289,077,085 instructions:u # 2.34 insn per cycle - 1.051272776 seconds time elapsed +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.207665e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.208316e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.208316e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.260207 sec + 3,641,116,614 cycles # 2.881 GHz + 8,312,794,650 instructions # 2.28 insn per cycle + 1.264172407 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:83817) (512y: 79) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 7.254639e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.256437e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
7.256437e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 0.730440 sec - 2,032,807,695 cycles:u # 2.777 GHz - 4,214,224,961 instructions:u # 2.07 insn per cycle - 0.732426822 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2285) (512y: 93) (512z:82779) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.791929e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.792527e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.792527e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.397651 sec + 2,653,173,022 cycles # 1.895 GHz + 4,233,021,275 instructions # 1.60 insn per cycle + 1.401768259 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2292) (512y: 93) (512z:82780) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index ef9c5416a6..0525b2e4c1 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_18:25:54 +DATE: 2023-10-25_18:49:30 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.679608e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.680082e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.680202e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 2.167186 sec + 7,636,339,894 cycles # 3.072 GHz + 15,813,775,134 instructions # 2.07 insn per cycle + 2.542843200 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.107918e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.108184e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.108215e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.395689 sec + 11,403,686,722 cycles # 3.066 GHz + 26,502,494,448 instructions # 2.32 insn per cycle + 3.775763409 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 9.872263e-03 +Avg ME (F77/CUDA) = 9.8722599015656498E-003 +Relative difference = 3.1385249252060663e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 9.597352e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
9.597647e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.597647e+01 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 5.503438 sec - 19,183,730,675 cycles:u # 3.485 GHz - 54,254,374,083 instructions:u # 2.83 insn per cycle - 5.505347088 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32422) (avx2: 0) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.358962e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.359192e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.359192e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.333189 sec + 19,421,629,325 cycles # 3.065 GHz + 54,272,919,506 instructions # 2.79 insn per cycle + 6.337193550 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32142) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.964366e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.964483e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.964483e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.689963 sec - 9,374,659,056 cycles:u # 3.483 GHz - 26,004,286,936 instructions:u # 2.77 insn per cycle - 2.691715090 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.573591e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.573676e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.573676e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 3.359769 sec + 9,491,435,902 cycles # 2.826 GHz + 26,031,969,325 instructions # 2.74 insn per cycle + 3.363778642 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:95858) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.493450e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.494038e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.494038e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.177107 sec - 3,983,648,475 cycles:u # 3.380 GHz - 9,292,970,405 instructions:u # 2.33 insn per cycle - 1.178850204 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.730901e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.731358e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.731358e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 
9.825548e-06 ) GeV^-6 +TOTAL : 1.420536 sec + 4,117,938,873 cycles # 2.893 GHz + 9,317,350,688 instructions # 2.26 insn per cycle + 1.424344563 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:83787) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 5.091865e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.092631e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.092631e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.038968 sec - 3,515,865,981 cycles:u # 3.380 GHz - 8,285,400,038 instructions:u # 2.36 insn per cycle - 1.040689874 seconds time elapsed +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.227741e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.228352e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.228352e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.254191 sec + 3,641,355,182 cycles # 2.896 GHz + 8,309,383,106 instructions # 2.28 insn per cycle + 1.258235043 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:83306) (512y: 229) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check.exe -p 1 256 2 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 7.348142e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.349885e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
7.349885e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 0.720620 sec - 2,007,777,807 cycles:u # 2.781 GHz - 4,213,120,277 instructions:u # 2.10 insn per cycle - 0.722319298 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1729) (512y: 175) (512z:82792) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.820448e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.821095e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.821095e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.387479 sec + 2,638,179,282 cycles # 1.897 GHz + 4,231,949,116 instructions # 1.60 insn per cycle + 1.391365284 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1731) (512y: 175) (512z:82815) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index c78ea1b1f4..db66144b99 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_18:23:40 +DATE: 2023-10-25_18:43:12 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.996510e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.551380e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.892828e+07 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.438393 sec + 1,953,535,468 cycles # 3.006 GHz + 2,779,051,405 instructions # 1.42 insn per cycle + 0.706976484 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.793351e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.668509e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.060188e+07 ) sec^-1 +MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 +TOTAL : 0.516556 sec + 2,254,798,816 cycles # 3.015 GHz + 3,256,611,216 instructions # 1.44 insn per cycle + 0.804635125 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 5.622436e-01 +Avg ME (F77/CUDA) = 0.56224343220024076 +Relative difference = 2.984467216677476e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.437753e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.477972e+05 
) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.477972e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 1.165368 sec - 4,054,699,564 cycles:u # 3.474 GHz - 12,823,868,306 instructions:u # 3.16 insn per cycle - 1.167587268 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.142180e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.166897e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.166897e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 1.455956 sec + 4,526,705,701 cycles # 3.102 GHz + 12,813,772,224 instructions # 2.83 insn per cycle + 1.459879572 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 732) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 2.9844565299804477e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.604820e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.735660e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.735660e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 0.653112 sec - 2,269,463,429 cycles:u # 3.467 GHz - 7,203,431,560 instructions:u # 3.17 insn per cycle - 0.655053107 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.059229e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.139815e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.139815e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.816563 sec + 2,541,682,069 cycles # 3.100 GHz + 7,194,219,151 instructions # 2.83 insn per cycle + 0.820635450 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3150) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 2.9844565299804477e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.864183e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.322274e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.322274e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 0.361048 sec - 1,218,175,859 cycles:u # 3.358 GHz - 2,970,266,811 instructions:u # 2.44 insn per cycle - 0.363052441 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.555525e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.809689e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.809689e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.482439 sec + 1,365,016,241 cycles # 2.809 GHz + 2,962,982,028 instructions # 2.17 insn per cycle + 0.486447941 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3017) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 2.9844659193456305e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 5.217957e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.759741e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.759741e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 0.338661 sec - 1,142,726,892 cycles:u # 3.358 GHz - 2,816,190,155 instructions:u # 2.46 insn per cycle - 0.340687650 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.042576e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.357609e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.357609e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 
1.677228e+01 ) GeV^-2 +TOTAL : 0.426283 sec + 1,250,204,594 cycles # 2.908 GHz + 2,816,555,243 instructions # 2.25 insn per cycle + 0.430386207 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2780) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 2.9844659193456305e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 5.137635e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.646009e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.646009e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 0.345214 sec - 986,195,561 cycles:u # 2.843 GHz - 1,801,659,388 instructions:u # 1.83 insn per cycle - 0.347223761 seconds time elapsed -=Symbols in 
CPPProcess.o= (~sse4: 0) (avx2: 1375) (512y: 106) (512z: 2270) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.853383e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.013794e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.013794e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.597884 sec + 1,199,308,383 cycles # 1.995 GHz + 1,804,468,596 instructions # 1.50 insn per cycle + 0.601975092 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1380) (512y: 106) (512z: 2270) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index 056b4b9596..55664f3ef7 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -36,31 +36,77 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_18:43:47 +DATE: 2023-10-25_19:05:04 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.715956e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.423473e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.423473e+07 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.466815 sec + 2,046,266,197 cycles # 2.991 GHz + 3,039,133,939 instructions # 1.49 insn per cycle + 0.742578114 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.437711e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.623194e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.623194e+07 ) sec^-1 +MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 +TOTAL : 0.730298 sec + 2,928,685,026 cycles # 3.010 GHz + 4,469,035,726 instructions # 1.53 insn per cycle + 1.030829307 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 5.622436e-01 +Avg ME (F77/CUDA) = 0.56224343220024076 +Relative difference = 
2.984467216677476e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.440207e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.480666e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.480666e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 1.166780 sec - 4,056,392,494 cycles:u # 3.471 GHz - 12,832,877,645 instructions:u # 3.16 insn per cycle - 1.169040681 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.130473e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.155364e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.155364e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 1.477047 sec + 4,559,807,823 cycles # 3.080 GHz + 12,820,937,643 instructions # 2.81 insn per cycle + 1.481171657 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 732) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -71,23 +117,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.598283e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.727901e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.727901e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 0.659249 sec - 2,286,538,153 cycles:u # 3.459 GHz - 7,247,797,470 instructions:u # 3.17 insn per cycle - 0.661412134 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.037342e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.116140e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.116140e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.831756 sec + 2,576,245,613 cycles # 3.084 GHz + 7,244,217,190 instructions # 2.81 insn per cycle + 0.836091661 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3150) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -98,23 +144,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.828667e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.280660e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.280660e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 0.368356 sec - 1,239,463,042 cycles:u # 3.348 GHz - 3,015,665,250 instructions:u # 2.43 insn per cycle - 0.370564776 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.548861e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.803617e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.803617e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 
1.677228e+01 ) GeV^-2 +TOTAL : 0.489750 sec + 1,401,953,569 cycles # 2.841 GHz + 3,012,124,484 instructions # 2.15 insn per cycle + 0.494067218 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3017) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -125,23 +171,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 5.179908e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.703056e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.703056e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 0.345463 sec - 1,160,847,625 cycles:u # 3.342 GHz - 2,861,588,426 instructions:u # 2.47 insn per cycle - 0.347703208 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.888848e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.196973e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.196973e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.451008 sec + 1,291,042,478 cycles # 2.840 GHz + 2,867,771,305 instructions # 2.22 insn per cycle + 0.455219321 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2780) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 @@ -152,23 +198,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 5.103280e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.606771e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.606771e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 0.352148 sec - 1,005,187,952 cycles:u # 2.839 GHz - 1,835,830,680 instructions:u # 1.83 insn per cycle - 0.354435218 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1375) (512y: 106) (512z: 2270) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.837117e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.990188e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.990188e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.606364 sec + 1,227,732,873 cycles # 2.013 GHz + 1,842,233,991 instructions # 1.50 insn per cycle + 0.610509212 seconds time elapsed 
+=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1380) (512y: 106) (512z: 2270) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index 2f0acc46f1..b5138c5dae 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_18:23:45 +DATE: 2023-10-25_18:43:29 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.920168e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.374674e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.702657e+07 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.437720 sec + 1,950,126,017 cycles # 3.006 GHz + 2,773,154,214 instructions # 1.42 insn per cycle + 0.705954337 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.766528e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.558717e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.939267e+07 ) sec^-1 +MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 +TOTAL : 0.513041 sec + 2,254,005,524 cycles # 3.033 GHz + 3,263,301,476 instructions # 1.45 insn per cycle + 0.800914962 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 5.622436e-01 +Avg ME (F77/CUDA) = 0.56224343220024076 +Relative difference = 2.984467216677476e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.459023e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.500620e+05 
) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.500620e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 1.148633 sec - 3,996,615,161 cycles:u # 3.474 GHz - 12,702,510,463 instructions:u # 3.18 insn per cycle - 1.150829827 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.153195e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.178574e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.178574e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 1.442048 sec + 4,474,102,808 cycles # 3.096 GHz + 12,693,000,655 instructions # 2.84 insn per cycle + 1.446019473 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 687) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 2.9844565299804477e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.646082e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.781072e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.781072e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 0.643530 sec - 2,235,534,308 cycles:u # 3.465 GHz - 7,067,656,593 instructions:u # 3.16 insn per cycle - 0.645546900 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.053414e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.135311e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.135311e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.819184 sec + 2,493,704,947 cycles # 3.032 GHz + 7,048,175,291 instructions # 2.83 insn per cycle + 0.823254604 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2966) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 2.9844565299804477e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.483018e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.870529e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.870529e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 0.389493 sec - 1,314,840,088 cycles:u # 3.361 GHz - 3,202,694,345 instructions:u # 2.44 insn per cycle - 0.391531874 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.165376e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.368230e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.368230e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.539826 sec + 1,467,619,890 cycles # 2.700 GHz + 3,195,865,906 instructions # 2.18 insn per cycle + 0.544230401 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3078) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 2.9844659193456305e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.661378e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.079783e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.079783e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 0.376002 sec - 1,269,191,597 cycles:u # 3.360 GHz - 3,098,873,295 instructions:u # 2.44 insn per cycle - 0.378031092 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.602530e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.851517e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.851517e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 
1.677228e+01 ) GeV^-2 +TOTAL : 0.475458 sec + 1,396,242,750 cycles # 2.915 GHz + 3,099,467,840 instructions # 2.22 insn per cycle + 0.479442989 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2785) (512y: 257) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 2.9844659193456305e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 5.011584e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.504113e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.504113e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 0.352637 sec - 1,005,434,854 cycles:u # 2.839 GHz - 2,068,533,599 instructions:u # 2.06 insn per cycle - 0.354543034 seconds time elapsed +OMP threads 
/ `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.759805e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.905242e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.905242e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.616827 sec + 1,241,766,049 cycles # 2.002 GHz + 2,069,716,754 instructions # 1.67 insn per cycle + 0.620974760 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1197) (512y: 194) (512z: 2426) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 0038e621e4..8a44a3dd20 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_18:23:51 +DATE: 2023-10-25_18:43:45 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.953088e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.256905e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.378672e+08 ) sec^-1 +MeanMatrixElemValue = ( 3.402886e+01 +- 1.677500e+01 ) GeV^-2 +TOTAL : 0.434393 sec + 1,939,780,167 cycles # 3.008 GHz + 2,743,949,203 instructions # 1.41 insn per cycle + 0.703990460 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 168 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.221116e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.860706e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.982864e+08 ) sec^-1 +MeanMatrixElemValue = ( 4.166198e+02 +- 2.517590e+02 ) GeV^-2 +TOTAL : 0.466982 sec + 2,071,697,565 cycles # 3.013 GHz + 2,925,920,597 instructions # 1.41 insn per cycle + 0.745407679 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 5.619520e-01 +Avg ME (F77/CUDA) = 0.56225629328206139 +Relative difference = 0.0005414933696496947 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.514673e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.552023e+05 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.552023e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.678308e+04 +- 6.638991e+04 ) GeV^-2 -TOTAL : 1.106158 sec - 3,843,430,074 cycles:u # 3.469 GHz - 12,745,964,839 instructions:u # 3.32 insn per cycle - 1.108356847 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.175069e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.202207e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.202207e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 +TOTAL : 1.414014 sec + 4,387,994,343 cycles # 3.096 GHz + 12,757,087,191 instructions # 2.91 insn per cycle + 1.417904644 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 693) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 1.714833339642312e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.067589e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.379074e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.379074e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.678307e+04 +- 6.638990e+04 ) GeV^-2 -TOTAL : 0.425688 sec - 1,477,070,298 cycles:u # 3.457 GHz - 4,221,584,785 instructions:u # 2.86 insn per cycle - 0.427604681 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.258633e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.477473e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.477473e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 +TOTAL : 0.522116 sec + 1,618,126,198 cycles # 3.079 GHz + 4,232,277,564 instructions # 2.62 insn per cycle + 0.526044496 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3709) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 4.180373005172264e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 8.565999e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.964695e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.964695e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.763173e+04 +- 6.724066e+04 ) GeV^-2 -TOTAL : 0.215287 sec - 725,668,773 cycles:u # 3.348 GHz - 1,780,142,837 instructions:u # 2.45 insn per cycle - 0.217151599 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.608070e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.524156e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.524156e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 +TOTAL : 0.267436 sec + 793,046,821 cycles # 2.927 GHz + 1,796,478,483 instructions # 2.27 insn per cycle + 0.271487422 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3614) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 6.001494295464523e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 9.322151e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.079867e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.079867e+06 ) sec^-1 -MeanMatrixElemValue = ( 6.763173e+04 +- 6.724066e+04 ) GeV^-2 -TOTAL : 0.198919 sec - 669,793,070 cycles:u # 3.341 GHz - 1,693,786,020 instructions:u # 2.53 insn per cycle - 0.200815058 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.057162e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.116228e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.116228e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) 
GeV^-2 +TOTAL : 0.251834 sec + 743,002,845 cycles # 2.913 GHz + 1,717,820,666 instructions # 2.31 insn per cycle + 0.255754900 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3443) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 6.001494295464523e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 9.742845e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.143134e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.143134e+06 ) sec^-1 -MeanMatrixElemValue = ( 6.763172e+04 +- 6.724065e+04 ) GeV^-2 -TOTAL : 0.192952 sec - 558,318,676 cycles:u # 2.871 GHz - 1,180,879,475 instructions:u # 2.12 insn per cycle - 0.194841259 seconds time elapsed -=Symbols in CPPProcess.o= 
(~sse4: 0) (avx2: 2077) (512y: 38) (512z: 2493) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.409485e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.023233e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.023233e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 +TOTAL : 0.324434 sec + 678,869,673 cycles # 2.072 GHz + 1,206,887,131 instructions # 1.78 insn per cycle + 0.328433910 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2007) (512y: 32) (512z: 2493) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index 801d12d22e..35147cd718 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -36,31 +36,77 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_18:43:52 +DATE: 2023-10-25_19:05:21 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.575149e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.561751e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.561751e+07 ) sec^-1 +MeanMatrixElemValue = ( 3.419752e+01 +- 1.682900e+01 ) GeV^-2 +TOTAL : 0.447090 sec + 2,006,667,406 cycles # 3.008 GHz + 2,921,553,347 instructions # 1.46 insn per cycle + 0.724337546 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 168 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.387556e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.246561e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.246561e+07 ) sec^-1 +MeanMatrixElemValue = ( 4.349385e+02 +- 2.541442e+02 ) GeV^-2 +TOTAL : 0.606579 sec + 2,519,662,089 cycles # 3.025 GHz + 3,861,788,300 instructions # 1.53 insn per cycle + 0.890689404 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 5.619520e-01 +Avg ME (F77/CUDA) = 0.56225629328206139 +Relative difference = 
0.0005414933696496947 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.514995e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.552275e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.552275e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.678308e+04 +- 6.638991e+04 ) GeV^-2 -TOTAL : 1.109293 sec - 3,853,846,022 cycles:u # 3.472 GHz - 12,751,320,522 instructions:u # 3.31 insn per cycle - 1.111546754 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.174105e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.201186e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.201186e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 +TOTAL : 1.417963 sec + 4,402,481,360 cycles # 3.098 GHz + 12,761,539,634 instructions # 2.90 insn per cycle + 1.421844145 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 693) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -71,23 +117,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.079923e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.363231e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.363231e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.678307e+04 +- 6.638990e+04 ) GeV^-2 -TOTAL : 0.427203 sec - 1,481,245,124 cycles:u # 3.454 GHz - 4,266,979,277 instructions:u # 2.88 insn per cycle - 0.429156945 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.229854e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.445494e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.445494e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 +TOTAL : 0.530536 sec + 1,636,187,995 cycles # 3.064 GHz + 4,280,682,276 instructions # 2.62 insn per cycle + 0.534503101 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3709) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -98,23 +144,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 8.556684e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.905955e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.905955e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.763173e+04 +- 6.724066e+04 ) GeV^-2 -TOTAL : 0.218127 sec - 733,830,052 cycles:u # 3.339 GHz - 1,814,316,830 instructions:u # 2.47 insn per cycle - 0.220133581 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.470080e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.344369e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.344369e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.422183e+01 +- 
1.683665e+01 ) GeV^-2 +TOTAL : 0.277101 sec + 811,784,719 cycles # 2.894 GHz + 1,833,505,900 instructions # 2.26 insn per cycle + 0.281119088 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3614) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -125,23 +171,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 9.250743e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.069445e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.069445e+06 ) sec^-1 -MeanMatrixElemValue = ( 6.763173e+04 +- 6.724066e+04 ) GeV^-2 -TOTAL : 0.202951 sec - 682,021,599 cycles:u # 3.334 GHz - 1,727,956,692 instructions:u # 2.53 insn per cycle - 0.204910557 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.578285e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.554614e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.554614e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 +TOTAL : 0.273667 sec + 760,813,920 cycles # 2.744 GHz + 1,755,015,790 instructions # 2.31 insn per cycle + 0.277772969 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3443) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 @@ -152,23 +198,23 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 9.660570e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.127875e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.127875e+06 ) sec^-1 -MeanMatrixElemValue = ( 6.763172e+04 +- 6.724065e+04 ) GeV^-2 -TOTAL : 0.197120 sec - 571,160,372 cycles:u # 2.873 GHz - 1,219,652,248 instructions:u # 2.14 insn per cycle - 0.199110775 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 38) (512z: 2493) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.356513e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.945123e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.945123e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 +TOTAL : 0.331761 sec + 698,245,569 cycles # 2.083 GHz + 1,248,346,490 instructions # 1.79 insn per cycle + 0.335801876 seconds time elapsed +=Symbols 
in CPPProcess.o= (~sse4: 0) (avx2: 2007) (512y: 32) (512z: 2493) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index a9290b7881..da1ead0f77 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_18:23:55 +DATE: 2023-10-25_18:44:02 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.781171e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.253056e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.376604e+08 ) sec^-1 +MeanMatrixElemValue = ( 3.402886e+01 +- 1.677500e+01 ) GeV^-2 +TOTAL : 0.437609 sec + 1,935,685,013 cycles # 2.990 GHz + 2,706,812,030 instructions # 1.40 insn per cycle + 0.705999265 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 162 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.205131e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.854735e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.976134e+08 ) sec^-1 +MeanMatrixElemValue = ( 4.166198e+02 +- 2.517590e+02 ) GeV^-2 +TOTAL : 0.466415 sec + 2,068,328,114 cycles # 3.010 GHz + 2,987,091,963 instructions # 1.44 insn per cycle + 0.744952769 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 5.619520e-01 +Avg ME (F77/CUDA) = 0.56225629328206139 +Relative difference = 0.0005414933696496947 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.531080e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.569239e+05 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.569239e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.678308e+04 +- 6.638991e+04 ) GeV^-2 -TOTAL : 1.094124 sec - 3,808,409,057 cycles:u # 3.475 GHz - 12,645,578,171 instructions:u # 3.32 insn per cycle - 1.096210625 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.180324e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.207350e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.207350e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 +TOTAL : 1.407934 sec + 4,364,873,703 cycles # 3.093 GHz + 12,656,518,331 instructions # 2.90 insn per cycle + 1.411910028 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 644) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 1.714833339642312e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.560755e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.917509e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.917509e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.678307e+04 +- 6.638990e+04 ) GeV^-2 -TOTAL : 0.381890 sec - 1,325,439,293 cycles:u # 3.457 GHz - 4,110,059,235 instructions:u # 3.10 insn per cycle - 0.383809713 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.586065e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.857911e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.857911e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 +TOTAL : 0.475849 sec + 1,476,787,317 cycles # 3.082 GHz + 4,120,727,484 instructions # 2.79 insn per cycle + 0.479813364 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3414) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 4.180373005172264e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 6.684337e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.501881e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.501881e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.763173e+04 +- 6.724066e+04 ) GeV^-2 -TOTAL : 0.269044 sec - 908,090,394 cycles:u # 3.356 GHz - 2,108,200,666 instructions:u # 2.32 insn per cycle - 0.270961294 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.086145e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.608974e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.608974e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 +TOTAL : 0.341638 sec + 1,007,266,248 cycles # 2.920 GHz + 2,124,817,247 instructions # 2.11 insn per cycle + 0.345623716 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4206) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 6.001494295464523e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 7.055180e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.882580e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.882580e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.763173e+04 +- 6.724066e+04 ) GeV^-2 -TOTAL : 0.255242 sec - 861,120,176 cycles:u # 3.354 GHz - 2,019,447,283 instructions:u # 2.35 insn per cycle - 0.257129923 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.281396e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.848165e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.848165e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) 
GeV^-2 +TOTAL : 0.329501 sec + 970,497,596 cycles # 2.916 GHz + 2,043,945,912 instructions # 2.11 insn per cycle + 0.333450865 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4013) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 6.001494295464523e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 7.012233e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.853713e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.853713e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.763172e+04 +- 6.724065e+04 ) GeV^-2 -TOTAL : 0.258506 sec - 740,460,053 cycles:u # 2.847 GHz - 1,548,901,955 instructions:u # 2.09 insn per cycle - 0.260458676 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 
0) (avx2: 2526) (512y: 22) (512z: 2998) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.073132e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.415220e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.415220e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 +TOTAL : 0.424070 sec + 856,736,633 cycles # 2.004 GHz + 1,573,705,553 instructions # 1.84 insn per cycle + 0.428192842 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2446) (512y: 16) (512z: 2998) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 7270b50cac..a1190251f1 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_18:24:00 +DATE: 2023-10-25_18:44:18 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.018464e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.595321e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.946310e+07 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.439342 sec + 1,951,736,780 cycles # 3.004 GHz + 2,769,340,845 instructions # 1.42 insn per cycle + 0.707751210 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.803845e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.713692e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.109529e+07 ) sec^-1 +MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 +TOTAL : 0.513063 sec + 2,243,341,028 cycles # 3.023 GHz + 3,247,351,075 instructions # 1.45 insn per cycle + 0.800783669 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 5.622436e-01 +Avg ME (F77/CUDA) = 0.56224344354681244 +Relative difference = 2.782658397826986e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.426490e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.466121e+05 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.466121e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 1.174486 sec - 4,086,390,900 cycles:u # 3.474 GHz - 12,795,023,810 instructions:u # 3.13 insn per cycle - 1.176666020 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.141045e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.165652e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.165652e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 1.457233 sec + 4,537,826,727 cycles # 3.107 GHz + 12,784,913,374 instructions # 2.82 insn per cycle + 1.461163978 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 2.608483884671339e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.610669e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.741846e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.741846e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 0.651639 sec - 2,264,533,183 cycles:u # 3.467 GHz - 7,125,840,013 instructions:u # 3.15 insn per cycle - 0.653615825 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.061046e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.141573e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.141573e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.815514 sec + 2,537,540,226 cycles # 3.099 GHz + 7,116,439,666 instructions # 2.80 insn per cycle + 0.819589417 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3215) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 2.608483884671339e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.967395e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.448723e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.448723e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 0.354118 sec - 1,194,563,246 cycles:u # 3.358 GHz - 2,944,091,699 instructions:u # 2.46 insn per cycle - 0.356096854 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3175) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.732172e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.005113e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.005113e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.460418 sec + 1,348,361,279 cycles # 2.906 GHz + 2,936,931,800 instructions # 2.18 insn per cycle + 0.464548729 seconds time elapsed +=Symbols in CPPProcess.o= 
(~sse4: 0) (avx2: 3174) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 2.777561258016791e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 5.337763e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.892556e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.892556e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 0.331115 sec - 1,117,959,861 cycles:u # 3.360 GHz - 2,790,321,482 instructions:u # 2.50 insn per cycle - 0.333046991 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.149560e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.481402e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.481402e+05 ) sec^-1 
+MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.416544 sec + 1,218,162,673 cycles # 2.902 GHz + 2,791,024,677 instructions # 2.29 insn per cycle + 0.420753319 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2938) (512y: 110) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 2.777561258016791e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.915105e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.382882e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.382882e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 0.359750 sec - 1,026,522,523 cycles:u # 2.841 GHz - 1,828,751,628 instructions:u # 1.78 insn per cycle - 
0.361734455 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1723) (512y: 114) (512z: 2312) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.487755e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.613101e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.613101e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.683117 sec + 1,240,069,199 cycles # 1.806 GHz + 1,831,774,203 instructions # 1.48 insn per cycle + 0.687252060 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1728) (512y: 114) (512z: 2312) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index f252d093ae..2a285d3003 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -36,30 +36,64 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_18:24:05 +DATE: 2023-10-25_18:44:34 -On olgpu-03.cern.ch [CPU: Intel(R) Xeon(R) Platinum 8362 CPU] [GPU: ]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.964309e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.404642e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.725368e+07 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.438377 sec + 1,959,656,771 cycles # 3.015 GHz + 2,794,102,152 instructions # 1.43 insn per cycle + 0.707326409 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.773177e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.592536e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.977031e+07 ) sec^-1 +MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 +TOTAL : 0.515907 sec + 2,236,298,354 cycles # 2.994 GHz + 3,229,465,087 instructions # 1.44 insn per cycle + 0.804040631 seconds time elapsed +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 +Avg ME (C++/CUDA) = 5.622436e-01 +Avg ME (F77/CUDA) = 0.56224344354681244 +Relative difference = 2.782658397826986e-07 +OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 1.452248e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.493296e+05 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.493296e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 1.153491 sec - 4,014,500,686 cycles:u # 3.475 GHz - 12,678,908,030 instructions:u # 3.16 insn per cycle - 1.155611467 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.106563e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.131055e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.131055e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 1.502633 sec + 4,502,084,245 cycles # 2.989 GHz + 12,668,944,796 instructions # 2.81 insn per cycle + 1.506842459 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 659) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -69,23 +103,23 @@ Relative difference = 2.608483884671339e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 2.643079e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.777685e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.777685e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 0.644384 sec - 2,237,562,646 cycles:u # 3.463 GHz - 6,915,339,109 instructions:u # 3.09 insn per cycle - 0.646514361 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.101140e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.184323e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.184323e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.800328 sec + 2,487,833,256 cycles # 3.095 GHz + 6,905,789,276 instructions # 2.78 insn per cycle + 0.804489667 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3036) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -95,23 +129,23 @@ Relative difference = 2.608483884671339e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.501874e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.892092e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.892092e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 0.387750 sec - 1,309,458,986 cycles:u # 3.363 GHz - 3,175,203,936 instructions:u # 2.42 insn per cycle - 0.389703740 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3285) (512y: 0) (512z: 0) +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.392821e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.617920e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.617920e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.504314 sec + 1,480,047,698 cycles # 2.915 GHz + 3,168,067,665 instructions # 2.14 insn per cycle + 0.508419797 seconds time elapsed +=Symbols in CPPProcess.o= 
(~sse4: 0) (avx2: 3284) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -121,23 +155,23 @@ Relative difference = 2.777561258016791e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.706458e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.134392e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.134392e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 0.372552 sec - 1,257,581,323 cycles:u # 3.361 GHz - 3,039,574,412 instructions:u # 2.42 insn per cycle - 0.374531872 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.646089e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.903192e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.903192e+05 ) sec^-1 
+MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.470441 sec + 1,380,675,105 cycles # 2.913 GHz + 3,040,126,384 instructions # 2.20 insn per cycle + 0.474602540 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2936) (512y: 265) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 @@ -147,23 +181,23 @@ Relative difference = 2.777561258016791e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.2.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 64 -EvtsPerSec[Rmb+ME] (23) = ( 4.923796e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.399988e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.399988e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.589975e+04 +- 6.551194e+04 ) GeV^-2 -TOTAL : 0.358523 sec - 1,021,718,285 cycles:u # 2.838 GHz - 2,002,976,357 instructions:u # 1.96 insn per cycle - 
0.360403909 seconds time elapsed +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.746832e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.887698e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.887698e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.620483 sec + 1,249,575,366 cycles # 2.003 GHz + 2,003,971,184 instructions # 1.60 insn per cycle + 0.624635502 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1520) (512y: 202) (512z: 2499) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest.exe -[ PASSED ] 3 tests. +[ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 diff --git a/epochX/cudacpp/tput/throughputX.sh b/epochX/cudacpp/tput/throughputX.sh index 897feb1c09..bd656c5b93 100755 --- a/epochX/cudacpp/tput/throughputX.sh +++ b/epochX/cudacpp/tput/throughputX.sh @@ -565,7 +565,7 @@ function runNcuReq() { set +x } -#if nvidia-smi -L > /dev/null 2>&1; then gpuTxt="$(nvidia-smi -L | wc -l)x $(nvidia-smi -L | awk '{print $3,$4}' | sort -u)"; else gpuTxt=none; fi +if nvidia-smi -L > /dev/null 2>&1; then gpuTxt="$(nvidia-smi -L | wc -l)x $(nvidia-smi -L | awk '{print $3,$4}' | sort -u)"; else gpuTxt=none; fi if [ "${unames}" == "Darwin" ]; then cpuTxt=$(sysctl -h machdep.cpu.brand_string) cpuTxt=${cpuTxt/machdep.cpu.brand_string: } From b20257ad1c0942d27463a89f01a61e4fbc7cdea7 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 11:56:34 +0200 Subject: [PATCH 006/119] [oct23av] regenerate 8 processes mad (move from 
itscrd80 to itscrd90) No real code differences - only a few irrelevant changes due to the new code generating environment on itscrd90 Using codebases as of commit bd255c01fb1cf5377de344c42089765756fd75e1 (Wed Aug 16 15:05:27 2023 +0200) --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 72 +++---- .../ee_mumu.mad/Cards/me5_configuration.txt | 4 +- .../ee_mumu.mad/Source/DHELAS/aloha_file.inc | 2 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42833 -> 42831 bytes .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 74 +++---- .../gg_tt.mad/Cards/me5_configuration.txt | 4 +- .../gg_tt.mad/Source/DHELAS/aloha_file.inc | 2 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42833 -> 42831 bytes .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 84 ++++---- .../gg_tt01g.mad/Cards/me5_configuration.txt | 4 +- .../gg_tt01g.mad/Source/DHELAS/aloha_file.inc | 2 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42833 -> 42831 bytes .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 74 +++---- .../gg_ttg.mad/Cards/me5_configuration.txt | 4 +- .../gg_ttg.mad/Source/DHELAS/aloha_file.inc | 2 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42833 -> 42831 bytes .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 76 ++++---- .../gg_ttgg.mad/Cards/me5_configuration.txt | 4 +- .../gg_ttgg.mad/Source/DHELAS/aloha_file.inc | 2 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42833 -> 42831 bytes .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 78 ++++---- .../gg_ttggg.mad/Cards/me5_configuration.txt | 4 +- .../gg_ttggg.mad/Source/DHELAS/aloha_file.inc | 2 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42833 -> 42831 bytes .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 82 ++++---- .../gq_ttq.mad/Cards/me5_configuration.txt | 4 +- .../gq_ttq.mad/Source/DHELAS/aloha_file.inc | 2 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42833 -> 42831 bytes .../CODEGEN_mad_pp_tt012j_log.txt | 182 +++++++++--------- .../pp_tt012j.mad/Cards/me5_configuration.txt | 4 +- .../Source/DHELAS/aloha_file.inc | 2 +- 
.../bin/internal/ufomodel/py3_model.pkl | Bin 42833 -> 42831 bytes 32 files changed, 385 insertions(+), 385 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 2d9aaf2a44..f44e6a17e5 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004520893096923828  +DEBUG: model prefixing takes 0.005282878875732422  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -165,10 +165,10 @@ Load PLUGIN.CUDACPP_OUTPUT INFO: initialize a new directory: CODEGEN_mad_ee_mumu INFO: remove old information in CODEGEN_mad_ee_mumu DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 @@ -176,7 +176,7 @@ INFO: Creating files in directory P1_epem_mupmum DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -201,7 +201,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_epem_mupmum.txt [model_handling.py at line 1336]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_epem_mupmum.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -210,19 +210,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.094 s +Wrote files for 8 helas calls in 0.097 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.174 s +ALOHA: aloha creates 3 routines in 0.203 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.217 s +ALOHA: aloha creates 7 routines in 0.251 s FFV1 FFV1 FFV2 @@ -231,20 +231,20 @@ ALOHA: aloha creates 7 routines in 0.217 s FFV4 FFV2_4 FFV2_4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages @@ -269,12 +269,12 @@ INFO: Generate web pages * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP quit @@ -300,17 +300,17 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile @@ -318,7 +318,7 @@ patching file Source/make_opts patching file bin/internal/banner.py patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f @@ -328,12 +328,12 @@ Hunk #4 succeeded at 267 (offset 18 lines). Hunk #5 succeeded at 312 (offset 18 lines). Hunk #6 succeeded at 410 (offset 14 lines). Hunk #7 succeeded at 478 (offset 8 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done. 
Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README Run "open index.html" to see more information about this process. quit -real 0m2.433s -user 0m2.060s -sys 0m0.335s +real 0m2.536s +user 0m2.251s +sys 0m0.265s diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt b/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt index 5ca005676e..cdeedc7863 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_file.inc index 4f385d6435..738db319fd 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV2_3.o FFV2_0.o FFV4_0.o FFV4_3.o FFV1_0.o FFV1P0_3.o +ALOHARoutine = FFV1_0.o FFV4_3.o FFV1P0_3.o FFV2_0.o FFV4_0.o FFV2_3.o diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/ee_mumu.mad/bin/internal/ufomodel/py3_model.pkl index dc38da0bfa76ea4206a3c5b2d34b98c606f7d044..27a1caae3c115073669b90622e9351ab04166d39 100644 GIT binary patch delta 21 dcmcb3j_Le4rVZZ97>g(SEK_5Qm|VK-4ghc73FH6( delta 23 fcmX?qj_Kk#rVZZ97)vMnEK_6l^>>+EyzCADfsqPs diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index c777d7154a..96c70b75b6 100644 --- 
a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004714488983154297  +DEBUG: model prefixing takes 0.005307674407958984  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.007 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -166,10 +166,10 @@ Load PLUGIN.CUDACPP_OUTPUT INFO: initialize a new directory: CODEGEN_mad_gg_tt INFO: remove old information in CODEGEN_mad_gg_tt DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 @@ -177,7 +177,7 @@ INFO: Creating files in directory P1_gg_ttx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 
6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -206,7 +206,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1336]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -214,22 +214,22 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.104 s +Wrote files for 10 helas calls in 0.108 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.126 s +ALOHA: aloha creates 2 routines in 0.142 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.112 s +ALOHA: aloha creates 4 routines in 0.129 s VVV1 FFV1 FFV1 FFV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) @@ -238,13 +238,13 @@ super_write_set_parameters_onlyfixMajorana (hardcoded=True) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages @@ -269,12 +269,12 @@ INFO: Generate web pages * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP quit @@ -300,17 +300,17 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile @@ -318,16 +318,16 @@ patching file Source/make_opts patching file bin/internal/banner.py patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f patching file driver.f patching file matrix1.f -Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README Run "open index.html" to see more information about this process. 
quit -real 0m2.341s -user 0m1.909s -sys 0m0.352s +real 0m2.316s +user 0m2.039s +sys 0m0.271s diff --git a/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt index 5ca005676e..cdeedc7863 100644 --- a/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_tt.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gg_tt.mad/Source/DHELAS/aloha_file.inc index 59e590217d..5597c614b0 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gg_tt.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1_1.o FFV1_2.o VVV1P0_1.o FFV1_0.o +ALOHARoutine = FFV1_1.o FFV1_0.o FFV1_2.o VVV1P0_1.o diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/gg_tt.mad/bin/internal/ufomodel/py3_model.pkl index dc38da0bfa76ea4206a3c5b2d34b98c606f7d044..27a1caae3c115073669b90622e9351ab04166d39 100644 GIT binary patch delta 21 dcmcb3j_Le4rVZZ97>g(SEK_5Qm|VK-4ghc73FH6( delta 23 fcmX?qj_Kk#rVZZ97)vMnEK_6l^>>+EyzCADfsqPs diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 666f2f1d0b..ed715d1fb9 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. 
Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~; add process g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0046291351318359375  +DEBUG: model prefixing takes 0.005259990692138672  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,14 +155,14 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.007 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.018 s +1 processes with 16 diagrams generated in 0.020 s Total: 2 processes with 19 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -173,10 +173,10 @@ Load PLUGIN.CUDACPP_OUTPUT INFO: initialize a new directory: CODEGEN_mad_gg_tt01g INFO: remove old information in CODEGEN_mad_gg_tt01g DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @2 INFO: Processing color information for process: g g > t t~ g @2 @@ -186,7 +186,7 @@ INFO: Creating files in directory P2_gg_ttxg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: 
process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -217,7 +217,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1336]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -228,7 +228,7 @@ INFO: Creating files in directory P1_gg_ttx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -257,22 +257,22 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1336]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 2 subprocesses (19 diagrams) in 0.039 s -Wrote files for 46 helas calls in 0.247 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s +Wrote files for 46 helas calls in 0.264 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of 
routines with options: P0 -ALOHA: aloha creates 5 routines in 0.276 s +ALOHA: aloha creates 5 routines in 0.319 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -280,7 +280,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.263 s +ALOHA: aloha creates 10 routines in 0.307 s VVV1 VVV1 FFV1 @@ -290,8 +290,8 @@ ALOHA: aloha creates 10 routines in 0.263 s VVVV1 VVVV3 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) @@ -300,13 +300,13 @@ super_write_set_parameters_onlyfixMajorana (hardcoded=True) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages @@ -331,12 +331,12 @@ INFO: Generate web pages * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP quit @@ -362,17 +362,17 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile @@ -380,11 +380,11 @@ patching file Source/make_opts patching file bin/internal/banner.py patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f patching file driver.f patching file matrix1.f -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f patching file driver.f patching file matrix1.f @@ -394,12 +394,12 @@ Hunk #4 succeeded at 265 (offset 16 lines). Hunk #5 succeeded at 310 (offset 16 lines). Hunk #6 succeeded at 434 (offset 38 lines). 
Hunk #7 succeeded at 588 (offset 118 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/README Run "open index.html" to see more information about this process. quit -real 0m2.847s -user 0m2.424s -sys 0m0.365s +real 0m2.924s +user 0m2.596s +sys 0m0.316s diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt index 5ca005676e..cdeedc7863 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_file.inc index 4f2ef3d0d8..50c12b0804 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = VVVV4P0_1.o VVVV3P0_1.o VVVV1P0_1.o FFV1_1.o FFV1_2.o VVV1P0_1.o VVV1_0.o FFV1_0.o FFV1P0_3.o +ALOHARoutine = FFV1_1.o VVVV4P0_1.o FFV1_0.o VVV1_0.o FFV1_2.o VVVV3P0_1.o VVVV1P0_1.o VVV1P0_1.o FFV1P0_3.o diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/gg_tt01g.mad/bin/internal/ufomodel/py3_model.pkl index 
dc38da0bfa76ea4206a3c5b2d34b98c606f7d044..27a1caae3c115073669b90622e9351ab04166d39 100644 GIT binary patch delta 21 dcmcb3j_Le4rVZZ97>g(SEK_5Qm|VK-4ghc73FH6( delta 23 fcmX?qj_Kk#rVZZ97)vMnEK_6l^>>+EyzCADfsqPs diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index eea422eba1..21203e3b73 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004817008972167969  +DEBUG: model prefixing takes 0.005400657653808594  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -166,10 +166,10 @@ Load PLUGIN.CUDACPP_OUTPUT INFO: initialize a new directory: CODEGEN_mad_gg_ttg INFO: remove old information in CODEGEN_mad_gg_ttg DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 @@ -177,7 +177,7 @@ INFO: Creating files in directory P1_gg_ttxg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -208,22 +208,22 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1336]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.035 s -Wrote files for 36 helas calls in 0.153 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s +Wrote files for 36 helas calls in 0.162 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with 
options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.277 s +ALOHA: aloha creates 5 routines in 0.323 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -231,7 +231,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.263 s +ALOHA: aloha creates 10 routines in 0.308 s VVV1 VVV1 FFV1 @@ -241,8 +241,8 @@ ALOHA: aloha creates 10 routines in 0.263 s VVVV1 VVVV3 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) @@ -251,13 +251,13 @@ super_write_set_parameters_onlyfixMajorana (hardcoded=True) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages @@ -282,12 +282,12 @@ INFO: Generate web pages * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP quit @@ -313,17 +313,17 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile @@ -331,7 +331,7 @@ patching file Source/make_opts patching file bin/internal/banner.py patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f patching file driver.f patching file matrix1.f @@ -341,12 +341,12 @@ Hunk #4 succeeded at 265 (offset 16 lines). Hunk #5 succeeded at 310 (offset 16 lines). Hunk #6 succeeded at 434 (offset 38 lines). Hunk #7 succeeded at 588 (offset 118 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/README Run "open index.html" to see more information about this process. 
quit -real 0m2.802s -user 0m2.323s -sys 0m0.356s +real 0m2.824s +user 0m2.491s +sys 0m0.311s diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt index 5ca005676e..cdeedc7863 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_file.inc index 4f2ef3d0d8..50c12b0804 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = VVVV4P0_1.o VVVV3P0_1.o VVVV1P0_1.o FFV1_1.o FFV1_2.o VVV1P0_1.o VVV1_0.o FFV1_0.o FFV1P0_3.o +ALOHARoutine = FFV1_1.o VVVV4P0_1.o FFV1_0.o VVV1_0.o FFV1_2.o VVVV3P0_1.o VVVV1P0_1.o VVV1P0_1.o FFV1P0_3.o diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/gg_ttg.mad/bin/internal/ufomodel/py3_model.pkl index dc38da0bfa76ea4206a3c5b2d34b98c606f7d044..27a1caae3c115073669b90622e9351ab04166d39 100644 GIT binary patch delta 21 dcmcb3j_Le4rVZZ97>g(SEK_5Qm|VK-4ghc73FH6( delta 23 fcmX?qj_Kk#rVZZ97)vMnEK_6l^>>+EyzCADfsqPs diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 0dfbe85bbc..69f66c3b7c 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using 
default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004771232604980469  +DEBUG: model prefixing takes 0.0052814483642578125  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.145 s +1 processes with 123 diagrams generated in 0.155 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -166,10 +166,10 @@ Load PLUGIN.CUDACPP_OUTPUT INFO: initialize a new directory: CODEGEN_mad_gg_ttgg INFO: remove old information in CODEGEN_mad_gg_ttgg DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 @@ -177,7 +177,7 @@ INFO: Creating files in directory P1_gg_ttxgg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: 
process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -210,22 +210,22 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1336]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  INFO: Generating 
Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.385 s -Wrote files for 222 helas calls in 0.655 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.418 s +Wrote files for 222 helas calls in 0.725 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.285 s +ALOHA: aloha creates 5 routines in 0.325 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -233,7 +233,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.268 s +ALOHA: aloha creates 10 routines in 0.309 s VVV1 VVV1 FFV1 @@ -246,8 +246,8 @@ ALOHA: aloha creates 10 routines in 0.268 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) @@ -256,13 +256,13 @@ super_write_set_parameters_onlyfixMajorana (hardcoded=True) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages @@ -287,12 +287,12 @@ INFO: Generate web pages * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP quit @@ -318,17 +318,17 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile @@ -336,7 +336,7 @@ patching file Source/make_opts patching file bin/internal/banner.py patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f patching file driver.f patching file matrix1.f @@ -346,12 +346,12 @@ Hunk #4 succeeded at 297 (offset 48 lines). Hunk #5 succeeded at 342 (offset 48 lines). Hunk #6 succeeded at 830 (offset 434 lines). Hunk #7 succeeded at 1717 (offset 1247 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/README Run "open index.html" to see more information about this process. 
quit -real 0m3.946s -user 0m3.315s -sys 0m0.356s +real 0m3.919s +user 0m3.598s +sys 0m0.302s diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt index 5ca005676e..cdeedc7863 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_file.inc index cf4ec946f8..ec923afd6d 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = VVVV3_0.o VVVV4P0_1.o VVVV3P0_1.o VVVV1P0_1.o FFV1_1.o FFV1_2.o VVV1P0_1.o VVV1_0.o FFV1_0.o FFV1P0_3.o VVVV1_0.o VVVV4_0.o +ALOHARoutine = FFV1_1.o VVVV4_0.o VVVV4P0_1.o FFV1_0.o VVV1_0.o FFV1_2.o VVVV3_0.o VVVV1_0.o VVVV3P0_1.o VVVV1P0_1.o VVV1P0_1.o FFV1P0_3.o diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/gg_ttgg.mad/bin/internal/ufomodel/py3_model.pkl index dc38da0bfa76ea4206a3c5b2d34b98c606f7d044..27a1caae3c115073669b90622e9351ab04166d39 100644 GIT binary patch delta 21 dcmcb3j_Le4rVZZ97>g(SEK_5Qm|VK-4ghc73FH6( delta 23 fcmX?qj_Kk#rVZZ97)vMnEK_6l^>>+EyzCADfsqPs diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index b0f5bcbfef..17a191d92c 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run 
aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0044476985931396484  +DEBUG: model prefixing takes 0.005379438400268555  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.726 s +1 processes with 1240 diagrams generated in 1.856 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -166,20 +166,20 @@ Load PLUGIN.CUDACPP_OUTPUT INFO: initialize a new directory: CODEGEN_mad_gg_ttggg INFO: remove old information in CODEGEN_mad_gg_ttggg DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] -INFO: Color-Flow passed to 1592 term in 30s. Introduce 2768 contraction +INFO: Color-Flow passed to 1592 term in 36s. 
Introduce 2768 contraction DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -214,22 +214,22 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1336]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 
86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 
454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 
813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 5.721 s -Wrote files for 2281 helas calls in 39.057 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.508 s +Wrote files for 2281 helas calls in 46.421 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.276 s +ALOHA: aloha creates 5 routines in 0.312 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model 
(create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -237,7 +237,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.271 s +ALOHA: aloha creates 10 routines in 0.306 s VVV1 VVV1 FFV1 @@ -250,8 +250,8 @@ ALOHA: aloha creates 10 routines in 0.271 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) @@ -260,13 +260,13 @@ super_write_set_parameters_onlyfixMajorana (hardcoded=True) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.h -FileWriter for 
/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages @@ -291,12 +291,12 @@ INFO: Generate web pages * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP quit @@ -322,17 +322,17 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile @@ -340,7 +340,7 @@ patching file Source/make_opts patching file bin/internal/banner.py patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f patching file driver.f patching file matrix1.f @@ -350,12 +350,12 @@ Hunk #4 succeeded at 361 (offset 112 lines). Hunk #5 succeeded at 406 (offset 112 lines). Hunk #6 succeeded at 9862 (offset 9466 lines). Hunk #7 succeeded at 19616 (offset 19146 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done. 
Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/README Run "open index.html" to see more information about this process. quit -real 0m49.632s -user 0m48.102s -sys 0m0.988s +real 0m57.640s +user 0m56.636s +sys 0m0.801s diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt index 5ca005676e..cdeedc7863 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_file.inc index cf4ec946f8..ec923afd6d 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = VVVV3_0.o VVVV4P0_1.o VVVV3P0_1.o VVVV1P0_1.o FFV1_1.o FFV1_2.o VVV1P0_1.o VVV1_0.o FFV1_0.o FFV1P0_3.o VVVV1_0.o VVVV4_0.o +ALOHARoutine = FFV1_1.o VVVV4_0.o VVVV4P0_1.o FFV1_0.o VVV1_0.o FFV1_2.o VVVV3_0.o VVVV1_0.o VVVV3P0_1.o VVVV1P0_1.o VVV1P0_1.o FFV1P0_3.o diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/gg_ttggg.mad/bin/internal/ufomodel/py3_model.pkl index dc38da0bfa76ea4206a3c5b2d34b98c606f7d044..27a1caae3c115073669b90622e9351ab04166d39 100644 GIT binary patch delta 21 dcmcb3j_Le4rVZZ97>g(SEK_5Qm|VK-4ghc73FH6( delta 23 fcmX?qj_Kk#rVZZ97)vMnEK_6l^>>+EyzCADfsqPs diff --git 
a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index c3d29c5b80..98599d4160 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004678249359130859  +DEBUG: model prefixing takes 0.005768775939941406  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.070 s +8 processes with 40 diagrams generated in 0.076 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -180,10 +180,10 @@ Load PLUGIN.CUDACPP_OUTPUT INFO: initialize a new directory: CODEGEN_mad_gq_ttq INFO: remove old information in CODEGEN_mad_gq_ttq DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g u > t t~ u WEIGHTED<=3 @1 INFO: Processing color information for process: g u > t t~ u @1 @@ -199,7 +199,7 @@ INFO: Creating files in directory P1_gu_ttxu DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -226,7 +226,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1339]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -237,7 +237,7 @@ INFO: Creating files in directory P1_gux_ttxux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -264,31 +264,31 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1336]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.028 s -Wrote files for 32 helas calls in 0.210 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s +Wrote files for 32 helas calls in 0.224 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.125 s +ALOHA: aloha creates 2 routines in 0.142 s DEBUG: Entering 
PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.113 s +ALOHA: aloha creates 4 routines in 0.129 s FFV1 FFV1 FFV1 FFV1 VVV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) @@ -297,13 +297,13 @@ super_write_set_parameters_onlyfixMajorana (hardcoded=True) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.h +FileWriter for 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages @@ -328,12 +328,12 @@ INFO: Generate web pages * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt Using default text editor "vi". 
Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP quit @@ -359,17 +359,17 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile @@ -377,7 +377,7 @@ patching file Source/make_opts patching file bin/internal/banner.py patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 513 (offset 44 lines). patching file driver.f @@ -389,7 +389,7 @@ Hunk #4 succeeded at 281 (offset 32 lines). Hunk #5 succeeded at 326 (offset 32 lines). Hunk #6 succeeded at 441 (offset 45 lines). Hunk #7 succeeded at 531 (offset 61 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 517 (offset 48 lines). patching file driver.f @@ -401,12 +401,12 @@ Hunk #4 succeeded at 281 (offset 32 lines). Hunk #5 succeeded at 326 (offset 32 lines). Hunk #6 succeeded at 441 (offset 45 lines). Hunk #7 succeeded at 531 (offset 61 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/README Run "open index.html" to see more information about this process. 
quit -real 0m2.498s -user 0m2.119s -sys 0m0.353s +real 0m2.557s +user 0m2.244s +sys 0m0.297s diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt index 5ca005676e..cdeedc7863 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_file.inc index 0c895f2b2c..4457933199 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1_1.o FFV1_2.o VVV1_0.o FFV1_0.o FFV1P0_3.o +ALOHARoutine = FFV1_1.o FFV1_0.o VVV1_0.o FFV1_2.o FFV1P0_3.o diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/gq_ttq.mad/bin/internal/ufomodel/py3_model.pkl index dc38da0bfa76ea4206a3c5b2d34b98c606f7d044..27a1caae3c115073669b90622e9351ab04166d39 100644 GIT binary patch delta 21 dcmcb3j_Le4rVZZ97>g(SEK_5Qm|VK-4ghc73FH6( delta 23 fcmX?qj_Kk#rVZZ97)vMnEK_6l^>>+EyzCADfsqPs diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 1134659ef0..3b95a7edb5 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". 
Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0048046112060546875  +DEBUG: model prefixing takes 0.005426168441772461  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.027 s +5 processes with 7 diagrams generated in 0.028 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. 
@@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.125 s +13 processes with 76 diagrams generated in 0.134 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.848 s +65 processes with 1119 diagrams generated in 1.812 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -389,10 +389,10 @@ Load PLUGIN.CUDACPP_OUTPUT INFO: initialize a new directory: CODEGEN_mad_pp_tt012j INFO: remove old information in CODEGEN_mad_pp_tt012j DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j  +INFO: Creating subdirectories in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Processing color information for process: g g > t t~ g g @2 @@ -499,7 +499,7 @@ INFO: Creating files in directory P2_gg_ttxgg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -532,7 +532,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1336]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -543,7 +543,7 @@ INFO: Creating files in directory P2_gg_ttxuux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -572,7 +572,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxuux.txt [model_handling.py at line 1339]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxuux.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  @@ -583,7 +583,7 @@ INFO: Creating files in directory P2_gu_ttxgu DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -612,7 +612,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxgu.txt [model_handling.py at line 1339]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxgu.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  2 [export_cpp.py at line 712]  @@ -623,7 +623,7 @@ INFO: Creating files in directory P2_gux_ttxgux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -652,7 +652,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxgux.txt [model_handling.py at line 1339]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxgux.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  3 [export_cpp.py at line 712]  @@ -663,7 +663,7 @@ INFO: Creating files in directory P2_uux_ttxgg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -692,7 +692,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxgg.txt [model_handling.py at line 1339]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxgg.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  4 [export_cpp.py at line 712]  @@ -703,7 +703,7 @@ INFO: Creating files in directory P1_gg_ttxg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -734,7 +734,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1336]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  5 [export_cpp.py at line 712]  @@ -745,7 +745,7 @@ INFO: Creating files in directory P2_uu_ttxuu DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -770,7 +770,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uu_ttxuu.txt [model_handling.py at line 1339]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uu_ttxuu.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  6 [export_cpp.py at line 712]  @@ -781,7 +781,7 @@ INFO: Creating files in directory P2_uux_ttxuux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -806,7 +806,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxuux.txt [model_handling.py at line 1339]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxuux.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  7 [export_cpp.py at line 712]  @@ -817,7 +817,7 @@ INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -842,7 +842,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxux_ttxuxux.txt [model_handling.py at line 1339]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxux_ttxuxux.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  8 [export_cpp.py at line 712]  @@ -853,7 +853,7 @@ INFO: Creating files in directory P2_uc_ttxuc DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -878,7 +878,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uc_ttxuc.txt [model_handling.py at line 1339]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uc_ttxuc.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  9 [export_cpp.py at line 712]  @@ -889,7 +889,7 @@ INFO: Creating files in directory P2_uux_ttxccx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -914,7 +914,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxccx.txt [model_handling.py at line 1339]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxccx.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  10 [export_cpp.py at line 712]  @@ -925,7 +925,7 @@ INFO: Creating files in directory P2_ucx_ttxucx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -950,7 +950,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_ucx_ttxucx.txt [model_handling.py at line 1339]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_ucx_ttxucx.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  11 [export_cpp.py at line 712]  @@ -961,7 +961,7 @@ INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -986,7 +986,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxcx_ttxuxcx.txt [model_handling.py at line 1339]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxcx_ttxuxcx.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  12 [export_cpp.py at line 712]  @@ -997,7 +997,7 @@ INFO: Creating files in directory P1_gu_ttxu DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -1024,7 +1024,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1339]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  13 [export_cpp.py at line 712]  @@ -1035,7 +1035,7 @@ INFO: Creating files in directory P1_gux_ttxux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -1062,7 +1062,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1336]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  14 [export_cpp.py at line 712]  @@ -1073,7 +1073,7 @@ INFO: Creating files in directory P1_uux_ttxg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -1100,7 +1100,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxg.txt [model_handling.py at line 1339]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxg.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  15 [export_cpp.py at line 712]  @@ -1111,7 +1111,7 @@ INFO: Creating files in directory P0_gg_ttx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -1140,7 +1140,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1336]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  16 [export_cpp.py at line 712]  @@ -1151,7 +1151,7 @@ INFO: Creating files in directory P0_uux_ttx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  @@ -1176,22 +1176,22 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttx.txt [model_handling.py at line 1339]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttx.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1] [export_cpp.py at line 711]  DEBUG: subproc_number =  17 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.145 s -Wrote files for 810 helas calls in 3.136 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.269 s +Wrote files for 810 helas calls in 3.278 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 
routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.287 s +ALOHA: aloha creates 5 routines in 0.330 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -1199,7 +1199,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.671 s +ALOHA: aloha creates 10 routines in 0.312 s VVV1 VVV1 FFV1 @@ -1212,8 +1212,8 @@ ALOHA: aloha creates 10 routines in 0.671 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) @@ -1222,13 +1222,13 @@ super_write_set_parameters_onlyfixMajorana (hardcoded=True) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages @@ -1253,12 +1253,12 @@ INFO: Generate web pages * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP quit @@ -1284,17 +1284,17 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile @@ -1302,11 +1302,11 @@ patching file Source/make_opts patching file bin/internal/banner.py patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f patching file driver.f patching file matrix1.f -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 528 (offset 59 lines). patching file driver.f @@ -1318,7 +1318,7 @@ Hunk #4 succeeded at 252 (offset 3 lines). Hunk #5 succeeded at 297 (offset 3 lines). 
Hunk #6 succeeded at 402 (offset 6 lines). Hunk #7 succeeded at 466 (offset -4 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f patching file driver.f patching file matrix1.f @@ -1328,7 +1328,7 @@ Hunk #4 succeeded at 265 (offset 16 lines). Hunk #5 succeeded at 310 (offset 16 lines). Hunk #6 succeeded at 434 (offset 38 lines). Hunk #7 succeeded at 588 (offset 118 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 513 (offset 44 lines). patching file driver.f @@ -1340,7 +1340,7 @@ Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). Hunk #6 succeeded at 428 (offset 32 lines). Hunk #7 succeeded at 518 (offset 48 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 517 (offset 48 lines). patching file driver.f @@ -1352,7 +1352,7 @@ Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). Hunk #6 succeeded at 428 (offset 32 lines). Hunk #7 succeeded at 518 (offset 48 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 528 (offset 59 lines). patching file driver.f @@ -1364,7 +1364,7 @@ Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). Hunk #6 succeeded at 428 (offset 32 lines). Hunk #7 succeeded at 518 (offset 48 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f patching file driver.f patching file matrix1.f @@ -1374,7 +1374,7 @@ Hunk #4 succeeded at 297 (offset 48 lines). Hunk #5 succeeded at 342 (offset 48 lines). Hunk #6 succeeded at 830 (offset 434 lines). Hunk #7 succeeded at 1717 (offset 1247 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 502 (offset 33 lines). patching file driver.f @@ -1386,7 +1386,7 @@ Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). Hunk #6 succeeded at 540 (offset 144 lines). Hunk #7 succeeded at 813 (offset 343 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 513 (offset 44 lines). patching file driver.f @@ -1398,7 +1398,7 @@ Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). Hunk #6 succeeded at 540 (offset 144 lines). Hunk #7 succeeded at 815 (offset 345 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 517 (offset 48 lines). patching file driver.f @@ -1410,7 +1410,7 @@ Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). Hunk #6 succeeded at 538 (offset 142 lines). Hunk #7 succeeded at 812 (offset 342 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 540 (offset 71 lines). patching file driver.f @@ -1422,7 +1422,7 @@ Hunk #4 succeeded at 302 (offset 53 lines). Hunk #5 succeeded at 347 (offset 53 lines). Hunk #6 succeeded at 472 (offset 76 lines). Hunk #7 succeeded at 581 (offset 111 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 616 (offset 147 lines). patching file driver.f @@ -1434,7 +1434,7 @@ Hunk #4 succeeded at 308 (offset 59 lines). Hunk #5 succeeded at 353 (offset 59 lines). Hunk #6 succeeded at 484 (offset 88 lines). Hunk #7 succeeded at 593 (offset 123 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 524 (offset 55 lines). patching file driver.f @@ -1446,7 +1446,7 @@ Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). Hunk #6 succeeded at 468 (offset 72 lines). Hunk #7 succeeded at 620 (offset 150 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 616 (offset 147 lines). patching file driver.f @@ -1458,7 +1458,7 @@ Hunk #4 succeeded at 308 (offset 59 lines). Hunk #5 succeeded at 353 (offset 59 lines). Hunk #6 succeeded at 484 (offset 88 lines). Hunk #7 succeeded at 593 (offset 123 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 528 (offset 59 lines). patching file driver.f @@ -1470,7 +1470,7 @@ Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). Hunk #6 succeeded at 540 (offset 144 lines). Hunk #7 succeeded at 821 (offset 351 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 528 (offset 59 lines). patching file driver.f @@ -1482,7 +1482,7 @@ Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). Hunk #6 succeeded at 468 (offset 72 lines). Hunk #7 succeeded at 620 (offset 150 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 546 (offset 77 lines). patching file driver.f @@ -1494,7 +1494,7 @@ Hunk #4 succeeded at 302 (offset 53 lines). Hunk #5 succeeded at 347 (offset 53 lines). Hunk #6 succeeded at 472 (offset 76 lines). Hunk #7 succeeded at 581 (offset 111 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 532 (offset 63 lines). patching file driver.f @@ -1506,12 +1506,12 @@ Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). Hunk #6 succeeded at 468 (offset 72 lines). Hunk #7 succeeded at 620 (offset 150 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j done. 
Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/README Run "open index.html" to see more information about this process. quit -real 0m9.562s -user 0m8.185s -sys 0m0.659s +real 0m9.439s +user 0m8.865s +sys 0m0.532s diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt b/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt index 5ca005676e..cdeedc7863 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_file.inc index cf4ec946f8..ec923afd6d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = VVVV3_0.o VVVV4P0_1.o VVVV3P0_1.o VVVV1P0_1.o FFV1_1.o FFV1_2.o VVV1P0_1.o VVV1_0.o FFV1_0.o FFV1P0_3.o VVVV1_0.o VVVV4_0.o +ALOHARoutine = FFV1_1.o VVVV4_0.o VVVV4P0_1.o FFV1_0.o VVV1_0.o FFV1_2.o VVVV3_0.o VVVV1_0.o VVVV3P0_1.o VVVV1P0_1.o VVV1P0_1.o FFV1P0_3.o diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/pp_tt012j.mad/bin/internal/ufomodel/py3_model.pkl index dc38da0bfa76ea4206a3c5b2d34b98c606f7d044..27a1caae3c115073669b90622e9351ab04166d39 100644 GIT binary patch delta 21 dcmcb3j_Le4rVZZ97>g(SEK_5Qm|VK-4ghc73FH6( delta 23 fcmX?qj_Kk#rVZZ97)vMnEK_6l^>>+EyzCADfsqPs From 
ae5498a732005325161a613ef7743660a46881b4 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 12:00:46 +0200 Subject: [PATCH 007/119] [oct23av] regenerate 7 processes sa (move from itscrd80 to itscrd90) - changes only in codegen logs Using codebases as of commit bd255c01fb1cf5377de344c42089765756fd75e1 (Wed Aug 16 15:05:27 2023 +0200) --- .../CODEGEN_cudacpp_ee_mumu_log.txt | 38 ++++----- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 42 +++++----- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 42 +++++----- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 42 +++++----- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 42 +++++----- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 52 ++++++------ .../CODEGEN_cudacpp_heft_gg_h_log.txt | 80 ++++++++++++++----- 7 files changed, 189 insertions(+), 149 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index e8795ee643..b97caf4bef 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004621267318725586  +DEBUG: model prefixing takes 0.005479097366333008  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -162,7 +162,7 @@ Load PLUGIN.CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 @@ -172,12 +172,12 @@ INFO: Processing color information for process: e+ e- > mu+ mu- @1 DEBUG: type(me)= me=0 [output.py at line 190]  DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: proc_id =  0 [model_handling.py at line 1046]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum DEBUG: Entering 
PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  @@ -186,7 +186,7 @@ FileWriter for / DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. 
DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  @@ -194,7 +194,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/G DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_epem_mupmum.txt [model_handling.py at line 1336]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_epem_mupmum.txt [model_handling.py at line 1336]  Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes @@ -202,7 +202,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.227 s +ALOHA: aloha creates 4 routines in 0.264 s FFV1 FFV1 FFV2 @@ -211,20 +211,20 @@ ALOHA: aloha creates 4 routines in 0.227 s FFV4 FFV2_4 FFV2_4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory 
/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.627s -user 0m0.560s -sys 0m0.060s +real 0m0.682s +user 0m0.633s +sys 0m0.043s diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index cb7b25ef28..c2d25ca792 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004621744155883789  +DEBUG: model prefixing takes 0.0054187774658203125  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.007 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -163,7 +163,7 @@ Load PLUGIN.CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 @@ -173,12 +173,12 @@ INFO: Processing color information for process: g g > t t~ @1 DEBUG: type(me)= me=0 [output.py at line 190]  DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: proc_id =  0 [model_handling.py at line 1046]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h 
+FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  @@ -191,7 +191,7 @@ FileWriter for / DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. 
DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  @@ -199,19 +199,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/G DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1336]  -Generated helas calls for 1 subprocesses (3 diagrams) in 0.005 s +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1336]  +Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.126 s +ALOHA: aloha creates 2 routines in 0.140 s VVV1 FFV1 FFV1 FFV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
+FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) @@ -220,13 +220,13 @@ super_write_set_parameters_onlyfixMajorana (hardcoded=True) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.572s -user 0m0.500s -sys 0m0.057s +real 0m0.596s +user 0m0.525s +sys 0m0.050s diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index c23adaa32c..5779ff4ea8 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005018949508666992  +DEBUG: model prefixing takes 0.005312681198120117  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.020 s +1 processes with 16 diagrams generated in 0.021 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT @@ -163,7 +163,7 @@ Load PLUGIN.CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 @@ -173,12 +173,12 @@ INFO: Processing color information for process: g g > t t~ g @1 DEBUG: type(me)= me=0 [output.py at line 190]  DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: proc_id =  0 [model_handling.py at line 1046]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  -FileWriter for 
/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  @@ -193,7 +193,7 @@ FileWriter for / DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. 
DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  @@ -201,8 +201,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/G DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1336]  -Generated helas calls for 1 subprocesses (16 diagrams) in 0.034 s +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1336]  +Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -210,7 +210,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.278 s +ALOHA: aloha creates 5 routines in 0.321 s VVV1 VVV1 FFV1 @@ -220,8 +220,8 @@ ALOHA: aloha creates 5 routines in 0.278 s VVVV1 VVVV3 VVVV4 -FileWriter for 
/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) @@ -230,13 +230,13 @@ super_write_set_parameters_onlyfixMajorana (hardcoded=True) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.847s -user 0m0.711s -sys 0m0.052s +real 0m0.841s +user 0m0.778s +sys 0m0.047s diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index fb29a354ab..dd76086af3 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0045855045318603516  +DEBUG: model prefixing takes 0.005469560623168945  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.142 s +1 processes with 123 diagrams generated in 0.157 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -163,7 +163,7 @@ Load PLUGIN.CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 @@ -173,12 +173,12 @@ INFO: Processing color information for process: g g > t t~ g g @1 DEBUG: type(me)= me=0 [output.py at line 190]  DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: proc_id =  0 [model_handling.py at line 1046]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is not yet defined: 
this is standalone_cudacpp mode [model_handling.py at line 1302]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  @@ -195,7 +195,7 @@ FileWriter for / DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1823]  -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  @@ -203,8 +203,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/G DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1336]  -Generated helas calls for 1 subprocesses (123 diagrams) in 0.382 s +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1336]  +Generated helas calls for 1 subprocesses (123 diagrams) in 0.428 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -212,7 +212,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.601 s +ALOHA: aloha creates 5 routines in 0.313 s VVV1 VVV1 FFV1 @@ -225,8 +225,8 @@ ALOHA: aloha creates 5 routines in 0.601 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./HelAmps_sm.h 
-INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) @@ -235,13 +235,13 @@ super_write_set_parameters_onlyfixMajorana (hardcoded=True) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
+INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m1.922s -user 0m1.324s -sys 0m0.063s +real 0m1.538s +user 0m1.424s +sys 0m0.057s diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 9bee64b205..cbcbb8aed5 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004521608352661133  +DEBUG: model prefixing takes 0.0053789615631103516  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.695 s +1 processes with 1240 diagrams generated in 1.855 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -163,7 +163,7 @@ Load PLUGIN.CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 @@ -173,12 +173,12 @@ INFO: Processing color information for process: g g > t t~ g g g @1 DEBUG: type(me)= me=0 [output.py at line 190]  DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: proc_id =  0 [model_handling.py at line 1046]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel 
is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  @@ -197,7 +197,7 @@ FileWriter for / DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1823]  DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1823]  -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  @@ -205,8 +205,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/G DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1336]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 5.784 s +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1336]  +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.486 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -214,7 +214,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.291 s +ALOHA: aloha creates 5 routines in 0.338 s VVV1 VVV1 FFV1 @@ -227,8 +227,8 @@ ALOHA: aloha creates 5 routines in 0.291 s VVVV3 VVVV4 VVVV4 -FileWriter for 
/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) @@ -237,13 +237,13 @@ super_write_set_parameters_onlyfixMajorana (hardcoded=True) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m11.544s -user 0m11.267s -sys 0m0.097s +real 0m12.917s +user 0m12.668s +sys 0m0.098s diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 386e592a4e..834b04055a 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004611015319824219  +DEBUG: model prefixing takes 0.005788087844848633  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.071 s +8 processes with 40 diagrams generated in 0.083 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -177,7 +177,7 @@ Load PLUGIN.CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g u > t t~ u WEIGHTED<=3 @1 INFO: Processing color information for process: g u > t t~ u @1 @@ -195,12 +195,12 @@ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ DEBUG: type(me)= me=0 [output.py at line 190]  DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: proc_id =  0 [model_handling.py at line 1046]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  
DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  @@ -211,7 +211,7 @@ FileWriter for / DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. 
DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  @@ -219,19 +219,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/G DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1339]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1339]  DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  DEBUG: type(subproc_group)= [output.py at line 188]  DEBUG: type(fortran_model)= [output.py at line 189]  DEBUG: type(me)= me=1 [output.py at line 190]  DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: proc_id =  0 [model_handling.py at line 1046]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux +INFO: Creating files in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  @@ -242,7 +242,7 @@ FileWriter for / DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. 
DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  @@ -250,20 +250,20 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/G DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1336]  -Generated helas calls for 2 subprocesses (10 diagrams) in 0.027 s +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1336]  +Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.123 s +ALOHA: aloha creates 2 routines in 0.141 s FFV1 FFV1 FFV1 FFV1 VVV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
+FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) @@ -272,13 +272,13 @@ super_write_set_parameters_onlyfixMajorana (hardcoded=True) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.730s -user 0m0.605s -sys 0m0.063s +real 0m0.749s +user 0m0.700s +sys 0m0.042s diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index d13feee76f..93eb281326 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -51,16 +51,53 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F set auto_convert_model T; import model heft; generate g g > h save options auto_convert_model -save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: download model from https://madgraph.mi.infn.it/Downloads/models/heft.tgz to the following directory: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/models  +--2023-10-26 12:00:16-- https://madgraph.mi.infn.it/Downloads/models/heft.tgz +Resolving madgraph.mi.infn.it (madgraph.mi.infn.it)... 192.135.21.75 +Connecting to madgraph.mi.infn.it (madgraph.mi.infn.it)|192.135.21.75|:443... connected. +HTTP request sent, awaiting response... 200 OK +Length: 13558 (13K) [application/x-gzip] +Saving to: ‘tmp.tgz’ + + 0K .......... ... 100% 37.3M=0s + +2023-10-26 12:00:17 (37.3 MB/s) - ‘tmp.tgz’ saved [13558/13558] + +heft/ +heft/__init__.py +heft/coupling_orders.py +heft/couplings.py +heft/function_library.py +heft/HEFT_UFO.log +heft/lorentz.py +heft/object_library.py +heft/parameters.py +heft/particles.py +heft/restrict_ckm.dat +heft/restrict_default.dat +heft/restrict_no_b_mass.dat +heft/restrict_no_masses.dat +heft/restrict_no_tau_mass.dat +heft/restrict_zeromass_ckm.dat +heft/vertices.py +heft/write_param_card.py +fail to load model but auto_convert_model is on True. 
Trying to convert the model +convert model /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/models/heft +retry the load of the model +import model heft +INFO: load particles +INFO: load vertices +DEBUG: model prefixing takes 0.006020307540893555  INFO: Restrict model heft with file models/heft/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: s u w+ at order: QED=1  @@ -123,11 +160,14 @@ Defined multiparticle l- = e- mu- Defined multiparticle vl = ve vm vt Defined multiparticle vl~ = ve~ vm~ vt~ Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~ e+ mu+ t b t~ b~ z w+ h h1 w- ta- ta+ +INFO: Change particles name to pass to MG5 convention +Kept definitions of multiparticles p / j / l+ / l- / vl / vl~ unchanged +Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~ e+ mu+ t b t~ b~ z w+ h h1 w- ta- ta+ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying process: g g > h HIG<=1 HIW<=1 WEIGHTED<=2 @1 INFO: Process has 1 diagrams -1 processes with 1 diagrams generated in 0.003 s +1 processes with 1 diagrams generated in 0.004 s Total: 1 processes with 1 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_heft_gg_h Load PLUGIN.CUDACPP_OUTPUT @@ -135,7 +175,7 @@ Load PLUGIN.CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > h HIG<=1 HIW<=1 WEIGHTED<=2 @1 INFO: Processing color information for process: g g > h HIG<=1 HIW<=1 @1 @@ -145,12 +185,12 @@ INFO: Processing color information for process: g g > h HIG<=1 HIW<=1 @1 DEBUG: type(me)= me=0 [output.py at line 190]  DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: proc_id =  0 [model_handling.py at line 1046]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.h 
+FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.h DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.cc DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  @@ -163,7 +203,7 @@ FileWriter for / DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/. +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/. 
DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  @@ -171,15 +211,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/G DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_heft_gg_h.txt [model_handling.py at line 1336]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_heft_gg_h.txt [model_handling.py at line 1336]  Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines -ALOHA: aloha creates 1 routines in 0.054 s +ALOHA: aloha creates 1 routines in 0.072 s VVS3 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h -INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. 
+FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h +INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 59 , keys size = 59 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) @@ -188,13 +228,13 @@ super_write_set_parameters_onlyfixMajorana (hardcoded=True) DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 5 , keys size = 5 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 1 , keys size = 1 [model_handling.py at line 729]  DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 1 , keys size = 1 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./Parameters_heft.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./Parameters_heft.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./Parameters_heft.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./Parameters_heft.cc INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. 
and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.497s -user 0m0.397s -sys 0m0.051s +real 0m0.965s +user 0m0.636s +sys 0m0.071s From 2a67667aaafd63c6727b73f2ee3229c46066c28c Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 12:12:18 +0200 Subject: [PATCH 008/119] [oct23av] regenerate 7 processes mad (all but pp012j) and all 7 sa, only changes are in codegen logs Codebase includes merging commit a6731bd9e (Olivier Wed Aug 23 13:23:12 2023 +0200) This uses Olivier's 'fix_mirror' branch for PR #754 --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 40 ++++----- .../CODEGEN_cudacpp_ee_mumu_log.txt | 34 +++---- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 48 +++++----- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 38 ++++---- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 90 +++++++++---------- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 52 +++++------ .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 48 +++++----- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 58 ++++++------ .../CODEGEN_cudacpp_gg_ttgg_log.txt | 54 +++++------ .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 64 ++++++------- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 58 ++++++------ .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 74 +++++++-------- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 70 +++++++-------- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 80 +++++------------ 14 files changed, 384 insertions(+), 424 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index f44e6a17e5..cb58296f12 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model 
INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005282878875732422  +DEBUG: model prefixing takes 0.005522251129150391  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -176,32 +176,32 @@ INFO: Creating files in directory P1_epem_mupmum DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1]} [model_handling.py at line 
1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2} [model_handling.py at line 1710]  +DEBUG: multi_channel_map =  {1: [0], 2: [1]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2} [model_handling.py at line 1711]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_epem_mupmum.txt [model_handling.py at line 1336]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: 
Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_epem_mupmum.txt [model_handling.py at line 1337]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -215,14 +215,14 @@ ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.203 s +ALOHA: aloha creates 3 routines in 0.200 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.251 s +ALOHA: aloha creates 7 routines in 0.253 s FFV1 FFV1 FFV2 @@ -334,6 +334,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.536s -user 0m2.251s -sys 0m0.265s +real 0m2.814s +user 0m2.201s +sys 0m0.285s diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index b97caf4bef..cc4cb313f6 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005479097366333008  +DEBUG: model prefixing takes 0.005342245101928711  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -176,25 +176,25 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  DEBUG: 
self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1711]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_epem_mupmum.txt [model_handling.py at line 1336]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 
1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_epem_mupmum.txt [model_handling.py at line 1337]  Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes @@ -202,7 +202,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.264 s +ALOHA: aloha creates 4 routines in 0.261 s FFV1 FFV1 FFV2 @@ -225,6 +225,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.682s -user 0m0.633s -sys 0m0.043s +real 0m0.684s +user 0m0.614s +sys 0m0.063s diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 96c70b75b6..7e2b93281d 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005307674407958984  +DEBUG: model prefixing takes 0.0052642822265625  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,36 +177,36 @@ INFO: Creating files in directory P1_gg_ttx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 
1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1336]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1337]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -214,7 +214,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.108 s +Wrote files for 10 helas calls in 0.109 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines @@ -223,7 +223,7 @@ ALOHA: aloha creates 2 routines in 0.142 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.129 s +ALOHA: aloha creates 4 routines in 0.130 s VVV1 FFV1 FFV1 @@ -328,6 +328,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.316s -user 0m2.039s -sys 0m0.271s +real 0m2.320s +user 0m2.033s +sys 0m0.279s diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index c2d25ca792..dddb8c87ad 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0054187774658203125  +DEBUG: model prefixing takes 0.005263805389404297  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,29 +177,29 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  
+DEBUG: multi_channel_map =  None [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1336]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1337]  Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes @@ -227,6 +227,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.596s +real 0m0.588s user 0m0.525s -sys 0m0.050s +sys 0m0.054s diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index ed715d1fb9..0297f740e9 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~; add process g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005259990692138672  +DEBUG: model prefixing takes 0.005698442459106445  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -162,7 +162,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.020 s +1 processes with 16 diagrams generated in 0.019 s Total: 2 processes with 19 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -186,38 +186,38 @@ INFO: Creating files in directory P2_gg_ttxg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 
8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 4, 1, 
4, 4) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1336]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1337]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -228,36 +228,36 @@ INFO: Creating files in directory P1_gg_ttx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} 
[model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1336]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1337]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  @@ -265,14 +265,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s -Wrote files for 46 helas calls in 0.264 s +Wrote files for 46 helas calls in 0.263 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.319 s +ALOHA: aloha creates 5 routines in 0.320 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -400,6 +400,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.924s -user 0m2.596s -sys 0m0.316s +real 0m2.929s +user 0m2.616s +sys 0m0.306s diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index 21203e3b73..c8912079c8 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005400657653808594  +DEBUG: model prefixing takes 0.005263566970825195  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,38 +177,38 @@ INFO: Creating files in directory P1_gg_ttxg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, 
cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1336]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1337]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -223,7 +223,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.323 s +ALOHA: aloha creates 5 routines in 0.317 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -231,7 +231,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.308 s +ALOHA: aloha creates 10 routines in 0.304 s VVV1 VVV1 FFV1 @@ -347,6 +347,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.824s -user 0m2.491s -sys 0m0.311s +real 0m2.800s +user 0m2.477s +sys 0m0.307s diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 5779ff4ea8..900f2c0ac9 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005312681198120117  +DEBUG: model prefixing takes 0.005434989929199219  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,32 +177,32 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True 
[model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1336]  -Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1337]  +Generated helas calls for 1 subprocesses (16 
diagrams) in 0.036 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -210,7 +210,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.321 s +ALOHA: aloha creates 5 routines in 0.319 s VVV1 VVV1 FFV1 @@ -237,6 +237,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.841s -user 0m0.778s -sys 0m0.047s +real 0m0.874s +user 0m0.781s +sys 0m0.045s diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 69f66c3b7c..e3548d9fdb 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0052814483642578125  +DEBUG: model prefixing takes 0.005757331848144531  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,40 +177,40 @@ INFO: Creating files in directory P1_gg_ttxgg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: 
self.support_multichannel, self.include_multi_channel =  True [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 
32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 10, 14: 11, 15: 12, 16: 13, 17: 14, 18: 15, 19: 16, 20: 17, 21: 18, 22: 19, 23: 20, 24: 21, 25: 22, 26: 23, 27: 24, 28: 25, 29: 26, 30: 27, 31: 28, 32: 29, 33: 30, 37: 31, 38: 32, 39: 33, 40: 34, 41: 35, 42: 36, 43: 37, 44: 38, 45: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 55: 46, 56: 47, 57: 48, 58: 49, 59: 50, 60: 51, 61: 52, 62: 53, 63: 54, 67: 55, 68: 56, 69: 57, 70: 58, 71: 59, 72: 60, 73: 61, 74: 62, 75: 63, 76: 64, 77: 65, 78: 66, 79: 67, 80: 68, 81: 69, 85: 70, 86: 71, 87: 72, 88: 73, 89: 74, 90: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 100: 85, 101: 86, 102: 87, 106: 88, 107: 89, 108: 90, 109: 91, 110: 92, 111: 93, 115: 94, 116: 95, 117: 96, 118: 97, 119: 98, 120: 99, 124: 100, 125: 101, 126: 102, 127: 103, 128: 104, 129: 105} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  
-DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 10, 14: 11, 15: 12, 16: 13, 17: 14, 18: 15, 19: 16, 20: 17, 21: 18, 22: 19, 23: 20, 24: 21, 25: 22, 26: 23, 27: 24, 28: 25, 29: 26, 30: 27, 31: 28, 32: 29, 33: 30, 37: 31, 38: 32, 39: 33, 40: 34, 41: 35, 42: 36, 43: 37, 44: 38, 45: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 55: 46, 56: 47, 57: 48, 58: 49, 59: 50, 60: 51, 61: 52, 62: 53, 63: 54, 67: 55, 68: 56, 69: 57, 
70: 58, 71: 59, 72: 60, 73: 61, 74: 62, 75: 63, 76: 64, 77: 65, 78: 66, 79: 67, 80: 68, 81: 69, 85: 70, 86: 71, 87: 72, 88: 73, 89: 74, 90: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 100: 85, 101: 86, 102: 87, 106: 88, 107: 89, 108: 90, 109: 91, 110: 92, 111: 93, 115: 94, 116: 95, 117: 96, 118: 97, 119: 98, 120: 99, 124: 100, 125: 101, 126: 102, 127: 103, 128: 104, 129: 105} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1336]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1337]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -218,14 +218,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg Generated helas calls for 1 subprocesses (123 diagrams) in 0.418 s -Wrote files for 222 helas calls in 0.725 s +Wrote files for 222 helas calls in 0.724 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.325 s +ALOHA: aloha creates 5 routines in 0.324 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -233,7 +233,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.309 s +ALOHA: aloha creates 10 routines in 0.308 s VVV1 VVV1 FFV1 @@ -352,6 +352,6 @@ Type 
"launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.919s -user 0m3.598s -sys 0m0.302s +real 0m3.928s +user 0m3.594s +sys 0m0.299s diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index dd76086af3..654dd3ea8d 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005469560623168945  +DEBUG: model prefixing takes 0.005308866500854492  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.157 s +1 processes with 123 diagrams generated in 0.164 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -177,34 +177,34 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file 
[model_handling.py at line 1455]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, 
cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1336]  -Generated helas calls for 1 subprocesses (123 diagrams) in 0.428 s +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1337]  +Generated helas calls for 1 subprocesses (123 diagrams) in 0.419 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -212,7 +212,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.313 s +ALOHA: aloha creates 5 routines in 0.315 s VVV1 VVV1 FFV1 @@ -242,6 +242,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m1.538s -user 0m1.424s -sys 0m0.057s +real 0m1.520s +user 0m1.435s +sys 0m0.054s diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 17a191d92c..6dbbb56578 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005379438400268555  
+DEBUG: model prefixing takes 0.005812644958496094  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.856 s +1 processes with 1240 diagrams generated in 1.982 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -179,50 +179,50 @@ INFO: Color-Flow passed to 1592 term in 36s. Introduce 2768 contraction DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 
236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 
598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 
283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 
642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [3], 4: [4], 5: [6], 6: [7], 7: [13], 8: [14], 9: [15], 10: [17], 11: [18], 12: [19], 13: [21], 14: [22], 15: [23], 16: [25], 17: [26], 18: [27], 19: [28], 20: [29], 21: [30], 22: [32], 23: [33], 24: [34], 25: [35], 26: [36], 27: [37], 28: [38], 29: [39], 30: [40], 31: [41], 32: [42], 33: [43], 34: [44], 35: [45], 36: [46], 37: [48], 38: [49], 39: [50], 40: [51], 41: [52], 42: [53], 43: [54], 44: [55], 45: [56], 46: [57], 47: [58], 48: [59], 49: [60], 50: [61], 51: [62], 52: [64], 53: [65], 54: [66], 55: [67], 56: [68], 57: [69], 58: [70], 59: [71], 60: [72], 61: [73], 62: [74], 63: [75], 64: [76], 65: [77], 66: [78], 67: [80], 68: [81], 69: [82], 70: [83], 71: [84], 72: [85], 73: [86], 74: [87], 75: [88], 76: [90], 77: [91], 78: [92], 79: [93], 80: [94], 81: [95], 82: [96], 83: [97], 84: [98], 85: [100], 86: [101], 87: [102], 88: [103], 89: [104], 90: [105], 91: [106], 92: [107], 93: [108], 94: [109], 95: [110], 96: [111], 97: [112], 98: [113], 99: [114], 100: [115], 101: [116], 102: [117], 103: [118], 104: [119], 105: [120], 106: [123], 107: [124], 108: [125], 109: [126], 110: [127], 111: [128], 112: [129], 113: [130], 114: [131], 115: [132], 116: [133], 117: [134], 118: [135], 119: [136], 120: [137], 121: [139], 122: [140], 123: [142], 124: [143], 125: [144], 126: [145], 127: [146], 128: [147], 129: [148], 130: [149], 131: [150], 132: [151], 133: [152], 134: [153], 135: [154], 136: [155], 137: [156], 138: [158], 139: [159], 140: [160], 141: [161], 142: [162], 143: [163], 144: [164], 145: [165], 146: [166], 147: [167], 148: [168], 149: [169], 150: [170], 151: [171], 152: [172], 153: [174], 154: [175], 155: [176], 156: [177], 157: [178], 158: [179], 159: [180], 160: [181], 161: [182], 162: [183], 163: [184], 164: [185], 165: [186], 166: [187], 167: [188], 168: [189], 169: [190], 170: [191], 171: [192], 172: 
[193], 173: [194], 174: [195], 175: [196], 176: [197], 177: [198], 178: [199], 179: [200], 180: [201], 181: [202], 182: [203], 183: [204], 184: [205], 185: [206], 186: [207], 187: [208], 188: [209], 189: [210], 190: [211], 191: [212], 192: [213], 193: [214], 194: [215], 195: [216], 196: [217], 197: [219], 198: [220], 199: [221], 200: [222], 201: [223], 202: [224], 203: [226], 204: [227], 205: [228], 206: [229], 207: [230], 208: [231], 209: [233], 210: [234], 211: [246], 212: [247], 213: [248], 214: [249], 215: [250], 216: [251], 217: [252], 218: [253], 219: [254], 220: [255], 221: [256], 222: [257], 223: [258], 224: [259], 225: [260], 226: [262], 227: [263], 228: [265], 229: [266], 230: [267], 231: [268], 232: [269], 233: [270], 234: [271], 235: [272], 236: [273], 237: [274], 238: [275], 239: [276], 240: [277], 241: [278], 242: [279], 243: [281], 244: [282], 245: [283], 246: [284], 247: [285], 248: [286], 249: [287], 250: [288], 251: [289], 252: [290], 253: [291], 254: [292], 255: [293], 256: [294], 257: [295], 258: [297], 259: [298], 260: [299], 261: [300], 262: [301], 263: [302], 264: [303], 265: [304], 266: [305], 267: [306], 268: [307], 269: [308], 270: [309], 271: [310], 272: [311], 273: [312], 274: [313], 275: [314], 276: [315], 277: [316], 278: [317], 279: [318], 280: [319], 281: [320], 282: [321], 283: [322], 284: [323], 285: [324], 286: [325], 287: [326], 288: [327], 289: [328], 290: [329], 291: [330], 292: [331], 293: [332], 294: [333], 295: [334], 296: [335], 297: [336], 298: [337], 299: [338], 300: [339], 301: [340], 302: [342], 303: [343], 304: [344], 305: [345], 306: [346], 307: [347], 308: [349], 309: [350], 310: [351], 311: [352], 312: [353], 313: [354], 314: [356], 315: [357], 316: [369], 317: [370], 318: [371], 319: [372], 320: [373], 321: [374], 322: [376], 323: [377], 324: [378], 325: [379], 326: [380], 327: [381], 328: [382], 329: [383], 330: [384], 331: [385], 332: [386], 333: [387], 334: [388], 335: [389], 336: [390], 337: [392], 338: [393], 
339: [394], 340: [395], 341: [396], 342: [397], 343: [398], 344: [399], 345: [400], 346: [401], 347: [402], 348: [403], 349: [404], 350: [405], 351: [406], 352: [408], 353: [409], 354: [410], 355: [411], 356: [412], 357: [413], 358: [414], 359: [415], 360: [416], 361: [417], 362: [418], 363: [419], 364: [420], 365: [421], 366: [422], 367: [424], 368: [425], 369: [426], 370: [427], 371: [428], 372: [429], 373: [430], 374: [431], 375: [432], 376: [433], 377: [434], 378: [436], 379: [437], 380: [439], 381: [440], 382: [446], 383: [447], 384: [448], 385: [449], 386: [450], 387: [451], 388: [452], 389: [453], 390: [454], 391: [456], 392: [457], 393: [458], 394: [459], 395: [460], 396: [461], 397: [462], 398: [463], 399: [464], 400: [466], 401: [467], 402: [468], 403: [469], 404: [470], 405: [471], 406: [472], 407: [473], 408: [474], 409: [476], 410: [477], 411: [478], 412: [479], 413: [480], 414: [481], 415: [483], 416: [484], 417: [485], 418: [486], 419: [487], 420: [488], 421: [492], 422: [493], 423: [494], 424: [495], 425: [496], 426: [497], 427: [499], 428: [500], 429: [501], 430: [502], 431: [503], 432: [504], 433: [505], 434: [506], 435: [507], 436: [508], 437: [509], 438: [510], 439: [511], 440: [512], 441: [513], 442: [515], 443: [516], 444: [517], 445: [518], 446: [519], 447: [520], 448: [521], 449: [522], 450: [523], 451: [524], 452: [525], 453: [526], 454: [527], 455: [528], 456: [529], 457: [531], 458: [532], 459: [533], 460: [534], 461: [535], 462: [536], 463: [537], 464: [538], 465: [539], 466: [540], 467: [541], 468: [542], 469: [543], 470: [544], 471: [545], 472: [547], 473: [548], 474: [549], 475: [550], 476: [551], 477: [552], 478: [553], 479: [554], 480: [555], 481: [556], 482: [557], 483: [559], 484: [560], 485: [562], 486: [563], 487: [569], 488: [570], 489: [571], 490: [572], 491: [573], 492: [574], 493: [575], 494: [576], 495: [577], 496: [579], 497: [580], 498: [581], 499: [582], 500: [583], 501: [584], 502: [585], 503: [586], 504: [587], 505: 
[589], 506: [590], 507: [591], 508: [592], 509: [593], 510: [594], 511: [595], 512: [596], 513: [597], 514: [599], 515: [600], 516: [601], 517: [602], 518: [603], 519: [604], 520: [606], 521: [607], 522: [608], 523: [609], 524: [610], 525: [611], 526: [615], 527: [616], 528: [617], 529: [618], 530: [619], 531: [620], 532: [622], 533: [623], 534: [624], 535: [625], 536: [626], 537: [627], 538: [628], 539: [629], 540: [630], 541: [631], 542: [632], 543: [633], 544: [634], 545: [635], 546: [636], 547: [638], 548: [639], 549: [640], 550: [641], 551: [642], 552: [643], 553: [644], 554: [645], 555: [646], 556: [647], 557: [648], 558: [649], 559: [650], 560: [651], 561: [652], 562: [654], 563: [655], 564: [656], 565: [657], 566: [658], 567: [659], 568: [660], 569: [661], 570: [662], 571: [663], 572: [664], 573: [665], 574: [666], 575: [667], 576: [668], 577: [670], 578: [671], 579: [672], 580: [673], 581: [674], 582: [675], 583: [676], 584: [677], 585: [678], 586: [679], 587: [680], 588: [682], 589: [683], 590: [685], 591: [686], 592: [692], 593: [693], 594: [694], 595: [695], 596: [696], 597: [697], 598: [698], 599: [699], 600: [700], 601: [702], 602: [703], 603: [704], 604: [705], 605: [706], 606: [707], 607: [708], 608: [709], 609: [710], 610: [712], 611: [713], 612: [714], 613: [715], 614: [716], 615: [717], 616: [718], 617: [719], 618: [720], 619: [722], 620: [723], 621: [724], 622: [725], 623: [726], 624: [727], 625: [729], 626: [730], 627: [731], 628: [732], 629: [733], 630: [734], 631: [738], 632: [739], 633: [740], 634: [741], 635: [742], 636: [743], 637: [744], 638: [745], 639: [746], 640: [747], 641: [748], 642: [749], 643: [750], 644: [751], 645: [752], 646: [753], 647: [754], 648: [755], 649: [756], 650: [757], 651: [758], 652: [759], 653: [760], 654: [761], 655: [762], 656: [763], 657: [764], 658: [765], 659: [766], 660: [767], 661: [768], 662: [769], 663: [770], 664: [772], 665: [773], 666: [774], 667: [775], 668: [776], 669: [777], 670: [779], 671: [780], 
672: [781], 673: [782], 674: [783], 675: [784], 676: [788], 677: [789], 678: [790], 679: [791], 680: [792], 681: [793], 682: [794], 683: [795], 684: [796], 685: [797], 686: [798], 687: [799], 688: [800], 689: [801], 690: [802], 691: [803], 692: [804], 693: [805], 694: [806], 695: [807], 696: [808], 697: [809], 698: [810], 699: [811], 700: [812], 701: [813], 702: [814], 703: [815], 704: [816], 705: [817], 706: [818], 707: [819], 708: [820], 709: [822], 710: [823], 711: [824], 712: [825], 713: [826], 714: [827], 715: [829], 716: [830], 717: [831], 718: [832], 719: [833], 720: [834], 721: [838], 722: [839], 723: [841], 724: [842], 725: [844], 726: [845], 727: [851], 728: [852], 729: [853], 730: [854], 731: [855], 732: [856], 733: [857], 734: [858], 735: [859], 736: [861], 737: [862], 738: [863], 739: [864], 740: [865], 741: [866], 742: [867], 743: [868], 744: [869], 745: [871], 746: [872], 747: [873], 748: [874], 749: [875], 750: [876], 751: [877], 752: [878], 753: [879], 754: [881], 755: [882], 756: [883], 757: [884], 758: [885], 759: [886], 760: [888], 761: [889], 762: [890], 763: [891], 764: [892], 765: [893], 766: [894], 767: [895], 768: [897], 769: [898], 770: [900], 771: [901], 772: [907], 773: [908], 774: [909], 775: [910], 776: [911], 777: [912], 778: [913], 779: [914], 780: [915], 781: [917], 782: [918], 783: [919], 784: [920], 785: [921], 786: [922], 787: [923], 788: [924], 789: [925], 790: [927], 791: [928], 792: [929], 793: [930], 794: [931], 795: [932], 796: [933], 797: [934], 798: [935], 799: [937], 800: [938], 801: [939], 802: [940], 803: [941], 804: [942], 805: [944], 806: [945], 807: [946], 808: [947], 809: [948], 810: [949], 811: [950], 812: [951], 813: [953], 814: [954], 815: [956], 816: [957], 817: [963], 818: [964], 819: [965], 820: [966], 821: [967], 822: [968], 823: [969], 824: [970], 825: [971], 826: [973], 827: [974], 828: [975], 829: [976], 830: [977], 831: [978], 832: [979], 833: [980], 834: [981], 835: [983], 836: [984], 837: [985], 838: 
[986], 839: [987], 840: [988], 841: [989], 842: [990], 843: [991], 844: [993], 845: [994], 846: [995], 847: [996], 848: [997], 849: [998], 850: [1000], 851: [1001], 852: [1002], 853: [1003], 854: [1004], 855: [1005], 856: [1006], 857: [1007], 858: [1009], 859: [1010], 860: [1012], 861: [1013], 862: [1018], 863: [1019], 864: [1021], 865: [1022], 866: [1024], 867: [1025], 868: [1030], 869: [1031], 870: [1033], 871: [1034], 872: [1036], 873: [1037], 874: [1045], 875: [1046], 876: [1047], 877: [1048], 878: [1049], 879: [1050], 880: [1051], 881: [1052], 882: [1053], 883: [1054], 884: [1055], 885: [1056], 886: [1057], 887: [1058], 888: [1059], 889: [1060], 890: [1061], 891: [1062], 892: [1064], 893: [1065], 894: [1066], 895: [1067], 896: [1068], 897: [1069], 898: [1070], 899: [1071], 900: [1072], 901: [1073], 902: [1074], 903: [1075], 904: [1076], 905: [1077], 906: [1078], 907: [1079], 908: [1080], 909: [1081], 910: [1083], 911: [1084], 912: [1085], 913: [1086], 914: [1087], 915: [1088], 916: [1089], 917: [1090], 918: [1091], 919: [1092], 920: [1093], 921: [1094], 922: [1095], 923: [1096], 924: [1097], 925: [1098], 926: [1099], 927: [1100], 928: [1102], 929: [1103], 930: [1104], 931: [1105], 932: [1106], 933: [1107], 934: [1109], 935: [1110], 936: [1111], 937: [1112], 938: [1113], 939: [1114], 940: [1116], 941: [1117], 942: [1118], 943: [1119], 944: [1120], 945: [1121]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [3], 4: [4], 5: [6], 6: [7], 7: [13], 8: [14], 9: [15], 10: [17], 11: [18], 12: [19], 13: [21], 14: [22], 15: [23], 16: [25], 17: [26], 18: [27], 19: [28], 20: [29], 21: [30], 22: [32], 23: [33], 24: [34], 25: [35], 26: [36], 27: [37], 28: [38], 29: [39], 30: [40], 31: [41], 32: [42], 33: [43], 34: [44], 35: [45], 36: [46], 37: [48], 38: [49], 39: [50], 40: [51], 41: [52], 42: [53], 43: [54], 44: [55], 45: [56], 46: [57], 47: [58], 48: [59], 49: [60], 50: [61], 51: [62], 52: [64], 53: [65], 54: [66], 55: [67], 56: [68], 57: 
[69], 58: [70], 59: [71], 60: [72], 61: [73], 62: [74], 63: [75], 64: [76], 65: [77], 66: [78], 67: [80], 68: [81], 69: [82], 70: [83], 71: [84], 72: [85], 73: [86], 74: [87], 75: [88], 76: [90], 77: [91], 78: [92], 79: [93], 80: [94], 81: [95], 82: [96], 83: [97], 84: [98], 85: [100], 86: [101], 87: [102], 88: [103], 89: [104], 90: [105], 91: [106], 92: [107], 93: [108], 94: [109], 95: [110], 96: [111], 97: [112], 98: [113], 99: [114], 100: [115], 101: [116], 102: [117], 103: [118], 104: [119], 105: [120], 106: [123], 107: [124], 108: [125], 109: [126], 110: [127], 111: [128], 112: [129], 113: [130], 114: [131], 115: [132], 116: [133], 117: [134], 118: [135], 119: [136], 120: [137], 121: [139], 122: [140], 123: [142], 124: [143], 125: [144], 126: [145], 127: [146], 128: [147], 129: [148], 130: [149], 131: [150], 132: [151], 133: [152], 134: [153], 135: [154], 136: [155], 137: [156], 138: [158], 139: [159], 140: [160], 141: [161], 142: [162], 143: [163], 144: [164], 145: [165], 146: [166], 147: [167], 148: [168], 149: [169], 150: [170], 151: [171], 152: [172], 153: [174], 154: [175], 155: [176], 156: [177], 157: [178], 158: [179], 159: [180], 160: [181], 161: [182], 162: [183], 163: [184], 164: [185], 165: [186], 166: [187], 167: [188], 168: [189], 169: [190], 170: [191], 171: [192], 172: [193], 173: [194], 174: [195], 175: [196], 176: [197], 177: [198], 178: [199], 179: [200], 180: [201], 181: [202], 182: [203], 183: [204], 184: [205], 185: [206], 186: [207], 187: [208], 188: [209], 189: [210], 190: [211], 191: [212], 192: [213], 193: [214], 194: [215], 195: [216], 196: [217], 197: [219], 198: [220], 199: [221], 200: [222], 201: [223], 202: [224], 203: [226], 204: [227], 205: [228], 206: [229], 207: [230], 208: [231], 209: [233], 210: [234], 211: [246], 212: [247], 213: [248], 214: [249], 215: [250], 216: [251], 217: [252], 218: [253], 219: [254], 220: [255], 221: [256], 222: [257], 223: [258], 224: [259], 225: [260], 226: [262], 227: [263], 228: [265], 229: 
[266], 230: [267], 231: [268], 232: [269], 233: [270], 234: [271], 235: [272], 236: [273], 237: [274], 238: [275], 239: [276], 240: [277], 241: [278], 242: [279], 243: [281], 244: [282], 245: [283], 246: [284], 247: [285], 248: [286], 249: [287], 250: [288], 251: [289], 252: [290], 253: [291], 254: [292], 255: [293], 256: [294], 257: [295], 258: [297], 259: [298], 260: [299], 261: [300], 262: [301], 263: [302], 264: [303], 265: [304], 266: [305], 267: [306], 268: [307], 269: [308], 270: [309], 271: [310], 272: [311], 273: [312], 274: [313], 275: [314], 276: [315], 277: [316], 278: [317], 279: [318], 280: [319], 281: [320], 282: [321], 283: [322], 284: [323], 285: [324], 286: [325], 287: [326], 288: [327], 289: [328], 290: [329], 291: [330], 292: [331], 293: [332], 294: [333], 295: [334], 296: [335], 297: [336], 298: [337], 299: [338], 300: [339], 301: [340], 302: [342], 303: [343], 304: [344], 305: [345], 306: [346], 307: [347], 308: [349], 309: [350], 310: [351], 311: [352], 312: [353], 313: [354], 314: [356], 315: [357], 316: [369], 317: [370], 318: [371], 319: [372], 320: [373], 321: [374], 322: [376], 323: [377], 324: [378], 325: [379], 326: [380], 327: [381], 328: [382], 329: [383], 330: [384], 331: [385], 332: [386], 333: [387], 334: [388], 335: [389], 336: [390], 337: [392], 338: [393], 339: [394], 340: [395], 341: [396], 342: [397], 343: [398], 344: [399], 345: [400], 346: [401], 347: [402], 348: [403], 349: [404], 350: [405], 351: [406], 352: [408], 353: [409], 354: [410], 355: [411], 356: [412], 357: [413], 358: [414], 359: [415], 360: [416], 361: [417], 362: [418], 363: [419], 364: [420], 365: [421], 366: [422], 367: [424], 368: [425], 369: [426], 370: [427], 371: [428], 372: [429], 373: [430], 374: [431], 375: [432], 376: [433], 377: [434], 378: [436], 379: [437], 380: [439], 381: [440], 382: [446], 383: [447], 384: [448], 385: [449], 386: [450], 387: [451], 388: [452], 389: [453], 390: [454], 391: [456], 392: [457], 393: [458], 394: [459], 395: [460], 
396: [461], 397: [462], 398: [463], 399: [464], 400: [466], 401: [467], 402: [468], 403: [469], 404: [470], 405: [471], 406: [472], 407: [473], 408: [474], 409: [476], 410: [477], 411: [478], 412: [479], 413: [480], 414: [481], 415: [483], 416: [484], 417: [485], 418: [486], 419: [487], 420: [488], 421: [492], 422: [493], 423: [494], 424: [495], 425: [496], 426: [497], 427: [499], 428: [500], 429: [501], 430: [502], 431: [503], 432: [504], 433: [505], 434: [506], 435: [507], 436: [508], 437: [509], 438: [510], 439: [511], 440: [512], 441: [513], 442: [515], 443: [516], 444: [517], 445: [518], 446: [519], 447: [520], 448: [521], 449: [522], 450: [523], 451: [524], 452: [525], 453: [526], 454: [527], 455: [528], 456: [529], 457: [531], 458: [532], 459: [533], 460: [534], 461: [535], 462: [536], 463: [537], 464: [538], 465: [539], 466: [540], 467: [541], 468: [542], 469: [543], 470: [544], 471: [545], 472: [547], 473: [548], 474: [549], 475: [550], 476: [551], 477: [552], 478: [553], 479: [554], 480: [555], 481: [556], 482: [557], 483: [559], 484: [560], 485: [562], 486: [563], 487: [569], 488: [570], 489: [571], 490: [572], 491: [573], 492: [574], 493: [575], 494: [576], 495: [577], 496: [579], 497: [580], 498: [581], 499: [582], 500: [583], 501: [584], 502: [585], 503: [586], 504: [587], 505: [589], 506: [590], 507: [591], 508: [592], 509: [593], 510: [594], 511: [595], 512: [596], 513: [597], 514: [599], 515: [600], 516: [601], 517: [602], 518: [603], 519: [604], 520: [606], 521: [607], 522: [608], 523: [609], 524: [610], 525: [611], 526: [615], 527: [616], 528: [617], 529: [618], 530: [619], 531: [620], 532: [622], 533: [623], 534: [624], 535: [625], 536: [626], 537: [627], 538: [628], 539: [629], 540: [630], 541: [631], 542: [632], 543: [633], 544: [634], 545: [635], 546: [636], 547: [638], 548: [639], 549: [640], 550: [641], 551: [642], 552: [643], 553: [644], 554: [645], 555: [646], 556: [647], 557: [648], 558: [649], 559: [650], 560: [651], 561: [652], 562: 
[654], 563: [655], 564: [656], 565: [657], 566: [658], 567: [659], 568: [660], 569: [661], 570: [662], 571: [663], 572: [664], 573: [665], 574: [666], 575: [667], 576: [668], 577: [670], 578: [671], 579: [672], 580: [673], 581: [674], 582: [675], 583: [676], 584: [677], 585: [678], 586: [679], 587: [680], 588: [682], 589: [683], 590: [685], 591: [686], 592: [692], 593: [693], 594: [694], 595: [695], 596: [696], 597: [697], 598: [698], 599: [699], 600: [700], 601: [702], 602: [703], 603: [704], 604: [705], 605: [706], 606: [707], 607: [708], 608: [709], 609: [710], 610: [712], 611: [713], 612: [714], 613: [715], 614: [716], 615: [717], 616: [718], 617: [719], 618: [720], 619: [722], 620: [723], 621: [724], 622: [725], 623: [726], 624: [727], 625: [729], 626: [730], 627: [731], 628: [732], 629: [733], 630: [734], 631: [738], 632: [739], 633: [740], 634: [741], 635: [742], 636: [743], 637: [744], 638: [745], 639: [746], 640: [747], 641: [748], 642: [749], 643: [750], 644: [751], 645: [752], 646: [753], 647: [754], 648: [755], 649: [756], 650: [757], 651: [758], 652: [759], 653: [760], 654: [761], 655: [762], 656: [763], 657: [764], 658: [765], 659: [766], 660: [767], 661: [768], 662: [769], 663: [770], 664: [772], 665: [773], 666: [774], 667: [775], 668: [776], 669: [777], 670: [779], 671: [780], 672: [781], 673: [782], 674: [783], 675: [784], 676: [788], 677: [789], 678: [790], 679: [791], 680: [792], 681: [793], 682: [794], 683: [795], 684: [796], 685: [797], 686: [798], 687: [799], 688: [800], 689: [801], 690: [802], 691: [803], 692: [804], 693: [805], 694: [806], 695: [807], 696: [808], 697: [809], 698: [810], 699: [811], 700: [812], 701: [813], 702: [814], 703: [815], 704: [816], 705: [817], 706: [818], 707: [819], 708: [820], 709: [822], 710: [823], 711: [824], 712: [825], 713: [826], 714: [827], 715: [829], 716: [830], 717: [831], 718: [832], 719: [833], 720: [834], 721: [838], 722: [839], 723: [841], 724: [842], 725: [844], 726: [845], 727: [851], 728: [852], 
729: [853], 730: [854], 731: [855], 732: [856], 733: [857], 734: [858], 735: [859], 736: [861], 737: [862], 738: [863], 739: [864], 740: [865], 741: [866], 742: [867], 743: [868], 744: [869], 745: [871], 746: [872], 747: [873], 748: [874], 749: [875], 750: [876], 751: [877], 752: [878], 753: [879], 754: [881], 755: [882], 756: [883], 757: [884], 758: [885], 759: [886], 760: [888], 761: [889], 762: [890], 763: [891], 764: [892], 765: [893], 766: [894], 767: [895], 768: [897], 769: [898], 770: [900], 771: [901], 772: [907], 773: [908], 774: [909], 775: [910], 776: [911], 777: [912], 778: [913], 779: [914], 780: [915], 781: [917], 782: [918], 783: [919], 784: [920], 785: [921], 786: [922], 787: [923], 788: [924], 789: [925], 790: [927], 791: [928], 792: [929], 793: [930], 794: [931], 795: [932], 796: [933], 797: [934], 798: [935], 799: [937], 800: [938], 801: [939], 802: [940], 803: [941], 804: [942], 805: [944], 806: [945], 807: [946], 808: [947], 809: [948], 810: [949], 811: [950], 812: [951], 813: [953], 814: [954], 815: [956], 816: [957], 817: [963], 818: [964], 819: [965], 820: [966], 821: [967], 822: [968], 823: [969], 824: [970], 825: [971], 826: [973], 827: [974], 828: [975], 829: [976], 830: [977], 831: [978], 832: [979], 833: [980], 834: [981], 835: [983], 836: [984], 837: [985], 838: [986], 839: [987], 840: [988], 841: [989], 842: [990], 843: [991], 844: [993], 845: [994], 846: [995], 847: [996], 848: [997], 849: [998], 850: [1000], 851: [1001], 852: [1002], 853: [1003], 854: [1004], 855: [1005], 856: [1006], 857: [1007], 858: [1009], 859: [1010], 860: [1012], 861: [1013], 862: [1018], 863: [1019], 864: [1021], 865: [1022], 866: [1024], 867: [1025], 868: [1030], 869: [1031], 870: [1033], 871: [1034], 872: [1036], 873: [1037], 874: [1045], 875: [1046], 876: [1047], 877: [1048], 878: [1049], 879: [1050], 880: [1051], 881: [1052], 882: [1053], 883: [1054], 884: [1055], 885: [1056], 886: [1057], 887: [1058], 888: [1059], 889: [1060], 890: [1061], 891: [1062], 
892: [1064], 893: [1065], 894: [1066], 895: [1067], 896: [1068], 897: [1069], 898: [1070], 899: [1071], 900: [1072], 901: [1073], 902: [1074], 903: [1075], 904: [1076], 905: [1077], 906: [1078], 907: [1079], 908: [1080], 909: [1081], 910: [1083], 911: [1084], 912: [1085], 913: [1086], 914: [1087], 915: [1088], 916: [1089], 917: [1090], 918: [1091], 919: [1092], 920: [1093], 921: [1094], 922: [1095], 923: [1096], 924: [1097], 925: [1098], 926: [1099], 927: [1100], 928: [1102], 929: [1103], 930: [1104], 931: [1105], 932: [1106], 933: [1107], 934: [1109], 935: [1110], 936: [1111], 937: [1112], 938: [1113], 939: [1114], 940: [1116], 941: [1117], 942: [1118], 943: [1119], 944: [1120], 945: [1121]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 6: 3, 7: 4, 11: 5, 12: 6, 28: 7, 29: 8, 30: 9, 34: 10, 35: 11, 36: 12, 40: 13, 41: 14, 42: 15, 46: 16, 47: 17, 48: 18, 49: 19, 50: 20, 51: 21, 55: 22, 56: 23, 57: 24, 58: 25, 59: 26, 60: 27, 61: 28, 62: 29, 63: 30, 64: 31, 65: 32, 66: 33, 67: 34, 68: 35, 69: 36, 73: 37, 74: 38, 75: 39, 76: 40, 77: 41, 78: 42, 79: 43, 80: 44, 81: 45, 82: 46, 83: 47, 84: 48, 85: 49, 86: 50, 87: 51, 91: 52, 92: 53, 93: 54, 94: 55, 95: 56, 96: 57, 97: 58, 98: 59, 99: 60, 100: 61, 101: 62, 102: 63, 103: 64, 104: 65, 105: 66, 109: 67, 110: 68, 111: 69, 112: 70, 113: 71, 114: 72, 115: 73, 116: 74, 117: 75, 121: 76, 122: 77, 123: 78, 124: 79, 125: 80, 126: 81, 127: 82, 128: 83, 129: 84, 133: 85, 134: 86, 135: 87, 136: 88, 137: 89, 138: 90, 139: 91, 140: 92, 141: 93, 142: 94, 143: 95, 144: 96, 145: 97, 146: 98, 147: 99, 148: 100, 149: 101, 150: 102, 151: 103, 152: 104, 153: 105, 160: 106, 161: 107, 162: 108, 163: 109, 164: 110, 165: 111, 166: 112, 167: 113, 168: 114, 169: 115, 170: 116, 171: 117, 172: 118, 173: 119, 174: 120, 178: 121, 179: 122, 183: 123, 184: 124, 185: 125, 186: 126, 187: 127, 188: 128, 189: 129, 190: 130, 191: 131, 192: 132, 193: 133, 194: 134, 195: 135, 196: 136, 197: 137, 201: 138, 202: 139, 203: 140, 204: 
141, 205: 142, 206: 143, 207: 144, 208: 145, 209: 146, 210: 147, 211: 148, 212: 149, 213: 150, 214: 151, 215: 152, 219: 153, 220: 154, 221: 155, 222: 156, 223: 157, 224: 158, 225: 159, 226: 160, 227: 161, 228: 162, 229: 163, 230: 164, 231: 165, 232: 166, 233: 167, 234: 168, 235: 169, 236: 170, 237: 171, 238: 172, 239: 173, 240: 174, 241: 175, 242: 176, 243: 177, 244: 178, 245: 179, 246: 180, 247: 181, 248: 182, 249: 183, 250: 184, 251: 185, 252: 186, 253: 187, 254: 188, 255: 189, 256: 190, 257: 191, 258: 192, 259: 193, 260: 194, 261: 195, 262: 196, 266: 197, 267: 198, 268: 199, 269: 200, 270: 201, 271: 202, 275: 203, 276: 204, 277: 205, 278: 206, 279: 207, 280: 208, 284: 209, 285: 210, 319: 211, 320: 212, 321: 213, 322: 214, 323: 215, 324: 216, 325: 217, 326: 218, 327: 219, 328: 220, 329: 221, 330: 222, 331: 223, 332: 224, 333: 225, 337: 226, 338: 227, 342: 228, 343: 229, 344: 230, 345: 231, 346: 232, 347: 233, 348: 234, 349: 235, 350: 236, 351: 237, 352: 238, 353: 239, 354: 240, 355: 241, 356: 242, 360: 243, 361: 244, 362: 245, 363: 246, 364: 247, 365: 248, 366: 249, 367: 250, 368: 251, 369: 252, 370: 253, 371: 254, 372: 255, 373: 256, 374: 257, 378: 258, 379: 259, 380: 260, 381: 261, 382: 262, 383: 263, 384: 264, 385: 265, 386: 266, 387: 267, 388: 268, 389: 269, 390: 270, 391: 271, 392: 272, 393: 273, 394: 274, 395: 275, 396: 276, 397: 277, 398: 278, 399: 279, 400: 280, 401: 281, 402: 282, 403: 283, 404: 284, 405: 285, 406: 286, 407: 287, 408: 288, 409: 289, 410: 290, 411: 291, 412: 292, 413: 293, 414: 294, 415: 295, 416: 296, 417: 297, 418: 298, 419: 299, 420: 300, 421: 301, 425: 302, 426: 303, 427: 304, 428: 305, 429: 306, 430: 307, 434: 308, 435: 309, 436: 310, 437: 311, 438: 312, 439: 313, 443: 314, 444: 315, 478: 316, 479: 317, 480: 318, 481: 319, 482: 320, 483: 321, 487: 322, 488: 323, 489: 324, 490: 325, 491: 326, 492: 327, 493: 328, 494: 329, 495: 330, 496: 331, 497: 332, 498: 333, 499: 334, 500: 335, 501: 336, 505: 337, 506: 338, 507: 339, 508: 340, 509: 
341, 510: 342, 511: 343, 512: 344, 513: 345, 514: 346, 515: 347, 516: 348, 517: 349, 518: 350, 519: 351, 523: 352, 524: 353, 525: 354, 526: 355, 527: 356, 528: 357, 529: 358, 530: 359, 531: 360, 532: 361, 533: 362, 534: 363, 535: 364, 536: 365, 537: 366, 541: 367, 542: 368, 543: 369, 544: 370, 545: 371, 546: 372, 547: 373, 548: 374, 549: 375, 550: 376, 551: 377, 555: 378, 556: 379, 560: 380, 561: 381, 577: 382, 578: 383, 579: 384, 580: 385, 581: 386, 582: 387, 583: 388, 584: 389, 585: 390, 589: 391, 590: 392, 591: 393, 592: 394, 593: 395, 594: 396, 595: 397, 596: 398, 597: 399, 601: 400, 602: 401, 603: 402, 604: 403, 605: 404, 606: 405, 607: 406, 608: 407, 609: 408, 613: 409, 614: 410, 615: 411, 616: 412, 617: 413, 618: 414, 622: 415, 623: 416, 624: 417, 625: 418, 626: 419, 627: 420, 637: 421, 638: 422, 639: 423, 640: 424, 641: 425, 642: 426, 646: 427, 647: 428, 648: 429, 649: 430, 650: 431, 651: 432, 652: 433, 653: 434, 654: 435, 655: 436, 656: 437, 657: 438, 658: 439, 659: 440, 660: 441, 664: 442, 665: 443, 666: 444, 667: 445, 668: 446, 669: 447, 670: 448, 671: 449, 672: 450, 673: 451, 674: 452, 675: 453, 676: 454, 677: 455, 678: 456, 682: 457, 683: 458, 684: 459, 685: 460, 686: 461, 687: 462, 688: 463, 689: 464, 690: 465, 691: 466, 692: 467, 693: 468, 694: 469, 695: 470, 696: 471, 700: 472, 701: 473, 702: 474, 703: 475, 704: 476, 705: 477, 706: 478, 707: 479, 708: 480, 709: 481, 710: 482, 714: 483, 715: 484, 719: 485, 720: 486, 736: 487, 737: 488, 738: 489, 739: 490, 740: 491, 741: 492, 742: 493, 743: 494, 744: 495, 748: 496, 749: 497, 750: 498, 751: 499, 752: 500, 753: 501, 754: 502, 755: 503, 756: 504, 760: 505, 761: 506, 762: 507, 763: 508, 764: 509, 765: 510, 766: 511, 767: 512, 768: 513, 772: 514, 773: 515, 774: 516, 775: 517, 776: 518, 777: 519, 781: 520, 782: 521, 783: 522, 784: 523, 785: 524, 786: 525, 796: 526, 797: 527, 798: 528, 799: 529, 800: 530, 801: 531, 805: 532, 806: 533, 807: 534, 808: 535, 809: 536, 810: 537, 811: 538, 812: 539, 813: 540, 814: 
541, 815: 542, 816: 543, 817: 544, 818: 545, 819: 546, 823: 547, 824: 548, 825: 549, 826: 550, 827: 551, 828: 552, 829: 553, 830: 554, 831: 555, 832: 556, 833: 557, 834: 558, 835: 559, 836: 560, 837: 561, 841: 562, 842: 563, 843: 564, 844: 565, 845: 566, 846: 567, 847: 568, 848: 569, 849: 570, 850: 571, 851: 572, 852: 573, 853: 574, 854: 575, 855: 576, 859: 577, 860: 578, 861: 579, 862: 580, 863: 581, 864: 582, 865: 583, 866: 584, 867: 585, 868: 586, 869: 587, 873: 588, 874: 589, 878: 590, 879: 591, 895: 592, 896: 593, 897: 594, 898: 595, 899: 596, 900: 597, 901: 598, 902: 599, 903: 600, 907: 601, 908: 602, 909: 603, 910: 604, 911: 605, 912: 606, 913: 607, 914: 608, 915: 609, 919: 610, 920: 611, 921: 612, 922: 613, 923: 614, 924: 615, 925: 616, 926: 617, 927: 618, 931: 619, 932: 620, 933: 621, 934: 622, 935: 623, 936: 624, 940: 625, 941: 626, 942: 627, 943: 628, 944: 629, 945: 630, 955: 631, 956: 632, 957: 633, 958: 634, 959: 635, 960: 636, 961: 637, 962: 638, 963: 639, 964: 640, 965: 641, 966: 642, 967: 643, 968: 644, 969: 645, 970: 646, 971: 647, 972: 648, 973: 649, 974: 650, 975: 651, 976: 652, 977: 653, 978: 654, 979: 655, 980: 656, 981: 657, 982: 658, 983: 659, 984: 660, 985: 661, 986: 662, 987: 663, 991: 664, 992: 665, 993: 666, 994: 667, 995: 668, 996: 669, 1000: 670, 1001: 671, 1002: 672, 1003: 673, 1004: 674, 1005: 675, 1015: 676, 1016: 677, 1017: 678, 1018: 679, 1019: 680, 1020: 681, 1021: 682, 1022: 683, 1023: 684, 1024: 685, 1025: 686, 1026: 687, 1027: 688, 1028: 689, 1029: 690, 1030: 691, 1031: 692, 1032: 693, 1033: 694, 1034: 695, 1035: 696, 1036: 697, 1037: 698, 1038: 699, 1039: 700, 1040: 701, 1041: 702, 1042: 703, 1043: 704, 1044: 705, 1045: 706, 1046: 707, 1047: 708, 1051: 709, 1052: 710, 1053: 711, 1054: 712, 1055: 713, 1056: 714, 1060: 715, 1061: 716, 1062: 717, 1063: 718, 1064: 719, 1065: 720, 1075: 721, 1076: 722, 1080: 723, 1081: 724, 1085: 725, 1086: 726, 1102: 727, 1103: 728, 1104: 729, 1105: 730, 1106: 731, 1107: 732, 1108: 733, 1109: 734, 
1110: 735, 1114: 736, 1115: 737, 1116: 738, 1117: 739, 1118: 740, 1119: 741, 1120: 742, 1121: 743, 1122: 744, 1126: 745, 1127: 746, 1128: 747, 1129: 748, 1130: 749, 1131: 750, 1132: 751, 1133: 752, 1134: 753, 1138: 754, 1139: 755, 1140: 756, 1141: 757, 1142: 758, 1143: 759, 1147: 760, 1148: 761, 1149: 762, 1150: 763, 1151: 764, 1152: 765, 1153: 766, 1154: 767, 1158: 768, 1159: 769, 1163: 770, 1164: 771, 1180: 772, 1181: 773, 1182: 774, 1183: 775, 1184: 776, 1185: 777, 1186: 778, 1187: 779, 1188: 780, 1192: 781, 1193: 782, 1194: 783, 1195: 784, 1196: 785, 1197: 786, 1198: 787, 1199: 788, 1200: 789, 1204: 790, 1205: 791, 1206: 792, 1207: 793, 1208: 794, 1209: 795, 1210: 796, 1211: 797, 1212: 798, 1216: 799, 1217: 800, 1218: 801, 1219: 802, 1220: 803, 1221: 804, 1225: 805, 1226: 806, 1227: 807, 1228: 808, 1229: 809, 1230: 810, 1231: 811, 1232: 812, 1236: 813, 1237: 814, 1241: 815, 1242: 816, 1258: 817, 1259: 818, 1260: 819, 1261: 820, 1262: 821, 1263: 822, 1264: 823, 1265: 824, 1266: 825, 1270: 826, 1271: 827, 1272: 828, 1273: 829, 1274: 830, 1275: 831, 1276: 832, 1277: 833, 1278: 834, 1282: 835, 1283: 836, 1284: 837, 1285: 838, 1286: 839, 1287: 840, 1288: 841, 1289: 842, 1290: 843, 1294: 844, 1295: 845, 1296: 846, 1297: 847, 1298: 848, 1299: 849, 1303: 850, 1304: 851, 1305: 852, 1306: 853, 1307: 854, 1308: 855, 1309: 856, 1310: 857, 1314: 858, 1315: 859, 1319: 860, 1320: 861, 1333: 862, 1334: 863, 1338: 864, 1339: 865, 1343: 866, 1344: 867, 1357: 868, 1358: 869, 1362: 870, 1363: 871, 1367: 872, 1368: 873, 1396: 874, 1397: 875, 1398: 876, 1399: 877, 1400: 878, 1401: 879, 1402: 880, 1403: 881, 1404: 882, 1405: 883, 1406: 884, 1407: 885, 1408: 886, 1409: 887, 1410: 888, 1411: 889, 1412: 890, 1413: 891, 1417: 892, 1418: 893, 1419: 894, 1420: 895, 1421: 896, 1422: 897, 1423: 898, 1424: 899, 1425: 900, 1426: 901, 1427: 902, 1428: 903, 1429: 904, 1430: 905, 1431: 906, 1432: 907, 1433: 908, 1434: 909, 1438: 910, 1439: 911, 1440: 912, 1441: 913, 1442: 914, 1443: 915, 1444: 
916, 1445: 917, 1446: 918, 1447: 919, 1448: 920, 1449: 921, 1450: 922, 1451: 923, 1452: 924, 1453: 925, 1454: 926, 1455: 927, 1459: 928, 1460: 929, 1461: 930, 1462: 931, 1463: 932, 1464: 933, 1468: 934, 1469: 935, 1470: 936, 1471: 937, 1472: 938, 1473: 939, 1477: 940, 1478: 941, 1479: 942, 1480: 943, 1481: 944, 1482: 945} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [3], 4: [4], 5: [6], 6: [7], 7: [13], 8: [14], 9: [15], 10: [17], 11: [18], 12: [19], 13: [21], 14: [22], 15: [23], 16: [25], 17: [26], 18: [27], 19: [28], 20: [29], 21: [30], 22: [32], 23: [33], 24: [34], 25: [35], 26: [36], 27: [37], 28: [38], 29: [39], 30: [40], 31: [41], 32: [42], 33: [43], 34: [44], 35: [45], 36: [46], 37: [48], 38: [49], 39: [50], 40: [51], 41: [52], 42: [53], 43: [54], 44: [55], 45: [56], 46: [57], 47: [58], 48: [59], 49: [60], 50: [61], 51: [62], 52: [64], 53: [65], 54: [66], 55: [67], 56: [68], 57: [69], 58: [70], 59: [71], 60: [72], 61: [73], 62: [74], 63: [75], 64: [76], 65: [77], 66: [78], 67: [80], 68: [81], 69: [82], 70: [83], 71: [84], 72: [85], 73: [86], 74: [87], 
75: [88], 76: [90], 77: [91], 78: [92], 79: [93], 80: [94], 81: [95], 82: [96], 83: [97], 84: [98], 85: [100], 86: [101], 87: [102], 88: [103], 89: [104], 90: [105], 91: [106], 92: [107], 93: [108], 94: [109], 95: [110], 96: [111], 97: [112], 98: [113], 99: [114], 100: [115], 101: [116], 102: [117], 103: [118], 104: [119], 105: [120], 106: [123], 107: [124], 108: [125], 109: [126], 110: [127], 111: [128], 112: [129], 113: [130], 114: [131], 115: [132], 116: [133], 117: [134], 118: [135], 119: [136], 120: [137], 121: [139], 122: [140], 123: [142], 124: [143], 125: [144], 126: [145], 127: [146], 128: [147], 129: [148], 130: [149], 131: [150], 132: [151], 133: [152], 134: [153], 135: [154], 136: [155], 137: [156], 138: [158], 139: [159], 140: [160], 141: [161], 142: [162], 143: [163], 144: [164], 145: [165], 146: [166], 147: [167], 148: [168], 149: [169], 150: [170], 151: [171], 152: [172], 153: [174], 154: [175], 155: [176], 156: [177], 157: [178], 158: [179], 159: [180], 160: [181], 161: [182], 162: [183], 163: [184], 164: [185], 165: [186], 166: [187], 167: [188], 168: [189], 169: [190], 170: [191], 171: [192], 172: [193], 173: [194], 174: [195], 175: [196], 176: [197], 177: [198], 178: [199], 179: [200], 180: [201], 181: [202], 182: [203], 183: [204], 184: [205], 185: [206], 186: [207], 187: [208], 188: [209], 189: [210], 190: [211], 191: [212], 192: [213], 193: [214], 194: [215], 195: [216], 196: [217], 197: [219], 198: [220], 199: [221], 200: [222], 201: [223], 202: [224], 203: [226], 204: [227], 205: [228], 206: [229], 207: [230], 208: [231], 209: [233], 210: [234], 211: [246], 212: [247], 213: [248], 214: [249], 215: [250], 216: [251], 217: [252], 218: [253], 219: [254], 220: [255], 221: [256], 222: [257], 223: [258], 224: [259], 225: [260], 226: [262], 227: [263], 228: [265], 229: [266], 230: [267], 231: [268], 232: [269], 233: [270], 234: [271], 235: [272], 236: [273], 237: [274], 238: [275], 239: [276], 240: [277], 241: [278], 242: [279], 243: [281], 244: 
[282], 245: [283], 246: [284], 247: [285], 248: [286], 249: [287], 250: [288], 251: [289], 252: [290], 253: [291], 254: [292], 255: [293], 256: [294], 257: [295], 258: [297], 259: [298], 260: [299], 261: [300], 262: [301], 263: [302], 264: [303], 265: [304], 266: [305], 267: [306], 268: [307], 269: [308], 270: [309], 271: [310], 272: [311], 273: [312], 274: [313], 275: [314], 276: [315], 277: [316], 278: [317], 279: [318], 280: [319], 281: [320], 282: [321], 283: [322], 284: [323], 285: [324], 286: [325], 287: [326], 288: [327], 289: [328], 290: [329], 291: [330], 292: [331], 293: [332], 294: [333], 295: [334], 296: [335], 297: [336], 298: [337], 299: [338], 300: [339], 301: [340], 302: [342], 303: [343], 304: [344], 305: [345], 306: [346], 307: [347], 308: [349], 309: [350], 310: [351], 311: [352], 312: [353], 313: [354], 314: [356], 315: [357], 316: [369], 317: [370], 318: [371], 319: [372], 320: [373], 321: [374], 322: [376], 323: [377], 324: [378], 325: [379], 326: [380], 327: [381], 328: [382], 329: [383], 330: [384], 331: [385], 332: [386], 333: [387], 334: [388], 335: [389], 336: [390], 337: [392], 338: [393], 339: [394], 340: [395], 341: [396], 342: [397], 343: [398], 344: [399], 345: [400], 346: [401], 347: [402], 348: [403], 349: [404], 350: [405], 351: [406], 352: [408], 353: [409], 354: [410], 355: [411], 356: [412], 357: [413], 358: [414], 359: [415], 360: [416], 361: [417], 362: [418], 363: [419], 364: [420], 365: [421], 366: [422], 367: [424], 368: [425], 369: [426], 370: [427], 371: [428], 372: [429], 373: [430], 374: [431], 375: [432], 376: [433], 377: [434], 378: [436], 379: [437], 380: [439], 381: [440], 382: [446], 383: [447], 384: [448], 385: [449], 386: [450], 387: [451], 388: [452], 389: [453], 390: [454], 391: [456], 392: [457], 393: [458], 394: [459], 395: [460], 396: [461], 397: [462], 398: [463], 399: [464], 400: [466], 401: [467], 402: [468], 403: [469], 404: [470], 405: [471], 406: [472], 407: [473], 408: [474], 409: [476], 410: [477], 
411: [478], 412: [479], 413: [480], 414: [481], 415: [483], 416: [484], 417: [485], 418: [486], 419: [487], 420: [488], 421: [492], 422: [493], 423: [494], 424: [495], 425: [496], 426: [497], 427: [499], 428: [500], 429: [501], 430: [502], 431: [503], 432: [504], 433: [505], 434: [506], 435: [507], 436: [508], 437: [509], 438: [510], 439: [511], 440: [512], 441: [513], 442: [515], 443: [516], 444: [517], 445: [518], 446: [519], 447: [520], 448: [521], 449: [522], 450: [523], 451: [524], 452: [525], 453: [526], 454: [527], 455: [528], 456: [529], 457: [531], 458: [532], 459: [533], 460: [534], 461: [535], 462: [536], 463: [537], 464: [538], 465: [539], 466: [540], 467: [541], 468: [542], 469: [543], 470: [544], 471: [545], 472: [547], 473: [548], 474: [549], 475: [550], 476: [551], 477: [552], 478: [553], 479: [554], 480: [555], 481: [556], 482: [557], 483: [559], 484: [560], 485: [562], 486: [563], 487: [569], 488: [570], 489: [571], 490: [572], 491: [573], 492: [574], 493: [575], 494: [576], 495: [577], 496: [579], 497: [580], 498: [581], 499: [582], 500: [583], 501: [584], 502: [585], 503: [586], 504: [587], 505: [589], 506: [590], 507: [591], 508: [592], 509: [593], 510: [594], 511: [595], 512: [596], 513: [597], 514: [599], 515: [600], 516: [601], 517: [602], 518: [603], 519: [604], 520: [606], 521: [607], 522: [608], 523: [609], 524: [610], 525: [611], 526: [615], 527: [616], 528: [617], 529: [618], 530: [619], 531: [620], 532: [622], 533: [623], 534: [624], 535: [625], 536: [626], 537: [627], 538: [628], 539: [629], 540: [630], 541: [631], 542: [632], 543: [633], 544: [634], 545: [635], 546: [636], 547: [638], 548: [639], 549: [640], 550: [641], 551: [642], 552: [643], 553: [644], 554: [645], 555: [646], 556: [647], 557: [648], 558: [649], 559: [650], 560: [651], 561: [652], 562: [654], 563: [655], 564: [656], 565: [657], 566: [658], 567: [659], 568: [660], 569: [661], 570: [662], 571: [663], 572: [664], 573: [665], 574: [666], 575: [667], 576: [668], 577: 
[670], 578: [671], 579: [672], 580: [673], 581: [674], 582: [675], 583: [676], 584: [677], 585: [678], 586: [679], 587: [680], 588: [682], 589: [683], 590: [685], 591: [686], 592: [692], 593: [693], 594: [694], 595: [695], 596: [696], 597: [697], 598: [698], 599: [699], 600: [700], 601: [702], 602: [703], 603: [704], 604: [705], 605: [706], 606: [707], 607: [708], 608: [709], 609: [710], 610: [712], 611: [713], 612: [714], 613: [715], 614: [716], 615: [717], 616: [718], 617: [719], 618: [720], 619: [722], 620: [723], 621: [724], 622: [725], 623: [726], 624: [727], 625: [729], 626: [730], 627: [731], 628: [732], 629: [733], 630: [734], 631: [738], 632: [739], 633: [740], 634: [741], 635: [742], 636: [743], 637: [744], 638: [745], 639: [746], 640: [747], 641: [748], 642: [749], 643: [750], 644: [751], 645: [752], 646: [753], 647: [754], 648: [755], 649: [756], 650: [757], 651: [758], 652: [759], 653: [760], 654: [761], 655: [762], 656: [763], 657: [764], 658: [765], 659: [766], 660: [767], 661: [768], 662: [769], 663: [770], 664: [772], 665: [773], 666: [774], 667: [775], 668: [776], 669: [777], 670: [779], 671: [780], 672: [781], 673: [782], 674: [783], 675: [784], 676: [788], 677: [789], 678: [790], 679: [791], 680: [792], 681: [793], 682: [794], 683: [795], 684: [796], 685: [797], 686: [798], 687: [799], 688: [800], 689: [801], 690: [802], 691: [803], 692: [804], 693: [805], 694: [806], 695: [807], 696: [808], 697: [809], 698: [810], 699: [811], 700: [812], 701: [813], 702: [814], 703: [815], 704: [816], 705: [817], 706: [818], 707: [819], 708: [820], 709: [822], 710: [823], 711: [824], 712: [825], 713: [826], 714: [827], 715: [829], 716: [830], 717: [831], 718: [832], 719: [833], 720: [834], 721: [838], 722: [839], 723: [841], 724: [842], 725: [844], 726: [845], 727: [851], 728: [852], 729: [853], 730: [854], 731: [855], 732: [856], 733: [857], 734: [858], 735: [859], 736: [861], 737: [862], 738: [863], 739: [864], 740: [865], 741: [866], 742: [867], 743: [868], 
744: [869], 745: [871], 746: [872], 747: [873], 748: [874], 749: [875], 750: [876], 751: [877], 752: [878], 753: [879], 754: [881], 755: [882], 756: [883], 757: [884], 758: [885], 759: [886], 760: [888], 761: [889], 762: [890], 763: [891], 764: [892], 765: [893], 766: [894], 767: [895], 768: [897], 769: [898], 770: [900], 771: [901], 772: [907], 773: [908], 774: [909], 775: [910], 776: [911], 777: [912], 778: [913], 779: [914], 780: [915], 781: [917], 782: [918], 783: [919], 784: [920], 785: [921], 786: [922], 787: [923], 788: [924], 789: [925], 790: [927], 791: [928], 792: [929], 793: [930], 794: [931], 795: [932], 796: [933], 797: [934], 798: [935], 799: [937], 800: [938], 801: [939], 802: [940], 803: [941], 804: [942], 805: [944], 806: [945], 807: [946], 808: [947], 809: [948], 810: [949], 811: [950], 812: [951], 813: [953], 814: [954], 815: [956], 816: [957], 817: [963], 818: [964], 819: [965], 820: [966], 821: [967], 822: [968], 823: [969], 824: [970], 825: [971], 826: [973], 827: [974], 828: [975], 829: [976], 830: [977], 831: [978], 832: [979], 833: [980], 834: [981], 835: [983], 836: [984], 837: [985], 838: [986], 839: [987], 840: [988], 841: [989], 842: [990], 843: [991], 844: [993], 845: [994], 846: [995], 847: [996], 848: [997], 849: [998], 850: [1000], 851: [1001], 852: [1002], 853: [1003], 854: [1004], 855: [1005], 856: [1006], 857: [1007], 858: [1009], 859: [1010], 860: [1012], 861: [1013], 862: [1018], 863: [1019], 864: [1021], 865: [1022], 866: [1024], 867: [1025], 868: [1030], 869: [1031], 870: [1033], 871: [1034], 872: [1036], 873: [1037], 874: [1045], 875: [1046], 876: [1047], 877: [1048], 878: [1049], 879: [1050], 880: [1051], 881: [1052], 882: [1053], 883: [1054], 884: [1055], 885: [1056], 886: [1057], 887: [1058], 888: [1059], 889: [1060], 890: [1061], 891: [1062], 892: [1064], 893: [1065], 894: [1066], 895: [1067], 896: [1068], 897: [1069], 898: [1070], 899: [1071], 900: [1072], 901: [1073], 902: [1074], 903: [1075], 904: [1076], 905: [1077], 
906: [1078], 907: [1079], 908: [1080], 909: [1081], 910: [1083], 911: [1084], 912: [1085], 913: [1086], 914: [1087], 915: [1088], 916: [1089], 917: [1090], 918: [1091], 919: [1092], 920: [1093], 921: [1094], 922: [1095], 923: [1096], 924: [1097], 925: [1098], 926: [1099], 927: [1100], 928: [1102], 929: [1103], 930: [1104], 931: [1105], 932: [1106], 933: [1107], 934: [1109], 935: [1110], 936: [1111], 937: [1112], 938: [1113], 939: [1114], 940: [1116], 941: [1117], 942: [1118], 943: [1119], 944: [1120], 945: [1121]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 6: 3, 7: 4, 11: 5, 12: 6, 28: 7, 29: 8, 30: 9, 34: 10, 35: 11, 36: 12, 40: 13, 41: 14, 42: 15, 46: 16, 47: 17, 48: 18, 49: 19, 50: 20, 51: 21, 55: 22, 56: 23, 57: 24, 58: 25, 59: 26, 60: 27, 61: 28, 62: 29, 63: 30, 64: 31, 65: 32, 66: 33, 67: 34, 68: 35, 69: 36, 73: 37, 74: 38, 75: 39, 76: 40, 77: 41, 78: 42, 79: 43, 80: 44, 81: 45, 82: 46, 83: 47, 84: 48, 85: 49, 86: 50, 87: 51, 91: 52, 92: 53, 93: 54, 94: 55, 95: 56, 96: 57, 97: 58, 98: 59, 99: 60, 100: 61, 101: 62, 102: 63, 103: 64, 104: 65, 105: 66, 109: 67, 110: 68, 111: 69, 112: 70, 113: 71, 114: 72, 115: 73, 116: 74, 117: 75, 121: 76, 122: 77, 123: 78, 124: 79, 125: 80, 126: 81, 127: 82, 128: 83, 129: 84, 133: 85, 134: 86, 135: 87, 136: 88, 137: 89, 138: 90, 139: 91, 140: 92, 141: 93, 142: 94, 143: 95, 144: 96, 145: 97, 146: 98, 147: 99, 148: 100, 149: 101, 150: 102, 151: 103, 152: 104, 153: 105, 160: 106, 161: 107, 162: 108, 163: 109, 164: 110, 165: 111, 166: 112, 167: 113, 168: 114, 169: 115, 170: 116, 171: 117, 172: 118, 173: 119, 174: 120, 178: 121, 179: 122, 183: 123, 184: 124, 185: 125, 186: 126, 187: 127, 188: 128, 189: 129, 190: 130, 191: 131, 192: 132, 193: 133, 194: 134, 195: 135, 196: 136, 197: 137, 201: 138, 202: 139, 203: 140, 204: 141, 205: 142, 206: 143, 207: 144, 208: 145, 209: 146, 210: 147, 211: 148, 212: 149, 213: 150, 214: 151, 215: 152, 219: 153, 220: 154, 221: 155, 222: 156, 223: 157, 224: 158, 225: 159, 
226: 160, 227: 161, 228: 162, 229: 163, 230: 164, 231: 165, 232: 166, 233: 167, 234: 168, 235: 169, 236: 170, 237: 171, 238: 172, 239: 173, 240: 174, 241: 175, 242: 176, 243: 177, 244: 178, 245: 179, 246: 180, 247: 181, 248: 182, 249: 183, 250: 184, 251: 185, 252: 186, 253: 187, 254: 188, 255: 189, 256: 190, 257: 191, 258: 192, 259: 193, 260: 194, 261: 195, 262: 196, 266: 197, 267: 198, 268: 199, 269: 200, 270: 201, 271: 202, 275: 203, 276: 204, 277: 205, 278: 206, 279: 207, 280: 208, 284: 209, 285: 210, 319: 211, 320: 212, 321: 213, 322: 214, 323: 215, 324: 216, 325: 217, 326: 218, 327: 219, 328: 220, 329: 221, 330: 222, 331: 223, 332: 224, 333: 225, 337: 226, 338: 227, 342: 228, 343: 229, 344: 230, 345: 231, 346: 232, 347: 233, 348: 234, 349: 235, 350: 236, 351: 237, 352: 238, 353: 239, 354: 240, 355: 241, 356: 242, 360: 243, 361: 244, 362: 245, 363: 246, 364: 247, 365: 248, 366: 249, 367: 250, 368: 251, 369: 252, 370: 253, 371: 254, 372: 255, 373: 256, 374: 257, 378: 258, 379: 259, 380: 260, 381: 261, 382: 262, 383: 263, 384: 264, 385: 265, 386: 266, 387: 267, 388: 268, 389: 269, 390: 270, 391: 271, 392: 272, 393: 273, 394: 274, 395: 275, 396: 276, 397: 277, 398: 278, 399: 279, 400: 280, 401: 281, 402: 282, 403: 283, 404: 284, 405: 285, 406: 286, 407: 287, 408: 288, 409: 289, 410: 290, 411: 291, 412: 292, 413: 293, 414: 294, 415: 295, 416: 296, 417: 297, 418: 298, 419: 299, 420: 300, 421: 301, 425: 302, 426: 303, 427: 304, 428: 305, 429: 306, 430: 307, 434: 308, 435: 309, 436: 310, 437: 311, 438: 312, 439: 313, 443: 314, 444: 315, 478: 316, 479: 317, 480: 318, 481: 319, 482: 320, 483: 321, 487: 322, 488: 323, 489: 324, 490: 325, 491: 326, 492: 327, 493: 328, 494: 329, 495: 330, 496: 331, 497: 332, 498: 333, 499: 334, 500: 335, 501: 336, 505: 337, 506: 338, 507: 339, 508: 340, 509: 341, 510: 342, 511: 343, 512: 344, 513: 345, 514: 346, 515: 347, 516: 348, 517: 349, 518: 350, 519: 351, 523: 352, 524: 353, 525: 354, 526: 355, 527: 356, 528: 357, 529: 358, 530: 359, 
531: 360, 532: 361, 533: 362, 534: 363, 535: 364, 536: 365, 537: 366, 541: 367, 542: 368, 543: 369, 544: 370, 545: 371, 546: 372, 547: 373, 548: 374, 549: 375, 550: 376, 551: 377, 555: 378, 556: 379, 560: 380, 561: 381, 577: 382, 578: 383, 579: 384, 580: 385, 581: 386, 582: 387, 583: 388, 584: 389, 585: 390, 589: 391, 590: 392, 591: 393, 592: 394, 593: 395, 594: 396, 595: 397, 596: 398, 597: 399, 601: 400, 602: 401, 603: 402, 604: 403, 605: 404, 606: 405, 607: 406, 608: 407, 609: 408, 613: 409, 614: 410, 615: 411, 616: 412, 617: 413, 618: 414, 622: 415, 623: 416, 624: 417, 625: 418, 626: 419, 627: 420, 637: 421, 638: 422, 639: 423, 640: 424, 641: 425, 642: 426, 646: 427, 647: 428, 648: 429, 649: 430, 650: 431, 651: 432, 652: 433, 653: 434, 654: 435, 655: 436, 656: 437, 657: 438, 658: 439, 659: 440, 660: 441, 664: 442, 665: 443, 666: 444, 667: 445, 668: 446, 669: 447, 670: 448, 671: 449, 672: 450, 673: 451, 674: 452, 675: 453, 676: 454, 677: 455, 678: 456, 682: 457, 683: 458, 684: 459, 685: 460, 686: 461, 687: 462, 688: 463, 689: 464, 690: 465, 691: 466, 692: 467, 693: 468, 694: 469, 695: 470, 696: 471, 700: 472, 701: 473, 702: 474, 703: 475, 704: 476, 705: 477, 706: 478, 707: 479, 708: 480, 709: 481, 710: 482, 714: 483, 715: 484, 719: 485, 720: 486, 736: 487, 737: 488, 738: 489, 739: 490, 740: 491, 741: 492, 742: 493, 743: 494, 744: 495, 748: 496, 749: 497, 750: 498, 751: 499, 752: 500, 753: 501, 754: 502, 755: 503, 756: 504, 760: 505, 761: 506, 762: 507, 763: 508, 764: 509, 765: 510, 766: 511, 767: 512, 768: 513, 772: 514, 773: 515, 774: 516, 775: 517, 776: 518, 777: 519, 781: 520, 782: 521, 783: 522, 784: 523, 785: 524, 786: 525, 796: 526, 797: 527, 798: 528, 799: 529, 800: 530, 801: 531, 805: 532, 806: 533, 807: 534, 808: 535, 809: 536, 810: 537, 811: 538, 812: 539, 813: 540, 814: 541, 815: 542, 816: 543, 817: 544, 818: 545, 819: 546, 823: 547, 824: 548, 825: 549, 826: 550, 827: 551, 828: 552, 829: 553, 830: 554, 831: 555, 832: 556, 833: 557, 834: 558, 835: 559, 
836: 560, 837: 561, 841: 562, 842: 563, 843: 564, 844: 565, 845: 566, 846: 567, 847: 568, 848: 569, 849: 570, 850: 571, 851: 572, 852: 573, 853: 574, 854: 575, 855: 576, 859: 577, 860: 578, 861: 579, 862: 580, 863: 581, 864: 582, 865: 583, 866: 584, 867: 585, 868: 586, 869: 587, 873: 588, 874: 589, 878: 590, 879: 591, 895: 592, 896: 593, 897: 594, 898: 595, 899: 596, 900: 597, 901: 598, 902: 599, 903: 600, 907: 601, 908: 602, 909: 603, 910: 604, 911: 605, 912: 606, 913: 607, 914: 608, 915: 609, 919: 610, 920: 611, 921: 612, 922: 613, 923: 614, 924: 615, 925: 616, 926: 617, 927: 618, 931: 619, 932: 620, 933: 621, 934: 622, 935: 623, 936: 624, 940: 625, 941: 626, 942: 627, 943: 628, 944: 629, 945: 630, 955: 631, 956: 632, 957: 633, 958: 634, 959: 635, 960: 636, 961: 637, 962: 638, 963: 639, 964: 640, 965: 641, 966: 642, 967: 643, 968: 644, 969: 645, 970: 646, 971: 647, 972: 648, 973: 649, 974: 650, 975: 651, 976: 652, 977: 653, 978: 654, 979: 655, 980: 656, 981: 657, 982: 658, 983: 659, 984: 660, 985: 661, 986: 662, 987: 663, 991: 664, 992: 665, 993: 666, 994: 667, 995: 668, 996: 669, 1000: 670, 1001: 671, 1002: 672, 1003: 673, 1004: 674, 1005: 675, 1015: 676, 1016: 677, 1017: 678, 1018: 679, 1019: 680, 1020: 681, 1021: 682, 1022: 683, 1023: 684, 1024: 685, 1025: 686, 1026: 687, 1027: 688, 1028: 689, 1029: 690, 1030: 691, 1031: 692, 1032: 693, 1033: 694, 1034: 695, 1035: 696, 1036: 697, 1037: 698, 1038: 699, 1039: 700, 1040: 701, 1041: 702, 1042: 703, 1043: 704, 1044: 705, 1045: 706, 1046: 707, 1047: 708, 1051: 709, 1052: 710, 1053: 711, 1054: 712, 1055: 713, 1056: 714, 1060: 715, 1061: 716, 1062: 717, 1063: 718, 1064: 719, 1065: 720, 1075: 721, 1076: 722, 1080: 723, 1081: 724, 1085: 725, 1086: 726, 1102: 727, 1103: 728, 1104: 729, 1105: 730, 1106: 731, 1107: 732, 1108: 733, 1109: 734, 1110: 735, 1114: 736, 1115: 737, 1116: 738, 1117: 739, 1118: 740, 1119: 741, 1120: 742, 1121: 743, 1122: 744, 1126: 745, 1127: 746, 1128: 747, 1129: 748, 1130: 749, 1131: 750, 1132: 
751, 1133: 752, 1134: 753, 1138: 754, 1139: 755, 1140: 756, 1141: 757, 1142: 758, 1143: 759, 1147: 760, 1148: 761, 1149: 762, 1150: 763, 1151: 764, 1152: 765, 1153: 766, 1154: 767, 1158: 768, 1159: 769, 1163: 770, 1164: 771, 1180: 772, 1181: 773, 1182: 774, 1183: 775, 1184: 776, 1185: 777, 1186: 778, 1187: 779, 1188: 780, 1192: 781, 1193: 782, 1194: 783, 1195: 784, 1196: 785, 1197: 786, 1198: 787, 1199: 788, 1200: 789, 1204: 790, 1205: 791, 1206: 792, 1207: 793, 1208: 794, 1209: 795, 1210: 796, 1211: 797, 1212: 798, 1216: 799, 1217: 800, 1218: 801, 1219: 802, 1220: 803, 1221: 804, 1225: 805, 1226: 806, 1227: 807, 1228: 808, 1229: 809, 1230: 810, 1231: 811, 1232: 812, 1236: 813, 1237: 814, 1241: 815, 1242: 816, 1258: 817, 1259: 818, 1260: 819, 1261: 820, 1262: 821, 1263: 822, 1264: 823, 1265: 824, 1266: 825, 1270: 826, 1271: 827, 1272: 828, 1273: 829, 1274: 830, 1275: 831, 1276: 832, 1277: 833, 1278: 834, 1282: 835, 1283: 836, 1284: 837, 1285: 838, 1286: 839, 1287: 840, 1288: 841, 1289: 842, 1290: 843, 1294: 844, 1295: 845, 1296: 846, 1297: 847, 1298: 848, 1299: 849, 1303: 850, 1304: 851, 1305: 852, 1306: 853, 1307: 854, 1308: 855, 1309: 856, 1310: 857, 1314: 858, 1315: 859, 1319: 860, 1320: 861, 1333: 862, 1334: 863, 1338: 864, 1339: 865, 1343: 866, 1344: 867, 1357: 868, 1358: 869, 1362: 870, 1363: 871, 1367: 872, 1368: 873, 1396: 874, 1397: 875, 1398: 876, 1399: 877, 1400: 878, 1401: 879, 1402: 880, 1403: 881, 1404: 882, 1405: 883, 1406: 884, 1407: 885, 1408: 886, 1409: 887, 1410: 888, 1411: 889, 1412: 890, 1413: 891, 1417: 892, 1418: 893, 1419: 894, 1420: 895, 1421: 896, 1422: 897, 1423: 898, 1424: 899, 1425: 900, 1426: 901, 1427: 902, 1428: 903, 1429: 904, 1430: 905, 1431: 906, 1432: 907, 1433: 908, 1434: 909, 1438: 910, 1439: 911, 1440: 912, 1441: 913, 1442: 914, 1443: 915, 1444: 916, 1445: 917, 1446: 918, 1447: 919, 1448: 920, 1449: 921, 1450: 922, 1451: 923, 1452: 924, 1453: 925, 1454: 926, 1455: 927, 1459: 928, 1460: 929, 1461: 930, 1462: 931, 1463: 932, 
1464: 933, 1468: 934, 1469: 935, 1470: 936, 1471: 937, 1472: 938, 1473: 939, 1477: 940, 1478: 941, 1479: 942, 1480: 943, 1481: 944, 1482: 945} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1336]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1337]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 
336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 
701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric 
diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.508 s -Wrote files for 2281 helas calls in 46.421 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.762 s +Wrote files for 2281 helas calls in 46.233 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines @@ -237,7 +237,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.306 s +ALOHA: aloha creates 10 routines in 0.311 s VVV1 VVV1 FFV1 @@ -356,6 +356,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m57.640s -user 0m56.636s -sys 0m0.801s +real 0m57.853s +user 0m56.814s +sys 0m0.842s diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index cbcbb8aed5..75995cb13b 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0053789615631103516  +DEBUG: model prefixing takes 0.005396604537963867  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.855 s +1 processes with 1240 diagrams generated in 1.840 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -177,36 +177,36 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, 
cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1336]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.486 s +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1337]  +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.488 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -214,7 +214,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.338 s +ALOHA: aloha creates 5 routines in 0.342 s VVV1 VVV1 FFV1 @@ -244,6 +244,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m12.917s -user 0m12.668s -sys 0m0.098s +real 0m12.947s +user 0m12.782s +sys 0m0.106s diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 98599d4160..da82c33819 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005768775939941406  +DEBUG: model prefixing takes 0.0053882598876953125  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. 
INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.076 s +8 processes with 40 diagrams generated in 0.078 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -199,34 +199,34 @@ INFO: Creating files in directory P1_gu_ttxu DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 
1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1339]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 
1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1340]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -237,34 +237,34 @@ INFO: Creating files in directory P1_gux_ttxux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  INFO: Created files 
CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1336]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1337]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  @@ -407,6 +407,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.557s -user 0m2.244s -sys 0m0.297s +real 0m2.559s +user 0m2.238s +sys 0m0.312s diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 834b04055a..865d85fca2 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005788087844848633  +DEBUG: model prefixing takes 0.005266427993774414  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.083 s +8 processes with 40 diagrams generated in 0.077 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -199,27 +199,27 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  None 
[model_handling.py at line 1656]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1339]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 
1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1340]  DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  DEBUG: type(subproc_group)= [output.py at line 188]  DEBUG: type(fortran_model)= [output.py at line 189]  @@ -230,33 +230,33 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: 
self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1336]  -Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  
+DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1337]  +Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.141 s +ALOHA: aloha creates 2 routines in 0.142 s FFV1 FFV1 FFV1 @@ -279,6 +279,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.749s -user 0m0.700s -sys 0m0.042s +real 0m0.737s +user 0m0.663s +sys 0m0.063s diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index 93eb281326..04b5080170 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -61,43 +61,6 @@ set zerowidth_tchannel F set auto_convert_model T; import model heft; generate g g > h save 
options auto_convert_model save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: download model from https://madgraph.mi.infn.it/Downloads/models/heft.tgz to the following directory: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/models  ---2023-10-26 12:00:16-- https://madgraph.mi.infn.it/Downloads/models/heft.tgz -Resolving madgraph.mi.infn.it (madgraph.mi.infn.it)... 192.135.21.75 -Connecting to madgraph.mi.infn.it (madgraph.mi.infn.it)|192.135.21.75|:443... connected. -HTTP request sent, awaiting response... 200 OK -Length: 13558 (13K) [application/x-gzip] -Saving to: ‘tmp.tgz’ - - 0K .......... ... 100% 37.3M=0s - -2023-10-26 12:00:17 (37.3 MB/s) - ‘tmp.tgz’ saved [13558/13558] - -heft/ -heft/__init__.py -heft/coupling_orders.py -heft/couplings.py -heft/function_library.py -heft/HEFT_UFO.log -heft/lorentz.py -heft/object_library.py -heft/parameters.py -heft/particles.py -heft/restrict_ckm.dat -heft/restrict_default.dat -heft/restrict_no_b_mass.dat -heft/restrict_no_masses.dat -heft/restrict_no_tau_mass.dat -heft/restrict_zeromass_ckm.dat -heft/vertices.py -heft/write_param_card.py -fail to load model but auto_convert_model is on True. Trying to convert the model -convert model /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/models/heft -retry the load of the model -import model heft -INFO: load particles -INFO: load vertices -DEBUG: model prefixing takes 0.006020307540893555  INFO: Restrict model heft with file models/heft/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: s u w+ at order: QED=1  @@ -160,9 +123,6 @@ Defined multiparticle l- = e- mu- Defined multiparticle vl = ve vm vt Defined multiparticle vl~ = ve~ vm~ vt~ Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~ e+ mu+ t b t~ b~ z w+ h h1 w- ta- ta+ -INFO: Change particles name to pass to MG5 convention -Kept definitions of multiparticles p / j / l+ / l- / vl / vl~ unchanged -Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~ e+ mu+ t b t~ b~ z w+ h h1 w- ta- ta+ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. INFO: Trying process: g g > h HIG<=1 HIW<=1 WEIGHTED<=2 @1 @@ -189,34 +149,34 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 
1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_heft_gg_h.txt [model_handling.py at line 1336]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_heft_gg_h.txt [model_handling.py at line 1337]  Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model 
(create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines -ALOHA: aloha creates 1 routines in 0.072 s +ALOHA: aloha creates 1 routines in 0.060 s VVS3 FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. @@ -235,6 +195,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.965s -user 0m0.636s -sys 0m0.071s +real 0m0.489s +user 0m0.426s +sys 0m0.045s From 1903d30c6196bc9dfa80c0d174136e1642b6c959 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 12:28:46 +0200 Subject: [PATCH 009/119] [oct23av] regenerate 7 pp_tt012j.mad, there are two issues: clang formatting, and especially the build will fail. Codebase includes merging commit a6731bd9e (Olivier Wed Aug 23 13:23:12 2023 +0200) This uses Olivier's 'fix_mirror' branch for PR #754 In particular a6731bd9e Olivier Mattelaer Wed Aug 23 13:23:12 2023 +0200 Merge branch 'fix_mirror' 2556cddc8 Olivier Mattelaer Wed Aug 23 09:27:38 2023 +0200 avoid that mirroring is reset by the plugin These lines fail the build (as well as clang formatting) [NOT OK] Check formatting in: pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc 786c786 < constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) --- > constexpr int helcolDenominators[1] = { 36, 36 }; // assume nprocesses == 1 (#272 and #343) The same happens in each P subdirectory. Build errors: ccache /usr/local/cuda-12.0/bin/nvcc -Xcompiler -O3 -lineinfo -I. 
-I../../src -I/usr/local/cuda-12.0/include/ -DUSE_NVTX -gencode arch=compute_70,code=compute_70 -gencode arch=compute_70,code=sm_70 -use_fast_math -std=c++17 -ccbin /usr/lib64/ccache/g++ -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE -Xcompiler -fPIC -c gCPPProcess.cu -o gCPPProcess.o gCPPProcess.cu(779): error: static assertion failed with "Assume nprocesses == 1" gCPPProcess.cu(786): error: too many initializer values 2 errors detected in the compilation of "gCPPProcess.cu". --- .../CODEGEN_mad_pp_tt012j_log.txt | 602 +++++++++--------- .../SubProcesses/P0_uux_ttx/CPPProcess.cc | 4 +- .../SubProcesses/P0_uux_ttx/mirrorprocs.inc | 2 +- .../SubProcesses/P0_uux_ttx/processes.dat | 2 +- .../SubProcesses/P1_gu_ttxu/CPPProcess.cc | 4 +- .../SubProcesses/P1_gu_ttxu/mirrorprocs.inc | 2 +- .../SubProcesses/P1_gu_ttxu/processes.dat | 2 +- .../SubProcesses/P1_gux_ttxux/CPPProcess.cc | 4 +- .../SubProcesses/P1_gux_ttxux/mirrorprocs.inc | 2 +- .../SubProcesses/P1_gux_ttxux/processes.dat | 2 +- .../SubProcesses/P1_uux_ttxg/CPPProcess.cc | 4 +- .../SubProcesses/P1_uux_ttxg/mirrorprocs.inc | 2 +- .../SubProcesses/P1_uux_ttxg/processes.dat | 2 +- .../SubProcesses/P2_gu_ttxgu/CPPProcess.cc | 4 +- .../SubProcesses/P2_gu_ttxgu/mirrorprocs.inc | 2 +- .../SubProcesses/P2_gu_ttxgu/processes.dat | 2 +- .../SubProcesses/P2_gux_ttxgux/CPPProcess.cc | 4 +- .../P2_gux_ttxgux/mirrorprocs.inc | 2 +- .../SubProcesses/P2_gux_ttxgux/processes.dat | 2 +- .../SubProcesses/P2_uc_ttxuc/CPPProcess.cc | 4 +- .../SubProcesses/P2_uc_ttxuc/mirrorprocs.inc | 2 +- .../SubProcesses/P2_uc_ttxuc/processes.dat | 2 +- .../SubProcesses/P2_ucx_ttxucx/CPPProcess.cc | 4 +- .../P2_ucx_ttxucx/mirrorprocs.inc | 2 +- .../SubProcesses/P2_ucx_ttxucx/processes.dat | 2 +- .../SubProcesses/P2_uux_ttxccx/CPPProcess.cc | 4 +- .../P2_uux_ttxccx/mirrorprocs.inc | 2 +- .../SubProcesses/P2_uux_ttxccx/processes.dat | 2 +- .../SubProcesses/P2_uux_ttxgg/CPPProcess.cc | 4 +- .../SubProcesses/P2_uux_ttxgg/mirrorprocs.inc | 2 +- 
.../SubProcesses/P2_uux_ttxgg/processes.dat | 2 +- .../SubProcesses/P2_uux_ttxuux/CPPProcess.cc | 4 +- .../P2_uux_ttxuux/mirrorprocs.inc | 2 +- .../SubProcesses/P2_uux_ttxuux/processes.dat | 2 +- .../P2_uxcx_ttxuxcx/CPPProcess.cc | 4 +- .../P2_uxcx_ttxuxcx/mirrorprocs.inc | 2 +- .../P2_uxcx_ttxuxcx/processes.dat | 2 +- 37 files changed, 349 insertions(+), 349 deletions(-) diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 3b95a7edb5..b08e06fa79 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005426168441772461  +DEBUG: model prefixing takes 0.005528926849365234  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.028 s +5 processes with 7 diagrams generated in 0.029 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.812 s +65 processes with 1119 diagrams generated in 1.802 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -499,40 +499,40 @@ INFO: Creating files in directory P2_gg_ttxgg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 
12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 10, 14: 11, 15: 12, 16: 13, 17: 14, 18: 15, 19: 16, 20: 17, 21: 18, 22: 19, 23: 20, 24: 21, 25: 22, 26: 23, 27: 24, 28: 25, 29: 26, 30: 27, 31: 28, 32: 29, 33: 30, 37: 31, 38: 32, 39: 33, 40: 34, 41: 35, 42: 36, 43: 37, 44: 38, 45: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 55: 46, 56: 47, 57: 48, 58: 49, 59: 50, 60: 51, 61: 52, 62: 53, 63: 54, 67: 55, 68: 56, 69: 57, 70: 58, 71: 59, 72: 60, 73: 61, 74: 62, 75: 63, 76: 64, 77: 65, 78: 66, 79: 67, 80: 68, 81: 69, 85: 70, 86: 71, 87: 72, 88: 73, 89: 74, 90: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 100: 85, 101: 86, 102: 87, 106: 88, 107: 89, 108: 90, 109: 91, 110: 92, 111: 93, 115: 94, 116: 95, 117: 96, 118: 97, 119: 98, 120: 99, 124: 100, 125: 101, 126: 102, 127: 103, 128: 104, 129: 105} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); 
[model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 10, 14: 11, 15: 12, 16: 13, 17: 14, 18: 15, 19: 16, 20: 17, 21: 18, 22: 19, 23: 20, 24: 21, 25: 22, 26: 23, 27: 24, 28: 25, 29: 26, 30: 27, 31: 28, 32: 29, 33: 30, 37: 
31, 38: 32, 39: 33, 40: 34, 41: 35, 42: 36, 43: 37, 44: 38, 45: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 55: 46, 56: 47, 57: 48, 58: 49, 59: 50, 60: 51, 61: 52, 62: 53, 63: 54, 67: 55, 68: 56, 69: 57, 70: 58, 71: 59, 72: 60, 73: 61, 74: 62, 75: 63, 76: 64, 77: 65, 78: 66, 79: 67, 80: 68, 81: 69, 85: 70, 86: 71, 87: 72, 88: 73, 89: 74, 90: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 100: 85, 101: 86, 102: 87, 106: 88, 107: 89, 108: 90, 109: 91, 110: 92, 111: 93, 115: 94, 116: 95, 117: 96, 118: 97, 119: 98, 120: 99, 124: 100, 125: 101, 126: 102, 127: 103, 128: 104, 129: 105} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1336]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1337]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -543,36 +543,36 @@ INFO: Creating files in directory P2_gg_ttxuux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: 
[22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxuux.txt [model_handling.py at line 1339]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxuux.txt [model_handling.py at line 1340]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  @@ -583,36 +583,36 @@ INFO: Creating files in directory P2_gu_ttxgu DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: 
type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 
23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxgu.txt [model_handling.py at line 
1339]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxgu.txt [model_handling.py at line 1340]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  2 [export_cpp.py at line 712]  @@ -623,36 +623,36 @@ INFO: Creating files in directory P2_gux_ttxgux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: 
[22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxgux.txt [model_handling.py at line 1339]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxgux.txt [model_handling.py at line 1340]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  3 [export_cpp.py at line 712]  @@ -663,36 +663,36 @@ INFO: Creating files in directory P2_uux_ttxgg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: 
type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 
23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxgg.txt [model_handling.py at line 
1339]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxgg.txt [model_handling.py at line 1340]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  4 [export_cpp.py at line 712]  @@ -703,38 +703,38 @@ INFO: Creating files in directory P1_gg_ttxg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, 
cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1336]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1337]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  5 [export_cpp.py at line 712]  @@ -745,32 +745,32 @@ INFO: Creating files in directory P2_uu_ttxuu DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] 
[model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1710]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1711]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uu_ttxuu.txt [model_handling.py at line 1339]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uu_ttxuu.txt [model_handling.py at line 1340]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  6 [export_cpp.py at line 712]  @@ -781,32 +781,32 @@ INFO: Creating files in directory P2_uux_ttxuux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1710]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 
12: 12, 13: 13, 14: 14} [model_handling.py at line 1711]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxuux.txt [model_handling.py at line 1339]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Test reference 
file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxuux.txt [model_handling.py at line 1340]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  7 [export_cpp.py at line 712]  @@ -817,32 +817,32 @@ INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  
DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1710]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1711]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxux_ttxuxux.txt [model_handling.py at line 1339]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxux_ttxuxux.txt [model_handling.py at line 1340]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  8 [export_cpp.py at line 712]  @@ -853,32 +853,32 @@ INFO: Creating files in directory P2_uc_ttxuc DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: 
[0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1710]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1711]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uc_ttxuc.txt [model_handling.py at line 1339]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uc_ttxuc.txt [model_handling.py at line 1340]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  9 [export_cpp.py at line 712]  @@ -889,32 +889,32 @@ INFO: Creating files in directory P2_uux_ttxccx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1710]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1711]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxccx.txt [model_handling.py at line 1339]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxccx.txt [model_handling.py at line 1340]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  10 [export_cpp.py at line 712]  @@ -925,32 +925,32 @@ INFO: Creating files in directory P2_ucx_ttxucx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: 
[3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1710]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1711]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_ucx_ttxucx.txt [model_handling.py at line 1339]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_ucx_ttxucx.txt [model_handling.py at line 1340]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  11 [export_cpp.py at line 712]  @@ -961,32 +961,32 @@ INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1710]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1711]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxcx_ttxuxcx.txt [model_handling.py at line 1339]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxcx_ttxuxcx.txt [model_handling.py at line 1340]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  12 [export_cpp.py at line 712]  @@ -997,34 +997,34 @@ INFO: Creating files in directory P1_gu_ttxu DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} 
[model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1339]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists 
[model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1340]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  13 [export_cpp.py at line 712]  @@ -1035,34 +1035,34 @@ INFO: Creating files in directory P1_gux_ttxux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  INFO: Created files 
CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1336]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1337]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  14 [export_cpp.py at line 712]  @@ -1073,34 +1073,34 @@ INFO: Creating files in directory P1_uux_ttxg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} 
[model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxg.txt [model_handling.py at line 1339]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists 
[model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxg.txt [model_handling.py at line 1340]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  15 [export_cpp.py at line 712]  @@ -1111,36 +1111,36 @@ INFO: Creating files in directory P0_gg_ttx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1711]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at 
line 1823]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1336]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  
+DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1337]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  16 [export_cpp.py at line 712]  @@ -1151,47 +1151,47 @@ INFO: Creating files in directory P0_uux_ttx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  DEBUG: 
self.support_multichannel, self.include_multi_channel =  True [1] [model_handling.py at line 1164]  DEBUG: multi_channel =  {1: [0]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1} [model_handling.py at line 1710]  +DEBUG: multi_channel_map =  {1: [0]} [model_handling.py at line 1656]  +DEBUG: diag_to_config =  {1: 1} [model_handling.py at line 1711]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttx.txt [model_handling.py at line 1339]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttx.txt [model_handling.py at line 1340]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1] [export_cpp.py at line 711]  DEBUG: subproc_number =  17 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.269 s -Wrote files for 810 helas calls in 3.278 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.261 s +Wrote files for 810 helas calls in 3.311 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.330 s +ALOHA: aloha creates 5 routines in 0.329 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -1512,6 +1512,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m9.439s -user 0m8.865s -sys 0m0.532s +real 0m9.493s +user 0m8.917s +sys 0m0.525s diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc index 7567442343..89ce64642c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc @@ -775,7 +775,7 @@ namespace mg5amcCpu // These variable are not used anywhere else in the code and their scope is limited to this sanity check { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) - constexpr int nprocesses = 1; + constexpr int nprocesses = 2; static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter @@ -783,7 +783,7 @@ namespace mg5amcCpu } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/mirrorprocs.inc index 57f5243762..6f9280a1be 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ + DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/processes.dat index 
aebe5534f0..4e3f859a9f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/processes.dat @@ -1,2 +1,2 @@ 1 u u~ > t t~,c c~ > t t~,d d~ > t t~,s s~ > t t~ -mirror none \ No newline at end of file +mirror u~ u > t t~,c~ c > t t~,d~ d > t t~,s~ s > t t~ \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index e9456e497d..2d43c24604 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -855,7 +855,7 @@ namespace mg5amcCpu // These variable are not used anywhere else in the code and their scope is limited to this sanity check { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) - constexpr int nprocesses = 1; + constexpr int nprocesses = 2; static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter @@ -863,7 +863,7 @@ namespace mg5amcCpu } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 96 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 96,96 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/mirrorprocs.inc index 57f5243762..6f9280a1be 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/mirrorprocs.inc +++ 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ + DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/processes.dat index c3d6ba5983..cecfd6fccc 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/processes.dat @@ -1,2 +1,2 @@ 1 g u > t t~ u,g c > t t~ c,g d > t t~ d,g s > t t~ s -mirror none \ No newline at end of file +mirror u g > t t~ u,c g > t t~ c,d g > t t~ d,s g > t t~ s \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index daa670b4b7..a676bbeb6e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -855,7 +855,7 @@ namespace mg5amcCpu // These variable are not used anywhere else in the code and their scope is limited to this sanity check { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) - constexpr int nprocesses = 1; + constexpr int nprocesses = 2; static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter @@ -863,7 +863,7 @@ namespace mg5amcCpu } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 96 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 96,96 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this 
processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/mirrorprocs.inc index 57f5243762..6f9280a1be 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ + DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/processes.dat index 999d31033a..a4dc13c625 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/processes.dat @@ -1,2 +1,2 @@ 1 g u~ > t t~ u~,g c~ > t t~ c~,g d~ > t t~ d~,g s~ > t t~ s~ -mirror none \ No newline at end of file +mirror u~ g > t t~ u~,c~ g > t t~ c~,d~ g > t t~ d~,s~ g > t t~ s~ \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc index 81166eb007..7edb26013e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc @@ -855,7 +855,7 @@ namespace mg5amcCpu // These variable are not used anywhere else in the code and their scope is limited to this sanity check { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) - constexpr int nprocesses = 1; + constexpr int nprocesses = 2; static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter @@ -863,7 +863,7 @@ namespace mg5amcCpu } // 
Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/mirrorprocs.inc index 57f5243762..6f9280a1be 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ + DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/processes.dat index 3e7ec80666..70fc4fbfd9 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/processes.dat @@ -1,2 +1,2 @@ 1 u u~ > t t~ g,c c~ > t t~ g,d d~ > t t~ g,s s~ > t t~ g -mirror none \ No newline at end of file +mirror u~ u > t t~ g,c~ c > t t~ g,d~ d > t t~ g,s~ s > t t~ g \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc index f018e86fc7..59c382aca4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc @@ -1360,7 +1360,7 @@ namespace mg5amcCpu // These variable are not used anywhere else in the code and their scope is limited to this sanity check { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) - constexpr int nprocesses = 1; + constexpr int nprocesses = 2; static_assert( 
nprocesses == 1, "Assume nprocesses == 1" ); // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter @@ -1368,7 +1368,7 @@ namespace mg5amcCpu } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 96 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 96,96 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/mirrorprocs.inc index 57f5243762..6f9280a1be 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ + DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/processes.dat index 223af0d709..7fe113513b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/processes.dat @@ -1,2 +1,2 @@ 1 g u > t t~ g u,g c > t t~ g c,g d > t t~ g d,g s > t t~ g s -mirror none \ No newline at end of file +mirror u g > t t~ g u,c g > t t~ g c,d g > t t~ g d,s g > t t~ g s \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc index cf1ab85565..607cb95cfd 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc @@ -1360,7 +1360,7 @@ namespace 
mg5amcCpu // These variable are not used anywhere else in the code and their scope is limited to this sanity check { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) - constexpr int nprocesses = 1; + constexpr int nprocesses = 2; static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter @@ -1368,7 +1368,7 @@ namespace mg5amcCpu } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 96 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 96,96 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/mirrorprocs.inc index 57f5243762..6f9280a1be 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ + DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/processes.dat index aa01e6faf9..163cc3efce 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/processes.dat @@ -1,2 +1,2 @@ 1 g u~ > t t~ g u~,g c~ > t t~ g c~,g d~ > t t~ g d~,g s~ > t t~ g s~ -mirror none \ No newline at end of file +mirror u~ g > t t~ g u~,c~ g > t t~ g c~,d~ g > t t~ g d~,s~ g > t t~ g s~ \ No newline at end of file diff --git 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc index deddedbe37..0ac5734c21 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc @@ -935,7 +935,7 @@ namespace mg5amcCpu // These variable are not used anywhere else in the code and their scope is limited to this sanity check { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) - constexpr int nprocesses = 1; + constexpr int nprocesses = 2; static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter @@ -943,7 +943,7 @@ namespace mg5amcCpu } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/mirrorprocs.inc index 57f5243762..6f9280a1be 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ + DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/processes.dat index d745151b92..ae198732c6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/processes.dat 
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/processes.dat @@ -1,2 +1,2 @@ 1 u c > t t~ u c,u d > t t~ u d,u s > t t~ u s,c d > t t~ c d,c s > t t~ c s,d s > t t~ d s -mirror none \ No newline at end of file +mirror c u > t t~ u c,d u > t t~ u d,s u > t t~ u s,d c > t t~ c d,s c > t t~ c s,s d > t t~ d s \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc index 035f37f9f3..5d8331468c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc @@ -941,7 +941,7 @@ namespace mg5amcCpu // These variable are not used anywhere else in the code and their scope is limited to this sanity check { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) - constexpr int nprocesses = 1; + constexpr int nprocesses = 2; static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter @@ -949,7 +949,7 @@ namespace mg5amcCpu } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/mirrorprocs.inc index 57f5243762..6f9280a1be 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/mirrorprocs.inc +++ 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ + DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/processes.dat index 6dae119052..42f56ba6f0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/processes.dat @@ -1,2 +1,2 @@ 1 u c~ > t t~ u c~,u d~ > t t~ u d~,u s~ > t t~ u s~,c u~ > t t~ c u~,c d~ > t t~ c d~,c s~ > t t~ c s~,d u~ > t t~ d u~,d c~ > t t~ d c~,d s~ > t t~ d s~,s u~ > t t~ s u~,s c~ > t t~ s c~,s d~ > t t~ s d~ -mirror none \ No newline at end of file +mirror c~ u > t t~ u c~,d~ u > t t~ u d~,s~ u > t t~ u s~,u~ c > t t~ c u~,d~ c > t t~ c d~,s~ c > t t~ c s~,u~ d > t t~ d u~,c~ d > t t~ d c~,s~ d > t t~ d s~,u~ s > t t~ s u~,c~ s > t t~ s c~,d~ s > t t~ s d~ \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc index ed484f79bf..58687f7276 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc @@ -941,7 +941,7 @@ namespace mg5amcCpu // These variable are not used anywhere else in the code and their scope is limited to this sanity check { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) - constexpr int nprocesses = 1; + constexpr int nprocesses = 2; static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter @@ -949,7 +949,7 @@ namespace mg5amcCpu } // 
Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/mirrorprocs.inc index 57f5243762..6f9280a1be 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ + DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/processes.dat index e3981c4625..b073954d9b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/processes.dat @@ -1,2 +1,2 @@ 1 u u~ > t t~ c c~,u u~ > t t~ d d~,u u~ > t t~ s s~,c c~ > t t~ u u~,c c~ > t t~ d d~,c c~ > t t~ s s~,d d~ > t t~ u u~,d d~ > t t~ c c~,d d~ > t t~ s s~,s s~ > t t~ u u~,s s~ > t t~ c c~,s s~ > t t~ d d~ -mirror none \ No newline at end of file +mirror u~ u > t t~ c c~,u~ u > t t~ d d~,u~ u > t t~ s s~,c~ c > t t~ u u~,c~ c > t t~ d d~,c~ c > t t~ s s~,d~ d > t t~ u u~,d~ d > t t~ c c~,d~ d > t t~ s s~,s~ s > t t~ u u~,s~ s > t t~ c c~,s~ s > t t~ d d~ \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc index 57e20e66f1..0622603ad2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc @@ -1360,7 +1360,7 @@ namespace 
mg5amcCpu // These variable are not used anywhere else in the code and their scope is limited to this sanity check { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) - constexpr int nprocesses = 1; + constexpr int nprocesses = 2; static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter @@ -1368,7 +1368,7 @@ namespace mg5amcCpu } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 72 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 72,72 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/mirrorprocs.inc index 57f5243762..6f9280a1be 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ + DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/processes.dat index 6154f86412..253ae4161c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/processes.dat @@ -1,2 +1,2 @@ 1 u u~ > t t~ g g,c c~ > t t~ g g,d d~ > t t~ g g,s s~ > t t~ g g -mirror none \ No newline at end of file +mirror u~ u > t t~ g g,c~ c > t t~ g g,d~ d > t t~ g g,s~ s > t t~ g g \ No newline at end of file diff --git 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc index a42b3d8af9..5bf4c02337 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc @@ -1045,7 +1045,7 @@ namespace mg5amcCpu // These variable are not used anywhere else in the code and their scope is limited to this sanity check { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) - constexpr int nprocesses = 1; + constexpr int nprocesses = 2; static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter @@ -1053,7 +1053,7 @@ namespace mg5amcCpu } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/mirrorprocs.inc index 57f5243762..6f9280a1be 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ + DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/processes.dat index b85fa2760c..cd7b317223 100644 --- 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/processes.dat @@ -1,2 +1,2 @@ 1 u u~ > t t~ u u~,c c~ > t t~ c c~,d d~ > t t~ d d~,s s~ > t t~ s s~ -mirror none \ No newline at end of file +mirror u~ u > t t~ u u~,c~ c > t t~ c c~,d~ d > t t~ d d~,s~ s > t t~ s s~ \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc index d1d96581db..739b5a1bb2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc @@ -935,7 +935,7 @@ namespace mg5amcCpu // These variable are not used anywhere else in the code and their scope is limited to this sanity check { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) - constexpr int nprocesses = 1; + constexpr int nprocesses = 2; static_assert( nprocesses == 1, "Assume nprocesses == 1" ); // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter @@ -943,7 +943,7 @@ namespace mg5amcCpu } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/mirrorprocs.inc index 57f5243762..6f9280a1be 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/mirrorprocs.inc +++ 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ + DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/processes.dat index 9c0368f622..ada123d362 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/processes.dat @@ -1,2 +1,2 @@ 1 u~ c~ > t t~ u~ c~,u~ d~ > t t~ u~ d~,u~ s~ > t t~ u~ s~,c~ d~ > t t~ c~ d~,c~ s~ > t t~ c~ s~,d~ s~ > t t~ d~ s~ -mirror none \ No newline at end of file +mirror c~ u~ > t t~ u~ c~,d~ u~ > t t~ u~ d~,s~ u~ > t t~ u~ s~,d~ c~ > t t~ c~ d~,s~ c~ > t t~ c~ s~,s~ d~ > t t~ d~ s~ \ No newline at end of file From 8641b29af0b4ec80280916e385b54026ab7fe5a5 Mon Sep 17 00:00:00 2001 From: Olivier Mattelaer Date: Thu, 31 Aug 2023 15:11:54 +0200 Subject: [PATCH 010/119] fixing issue of stefan with nprocesses>1 --- .../PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py | 38 +++++++++++++++++++ .../gpu/process_function_definitions.inc | 3 +- .../CUDACPP_SA_OUTPUT/model_handling.py | 7 ++++ 3 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py new file mode 100644 index 0000000000..fca3df3652 --- /dev/null +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py @@ -0,0 +1,38 @@ + +import madgraph.various.misc as misc +import madgraph.interface.extended_cmd as extended_cmd +import logging + +logger = logging.getLogger('cmdprint') # for stdout + +try: + import madgraph +except: + import internal.madevent_interface as madevent_interface +else: + import madgraph.interface.madevent_interface as madevent_interface + +class 
CPPMEInterface(madevent_interface.MadEventCmdShell): + + def compile(self, *args, **opts): + """ """ + import multiprocessing + if not self.options['nb_core'] or self.options['nb_core'] == 'None': + self.options['nb_core'] = multiprocessing.cpu_count() + + if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): + cudacpp_backend = self.run_card['cudacpp_backend'] # the default value is defined in banner.py + logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend == 'FORTRAN': + args[0][0] = 'madevent_fortran_link' + elif cudacpp_backend == 'CPP': + args[0][0] = 'madevent_cpp_link' + elif cudacpp_backend == 'CUDA': + args[0][0] = 'madevent_cuda_link' + else: + raise Exception("Invalid cudacpp_backend='%s': only 'FORTRAN', 'CPP', 'CUDA' are supported") + return misc.compile(nb_core=self.options['nb_core'], *args, **opts) + else: + return misc.compile(nb_core=self.options['nb_core'], *args, **opts) + +MEINTERFACE = CPPMEInterface diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc index c3c0812b94..a3ce5ad2dc 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc @@ -405,8 +405,7 @@ namespace mg5amcCpu { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = %(nproc)i; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume 
nprocesses == 1 or 2" ); constexpr int process_id = %(proc_id)i; // code generation source: %(proc_id_source)s static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index 2a80be2e90..bdece2ead8 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -1147,6 +1147,13 @@ def get_sigmaKin_lines(self, color_amplitudes, write=True): replace_dict = super().get_sigmaKin_lines(color_amplitudes, write=False) replace_dict['proc_id'] = self.proc_id if self.proc_id>0 else 1 replace_dict['proc_id_source'] = 'madevent + cudacpp exporter' if self.proc_id>0 else 'standalone_cudacpp' # FIXME? use self.in_madevent_mode instead? + + # Extract denominator (avoid to extend size for mirroring) + den_factors = [str(me.get_denominator_factor()) for me in \ + self.matrix_elements] + replace_dict['den_factors'] = ",".join(den_factors) + misc.sprint(replace_dict['den_factors']) + if write: file = self.read_template_file(self.process_sigmaKin_function_template) % replace_dict file = '\n'.join( file.split('\n')[8:] ) # skip first 8 lines in process_sigmaKin_function.inc (copyright) From 8a34ba19d7222b0ec81b810e0889e028959f84c0 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 12:47:49 +0200 Subject: [PATCH 011/119] [oct23av] regenerate pp_tt012j.mad after cherry-picking Olivier's fix, previous issues are now solved Codebase includes cherry-picking this commit (providing a fix for PR #754) 5b22a9201 Olivier Mattelaer Thu Aug 31 15:11:54 2023 +0200 fixing issue of stefan with nprocesses>1 Code changes are epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc < static_assert( nprocesses == 1, "Assume nprocesses == 1" ); < // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is 
unable to handle DSIG2) --- > static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); --- .../CODEGEN_mad_pp_tt012j_log.txt | 800 +++++++++--------- .../SubProcesses/P0_gg_ttx/CPPProcess.cc | 3 +- .../SubProcesses/P0_uux_ttx/CPPProcess.cc | 5 +- .../SubProcesses/P1_gg_ttxg/CPPProcess.cc | 3 +- .../SubProcesses/P1_gu_ttxu/CPPProcess.cc | 5 +- .../SubProcesses/P1_gux_ttxux/CPPProcess.cc | 5 +- .../SubProcesses/P1_uux_ttxg/CPPProcess.cc | 5 +- .../SubProcesses/P2_gg_ttxgg/CPPProcess.cc | 3 +- .../SubProcesses/P2_gg_ttxuux/CPPProcess.cc | 3 +- .../SubProcesses/P2_gu_ttxgu/CPPProcess.cc | 5 +- .../SubProcesses/P2_gux_ttxgux/CPPProcess.cc | 5 +- .../SubProcesses/P2_uc_ttxuc/CPPProcess.cc | 5 +- .../SubProcesses/P2_ucx_ttxucx/CPPProcess.cc | 5 +- .../SubProcesses/P2_uu_ttxuu/CPPProcess.cc | 3 +- .../SubProcesses/P2_uux_ttxccx/CPPProcess.cc | 5 +- .../SubProcesses/P2_uux_ttxgg/CPPProcess.cc | 5 +- .../SubProcesses/P2_uux_ttxuux/CPPProcess.cc | 5 +- .../P2_uxcx_ttxuxcx/CPPProcess.cc | 5 +- .../P2_uxux_ttxuxux/CPPProcess.cc | 3 +- 19 files changed, 439 insertions(+), 439 deletions(-) diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index b08e06fa79..279aed8466 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005528926849365234  +DEBUG: model prefixing takes 0.005307912826538086  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.802 s +65 processes with 1119 diagrams generated in 1.791 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -499,40 +499,41 @@ INFO: Creating files in directory P2_gg_ttxgg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 
57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 
61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 10, 14: 11, 15: 12, 16: 13, 17: 14, 18: 15, 19: 16, 20: 17, 21: 18, 22: 19, 23: 20, 24: 21, 25: 22, 26: 23, 27: 24, 28: 25, 29: 26, 30: 27, 31: 28, 32: 29, 33: 30, 37: 31, 38: 32, 39: 33, 40: 34, 41: 35, 42: 36, 43: 37, 44: 38, 45: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 55: 46, 56: 47, 57: 48, 58: 49, 59: 50, 60: 51, 61: 52, 62: 53, 63: 54, 67: 55, 68: 56, 69: 57, 70: 58, 71: 59, 72: 60, 73: 61, 74: 62, 75: 63, 76: 64, 77: 65, 78: 66, 79: 67, 80: 68, 81: 69, 85: 70, 86: 71, 87: 72, 88: 73, 89: 74, 90: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 100: 85, 101: 86, 102: 87, 106: 88, 107: 89, 108: 90, 109: 91, 110: 92, 111: 93, 115: 94, 116: 95, 117: 96, 118: 97, 119: 98, 120: 99, 124: 100, 125: 101, 126: 102, 127: 103, 128: 104, 129: 105} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, 
cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  
{1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 10, 14: 11, 15: 12, 16: 13, 17: 14, 18: 15, 19: 16, 20: 17, 21: 18, 22: 19, 23: 20, 24: 21, 25: 22, 26: 23, 27: 24, 28: 25, 29: 26, 30: 27, 31: 28, 32: 29, 33: 30, 37: 31, 38: 32, 39: 33, 40: 34, 41: 35, 42: 36, 43: 37, 44: 38, 45: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 55: 46, 56: 47, 57: 48, 58: 49, 59: 50, 60: 51, 61: 52, 62: 53, 63: 54, 67: 55, 68: 56, 69: 57, 70: 58, 71: 59, 72: 60, 73: 61, 74: 62, 75: 63, 76: 64, 77: 65, 78: 66, 79: 67, 80: 68, 81: 69, 85: 70, 86: 71, 87: 72, 88: 73, 89: 74, 90: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 100: 85, 101: 86, 102: 87, 106: 88, 107: 89, 108: 90, 109: 91, 110: 92, 111: 93, 115: 94, 116: 95, 117: 96, 118: 97, 119: 98, 120: 99, 124: 100, 125: 101, 126: 102, 127: 103, 128: 104, 129: 105} [model_handling.py at line 1718]  
+DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1337]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  
+DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1344]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -543,36 +544,37 @@ INFO: Creating files in directory P2_gg_ttxuux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1170]  
-DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: 
[13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxuux.txt [model_handling.py at line 1340]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxuux.txt [model_handling.py at line 1347]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  @@ -583,36 +585,37 @@ INFO: Creating files in directory P2_gu_ttxgu DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in 
directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 
1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: 
[12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxgu.txt [model_handling.py at line 1340]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxgu.txt [model_handling.py at line 1347]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  2 [export_cpp.py at line 712]  @@ -623,36 +626,37 @@ INFO: Creating files in directory P2_gux_ttxgux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 
14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  +DEBUG: 
replace_dict['den_factors'] =  96 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxgux.txt [model_handling.py at line 1340]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxgux.txt [model_handling.py at line 1347]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  3 [export_cpp.py at line 712]  @@ -663,36 +667,37 @@ INFO: Creating files in directory P2_uux_ttxgg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1824]  +DEBUG: 
replace_dict['den_factors'] =  72 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxgg.txt [model_handling.py at line 1340]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxgg.txt [model_handling.py at line 1347]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  4 [export_cpp.py at line 712]  @@ -703,38 +708,39 @@ INFO: Creating files in directory P1_gg_ttxg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 
14, 15, 0] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 
5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1337]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists 
[model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1344]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  5 [export_cpp.py at line 712]  @@ -745,32 +751,33 @@ INFO: Creating files in directory P2_uu_ttxuu DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1711]  +DEBUG: replace_dict['den_factors'] =  72 
[model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1718]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uu_ttxuu.txt [model_handling.py at line 1340]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists 
[model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uu_ttxuu.txt [model_handling.py at line 1347]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  6 [export_cpp.py at line 712]  @@ -781,32 +788,33 @@ INFO: Creating files in directory P2_uux_ttxuux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1711]  +DEBUG: replace_dict['den_factors'] =  36 
[model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1718]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxuux.txt [model_handling.py at line 1340]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxuux.txt [model_handling.py at line 1347]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  7 [export_cpp.py at line 712]  @@ -817,32 +825,33 @@ INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1711]  +DEBUG: replace_dict['den_factors'] =  72 
[model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1718]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxux_ttxuxux.txt [model_handling.py at line 1340]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxux_ttxuxux.txt [model_handling.py at line 1347]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  8 [export_cpp.py at line 712]  @@ -853,32 +862,33 @@ INFO: Creating files in directory P2_uc_ttxuc DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1711]  +DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1171]  +DEBUG: 
multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1718]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uc_ttxuc.txt [model_handling.py at line 1340]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uc_ttxuc.txt [model_handling.py at line 1347]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  9 [export_cpp.py at line 712]  @@ -889,32 +899,33 @@ INFO: Creating files in directory P2_uux_ttxccx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1711]  +DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1171]  +DEBUG: 
multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1718]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxccx.txt [model_handling.py at line 1340]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxccx.txt [model_handling.py at line 1347]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  10 [export_cpp.py at line 712]  @@ -925,32 +936,33 @@ INFO: Creating files in directory P2_ucx_ttxucx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1711]  +DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1171]  +DEBUG: 
multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1718]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_ucx_ttxucx.txt [model_handling.py at line 1340]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_ucx_ttxucx.txt [model_handling.py at line 1347]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  11 [export_cpp.py at line 712]  @@ -961,32 +973,33 @@ INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1711]  +DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1171]  +DEBUG: 
multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1718]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxcx_ttxuxcx.txt [model_handling.py at line 1340]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxcx_ttxuxcx.txt [model_handling.py at line 1347]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  12 [export_cpp.py at line 712]  @@ -997,34 +1010,35 @@ INFO: Creating files in directory P1_gu_ttxu DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: 
self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1340]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU 
[model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1347]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  13 [export_cpp.py at line 712]  @@ -1035,34 +1049,35 @@ INFO: Creating files in directory P1_gux_ttxux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: 
self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1337]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1344]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  14 [export_cpp.py at line 712]  @@ -1073,34 +1088,35 @@ INFO: Creating files in directory P1_uux_ttxg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: 
self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxg.txt [model_handling.py at line 1340]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU 
[model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxg.txt [model_handling.py at line 1347]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  15 [export_cpp.py at line 712]  @@ -1111,36 +1127,37 @@ INFO: Creating files in directory P0_gg_ttx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  
256 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1337]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists 
[model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1344]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  16 [export_cpp.py at line 712]  @@ -1151,47 +1168,48 @@ INFO: Creating files in directory P0_uux_ttx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1} [model_handling.py at line 1711]  +DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1} [model_handling.py at 
line 1718]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttx.txt [model_handling.py at line 1340]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Test reference file does not exist and will not be copied: ', 
template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttx.txt [model_handling.py at line 1347]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1] [export_cpp.py at line 711]  DEBUG: subproc_number =  17 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.261 s -Wrote files for 810 helas calls in 3.311 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.269 s +Wrote files for 810 helas calls in 3.353 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.329 s +ALOHA: aloha creates 5 routines in 0.327 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -1199,7 +1217,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.312 s +ALOHA: aloha creates 10 routines in 0.308 s VVV1 VVV1 FFV1 @@ -1512,6 +1530,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m9.493s -user 0m8.917s -sys 0m0.525s +real 0m9.500s +user 0m8.936s +sys 0m0.521s diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc index 44f313bf0a..b6ab300c5d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc @@ -799,8 +799,7 @@ namespace mg5amcCpu { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc index 89ce64642c..def5e2cc0d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc @@ -776,14 +776,13 @@ namespace mg5amcCpu { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: 
spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc index 8cc007dff8..eec9dbdfba 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc @@ -1020,8 +1020,7 @@ namespace mg5amcCpu { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index 2d43c24604..45a03cc5d6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -856,14 +856,13 @@ namespace mg5amcCpu { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + 
static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 96,96 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 96 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index a676bbeb6e..af9c1f8e76 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -856,14 +856,13 @@ namespace mg5amcCpu { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 96,96 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 96 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc index 7edb26013e..be4cebd309 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc @@ -856,14 +856,13 @@ namespace mg5amcCpu { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc index f1e1f21142..c49542b09c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc @@ -2966,8 +2966,7 @@ namespace mg5amcCpu { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses 
== 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc index e5e62a0af2..96bb278ee2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc @@ -1368,8 +1368,7 @@ namespace mg5amcCpu { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc index 59c382aca4..91b45db10a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc @@ -1361,14 +1361,13 @@ namespace mg5amcCpu { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp 
exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 96,96 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 96 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc index 607cb95cfd..1481bae046 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc @@ -1361,14 +1361,13 @@ namespace mg5amcCpu { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 96,96 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 96 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc index 0ac5734c21..d7f73cd70b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc +++ 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc @@ -936,14 +936,13 @@ namespace mg5amcCpu { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc index 5d8331468c..c4f926adaf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc @@ -942,14 +942,13 @@ namespace mg5amcCpu { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // 
Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc index d5d7e9e858..f6b35ebd8f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc @@ -1046,8 +1046,7 @@ namespace mg5amcCpu { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc index 58687f7276..a9c2d049e5 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc @@ -942,14 +942,13 @@ namespace mg5amcCpu { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to 
handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc index 0622603ad2..c9d9e0d8ec 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc @@ -1361,14 +1361,13 @@ namespace mg5amcCpu { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 72,72 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 72 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc index 5bf4c02337..c04b6dd063 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc @@ -1046,14 +1046,13 @@ namespace mg5amcCpu { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc index 739b5a1bb2..ec709fbfa6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc @@ -936,14 +936,13 @@ namespace mg5amcCpu { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses 
== 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc index 129dd8551a..1fee1c00c6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc @@ -1046,8 +1046,7 @@ namespace mg5amcCpu { // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } From ec4c8b5616da7a325ee812781f9d17c38d28b87b Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 13:06:20 +0200 Subject: [PATCH 012/119] [oct23av] in CODEGEN, improve comments to generated code for mirror processes (see PR #754) --- .../template_files/gpu/process_function_definitions.inc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc 
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc index a3ce5ad2dc..5f1ea36b9e 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc @@ -400,10 +400,10 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = %(nproc)i; static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = %(proc_id)i; // code generation source: %(proc_id_source)s From 6e6dcbf71c056938da2efb2f80bb414677715dd2 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 13:09:14 +0200 Subject: [PATCH 013/119] [oct23av] regenerate pp_tt012j.mad with my own changes to code comments --- .../CODEGEN_mad_pp_tt012j_log.txt | 54 +++++++++---------- .../SubProcesses/P0_gg_ttx/CPPProcess.cc | 4 +- .../SubProcesses/P0_uux_ttx/CPPProcess.cc | 4 +- .../SubProcesses/P1_gg_ttxg/CPPProcess.cc | 4 +- .../SubProcesses/P1_gu_ttxu/CPPProcess.cc | 4 +- .../SubProcesses/P1_gux_ttxux/CPPProcess.cc | 4 +- .../SubProcesses/P1_uux_ttxg/CPPProcess.cc | 4 +- .../SubProcesses/P2_gg_ttxgg/CPPProcess.cc | 4 +- .../SubProcesses/P2_gg_ttxuux/CPPProcess.cc | 4 +- .../SubProcesses/P2_gu_ttxgu/CPPProcess.cc | 4 +- 
.../SubProcesses/P2_gux_ttxgux/CPPProcess.cc | 4 +- .../SubProcesses/P2_uc_ttxuc/CPPProcess.cc | 4 +- .../SubProcesses/P2_ucx_ttxucx/CPPProcess.cc | 4 +- .../SubProcesses/P2_uu_ttxuu/CPPProcess.cc | 4 +- .../SubProcesses/P2_uux_ttxccx/CPPProcess.cc | 4 +- .../SubProcesses/P2_uux_ttxgg/CPPProcess.cc | 4 +- .../SubProcesses/P2_uux_ttxuux/CPPProcess.cc | 4 +- .../P2_uxcx_ttxuxcx/CPPProcess.cc | 4 +- .../P2_uxux_ttxuxux/CPPProcess.cc | 4 +- 19 files changed, 63 insertions(+), 63 deletions(-) diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 279aed8466..8d7f70c22d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005307912826538086  +DEBUG: model prefixing takes 0.005370378494262695  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.791 s +65 processes with 1119 diagrams generated in 1.798 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -499,7 +499,7 @@ INFO: Creating files in directory P2_gg_ttxgg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  @@ -544,7 +544,7 @@ INFO: Creating files in directory P2_gg_ttxuux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  @@ -585,7 +585,7 @@ INFO: Creating files in directory P2_gu_ttxgu DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  @@ -626,7 +626,7 @@ INFO: Creating files in directory P2_gux_ttxgux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  @@ -667,7 +667,7 @@ INFO: Creating files in directory P2_uux_ttxgg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  @@ -708,7 +708,7 @@ INFO: Creating files in directory P1_gg_ttxg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  @@ -751,7 +751,7 @@ INFO: Creating files in directory P2_uu_ttxuu DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  @@ -788,7 +788,7 @@ INFO: Creating files in directory P2_uux_ttxuux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  @@ -825,7 +825,7 @@ INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  @@ -862,7 +862,7 @@ INFO: Creating files in directory P2_uc_ttxuc DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  @@ -899,7 +899,7 @@ INFO: Creating files in directory P2_uux_ttxccx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  @@ -936,7 +936,7 @@ INFO: Creating files in directory P2_ucx_ttxucx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  @@ -973,7 +973,7 @@ INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  @@ -1010,7 +1010,7 @@ INFO: Creating files in directory P1_gu_ttxu DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  @@ -1049,7 +1049,7 @@ INFO: Creating files in directory P1_gux_ttxux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  @@ -1088,7 +1088,7 @@ INFO: Creating files in directory P1_uux_ttxg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  @@ -1127,7 +1127,7 @@ INFO: Creating files in directory P0_gg_ttx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  @@ -1168,7 +1168,7 @@ INFO: Creating files in directory P0_uux_ttx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  @@ -1201,15 +1201,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.269 s -Wrote files for 810 helas calls in 3.353 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.266 s +Wrote files for 810 helas calls in 3.354 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.327 s +ALOHA: aloha creates 5 routines in 0.330 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -1217,7 +1217,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines 
ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.308 s +ALOHA: aloha creates 10 routines in 0.306 s VVV1 VVV1 FFV1 @@ -1530,6 +1530,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m9.500s -user 0m8.936s -sys 0m0.521s +real 0m9.536s +user 0m8.950s +sys 0m0.551s diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc index b6ab300c5d..f3892b05e4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc @@ -794,10 +794,10 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc index def5e2cc0d..419ca8dad1 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc @@ -771,10 +771,10 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and 
#343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc index eec9dbdfba..62e8d65a7d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc @@ -1015,10 +1015,10 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc 
index 45a03cc5d6..fdd4c7ce0d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -851,10 +851,10 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index af9c1f8e76..f2854178a3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -851,10 +851,10 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 
2; static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc index be4cebd309..99ca7b88a2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc @@ -851,10 +851,10 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc index c49542b09c..7ea6145d0a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc @@ -2961,10 +2961,10 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere 
else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc index 96bb278ee2..c1788e814a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc @@ -1363,10 +1363,10 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc index 91b45db10a..abc5ef9719 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc 
@@ -1356,10 +1356,10 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc index 1481bae046..2656f0ca15 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc @@ -1356,10 +1356,10 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter 
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc index d7f73cd70b..d1e9379b52 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc @@ -931,10 +931,10 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc index c4f926adaf..274c259bc6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc @@ -937,10 +937,10 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue 
#343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc index f6b35ebd8f..cab6dd62ef 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc @@ -1041,10 +1041,10 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc index a9c2d049e5..60c41f8fb1 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc @@ -937,10 +937,10 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 
and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc index c9d9e0d8ec..71dbdec476 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc @@ -1356,10 +1356,10 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc index 
c04b6dd063..bc743c56c3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc @@ -1041,10 +1041,10 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc index ec709fbfa6..c09a29e015 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc @@ -931,10 +931,10 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int 
nprocesses = 2; static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc index 1fee1c00c6..d4a4794688 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc @@ -1041,10 +1041,10 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter From 45ebd42ab64710c2ecd7187d8e4e2e9564b17fe3 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 13:12:54 +0200 Subject: [PATCH 014/119] [oct23av] regenerate all other 7 processes mad and 7 sa, no code changes (except for static assert allowing nprocesses==2) This completes the checks for PR #754, all looks good --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 53 +++++---- .../SubProcesses/P1_epem_mupmum/CPPProcess.cc | 7 +- .../CODEGEN_cudacpp_ee_mumu_log.txt | 43 +++---- .../P1_Sigma_sm_epem_mupmum/CPPProcess.cc | 7 +- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 61 +++++----- 
.../SubProcesses/P1_gg_ttx/CPPProcess.cc | 7 +- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 51 ++++---- .../P1_Sigma_sm_gg_ttx/CPPProcess.cc | 7 +- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 112 +++++++++--------- .../SubProcesses/P1_gg_ttx/CPPProcess.cc | 7 +- .../SubProcesses/P2_gg_ttxg/CPPProcess.cc | 7 +- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 67 +++++------ .../SubProcesses/P1_gg_ttxg/CPPProcess.cc | 7 +- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 59 ++++----- .../P1_Sigma_sm_gg_ttxg/CPPProcess.cc | 7 +- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 73 ++++++------ .../SubProcesses/P1_gg_ttxgg/CPPProcess.cc | 7 +- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 63 +++++----- .../P1_Sigma_sm_gg_ttxgg/CPPProcess.cc | 7 +- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 77 ++++++------ .../SubProcesses/P1_gg_ttxggg/CPPProcess.cc | 7 +- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 65 +++++----- .../P1_Sigma_sm_gg_ttxggg/CPPProcess.cc | 7 +- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 100 ++++++++-------- .../SubProcesses/P1_gu_ttxu/CPPProcess.cc | 7 +- .../SubProcesses/P1_gux_ttxux/CPPProcess.cc | 7 +- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 82 ++++++------- .../P1_Sigma_sm_gu_ttxu/CPPProcess.cc | 7 +- .../P1_Sigma_sm_gux_ttxux/CPPProcess.cc | 7 +- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 47 ++++---- .../P1_Sigma_heft_gg_h/CPPProcess.cc | 7 +- 31 files changed, 536 insertions(+), 536 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index cb58296f12..74e619abdf 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005522251129150391  +DEBUG: model prefixing takes 0.00560760498046875  INFO: Restrict model sm with file 
models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -176,32 +176,33 @@ INFO: Creating files in directory P1_epem_mupmum DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1]} [model_handling.py at line 1170]  -DEBUG: 
multi_channel_map =  {1: [0], 2: [1]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2} [model_handling.py at line 1711]  +DEBUG: replace_dict['den_factors'] =  4 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0], 2: [1]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2} [model_handling.py at line 1718]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_epem_mupmum.txt [model_handling.py at line 1337]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at 
line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_epem_mupmum.txt [model_handling.py at line 1344]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -210,19 +211,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.097 s +Wrote files for 8 helas calls in 0.099 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.200 s +ALOHA: aloha creates 3 routines in 0.195 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.253 s +ALOHA: aloha creates 7 routines in 0.252 s FFV1 FFV1 FFV2 @@ -334,6 +335,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.814s -user 0m2.201s -sys 0m0.285s +real 0m2.502s +user 0m2.200s +sys 0m0.288s diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc index a6d90a2d1a..873b8c2fd2 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc @@ -786,13 +786,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index cc4cb313f6..152aa855f1 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005342245101928711  
+DEBUG: model prefixing takes 0.005333423614501953  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -173,28 +173,29 @@ INFO: Processing color information for process: e+ e- > mu+ mu- @1 DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: proc_id =  0 [model_handling.py at line 1046]  INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1309]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  
[model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1711]  +DEBUG: replace_dict['den_factors'] =  4 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1171]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1718]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_epem_mupmum.txt [model_handling.py at line 1337]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_epem_mupmum.txt [model_handling.py at line 1344]  Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes @@ -202,7 +203,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.261 s +ALOHA: aloha creates 4 routines in 0.263 s FFV1 FFV1 FFV2 @@ -225,6 +226,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.684s -user 0m0.614s -sys 0m0.063s +real 0m0.686s +user 0m0.622s +sys 0m0.057s diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc index 12a28d3f7a..53aaf5cb29 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc @@ -784,13 +784,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - 
// SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 7e2b93281d..81d7b1f4da 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0052642822265625  +DEBUG: model prefixing takes 0.0053462982177734375  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,36 +177,37 @@ INFO: Creating files in directory P1_gg_ttx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: 
[1], 3: [2]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1337]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1344]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -214,16 +215,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.109 s +Wrote files for 10 helas calls in 0.111 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.145 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.130 s +ALOHA: aloha creates 4 routines in 0.132 s VVV1 FFV1 FFV1 @@ -328,6 +329,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.320s -user 0m2.033s -sys 0m0.279s +real 0m2.442s +user 0m2.049s +sys 0m0.318s diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index 0afa202e07..8d41a93302 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -794,13 +794,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index dddb8c87ad..8cfac3fd00 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005263805389404297  +DEBUG: model prefixing takes 0.005245208740234375  
INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,38 +174,39 @@ INFO: Processing color information for process: g g > t t~ @1 DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: proc_id =  0 [model_handling.py at line 1046]  INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1309]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, 
self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1171]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1337]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1344]  Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model 
(create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.140 s +ALOHA: aloha creates 2 routines in 0.142 s VVV1 FFV1 FFV1 @@ -227,6 +228,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.588s -user 0m0.525s -sys 0m0.054s +real 0m0.594s +user 0m0.529s +sys 0m0.056s diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc index ea42fb3e96..528f0c80d9 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc @@ -791,13 +791,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( 
process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 0297f740e9..556e1bf01e 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~; add process g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005698442459106445  +DEBUG: model prefixing takes 0.005356788635253906  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -186,38 +186,39 @@ INFO: Creating files in directory P2_gg_ttxg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1711]  -DEBUG: 
call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1337]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1344]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -228,36 +229,37 @@ INFO: Creating files in directory P1_gg_ttx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: 
type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1337]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1344]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  @@ -265,14 +267,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s -Wrote files for 46 helas calls in 0.263 s +Wrote files for 46 helas calls in 0.265 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.320 s +ALOHA: aloha creates 5 routines in 0.322 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -280,7 +282,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.307 s +ALOHA: aloha creates 10 routines in 0.306 s VVV1 VVV1 FFV1 @@ -400,6 +402,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.929s -user 0m2.616s -sys 0m0.306s +real 0m2.943s +user 0m2.610s +sys 0m0.320s diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index 0afa202e07..8d41a93302 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -794,13 +794,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc index 9dfd471c50..32555ba549 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc @@ -1015,13 +1015,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // 
SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index c8912079c8..0b27815951 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005263566970825195  +DEBUG: model prefixing takes 0.0053675174713134766  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.021 s +1 processes with 16 diagrams generated in 0.022 s Total: 1 processes with 16 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -177,38 +177,39 @@ INFO: Creating files in directory P1_gg_ttxg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1145]  DEBUG: 
self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: 
[10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1337]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1344]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -216,14 +217,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s -Wrote files for 36 helas calls in 0.162 s +Wrote files for 36 helas calls in 0.163 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.317 s +ALOHA: aloha creates 5 routines in 0.327 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -231,7 +232,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.304 s +ALOHA: aloha creates 10 routines in 0.315 s VVV1 VVV1 FFV1 @@ -347,6 +348,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.800s -user 0m2.477s -sys 0m0.307s +real 0m2.842s +user 0m2.517s +sys 0m0.314s diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc index 8cc007dff8..62e8d65a7d 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc @@ -1015,13 +1015,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 900f2c0ac9..eb8e67097c 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005434989929199219  +DEBUG: model prefixing takes 
0.005820512771606445  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.021 s +1 processes with 16 diagrams generated in 0.023 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT @@ -174,35 +174,36 @@ INFO: Processing color information for process: g g > t t~ g @1 DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: proc_id =  0 [model_handling.py at line 1046]  INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1309]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at 
line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1171]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, 
cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1337]  -Generated helas calls for 1 subprocesses (16 diagrams) in 0.036 s +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Copying test 
reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1344]  +Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -210,7 +211,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.319 s +ALOHA: aloha creates 5 routines in 0.334 s VVV1 VVV1 FFV1 @@ -237,6 +238,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.874s -user 0m0.781s -sys 0m0.045s +real 0m0.890s +user 0m0.814s +sys 0m0.053s diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc index 6f71af24b1..56af4b74c1 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc @@ -1009,13 +1009,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in 
uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index e3548d9fdb..fb4dacd9e3 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005757331848144531  +DEBUG: model prefixing takes 0.005468130111694336  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.155 s +1 processes with 123 diagrams generated in 0.154 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -177,55 +177,56 @@ INFO: Creating files in directory P1_gg_ttxgg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 
26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: 
[90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 10, 14: 11, 15: 12, 16: 13, 17: 14, 18: 15, 19: 16, 20: 17, 21: 18, 22: 19, 23: 20, 24: 21, 25: 22, 26: 23, 27: 24, 28: 25, 29: 26, 30: 27, 31: 28, 32: 29, 33: 30, 37: 31, 38: 32, 39: 33, 40: 34, 41: 35, 42: 36, 43: 37, 44: 38, 45: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 55: 46, 56: 47, 57: 48, 58: 49, 59: 50, 60: 51, 61: 52, 62: 53, 63: 54, 67: 55, 68: 56, 69: 57, 70: 58, 71: 59, 72: 60, 73: 61, 74: 62, 75: 63, 76: 64, 77: 65, 78: 66, 79: 67, 80: 68, 81: 69, 85: 70, 86: 71, 87: 72, 88: 73, 89: 74, 90: 75, 91: 76, 92: 77, 93: 78, 
94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 100: 85, 101: 86, 102: 87, 106: 88, 107: 89, 108: 90, 109: 91, 110: 92, 111: 93, 115: 94, 116: 95, 117: 96, 118: 97, 119: 98, 120: 99, 124: 100, 125: 101, 126: 102, 127: 103, 128: 104, 129: 105} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: 
[28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 
9: 6, 10: 7, 11: 8, 12: 9, 13: 10, 14: 11, 15: 12, 16: 13, 17: 14, 18: 15, 19: 16, 20: 17, 21: 18, 22: 19, 23: 20, 24: 21, 25: 22, 26: 23, 27: 24, 28: 25, 29: 26, 30: 27, 31: 28, 32: 29, 33: 30, 37: 31, 38: 32, 39: 33, 40: 34, 41: 35, 42: 36, 43: 37, 44: 38, 45: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 55: 46, 56: 47, 57: 48, 58: 49, 59: 50, 60: 51, 61: 52, 62: 53, 63: 54, 67: 55, 68: 56, 69: 57, 70: 58, 71: 59, 72: 60, 73: 61, 74: 62, 75: 63, 76: 64, 77: 65, 78: 66, 79: 67, 80: 68, 81: 69, 85: 70, 86: 71, 87: 72, 88: 73, 89: 74, 90: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 100: 85, 101: 86, 102: 87, 106: 88, 107: 89, 108: 90, 109: 91, 110: 92, 111: 93, 115: 94, 116: 95, 117: 96, 118: 97, 119: 98, 120: 99, 124: 100, 125: 101, 126: 102, 127: 103, 128: 104, 129: 105} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1337]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1344]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.418 s -Wrote files for 222 helas calls in 0.724 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.416 s +Wrote files for 222 helas calls in 0.728 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.324 s +ALOHA: aloha creates 5 routines in 0.326 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -233,7 +234,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.308 s +ALOHA: aloha creates 10 routines in 0.310 s VVV1 VVV1 FFV1 @@ 
-352,6 +353,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.928s -user 0m3.594s -sys 0m0.299s +real 0m3.918s +user 0m3.605s +sys 0m0.297s diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc index 442d769ae3..71c4eee18c 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc @@ -2961,13 +2961,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 654dd3ea8d..3bebed0b58 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently 
active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005308866500854492  +DEBUG: model prefixing takes 0.005362033843994141  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.164 s +1 processes with 123 diagrams generated in 0.154 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -174,37 +174,38 @@ INFO: Processing color information for process: g g > t t~ g g @1 DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: proc_id =  0 [model_handling.py at line 1046]  INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1309]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1171]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1718]  +DEBUG: call 
=  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1337]  -Generated helas calls for 1 subprocesses (123 diagrams) in 0.419 s +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1344]  +Generated helas calls for 1 subprocesses (123 diagrams) in 0.417 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -212,7 +213,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.315 s +ALOHA: aloha creates 5 routines in 0.316 s VVV1 VVV1 FFV1 @@ -242,6 +243,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m1.520s -user 0m1.435s -sys 0m0.054s +real 0m1.491s +user 0m1.428s +sys 0m0.050s diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc index 25f123c774..55877e70c4 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc +++ 
b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc @@ -3018,13 +3018,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 6dbbb56578..14de1d052f 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005812644958496094  +DEBUG: model prefixing takes 0.00551152229309082  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.982 s +1 processes with 1240 diagrams generated in 1.858 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -179,57 +179,58 @@ INFO: Color-Flow passed to 1592 term in 36s. Introduce 2768 contraction DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 
12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 
394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 
756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 
0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 
437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 
798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [3], 4: [4], 5: [6], 6: [7], 7: [13], 8: [14], 9: [15], 10: [17], 11: [18], 12: [19], 13: [21], 14: [22], 15: [23], 16: [25], 17: [26], 18: [27], 19: [28], 20: [29], 21: [30], 22: [32], 23: [33], 24: [34], 25: [35], 26: [36], 27: [37], 28: [38], 29: [39], 30: [40], 31: [41], 32: [42], 33: [43], 34: [44], 35: [45], 36: [46], 37: [48], 38: [49], 39: [50], 40: [51], 41: [52], 42: [53], 43: [54], 44: [55], 45: [56], 46: [57], 47: [58], 48: [59], 49: [60], 50: [61], 51: [62], 52: [64], 53: [65], 54: [66], 55: [67], 56: [68], 57: [69], 58: [70], 59: [71], 60: [72], 61: [73], 62: [74], 63: [75], 64: [76], 65: [77], 66: [78], 67: [80], 68: [81], 69: [82], 70: [83], 71: [84], 72: [85], 73: [86], 74: [87], 75: 
[88], 76: [90], 77: [91], 78: [92], 79: [93], 80: [94], 81: [95], 82: [96], 83: [97], 84: [98], 85: [100], 86: [101], 87: [102], 88: [103], 89: [104], 90: [105], 91: [106], 92: [107], 93: [108], 94: [109], 95: [110], 96: [111], 97: [112], 98: [113], 99: [114], 100: [115], 101: [116], 102: [117], 103: [118], 104: [119], 105: [120], 106: [123], 107: [124], 108: [125], 109: [126], 110: [127], 111: [128], 112: [129], 113: [130], 114: [131], 115: [132], 116: [133], 117: [134], 118: [135], 119: [136], 120: [137], 121: [139], 122: [140], 123: [142], 124: [143], 125: [144], 126: [145], 127: [146], 128: [147], 129: [148], 130: [149], 131: [150], 132: [151], 133: [152], 134: [153], 135: [154], 136: [155], 137: [156], 138: [158], 139: [159], 140: [160], 141: [161], 142: [162], 143: [163], 144: [164], 145: [165], 146: [166], 147: [167], 148: [168], 149: [169], 150: [170], 151: [171], 152: [172], 153: [174], 154: [175], 155: [176], 156: [177], 157: [178], 158: [179], 159: [180], 160: [181], 161: [182], 162: [183], 163: [184], 164: [185], 165: [186], 166: [187], 167: [188], 168: [189], 169: [190], 170: [191], 171: [192], 172: [193], 173: [194], 174: [195], 175: [196], 176: [197], 177: [198], 178: [199], 179: [200], 180: [201], 181: [202], 182: [203], 183: [204], 184: [205], 185: [206], 186: [207], 187: [208], 188: [209], 189: [210], 190: [211], 191: [212], 192: [213], 193: [214], 194: [215], 195: [216], 196: [217], 197: [219], 198: [220], 199: [221], 200: [222], 201: [223], 202: [224], 203: [226], 204: [227], 205: [228], 206: [229], 207: [230], 208: [231], 209: [233], 210: [234], 211: [246], 212: [247], 213: [248], 214: [249], 215: [250], 216: [251], 217: [252], 218: [253], 219: [254], 220: [255], 221: [256], 222: [257], 223: [258], 224: [259], 225: [260], 226: [262], 227: [263], 228: [265], 229: [266], 230: [267], 231: [268], 232: [269], 233: [270], 234: [271], 235: [272], 236: [273], 237: [274], 238: [275], 239: [276], 240: [277], 241: [278], 242: [279], 243: [281], 244: 
[282], 245: [283], 246: [284], 247: [285], 248: [286], 249: [287], 250: [288], 251: [289], 252: [290], 253: [291], 254: [292], 255: [293], 256: [294], 257: [295], 258: [297], 259: [298], 260: [299], 261: [300], 262: [301], 263: [302], 264: [303], 265: [304], 266: [305], 267: [306], 268: [307], 269: [308], 270: [309], 271: [310], 272: [311], 273: [312], 274: [313], 275: [314], 276: [315], 277: [316], 278: [317], 279: [318], 280: [319], 281: [320], 282: [321], 283: [322], 284: [323], 285: [324], 286: [325], 287: [326], 288: [327], 289: [328], 290: [329], 291: [330], 292: [331], 293: [332], 294: [333], 295: [334], 296: [335], 297: [336], 298: [337], 299: [338], 300: [339], 301: [340], 302: [342], 303: [343], 304: [344], 305: [345], 306: [346], 307: [347], 308: [349], 309: [350], 310: [351], 311: [352], 312: [353], 313: [354], 314: [356], 315: [357], 316: [369], 317: [370], 318: [371], 319: [372], 320: [373], 321: [374], 322: [376], 323: [377], 324: [378], 325: [379], 326: [380], 327: [381], 328: [382], 329: [383], 330: [384], 331: [385], 332: [386], 333: [387], 334: [388], 335: [389], 336: [390], 337: [392], 338: [393], 339: [394], 340: [395], 341: [396], 342: [397], 343: [398], 344: [399], 345: [400], 346: [401], 347: [402], 348: [403], 349: [404], 350: [405], 351: [406], 352: [408], 353: [409], 354: [410], 355: [411], 356: [412], 357: [413], 358: [414], 359: [415], 360: [416], 361: [417], 362: [418], 363: [419], 364: [420], 365: [421], 366: [422], 367: [424], 368: [425], 369: [426], 370: [427], 371: [428], 372: [429], 373: [430], 374: [431], 375: [432], 376: [433], 377: [434], 378: [436], 379: [437], 380: [439], 381: [440], 382: [446], 383: [447], 384: [448], 385: [449], 386: [450], 387: [451], 388: [452], 389: [453], 390: [454], 391: [456], 392: [457], 393: [458], 394: [459], 395: [460], 396: [461], 397: [462], 398: [463], 399: [464], 400: [466], 401: [467], 402: [468], 403: [469], 404: [470], 405: [471], 406: [472], 407: [473], 408: [474], 409: [476], 410: [477], 
411: [478], 412: [479], 413: [480], 414: [481], 415: [483], 416: [484], 417: [485], 418: [486], 419: [487], 420: [488], 421: [492], 422: [493], 423: [494], 424: [495], 425: [496], 426: [497], 427: [499], 428: [500], 429: [501], 430: [502], 431: [503], 432: [504], 433: [505], 434: [506], 435: [507], 436: [508], 437: [509], 438: [510], 439: [511], 440: [512], 441: [513], 442: [515], 443: [516], 444: [517], 445: [518], 446: [519], 447: [520], 448: [521], 449: [522], 450: [523], 451: [524], 452: [525], 453: [526], 454: [527], 455: [528], 456: [529], 457: [531], 458: [532], 459: [533], 460: [534], 461: [535], 462: [536], 463: [537], 464: [538], 465: [539], 466: [540], 467: [541], 468: [542], 469: [543], 470: [544], 471: [545], 472: [547], 473: [548], 474: [549], 475: [550], 476: [551], 477: [552], 478: [553], 479: [554], 480: [555], 481: [556], 482: [557], 483: [559], 484: [560], 485: [562], 486: [563], 487: [569], 488: [570], 489: [571], 490: [572], 491: [573], 492: [574], 493: [575], 494: [576], 495: [577], 496: [579], 497: [580], 498: [581], 499: [582], 500: [583], 501: [584], 502: [585], 503: [586], 504: [587], 505: [589], 506: [590], 507: [591], 508: [592], 509: [593], 510: [594], 511: [595], 512: [596], 513: [597], 514: [599], 515: [600], 516: [601], 517: [602], 518: [603], 519: [604], 520: [606], 521: [607], 522: [608], 523: [609], 524: [610], 525: [611], 526: [615], 527: [616], 528: [617], 529: [618], 530: [619], 531: [620], 532: [622], 533: [623], 534: [624], 535: [625], 536: [626], 537: [627], 538: [628], 539: [629], 540: [630], 541: [631], 542: [632], 543: [633], 544: [634], 545: [635], 546: [636], 547: [638], 548: [639], 549: [640], 550: [641], 551: [642], 552: [643], 553: [644], 554: [645], 555: [646], 556: [647], 557: [648], 558: [649], 559: [650], 560: [651], 561: [652], 562: [654], 563: [655], 564: [656], 565: [657], 566: [658], 567: [659], 568: [660], 569: [661], 570: [662], 571: [663], 572: [664], 573: [665], 574: [666], 575: [667], 576: [668], 577: 
[670], 578: [671], 579: [672], 580: [673], 581: [674], 582: [675], 583: [676], 584: [677], 585: [678], 586: [679], 587: [680], 588: [682], 589: [683], 590: [685], 591: [686], 592: [692], 593: [693], 594: [694], 595: [695], 596: [696], 597: [697], 598: [698], 599: [699], 600: [700], 601: [702], 602: [703], 603: [704], 604: [705], 605: [706], 606: [707], 607: [708], 608: [709], 609: [710], 610: [712], 611: [713], 612: [714], 613: [715], 614: [716], 615: [717], 616: [718], 617: [719], 618: [720], 619: [722], 620: [723], 621: [724], 622: [725], 623: [726], 624: [727], 625: [729], 626: [730], 627: [731], 628: [732], 629: [733], 630: [734], 631: [738], 632: [739], 633: [740], 634: [741], 635: [742], 636: [743], 637: [744], 638: [745], 639: [746], 640: [747], 641: [748], 642: [749], 643: [750], 644: [751], 645: [752], 646: [753], 647: [754], 648: [755], 649: [756], 650: [757], 651: [758], 652: [759], 653: [760], 654: [761], 655: [762], 656: [763], 657: [764], 658: [765], 659: [766], 660: [767], 661: [768], 662: [769], 663: [770], 664: [772], 665: [773], 666: [774], 667: [775], 668: [776], 669: [777], 670: [779], 671: [780], 672: [781], 673: [782], 674: [783], 675: [784], 676: [788], 677: [789], 678: [790], 679: [791], 680: [792], 681: [793], 682: [794], 683: [795], 684: [796], 685: [797], 686: [798], 687: [799], 688: [800], 689: [801], 690: [802], 691: [803], 692: [804], 693: [805], 694: [806], 695: [807], 696: [808], 697: [809], 698: [810], 699: [811], 700: [812], 701: [813], 702: [814], 703: [815], 704: [816], 705: [817], 706: [818], 707: [819], 708: [820], 709: [822], 710: [823], 711: [824], 712: [825], 713: [826], 714: [827], 715: [829], 716: [830], 717: [831], 718: [832], 719: [833], 720: [834], 721: [838], 722: [839], 723: [841], 724: [842], 725: [844], 726: [845], 727: [851], 728: [852], 729: [853], 730: [854], 731: [855], 732: [856], 733: [857], 734: [858], 735: [859], 736: [861], 737: [862], 738: [863], 739: [864], 740: [865], 741: [866], 742: [867], 743: [868], 
744: [869], 745: [871], 746: [872], 747: [873], 748: [874], 749: [875], 750: [876], 751: [877], 752: [878], 753: [879], 754: [881], 755: [882], 756: [883], 757: [884], 758: [885], 759: [886], 760: [888], 761: [889], 762: [890], 763: [891], 764: [892], 765: [893], 766: [894], 767: [895], 768: [897], 769: [898], 770: [900], 771: [901], 772: [907], 773: [908], 774: [909], 775: [910], 776: [911], 777: [912], 778: [913], 779: [914], 780: [915], 781: [917], 782: [918], 783: [919], 784: [920], 785: [921], 786: [922], 787: [923], 788: [924], 789: [925], 790: [927], 791: [928], 792: [929], 793: [930], 794: [931], 795: [932], 796: [933], 797: [934], 798: [935], 799: [937], 800: [938], 801: [939], 802: [940], 803: [941], 804: [942], 805: [944], 806: [945], 807: [946], 808: [947], 809: [948], 810: [949], 811: [950], 812: [951], 813: [953], 814: [954], 815: [956], 816: [957], 817: [963], 818: [964], 819: [965], 820: [966], 821: [967], 822: [968], 823: [969], 824: [970], 825: [971], 826: [973], 827: [974], 828: [975], 829: [976], 830: [977], 831: [978], 832: [979], 833: [980], 834: [981], 835: [983], 836: [984], 837: [985], 838: [986], 839: [987], 840: [988], 841: [989], 842: [990], 843: [991], 844: [993], 845: [994], 846: [995], 847: [996], 848: [997], 849: [998], 850: [1000], 851: [1001], 852: [1002], 853: [1003], 854: [1004], 855: [1005], 856: [1006], 857: [1007], 858: [1009], 859: [1010], 860: [1012], 861: [1013], 862: [1018], 863: [1019], 864: [1021], 865: [1022], 866: [1024], 867: [1025], 868: [1030], 869: [1031], 870: [1033], 871: [1034], 872: [1036], 873: [1037], 874: [1045], 875: [1046], 876: [1047], 877: [1048], 878: [1049], 879: [1050], 880: [1051], 881: [1052], 882: [1053], 883: [1054], 884: [1055], 885: [1056], 886: [1057], 887: [1058], 888: [1059], 889: [1060], 890: [1061], 891: [1062], 892: [1064], 893: [1065], 894: [1066], 895: [1067], 896: [1068], 897: [1069], 898: [1070], 899: [1071], 900: [1072], 901: [1073], 902: [1074], 903: [1075], 904: [1076], 905: [1077], 
906: [1078], 907: [1079], 908: [1080], 909: [1081], 910: [1083], 911: [1084], 912: [1085], 913: [1086], 914: [1087], 915: [1088], 916: [1089], 917: [1090], 918: [1091], 919: [1092], 920: [1093], 921: [1094], 922: [1095], 923: [1096], 924: [1097], 925: [1098], 926: [1099], 927: [1100], 928: [1102], 929: [1103], 930: [1104], 931: [1105], 932: [1106], 933: [1107], 934: [1109], 935: [1110], 936: [1111], 937: [1112], 938: [1113], 939: [1114], 940: [1116], 941: [1117], 942: [1118], 943: [1119], 944: [1120], 945: [1121]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [3], 4: [4], 5: [6], 6: [7], 7: [13], 8: [14], 9: [15], 10: [17], 11: [18], 12: [19], 13: [21], 14: [22], 15: [23], 16: [25], 17: [26], 18: [27], 19: [28], 20: [29], 21: [30], 22: [32], 23: [33], 24: [34], 25: [35], 26: [36], 27: [37], 28: [38], 29: [39], 30: [40], 31: [41], 32: [42], 33: [43], 34: [44], 35: [45], 36: [46], 37: [48], 38: [49], 39: [50], 40: [51], 41: [52], 42: [53], 43: [54], 44: [55], 45: [56], 46: [57], 47: [58], 48: [59], 49: [60], 50: [61], 51: [62], 52: [64], 53: [65], 54: [66], 55: [67], 56: [68], 57: [69], 58: [70], 59: [71], 60: [72], 61: [73], 62: [74], 63: [75], 64: [76], 65: [77], 66: [78], 67: [80], 68: [81], 69: [82], 70: [83], 71: [84], 72: [85], 73: [86], 74: [87], 75: [88], 76: [90], 77: [91], 78: [92], 79: [93], 80: [94], 81: [95], 82: [96], 83: [97], 84: [98], 85: [100], 86: [101], 87: [102], 88: [103], 89: [104], 90: [105], 91: [106], 92: [107], 93: [108], 94: [109], 95: [110], 96: [111], 97: [112], 98: [113], 99: [114], 100: [115], 101: [116], 102: [117], 103: [118], 104: [119], 105: [120], 106: [123], 107: [124], 108: [125], 109: [126], 110: [127], 111: [128], 112: [129], 113: [130], 114: [131], 115: [132], 116: [133], 117: [134], 118: [135], 119: [136], 120: [137], 121: [139], 122: [140], 123: [142], 124: [143], 125: [144], 126: [145], 127: [146], 128: [147], 129: [148], 130: [149], 131: [150], 132: [151], 133: [152], 134: [153], 135: 
[154], 136: [155], 137: [156], 138: [158], 139: [159], 140: [160], 141: [161], 142: [162], 143: [163], 144: [164], 145: [165], 146: [166], 147: [167], 148: [168], 149: [169], 150: [170], 151: [171], 152: [172], 153: [174], 154: [175], 155: [176], 156: [177], 157: [178], 158: [179], 159: [180], 160: [181], 161: [182], 162: [183], 163: [184], 164: [185], 165: [186], 166: [187], 167: [188], 168: [189], 169: [190], 170: [191], 171: [192], 172: [193], 173: [194], 174: [195], 175: [196], 176: [197], 177: [198], 178: [199], 179: [200], 180: [201], 181: [202], 182: [203], 183: [204], 184: [205], 185: [206], 186: [207], 187: [208], 188: [209], 189: [210], 190: [211], 191: [212], 192: [213], 193: [214], 194: [215], 195: [216], 196: [217], 197: [219], 198: [220], 199: [221], 200: [222], 201: [223], 202: [224], 203: [226], 204: [227], 205: [228], 206: [229], 207: [230], 208: [231], 209: [233], 210: [234], 211: [246], 212: [247], 213: [248], 214: [249], 215: [250], 216: [251], 217: [252], 218: [253], 219: [254], 220: [255], 221: [256], 222: [257], 223: [258], 224: [259], 225: [260], 226: [262], 227: [263], 228: [265], 229: [266], 230: [267], 231: [268], 232: [269], 233: [270], 234: [271], 235: [272], 236: [273], 237: [274], 238: [275], 239: [276], 240: [277], 241: [278], 242: [279], 243: [281], 244: [282], 245: [283], 246: [284], 247: [285], 248: [286], 249: [287], 250: [288], 251: [289], 252: [290], 253: [291], 254: [292], 255: [293], 256: [294], 257: [295], 258: [297], 259: [298], 260: [299], 261: [300], 262: [301], 263: [302], 264: [303], 265: [304], 266: [305], 267: [306], 268: [307], 269: [308], 270: [309], 271: [310], 272: [311], 273: [312], 274: [313], 275: [314], 276: [315], 277: [316], 278: [317], 279: [318], 280: [319], 281: [320], 282: [321], 283: [322], 284: [323], 285: [324], 286: [325], 287: [326], 288: [327], 289: [328], 290: [329], 291: [330], 292: [331], 293: [332], 294: [333], 295: [334], 296: [335], 297: [336], 298: [337], 299: [338], 300: [339], 301: [340], 
302: [342], 303: [343], 304: [344], 305: [345], 306: [346], 307: [347], 308: [349], 309: [350], 310: [351], 311: [352], 312: [353], 313: [354], 314: [356], 315: [357], 316: [369], 317: [370], 318: [371], 319: [372], 320: [373], 321: [374], 322: [376], 323: [377], 324: [378], 325: [379], 326: [380], 327: [381], 328: [382], 329: [383], 330: [384], 331: [385], 332: [386], 333: [387], 334: [388], 335: [389], 336: [390], 337: [392], 338: [393], 339: [394], 340: [395], 341: [396], 342: [397], 343: [398], 344: [399], 345: [400], 346: [401], 347: [402], 348: [403], 349: [404], 350: [405], 351: [406], 352: [408], 353: [409], 354: [410], 355: [411], 356: [412], 357: [413], 358: [414], 359: [415], 360: [416], 361: [417], 362: [418], 363: [419], 364: [420], 365: [421], 366: [422], 367: [424], 368: [425], 369: [426], 370: [427], 371: [428], 372: [429], 373: [430], 374: [431], 375: [432], 376: [433], 377: [434], 378: [436], 379: [437], 380: [439], 381: [440], 382: [446], 383: [447], 384: [448], 385: [449], 386: [450], 387: [451], 388: [452], 389: [453], 390: [454], 391: [456], 392: [457], 393: [458], 394: [459], 395: [460], 396: [461], 397: [462], 398: [463], 399: [464], 400: [466], 401: [467], 402: [468], 403: [469], 404: [470], 405: [471], 406: [472], 407: [473], 408: [474], 409: [476], 410: [477], 411: [478], 412: [479], 413: [480], 414: [481], 415: [483], 416: [484], 417: [485], 418: [486], 419: [487], 420: [488], 421: [492], 422: [493], 423: [494], 424: [495], 425: [496], 426: [497], 427: [499], 428: [500], 429: [501], 430: [502], 431: [503], 432: [504], 433: [505], 434: [506], 435: [507], 436: [508], 437: [509], 438: [510], 439: [511], 440: [512], 441: [513], 442: [515], 443: [516], 444: [517], 445: [518], 446: [519], 447: [520], 448: [521], 449: [522], 450: [523], 451: [524], 452: [525], 453: [526], 454: [527], 455: [528], 456: [529], 457: [531], 458: [532], 459: [533], 460: [534], 461: [535], 462: [536], 463: [537], 464: [538], 465: [539], 466: [540], 467: [541], 468: 
[542], 469: [543], 470: [544], 471: [545], 472: [547], 473: [548], 474: [549], 475: [550], 476: [551], 477: [552], 478: [553], 479: [554], 480: [555], 481: [556], 482: [557], 483: [559], 484: [560], 485: [562], 486: [563], 487: [569], 488: [570], 489: [571], 490: [572], 491: [573], 492: [574], 493: [575], 494: [576], 495: [577], 496: [579], 497: [580], 498: [581], 499: [582], 500: [583], 501: [584], 502: [585], 503: [586], 504: [587], 505: [589], 506: [590], 507: [591], 508: [592], 509: [593], 510: [594], 511: [595], 512: [596], 513: [597], 514: [599], 515: [600], 516: [601], 517: [602], 518: [603], 519: [604], 520: [606], 521: [607], 522: [608], 523: [609], 524: [610], 525: [611], 526: [615], 527: [616], 528: [617], 529: [618], 530: [619], 531: [620], 532: [622], 533: [623], 534: [624], 535: [625], 536: [626], 537: [627], 538: [628], 539: [629], 540: [630], 541: [631], 542: [632], 543: [633], 544: [634], 545: [635], 546: [636], 547: [638], 548: [639], 549: [640], 550: [641], 551: [642], 552: [643], 553: [644], 554: [645], 555: [646], 556: [647], 557: [648], 558: [649], 559: [650], 560: [651], 561: [652], 562: [654], 563: [655], 564: [656], 565: [657], 566: [658], 567: [659], 568: [660], 569: [661], 570: [662], 571: [663], 572: [664], 573: [665], 574: [666], 575: [667], 576: [668], 577: [670], 578: [671], 579: [672], 580: [673], 581: [674], 582: [675], 583: [676], 584: [677], 585: [678], 586: [679], 587: [680], 588: [682], 589: [683], 590: [685], 591: [686], 592: [692], 593: [693], 594: [694], 595: [695], 596: [696], 597: [697], 598: [698], 599: [699], 600: [700], 601: [702], 602: [703], 603: [704], 604: [705], 605: [706], 606: [707], 607: [708], 608: [709], 609: [710], 610: [712], 611: [713], 612: [714], 613: [715], 614: [716], 615: [717], 616: [718], 617: [719], 618: [720], 619: [722], 620: [723], 621: [724], 622: [725], 623: [726], 624: [727], 625: [729], 626: [730], 627: [731], 628: [732], 629: [733], 630: [734], 631: [738], 632: [739], 633: [740], 634: [741], 
635: [742], 636: [743], 637: [744], 638: [745], 639: [746], 640: [747], 641: [748], 642: [749], 643: [750], 644: [751], 645: [752], 646: [753], 647: [754], 648: [755], 649: [756], 650: [757], 651: [758], 652: [759], 653: [760], 654: [761], 655: [762], 656: [763], 657: [764], 658: [765], 659: [766], 660: [767], 661: [768], 662: [769], 663: [770], 664: [772], 665: [773], 666: [774], 667: [775], 668: [776], 669: [777], 670: [779], 671: [780], 672: [781], 673: [782], 674: [783], 675: [784], 676: [788], 677: [789], 678: [790], 679: [791], 680: [792], 681: [793], 682: [794], 683: [795], 684: [796], 685: [797], 686: [798], 687: [799], 688: [800], 689: [801], 690: [802], 691: [803], 692: [804], 693: [805], 694: [806], 695: [807], 696: [808], 697: [809], 698: [810], 699: [811], 700: [812], 701: [813], 702: [814], 703: [815], 704: [816], 705: [817], 706: [818], 707: [819], 708: [820], 709: [822], 710: [823], 711: [824], 712: [825], 713: [826], 714: [827], 715: [829], 716: [830], 717: [831], 718: [832], 719: [833], 720: [834], 721: [838], 722: [839], 723: [841], 724: [842], 725: [844], 726: [845], 727: [851], 728: [852], 729: [853], 730: [854], 731: [855], 732: [856], 733: [857], 734: [858], 735: [859], 736: [861], 737: [862], 738: [863], 739: [864], 740: [865], 741: [866], 742: [867], 743: [868], 744: [869], 745: [871], 746: [872], 747: [873], 748: [874], 749: [875], 750: [876], 751: [877], 752: [878], 753: [879], 754: [881], 755: [882], 756: [883], 757: [884], 758: [885], 759: [886], 760: [888], 761: [889], 762: [890], 763: [891], 764: [892], 765: [893], 766: [894], 767: [895], 768: [897], 769: [898], 770: [900], 771: [901], 772: [907], 773: [908], 774: [909], 775: [910], 776: [911], 777: [912], 778: [913], 779: [914], 780: [915], 781: [917], 782: [918], 783: [919], 784: [920], 785: [921], 786: [922], 787: [923], 788: [924], 789: [925], 790: [927], 791: [928], 792: [929], 793: [930], 794: [931], 795: [932], 796: [933], 797: [934], 798: [935], 799: [937], 800: [938], 801: 
[939], 802: [940], 803: [941], 804: [942], 805: [944], 806: [945], 807: [946], 808: [947], 809: [948], 810: [949], 811: [950], 812: [951], 813: [953], 814: [954], 815: [956], 816: [957], 817: [963], 818: [964], 819: [965], 820: [966], 821: [967], 822: [968], 823: [969], 824: [970], 825: [971], 826: [973], 827: [974], 828: [975], 829: [976], 830: [977], 831: [978], 832: [979], 833: [980], 834: [981], 835: [983], 836: [984], 837: [985], 838: [986], 839: [987], 840: [988], 841: [989], 842: [990], 843: [991], 844: [993], 845: [994], 846: [995], 847: [996], 848: [997], 849: [998], 850: [1000], 851: [1001], 852: [1002], 853: [1003], 854: [1004], 855: [1005], 856: [1006], 857: [1007], 858: [1009], 859: [1010], 860: [1012], 861: [1013], 862: [1018], 863: [1019], 864: [1021], 865: [1022], 866: [1024], 867: [1025], 868: [1030], 869: [1031], 870: [1033], 871: [1034], 872: [1036], 873: [1037], 874: [1045], 875: [1046], 876: [1047], 877: [1048], 878: [1049], 879: [1050], 880: [1051], 881: [1052], 882: [1053], 883: [1054], 884: [1055], 885: [1056], 886: [1057], 887: [1058], 888: [1059], 889: [1060], 890: [1061], 891: [1062], 892: [1064], 893: [1065], 894: [1066], 895: [1067], 896: [1068], 897: [1069], 898: [1070], 899: [1071], 900: [1072], 901: [1073], 902: [1074], 903: [1075], 904: [1076], 905: [1077], 906: [1078], 907: [1079], 908: [1080], 909: [1081], 910: [1083], 911: [1084], 912: [1085], 913: [1086], 914: [1087], 915: [1088], 916: [1089], 917: [1090], 918: [1091], 919: [1092], 920: [1093], 921: [1094], 922: [1095], 923: [1096], 924: [1097], 925: [1098], 926: [1099], 927: [1100], 928: [1102], 929: [1103], 930: [1104], 931: [1105], 932: [1106], 933: [1107], 934: [1109], 935: [1110], 936: [1111], 937: [1112], 938: [1113], 939: [1114], 940: [1116], 941: [1117], 942: [1118], 943: [1119], 944: [1120], 945: [1121]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 6: 3, 7: 4, 11: 5, 12: 6, 28: 7, 29: 8, 30: 9, 34: 10, 35: 11, 36: 12, 40: 13, 41: 14, 42: 15, 
46: 16, 47: 17, 48: 18, 49: 19, 50: 20, 51: 21, 55: 22, 56: 23, 57: 24, 58: 25, 59: 26, 60: 27, 61: 28, 62: 29, 63: 30, 64: 31, 65: 32, 66: 33, 67: 34, 68: 35, 69: 36, 73: 37, 74: 38, 75: 39, 76: 40, 77: 41, 78: 42, 79: 43, 80: 44, 81: 45, 82: 46, 83: 47, 84: 48, 85: 49, 86: 50, 87: 51, 91: 52, 92: 53, 93: 54, 94: 55, 95: 56, 96: 57, 97: 58, 98: 59, 99: 60, 100: 61, 101: 62, 102: 63, 103: 64, 104: 65, 105: 66, 109: 67, 110: 68, 111: 69, 112: 70, 113: 71, 114: 72, 115: 73, 116: 74, 117: 75, 121: 76, 122: 77, 123: 78, 124: 79, 125: 80, 126: 81, 127: 82, 128: 83, 129: 84, 133: 85, 134: 86, 135: 87, 136: 88, 137: 89, 138: 90, 139: 91, 140: 92, 141: 93, 142: 94, 143: 95, 144: 96, 145: 97, 146: 98, 147: 99, 148: 100, 149: 101, 150: 102, 151: 103, 152: 104, 153: 105, 160: 106, 161: 107, 162: 108, 163: 109, 164: 110, 165: 111, 166: 112, 167: 113, 168: 114, 169: 115, 170: 116, 171: 117, 172: 118, 173: 119, 174: 120, 178: 121, 179: 122, 183: 123, 184: 124, 185: 125, 186: 126, 187: 127, 188: 128, 189: 129, 190: 130, 191: 131, 192: 132, 193: 133, 194: 134, 195: 135, 196: 136, 197: 137, 201: 138, 202: 139, 203: 140, 204: 141, 205: 142, 206: 143, 207: 144, 208: 145, 209: 146, 210: 147, 211: 148, 212: 149, 213: 150, 214: 151, 215: 152, 219: 153, 220: 154, 221: 155, 222: 156, 223: 157, 224: 158, 225: 159, 226: 160, 227: 161, 228: 162, 229: 163, 230: 164, 231: 165, 232: 166, 233: 167, 234: 168, 235: 169, 236: 170, 237: 171, 238: 172, 239: 173, 240: 174, 241: 175, 242: 176, 243: 177, 244: 178, 245: 179, 246: 180, 247: 181, 248: 182, 249: 183, 250: 184, 251: 185, 252: 186, 253: 187, 254: 188, 255: 189, 256: 190, 257: 191, 258: 192, 259: 193, 260: 194, 261: 195, 262: 196, 266: 197, 267: 198, 268: 199, 269: 200, 270: 201, 271: 202, 275: 203, 276: 204, 277: 205, 278: 206, 279: 207, 280: 208, 284: 209, 285: 210, 319: 211, 320: 212, 321: 213, 322: 214, 323: 215, 324: 216, 325: 217, 326: 218, 327: 219, 328: 220, 329: 221, 330: 222, 331: 223, 332: 224, 333: 225, 337: 226, 338: 227, 342: 
228, 343: 229, 344: 230, 345: 231, 346: 232, 347: 233, 348: 234, 349: 235, 350: 236, 351: 237, 352: 238, 353: 239, 354: 240, 355: 241, 356: 242, 360: 243, 361: 244, 362: 245, 363: 246, 364: 247, 365: 248, 366: 249, 367: 250, 368: 251, 369: 252, 370: 253, 371: 254, 372: 255, 373: 256, 374: 257, 378: 258, 379: 259, 380: 260, 381: 261, 382: 262, 383: 263, 384: 264, 385: 265, 386: 266, 387: 267, 388: 268, 389: 269, 390: 270, 391: 271, 392: 272, 393: 273, 394: 274, 395: 275, 396: 276, 397: 277, 398: 278, 399: 279, 400: 280, 401: 281, 402: 282, 403: 283, 404: 284, 405: 285, 406: 286, 407: 287, 408: 288, 409: 289, 410: 290, 411: 291, 412: 292, 413: 293, 414: 294, 415: 295, 416: 296, 417: 297, 418: 298, 419: 299, 420: 300, 421: 301, 425: 302, 426: 303, 427: 304, 428: 305, 429: 306, 430: 307, 434: 308, 435: 309, 436: 310, 437: 311, 438: 312, 439: 313, 443: 314, 444: 315, 478: 316, 479: 317, 480: 318, 481: 319, 482: 320, 483: 321, 487: 322, 488: 323, 489: 324, 490: 325, 491: 326, 492: 327, 493: 328, 494: 329, 495: 330, 496: 331, 497: 332, 498: 333, 499: 334, 500: 335, 501: 336, 505: 337, 506: 338, 507: 339, 508: 340, 509: 341, 510: 342, 511: 343, 512: 344, 513: 345, 514: 346, 515: 347, 516: 348, 517: 349, 518: 350, 519: 351, 523: 352, 524: 353, 525: 354, 526: 355, 527: 356, 528: 357, 529: 358, 530: 359, 531: 360, 532: 361, 533: 362, 534: 363, 535: 364, 536: 365, 537: 366, 541: 367, 542: 368, 543: 369, 544: 370, 545: 371, 546: 372, 547: 373, 548: 374, 549: 375, 550: 376, 551: 377, 555: 378, 556: 379, 560: 380, 561: 381, 577: 382, 578: 383, 579: 384, 580: 385, 581: 386, 582: 387, 583: 388, 584: 389, 585: 390, 589: 391, 590: 392, 591: 393, 592: 394, 593: 395, 594: 396, 595: 397, 596: 398, 597: 399, 601: 400, 602: 401, 603: 402, 604: 403, 605: 404, 606: 405, 607: 406, 608: 407, 609: 408, 613: 409, 614: 410, 615: 411, 616: 412, 617: 413, 618: 414, 622: 415, 623: 416, 624: 417, 625: 418, 626: 419, 627: 420, 637: 421, 638: 422, 639: 423, 640: 424, 641: 425, 642: 426, 646: 427, 647: 
428, 648: 429, 649: 430, 650: 431, 651: 432, 652: 433, 653: 434, 654: 435, 655: 436, 656: 437, 657: 438, 658: 439, 659: 440, 660: 441, 664: 442, 665: 443, 666: 444, 667: 445, 668: 446, 669: 447, 670: 448, 671: 449, 672: 450, 673: 451, 674: 452, 675: 453, 676: 454, 677: 455, 678: 456, 682: 457, 683: 458, 684: 459, 685: 460, 686: 461, 687: 462, 688: 463, 689: 464, 690: 465, 691: 466, 692: 467, 693: 468, 694: 469, 695: 470, 696: 471, 700: 472, 701: 473, 702: 474, 703: 475, 704: 476, 705: 477, 706: 478, 707: 479, 708: 480, 709: 481, 710: 482, 714: 483, 715: 484, 719: 485, 720: 486, 736: 487, 737: 488, 738: 489, 739: 490, 740: 491, 741: 492, 742: 493, 743: 494, 744: 495, 748: 496, 749: 497, 750: 498, 751: 499, 752: 500, 753: 501, 754: 502, 755: 503, 756: 504, 760: 505, 761: 506, 762: 507, 763: 508, 764: 509, 765: 510, 766: 511, 767: 512, 768: 513, 772: 514, 773: 515, 774: 516, 775: 517, 776: 518, 777: 519, 781: 520, 782: 521, 783: 522, 784: 523, 785: 524, 786: 525, 796: 526, 797: 527, 798: 528, 799: 529, 800: 530, 801: 531, 805: 532, 806: 533, 807: 534, 808: 535, 809: 536, 810: 537, 811: 538, 812: 539, 813: 540, 814: 541, 815: 542, 816: 543, 817: 544, 818: 545, 819: 546, 823: 547, 824: 548, 825: 549, 826: 550, 827: 551, 828: 552, 829: 553, 830: 554, 831: 555, 832: 556, 833: 557, 834: 558, 835: 559, 836: 560, 837: 561, 841: 562, 842: 563, 843: 564, 844: 565, 845: 566, 846: 567, 847: 568, 848: 569, 849: 570, 850: 571, 851: 572, 852: 573, 853: 574, 854: 575, 855: 576, 859: 577, 860: 578, 861: 579, 862: 580, 863: 581, 864: 582, 865: 583, 866: 584, 867: 585, 868: 586, 869: 587, 873: 588, 874: 589, 878: 590, 879: 591, 895: 592, 896: 593, 897: 594, 898: 595, 899: 596, 900: 597, 901: 598, 902: 599, 903: 600, 907: 601, 908: 602, 909: 603, 910: 604, 911: 605, 912: 606, 913: 607, 914: 608, 915: 609, 919: 610, 920: 611, 921: 612, 922: 613, 923: 614, 924: 615, 925: 616, 926: 617, 927: 618, 931: 619, 932: 620, 933: 621, 934: 622, 935: 623, 936: 624, 940: 625, 941: 626, 942: 627, 943: 
628, 944: 629, 945: 630, 955: 631, 956: 632, 957: 633, 958: 634, 959: 635, 960: 636, 961: 637, 962: 638, 963: 639, 964: 640, 965: 641, 966: 642, 967: 643, 968: 644, 969: 645, 970: 646, 971: 647, 972: 648, 973: 649, 974: 650, 975: 651, 976: 652, 977: 653, 978: 654, 979: 655, 980: 656, 981: 657, 982: 658, 983: 659, 984: 660, 985: 661, 986: 662, 987: 663, 991: 664, 992: 665, 993: 666, 994: 667, 995: 668, 996: 669, 1000: 670, 1001: 671, 1002: 672, 1003: 673, 1004: 674, 1005: 675, 1015: 676, 1016: 677, 1017: 678, 1018: 679, 1019: 680, 1020: 681, 1021: 682, 1022: 683, 1023: 684, 1024: 685, 1025: 686, 1026: 687, 1027: 688, 1028: 689, 1029: 690, 1030: 691, 1031: 692, 1032: 693, 1033: 694, 1034: 695, 1035: 696, 1036: 697, 1037: 698, 1038: 699, 1039: 700, 1040: 701, 1041: 702, 1042: 703, 1043: 704, 1044: 705, 1045: 706, 1046: 707, 1047: 708, 1051: 709, 1052: 710, 1053: 711, 1054: 712, 1055: 713, 1056: 714, 1060: 715, 1061: 716, 1062: 717, 1063: 718, 1064: 719, 1065: 720, 1075: 721, 1076: 722, 1080: 723, 1081: 724, 1085: 725, 1086: 726, 1102: 727, 1103: 728, 1104: 729, 1105: 730, 1106: 731, 1107: 732, 1108: 733, 1109: 734, 1110: 735, 1114: 736, 1115: 737, 1116: 738, 1117: 739, 1118: 740, 1119: 741, 1120: 742, 1121: 743, 1122: 744, 1126: 745, 1127: 746, 1128: 747, 1129: 748, 1130: 749, 1131: 750, 1132: 751, 1133: 752, 1134: 753, 1138: 754, 1139: 755, 1140: 756, 1141: 757, 1142: 758, 1143: 759, 1147: 760, 1148: 761, 1149: 762, 1150: 763, 1151: 764, 1152: 765, 1153: 766, 1154: 767, 1158: 768, 1159: 769, 1163: 770, 1164: 771, 1180: 772, 1181: 773, 1182: 774, 1183: 775, 1184: 776, 1185: 777, 1186: 778, 1187: 779, 1188: 780, 1192: 781, 1193: 782, 1194: 783, 1195: 784, 1196: 785, 1197: 786, 1198: 787, 1199: 788, 1200: 789, 1204: 790, 1205: 791, 1206: 792, 1207: 793, 1208: 794, 1209: 795, 1210: 796, 1211: 797, 1212: 798, 1216: 799, 1217: 800, 1218: 801, 1219: 802, 1220: 803, 1221: 804, 1225: 805, 1226: 806, 1227: 807, 1228: 808, 1229: 809, 1230: 810, 1231: 811, 1232: 812, 1236: 813, 
1237: 814, 1241: 815, 1242: 816, 1258: 817, 1259: 818, 1260: 819, 1261: 820, 1262: 821, 1263: 822, 1264: 823, 1265: 824, 1266: 825, 1270: 826, 1271: 827, 1272: 828, 1273: 829, 1274: 830, 1275: 831, 1276: 832, 1277: 833, 1278: 834, 1282: 835, 1283: 836, 1284: 837, 1285: 838, 1286: 839, 1287: 840, 1288: 841, 1289: 842, 1290: 843, 1294: 844, 1295: 845, 1296: 846, 1297: 847, 1298: 848, 1299: 849, 1303: 850, 1304: 851, 1305: 852, 1306: 853, 1307: 854, 1308: 855, 1309: 856, 1310: 857, 1314: 858, 1315: 859, 1319: 860, 1320: 861, 1333: 862, 1334: 863, 1338: 864, 1339: 865, 1343: 866, 1344: 867, 1357: 868, 1358: 869, 1362: 870, 1363: 871, 1367: 872, 1368: 873, 1396: 874, 1397: 875, 1398: 876, 1399: 877, 1400: 878, 1401: 879, 1402: 880, 1403: 881, 1404: 882, 1405: 883, 1406: 884, 1407: 885, 1408: 886, 1409: 887, 1410: 888, 1411: 889, 1412: 890, 1413: 891, 1417: 892, 1418: 893, 1419: 894, 1420: 895, 1421: 896, 1422: 897, 1423: 898, 1424: 899, 1425: 900, 1426: 901, 1427: 902, 1428: 903, 1429: 904, 1430: 905, 1431: 906, 1432: 907, 1433: 908, 1434: 909, 1438: 910, 1439: 911, 1440: 912, 1441: 913, 1442: 914, 1443: 915, 1444: 916, 1445: 917, 1446: 918, 1447: 919, 1448: 920, 1449: 921, 1450: 922, 1451: 923, 1452: 924, 1453: 925, 1454: 926, 1455: 927, 1459: 928, 1460: 929, 1461: 930, 1462: 931, 1463: 932, 1464: 933, 1468: 934, 1469: 935, 1470: 936, 1471: 937, 1472: 938, 1473: 939, 1477: 940, 1478: 941, 1479: 942, 1480: 943, 1481: 944, 1482: 945} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 4, 1, 4, 4) 
[model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  1536 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 
276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 
635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [3], 4: [4], 5: [6], 6: [7], 7: [13], 8: [14], 9: [15], 10: [17], 11: [18], 12: [19], 13: [21], 14: [22], 15: [23], 16: [25], 17: [26], 18: [27], 19: [28], 20: [29], 21: [30], 22: [32], 23: [33], 24: [34], 25: [35], 26: [36], 27: [37], 28: [38], 29: [39], 30: [40], 31: [41], 32: [42], 33: [43], 34: [44], 35: [45], 36: [46], 37: [48], 38: [49], 39: [50], 40: [51], 41: [52], 42: [53], 43: [54], 44: [55], 45: [56], 46: [57], 47: [58], 48: [59], 49: [60], 50: [61], 51: [62], 52: [64], 53: [65], 54: [66], 55: [67], 56: [68], 57: [69], 58: [70], 59: [71], 60: [72], 61: [73], 62: [74], 63: [75], 64: [76], 65: [77], 66: [78], 67: [80], 68: [81], 69: [82], 70: [83], 71: [84], 72: [85], 73: [86], 74: [87], 75: [88], 76: [90], 77: [91], 78: [92], 79: [93], 80: [94], 81: [95], 82: [96], 83: [97], 84: [98], 85: [100], 86: [101], 87: [102], 88: [103], 89: [104], 90: [105], 91: [106], 92: [107], 93: [108], 94: [109], 95: [110], 96: [111], 97: [112], 98: [113], 99: [114], 100: [115], 101: [116], 102: [117], 103: [118], 104: [119], 105: [120], 106: [123], 107: [124], 108: [125], 109: [126], 110: [127], 111: [128], 112: [129], 113: [130], 114: [131], 115: [132], 116: [133], 117: [134], 118: [135], 119: [136], 120: [137], 121: [139], 122: [140], 123: [142], 124: [143], 125: [144], 126: [145], 127: [146], 128: [147], 129: [148], 130: [149], 131: [150], 132: [151], 133: [152], 134: [153], 135: [154], 136: [155], 137: [156], 138: [158], 139: [159], 140: [160], 141: [161], 142: [162], 143: [163], 144: [164], 145: [165], 146: [166], 147: [167], 148: [168], 149: [169], 150: [170], 151: [171], 152: [172], 153: [174], 154: [175], 155: [176], 156: [177], 157: [178], 158: [179], 159: [180], 160: [181], 161: [182], 162: [183], 163: [184], 164: [185], 165: [186], 166: [187], 167: [188], 168: [189], 169: 
[190], 170: [191], 171: [192], 172: [193], 173: [194], 174: [195], 175: [196], 176: [197], 177: [198], 178: [199], 179: [200], 180: [201], 181: [202], 182: [203], 183: [204], 184: [205], 185: [206], 186: [207], 187: [208], 188: [209], 189: [210], 190: [211], 191: [212], 192: [213], 193: [214], 194: [215], 195: [216], 196: [217], 197: [219], 198: [220], 199: [221], 200: [222], 201: [223], 202: [224], 203: [226], 204: [227], 205: [228], 206: [229], 207: [230], 208: [231], 209: [233], 210: [234], 211: [246], 212: [247], 213: [248], 214: [249], 215: [250], 216: [251], 217: [252], 218: [253], 219: [254], 220: [255], 221: [256], 222: [257], 223: [258], 224: [259], 225: [260], 226: [262], 227: [263], 228: [265], 229: [266], 230: [267], 231: [268], 232: [269], 233: [270], 234: [271], 235: [272], 236: [273], 237: [274], 238: [275], 239: [276], 240: [277], 241: [278], 242: [279], 243: [281], 244: [282], 245: [283], 246: [284], 247: [285], 248: [286], 249: [287], 250: [288], 251: [289], 252: [290], 253: [291], 254: [292], 255: [293], 256: [294], 257: [295], 258: [297], 259: [298], 260: [299], 261: [300], 262: [301], 263: [302], 264: [303], 265: [304], 266: [305], 267: [306], 268: [307], 269: [308], 270: [309], 271: [310], 272: [311], 273: [312], 274: [313], 275: [314], 276: [315], 277: [316], 278: [317], 279: [318], 280: [319], 281: [320], 282: [321], 283: [322], 284: [323], 285: [324], 286: [325], 287: [326], 288: [327], 289: [328], 290: [329], 291: [330], 292: [331], 293: [332], 294: [333], 295: [334], 296: [335], 297: [336], 298: [337], 299: [338], 300: [339], 301: [340], 302: [342], 303: [343], 304: [344], 305: [345], 306: [346], 307: [347], 308: [349], 309: [350], 310: [351], 311: [352], 312: [353], 313: [354], 314: [356], 315: [357], 316: [369], 317: [370], 318: [371], 319: [372], 320: [373], 321: [374], 322: [376], 323: [377], 324: [378], 325: [379], 326: [380], 327: [381], 328: [382], 329: [383], 330: [384], 331: [385], 332: [386], 333: [387], 334: [388], 335: [389], 
336: [390], 337: [392], 338: [393], 339: [394], 340: [395], 341: [396], 342: [397], 343: [398], 344: [399], 345: [400], 346: [401], 347: [402], 348: [403], 349: [404], 350: [405], 351: [406], 352: [408], 353: [409], 354: [410], 355: [411], 356: [412], 357: [413], 358: [414], 359: [415], 360: [416], 361: [417], 362: [418], 363: [419], 364: [420], 365: [421], 366: [422], 367: [424], 368: [425], 369: [426], 370: [427], 371: [428], 372: [429], 373: [430], 374: [431], 375: [432], 376: [433], 377: [434], 378: [436], 379: [437], 380: [439], 381: [440], 382: [446], 383: [447], 384: [448], 385: [449], 386: [450], 387: [451], 388: [452], 389: [453], 390: [454], 391: [456], 392: [457], 393: [458], 394: [459], 395: [460], 396: [461], 397: [462], 398: [463], 399: [464], 400: [466], 401: [467], 402: [468], 403: [469], 404: [470], 405: [471], 406: [472], 407: [473], 408: [474], 409: [476], 410: [477], 411: [478], 412: [479], 413: [480], 414: [481], 415: [483], 416: [484], 417: [485], 418: [486], 419: [487], 420: [488], 421: [492], 422: [493], 423: [494], 424: [495], 425: [496], 426: [497], 427: [499], 428: [500], 429: [501], 430: [502], 431: [503], 432: [504], 433: [505], 434: [506], 435: [507], 436: [508], 437: [509], 438: [510], 439: [511], 440: [512], 441: [513], 442: [515], 443: [516], 444: [517], 445: [518], 446: [519], 447: [520], 448: [521], 449: [522], 450: [523], 451: [524], 452: [525], 453: [526], 454: [527], 455: [528], 456: [529], 457: [531], 458: [532], 459: [533], 460: [534], 461: [535], 462: [536], 463: [537], 464: [538], 465: [539], 466: [540], 467: [541], 468: [542], 469: [543], 470: [544], 471: [545], 472: [547], 473: [548], 474: [549], 475: [550], 476: [551], 477: [552], 478: [553], 479: [554], 480: [555], 481: [556], 482: [557], 483: [559], 484: [560], 485: [562], 486: [563], 487: [569], 488: [570], 489: [571], 490: [572], 491: [573], 492: [574], 493: [575], 494: [576], 495: [577], 496: [579], 497: [580], 498: [581], 499: [582], 500: [583], 501: [584], 502: 
[585], 503: [586], 504: [587], 505: [589], 506: [590], 507: [591], 508: [592], 509: [593], 510: [594], 511: [595], 512: [596], 513: [597], 514: [599], 515: [600], 516: [601], 517: [602], 518: [603], 519: [604], 520: [606], 521: [607], 522: [608], 523: [609], 524: [610], 525: [611], 526: [615], 527: [616], 528: [617], 529: [618], 530: [619], 531: [620], 532: [622], 533: [623], 534: [624], 535: [625], 536: [626], 537: [627], 538: [628], 539: [629], 540: [630], 541: [631], 542: [632], 543: [633], 544: [634], 545: [635], 546: [636], 547: [638], 548: [639], 549: [640], 550: [641], 551: [642], 552: [643], 553: [644], 554: [645], 555: [646], 556: [647], 557: [648], 558: [649], 559: [650], 560: [651], 561: [652], 562: [654], 563: [655], 564: [656], 565: [657], 566: [658], 567: [659], 568: [660], 569: [661], 570: [662], 571: [663], 572: [664], 573: [665], 574: [666], 575: [667], 576: [668], 577: [670], 578: [671], 579: [672], 580: [673], 581: [674], 582: [675], 583: [676], 584: [677], 585: [678], 586: [679], 587: [680], 588: [682], 589: [683], 590: [685], 591: [686], 592: [692], 593: [693], 594: [694], 595: [695], 596: [696], 597: [697], 598: [698], 599: [699], 600: [700], 601: [702], 602: [703], 603: [704], 604: [705], 605: [706], 606: [707], 607: [708], 608: [709], 609: [710], 610: [712], 611: [713], 612: [714], 613: [715], 614: [716], 615: [717], 616: [718], 617: [719], 618: [720], 619: [722], 620: [723], 621: [724], 622: [725], 623: [726], 624: [727], 625: [729], 626: [730], 627: [731], 628: [732], 629: [733], 630: [734], 631: [738], 632: [739], 633: [740], 634: [741], 635: [742], 636: [743], 637: [744], 638: [745], 639: [746], 640: [747], 641: [748], 642: [749], 643: [750], 644: [751], 645: [752], 646: [753], 647: [754], 648: [755], 649: [756], 650: [757], 651: [758], 652: [759], 653: [760], 654: [761], 655: [762], 656: [763], 657: [764], 658: [765], 659: [766], 660: [767], 661: [768], 662: [769], 663: [770], 664: [772], 665: [773], 666: [774], 667: [775], 668: [776], 
669: [777], 670: [779], 671: [780], 672: [781], 673: [782], 674: [783], 675: [784], 676: [788], 677: [789], 678: [790], 679: [791], 680: [792], 681: [793], 682: [794], 683: [795], 684: [796], 685: [797], 686: [798], 687: [799], 688: [800], 689: [801], 690: [802], 691: [803], 692: [804], 693: [805], 694: [806], 695: [807], 696: [808], 697: [809], 698: [810], 699: [811], 700: [812], 701: [813], 702: [814], 703: [815], 704: [816], 705: [817], 706: [818], 707: [819], 708: [820], 709: [822], 710: [823], 711: [824], 712: [825], 713: [826], 714: [827], 715: [829], 716: [830], 717: [831], 718: [832], 719: [833], 720: [834], 721: [838], 722: [839], 723: [841], 724: [842], 725: [844], 726: [845], 727: [851], 728: [852], 729: [853], 730: [854], 731: [855], 732: [856], 733: [857], 734: [858], 735: [859], 736: [861], 737: [862], 738: [863], 739: [864], 740: [865], 741: [866], 742: [867], 743: [868], 744: [869], 745: [871], 746: [872], 747: [873], 748: [874], 749: [875], 750: [876], 751: [877], 752: [878], 753: [879], 754: [881], 755: [882], 756: [883], 757: [884], 758: [885], 759: [886], 760: [888], 761: [889], 762: [890], 763: [891], 764: [892], 765: [893], 766: [894], 767: [895], 768: [897], 769: [898], 770: [900], 771: [901], 772: [907], 773: [908], 774: [909], 775: [910], 776: [911], 777: [912], 778: [913], 779: [914], 780: [915], 781: [917], 782: [918], 783: [919], 784: [920], 785: [921], 786: [922], 787: [923], 788: [924], 789: [925], 790: [927], 791: [928], 792: [929], 793: [930], 794: [931], 795: [932], 796: [933], 797: [934], 798: [935], 799: [937], 800: [938], 801: [939], 802: [940], 803: [941], 804: [942], 805: [944], 806: [945], 807: [946], 808: [947], 809: [948], 810: [949], 811: [950], 812: [951], 813: [953], 814: [954], 815: [956], 816: [957], 817: [963], 818: [964], 819: [965], 820: [966], 821: [967], 822: [968], 823: [969], 824: [970], 825: [971], 826: [973], 827: [974], 828: [975], 829: [976], 830: [977], 831: [978], 832: [979], 833: [980], 834: [981], 835: 
[983], 836: [984], 837: [985], 838: [986], 839: [987], 840: [988], 841: [989], 842: [990], 843: [991], 844: [993], 845: [994], 846: [995], 847: [996], 848: [997], 849: [998], 850: [1000], 851: [1001], 852: [1002], 853: [1003], 854: [1004], 855: [1005], 856: [1006], 857: [1007], 858: [1009], 859: [1010], 860: [1012], 861: [1013], 862: [1018], 863: [1019], 864: [1021], 865: [1022], 866: [1024], 867: [1025], 868: [1030], 869: [1031], 870: [1033], 871: [1034], 872: [1036], 873: [1037], 874: [1045], 875: [1046], 876: [1047], 877: [1048], 878: [1049], 879: [1050], 880: [1051], 881: [1052], 882: [1053], 883: [1054], 884: [1055], 885: [1056], 886: [1057], 887: [1058], 888: [1059], 889: [1060], 890: [1061], 891: [1062], 892: [1064], 893: [1065], 894: [1066], 895: [1067], 896: [1068], 897: [1069], 898: [1070], 899: [1071], 900: [1072], 901: [1073], 902: [1074], 903: [1075], 904: [1076], 905: [1077], 906: [1078], 907: [1079], 908: [1080], 909: [1081], 910: [1083], 911: [1084], 912: [1085], 913: [1086], 914: [1087], 915: [1088], 916: [1089], 917: [1090], 918: [1091], 919: [1092], 920: [1093], 921: [1094], 922: [1095], 923: [1096], 924: [1097], 925: [1098], 926: [1099], 927: [1100], 928: [1102], 929: [1103], 930: [1104], 931: [1105], 932: [1106], 933: [1107], 934: [1109], 935: [1110], 936: [1111], 937: [1112], 938: [1113], 939: [1114], 940: [1116], 941: [1117], 942: [1118], 943: [1119], 944: [1120], 945: [1121]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [3], 4: [4], 5: [6], 6: [7], 7: [13], 8: [14], 9: [15], 10: [17], 11: [18], 12: [19], 13: [21], 14: [22], 15: [23], 16: [25], 17: [26], 18: [27], 19: [28], 20: [29], 21: [30], 22: [32], 23: [33], 24: [34], 25: [35], 26: [36], 27: [37], 28: [38], 29: [39], 30: [40], 31: [41], 32: [42], 33: [43], 34: [44], 35: [45], 36: [46], 37: [48], 38: [49], 39: [50], 40: [51], 41: [52], 42: [53], 43: [54], 44: [55], 45: [56], 46: [57], 47: [58], 48: [59], 49: [60], 50: [61], 51: [62], 52: [64], 53: 
[65], 54: [66], 55: [67], 56: [68], 57: [69], 58: [70], 59: [71], 60: [72], 61: [73], 62: [74], 63: [75], 64: [76], 65: [77], 66: [78], 67: [80], 68: [81], 69: [82], 70: [83], 71: [84], 72: [85], 73: [86], 74: [87], 75: [88], 76: [90], 77: [91], 78: [92], 79: [93], 80: [94], 81: [95], 82: [96], 83: [97], 84: [98], 85: [100], 86: [101], 87: [102], 88: [103], 89: [104], 90: [105], 91: [106], 92: [107], 93: [108], 94: [109], 95: [110], 96: [111], 97: [112], 98: [113], 99: [114], 100: [115], 101: [116], 102: [117], 103: [118], 104: [119], 105: [120], 106: [123], 107: [124], 108: [125], 109: [126], 110: [127], 111: [128], 112: [129], 113: [130], 114: [131], 115: [132], 116: [133], 117: [134], 118: [135], 119: [136], 120: [137], 121: [139], 122: [140], 123: [142], 124: [143], 125: [144], 126: [145], 127: [146], 128: [147], 129: [148], 130: [149], 131: [150], 132: [151], 133: [152], 134: [153], 135: [154], 136: [155], 137: [156], 138: [158], 139: [159], 140: [160], 141: [161], 142: [162], 143: [163], 144: [164], 145: [165], 146: [166], 147: [167], 148: [168], 149: [169], 150: [170], 151: [171], 152: [172], 153: [174], 154: [175], 155: [176], 156: [177], 157: [178], 158: [179], 159: [180], 160: [181], 161: [182], 162: [183], 163: [184], 164: [185], 165: [186], 166: [187], 167: [188], 168: [189], 169: [190], 170: [191], 171: [192], 172: [193], 173: [194], 174: [195], 175: [196], 176: [197], 177: [198], 178: [199], 179: [200], 180: [201], 181: [202], 182: [203], 183: [204], 184: [205], 185: [206], 186: [207], 187: [208], 188: [209], 189: [210], 190: [211], 191: [212], 192: [213], 193: [214], 194: [215], 195: [216], 196: [217], 197: [219], 198: [220], 199: [221], 200: [222], 201: [223], 202: [224], 203: [226], 204: [227], 205: [228], 206: [229], 207: [230], 208: [231], 209: [233], 210: [234], 211: [246], 212: [247], 213: [248], 214: [249], 215: [250], 216: [251], 217: [252], 218: [253], 219: [254], 220: [255], 221: [256], 222: [257], 223: [258], 224: [259], 225: [260], 226: 
[262], 227: [263], 228: [265], 229: [266], 230: [267], 231: [268], 232: [269], 233: [270], 234: [271], 235: [272], 236: [273], 237: [274], 238: [275], 239: [276], 240: [277], 241: [278], 242: [279], 243: [281], 244: [282], 245: [283], 246: [284], 247: [285], 248: [286], 249: [287], 250: [288], 251: [289], 252: [290], 253: [291], 254: [292], 255: [293], 256: [294], 257: [295], 258: [297], 259: [298], 260: [299], 261: [300], 262: [301], 263: [302], 264: [303], 265: [304], 266: [305], 267: [306], 268: [307], 269: [308], 270: [309], 271: [310], 272: [311], 273: [312], 274: [313], 275: [314], 276: [315], 277: [316], 278: [317], 279: [318], 280: [319], 281: [320], 282: [321], 283: [322], 284: [323], 285: [324], 286: [325], 287: [326], 288: [327], 289: [328], 290: [329], 291: [330], 292: [331], 293: [332], 294: [333], 295: [334], 296: [335], 297: [336], 298: [337], 299: [338], 300: [339], 301: [340], 302: [342], 303: [343], 304: [344], 305: [345], 306: [346], 307: [347], 308: [349], 309: [350], 310: [351], 311: [352], 312: [353], 313: [354], 314: [356], 315: [357], 316: [369], 317: [370], 318: [371], 319: [372], 320: [373], 321: [374], 322: [376], 323: [377], 324: [378], 325: [379], 326: [380], 327: [381], 328: [382], 329: [383], 330: [384], 331: [385], 332: [386], 333: [387], 334: [388], 335: [389], 336: [390], 337: [392], 338: [393], 339: [394], 340: [395], 341: [396], 342: [397], 343: [398], 344: [399], 345: [400], 346: [401], 347: [402], 348: [403], 349: [404], 350: [405], 351: [406], 352: [408], 353: [409], 354: [410], 355: [411], 356: [412], 357: [413], 358: [414], 359: [415], 360: [416], 361: [417], 362: [418], 363: [419], 364: [420], 365: [421], 366: [422], 367: [424], 368: [425], 369: [426], 370: [427], 371: [428], 372: [429], 373: [430], 374: [431], 375: [432], 376: [433], 377: [434], 378: [436], 379: [437], 380: [439], 381: [440], 382: [446], 383: [447], 384: [448], 385: [449], 386: [450], 387: [451], 388: [452], 389: [453], 390: [454], 391: [456], 392: [457], 
393: [458], 394: [459], 395: [460], 396: [461], 397: [462], 398: [463], 399: [464], 400: [466], 401: [467], 402: [468], 403: [469], 404: [470], 405: [471], 406: [472], 407: [473], 408: [474], 409: [476], 410: [477], 411: [478], 412: [479], 413: [480], 414: [481], 415: [483], 416: [484], 417: [485], 418: [486], 419: [487], 420: [488], 421: [492], 422: [493], 423: [494], 424: [495], 425: [496], 426: [497], 427: [499], 428: [500], 429: [501], 430: [502], 431: [503], 432: [504], 433: [505], 434: [506], 435: [507], 436: [508], 437: [509], 438: [510], 439: [511], 440: [512], 441: [513], 442: [515], 443: [516], 444: [517], 445: [518], 446: [519], 447: [520], 448: [521], 449: [522], 450: [523], 451: [524], 452: [525], 453: [526], 454: [527], 455: [528], 456: [529], 457: [531], 458: [532], 459: [533], 460: [534], 461: [535], 462: [536], 463: [537], 464: [538], 465: [539], 466: [540], 467: [541], 468: [542], 469: [543], 470: [544], 471: [545], 472: [547], 473: [548], 474: [549], 475: [550], 476: [551], 477: [552], 478: [553], 479: [554], 480: [555], 481: [556], 482: [557], 483: [559], 484: [560], 485: [562], 486: [563], 487: [569], 488: [570], 489: [571], 490: [572], 491: [573], 492: [574], 493: [575], 494: [576], 495: [577], 496: [579], 497: [580], 498: [581], 499: [582], 500: [583], 501: [584], 502: [585], 503: [586], 504: [587], 505: [589], 506: [590], 507: [591], 508: [592], 509: [593], 510: [594], 511: [595], 512: [596], 513: [597], 514: [599], 515: [600], 516: [601], 517: [602], 518: [603], 519: [604], 520: [606], 521: [607], 522: [608], 523: [609], 524: [610], 525: [611], 526: [615], 527: [616], 528: [617], 529: [618], 530: [619], 531: [620], 532: [622], 533: [623], 534: [624], 535: [625], 536: [626], 537: [627], 538: [628], 539: [629], 540: [630], 541: [631], 542: [632], 543: [633], 544: [634], 545: [635], 546: [636], 547: [638], 548: [639], 549: [640], 550: [641], 551: [642], 552: [643], 553: [644], 554: [645], 555: [646], 556: [647], 557: [648], 558: [649], 559: 
[650], 560: [651], 561: [652], 562: [654], 563: [655], 564: [656], 565: [657], 566: [658], 567: [659], 568: [660], 569: [661], 570: [662], 571: [663], 572: [664], 573: [665], 574: [666], 575: [667], 576: [668], 577: [670], 578: [671], 579: [672], 580: [673], 581: [674], 582: [675], 583: [676], 584: [677], 585: [678], 586: [679], 587: [680], 588: [682], 589: [683], 590: [685], 591: [686], 592: [692], 593: [693], 594: [694], 595: [695], 596: [696], 597: [697], 598: [698], 599: [699], 600: [700], 601: [702], 602: [703], 603: [704], 604: [705], 605: [706], 606: [707], 607: [708], 608: [709], 609: [710], 610: [712], 611: [713], 612: [714], 613: [715], 614: [716], 615: [717], 616: [718], 617: [719], 618: [720], 619: [722], 620: [723], 621: [724], 622: [725], 623: [726], 624: [727], 625: [729], 626: [730], 627: [731], 628: [732], 629: [733], 630: [734], 631: [738], 632: [739], 633: [740], 634: [741], 635: [742], 636: [743], 637: [744], 638: [745], 639: [746], 640: [747], 641: [748], 642: [749], 643: [750], 644: [751], 645: [752], 646: [753], 647: [754], 648: [755], 649: [756], 650: [757], 651: [758], 652: [759], 653: [760], 654: [761], 655: [762], 656: [763], 657: [764], 658: [765], 659: [766], 660: [767], 661: [768], 662: [769], 663: [770], 664: [772], 665: [773], 666: [774], 667: [775], 668: [776], 669: [777], 670: [779], 671: [780], 672: [781], 673: [782], 674: [783], 675: [784], 676: [788], 677: [789], 678: [790], 679: [791], 680: [792], 681: [793], 682: [794], 683: [795], 684: [796], 685: [797], 686: [798], 687: [799], 688: [800], 689: [801], 690: [802], 691: [803], 692: [804], 693: [805], 694: [806], 695: [807], 696: [808], 697: [809], 698: [810], 699: [811], 700: [812], 701: [813], 702: [814], 703: [815], 704: [816], 705: [817], 706: [818], 707: [819], 708: [820], 709: [822], 710: [823], 711: [824], 712: [825], 713: [826], 714: [827], 715: [829], 716: [830], 717: [831], 718: [832], 719: [833], 720: [834], 721: [838], 722: [839], 723: [841], 724: [842], 725: [844], 
726: [845], 727: [851], 728: [852], 729: [853], 730: [854], 731: [855], 732: [856], 733: [857], 734: [858], 735: [859], 736: [861], 737: [862], 738: [863], 739: [864], 740: [865], 741: [866], 742: [867], 743: [868], 744: [869], 745: [871], 746: [872], 747: [873], 748: [874], 749: [875], 750: [876], 751: [877], 752: [878], 753: [879], 754: [881], 755: [882], 756: [883], 757: [884], 758: [885], 759: [886], 760: [888], 761: [889], 762: [890], 763: [891], 764: [892], 765: [893], 766: [894], 767: [895], 768: [897], 769: [898], 770: [900], 771: [901], 772: [907], 773: [908], 774: [909], 775: [910], 776: [911], 777: [912], 778: [913], 779: [914], 780: [915], 781: [917], 782: [918], 783: [919], 784: [920], 785: [921], 786: [922], 787: [923], 788: [924], 789: [925], 790: [927], 791: [928], 792: [929], 793: [930], 794: [931], 795: [932], 796: [933], 797: [934], 798: [935], 799: [937], 800: [938], 801: [939], 802: [940], 803: [941], 804: [942], 805: [944], 806: [945], 807: [946], 808: [947], 809: [948], 810: [949], 811: [950], 812: [951], 813: [953], 814: [954], 815: [956], 816: [957], 817: [963], 818: [964], 819: [965], 820: [966], 821: [967], 822: [968], 823: [969], 824: [970], 825: [971], 826: [973], 827: [974], 828: [975], 829: [976], 830: [977], 831: [978], 832: [979], 833: [980], 834: [981], 835: [983], 836: [984], 837: [985], 838: [986], 839: [987], 840: [988], 841: [989], 842: [990], 843: [991], 844: [993], 845: [994], 846: [995], 847: [996], 848: [997], 849: [998], 850: [1000], 851: [1001], 852: [1002], 853: [1003], 854: [1004], 855: [1005], 856: [1006], 857: [1007], 858: [1009], 859: [1010], 860: [1012], 861: [1013], 862: [1018], 863: [1019], 864: [1021], 865: [1022], 866: [1024], 867: [1025], 868: [1030], 869: [1031], 870: [1033], 871: [1034], 872: [1036], 873: [1037], 874: [1045], 875: [1046], 876: [1047], 877: [1048], 878: [1049], 879: [1050], 880: [1051], 881: [1052], 882: [1053], 883: [1054], 884: [1055], 885: [1056], 886: [1057], 887: [1058], 888: [1059], 889: 
[1060], 890: [1061], 891: [1062], 892: [1064], 893: [1065], 894: [1066], 895: [1067], 896: [1068], 897: [1069], 898: [1070], 899: [1071], 900: [1072], 901: [1073], 902: [1074], 903: [1075], 904: [1076], 905: [1077], 906: [1078], 907: [1079], 908: [1080], 909: [1081], 910: [1083], 911: [1084], 912: [1085], 913: [1086], 914: [1087], 915: [1088], 916: [1089], 917: [1090], 918: [1091], 919: [1092], 920: [1093], 921: [1094], 922: [1095], 923: [1096], 924: [1097], 925: [1098], 926: [1099], 927: [1100], 928: [1102], 929: [1103], 930: [1104], 931: [1105], 932: [1106], 933: [1107], 934: [1109], 935: [1110], 936: [1111], 937: [1112], 938: [1113], 939: [1114], 940: [1116], 941: [1117], 942: [1118], 943: [1119], 944: [1120], 945: [1121]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 6: 3, 7: 4, 11: 5, 12: 6, 28: 7, 29: 8, 30: 9, 34: 10, 35: 11, 36: 12, 40: 13, 41: 14, 42: 15, 46: 16, 47: 17, 48: 18, 49: 19, 50: 20, 51: 21, 55: 22, 56: 23, 57: 24, 58: 25, 59: 26, 60: 27, 61: 28, 62: 29, 63: 30, 64: 31, 65: 32, 66: 33, 67: 34, 68: 35, 69: 36, 73: 37, 74: 38, 75: 39, 76: 40, 77: 41, 78: 42, 79: 43, 80: 44, 81: 45, 82: 46, 83: 47, 84: 48, 85: 49, 86: 50, 87: 51, 91: 52, 92: 53, 93: 54, 94: 55, 95: 56, 96: 57, 97: 58, 98: 59, 99: 60, 100: 61, 101: 62, 102: 63, 103: 64, 104: 65, 105: 66, 109: 67, 110: 68, 111: 69, 112: 70, 113: 71, 114: 72, 115: 73, 116: 74, 117: 75, 121: 76, 122: 77, 123: 78, 124: 79, 125: 80, 126: 81, 127: 82, 128: 83, 129: 84, 133: 85, 134: 86, 135: 87, 136: 88, 137: 89, 138: 90, 139: 91, 140: 92, 141: 93, 142: 94, 143: 95, 144: 96, 145: 97, 146: 98, 147: 99, 148: 100, 149: 101, 150: 102, 151: 103, 152: 104, 153: 105, 160: 106, 161: 107, 162: 108, 163: 109, 164: 110, 165: 111, 166: 112, 167: 113, 168: 114, 169: 115, 170: 116, 171: 117, 172: 118, 173: 119, 174: 120, 178: 121, 179: 122, 183: 123, 184: 124, 185: 125, 186: 126, 187: 127, 188: 128, 189: 129, 190: 130, 191: 131, 192: 132, 193: 133, 194: 134, 195: 135, 196: 136, 197: 137, 201: 
138, 202: 139, 203: 140, 204: 141, 205: 142, 206: 143, 207: 144, 208: 145, 209: 146, 210: 147, 211: 148, 212: 149, 213: 150, 214: 151, 215: 152, 219: 153, 220: 154, 221: 155, 222: 156, 223: 157, 224: 158, 225: 159, 226: 160, 227: 161, 228: 162, 229: 163, 230: 164, 231: 165, 232: 166, 233: 167, 234: 168, 235: 169, 236: 170, 237: 171, 238: 172, 239: 173, 240: 174, 241: 175, 242: 176, 243: 177, 244: 178, 245: 179, 246: 180, 247: 181, 248: 182, 249: 183, 250: 184, 251: 185, 252: 186, 253: 187, 254: 188, 255: 189, 256: 190, 257: 191, 258: 192, 259: 193, 260: 194, 261: 195, 262: 196, 266: 197, 267: 198, 268: 199, 269: 200, 270: 201, 271: 202, 275: 203, 276: 204, 277: 205, 278: 206, 279: 207, 280: 208, 284: 209, 285: 210, 319: 211, 320: 212, 321: 213, 322: 214, 323: 215, 324: 216, 325: 217, 326: 218, 327: 219, 328: 220, 329: 221, 330: 222, 331: 223, 332: 224, 333: 225, 337: 226, 338: 227, 342: 228, 343: 229, 344: 230, 345: 231, 346: 232, 347: 233, 348: 234, 349: 235, 350: 236, 351: 237, 352: 238, 353: 239, 354: 240, 355: 241, 356: 242, 360: 243, 361: 244, 362: 245, 363: 246, 364: 247, 365: 248, 366: 249, 367: 250, 368: 251, 369: 252, 370: 253, 371: 254, 372: 255, 373: 256, 374: 257, 378: 258, 379: 259, 380: 260, 381: 261, 382: 262, 383: 263, 384: 264, 385: 265, 386: 266, 387: 267, 388: 268, 389: 269, 390: 270, 391: 271, 392: 272, 393: 273, 394: 274, 395: 275, 396: 276, 397: 277, 398: 278, 399: 279, 400: 280, 401: 281, 402: 282, 403: 283, 404: 284, 405: 285, 406: 286, 407: 287, 408: 288, 409: 289, 410: 290, 411: 291, 412: 292, 413: 293, 414: 294, 415: 295, 416: 296, 417: 297, 418: 298, 419: 299, 420: 300, 421: 301, 425: 302, 426: 303, 427: 304, 428: 305, 429: 306, 430: 307, 434: 308, 435: 309, 436: 310, 437: 311, 438: 312, 439: 313, 443: 314, 444: 315, 478: 316, 479: 317, 480: 318, 481: 319, 482: 320, 483: 321, 487: 322, 488: 323, 489: 324, 490: 325, 491: 326, 492: 327, 493: 328, 494: 329, 495: 330, 496: 331, 497: 332, 498: 333, 499: 334, 500: 335, 501: 336, 505: 337, 506: 
338, 507: 339, 508: 340, 509: 341, 510: 342, 511: 343, 512: 344, 513: 345, 514: 346, 515: 347, 516: 348, 517: 349, 518: 350, 519: 351, 523: 352, 524: 353, 525: 354, 526: 355, 527: 356, 528: 357, 529: 358, 530: 359, 531: 360, 532: 361, 533: 362, 534: 363, 535: 364, 536: 365, 537: 366, 541: 367, 542: 368, 543: 369, 544: 370, 545: 371, 546: 372, 547: 373, 548: 374, 549: 375, 550: 376, 551: 377, 555: 378, 556: 379, 560: 380, 561: 381, 577: 382, 578: 383, 579: 384, 580: 385, 581: 386, 582: 387, 583: 388, 584: 389, 585: 390, 589: 391, 590: 392, 591: 393, 592: 394, 593: 395, 594: 396, 595: 397, 596: 398, 597: 399, 601: 400, 602: 401, 603: 402, 604: 403, 605: 404, 606: 405, 607: 406, 608: 407, 609: 408, 613: 409, 614: 410, 615: 411, 616: 412, 617: 413, 618: 414, 622: 415, 623: 416, 624: 417, 625: 418, 626: 419, 627: 420, 637: 421, 638: 422, 639: 423, 640: 424, 641: 425, 642: 426, 646: 427, 647: 428, 648: 429, 649: 430, 650: 431, 651: 432, 652: 433, 653: 434, 654: 435, 655: 436, 656: 437, 657: 438, 658: 439, 659: 440, 660: 441, 664: 442, 665: 443, 666: 444, 667: 445, 668: 446, 669: 447, 670: 448, 671: 449, 672: 450, 673: 451, 674: 452, 675: 453, 676: 454, 677: 455, 678: 456, 682: 457, 683: 458, 684: 459, 685: 460, 686: 461, 687: 462, 688: 463, 689: 464, 690: 465, 691: 466, 692: 467, 693: 468, 694: 469, 695: 470, 696: 471, 700: 472, 701: 473, 702: 474, 703: 475, 704: 476, 705: 477, 706: 478, 707: 479, 708: 480, 709: 481, 710: 482, 714: 483, 715: 484, 719: 485, 720: 486, 736: 487, 737: 488, 738: 489, 739: 490, 740: 491, 741: 492, 742: 493, 743: 494, 744: 495, 748: 496, 749: 497, 750: 498, 751: 499, 752: 500, 753: 501, 754: 502, 755: 503, 756: 504, 760: 505, 761: 506, 762: 507, 763: 508, 764: 509, 765: 510, 766: 511, 767: 512, 768: 513, 772: 514, 773: 515, 774: 516, 775: 517, 776: 518, 777: 519, 781: 520, 782: 521, 783: 522, 784: 523, 785: 524, 786: 525, 796: 526, 797: 527, 798: 528, 799: 529, 800: 530, 801: 531, 805: 532, 806: 533, 807: 534, 808: 535, 809: 536, 810: 537, 811: 
538, 812: 539, 813: 540, 814: 541, 815: 542, 816: 543, 817: 544, 818: 545, 819: 546, 823: 547, 824: 548, 825: 549, 826: 550, 827: 551, 828: 552, 829: 553, 830: 554, 831: 555, 832: 556, 833: 557, 834: 558, 835: 559, 836: 560, 837: 561, 841: 562, 842: 563, 843: 564, 844: 565, 845: 566, 846: 567, 847: 568, 848: 569, 849: 570, 850: 571, 851: 572, 852: 573, 853: 574, 854: 575, 855: 576, 859: 577, 860: 578, 861: 579, 862: 580, 863: 581, 864: 582, 865: 583, 866: 584, 867: 585, 868: 586, 869: 587, 873: 588, 874: 589, 878: 590, 879: 591, 895: 592, 896: 593, 897: 594, 898: 595, 899: 596, 900: 597, 901: 598, 902: 599, 903: 600, 907: 601, 908: 602, 909: 603, 910: 604, 911: 605, 912: 606, 913: 607, 914: 608, 915: 609, 919: 610, 920: 611, 921: 612, 922: 613, 923: 614, 924: 615, 925: 616, 926: 617, 927: 618, 931: 619, 932: 620, 933: 621, 934: 622, 935: 623, 936: 624, 940: 625, 941: 626, 942: 627, 943: 628, 944: 629, 945: 630, 955: 631, 956: 632, 957: 633, 958: 634, 959: 635, 960: 636, 961: 637, 962: 638, 963: 639, 964: 640, 965: 641, 966: 642, 967: 643, 968: 644, 969: 645, 970: 646, 971: 647, 972: 648, 973: 649, 974: 650, 975: 651, 976: 652, 977: 653, 978: 654, 979: 655, 980: 656, 981: 657, 982: 658, 983: 659, 984: 660, 985: 661, 986: 662, 987: 663, 991: 664, 992: 665, 993: 666, 994: 667, 995: 668, 996: 669, 1000: 670, 1001: 671, 1002: 672, 1003: 673, 1004: 674, 1005: 675, 1015: 676, 1016: 677, 1017: 678, 1018: 679, 1019: 680, 1020: 681, 1021: 682, 1022: 683, 1023: 684, 1024: 685, 1025: 686, 1026: 687, 1027: 688, 1028: 689, 1029: 690, 1030: 691, 1031: 692, 1032: 693, 1033: 694, 1034: 695, 1035: 696, 1036: 697, 1037: 698, 1038: 699, 1039: 700, 1040: 701, 1041: 702, 1042: 703, 1043: 704, 1044: 705, 1045: 706, 1046: 707, 1047: 708, 1051: 709, 1052: 710, 1053: 711, 1054: 712, 1055: 713, 1056: 714, 1060: 715, 1061: 716, 1062: 717, 1063: 718, 1064: 719, 1065: 720, 1075: 721, 1076: 722, 1080: 723, 1081: 724, 1085: 725, 1086: 726, 1102: 727, 1103: 728, 1104: 729, 1105: 730, 1106: 731, 
1107: 732, 1108: 733, 1109: 734, 1110: 735, 1114: 736, 1115: 737, 1116: 738, 1117: 739, 1118: 740, 1119: 741, 1120: 742, 1121: 743, 1122: 744, 1126: 745, 1127: 746, 1128: 747, 1129: 748, 1130: 749, 1131: 750, 1132: 751, 1133: 752, 1134: 753, 1138: 754, 1139: 755, 1140: 756, 1141: 757, 1142: 758, 1143: 759, 1147: 760, 1148: 761, 1149: 762, 1150: 763, 1151: 764, 1152: 765, 1153: 766, 1154: 767, 1158: 768, 1159: 769, 1163: 770, 1164: 771, 1180: 772, 1181: 773, 1182: 774, 1183: 775, 1184: 776, 1185: 777, 1186: 778, 1187: 779, 1188: 780, 1192: 781, 1193: 782, 1194: 783, 1195: 784, 1196: 785, 1197: 786, 1198: 787, 1199: 788, 1200: 789, 1204: 790, 1205: 791, 1206: 792, 1207: 793, 1208: 794, 1209: 795, 1210: 796, 1211: 797, 1212: 798, 1216: 799, 1217: 800, 1218: 801, 1219: 802, 1220: 803, 1221: 804, 1225: 805, 1226: 806, 1227: 807, 1228: 808, 1229: 809, 1230: 810, 1231: 811, 1232: 812, 1236: 813, 1237: 814, 1241: 815, 1242: 816, 1258: 817, 1259: 818, 1260: 819, 1261: 820, 1262: 821, 1263: 822, 1264: 823, 1265: 824, 1266: 825, 1270: 826, 1271: 827, 1272: 828, 1273: 829, 1274: 830, 1275: 831, 1276: 832, 1277: 833, 1278: 834, 1282: 835, 1283: 836, 1284: 837, 1285: 838, 1286: 839, 1287: 840, 1288: 841, 1289: 842, 1290: 843, 1294: 844, 1295: 845, 1296: 846, 1297: 847, 1298: 848, 1299: 849, 1303: 850, 1304: 851, 1305: 852, 1306: 853, 1307: 854, 1308: 855, 1309: 856, 1310: 857, 1314: 858, 1315: 859, 1319: 860, 1320: 861, 1333: 862, 1334: 863, 1338: 864, 1339: 865, 1343: 866, 1344: 867, 1357: 868, 1358: 869, 1362: 870, 1363: 871, 1367: 872, 1368: 873, 1396: 874, 1397: 875, 1398: 876, 1399: 877, 1400: 878, 1401: 879, 1402: 880, 1403: 881, 1404: 882, 1405: 883, 1406: 884, 1407: 885, 1408: 886, 1409: 887, 1410: 888, 1411: 889, 1412: 890, 1413: 891, 1417: 892, 1418: 893, 1419: 894, 1420: 895, 1421: 896, 1422: 897, 1423: 898, 1424: 899, 1425: 900, 1426: 901, 1427: 902, 1428: 903, 1429: 904, 1430: 905, 1431: 906, 1432: 907, 1433: 908, 1434: 909, 1438: 910, 1439: 911, 1440: 912, 1441: 
913, 1442: 914, 1443: 915, 1444: 916, 1445: 917, 1446: 918, 1447: 919, 1448: 920, 1449: 921, 1450: 922, 1451: 923, 1452: 924, 1453: 925, 1454: 926, 1455: 927, 1459: 928, 1460: 929, 1461: 930, 1462: 931, 1463: 932, 1464: 933, 1468: 934, 1469: 935, 1470: 936, 1471: 937, 1472: 938, 1473: 939, 1477: 940, 1478: 941, 1479: 942, 1480: 943, 1481: 944, 1482: 945} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1337]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1344]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 
336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 
701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric 
diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.762 s -Wrote files for 2281 helas calls in 46.233 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.616 s +Wrote files for 2281 helas calls in 46.335 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.312 s +ALOHA: aloha creates 5 routines in 0.313 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -237,7 +238,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.311 s +ALOHA: aloha creates 10 routines in 0.307 s VVV1 VVV1 FFV1 @@ -356,6 +357,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m57.853s -user 0m56.814s -sys 0m0.842s +real 0m57.671s +user 0m56.600s +sys 0m0.875s diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc index 18c4db8539..ca9e346bf8 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc @@ -30628,13 +30628,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 75995cb13b..395f0229dc 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 
0.005396604537963867  +DEBUG: model prefixing takes 0.0055789947509765625  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.840 s +1 processes with 1240 diagrams generated in 1.839 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -174,39 +174,40 @@ INFO: Processing color information for process: g g > t t~ g g g @1 DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: proc_id =  0 [model_handling.py at line 1046]  INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1309]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc -DEBUG: 
Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  1536 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1171]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {} 
[model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1337]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.488 s +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1344]  +Generated helas calls for 1 subprocesses (1240 
diagrams) in 6.465 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -244,6 +245,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m12.947s -user 0m12.782s -sys 0m0.106s +real 0m13.689s +user 0m12.660s +sys 0m0.103s diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc index 36675814b4..aa20dc7f78 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc @@ -32518,13 +32518,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git 
a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index da82c33819..fb5e23da39 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0053882598876953125  +DEBUG: model prefixing takes 0.005346775054931641  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.078 s +8 processes with 40 diagrams generated in 0.077 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -199,34 +199,35 @@ INFO: Creating files in directory P1_gu_ttxu DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: 
self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1340]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU 
[model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1347]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -237,34 +238,35 @@ INFO: Creating files in directory P1_gux_ttxux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: 
self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1171]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1403]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1337]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1344]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  @@ -272,7 +274,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s -Wrote files for 32 helas calls in 0.224 s +Wrote files for 32 helas calls in 0.227 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines @@ -281,7 +283,7 @@ ALOHA: aloha creates 2 routines in 0.142 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.129 s +ALOHA: aloha creates 4 routines in 0.130 s FFV1 FFV1 FFV1 @@ -407,6 +409,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.559s -user 0m2.238s -sys 0m0.312s +real 0m2.776s +user 0m2.282s +sys 0m0.285s diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index 47666e308a..dcfa5aa1cf 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -858,13 +858,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index 04a5cc423c..8565935186 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -851,13 +851,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY 
CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 865d85fca2..8fa3a22bec 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005266427993774414  +DEBUG: model prefixing takes 0.005364894866943359  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -196,30 +196,31 @@ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: proc_id =  0 [model_handling.py at line 1046]  INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1309]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False 
[model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1171]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1340]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1347]  
DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  DEBUG: type(subproc_group)= [output.py at line 188]  DEBUG: type(fortran_model)= [output.py at line 189]  @@ -227,36 +228,37 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/G DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: proc_id =  0 [model_handling.py at line 1046]  INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1309]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: 
type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1171]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1337]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1344]  Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s DEBUG: Entering 
PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.143 s FFV1 FFV1 FFV1 @@ -279,6 +281,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.737s -user 0m0.663s +real 0m0.786s +user 0m0.668s sys 0m0.063s diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc index 0b2899d317..7c85fe10ef 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc @@ -853,13 +853,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( 
process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc index e37fd43d6a..ddf0e84af6 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc @@ -846,13 +846,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index 04b5080170..a0fbab9704 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -146,32 +146,33 @@ INFO: Processing color information for process: g g > h HIG<=1 HIW<=1 @1 DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  DEBUG: proc_id =  0 [model_handling.py 
at line 1046]  INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1309]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1455]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1477]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1656]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1711]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py 
at line 1824]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1823]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1824]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1155]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1171]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1663]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1718]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1345]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1354]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1371]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1391]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1421]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1432]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1443]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_heft_gg_h.txt [model_handling.py at line 1337]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_heft_gg_h.txt [model_handling.py at line 1344]  Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model 
(create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes @@ -195,6 +196,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.489s -user 0m0.426s -sys 0m0.045s +real 0m0.516s +user 0m0.419s +sys 0m0.054s diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.cc b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.cc index e120b9206f..6c47c93912 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.cc +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.cc @@ -745,13 +745,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } From 6a4192aabee2359307a6fc1008d4fd4e08f998a9 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 13:42:25 +0200 Subject: 
[PATCH 015/119] [oct23av] regenerate 7 processes mad and 6 sa (all except ee_mumu) after merging Stefan's fix for coupling order in PR #757 - code changes only in gq_ttq The new codebase includes in particular also 6bf4a658e oliviermattelaer Wed Aug 30 14:57:19 2023 +0200 Merge pull request #754 from madgraph5/fix_mirror 0aba0c6f3 oliviermattelaer Wed Aug 30 14:53:17 2023 +0200 Merge pull request #757 from roiser/fixcouplingordering 4f3cdd86e Stefan Roiser Tue Aug 29 17:43:25 2023 +0200 in PLUGIN_UFOModelConverter overwrite the prepare_couplings function the additional functionality will re-order the dictionary keys such that they follow the order of the couplings in parameter wanted_couplings wanted_couplings contains the correct ordering as it is discovered in the OneProcessExporter class NB: generated code is identical in all processes except for gq_ttq NB: ee_mumu.mad code generation (and similarly ee_mumu.sa) fails with Command "import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu.mg" interrupted in sub-command: "output madevent ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp" with error: KeyError : 'GC_3' --- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 72 +- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 62 +- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 136 +-- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 80 +- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 70 +- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 86 +- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 72 +- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 88 +- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 78 +- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 128 +- .../cudacpp/gq_ttq.mad/src/Parameters_sm.cc | 4 +- epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h | 18 +- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 106 +- epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc | 4 +- epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h | 18 +- 
.../CODEGEN_cudacpp_heft_gg_h_log.txt | 58 +- .../CODEGEN_mad_pp_tt012j_log.txt | 1036 ++++++++--------- 17 files changed, 1058 insertions(+), 1058 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 81d7b1f4da..104ac89849 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0053462982177734375  +DEBUG: model prefixing takes 0.005397319793701172  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,40 +174,40 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -215,7 +215,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.111 s +Wrote files for 10 helas calls in 0.110 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines @@ -224,7 +224,7 @@ ALOHA: aloha creates 2 routines in 0.145 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.132 s +ALOHA: aloha creates 4 routines in 0.130 s VVV1 FFV1 FFV1 @@ -329,6 +329,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.442s -user 0m2.049s -sys 0m0.318s +real 0m2.336s +user 0m2.056s +sys 0m0.274s diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 8cfac3fd00..2bd9dd5346 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005245208740234375  +DEBUG: model prefixing takes 0.0055425167083740234  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -171,37 +171,37 @@ INFO: Processing color information for process: g g > t t~ @1 DEBUG: type(subproc_group)= [output.py at line 188]  DEBUG: type(fortran_model)= [output.py at line 189]  DEBUG: type(me)= me=0 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: proc_id =  0 [model_handling.py at line 1046]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: proc_id =  0 [model_handling.py at line 1052]  INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1309]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1315]  FileWriter for 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1171]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  False [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  +DEBUG: 
type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1344]  -Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa 
[model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1350]  +Generated helas calls for 1 subprocesses (3 diagrams) in 0.005 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 @@ -228,6 +228,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.594s -user 0m0.529s -sys 0m0.056s +real 0m0.593s +user 0m0.522s +sys 0m0.063s diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 556e1bf01e..ec552e612b 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~; add process g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005356788635253906  +DEBUG: model prefixing takes 0.005248308181762695  INFO: Restrict model sm with file 
models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -183,42 +183,42 @@ INFO: Processing color information for process: g g > t t~ g @2 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1145]  -DEBUG: 
self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at 
line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -226,40 +226,40 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  @@ -274,7 +274,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.322 s +ALOHA: aloha creates 5 routines in 0.329 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -282,7 +282,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.306 s +ALOHA: aloha creates 10 routines in 0.319 s VVV1 VVV1 FFV1 @@ -402,6 +402,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.943s -user 0m2.610s -sys 0m0.320s +real 0m3.053s +user 0m2.731s +sys 0m0.311s diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index 0b27815951..a080c269b8 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0053675174713134766  +DEBUG: model prefixing takes 0.005520343780517578  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,57 +174,57 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( 
momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, 
cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s -Wrote files for 36 helas calls in 0.163 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s +Wrote files for 36 helas calls in 0.165 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.327 s +ALOHA: aloha creates 5 routines in 0.321 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -232,7 +232,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.315 s +ALOHA: aloha creates 10 routines in 0.307 s VVV1 VVV1 FFV1 @@ -348,6 +348,6 
@@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.842s -user 0m2.517s -sys 0m0.314s +real 0m2.825s +user 0m2.539s +sys 0m0.271s diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index eb8e67097c..04729cf52e 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005820512771606445  +DEBUG: model prefixing takes 0.0057621002197265625  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.023 s +1 processes with 16 diagrams generated in 0.021 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT @@ -171,39 +171,39 @@ INFO: Processing color information for process: g g > t t~ g @1 DEBUG: type(subproc_group)= [output.py at line 188]  DEBUG: type(fortran_model)= [output.py at line 189]  DEBUG: type(me)= me=0 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: proc_id =  0 [model_handling.py at line 1046]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: proc_id =  0 [model_handling.py at line 1052]  INFO: Creating files in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1309]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1315]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1171]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, 
cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  False [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1344]  -Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1350]  +Generated helas calls for 1 subprocesses (16 
diagrams) in 0.036 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -211,7 +211,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.334 s +ALOHA: aloha creates 5 routines in 0.318 s VVV1 VVV1 FFV1 @@ -238,6 +238,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.890s -user 0m0.814s -sys 0m0.053s +real 0m0.839s +user 0m0.765s +sys 0m0.061s diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index fb4dacd9e3..e46a2aad90 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005468130111694336  +DEBUG: model prefixing takes 0.005277156829833984  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.154 s +1 processes with 123 diagrams generated in 0.155 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -174,59 +174,59 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 
62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 
63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 10, 14: 11, 15: 12, 16: 13, 17: 14, 18: 15, 19: 16, 20: 17, 21: 18, 22: 19, 23: 20, 24: 21, 25: 22, 26: 23, 27: 24, 28: 25, 29: 26, 30: 27, 31: 28, 32: 29, 33: 30, 37: 31, 38: 32, 39: 33, 40: 34, 41: 35, 42: 36, 43: 37, 44: 38, 45: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 55: 46, 56: 47, 57: 48, 58: 49, 59: 50, 60: 51, 61: 52, 62: 53, 63: 54, 67: 55, 68: 56, 69: 57, 70: 58, 71: 59, 72: 60, 73: 61, 74: 62, 75: 63, 76: 64, 77: 65, 78: 66, 79: 67, 80: 68, 81: 69, 85: 70, 86: 71, 87: 72, 88: 73, 89: 74, 90: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 100: 85, 101: 86, 102: 87, 106: 88, 107: 89, 108: 90, 109: 91, 110: 92, 111: 93, 115: 94, 116: 95, 117: 96, 118: 97, 119: 98, 120: 99, 124: 100, 125: 101, 126: 102, 127: 103, 128: 104, 129: 105} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, 
w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: 
[35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 10, 14: 11, 15: 12, 16: 13, 
17: 14, 18: 15, 19: 16, 20: 17, 21: 18, 22: 19, 23: 20, 24: 21, 25: 22, 26: 23, 27: 24, 28: 25, 29: 26, 30: 27, 31: 28, 32: 29, 33: 30, 37: 31, 38: 32, 39: 33, 40: 34, 41: 35, 42: 36, 43: 37, 44: 38, 45: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 55: 46, 56: 47, 57: 48, 58: 49, 59: 50, 60: 51, 61: 52, 62: 53, 63: 54, 67: 55, 68: 56, 69: 57, 70: 58, 71: 59, 72: 60, 73: 61, 74: 62, 75: 63, 76: 64, 77: 65, 78: 66, 79: 67, 80: 68, 81: 69, 85: 70, 86: 71, 87: 72, 88: 73, 89: 74, 90: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 100: 85, 101: 86, 102: 87, 106: 88, 107: 89, 108: 90, 109: 91, 110: 92, 111: 93, 115: 94, 116: 95, 117: 96, 118: 97, 119: 98, 120: 99, 124: 100, 125: 101, 126: 102, 127: 103, 128: 104, 129: 105} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.416 s -Wrote files for 222 helas calls in 0.728 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.419 s +Wrote files for 222 helas calls in 0.724 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.326 s +ALOHA: aloha creates 5 routines in 0.322 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -234,7 +234,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.310 s +ALOHA: aloha creates 10 routines in 0.307 s VVV1 VVV1 FFV1 @@ 
-353,6 +353,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.918s -user 0m3.605s -sys 0m0.297s +real 0m4.067s +user 0m3.594s +sys 0m0.311s diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 3bebed0b58..4886319fa1 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005362033843994141  +DEBUG: model prefixing takes 0.005265235900878906  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.154 s +1 processes with 123 diagrams generated in 0.155 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -171,40 +171,40 @@ INFO: Processing color information for process: g g > t t~ g g @1 DEBUG: type(subproc_group)= [output.py at line 188]  DEBUG: type(fortran_model)= [output.py at line 189]  DEBUG: type(me)= me=0 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: proc_id =  0 [model_handling.py at line 1046]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: proc_id =  0 [model_handling.py at line 1052]  INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1309]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1315]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc -DEBUG: Entering 
PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1171]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  False [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  
True False [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1350]  Generated helas calls for 1 subprocesses (123 diagrams) in 0.417 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model 
(create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes @@ -213,7 +213,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.316 s +ALOHA: aloha creates 5 routines in 0.314 s VVV1 VVV1 FFV1 @@ -243,6 +243,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m1.491s -user 0m1.428s -sys 0m0.050s +real 0m1.539s +user 0m1.427s +sys 0m0.051s diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 14de1d052f..f94189e829 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00551152229309082  +DEBUG: model prefixing takes 0.005403995513916016  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.858 s +1 processes with 1240 diagrams generated in 1.849 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -176,61 +176,61 @@ INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1592 term in 36s. Introduce 2768 contraction -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 
576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 
927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  1536 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 
241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 
602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [3], 4: [4], 5: [6], 6: [7], 7: [13], 8: [14], 9: [15], 10: [17], 11: [18], 12: [19], 13: [21], 14: [22], 15: [23], 16: [25], 17: [26], 18: [27], 19: [28], 20: [29], 21: [30], 22: [32], 23: [33], 24: [34], 25: [35], 26: [36], 27: [37], 28: [38], 29: [39], 30: [40], 31: [41], 32: [42], 33: [43], 34: [44], 35: [45], 36: [46], 37: [48], 38: [49], 39: [50], 40: [51], 41: [52], 42: [53], 43: [54], 44: [55], 45: [56], 46: [57], 47: [58], 48: [59], 49: [60], 50: [61], 51: [62], 52: [64], 53: [65], 54: [66], 55: [67], 56: [68], 57: [69], 58: [70], 59: [71], 60: [72], 61: [73], 62: [74], 63: [75], 64: [76], 65: [77], 66: [78], 67: [80], 68: [81], 69: [82], 70: [83], 71: [84], 72: [85], 73: [86], 74: [87], 75: [88], 76: [90], 77: [91], 78: [92], 79: [93], 80: [94], 81: [95], 82: [96], 83: [97], 84: [98], 85: [100], 86: [101], 87: [102], 88: [103], 89: [104], 90: [105], 91: [106], 92: [107], 93: [108], 94: [109], 95: [110], 96: [111], 97: [112], 98: [113], 99: [114], 100: [115], 101: [116], 102: [117], 103: [118], 104: [119], 105: [120], 106: [123], 107: [124], 108: [125], 109: [126], 110: [127], 111: [128], 112: [129], 113: [130], 114: [131], 115: [132], 116: [133], 117: [134], 118: [135], 119: [136], 120: [137], 121: [139], 122: [140], 123: [142], 124: [143], 125: [144], 126: [145], 127: [146], 128: [147], 129: [148], 130: [149], 131: [150], 132: [151], 133: [152], 134: [153], 135: [154], 136: [155], 137: [156], 138: [158], 139: [159], 140: [160], 141: [161], 142: [162], 143: [163], 144: [164], 145: [165], 146: [166], 147: [167], 148: [168], 149: [169], 150: [170], 151: [171], 152: [172], 153: [174], 
154: [175], 155: [176], 156: [177], 157: [178], 158: [179], 159: [180], 160: [181], 161: [182], 162: [183], 163: [184], 164: [185], 165: [186], 166: [187], 167: [188], 168: [189], 169: [190], 170: [191], 171: [192], 172: [193], 173: [194], 174: [195], 175: [196], 176: [197], 177: [198], 178: [199], 179: [200], 180: [201], 181: [202], 182: [203], 183: [204], 184: [205], 185: [206], 186: [207], 187: [208], 188: [209], 189: [210], 190: [211], 191: [212], 192: [213], 193: [214], 194: [215], 195: [216], 196: [217], 197: [219], 198: [220], 199: [221], 200: [222], 201: [223], 202: [224], 203: [226], 204: [227], 205: [228], 206: [229], 207: [230], 208: [231], 209: [233], 210: [234], 211: [246], 212: [247], 213: [248], 214: [249], 215: [250], 216: [251], 217: [252], 218: [253], 219: [254], 220: [255], 221: [256], 222: [257], 223: [258], 224: [259], 225: [260], 226: [262], 227: [263], 228: [265], 229: [266], 230: [267], 231: [268], 232: [269], 233: [270], 234: [271], 235: [272], 236: [273], 237: [274], 238: [275], 239: [276], 240: [277], 241: [278], 242: [279], 243: [281], 244: [282], 245: [283], 246: [284], 247: [285], 248: [286], 249: [287], 250: [288], 251: [289], 252: [290], 253: [291], 254: [292], 255: [293], 256: [294], 257: [295], 258: [297], 259: [298], 260: [299], 261: [300], 262: [301], 263: [302], 264: [303], 265: [304], 266: [305], 267: [306], 268: [307], 269: [308], 270: [309], 271: [310], 272: [311], 273: [312], 274: [313], 275: [314], 276: [315], 277: [316], 278: [317], 279: [318], 280: [319], 281: [320], 282: [321], 283: [322], 284: [323], 285: [324], 286: [325], 287: [326], 288: [327], 289: [328], 290: [329], 291: [330], 292: [331], 293: [332], 294: [333], 295: [334], 296: [335], 297: [336], 298: [337], 299: [338], 300: [339], 301: [340], 302: [342], 303: [343], 304: [344], 305: [345], 306: [346], 307: [347], 308: [349], 309: [350], 310: [351], 311: [352], 312: [353], 313: [354], 314: [356], 315: [357], 316: [369], 317: [370], 318: [371], 319: [372], 320: 
[373], 321: [374], 322: [376], 323: [377], 324: [378], 325: [379], 326: [380], 327: [381], 328: [382], 329: [383], 330: [384], 331: [385], 332: [386], 333: [387], 334: [388], 335: [389], 336: [390], 337: [392], 338: [393], 339: [394], 340: [395], 341: [396], 342: [397], 343: [398], 344: [399], 345: [400], 346: [401], 347: [402], 348: [403], 349: [404], 350: [405], 351: [406], 352: [408], 353: [409], 354: [410], 355: [411], 356: [412], 357: [413], 358: [414], 359: [415], 360: [416], 361: [417], 362: [418], 363: [419], 364: [420], 365: [421], 366: [422], 367: [424], 368: [425], 369: [426], 370: [427], 371: [428], 372: [429], 373: [430], 374: [431], 375: [432], 376: [433], 377: [434], 378: [436], 379: [437], 380: [439], 381: [440], 382: [446], 383: [447], 384: [448], 385: [449], 386: [450], 387: [451], 388: [452], 389: [453], 390: [454], 391: [456], 392: [457], 393: [458], 394: [459], 395: [460], 396: [461], 397: [462], 398: [463], 399: [464], 400: [466], 401: [467], 402: [468], 403: [469], 404: [470], 405: [471], 406: [472], 407: [473], 408: [474], 409: [476], 410: [477], 411: [478], 412: [479], 413: [480], 414: [481], 415: [483], 416: [484], 417: [485], 418: [486], 419: [487], 420: [488], 421: [492], 422: [493], 423: [494], 424: [495], 425: [496], 426: [497], 427: [499], 428: [500], 429: [501], 430: [502], 431: [503], 432: [504], 433: [505], 434: [506], 435: [507], 436: [508], 437: [509], 438: [510], 439: [511], 440: [512], 441: [513], 442: [515], 443: [516], 444: [517], 445: [518], 446: [519], 447: [520], 448: [521], 449: [522], 450: [523], 451: [524], 452: [525], 453: [526], 454: [527], 455: [528], 456: [529], 457: [531], 458: [532], 459: [533], 460: [534], 461: [535], 462: [536], 463: [537], 464: [538], 465: [539], 466: [540], 467: [541], 468: [542], 469: [543], 470: [544], 471: [545], 472: [547], 473: [548], 474: [549], 475: [550], 476: [551], 477: [552], 478: [553], 479: [554], 480: [555], 481: [556], 482: [557], 483: [559], 484: [560], 485: [562], 486: [563], 
487: [569], 488: [570], 489: [571], 490: [572], 491: [573], 492: [574], 493: [575], 494: [576], 495: [577], 496: [579], 497: [580], 498: [581], 499: [582], 500: [583], 501: [584], 502: [585], 503: [586], 504: [587], 505: [589], 506: [590], 507: [591], 508: [592], 509: [593], 510: [594], 511: [595], 512: [596], 513: [597], 514: [599], 515: [600], 516: [601], 517: [602], 518: [603], 519: [604], 520: [606], 521: [607], 522: [608], 523: [609], 524: [610], 525: [611], 526: [615], 527: [616], 528: [617], 529: [618], 530: [619], 531: [620], 532: [622], 533: [623], 534: [624], 535: [625], 536: [626], 537: [627], 538: [628], 539: [629], 540: [630], 541: [631], 542: [632], 543: [633], 544: [634], 545: [635], 546: [636], 547: [638], 548: [639], 549: [640], 550: [641], 551: [642], 552: [643], 553: [644], 554: [645], 555: [646], 556: [647], 557: [648], 558: [649], 559: [650], 560: [651], 561: [652], 562: [654], 563: [655], 564: [656], 565: [657], 566: [658], 567: [659], 568: [660], 569: [661], 570: [662], 571: [663], 572: [664], 573: [665], 574: [666], 575: [667], 576: [668], 577: [670], 578: [671], 579: [672], 580: [673], 581: [674], 582: [675], 583: [676], 584: [677], 585: [678], 586: [679], 587: [680], 588: [682], 589: [683], 590: [685], 591: [686], 592: [692], 593: [693], 594: [694], 595: [695], 596: [696], 597: [697], 598: [698], 599: [699], 600: [700], 601: [702], 602: [703], 603: [704], 604: [705], 605: [706], 606: [707], 607: [708], 608: [709], 609: [710], 610: [712], 611: [713], 612: [714], 613: [715], 614: [716], 615: [717], 616: [718], 617: [719], 618: [720], 619: [722], 620: [723], 621: [724], 622: [725], 623: [726], 624: [727], 625: [729], 626: [730], 627: [731], 628: [732], 629: [733], 630: [734], 631: [738], 632: [739], 633: [740], 634: [741], 635: [742], 636: [743], 637: [744], 638: [745], 639: [746], 640: [747], 641: [748], 642: [749], 643: [750], 644: [751], 645: [752], 646: [753], 647: [754], 648: [755], 649: [756], 650: [757], 651: [758], 652: [759], 653: 
[760], 654: [761], 655: [762], 656: [763], 657: [764], 658: [765], 659: [766], 660: [767], 661: [768], 662: [769], 663: [770], 664: [772], 665: [773], 666: [774], 667: [775], 668: [776], 669: [777], 670: [779], 671: [780], 672: [781], 673: [782], 674: [783], 675: [784], 676: [788], 677: [789], 678: [790], 679: [791], 680: [792], 681: [793], 682: [794], 683: [795], 684: [796], 685: [797], 686: [798], 687: [799], 688: [800], 689: [801], 690: [802], 691: [803], 692: [804], 693: [805], 694: [806], 695: [807], 696: [808], 697: [809], 698: [810], 699: [811], 700: [812], 701: [813], 702: [814], 703: [815], 704: [816], 705: [817], 706: [818], 707: [819], 708: [820], 709: [822], 710: [823], 711: [824], 712: [825], 713: [826], 714: [827], 715: [829], 716: [830], 717: [831], 718: [832], 719: [833], 720: [834], 721: [838], 722: [839], 723: [841], 724: [842], 725: [844], 726: [845], 727: [851], 728: [852], 729: [853], 730: [854], 731: [855], 732: [856], 733: [857], 734: [858], 735: [859], 736: [861], 737: [862], 738: [863], 739: [864], 740: [865], 741: [866], 742: [867], 743: [868], 744: [869], 745: [871], 746: [872], 747: [873], 748: [874], 749: [875], 750: [876], 751: [877], 752: [878], 753: [879], 754: [881], 755: [882], 756: [883], 757: [884], 758: [885], 759: [886], 760: [888], 761: [889], 762: [890], 763: [891], 764: [892], 765: [893], 766: [894], 767: [895], 768: [897], 769: [898], 770: [900], 771: [901], 772: [907], 773: [908], 774: [909], 775: [910], 776: [911], 777: [912], 778: [913], 779: [914], 780: [915], 781: [917], 782: [918], 783: [919], 784: [920], 785: [921], 786: [922], 787: [923], 788: [924], 789: [925], 790: [927], 791: [928], 792: [929], 793: [930], 794: [931], 795: [932], 796: [933], 797: [934], 798: [935], 799: [937], 800: [938], 801: [939], 802: [940], 803: [941], 804: [942], 805: [944], 806: [945], 807: [946], 808: [947], 809: [948], 810: [949], 811: [950], 812: [951], 813: [953], 814: [954], 815: [956], 816: [957], 817: [963], 818: [964], 819: [965], 
820: [966], 821: [967], 822: [968], 823: [969], 824: [970], 825: [971], 826: [973], 827: [974], 828: [975], 829: [976], 830: [977], 831: [978], 832: [979], 833: [980], 834: [981], 835: [983], 836: [984], 837: [985], 838: [986], 839: [987], 840: [988], 841: [989], 842: [990], 843: [991], 844: [993], 845: [994], 846: [995], 847: [996], 848: [997], 849: [998], 850: [1000], 851: [1001], 852: [1002], 853: [1003], 854: [1004], 855: [1005], 856: [1006], 857: [1007], 858: [1009], 859: [1010], 860: [1012], 861: [1013], 862: [1018], 863: [1019], 864: [1021], 865: [1022], 866: [1024], 867: [1025], 868: [1030], 869: [1031], 870: [1033], 871: [1034], 872: [1036], 873: [1037], 874: [1045], 875: [1046], 876: [1047], 877: [1048], 878: [1049], 879: [1050], 880: [1051], 881: [1052], 882: [1053], 883: [1054], 884: [1055], 885: [1056], 886: [1057], 887: [1058], 888: [1059], 889: [1060], 890: [1061], 891: [1062], 892: [1064], 893: [1065], 894: [1066], 895: [1067], 896: [1068], 897: [1069], 898: [1070], 899: [1071], 900: [1072], 901: [1073], 902: [1074], 903: [1075], 904: [1076], 905: [1077], 906: [1078], 907: [1079], 908: [1080], 909: [1081], 910: [1083], 911: [1084], 912: [1085], 913: [1086], 914: [1087], 915: [1088], 916: [1089], 917: [1090], 918: [1091], 919: [1092], 920: [1093], 921: [1094], 922: [1095], 923: [1096], 924: [1097], 925: [1098], 926: [1099], 927: [1100], 928: [1102], 929: [1103], 930: [1104], 931: [1105], 932: [1106], 933: [1107], 934: [1109], 935: [1110], 936: [1111], 937: [1112], 938: [1113], 939: [1114], 940: [1116], 941: [1117], 942: [1118], 943: [1119], 944: [1120], 945: [1121]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [3], 4: [4], 5: [6], 6: [7], 7: [13], 8: [14], 9: [15], 10: [17], 11: [18], 12: [19], 13: [21], 14: [22], 15: [23], 16: [25], 17: [26], 18: [27], 19: [28], 20: [29], 21: [30], 22: [32], 23: [33], 24: [34], 25: [35], 26: [36], 27: [37], 28: [38], 29: [39], 30: [40], 31: [41], 32: [42], 33: [43], 34: [44], 
35: [45], 36: [46], 37: [48], 38: [49], 39: [50], 40: [51], 41: [52], 42: [53], 43: [54], 44: [55], 45: [56], 46: [57], 47: [58], 48: [59], 49: [60], 50: [61], 51: [62], 52: [64], 53: [65], 54: [66], 55: [67], 56: [68], 57: [69], 58: [70], 59: [71], 60: [72], 61: [73], 62: [74], 63: [75], 64: [76], 65: [77], 66: [78], 67: [80], 68: [81], 69: [82], 70: [83], 71: [84], 72: [85], 73: [86], 74: [87], 75: [88], 76: [90], 77: [91], 78: [92], 79: [93], 80: [94], 81: [95], 82: [96], 83: [97], 84: [98], 85: [100], 86: [101], 87: [102], 88: [103], 89: [104], 90: [105], 91: [106], 92: [107], 93: [108], 94: [109], 95: [110], 96: [111], 97: [112], 98: [113], 99: [114], 100: [115], 101: [116], 102: [117], 103: [118], 104: [119], 105: [120], 106: [123], 107: [124], 108: [125], 109: [126], 110: [127], 111: [128], 112: [129], 113: [130], 114: [131], 115: [132], 116: [133], 117: [134], 118: [135], 119: [136], 120: [137], 121: [139], 122: [140], 123: [142], 124: [143], 125: [144], 126: [145], 127: [146], 128: [147], 129: [148], 130: [149], 131: [150], 132: [151], 133: [152], 134: [153], 135: [154], 136: [155], 137: [156], 138: [158], 139: [159], 140: [160], 141: [161], 142: [162], 143: [163], 144: [164], 145: [165], 146: [166], 147: [167], 148: [168], 149: [169], 150: [170], 151: [171], 152: [172], 153: [174], 154: [175], 155: [176], 156: [177], 157: [178], 158: [179], 159: [180], 160: [181], 161: [182], 162: [183], 163: [184], 164: [185], 165: [186], 166: [187], 167: [188], 168: [189], 169: [190], 170: [191], 171: [192], 172: [193], 173: [194], 174: [195], 175: [196], 176: [197], 177: [198], 178: [199], 179: [200], 180: [201], 181: [202], 182: [203], 183: [204], 184: [205], 185: [206], 186: [207], 187: [208], 188: [209], 189: [210], 190: [211], 191: [212], 192: [213], 193: [214], 194: [215], 195: [216], 196: [217], 197: [219], 198: [220], 199: [221], 200: [222], 201: [223], 202: [224], 203: [226], 204: [227], 205: [228], 206: [229], 207: [230], 208: [231], 209: [233], 210: [234], 
211: [246], 212: [247], 213: [248], 214: [249], 215: [250], 216: [251], 217: [252], 218: [253], 219: [254], 220: [255], 221: [256], 222: [257], 223: [258], 224: [259], 225: [260], 226: [262], 227: [263], 228: [265], 229: [266], 230: [267], 231: [268], 232: [269], 233: [270], 234: [271], 235: [272], 236: [273], 237: [274], 238: [275], 239: [276], 240: [277], 241: [278], 242: [279], 243: [281], 244: [282], 245: [283], 246: [284], 247: [285], 248: [286], 249: [287], 250: [288], 251: [289], 252: [290], 253: [291], 254: [292], 255: [293], 256: [294], 257: [295], 258: [297], 259: [298], 260: [299], 261: [300], 262: [301], 263: [302], 264: [303], 265: [304], 266: [305], 267: [306], 268: [307], 269: [308], 270: [309], 271: [310], 272: [311], 273: [312], 274: [313], 275: [314], 276: [315], 277: [316], 278: [317], 279: [318], 280: [319], 281: [320], 282: [321], 283: [322], 284: [323], 285: [324], 286: [325], 287: [326], 288: [327], 289: [328], 290: [329], 291: [330], 292: [331], 293: [332], 294: [333], 295: [334], 296: [335], 297: [336], 298: [337], 299: [338], 300: [339], 301: [340], 302: [342], 303: [343], 304: [344], 305: [345], 306: [346], 307: [347], 308: [349], 309: [350], 310: [351], 311: [352], 312: [353], 313: [354], 314: [356], 315: [357], 316: [369], 317: [370], 318: [371], 319: [372], 320: [373], 321: [374], 322: [376], 323: [377], 324: [378], 325: [379], 326: [380], 327: [381], 328: [382], 329: [383], 330: [384], 331: [385], 332: [386], 333: [387], 334: [388], 335: [389], 336: [390], 337: [392], 338: [393], 339: [394], 340: [395], 341: [396], 342: [397], 343: [398], 344: [399], 345: [400], 346: [401], 347: [402], 348: [403], 349: [404], 350: [405], 351: [406], 352: [408], 353: [409], 354: [410], 355: [411], 356: [412], 357: [413], 358: [414], 359: [415], 360: [416], 361: [417], 362: [418], 363: [419], 364: [420], 365: [421], 366: [422], 367: [424], 368: [425], 369: [426], 370: [427], 371: [428], 372: [429], 373: [430], 374: [431], 375: [432], 376: [433], 377: 
[434], 378: [436], 379: [437], 380: [439], 381: [440], 382: [446], 383: [447], 384: [448], 385: [449], 386: [450], 387: [451], 388: [452], 389: [453], 390: [454], 391: [456], 392: [457], 393: [458], 394: [459], 395: [460], 396: [461], 397: [462], 398: [463], 399: [464], 400: [466], 401: [467], 402: [468], 403: [469], 404: [470], 405: [471], 406: [472], 407: [473], 408: [474], 409: [476], 410: [477], 411: [478], 412: [479], 413: [480], 414: [481], 415: [483], 416: [484], 417: [485], 418: [486], 419: [487], 420: [488], 421: [492], 422: [493], 423: [494], 424: [495], 425: [496], 426: [497], 427: [499], 428: [500], 429: [501], 430: [502], 431: [503], 432: [504], 433: [505], 434: [506], 435: [507], 436: [508], 437: [509], 438: [510], 439: [511], 440: [512], 441: [513], 442: [515], 443: [516], 444: [517], 445: [518], 446: [519], 447: [520], 448: [521], 449: [522], 450: [523], 451: [524], 452: [525], 453: [526], 454: [527], 455: [528], 456: [529], 457: [531], 458: [532], 459: [533], 460: [534], 461: [535], 462: [536], 463: [537], 464: [538], 465: [539], 466: [540], 467: [541], 468: [542], 469: [543], 470: [544], 471: [545], 472: [547], 473: [548], 474: [549], 475: [550], 476: [551], 477: [552], 478: [553], 479: [554], 480: [555], 481: [556], 482: [557], 483: [559], 484: [560], 485: [562], 486: [563], 487: [569], 488: [570], 489: [571], 490: [572], 491: [573], 492: [574], 493: [575], 494: [576], 495: [577], 496: [579], 497: [580], 498: [581], 499: [582], 500: [583], 501: [584], 502: [585], 503: [586], 504: [587], 505: [589], 506: [590], 507: [591], 508: [592], 509: [593], 510: [594], 511: [595], 512: [596], 513: [597], 514: [599], 515: [600], 516: [601], 517: [602], 518: [603], 519: [604], 520: [606], 521: [607], 522: [608], 523: [609], 524: [610], 525: [611], 526: [615], 527: [616], 528: [617], 529: [618], 530: [619], 531: [620], 532: [622], 533: [623], 534: [624], 535: [625], 536: [626], 537: [627], 538: [628], 539: [629], 540: [630], 541: [631], 542: [632], 543: [633], 
544: [634], 545: [635], 546: [636], 547: [638], 548: [639], 549: [640], 550: [641], 551: [642], 552: [643], 553: [644], 554: [645], 555: [646], 556: [647], 557: [648], 558: [649], 559: [650], 560: [651], 561: [652], 562: [654], 563: [655], 564: [656], 565: [657], 566: [658], 567: [659], 568: [660], 569: [661], 570: [662], 571: [663], 572: [664], 573: [665], 574: [666], 575: [667], 576: [668], 577: [670], 578: [671], 579: [672], 580: [673], 581: [674], 582: [675], 583: [676], 584: [677], 585: [678], 586: [679], 587: [680], 588: [682], 589: [683], 590: [685], 591: [686], 592: [692], 593: [693], 594: [694], 595: [695], 596: [696], 597: [697], 598: [698], 599: [699], 600: [700], 601: [702], 602: [703], 603: [704], 604: [705], 605: [706], 606: [707], 607: [708], 608: [709], 609: [710], 610: [712], 611: [713], 612: [714], 613: [715], 614: [716], 615: [717], 616: [718], 617: [719], 618: [720], 619: [722], 620: [723], 621: [724], 622: [725], 623: [726], 624: [727], 625: [729], 626: [730], 627: [731], 628: [732], 629: [733], 630: [734], 631: [738], 632: [739], 633: [740], 634: [741], 635: [742], 636: [743], 637: [744], 638: [745], 639: [746], 640: [747], 641: [748], 642: [749], 643: [750], 644: [751], 645: [752], 646: [753], 647: [754], 648: [755], 649: [756], 650: [757], 651: [758], 652: [759], 653: [760], 654: [761], 655: [762], 656: [763], 657: [764], 658: [765], 659: [766], 660: [767], 661: [768], 662: [769], 663: [770], 664: [772], 665: [773], 666: [774], 667: [775], 668: [776], 669: [777], 670: [779], 671: [780], 672: [781], 673: [782], 674: [783], 675: [784], 676: [788], 677: [789], 678: [790], 679: [791], 680: [792], 681: [793], 682: [794], 683: [795], 684: [796], 685: [797], 686: [798], 687: [799], 688: [800], 689: [801], 690: [802], 691: [803], 692: [804], 693: [805], 694: [806], 695: [807], 696: [808], 697: [809], 698: [810], 699: [811], 700: [812], 701: [813], 702: [814], 703: [815], 704: [816], 705: [817], 706: [818], 707: [819], 708: [820], 709: [822], 710: 
[823], 711: [824], 712: [825], 713: [826], 714: [827], 715: [829], 716: [830], 717: [831], 718: [832], 719: [833], 720: [834], 721: [838], 722: [839], 723: [841], 724: [842], 725: [844], 726: [845], 727: [851], 728: [852], 729: [853], 730: [854], 731: [855], 732: [856], 733: [857], 734: [858], 735: [859], 736: [861], 737: [862], 738: [863], 739: [864], 740: [865], 741: [866], 742: [867], 743: [868], 744: [869], 745: [871], 746: [872], 747: [873], 748: [874], 749: [875], 750: [876], 751: [877], 752: [878], 753: [879], 754: [881], 755: [882], 756: [883], 757: [884], 758: [885], 759: [886], 760: [888], 761: [889], 762: [890], 763: [891], 764: [892], 765: [893], 766: [894], 767: [895], 768: [897], 769: [898], 770: [900], 771: [901], 772: [907], 773: [908], 774: [909], 775: [910], 776: [911], 777: [912], 778: [913], 779: [914], 780: [915], 781: [917], 782: [918], 783: [919], 784: [920], 785: [921], 786: [922], 787: [923], 788: [924], 789: [925], 790: [927], 791: [928], 792: [929], 793: [930], 794: [931], 795: [932], 796: [933], 797: [934], 798: [935], 799: [937], 800: [938], 801: [939], 802: [940], 803: [941], 804: [942], 805: [944], 806: [945], 807: [946], 808: [947], 809: [948], 810: [949], 811: [950], 812: [951], 813: [953], 814: [954], 815: [956], 816: [957], 817: [963], 818: [964], 819: [965], 820: [966], 821: [967], 822: [968], 823: [969], 824: [970], 825: [971], 826: [973], 827: [974], 828: [975], 829: [976], 830: [977], 831: [978], 832: [979], 833: [980], 834: [981], 835: [983], 836: [984], 837: [985], 838: [986], 839: [987], 840: [988], 841: [989], 842: [990], 843: [991], 844: [993], 845: [994], 846: [995], 847: [996], 848: [997], 849: [998], 850: [1000], 851: [1001], 852: [1002], 853: [1003], 854: [1004], 855: [1005], 856: [1006], 857: [1007], 858: [1009], 859: [1010], 860: [1012], 861: [1013], 862: [1018], 863: [1019], 864: [1021], 865: [1022], 866: [1024], 867: [1025], 868: [1030], 869: [1031], 870: [1033], 871: [1034], 872: [1036], 873: [1037], 874: [1045], 
875: [1046], 876: [1047], 877: [1048], 878: [1049], 879: [1050], 880: [1051], 881: [1052], 882: [1053], 883: [1054], 884: [1055], 885: [1056], 886: [1057], 887: [1058], 888: [1059], 889: [1060], 890: [1061], 891: [1062], 892: [1064], 893: [1065], 894: [1066], 895: [1067], 896: [1068], 897: [1069], 898: [1070], 899: [1071], 900: [1072], 901: [1073], 902: [1074], 903: [1075], 904: [1076], 905: [1077], 906: [1078], 907: [1079], 908: [1080], 909: [1081], 910: [1083], 911: [1084], 912: [1085], 913: [1086], 914: [1087], 915: [1088], 916: [1089], 917: [1090], 918: [1091], 919: [1092], 920: [1093], 921: [1094], 922: [1095], 923: [1096], 924: [1097], 925: [1098], 926: [1099], 927: [1100], 928: [1102], 929: [1103], 930: [1104], 931: [1105], 932: [1106], 933: [1107], 934: [1109], 935: [1110], 936: [1111], 937: [1112], 938: [1113], 939: [1114], 940: [1116], 941: [1117], 942: [1118], 943: [1119], 944: [1120], 945: [1121]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 6: 3, 7: 4, 11: 5, 12: 6, 28: 7, 29: 8, 30: 9, 34: 10, 35: 11, 36: 12, 40: 13, 41: 14, 42: 15, 46: 16, 47: 17, 48: 18, 49: 19, 50: 20, 51: 21, 55: 22, 56: 23, 57: 24, 58: 25, 59: 26, 60: 27, 61: 28, 62: 29, 63: 30, 64: 31, 65: 32, 66: 33, 67: 34, 68: 35, 69: 36, 73: 37, 74: 38, 75: 39, 76: 40, 77: 41, 78: 42, 79: 43, 80: 44, 81: 45, 82: 46, 83: 47, 84: 48, 85: 49, 86: 50, 87: 51, 91: 52, 92: 53, 93: 54, 94: 55, 95: 56, 96: 57, 97: 58, 98: 59, 99: 60, 100: 61, 101: 62, 102: 63, 103: 64, 104: 65, 105: 66, 109: 67, 110: 68, 111: 69, 112: 70, 113: 71, 114: 72, 115: 73, 116: 74, 117: 75, 121: 76, 122: 77, 123: 78, 124: 79, 125: 80, 126: 81, 127: 82, 128: 83, 129: 84, 133: 85, 134: 86, 135: 87, 136: 88, 137: 89, 138: 90, 139: 91, 140: 92, 141: 93, 142: 94, 143: 95, 144: 96, 145: 97, 146: 98, 147: 99, 148: 100, 149: 101, 150: 102, 151: 103, 152: 104, 153: 105, 160: 106, 161: 107, 162: 108, 163: 109, 164: 110, 165: 111, 166: 112, 167: 113, 168: 114, 169: 115, 170: 116, 171: 117, 172: 118, 173: 
119, 174: 120, 178: 121, 179: 122, 183: 123, 184: 124, 185: 125, 186: 126, 187: 127, 188: 128, 189: 129, 190: 130, 191: 131, 192: 132, 193: 133, 194: 134, 195: 135, 196: 136, 197: 137, 201: 138, 202: 139, 203: 140, 204: 141, 205: 142, 206: 143, 207: 144, 208: 145, 209: 146, 210: 147, 211: 148, 212: 149, 213: 150, 214: 151, 215: 152, 219: 153, 220: 154, 221: 155, 222: 156, 223: 157, 224: 158, 225: 159, 226: 160, 227: 161, 228: 162, 229: 163, 230: 164, 231: 165, 232: 166, 233: 167, 234: 168, 235: 169, 236: 170, 237: 171, 238: 172, 239: 173, 240: 174, 241: 175, 242: 176, 243: 177, 244: 178, 245: 179, 246: 180, 247: 181, 248: 182, 249: 183, 250: 184, 251: 185, 252: 186, 253: 187, 254: 188, 255: 189, 256: 190, 257: 191, 258: 192, 259: 193, 260: 194, 261: 195, 262: 196, 266: 197, 267: 198, 268: 199, 269: 200, 270: 201, 271: 202, 275: 203, 276: 204, 277: 205, 278: 206, 279: 207, 280: 208, 284: 209, 285: 210, 319: 211, 320: 212, 321: 213, 322: 214, 323: 215, 324: 216, 325: 217, 326: 218, 327: 219, 328: 220, 329: 221, 330: 222, 331: 223, 332: 224, 333: 225, 337: 226, 338: 227, 342: 228, 343: 229, 344: 230, 345: 231, 346: 232, 347: 233, 348: 234, 349: 235, 350: 236, 351: 237, 352: 238, 353: 239, 354: 240, 355: 241, 356: 242, 360: 243, 361: 244, 362: 245, 363: 246, 364: 247, 365: 248, 366: 249, 367: 250, 368: 251, 369: 252, 370: 253, 371: 254, 372: 255, 373: 256, 374: 257, 378: 258, 379: 259, 380: 260, 381: 261, 382: 262, 383: 263, 384: 264, 385: 265, 386: 266, 387: 267, 388: 268, 389: 269, 390: 270, 391: 271, 392: 272, 393: 273, 394: 274, 395: 275, 396: 276, 397: 277, 398: 278, 399: 279, 400: 280, 401: 281, 402: 282, 403: 283, 404: 284, 405: 285, 406: 286, 407: 287, 408: 288, 409: 289, 410: 290, 411: 291, 412: 292, 413: 293, 414: 294, 415: 295, 416: 296, 417: 297, 418: 298, 419: 299, 420: 300, 421: 301, 425: 302, 426: 303, 427: 304, 428: 305, 429: 306, 430: 307, 434: 308, 435: 309, 436: 310, 437: 311, 438: 312, 439: 313, 443: 314, 444: 315, 478: 316, 479: 317, 480: 318, 481: 
319, 482: 320, 483: 321, 487: 322, 488: 323, 489: 324, 490: 325, 491: 326, 492: 327, 493: 328, 494: 329, 495: 330, 496: 331, 497: 332, 498: 333, 499: 334, 500: 335, 501: 336, 505: 337, 506: 338, 507: 339, 508: 340, 509: 341, 510: 342, 511: 343, 512: 344, 513: 345, 514: 346, 515: 347, 516: 348, 517: 349, 518: 350, 519: 351, 523: 352, 524: 353, 525: 354, 526: 355, 527: 356, 528: 357, 529: 358, 530: 359, 531: 360, 532: 361, 533: 362, 534: 363, 535: 364, 536: 365, 537: 366, 541: 367, 542: 368, 543: 369, 544: 370, 545: 371, 546: 372, 547: 373, 548: 374, 549: 375, 550: 376, 551: 377, 555: 378, 556: 379, 560: 380, 561: 381, 577: 382, 578: 383, 579: 384, 580: 385, 581: 386, 582: 387, 583: 388, 584: 389, 585: 390, 589: 391, 590: 392, 591: 393, 592: 394, 593: 395, 594: 396, 595: 397, 596: 398, 597: 399, 601: 400, 602: 401, 603: 402, 604: 403, 605: 404, 606: 405, 607: 406, 608: 407, 609: 408, 613: 409, 614: 410, 615: 411, 616: 412, 617: 413, 618: 414, 622: 415, 623: 416, 624: 417, 625: 418, 626: 419, 627: 420, 637: 421, 638: 422, 639: 423, 640: 424, 641: 425, 642: 426, 646: 427, 647: 428, 648: 429, 649: 430, 650: 431, 651: 432, 652: 433, 653: 434, 654: 435, 655: 436, 656: 437, 657: 438, 658: 439, 659: 440, 660: 441, 664: 442, 665: 443, 666: 444, 667: 445, 668: 446, 669: 447, 670: 448, 671: 449, 672: 450, 673: 451, 674: 452, 675: 453, 676: 454, 677: 455, 678: 456, 682: 457, 683: 458, 684: 459, 685: 460, 686: 461, 687: 462, 688: 463, 689: 464, 690: 465, 691: 466, 692: 467, 693: 468, 694: 469, 695: 470, 696: 471, 700: 472, 701: 473, 702: 474, 703: 475, 704: 476, 705: 477, 706: 478, 707: 479, 708: 480, 709: 481, 710: 482, 714: 483, 715: 484, 719: 485, 720: 486, 736: 487, 737: 488, 738: 489, 739: 490, 740: 491, 741: 492, 742: 493, 743: 494, 744: 495, 748: 496, 749: 497, 750: 498, 751: 499, 752: 500, 753: 501, 754: 502, 755: 503, 756: 504, 760: 505, 761: 506, 762: 507, 763: 508, 764: 509, 765: 510, 766: 511, 767: 512, 768: 513, 772: 514, 773: 515, 774: 516, 775: 517, 776: 518, 777: 
519, 781: 520, 782: 521, 783: 522, 784: 523, 785: 524, 786: 525, 796: 526, 797: 527, 798: 528, 799: 529, 800: 530, 801: 531, 805: 532, 806: 533, 807: 534, 808: 535, 809: 536, 810: 537, 811: 538, 812: 539, 813: 540, 814: 541, 815: 542, 816: 543, 817: 544, 818: 545, 819: 546, 823: 547, 824: 548, 825: 549, 826: 550, 827: 551, 828: 552, 829: 553, 830: 554, 831: 555, 832: 556, 833: 557, 834: 558, 835: 559, 836: 560, 837: 561, 841: 562, 842: 563, 843: 564, 844: 565, 845: 566, 846: 567, 847: 568, 848: 569, 849: 570, 850: 571, 851: 572, 852: 573, 853: 574, 854: 575, 855: 576, 859: 577, 860: 578, 861: 579, 862: 580, 863: 581, 864: 582, 865: 583, 866: 584, 867: 585, 868: 586, 869: 587, 873: 588, 874: 589, 878: 590, 879: 591, 895: 592, 896: 593, 897: 594, 898: 595, 899: 596, 900: 597, 901: 598, 902: 599, 903: 600, 907: 601, 908: 602, 909: 603, 910: 604, 911: 605, 912: 606, 913: 607, 914: 608, 915: 609, 919: 610, 920: 611, 921: 612, 922: 613, 923: 614, 924: 615, 925: 616, 926: 617, 927: 618, 931: 619, 932: 620, 933: 621, 934: 622, 935: 623, 936: 624, 940: 625, 941: 626, 942: 627, 943: 628, 944: 629, 945: 630, 955: 631, 956: 632, 957: 633, 958: 634, 959: 635, 960: 636, 961: 637, 962: 638, 963: 639, 964: 640, 965: 641, 966: 642, 967: 643, 968: 644, 969: 645, 970: 646, 971: 647, 972: 648, 973: 649, 974: 650, 975: 651, 976: 652, 977: 653, 978: 654, 979: 655, 980: 656, 981: 657, 982: 658, 983: 659, 984: 660, 985: 661, 986: 662, 987: 663, 991: 664, 992: 665, 993: 666, 994: 667, 995: 668, 996: 669, 1000: 670, 1001: 671, 1002: 672, 1003: 673, 1004: 674, 1005: 675, 1015: 676, 1016: 677, 1017: 678, 1018: 679, 1019: 680, 1020: 681, 1021: 682, 1022: 683, 1023: 684, 1024: 685, 1025: 686, 1026: 687, 1027: 688, 1028: 689, 1029: 690, 1030: 691, 1031: 692, 1032: 693, 1033: 694, 1034: 695, 1035: 696, 1036: 697, 1037: 698, 1038: 699, 1039: 700, 1040: 701, 1041: 702, 1042: 703, 1043: 704, 1044: 705, 1045: 706, 1046: 707, 1047: 708, 1051: 709, 1052: 710, 1053: 711, 1054: 712, 1055: 713, 1056: 714, 
1060: 715, 1061: 716, 1062: 717, 1063: 718, 1064: 719, 1065: 720, 1075: 721, 1076: 722, 1080: 723, 1081: 724, 1085: 725, 1086: 726, 1102: 727, 1103: 728, 1104: 729, 1105: 730, 1106: 731, 1107: 732, 1108: 733, 1109: 734, 1110: 735, 1114: 736, 1115: 737, 1116: 738, 1117: 739, 1118: 740, 1119: 741, 1120: 742, 1121: 743, 1122: 744, 1126: 745, 1127: 746, 1128: 747, 1129: 748, 1130: 749, 1131: 750, 1132: 751, 1133: 752, 1134: 753, 1138: 754, 1139: 755, 1140: 756, 1141: 757, 1142: 758, 1143: 759, 1147: 760, 1148: 761, 1149: 762, 1150: 763, 1151: 764, 1152: 765, 1153: 766, 1154: 767, 1158: 768, 1159: 769, 1163: 770, 1164: 771, 1180: 772, 1181: 773, 1182: 774, 1183: 775, 1184: 776, 1185: 777, 1186: 778, 1187: 779, 1188: 780, 1192: 781, 1193: 782, 1194: 783, 1195: 784, 1196: 785, 1197: 786, 1198: 787, 1199: 788, 1200: 789, 1204: 790, 1205: 791, 1206: 792, 1207: 793, 1208: 794, 1209: 795, 1210: 796, 1211: 797, 1212: 798, 1216: 799, 1217: 800, 1218: 801, 1219: 802, 1220: 803, 1221: 804, 1225: 805, 1226: 806, 1227: 807, 1228: 808, 1229: 809, 1230: 810, 1231: 811, 1232: 812, 1236: 813, 1237: 814, 1241: 815, 1242: 816, 1258: 817, 1259: 818, 1260: 819, 1261: 820, 1262: 821, 1263: 822, 1264: 823, 1265: 824, 1266: 825, 1270: 826, 1271: 827, 1272: 828, 1273: 829, 1274: 830, 1275: 831, 1276: 832, 1277: 833, 1278: 834, 1282: 835, 1283: 836, 1284: 837, 1285: 838, 1286: 839, 1287: 840, 1288: 841, 1289: 842, 1290: 843, 1294: 844, 1295: 845, 1296: 846, 1297: 847, 1298: 848, 1299: 849, 1303: 850, 1304: 851, 1305: 852, 1306: 853, 1307: 854, 1308: 855, 1309: 856, 1310: 857, 1314: 858, 1315: 859, 1319: 860, 1320: 861, 1333: 862, 1334: 863, 1338: 864, 1339: 865, 1343: 866, 1344: 867, 1357: 868, 1358: 869, 1362: 870, 1363: 871, 1367: 872, 1368: 873, 1396: 874, 1397: 875, 1398: 876, 1399: 877, 1400: 878, 1401: 879, 1402: 880, 1403: 881, 1404: 882, 1405: 883, 1406: 884, 1407: 885, 1408: 886, 1409: 887, 1410: 888, 1411: 889, 1412: 890, 1413: 891, 1417: 892, 1418: 893, 1419: 894, 1420: 895, 1421: 
896, 1422: 897, 1423: 898, 1424: 899, 1425: 900, 1426: 901, 1427: 902, 1428: 903, 1429: 904, 1430: 905, 1431: 906, 1432: 907, 1433: 908, 1434: 909, 1438: 910, 1439: 911, 1440: 912, 1441: 913, 1442: 914, 1443: 915, 1444: 916, 1445: 917, 1446: 918, 1447: 919, 1448: 920, 1449: 921, 1450: 922, 1451: 923, 1452: 924, 1453: 925, 1454: 926, 1455: 927, 1459: 928, 1460: 929, 1461: 930, 1462: 931, 1463: 932, 1464: 933, 1468: 934, 1469: 935, 1470: 936, 1471: 937, 1472: 938, 1473: 939, 1477: 940, 1478: 941, 1479: 942, 1480: 943, 1481: 944, 1482: 945} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 
68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 
439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 
799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  1536 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 
105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 
469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 
825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [3], 4: [4], 5: [6], 6: [7], 7: [13], 8: [14], 9: [15], 10: [17], 11: [18], 12: [19], 13: [21], 14: [22], 15: [23], 16: [25], 17: [26], 18: [27], 19: [28], 20: [29], 21: [30], 22: [32], 23: [33], 24: [34], 25: [35], 26: [36], 27: [37], 28: [38], 29: [39], 30: [40], 31: [41], 32: [42], 33: [43], 34: [44], 35: [45], 36: [46], 37: [48], 38: [49], 39: [50], 40: [51], 41: [52], 42: [53], 43: [54], 44: [55], 45: [56], 46: [57], 47: [58], 48: [59], 49: [60], 50: [61], 51: [62], 52: [64], 53: [65], 54: [66], 55: [67], 56: [68], 57: [69], 58: [70], 59: [71], 60: [72], 61: [73], 62: [74], 63: [75], 64: [76], 65: [77], 66: [78], 67: [80], 68: [81], 69: [82], 70: [83], 71: [84], 72: [85], 73: [86], 74: [87], 75: [88], 76: [90], 77: [91], 78: [92], 79: [93], 80: [94], 81: [95], 82: [96], 83: [97], 84: [98], 85: [100], 86: [101], 87: [102], 88: [103], 89: [104], 90: [105], 
91: [106], 92: [107], 93: [108], 94: [109], 95: [110], 96: [111], 97: [112], 98: [113], 99: [114], 100: [115], 101: [116], 102: [117], 103: [118], 104: [119], 105: [120], 106: [123], 107: [124], 108: [125], 109: [126], 110: [127], 111: [128], 112: [129], 113: [130], 114: [131], 115: [132], 116: [133], 117: [134], 118: [135], 119: [136], 120: [137], 121: [139], 122: [140], 123: [142], 124: [143], 125: [144], 126: [145], 127: [146], 128: [147], 129: [148], 130: [149], 131: [150], 132: [151], 133: [152], 134: [153], 135: [154], 136: [155], 137: [156], 138: [158], 139: [159], 140: [160], 141: [161], 142: [162], 143: [163], 144: [164], 145: [165], 146: [166], 147: [167], 148: [168], 149: [169], 150: [170], 151: [171], 152: [172], 153: [174], 154: [175], 155: [176], 156: [177], 157: [178], 158: [179], 159: [180], 160: [181], 161: [182], 162: [183], 163: [184], 164: [185], 165: [186], 166: [187], 167: [188], 168: [189], 169: [190], 170: [191], 171: [192], 172: [193], 173: [194], 174: [195], 175: [196], 176: [197], 177: [198], 178: [199], 179: [200], 180: [201], 181: [202], 182: [203], 183: [204], 184: [205], 185: [206], 186: [207], 187: [208], 188: [209], 189: [210], 190: [211], 191: [212], 192: [213], 193: [214], 194: [215], 195: [216], 196: [217], 197: [219], 198: [220], 199: [221], 200: [222], 201: [223], 202: [224], 203: [226], 204: [227], 205: [228], 206: [229], 207: [230], 208: [231], 209: [233], 210: [234], 211: [246], 212: [247], 213: [248], 214: [249], 215: [250], 216: [251], 217: [252], 218: [253], 219: [254], 220: [255], 221: [256], 222: [257], 223: [258], 224: [259], 225: [260], 226: [262], 227: [263], 228: [265], 229: [266], 230: [267], 231: [268], 232: [269], 233: [270], 234: [271], 235: [272], 236: [273], 237: [274], 238: [275], 239: [276], 240: [277], 241: [278], 242: [279], 243: [281], 244: [282], 245: [283], 246: [284], 247: [285], 248: [286], 249: [287], 250: [288], 251: [289], 252: [290], 253: [291], 254: [292], 255: [293], 256: [294], 257: [295], 258: 
[297], 259: [298], 260: [299], 261: [300], 262: [301], 263: [302], 264: [303], 265: [304], 266: [305], 267: [306], 268: [307], 269: [308], 270: [309], 271: [310], 272: [311], 273: [312], 274: [313], 275: [314], 276: [315], 277: [316], 278: [317], 279: [318], 280: [319], 281: [320], 282: [321], 283: [322], 284: [323], 285: [324], 286: [325], 287: [326], 288: [327], 289: [328], 290: [329], 291: [330], 292: [331], 293: [332], 294: [333], 295: [334], 296: [335], 297: [336], 298: [337], 299: [338], 300: [339], 301: [340], 302: [342], 303: [343], 304: [344], 305: [345], 306: [346], 307: [347], 308: [349], 309: [350], 310: [351], 311: [352], 312: [353], 313: [354], 314: [356], 315: [357], 316: [369], 317: [370], 318: [371], 319: [372], 320: [373], 321: [374], 322: [376], 323: [377], 324: [378], 325: [379], 326: [380], 327: [381], 328: [382], 329: [383], 330: [384], 331: [385], 332: [386], 333: [387], 334: [388], 335: [389], 336: [390], 337: [392], 338: [393], 339: [394], 340: [395], 341: [396], 342: [397], 343: [398], 344: [399], 345: [400], 346: [401], 347: [402], 348: [403], 349: [404], 350: [405], 351: [406], 352: [408], 353: [409], 354: [410], 355: [411], 356: [412], 357: [413], 358: [414], 359: [415], 360: [416], 361: [417], 362: [418], 363: [419], 364: [420], 365: [421], 366: [422], 367: [424], 368: [425], 369: [426], 370: [427], 371: [428], 372: [429], 373: [430], 374: [431], 375: [432], 376: [433], 377: [434], 378: [436], 379: [437], 380: [439], 381: [440], 382: [446], 383: [447], 384: [448], 385: [449], 386: [450], 387: [451], 388: [452], 389: [453], 390: [454], 391: [456], 392: [457], 393: [458], 394: [459], 395: [460], 396: [461], 397: [462], 398: [463], 399: [464], 400: [466], 401: [467], 402: [468], 403: [469], 404: [470], 405: [471], 406: [472], 407: [473], 408: [474], 409: [476], 410: [477], 411: [478], 412: [479], 413: [480], 414: [481], 415: [483], 416: [484], 417: [485], 418: [486], 419: [487], 420: [488], 421: [492], 422: [493], 423: [494], 424: [495], 
425: [496], 426: [497], 427: [499], 428: [500], 429: [501], 430: [502], 431: [503], 432: [504], 433: [505], 434: [506], 435: [507], 436: [508], 437: [509], 438: [510], 439: [511], 440: [512], 441: [513], 442: [515], 443: [516], 444: [517], 445: [518], 446: [519], 447: [520], 448: [521], 449: [522], 450: [523], 451: [524], 452: [525], 453: [526], 454: [527], 455: [528], 456: [529], 457: [531], 458: [532], 459: [533], 460: [534], 461: [535], 462: [536], 463: [537], 464: [538], 465: [539], 466: [540], 467: [541], 468: [542], 469: [543], 470: [544], 471: [545], 472: [547], 473: [548], 474: [549], 475: [550], 476: [551], 477: [552], 478: [553], 479: [554], 480: [555], 481: [556], 482: [557], 483: [559], 484: [560], 485: [562], 486: [563], 487: [569], 488: [570], 489: [571], 490: [572], 491: [573], 492: [574], 493: [575], 494: [576], 495: [577], 496: [579], 497: [580], 498: [581], 499: [582], 500: [583], 501: [584], 502: [585], 503: [586], 504: [587], 505: [589], 506: [590], 507: [591], 508: [592], 509: [593], 510: [594], 511: [595], 512: [596], 513: [597], 514: [599], 515: [600], 516: [601], 517: [602], 518: [603], 519: [604], 520: [606], 521: [607], 522: [608], 523: [609], 524: [610], 525: [611], 526: [615], 527: [616], 528: [617], 529: [618], 530: [619], 531: [620], 532: [622], 533: [623], 534: [624], 535: [625], 536: [626], 537: [627], 538: [628], 539: [629], 540: [630], 541: [631], 542: [632], 543: [633], 544: [634], 545: [635], 546: [636], 547: [638], 548: [639], 549: [640], 550: [641], 551: [642], 552: [643], 553: [644], 554: [645], 555: [646], 556: [647], 557: [648], 558: [649], 559: [650], 560: [651], 561: [652], 562: [654], 563: [655], 564: [656], 565: [657], 566: [658], 567: [659], 568: [660], 569: [661], 570: [662], 571: [663], 572: [664], 573: [665], 574: [666], 575: [667], 576: [668], 577: [670], 578: [671], 579: [672], 580: [673], 581: [674], 582: [675], 583: [676], 584: [677], 585: [678], 586: [679], 587: [680], 588: [682], 589: [683], 590: [685], 591: 
[686], 592: [692], 593: [693], 594: [694], 595: [695], 596: [696], 597: [697], 598: [698], 599: [699], 600: [700], 601: [702], 602: [703], 603: [704], 604: [705], 605: [706], 606: [707], 607: [708], 608: [709], 609: [710], 610: [712], 611: [713], 612: [714], 613: [715], 614: [716], 615: [717], 616: [718], 617: [719], 618: [720], 619: [722], 620: [723], 621: [724], 622: [725], 623: [726], 624: [727], 625: [729], 626: [730], 627: [731], 628: [732], 629: [733], 630: [734], 631: [738], 632: [739], 633: [740], 634: [741], 635: [742], 636: [743], 637: [744], 638: [745], 639: [746], 640: [747], 641: [748], 642: [749], 643: [750], 644: [751], 645: [752], 646: [753], 647: [754], 648: [755], 649: [756], 650: [757], 651: [758], 652: [759], 653: [760], 654: [761], 655: [762], 656: [763], 657: [764], 658: [765], 659: [766], 660: [767], 661: [768], 662: [769], 663: [770], 664: [772], 665: [773], 666: [774], 667: [775], 668: [776], 669: [777], 670: [779], 671: [780], 672: [781], 673: [782], 674: [783], 675: [784], 676: [788], 677: [789], 678: [790], 679: [791], 680: [792], 681: [793], 682: [794], 683: [795], 684: [796], 685: [797], 686: [798], 687: [799], 688: [800], 689: [801], 690: [802], 691: [803], 692: [804], 693: [805], 694: [806], 695: [807], 696: [808], 697: [809], 698: [810], 699: [811], 700: [812], 701: [813], 702: [814], 703: [815], 704: [816], 705: [817], 706: [818], 707: [819], 708: [820], 709: [822], 710: [823], 711: [824], 712: [825], 713: [826], 714: [827], 715: [829], 716: [830], 717: [831], 718: [832], 719: [833], 720: [834], 721: [838], 722: [839], 723: [841], 724: [842], 725: [844], 726: [845], 727: [851], 728: [852], 729: [853], 730: [854], 731: [855], 732: [856], 733: [857], 734: [858], 735: [859], 736: [861], 737: [862], 738: [863], 739: [864], 740: [865], 741: [866], 742: [867], 743: [868], 744: [869], 745: [871], 746: [872], 747: [873], 748: [874], 749: [875], 750: [876], 751: [877], 752: [878], 753: [879], 754: [881], 755: [882], 756: [883], 757: [884], 
758: [885], 759: [886], 760: [888], 761: [889], 762: [890], 763: [891], 764: [892], 765: [893], 766: [894], 767: [895], 768: [897], 769: [898], 770: [900], 771: [901], 772: [907], 773: [908], 774: [909], 775: [910], 776: [911], 777: [912], 778: [913], 779: [914], 780: [915], 781: [917], 782: [918], 783: [919], 784: [920], 785: [921], 786: [922], 787: [923], 788: [924], 789: [925], 790: [927], 791: [928], 792: [929], 793: [930], 794: [931], 795: [932], 796: [933], 797: [934], 798: [935], 799: [937], 800: [938], 801: [939], 802: [940], 803: [941], 804: [942], 805: [944], 806: [945], 807: [946], 808: [947], 809: [948], 810: [949], 811: [950], 812: [951], 813: [953], 814: [954], 815: [956], 816: [957], 817: [963], 818: [964], 819: [965], 820: [966], 821: [967], 822: [968], 823: [969], 824: [970], 825: [971], 826: [973], 827: [974], 828: [975], 829: [976], 830: [977], 831: [978], 832: [979], 833: [980], 834: [981], 835: [983], 836: [984], 837: [985], 838: [986], 839: [987], 840: [988], 841: [989], 842: [990], 843: [991], 844: [993], 845: [994], 846: [995], 847: [996], 848: [997], 849: [998], 850: [1000], 851: [1001], 852: [1002], 853: [1003], 854: [1004], 855: [1005], 856: [1006], 857: [1007], 858: [1009], 859: [1010], 860: [1012], 861: [1013], 862: [1018], 863: [1019], 864: [1021], 865: [1022], 866: [1024], 867: [1025], 868: [1030], 869: [1031], 870: [1033], 871: [1034], 872: [1036], 873: [1037], 874: [1045], 875: [1046], 876: [1047], 877: [1048], 878: [1049], 879: [1050], 880: [1051], 881: [1052], 882: [1053], 883: [1054], 884: [1055], 885: [1056], 886: [1057], 887: [1058], 888: [1059], 889: [1060], 890: [1061], 891: [1062], 892: [1064], 893: [1065], 894: [1066], 895: [1067], 896: [1068], 897: [1069], 898: [1070], 899: [1071], 900: [1072], 901: [1073], 902: [1074], 903: [1075], 904: [1076], 905: [1077], 906: [1078], 907: [1079], 908: [1080], 909: [1081], 910: [1083], 911: [1084], 912: [1085], 913: [1086], 914: [1087], 915: [1088], 916: [1089], 917: [1090], 918: 
[1091], 919: [1092], 920: [1093], 921: [1094], 922: [1095], 923: [1096], 924: [1097], 925: [1098], 926: [1099], 927: [1100], 928: [1102], 929: [1103], 930: [1104], 931: [1105], 932: [1106], 933: [1107], 934: [1109], 935: [1110], 936: [1111], 937: [1112], 938: [1113], 939: [1114], 940: [1116], 941: [1117], 942: [1118], 943: [1119], 944: [1120], 945: [1121]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [3], 4: [4], 5: [6], 6: [7], 7: [13], 8: [14], 9: [15], 10: [17], 11: [18], 12: [19], 13: [21], 14: [22], 15: [23], 16: [25], 17: [26], 18: [27], 19: [28], 20: [29], 21: [30], 22: [32], 23: [33], 24: [34], 25: [35], 26: [36], 27: [37], 28: [38], 29: [39], 30: [40], 31: [41], 32: [42], 33: [43], 34: [44], 35: [45], 36: [46], 37: [48], 38: [49], 39: [50], 40: [51], 41: [52], 42: [53], 43: [54], 44: [55], 45: [56], 46: [57], 47: [58], 48: [59], 49: [60], 50: [61], 51: [62], 52: [64], 53: [65], 54: [66], 55: [67], 56: [68], 57: [69], 58: [70], 59: [71], 60: [72], 61: [73], 62: [74], 63: [75], 64: [76], 65: [77], 66: [78], 67: [80], 68: [81], 69: [82], 70: [83], 71: [84], 72: [85], 73: [86], 74: [87], 75: [88], 76: [90], 77: [91], 78: [92], 79: [93], 80: [94], 81: [95], 82: [96], 83: [97], 84: [98], 85: [100], 86: [101], 87: [102], 88: [103], 89: [104], 90: [105], 91: [106], 92: [107], 93: [108], 94: [109], 95: [110], 96: [111], 97: [112], 98: [113], 99: [114], 100: [115], 101: [116], 102: [117], 103: [118], 104: [119], 105: [120], 106: [123], 107: [124], 108: [125], 109: [126], 110: [127], 111: [128], 112: [129], 113: [130], 114: [131], 115: [132], 116: [133], 117: [134], 118: [135], 119: [136], 120: [137], 121: [139], 122: [140], 123: [142], 124: [143], 125: [144], 126: [145], 127: [146], 128: [147], 129: [148], 130: [149], 131: [150], 132: [151], 133: [152], 134: [153], 135: [154], 136: [155], 137: [156], 138: [158], 139: [159], 140: [160], 141: [161], 142: [162], 143: [163], 144: [164], 145: [165], 146: [166], 147: [167], 148: 
[168], 149: [169], 150: [170], 151: [171], 152: [172], 153: [174], 154: [175], 155: [176], 156: [177], 157: [178], 158: [179], 159: [180], 160: [181], 161: [182], 162: [183], 163: [184], 164: [185], 165: [186], 166: [187], 167: [188], 168: [189], 169: [190], 170: [191], 171: [192], 172: [193], 173: [194], 174: [195], 175: [196], 176: [197], 177: [198], 178: [199], 179: [200], 180: [201], 181: [202], 182: [203], 183: [204], 184: [205], 185: [206], 186: [207], 187: [208], 188: [209], 189: [210], 190: [211], 191: [212], 192: [213], 193: [214], 194: [215], 195: [216], 196: [217], 197: [219], 198: [220], 199: [221], 200: [222], 201: [223], 202: [224], 203: [226], 204: [227], 205: [228], 206: [229], 207: [230], 208: [231], 209: [233], 210: [234], 211: [246], 212: [247], 213: [248], 214: [249], 215: [250], 216: [251], 217: [252], 218: [253], 219: [254], 220: [255], 221: [256], 222: [257], 223: [258], 224: [259], 225: [260], 226: [262], 227: [263], 228: [265], 229: [266], 230: [267], 231: [268], 232: [269], 233: [270], 234: [271], 235: [272], 236: [273], 237: [274], 238: [275], 239: [276], 240: [277], 241: [278], 242: [279], 243: [281], 244: [282], 245: [283], 246: [284], 247: [285], 248: [286], 249: [287], 250: [288], 251: [289], 252: [290], 253: [291], 254: [292], 255: [293], 256: [294], 257: [295], 258: [297], 259: [298], 260: [299], 261: [300], 262: [301], 263: [302], 264: [303], 265: [304], 266: [305], 267: [306], 268: [307], 269: [308], 270: [309], 271: [310], 272: [311], 273: [312], 274: [313], 275: [314], 276: [315], 277: [316], 278: [317], 279: [318], 280: [319], 281: [320], 282: [321], 283: [322], 284: [323], 285: [324], 286: [325], 287: [326], 288: [327], 289: [328], 290: [329], 291: [330], 292: [331], 293: [332], 294: [333], 295: [334], 296: [335], 297: [336], 298: [337], 299: [338], 300: [339], 301: [340], 302: [342], 303: [343], 304: [344], 305: [345], 306: [346], 307: [347], 308: [349], 309: [350], 310: [351], 311: [352], 312: [353], 313: [354], 314: [356], 
315: [357], 316: [369], 317: [370], 318: [371], 319: [372], 320: [373], 321: [374], 322: [376], 323: [377], 324: [378], 325: [379], 326: [380], 327: [381], 328: [382], 329: [383], 330: [384], 331: [385], 332: [386], 333: [387], 334: [388], 335: [389], 336: [390], 337: [392], 338: [393], 339: [394], 340: [395], 341: [396], 342: [397], 343: [398], 344: [399], 345: [400], 346: [401], 347: [402], 348: [403], 349: [404], 350: [405], 351: [406], 352: [408], 353: [409], 354: [410], 355: [411], 356: [412], 357: [413], 358: [414], 359: [415], 360: [416], 361: [417], 362: [418], 363: [419], 364: [420], 365: [421], 366: [422], 367: [424], 368: [425], 369: [426], 370: [427], 371: [428], 372: [429], 373: [430], 374: [431], 375: [432], 376: [433], 377: [434], 378: [436], 379: [437], 380: [439], 381: [440], 382: [446], 383: [447], 384: [448], 385: [449], 386: [450], 387: [451], 388: [452], 389: [453], 390: [454], 391: [456], 392: [457], 393: [458], 394: [459], 395: [460], 396: [461], 397: [462], 398: [463], 399: [464], 400: [466], 401: [467], 402: [468], 403: [469], 404: [470], 405: [471], 406: [472], 407: [473], 408: [474], 409: [476], 410: [477], 411: [478], 412: [479], 413: [480], 414: [481], 415: [483], 416: [484], 417: [485], 418: [486], 419: [487], 420: [488], 421: [492], 422: [493], 423: [494], 424: [495], 425: [496], 426: [497], 427: [499], 428: [500], 429: [501], 430: [502], 431: [503], 432: [504], 433: [505], 434: [506], 435: [507], 436: [508], 437: [509], 438: [510], 439: [511], 440: [512], 441: [513], 442: [515], 443: [516], 444: [517], 445: [518], 446: [519], 447: [520], 448: [521], 449: [522], 450: [523], 451: [524], 452: [525], 453: [526], 454: [527], 455: [528], 456: [529], 457: [531], 458: [532], 459: [533], 460: [534], 461: [535], 462: [536], 463: [537], 464: [538], 465: [539], 466: [540], 467: [541], 468: [542], 469: [543], 470: [544], 471: [545], 472: [547], 473: [548], 474: [549], 475: [550], 476: [551], 477: [552], 478: [553], 479: [554], 480: [555], 481: 
[556], 482: [557], 483: [559], 484: [560], 485: [562], 486: [563], 487: [569], 488: [570], 489: [571], 490: [572], 491: [573], 492: [574], 493: [575], 494: [576], 495: [577], 496: [579], 497: [580], 498: [581], 499: [582], 500: [583], 501: [584], 502: [585], 503: [586], 504: [587], 505: [589], 506: [590], 507: [591], 508: [592], 509: [593], 510: [594], 511: [595], 512: [596], 513: [597], 514: [599], 515: [600], 516: [601], 517: [602], 518: [603], 519: [604], 520: [606], 521: [607], 522: [608], 523: [609], 524: [610], 525: [611], 526: [615], 527: [616], 528: [617], 529: [618], 530: [619], 531: [620], 532: [622], 533: [623], 534: [624], 535: [625], 536: [626], 537: [627], 538: [628], 539: [629], 540: [630], 541: [631], 542: [632], 543: [633], 544: [634], 545: [635], 546: [636], 547: [638], 548: [639], 549: [640], 550: [641], 551: [642], 552: [643], 553: [644], 554: [645], 555: [646], 556: [647], 557: [648], 558: [649], 559: [650], 560: [651], 561: [652], 562: [654], 563: [655], 564: [656], 565: [657], 566: [658], 567: [659], 568: [660], 569: [661], 570: [662], 571: [663], 572: [664], 573: [665], 574: [666], 575: [667], 576: [668], 577: [670], 578: [671], 579: [672], 580: [673], 581: [674], 582: [675], 583: [676], 584: [677], 585: [678], 586: [679], 587: [680], 588: [682], 589: [683], 590: [685], 591: [686], 592: [692], 593: [693], 594: [694], 595: [695], 596: [696], 597: [697], 598: [698], 599: [699], 600: [700], 601: [702], 602: [703], 603: [704], 604: [705], 605: [706], 606: [707], 607: [708], 608: [709], 609: [710], 610: [712], 611: [713], 612: [714], 613: [715], 614: [716], 615: [717], 616: [718], 617: [719], 618: [720], 619: [722], 620: [723], 621: [724], 622: [725], 623: [726], 624: [727], 625: [729], 626: [730], 627: [731], 628: [732], 629: [733], 630: [734], 631: [738], 632: [739], 633: [740], 634: [741], 635: [742], 636: [743], 637: [744], 638: [745], 639: [746], 640: [747], 641: [748], 642: [749], 643: [750], 644: [751], 645: [752], 646: [753], 647: [754], 
648: [755], 649: [756], 650: [757], 651: [758], 652: [759], 653: [760], 654: [761], 655: [762], 656: [763], 657: [764], 658: [765], 659: [766], 660: [767], 661: [768], 662: [769], 663: [770], 664: [772], 665: [773], 666: [774], 667: [775], 668: [776], 669: [777], 670: [779], 671: [780], 672: [781], 673: [782], 674: [783], 675: [784], 676: [788], 677: [789], 678: [790], 679: [791], 680: [792], 681: [793], 682: [794], 683: [795], 684: [796], 685: [797], 686: [798], 687: [799], 688: [800], 689: [801], 690: [802], 691: [803], 692: [804], 693: [805], 694: [806], 695: [807], 696: [808], 697: [809], 698: [810], 699: [811], 700: [812], 701: [813], 702: [814], 703: [815], 704: [816], 705: [817], 706: [818], 707: [819], 708: [820], 709: [822], 710: [823], 711: [824], 712: [825], 713: [826], 714: [827], 715: [829], 716: [830], 717: [831], 718: [832], 719: [833], 720: [834], 721: [838], 722: [839], 723: [841], 724: [842], 725: [844], 726: [845], 727: [851], 728: [852], 729: [853], 730: [854], 731: [855], 732: [856], 733: [857], 734: [858], 735: [859], 736: [861], 737: [862], 738: [863], 739: [864], 740: [865], 741: [866], 742: [867], 743: [868], 744: [869], 745: [871], 746: [872], 747: [873], 748: [874], 749: [875], 750: [876], 751: [877], 752: [878], 753: [879], 754: [881], 755: [882], 756: [883], 757: [884], 758: [885], 759: [886], 760: [888], 761: [889], 762: [890], 763: [891], 764: [892], 765: [893], 766: [894], 767: [895], 768: [897], 769: [898], 770: [900], 771: [901], 772: [907], 773: [908], 774: [909], 775: [910], 776: [911], 777: [912], 778: [913], 779: [914], 780: [915], 781: [917], 782: [918], 783: [919], 784: [920], 785: [921], 786: [922], 787: [923], 788: [924], 789: [925], 790: [927], 791: [928], 792: [929], 793: [930], 794: [931], 795: [932], 796: [933], 797: [934], 798: [935], 799: [937], 800: [938], 801: [939], 802: [940], 803: [941], 804: [942], 805: [944], 806: [945], 807: [946], 808: [947], 809: [948], 810: [949], 811: [950], 812: [951], 813: [953], 814: 
[954], 815: [956], 816: [957], 817: [963], 818: [964], 819: [965], 820: [966], 821: [967], 822: [968], 823: [969], 824: [970], 825: [971], 826: [973], 827: [974], 828: [975], 829: [976], 830: [977], 831: [978], 832: [979], 833: [980], 834: [981], 835: [983], 836: [984], 837: [985], 838: [986], 839: [987], 840: [988], 841: [989], 842: [990], 843: [991], 844: [993], 845: [994], 846: [995], 847: [996], 848: [997], 849: [998], 850: [1000], 851: [1001], 852: [1002], 853: [1003], 854: [1004], 855: [1005], 856: [1006], 857: [1007], 858: [1009], 859: [1010], 860: [1012], 861: [1013], 862: [1018], 863: [1019], 864: [1021], 865: [1022], 866: [1024], 867: [1025], 868: [1030], 869: [1031], 870: [1033], 871: [1034], 872: [1036], 873: [1037], 874: [1045], 875: [1046], 876: [1047], 877: [1048], 878: [1049], 879: [1050], 880: [1051], 881: [1052], 882: [1053], 883: [1054], 884: [1055], 885: [1056], 886: [1057], 887: [1058], 888: [1059], 889: [1060], 890: [1061], 891: [1062], 892: [1064], 893: [1065], 894: [1066], 895: [1067], 896: [1068], 897: [1069], 898: [1070], 899: [1071], 900: [1072], 901: [1073], 902: [1074], 903: [1075], 904: [1076], 905: [1077], 906: [1078], 907: [1079], 908: [1080], 909: [1081], 910: [1083], 911: [1084], 912: [1085], 913: [1086], 914: [1087], 915: [1088], 916: [1089], 917: [1090], 918: [1091], 919: [1092], 920: [1093], 921: [1094], 922: [1095], 923: [1096], 924: [1097], 925: [1098], 926: [1099], 927: [1100], 928: [1102], 929: [1103], 930: [1104], 931: [1105], 932: [1106], 933: [1107], 934: [1109], 935: [1110], 936: [1111], 937: [1112], 938: [1113], 939: [1114], 940: [1116], 941: [1117], 942: [1118], 943: [1119], 944: [1120], 945: [1121]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 6: 3, 7: 4, 11: 5, 12: 6, 28: 7, 29: 8, 30: 9, 34: 10, 35: 11, 36: 12, 40: 13, 41: 14, 42: 15, 46: 16, 47: 17, 48: 18, 49: 19, 50: 20, 51: 21, 55: 22, 56: 23, 57: 24, 58: 25, 59: 26, 60: 27, 61: 28, 62: 29, 63: 30, 64: 31, 65: 32, 66: 33, 67: 34, 68: 
35, 69: 36, 73: 37, 74: 38, 75: 39, 76: 40, 77: 41, 78: 42, 79: 43, 80: 44, 81: 45, 82: 46, 83: 47, 84: 48, 85: 49, 86: 50, 87: 51, 91: 52, 92: 53, 93: 54, 94: 55, 95: 56, 96: 57, 97: 58, 98: 59, 99: 60, 100: 61, 101: 62, 102: 63, 103: 64, 104: 65, 105: 66, 109: 67, 110: 68, 111: 69, 112: 70, 113: 71, 114: 72, 115: 73, 116: 74, 117: 75, 121: 76, 122: 77, 123: 78, 124: 79, 125: 80, 126: 81, 127: 82, 128: 83, 129: 84, 133: 85, 134: 86, 135: 87, 136: 88, 137: 89, 138: 90, 139: 91, 140: 92, 141: 93, 142: 94, 143: 95, 144: 96, 145: 97, 146: 98, 147: 99, 148: 100, 149: 101, 150: 102, 151: 103, 152: 104, 153: 105, 160: 106, 161: 107, 162: 108, 163: 109, 164: 110, 165: 111, 166: 112, 167: 113, 168: 114, 169: 115, 170: 116, 171: 117, 172: 118, 173: 119, 174: 120, 178: 121, 179: 122, 183: 123, 184: 124, 185: 125, 186: 126, 187: 127, 188: 128, 189: 129, 190: 130, 191: 131, 192: 132, 193: 133, 194: 134, 195: 135, 196: 136, 197: 137, 201: 138, 202: 139, 203: 140, 204: 141, 205: 142, 206: 143, 207: 144, 208: 145, 209: 146, 210: 147, 211: 148, 212: 149, 213: 150, 214: 151, 215: 152, 219: 153, 220: 154, 221: 155, 222: 156, 223: 157, 224: 158, 225: 159, 226: 160, 227: 161, 228: 162, 229: 163, 230: 164, 231: 165, 232: 166, 233: 167, 234: 168, 235: 169, 236: 170, 237: 171, 238: 172, 239: 173, 240: 174, 241: 175, 242: 176, 243: 177, 244: 178, 245: 179, 246: 180, 247: 181, 248: 182, 249: 183, 250: 184, 251: 185, 252: 186, 253: 187, 254: 188, 255: 189, 256: 190, 257: 191, 258: 192, 259: 193, 260: 194, 261: 195, 262: 196, 266: 197, 267: 198, 268: 199, 269: 200, 270: 201, 271: 202, 275: 203, 276: 204, 277: 205, 278: 206, 279: 207, 280: 208, 284: 209, 285: 210, 319: 211, 320: 212, 321: 213, 322: 214, 323: 215, 324: 216, 325: 217, 326: 218, 327: 219, 328: 220, 329: 221, 330: 222, 331: 223, 332: 224, 333: 225, 337: 226, 338: 227, 342: 228, 343: 229, 344: 230, 345: 231, 346: 232, 347: 233, 348: 234, 349: 235, 350: 236, 351: 237, 352: 238, 353: 239, 354: 240, 355: 241, 356: 242, 360: 243, 361: 
244, 362: 245, 363: 246, 364: 247, 365: 248, 366: 249, 367: 250, 368: 251, 369: 252, 370: 253, 371: 254, 372: 255, 373: 256, 374: 257, 378: 258, 379: 259, 380: 260, 381: 261, 382: 262, 383: 263, 384: 264, 385: 265, 386: 266, 387: 267, 388: 268, 389: 269, 390: 270, 391: 271, 392: 272, 393: 273, 394: 274, 395: 275, 396: 276, 397: 277, 398: 278, 399: 279, 400: 280, 401: 281, 402: 282, 403: 283, 404: 284, 405: 285, 406: 286, 407: 287, 408: 288, 409: 289, 410: 290, 411: 291, 412: 292, 413: 293, 414: 294, 415: 295, 416: 296, 417: 297, 418: 298, 419: 299, 420: 300, 421: 301, 425: 302, 426: 303, 427: 304, 428: 305, 429: 306, 430: 307, 434: 308, 435: 309, 436: 310, 437: 311, 438: 312, 439: 313, 443: 314, 444: 315, 478: 316, 479: 317, 480: 318, 481: 319, 482: 320, 483: 321, 487: 322, 488: 323, 489: 324, 490: 325, 491: 326, 492: 327, 493: 328, 494: 329, 495: 330, 496: 331, 497: 332, 498: 333, 499: 334, 500: 335, 501: 336, 505: 337, 506: 338, 507: 339, 508: 340, 509: 341, 510: 342, 511: 343, 512: 344, 513: 345, 514: 346, 515: 347, 516: 348, 517: 349, 518: 350, 519: 351, 523: 352, 524: 353, 525: 354, 526: 355, 527: 356, 528: 357, 529: 358, 530: 359, 531: 360, 532: 361, 533: 362, 534: 363, 535: 364, 536: 365, 537: 366, 541: 367, 542: 368, 543: 369, 544: 370, 545: 371, 546: 372, 547: 373, 548: 374, 549: 375, 550: 376, 551: 377, 555: 378, 556: 379, 560: 380, 561: 381, 577: 382, 578: 383, 579: 384, 580: 385, 581: 386, 582: 387, 583: 388, 584: 389, 585: 390, 589: 391, 590: 392, 591: 393, 592: 394, 593: 395, 594: 396, 595: 397, 596: 398, 597: 399, 601: 400, 602: 401, 603: 402, 604: 403, 605: 404, 606: 405, 607: 406, 608: 407, 609: 408, 613: 409, 614: 410, 615: 411, 616: 412, 617: 413, 618: 414, 622: 415, 623: 416, 624: 417, 625: 418, 626: 419, 627: 420, 637: 421, 638: 422, 639: 423, 640: 424, 641: 425, 642: 426, 646: 427, 647: 428, 648: 429, 649: 430, 650: 431, 651: 432, 652: 433, 653: 434, 654: 435, 655: 436, 656: 437, 657: 438, 658: 439, 659: 440, 660: 441, 664: 442, 665: 443, 666: 
444, 667: 445, 668: 446, 669: 447, 670: 448, 671: 449, 672: 450, 673: 451, 674: 452, 675: 453, 676: 454, 677: 455, 678: 456, 682: 457, 683: 458, 684: 459, 685: 460, 686: 461, 687: 462, 688: 463, 689: 464, 690: 465, 691: 466, 692: 467, 693: 468, 694: 469, 695: 470, 696: 471, 700: 472, 701: 473, 702: 474, 703: 475, 704: 476, 705: 477, 706: 478, 707: 479, 708: 480, 709: 481, 710: 482, 714: 483, 715: 484, 719: 485, 720: 486, 736: 487, 737: 488, 738: 489, 739: 490, 740: 491, 741: 492, 742: 493, 743: 494, 744: 495, 748: 496, 749: 497, 750: 498, 751: 499, 752: 500, 753: 501, 754: 502, 755: 503, 756: 504, 760: 505, 761: 506, 762: 507, 763: 508, 764: 509, 765: 510, 766: 511, 767: 512, 768: 513, 772: 514, 773: 515, 774: 516, 775: 517, 776: 518, 777: 519, 781: 520, 782: 521, 783: 522, 784: 523, 785: 524, 786: 525, 796: 526, 797: 527, 798: 528, 799: 529, 800: 530, 801: 531, 805: 532, 806: 533, 807: 534, 808: 535, 809: 536, 810: 537, 811: 538, 812: 539, 813: 540, 814: 541, 815: 542, 816: 543, 817: 544, 818: 545, 819: 546, 823: 547, 824: 548, 825: 549, 826: 550, 827: 551, 828: 552, 829: 553, 830: 554, 831: 555, 832: 556, 833: 557, 834: 558, 835: 559, 836: 560, 837: 561, 841: 562, 842: 563, 843: 564, 844: 565, 845: 566, 846: 567, 847: 568, 848: 569, 849: 570, 850: 571, 851: 572, 852: 573, 853: 574, 854: 575, 855: 576, 859: 577, 860: 578, 861: 579, 862: 580, 863: 581, 864: 582, 865: 583, 866: 584, 867: 585, 868: 586, 869: 587, 873: 588, 874: 589, 878: 590, 879: 591, 895: 592, 896: 593, 897: 594, 898: 595, 899: 596, 900: 597, 901: 598, 902: 599, 903: 600, 907: 601, 908: 602, 909: 603, 910: 604, 911: 605, 912: 606, 913: 607, 914: 608, 915: 609, 919: 610, 920: 611, 921: 612, 922: 613, 923: 614, 924: 615, 925: 616, 926: 617, 927: 618, 931: 619, 932: 620, 933: 621, 934: 622, 935: 623, 936: 624, 940: 625, 941: 626, 942: 627, 943: 628, 944: 629, 945: 630, 955: 631, 956: 632, 957: 633, 958: 634, 959: 635, 960: 636, 961: 637, 962: 638, 963: 639, 964: 640, 965: 641, 966: 642, 967: 643, 968: 
644, 969: 645, 970: 646, 971: 647, 972: 648, 973: 649, 974: 650, 975: 651, 976: 652, 977: 653, 978: 654, 979: 655, 980: 656, 981: 657, 982: 658, 983: 659, 984: 660, 985: 661, 986: 662, 987: 663, 991: 664, 992: 665, 993: 666, 994: 667, 995: 668, 996: 669, 1000: 670, 1001: 671, 1002: 672, 1003: 673, 1004: 674, 1005: 675, 1015: 676, 1016: 677, 1017: 678, 1018: 679, 1019: 680, 1020: 681, 1021: 682, 1022: 683, 1023: 684, 1024: 685, 1025: 686, 1026: 687, 1027: 688, 1028: 689, 1029: 690, 1030: 691, 1031: 692, 1032: 693, 1033: 694, 1034: 695, 1035: 696, 1036: 697, 1037: 698, 1038: 699, 1039: 700, 1040: 701, 1041: 702, 1042: 703, 1043: 704, 1044: 705, 1045: 706, 1046: 707, 1047: 708, 1051: 709, 1052: 710, 1053: 711, 1054: 712, 1055: 713, 1056: 714, 1060: 715, 1061: 716, 1062: 717, 1063: 718, 1064: 719, 1065: 720, 1075: 721, 1076: 722, 1080: 723, 1081: 724, 1085: 725, 1086: 726, 1102: 727, 1103: 728, 1104: 729, 1105: 730, 1106: 731, 1107: 732, 1108: 733, 1109: 734, 1110: 735, 1114: 736, 1115: 737, 1116: 738, 1117: 739, 1118: 740, 1119: 741, 1120: 742, 1121: 743, 1122: 744, 1126: 745, 1127: 746, 1128: 747, 1129: 748, 1130: 749, 1131: 750, 1132: 751, 1133: 752, 1134: 753, 1138: 754, 1139: 755, 1140: 756, 1141: 757, 1142: 758, 1143: 759, 1147: 760, 1148: 761, 1149: 762, 1150: 763, 1151: 764, 1152: 765, 1153: 766, 1154: 767, 1158: 768, 1159: 769, 1163: 770, 1164: 771, 1180: 772, 1181: 773, 1182: 774, 1183: 775, 1184: 776, 1185: 777, 1186: 778, 1187: 779, 1188: 780, 1192: 781, 1193: 782, 1194: 783, 1195: 784, 1196: 785, 1197: 786, 1198: 787, 1199: 788, 1200: 789, 1204: 790, 1205: 791, 1206: 792, 1207: 793, 1208: 794, 1209: 795, 1210: 796, 1211: 797, 1212: 798, 1216: 799, 1217: 800, 1218: 801, 1219: 802, 1220: 803, 1221: 804, 1225: 805, 1226: 806, 1227: 807, 1228: 808, 1229: 809, 1230: 810, 1231: 811, 1232: 812, 1236: 813, 1237: 814, 1241: 815, 1242: 816, 1258: 817, 1259: 818, 1260: 819, 1261: 820, 1262: 821, 1263: 822, 1264: 823, 1265: 824, 1266: 825, 1270: 826, 1271: 827, 1272: 
828, 1273: 829, 1274: 830, 1275: 831, 1276: 832, 1277: 833, 1278: 834, 1282: 835, 1283: 836, 1284: 837, 1285: 838, 1286: 839, 1287: 840, 1288: 841, 1289: 842, 1290: 843, 1294: 844, 1295: 845, 1296: 846, 1297: 847, 1298: 848, 1299: 849, 1303: 850, 1304: 851, 1305: 852, 1306: 853, 1307: 854, 1308: 855, 1309: 856, 1310: 857, 1314: 858, 1315: 859, 1319: 860, 1320: 861, 1333: 862, 1334: 863, 1338: 864, 1339: 865, 1343: 866, 1344: 867, 1357: 868, 1358: 869, 1362: 870, 1363: 871, 1367: 872, 1368: 873, 1396: 874, 1397: 875, 1398: 876, 1399: 877, 1400: 878, 1401: 879, 1402: 880, 1403: 881, 1404: 882, 1405: 883, 1406: 884, 1407: 885, 1408: 886, 1409: 887, 1410: 888, 1411: 889, 1412: 890, 1413: 891, 1417: 892, 1418: 893, 1419: 894, 1420: 895, 1421: 896, 1422: 897, 1423: 898, 1424: 899, 1425: 900, 1426: 901, 1427: 902, 1428: 903, 1429: 904, 1430: 905, 1431: 906, 1432: 907, 1433: 908, 1434: 909, 1438: 910, 1439: 911, 1440: 912, 1441: 913, 1442: 914, 1443: 915, 1444: 916, 1445: 917, 1446: 918, 1447: 919, 1448: 920, 1449: 921, 1450: 922, 1451: 923, 1452: 924, 1453: 925, 1454: 926, 1455: 927, 1459: 928, 1460: 929, 1461: 930, 1462: 931, 1463: 932, 1464: 933, 1468: 934, 1469: 935, 1470: 936, 1471: 937, 1472: 938, 1473: 939, 1477: 940, 1478: 941, 1479: 942, 1480: 943, 1481: 944, 1482: 945} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 5, 1, 5, 
5) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 
309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 
668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number 
=  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.616 s -Wrote files for 2281 helas calls in 46.335 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.434 s +Wrote files for 2281 helas calls in 46.235 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.313 s +ALOHA: aloha creates 5 routines in 0.315 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -357,6 +357,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m57.671s -user 0m56.600s -sys 0m0.875s +real 0m57.407s +user 0m56.369s +sys 0m0.817s diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 395f0229dc..41f7811981 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0055789947509765625  +DEBUG: model prefixing takes 0.005392789840698242  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.839 s +1 processes with 1240 diagrams generated in 1.829 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -171,43 +171,43 @@ INFO: Processing color information for process: g g > t t~ g g g @1 DEBUG: type(subproc_group)= [output.py at line 188]  DEBUG: type(fortran_model)= [output.py at line 189]  DEBUG: type(me)= me=0 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: proc_id =  0 [model_handling.py at line 1046]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: proc_id =  0 [model_handling.py at line 1052]  INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1309]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1315]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc -DEBUG: Entering 
PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  1536 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1171]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  False [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  
1536 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1344]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.465 s +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1350]  +Generated helas calls for 1 subprocesses (1240 
diagrams) in 6.427 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -215,7 +215,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.342 s +ALOHA: aloha creates 5 routines in 0.341 s VVV1 VVV1 FFV1 @@ -245,6 +245,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m13.689s -user 0m12.660s -sys 0m0.103s +real 0m12.777s +user 0m12.625s +sys 0m0.105s diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index fb5e23da39..2527bafd35 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005346775054931641  +DEBUG: model prefixing takes 0.005446910858154297  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.077 s +8 processes with 40 diagrams generated in 0.076 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -196,38 +196,38 @@ INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at 
line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1347]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -235,46 +235,46 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: 
self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s -Wrote files for 32 helas calls in 0.227 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s +Wrote files for 32 helas calls in 0.226 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines @@ -283,7 +283,7 @@ ALOHA: aloha creates 2 routines in 0.142 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.130 s +ALOHA: aloha creates 4 routines in 0.129 s FFV1 FFV1 FFV1 @@ -409,6 +409,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.776s -user 0m2.282s -sys 0m0.285s +real 0m2.593s +user 0m2.241s +sys 0m0.317s diff --git a/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.cc b/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.cc index 3452d1e8da..d5eda63ee0 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.cc +++ b/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.cc @@ -111,8 +111,8 @@ Parameters_sm::setDependentParameters() // now computed event-by-event (running void Parameters_sm::setDependentCouplings() // now computed event-by-event (running alphas #373) { - GC_10 = -G; GC_11 = mdl_complexi * G; + GC_10 = -G; } */ @@ -195,7 +195,7 @@ void Parameters_sm::printDependentCouplings() // now computed event-by-event (running alphas #373) { std::cout << "sm model couplings dependent on event kinematics:" << std::endl; - std::cout << std::setw( 20 ) << "GC_10 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_10 << std::endl; std::cout << std::setw( 20 ) << "GC_11 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_11 << std::endl; + std::cout << std::setw( 20 ) << "GC_10 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_10 << std::endl; } */ diff --git a/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h b/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h index 4f6f322ed9..0c77cf58f0 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h +++ b/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h @@ -54,7 +54,7 @@ namespace mg5amcCpu //double mdl_sqrt__aS, G, mdl_G__exp__2; // now computed event-by-event (running alphas #373) // Model couplings dependent on aS - //cxsmpl GC_10, GC_11; // now computed event-by-event (running alphas #373) + //cxsmpl GC_11, GC_10; // now computed event-by-event (running alphas #373) // Set parameters that are unchanged during the run void setIndependentParameters( SLHAReader& slha ); @@ -194,8 +194,8 @@ namespace mg5amcCpu //constexpr double mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event 
(running alphas #373) // Model couplings dependent on aS - //constexpr cxsmpl GC_10 = -G; // now computed event-by-event (running alphas #373) //constexpr cxsmpl GC_11 = mdl_complexi * G; // now computed event-by-event (running alphas #373) + //constexpr cxsmpl GC_10 = -G; // now computed event-by-event (running alphas #373) // Print parameters that are unchanged during the run void printIndependentParameters(); @@ -226,12 +226,12 @@ namespace mg5amcCpu namespace Parameters_sm_dependentCouplings { constexpr size_t ndcoup = 2; // #couplings that vary event by event because they depend on the running alphas QCD - constexpr size_t idcoup_GC_10 = 0; - constexpr size_t idcoup_GC_11 = 1; + constexpr size_t idcoup_GC_11 = 0; + constexpr size_t idcoup_GC_10 = 1; struct DependentCouplings_sv { - cxtype_sv GC_10; cxtype_sv GC_11; + cxtype_sv GC_10; }; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-variable" // e.g. <> @@ -257,8 +257,8 @@ namespace mg5amcCpu //const fptype_sv G = 2. 
* mdl_sqrt__aS * constexpr_sqrt( M_PI ); const fptype_sv mdl_G__exp__2 = ( ( G ) * ( G ) ); // Model couplings dependent on aS - out.GC_10 = -G; out.GC_11 = cI * G; + out.GC_10 = -G; } // End SM implementation - no special handling of vectors of floats as in EFT (#439) return out; @@ -293,12 +293,12 @@ namespace mg5amcCpu using namespace Parameters_sm_dependentCouplings; const fptype_sv& gs_sv = G_ACCESS::kernelAccessConst( gs ); DependentCouplings_sv couplings_sv = computeDependentCouplings_fromG( gs_sv ); - fptype* GC_10s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_10 ); fptype* GC_11s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_11 ); - cxtype_sv_ref GC_10s_sv = C_ACCESS::kernelAccess( GC_10s ); + fptype* GC_10s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_10 ); cxtype_sv_ref GC_11s_sv = C_ACCESS::kernelAccess( GC_11s ); - GC_10s_sv = couplings_sv.GC_10; + cxtype_sv_ref GC_10s_sv = C_ACCESS::kernelAccess( GC_10s ); GC_11s_sv = couplings_sv.GC_11; + GC_10s_sv = couplings_sv.GC_10; mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 8fa3a22bec..f398fe393a 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005364894866943359  +DEBUG: model prefixing takes 0.0053250789642333984  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -193,72 +193,72 @@ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ DEBUG: type(subproc_group)= [output.py at line 188]  DEBUG: type(fortran_model)= [output.py at line 189]  DEBUG: type(me)= me=0 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: proc_id =  0 [model_handling.py at line 1046]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: proc_id =  0 [model_handling.py at line 1052]  INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1309]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1315]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  
True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1171]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  False [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1347]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1353]  
DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  DEBUG: type(subproc_group)= [output.py at line 188]  DEBUG: type(fortran_model)= [output.py at line 189]  DEBUG: type(me)= me=1 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: proc_id =  0 [model_handling.py at line 1046]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: proc_id =  0 [model_handling.py at line 1052]  INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1309]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1315]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  
96 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1171]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  False [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1350]  Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s DEBUG: Entering 
PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.141 s FFV1 FFV1 FFV1 @@ -281,6 +281,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.786s -user 0m0.668s -sys 0m0.063s +real 0m0.730s +user 0m0.666s +sys 0m0.058s diff --git a/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc b/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc index 3452d1e8da..d5eda63ee0 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc +++ b/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc @@ -111,8 +111,8 @@ Parameters_sm::setDependentParameters() // now computed event-by-event (running void Parameters_sm::setDependentCouplings() // now computed event-by-event (running alphas #373) { - GC_10 = -G; GC_11 = mdl_complexi * G; + GC_10 = -G; } */ @@ -195,7 +195,7 @@ void Parameters_sm::printDependentCouplings() // now computed event-by-event (running alphas #373) { std::cout << "sm model couplings dependent on event kinematics:" << std::endl; - std::cout << std::setw( 20 ) << "GC_10 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_10 << std::endl; std::cout << std::setw( 20 ) << "GC_11 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_11 << std::endl; + std::cout << std::setw( 20 ) << "GC_10 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_10 << std::endl; } */ diff --git a/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h b/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h index 4f6f322ed9..0c77cf58f0 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h +++ b/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h @@ -54,7 
+54,7 @@ namespace mg5amcCpu //double mdl_sqrt__aS, G, mdl_G__exp__2; // now computed event-by-event (running alphas #373) // Model couplings dependent on aS - //cxsmpl GC_10, GC_11; // now computed event-by-event (running alphas #373) + //cxsmpl GC_11, GC_10; // now computed event-by-event (running alphas #373) // Set parameters that are unchanged during the run void setIndependentParameters( SLHAReader& slha ); @@ -194,8 +194,8 @@ namespace mg5amcCpu //constexpr double mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) // Model couplings dependent on aS - //constexpr cxsmpl GC_10 = -G; // now computed event-by-event (running alphas #373) //constexpr cxsmpl GC_11 = mdl_complexi * G; // now computed event-by-event (running alphas #373) + //constexpr cxsmpl GC_10 = -G; // now computed event-by-event (running alphas #373) // Print parameters that are unchanged during the run void printIndependentParameters(); @@ -226,12 +226,12 @@ namespace mg5amcCpu namespace Parameters_sm_dependentCouplings { constexpr size_t ndcoup = 2; // #couplings that vary event by event because they depend on the running alphas QCD - constexpr size_t idcoup_GC_10 = 0; - constexpr size_t idcoup_GC_11 = 1; + constexpr size_t idcoup_GC_11 = 0; + constexpr size_t idcoup_GC_10 = 1; struct DependentCouplings_sv { - cxtype_sv GC_10; cxtype_sv GC_11; + cxtype_sv GC_10; }; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-variable" // e.g. <> @@ -257,8 +257,8 @@ namespace mg5amcCpu //const fptype_sv G = 2. 
* mdl_sqrt__aS * constexpr_sqrt( M_PI ); const fptype_sv mdl_G__exp__2 = ( ( G ) * ( G ) ); // Model couplings dependent on aS - out.GC_10 = -G; out.GC_11 = cI * G; + out.GC_10 = -G; } // End SM implementation - no special handling of vectors of floats as in EFT (#439) return out; @@ -293,12 +293,12 @@ namespace mg5amcCpu using namespace Parameters_sm_dependentCouplings; const fptype_sv& gs_sv = G_ACCESS::kernelAccessConst( gs ); DependentCouplings_sv couplings_sv = computeDependentCouplings_fromG( gs_sv ); - fptype* GC_10s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_10 ); fptype* GC_11s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_11 ); - cxtype_sv_ref GC_10s_sv = C_ACCESS::kernelAccess( GC_10s ); + fptype* GC_10s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_10 ); cxtype_sv_ref GC_11s_sv = C_ACCESS::kernelAccess( GC_11s ); - GC_10s_sv = couplings_sv.GC_10; + cxtype_sv_ref GC_10s_sv = C_ACCESS::kernelAccess( GC_10s ); GC_11s_sv = couplings_sv.GC_11; + GC_10s_sv = couplings_sv.GC_10; mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index a0fbab9704..45f905069b 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -143,36 +143,36 @@ INFO: Processing color information for process: g g > h HIG<=1 HIW<=1 @1 DEBUG: type(subproc_group)= [output.py at line 188]  DEBUG: type(fortran_model)= [output.py at line 189]  DEBUG: type(me)= me=0 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: proc_id =  0 [model_handling.py at line 1046]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: proc_id =  0 [model_handling.py at line 1052]  INFO: Creating files in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1309]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1315]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1171]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, 
cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  False [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1177]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_heft_gg_h.txt [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_heft_gg_h.txt [model_handling.py at line 1350]  Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model 
(create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes @@ -196,6 +196,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.516s -user 0m0.419s -sys 0m0.054s +real 0m0.480s +user 0m0.423s +sys 0m0.052s diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 8d7f70c22d..47ee63c762 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005370378494262695  +DEBUG: model prefixing takes 0.005712747573852539  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.134 s +13 processes with 76 diagrams generated in 0.135 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.798 s +65 processes with 1119 diagrams generated in 1.791 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -496,44 +496,44 @@ INFO: Combined process c c~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 
62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 
63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 10, 14: 11, 15: 12, 16: 13, 17: 14, 18: 15, 19: 16, 20: 17, 21: 18, 22: 19, 23: 20, 24: 21, 25: 22, 26: 23, 27: 24, 28: 25, 29: 26, 30: 27, 31: 28, 32: 29, 33: 30, 37: 31, 38: 32, 39: 33, 40: 34, 41: 35, 42: 36, 43: 37, 44: 38, 45: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 55: 46, 56: 47, 57: 48, 58: 49, 59: 50, 60: 51, 61: 52, 62: 53, 63: 54, 67: 55, 68: 56, 69: 57, 70: 58, 71: 59, 72: 60, 73: 61, 74: 62, 75: 63, 76: 64, 77: 65, 78: 66, 79: 67, 80: 68, 81: 69, 85: 70, 86: 71, 87: 72, 88: 73, 89: 74, 90: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 100: 85, 101: 86, 102: 87, 106: 88, 107: 89, 108: 90, 109: 91, 110: 92, 111: 93, 115: 94, 116: 95, 117: 96, 118: 97, 119: 98, 120: 99, 124: 100, 125: 101, 126: 102, 127: 103, 128: 104, 129: 105} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, 
w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: 
[35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 10, 14: 11, 15: 12, 16: 13, 
17: 14, 18: 15, 19: 16, 20: 17, 21: 18, 22: 19, 23: 20, 24: 21, 25: 22, 26: 23, 27: 24, 28: 25, 29: 26, 30: 27, 31: 28, 32: 29, 33: 30, 37: 31, 38: 32, 39: 33, 40: 34, 41: 35, 42: 36, 43: 37, 44: 38, 45: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 55: 46, 56: 47, 57: 48, 58: 49, 59: 50, 60: 51, 61: 52, 62: 53, 63: 54, 67: 55, 68: 56, 69: 57, 70: 58, 71: 59, 72: 60, 73: 61, 74: 62, 75: 63, 76: 64, 77: 65, 78: 66, 79: 67, 80: 68, 81: 69, 85: 70, 86: 71, 87: 72, 88: 73, 89: 74, 90: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 100: 85, 101: 86, 102: 87, 106: 88, 107: 89, 108: 90, 109: 91, 110: 92, 111: 93, 115: 94, 116: 95, 117: 96, 118: 97, 119: 98, 120: 99, 124: 100, 125: 101, 126: 102, 127: 103, 128: 104, 129: 105} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -541,40 +541,40 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map 
=  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 
8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxuux.txt [model_handling.py at line 1347]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxuux.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  @@ -582,40 +582,40 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map 
=  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: 
[7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxgu.txt [model_handling.py at line 1347]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxgu.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  2 [export_cpp.py at line 712]  @@ -623,40 +623,40 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map 
=  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: 
[7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxgux.txt [model_handling.py at line 1347]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxgux.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  3 [export_cpp.py at line 712]  @@ -664,40 +664,40 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  72 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map 
=  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  72 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: 
[7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxgg.txt [model_handling.py at line 1347]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxgg.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  4 [export_cpp.py at line 712]  @@ -705,42 +705,42 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( 
momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, 
cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  5 [export_cpp.py at line 712]  @@ -748,36 +748,36 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  72 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1718]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file 
[model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  72 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1724]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uu_ttxuu.txt [model_handling.py at line 1347]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uu_ttxuu.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  6 [export_cpp.py at line 712]  @@ -785,36 +785,36 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1718]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file 
[model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1724]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxuux.txt [model_handling.py at line 1347]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxuux.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  7 [export_cpp.py at line 712]  @@ -822,36 +822,36 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  72 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1718]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file 
[model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  72 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1724]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxux_ttxuxux.txt [model_handling.py at line 1347]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxux_ttxuxux.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  8 [export_cpp.py at line 712]  @@ -859,36 +859,36 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1718]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1151]  +DEBUG: 
self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1724]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uc_ttxuc.txt [model_handling.py at line 1347]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa 
[model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uc_ttxuc.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  9 [export_cpp.py at line 712]  @@ -896,36 +896,36 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1718]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1151]  +DEBUG: 
self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1724]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxccx.txt [model_handling.py at line 1347]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa 
[model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxccx.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  10 [export_cpp.py at line 712]  @@ -933,36 +933,36 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1718]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1151]  +DEBUG: 
self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1724]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_ucx_ttxucx.txt [model_handling.py at line 1347]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa 
[model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_ucx_ttxucx.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  11 [export_cpp.py at line 712]  @@ -970,36 +970,36 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1718]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1151]  +DEBUG: 
self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1724]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxcx_ttxuxcx.txt [model_handling.py at line 1347]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa 
[model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxcx_ttxuxcx.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  12 [export_cpp.py at line 712]  @@ -1007,38 +1007,38 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at 
line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1347]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  13 [export_cpp.py at line 712]  @@ -1046,38 +1046,38 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: 
self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  14 [export_cpp.py at line 712]  @@ -1085,38 +1085,38 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at 
line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxg.txt [model_handling.py at line 1347]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxg.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  15 [export_cpp.py at line 712]  @@ -1124,40 +1124,40 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, 
self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1718]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1831]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1830]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1831]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1724]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  16 [export_cpp.py at line 712]  @@ -1165,51 +1165,51 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  +DEBUG: proc_id =  1 [model_handling.py at line 1052]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1} [model_handling.py at line 1718]  +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  +DEBUG: self.include_multi_channel =  [1] [model_handling.py at line 1151]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  +DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  +DEBUG: type(self.helas_call_writer) =  
[model_handling.py at line 1176]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1] [model_handling.py at line 1177]  +DEBUG: multi_channel =  {1: [0]} [model_handling.py at line 1183]  +DEBUG: multi_channel_map =  {1: [0]} [model_handling.py at line 1669]  +DEBUG: diag_to_config =  {1: 1} [model_handling.py at line 1724]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttx.txt [model_handling.py at line 1347]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttx.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1] [export_cpp.py at line 711]  DEBUG: subproc_number =  17 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.266 s -Wrote files for 810 helas calls in 3.354 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.262 s +Wrote files for 810 helas calls in 3.360 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.330 s +ALOHA: aloha creates 5 routines in 0.331 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -1217,7 +1217,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.306 s +ALOHA: aloha creates 10 routines in 0.311 s VVV1 VVV1 FFV1 @@ -1530,6 +1530,6 @@ Type "launch" to generate events 
from this process, or see Run "open index.html" to see more information about this process. quit -real 0m9.536s -user 0m8.950s -sys 0m0.551s +real 0m9.518s +user 0m8.931s +sys 0m0.545s From 7964ff99f95196e0f098e81501cd147fee2b05cd Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 16:32:23 +0200 Subject: [PATCH 016/119] [oct23av] rerun gq_ttq tmad tests after Stefan's PR #757 - gqttq xsec is now correct! fixes high-priority issue #748 These three tests now succeed (they used to fail) ./tmad/teeMadX.sh +10x -gqttq -makeclean ./tmad/teeMadX.sh +10x -gqttq -makeclean -fltonly ./tmad/teeMadX.sh +10x -gqttq -makeclean -mixonly NB: eemumu code generation remains to be fixed after PR #757 --- .../log_gqttq_mad_d_inl0_hrd0.txt | 493 ++++++++++++++++- .../log_gqttq_mad_f_inl0_hrd0.txt | 499 +++++++++++++++++- .../log_gqttq_mad_m_inl0_hrd0.txt | 499 +++++++++++++++++- 3 files changed, 1434 insertions(+), 57 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 126d0b9ddb..ef2a008fd8 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -16,6 +16,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. @@ -27,13 +28,12 @@ make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2023-10-25_19:34:18 +DATE: 2023-10-26_13:52:18 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3162s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2400s - [COUNTERS] Fortran MEs ( 1 ) : 0.0762s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3116s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2347s + [COUNTERS] Fortran MEs ( 1 ) : 0.0769s for 8192 events => throughput is 1.06E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3078s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2307s - [COUNTERS] Fortran MEs ( 1 ) : 0.0772s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3066s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2302s + [COUNTERS] Fortran MEs ( 1 ) : 0.0764s for 8192 events => throughput is 1.07E+05 
events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2688s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4248s - [COUNTERS] Fortran MEs ( 1 ) : 0.8440s for 90112 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2632s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4231s + [COUNTERS] Fortran MEs ( 1 ) : 0.8401s for 90112 events => throughput is 1.07E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,12 +132,471 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.276 [1.2757941949814184] fbridge_mode=1 - [UNWEIGHT] Wrote 105 events (found 652 events) - [COUNTERS] PROGRAM TOTAL : 0.3877s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3161s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0716s for 8192 events => throughput is 1.14E+05 events/s + [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.3754s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3031s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0723s for 8192 events => throughput is 1.13E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -ERROR! xsec from fortran (0.26050333309703716) and cpp (1.2757941949814184) differ by more than 2E-14 (3.8974198518457603) +OK! xsec from fortran (0.26050333309703716) and cpp (0.26050333309703716) differ by less than 2E-14 (0.0) + +*** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-none) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182648615872] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 2.2698s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4786s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7912s for 90112 events => throughput is 1.14E+05 events/s + +*** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182648615872) differ by less than 2E-14 (1.1102230246251565e-16) + +*** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.151677e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.155366e+05 ) sec^-1 + +*** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050333309703727] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.3152s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2761s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0391s for 8192 events => throughput is 2.10E+05 events/s + +*** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333309703727) differ by less than 2E-14 (4.440892098500626e-16) + +*** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-sse4) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.8777s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4487s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4290s for 90112 events => throughput is 2.10E+05 events/s + +*** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182648615874) differ by less than 2E-14 (0.0) + +*** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.093968e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.072845e+05 ) sec^-1 + +*** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.2755s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2543s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0213s for 8192 events => throughput is 3.85E+05 events/s + +*** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333309703733) differ by less than 2E-14 (6.661338147750939e-16) + +*** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-avx2) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.6671s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4324s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2348s for 90112 events => throughput is 3.84E+05 events/s + +*** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182648615863) differ by less than 2E-14 (5.551115123125783e-16) + +*** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.838452e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.878300e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.2707s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2515s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0192s for 8192 events => throughput is 4.27E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333309703733) differ by less than 2E-14 (6.661338147750939e-16) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.6979s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4773s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2206s for 90112 events => throughput is 4.08E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182648615863) differ by less than 2E-14 (5.551115123125783e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.101159e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.094655e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.2902s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2615s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0287s for 8192 events => throughput is 2.85E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333309703733) differ by less than 2E-14 (6.661338147750939e-16) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.7445s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4413s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3032s for 90112 events => throughput is 2.97E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182648615863) differ by less than 2E-14 (5.551115123125783e-16) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.863782e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.971210e+05 ) sec^-1 + +*** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.6724s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6717s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.22E+07 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333309703733) differ by less than 2E-14 (6.661338147750939e-16) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182648615869] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.8414s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8339s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 90112 events => throughput is 1.20E+07 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182648615869) differ by less than 2E-14 (2.220446049250313e-16) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.555463e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.293951e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.436968e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.907161e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.448784e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.272589e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.448370e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.882086e+07 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 97d8938e38..3031fbe602 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -4,8 +4,8 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,25 +15,25 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2023-10-25_19:34:24 +DATE: 2023-10-26_16:26:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3127s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2355s - [COUNTERS] Fortran MEs ( 1 ) : 0.0772s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3132s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2359s + [COUNTERS] Fortran MEs ( 1 ) : 0.0773s for 8192 events => throughput is 1.06E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3089s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2315s - [COUNTERS] Fortran MEs ( 1 ) : 0.0773s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3083s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2305s + [COUNTERS] Fortran MEs ( 1 ) : 0.0778s for 8192 events => throughput is 1.05E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < 
/tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2899s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4433s - [COUNTERS] Fortran MEs ( 1 ) : 0.8467s for 90112 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2687s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4262s + [COUNTERS] Fortran MEs ( 1 ) : 0.8425s for 90112 events => throughput is 1.07E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,12 +132,471 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.276 [1.2757939713258191] fbridge_mode=1 - [UNWEIGHT] Wrote 105 events (found 652 events) - [COUNTERS] PROGRAM TOTAL : 0.3818s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3130s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0689s for 8192 events => throughput is 1.19E+05 events/s + [XSECTION] Cross section = 0.2605 [0.26050316227723969] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.3718s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3018s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0700s for 8192 events => throughput is 1.17E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -ERROR! xsec from fortran (0.26050333309703716) and cpp (1.2757939713258191) differ by more than 4E-4 (3.897418993293984) +OK! xsec from fortran (0.26050333309703716) and cpp (0.26050316227723969) differ by less than 4E-4 (6.55729796017468e-07) + +*** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-none) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182848184220] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 2.2605s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4911s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7694s for 90112 events => throughput is 1.17E+05 events/s + +*** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182848184220) differ by less than 4E-4 (9.154014657397624e-09) + +*** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.189355e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.185221e+05 ) sec^-1 + +*** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050312995876956] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.2829s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2595s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0234s for 8192 events => throughput is 3.50E+05 events/s + +*** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050312995876956) differ by less than 4E-4 (7.797914336471479e-07) + +*** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-sse4) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801179112233499] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.7023s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4403s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2619s for 90112 events => throughput is 3.44E+05 events/s + +*** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801179112233499) differ by less than 4E-4 (1.6221057508314374e-07) + +*** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.361662e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.377760e+05 ) sec^-1 + +*** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050312669591458] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.2579s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2465s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0114s for 8192 events => throughput is 7.16E+05 events/s + +*** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050312669591458) differ by less than 4E-4 (7.923166284173888e-07) + +*** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-avx2) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801178977086591] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.5607s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4343s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1264s for 90112 events => throughput is 7.13E+05 events/s + +*** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801178977086591) differ by less than 4E-4 (1.6840963823483435e-07) + +*** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.978568e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.011663e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050312669591458] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.2565s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2457s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0107s for 8192 events => throughput is 7.62E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050312669591458) differ by less than 4E-4 (7.923166284173888e-07) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801178977086591] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.5367s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4216s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1151s for 90112 events => throughput is 7.83E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801178977086591) differ by less than 4E-4 (1.6840963823483435e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.722830e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.609197e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050317064675232] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.2615s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2475s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.88E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050317064675232) differ by less than 4E-4 (6.23601559723852e-07) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801181998460883] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.5932s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4384s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1548s for 90112 events => throughput is 5.82E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801181998460883) differ by less than 4E-4 (2.9822005642721194e-08) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.435125e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.574503e+05 ) sec^-1 + +*** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050319269579369] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.6750s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6744s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.52E+07 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050319269579369) differ by less than 4E-4 (5.389614090578476e-07) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801186042050189] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.8445s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8387s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 90112 events => throughput is 1.55E+07 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801186042050189) differ by less than 4E-4 (1.556536803892783e-07) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.784538e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.616857e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.893676e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.761961e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.892834e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.847341e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.399446e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.280733e+07 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index ae1cc6d1c5..356eeee837 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -15,17 +15,17 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:34:30 +DATE: 2023-10-26_16:28:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3113s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2340s - [COUNTERS] Fortran MEs ( 1 ) : 0.0773s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3104s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2337s + [COUNTERS] Fortran MEs ( 1 ) : 0.0767s for 8192 events => throughput is 1.07E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3077s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2312s - [COUNTERS] Fortran MEs ( 1 ) : 0.0765s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3269s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2499s + [COUNTERS] Fortran MEs ( 1 ) : 0.0770s for 8192 events => throughput is 1.06E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 
1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2619s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4224s - [COUNTERS] Fortran MEs ( 1 ) : 0.8395s for 90112 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2593s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4222s + [COUNTERS] Fortran MEs ( 1 ) : 0.8371s for 90112 events => throughput is 1.08E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,12 +132,471 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.276 [1.2757941960880730] fbridge_mode=1 - [UNWEIGHT] Wrote 105 events (found 652 events) - [COUNTERS] PROGRAM TOTAL : 0.3925s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3207s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0718s for 8192 events => throughput is 1.14E+05 events/s + [XSECTION] Cross section = 0.2605 [0.26050333287021976] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.3748s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3034s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0714s for 8192 events => throughput is 1.15E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -ERROR! xsec from fortran (0.26050333309703716) and cpp (1.2757941960880730) differ by more than 2E-4 (3.8974198560939) +OK! xsec from fortran (0.26050333309703716) and cpp (0.26050333287021976) differ by less than 2E-4 (8.706890763932051e-10) + +*** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-none) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182637309846] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 2.2653s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4786s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7867s for 90112 events => throughput is 1.15E+05 events/s + +*** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182637309846) differ by less than 2E-4 (5.185970541887741e-10) + +*** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.138536e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.159613e+05 ) sec^-1 + +*** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050333287021976] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.3084s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2698s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0386s for 8192 events => throughput is 2.12E+05 events/s + +*** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333287021976) differ by less than 2E-4 (8.706890763932051e-10) + +*** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-sse4) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182637309841] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.8771s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4553s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4219s for 90112 events => throughput is 2.14E+05 events/s + +*** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182637309841) differ by less than 2E-4 (5.18597276233379e-10) + +*** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.090427e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.088304e+05 ) sec^-1 + +*** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050333293296080] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.2744s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2536s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0208s for 8192 events => throughput is 3.94E+05 events/s + +*** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333293296080) differ by less than 2E-4 (6.29843621702264e-10) + +*** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-avx2) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182637602595] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.6781s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4467s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2314s for 90112 events => throughput is 3.89E+05 events/s + +*** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182637602595) differ by less than 2E-4 (5.051689067059328e-10) + +*** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.901746e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.831760e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050333293296080] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.2720s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2526s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0193s for 8192 events => throughput is 4.24E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333293296080) differ by less than 2E-4 (6.29843621702264e-10) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182637602595] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.6633s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4538s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2095s for 90112 events => throughput is 4.30E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182637602595) differ by less than 2E-4 (5.051689067059328e-10) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.283227e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.421519e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050333293296080] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.2903s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2614s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0289s for 8192 events => throughput is 2.84E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333293296080) differ by less than 2E-4 (6.29843621702264e-10) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182637602595] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.8753s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5307s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3445s for 90112 events => throughput is 2.62E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182637602595) differ by less than 2E-4 (5.051689067059328e-10) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.807854e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.564558e+05 ) sec^-1 + +*** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050333301029693] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.6551s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6544s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.20E+07 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333301029693) differ by less than 2E-4 (3.329716502520341e-10) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182637219937] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.8418s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8341s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 90112 events => throughput is 1.18E+07 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182637219937) differ by less than 2E-4 (5.227210886360467e-10) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.554480e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.260564e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.452960e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.904295e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.454069e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.238408e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.441519e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.877685e+07 ) sec^-1 + +TEST COMPLETED From 34cf1c74b090e89d7d5fe9da9cca679e14e22a7a Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 17:11:49 +0200 Subject: [PATCH 017/119] [oct23av] rerun 8 tput tests for gqttq - now runTest fails (I guess the ref needs updating after fixing the coupling order in PR #757) ./tput/teeThroughputX.sh -mix -hrd -makej -gqttq -makeclean ./tput/teeThroughputX.sh -makej -gqttq -flt -bridge -makeclean --- .../log_gqttq_mad_d_inl0_hrd0.txt | 174 +++-------------- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 178 +++--------------- .../log_gqttq_mad_d_inl0_hrd1.txt | 174 +++-------------- .../log_gqttq_mad_f_inl0_hrd0.txt | 174 +++-------------- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 178 +++--------------- .../log_gqttq_mad_f_inl0_hrd1.txt | 174 +++-------------- .../log_gqttq_mad_m_inl0_hrd0.txt | 174 +++-------------- .../log_gqttq_mad_m_inl0_hrd1.txt | 174 +++-------------- 8 files changed, 240 insertions(+), 1160 deletions(-) diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index db66144b99..4d32261a0d 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_18:43:12 +DATE: 2023-10-26_16:49:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.996510e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.551380e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.892828e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.438393 sec - 1,953,535,468 cycles # 3.006 GHz - 2,779,051,405 instructions # 1.42 insn per cycle - 0.706976484 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.729631e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.511740e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.903280e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.442673 sec + 1,967,879,288 cycles # 3.002 GHz + 2,802,500,107 instructions # 1.42 insn per cycle + 0.712646529 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,20 +60,20 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) 
-EvtsPerSec[Rmb+ME] (23) = ( 3.793351e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.668509e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.060188e+07 ) sec^-1 -MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 -TOTAL : 0.516556 sec - 2,254,798,816 cycles # 3.015 GHz - 3,256,611,216 instructions # 1.44 insn per cycle - 0.804635125 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.471854e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.643008e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.105940e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.519141 sec + 2,281,793,183 cycles # 3.034 GHz + 3,260,400,346 instructions # 1.43 insn per cycle + 0.810273095 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.622436e-01 -Avg ME (F77/CUDA) = 0.56224343220024076 -Relative difference = 2.984467216677476e-07 +Avg ME (C++/CUDA) = 1.424749e-01 +Avg ME (F77/CUDA) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe -p 64 256 10 OMP= @@ -82,129 +82,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.142180e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.166897e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.166897e+05 ) sec^-1 
-MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 1.455956 sec - 4,526,705,701 cycles # 3.102 GHz - 12,813,772,224 instructions # 2.83 insn per cycle - 1.459879572 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 732) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.079444e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.102859e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.102859e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.540557 sec + 4,536,431,435 cycles # 2.951 GHz + 12,817,408,182 instructions # 2.83 insn per cycle + 1.544699619 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 730) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224343220084161 -Relative difference = 2.9844565299804477e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 2.059229e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.139815e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.139815e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.816563 sec - 2,541,682,069 cycles # 3.100 GHz - 7,194,219,151 instructions # 2.83 insn per cycle - 0.820635450 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3150) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224343220084161 -Relative difference = 2.9844565299804477e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.555525e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.809689e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.809689e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.482439 sec - 1,365,016,241 cycles # 2.809 GHz - 
2,962,982,028 instructions # 2.17 insn per cycle - 0.486447941 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3017) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224343220031370 -Relative difference = 2.9844659193456305e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.042576e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.357609e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.357609e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.426283 sec - 1,250,204,594 cycles # 2.908 GHz - 2,816,555,243 instructions # 2.25 insn per cycle - 0.430386207 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2780) (512y: 104) (512z: 0) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224343220031370 -Relative difference = 2.9844659193456305e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.853383e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.013794e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.013794e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.597884 sec - 1,199,308,383 cycles # 1.995 GHz - 1,804,468,596 instructions # 1.50 insn per cycle - 0.601975092 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1380) (512y: 106) (512z: 2270) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224343220031370 -Relative difference = 2.9844659193456305e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + 2 FAILED TESTS diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index 55664f3ef7..4753c307e1 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_19:05:04 +DATE: 2023-10-26_17:13:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.715956e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.423473e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.423473e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.466815 sec - 2,046,266,197 cycles # 2.991 GHz - 3,039,133,939 instructions # 1.49 insn per cycle - 0.742578114 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.688403e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.196209e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.196209e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.466261 sec + 2,061,394,039 cycles # 3.003 GHz + 3,032,478,993 instructions # 1.47 insn per cycle + 0.743734864 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,20 +72,20 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.437711e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.623194e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.623194e+07 ) sec^-1 -MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 -TOTAL : 0.730298 sec - 2,928,685,026 cycles # 3.010 GHz - 4,469,035,726 instructions # 1.53 insn per cycle - 1.030829307 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.365371e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.438222e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.438222e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.745064 sec + 2,974,326,147 cycles # 3.010 GHz + 4,582,543,246 instructions # 1.54 insn per cycle + 1.047030310 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.622436e-01 -Avg ME (F77/CUDA) = 0.56224343220024076 -Relative difference = 2.984467216677476e-07 +Avg ME (C++/CUDA) = 1.424749e-01 +Avg ME (F77/CUDA) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= @@ -95,133 +95,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.130473e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.155364e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.155364e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 1.477047 sec - 4,559,807,823 cycles # 3.080 GHz - 12,820,937,643 instructions # 2.81 insn per cycle - 1.481171657 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 732) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.137225e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.161884e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.161884e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.467678 sec + 4,553,662,396 cycles # 3.096 GHz + 12,819,000,364 instructions # 2.82 insn per cycle + 1.471758386 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 730) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224343220084161 -Relative difference = 2.9844565299804477e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.037342e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.116140e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.116140e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.831756 sec - 2,576,245,613 cycles # 3.084 GHz - 7,244,217,190 instructions # 2.81 insn per cycle - 0.836091661 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3150) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224343220084161 -Relative difference = 2.9844565299804477e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.548861e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.803617e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.803617e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.489750 sec - 1,401,953,569 cycles # 2.841 GHz - 3,012,124,484 instructions # 2.15 insn per cycle - 0.494067218 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3017) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224343220031370 -Relative difference = 2.9844659193456305e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.888848e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.196973e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.196973e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.451008 sec - 1,291,042,478 cycles # 2.840 GHz - 2,867,771,305 instructions # 2.22 insn per cycle - 0.455219321 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2780) (512y: 104) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224343220031370 -Relative difference = 2.9844659193456305e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.837117e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.990188e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.990188e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.606364 sec - 1,227,732,873 cycles # 2.013 GHz - 1,842,233,991 instructions # 1.50 insn per cycle - 0.610509212 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1380) (512y: 106) (512z: 2270) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224343220031370 -Relative difference = 2.9844659193456305e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + 2 FAILED TESTS diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index b5138c5dae..a9a8163b9d 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_18:43:29 +DATE: 2023-10-26_16:49:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.920168e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.374674e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.702657e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.437720 sec - 1,950,126,017 cycles # 3.006 GHz - 2,773,154,214 instructions # 1.42 insn per cycle - 0.705954337 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.695969e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.369756e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.733382e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.440487 sec + 1,973,277,409 cycles # 3.017 GHz + 2,804,504,699 instructions # 1.42 insn per cycle + 0.710712051 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,20 +60,20 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) 
-EvtsPerSec[Rmb+ME] (23) = ( 3.766528e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.558717e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.939267e+07 ) sec^-1 -MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 -TOTAL : 0.513041 sec - 2,254,005,524 cycles # 3.033 GHz - 3,263,301,476 instructions # 1.45 insn per cycle - 0.800914962 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.435954e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.475347e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.911743e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.519269 sec + 2,277,391,096 cycles # 3.024 GHz + 3,254,209,570 instructions # 1.43 insn per cycle + 0.810433700 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.622436e-01 -Avg ME (F77/CUDA) = 0.56224343220024076 -Relative difference = 2.984467216677476e-07 +Avg ME (C++/CUDA) = 1.424749e-01 +Avg ME (F77/CUDA) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check.exe -p 64 256 10 OMP= @@ -82,129 +82,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.153195e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.178574e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.178574e+05 ) sec^-1 
-MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 1.442048 sec - 4,474,102,808 cycles # 3.096 GHz - 12,693,000,655 instructions # 2.84 insn per cycle - 1.446019473 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 687) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.154348e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.180003e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.180003e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.441187 sec + 4,472,630,063 cycles # 3.096 GHz + 12,692,291,111 instructions # 2.84 insn per cycle + 1.445245845 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 685) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224343220084161 -Relative difference = 2.9844565299804477e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 2.053414e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.135311e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.135311e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.819184 sec - 2,493,704,947 cycles # 3.032 GHz - 7,048,175,291 instructions # 2.83 insn per cycle - 0.823254604 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2966) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224343220084161 -Relative difference = 2.9844565299804477e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.165376e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.368230e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.368230e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.539826 sec - 1,467,619,890 cycles # 2.700 GHz - 
3,195,865,906 instructions # 2.18 insn per cycle - 0.544230401 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3078) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224343220031370 -Relative difference = 2.9844659193456305e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.602530e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.851517e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.851517e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.475458 sec - 1,396,242,750 cycles # 2.915 GHz - 3,099,467,840 instructions # 2.22 insn per cycle - 0.479442989 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2785) (512y: 257) (512z: 0) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224343220031370 -Relative difference = 2.9844659193456305e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.759805e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.905242e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.905242e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.616827 sec - 1,241,766,049 cycles # 2.002 GHz - 2,069,716,754 instructions # 1.67 insn per cycle - 0.620974760 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1197) (512y: 194) (512z: 2426) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224343220031370 -Relative difference = 2.9844659193456305e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + 2 FAILED TESTS diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 8a44a3dd20..e57eccf909 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_18:43:45 +DATE: 2023-10-26_16:49:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.953088e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.256905e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.378672e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.402886e+01 +- 1.677500e+01 ) GeV^-2 -TOTAL : 0.434393 sec - 1,939,780,167 cycles # 3.008 GHz - 2,743,949,203 instructions # 1.41 insn per cycle - 0.703990460 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.343265e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.226255e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.365776e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 +TOTAL : 0.438160 sec + 1,929,021,635 cycles # 2.968 GHz + 2,682,927,208 instructions # 1.39 insn per cycle + 0.709175123 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 168 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,20 +60,20 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 8.221116e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.860706e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.982864e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.166198e+02 +- 2.517590e+02 ) GeV^-2 -TOTAL : 0.466982 sec - 2,071,697,565 cycles # 3.013 GHz - 2,925,920,597 instructions # 1.41 insn per cycle - 0.745407679 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.496674e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.838576e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.980756e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 +TOTAL : 0.471866 sec + 2,133,132,090 cycles # 3.020 GHz + 3,018,979,515 instructions # 1.42 insn per cycle + 0.764992270 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.619520e-01 -Avg ME (F77/CUDA) = 0.56225629328206139 -Relative difference = 0.0005414933696496947 +Avg ME (C++/CUDA) = 1.424226e-01 +Avg ME (F77/CUDA) = 0.14247488790947038 +Relative difference = 0.0003671321087409729 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe -p 64 256 10 OMP= @@ -82,129 +82,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.175069e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.202207e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.202207e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422773e+01 
+- 1.683421e+01 ) GeV^-2 -TOTAL : 1.414014 sec - 4,387,994,343 cycles # 3.096 GHz - 12,757,087,191 instructions # 2.91 insn per cycle - 1.417904644 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 693) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.177979e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.204724e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.204724e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.410662 sec + 4,391,627,324 cycles # 3.106 GHz + 12,756,963,824 instructions # 2.90 insn per cycle + 1.414598719 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.621794e-01 -Avg ME (F77/C++) = 0.56217939035956022 -Relative difference = 1.714833339642312e-08 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.258633e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.477473e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.477473e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 -TOTAL : 0.522116 sec - 1,618,126,198 cycles # 3.079 GHz - 4,232,277,564 instructions # 2.62 insn per cycle - 0.526044496 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3709) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.621794e-01 -Avg ME (F77/C++) = 0.56217937649880412 -Relative difference = 4.180373005172264e-08 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.608070e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.524156e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.524156e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.267436 sec - 793,046,821 cycles # 2.927 GHz - 1,796,478,483 instructions # 2.27 insn per cycle - 
0.271487422 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3614) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622563e-01 -Avg ME (F77/C++) = 0.56225626625622027 -Relative difference = 6.001494295464523e-08 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.057162e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.116228e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.116228e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.251834 sec - 743,002,845 cycles # 2.913 GHz - 1,717,820,666 instructions # 2.31 insn per cycle - 0.255754900 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3443) (512y: 22) (512z: 0) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622563e-01 -Avg ME (F77/C++) = 0.56225626625622027 -Relative difference = 6.001494295464523e-08 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.409485e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.023233e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.023233e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.324434 sec - 678,869,673 cycles # 2.072 GHz - 1,206,887,131 instructions # 1.78 insn per cycle - 0.328433910 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2007) (512y: 32) (512z: 2493) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622563e-01 -Avg ME (F77/C++) = 0.56225625653985389 -Relative difference = 7.729597009145631e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + 2 FAILED TESTS diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index 35147cd718..b53646f0f6 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_19:05:21 +DATE: 2023-10-26_17:13:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.575149e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.561751e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.561751e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.419752e+01 +- 1.682900e+01 ) GeV^-2 -TOTAL : 0.447090 sec - 2,006,667,406 cycles # 3.008 GHz - 2,921,553,347 instructions # 1.46 insn per cycle - 0.724337546 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.754275e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.055276e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.055276e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.017654e+01 +- 1.429184e+01 ) GeV^-2 +TOTAL : 0.450227 sec + 1,995,210,844 cycles # 3.000 GHz + 2,942,950,603 instructions # 1.48 insn per cycle + 0.724345916 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,20 +72,20 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.387556e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.246561e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.246561e+07 ) sec^-1 -MeanMatrixElemValue = ( 4.349385e+02 +- 2.541442e+02 ) GeV^-2 -TOTAL : 0.606579 sec - 2,519,662,089 cycles # 3.025 GHz - 3,861,788,300 instructions # 1.53 insn per cycle - 0.890689404 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.066363e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.838148e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.838148e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.609942e+02 +- 2.115590e+02 ) GeV^-2 +TOTAL : 0.621172 sec + 2,468,425,638 cycles # 2.898 GHz + 3,821,933,035 instructions # 1.55 insn per cycle + 0.908341434 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.619520e-01 -Avg ME (F77/CUDA) = 0.56225629328206139 -Relative difference = 0.0005414933696496947 +Avg ME (C++/CUDA) = 1.424226e-01 +Avg ME (F77/CUDA) = 0.14247488790947038 +Relative difference = 0.0003671321087409729 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= @@ -95,133 +95,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.174105e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.201186e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.201186e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 -TOTAL : 1.417963 sec - 4,402,481,360 cycles # 3.098 GHz - 12,761,539,634 instructions # 2.90 insn per cycle - 1.421844145 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 693) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.168769e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.195472e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.195472e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.424772 sec + 4,406,963,349 cycles # 3.086 GHz + 12,761,462,870 instructions # 2.90 insn per cycle + 1.428790731 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.621794e-01 -Avg ME (F77/C++) = 0.56217939035956022 -Relative difference = 1.714833339642312e-08 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.229854e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.445494e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.445494e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 -TOTAL : 0.530536 sec - 1,636,187,995 cycles # 3.064 GHz - 4,280,682,276 instructions # 2.62 insn per cycle - 0.534503101 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3709) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.621794e-01 -Avg ME (F77/C++) = 0.56217937649880412 -Relative difference = 4.180373005172264e-08 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.470080e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.344369e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.344369e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.277101 sec - 811,784,719 cycles # 2.894 GHz - 1,833,505,900 instructions # 2.26 insn per cycle - 0.281119088 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3614) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622563e-01 -Avg ME (F77/C++) = 0.56225626625622027 -Relative difference = 6.001494295464523e-08 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.578285e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.554614e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.554614e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.273667 sec - 760,813,920 cycles # 2.744 GHz - 1,755,015,790 instructions # 2.31 insn per cycle - 0.277772969 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3443) (512y: 22) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622563e-01 -Avg ME (F77/C++) = 0.56225626625622027 -Relative difference = 6.001494295464523e-08 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.356513e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.945123e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.945123e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.331761 sec - 698,245,569 cycles # 2.083 GHz - 1,248,346,490 instructions # 1.79 insn per cycle - 0.335801876 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2007) (512y: 32) (512z: 2493) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622563e-01 -Avg ME (F77/C++) = 0.56225625653985389 -Relative difference = 7.729597009145631e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + 2 FAILED TESTS diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index da1ead0f77..519fa897c6 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_18:44:02 +DATE: 2023-10-26_16:50:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.781171e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.253056e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.376604e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.402886e+01 +- 1.677500e+01 ) GeV^-2 -TOTAL : 0.437609 sec - 1,935,685,013 cycles # 2.990 GHz - 2,706,812,030 instructions # 1.40 insn per cycle - 0.705999265 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.456844e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.249867e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.389367e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 +TOTAL : 0.437190 sec + 1,933,120,640 cycles # 2.988 GHz + 2,727,535,334 instructions # 1.41 insn per cycle + 0.706391886 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 162 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,20 +60,20 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 8.205131e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.854735e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.976134e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.166198e+02 +- 2.517590e+02 ) GeV^-2 -TOTAL : 0.466415 sec - 2,068,328,114 cycles # 3.010 GHz - 2,987,091,963 instructions # 1.44 insn per cycle - 0.744952769 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.471256e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.820345e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.967386e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 +TOTAL : 0.470786 sec + 2,090,124,938 cycles # 3.012 GHz + 2,993,688,215 instructions # 1.43 insn per cycle + 0.750906447 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.619520e-01 -Avg ME (F77/CUDA) = 0.56225629328206139 -Relative difference = 0.0005414933696496947 +Avg ME (C++/CUDA) = 1.424226e-01 +Avg ME (F77/CUDA) = 0.14247488790947038 +Relative difference = 0.0003671321087409729 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check.exe -p 64 256 10 OMP= @@ -82,129 +82,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.180324e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.207350e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.207350e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422773e+01 
+- 1.683421e+01 ) GeV^-2 -TOTAL : 1.407934 sec - 4,364,873,703 cycles # 3.093 GHz - 12,656,518,331 instructions # 2.90 insn per cycle - 1.411910028 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 644) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.185910e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.213603e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.213603e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.401164 sec + 4,362,926,983 cycles # 3.107 GHz + 12,656,529,955 instructions # 2.90 insn per cycle + 1.405093731 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 641) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.621794e-01 -Avg ME (F77/C++) = 0.56217939035956022 -Relative difference = 1.714833339642312e-08 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.586065e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.857911e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.857911e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422773e+01 +- 1.683421e+01 ) GeV^-2 -TOTAL : 0.475849 sec - 1,476,787,317 cycles # 3.082 GHz - 4,120,727,484 instructions # 2.79 insn per cycle - 0.479813364 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3414) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.621794e-01 -Avg ME (F77/C++) = 0.56217937649880412 -Relative difference = 4.180373005172264e-08 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.086145e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.608974e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.608974e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.341638 sec - 1,007,266,248 cycles # 2.920 GHz - 2,124,817,247 instructions # 2.11 insn per cycle - 
0.345623716 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4206) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622563e-01 -Avg ME (F77/C++) = 0.56225626625622027 -Relative difference = 6.001494295464523e-08 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.281396e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.848165e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.848165e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.329501 sec - 970,497,596 cycles # 2.916 GHz - 2,043,945,912 instructions # 2.11 insn per cycle - 0.333450865 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4013) (512y: 9) (512z: 0) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622563e-01 -Avg ME (F77/C++) = 0.56225626625622027 -Relative difference = 6.001494295464523e-08 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.073132e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.415220e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.415220e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422183e+01 +- 1.683665e+01 ) GeV^-2 -TOTAL : 0.424070 sec - 856,736,633 cycles # 2.004 GHz - 1,573,705,553 instructions # 1.84 insn per cycle - 0.428192842 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2446) (512y: 16) (512z: 2998) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622563e-01 -Avg ME (F77/C++) = 0.56225625653985389 -Relative difference = 7.729597009145631e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + 2 FAILED TESTS diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index a1190251f1..9491ee780f 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_18:44:18 +DATE: 2023-10-26_16:50:19 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.018464e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.595321e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.946310e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.439342 sec - 1,951,736,780 cycles # 3.004 GHz - 2,769,340,845 instructions # 1.42 insn per cycle - 0.707751210 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.725041e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.498321e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.893455e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.443565 sec + 1,967,428,977 cycles # 2.989 GHz + 2,773,135,186 instructions # 1.41 insn per cycle + 0.717269875 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,20 +60,20 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 3.803845e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.713692e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.109529e+07 ) sec^-1 -MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 -TOTAL : 0.513063 sec - 2,243,341,028 cycles # 3.023 GHz - 3,247,351,075 instructions # 1.45 insn per cycle - 0.800783669 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.464031e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.628890e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.088772e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.528891 sec + 2,186,923,468 cycles # 2.854 GHz + 3,145,929,563 instructions # 1.44 insn per cycle + 0.823288676 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.622436e-01 -Avg ME (F77/CUDA) = 0.56224344354681244 -Relative difference = 2.782658397826986e-07 +Avg ME (C++/CUDA) = 1.424749e-01 +Avg ME (F77/CUDA) = 0.14247482577104625 +Relative difference = 5.209967070245855e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check.exe -p 64 256 10 OMP= @@ -82,129 +82,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.141045e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.165652e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.165652e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 
+- 1.677228e+01 ) GeV^-2 -TOTAL : 1.457233 sec - 4,537,826,727 cycles # 3.107 GHz - 12,784,913,374 instructions # 2.82 insn per cycle - 1.461163978 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.138698e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.163005e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.163005e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.460325 sec + 4,538,591,528 cycles # 3.101 GHz + 12,784,673,558 instructions # 2.82 insn per cycle + 1.464322080 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 705) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224345333966297 -Relative difference = 2.608483884671339e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.061046e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.141573e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.141573e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.815514 sec - 2,537,540,226 cycles # 3.099 GHz - 7,116,439,666 instructions # 2.80 insn per cycle - 0.819589417 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3215) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224345333966297 -Relative difference = 2.608483884671339e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.732172e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.005113e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.005113e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.460418 sec - 1,348,361,279 cycles # 2.906 GHz - 2,936,931,800 instructions # 2.18 insn per cycle - 
0.464548729 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3174) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224344383339586 -Relative difference = 2.777561258016791e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.149560e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.481402e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.481402e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.416544 sec - 1,218,162,673 cycles # 2.902 GHz - 2,791,024,677 instructions # 2.29 insn per cycle - 0.420753319 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2938) (512y: 110) (512z: 0) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224344383339586 -Relative difference = 2.777561258016791e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.487755e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.613101e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.613101e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.683117 sec - 1,240,069,199 cycles # 1.806 GHz - 1,831,774,203 instructions # 1.48 insn per cycle - 0.687252060 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1728) (512y: 114) (512z: 2312) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224344383339586 -Relative difference = 2.777561258016791e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + 2 FAILED TESTS diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index 2a285d3003..cfbd0f5b0a 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-25_18:44:34 +DATE: 2023-10-26_16:50:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.964309e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.404642e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.725368e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.438377 sec - 1,959,656,771 cycles # 3.015 GHz - 2,794,102,152 instructions # 1.43 insn per cycle - 0.707326409 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.707059e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.441351e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.838155e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.440265 sec + 1,976,352,261 cycles # 3.014 GHz + 2,799,587,867 instructions # 1.42 insn per cycle + 0.712947532 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,20 +60,20 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 3.773177e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.592536e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.977031e+07 ) sec^-1 -MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 -TOTAL : 0.515907 sec - 2,236,298,354 cycles # 2.994 GHz - 3,229,465,087 instructions # 1.44 insn per cycle - 0.804040631 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.441177e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.496551e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.947056e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.520648 sec + 2,308,723,413 cycles # 3.014 GHz + 3,274,512,364 instructions # 1.42 insn per cycle + 0.823500826 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.622436e-01 -Avg ME (F77/CUDA) = 0.56224344354681244 -Relative difference = 2.782658397826986e-07 +Avg ME (C++/CUDA) = 1.424749e-01 +Avg ME (F77/CUDA) = 0.14247482577104625 +Relative difference = 5.209967070245855e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check.exe -p 64 256 10 OMP= @@ -82,129 +82,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.106563e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.131055e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.131055e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 
+- 1.677228e+01 ) GeV^-2 -TOTAL : 1.502633 sec - 4,502,084,245 cycles # 2.989 GHz - 12,668,944,796 instructions # 2.81 insn per cycle - 1.506842459 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 659) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.146871e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.171774e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.171774e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.450328 sec + 4,495,331,122 cycles # 3.093 GHz + 12,668,907,573 instructions # 2.82 insn per cycle + 1.454308496 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 657) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224345333966297 -Relative difference = 2.608483884671339e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.101140e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.184323e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.184323e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.800328 sec - 2,487,833,256 cycles # 3.095 GHz - 6,905,789,276 instructions # 2.78 insn per cycle - 0.804489667 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3036) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224345333966297 -Relative difference = 2.608483884671339e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.392821e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.617920e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.617920e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.504314 sec - 1,480,047,698 cycles # 2.915 GHz - 3,168,067,665 instructions # 2.14 insn per cycle - 
0.508419797 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3284) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224344383339586 -Relative difference = 2.777561258016791e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.646089e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.903192e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.903192e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.470441 sec - 1,380,675,105 cycles # 2.913 GHz - 3,040,126,384 instructions # 2.20 insn per cycle - 0.474602540 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2936) (512y: 265) (512z: 0) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224344383339586 -Relative difference = 2.777561258016791e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.746832e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.887698e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.887698e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.620483 sec - 1,249,575,366 cycles # 2.003 GHz - 2,003,971,184 instructions # 1.60 insn per cycle - 0.624635502 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1520) (512y: 202) (512z: 2499) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 5.622436e-01 -Avg ME (F77/C++) = 0.56224344383339586 -Relative difference = 2.777561258016791e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + 2 FAILED TESTS From 7e57228a3b32b416a5b696588dd2b4d1f601a3a2 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 17:19:24 +0200 Subject: [PATCH 018/119] [oct23av] in CODEGEN, update gqttq ref file for runTest after fixing coupling order in PR #757 cp dump_SIGMA_SM_GUX_TTXUX_CPU_MadgraphTest.CompareMomentaAndME_0.txt ../../../CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/test/ref/ --- .../ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt | 1026 ++++++++--------- 1 file changed, 513 insertions(+), 513 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt index dd90c94acf..d596b33ae7 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt @@ -4,7 +4,7 @@ Event 0 Batch 0 2 2.647483690509011e+02 7.527657265342380e+01 -2.528976247704283e+02 -2.163164141117315e+01 3 6.252973211776936e+02 -5.721080498766041e+02 -1.578766990348905e+01 2.518727230515587e+02 4 6.099543097714056e+02 4.968314772231802e+02 2.686852946739174e+02 -2.302410816403857e+02 - ME 3.498510462248670e-04 + ME 6.254927412618323e-05 Event 1 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -12,7 +12,7 @@ Event 1 Batch 0 2 2.542827954151951e+02 1.482213322085297e+02 -1.988618298139058e+02 -5.607271498295615e+01 3 6.883656117507998e+02 1.265478873489434e+02 5.602777828023585e+02 3.793700749224233e+02 4 5.573515928340058e+02 -2.747692195574731e+02 -3.614159529884527e+02 -3.232973599394667e+02 - ME 7.257243108248426e-04 + ME 8.120933129385430e-05 Event 2 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -20,7 +20,7 @@ Event 2 Batch 0 2 4.301460683791099e+02 -3.656995432079240e+02 -2.257802895903974e+02 -1.768459985405173e+01 3 5.058528987551350e+02 2.755467101243707e+02 -2.034821274188550e+02 3.722313656043856e+02 4 5.640010328657550e+02 9.015283308355326e+01 4.292624170092524e+02 -3.545467657503340e+02 - ME 8.130044127338102e-04 + ME 1.104115154253218e-04 Event 3 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -28,7 +28,7 @@ Event 3 Batch 0 2 6.758793342627306e+02 1.455349847705337e+02 4.360940220328824e+02 -4.954335945799966e+02 3 3.008019460079605e+02 -1.607139834787174e+02 2.732727402256846e+01 2.527964523704278e+02 4 5.233187197293092e+02 1.517899870818368e+01 -4.634212960554508e+02 2.426371422095687e+02 - ME 7.753277710143621e-05 + ME 4.288074098478053e-05 Event 4 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -36,7 +36,7 @@ Event 4 Batch 0 2 3.540811678028369e+02 5.414642718170588e+01 -3.497885023717100e+02 -9.467915537920108e+00 3 7.415000547748695e+02 1.453779348794601e+00 7.277337852109665e+02 1.422102514562805e+02 4 4.044187774222938e+02 -5.560020653050046e+01 -3.779452828392566e+02 -1.327423359183605e+02 - ME 2.015528729476554e-04 + ME 1.304731284254719e-05 Event 5 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -44,7 +44,7 @@ Event 5 Batch 0 2 4.747467875786874e+02 2.462969907607520e+02 
3.713870243947702e+02 1.636886763636381e+02 3 3.438196236093862e+02 -2.056491112573935e+02 2.636029701703988e+02 8.021128807897365e+01 4 6.814335888119255e+02 -4.064787950335840e+01 -6.349899945651691e+02 -2.438999644426124e+02 - ME 6.140777519977192e-04 + ME 1.932390649640220e-04 Event 6 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -52,7 +52,7 @@ Event 6 Batch 0 2 5.623951200922340e+02 4.644673798421034e+02 3.089047820108764e+02 -7.166700647426805e+01 3 2.268243199894467e+02 1.761899852590787e+02 -7.114332369064562e+01 -1.238748914321566e+02 4 7.107805599183188e+02 -6.406573651011822e+02 -2.377614583202307e+02 1.955418979064247e+02 - ME 8.375373201653861e-04 + ME 1.929702539767979e-04 Event 7 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -60,7 +60,7 @@ Event 7 Batch 0 2 4.922243378496302e+02 2.878585072835456e+02 -1.441537488072182e+02 -3.723465794939189e+02 3 2.873990637609374e+02 -5.400981623596619e+01 -8.913204919452846e+01 -2.678369642286231e+02 4 7.203765983894325e+02 -2.338486910475794e+02 2.332857980017467e+02 6.401835437225419e+02 - ME 2.045598717079573e-03 + ME 6.280412585349807e-04 Event 8 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -68,7 +68,7 @@ Event 8 Batch 0 2 3.353309706037128e+02 -7.529439061162444e+01 -4.917829145606096e+01 -3.230466069128648e+02 3 7.169322705461503e+02 -1.597426278178964e+02 -1.460012137440150e+01 6.987567601563110e+02 4 4.477367588501368e+02 2.350370184295208e+02 6.377841283046249e+01 -3.757101532434461e+02 - ME 5.176104304710922e-03 + ME 1.424871539111113e-03 Event 9 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -76,7 +76,7 @@ Event 9 Batch 0 2 2.557626120875720e+02 2.000882245504951e+02 -5.276260741790070e+01 -1.503174088272977e+02 3 7.044202058180884e+02 -6.969679478438196e+02 -1.019614549623775e+02 
6.882422911146106e+00 4 5.398171820943397e+02 4.968797232933244e+02 1.547240623802783e+02 1.434349859161515e+02 - ME 6.498215193902510e-05 + ME 1.126010180174107e-05 Event 10 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -84,7 +84,7 @@ Event 10 Batch 0 2 3.466796552973448e+02 1.172124288883391e+02 -1.804077050554743e+02 2.718475489457261e+02 3 5.174471655316495e+02 -1.610456139025784e+02 -4.497410659869822e+02 -1.988689340353916e+02 4 6.358731791710053e+02 4.383318501423926e+01 6.301487710424565e+02 -7.297861491033444e+01 - ME 2.111165581639245e-04 + ME 8.292383053707579e-05 Event 11 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -92,7 +92,7 @@ Event 11 Batch 0 2 5.730783827248506e+02 -3.059484875398849e+01 3.466457017175528e+02 -4.553235612803233e+02 3 4.410994673708892e+02 -3.026218886155176e+02 -1.990641070399019e+01 3.203005892260318e+02 4 4.858221499042607e+02 3.332167373695061e+02 -3.267392910135624e+02 1.350229720542913e+02 - ME 5.129802099928076e-05 + ME 2.195851954305949e-05 Event 12 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -100,7 +100,7 @@ Event 12 Batch 0 2 2.275003875859171e+02 -1.247450244086003e+02 1.654605359856639e+02 9.390376067217456e+01 3 6.138170466352969e+02 3.363961838598331e+02 -2.139358085817026e+01 5.129827374509639e+02 4 6.586825657787861e+02 -2.116511594512328e+02 -1.440669551274935e+02 -6.068864981231385e+02 - ME 5.249882090061186e-02 + ME 3.843244876666358e-03 Event 13 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -108,7 +108,7 @@ Event 13 Batch 0 2 2.867684047377951e+02 7.055192702127012e+01 -2.028354730671929e+02 1.900429278217245e+02 3 6.990707050557395e+02 -5.605742285334717e+02 2.413419117565430e+02 -3.408965629057132e+02 4 5.141608902064654e+02 4.900223015122016e+02 -3.850643868935023e+01 1.508536350839886e+02 
- ME 6.422048006176975e-05 + ME 1.780264803426774e-05 Event 14 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -116,7 +116,7 @@ Event 14 Batch 0 2 3.551549262960330e+02 1.090410064132905e+02 3.205839746298526e+02 1.071027348074892e+02 3 5.276349775014137e+02 3.895763694332612e+02 -2.529209653865598e+02 2.503196099590423e+02 4 6.172100962025531e+02 -4.986173758465519e+02 -6.766300924329285e+01 -3.574223447665315e+02 - ME 7.422587439250419e-04 + ME 1.172793340377339e-04 Event 15 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -124,7 +124,7 @@ Event 15 Batch 0 2 5.846731991828425e+02 7.106081559720657e+01 3.900476102503054e+02 4.297161529048979e+02 3 2.829885923647302e+02 -2.767806781033229e+02 5.223342094943639e+01 -2.732525156618249e+01 4 6.323382084524278e+02 2.057198625061163e+02 -4.422810311997417e+02 -4.023909013387152e+02 - ME 1.255922738422332e-03 + ME 2.768931482482754e-04 Event 16 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -132,7 +132,7 @@ Event 16 Batch 0 2 7.471577506095512e+02 1.666056475215676e+02 -5.784682380714994e+02 -4.425627187781379e+02 3 6.589296733908160e+02 -1.235441202519038e+02 5.251239647671507e+02 3.783780998595698e+02 4 9.391257599963087e+01 -4.306152726966400e+01 5.334427330434855e+01 6.418461891856485e+01 - ME 5.526726502577864e-05 + ME 3.619360847906487e-05 Event 17 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -140,7 +140,7 @@ Event 17 Batch 0 2 3.567490993131759e+02 3.856364495163717e+01 -1.708845728849435e+02 -3.107752047682324e+02 3 6.453207560475681e+02 4.468356462873772e+02 2.282834847349605e+02 4.057874246326636e+02 4 4.979301446392561e+02 -4.853992912390142e+02 -5.739891185001719e+01 -9.501221986443127e+01 - ME 1.327369996555111e-04 + ME 3.400819398697452e-05 Event 18 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -148,7 +148,7 @@ Event 18 Batch 0 2 4.856701782481425e+02 2.509110753153842e+02 -3.498523763974107e+02 -2.247720379690150e+02 3 3.014847498930008e+02 -1.059425909901355e+02 -2.435847754696140e+02 -1.426032222348426e+02 4 7.128450718588564e+02 -1.449684843252488e+02 5.934371518670247e+02 3.673752602038576e+02 - ME 1.018512933050835e-03 + ME 1.704840743724005e-04 Event 19 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -156,7 +156,7 @@ Event 19 Batch 0 2 5.848213503304410e+02 -3.141116763848333e+02 -1.950442390378232e+02 4.531088295091878e+02 3 5.769300027107226e+02 5.020221748138873e+02 2.252239828724832e+02 -1.734823378963534e+02 4 3.382486469588368e+02 -1.879104984290540e+02 -3.017974383465995e+01 -2.796264916128346e+02 - ME 4.267017342507976e-03 + ME 1.566312636528492e-04 Event 20 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -164,7 +164,7 @@ Event 20 Batch 0 2 5.550938429889906e+02 -4.478597170519693e+02 -1.958065402362923e+02 -2.630791652090858e+02 3 5.585686897587655e+02 3.351111310173187e+02 -1.360174455686903e+02 4.256744830831253e+02 4 3.863374672522434e+02 1.127485860346507e+02 3.318239858049826e+02 -1.625953178740396e+02 - ME 2.768271682113988e-04 + ME 4.443882992804106e-05 Event 21 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -172,7 +172,7 @@ Event 21 Batch 0 2 6.296556563991993e+02 -3.477135312394776e+02 -1.376147989324512e+02 -5.065804111325866e+02 3 3.137568007204202e+02 1.080474571851863e+02 -2.382188236683311e+02 1.732653140250679e+02 4 5.565875428803801e+02 2.396660740542913e+02 3.758336226007823e+02 3.333150971075189e+02 - ME 5.519034669639832e-05 + ME 2.195742323347977e-05 Event 22 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -180,7 +180,7 @@ Event 22 Batch 0 2 
5.583338925767162e+02 2.471586228668332e+02 -1.597599499756147e+02 -4.744745610949311e+02 3 5.378723432497920e+02 9.149532098241385e+00 4.314513680009925e+02 3.210493120152684e+02 4 4.037937641734921e+02 -2.563081549650745e+02 -2.716914180253778e+02 1.534252490796627e+02 - ME 3.705224437539572e-05 + ME 1.393143104564022e-05 Event 23 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -188,7 +188,7 @@ Event 23 Batch 0 2 6.057340011976822e+02 6.848115528115159e+01 -5.207204912425279e+02 -3.017849923015605e+02 3 6.884459352783615e+02 -2.949639632364767e+01 6.680977958792448e+02 1.635026102131439e+02 4 2.058200635239559e+02 -3.898475895750391e+01 -1.473773046367171e+02 1.382823820884168e+02 - ME 2.946248744974782e-05 + ME 1.074117284514867e-05 Event 24 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -196,7 +196,7 @@ Event 24 Batch 0 2 4.702316790647315e+02 -1.210575128627593e+02 4.313728504035306e+02 -1.427598490831810e+02 3 7.180482366151732e+02 1.040047389253588e+02 -7.104588047260974e+02 4.956931953573291e+00 4 3.117200843200960e+02 1.705277393740069e+01 2.790859543225674e+02 1.378029171296075e+02 - ME 3.146557994448562e-05 + ME 5.213387311993420e-06 Event 25 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -204,7 +204,7 @@ Event 25 Batch 0 2 6.261365010744016e+02 -5.354018140499276e+02 -2.095559720530078e+02 2.479477970595020e+02 3 5.483958991041942e+02 5.199465180092641e+02 -9.843995208133505e+01 -1.438862620216537e+02 4 3.254675998214045e+02 1.545529604066345e+01 3.079959241343431e+02 -1.040615350378483e+02 - ME 1.657640191611339e-04 + ME 1.695323153210731e-05 Event 26 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -212,7 +212,7 @@ Event 26 Batch 0 2 4.635816356180677e+02 1.904702824079147e+02 -2.351549941335565e+02 -3.511853259118595e+02 3 
3.686385821486527e+02 -2.712527815845713e+02 -6.015354190959191e+01 -2.422764621809819e+02 4 6.677797822332798e+02 8.078249917665664e+01 2.953085360431485e+02 5.934617880928415e+02 - ME 3.250975879010065e-04 + ME 1.052251904460155e-04 Event 27 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -220,7 +220,7 @@ Event 27 Batch 0 2 2.851713673150520e+02 1.387976072955998e+02 1.520424011317634e+02 -1.973348453858079e+02 3 6.747356481771329e+02 2.426633222154767e+02 -4.300238522839811e+02 4.598501858640580e+02 4 5.400929845078149e+02 -3.814609295110765e+02 2.779814511522176e+02 -2.625153404782502e+02 - ME 4.155279516527712e-04 + ME 7.957109124083736e-05 Event 28 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -228,7 +228,7 @@ Event 28 Batch 0 2 1.977804200471008e+02 -1.803202618401224e+02 -8.082809162516925e+01 -8.277519444290659e+00 3 7.197523834069627e+02 3.152541965091956e+02 6.467033971658861e+02 -2.080867841663842e+01 4 5.824671965459364e+02 -1.349339346690732e+02 -5.658753055407169e+02 2.908619786092899e+01 - ME 1.172809031809504e-04 + ME 1.748013159755222e-05 Event 29 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -236,7 +236,7 @@ Event 29 Batch 0 2 6.123364628491765e+02 -3.746492624245139e+02 3.785128791537567e+02 -3.021950929683376e+02 3 4.056577755659300e+02 1.796205570313495e+00 -8.781658530568643e+01 3.960344074293251e+02 4 4.820057615848937e+02 3.728530568542006e+02 -2.906962938480702e+02 -9.383931446098750e+01 - ME 5.496242925842306e-04 + ME 3.085570985177973e-04 Event 30 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -244,7 +244,7 @@ Event 30 Batch 0 2 7.349194950356053e+02 7.241679607953656e+02 1.425637322816703e+01 1.244354634469208e+02 3 7.321421454671275e+02 -7.253765693071590e+02 -2.895970851972107e+01 -9.498573130653318e+01 4 
3.293835949726734e+01 1.208608511793152e+00 1.470333529155409e+01 -2.944973214038765e+01 - ME 5.147061682527938e-02 + ME 3.267107835672361e-04 Event 31 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -252,7 +252,7 @@ Event 31 Batch 0 2 1.718338270585457e+02 -1.344914872264095e+02 -1.021614404532311e+02 3.165350011824393e+01 3 6.313115253715935e+02 -2.849940593920691e+02 -7.916450257599642e+01 -5.577325610990745e+02 4 6.968546475698608e+02 4.194855466184786e+02 1.813259430292275e+02 5.260790609808306e+02 - ME 4.645345268703414e-04 + ME 1.685680846028125e-04 Event 32 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -260,7 +260,7 @@ Event 32 Batch 0 2 7.235176898898732e+02 -4.762113006241282e+02 -2.880822916693121e+01 5.439400065022983e+02 3 6.603902828461299e+02 4.672103814637360e+02 1.031050210016798e+02 -4.551913221650266e+02 4 1.160920272639969e+02 9.000919160392018e+00 -7.429679183474862e+01 -8.874868433727177e+01 - ME 4.476006843186700e-03 + ME 2.173072900368875e-04 Event 33 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -268,7 +268,7 @@ Event 33 Batch 0 2 4.786737271642286e+02 2.009638309376703e+02 4.090184839380260e+02 1.464443769121513e+02 3 3.795793219608408e+02 -6.057523839522271e+00 -8.244277697544294e+01 3.704685635647950e+02 4 6.417469508749314e+02 -1.949063070981495e+02 -3.265757069625828e+02 -5.169129404769461e+02 - ME 1.351709676586880e-02 + ME 3.322437827682699e-03 Event 34 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -276,7 +276,7 @@ Event 34 Batch 0 2 6.621583515140109e+02 -5.051303032557109e+02 -1.429543729176959e+02 4.035605363216953e+02 3 3.008522892707525e+02 8.677543723835062e+01 2.726747894692539e+02 -9.290092916351111e+01 4 5.369893592152367e+02 4.183548660173603e+02 -1.297204165515579e+02 -3.106596071581844e+02 - ME 
6.460854093057828e-04 + ME 9.294666462955388e-05 Event 35 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -284,7 +284,7 @@ Event 35 Batch 0 2 6.158114977149372e+02 2.502256147979830e+02 4.233348779616202e+00 5.626659943296695e+02 3 1.476397433483021e+02 -1.670550278282843e+01 -6.055370982200890e+01 1.336101351676488e+02 4 7.365487589367605e+02 -2.335201120151546e+02 5.632036104239269e+01 -6.962761294973184e+02 - ME 2.101231899117793e+00 + ME 5.450893768264864e-01 Event 36 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -292,7 +292,7 @@ Event 36 Batch 0 2 7.182456511154913e+02 -7.463771462544163e+01 -6.667773110518942e+02 2.563475070450518e+02 3 4.860008755751825e+02 -7.840660561780868e+01 4.141081959217036e+02 -2.419992919944378e+02 4 2.957534733093268e+02 1.530443202432501e+02 2.526691151301903e+02 -1.434821505061448e+01 - ME 9.644531209480271e-05 + ME 1.793136635525090e-05 Event 37 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -300,7 +300,7 @@ Event 37 Batch 0 2 5.672182018814327e+02 -2.031706828392718e+00 -5.267408190306547e+02 2.104197478372323e+02 3 4.664069288608281e+02 3.712365792892206e+02 2.604523782658950e+02 -1.090109358856581e+02 4 4.663748692577387e+02 -3.692048724608279e+02 2.662884407647597e+02 -1.014088119515743e+02 - ME 1.216876552012178e-04 + ME 1.885829354904198e-05 Event 38 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -308,7 +308,7 @@ Event 38 Batch 0 2 5.068057345787187e+02 4.883513201966852e+02 -7.570036138649985e+01 -1.124032737511800e+02 3 3.871140338254017e+02 -1.153787089711745e+02 -3.599073977747533e+02 -8.373585688177315e+01 4 6.060802315958797e+02 -3.729726112255107e+02 4.356077591612532e+02 1.961391306329531e+02 - ME 1.006736553113524e-04 + ME 2.004468492837133e-05 Event 39 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -316,7 +316,7 @@ Event 39 Batch 0 2 4.960337392567769e+02 -3.669089247616476e+02 2.651961920161227e+02 -2.027271347192069e+02 3 2.837821967046824e+02 -2.822567153069604e+02 -2.935613327724534e+01 -1.303560381865560e+00 4 7.201840640385411e+02 6.491656400686079e+02 -2.358400587388775e+02 2.040306951010725e+02 - ME 1.372807525012575e-03 + ME 2.738639406673165e-04 Event 40 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -324,7 +324,7 @@ Event 40 Batch 0 2 3.080730228651936e+02 -3.065830270999447e+02 -2.484308296331460e+01 1.728167064871203e+01 3 6.842346640746094e+02 4.630487823766367e+02 8.554554725666550e+01 -4.964321303112498e+02 4 5.076923130601962e+02 -1.564657552766919e+02 -6.070246429335075e+01 4.791504596625378e+02 - ME 4.192363154074847e-05 + ME 4.316353181637933e-05 Event 41 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -332,7 +332,7 @@ Event 41 Batch 0 2 1.602650851118221e+02 -1.258781096038287e+02 -9.817642232798531e+01 1.417706342452912e+01 3 7.146392966623014e+02 6.799675591776853e+02 -1.019163870176435e+02 1.948499239342933e+02 4 6.250956182258764e+02 -5.540894495738563e+02 2.000928093456288e+02 -2.090269873588226e+02 - ME 4.523507186168379e-04 + ME 6.118266190948034e-05 Event 42 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -340,7 +340,7 @@ Event 42 Batch 0 2 1.687893235969910e+02 1.289401357197518e+02 4.788693514682045e+01 9.783209393213438e+01 3 7.042017295435162e+02 -1.022058447296739e+02 -6.640064324330017e+02 -2.110675220936915e+02 4 6.270089468594927e+02 -2.673429099007782e+01 6.161194972861812e+02 1.132354281615572e+02 - ME 1.686356189272381e-04 + ME 4.091574289077424e-05 Event 43 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -348,7 +348,7 @@ Event 43 Batch 0 2 
4.729783670130408e+02 -7.983817933050123e+01 9.052957805204315e+01 4.573169538528310e+02 3 5.638402597824536e+02 4.785250044669658e+02 7.435095949863268e+01 -2.887933404236804e+02 4 4.631813732045056e+02 -3.986868251364646e+02 -1.648805375506758e+02 -1.685236134291506e+02 - ME 5.938757690519573e-04 + ME 2.654067897204875e-04 Event 44 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -356,7 +356,7 @@ Event 44 Batch 0 2 1.774791104122977e+02 -1.952605982635784e+01 6.371003613266313e+01 1.644949814321787e+02 3 7.194816205691247e+02 -3.678871192485065e+02 2.644831693887214e+01 -6.177486190667772e+02 4 6.030392690185777e+02 3.874131790748646e+02 -9.015835307153536e+01 4.532536376345985e+02 - ME 2.092333697371024e-04 + ME 1.390282437939369e-04 Event 45 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -364,7 +364,7 @@ Event 45 Batch 0 2 7.477488480180839e+02 -3.787655987618923e+02 1.634662296474455e+02 6.236535517992064e+02 3 7.458113398274099e+02 3.819163358711198e+02 -1.661042992235261e+02 -6.186952632673017e+02 4 6.439812154506046e+00 -3.150737109227506e+00 2.638069576080606e+00 -4.958288531904773e+00 - ME 9.377954359926730e-02 + ME 4.591622113024210e-03 Event 46 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -372,7 +372,7 @@ Event 46 Batch 0 2 3.243146757688279e+02 -4.392587631431587e+00 -2.496903827548322e+02 -2.069188895501946e+02 3 5.341608950426614e+02 -2.704482657861201e+02 2.711825143656835e+02 -3.723515022507137e+02 4 6.415244291885106e+02 2.748408534175518e+02 -2.149213161085120e+01 5.792703918009084e+02 - ME 1.879047912263320e-04 + ME 7.845213441237594e-05 Event 47 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -380,7 +380,7 @@ Event 47 Batch 0 2 6.742198761450968e+02 -3.282965096491567e+02 5.301803926793563e+02 -2.563251730900704e+02 3 
6.484148720042493e+02 3.527030795571956e+02 -3.975273148506379e+02 3.715029176935211e+02 4 1.773652518506536e+02 -2.440656990803885e+01 -1.326530778287185e+02 -1.151777446034508e+02 - ME 1.136665455996279e-03 + ME 5.254395938575492e-05 Event 48 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -388,7 +388,7 @@ Event 48 Batch 0 2 7.321401810535270e+02 -1.843482647928687e+02 4.412348098999295e+02 5.543976952635381e+02 3 7.293058265076229e+02 2.182722651304250e+02 -4.435200216702997e+02 -5.362221528717154e+02 4 3.855399243885009e+01 -3.392400033755636e+01 2.285211770370227e+00 -1.817554239182278e+01 - ME 2.278442596973106e-03 + ME 2.330290263553363e-04 Event 49 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -396,7 +396,7 @@ Event 49 Batch 0 2 3.511117284856090e+02 -3.272266866652174e+02 5.199533974843238e+01 1.161835877338140e+02 3 7.326526490901410e+02 6.615045961628415e+02 -2.993354007364775e+02 -9.792799058578566e+01 4 4.162356224242500e+02 -3.342779094976241e+02 2.473400609880451e+02 -1.825559714802838e+01 - ME 8.806759903737244e-05 + ME 7.863589115869630e-06 Event 50 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -404,7 +404,7 @@ Event 50 Batch 0 2 7.322170903075255e+02 2.740692406080844e+02 1.952596610981929e+01 -6.787095515302592e+02 3 3.078559130669522e+02 -1.663333363406682e+02 8.625456119089935e+01 2.442716420418760e+02 4 4.599269966255216e+02 -1.077359042674159e+02 -1.057805273007185e+02 4.344379094883832e+02 - ME 7.579426018596712e-05 + ME 6.765758192049922e-05 Event 51 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -412,7 +412,7 @@ Event 51 Batch 0 2 3.473696038265160e+02 -2.922314643158454e+02 -6.759614889845234e+01 -1.752060888796554e+02 3 5.389399151999496e+02 -2.449040872454050e+02 9.346474502284556e+01 4.708954891311219e+02 4 
6.136904809735339e+02 5.371355515612503e+02 -2.586859612439322e+01 -2.956894002514666e+02 - ME 4.687828430739845e-04 + ME 2.035652280642710e-04 Event 52 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -420,7 +420,7 @@ Event 52 Batch 0 2 6.818614816439094e+02 5.970116833066725e+02 3.013730734325877e+02 1.329902280423528e+02 3 2.108623144448950e+02 -4.198344769951654e+00 -1.698802183673395e+02 -1.248439063859965e+02 4 6.072762039111957e+02 -5.928133385367207e+02 -1.314928550652483e+02 -8.146321656356344e+00 - ME 1.636869658416981e-04 + ME 4.047005152694340e-05 Event 53 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -428,7 +428,7 @@ Event 53 Batch 0 2 5.157714002491656e+02 -5.140718537651751e+02 -4.182413977701254e+01 1.003899065692042e+00 3 5.148181840855221e+02 2.868792199999327e+02 1.974924151010656e+02 3.791237552236646e+02 4 4.694104156653124e+02 2.271926337652422e+02 -1.556682753240530e+02 -3.801276542893567e+02 - ME 3.182294022992135e-03 + ME 1.547751010871262e-04 Event 54 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -436,7 +436,7 @@ Event 54 Batch 0 2 6.433410767101752e+02 2.586883950027282e+02 -5.809813083922761e+02 9.710187728524583e+01 3 6.928799734080563e+02 -1.579832568796111e+02 6.405510983559769e+02 -2.117031848853746e+02 4 1.637789498817686e+02 -1.007051381231171e+02 -5.956978996370073e+01 1.146013076001288e+02 - ME 3.280140142776471e-05 + ME 1.302720215079095e-05 Event 55 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -444,7 +444,7 @@ Event 55 Batch 0 2 7.193759752058201e+02 -3.536444481659258e+02 -7.212523476050659e+01 -6.222823703878202e+02 3 5.307053661742267e+02 2.409461639849982e+02 1.900944302490854e+02 4.329633233142391e+02 4 2.499186586199529e+02 1.126982841809279e+02 -1.179691954885788e+02 1.893190470735813e+02 - ME 
3.939174164528502e-05 + ME 3.087450123310173e-05 Event 56 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -452,7 +452,7 @@ Event 56 Batch 0 2 3.858864959547013e+02 1.815174721437793e+02 3.218581876578407e+02 -1.112074732396182e+02 3 4.484505297447187e+02 -3.244105157450006e+02 2.934585578803474e+02 -9.873079412811623e+01 4 6.656629743005793e+02 1.428930436012212e+02 -6.153167455381879e+02 2.099382673677345e+02 - ME 2.326138625268126e-04 + ME 4.275995533811995e-05 Event 57 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -460,7 +460,7 @@ Event 57 Batch 0 2 5.284589752749192e+02 3.868194647882293e+02 -1.709996888155517e+02 3.168575336559793e+02 3 6.299868555278971e+02 -1.587414880613579e+02 2.327134172236622e+02 -5.634971548731005e+02 4 3.415541691971835e+02 -2.280779767268714e+02 -6.171372840811043e+01 2.466396212171210e+02 - ME 3.474853710074164e-05 + ME 2.211478424702745e-05 Event 58 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -468,7 +468,7 @@ Event 58 Batch 0 2 6.172037319760957e+02 -2.246119436411400e+02 -2.286037628748728e+01 5.744278237820342e+02 3 5.117934503257735e+02 1.262762853074207e+02 3.215736628881853e+02 -3.775939815489577e+02 4 3.710028176981306e+02 9.833565833371921e+01 -2.987132866006979e+02 -1.968338422330765e+02 - ME 6.183305374210038e-04 + ME 1.857727050583390e-04 Event 59 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -476,7 +476,7 @@ Event 59 Batch 0 2 7.388935626701858e+02 -3.912134623809441e+02 -5.457789630286015e+02 3.082872805076099e+02 3 1.936051438730608e+02 1.561492575196544e+02 8.304673385628061e+01 -7.876294246644987e+01 4 5.675012934567535e+02 2.350642048612896e+02 4.627322291723209e+02 -2.295243380411600e+02 - ME 4.116991424436793e-04 + ME 6.745345781245190e-05 Event 60 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -484,7 +484,7 @@ Event 60 Batch 0 2 7.258141426633659e+02 -5.584991156701968e+02 1.635894950857984e+02 4.337319270970709e+02 3 2.789580074371136e+02 2.331554478032953e+02 6.512410160032128e+01 -1.386180308029247e+02 4 4.952278498995201e+02 3.253436678669015e+02 -2.287135966861195e+02 -2.951138962941461e+02 - ME 7.295672680059989e-04 + ME 9.170244877267536e-05 Event 61 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -492,15 +492,15 @@ Event 61 Batch 0 2 5.906141202026897e+02 4.485275282318680e+02 -2.043613424290570e+02 3.253990429020988e+02 3 4.163572165237975e+02 -4.021600557528675e+02 -4.112755461437413e+01 9.964509802161204e+01 4 4.930286632735124e+02 -4.636747247900051e+01 2.454888970434311e+02 -4.250441409237108e+02 - ME 5.845307122272604e-03 + ME 1.836685601489136e-04 Event 62 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 1 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 -7.500000000000000e+02 2 7.346180891175762e+02 3.693463141798367e+02 7.549194961263061e+01 -6.305140780380819e+02 3 4.420621433230785e+02 -2.806743363126464e+02 3.467380983154045e+01 3.397625382625571e+02 - 4 3.233197675593453e+02 -8.867197786719018e+01 -1.101657594441711e+02 2.907515397755249e+02 - ME 3.963631774242112e-05 + 4 3.233197675593452e+02 -8.867197786719018e+01 -1.101657594441711e+02 2.907515397755248e+02 + ME 3.490896135533686e-05 Event 63 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -508,7 +508,7 @@ Event 63 Batch 0 2 6.451039732729313e+02 -2.415045377667665e+02 1.990362537024482e+02 -5.641092662620230e+02 3 3.260870385294104e+02 2.061141051805976e+02 -2.496695602716584e+02 3.892098426606745e+01 4 5.288089881976584e+02 3.539043258616898e+01 5.063330656921013e+01 5.251882819959555e+02 - ME 4.832224458906289e-04 + ME 4.428689394331114e-04 
Event 64 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -516,7 +516,7 @@ Event 64 Batch 0 2 5.275973380665291e+02 -6.064553482667328e+01 4.309976929667101e+02 -2.981980196075213e+02 3 5.799838776791826e+02 3.279821268626862e+02 -1.824214634122377e+02 4.421893627315650e+02 4 3.924187842542880e+02 -2.673365920360130e+02 -2.485762295544724e+02 -1.439913431240437e+02 - ME 2.175617604507715e-04 + ME 4.205989960223865e-05 Event 65 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -524,7 +524,7 @@ Event 65 Batch 0 2 6.480172869826541e+02 2.720879118036237e+02 -5.153900904044360e+02 -2.833154199679406e+02 3 7.075023253568394e+02 -3.440299289242928e+02 4.709796137500282e+02 4.004761563708322e+02 4 1.444803876605064e+02 7.194201712066916e+01 4.441047665440794e+01 -1.171607364028916e+02 - ME 4.989956280474397e-03 + ME 1.103463366798231e-04 Event 66 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -532,7 +532,7 @@ Event 66 Batch 0 2 5.472978185025795e+02 4.857452785131266e+02 -2.223654169683454e+02 -1.189119332799752e+02 3 3.203062148499983e+02 1.169702135976477e+02 2.922172461416276e+02 -5.935588816501102e+01 4 6.323959666474225e+02 -6.027154921107744e+02 -6.985182917328234e+01 1.782678214449862e+02 - ME 1.346850069104626e-04 + ME 2.913920636000223e-05 Event 67 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -540,7 +540,7 @@ Event 67 Batch 0 2 4.264671493042950e+02 1.195959046886511e+02 -2.647539231733031e+02 3.122121220929446e+02 3 5.059969655247565e+02 3.777175441887567e+02 -7.608313561896731e+00 -3.366073372596325e+02 4 5.675358851709483e+02 -4.973134488774080e+02 2.723622367352000e+02 2.439521516668857e+01 - ME 9.763221977220593e-05 + ME 4.009347519102052e-05 Event 68 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 
@@ -548,7 +548,7 @@ Event 68 Batch 0 2 5.996105691520872e+02 -3.814725562071957e+02 -3.417794545715573e+02 3.117664637712124e+02 3 2.164196744806214e+02 1.292759463548889e+02 -1.184749651041615e+02 1.268419798013013e+02 4 6.839697563672917e+02 2.521966098523068e+02 4.602544196757188e+02 -4.386084435725137e+02 - ME 2.936083529685707e-03 + ME 6.175473672610461e-04 Event 69 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -556,7 +556,7 @@ Event 69 Batch 0 2 4.950546755511076e+02 -1.873718558932053e+02 -4.578972175289678e+02 -1.735101101888631e+01 3 4.768584394819691e+02 -1.830244097668608e+02 2.985566003539791e+02 -3.236664843936508e+02 4 5.280868849669230e+02 3.703962656600661e+02 1.593406171749887e+02 3.410174954125370e+02 - ME 5.234212626720279e-05 + ME 1.367292435278724e-05 Event 70 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -564,7 +564,7 @@ Event 70 Batch 0 2 6.918343395272258e+02 6.895733556028865e+02 -5.391072441382606e+01 -1.473005040127906e+01 3 2.169590284692678e+02 -1.127375202028747e+02 1.807969800614662e+02 4.091361110301506e+01 4 5.912066320035063e+02 -5.768358354000119e+02 -1.268862556476402e+02 -2.618356070173603e+01 - ME 1.591740981760110e-04 + ME 3.526540789264872e-05 Event 71 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -572,7 +572,7 @@ Event 71 Batch 0 2 5.156371334918733e+02 1.547202099034306e+02 -4.807172487652236e+02 1.041836686949964e+02 3 3.718518305526428e+02 -8.969821893462726e+01 -7.521366892975188e+01 -3.529460545344468e+02 4 6.125110359554843e+02 -6.502199096880338e+01 5.559309176949756e+02 2.487623858394504e+02 - ME 1.125100552069616e-04 + ME 2.860782472746935e-05 Event 72 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -580,7 +580,7 @@ Event 72 Batch 0 2 2.110577464974889e+02 5.009520239746097e+01 -1.453533690489527e+02 
-1.445968227848547e+02 3 7.317124633441161e+02 -4.429659627226336e+02 5.264774879404380e+02 2.490095170354977e+02 4 5.572297901583943e+02 3.928707603251725e+02 -3.811241188914850e+02 -1.044126942506430e+02 - ME 1.823320413479066e-04 + ME 2.666441446531882e-05 Event 73 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -588,7 +588,7 @@ Event 73 Batch 0 2 3.932257450488246e+02 3.105005764664288e+01 -2.932679039283983e+02 2.601082794045340e+02 3 5.658879124646472e+02 3.645905401293642e+02 4.244364556305355e+02 8.459646951004230e+01 4 5.408863424865281e+02 -3.956405977760074e+02 -1.311685517021372e+02 -3.447047489145762e+02 - ME 8.953763196089171e-04 + ME 7.825486685913998e-05 Event 74 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -596,7 +596,7 @@ Event 74 Batch 0 2 1.374854102925440e+02 7.785209805930555e+01 4.289805712042688e+01 1.048858692406466e+02 3 6.381281910764947e+02 -1.004137270491618e+02 -1.591026937267357e+02 6.097630724433484e+02 4 7.243863986309617e+02 2.256162898985645e+01 1.162046366063089e+02 -7.146489416839951e+02 - ME 1.395531292378326e+01 + ME 1.919068868336380e+00 Event 75 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -604,7 +604,7 @@ Event 75 Batch 0 2 5.936883054156938e+02 -3.438525101293572e+00 -2.706855443967301e+02 5.283780053968293e+02 3 5.912298912592892e+02 1.109657062166288e+02 4.832067437414102e+02 -3.221034603433170e+02 4 3.150818033250173e+02 -1.075271811153352e+02 -2.125211993446803e+02 -2.062745450535123e+02 - ME 1.379908325625592e-03 + ME 1.642862842910461e-04 Event 76 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -612,7 +612,7 @@ Event 76 Batch 0 2 6.619486867997672e+02 2.801967015359571e+01 2.136411519593737e+02 6.258980909300584e+02 3 1.201252731414031e+02 2.274423842261747e+01 -8.754996679960182e+01 
7.904292618103446e+01 4 7.179260400588295e+02 -5.076390857621322e+01 -1.260911851597719e+02 -7.049410171110928e+02 - ME 5.870483941147637e+00 + ME 7.362202483972824e-01 Event 77 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -620,7 +620,7 @@ Event 77 Batch 0 2 7.456676259451606e+02 -7.346624001550109e+02 6.511229493320701e+01 -1.097804865615983e+02 3 1.284204120828029e+02 1.251494694834492e+02 2.867183268690428e+01 2.708973588335753e+00 4 6.259119619720373e+02 6.095129306715618e+02 -9.378412762011118e+01 1.070715129732624e+02 - ME 1.662775178233579e-04 + ME 4.400761364703354e-05 Event 78 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -628,7 +628,7 @@ Event 78 Batch 0 2 7.040158920877628e+02 6.911264613612161e+02 -6.659640240533211e+01 -1.163937709034254e+02 3 5.185438503615327e+02 -4.976050220224222e+02 -1.270913363611937e+02 7.158742227342900e+01 4 2.774402575507044e+02 -1.935214393387939e+02 1.936877387665258e+02 4.480634862999637e+01 - ME 5.328004946641866e-05 + ME 9.352750539306009e-06 Event 79 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -636,7 +636,7 @@ Event 79 Batch 0 2 6.777589592768838e+02 1.742725197144059e+02 -4.776543849198212e+01 6.532264221831092e+02 3 5.725002211294488e+02 -1.786302554544233e+02 -1.627852110918317e+02 -5.189881598643107e+02 4 2.497408195936665e+02 4.357735740017474e+00 2.105506495838138e+02 -1.342382623187985e+02 - ME 9.179311580246363e-04 + ME 3.598558866345749e-04 Event 80 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -644,7 +644,7 @@ Event 80 Batch 0 2 6.240819586861880e+02 4.679310297228965e+02 -4.118464023828053e+02 -3.002304821964348e+01 3 6.688675489057649e+02 -5.494372353172420e+02 3.251429131208653e+02 1.994607943266771e+02 4 2.070504924080468e+02 8.150620559434545e+01 8.670348926194001e+01 
-1.694377461070337e+02 - ME 3.575286400583300e-03 + ME 5.382869847396148e-05 Event 81 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -652,7 +652,7 @@ Event 81 Batch 0 2 5.198056748722776e+02 1.034797897616987e+02 -2.885605608993972e+02 4.197888462474007e+02 3 5.672098642055398e+02 -4.160331805498524e+02 2.087659545613757e+01 -3.849773895903518e+02 4 4.129844609221831e+02 3.125533907881537e+02 2.676839654432596e+02 -3.481145665704891e+01 - ME 1.018936778946332e-04 + ME 3.612255741613163e-05 Event 82 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -660,7 +660,7 @@ Event 82 Batch 0 2 2.057598609140514e+02 6.385349666266659e+01 -2.765433460911293e+01 1.936364870179372e+02 3 6.235840147705873e+02 4.654039114453895e+02 -3.828889383639962e+02 -1.601633028106901e+02 4 6.706561243153629e+02 -5.292574081080552e+02 4.105432729731107e+02 -3.347318420724690e+01 - ME 6.930850923220120e-04 + ME 3.172622561805068e-04 Event 83 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -668,7 +668,7 @@ Event 83 Batch 0 2 6.583322583736492e+02 1.865539504254553e+02 -1.926584839569474e+02 6.012334775737429e+02 3 3.620902826842561e+02 -3.107067244571256e+02 -1.177956631152976e+01 -1.855584705935048e+02 4 4.795774589420946e+02 1.241527740316703e+02 2.044380502684771e+02 -4.156750069802382e+02 - ME 8.385116111585099e-03 + ME 6.756528802944365e-04 Event 84 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -676,7 +676,7 @@ Event 84 Batch 0 2 4.849329564663161e+02 -2.622178945286150e+02 4.068620488841210e+02 -2.941124332559817e+01 3 4.737588937677760e+02 6.014532316188546e+01 -1.333934272225749e+02 4.505954095412368e+02 4 5.413081497659077e+02 2.020725713667296e+02 -2.734686216615461e+02 -4.211841662156386e+02 - ME 5.162990427398554e-03 + ME 1.017468409980153e-03 Event 85 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -684,7 +684,7 @@ Event 85 Batch 0 2 7.085742632080854e+02 -2.174614026040270e+02 -5.283468657604088e+02 -4.190914152061853e+02 3 5.315764222715953e+02 8.528530557199829e+00 3.820092234108129e+02 3.695533927738615e+02 4 2.598493145203187e+02 2.089328720468272e+02 1.463376423495959e+02 4.953802243232388e+01 - ME 6.335517668355978e-05 + ME 1.894143727100354e-05 Event 86 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -692,7 +692,7 @@ Event 86 Batch 0 2 1.724500140939190e+02 1.231518677708316e+02 -1.121928207497684e+01 1.201946443701656e+02 3 7.028475062724231e+02 -6.467096040851287e+01 -4.553168759141600e+02 -5.315061866629339e+02 4 6.247024796336580e+02 -5.848090736231883e+01 4.665361579891369e+02 4.113115422927684e+02 - ME 1.165531323127631e-04 + ME 5.311384036847167e-05 Event 87 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -700,7 +700,7 @@ Event 87 Batch 0 2 1.942099203196796e+02 -7.751148196958454e+01 -1.356691819650310e+02 -1.153400900745028e+02 3 7.314670447251594e+02 1.724617634710876e+02 7.020747158546045e+02 1.113196793791551e+02 4 5.743230349551606e+02 -9.495028150150301e+01 -5.664055338895735e+02 4.020410695347637e+00 - ME 1.237609879052555e-04 + ME 1.874087134673149e-05 Event 88 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -708,7 +708,7 @@ Event 88 Batch 0 2 6.382497024023744e+02 2.632142028760094e+02 -5.613974181649784e+02 1.513733956108635e+02 3 3.997044228265544e+02 -5.264940326118349e+01 3.435187961344461e+02 1.974500004195773e+02 4 4.620458747710724e+02 -2.105647996148253e+02 2.178786220305324e+02 -3.488233960304407e+02 - ME 1.863821317258467e-03 + ME 9.699609186666195e-05 Event 89 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -716,7 +716,7 @@ 
Event 89 Batch 0 2 1.419006640093282e+02 -8.677155154367878e+01 6.457545216231642e+01 -9.185046144153740e+01 3 7.131224514048055e+02 5.460003286026870e+02 -4.154556538506974e+02 -1.944836022569670e+02 4 6.449768845858670e+02 -4.592287770590082e+02 3.508802016883808e+02 2.863340636985044e+02 - ME 1.136115495374629e-04 + ME 2.974199953519439e-05 Event 90 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -724,7 +724,7 @@ Event 90 Batch 0 2 5.730615760623938e+02 -6.017783679015001e+01 -5.202921970507185e+02 -2.325386583054727e+02 3 5.389913703864468e+02 -6.302812531165206e+01 2.446311215742109e+02 4.761247390423042e+02 4 3.879470535511588e+02 1.232059621018019e+02 2.756610754765076e+02 -2.435860807368315e+02 - ME 1.094721025518881e-03 + ME 1.667772733247344e-04 Event 91 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -732,7 +732,7 @@ Event 91 Batch 0 2 4.546745139784350e+02 -1.470341619195494e+02 -1.726383255301703e+02 -3.940886669878754e+02 3 5.110976540119647e+02 -2.482119727393537e+02 -1.865817698532448e+02 4.059542728975803e+02 4 5.342278320096005e+02 3.952461346589030e+02 3.592200953834151e+02 -1.186560590970480e+01 - ME 8.789722587847313e-05 + ME 4.420313882846059e-05 Event 92 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -740,7 +740,7 @@ Event 92 Batch 0 2 6.683728375977241e+02 -1.148152650923627e+02 3.458291789782991e+02 5.603051703379153e+02 3 2.872567998557088e+02 1.635098024620329e+02 7.847331657016402e+01 -2.227620976482501e+02 4 5.443703625465666e+02 -4.869453736967034e+01 -4.243024955484631e+02 -3.375430726896653e+02 - ME 8.270083568815311e-04 + ME 2.265252332392545e-04 Event 93 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -748,7 +748,7 @@ Event 93 Batch 0 2 5.666948073002088e+02 5.408074886689032e+01 5.639942928586390e+02 
-1.134525653745258e+01 3 6.168025492529713e+02 2.439040545997395e+02 -5.541969602989467e+02 1.175666879272316e+02 4 3.165026434468199e+02 -2.979848034666298e+02 -9.797332559692304e+00 -1.062214313897791e+02 - ME 1.664960428447917e-04 + ME 1.251778043268437e-05 Event 94 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -756,7 +756,7 @@ Event 94 Batch 0 2 4.964349376711385e+02 8.445930034540567e+01 -2.409007074648561e+02 -4.257712097695705e+02 3 5.660980232871289e+02 1.373833465612049e+02 5.210669225216058e+02 1.734417778711397e+02 4 4.374670390417324e+02 -2.218426469066104e+02 -2.801662150567495e+02 2.523294318984307e+02 - ME 3.431641292834382e-05 + ME 1.007141026120618e-05 Event 95 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -764,7 +764,7 @@ Event 95 Batch 0 2 7.117074025057361e+02 -3.227984571262278e+02 4.276971164854593e+02 -4.684055501468919e+02 3 1.264078228725325e+02 8.675876182178401e+01 5.074873328843479e+01 7.665781760618943e+01 4 6.618847746217315e+02 2.360396953044439e+02 -4.784458497738940e+02 3.917477325407025e+02 - ME 2.121249861094822e-04 + ME 8.653822330208906e-05 Event 96 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -772,7 +772,7 @@ Event 96 Batch 0 2 7.329769441659936e+02 -9.642859092211874e+01 6.903981466332597e+02 -2.265107649915406e+02 3 3.937873938465678e+02 -4.837693103302091e+01 -3.847118583018795e+02 6.873841850241256e+01 4 3.732356619874385e+02 1.448055219551397e+02 -3.056862883313802e+02 1.577723464891279e+02 - ME 3.473186069800973e-05 + ME 9.822975749896163e-06 Event 97 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -780,7 +780,7 @@ Event 97 Batch 0 2 3.394989963266853e+01 6.003767577498499e+00 -2.078495220615399e+01 2.616364312804199e+01 3 7.377311980366451e+02 -5.308290258162607e+02 4.681853362634530e+02 
2.080152802450354e+02 4 7.283189023306861e+02 5.248252582387622e+02 -4.474003840572991e+02 -2.341789233730774e+02 - ME 2.063600678642283e-02 + ME 2.729355315721549e-03 Event 98 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -788,7 +788,7 @@ Event 98 Batch 0 2 2.496912687496082e+02 -2.485814905959506e+02 -5.435228288348340e-01 -2.350907922099247e+01 3 7.458289852530976e+02 7.373315781279124e+02 9.801365830907572e+01 -5.473885205171283e+01 4 5.044797459972945e+02 -4.887500875319618e+02 -9.747013548024091e+01 7.824793127270530e+01 - ME 6.800308216903296e-05 + ME 8.091578731489026e-06 Event 99 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -796,7 +796,7 @@ Event 99 Batch 0 2 1.698125854886770e+02 8.336002034290719e+01 8.774494220182726e+01 -1.191144253093525e+02 3 6.496622934125946e+02 5.714329899004554e+02 -6.230613627727958e+01 3.027265745152471e+02 4 6.805251210987285e+02 -6.547930102433627e+02 -2.543880592454771e+01 -1.836121492058947e+02 - ME 6.115029137493471e-04 + ME 1.856310681395454e-04 Event 100 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -804,7 +804,7 @@ Event 100 Batch 0 2 6.141460480129781e+02 -5.842473718080511e+02 -5.092222124447417e+01 1.823110095657221e+02 3 3.909476383151783e+02 2.539115798088024e+02 -2.930333502072385e+02 -5.000421191795168e+01 4 4.949063136718440e+02 3.303357919992488e+02 3.439555714517127e+02 -1.323067976477707e+02 - ME 1.550407956048336e-04 + ME 2.380755205932631e-05 Event 101 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -812,7 +812,7 @@ Event 101 Batch 0 2 7.469346538870473e+02 3.524232024688497e+02 -1.488240016505349e+02 -6.415299525912136e+02 3 6.502268999047169e+02 -2.777200960400715e+02 1.351761574712158e+02 5.721835160737410e+02 4 1.028384462082358e+02 -7.470310642877820e+01 1.364784417931910e+01 
6.934643651747267e+01 - ME 1.080054053054822e-04 + ME 7.777208667430486e-05 Event 102 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -820,7 +820,7 @@ Event 102 Batch 0 2 7.426790432885583e+02 -3.141071077544728e+02 6.615000409077074e+02 1.238005738162371e+02 3 6.735764515788642e+01 -4.139700837311957e+00 -5.533298776898177e+01 -3.818606686673834e+01 4 6.899633115535552e+02 3.182468085917849e+02 -6.061670531387255e+02 -8.561450694949879e+01 - ME 6.292262541994918e-04 + ME 1.796768498680773e-04 Event 103 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -828,7 +828,7 @@ Event 103 Batch 0 2 4.837874798175253e+02 -2.731724972668680e+02 1.247027290420595e+02 -3.793103501549069e+02 3 4.466406321977809e+02 -2.904538080082218e+02 -1.536665846758871e+02 3.025078850172422e+02 4 5.695718879846930e+02 5.636263052750895e+02 2.896385563382777e+01 7.680246513766473e+01 - ME 8.140894767450013e-05 + ME 2.998858312831636e-05 Event 104 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -836,7 +836,7 @@ Event 104 Batch 0 2 5.788466572679498e+02 3.572346730226224e+02 -3.682137844992378e+02 2.680773207965347e+02 3 2.925711988065158e+02 2.155069407513812e+02 1.697995838195863e+02 -1.016010147279926e+02 4 6.285821439255348e+02 -5.727416137740034e+02 1.984142006796517e+02 -1.664763060685422e+02 - ME 2.849770726480251e-04 + ME 7.634200862908681e-05 Event 105 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -844,7 +844,7 @@ Event 105 Batch 0 2 3.361125455083114e+02 2.619004058447622e+02 4.338373361330959e+01 -2.061496357605196e+02 3 5.299016201311088e+02 2.892532450564946e+02 2.091058919093095e+02 3.916669672191841e+02 4 6.339858343605800e+02 -5.511536509012568e+02 -2.524896255226191e+02 -1.855173314586645e+02 - ME 2.866662317167052e-04 + ME 1.089382545947932e-04 Event 106 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -852,7 +852,7 @@ Event 106 Batch 0 2 3.578050478863485e+02 -2.265838270225943e+02 2.740910124726658e+02 -3.947579646386072e+01 3 5.202885196186892e+02 1.412729374205232e+02 1.631578432376887e+02 4.734148487210871e+02 4 6.219064324949621e+02 8.531088960207101e+01 -4.372488557103545e+02 -4.339390522572265e+02 - ME 1.912263829178338e-03 + ME 4.548955126640399e-04 Event 107 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -860,7 +860,7 @@ Event 107 Batch 0 2 5.409822745993889e+02 9.278463733038997e+01 5.102180459532771e+02 -1.540466750365499e+02 3 2.501852297905710e+02 1.682301834486207e+02 1.474652503315489e+02 1.120056004263085e+02 4 7.088324956100398e+02 -2.610148207790107e+02 -6.576832962848259e+02 4.204107461024153e+01 - ME 7.096163321035572e-04 + ME 2.159102073406285e-04 Event 108 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -868,7 +868,7 @@ Event 108 Batch 0 2 6.835202199428555e+02 6.670011709444186e+02 6.653656309718588e+01 1.337243986739828e+02 3 2.377887385005082e+02 -1.098327419601477e+02 7.667443498831059e+01 -1.964720946353502e+02 4 5.786910415566365e+02 -5.571684289842709e+02 -1.432109980854965e+02 6.274769596136723e+01 - ME 1.143500637563713e-04 + ME 2.960130886583330e-05 Event 109 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -876,7 +876,7 @@ Event 109 Batch 0 2 5.978180281189351e+02 4.291222314737005e+02 2.249703559956599e+02 3.501840146583366e+02 3 3.585061336071061e+02 -3.227227650115256e+02 1.541688059097761e+02 2.467071262824850e+01 4 5.436758382739589e+02 -1.063994664621746e+02 -3.791391619054360e+02 -3.748547272865851e+02 - ME 1.159187207430584e-03 + ME 1.100286424576873e-04 Event 110 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -884,7 +884,7 
@@ Event 110 Batch 0 2 7.073952645543156e+01 -4.753982451958468e+01 4.872856968801237e+01 -1.922426029646691e+01 3 7.438039776014969e+02 1.707202332282495e+02 -7.225114374584515e+02 4.556513803361385e+01 4 6.854564959430718e+02 -1.231804087086648e+02 6.737828677704391e+02 -2.634087773714689e+01 - ME 5.177444310012934e-04 + ME 1.052942530962122e-04 Event 111 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -892,7 +892,7 @@ Event 111 Batch 0 2 5.206822291802364e+02 -3.873336848644893e+02 2.415505427333673e+02 -2.504714268307115e+02 3 5.478000561519707e+02 4.687653961676166e+02 -2.245690260344170e+02 -1.729527606656598e+02 4 4.315177146677929e+02 -8.143171130312743e+01 -1.698151669895031e+01 4.234241874963712e+02 - ME 1.041517236520828e-04 + ME 8.545692640795734e-05 Event 112 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -900,7 +900,7 @@ Event 112 Batch 0 2 3.610471238372959e+02 2.563298943277285e+02 9.635756626046441e+01 -2.352981732387216e+02 3 6.139063356201009e+02 1.031778254919422e+02 -4.257030126280926e+02 4.301305270271111e+02 4 5.250465405426031e+02 -3.595077198196707e+02 3.293454463676283e+02 -1.948323537883896e+02 - ME 2.333567140730066e-04 + ME 5.572029836371622e-05 Event 113 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -908,7 +908,7 @@ Event 113 Batch 0 2 5.886653054136124e+02 3.035646198144377e+02 3.278619896967805e+02 -3.832517176826292e+02 3 5.420023902452333e+02 -3.658357535838290e+02 -3.990519958595696e+02 2.623541560166928e+01 4 3.693323043411537e+02 6.227113376939163e+01 7.119000616278893e+01 3.570163020809600e+02 - ME 6.906402420910258e-05 + ME 4.986188449478774e-05 Event 114 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -916,7 +916,7 @@ Event 114 Batch 0 2 5.165204340356855e+02 2.346362244736889e+01 6.298471388966840e+00 
5.159487827839334e+02 3 5.932916594323345e+02 3.608814360715946e+02 -5.336137507463695e+01 -4.678804824963537e+02 4 3.901879065319798e+02 -3.843450585189634e+02 4.706290368567026e+01 -4.806830028757967e+01 - ME 5.363382776736297e-04 + ME 4.029549711869195e-04 Event 115 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -924,7 +924,7 @@ Event 115 Batch 0 2 5.432307281524777e+02 2.250327918244370e+02 4.870559856477670e+02 -8.506664127290338e+01 3 4.265243530840496e+02 2.057819224248363e+02 -2.472237669715339e+02 2.801021835354204e+02 4 5.302449187634726e+02 -4.308147142492733e+02 -2.398322186762331e+02 -1.950355422625171e+02 - ME 2.364149932043149e-04 + ME 4.159321993514108e-05 Event 116 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -932,7 +932,7 @@ Event 116 Batch 0 2 4.402635748890415e+02 -4.240500842615081e+02 -5.733358735035193e+01 -1.035683405941509e+02 3 4.399967684638562e+02 1.183617589007452e+02 -1.041572505293867e+02 -4.107784286579766e+02 4 6.197396566471035e+02 3.056883253607625e+02 1.614908378797388e+02 5.143467692521278e+02 - ME 1.343295643586522e-04 + ME 4.172733678506819e-05 Event 117 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -940,7 +940,7 @@ Event 117 Batch 0 2 3.074085311587982e+02 -4.270248480828711e+01 -3.034838508096459e+02 2.395944736750828e+01 3 5.360984061023379e+02 3.510554986169303e+02 -1.596589010508530e+02 -3.723849798683070e+02 4 6.564930627388640e+02 -3.083530138086433e+02 4.631427518604987e+02 3.484255325007987e+02 - ME 1.795895763168496e-04 + ME 4.142391000026985e-05 Event 118 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -948,7 +948,7 @@ Event 118 Batch 0 2 5.403602961735903e+02 4.471526113902045e+02 -1.804334130868151e+02 -2.439007487679592e+02 3 5.654623567965698e+02 -5.534570111367966e+02 -1.157195831079003e+02 
6.480112868522320e+00 4 3.941773470298406e+02 1.063043997465919e+02 2.961529961947150e+02 2.374206358994370e+02 - ME 3.055618730902428e-05 + ME 7.288650603673961e-06 Event 119 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -956,7 +956,7 @@ Event 119 Batch 0 2 8.009099446659010e+01 5.775399043490319e+01 -2.629604726664823e+01 4.886268393818209e+01 3 7.131140611332349e+02 2.472685400460709e+02 -2.870014097539109e+02 -6.041689532644716e+02 4 7.067949444001758e+02 -3.050225304809738e+02 3.132974570205592e+02 5.553062693262896e+02 - ME 6.861262467765907e-04 + ME 2.815424392761942e-04 Event 120 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -964,7 +964,7 @@ Event 120 Batch 0 2 5.007248873753321e+02 2.708997263130530e+02 -3.880896283797751e+02 1.634784128397387e+02 3 7.413897277398672e+02 -4.257033276374029e+02 5.921425482134987e+02 -1.334264135464211e+02 4 2.578853848848011e+02 1.548036013243502e+02 -2.040529198337238e+02 -3.005199929331748e+01 - ME 1.034513276694145e-04 + ME 6.003662532288496e-06 Event 121 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -972,7 +972,7 @@ Event 121 Batch 0 2 5.732265116821120e+02 -1.149395375629033e+02 4.260916136383032e+02 3.658189076403451e+02 3 4.323948798659248e+02 -2.148488009071912e+01 -4.178027098651986e+02 1.092914804138530e+02 4 4.943786084519640e+02 1.364244176536226e+02 -8.288903773105691e+00 -4.751103880541979e+02 - ME 8.074833733477824e-02 + ME 7.661241871407340e-04 Event 122 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -980,7 +980,7 @@ Event 122 Batch 0 2 3.423360304412701e+02 2.648046119434483e+02 2.369247279710451e+01 -2.156644197927059e+02 3 6.059487982275789e+02 2.457729689670163e+01 -4.569077875801422e+02 3.972469964635579e+02 4 5.517151713311508e+02 -2.893819088401499e+02 4.332153147830377e+02 
-1.815825766708520e+02 - ME 2.180123533398812e-04 + ME 5.274300345459390e-05 Event 123 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -988,7 +988,7 @@ Event 123 Batch 0 2 1.430133297276668e+02 -4.205671322284506e+01 3.498095937953869e+01 1.321377229770999e+02 3 7.140350670908600e+02 -2.955397919833849e+01 -6.570980288365154e+02 -2.778395577453968e+02 4 6.429516031814733e+02 7.161069242118367e+01 6.221170694569771e+02 1.457018347682969e+02 - ME 5.626335206455025e-04 + ME 2.698780233597045e-04 Event 124 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -996,7 +996,7 @@ Event 124 Batch 0 2 6.053457283343441e+02 5.458657819531910e+02 -1.853964251366731e+01 -2.610177782464909e+02 3 7.499633671623128e+02 -6.784114238502394e+02 2.145325921506613e+01 3.189713933003628e+02 4 1.446909045033435e+02 1.325456418970486e+02 -2.913616701398675e+00 -5.795361505387172e+01 - ME 4.169465060943616e-04 + ME 2.629538535113942e-05 Event 125 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1004,7 +1004,7 @@ Event 125 Batch 0 2 6.695439244882118e+02 9.058534244088493e+01 6.586171675820721e+02 7.941529525294386e+01 3 9.341516463500346e+01 3.490868167113007e+01 5.232133368429144e+01 6.906703243419068e+01 4 7.370409108767834e+02 -1.254940241120154e+02 -7.109385012663632e+02 -1.484823276871337e+02 - ME 1.111472366347957e-02 + ME 4.436636984625360e-03 Event 126 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1012,7 +1012,7 @@ Event 126 Batch 0 2 6.465564354211967e+02 -2.094351601488127e+02 -1.930091683601272e+02 -5.804477571728034e+02 3 1.356182567235447e+02 -2.832094442380729e+01 9.735247446175231e+01 -9.007070211700794e+01 4 7.178253078552584e+02 2.377561045726200e+02 9.565669389837488e+01 6.705184592898115e+02 - ME 1.775660879411100e-03 + ME 1.230970446288030e-03 Event 127 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1020,7 +1020,7 @@ Event 127 Batch 0 2 4.508388003927651e+02 -3.846405138087858e+02 7.756355374444065e+01 2.220162025777267e+02 3 6.162879941073576e+02 2.174727303224461e+02 1.334711143222092e+02 -5.609830344035003e+02 4 4.328732054998774e+02 1.671677834863399e+02 -2.110346680666500e+02 3.389668318257735e+02 - ME 3.922171581774212e-05 + ME 2.127227557837123e-05 Event 128 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1028,7 +1028,7 @@ Event 128 Batch 0 2 7.468963146802857e+02 5.701805835528932e+02 -3.440982003215339e+02 -3.381488363986430e+02 3 1.196664332518719e+02 -9.337643239636876e+01 2.398139841985228e+01 7.089280393650260e+01 4 6.334372520678420e+02 -4.768041511565244e+02 3.201168019016817e+02 2.672560324621404e+02 - ME 2.053620454072734e-04 + ME 7.842790653965437e-05 Event 129 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1036,7 +1036,7 @@ Event 129 Batch 0 2 4.378966182438207e+02 -4.256397208622688e+02 4.624364030548149e+01 9.190104474357973e+01 3 7.127537996732577e+02 5.790589826349546e+02 -1.369827771626340e+02 -3.923574802896586e+02 4 3.493495820829217e+02 -1.534192617726859e+02 9.073913685715252e+01 3.004564355460789e+02 - ME 1.668072874757384e-05 + ME 1.046217618618756e-05 Event 130 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1044,7 +1044,7 @@ Event 130 Batch 0 2 6.322026526626455e+02 5.905875735566585e+02 -2.387291116192753e+01 -2.243136110600485e+02 3 5.268087771404591e+02 -3.287250458747471e+02 1.913681034684307e+02 3.644798771698754e+02 4 3.409885701968954e+02 -2.618625276819114e+02 -1.674951923065032e+02 -1.401662661098267e+02 - ME 2.766647151388132e-04 + ME 3.412796728096272e-05 Event 131 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-1052,7 +1052,7 @@ Event 131 Batch 0 2 2.691964685177017e+02 -2.641651354044939e+02 4.065264362900757e+01 -3.210735842607325e+01 3 5.382709487855662e+02 -3.022535437819008e+02 -4.307865739991411e+02 1.131429946566680e+02 4 6.925325826967319e+02 5.664186791863947e+02 3.901339303701337e+02 -8.103563623059465e+01 - ME 5.354423766199649e-04 + ME 1.516502654737588e-04 Event 132 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1060,7 +1060,7 @@ Event 132 Batch 0 2 1.376388194981169e+02 -2.491804956023667e+01 3.114513197621116e+01 1.317327453336230e+02 3 7.332494677489981e+02 -3.054807357444667e+02 -6.882601889638243e+00 -6.665500220046781e+02 4 6.291117127528858e+02 3.303987853047034e+02 -2.426253008657308e+01 5.348172766710551e+02 - ME 3.625143788027957e-04 + ME 2.459616839911958e-04 Event 133 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1068,7 +1068,7 @@ Event 133 Batch 0 2 5.818916885738672e+02 -3.437736592641007e+02 -2.113522447259726e+02 -4.192228966514222e+02 3 7.075583625851592e+02 3.695171106849944e+02 9.875952986414086e+01 5.952667441040354e+02 4 2.105499488409736e+02 -2.574345142089370e+01 1.125927148618317e+02 -1.760438474526132e+02 - ME 6.644965721204062e-03 + ME 3.278402967978973e-04 Event 134 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1076,7 +1076,7 @@ Event 134 Batch 0 2 7.039051474789593e+02 -1.767404282002263e+02 5.832845063404937e+02 3.521710697233707e+02 3 6.740856043500099e+02 9.540039380435479e+01 -5.203258634262522e+02 -4.177932056695244e+02 4 1.220092481710302e+02 8.134003439587134e+01 -6.295864291424151e+01 6.562213594615410e+01 - ME 6.394436352069354e-05 + ME 3.621089826286842e-05 Event 135 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1084,7 +1084,7 @@ Event 135 Batch 0 2 7.491379873081086e+02 -6.603965492909807e+02 
-9.243924572685610e+01 -3.413782470545817e+02 3 4.360367703469753e+02 3.763875731093294e+02 3.833030381995060e+01 2.167746473012021e+02 4 3.148252423449159e+02 2.840089761816513e+02 5.410894190690560e+01 1.246035997533796e+02 - ME 3.729096801849378e-05 + ME 1.170602675185252e-05 Event 136 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1092,7 +1092,7 @@ Event 136 Batch 0 2 6.907976432034611e+02 -8.965778913807024e+01 -5.375684903631193e+02 -4.244796613161184e+02 3 4.317447428217263e+02 2.541758793770707e+02 2.501815833403360e+02 2.433255445990286e+02 4 3.774576139748129e+02 -1.645180902390004e+02 2.873869070227833e+02 1.811541167170898e+02 - ME 3.295715598818487e-05 + ME 1.221598515374744e-05 Event 137 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1100,7 +1100,7 @@ Event 137 Batch 0 2 5.927917878715718e+02 -5.453882061843875e+02 -2.239274061847312e+02 6.172783069514800e+01 3 3.718333194205911e+02 2.859809174201715e+02 -2.363544177495510e+02 2.472896101988843e+01 4 5.353748927078371e+02 2.594072887642160e+02 4.602818239342820e+02 -8.645679171503701e+01 - ME 1.267334233155001e-04 + ME 2.222722395048600e-05 Event 138 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1108,7 +1108,7 @@ Event 138 Batch 0 2 1.164849493482387e+02 2.012854405109472e+01 -2.573298799707043e+01 -1.118096528381494e+02 3 7.481698498358139e+02 -1.044692284663333e+02 -4.003634472873074e+00 7.408294509656059e+02 4 6.353452008159477e+02 8.434068441523856e+01 2.973662246994375e+01 -6.290197981274564e+02 - ME 3.545594402685597e+00 + ME 1.183014588836486e-01 Event 139 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1116,7 +1116,7 @@ Event 139 Batch 0 2 3.415587822283577e+02 -2.468214832259765e+02 1.926082427237748e+02 1.365416492148350e+02 3 5.828887331044928e+02 -1.023403009989268e+02 
-5.561813319045077e+02 1.412376154306548e+02 4 5.755524846671491e+02 3.491617842249035e+02 3.635730891807333e+02 -2.777792646454897e+02 - ME 4.142320485322521e-04 + ME 5.213154494000113e-05 Event 140 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1124,7 +1124,7 @@ Event 140 Batch 0 2 4.395392082109443e+02 -3.037880820376849e+02 -2.455930383243060e+02 -2.014735126343029e+02 3 4.709796125547878e+02 -2.826270024952004e+02 2.984919122515593e+02 2.298833426397907e+02 4 5.894811792342680e+02 5.864150845328855e+02 -5.289887392725340e+01 -2.840983000548780e+01 - ME 1.220048440917972e-04 + ME 2.990357782498624e-05 Event 141 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1132,7 +1132,7 @@ Event 141 Batch 0 2 3.025838986653694e+02 -2.680006525137058e+02 -6.218827689980458e+01 -1.259574698062632e+02 3 5.104624598690772e+02 -2.829910827131053e+02 4.173533268753467e+02 -7.939880721102661e+01 4 6.869536414655528e+02 5.509917352268112e+02 -3.551650499755422e+02 2.053562770172896e+02 - ME 3.735313583347012e-04 + ME 7.151804808113674e-05 Event 142 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1140,7 +1140,7 @@ Event 142 Batch 0 2 4.390011511178412e+02 -3.153925512561953e+02 3.992377088505197e+01 -3.027468279160259e+02 3 4.597282536099518e+02 2.984856708041211e+02 -2.221794712617382e+02 -2.699863960308454e+02 4 6.012705952722066e+02 1.690688045207421e+01 1.822557003766862e+02 5.727332239468712e+02 - ME 1.630913878361870e-04 + ME 8.945447985744934e-05 Event 143 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1148,7 +1148,7 @@ Event 143 Batch 0 2 7.103308443495001e+02 -3.626595603160224e+02 2.462759922459802e+02 5.589240443825270e+02 3 3.424564807343295e+02 4.507572778536915e+01 -2.357842367637252e+02 -2.442343416788665e+02 4 4.472126749161695e+02 3.175838325306533e+02 
-1.049175548225529e+01 -3.146897027036604e+02 - ME 1.304325296055160e-03 + ME 1.789392510542836e-04 Event 144 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1156,7 +1156,7 @@ Event 144 Batch 0 2 6.893886390440568e+02 -2.470805413393656e+02 1.331686162420120e+02 6.296618309717105e+02 3 7.132719020730987e+02 2.482972988978650e+02 -2.304803220538649e+02 -6.276815106349294e+02 4 9.733945888284487e+01 -1.216757558499225e+00 9.731170581185302e+01 -1.980320336781234e+00 - ME 3.769348793094523e-04 + ME 1.486904409371019e-04 Event 145 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1164,7 +1164,7 @@ Event 145 Batch 0 2 3.784954309743686e+02 2.391836032855264e+02 1.115572896135236e+01 -2.931305935912622e+02 3 7.389406222827198e+02 -4.231861417520660e+02 1.513250860114713e+02 5.865555822189353e+02 4 3.825639467429113e+02 1.840025384665394e+02 -1.624808149728234e+02 -2.934249886276727e+02 - ME 2.193982780219728e-03 + ME 2.016505354100400e-04 Event 146 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1172,7 +1172,7 @@ Event 146 Batch 0 2 4.681255842987410e+02 -3.253195724522379e+01 1.754808059398437e+02 -4.327698247100133e+02 3 2.875849079819393e+02 2.091841587061404e+01 1.879781824316579e+02 -2.166372592748876e+02 4 7.442895077193195e+02 1.161354137460973e+01 -3.634589883715017e+02 6.494070839849006e+02 - ME 5.347932692815789e-02 + ME 1.210467216316050e-02 Event 147 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1180,7 +1180,7 @@ Event 147 Batch 0 2 2.442136391928777e+02 -1.784444843977844e+02 -1.666832492802189e+02 -3.816014311599316e+00 3 5.551361515401285e+02 1.378338123621512e+02 -5.199472642306259e+02 1.372327560591401e+02 4 7.006502092669938e+02 4.061067203563306e+01 6.866305135108448e+02 -1.334167417475408e+02 - ME 7.450632204513606e-04 + ME 
2.360352365747709e-04 Event 148 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1188,7 +1188,7 @@ Event 148 Batch 0 2 4.547263863263726e+02 3.928375677411887e+02 5.145105706241225e+01 2.231759855356057e+02 3 7.397285466814292e+02 -5.611511356388266e+02 -1.533645573573770e+02 -4.569322031694095e+02 4 3.055450669921979e+02 1.683135678976379e+02 1.019135002949646e+02 2.337562176338038e+02 - ME 1.440225905683450e-05 + ME 6.307552439231181e-06 Event 149 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1196,7 +1196,7 @@ Event 149 Batch 0 2 2.343018799311635e+02 9.853424545130945e+01 1.924850318874441e+02 -9.021023174733594e+01 3 7.291173748950658e+02 3.429747374294529e+01 -5.990516617369192e+02 4.142136359886766e+02 4 5.365807451737705e+02 -1.328317191942547e+02 4.065666298494750e+02 -3.240034042413406e+02 - ME 8.405553848068603e-04 + ME 8.298171355094406e-05 Event 150 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1204,7 +1204,7 @@ Event 150 Batch 0 2 4.707648023587808e+02 -8.969278865174961e+01 -3.008719699078221e+02 3.507859183712497e+02 3 6.876639918976698e+02 3.906111988928598e+02 4.609284537794546e+02 -3.284046551871671e+02 4 3.415712057435500e+02 -3.009184102411105e+02 -1.600564838716325e+02 -2.238126318408256e+01 - ME 1.070125715137075e-04 + ME 1.887585788236135e-05 Event 151 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1212,7 +1212,7 @@ Event 151 Batch 0 2 6.503034458278056e+02 -1.575298496674962e+02 -3.658248853789647e+01 -6.298735108350154e+02 3 6.998690336552314e+02 1.302751858829802e+02 -1.019415103826456e+02 6.800389464387812e+02 4 1.498275205169629e+02 2.725466378451580e+01 1.385239989205421e+02 -5.016543560376590e+01 - ME 6.663776898009472e-04 + ME 4.060174493404880e-04 Event 152 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -1220,7 +1220,7 @@ Event 152 Batch 0 2 7.401192382353395e+02 1.493701961830190e+02 6.288419447382046e+02 3.605867993093739e+02 3 7.332111095478891e+02 -1.230079111936445e+02 -6.287602831147091e+02 -3.565502647954901e+02 4 2.666965221677112e+01 -2.636228498937447e+01 -8.166162349550861e-02 -4.036534513883709e+00 - ME 8.446403371723604e-04 + ME 1.210964379505254e-04 Event 153 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1228,7 +1228,7 @@ Event 153 Batch 0 2 5.645797071775899e+02 7.941901905692946e+01 3.691428696980725e+02 -4.197337333594241e+02 3 6.079979027943974e+02 1.021455738177839e+02 -5.566920170809548e+02 2.220849604771994e+02 4 3.274223900280123e+02 -1.815645928747133e+02 1.875491473828823e+02 1.976487728822249e+02 - ME 2.846663840296023e-05 + ME 9.895323747190810e-06 Event 154 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1236,7 +1236,7 @@ Event 154 Batch 0 2 6.022174885419887e+02 -5.152457849782368e+02 -1.493252664732707e+02 -2.736597328082223e+02 3 3.617627670199851e+02 1.925398333816265e+02 -2.626238171638091e+02 1.575736108034646e+02 4 5.360197444380261e+02 3.227059515966102e+02 4.119490836370796e+02 1.160861220047577e+02 - ME 6.437319974597944e-05 + ME 1.660411512586943e-05 Event 155 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1244,7 +1244,7 @@ Event 155 Batch 0 2 6.202229507100907e+02 -2.107861924791831e+02 -3.212541876154504e+02 4.868690137883067e+02 3 2.943040328093193e+02 2.940980302320592e+02 1.073731199058907e+01 2.433613089266508e+00 4 5.854730164805898e+02 -8.331183775287627e+01 3.105168756248616e+02 -4.893026268775732e+02 - ME 5.904510654775639e-03 + ME 4.918845171174253e-04 Event 156 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1252,7 +1252,7 @@ Event 156 Batch 0 2 
4.945486805149833e+02 4.540818864859257e+02 -1.431706201593249e+02 -1.337542944644701e+02 3 5.997303202813281e+02 -3.624214233270367e+02 -5.726286247273350e+01 4.743923835389624e+02 4 4.057209992036886e+02 -9.166046315888883e+01 2.004334826320584e+02 -3.406380890744924e+02 - ME 4.701306652347430e-03 + ME 1.986837824231628e-04 Event 157 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1260,7 +1260,7 @@ Event 157 Batch 0 2 4.617003083190191e+02 3.118400043328062e+02 3.404502064148864e+02 -4.079626411035589e+00 3 5.720097526413113e+02 -4.999240316044806e+01 -4.329264075474301e+02 -3.705005295422582e+02 4 4.662899390396696e+02 -2.618476011723578e+02 9.247620113254365e+01 3.745801559532937e+02 - ME 3.907978340087068e-05 + ME 1.403598809900552e-05 Event 158 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1268,7 +1268,7 @@ Event 158 Batch 0 2 6.784877363061535e+02 -5.707102180762959e+02 -3.102223423027389e+02 -1.959529373021938e+02 3 5.650909444059712e+02 5.525284805868615e+02 7.765167789879932e+01 8.950011457818250e+01 4 2.564213192878751e+02 1.818173748943443e+01 2.325706644039396e+02 1.064528227240114e+02 - ME 3.503179830087694e-05 + ME 8.470133063482862e-06 Event 159 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1276,7 +1276,7 @@ Event 159 Batch 0 2 5.369491563274252e+02 2.154713482252002e+02 -2.912667909729743e+02 3.962955349875316e+02 3 6.066564496499102e+02 -4.020061311781470e+01 5.572389608252350e+02 -2.364332868806716e+02 4 3.563943940226648e+02 -1.752707351073854e+02 -2.659721698522608e+02 -1.598622481068599e+02 - ME 3.198473025834927e-04 + ME 3.562393617300492e-05 Event 160 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1284,7 +1284,7 @@ Event 160 Batch 0 2 6.492474755438517e+02 3.490068395973682e+02 1.460348644657111e+02 -5.276270735801970e+02 3 
2.857818814470013e+02 -2.550253586192556e+02 1.227259509083862e+02 3.964456076362119e+01 4 5.649706430091471e+02 -9.398148097811273e+01 -2.687608153740973e+02 4.879825128165764e+02 - ME 6.719464076924620e-05 + ME 3.516238941302227e-05 Event 161 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1292,7 +1292,7 @@ Event 161 Batch 0 2 6.770282049439580e+02 -2.863253153105184e+02 -4.911270786072976e+02 -3.676672364525180e+02 3 1.598243093356544e+02 -7.505362471426160e+01 1.299195075310522e+02 -5.506073768810752e+01 4 6.631474857203874e+02 3.613789400247800e+02 3.612075710762453e+02 4.227279741406256e+02 - ME 1.577168105051119e-04 + ME 5.970757951131334e-05 Event 162 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1300,7 +1300,7 @@ Event 162 Batch 0 2 5.178592782584632e+02 -3.271131571456631e+02 3.943743741889439e+02 -7.512700901574514e+01 3 3.730686930366258e+02 -2.885924195736573e+01 -1.360208443078026e+02 -3.461874113706257e+02 4 6.090720287049110e+02 3.559723991030290e+02 -2.583535298811414e+02 4.213144203863710e+02 - ME 1.031749267713353e-04 + ME 2.768303103320498e-05 Event 163 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1308,7 +1308,7 @@ Event 163 Batch 0 2 5.388642316037673e+02 3.152159924116781e+02 3.539969933522669e+01 -4.356149670486711e+02 3 5.364171791816749e+02 -5.299694218906361e+02 3.369785517714305e+01 7.576448071880543e+01 4 4.247185892145582e+02 2.147534294789580e+02 -6.909755451236977e+01 3.598504863298658e+02 - ME 3.508094027565679e-05 + ME 1.485600561394433e-05 Event 164 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1316,7 +1316,7 @@ Event 164 Batch 0 2 6.862697092177667e+02 4.132218376422068e+02 1.310202162324327e+02 -5.320221138485150e+02 3 4.476895523579005e+02 -2.769046850483522e+02 1.374187337517142e+02 3.238299280529301e+02 4 
3.660407384243329e+02 -1.363171525938544e+02 -2.684389499841469e+02 2.081921857955847e+02 - ME 3.375894779915149e-05 + ME 1.755563256840939e-05 Event 165 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1324,7 +1324,7 @@ Event 165 Batch 0 2 2.382444910715278e+02 -2.158277263671036e+02 -9.471372817531817e+00 -1.004446273032522e+02 3 7.304591383576048e+02 4.619003715882296e+02 -1.223345688256177e+02 5.524969256086772e+02 4 5.312963705708673e+02 -2.460726452211260e+02 1.318059416431495e+02 -4.520522983054250e+02 - ME 6.966498968932957e-03 + ME 4.549138184301779e-04 Event 166 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1332,7 +1332,7 @@ Event 166 Batch 0 2 2.131352071380649e+02 -7.633553084455029e+01 -1.899581415396244e+02 5.929087379418958e+01 3 7.305557876753161e+02 8.980971292745940e+01 7.136333043711877e+02 1.279589045828712e+02 4 5.563090051866194e+02 -1.347418208290915e+01 -5.236751628315633e+02 -1.872497783770607e+02 - ME 3.314006956523505e-04 + ME 3.352199959657985e-05 Event 167 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1340,7 +1340,7 @@ Event 167 Batch 0 2 4.122964103002419e+02 -3.405127102276982e+02 6.366431608201744e+01 2.235761145061386e+02 3 4.697083356610920e+02 -2.521100678451879e+02 -2.856113063438232e+01 -3.952855880214881e+02 4 6.179952540386658e+02 5.926227780728861e+02 -3.510318544763516e+01 1.717094735153495e+02 - ME 1.146777177775239e-04 + ME 3.829535931496594e-05 Event 168 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1348,7 +1348,7 @@ Event 168 Batch 0 2 7.156643283953484e+02 -3.999734570317170e+02 4.816586825103861e+02 3.467009924560655e+02 3 6.192344221355605e+02 2.722545660880235e+02 -4.999454120042317e+02 -2.436869012025525e+02 4 1.651012494690919e+02 1.277188909436936e+02 1.828672949384504e+01 -1.030140912535133e+02 - 
ME 1.017624049822302e-03 + ME 5.027887292283473e-05 Event 169 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1356,7 +1356,7 @@ Event 169 Batch 0 2 3.626022684949455e+02 7.511110909567982e+01 -2.030941161665286e+02 -2.908461902563517e+02 3 5.580565590514408e+02 -2.529981754432838e+02 -3.439969378312538e+02 3.592842232626199e+02 4 5.793411724536141e+02 1.778870663476037e+02 5.470910539977822e+02 -6.843803300626824e+01 - ME 1.371698416063432e-04 + ME 4.350242525242475e-05 Event 170 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1364,7 +1364,7 @@ Event 170 Batch 0 2 6.602909342483501e+02 4.699653539595539e+02 -3.020118498241596e+02 3.520021683086903e+02 3 1.039297502933440e+02 3.247420585022842e+01 -9.851348423194945e+01 6.473976746580508e+00 4 7.357793154583061e+02 -5.024395598097824e+02 4.005253340561092e+02 -3.584761450552709e+02 - ME 1.673719496447659e-02 + ME 9.967260301798612e-03 Event 171 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1372,7 +1372,7 @@ Event 171 Batch 0 2 1.506693011949600e+02 -3.657300520509282e+01 -1.244227366169959e+02 -7.669834565089053e+01 3 6.344013325830570e+02 -2.026333084464634e+02 -4.956100871165362e+02 3.402578943089165e+02 4 7.149293662219835e+02 2.392063136515561e+02 6.200328237335323e+02 -2.635595486580261e+02 - ME 2.133207113512388e-03 + ME 9.157902172934166e-04 Event 172 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1380,7 +1380,7 @@ Event 172 Batch 0 2 5.352445157558213e+02 -2.018352690102651e+02 3.892440882325296e+02 -3.069825004886504e+02 3 6.716112180685394e+02 2.825227203806547e+02 -5.978593235713698e+02 1.175022124175027e+02 4 2.931442661756383e+02 -8.068745137038898e+01 2.086152353388391e+02 1.894802880711483e+02 - ME 2.630379932615259e-05 + ME 8.067092159940342e-06 Event 173 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1388,7 +1388,7 @@ Event 173 Batch 0 2 6.571348515648592e+02 -2.769863586381786e+02 5.805753619381593e+02 1.343019708712704e+02 3 5.332990408103321e+02 1.871824832342877e+02 -4.782426732337677e+02 1.437168410371092e+02 4 3.095661076248081e+02 8.980387540389081e+01 -1.023326887043915e+02 -2.780188119083794e+02 - ME 9.985413945498126e-03 + ME 1.269359653092767e-04 Event 174 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1396,7 +1396,7 @@ Event 174 Batch 0 2 6.091496911716730e+02 -4.752584064243671e+02 3.135726231883978e+01 -3.797492797588730e+02 3 6.417481529658018e+02 3.309293137608124e+02 9.015643604119191e+01 5.424004960996682e+02 4 2.491021558625255e+02 1.443290926635548e+02 -1.215136983600317e+02 -1.626512163407953e+02 - ME 1.319192968737130e-03 + ME 1.362612102685676e-04 Event 175 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1404,7 +1404,7 @@ Event 175 Batch 0 2 5.399801778396885e+02 1.966672297646830e+02 2.343185748302537e+02 -4.449667388535759e+02 3 6.987953575798327e+02 -1.857207036318898e+02 -9.664246188148675e+01 6.666955876403318e+02 4 2.612244645804785e+02 -1.094652613279307e+01 -1.376761129487668e+02 -2.217288487867561e+02 - ME 9.528877211334405e-03 + ME 9.613528518728674e-04 Event 176 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1412,7 +1412,7 @@ Event 176 Batch 0 2 6.615757321243968e+02 -4.129469954321281e+02 4.686878756164518e+02 -2.179194886871010e+02 3 1.607981401590110e+02 -6.355407199259605e+01 7.929314438200207e+00 1.474925346731048e+02 4 6.776261277165921e+02 4.765010674247242e+02 -4.766171900546519e+02 7.042695401399614e+01 - ME 6.965204353376922e-04 + ME 3.097907077728356e-04 Event 177 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-1420,7 +1420,7 @@ Event 177 Batch 0 2 4.314334067424883e+02 -3.493619040652741e+02 -2.026482683689240e+01 -2.523299055494341e+02 3 4.840006500668400e+02 -1.846595828310067e+02 -1.450727057198388e+02 4.232155216776995e+02 4 5.845659431906716e+02 5.340214868962809e+02 1.653375325567312e+02 -1.708856161282654e+02 - ME 2.160100049311594e-04 + ME 1.084300812640113e-04 Event 178 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1428,7 +1428,7 @@ Event 178 Batch 0 2 4.528135981327372e+02 -2.544528544607913e+02 1.436928116455424e+02 3.458992272209776e+02 3 3.053350882587867e+02 -1.380299578048218e+02 2.072032295570572e+02 1.767599177741536e+02 4 7.418513136084770e+02 3.924828122656132e+02 -3.508960412025996e+02 -5.226591449951313e+02 - ME 7.384409254828141e-02 + ME 5.382438151181503e-02 Event 179 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1436,7 +1436,7 @@ Event 179 Batch 0 2 7.433145319259943e+02 -2.538538580850882e+02 -6.778753511348521e+02 -1.689962142519080e+02 3 1.647945947160298e+02 1.009041857568576e+02 1.171651165877689e+02 5.699069397138987e+01 4 5.918908733579761e+02 1.529496723282306e+02 5.607102345470832e+02 1.120055202805181e+02 - ME 1.335347052581446e-04 + ME 3.739915465576335e-05 Event 180 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1444,7 +1444,7 @@ Event 180 Batch 0 2 2.396120216689867e+02 1.204528233788652e+02 -1.081248155319049e+02 1.766750195544080e+02 3 5.541470271917004e+02 2.767127195685322e+02 2.999096875483201e+02 3.749175614572557e+02 4 7.062409511393131e+02 -3.971655429473975e+02 -1.917848720164151e+02 -5.515925810116636e+02 - ME 1.316593054412419e-02 + ME 2.792447184071457e-03 Event 181 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1452,7 +1452,7 @@ Event 181 Batch 0 2 2.165494222755782e+02 1.336973493521793e+02 
-1.495065670853883e+02 -8.164837697364385e+01 3 6.960869932595207e+02 -2.848973600545249e+02 2.209041937252092e+01 6.347303441548928e+02 4 5.873635844649011e+02 1.512000107023455e+02 1.274161477128675e+02 -5.530819671812490e+02 - ME 6.164296623062663e-02 + ME 3.488874737600980e-03 Event 182 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1460,7 +1460,7 @@ Event 182 Batch 0 2 6.472681881349898e+02 4.279258056181361e+02 3.994050733201775e+02 -2.762448183472868e+02 3 5.337197582091030e+02 -3.479343829022644e+02 -4.034091782989213e+02 -3.254965992745409e+01 4 3.190120536559070e+02 -7.999142271587166e+01 4.004104978744005e+00 3.087944782747408e+02 - ME 6.393158381765308e-05 + ME 5.523679400573375e-05 Event 183 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1468,7 +1468,7 @@ Event 183 Batch 0 2 6.165307808531154e+02 -3.276949594572818e+02 8.808524820164887e+01 -5.147496540405800e+02 3 2.975460412740734e+02 -1.030095950018341e+02 -2.375020297789284e+02 1.466814775843215e+02 4 5.859231778728107e+02 4.307045544591158e+02 1.494167815772794e+02 3.680681764562588e+02 - ME 6.887775529805495e-05 + ME 2.562496117427957e-05 Event 184 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1476,7 +1476,7 @@ Event 184 Batch 0 2 5.645337360463252e+02 -3.940276919793660e+02 3.776398996283964e+02 1.443212503288767e+02 3 5.368100353438223e+02 2.392766596964613e+02 -1.719264331693737e+02 -4.487237410122139e+02 4 3.986562286098531e+02 1.547510322829050e+02 -2.057134664590229e+02 3.044024906833372e+02 - ME 3.553984578535888e-05 + ME 1.712138666139329e-05 Event 185 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1484,7 +1484,7 @@ Event 185 Batch 0 2 6.347397779710931e+02 2.522092504724420e+02 -1.599825720327363e+02 5.600809373302327e+02 3 4.566768168089404e+02 -3.359958684022406e+02 
-1.272903681003782e+02 -2.818823400219340e+02 4 4.085834052199659e+02 8.378661792979838e+01 2.872729401331145e+02 -2.781985973082986e+02 - ME 1.184197550833168e-03 + ME 1.836859309200860e-04 Event 186 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1492,7 +1492,7 @@ Event 186 Batch 0 2 7.089823220133230e+02 -5.197119220861886e+02 4.248734840868308e+02 -2.281183322067745e+02 3 5.364076825758043e+02 3.588264146200084e+02 -3.973752875032956e+02 3.270606945152315e+01 4 2.546099954108725e+02 1.608855074661802e+02 -2.749819658353518e+01 1.954122627552515e+02 - ME 2.583895514537347e-05 + ME 1.318469173008218e-05 Event 187 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1500,7 +1500,7 @@ Event 187 Batch 0 2 4.835105223217566e+02 -2.128653471696258e+02 1.375287019182911e+02 -4.117725407538514e+02 3 7.240136612790383e+02 4.407273454759851e+02 -4.896543389042274e+01 5.723264583716990e+02 4 2.924758163992057e+02 -2.278619983063593e+02 -8.856326802786833e+01 -1.605539176178473e+02 - ME 5.307563978210835e-04 + ME 9.185777086042985e-05 Event 188 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1508,7 +1508,7 @@ Event 188 Batch 0 2 6.611118500396009e+02 3.502021063704277e+02 -2.011693879247277e+02 -5.234102027267809e+02 3 3.072944371702247e+02 -6.894916504330918e+01 -1.599953986835475e+02 2.531350551695447e+02 4 5.315937127901742e+02 -2.812529413271184e+02 3.611647866082752e+02 2.702751475572362e+02 - ME 6.863567490702385e-05 + ME 3.862980709292737e-05 Event 189 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1516,7 +1516,7 @@ Event 189 Batch 0 2 7.498478362545707e+02 6.780504955298834e+02 -3.199144947524264e+02 -1.319162971889924e+01 3 3.253008430749361e+02 -2.985087551774363e+02 1.291384938207140e+02 6.034152914782593e+00 4 4.248513206704935e+02 -3.795417403524470e+02 
1.907760009317124e+02 7.157476804116639e+00 - ME 8.583750584152986e-05 + ME 1.504471760657040e-05 Event 190 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1524,7 +1524,7 @@ Event 190 Batch 0 2 4.938867893347995e+02 3.689671478502748e+02 -1.218724623869293e+02 3.048516153777389e+02 3 5.264063001598521e+02 6.631942569346465e+01 1.276367949726208e+02 -5.063735530147588e+02 4 4.797069105053494e+02 -4.352865735437401e+02 -5.764332585691415e+00 2.015219376370201e+02 - ME 4.759343488474735e-05 + ME 2.269926034328256e-05 Event 191 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1532,7 +1532,7 @@ Event 191 Batch 0 2 3.681793141805986e+02 -3.225132888415706e+02 1.579589482507471e+02 -8.117977937027918e+01 3 5.431126642386394e+02 4.058413736814005e+01 9.147123993851424e+01 5.338139246166097e+02 4 5.887080215807621e+02 2.819291514734305e+02 -2.494301881892614e+02 -4.526341452463304e+02 - ME 4.908990110546420e-03 + ME 1.427494731558637e-03 Event 192 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1540,7 +1540,7 @@ Event 192 Batch 0 2 6.054165399887861e+02 1.497087111729466e+02 8.905021611535379e+01 5.798159601983524e+02 3 2.106656439489222e+02 1.451894976721945e+02 -1.487249448604451e+02 3.436443048222171e+01 4 6.839178160622922e+02 -2.948982088451411e+02 5.967472874509133e+01 -6.141803906805740e+02 - ME 4.294450320853435e-02 + ME 6.984876913518998e-03 Event 193 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1548,7 +1548,7 @@ Event 193 Batch 0 2 2.753169163933055e+02 -1.695475157411122e+02 -2.139406274107579e+02 3.581134319495643e+01 3 5.760219428901971e+02 -3.264616044953138e+02 1.527507522369444e+02 -4.493231656306969e+02 4 6.486611407164972e+02 4.960091202364260e+02 6.118987517381347e+01 4.135118224357404e+02 - ME 1.537583375796735e-04 + ME 
4.273063058931925e-05 Event 194 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1556,7 +1556,7 @@ Event 194 Batch 0 2 3.445934948105150e+02 -2.970257025567896e+02 -8.183019525038441e+01 1.543509890854414e+02 3 7.485441862377920e+02 6.623797851941252e+02 1.083400559332054e+02 -3.314119056355291e+02 4 4.068623189516925e+02 -3.653540826373358e+02 -2.650986068282081e+01 1.770609165500877e+02 - ME 3.024610065690235e-05 + ME 4.921158833271929e-06 Event 195 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1564,7 +1564,7 @@ Event 195 Batch 0 2 2.012122274303647e+02 -5.190018365965096e+01 1.322177369426910e+02 -1.425173724194237e+02 3 7.122630330184543e+02 -3.054768058087834e+02 -2.528097616133813e+02 5.916838461125119e+02 4 5.865247395511832e+02 3.573769894684365e+02 1.205920246706904e+02 -4.491664736930883e+02 - ME 3.011639483286710e-03 + ME 4.696445912229638e-04 Event 196 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1572,7 +1572,7 @@ Event 196 Batch 0 2 4.490485793345989e+02 3.485190427929747e+02 -2.661098616642627e+01 -2.819059396826192e+02 3 5.531554978829222e+02 -3.330165694254377e+02 4.416170126965178e+02 7.442003978758296e+00 4 4.977959227824785e+02 -1.550247336753688e+01 -4.150060265300915e+02 2.744639357038610e+02 - ME 4.340266456570635e-05 + ME 9.363355109875406e-06 Event 197 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1580,7 +1580,7 @@ Event 197 Batch 0 2 3.951249254444253e+02 -2.278358800090239e+02 3.101157211704546e+02 -8.968142489336992e+01 3 3.607080640108546e+02 -2.889948719219027e+02 2.155030307719242e+02 -1.227661082778765e+01 4 7.441670105447209e+02 5.168307519309257e+02 -5.256187519423792e+02 1.019580357211576e+02 - ME 3.377741088449004e-02 + ME 6.597373610109231e-03 Event 198 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -1588,7 +1588,7 @@ Event 198 Batch 0 2 3.750236904637998e+02 1.183014344420310e+02 -1.005952209347265e+02 -3.413621838211424e+02 3 4.381296266085964e+02 -2.726825461625328e+02 1.003845461170281e+02 -3.279096546785175e+02 4 6.868466829276033e+02 1.543811117205018e+02 2.106748176980602e-01 6.692718384996598e+02 - ME 9.606390506705955e-04 + ME 6.145502577419889e-04 Event 199 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1596,7 +1596,7 @@ Event 199 Batch 0 2 2.454478562244572e+02 -2.058455361543722e+02 -1.131056012155068e+02 -7.126982772660261e+01 3 5.321797086694488e+02 -9.806778012582416e+01 -4.820333037417012e+02 -2.030808875905193e+02 4 7.223724351060940e+02 3.039133162801963e+02 5.951389049572081e+02 2.743507153171219e+02 - ME 1.577081887352965e-03 + ME 3.088173795554332e-04 Event 200 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1604,7 +1604,7 @@ Event 200 Batch 0 2 3.952431318363244e+02 3.031309873729303e+02 9.337877017948550e+01 2.358159092128122e+02 3 6.094031244332663e+02 -7.796753338981905e+01 -5.315426896439308e+02 -2.876727322709444e+02 4 4.953537437304092e+02 -2.251634539831113e+02 4.381639194644453e+02 5.185682305813224e+01 - ME 6.703240553489506e-05 + ME 1.668296552597111e-05 Event 201 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1612,7 +1612,7 @@ Event 201 Batch 0 2 6.497938633639732e+02 3.771120671245744e+02 3.553445817627057e+02 -3.921081252746440e+02 3 3.369790646193914e+02 -2.140351778515325e+02 1.061239955238163e+02 2.376584318047305e+02 4 5.132270720166357e+02 -1.630768892730420e+02 -4.614685772865220e+02 1.544496934699135e+02 - ME 6.283412004793947e-05 + ME 2.404518058628388e-05 Event 202 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1620,7 +1620,7 @@ Event 202 Batch 0 2 
7.267802742470179e+02 6.523432021666289e+02 -1.481957728499301e+02 2.840702844913056e+02 3 3.546086620137576e+02 -3.102429173963679e+02 -5.939291787501398e+01 -1.611493614224694e+02 4 4.186110637392242e+02 -3.421002847702610e+02 2.075886907249440e+02 -1.229209230688360e+02 - ME 1.894138330341389e-04 + ME 2.830403199974809e-05 Event 203 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1628,7 +1628,7 @@ Event 203 Batch 0 2 4.830190702985662e+02 2.789429895135886e+02 -3.943102945050296e+02 -4.197918611657844e+00 3 5.247163710833165e+02 -4.266462829986153e+02 3.263988520595893e+01 3.037019215942698e+02 4 4.922645586181170e+02 1.477032934850268e+02 3.616704092990706e+02 -2.995040029826120e+02 - ME 5.831910678002871e-04 + ME 5.153190919865371e-05 Event 204 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1636,7 +1636,7 @@ Event 204 Batch 0 2 6.952375769935185e+02 3.823764713153302e+01 6.531840992713522e+02 -2.350397908115460e+02 3 6.250862947179036e+02 1.031861473443961e+02 -5.506835576815644e+02 2.771878679515999e+02 4 1.796761282885781e+02 -1.414237944759291e+02 -1.025005415897879e+02 -4.214807714005369e+01 - ME 1.802858800889920e-04 + ME 1.903000177287069e-05 Event 205 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1644,7 +1644,7 @@ Event 205 Batch 0 2 5.625197268936781e+02 2.955060596751036e+02 4.395356105446072e+02 -1.895074112086703e+02 3 3.144813194259642e+02 -1.941101430078122e+02 -7.073026664887073e+00 -2.473251401357733e+02 4 6.229989536803572e+02 -1.013959166672914e+02 -4.324625838797200e+02 4.368325513444433e+02 - ME 1.140145509231641e-04 + ME 3.163472493443465e-05 Event 206 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1652,7 +1652,7 @@ Event 206 Batch 0 2 5.487698581700869e+02 -4.771827558939671e+02 -2.639484985605369e+02 6.145050708573941e+01 
3 4.357856725513919e+02 1.877155863290790e+02 1.701172104948722e+02 3.545872893148349e+02 4 5.154444692785200e+02 2.894671695648880e+02 9.383128806566407e+01 -4.160377964005746e+02 - ME 4.167786087259531e-03 + ME 3.341888001113221e-04 Event 207 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1660,7 +1660,7 @@ Event 207 Batch 0 2 5.289473514933904e+02 -3.230637718239221e+02 -3.258094337294262e+02 2.631792409740627e+02 3 3.730441408755686e+02 -1.145152671243400e+02 -7.298530142052728e+01 -3.474497523579300e+02 4 5.980085076310412e+02 4.375790389482623e+02 3.987947351499535e+02 8.427051138386733e+01 - ME 1.161501350367753e-04 + ME 3.789028948405571e-05 Event 208 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1668,7 +1668,7 @@ Event 208 Batch 0 2 3.144460531270953e+02 3.105028133645123e+02 -3.495125011961062e+01 3.525242310830974e+01 3 7.230517599976935e+02 -6.554206809343713e+02 2.220922910679198e+02 2.095294558946058e+02 4 4.625021868752117e+02 3.449178675698588e+02 -1.871410409483092e+02 -2.447818790029155e+02 - ME 4.858457850437588e-04 + ME 2.941989209837521e-05 Event 209 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1676,7 +1676,7 @@ Event 209 Batch 0 2 2.827014058170527e+02 -6.682954863774688e+01 -1.958656753088385e+02 -1.925890275057887e+02 3 5.969812148172332e+02 5.625717004655273e+02 1.060136244597389e+02 -1.692949027847388e+02 4 6.203173793657136e+02 -4.957421518277804e+02 8.985205084909943e+01 3.618839302905275e+02 - ME 1.004351001266980e-04 + ME 2.261939336541961e-05 Event 210 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1684,7 +1684,7 @@ Event 210 Batch 0 2 3.369223392964550e+02 -2.366581006943837e+02 8.850719545688517e+01 -2.228813191927023e+02 3 6.926279093100447e+02 9.835546321295956e+01 -1.581805884470998e+02 6.671120783270956e+02 4 
4.704497513935005e+02 1.383026374814242e+02 6.967339299021461e+01 -4.442307591343933e+02 - ME 5.974710408786874e-02 + ME 3.044010300440331e-03 Event 211 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1692,7 +1692,7 @@ Event 211 Batch 0 2 5.754314663824422e+02 -1.965408456680789e+02 -5.399725108422632e+02 3.037689947684008e+01 3 6.656941886103589e+02 4.112771407945243e+02 5.114655840792436e+02 1.113679599883347e+02 4 2.588743450071987e+02 -2.147362951264454e+02 2.850692676301957e+01 -1.417448594651748e+02 - ME 4.382347812376007e-04 + ME 1.754510489093768e-05 Event 212 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1700,7 +1700,7 @@ Event 212 Batch 0 2 5.922157374848572e+02 8.073316194509509e+00 4.947261155542873e+02 -3.254233732830556e+02 3 3.635572903001510e+02 8.951663862813328e+01 4.011175755255380e+01 3.500738802669425e+02 4 5.442269722149914e+02 -9.758995482264278e+01 -5.348378731068407e+02 -2.465050698388706e+01 - ME 3.041427876287276e-04 + ME 1.919214373141161e-04 Event 213 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1708,7 +1708,7 @@ Event 213 Batch 0 2 7.434820262506830e+02 2.991548764052629e+02 2.111623598614188e+02 -6.470566753063675e+02 3 5.607612173038236e+02 -2.664197873565705e+02 -1.905271140771768e+02 4.551626726109781e+02 4 1.957567564454930e+02 -3.273508904869271e+01 -2.063524578424195e+01 1.918940026953895e+02 - ME 1.827786070323022e-04 + ME 1.896082550340891e-04 Event 214 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1716,7 +1716,7 @@ Event 214 Batch 0 2 5.400874280734793e+02 3.457358963402696e+02 2.445843697627679e+02 -3.351710101016577e+02 3 3.400793067879315e+02 1.482066942304564e+02 1.256466447865830e+02 2.791086371729012e+02 4 6.198332651385892e+02 -4.939425905707261e+02 -3.702310145493508e+02 5.606237292875651e+01 - ME 
1.356968066378560e-04 + ME 6.515553919952984e-05 Event 215 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1724,7 +1724,7 @@ Event 215 Batch 0 2 3.916345321859864e+02 3.271767110560381e+02 -1.945589530122144e+02 9.208594000107233e+01 3 6.136750729169615e+02 -1.269585669220027e+02 2.644680756040779e+02 -5.390132228350478e+02 4 4.946903948970534e+02 -2.002181441340350e+02 -6.990912259186331e+01 4.469272828339764e+02 - ME 6.207321332343461e-05 + ME 3.427926940877871e-05 Event 216 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1732,7 +1732,7 @@ Event 216 Batch 0 2 3.767411090262154e+02 1.602503356822860e+02 2.758455349572533e+02 -2.004069210086422e+02 3 4.061922956351256e+02 3.340053729931861e+02 2.237650079776778e+02 5.798114391563544e+01 4 7.170665953386593e+02 -4.942557086754721e+02 -4.996105429349309e+02 1.424257770930068e+02 - ME 1.232271832865728e-03 + ME 2.360785017217177e-04 Event 217 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1740,7 +1740,7 @@ Event 217 Batch 0 2 6.474118977458852e+02 -5.378641111590873e+02 -3.279650037002520e+02 1.492759847325320e+02 3 5.088298200539713e+02 3.261878344469131e+02 1.555821256186315e+02 -3.581947579501665e+02 4 3.437582822001433e+02 2.116762767121744e+02 1.723828780816206e+02 2.089187732176345e+02 - ME 3.357118960820415e-05 + ME 1.388331578224744e-05 Event 218 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1748,7 +1748,7 @@ Event 218 Batch 0 2 6.658501161076259e+02 -6.577627036244854e+02 -3.020200479570956e+01 9.895676706252418e+01 3 2.516345839620714e+02 1.565221509782131e+02 -1.156477271957936e+02 1.595192254662914e+02 4 5.825152999303023e+02 5.012405526462722e+02 1.458497319915031e+02 -2.584759925288157e+02 - ME 5.956187308313417e-04 + ME 1.036808356896783e-04 Event 219 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1756,7 +1756,7 @@ Event 219 Batch 0 2 4.328556070633435e+02 6.122246558068494e+01 -1.687441385117925e+02 3.938796795879554e+02 3 6.500677455605621e+02 -3.703058656885360e+02 4.356876543064814e+02 -3.092537914719426e+02 4 4.170766473760945e+02 3.090834001078509e+02 -2.669435157946888e+02 -8.462588811601287e+01 - ME 2.797067114354785e-04 + ME 9.046106878448173e-05 Event 220 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1764,7 +1764,7 @@ Event 220 Batch 0 2 3.686297280598666e+02 -3.497113779929074e+02 -8.765282776369953e+01 7.685577594963354e+01 3 4.155522773953191e+02 -1.777404948015450e+02 -1.525848366500187e+02 3.432344379292750e+02 4 7.158179945448145e+02 5.274518727944524e+02 2.402376644137182e+02 -4.200902138789084e+02 - ME 3.485410710153060e-03 + ME 1.676729229638681e-03 Event 221 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1772,7 +1772,7 @@ Event 221 Batch 0 2 5.295220830718469e+02 3.654688468413813e+01 4.204675060608333e+02 3.197890523886257e+02 3 7.127556392876786e+02 -1.727486268095863e+02 -4.342549693537605e+02 -5.381460163035255e+02 4 2.577222776404743e+02 1.362017421254481e+02 1.378746329292729e+01 2.183569639148998e+02 - ME 2.819264207321091e-05 + ME 2.031931825964470e-05 Event 222 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1780,7 +1780,7 @@ Event 222 Batch 0 2 2.464305981122427e+02 -2.054199106396077e+02 6.127423271580306e+01 1.215572638876956e+02 3 6.926647117218595e+02 4.702892479611936e+02 3.872350261814336e+02 -3.296383785530530e+02 4 5.609046901658980e+02 -2.648693373215859e+02 -4.485092588972366e+02 2.080811146653574e+02 - ME 6.319142394583372e-05 + ME 1.678695785515194e-05 Event 223 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1788,7 +1788,7 @@ Event 
223 Batch 0 2 2.463384302181125e+02 -1.209251938955738e+02 -2.140981972257043e+02 -1.488897673935926e+01 3 6.819620845265065e+02 -2.400891875757811e+02 5.819023806457059e+02 2.623339210620683e+02 4 5.716994852553812e+02 3.610143814713547e+02 -3.678041834200016e+02 -2.474449443227091e+02 - ME 3.931927185620913e-04 + ME 4.810915220985587e-05 Event 224 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1796,7 +1796,7 @@ Event 224 Batch 0 2 2.236851263016067e+02 -8.671871524968952e+01 1.717231909970332e+02 1.141317038679677e+02 3 5.308972974363861e+02 -3.715833295102001e+01 4.680039348616383e+02 2.478780257941054e+02 4 7.454175762620068e+02 1.238770482007099e+02 -6.397271258586715e+02 -3.620097296620728e+02 - ME 8.708656265179471e-02 + ME 6.017706528853119e-02 Event 225 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1804,7 +1804,7 @@ Event 225 Batch 0 2 5.094176014319268e+02 1.569347096242780e+02 -1.561291130928888e+00 -4.846394040251013e+02 3 7.252311334449815e+02 -3.845161955462210e+02 -4.374219820797174e+01 6.133466494377277e+02 4 2.653512651230916e+02 2.275814859219426e+02 4.530348933890067e+01 -1.287072454126262e+02 - ME 3.974215742688118e-04 + ME 1.151501859389029e-04 Event 226 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1812,7 +1812,7 @@ Event 226 Batch 0 2 6.863217264048350e+02 -2.391756120967483e+02 -6.171186323675804e+02 1.816511279850093e+02 3 5.332348374442744e+02 1.096335504493486e+02 4.112484130583279e+02 -3.212391931833643e+02 4 2.804434361508906e+02 1.295420616473995e+02 2.058702193092524e+02 1.395880651983551e+02 - ME 3.797053871351767e-05 + ME 1.438206074993319e-05 Event 227 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1820,7 +1820,7 @@ Event 227 Batch 0 2 7.243206345463230e+02 -5.280189925476210e+02 -1.406011303275692e+02 
4.754657162080069e+02 3 5.487499634657129e+02 3.840442912861271e+02 -1.353123555187442e+01 -3.917312987222202e+02 4 2.269294019879644e+02 1.439747012614939e+02 1.541323658794436e+02 -8.373441748578679e+01 - ME 2.903986554770466e-04 + ME 5.165623507180856e-05 Event 228 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1828,7 +1828,7 @@ Event 228 Batch 0 2 2.119578664379945e+02 1.625437651479949e+01 -1.806612394559917e+02 1.096514885776142e+02 3 6.254097456672617e+02 -3.200704000326812e+01 3.158243706171928e+02 5.388579277416935e+02 4 6.626323878947439e+02 1.575266348846865e+01 -1.351631311612011e+02 -6.485094163193077e+02 - ME 8.951233069377997e-01 + ME 3.800526374221887e-02 Event 229 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1836,7 +1836,7 @@ Event 229 Batch 0 2 5.921227120343664e+02 -3.877491982207575e+02 4.449193714386763e+02 -4.802726626309342e+01 3 4.688278331283221e+02 3.470549659129084e+02 -1.517581364471262e+02 -2.762641051115459e+02 4 4.390494548373113e+02 4.069423230784909e+01 -2.931612349915501e+02 3.242913713746393e+02 - ME 3.492131538818778e-05 + ME 1.250052930035257e-05 Event 230 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1844,7 +1844,7 @@ Event 230 Batch 0 2 4.261952284727868e+02 2.153699775439378e+02 -1.171086083390750e+02 3.486312082969335e+02 3 3.540619701921573e+02 3.070144260847319e+01 1.307424531367546e+02 3.276029778648147e+02 4 7.197428013350559e+02 -2.460714201524109e+02 -1.363384479767965e+01 -6.762341861617483e+02 - ME 3.186738302883428e-01 + ME 4.711214236813061e-02 Event 231 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1852,7 +1852,7 @@ Event 231 Batch 0 2 4.205236024420392e+02 7.533931576750228e+01 -3.260217181731272e+02 -2.547036061581322e+02 3 5.397543491930860e+02 8.423195081267914e+01 -1.158376015978276e+02 
5.204050211049134e+02 4 5.397220483648740e+02 -1.595712665801811e+02 4.418593197709548e+02 -2.657014149467809e+02 - ME 5.532186388062512e-04 + ME 3.265984123744224e-04 Event 232 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1860,7 +1860,7 @@ Event 232 Batch 0 2 4.295782852421121e+02 3.239064445356881e+02 9.240815775655221e-01 2.821724019337124e+02 3 7.183371274312143e+02 -6.155391061575082e+02 -1.955291718271078e+02 -3.144649112405858e+02 4 3.520845873266736e+02 2.916326616218201e+02 1.946050902495422e+02 3.229250930687335e+01 - ME 6.730603828970119e-05 + ME 1.049779024540051e-05 Event 233 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1868,7 +1868,7 @@ Event 233 Batch 0 2 3.640046126075324e+02 -2.220120664068515e+02 -1.165482463207536e+02 2.638683509799470e+02 3 4.682121509308883e+02 -1.009786196736112e+02 3.762431872847591e+02 2.597441061312976e+02 4 6.677832364615790e+02 3.229906860804628e+02 -2.596949409640055e+02 -5.236124571112447e+02 - ME 5.385640989777132e-03 + ME 7.598357868514145e-04 Event 234 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1876,7 +1876,7 @@ Event 234 Batch 0 2 8.690043548936441e+01 -2.607433849884744e+01 -7.258333015587984e+01 4.004341073848801e+01 3 6.785651905172676e+02 -3.574930335951373e+02 -4.725723606052789e+01 5.748184081539155e+02 4 7.345343739933678e+02 3.835673720939847e+02 1.198405662164078e+02 -6.148618188924036e+02 - ME 1.962113644780599e-01 + ME 8.152211059226219e-02 Event 235 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1884,7 +1884,7 @@ Event 235 Batch 0 2 3.000566282865331e+02 1.219146462304108e+01 -2.126850238006026e+02 2.113064812540423e+02 3 7.160981218147422e+02 2.575873756248088e+02 2.779062108697769e+02 -6.076293293985470e+02 4 4.838452498987246e+02 -2.697788402478500e+02 -6.522118706917435e+01 
3.963228481445046e+02 - ME 3.940402333844027e-05 + ME 2.498899672933017e-05 Event 236 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1892,7 +1892,7 @@ Event 236 Batch 0 2 1.510518772182422e+02 -9.497518588910037e+01 1.467158067736534e+01 1.165380984781943e+02 3 6.955499852411461e+02 5.933480346078575e+02 3.495450158124774e+02 9.770452249822526e+01 4 6.533981375406115e+02 -4.983728487187572e+02 -3.642165964898426e+02 -2.142426209764196e+02 - ME 1.121647028585911e-03 + ME 2.623118294900277e-04 Event 237 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1900,7 +1900,7 @@ Event 237 Batch 0 2 2.173874152942701e+02 2.069918593916189e+02 -3.850229167793934e+01 -5.412237993169356e+01 3 7.305677895866185e+02 -6.701932224704495e+02 -2.421540700080861e+02 1.610333695687662e+02 4 5.520447951191120e+02 4.632013630788306e+02 2.806563616860255e+02 -1.069109896370727e+02 - ME 1.822378225061386e-04 + ME 2.170005261464319e-05 Event 238 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1908,7 +1908,7 @@ Event 238 Batch 0 2 6.349573912113930e+02 -3.336495545457479e+02 -4.785400196851591e+02 2.506956580500139e+02 3 5.768887318987100e+02 4.812119270965607e+02 2.334547330568691e+02 -2.161818165921041e+02 4 2.881538768898968e+02 -1.475623725508129e+02 2.450852866282900e+02 -3.451384145790988e+01 - ME 9.810731053503000e-05 + ME 1.383744831772315e-05 Event 239 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1916,7 +1916,7 @@ Event 239 Batch 0 2 5.349076725903783e+02 -5.331874414268931e+02 1.887721601290929e+01 -3.848403846142781e+01 3 3.658437465440003e+02 8.335465236419728e+01 1.670818061666301e+01 -3.558292926602242e+02 4 5.992485808656214e+02 4.498327890626960e+02 -3.558539662957234e+01 3.943133311216517e+02 - ME 9.226736931333760e-05 + ME 2.560110521983184e-05 Event 240 Batch 0 
0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1924,7 +1924,7 @@ Event 240 Batch 0 2 2.870582387324442e+02 1.830793600232297e+02 -1.562409872742485e+02 1.564389154054251e+02 3 6.007192677438852e+02 3.433229388031108e+02 4.688113613010560e+02 -1.523446941819630e+02 4 6.122224935236703e+02 -5.264022988263405e+02 -3.125703740268075e+02 -4.094221223461989e+00 - ME 1.424405912705748e-04 + ME 3.548113744927254e-05 Event 241 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1932,7 +1932,7 @@ Event 241 Batch 0 2 7.424696267657401e+02 4.823783107714221e+02 2.498315161211407e+02 5.061190823507636e+02 3 2.455726236162737e+02 -1.827879695947952e+02 -1.199757723946156e+02 -1.118046764652876e+02 4 5.119577496179861e+02 -2.995903411766270e+02 -1.298557437265251e+02 -3.943144058854759e+02 - ME 2.705973755259623e-03 + ME 2.366266620918590e-04 Event 242 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1940,7 +1940,7 @@ Event 242 Batch 0 2 7.249130370348905e+02 1.676828147928013e+02 6.059046362201677e+02 -3.609168279440810e+02 3 6.240672718074169e+02 -4.529413961306761e+01 -5.490982345027019e+02 2.930862151720549e+02 4 1.510196911576933e+02 -1.223886751797337e+02 -5.680640171746593e+01 6.783061277202641e+01 - ME 4.587322306592483e-05 + ME 1.668420503127583e-05 Event 243 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1948,7 +1948,7 @@ Event 243 Batch 0 2 4.655090712555229e+02 2.096323612054770e+02 2.113490506800235e+02 3.578890153850057e+02 3 5.764797256412519e+02 6.697224883641857e+01 -5.382210340689440e+02 -1.953502251008744e+02 4 4.580112031032257e+02 -2.766046100418949e+02 3.268719833889206e+02 -1.625387902841314e+02 - ME 2.309042201876567e-04 + ME 3.999521919602606e-05 Event 244 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 
@@ -1956,7 +1956,7 @@ Event 244 Batch 0 2 5.237109195354749e+02 1.305098338947756e+02 -4.868141165486322e+02 -1.423106687020528e+02 3 5.804450110242352e+02 -4.045654344879671e+02 2.643676733537771e+02 3.214855413949400e+02 4 3.958440694402901e+02 2.740556005931916e+02 2.224464431948551e+02 -1.791748726928872e+02 - ME 2.644202232750943e-04 + ME 2.634847163425152e-05 Event 245 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1964,7 +1964,7 @@ Event 245 Batch 0 2 2.629169357520612e+02 2.457511487795889e+02 -4.402365929491729e+01 -8.242333044139184e+01 3 6.931386101565748e+02 -5.195573187661655e+02 4.004017488088275e+02 -2.240084037645317e+02 4 5.439444540913644e+02 2.738061699865766e+02 -3.563780895139104e+02 3.064317342059234e+02 - ME 4.288053786412853e-05 + ME 1.052590061693975e-05 Event 246 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1972,7 +1972,7 @@ Event 246 Batch 0 2 6.300937687157445e+02 -5.459948028041557e+02 3.085954426748102e+02 6.063567799240802e+01 3 1.673910408536145e+02 -3.546130270298926e+01 7.662824936562275e+01 -1.445350060290698e+02 4 7.025151904306430e+02 5.814561055071442e+02 -3.852236920404341e+02 8.389932803666261e+01 - ME 6.282756509154168e-04 + ME 1.915763997923398e-04 Event 247 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1980,7 +1980,7 @@ Event 247 Batch 0 2 2.577847506495701e+02 2.418237207037818e+02 -8.449121421856779e+01 2.890502538162603e+01 3 5.130193185035739e+02 4.381905811488919e+02 1.366496386102691e+02 2.291390669832418e+02 4 7.291959308468561e+02 -6.800143018526737e+02 -5.215842439170134e+01 -2.580440923648679e+02 - ME 4.005872724472581e-03 + ME 1.831864018495938e-03 Event 248 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1988,7 +1988,7 @@ Event 248 Batch 0 2 7.033207479153643e+02 -5.040306065309413e+02 
-2.020637997366072e+02 4.469714117975369e+02 3 1.758360012551320e+02 -1.471306652922549e+01 -4.035460943683606e+00 -1.751728862172264e+02 4 6.208432508295037e+02 5.187436730601667e+02 2.060992606802909e+02 -2.717985255803103e+02 - ME 5.592865021063005e-04 + ME 1.512538512828554e-04 Event 249 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1996,7 +1996,7 @@ Event 249 Batch 0 2 3.018816177222694e+02 5.523075638651412e+01 1.752331212074551e+02 2.395316845419020e+02 3 6.597415560701297e+02 6.315352823685419e+01 -6.561001191322722e+02 -2.834054254405022e+01 4 5.383768262076012e+02 -1.183842846233684e+02 4.808669979248172e+02 -2.111911419978518e+02 - ME 4.868100986861644e-04 + ME 9.225490912808109e-05 Event 250 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2004,7 +2004,7 @@ Event 250 Batch 0 2 2.166381935101301e+02 -1.289072913913530e+02 -1.189615590004073e+02 -1.271344351215279e+02 3 6.815426093761062e+02 -2.511966318704653e+02 5.323234433390903e+02 3.435583388650892e+02 4 6.018191971137635e+02 3.801039232618182e+02 -4.133618843386827e+02 -2.164239037435611e+02 - ME 3.468666532553966e-04 + ME 6.586594805989363e-05 Event 251 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2012,7 +2012,7 @@ Event 251 Batch 0 2 6.676961532387151e+02 -3.991265595084280e+01 -4.419965947723094e+02 4.988628500443886e+02 3 7.150412702460949e+02 3.921851524844908e+01 5.505653759000154e+02 -4.545587894617490e+02 4 1.172625765151894e+02 6.941407023942340e-01 -1.085687811277060e+02 -4.430406058263954e+01 - ME 5.615833562023813e-04 + ME 4.930952510857648e-05 Event 252 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2020,7 +2020,7 @@ Event 252 Batch 0 2 2.112668789066533e+02 -1.147554660376938e+02 3.364589711187055e+01 -1.741632301749357e+02 3 7.393007599584276e+02 2.529046383258835e+02 
-3.593132473314827e+02 5.945576909606565e+02 4 5.494323611349191e+02 -1.381491722881897e+02 3.256673502196121e+02 -4.203944607857206e+02 - ME 2.709805393201018e-03 + ME 3.541023077707110e-04 Event 253 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2028,7 +2028,7 @@ Event 253 Batch 0 2 7.299659304470913e+01 -4.405884533650594e+01 -5.451291667290519e+01 2.038780663930336e+01 3 7.253475305576840e+02 3.245698054519170e+02 -1.402290280555607e+02 -6.333397991328418e+02 4 7.016558763976062e+02 -2.805109601154107e+02 1.947419447284657e+02 6.129519924935382e+02 - ME 6.484723438037138e-04 + ME 3.511004874943257e-04 Event 254 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2036,7 +2036,7 @@ Event 254 Batch 0 2 1.982520535096858e+02 -6.164633378269741e+01 1.773450413210087e+02 -6.365801262063783e+01 3 7.183815394471145e+02 -1.984891252513599e+02 -6.893152145826987e+02 -3.896971029099802e+01 4 5.833664070431995e+02 2.601354590340572e+02 5.119701732616900e+02 1.026277229116358e+02 - ME 9.210498573936143e-05 + ME 1.539519794804785e-05 Event 255 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2044,7 +2044,7 @@ Event 255 Batch 0 2 5.347080663542586e+02 -5.063606624096446e+02 1.592577719822621e+02 6.440929941880935e+01 3 2.475406015289465e+02 -1.856063881081879e+02 3.468010668896048e+00 -1.637516137347836e+02 4 7.177513321167953e+02 6.919670505178326e+02 -1.627257826511582e+02 9.934231431597431e+01 - ME 1.305481727349711e-03 + ME 3.137689362725149e-04 Event 0 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2052,7 +2052,7 @@ Event 0 Batch 1 2 5.775677821222389e+02 4.314431287975208e+02 -2.652567205762379e+02 -2.776332864556192e+02 3 6.023469575940325e+02 -3.228069847179709e+02 5.005558924007591e+02 8.978477890465942e+01 4 3.200852602837275e+02 -1.086361440795499e+02 
-2.352991718245218e+02 1.878485075509607e+02 - ME 2.846168667868940e-05 + ME 7.533072458757011e-06 Event 1 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2060,7 +2060,7 @@ Event 1 Batch 1 2 7.241206267812560e+02 3.541578305635416e+02 -4.894807402105655e+02 3.991635230623179e+02 3 7.375567605136832e+02 -3.903081173548693e+02 4.920451519627784e+02 -3.867054653560791e+02 4 3.832261270506111e+01 3.615028679132773e+01 -2.564411752212873e+00 -1.245805770623896e+01 - ME 1.002871021831580e-03 + ME 7.043932941624384e-05 Event 2 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2068,7 +2068,7 @@ Event 2 Batch 1 2 4.849204091734790e+02 2.108660079931152e+02 4.054727376659824e+02 1.620962335024329e+02 3 2.728468517759738e+02 4.961449545460115e+01 2.005017763154939e+02 1.782774356422519e+02 4 7.422327390505470e+02 -2.604805034477164e+02 -6.059745139814763e+02 -3.403736691446848e+02 - ME 2.729395913593408e-02 + ME 1.721146206228212e-02 Event 3 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2076,7 +2076,7 @@ Event 3 Batch 1 2 4.264155576764489e+02 -4.170952165204416e+02 -7.054834331799705e+01 5.370977042744418e+01 3 7.108631972082329e+02 6.832597695609467e+02 -1.727180704166534e+02 -9.301097030017993e+01 4 3.627212451153183e+02 -2.661645530405051e+02 2.432664137346505e+02 3.930119987273574e+01 - ME 5.466137525204964e-05 + ME 5.739226791327231e-06 Event 4 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2084,7 +2084,7 @@ Event 4 Batch 1 2 7.183269968238449e+02 -3.584978055671311e+02 -5.048824553914336e+02 -3.640971079361008e+02 3 7.387431276480253e+02 4.013538934928407e+02 5.036810263913359e+02 3.618865629982628e+02 4 4.292987552812846e+01 -4.285608792570924e+01 1.201429000097643e+00 2.210544937839338e+00 - ME 3.145606575501715e-04 + ME 5.884725836744927e-05 Event 5 
Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2092,7 +2092,7 @@ Event 5 Batch 1 2 4.529780005473896e+02 -8.443182436392424e+01 4.445408460134587e+02 -2.106590230986445e+01 3 4.683757780543924e+02 -6.076819021151039e+01 -1.335482427838441e+02 -4.448010379662153e+02 4 5.786462213982179e+02 1.452000145754347e+02 -3.109926032296145e+02 4.658669402760799e+02 - ME 8.481958952475706e-05 + ME 2.851579396246287e-05 Event 6 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2100,7 +2100,7 @@ Event 6 Batch 1 2 6.238848262005389e+02 -1.065131260140052e+02 -4.741487807795934e+02 -3.912418229627633e+02 3 1.729069432107234e+02 -1.460869767542721e+02 -8.199113358821990e+01 4.281191710484079e+01 4 7.032082305887380e+02 2.526001027682771e+02 5.561399143678132e+02 3.484299058579224e+02 - ME 4.868510537699180e-04 + ME 1.468701510222534e-04 Event 7 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2108,7 +2108,7 @@ Event 7 Batch 1 2 6.977203086376783e+02 -6.126072843634399e+02 -1.744636661244187e+02 2.847602033865263e+02 3 1.614193396272251e+02 -4.571584237043670e+00 8.497734613495712e+01 -1.371646983269120e+02 4 6.408603517350967e+02 6.171788686004836e+02 8.948631998946138e+01 -1.475955050596143e+02 - ME 3.540796080305845e-04 + ME 9.523334397108766e-05 Event 8 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2116,7 +2116,7 @@ Event 8 Batch 1 2 6.871091945484288e+02 4.059708628308462e+02 2.886614153103366e+02 4.732666173272762e+02 3 5.653302025665631e+02 -2.838835484844413e+02 -7.353399035097291e+01 -4.833229987253825e+02 4 2.475606028850081e+02 -1.220873143464048e+02 -2.151274249593637e+02 1.005638139810634e+01 - ME 8.785466054587446e-05 + ME 3.726341895116938e-05 Event 9 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-2124,7 +2124,7 @@ Event 9 Batch 1 2 1.618579955503452e+02 1.385215220188489e+01 1.601201234527701e+02 -1.917484467788566e+01 3 7.196660585644588e+02 -4.527189715496824e+02 -4.214090439733052e+02 3.679391067910628e+02 4 6.184759458851959e+02 4.388668193477974e+02 2.612889205205349e+02 -3.487642621131772e+02 - ME 1.054640649369016e-03 + ME 1.276556148007894e-04 Event 10 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2132,7 +2132,7 @@ Event 10 Batch 1 2 7.832785200561162e+01 1.027681340851886e+01 -7.242726264265977e+01 -2.799877018853974e+01 3 7.448007230566494e+02 2.520540107528716e+02 6.813719334665398e+02 1.641011304445167e+02 4 6.768714249377393e+02 -2.623308241613905e+02 -6.089446708238800e+02 -1.361023602559769e+02 - ME 5.876642887714617e-04 + ME 1.087112534498832e-04 Event 11 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2140,7 +2140,7 @@ Event 11 Batch 1 2 5.478627446486676e+02 2.070882322301630e+02 -4.708081692757452e+02 1.887000762823861e+02 3 6.997827604382593e+02 -4.209013422316021e+02 4.569873120768409e+02 -3.220257264800591e+02 4 2.523544949130733e+02 2.138131100014392e+02 1.382085719890436e+01 1.333256501976729e+02 - ME 2.703695959900953e-05 + ME 7.092902148917371e-06 Event 12 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2148,7 +2148,7 @@ Event 12 Batch 1 2 5.802868936311938e+02 -4.467002255894120e+01 5.211262762381961e+02 -2.513262266832405e+02 3 5.208038834706859e+02 2.151797013176283e+01 -4.993650129388666e+02 -1.463155694111945e+02 4 3.989092228981199e+02 2.315205242717860e+01 -2.176126329932955e+01 3.976417960944350e+02 - ME 5.046437564325244e-04 + ME 4.980323856672599e-04 Event 13 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2156,7 +2156,7 @@ Event 13 Batch 1 2 5.774880087360024e+02 1.576445054854711e+02 5.481077151088400e+02 
-9.065617884226717e+01 3 5.915098138161557e+02 -3.018001633277128e+02 -3.808656371901898e+02 3.372564123391869e+02 4 3.310021774478421e+02 1.441556578422419e+02 -1.672420779186502e+02 -2.466002334969197e+02 - ME 1.505341700965184e-03 + ME 5.587942683639647e-05 Event 14 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2164,7 +2164,7 @@ Event 14 Batch 1 2 2.531797527967491e+02 -8.400833666640553e+01 -2.384535242035555e+02 -1.350938161690895e+01 3 5.261064571264828e+02 -1.751971590790252e+02 -3.334570051994592e+02 3.672878780523887e+02 4 7.207137900767681e+02 2.592054957454308e+02 5.719105294030147e+02 -3.537784964354798e+02 - ME 3.373121845959189e-03 + ME 1.659114310450813e-03 Event 15 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2172,7 +2172,7 @@ Event 15 Batch 1 2 4.605848765362425e+02 3.563504404614684e+02 1.735853700506503e+02 2.345653669687875e+02 3 4.216445088607453e+02 1.370719005416187e+02 -3.933730877164850e+02 6.521502736890037e+01 4 6.177706146030118e+02 -4.934223410030871e+02 2.197877176658347e+02 -2.997803943376878e+02 - ME 4.613631402771334e-04 + ME 9.110622752737525e-05 Event 16 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2180,7 +2180,7 @@ Event 16 Batch 1 2 4.972484926572777e+02 -1.474122335888775e+02 -4.748950276275915e+02 -6.399787981958280e-01 3 5.072511849723048e+02 4.846784046822065e+02 1.224000792205880e+02 -8.607455661990267e+01 4 4.955003223704169e+02 -3.372661710933285e+02 3.524949484070036e+02 8.671453541809866e+01 - ME 5.856804747367533e-05 + ME 1.035537635543116e-05 Event 17 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2188,7 +2188,7 @@ Event 17 Batch 1 2 3.182636773520259e+02 -9.176062613973060e+01 -1.890905041641619e+02 2.389906630959087e+02 3 6.376303990615819e+02 -4.240378519397394e+02 2.706855745366566e+02 
-3.917827786765570e+02 4 5.441059235863918e+02 5.157984780794702e+02 -8.159507037249479e+01 1.527921155806483e+02 - ME 7.445984612273079e-05 + ME 2.964570775197734e-05 Event 18 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2196,7 +2196,7 @@ Event 18 Batch 1 2 5.532560008158404e+02 -4.148613005881325e+02 1.689647846464811e+02 -3.247047971041214e+02 3 3.650144721835348e+02 -1.597348634907620e+02 -2.160675866909894e+02 2.470529017650751e+02 4 5.817295270006244e+02 5.745961640788944e+02 4.710280204450838e+01 7.765189533904635e+01 - ME 9.119298978738387e-05 + ME 3.148325734685632e-05 Event 19 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2204,7 +2204,7 @@ Event 19 Batch 1 2 3.263687475619531e+02 -1.904667433734991e+02 2.390747946355329e+02 -1.143775398573919e+02 3 7.331345945903582e+02 2.597391859223821e+02 -6.739404183465077e+02 1.258022320965774e+02 4 4.404966578476884e+02 -6.927244254888298e+01 4.348656237109747e+02 -1.142469223918529e+01 - ME 8.793129888044293e-05 + ME 9.665339952809457e-06 Event 20 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2212,7 +2212,7 @@ Event 20 Batch 1 2 9.588718605412237e+01 4.259536217794532e+01 8.056474827260676e+01 -2.982128277051557e+01 3 7.250265356668370e+02 3.120913743414047e+02 -4.446787057645155e+02 4.801284204484703e+02 4 6.790862782790414e+02 -3.546867365193502e+02 3.641139574919093e+02 -4.503071376779550e+02 - ME 3.686389281265799e-03 + ME 6.402422614019696e-04 Event 21 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2220,7 +2220,7 @@ Event 21 Batch 1 2 1.825278201605081e+02 -1.533737674675502e+02 8.574830442242751e+01 4.939757963742074e+01 3 7.183016103669913e+02 1.713205736990392e+02 -6.275703015775031e+02 -3.045685162014731e+02 4 5.991705694725008e+02 -1.794680623148897e+01 5.418219971550755e+02 
2.551709365640523e+02 - ME 7.470861105912214e-05 + ME 1.806434468406198e-05 Event 22 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2228,7 +2228,7 @@ Event 22 Batch 1 2 2.349542451120770e+02 9.235159917618290e+01 -2.156570331301489e+02 -1.291214495308476e+01 3 7.360601907662837e+02 -2.182033070539752e+02 6.568866822530020e+02 -2.503433799808774e+02 4 5.289855641216395e+02 1.258517078777923e+02 -4.412296491228531e+02 2.632555249339621e+02 - ME 3.893602972207037e-05 + ME 8.007442232312076e-06 Event 23 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2236,7 +2236,7 @@ Event 23 Batch 1 2 2.350908908124364e+02 -7.377772511691019e+00 -2.298431804723787e+02 -4.884063683135331e+01 3 6.797114625392685e+02 -5.485955088721076e+02 3.603976926464840e+02 1.765336882516069e+02 4 5.851976466482949e+02 5.559732813837987e+02 -1.305545121741055e+02 -1.276930514202538e+02 - ME 2.057468423101862e-04 + ME 3.185713653214173e-05 Event 24 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2244,7 +2244,7 @@ Event 24 Batch 1 2 4.355364173804401e+02 2.538053291625626e+02 -2.665393838801487e+02 -2.328767540869265e+02 3 4.093863144993796e+02 -1.953012891316528e+02 -3.573484670764558e+02 4.191221827828568e+01 4 6.550772681201798e+02 -5.850404003090968e+01 6.238878509566048e+02 1.909645358086408e+02 - ME 1.895168702655672e-04 + ME 3.721637657688893e-05 Event 25 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2252,7 +2252,7 @@ Event 25 Batch 1 2 7.365386968907909e+02 3.875876454009267e+02 3.151568854896985e+02 5.412404333367775e+02 3 5.208510884285567e+02 -2.430585576296288e+02 -1.518636440371932e+02 -4.349089876054084e+02 4 2.426102146806534e+02 -1.445290877712977e+02 -1.632932414525050e+02 -1.063314457313693e+02 - ME 3.717867207603688e-04 + ME 7.982561935336398e-05 Event 26 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2260,7 +2260,7 @@ Event 26 Batch 1 2 7.198867014174701e+02 5.189601929589824e+02 4.797253921416957e+02 -1.370428003807496e+02 3 3.889101953712928e+02 -1.847394503243419e+02 -2.837815501141775e+02 1.912864537085460e+02 4 3.912031032112371e+02 -3.342207426346404e+02 -1.959438420275183e+02 -5.424365332779646e+01 - ME 1.222836766708484e-04 + ME 1.928349098758061e-05 Event 27 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2268,7 +2268,7 @@ Event 27 Batch 1 2 6.732032222628646e+02 5.870808395006010e+02 -9.126179303429218e+01 3.165595544104447e+02 3 1.177373967283342e+02 7.847176641415683e+01 5.304379211899001e+00 -8.761358356661104e+01 4 7.090593810088013e+02 -6.655526059147578e+02 8.595741382239324e+01 -2.289459708438336e+02 - ME 1.603290018002586e-03 + ME 6.795383824785976e-04 Event 28 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2276,7 +2276,7 @@ Event 28 Batch 1 2 6.475300414228806e+02 3.136396845517189e+02 3.816259196370642e+02 -4.186728559156669e+02 3 7.290923529036073e+02 -2.791764769994177e+02 -4.112865540505715e+02 5.333662195995520e+02 4 1.233776056735125e+02 -3.446320755230100e+01 2.966063441350738e+01 -1.146933636838856e+02 - ME 5.037107889244314e-02 + ME 6.311296815400830e-04 Event 29 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2284,7 +2284,7 @@ Event 29 Batch 1 2 3.156754590345620e+02 -2.870540678871016e+02 4.159516713841874e+01 -1.245825012466667e+02 3 4.770060274033896e+02 -2.355061130652810e+02 -3.231858413754910e+02 -2.600433287405434e+02 4 7.073185135620483e+02 5.225601809523826e+02 2.815906742370723e+02 3.846258299872100e+02 - ME 7.956699356695784e-04 + ME 1.321807869823317e-04 Event 30 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2292,7 
+2292,7 @@ Event 30 Batch 1 2 6.091290614220995e+02 1.543004089904798e+02 4.216196287493766e+00 -5.892468251447810e+02 3 2.079357839022729e+02 2.034647466922837e+02 4.185675980476618e+01 9.348729279626889e+00 4 6.829351546756266e+02 -3.577651556827627e+02 -4.607295609226003e+01 5.798980958651539e+02 - ME 3.902231064020147e-04 + ME 1.448382779935031e-04 Event 31 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2300,7 +2300,7 @@ Event 31 Batch 1 2 6.901710072855793e+02 1.433309098684656e+01 6.447948515477649e+02 -2.457034416076623e+02 3 5.898919363861644e+02 1.120085307876391e+02 -4.815950471622465e+02 3.217029626736535e+02 4 2.199370563282564e+02 -1.263416217744856e+02 -1.631998043855182e+02 -7.599952106599136e+01 - ME 2.415465849322543e-04 + ME 2.376400497996635e-05 Event 32 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2308,7 +2308,7 @@ Event 32 Batch 1 2 6.144498311923271e+02 5.832947925341469e+02 -1.925283703230110e+02 1.576726595169125e+01 3 2.478450424037004e+02 5.004284035329792e+01 2.389954177960992e+02 4.247433867565734e+01 4 6.377051264039724e+02 -6.333376328874447e+02 -4.646704747308818e+01 -5.824160462734862e+01 - ME 2.160220890176678e-04 + ME 5.390650629646604e-05 Event 33 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2316,7 +2316,7 @@ Event 33 Batch 1 2 6.134536717469736e+02 -1.625429495269566e+02 -1.853973484494194e+02 5.617232593785355e+02 3 5.361644687950269e+02 -3.755831293394986e+01 -9.992652347025609e+01 -5.254297294928764e+02 4 3.503818594579993e+02 2.001012624609065e+02 2.853238719196754e+02 -3.629352988565911e+01 - ME 1.224582992507153e-04 + ME 1.005452860076771e-04 Event 34 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2324,7 +2324,7 @@ Event 34 Batch 1 2 3.840838099420727e+02 -2.442269925519278e+02 -3.827314394217582e+01 
-2.939535943332559e+02 3 6.022630974514659e+02 3.956891925431131e+01 5.086724982658299e+02 3.200116071158652e+02 4 5.136530926064613e+02 2.046580732976165e+02 -4.703993543236541e+02 -2.605801278260916e+01 - ME 9.608243105510499e-05 + ME 2.313941306740064e-05 Event 35 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2332,7 +2332,7 @@ Event 35 Batch 1 2 3.454350783663418e+02 -3.439607925797615e+02 2.363778141880094e+01 -2.139209721976717e+01 3 6.705698302143294e+02 5.215327591153251e+02 4.060443141865528e+02 -1.131171661597076e+02 4 4.839950914193290e+02 -1.775719665355635e+02 -4.296820956053536e+02 1.345092633794747e+02 - ME 4.862206803317224e-05 + ME 7.982017052260048e-06 Event 36 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2340,7 +2340,7 @@ Event 36 Batch 1 2 7.098652154429357e+02 2.489290984574327e+02 -1.674080692141068e+02 -6.433641786725617e+02 3 6.178479130357197e+02 -1.435715807033598e+02 2.588953561477193e+02 5.423065917191846e+02 4 1.722868715213448e+02 -1.053575177540730e+02 -9.148728693361247e+01 1.010575869533772e+02 - ME 6.680529568232270e-05 + ME 5.562249548714765e-05 Event 37 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2348,7 +2348,7 @@ Event 37 Batch 1 2 6.906872786346031e+02 1.495946561071237e+02 1.712833879510068e+02 6.521750966909805e+02 3 3.682276595245592e+02 -1.358558710218083e+02 1.194309698061993e+02 -3.207351477449753e+02 4 4.410850618408380e+02 -1.373878508531530e+01 -2.907143577572061e+02 -3.314399489460051e+02 - ME 2.014943348935539e-03 + ME 5.542438863722841e-04 Event 38 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2356,7 +2356,7 @@ Event 38 Batch 1 2 6.131720166645955e+02 -5.222102655174087e+02 6.340623138461877e+00 3.213038392347352e+02 3 4.540063357567760e+02 2.932429176443922e+02 -3.207297067242505e+02 
-1.313879727496968e+02 4 4.328216475786277e+02 2.289673478730168e+02 3.143890835857886e+02 -1.899158664850380e+02 - ME 2.589645049118943e-04 + ME 3.150821423911933e-05 Event 39 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2364,7 +2364,7 @@ Event 39 Batch 1 2 2.929747896182304e+02 2.510117592312210e+02 -1.378648144805472e+02 6.181113983529403e+01 3 6.287164314722783e+02 3.864928360025993e+01 6.254120614625328e+02 5.148142827864510e+01 4 5.783087789094894e+02 -2.896610428314818e+02 -4.875472469819856e+02 -1.132925681139394e+02 - ME 1.708238325115053e-04 + ME 2.723120294663496e-05 Event 40 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2372,7 +2372,7 @@ Event 40 Batch 1 2 1.143487538112954e+02 -3.203572478439017e+01 1.022340126870988e+02 3.996944439980560e+01 3 7.361483923235807e+02 5.924235295921244e+02 -3.838567751530157e+02 -2.088128187524163e+02 4 6.495028538651248e+02 -5.603878048077345e+02 2.816227624659169e+02 1.688433743526105e+02 - ME 2.026369815874481e-04 + ME 4.279185076498264e-05 Event 41 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2380,7 +2380,7 @@ Event 41 Batch 1 2 6.384898508133350e+02 5.540399192408263e+02 -3.014826159773289e+02 -9.908223727147148e+01 3 3.510407251698805e+02 -1.719168197014114e+02 2.065966849440144e+02 -2.258140996521069e+02 4 5.104694240167846e+02 -3.821230995394149e+02 9.488593103331458e+01 3.248963369235784e+02 - ME 4.455092331482675e-05 + ME 1.488395965626735e-05 Event 42 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2388,7 +2388,7 @@ Event 42 Batch 1 2 3.291654598309212e+02 -1.090829060981258e+02 2.972891943885482e+02 -8.983292515941632e+01 3 6.884965239796815e+02 4.933628807557017e+02 -2.919492821202986e+02 3.812953554581829e+02 4 4.823380161893969e+02 -3.842799746575757e+02 -5.339912268249619e+00 
-2.914624302987665e+02 - ME 6.690811667999076e-04 + ME 5.767145017550451e-05 Event 43 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2396,7 +2396,7 @@ Event 43 Batch 1 2 3.674173006007981e+02 2.791827424102563e+02 1.079644067383057e+02 2.130637369397045e+02 3 7.392205647816575e+02 -6.110484627794917e+02 -4.247874240022372e+01 -4.138385868609020e+02 4 3.933621346175442e+02 3.318657203692355e+02 -6.548566433808202e+01 2.007748499211975e+02 - ME 2.734436884563990e-05 + ME 6.513986915725277e-06 Event 44 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2404,7 +2404,7 @@ Event 44 Batch 1 2 2.081359682230012e+02 -1.082501549908087e+02 1.771964605001424e+02 1.427934167997762e+01 3 7.449563315308093e+02 5.092828751965591e+02 -5.388739609944279e+02 7.215083562608928e+01 4 5.469077002461893e+02 -4.010327202057504e+02 3.616775004942854e+02 -8.643017730606689e+01 - ME 1.760644262839344e-04 + ME 1.838899544278803e-05 Event 45 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2412,7 +2412,7 @@ Event 45 Batch 1 2 5.180982465404422e+02 4.470261481799612e+02 -3.368837017252423e+01 -2.597277606009553e+02 3 3.377595659674062e+02 -7.316527185649456e+01 2.454727770679006e+02 -2.201624016839132e+02 4 6.441421874921515e+02 -3.738608763234666e+02 -2.117844068953763e+02 4.798901622848684e+02 - ME 1.645403798734011e-04 + ME 4.091340785269233e-05 Event 46 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2420,7 +2420,7 @@ Event 46 Batch 1 2 6.296560291524888e+02 2.172411497655985e+02 5.821614514430422e+02 -1.017892054705761e+02 3 6.224001894826197e+02 1.405102091633609e+01 -6.218608257778048e+02 2.176414579432105e+01 4 2.479437813648912e+02 -2.312921706819346e+02 3.969937433476264e+01 8.002505967625511e+01 - ME 4.041878897626609e-05 + ME 7.434320230190137e-06 Event 47 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2428,7 +2428,7 @@ Event 47 Batch 1 2 5.458843469271557e+02 -1.019033861791133e+02 -1.559739004096151e+02 5.131058004898495e+02 3 2.573134207008558e+02 6.791700498899543e+01 -2.412204887508016e+02 5.839651284901167e+01 4 6.968022323719882e+02 3.398638119011781e+01 3.971943891604168e+02 -5.715023133388611e+02 - ME 1.408798022766008e-02 + ME 4.005478861198618e-03 Event 48 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2436,7 +2436,7 @@ Event 48 Batch 1 2 6.623920218006384e+02 -6.284562032939594e+02 -1.837527125398962e+02 -1.002044496053409e+02 3 1.251779629744606e+02 -7.502448682133647e+01 9.550779386908961e+01 3.031682869117444e+01 4 7.124300152249010e+02 7.034806901152959e+02 8.824491867080658e+01 6.988762091416655e+01 - ME 8.682321044518227e-04 + ME 3.004757451335502e-04 Event 49 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2444,7 +2444,7 @@ Event 49 Batch 1 2 2.397494808364364e+02 2.393958238941666e+02 -4.144666783354266e+00 -1.233996761053010e+01 3 6.782491241100328e+02 -3.516321535544010e+02 -2.705899831712919e+02 5.129890485673947e+02 4 5.820013950535307e+02 1.122363296602344e+02 2.747346499546462e+02 -5.006490809568646e+02 - ME 9.041285542966720e-03 + ME 6.040872325723622e-04 Event 50 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2452,7 +2452,7 @@ Event 50 Batch 1 2 4.764898792162554e+02 4.667163214316568e+02 5.900817880915086e+01 -7.573978570375913e+01 3 5.114228101321805e+02 -2.035689445851523e+02 -4.549677995197112e+02 -1.145306811477843e+02 4 5.120873106515638e+02 -2.631473768465044e+02 3.959596207105603e+02 1.902704668515434e+02 - ME 5.157319121365441e-05 + ME 9.692662313613028e-06 Event 51 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2460,7 
+2460,7 @@ Event 51 Batch 1 2 4.678795643859630e+02 4.629737719234085e+02 5.365495313512251e+01 4.108186077915564e+01 3 6.311645871918951e+02 -4.500610707732837e+02 -4.345770688214700e+02 8.340587481742408e+01 4 4.009558484221416e+02 -1.291270115012470e+01 3.809221156863474e+02 -1.244877355965797e+02 - ME 1.517985021504320e-04 + ME 1.293558494013996e-05 Event 52 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2468,7 +2468,7 @@ Event 52 Batch 1 2 3.696230029266819e+02 2.516704934433110e+02 2.514038675722595e+02 1.003953305301004e+02 3 6.696174214325739e+02 -2.754912388418390e+01 -6.493999246431116e+02 -1.609604756850079e+02 4 4.607595756407442e+02 -2.241213695591271e+02 3.979960570708519e+02 6.056514515490756e+01 - ME 5.727699238559496e-05 + ME 8.655753222194317e-06 Event 53 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2476,7 +2476,7 @@ Event 53 Batch 1 2 7.284624742442375e+01 -4.271742504396477e+01 -2.683807109937144e+01 -5.255012179908527e+01 3 7.493542950735829e+02 3.356513586119740e+02 2.501807367708783e+02 6.215139772812374e+02 4 6.777994575019936e+02 -2.929339335680093e+02 -2.233426656715069e+02 -5.689638554821522e+02 - ME 1.612275481129464e-02 + ME 2.372423861687152e-03 Event 54 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2484,7 +2484,7 @@ Event 54 Batch 1 2 7.460259847230064e+02 2.055186857047568e+01 6.233229443227743e+02 4.093908861479223e+02 3 5.756222844616437e+02 2.606063779094539e+01 -4.696411468594731e+02 -3.318117699890848e+02 4 1.783517308153497e+02 -4.661250636142109e+01 -1.536817974633012e+02 -7.757911615883735e+01 - ME 4.374243668355642e-04 + ME 5.046268590690708e-05 Event 55 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2492,7 +2492,7 @@ Event 55 Batch 1 2 5.967428482894213e+02 -8.165820254184375e+01 5.098287527914877e+02 
-2.991798919868828e+02 3 5.942526243827265e+02 5.606061544962815e+01 -2.905196430116550e+02 5.153559216750568e+02 4 3.090045273278509e+02 2.559758709221549e+01 -2.193091097798325e+02 -2.161760296881746e+02 - ME 1.779007466146034e-03 + ME 1.849048785615045e-04 Event 56 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2500,7 +2500,7 @@ Event 56 Batch 1 2 5.610874267302015e+02 -4.199055433713192e+02 3.580252469767042e+02 1.015694718309908e+02 3 6.303091265298390e+02 2.130872195586830e+02 -5.453843477211296e+02 -2.333224059286980e+02 4 3.086034467399593e+02 2.068183238126362e+02 1.873591007444254e+02 1.317529340977073e+02 - ME 3.258989367177766e-05 + ME 7.213009143835112e-06 Event 57 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2508,7 +2508,7 @@ Event 57 Batch 1 2 6.552053965855981e+02 4.516249927537604e+02 7.110694105335197e+00 4.746350341729917e+02 3 6.035190443408458e+02 -3.717228873476765e+02 2.148772607224587e+02 -4.241286299324850e+02 4 2.412755590735562e+02 -7.990210540608396e+01 -2.219879548277939e+02 -5.050640424050685e+01 - ME 1.623545585873121e-04 + ME 3.752873989265266e-05 Event 58 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2516,7 +2516,7 @@ Event 58 Batch 1 2 2.959982971085279e+02 1.850007048157144e+02 -2.304987961744356e+02 1.612563397119956e+01 3 7.018897389129390e+02 -3.764226030262936e+02 4.376344751014918e+02 3.992884868423144e+02 4 5.021119639785326e+02 1.914218982105791e+02 -2.071356789270567e+02 -4.154141208135139e+02 - ME 4.558573859477246e-03 + ME 1.901193343270815e-04 Event 59 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2524,7 +2524,7 @@ Event 59 Batch 1 2 5.521089721327345e+02 1.223876815062619e+02 -3.629066091228882e+01 -5.371485459866160e+02 3 4.098988410471214e+02 -5.841964900319319e+01 -3.626461945087767e+02 
1.819119075553315e+02 4 5.379921868201441e+02 -6.396803250306872e+01 3.989368554210655e+02 3.552366384312845e+02 - ME 5.148841296796537e-05 + ME 1.780280399801712e-05 Event 60 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2532,7 +2532,7 @@ Event 60 Batch 1 2 7.143828168925960e+02 -4.584044193456332e+02 -2.419772079280938e+02 -4.915844060170314e+02 3 1.284110307517517e+02 8.324300347118127e+01 -7.889851197070540e+01 5.774963203893758e+01 4 6.572061523556514e+02 3.751614158744520e+02 3.208757198987992e+02 4.338347739780938e+02 - ME 1.673517837789511e-04 + ME 7.144001898958308e-05 Event 61 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2540,7 +2540,7 @@ Event 61 Batch 1 2 4.394390210968651e+02 -2.137451655543886e+02 -3.779414621253704e+02 -6.767502250635177e+01 3 4.431311911324728e+02 3.845666395406355e+02 -2.150363068358313e+02 4.725610065709574e+01 4 6.174297877706618e+02 -1.708214739862469e+02 5.929777689612018e+02 2.041892184925626e+01 - ME 1.368591177943825e-04 + ME 2.870354731125455e-05 Event 62 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2548,7 +2548,7 @@ Event 62 Batch 1 2 7.301725729481176e+02 4.281927891852710e+02 5.652737593150771e+02 -1.739784429324868e+02 3 7.567373964415995e+01 2.589885732647599e+01 -5.696550981957816e+01 4.255225906941358e+01 4 6.941536874077224e+02 -4.540916465117469e+02 -5.083082494954988e+02 1.314261838630732e+02 - ME 8.513592598060080e-04 + ME 2.379197431250548e-04 Event 63 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2556,7 +2556,7 @@ Event 63 Batch 1 2 4.361152320236988e+02 -3.738769057978321e+02 1.427754799584550e+02 -1.732850750548248e+02 3 5.817148313055657e+02 5.081993893256957e+02 2.829214478037172e+02 -8.998890070513914e+00 4 4.821699366707353e+02 -1.343224835278637e+02 -4.256969277621721e+02 
1.822839651253387e+02 - ME 4.544766189571194e-05 + ME 8.350404272725701e-06 Event 64 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2564,7 +2564,7 @@ Event 64 Batch 1 2 6.097675704107204e+02 3.288514690970509e+02 4.971291587853200e+02 -1.285916042465611e+02 3 5.709532610348123e+02 -6.501292612520263e+01 -4.768258747557200e+02 3.072426254385416e+02 4 3.192791685544673e+02 -2.638385429718484e+02 -2.030328402960006e+01 -1.786510211919805e+02 - ME 4.598138986874043e-04 + ME 3.000969253297957e-05 Event 65 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2572,7 +2572,7 @@ Event 65 Batch 1 2 6.258641293880484e+02 3.743515439843765e+02 -1.622018320411498e+02 -4.746128903155367e+02 3 7.438702198751357e+02 -4.029113627030089e+02 2.325939036896868e+02 5.804355380128616e+02 4 1.302656507368158e+02 2.855981871863233e+01 -7.039207164853700e+01 -1.058226476973252e+02 - ME 6.427333508548903e-03 + ME 3.162776051460646e-04 Event 66 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2580,7 +2580,7 @@ Event 66 Batch 1 2 3.731957242404369e+02 1.596860493342637e+01 -3.714568973276624e+02 3.224632809376674e+01 3 6.079923612940432e+02 4.451199598539357e+02 3.189341902600864e+02 -2.642043054431177e+02 4 5.188119144655197e+02 -4.610885647873621e+02 5.252270706757586e+01 2.319579773493509e+02 - ME 4.681392980523237e-05 + ME 1.034065067393998e-05 Event 67 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2588,7 +2588,7 @@ Event 67 Batch 1 2 7.084256499213539e+02 6.318790977834966e+02 -2.229764540025608e+02 2.299504472951746e+02 3 5.168612394424738e+01 1.130069959366449e+01 -1.428140623590627e+01 4.837138651102398e+01 4 7.398882261343989e+02 -6.431797973771612e+02 2.372578602384670e+02 -2.783218338061985e+02 - ME 5.878400132197954e-02 + ME 1.479715191731530e-02 Event 68 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2596,7 +2596,7 @@ Event 68 Batch 1 2 5.644037677826096e+02 -7.446914007305443e+01 3.170710956176409e+02 4.609467220707991e+02 3 4.303832728799333e+02 -1.588265612792408e+02 -3.994808673830752e+02 -2.046757440246668e+01 4 5.052129593374568e+02 2.332957013522950e+02 8.240977176543441e+01 -4.404791476683325e+02 - ME 8.108482137897523e-03 + ME 3.274273226082449e-04 Event 69 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2604,7 +2604,7 @@ Event 69 Batch 1 2 2.379282923937934e+02 -4.413455715133102e+01 1.058497776082811e+02 -2.084654354245804e+02 3 5.822935131976616e+02 -5.806422676829345e+02 4.095409019445288e+01 -1.559022092337181e+01 4 6.797781944085444e+02 6.247768248342655e+02 -1.468038678027338e+02 2.240556563479522e+02 - ME 3.039802585689931e-04 + ME 6.379305675073031e-05 Event 70 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2612,7 +2612,7 @@ Event 70 Batch 1 2 5.861861307468000e+02 1.831219916849830e+02 2.904683423406074e+02 -4.750880530376756e+02 3 4.633200606614189e+02 -4.245314712871158e+02 -1.339518705596282e+02 1.284344380284135e+02 4 4.504938085917810e+02 2.414094796021329e+02 -1.565164717809791e+02 3.466536150092620e+02 - ME 3.530491740557932e-05 + ME 1.325653453486623e-05 Event 71 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2620,7 +2620,7 @@ Event 71 Batch 1 2 7.383412459951699e+02 5.748049255568963e+02 -1.639684737984460e+02 -4.334298474879633e+02 3 3.973981306646684e+02 -3.228684354469153e+02 -4.837114091238284e+00 2.316416412804533e+02 4 3.642606233401616e+02 -2.519364901099809e+02 1.688055878896842e+02 2.017882062075102e+02 - ME 3.103530482016079e-05 + ME 1.333441808219846e-05 Event 72 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2628,7 
+2628,7 @@ Event 72 Batch 1 2 3.538199915090663e+02 3.512029503136998e+02 -6.467835580753929e+00 -4.246458742680748e+01 3 5.344234504985296e+02 1.310173344785605e+01 3.836805260246265e+01 5.328833470497182e+02 4 6.117565579924039e+02 -3.643046837615559e+02 -3.190021702170876e+01 -4.904187596229107e+02 - ME 9.376669006106200e-03 + ME 2.994704399169685e-03 Event 73 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2636,7 +2636,7 @@ Event 73 Batch 1 2 4.694927197571710e+02 1.451947293992222e+02 -1.807863847612341e+02 4.082379055705570e+02 3 5.537325951281179e+02 -5.796379956652479e+01 5.401382741253894e+02 -1.072876026015002e+02 4 4.767746851147115e+02 -8.723092983269744e+01 -3.593518893641554e+02 -3.009503029690568e+02 - ME 1.077472469645428e-03 + ME 1.535829386616431e-04 Event 74 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2644,7 +2644,7 @@ Event 74 Batch 1 2 6.258444305735198e+02 -3.349227552763227e+02 4.941036656040852e+02 1.880679848209580e+02 3 5.555040664889822e+02 3.765538795180102e+01 -5.474422011270130e+02 -8.645158222500005e+01 4 3.186515029374982e+02 2.972673673245214e+02 5.333853552292791e+01 -1.016164025959578e+02 - ME 1.623439923565115e-04 + ME 1.487896902219418e-05 Event 75 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2652,7 +2652,7 @@ Event 75 Batch 1 2 3.943316317993887e+02 5.588489849751632e+01 -2.552251009651266e+02 -2.953548066221912e+02 3 5.467466262348042e+02 -3.021648543602057e+02 -2.377479281839000e+02 3.887212326756534e+02 4 5.589217419658066e+02 2.462799558626894e+02 4.929730291490265e+02 -9.336642605346221e+01 - ME 1.348649436679123e-04 + ME 4.632408498797698e-05 Event 76 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2660,7 +2660,7 @@ Event 76 Batch 1 2 5.517772830004059e+02 2.282681125856672e+02 -4.885490190451381e+02 
-1.169260227747471e+02 3 4.245403880864563e+02 -2.793100283061228e+02 1.521744876196477e+02 -2.811821020654221e+02 4 5.236823289131380e+02 5.104191572045557e+01 3.363745314254903e+02 3.981081248401691e+02 - ME 5.074216551061466e-05 + ME 1.645260485784409e-05 Event 77 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2668,7 +2668,7 @@ Event 77 Batch 1 2 3.781543446472003e+02 -5.926925448310480e+01 -1.775497893613220e+02 3.285786605157444e+02 3 6.702964816234122e+02 -6.066564226432872e+01 -1.057468051743550e+02 -6.591165802199176e+02 4 4.515491737293867e+02 1.199348967474336e+02 2.832965945356770e+02 3.305379197041734e+02 - ME 6.321080405055773e-05 + ME 5.041095643414513e-05 Event 78 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2676,7 +2676,7 @@ Event 78 Batch 1 2 4.564262045363139e+02 1.882572856930395e+02 1.751822011208171e+02 -3.770878823051468e+02 3 3.809544602625751e+02 -2.816334489555117e+02 1.992812047321844e+02 -1.615422627793184e+02 4 6.626193352011103e+02 9.337616326247226e+01 -3.744634058530013e+02 5.386301450844651e+02 - ME 2.572921643188974e-04 + ME 6.222463480998997e-05 Event 79 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2684,7 +2684,7 @@ Event 79 Batch 1 2 6.126536521478922e+02 6.075062399138452e+02 -4.178945028651393e+01 6.733726903166659e+01 3 2.872846052831658e+02 -1.084163947926161e+02 2.139961846825774e+01 2.651799127051085e+02 4 6.000617425689430e+02 -4.990898451212283e+02 2.038983181825616e+01 -3.325171817367756e+02 - ME 1.996659951821530e-03 + ME 6.289823950094716e-04 Event 80 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2692,7 +2692,7 @@ Event 80 Batch 1 2 4.171281258707700e+02 -2.756641813219371e+02 1.445082905894664e+01 3.127240094205691e+02 3 3.805235327384960e+02 -2.955852199231463e+02 2.395269588958384e+02 
7.373784162959287e+00 4 7.023483413907342e+02 5.712494012450838e+02 -2.539777879547846e+02 -3.200977935835284e+02 - ME 1.297520069620947e-03 + ME 5.629434448779270e-04 Event 81 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2700,7 +2700,7 @@ Event 81 Batch 1 2 7.471091333863935e+02 -9.753029041192970e+01 7.407154559164039e+02 -7.162458282065091e-01 3 6.775352561453885e+02 9.550863422814814e+01 -6.702673865908516e+02 -2.595678293896889e+01 4 7.535561046821789e+01 2.021656183781575e+00 -7.044806932555213e+01 2.667302876717550e+01 - ME 1.022399816924924e-04 + ME 2.904529061551848e-05 Event 82 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2708,7 +2708,7 @@ Event 82 Batch 1 2 4.309094465924175e+02 3.042233433179616e+02 2.799835808203350e+02 -1.214096495919827e+02 3 5.540384887187945e+02 -4.824447657759213e+02 1.988969596446625e+02 1.861335391629672e+02 4 5.150520646887885e+02 1.782214224579596e+02 -4.788805404649973e+02 -6.472388957098450e+01 - ME 1.053635072607165e-04 + ME 1.778678120024833e-05 Event 83 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2716,7 +2716,7 @@ Event 83 Batch 1 2 4.869534474909295e+02 -4.727010820510885e+02 1.062322962656182e+02 4.890855018466118e+01 3 3.520990385354405e+02 -1.437544586613779e+02 -3.142298368411062e+02 6.758696761482639e+01 4 6.609475139736298e+02 6.164555407124665e+02 2.079975405754878e+02 -1.164955177994876e+02 - ME 2.998516055200512e-04 + ME 7.948516811691567e-05 Event 84 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2724,7 +2724,7 @@ Event 84 Batch 1 2 1.391975815431583e+01 -3.682657486111166e-01 -1.138840508663312e+01 -7.995516055627093e+00 3 7.493632094786751e+02 -3.452281541586202e+01 3.833012084573049e+02 6.429880080772211e+02 4 7.367170323670085e+02 3.489108116447313e+01 -3.719128033706718e+02 
-6.349924920215940e+02 - ME 3.806217512266510e-01 + ME 8.671177508029917e-02 Event 85 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2732,7 +2732,7 @@ Event 85 Batch 1 2 7.362448947738020e+02 6.409220704967113e+02 3.243429451315054e+02 1.614840505254833e+02 3 1.517836214454495e+02 -1.266859291808411e+02 -6.780846852200752e+01 4.889738933094901e+01 4 6.119714837807480e+02 -5.142361413158706e+02 -2.565344766094980e+02 -2.103814398564324e+02 - ME 5.694785892689211e-04 + ME 1.062305495679385e-04 Event 86 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2740,7 +2740,7 @@ Event 86 Batch 1 2 5.451728369778392e+02 -6.605005893803180e+01 1.066920544886257e+02 -5.305352178712969e+02 3 3.158718592284829e+02 -1.755596039144849e+02 2.550395858012225e+02 6.251932981237656e+01 4 6.389553037936773e+02 2.416096628525165e+02 -3.617316402898481e+02 4.680158880589203e+02 - ME 1.469986179099727e-04 + ME 4.057626974930324e-05 Event 87 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2748,7 +2748,7 @@ Event 87 Batch 1 2 3.414211232216659e+02 1.437256906952883e+02 1.534640422371205e+02 -2.689983214749668e+02 3 5.081668091119999e+02 4.794742948200324e+02 -1.464748766741243e+02 8.296394996143997e+01 4 6.504120676663341e+02 -6.231999855153207e+02 -6.989165562996117e+00 1.860343715135268e+02 - ME 1.823135893899652e-04 + ME 3.656584417835253e-05 Event 88 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2756,7 +2756,7 @@ Event 88 Batch 1 2 2.925516585730864e+02 1.655911293372511e+01 2.598275245766865e+02 -1.334238591297045e+02 3 7.159840369510271e+02 -1.056844973272874e+02 -3.694097043713192e+02 6.041526284885822e+02 4 4.914643044758866e+02 8.912538439356234e+01 1.095821797946327e+02 -4.707287693588777e+02 - ME 8.728488941697977e-02 + ME 2.327745727475104e-03 Event 89 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2764,7 +2764,7 @@ Event 89 Batch 1 2 6.333634651097186e+02 1.209853522660007e+02 5.372166546881791e+02 -3.129058794565919e+02 3 6.221307427802806e+02 5.757192259699385e+01 -4.327483989541182e+02 4.432391657372765e+02 4 2.445057921100010e+02 -1.785572748629945e+02 -1.044682557340609e+02 -1.303332862806847e+02 - ME 5.497507832908574e-04 + ME 5.047204144927262e-05 Event 90 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2772,7 +2772,7 @@ Event 90 Batch 1 2 3.111538587406461e+02 2.628215106651484e+02 -6.985334981761831e+01 -1.512021390726355e+02 3 5.216486323898988e+02 1.252715366480781e+02 4.457714554600226e+02 -2.402335265468457e+02 4 6.671975088694549e+02 -3.880930473132266e+02 -3.759181056424042e+02 3.914356656194811e+02 - ME 2.329075524537458e-04 + ME 4.503542584588689e-05 Event 91 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2780,7 +2780,7 @@ Event 91 Batch 1 2 3.007803348469016e+02 8.390513937949677e+01 2.884042062049404e+02 -1.586667134655829e+01 3 6.256884422056424e+02 2.364580673743878e+02 -3.590826126759745e+02 -4.545693416378727e+02 4 5.735312229474563e+02 -3.203632067538847e+02 7.067840647103421e+01 4.704360129844310e+02 - ME 6.478111274774788e-05 + ME 2.635583378174906e-05 Event 92 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2788,7 +2788,7 @@ Event 92 Batch 1 2 6.843865618656529e+02 -2.264962467301474e+02 -5.909185329480341e+02 2.605757158639088e+02 3 6.645516272550811e+02 3.453347116263074e+02 4.983670680340538e+02 -2.720350487207341e+02 4 1.510618108792659e+02 -1.188384648961601e+02 9.255146491398015e+01 1.145933285682523e+01 - ME 9.365402433981294e-05 + ME 1.711437740567050e-05 Event 93 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2796,7 
+2796,7 @@ Event 93 Batch 1 2 5.579763469381434e+02 2.180908585044468e+02 5.135246110359701e+02 8.151996049100932e+00 3 3.333821836060117e+02 1.681122988324202e+02 -1.261705574188212e+02 2.587719570738210e+02 4 6.086414694558448e+02 -3.862031573368670e+02 -3.873540536171486e+02 -2.669239531229223e+02 - ME 5.183695239236329e-04 + ME 1.157787815150910e-04 Event 94 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2804,7 +2804,7 @@ Event 94 Batch 1 2 4.534979734151987e+02 1.139662723650677e+02 2.686183171543304e+01 4.381216071501101e+02 3 3.856184698299744e+02 1.545134372854228e+02 -3.452526490806396e+02 7.501873282757614e+01 4 6.608835567548277e+02 -2.684797096504910e+02 3.183908173652065e+02 -5.131403399776862e+02 - ME 6.944325623628402e-03 + ME 1.545010233607317e-03 Event 95 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2812,7 +2812,7 @@ Event 95 Batch 1 2 2.828073115974175e+02 -5.711637476392460e+01 5.915078172645698e+01 -2.705898746219725e+02 3 6.809618671276158e+02 3.772100991821226e+02 3.247893528880094e+02 4.646864338535512e+02 4 5.362308212749670e+02 -3.200937244181981e+02 -3.839401346144663e+02 -1.940965592315787e+02 - ME 2.560512106670314e-04 + ME 6.408796328924562e-05 Event 96 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2820,7 +2820,7 @@ Event 96 Batch 1 2 4.639832102051440e+02 -4.275497908582962e+02 -1.317248975374901e+02 -1.230046627491649e+02 3 7.474114851375481e+02 6.594176555428718e+02 2.654537688070380e+02 2.309254864669502e+02 4 2.886053046573076e+02 -2.318678646845757e+02 -1.337288712695479e+02 -1.079208237177853e+02 - ME 2.440162169445852e-04 + ME 1.445191791082226e-05 Event 97 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2828,7 +2828,7 @@ Event 97 Batch 1 2 5.095921959312568e+02 3.190102848863560e+02 3.100341192456060e+02 
2.485869851668986e+02 3 4.555541331018014e+02 -2.788120391899956e+02 2.221549471930723e+02 -2.836205112936887e+02 4 5.348536709669415e+02 -4.019824569636059e+01 -5.321890664386783e+02 3.503352612679014e+01 - ME 8.198891770965733e-05 + ME 2.250661525403011e-05 Event 98 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2836,7 +2836,7 @@ Event 98 Batch 1 2 5.299941952467790e+02 -2.570048161992350e+02 -4.630296380940593e+02 -2.111695271961878e+01 3 7.352146396921255e+02 2.361229278157243e+02 6.962552486063584e+02 3.893348873424185e+00 4 2.347911650610957e+02 2.088188838351074e+01 -2.332256105122990e+02 1.722360384619465e+01 - ME 6.760444392591968e-05 + ME 5.654417419793765e-06 Event 99 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2844,7 +2844,7 @@ Event 99 Batch 1 2 4.290897291078425e+02 3.747236205606835e+02 2.040795775432686e+02 -4.529602465443949e+01 3 6.438744429739487e+02 -5.215755139094103e+02 2.133414139578182e+01 3.769325350988583e+02 4 4.270358279182090e+02 1.468518933487271e+02 -2.254137189390505e+02 -3.316365104444187e+02 - ME 2.024851967866169e-03 + ME 8.457850707842401e-05 Event 100 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2852,7 +2852,7 @@ Event 100 Batch 1 2 5.119062275524872e+02 -4.721600394809319e+02 -1.845880136125884e+02 7.099400083769524e+01 3 4.523854579707449e+02 2.836789572262426e+02 -3.060214184981774e+02 -1.747276258374610e+02 4 5.357083144767672e+02 1.884810822546894e+02 4.906094321107658e+02 1.037336249997658e+02 - ME 6.898305006855298e-05 + ME 1.420495101373495e-05 Event 101 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2860,7 +2860,7 @@ Event 101 Batch 1 2 6.024072815192737e+02 -3.080418730730875e+02 -4.692284526425155e+02 2.186993289696520e+02 3 3.347434020484399e+02 8.940653726951260e+01 -3.939923552329941e+01 
-3.201676381969582e+02 4 5.628493164322859e+02 2.186353358035749e+02 5.086276881658150e+02 1.014683092273061e+02 - ME 9.290725627447436e-05 + ME 2.743452031293993e-05 Event 102 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2868,7 +2868,7 @@ Event 102 Batch 1 2 5.910857738801296e+02 3.707548039128416e+02 -7.516477307090547e+01 -4.541734518311494e+02 3 2.311218706704979e+02 4.536804143672514e+01 -2.262982016400413e+02 1.217307902336991e+01 4 6.777923554493723e+02 -4.161228453495667e+02 3.014629747109467e+02 4.420003728077793e+02 - ME 2.633339755449651e-04 + ME 7.158169676479796e-05 Event 103 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2876,7 +2876,7 @@ Event 103 Batch 1 2 6.627949406417042e+02 7.189602123685950e+01 -6.391860825813610e+02 -1.599038689489492e+02 3 5.519979886399102e+02 1.442810582977179e+02 4.734454174874869e+02 2.444057944057306e+02 4 2.852070707183856e+02 -2.161770795345774e+02 1.657406650938741e+02 -8.450192545678139e+01 - ME 1.652798222861839e-04 + ME 1.658567428345252e-05 Event 104 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2884,7 +2884,7 @@ Event 104 Batch 1 2 4.368180791462563e+02 -3.483499330357901e+02 -2.596280064690262e+02 4.533935023690698e+01 3 4.635715977792429e+02 1.873023362819025e+02 -2.251347602994603e+02 -3.593477435519053e+02 4 5.996103230745010e+02 1.610475967538876e+02 4.847627667684865e+02 3.140083933149983e+02 - ME 9.158171748371188e-05 + ME 2.162124469235967e-05 Event 105 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2892,7 +2892,7 @@ Event 105 Batch 1 2 5.701708357490469e+02 2.288495716262106e+02 -4.521314661478370e+02 -2.613422905391967e+02 3 3.711008490497917e+02 -3.362590561223710e+02 -8.126001400906793e+01 1.343223639771668e+02 4 5.587283152011612e+02 1.074094844961603e+02 5.333914801569049e+02 
1.270199265620299e+02 - ME 7.043372303967046e-05 + ME 1.720246557093887e-05 Event 106 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2900,7 +2900,7 @@ Event 106 Batch 1 2 6.775588183099673e+02 5.149765831731705e+02 3.445381345095063e+02 -2.741870619150275e+02 3 7.044100837534635e+02 -4.546975847980706e+02 -4.392260662935809e+02 3.106833358270535e+02 4 1.180310979365712e+02 -6.027899837509908e+01 9.468793178407486e+01 -3.649627391202603e+01 - ME 3.259673897057837e-04 + ME 2.786544600802367e-05 Event 107 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2908,7 +2908,7 @@ Event 107 Batch 1 2 6.046880513041550e+02 2.289413119004024e+02 -5.349774474143721e+02 -1.644160754103499e+02 3 3.366746442316215e+02 -7.166101576320902e+01 2.452245434825371e+01 3.280444544890399e+02 4 5.586373044642238e+02 -1.572802961371935e+02 5.104549930661184e+02 -1.636283790786902e+02 - ME 8.859556065170558e-04 + ME 4.667002706670146e-04 Event 108 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2916,7 +2916,7 @@ Event 108 Batch 1 2 6.239206451413978e+02 -2.218030564243363e+02 5.011455197099735e+02 -2.982172759400455e+02 3 2.841199272340513e+02 1.209406641294798e+02 7.967327320293104e+01 2.444374323800143e+02 4 5.919594276245514e+02 1.008623922948564e+02 -5.808187929129044e+02 5.377984356003120e+01 - ME 1.727643234936365e-04 + ME 7.961277501126149e-05 Event 109 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2924,7 +2924,7 @@ Event 109 Batch 1 2 3.093404598873124e+02 1.546999830656544e+02 1.629193992247174e+02 2.126421988200774e+02 3 5.287372542258961e+02 -2.136116696975048e+02 -1.865832176193536e+02 4.462284633214169e+02 4 6.619222858867909e+02 5.891168663185049e+01 2.366381839463621e+01 -6.588706621414941e+02 - ME 1.686695657867669e+01 + ME 2.902408960420708e-01 Event 110 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2932,7 +2932,7 @@ Event 110 Batch 1 2 4.920948406187608e+02 -8.595212543403569e+01 -4.824913009925944e+02 -4.440392734262522e+01 3 4.634042325716594e+02 -2.085760624772916e+00 1.255608851371819e+02 4.460645653843308e+02 4 5.445009268095798e+02 8.803788605880843e+01 3.569304158554124e+02 -4.016606380417056e+02 - ME 4.151412887207382e-03 + ME 1.043536440561108e-03 Event 111 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2940,7 +2940,7 @@ Event 111 Batch 1 2 4.637454700443120e+02 1.543048221589588e+02 -4.372769385391800e+02 6.225902899506631e+00 3 3.246747011850293e+02 -5.128652792678845e+01 -2.274142471268230e+02 2.259781269206006e+02 4 7.115798287706589e+02 -1.030182942321705e+02 6.646911856660031e+02 -2.322040298201072e+02 - ME 1.240833065187375e-03 + ME 5.219332617201280e-04 Event 112 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2948,7 +2948,7 @@ Event 112 Batch 1 2 6.923761777814550e+02 3.939190124845535e+02 4.398224952082178e+01 -5.676954684419625e+02 3 5.277418353503033e+02 -4.270527740856185e+02 4.970714905179168e+01 3.060499505927539e+02 4 2.798819868682421e+02 3.313376160106501e+01 -9.368939857261346e+01 2.616455178492087e+02 - ME 5.385735959435035e-05 + ME 4.381536575941429e-05 Event 113 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2956,7 +2956,7 @@ Event 113 Batch 1 2 7.174898838850694e+02 -6.130145063482008e+02 3.726797356942233e+02 1.071275347265524e+01 3 1.705115822510491e+02 3.993583199494100e+01 -1.624320619120163e+02 3.309311510932528e+01 4 6.119985338638814e+02 5.730786743532599e+02 -2.102476737822071e+02 -4.380586858198049e+01 - ME 2.197559713387976e-04 + ME 4.914674319256647e-05 Event 114 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-2964,7 +2964,7 @@ Event 114 Batch 1 2 6.772826088252357e+02 -1.430288042596954e+02 -3.410390118171982e+02 5.674036356844296e+02 3 6.725037798358682e+02 3.626161999767239e+01 2.510744134018114e+02 -6.228226615527174e+02 4 1.502136113388951e+02 1.067671842620232e+02 8.996459841538707e+01 5.541902586828807e+01 - ME 8.926156406775035e-05 + ME 7.986648389935193e-05 Event 115 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2972,7 +2972,7 @@ Event 115 Batch 1 2 9.320551230331124e+01 1.288474310894606e+01 -2.581623869377880e+01 8.862715576190526e+01 3 6.672654287607164e+02 1.525114284892182e+02 2.829200767588875e+02 5.847560574856374e+02 4 7.395290589359720e+02 -1.653961715981643e+02 -2.571038380651088e+02 -6.733832132475428e+02 - ME 1.800237703627863e+00 + ME 4.304938165075599e-01 Event 116 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2980,7 +2980,7 @@ Event 116 Batch 1 2 4.951202926530015e+02 -4.575339943514647e+02 4.220102313368785e+01 1.844608951947751e+02 3 3.101750696753587e+02 -4.711582585559527e+01 2.172188132736168e+02 2.163438466008694e+02 4 6.947046376716394e+02 5.046498202070600e+02 -2.594198364073050e+02 -4.008047417956444e+02 - ME 1.933367100533606e-03 + ME 5.988625984136040e-04 Event 117 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2988,7 +2988,7 @@ Event 117 Batch 1 2 6.543248494478489e+02 1.390926466871539e+02 9.107024539473488e+01 6.328510524967589e+02 3 5.040443237953712e+02 6.874740772121054e+01 1.336336536624387e+02 -4.811200690999848e+02 4 3.416308267567792e+02 -2.078400544083643e+02 -2.247038990571737e+02 -1.517309833967742e+02 - ME 4.207453923038474e-04 + ME 3.026560085299302e-04 Event 118 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2996,7 +2996,7 @@ Event 118 Batch 1 2 5.829230400014206e+02 5.307803371482089e+02 
-3.192285892796672e+01 2.388565162167381e+02 3 3.965113090906140e+02 -5.470249758902820e+01 2.256187790844517e+02 -3.214420966810604e+02 4 5.205656509079653e+02 -4.760778395591807e+02 -1.936959201564850e+02 8.258558046432242e+01 - ME 7.464562943747175e-05 + ME 2.168340782914014e-05 Event 119 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3004,7 +3004,7 @@ Event 119 Batch 1 2 3.549567073991255e+02 2.281637891139605e+02 1.474502150787006e+02 2.284600261271838e+02 3 4.727085372220640e+02 7.463684946128350e+01 -3.092948822053327e+02 3.495988811576870e+02 4 6.723347553788102e+02 -3.028006385752440e+02 1.618446671266322e+02 -5.780589072848707e+02 - ME 1.455012849105755e-02 + ME 1.664672733965846e-03 Event 120 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3012,7 +3012,7 @@ Event 120 Batch 1 2 7.192117275853698e+02 4.094232477570927e+02 -5.552624156333899e+02 -2.032775518283800e+02 3 3.685061529232585e+02 -2.522084621786424e+02 1.741347663658646e+02 2.046087962197375e+02 4 4.122821194913712e+02 -1.572147855784500e+02 3.811276492675253e+02 -1.331244391357209e+00 - ME 9.281995463485567e-05 + ME 1.900262756274459e-05 Event 121 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3020,7 +3020,7 @@ Event 121 Batch 1 2 1.923953846467517e+02 -5.182078839520096e+01 -1.486351786617837e+02 -1.106262789198433e+02 3 6.582127150877787e+02 -3.509182841037630e+02 -1.191939510078701e+02 5.439606035624541e+02 4 6.493919002654695e+02 4.027390724989639e+02 2.678291296696539e+02 -4.333343246426108e+02 - ME 1.925188892577692e-03 + ME 5.360055113881300e-04 Event 122 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3028,7 +3028,7 @@ Event 122 Batch 1 2 6.905732817636248e+02 3.462508192534570e+02 -5.375670569609784e+02 -2.608131264380775e+02 3 7.097575386120018e+02 -2.677396278645660e+02 
5.849221766424142e+02 2.998954860604125e+02 4 9.966917962437387e+01 -7.851119138889094e+01 -4.735511968143584e+01 -3.908235962233509e+01 - ME 5.007312135859238e-04 + ME 3.451011759976180e-05 Event 123 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3036,7 +3036,7 @@ Event 123 Batch 1 2 4.035126033432560e+02 2.481103298242076e+01 -3.878573016343356e+02 -1.085059780294573e+02 3 3.541388771651666e+02 1.572344474048876e+02 -3.105653677404273e+02 -6.512161875550808e+01 4 7.423485194915780e+02 -1.820454803873083e+02 6.984226693747627e+02 1.736275967849660e+02 - ME 2.043564129780385e-02 + ME 3.471230489499830e-03 Event 124 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3044,7 +3044,7 @@ Event 124 Batch 1 2 5.353042728143347e+02 -4.785252055946481e+02 -2.279396245170433e+02 7.488537693644093e+01 3 7.454081943698113e+02 6.785307544150930e+02 3.069354144183444e+02 -3.193811081429426e+01 4 2.192875328158541e+02 -2.000055488204448e+02 -7.899578990130104e+01 -4.294726612214667e+01 - ME 1.399009675490331e-04 + ME 6.765427234678898e-06 Event 125 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3052,7 +3052,7 @@ Event 125 Batch 1 2 7.351681880566981e+02 -1.932492970253984e+01 -4.393064933429818e+02 -5.891592456452273e+02 3 6.537497908129355e+02 -2.883189353576726e+01 3.454898907503182e+02 5.542510679217788e+02 4 1.110820211303664e+02 4.815682323830688e+01 9.381660259266363e+01 3.490817772344844e+01 - ME 1.431077255619906e-04 + ME 6.639428548470109e-05 Event 126 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3060,7 +3060,7 @@ Event 126 Batch 1 2 5.568747108147126e+02 1.149185667256990e+02 4.264979152236775e+02 -3.391204725116689e+02 3 6.934211462641822e+02 -1.939160042589616e+02 -6.294239612595663e+02 2.169215212257340e+02 4 2.497041429211053e+02 7.899743753326281e+01 
2.029260460358889e+02 1.221989512859350e+02 - ME 3.344185566612618e-05 + ME 9.143592130512915e-06 Event 127 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3068,7 +3068,7 @@ Event 127 Batch 1 2 7.108931196972316e+02 4.270547743949553e+02 5.664613189451065e+02 -4.598718776252147e+01 3 4.445675167124290e+02 -1.247884466860518e+02 -4.129475031266345e+02 1.074359351009545e+02 4 3.445393635903407e+02 -3.022663277089035e+02 -1.535138158184720e+02 -6.144874733843321e+01 - ME 1.180920695556687e-04 + ME 1.427738327825488e-05 Event 128 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3076,7 +3076,7 @@ Event 128 Batch 1 2 5.312407894292422e+02 -7.192118124205533e+01 -4.398126160332176e+02 -2.891521793453568e+02 3 5.717192413787027e+02 3.434745903572437e+02 1.811915566412192e+02 4.195923218357252e+02 4 3.970399691920551e+02 -2.715534091151883e+02 2.586210593919984e+02 -1.304401424903685e+02 - ME 1.848006274423395e-04 + ME 3.532660248239223e-05 Event 129 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3084,7 +3084,7 @@ Event 129 Batch 1 2 6.644129951428383e+02 -3.595672586482287e+02 4.645590915434784e+02 3.103882489514914e+02 3 1.967652372382455e+02 -5.204943416929049e+01 8.794498000645085e+00 -1.895522930301724e+02 4 6.388217676189169e+02 4.116166928175192e+02 -4.733535895441232e+02 -1.208359559213191e+02 - ME 3.082956717278722e-04 + ME 9.192558188476414e-05 Event 130 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3092,7 +3092,7 @@ Event 130 Batch 1 2 7.302263990443511e+02 -1.919590472356484e+02 3.836584700935805e+02 -5.909217345563752e+02 3 4.156541164903923e+02 2.203243106780774e+02 -1.767969453775071e+02 3.049071707664833e+02 4 3.541194844652567e+02 -2.836526344242890e+01 -2.068615247160734e+02 2.860145637898919e+02 - ME 3.110012368642411e-05 + ME 
2.258971422042701e-05 Event 131 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3100,7 +3100,7 @@ Event 131 Batch 1 2 2.308323688168238e+02 -1.780469473698228e+02 1.469011263880862e+02 1.710582294195638e+00 3 7.308075033948297e+02 5.219262643529272e+02 -3.840435213624620e+02 3.379099810545737e+02 4 5.383601277883465e+02 -3.438793169831044e+02 2.371423949743758e+02 -3.396205633487694e+02 - ME 1.061667055612532e-03 + ME 7.770640764079256e-05 Event 132 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3108,7 +3108,7 @@ Event 132 Batch 1 2 5.909630762789660e+02 -4.293852116769707e+02 -3.988922148105424e+02 7.583335995300355e+01 3 5.415993952096327e+02 2.260703809971038e+02 3.221145619770360e+02 -3.721079100067703e+02 4 3.674375285114020e+02 2.033148306798666e+02 7.677765283350686e+01 2.962745500537670e+02 - ME 3.321676569401813e-05 + ME 1.628447412544396e-05 Event 133 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3116,7 +3116,7 @@ Event 133 Batch 1 2 4.506052863582997e+02 2.189991325227701e+02 -3.914006430783634e+02 -4.347459771134355e+01 3 4.043998006859111e+02 3.160348074769272e+02 8.738893432792010e+01 2.366946839598570e+02 4 6.449949129557901e+02 -5.350339399996973e+02 3.040117087504433e+02 -1.932200862485142e+02 - ME 3.121497332919934e-04 + ME 8.705579101282482e-05 Event 134 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3124,7 +3124,7 @@ Event 134 Batch 1 2 7.151470882937614e+02 -1.041377497037516e+01 -4.186394096729767e+01 7.138447461686595e+02 3 3.416424731356660e+02 1.638631808685801e+02 3.081581136487586e+01 -2.981925940995343e+02 4 4.432104385705719e+02 -1.534494058982047e+02 1.104812960242199e+01 -4.156521520691248e+02 - ME 5.534325530265236e-02 + ME 6.342792451335309e-03 Event 135 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -3132,7 +3132,7 @@ Event 135 Batch 1 2 7.115730144432832e+02 -3.219296530898238e+02 2.184242454110169e+02 -5.958089478700319e+02 3 1.627059459894212e+02 -6.880794311551747e+01 -3.259803939022061e+01 1.437917231708342e+02 4 6.257210395672955e+02 3.907375962053413e+02 -1.858262060207963e+02 4.520172246991979e+02 - ME 2.112989182930814e-04 + ME 1.277979532321233e-04 Event 136 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3140,7 +3140,7 @@ Event 136 Batch 1 2 7.195404287114588e+02 -4.369992732083461e+02 -4.270318019286997e+02 3.800182941743402e+02 3 6.668605996318223e+02 3.634158794560479e+02 4.690430049045651e+02 -3.043527845290675e+02 4 1.135989716567186e+02 7.358339375229815e+01 -4.201120297586535e+01 -7.566550964527264e+01 - ME 1.804344388349211e-03 + ME 7.515399240093053e-05 Event 137 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3148,7 +3148,7 @@ Event 137 Batch 1 2 6.722782806744999e+02 -6.045581260407005e+02 -2.538460778300668e+02 1.484241478840623e+02 3 6.869263774705689e+02 6.661257235671316e+02 1.481819739565761e+02 -7.865412297735662e+01 4 1.407953418549304e+02 -6.156759752643097e+01 1.056641038734908e+02 -6.977002490670534e+01 - ME 5.192812231664224e-04 + ME 2.119149330726453e-05 Event 138 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3156,7 +3156,7 @@ Event 138 Batch 1 2 6.463287544295633e+02 8.684709774942756e+01 2.409249839962013e+02 -5.934253049048401e+02 3 3.917330799270068e+02 1.767690441671677e+02 4.696120064017492e+01 3.464132742372293e+02 4 4.619381656434300e+02 -2.636161419165952e+02 -2.878861846363762e+02 2.470120306676108e+02 - ME 5.804753959762886e-05 + ME 4.203806696206548e-05 Event 139 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3164,7 +3164,7 @@ Event 139 Batch 1 2 
2.994802063237944e+02 -1.272876183039153e+02 6.552211336810879e+00 2.710042891410713e+02 3 7.257546970836092e+02 -8.848613612326799e+00 5.127896146768584e+00 -7.256826352181574e+02 4 4.747650965925943e+02 1.361362319162416e+02 -1.168010748357900e+01 4.546783460770868e+02 - ME 1.724196014694060e-04 + ME 1.500396153249019e-04 Event 140 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3172,7 +3172,7 @@ Event 140 Batch 1 2 7.326756101999780e+02 5.655005379385240e+02 4.343799907428446e+02 1.683351270988810e+02 3 7.428339005597779e+02 -5.680473426214219e+02 -4.534832054058505e+02 -1.532233754243464e+02 4 2.449048924024402e+01 2.546804682897962e+00 1.910321466300584e+01 -1.511175167453447e+01 - ME 4.669436438173466e-03 + ME 1.024603362434272e-04 Event 141 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3180,7 +3180,7 @@ Event 141 Batch 1 2 7.363238871411332e+02 -6.772722174663238e+02 -2.824373475598683e+02 -6.086341204880675e+01 3 5.504260535970963e+02 4.650298533191528e+02 2.914345410616540e+02 4.221355560271704e+01 4 2.132500592617708e+02 2.122423641471711e+02 -8.997193501785816e+00 1.864985644608987e+01 - ME 7.300791864660033e-05 + ME 1.166401869382226e-05 Event 142 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3188,7 +3188,7 @@ Event 142 Batch 1 2 5.862280565156834e+02 4.248793793115829e+01 -2.479279504752411e+02 -5.295184989682986e+02 3 4.287264749982929e+02 -3.025296967755320e+02 2.785471849307642e+02 1.212173201341831e+02 4 4.850454684860405e+02 2.600417588443628e+02 -3.061923445551928e+01 4.083011788341197e+02 - ME 4.569028399965169e-05 + ME 1.949810022878841e-05 Event 143 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3196,7 +3196,7 @@ Event 143 Batch 1 2 2.464531733710510e+02 4.046044690030688e+01 -2.103865804466287e+02 1.218179201483223e+02 3 
5.378449948854583e+02 4.607829603950880e+02 -2.747641700963839e+02 3.822241180409925e+01 4 7.157018317434903e+02 -5.012434072953949e+02 4.851507505430126e+02 -1.600403319524219e+02 - ME 1.284493741497843e-03 + ME 4.863434295951330e-04 Event 144 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3204,7 +3204,7 @@ Event 144 Batch 1 2 5.367418008803521e+02 -1.343004856786532e+02 -4.048537736989352e+02 -3.258044847458254e+02 3 6.294877130859599e+02 3.313530054622211e+02 5.282137272543231e+02 8.631468610520756e+01 4 3.337704860336884e+02 -1.970525197835678e+02 -1.233599535553879e+02 2.394897986406179e+02 - ME 2.612855607885159e-05 + ME 8.754930746282009e-06 Event 145 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3212,7 +3212,7 @@ Event 145 Batch 1 2 6.805380148481771e+01 -3.411514819754512e+01 -4.339750646760406e+01 -3.980116822894492e+01 3 6.831461500979880e+02 -3.834019790669201e+02 -2.756424954453614e+02 -4.936727656514237e+02 4 7.488000484171945e+02 4.175171272644653e+02 3.190400019129655e+02 5.334739338803686e+02 - ME 4.832444287218038e-01 + ME 4.117012994651258e-01 Event 146 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3220,7 +3220,7 @@ Event 146 Batch 1 2 5.031746658797123e+02 4.202301876294930e+02 2.767377273314875e+02 2.750283520766640e+00 3 4.317115817339341e+02 -1.098088257924671e+02 -5.455162180567243e+01 4.139336083717602e+02 4 5.651137523863538e+02 -3.104213618370259e+02 -2.221861055258150e+02 -4.166838918925268e+02 - ME 4.446377084117306e-03 + ME 1.122040831263755e-03 Event 147 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3228,7 +3228,7 @@ Event 147 Batch 1 2 4.251223043705630e+02 -4.223502783198938e+02 -4.694338569631599e+01 1.206377286808446e+01 3 5.457819748703678e+02 2.791608945230574e+02 -4.384138579515959e+02 -1.665546403390879e+02 
4 5.290957207590696e+02 1.431893837968364e+02 4.853572436479118e+02 1.544908674710035e+02 - ME 5.820013407126093e-05 + ME 1.117959404473985e-05 Event 148 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3236,7 +3236,7 @@ Event 148 Batch 1 2 6.905785821272525e+02 6.249608768654489e+02 -6.243387159972350e+01 -2.870970082698929e+02 3 1.361638260920089e+02 2.862044352088506e+01 1.704210379179796e+01 1.320266050727362e+02 4 6.732575917807402e+02 -6.535813203863343e+02 4.539176780792534e+01 1.550704031971573e+02 - ME 9.573948308169230e-04 + ME 5.047601105033982e-04 Event 149 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3244,7 +3244,7 @@ Event 149 Batch 1 2 6.694705528096943e+02 -5.216497821741067e+02 -3.785079074709545e+02 1.811189935345937e+02 3 2.821401257551277e+02 1.148500354702071e-01 2.786662494166578e+02 -4.413795199872407e+01 4 5.483893214351779e+02 5.215349321386365e+02 9.984165805429673e+01 -1.369810415358697e+02 - ME 1.943324414096923e-04 + ME 3.486097449584098e-05 Event 150 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3252,7 +3252,7 @@ Event 150 Batch 1 2 4.637486188995366e+02 -4.033412855298819e+02 -2.279949807412008e+02 -1.992178895453991e+01 3 3.756800751656199e+02 6.230662615514293e+01 -2.632310737913946e+02 -2.606967683041707e+02 4 6.605713059348438e+02 3.410346593747391e+02 4.912260545325952e+02 2.806185572587107e+02 - ME 2.156945366470290e-04 + ME 4.211370643652993e-05 Event 151 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3260,7 +3260,7 @@ Event 151 Batch 1 2 3.821954355913596e+02 -2.528320044280690e+02 2.861764538722267e+02 1.588602445142563e+01 3 6.796189325418250e+02 2.911670128135291e+02 -4.900375979142738e+02 3.700902818893582e+02 4 4.381856318668152e+02 -3.833500838546018e+01 2.038611440420471e+02 -3.859763063407838e+02 - ME 
8.197229841786387e-03 + ME 1.923941526207248e-04 Event 152 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3268,7 +3268,7 @@ Event 152 Batch 1 2 6.751133298339792e+02 -2.999578895043981e+02 -2.855974213275218e+02 -5.331391803034741e+02 3 4.976977783498468e+02 -3.003988119418482e+00 1.843802943840355e+02 4.622747685874795e+02 4 3.271888918161745e+02 3.029618776238166e+02 1.012171269434863e+02 7.086441171599445e+01 - ME 1.204579535049519e-04 + ME 6.977738125195056e-05 Event 153 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3276,7 +3276,7 @@ Event 153 Batch 1 2 1.729293620257127e+02 1.558357805102956e+02 -7.193392860849491e+01 2.110174585940510e+01 3 6.524550819255464e+02 2.410158908712478e+02 5.786677971610501e+02 1.809766692333240e+02 4 6.746155560487412e+02 -3.968516713815435e+02 -5.067338685525552e+02 -2.020784150927291e+02 - ME 5.985591428637023e-04 + ME 1.391654510317005e-04 Event 154 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3284,7 +3284,7 @@ Event 154 Batch 1 2 6.585658455851002e+02 -2.410305357139302e+02 -2.116446673272157e+02 -5.751693564652295e+02 3 5.764400833248005e+02 3.388133979948972e+02 3.092747322371399e+02 3.490527051926400e+02 4 2.649940710900988e+02 -9.778286228096688e+01 -9.763006490992416e+01 2.261166512725894e+02 - ME 3.655181799213059e-05 + ME 2.686434432328395e-05 Event 155 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3292,7 +3292,7 @@ Event 155 Batch 1 2 5.686586231936359e+02 -1.693366246265498e+02 -1.542203680657918e+02 5.204938187588979e+02 3 1.882190564276536e+02 -1.089234770645493e+02 -9.145416397064866e+01 1.232810822434430e+02 4 7.431223203787102e+02 2.782601016910992e+02 2.456745320364404e+02 -6.437749010023409e+02 - ME 6.696396361607482e-01 + ME 4.701119881405690e-01 Event 156 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3300,7 +3300,7 @@ Event 156 Batch 1 2 6.143652095725128e+02 2.879464601546110e+02 5.379391909976823e+02 -7.178351904348040e+01 3 6.287751645293085e+02 -4.584164185734781e+02 -4.225140875260598e+02 -8.181956094447702e+01 4 2.568596258981782e+02 1.704699584188668e+02 -1.154251034716223e+02 1.536030799879581e+02 - ME 2.899571701789112e-05 + ME 7.769660148731367e-06 Event 157 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3308,7 +3308,7 @@ Event 157 Batch 1 2 5.050842109798973e+02 4.185498850973046e+02 -1.305174306570672e+02 -2.507812875014723e+02 3 5.170424494038050e+02 -3.084595065654854e+02 3.930456446728388e+02 -1.330441599566699e+02 4 4.778733396162975e+02 -1.100903785318191e+02 -2.625282140157716e+02 3.838254474581424e+02 - ME 4.033251359625283e-05 + ME 1.243977993100618e-05 Event 158 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3316,7 +3316,7 @@ Event 158 Batch 1 2 4.312542366204098e+02 -3.114503370626313e+02 2.737030704635235e+02 1.185982013584742e+02 3 6.944315393047829e+02 2.166643175309468e+02 -6.173965008138002e+02 -2.326226495269423e+02 4 3.743142240748070e+02 9.478601953168439e+01 3.436934303502764e+02 1.140244481684682e+02 - ME 3.680357310121394e-05 + ME 5.864250821924803e-06 Event 159 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3324,7 +3324,7 @@ Event 159 Batch 1 2 5.860112473308646e+02 -1.581297551692178e+02 4.935632758462007e+02 2.734948907463652e+02 3 3.772013313646349e+02 -2.371132827856262e+02 -1.305099443644436e+02 -2.627266448837395e+02 4 5.367874213045002e+02 3.952430379548442e+02 -3.630533314817573e+02 -1.076824586262577e+01 - ME 1.030382455754272e-04 + ME 2.805189658646002e-05 Event 160 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3332,7 +3332,7 @@ 
Event 160 Batch 1 2 5.883409724804535e+02 -3.739819298758817e+02 -2.887651121595530e+02 3.505671490956299e+02 3 4.300332553173178e+02 1.788055146224819e+02 3.829208006453583e+02 7.955406370837679e+01 4 4.816257722022287e+02 1.951764152533999e+02 -9.415568848580530e+01 -4.301212128040066e+02 - ME 9.797271586219467e-03 + ME 2.307516153071828e-04 Event 161 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3340,7 +3340,7 @@ Event 161 Batch 1 2 6.868305165969147e+02 4.119610488151656e+00 5.515184990814985e+02 4.093244831537709e+02 3 3.260821955312833e+02 -1.956999890649130e+02 -2.483451099187458e+02 -7.972338993006402e+01 4 4.870872878718022e+02 1.915803785767614e+02 -3.031733891627526e+02 -3.296010932237070e+02 - ME 1.075603053132144e-03 + ME 9.860610555787331e-05 Event 162 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3348,7 +3348,7 @@ Event 162 Batch 1 2 2.159818802305119e+02 -2.018126805027919e+02 4.096951387107715e+01 -6.512536763314942e+01 3 6.870078865581224e+02 4.896730732821633e+02 -2.356527215298929e+02 -4.203188222421333e+02 4 5.970102332113654e+02 -2.878603927793715e+02 1.946832076588156e+02 4.854441898752826e+02 - ME 5.344822454174306e-05 + ME 2.809071549115161e-05 Event 163 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3356,7 +3356,7 @@ Event 163 Batch 1 2 4.889699854403287e+02 -4.067839821807834e+01 -2.740835242435768e+02 4.028835269878222e+02 3 4.282392920294498e+02 4.007468150560176e+02 -8.832740907173851e+01 -1.224301852772270e+02 4 5.827907225302220e+02 -3.600684168379390e+02 3.624109333153153e+02 -2.804533417105952e+02 - ME 4.336231422638298e-04 + ME 1.173701793303044e-04 Event 164 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3364,7 +3364,7 @@ Event 164 Batch 1 2 6.224346677404150e+02 -1.282049393554146e+02 5.480608628970117e+02 
-2.657399098565701e+02 3 7.444531740822750e+02 1.794330131141779e+02 -6.708967511266460e+02 2.681638893170603e+02 4 1.331121581773107e+02 -5.122807375876333e+01 1.228358882296343e+02 -2.423979460490191e+00 - ME 1.368953177788070e-04 + ME 1.571413941583783e-05 Event 165 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3372,7 +3372,7 @@ Event 165 Batch 1 2 6.980339706506675e+02 -5.154669325341684e+01 -4.947847840614098e+02 4.896757907618869e+02 3 1.362964882116331e+02 4.252532371924361e+01 -5.641238783031591e+01 -1.165588780002596e+02 4 6.656695411377010e+02 9.021369534174053e+00 5.511971718917263e+02 -3.731169127616273e+02 - ME 1.450267418906797e-03 + ME 4.238311927693088e-04 Event 166 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3380,7 +3380,7 @@ Event 166 Batch 1 2 3.060640747281171e+02 -1.981167412190918e+02 -9.095380261170779e+01 -2.148310510107333e+02 3 5.580104478575086e+02 -3.585720992432471e+02 -1.558095186186280e+02 3.981521109704927e+02 4 6.359254774143739e+02 5.566888404623389e+02 2.467633212303362e+02 -1.833210599597597e+02 - ME 3.000804338470548e-04 + ME 1.099447007687216e-04 Event 167 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3388,7 +3388,7 @@ Event 167 Batch 1 2 2.833153623322893e+02 2.526850217013923e+02 8.687924899084067e+01 9.417998957332070e+01 3 6.595685044563415e+02 -8.780626893611850e+01 -2.875856231737449e+02 -5.870393347553995e+02 4 5.571161332113688e+02 -1.648787527652738e+02 2.007063741829043e+02 4.928593451820789e+02 - ME 7.367447958524992e-05 + ME 4.244421486768831e-05 Event 168 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3396,7 +3396,7 @@ Event 168 Batch 1 2 6.026267479353969e+02 -5.987968578530475e+02 5.775180228477150e+00 6.758674164241529e+01 3 4.991211680715713e+02 3.812575567959843e+02 3.220701575873951e+02 
-5.952259631185711e+00 4 3.982520839930309e+02 2.175393010570631e+02 -3.278453378158730e+02 -6.163448201122968e+01 - ME 9.606399998327532e-05 + ME 1.203107058680061e-05 Event 169 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3404,7 +3404,7 @@ Event 169 Batch 1 2 5.510662376679772e+02 -9.251111075413947e+01 -5.291920243323356e+02 -1.227660134875281e+02 3 5.034535790022877e+02 -2.816014265681677e+02 3.283802195198170e+02 2.575511098657944e+02 4 4.454801833297348e+02 3.741125373223072e+02 2.008118048125185e+02 -1.347850963782663e+02 - ME 1.532484123791625e-04 + ME 2.085195230877358e-05 Event 170 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3412,7 +3412,7 @@ Event 170 Batch 1 2 2.814808559369750e+02 3.658097943502287e+01 -1.412301634042880e+02 -2.407225480659935e+02 3 6.646522150540470e+02 2.753499086551696e+02 -1.631412967142655e+02 5.825203104495404e+02 4 5.538669290089779e+02 -3.119308880901926e+02 3.043714601185535e+02 -3.417977623835468e+02 - ME 7.823510217753851e-04 + ME 2.587160315460459e-04 Event 171 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3420,7 +3420,7 @@ Event 171 Batch 1 2 1.777965289077954e+02 -6.143496808852239e+01 -1.603735842336773e+00 1.668375809551635e+02 3 7.439290290569696e+02 2.163074211412066e+01 -1.907051550939623e+01 -7.433699124308462e+02 4 5.782744420352348e+02 3.980422597440174e+01 2.067425135173305e+01 5.765323314756826e+02 - ME 2.063755640794395e-03 + ME 1.981167274383509e-03 Event 172 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3428,7 +3428,7 @@ Event 172 Batch 1 2 1.369499454750680e+02 -1.250080331667568e+01 -3.518152151649629e+01 -1.317622025690455e+02 3 6.692885586315896e+02 -2.346283187163472e+02 -6.130705295376303e+02 1.305421486874673e+02 4 6.937614958933425e+02 2.471291220330227e+02 6.482520510541266e+02 
1.220053881578238e+00 - ME 5.039586079692636e-04 + ME 1.548169060571347e-04 Event 173 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3436,7 +3436,7 @@ Event 173 Batch 1 2 7.088772083623137e+02 4.973951266878932e+01 3.171232495758680e+01 -7.064185769505260e+02 3 5.785136264307895e+02 8.584813303397833e+01 5.766505028397120e+01 5.691949191590089e+02 4 2.126091652068944e+02 -1.355876457027672e+02 -8.937737524155732e+01 1.372236577915166e+02 - ME 1.743760900867476e-04 + ME 1.732961413682620e-04 Event 174 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3444,7 +3444,7 @@ Event 174 Batch 1 2 4.367208701713482e+02 -3.923163287174704e+01 4.325755195957351e+02 -4.543585887727652e+01 3 3.528978856725088e+02 9.622572295106905e+01 1.987077746703234e+02 -2.753048278549415e+02 4 7.103812441561454e+02 -5.699409007932221e+01 -6.312832942660567e+02 3.207406867322186e+02 - ME 9.353677491192390e-04 + ME 1.541208918572365e-04 Event 175 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3452,7 +3452,7 @@ Event 175 Batch 1 2 6.418562164876806e+02 1.962785648722137e+02 -6.110736372974047e+02 -6.567908015856712e+00 3 4.843421844702149e+02 -1.886631806266161e+02 3.569879071908527e+02 -2.674942804112337e+02 4 3.738015990421035e+02 -7.615384245597569e+00 2.540857301065516e+02 2.740621884270906e+02 - ME 3.029111560812189e-05 + ME 1.279055979705581e-05 Event 176 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3460,7 +3460,7 @@ Event 176 Batch 1 2 6.288652703123263e+02 4.005522031116294e+02 3.691482793515075e+02 3.142594606996526e+02 3 7.209127580467475e+02 -4.124575135572966e+02 -5.165298058232565e+02 -2.877341896975221e+02 4 1.502219716409257e+02 1.190531044566666e+01 1.473815264717492e+02 -2.652527100213051e+01 - ME 1.719274466020296e-04 + ME 1.300720357566141e-05 Event 177 Batch 1 
0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3468,7 +3468,7 @@ Event 177 Batch 1 2 4.716578040000077e+02 -4.521622645932388e+02 -1.012739918234145e+01 1.338200520767543e+02 3 3.021382980750606e+02 -2.714821202364266e+02 6.773215888881064e+01 -1.140059832109250e+02 4 7.262038979249317e+02 7.236443848296653e+02 -5.760475970646905e+01 -1.981406886582933e+01 - ME 2.354271252348000e-03 + ME 6.442260552556652e-04 Event 178 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3476,7 +3476,7 @@ Event 178 Batch 1 2 7.350088877399502e+02 -3.684484945749095e+02 -2.561732769425163e+02 -5.821159885132296e+02 3 1.415495174310248e+02 7.181268644032879e+01 1.095010133995263e+02 5.374692563910759e+01 4 6.234415948290248e+02 2.966358081345808e+02 1.466722635429900e+02 5.283690628741219e+02 - ME 1.035408980291912e-04 + ME 6.828487731379645e-05 Event 179 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3484,7 +3484,7 @@ Event 179 Batch 1 2 7.426064621425413e+02 6.748632301344054e+01 7.201624948975951e+02 -1.681544967131679e+02 3 5.821031882499326e+02 8.394276920418550e-01 -5.588194474899291e+02 1.629854049874919e+02 4 1.752903496075256e+02 -6.832575070548241e+01 -1.613430474076661e+02 5.169091725675888e+00 - ME 9.197132478706931e-05 + ME 1.412410550503903e-05 Event 180 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3492,7 +3492,7 @@ Event 180 Batch 1 2 6.099515195485484e+02 2.272495331206023e+02 1.762692760011278e+02 -5.378918555193875e+02 3 5.718889655176699e+02 4.324570510796980e+01 -3.278409766521432e+02 4.665909256493895e+02 4 3.181595149337819e+02 -2.704952382285720e+02 1.515717006510154e+02 7.130092986999803e+01 - ME 5.401477812349802e-05 + ME 3.043963963928669e-05 Event 181 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-3500,7 +3500,7 @@ Event 181 Batch 1 2 1.206370886915177e+02 -8.151225636567759e+01 1.767749325039422e+01 8.715827822142556e+01 3 6.451493408002739e+02 -6.748216257939080e+01 4.373428479320614e+02 4.694625256943417e+02 4 7.342135705082084e+02 1.489944189450684e+02 -4.550203411824557e+02 -5.566208039157672e+02 - ME 7.131653341377736e-02 + ME 2.625479922313071e-02 Event 182 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3508,7 +3508,7 @@ Event 182 Batch 1 2 4.626866082364760e+02 -3.084610429505738e+02 3.306629079434072e+02 9.794245113140897e+01 3 4.974966719253473e+02 3.582955998671217e+02 1.664640547097976e+02 -3.023523113558579e+02 4 5.398167198381765e+02 -4.983455691654795e+01 -4.971269626532048e+02 2.044098602244489e+02 - ME 5.959042767905828e-05 + ME 1.414799589613471e-05 Event 183 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3516,7 +3516,7 @@ Event 183 Batch 1 2 3.304723045950491e+02 3.244647182058462e+00 3.209425641774955e+02 7.872284845075714e+01 3 4.379804819457451e+02 2.312428523500660e+02 3.131807483468383e+02 2.006775141049615e+02 4 7.315472134592065e+02 -2.344874995321247e+02 -6.341233125243344e+02 -2.794003625557186e+02 - ME 4.899988668912175e-03 + ME 2.330806393221907e-03 Event 184 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3524,7 +3524,7 @@ Event 184 Batch 1 2 7.470051035005908e+02 -4.953964753944513e+02 -4.028924750569613e+02 3.876552725878485e+02 3 2.183325716323390e+02 1.119040172022777e+02 1.451703047217021e+02 -1.186262424448778e+02 4 5.346623248670695e+02 3.834924581921736e+02 2.577221703352594e+02 -2.690290301429710e+02 - ME 5.441344453720516e-04 + ME 7.987999480474686e-05 Event 185 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3532,7 +3532,7 @@ Event 185 Batch 1 2 4.448583927494090e+02 2.810173563272025e+02 
-3.384637477435971e+02 6.610995769032235e+01 3 6.236443795626774e+02 -1.690803760724666e+02 5.125139620028374e+02 3.125277225134823e+02 4 4.314972276879136e+02 -1.119369802547359e+02 -1.740502142592404e+02 -3.786376802038046e+02 - ME 6.949230823829164e-03 + ME 1.405605442011058e-04 Event 186 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3540,7 +3540,7 @@ Event 186 Batch 1 2 6.802792190696962e+02 -1.681815241656754e+02 5.427923640013703e+02 3.739936368565512e+02 3 6.331554869749547e+02 3.172201723440435e+02 -4.588808692389625e+02 -2.994755095011972e+02 4 1.865652939553488e+02 -1.490386481783679e+02 -8.391149476240778e+01 -7.451812735535422e+01 - ME 3.276943053321406e-04 + ME 3.045129627255903e-05 Event 187 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3548,7 +3548,7 @@ Event 187 Batch 1 2 7.472897115267965e+02 -6.988402471604775e+02 -2.391684329048669e+02 1.134137672609268e+02 3 6.826908170748527e+02 6.328852277257668e+02 2.212839847556716e+02 -1.286718241709738e+02 4 7.001947139835140e+01 6.595501943471052e+01 1.788444814919547e+01 1.525805691004725e+01 - ME 1.461490870437387e-04 + ME 3.485925693242860e-05 Event 188 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3556,7 +3556,7 @@ Event 188 Batch 1 2 6.496068877140275e+02 -5.024316730938291e+02 -3.980061777252906e+02 -1.055585379310702e+02 3 4.885976180718368e+02 4.424928723138696e+02 1.459942636040002e+02 -1.470148473169288e+02 4 3.617954942141354e+02 5.993880077995960e+01 2.520119141212904e+02 2.525733852479991e+02 - ME 2.843805826594158e-05 + ME 1.006519408431335e-05 Event 189 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3564,7 +3564,7 @@ Event 189 Batch 1 2 4.082379946778654e+02 2.679237131173331e+02 -7.718184435750955e+01 2.981913934867987e+02 3 5.864211573889181e+02 -5.780822197382728e+02 
-6.394893886953379e+01 7.497502433004084e+01 4 5.053408479332167e+02 3.101585066209396e+02 1.411307832270433e+02 -3.731664178168398e+02 - ME 1.937644878671120e-03 + ME 1.322787627040098e-04 Event 190 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3572,7 +3572,7 @@ Event 190 Batch 1 2 6.472516823166364e+02 6.463779961822676e+02 -3.289365889632791e+01 6.945035458816692e+00 3 4.318767277050750e+02 -3.286790725415815e+02 -7.183748821760624e+00 -2.800642229191639e+02 4 4.208715899782885e+02 -3.176989236406859e+02 4.007740771808847e+01 2.731191874603472e+02 - ME 3.409584379294133e-05 + ME 1.272332211942340e-05 Event 191 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3580,7 +3580,7 @@ Event 191 Batch 1 2 6.757500036387052e+02 6.222744522021635e+02 -2.261571472854044e+02 1.351499844096745e+02 3 3.644673602666567e+02 -2.020102809038697e+02 1.114149692296405e+02 -2.821613151026251e+02 4 4.597826360946380e+02 -4.202641712982938e+02 1.147421780557637e+02 1.470113306929507e+02 - ME 5.389305783035389e-05 + ME 1.560703181590231e-05 Event 192 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3588,7 +3588,7 @@ Event 192 Batch 1 2 7.394562478491531e+02 -7.307873850878615e+02 3.988568028534699e+01 1.056147375500683e+02 3 8.098058518630978e+01 5.419286926826393e+01 4.244928426361276e+00 -6.002473390399248e+01 4 6.795631669645365e+02 6.765945158195976e+02 -4.413060871170821e+01 -4.559000364607596e+01 - ME 4.204295748489254e-04 + ME 1.231033846344155e-04 Event 193 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3596,7 +3596,7 @@ Event 193 Batch 1 2 5.607395612273153e+02 -3.164229781907934e+02 -3.517992386171808e+02 -3.009030576558548e+02 3 3.741643617741927e+02 -2.156271676189966e+02 1.666697084176705e+02 2.563690747778811e+02 4 5.650960769984922e+02 5.320501458097899e+02 
1.851295301995104e+02 4.453398287797368e+01 - ME 9.141090879934244e-05 + ME 3.026844143728605e-05 Event 194 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3604,7 +3604,7 @@ Event 194 Batch 1 2 5.729373416862012e+02 -2.155045544874616e+02 -1.679805246197324e+02 5.035846779262559e+02 3 2.831035485618876e+02 -2.543279085173982e+02 1.042261812492671e+02 -6.783684323208054e+01 4 6.439591097519118e+02 4.698324630048598e+02 6.375434337046515e+01 -4.357478346941756e+02 - ME 1.781231321893996e-03 + ME 5.497724763810379e-04 Event 195 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3612,7 +3612,7 @@ Event 195 Batch 1 2 5.572874060171201e+02 -5.433144409127298e+02 3.646295232533866e+01 1.185290019729285e+02 3 6.765845568040619e+02 5.574999049241243e+02 -1.212989803269169e+01 -3.831623469093195e+02 4 2.661280371788181e+02 -1.418546401139455e+01 -2.433305429264712e+01 2.646333449363910e+02 - ME 3.395618115588225e-04 + ME 3.378534889977447e-04 Event 196 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3620,7 +3620,7 @@ Event 196 Batch 1 2 5.405888343305829e+02 3.940239871950471e+02 -8.826690628749978e+01 -3.594305754554688e+02 3 6.983754392688073e+02 -3.888370902622853e+02 -5.513072771506098e+01 5.774898910559966e+02 4 2.610357264006097e+02 -5.186896932761887e+00 1.433976340025607e+02 -2.180593156005277e+02 - ME 5.539073969003598e-03 + ME 2.676929502290073e-04 Event 197 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3628,7 +3628,7 @@ Event 197 Batch 1 2 2.783346334111661e+02 2.282410890438732e+02 -1.474467226896361e+02 6.029624695020830e+01 3 6.434654504578666e+02 1.172104173128919e+01 6.205939438823057e+02 1.696277097949658e+02 4 5.781999161309674e+02 -2.399621307751624e+02 -4.731472211926695e+02 -2.299239567451741e+02 - ME 3.321087064690878e-04 + ME 
4.280180350752636e-05 Event 198 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3636,7 +3636,7 @@ Event 198 Batch 1 2 4.349536439683943e+02 1.774777254208009e+02 -9.709992209949135e+01 3.850427697141142e+02 3 4.134500153047116e+02 7.095914770071803e+01 -4.041194890923881e+02 -5.092301099466194e+01 4 6.515963407268921e+02 -2.484368731215197e+02 5.012194111918782e+02 -3.341197587194521e+02 - ME 7.849443582399766e-04 + ME 2.926862112764983e-04 Event 199 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3644,7 +3644,7 @@ Event 199 Batch 1 2 6.682109290882580e+02 2.136897997740939e+02 -5.035763266519416e+02 3.837361052354048e+02 3 1.424120473397155e+02 8.952788458880865e+01 -4.686863299276860e+01 -1.003458038481504e+02 4 6.893770235720265e+02 -3.032176843629025e+02 5.504449596447103e+02 -2.833903013872543e+02 - ME 1.167594898598604e-03 + ME 4.183851150998592e-04 Event 200 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3652,7 +3652,7 @@ Event 200 Batch 1 2 5.959952693237885e+02 -4.878566955018547e+02 -2.510837703973929e+01 -3.414319479966339e+02 3 4.479637599869168e+02 4.499951041477978e+01 7.146287716862105e+01 4.399313940955211e+02 4 4.560409706892941e+02 4.428571850870749e+02 -4.635450012888173e+01 -9.849944609888662e+01 - ME 5.545496796633981e-04 + ME 3.228844805909175e-04 Event 201 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3660,7 +3660,7 @@ Event 201 Batch 1 2 5.203096708642927e+02 -1.112696379946441e+02 1.367824427202020e+02 4.895219960522141e+02 3 2.871951825199399e+02 -2.582762312778227e+02 1.200876310962787e+02 3.678888524092984e+01 4 6.924951466157675e+02 3.695458692724667e+02 -2.568700738164807e+02 -5.263108812931440e+02 - ME 6.577575910850049e-03 + ME 2.285182473348715e-03 Event 202 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -3668,7 +3668,7 @@ Event 202 Batch 1 2 2.158792376054218e+02 2.112389782008981e+01 -7.195062193526132e+01 -2.024369881546198e+02 3 5.463652944256570e+02 2.787950008966254e+02 -3.108926376755554e+02 -3.523267663221479e+02 4 7.377554679689213e+02 -2.999188987167153e+02 3.828432596108168e+02 5.547637544767679e+02 - ME 8.695282964050810e-03 + ME 1.952686275320307e-03 Event 203 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3676,7 +3676,7 @@ Event 203 Batch 1 2 7.124273471334275e+02 4.879265047129839e+02 -1.059167473143779e+02 -5.081949365946950e+02 3 6.746108110440506e+02 -5.248642991835990e+02 4.352799102536777e+01 4.215714978711400e+02 4 1.129618418225217e+02 3.693779447061509e+01 6.238875628901040e+01 8.662343872355494e+01 - ME 5.361938367485652e-05 + ME 4.211918129012132e-05 Event 204 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3684,7 +3684,7 @@ Event 204 Batch 1 2 7.084787759842808e+02 4.992472551829619e+02 -4.528122431715626e+02 -2.183012291454193e+02 3 1.034373169902747e+02 -8.959882065299325e+01 -3.938861547415055e+01 -3.346441176487074e+01 4 6.880839070254444e+02 -4.096484345299685e+02 4.922008586457131e+02 2.517656409102901e+02 - ME 2.988048706021647e-04 + ME 1.033102023766027e-04 Event 205 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3692,7 +3692,7 @@ Event 205 Batch 1 2 6.496569846879349e+02 -5.869603795046561e+02 -2.345911576090251e+02 1.499956646614410e+02 3 2.543878192344406e+02 -1.851019090219859e+00 2.474675926596849e+02 -5.890268997594536e+01 4 5.959551960776247e+02 5.888113985948760e+02 -1.287643505065981e+01 -9.109297468549572e+01 - ME 1.871447246980874e-04 + ME 4.134215827558992e-05 Event 206 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3700,7 +3700,7 @@ Event 206 Batch 1 2 
6.172060642836410e+02 2.978040691523503e+02 4.166709400833434e+02 3.444435946201744e+02 3 7.205754982426181e+02 -2.468045809177361e+02 -5.690387091428452e+02 -3.667580878490107e+02 4 1.622184374737409e+02 -5.099948823461420e+01 1.523677690595017e+02 2.231449322883641e+01 - ME 7.356489425273393e-05 + ME 1.138691716042452e-05 Event 207 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3708,7 +3708,7 @@ Event 207 Batch 1 2 5.250113096394139e+02 -1.091977068802181e+02 -4.322753509449321e+02 2.772196909074646e+02 3 5.240251005653129e+02 3.541948269240045e+02 3.738549241960732e+02 9.685466564450643e+01 4 4.509635897952731e+02 -2.449971200437864e+02 5.842042674885889e+01 -3.740743565519710e+02 - ME 3.378615964480245e-03 + ME 9.518274156960593e-05 Event 208 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3716,7 +3716,7 @@ Event 208 Batch 1 2 4.449444343820048e+02 1.928662436733418e+02 -3.595193210859464e+02 1.775500478872298e+02 3 4.894053462810564e+02 -2.195789585225567e+02 2.295326432211599e+02 3.723136307450180e+02 4 5.656502193369389e+02 2.671271484921488e+01 1.299866778647865e+02 -5.498636786322478e+02 - ME 2.068943926258950e-01 + ME 2.179806976662403e-03 Event 209 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3724,7 +3724,7 @@ Event 209 Batch 1 2 4.949423498078044e+02 -2.830370809537592e+02 -1.684680620467476e+02 -3.694271951395289e+02 3 6.326444171345161e+02 3.898538983719823e+02 -1.748162179498052e+02 4.665749526039372e+02 4 3.724132330576786e+02 -1.068168174182231e+02 3.432842799965525e+02 -9.714775746440780e+01 - ME 1.473942246791387e-04 + ME 3.638076645868775e-05 Event 210 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3732,7 +3732,7 @@ Event 210 Batch 1 2 5.469464199121014e+02 -4.947084169679945e+02 2.319240083666633e+02 -2.500445517953792e+01 3 
2.929141603572806e+02 -5.602902696925145e+01 2.099470855189298e+01 2.867379913571110e+02 4 6.601394197306178e+02 5.507374439372461e+02 -2.529187169185561e+02 -2.617335361775729e+02 - ME 1.577330101330874e-03 + ME 7.792286450853471e-04 Event 211 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3740,7 +3740,7 @@ Event 211 Batch 1 2 5.484404249965427e+02 1.659778109685243e+01 3.514591842057613e+02 -4.206992456262192e+02 3 4.635537606517395e+02 -3.607884938122542e+02 -3.140996451540818e+01 2.893564685231623e+02 4 4.880058143517181e+02 3.441907127154018e+02 -3.200492196903532e+02 1.313427771030569e+02 - ME 4.999214184618137e-05 + ME 1.717788621912363e-05 Event 212 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3748,7 +3748,7 @@ Event 212 Batch 1 2 6.930853388432640e+02 -3.424793196872474e+02 -8.152110066892747e+01 5.970171795281683e+02 3 9.131624224772825e+01 6.738328155058525e+01 1.365968298972706e+01 6.009627714210347e+01 4 7.155984189090078e+02 2.750960381366621e+02 6.786141767920034e+01 -6.571134566702718e+02 - ME 3.224436999651524e-01 + ME 4.440767413899675e-02 Event 213 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3756,7 +3756,7 @@ Event 213 Batch 1 2 7.316448870278512e+02 4.203233031264803e+02 4.913598772661251e+02 -3.423419819067778e+02 3 4.750162603483208e+02 -1.726357548525294e+02 -3.708603862154638e+02 2.414537588813190e+02 4 2.933388526238279e+02 -2.476875482739507e+02 -1.204994910506614e+02 1.008882230254589e+02 - ME 4.008080891216109e-05 + ME 1.166473784051930e-05 Event 214 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3764,7 +3764,7 @@ Event 214 Batch 1 2 4.805779599533694e+02 3.904513572450257e+02 -1.742898429406511e+02 2.193763065287195e+02 3 6.164938851206517e+02 -5.563771061772993e+02 2.227142270499353e+02 1.445946028815716e+02 4 
4.029281549259790e+02 1.659257489322735e+02 -4.842438410928419e+01 -3.639709094102910e+02 - ME 1.130096726278085e-02 + ME 1.644694060635318e-04 Event 215 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3772,7 +3772,7 @@ Event 215 Batch 1 2 4.610896439725640e+02 -3.106576460930037e+02 -3.050258363865880e+02 -1.518378274323046e+02 3 7.153470686812809e+02 2.726436938726979e+02 6.046054769368644e+02 2.680280994976061e+02 4 3.235632873461531e+02 3.801395222030658e+01 -2.995796405502758e+02 -1.161902720653026e+02 - ME 2.130646114222361e-04 + ME 1.638803663744001e-05 Event 216 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3780,7 +3780,7 @@ Event 216 Batch 1 2 5.309452696424389e+02 -4.912950836090372e+02 -3.608909251460832e+01 -1.980646298023531e+02 3 6.627369363365399e+02 4.479096066616000e+02 2.308759280187052e+02 4.304573578259469e+02 4 3.063177940210212e+02 4.338547694743724e+01 -1.947868355040969e+02 -2.323927280235938e+02 - ME 1.881406502208647e-03 + ME 7.684209531203918e-05 Event 217 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3788,7 +3788,7 @@ Event 217 Batch 1 2 4.608032244164870e+02 2.215832851737383e+02 3.318832460795877e+02 -2.304212888079594e+02 3 3.107022283044695e+02 -4.724697178681157e+01 2.830528592337836e+02 -1.190994425256424e+02 4 7.284945472790432e+02 -1.743363133869267e+02 -6.149361053133712e+02 3.495207313336019e+02 - ME 2.894775763457067e-03 + ME 4.426756984161849e-04 Event 218 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3796,7 +3796,7 @@ Event 218 Batch 1 2 6.336891602166270e+02 5.249943224110900e+02 1.648031440577737e+02 -3.142973702098814e+02 3 5.195346944320743e+02 -3.655895580768890e+02 -3.610279413409480e+02 7.693763263116504e+01 4 3.467761453512956e+02 -1.594047643342018e+02 1.962247972831736e+02 2.373597375787177e+02 - 
ME 2.703962034458943e-05 + ME 8.957256945094420e-06 Event 219 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3804,7 +3804,7 @@ Event 219 Batch 1 2 2.579228498517417e+02 -4.166553381892272e+01 1.191899344508913e+02 2.249042891828000e+02 3 7.453266221408651e+02 -3.354388163550532e+01 -3.947818065141064e+02 -6.312954196904914e+02 4 4.967505280073930e+02 7.520941545442813e+01 2.755918720632151e+02 4.063911305076915e+02 - ME 6.103184694489295e-05 + ME 4.019449398167179e-05 Event 220 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3812,7 +3812,7 @@ Event 220 Batch 1 2 4.940336288355577e+02 -2.383755021420815e+02 -2.918661661143953e+02 3.194690712363630e+02 3 7.129224521449780e+02 2.727447507998269e+02 2.535039959962389e+02 -6.079510240944473e+02 4 2.930439190194635e+02 -3.436924865774512e+01 3.836217011815621e+01 2.884819528580837e+02 - ME 1.761519882509421e-04 + ME 1.677977866215262e-04 Event 221 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3820,7 +3820,7 @@ Event 221 Batch 1 2 3.305414381337777e+02 -2.712796684963201e+02 -1.199910663213094e+02 -1.458325333632650e+02 3 7.388441803280767e+02 5.510455284380058e+02 4.375213740715825e+02 2.254209298704556e+02 4 4.306143815381457e+02 -2.797658599416856e+02 -3.175303077502730e+02 -7.958839650719051e+01 - ME 1.338118621913618e-04 + ME 1.392897982206581e-05 Event 222 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3828,7 +3828,7 @@ Event 222 Batch 1 2 4.657562074797755e+02 2.823280548971349e+02 2.956503281023745e+02 2.231828795335844e+02 3 4.791948192186352e+02 -3.228825926298714e+02 2.575611801233854e+02 -2.429747818931873e+02 4 5.550489733015891e+02 4.055453773273638e+01 -5.532115082257600e+02 1.979190235960287e+01 - ME 9.040551632672907e-05 + ME 2.328731171682892e-05 Event 223 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3836,7 +3836,7 @@ Event 223 Batch 1 2 1.612164685986321e+02 -4.527922182271191e+01 -1.095260585492910e+01 1.543391792239740e+02 3 6.984218503485876e+02 -4.629950983513680e+02 2.605715575888556e+02 -4.533553609726805e+02 4 6.403616810527805e+02 5.082743201740799e+02 -2.496189517339264e+02 2.990161817487066e+02 - ME 4.148580235863498e-04 + ME 2.446487784841432e-04 Event 224 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3844,7 +3844,7 @@ Event 224 Batch 1 2 1.663853414671972e+02 -1.350882138037309e+02 9.706071747767010e+01 3.804401292344658e+00 3 6.436745581417563e+02 -4.469273298203079e+02 -4.412749113764766e+02 -1.408877256838118e+02 4 6.899401003910457e+02 5.820155436240389e+02 3.442141938988058e+02 1.370833243914657e+02 - ME 3.449215697364171e-04 + ME 9.431632941984795e-05 Event 225 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3852,7 +3852,7 @@ Event 225 Batch 1 2 6.702356777533546e+02 6.117158080352369e+02 -2.649249521350114e+02 -6.952987609335720e+01 3 6.901224376513153e+02 -6.564819557015361e+02 1.560869289536550e+02 1.446972404640001e+02 4 1.396418845953297e+02 4.476614766629927e+01 1.088380231813564e+02 -7.516736437064299e+01 - ME 6.407468428023662e-04 + ME 2.456039108263569e-05 Event 226 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3860,7 +3860,7 @@ Event 226 Batch 1 2 7.307777643673112e+02 -4.569648094661606e+02 4.416236342013199e+02 -3.608155616351098e+02 3 1.446420186345137e+02 4.133161435221925e+01 -3.411742569426914e+01 1.343466131828505e+02 4 6.245802169981752e+02 4.156331951139413e+02 -4.075062085070508e+02 2.264689484522593e+02 - ME 4.858390443010437e-04 + ME 2.774761612267077e-04 Event 227 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3868,7 +3868,7 @@ Event 
227 Batch 1 2 7.408615397889290e+02 -4.398089081634772e+02 -5.325812259979131e+02 2.679574278743413e+02 3 4.035753807128123e+02 3.000971513323747e+02 2.468113220276344e+02 -1.090823496201683e+02 4 3.555630794982585e+02 1.397117568311025e+02 2.857699039702786e+02 -1.588750782541728e+02 - ME 3.215647103618368e-04 + ME 3.077346064218035e-05 Event 228 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3876,7 +3876,7 @@ Event 228 Batch 1 2 5.775455372723294e+02 -3.656199842755111e+02 -6.289501053880601e+01 4.426342647953073e+02 3 3.247306314578497e+02 8.776645762339835e+01 3.116872137482897e+02 2.445634292125525e+01 4 5.977238312698206e+02 2.778535266521127e+02 -2.487922032094836e+02 -4.670906077165625e+02 - ME 3.156934429573604e-03 + ME 3.399241079583280e-04 Event 229 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3884,7 +3884,7 @@ Event 229 Batch 1 2 3.665477125629453e+02 -2.081014917770363e+02 2.317985113364040e+02 -1.931850016112187e+02 3 6.187040836990479e+02 -2.134593092471877e+02 -3.484367286517815e+02 4.645661552545953e+02 4 5.147482037380067e+02 4.215608010242241e+02 1.166382173153775e+02 -2.713811536433765e+02 - ME 4.392210547845218e-04 + ME 8.330968691049859e-05 Event 230 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3892,7 +3892,7 @@ Event 230 Batch 1 2 5.913978529013565e+02 -4.986092821675885e+02 -3.028328044703767e+02 9.712104143419764e+01 3 3.439186614041002e+02 -6.573524045766426e+01 3.216488491089061e+02 -1.024741025375549e+02 4 5.646834856945436e+02 5.643445226252528e+02 -1.881604463852933e+01 5.353061103357447e+00 - ME 1.067159092411647e-04 + ME 2.296146042402505e-05 Event 231 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3900,7 +3900,7 @@ Event 231 Batch 1 2 5.760768557894827e+02 -7.075794524290799e+01 5.609870884449791e+02 
1.102331327656218e+02 3 6.038619762337338e+02 -2.467027894308989e+02 -5.464177649873398e+02 -7.221250677108812e+01 4 3.200611679767834e+02 3.174607346738069e+02 -1.456932345763944e+01 -3.802062599453370e+01 - ME 8.750887998909065e-05 + ME 9.438631267217403e-06 Event 232 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3908,7 +3908,7 @@ Event 232 Batch 1 2 7.230187249684843e+02 -2.426041066061352e+02 1.884455685697195e+02 -6.545132479937492e+02 3 4.821326920133732e+02 2.438648429837413e+02 -1.563760752388986e+01 4.156168142598493e+02 4 2.948485830181424e+02 -1.260736377606032e+00 -1.728079610458298e+02 2.388964337338999e+02 - ME 4.549716999825542e-05 + ME 3.745272037455064e-05 Event 233 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3916,7 +3916,7 @@ Event 233 Batch 1 2 3.540260977608100e+02 -1.904526694678991e+02 -1.042089619355360e+02 -2.796475475319170e+02 3 4.925592302096041e+02 1.195034224421750e+02 3.554637678715695e+02 -3.193415679485398e+02 4 6.534146720295859e+02 7.094924702572415e+01 -2.512548059360335e+02 5.989891154804569e+02 - ME 2.494643034161164e-04 + ME 1.035644942794080e-04 Event 234 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3924,7 +3924,7 @@ Event 234 Batch 1 2 1.866526101194276e+02 7.776953530733704e+01 -1.047503781897390e+01 1.693557493124073e+02 3 6.012752698516817e+02 5.974840035795012e+02 -4.570329760029643e+01 4.955829083294186e+01 4 7.120721200288899e+02 -6.752535388868379e+02 5.617833541927040e+01 -2.189140401453492e+02 - ME 2.154454342135980e-03 + ME 6.655948749153013e-04 Event 235 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3932,7 +3932,7 @@ Event 235 Batch 1 2 5.032945404607945e+02 1.612889276925247e+02 2.561838854094329e+02 -4.020710050699558e+02 3 7.153634726767370e+02 -3.739069589148947e+02 -1.979140468542061e+02 
5.768609140624169e+02 4 2.813419868624690e+02 2.126180312223700e+02 -5.826983855522722e+01 -1.747899089924609e+02 - ME 8.184939555880423e-04 + ME 1.137471703441233e-04 Event 236 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3940,7 +3940,7 @@ Event 236 Batch 1 2 6.980797829886610e+02 -9.803971882836288e+00 4.740144261428889e+02 5.123764137440797e+02 3 5.519387921056282e+02 -1.638876688381594e+02 -3.209728652821290e+02 -4.180355032606608e+02 4 2.499814249057108e+02 1.736916407209956e+02 -1.530415608607599e+02 -9.434091048341891e+01 - ME 2.813360227943072e-04 + ME 5.842524801707843e-05 Event 237 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3948,7 +3948,7 @@ Event 237 Batch 1 2 1.604490925133743e+02 6.212857081252698e+01 9.075394990141041e+01 1.168232534834160e+02 3 6.578242662283152e+02 5.348507070161563e+02 -3.810396531957998e+02 3.842224792439630e+01 4 6.817266412583107e+02 -5.969792778286832e+02 2.902857032943894e+02 -1.552455014078122e+02 - ME 8.205069948818567e-04 + ME 1.834055676127939e-04 Event 238 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3956,7 +3956,7 @@ Event 238 Batch 1 2 2.789018340499539e+02 1.069933592962543e+02 -2.572713415352736e+02 1.225197647611563e+01 3 4.761759619803052e+02 7.755191627191856e+01 -4.591043622469822e+02 -9.976187456245104e+01 4 7.449222039697408e+02 -1.845452755681728e+02 7.163757037822556e+02 8.750989808633538e+01 - ME 4.130258343824905e-02 + ME 9.445005309896021e-03 Event 239 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3964,7 +3964,7 @@ Event 239 Batch 1 2 4.581461811054764e+02 -3.899520773556200e+02 2.006122777919944e+02 1.326273524830990e+02 3 3.013476461129690e+02 -2.996604136348060e+02 3.145663680794619e+01 4.951799549362093e+00 4 7.405061727815548e+02 6.896124909904260e+02 -2.320689145999406e+02 
-1.375791520324611e+02 - ME 1.351152256907066e-02 + ME 4.970363634614722e-03 Event 240 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3972,7 +3972,7 @@ Event 240 Batch 1 2 5.932490652975304e+02 -4.094504138983958e+01 -3.300190662632461e+02 4.912793227530680e+02 3 3.147487537014150e+02 3.081803657249563e+02 4.097350029662016e+01 -4.912038692507519e+01 4 5.920021810010543e+02 -2.672353243351168e+02 2.890455659666260e+02 -4.421589358279927e+02 - ME 2.300291351402201e-03 + ME 3.420638167820422e-04 Event 241 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3980,7 +3980,7 @@ Event 241 Batch 1 2 4.438703186026563e+01 1.425431959717181e+01 -4.430288595443099e+00 -4.180186016371768e+01 3 7.139617398095604e+02 -8.415544716076485e+01 -5.657765076565163e+02 -4.272659242311072e+02 4 7.416512283301737e+02 6.990112756359306e+01 5.702067962519594e+02 4.690677843948249e+02 - ME 9.657825758456334e-03 + ME 9.983667466725972e-03 Event 242 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3988,7 +3988,7 @@ Event 242 Batch 1 2 3.798759956195423e+02 -1.259218082844715e+02 -3.429343473884153e+02 1.041417477651927e+02 3 6.208895880511435e+02 5.354328139337265e+02 1.248673426784089e+02 -2.884852319370315e+02 4 4.992344163293142e+02 -4.095110056492549e+02 2.180670047100064e+02 1.843434841718389e+02 - ME 4.523810239016752e-05 + ME 1.030886114253601e-05 Event 243 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3996,7 +3996,7 @@ Event 243 Batch 1 2 2.320641800899440e+02 1.658639294991472e+02 7.783463994856535e+01 1.424243988788334e+02 3 6.251485586341132e+02 -2.328139095298017e+02 -4.262931976140131e+02 3.935511574875350e+02 4 6.427872612759426e+02 6.694998003065477e+01 3.484585576654476e+02 -5.359755563663684e+02 - ME 1.068434238404496e-02 + ME 8.493072129055412e-04 Event 244 Batch 1 
0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4004,7 +4004,7 @@ Event 244 Batch 1 2 6.609991843787810e+02 -2.293678857540617e+02 -4.971623496474938e+02 -3.703240376037023e+02 3 1.091403980947070e+02 1.154537470975927e+01 -9.115666825632124e+00 -1.081445118228680e+02 4 7.298604175265119e+02 2.178225110443025e+02 5.062780164731259e+02 4.784685494265703e+02 - ME 2.129811247265830e-03 + ME 9.635755455313371e-04 Event 245 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4012,7 +4012,7 @@ Event 245 Batch 1 2 4.893629130846664e+02 -3.546974954177181e+02 3.112856868655738e+02 -1.294873298810978e+02 3 7.129026631852477e+02 5.703735458058533e+02 -4.257115617679147e+02 -4.091322034012423e+01 4 2.977344237300874e+02 -2.156760503881352e+02 1.144258749023406e+02 1.704005502212233e+02 - ME 2.548352504440589e-05 + ME 5.312368446054512e-06 Event 246 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4020,7 +4020,7 @@ Event 246 Batch 1 2 3.999457395350199e+02 9.605025124341067e+01 9.072234098128430e+01 3.774922524438975e+02 3 3.675469088581873e+02 -1.615841482674670e+01 2.570183669846762e+02 2.622426259669196e+02 4 7.325073516067924e+02 -7.989183641666393e+01 -3.477407079659604e+02 -6.397348784108170e+02 - ME 1.294421983622042e-01 + ME 5.023802198964801e-02 Event 247 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4028,7 +4028,7 @@ Event 247 Batch 1 2 6.711864521923226e+02 3.763073240556692e+02 5.338170415278108e+02 1.546719678644905e+02 3 5.231557804938882e+02 -1.057595517177888e+02 -5.121603131388773e+02 -1.409615302513522e+01 4 3.056577673137891e+02 -2.705477723378804e+02 -2.165672838893370e+01 -1.405758148393554e+02 - ME 2.873345328272106e-04 + ME 1.980507958825256e-05 Event 248 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 
@@ -4036,7 +4036,7 @@ Event 248 Batch 1 2 6.307803946875938e+02 -6.240065811552291e+01 -3.654556314590158e+02 5.103256270499047e+02 3 3.935347424219227e+02 -2.188782290807617e+02 2.916853933646314e+01 -3.257470040392325e+02 4 4.756848628904837e+02 2.812788871962847e+02 3.362870921225527e+02 -1.845786230106721e+02 - ME 2.418190194667681e-04 + ME 8.712398839363553e-05 Event 249 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4044,7 +4044,7 @@ Event 249 Batch 1 2 4.326970760901858e+02 -4.070406664121577e+02 -1.467447404863359e+02 3.261392852829594e+00 3 4.839435229991528e+02 2.335311811831339e+01 2.018595963184923e+02 -4.392136936630267e+02 4 5.833594009106607e+02 3.836875482938447e+02 -5.511485583215654e+01 4.359523008101972e+02 - ME 8.354140201035124e-05 + ME 2.487145538635957e-05 Event 250 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4052,7 +4052,7 @@ Event 250 Batch 1 2 7.010671671345858e+02 -6.122994886156980e+02 -2.473946684860857e+02 2.353303785738851e+02 3 5.574643785654457e+02 3.902114201641945e+02 2.260985614407801e+02 -3.276904354069721e+02 4 2.414684542999681e+02 2.220880684515034e+02 2.129610704530562e+01 9.236005683308701e+01 - ME 4.704118057291807e-05 + ME 1.645582299148298e-05 Event 251 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4060,7 +4060,7 @@ Event 251 Batch 1 2 7.364006127103795e+02 5.379960890463808e+02 4.302640987755426e+02 2.602285070392761e+02 3 3.051282143252570e+01 -2.901685968644106e+00 1.337962970917706e+01 -2.726899336532026e+01 4 7.330865658570956e+02 -5.350944030777371e+02 -4.436437284847198e+02 -2.329595136739561e+02 - ME 8.340546584740779e-03 + ME 6.389613086136084e-03 Event 252 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4068,7 +4068,7 @@ Event 252 Batch 1 2 5.965625584838610e+02 -7.369842915522101e+01 
-5.671364104158780e+02 -1.697401534860145e+02 3 6.549338760881149e+02 -1.514014639568436e+02 6.313240788068730e+02 8.628954906696529e+01 4 2.485035654280235e+02 2.250998931120648e+02 -6.418766839099484e+01 8.345060441904938e+01 - ME 3.985162011735342e-05 + ME 7.225550854378042e-06 Event 253 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4076,7 +4076,7 @@ Event 253 Batch 1 2 5.728678540484714e+02 3.212236187283236e+01 -4.622666283104808e+02 -3.368312580807653e+02 3 7.160302400837320e+02 1.132435775281999e+02 5.206369974620781e+02 4.783433011307397e+02 4 2.111019058677967e+02 -1.453659394010323e+02 -5.837036915159722e+01 -1.415120430499744e+02 - ME 1.248429186447426e-03 + ME 7.499676590470843e-05 Event 254 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4084,7 +4084,7 @@ Event 254 Batch 1 2 5.579357369440610e+02 1.333150067790222e+02 -6.785864805882139e+01 5.375077668373273e+02 3 6.202682598689536e+02 -4.039338689731095e+02 2.012068793592834e+02 -4.255419314189536e+02 4 3.217960031869852e+02 2.706188621940872e+02 -1.333482313004621e+02 -1.119658354183736e+02 - ME 6.088720978226072e-04 + ME 2.226893396847405e-04 Event 255 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4092,5 +4092,5 @@ Event 255 Batch 1 2 7.263612771087843e+02 3.396063850675520e+02 -6.401091575508393e+02 5.028393902637355e+01 3 1.540578578981475e+02 -3.080387127739228e+01 1.060177193258910e+02 -1.074485378375538e+02 4 6.195808649930684e+02 -3.088025137901597e+02 5.340914382249483e+02 5.716459881118030e+01 - ME 1.547064591142216e-04 + ME 4.003666322732326e-05 From 4eaa0919e5b33cb04272ba45ebb2978867ca92f7 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 19:41:10 +0200 Subject: [PATCH 019/119] [oct23av] regenerate gqttq sa and mad with the correct ref file for runTest.exe --- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 22 
+- .../ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt | 1026 ++++++++--------- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 12 +- .../ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt | 1026 ++++++++--------- 4 files changed, 1043 insertions(+), 1043 deletions(-) diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 2527bafd35..7b525500f0 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005446910858154297  +DEBUG: model prefixing takes 0.0057849884033203125  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.076 s +8 processes with 40 diagrams generated in 0.082 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -199,7 +199,7 @@ INFO: Creating files in directory P1_gu_ttxu DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -238,7 +238,7 @@ INFO: Creating files in directory P1_gux_ttxux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -273,17 +273,17 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s -Wrote files for 32 helas calls in 0.226 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s +Wrote files for 32 helas calls in 0.240 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.147 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.129 s +ALOHA: aloha creates 4 routines in 0.141 s FFV1 FFV1 FFV1 @@ -409,6 +409,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to 
see more information about this process. quit -real 0m2.593s -user 0m2.241s -sys 0m0.317s +real 0m2.720s +user 0m2.351s +sys 0m0.337s diff --git a/epochX/cudacpp/gq_ttq.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt b/epochX/cudacpp/gq_ttq.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt index dd90c94acf..d596b33ae7 100644 --- a/epochX/cudacpp/gq_ttq.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt +++ b/epochX/cudacpp/gq_ttq.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt @@ -4,7 +4,7 @@ Event 0 Batch 0 2 2.647483690509011e+02 7.527657265342380e+01 -2.528976247704283e+02 -2.163164141117315e+01 3 6.252973211776936e+02 -5.721080498766041e+02 -1.578766990348905e+01 2.518727230515587e+02 4 6.099543097714056e+02 4.968314772231802e+02 2.686852946739174e+02 -2.302410816403857e+02 - ME 3.498510462248670e-04 + ME 6.254927412618323e-05 Event 1 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -12,7 +12,7 @@ Event 1 Batch 0 2 2.542827954151951e+02 1.482213322085297e+02 -1.988618298139058e+02 -5.607271498295615e+01 3 6.883656117507998e+02 1.265478873489434e+02 5.602777828023585e+02 3.793700749224233e+02 4 5.573515928340058e+02 -2.747692195574731e+02 -3.614159529884527e+02 -3.232973599394667e+02 - ME 7.257243108248426e-04 + ME 8.120933129385430e-05 Event 2 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -20,7 +20,7 @@ Event 2 Batch 0 2 4.301460683791099e+02 -3.656995432079240e+02 -2.257802895903974e+02 -1.768459985405173e+01 3 5.058528987551350e+02 2.755467101243707e+02 -2.034821274188550e+02 3.722313656043856e+02 4 5.640010328657550e+02 9.015283308355326e+01 4.292624170092524e+02 -3.545467657503340e+02 - ME 8.130044127338102e-04 + ME 1.104115154253218e-04 Event 3 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -28,7 +28,7 @@ Event 3 Batch 0 2 6.758793342627306e+02 1.455349847705337e+02 4.360940220328824e+02 
-4.954335945799966e+02 3 3.008019460079605e+02 -1.607139834787174e+02 2.732727402256846e+01 2.527964523704278e+02 4 5.233187197293092e+02 1.517899870818368e+01 -4.634212960554508e+02 2.426371422095687e+02 - ME 7.753277710143621e-05 + ME 4.288074098478053e-05 Event 4 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -36,7 +36,7 @@ Event 4 Batch 0 2 3.540811678028369e+02 5.414642718170588e+01 -3.497885023717100e+02 -9.467915537920108e+00 3 7.415000547748695e+02 1.453779348794601e+00 7.277337852109665e+02 1.422102514562805e+02 4 4.044187774222938e+02 -5.560020653050046e+01 -3.779452828392566e+02 -1.327423359183605e+02 - ME 2.015528729476554e-04 + ME 1.304731284254719e-05 Event 5 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -44,7 +44,7 @@ Event 5 Batch 0 2 4.747467875786874e+02 2.462969907607520e+02 3.713870243947702e+02 1.636886763636381e+02 3 3.438196236093862e+02 -2.056491112573935e+02 2.636029701703988e+02 8.021128807897365e+01 4 6.814335888119255e+02 -4.064787950335840e+01 -6.349899945651691e+02 -2.438999644426124e+02 - ME 6.140777519977192e-04 + ME 1.932390649640220e-04 Event 6 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -52,7 +52,7 @@ Event 6 Batch 0 2 5.623951200922340e+02 4.644673798421034e+02 3.089047820108764e+02 -7.166700647426805e+01 3 2.268243199894467e+02 1.761899852590787e+02 -7.114332369064562e+01 -1.238748914321566e+02 4 7.107805599183188e+02 -6.406573651011822e+02 -2.377614583202307e+02 1.955418979064247e+02 - ME 8.375373201653861e-04 + ME 1.929702539767979e-04 Event 7 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -60,7 +60,7 @@ Event 7 Batch 0 2 4.922243378496302e+02 2.878585072835456e+02 -1.441537488072182e+02 -3.723465794939189e+02 3 2.873990637609374e+02 -5.400981623596619e+01 -8.913204919452846e+01 -2.678369642286231e+02 4 
7.203765983894325e+02 -2.338486910475794e+02 2.332857980017467e+02 6.401835437225419e+02 - ME 2.045598717079573e-03 + ME 6.280412585349807e-04 Event 8 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -68,7 +68,7 @@ Event 8 Batch 0 2 3.353309706037128e+02 -7.529439061162444e+01 -4.917829145606096e+01 -3.230466069128648e+02 3 7.169322705461503e+02 -1.597426278178964e+02 -1.460012137440150e+01 6.987567601563110e+02 4 4.477367588501368e+02 2.350370184295208e+02 6.377841283046249e+01 -3.757101532434461e+02 - ME 5.176104304710922e-03 + ME 1.424871539111113e-03 Event 9 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -76,7 +76,7 @@ Event 9 Batch 0 2 2.557626120875720e+02 2.000882245504951e+02 -5.276260741790070e+01 -1.503174088272977e+02 3 7.044202058180884e+02 -6.969679478438196e+02 -1.019614549623775e+02 6.882422911146106e+00 4 5.398171820943397e+02 4.968797232933244e+02 1.547240623802783e+02 1.434349859161515e+02 - ME 6.498215193902510e-05 + ME 1.126010180174107e-05 Event 10 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -84,7 +84,7 @@ Event 10 Batch 0 2 3.466796552973448e+02 1.172124288883391e+02 -1.804077050554743e+02 2.718475489457261e+02 3 5.174471655316495e+02 -1.610456139025784e+02 -4.497410659869822e+02 -1.988689340353916e+02 4 6.358731791710053e+02 4.383318501423926e+01 6.301487710424565e+02 -7.297861491033444e+01 - ME 2.111165581639245e-04 + ME 8.292383053707579e-05 Event 11 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -92,7 +92,7 @@ Event 11 Batch 0 2 5.730783827248506e+02 -3.059484875398849e+01 3.466457017175528e+02 -4.553235612803233e+02 3 4.410994673708892e+02 -3.026218886155176e+02 -1.990641070399019e+01 3.203005892260318e+02 4 4.858221499042607e+02 3.332167373695061e+02 -3.267392910135624e+02 1.350229720542913e+02 - ME 5.129802099928076e-05 + 
ME 2.195851954305949e-05 Event 12 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -100,7 +100,7 @@ Event 12 Batch 0 2 2.275003875859171e+02 -1.247450244086003e+02 1.654605359856639e+02 9.390376067217456e+01 3 6.138170466352969e+02 3.363961838598331e+02 -2.139358085817026e+01 5.129827374509639e+02 4 6.586825657787861e+02 -2.116511594512328e+02 -1.440669551274935e+02 -6.068864981231385e+02 - ME 5.249882090061186e-02 + ME 3.843244876666358e-03 Event 13 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -108,7 +108,7 @@ Event 13 Batch 0 2 2.867684047377951e+02 7.055192702127012e+01 -2.028354730671929e+02 1.900429278217245e+02 3 6.990707050557395e+02 -5.605742285334717e+02 2.413419117565430e+02 -3.408965629057132e+02 4 5.141608902064654e+02 4.900223015122016e+02 -3.850643868935023e+01 1.508536350839886e+02 - ME 6.422048006176975e-05 + ME 1.780264803426774e-05 Event 14 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -116,7 +116,7 @@ Event 14 Batch 0 2 3.551549262960330e+02 1.090410064132905e+02 3.205839746298526e+02 1.071027348074892e+02 3 5.276349775014137e+02 3.895763694332612e+02 -2.529209653865598e+02 2.503196099590423e+02 4 6.172100962025531e+02 -4.986173758465519e+02 -6.766300924329285e+01 -3.574223447665315e+02 - ME 7.422587439250419e-04 + ME 1.172793340377339e-04 Event 15 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -124,7 +124,7 @@ Event 15 Batch 0 2 5.846731991828425e+02 7.106081559720657e+01 3.900476102503054e+02 4.297161529048979e+02 3 2.829885923647302e+02 -2.767806781033229e+02 5.223342094943639e+01 -2.732525156618249e+01 4 6.323382084524278e+02 2.057198625061163e+02 -4.422810311997417e+02 -4.023909013387152e+02 - ME 1.255922738422332e-03 + ME 2.768931482482754e-04 Event 16 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 
7.500000000000000e+02 @@ -132,7 +132,7 @@ Event 16 Batch 0 2 7.471577506095512e+02 1.666056475215676e+02 -5.784682380714994e+02 -4.425627187781379e+02 3 6.589296733908160e+02 -1.235441202519038e+02 5.251239647671507e+02 3.783780998595698e+02 4 9.391257599963087e+01 -4.306152726966400e+01 5.334427330434855e+01 6.418461891856485e+01 - ME 5.526726502577864e-05 + ME 3.619360847906487e-05 Event 17 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -140,7 +140,7 @@ Event 17 Batch 0 2 3.567490993131759e+02 3.856364495163717e+01 -1.708845728849435e+02 -3.107752047682324e+02 3 6.453207560475681e+02 4.468356462873772e+02 2.282834847349605e+02 4.057874246326636e+02 4 4.979301446392561e+02 -4.853992912390142e+02 -5.739891185001719e+01 -9.501221986443127e+01 - ME 1.327369996555111e-04 + ME 3.400819398697452e-05 Event 18 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -148,7 +148,7 @@ Event 18 Batch 0 2 4.856701782481425e+02 2.509110753153842e+02 -3.498523763974107e+02 -2.247720379690150e+02 3 3.014847498930008e+02 -1.059425909901355e+02 -2.435847754696140e+02 -1.426032222348426e+02 4 7.128450718588564e+02 -1.449684843252488e+02 5.934371518670247e+02 3.673752602038576e+02 - ME 1.018512933050835e-03 + ME 1.704840743724005e-04 Event 19 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -156,7 +156,7 @@ Event 19 Batch 0 2 5.848213503304410e+02 -3.141116763848333e+02 -1.950442390378232e+02 4.531088295091878e+02 3 5.769300027107226e+02 5.020221748138873e+02 2.252239828724832e+02 -1.734823378963534e+02 4 3.382486469588368e+02 -1.879104984290540e+02 -3.017974383465995e+01 -2.796264916128346e+02 - ME 4.267017342507976e-03 + ME 1.566312636528492e-04 Event 20 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -164,7 +164,7 @@ Event 20 Batch 0 2 5.550938429889906e+02 -4.478597170519693e+02 
-1.958065402362923e+02 -2.630791652090858e+02 3 5.585686897587655e+02 3.351111310173187e+02 -1.360174455686903e+02 4.256744830831253e+02 4 3.863374672522434e+02 1.127485860346507e+02 3.318239858049826e+02 -1.625953178740396e+02 - ME 2.768271682113988e-04 + ME 4.443882992804106e-05 Event 21 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -172,7 +172,7 @@ Event 21 Batch 0 2 6.296556563991993e+02 -3.477135312394776e+02 -1.376147989324512e+02 -5.065804111325866e+02 3 3.137568007204202e+02 1.080474571851863e+02 -2.382188236683311e+02 1.732653140250679e+02 4 5.565875428803801e+02 2.396660740542913e+02 3.758336226007823e+02 3.333150971075189e+02 - ME 5.519034669639832e-05 + ME 2.195742323347977e-05 Event 22 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -180,7 +180,7 @@ Event 22 Batch 0 2 5.583338925767162e+02 2.471586228668332e+02 -1.597599499756147e+02 -4.744745610949311e+02 3 5.378723432497920e+02 9.149532098241385e+00 4.314513680009925e+02 3.210493120152684e+02 4 4.037937641734921e+02 -2.563081549650745e+02 -2.716914180253778e+02 1.534252490796627e+02 - ME 3.705224437539572e-05 + ME 1.393143104564022e-05 Event 23 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -188,7 +188,7 @@ Event 23 Batch 0 2 6.057340011976822e+02 6.848115528115159e+01 -5.207204912425279e+02 -3.017849923015605e+02 3 6.884459352783615e+02 -2.949639632364767e+01 6.680977958792448e+02 1.635026102131439e+02 4 2.058200635239559e+02 -3.898475895750391e+01 -1.473773046367171e+02 1.382823820884168e+02 - ME 2.946248744974782e-05 + ME 1.074117284514867e-05 Event 24 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -196,7 +196,7 @@ Event 24 Batch 0 2 4.702316790647315e+02 -1.210575128627593e+02 4.313728504035306e+02 -1.427598490831810e+02 3 7.180482366151732e+02 1.040047389253588e+02 
-7.104588047260974e+02 4.956931953573291e+00 4 3.117200843200960e+02 1.705277393740069e+01 2.790859543225674e+02 1.378029171296075e+02 - ME 3.146557994448562e-05 + ME 5.213387311993420e-06 Event 25 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -204,7 +204,7 @@ Event 25 Batch 0 2 6.261365010744016e+02 -5.354018140499276e+02 -2.095559720530078e+02 2.479477970595020e+02 3 5.483958991041942e+02 5.199465180092641e+02 -9.843995208133505e+01 -1.438862620216537e+02 4 3.254675998214045e+02 1.545529604066345e+01 3.079959241343431e+02 -1.040615350378483e+02 - ME 1.657640191611339e-04 + ME 1.695323153210731e-05 Event 26 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -212,7 +212,7 @@ Event 26 Batch 0 2 4.635816356180677e+02 1.904702824079147e+02 -2.351549941335565e+02 -3.511853259118595e+02 3 3.686385821486527e+02 -2.712527815845713e+02 -6.015354190959191e+01 -2.422764621809819e+02 4 6.677797822332798e+02 8.078249917665664e+01 2.953085360431485e+02 5.934617880928415e+02 - ME 3.250975879010065e-04 + ME 1.052251904460155e-04 Event 27 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -220,7 +220,7 @@ Event 27 Batch 0 2 2.851713673150520e+02 1.387976072955998e+02 1.520424011317634e+02 -1.973348453858079e+02 3 6.747356481771329e+02 2.426633222154767e+02 -4.300238522839811e+02 4.598501858640580e+02 4 5.400929845078149e+02 -3.814609295110765e+02 2.779814511522176e+02 -2.625153404782502e+02 - ME 4.155279516527712e-04 + ME 7.957109124083736e-05 Event 28 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -228,7 +228,7 @@ Event 28 Batch 0 2 1.977804200471008e+02 -1.803202618401224e+02 -8.082809162516925e+01 -8.277519444290659e+00 3 7.197523834069627e+02 3.152541965091956e+02 6.467033971658861e+02 -2.080867841663842e+01 4 5.824671965459364e+02 -1.349339346690732e+02 
-5.658753055407169e+02 2.908619786092899e+01 - ME 1.172809031809504e-04 + ME 1.748013159755222e-05 Event 29 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -236,7 +236,7 @@ Event 29 Batch 0 2 6.123364628491765e+02 -3.746492624245139e+02 3.785128791537567e+02 -3.021950929683376e+02 3 4.056577755659300e+02 1.796205570313495e+00 -8.781658530568643e+01 3.960344074293251e+02 4 4.820057615848937e+02 3.728530568542006e+02 -2.906962938480702e+02 -9.383931446098750e+01 - ME 5.496242925842306e-04 + ME 3.085570985177973e-04 Event 30 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -244,7 +244,7 @@ Event 30 Batch 0 2 7.349194950356053e+02 7.241679607953656e+02 1.425637322816703e+01 1.244354634469208e+02 3 7.321421454671275e+02 -7.253765693071590e+02 -2.895970851972107e+01 -9.498573130653318e+01 4 3.293835949726734e+01 1.208608511793152e+00 1.470333529155409e+01 -2.944973214038765e+01 - ME 5.147061682527938e-02 + ME 3.267107835672361e-04 Event 31 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -252,7 +252,7 @@ Event 31 Batch 0 2 1.718338270585457e+02 -1.344914872264095e+02 -1.021614404532311e+02 3.165350011824393e+01 3 6.313115253715935e+02 -2.849940593920691e+02 -7.916450257599642e+01 -5.577325610990745e+02 4 6.968546475698608e+02 4.194855466184786e+02 1.813259430292275e+02 5.260790609808306e+02 - ME 4.645345268703414e-04 + ME 1.685680846028125e-04 Event 32 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -260,7 +260,7 @@ Event 32 Batch 0 2 7.235176898898732e+02 -4.762113006241282e+02 -2.880822916693121e+01 5.439400065022983e+02 3 6.603902828461299e+02 4.672103814637360e+02 1.031050210016798e+02 -4.551913221650266e+02 4 1.160920272639969e+02 9.000919160392018e+00 -7.429679183474862e+01 -8.874868433727177e+01 - ME 4.476006843186700e-03 + ME 2.173072900368875e-04 Event 33 
Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -268,7 +268,7 @@ Event 33 Batch 0 2 4.786737271642286e+02 2.009638309376703e+02 4.090184839380260e+02 1.464443769121513e+02 3 3.795793219608408e+02 -6.057523839522271e+00 -8.244277697544294e+01 3.704685635647950e+02 4 6.417469508749314e+02 -1.949063070981495e+02 -3.265757069625828e+02 -5.169129404769461e+02 - ME 1.351709676586880e-02 + ME 3.322437827682699e-03 Event 34 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -276,7 +276,7 @@ Event 34 Batch 0 2 6.621583515140109e+02 -5.051303032557109e+02 -1.429543729176959e+02 4.035605363216953e+02 3 3.008522892707525e+02 8.677543723835062e+01 2.726747894692539e+02 -9.290092916351111e+01 4 5.369893592152367e+02 4.183548660173603e+02 -1.297204165515579e+02 -3.106596071581844e+02 - ME 6.460854093057828e-04 + ME 9.294666462955388e-05 Event 35 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -284,7 +284,7 @@ Event 35 Batch 0 2 6.158114977149372e+02 2.502256147979830e+02 4.233348779616202e+00 5.626659943296695e+02 3 1.476397433483021e+02 -1.670550278282843e+01 -6.055370982200890e+01 1.336101351676488e+02 4 7.365487589367605e+02 -2.335201120151546e+02 5.632036104239269e+01 -6.962761294973184e+02 - ME 2.101231899117793e+00 + ME 5.450893768264864e-01 Event 36 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -292,7 +292,7 @@ Event 36 Batch 0 2 7.182456511154913e+02 -7.463771462544163e+01 -6.667773110518942e+02 2.563475070450518e+02 3 4.860008755751825e+02 -7.840660561780868e+01 4.141081959217036e+02 -2.419992919944378e+02 4 2.957534733093268e+02 1.530443202432501e+02 2.526691151301903e+02 -1.434821505061448e+01 - ME 9.644531209480271e-05 + ME 1.793136635525090e-05 Event 37 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -300,7 
+300,7 @@ Event 37 Batch 0 2 5.672182018814327e+02 -2.031706828392718e+00 -5.267408190306547e+02 2.104197478372323e+02 3 4.664069288608281e+02 3.712365792892206e+02 2.604523782658950e+02 -1.090109358856581e+02 4 4.663748692577387e+02 -3.692048724608279e+02 2.662884407647597e+02 -1.014088119515743e+02 - ME 1.216876552012178e-04 + ME 1.885829354904198e-05 Event 38 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -308,7 +308,7 @@ Event 38 Batch 0 2 5.068057345787187e+02 4.883513201966852e+02 -7.570036138649985e+01 -1.124032737511800e+02 3 3.871140338254017e+02 -1.153787089711745e+02 -3.599073977747533e+02 -8.373585688177315e+01 4 6.060802315958797e+02 -3.729726112255107e+02 4.356077591612532e+02 1.961391306329531e+02 - ME 1.006736553113524e-04 + ME 2.004468492837133e-05 Event 39 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -316,7 +316,7 @@ Event 39 Batch 0 2 4.960337392567769e+02 -3.669089247616476e+02 2.651961920161227e+02 -2.027271347192069e+02 3 2.837821967046824e+02 -2.822567153069604e+02 -2.935613327724534e+01 -1.303560381865560e+00 4 7.201840640385411e+02 6.491656400686079e+02 -2.358400587388775e+02 2.040306951010725e+02 - ME 1.372807525012575e-03 + ME 2.738639406673165e-04 Event 40 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -324,7 +324,7 @@ Event 40 Batch 0 2 3.080730228651936e+02 -3.065830270999447e+02 -2.484308296331460e+01 1.728167064871203e+01 3 6.842346640746094e+02 4.630487823766367e+02 8.554554725666550e+01 -4.964321303112498e+02 4 5.076923130601962e+02 -1.564657552766919e+02 -6.070246429335075e+01 4.791504596625378e+02 - ME 4.192363154074847e-05 + ME 4.316353181637933e-05 Event 41 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -332,7 +332,7 @@ Event 41 Batch 0 2 1.602650851118221e+02 -1.258781096038287e+02 -9.817642232798531e+01 
1.417706342452912e+01 3 7.146392966623014e+02 6.799675591776853e+02 -1.019163870176435e+02 1.948499239342933e+02 4 6.250956182258764e+02 -5.540894495738563e+02 2.000928093456288e+02 -2.090269873588226e+02 - ME 4.523507186168379e-04 + ME 6.118266190948034e-05 Event 42 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -340,7 +340,7 @@ Event 42 Batch 0 2 1.687893235969910e+02 1.289401357197518e+02 4.788693514682045e+01 9.783209393213438e+01 3 7.042017295435162e+02 -1.022058447296739e+02 -6.640064324330017e+02 -2.110675220936915e+02 4 6.270089468594927e+02 -2.673429099007782e+01 6.161194972861812e+02 1.132354281615572e+02 - ME 1.686356189272381e-04 + ME 4.091574289077424e-05 Event 43 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -348,7 +348,7 @@ Event 43 Batch 0 2 4.729783670130408e+02 -7.983817933050123e+01 9.052957805204315e+01 4.573169538528310e+02 3 5.638402597824536e+02 4.785250044669658e+02 7.435095949863268e+01 -2.887933404236804e+02 4 4.631813732045056e+02 -3.986868251364646e+02 -1.648805375506758e+02 -1.685236134291506e+02 - ME 5.938757690519573e-04 + ME 2.654067897204875e-04 Event 44 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -356,7 +356,7 @@ Event 44 Batch 0 2 1.774791104122977e+02 -1.952605982635784e+01 6.371003613266313e+01 1.644949814321787e+02 3 7.194816205691247e+02 -3.678871192485065e+02 2.644831693887214e+01 -6.177486190667772e+02 4 6.030392690185777e+02 3.874131790748646e+02 -9.015835307153536e+01 4.532536376345985e+02 - ME 2.092333697371024e-04 + ME 1.390282437939369e-04 Event 45 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -364,7 +364,7 @@ Event 45 Batch 0 2 7.477488480180839e+02 -3.787655987618923e+02 1.634662296474455e+02 6.236535517992064e+02 3 7.458113398274099e+02 3.819163358711198e+02 -1.661042992235261e+02 
-6.186952632673017e+02 4 6.439812154506046e+00 -3.150737109227506e+00 2.638069576080606e+00 -4.958288531904773e+00 - ME 9.377954359926730e-02 + ME 4.591622113024210e-03 Event 46 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -372,7 +372,7 @@ Event 46 Batch 0 2 3.243146757688279e+02 -4.392587631431587e+00 -2.496903827548322e+02 -2.069188895501946e+02 3 5.341608950426614e+02 -2.704482657861201e+02 2.711825143656835e+02 -3.723515022507137e+02 4 6.415244291885106e+02 2.748408534175518e+02 -2.149213161085120e+01 5.792703918009084e+02 - ME 1.879047912263320e-04 + ME 7.845213441237594e-05 Event 47 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -380,7 +380,7 @@ Event 47 Batch 0 2 6.742198761450968e+02 -3.282965096491567e+02 5.301803926793563e+02 -2.563251730900704e+02 3 6.484148720042493e+02 3.527030795571956e+02 -3.975273148506379e+02 3.715029176935211e+02 4 1.773652518506536e+02 -2.440656990803885e+01 -1.326530778287185e+02 -1.151777446034508e+02 - ME 1.136665455996279e-03 + ME 5.254395938575492e-05 Event 48 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -388,7 +388,7 @@ Event 48 Batch 0 2 7.321401810535270e+02 -1.843482647928687e+02 4.412348098999295e+02 5.543976952635381e+02 3 7.293058265076229e+02 2.182722651304250e+02 -4.435200216702997e+02 -5.362221528717154e+02 4 3.855399243885009e+01 -3.392400033755636e+01 2.285211770370227e+00 -1.817554239182278e+01 - ME 2.278442596973106e-03 + ME 2.330290263553363e-04 Event 49 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -396,7 +396,7 @@ Event 49 Batch 0 2 3.511117284856090e+02 -3.272266866652174e+02 5.199533974843238e+01 1.161835877338140e+02 3 7.326526490901410e+02 6.615045961628415e+02 -2.993354007364775e+02 -9.792799058578566e+01 4 4.162356224242500e+02 -3.342779094976241e+02 2.473400609880451e+02 
-1.825559714802838e+01 - ME 8.806759903737244e-05 + ME 7.863589115869630e-06 Event 50 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -404,7 +404,7 @@ Event 50 Batch 0 2 7.322170903075255e+02 2.740692406080844e+02 1.952596610981929e+01 -6.787095515302592e+02 3 3.078559130669522e+02 -1.663333363406682e+02 8.625456119089935e+01 2.442716420418760e+02 4 4.599269966255216e+02 -1.077359042674159e+02 -1.057805273007185e+02 4.344379094883832e+02 - ME 7.579426018596712e-05 + ME 6.765758192049922e-05 Event 51 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -412,7 +412,7 @@ Event 51 Batch 0 2 3.473696038265160e+02 -2.922314643158454e+02 -6.759614889845234e+01 -1.752060888796554e+02 3 5.389399151999496e+02 -2.449040872454050e+02 9.346474502284556e+01 4.708954891311219e+02 4 6.136904809735339e+02 5.371355515612503e+02 -2.586859612439322e+01 -2.956894002514666e+02 - ME 4.687828430739845e-04 + ME 2.035652280642710e-04 Event 52 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -420,7 +420,7 @@ Event 52 Batch 0 2 6.818614816439094e+02 5.970116833066725e+02 3.013730734325877e+02 1.329902280423528e+02 3 2.108623144448950e+02 -4.198344769951654e+00 -1.698802183673395e+02 -1.248439063859965e+02 4 6.072762039111957e+02 -5.928133385367207e+02 -1.314928550652483e+02 -8.146321656356344e+00 - ME 1.636869658416981e-04 + ME 4.047005152694340e-05 Event 53 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -428,7 +428,7 @@ Event 53 Batch 0 2 5.157714002491656e+02 -5.140718537651751e+02 -4.182413977701254e+01 1.003899065692042e+00 3 5.148181840855221e+02 2.868792199999327e+02 1.974924151010656e+02 3.791237552236646e+02 4 4.694104156653124e+02 2.271926337652422e+02 -1.556682753240530e+02 -3.801276542893567e+02 - ME 3.182294022992135e-03 + ME 1.547751010871262e-04 Event 54 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -436,7 +436,7 @@ Event 54 Batch 0 2 6.433410767101752e+02 2.586883950027282e+02 -5.809813083922761e+02 9.710187728524583e+01 3 6.928799734080563e+02 -1.579832568796111e+02 6.405510983559769e+02 -2.117031848853746e+02 4 1.637789498817686e+02 -1.007051381231171e+02 -5.956978996370073e+01 1.146013076001288e+02 - ME 3.280140142776471e-05 + ME 1.302720215079095e-05 Event 55 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -444,7 +444,7 @@ Event 55 Batch 0 2 7.193759752058201e+02 -3.536444481659258e+02 -7.212523476050659e+01 -6.222823703878202e+02 3 5.307053661742267e+02 2.409461639849982e+02 1.900944302490854e+02 4.329633233142391e+02 4 2.499186586199529e+02 1.126982841809279e+02 -1.179691954885788e+02 1.893190470735813e+02 - ME 3.939174164528502e-05 + ME 3.087450123310173e-05 Event 56 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -452,7 +452,7 @@ Event 56 Batch 0 2 3.858864959547013e+02 1.815174721437793e+02 3.218581876578407e+02 -1.112074732396182e+02 3 4.484505297447187e+02 -3.244105157450006e+02 2.934585578803474e+02 -9.873079412811623e+01 4 6.656629743005793e+02 1.428930436012212e+02 -6.153167455381879e+02 2.099382673677345e+02 - ME 2.326138625268126e-04 + ME 4.275995533811995e-05 Event 57 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -460,7 +460,7 @@ Event 57 Batch 0 2 5.284589752749192e+02 3.868194647882293e+02 -1.709996888155517e+02 3.168575336559793e+02 3 6.299868555278971e+02 -1.587414880613579e+02 2.327134172236622e+02 -5.634971548731005e+02 4 3.415541691971835e+02 -2.280779767268714e+02 -6.171372840811043e+01 2.466396212171210e+02 - ME 3.474853710074164e-05 + ME 2.211478424702745e-05 Event 58 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -468,7 +468,7 @@ 
Event 58 Batch 0 2 6.172037319760957e+02 -2.246119436411400e+02 -2.286037628748728e+01 5.744278237820342e+02 3 5.117934503257735e+02 1.262762853074207e+02 3.215736628881853e+02 -3.775939815489577e+02 4 3.710028176981306e+02 9.833565833371921e+01 -2.987132866006979e+02 -1.968338422330765e+02 - ME 6.183305374210038e-04 + ME 1.857727050583390e-04 Event 59 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -476,7 +476,7 @@ Event 59 Batch 0 2 7.388935626701858e+02 -3.912134623809441e+02 -5.457789630286015e+02 3.082872805076099e+02 3 1.936051438730608e+02 1.561492575196544e+02 8.304673385628061e+01 -7.876294246644987e+01 4 5.675012934567535e+02 2.350642048612896e+02 4.627322291723209e+02 -2.295243380411600e+02 - ME 4.116991424436793e-04 + ME 6.745345781245190e-05 Event 60 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -484,7 +484,7 @@ Event 60 Batch 0 2 7.258141426633659e+02 -5.584991156701968e+02 1.635894950857984e+02 4.337319270970709e+02 3 2.789580074371136e+02 2.331554478032953e+02 6.512410160032128e+01 -1.386180308029247e+02 4 4.952278498995201e+02 3.253436678669015e+02 -2.287135966861195e+02 -2.951138962941461e+02 - ME 7.295672680059989e-04 + ME 9.170244877267536e-05 Event 61 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -492,15 +492,15 @@ Event 61 Batch 0 2 5.906141202026897e+02 4.485275282318680e+02 -2.043613424290570e+02 3.253990429020988e+02 3 4.163572165237975e+02 -4.021600557528675e+02 -4.112755461437413e+01 9.964509802161204e+01 4 4.930286632735124e+02 -4.636747247900051e+01 2.454888970434311e+02 -4.250441409237108e+02 - ME 5.845307122272604e-03 + ME 1.836685601489136e-04 Event 62 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 1 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 -7.500000000000000e+02 2 7.346180891175762e+02 
3.693463141798367e+02 7.549194961263061e+01 -6.305140780380819e+02 3 4.420621433230785e+02 -2.806743363126464e+02 3.467380983154045e+01 3.397625382625571e+02 - 4 3.233197675593453e+02 -8.867197786719018e+01 -1.101657594441711e+02 2.907515397755249e+02 - ME 3.963631774242112e-05 + 4 3.233197675593452e+02 -8.867197786719018e+01 -1.101657594441711e+02 2.907515397755248e+02 + ME 3.490896135533686e-05 Event 63 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -508,7 +508,7 @@ Event 63 Batch 0 2 6.451039732729313e+02 -2.415045377667665e+02 1.990362537024482e+02 -5.641092662620230e+02 3 3.260870385294104e+02 2.061141051805976e+02 -2.496695602716584e+02 3.892098426606745e+01 4 5.288089881976584e+02 3.539043258616898e+01 5.063330656921013e+01 5.251882819959555e+02 - ME 4.832224458906289e-04 + ME 4.428689394331114e-04 Event 64 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -516,7 +516,7 @@ Event 64 Batch 0 2 5.275973380665291e+02 -6.064553482667328e+01 4.309976929667101e+02 -2.981980196075213e+02 3 5.799838776791826e+02 3.279821268626862e+02 -1.824214634122377e+02 4.421893627315650e+02 4 3.924187842542880e+02 -2.673365920360130e+02 -2.485762295544724e+02 -1.439913431240437e+02 - ME 2.175617604507715e-04 + ME 4.205989960223865e-05 Event 65 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -524,7 +524,7 @@ Event 65 Batch 0 2 6.480172869826541e+02 2.720879118036237e+02 -5.153900904044360e+02 -2.833154199679406e+02 3 7.075023253568394e+02 -3.440299289242928e+02 4.709796137500282e+02 4.004761563708322e+02 4 1.444803876605064e+02 7.194201712066916e+01 4.441047665440794e+01 -1.171607364028916e+02 - ME 4.989956280474397e-03 + ME 1.103463366798231e-04 Event 66 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -532,7 +532,7 @@ Event 66 Batch 0 2 5.472978185025795e+02 
4.857452785131266e+02 -2.223654169683454e+02 -1.189119332799752e+02 3 3.203062148499983e+02 1.169702135976477e+02 2.922172461416276e+02 -5.935588816501102e+01 4 6.323959666474225e+02 -6.027154921107744e+02 -6.985182917328234e+01 1.782678214449862e+02 - ME 1.346850069104626e-04 + ME 2.913920636000223e-05 Event 67 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -540,7 +540,7 @@ Event 67 Batch 0 2 4.264671493042950e+02 1.195959046886511e+02 -2.647539231733031e+02 3.122121220929446e+02 3 5.059969655247565e+02 3.777175441887567e+02 -7.608313561896731e+00 -3.366073372596325e+02 4 5.675358851709483e+02 -4.973134488774080e+02 2.723622367352000e+02 2.439521516668857e+01 - ME 9.763221977220593e-05 + ME 4.009347519102052e-05 Event 68 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -548,7 +548,7 @@ Event 68 Batch 0 2 5.996105691520872e+02 -3.814725562071957e+02 -3.417794545715573e+02 3.117664637712124e+02 3 2.164196744806214e+02 1.292759463548889e+02 -1.184749651041615e+02 1.268419798013013e+02 4 6.839697563672917e+02 2.521966098523068e+02 4.602544196757188e+02 -4.386084435725137e+02 - ME 2.936083529685707e-03 + ME 6.175473672610461e-04 Event 69 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -556,7 +556,7 @@ Event 69 Batch 0 2 4.950546755511076e+02 -1.873718558932053e+02 -4.578972175289678e+02 -1.735101101888631e+01 3 4.768584394819691e+02 -1.830244097668608e+02 2.985566003539791e+02 -3.236664843936508e+02 4 5.280868849669230e+02 3.703962656600661e+02 1.593406171749887e+02 3.410174954125370e+02 - ME 5.234212626720279e-05 + ME 1.367292435278724e-05 Event 70 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -564,7 +564,7 @@ Event 70 Batch 0 2 6.918343395272258e+02 6.895733556028865e+02 -5.391072441382606e+01 -1.473005040127906e+01 3 2.169590284692678e+02 
-1.127375202028747e+02 1.807969800614662e+02 4.091361110301506e+01 4 5.912066320035063e+02 -5.768358354000119e+02 -1.268862556476402e+02 -2.618356070173603e+01 - ME 1.591740981760110e-04 + ME 3.526540789264872e-05 Event 71 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -572,7 +572,7 @@ Event 71 Batch 0 2 5.156371334918733e+02 1.547202099034306e+02 -4.807172487652236e+02 1.041836686949964e+02 3 3.718518305526428e+02 -8.969821893462726e+01 -7.521366892975188e+01 -3.529460545344468e+02 4 6.125110359554843e+02 -6.502199096880338e+01 5.559309176949756e+02 2.487623858394504e+02 - ME 1.125100552069616e-04 + ME 2.860782472746935e-05 Event 72 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -580,7 +580,7 @@ Event 72 Batch 0 2 2.110577464974889e+02 5.009520239746097e+01 -1.453533690489527e+02 -1.445968227848547e+02 3 7.317124633441161e+02 -4.429659627226336e+02 5.264774879404380e+02 2.490095170354977e+02 4 5.572297901583943e+02 3.928707603251725e+02 -3.811241188914850e+02 -1.044126942506430e+02 - ME 1.823320413479066e-04 + ME 2.666441446531882e-05 Event 73 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -588,7 +588,7 @@ Event 73 Batch 0 2 3.932257450488246e+02 3.105005764664288e+01 -2.932679039283983e+02 2.601082794045340e+02 3 5.658879124646472e+02 3.645905401293642e+02 4.244364556305355e+02 8.459646951004230e+01 4 5.408863424865281e+02 -3.956405977760074e+02 -1.311685517021372e+02 -3.447047489145762e+02 - ME 8.953763196089171e-04 + ME 7.825486685913998e-05 Event 74 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -596,7 +596,7 @@ Event 74 Batch 0 2 1.374854102925440e+02 7.785209805930555e+01 4.289805712042688e+01 1.048858692406466e+02 3 6.381281910764947e+02 -1.004137270491618e+02 -1.591026937267357e+02 6.097630724433484e+02 4 7.243863986309617e+02 
2.256162898985645e+01 1.162046366063089e+02 -7.146489416839951e+02 - ME 1.395531292378326e+01 + ME 1.919068868336380e+00 Event 75 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -604,7 +604,7 @@ Event 75 Batch 0 2 5.936883054156938e+02 -3.438525101293572e+00 -2.706855443967301e+02 5.283780053968293e+02 3 5.912298912592892e+02 1.109657062166288e+02 4.832067437414102e+02 -3.221034603433170e+02 4 3.150818033250173e+02 -1.075271811153352e+02 -2.125211993446803e+02 -2.062745450535123e+02 - ME 1.379908325625592e-03 + ME 1.642862842910461e-04 Event 76 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -612,7 +612,7 @@ Event 76 Batch 0 2 6.619486867997672e+02 2.801967015359571e+01 2.136411519593737e+02 6.258980909300584e+02 3 1.201252731414031e+02 2.274423842261747e+01 -8.754996679960182e+01 7.904292618103446e+01 4 7.179260400588295e+02 -5.076390857621322e+01 -1.260911851597719e+02 -7.049410171110928e+02 - ME 5.870483941147637e+00 + ME 7.362202483972824e-01 Event 77 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -620,7 +620,7 @@ Event 77 Batch 0 2 7.456676259451606e+02 -7.346624001550109e+02 6.511229493320701e+01 -1.097804865615983e+02 3 1.284204120828029e+02 1.251494694834492e+02 2.867183268690428e+01 2.708973588335753e+00 4 6.259119619720373e+02 6.095129306715618e+02 -9.378412762011118e+01 1.070715129732624e+02 - ME 1.662775178233579e-04 + ME 4.400761364703354e-05 Event 78 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -628,7 +628,7 @@ Event 78 Batch 0 2 7.040158920877628e+02 6.911264613612161e+02 -6.659640240533211e+01 -1.163937709034254e+02 3 5.185438503615327e+02 -4.976050220224222e+02 -1.270913363611937e+02 7.158742227342900e+01 4 2.774402575507044e+02 -1.935214393387939e+02 1.936877387665258e+02 4.480634862999637e+01 - ME 5.328004946641866e-05 + ME 
9.352750539306009e-06 Event 79 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -636,7 +636,7 @@ Event 79 Batch 0 2 6.777589592768838e+02 1.742725197144059e+02 -4.776543849198212e+01 6.532264221831092e+02 3 5.725002211294488e+02 -1.786302554544233e+02 -1.627852110918317e+02 -5.189881598643107e+02 4 2.497408195936665e+02 4.357735740017474e+00 2.105506495838138e+02 -1.342382623187985e+02 - ME 9.179311580246363e-04 + ME 3.598558866345749e-04 Event 80 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -644,7 +644,7 @@ Event 80 Batch 0 2 6.240819586861880e+02 4.679310297228965e+02 -4.118464023828053e+02 -3.002304821964348e+01 3 6.688675489057649e+02 -5.494372353172420e+02 3.251429131208653e+02 1.994607943266771e+02 4 2.070504924080468e+02 8.150620559434545e+01 8.670348926194001e+01 -1.694377461070337e+02 - ME 3.575286400583300e-03 + ME 5.382869847396148e-05 Event 81 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -652,7 +652,7 @@ Event 81 Batch 0 2 5.198056748722776e+02 1.034797897616987e+02 -2.885605608993972e+02 4.197888462474007e+02 3 5.672098642055398e+02 -4.160331805498524e+02 2.087659545613757e+01 -3.849773895903518e+02 4 4.129844609221831e+02 3.125533907881537e+02 2.676839654432596e+02 -3.481145665704891e+01 - ME 1.018936778946332e-04 + ME 3.612255741613163e-05 Event 82 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -660,7 +660,7 @@ Event 82 Batch 0 2 2.057598609140514e+02 6.385349666266659e+01 -2.765433460911293e+01 1.936364870179372e+02 3 6.235840147705873e+02 4.654039114453895e+02 -3.828889383639962e+02 -1.601633028106901e+02 4 6.706561243153629e+02 -5.292574081080552e+02 4.105432729731107e+02 -3.347318420724690e+01 - ME 6.930850923220120e-04 + ME 3.172622561805068e-04 Event 83 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 
7.500000000000000e+02 @@ -668,7 +668,7 @@ Event 83 Batch 0 2 6.583322583736492e+02 1.865539504254553e+02 -1.926584839569474e+02 6.012334775737429e+02 3 3.620902826842561e+02 -3.107067244571256e+02 -1.177956631152976e+01 -1.855584705935048e+02 4 4.795774589420946e+02 1.241527740316703e+02 2.044380502684771e+02 -4.156750069802382e+02 - ME 8.385116111585099e-03 + ME 6.756528802944365e-04 Event 84 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -676,7 +676,7 @@ Event 84 Batch 0 2 4.849329564663161e+02 -2.622178945286150e+02 4.068620488841210e+02 -2.941124332559817e+01 3 4.737588937677760e+02 6.014532316188546e+01 -1.333934272225749e+02 4.505954095412368e+02 4 5.413081497659077e+02 2.020725713667296e+02 -2.734686216615461e+02 -4.211841662156386e+02 - ME 5.162990427398554e-03 + ME 1.017468409980153e-03 Event 85 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -684,7 +684,7 @@ Event 85 Batch 0 2 7.085742632080854e+02 -2.174614026040270e+02 -5.283468657604088e+02 -4.190914152061853e+02 3 5.315764222715953e+02 8.528530557199829e+00 3.820092234108129e+02 3.695533927738615e+02 4 2.598493145203187e+02 2.089328720468272e+02 1.463376423495959e+02 4.953802243232388e+01 - ME 6.335517668355978e-05 + ME 1.894143727100354e-05 Event 86 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -692,7 +692,7 @@ Event 86 Batch 0 2 1.724500140939190e+02 1.231518677708316e+02 -1.121928207497684e+01 1.201946443701656e+02 3 7.028475062724231e+02 -6.467096040851287e+01 -4.553168759141600e+02 -5.315061866629339e+02 4 6.247024796336580e+02 -5.848090736231883e+01 4.665361579891369e+02 4.113115422927684e+02 - ME 1.165531323127631e-04 + ME 5.311384036847167e-05 Event 87 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -700,7 +700,7 @@ Event 87 Batch 0 2 1.942099203196796e+02 -7.751148196958454e+01 
-1.356691819650310e+02 -1.153400900745028e+02 3 7.314670447251594e+02 1.724617634710876e+02 7.020747158546045e+02 1.113196793791551e+02 4 5.743230349551606e+02 -9.495028150150301e+01 -5.664055338895735e+02 4.020410695347637e+00 - ME 1.237609879052555e-04 + ME 1.874087134673149e-05 Event 88 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -708,7 +708,7 @@ Event 88 Batch 0 2 6.382497024023744e+02 2.632142028760094e+02 -5.613974181649784e+02 1.513733956108635e+02 3 3.997044228265544e+02 -5.264940326118349e+01 3.435187961344461e+02 1.974500004195773e+02 4 4.620458747710724e+02 -2.105647996148253e+02 2.178786220305324e+02 -3.488233960304407e+02 - ME 1.863821317258467e-03 + ME 9.699609186666195e-05 Event 89 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -716,7 +716,7 @@ Event 89 Batch 0 2 1.419006640093282e+02 -8.677155154367878e+01 6.457545216231642e+01 -9.185046144153740e+01 3 7.131224514048055e+02 5.460003286026870e+02 -4.154556538506974e+02 -1.944836022569670e+02 4 6.449768845858670e+02 -4.592287770590082e+02 3.508802016883808e+02 2.863340636985044e+02 - ME 1.136115495374629e-04 + ME 2.974199953519439e-05 Event 90 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -724,7 +724,7 @@ Event 90 Batch 0 2 5.730615760623938e+02 -6.017783679015001e+01 -5.202921970507185e+02 -2.325386583054727e+02 3 5.389913703864468e+02 -6.302812531165206e+01 2.446311215742109e+02 4.761247390423042e+02 4 3.879470535511588e+02 1.232059621018019e+02 2.756610754765076e+02 -2.435860807368315e+02 - ME 1.094721025518881e-03 + ME 1.667772733247344e-04 Event 91 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -732,7 +732,7 @@ Event 91 Batch 0 2 4.546745139784350e+02 -1.470341619195494e+02 -1.726383255301703e+02 -3.940886669878754e+02 3 5.110976540119647e+02 -2.482119727393537e+02 
-1.865817698532448e+02 4.059542728975803e+02 4 5.342278320096005e+02 3.952461346589030e+02 3.592200953834151e+02 -1.186560590970480e+01 - ME 8.789722587847313e-05 + ME 4.420313882846059e-05 Event 92 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -740,7 +740,7 @@ Event 92 Batch 0 2 6.683728375977241e+02 -1.148152650923627e+02 3.458291789782991e+02 5.603051703379153e+02 3 2.872567998557088e+02 1.635098024620329e+02 7.847331657016402e+01 -2.227620976482501e+02 4 5.443703625465666e+02 -4.869453736967034e+01 -4.243024955484631e+02 -3.375430726896653e+02 - ME 8.270083568815311e-04 + ME 2.265252332392545e-04 Event 93 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -748,7 +748,7 @@ Event 93 Batch 0 2 5.666948073002088e+02 5.408074886689032e+01 5.639942928586390e+02 -1.134525653745258e+01 3 6.168025492529713e+02 2.439040545997395e+02 -5.541969602989467e+02 1.175666879272316e+02 4 3.165026434468199e+02 -2.979848034666298e+02 -9.797332559692304e+00 -1.062214313897791e+02 - ME 1.664960428447917e-04 + ME 1.251778043268437e-05 Event 94 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -756,7 +756,7 @@ Event 94 Batch 0 2 4.964349376711385e+02 8.445930034540567e+01 -2.409007074648561e+02 -4.257712097695705e+02 3 5.660980232871289e+02 1.373833465612049e+02 5.210669225216058e+02 1.734417778711397e+02 4 4.374670390417324e+02 -2.218426469066104e+02 -2.801662150567495e+02 2.523294318984307e+02 - ME 3.431641292834382e-05 + ME 1.007141026120618e-05 Event 95 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -764,7 +764,7 @@ Event 95 Batch 0 2 7.117074025057361e+02 -3.227984571262278e+02 4.276971164854593e+02 -4.684055501468919e+02 3 1.264078228725325e+02 8.675876182178401e+01 5.074873328843479e+01 7.665781760618943e+01 4 6.618847746217315e+02 2.360396953044439e+02 
-4.784458497738940e+02 3.917477325407025e+02 - ME 2.121249861094822e-04 + ME 8.653822330208906e-05 Event 96 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -772,7 +772,7 @@ Event 96 Batch 0 2 7.329769441659936e+02 -9.642859092211874e+01 6.903981466332597e+02 -2.265107649915406e+02 3 3.937873938465678e+02 -4.837693103302091e+01 -3.847118583018795e+02 6.873841850241256e+01 4 3.732356619874385e+02 1.448055219551397e+02 -3.056862883313802e+02 1.577723464891279e+02 - ME 3.473186069800973e-05 + ME 9.822975749896163e-06 Event 97 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -780,7 +780,7 @@ Event 97 Batch 0 2 3.394989963266853e+01 6.003767577498499e+00 -2.078495220615399e+01 2.616364312804199e+01 3 7.377311980366451e+02 -5.308290258162607e+02 4.681853362634530e+02 2.080152802450354e+02 4 7.283189023306861e+02 5.248252582387622e+02 -4.474003840572991e+02 -2.341789233730774e+02 - ME 2.063600678642283e-02 + ME 2.729355315721549e-03 Event 98 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -788,7 +788,7 @@ Event 98 Batch 0 2 2.496912687496082e+02 -2.485814905959506e+02 -5.435228288348340e-01 -2.350907922099247e+01 3 7.458289852530976e+02 7.373315781279124e+02 9.801365830907572e+01 -5.473885205171283e+01 4 5.044797459972945e+02 -4.887500875319618e+02 -9.747013548024091e+01 7.824793127270530e+01 - ME 6.800308216903296e-05 + ME 8.091578731489026e-06 Event 99 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -796,7 +796,7 @@ Event 99 Batch 0 2 1.698125854886770e+02 8.336002034290719e+01 8.774494220182726e+01 -1.191144253093525e+02 3 6.496622934125946e+02 5.714329899004554e+02 -6.230613627727958e+01 3.027265745152471e+02 4 6.805251210987285e+02 -6.547930102433627e+02 -2.543880592454771e+01 -1.836121492058947e+02 - ME 6.115029137493471e-04 + ME 1.856310681395454e-04 Event 
100 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -804,7 +804,7 @@ Event 100 Batch 0 2 6.141460480129781e+02 -5.842473718080511e+02 -5.092222124447417e+01 1.823110095657221e+02 3 3.909476383151783e+02 2.539115798088024e+02 -2.930333502072385e+02 -5.000421191795168e+01 4 4.949063136718440e+02 3.303357919992488e+02 3.439555714517127e+02 -1.323067976477707e+02 - ME 1.550407956048336e-04 + ME 2.380755205932631e-05 Event 101 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -812,7 +812,7 @@ Event 101 Batch 0 2 7.469346538870473e+02 3.524232024688497e+02 -1.488240016505349e+02 -6.415299525912136e+02 3 6.502268999047169e+02 -2.777200960400715e+02 1.351761574712158e+02 5.721835160737410e+02 4 1.028384462082358e+02 -7.470310642877820e+01 1.364784417931910e+01 6.934643651747267e+01 - ME 1.080054053054822e-04 + ME 7.777208667430486e-05 Event 102 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -820,7 +820,7 @@ Event 102 Batch 0 2 7.426790432885583e+02 -3.141071077544728e+02 6.615000409077074e+02 1.238005738162371e+02 3 6.735764515788642e+01 -4.139700837311957e+00 -5.533298776898177e+01 -3.818606686673834e+01 4 6.899633115535552e+02 3.182468085917849e+02 -6.061670531387255e+02 -8.561450694949879e+01 - ME 6.292262541994918e-04 + ME 1.796768498680773e-04 Event 103 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -828,7 +828,7 @@ Event 103 Batch 0 2 4.837874798175253e+02 -2.731724972668680e+02 1.247027290420595e+02 -3.793103501549069e+02 3 4.466406321977809e+02 -2.904538080082218e+02 -1.536665846758871e+02 3.025078850172422e+02 4 5.695718879846930e+02 5.636263052750895e+02 2.896385563382777e+01 7.680246513766473e+01 - ME 8.140894767450013e-05 + ME 2.998858312831636e-05 Event 104 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 
7.500000000000000e+02 @@ -836,7 +836,7 @@ Event 104 Batch 0 2 5.788466572679498e+02 3.572346730226224e+02 -3.682137844992378e+02 2.680773207965347e+02 3 2.925711988065158e+02 2.155069407513812e+02 1.697995838195863e+02 -1.016010147279926e+02 4 6.285821439255348e+02 -5.727416137740034e+02 1.984142006796517e+02 -1.664763060685422e+02 - ME 2.849770726480251e-04 + ME 7.634200862908681e-05 Event 105 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -844,7 +844,7 @@ Event 105 Batch 0 2 3.361125455083114e+02 2.619004058447622e+02 4.338373361330959e+01 -2.061496357605196e+02 3 5.299016201311088e+02 2.892532450564946e+02 2.091058919093095e+02 3.916669672191841e+02 4 6.339858343605800e+02 -5.511536509012568e+02 -2.524896255226191e+02 -1.855173314586645e+02 - ME 2.866662317167052e-04 + ME 1.089382545947932e-04 Event 106 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -852,7 +852,7 @@ Event 106 Batch 0 2 3.578050478863485e+02 -2.265838270225943e+02 2.740910124726658e+02 -3.947579646386072e+01 3 5.202885196186892e+02 1.412729374205232e+02 1.631578432376887e+02 4.734148487210871e+02 4 6.219064324949621e+02 8.531088960207101e+01 -4.372488557103545e+02 -4.339390522572265e+02 - ME 1.912263829178338e-03 + ME 4.548955126640399e-04 Event 107 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -860,7 +860,7 @@ Event 107 Batch 0 2 5.409822745993889e+02 9.278463733038997e+01 5.102180459532771e+02 -1.540466750365499e+02 3 2.501852297905710e+02 1.682301834486207e+02 1.474652503315489e+02 1.120056004263085e+02 4 7.088324956100398e+02 -2.610148207790107e+02 -6.576832962848259e+02 4.204107461024153e+01 - ME 7.096163321035572e-04 + ME 2.159102073406285e-04 Event 108 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -868,7 +868,7 @@ Event 108 Batch 0 2 6.835202199428555e+02 
6.670011709444186e+02 6.653656309718588e+01 1.337243986739828e+02 3 2.377887385005082e+02 -1.098327419601477e+02 7.667443498831059e+01 -1.964720946353502e+02 4 5.786910415566365e+02 -5.571684289842709e+02 -1.432109980854965e+02 6.274769596136723e+01 - ME 1.143500637563713e-04 + ME 2.960130886583330e-05 Event 109 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -876,7 +876,7 @@ Event 109 Batch 0 2 5.978180281189351e+02 4.291222314737005e+02 2.249703559956599e+02 3.501840146583366e+02 3 3.585061336071061e+02 -3.227227650115256e+02 1.541688059097761e+02 2.467071262824850e+01 4 5.436758382739589e+02 -1.063994664621746e+02 -3.791391619054360e+02 -3.748547272865851e+02 - ME 1.159187207430584e-03 + ME 1.100286424576873e-04 Event 110 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -884,7 +884,7 @@ Event 110 Batch 0 2 7.073952645543156e+01 -4.753982451958468e+01 4.872856968801237e+01 -1.922426029646691e+01 3 7.438039776014969e+02 1.707202332282495e+02 -7.225114374584515e+02 4.556513803361385e+01 4 6.854564959430718e+02 -1.231804087086648e+02 6.737828677704391e+02 -2.634087773714689e+01 - ME 5.177444310012934e-04 + ME 1.052942530962122e-04 Event 111 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -892,7 +892,7 @@ Event 111 Batch 0 2 5.206822291802364e+02 -3.873336848644893e+02 2.415505427333673e+02 -2.504714268307115e+02 3 5.478000561519707e+02 4.687653961676166e+02 -2.245690260344170e+02 -1.729527606656598e+02 4 4.315177146677929e+02 -8.143171130312743e+01 -1.698151669895031e+01 4.234241874963712e+02 - ME 1.041517236520828e-04 + ME 8.545692640795734e-05 Event 112 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -900,7 +900,7 @@ Event 112 Batch 0 2 3.610471238372959e+02 2.563298943277285e+02 9.635756626046441e+01 -2.352981732387216e+02 3 6.139063356201009e+02 
1.031778254919422e+02 -4.257030126280926e+02 4.301305270271111e+02 4 5.250465405426031e+02 -3.595077198196707e+02 3.293454463676283e+02 -1.948323537883896e+02 - ME 2.333567140730066e-04 + ME 5.572029836371622e-05 Event 113 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -908,7 +908,7 @@ Event 113 Batch 0 2 5.886653054136124e+02 3.035646198144377e+02 3.278619896967805e+02 -3.832517176826292e+02 3 5.420023902452333e+02 -3.658357535838290e+02 -3.990519958595696e+02 2.623541560166928e+01 4 3.693323043411537e+02 6.227113376939163e+01 7.119000616278893e+01 3.570163020809600e+02 - ME 6.906402420910258e-05 + ME 4.986188449478774e-05 Event 114 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -916,7 +916,7 @@ Event 114 Batch 0 2 5.165204340356855e+02 2.346362244736889e+01 6.298471388966840e+00 5.159487827839334e+02 3 5.932916594323345e+02 3.608814360715946e+02 -5.336137507463695e+01 -4.678804824963537e+02 4 3.901879065319798e+02 -3.843450585189634e+02 4.706290368567026e+01 -4.806830028757967e+01 - ME 5.363382776736297e-04 + ME 4.029549711869195e-04 Event 115 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -924,7 +924,7 @@ Event 115 Batch 0 2 5.432307281524777e+02 2.250327918244370e+02 4.870559856477670e+02 -8.506664127290338e+01 3 4.265243530840496e+02 2.057819224248363e+02 -2.472237669715339e+02 2.801021835354204e+02 4 5.302449187634726e+02 -4.308147142492733e+02 -2.398322186762331e+02 -1.950355422625171e+02 - ME 2.364149932043149e-04 + ME 4.159321993514108e-05 Event 116 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -932,7 +932,7 @@ Event 116 Batch 0 2 4.402635748890415e+02 -4.240500842615081e+02 -5.733358735035193e+01 -1.035683405941509e+02 3 4.399967684638562e+02 1.183617589007452e+02 -1.041572505293867e+02 -4.107784286579766e+02 4 6.197396566471035e+02 
3.056883253607625e+02 1.614908378797388e+02 5.143467692521278e+02 - ME 1.343295643586522e-04 + ME 4.172733678506819e-05 Event 117 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -940,7 +940,7 @@ Event 117 Batch 0 2 3.074085311587982e+02 -4.270248480828711e+01 -3.034838508096459e+02 2.395944736750828e+01 3 5.360984061023379e+02 3.510554986169303e+02 -1.596589010508530e+02 -3.723849798683070e+02 4 6.564930627388640e+02 -3.083530138086433e+02 4.631427518604987e+02 3.484255325007987e+02 - ME 1.795895763168496e-04 + ME 4.142391000026985e-05 Event 118 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -948,7 +948,7 @@ Event 118 Batch 0 2 5.403602961735903e+02 4.471526113902045e+02 -1.804334130868151e+02 -2.439007487679592e+02 3 5.654623567965698e+02 -5.534570111367966e+02 -1.157195831079003e+02 6.480112868522320e+00 4 3.941773470298406e+02 1.063043997465919e+02 2.961529961947150e+02 2.374206358994370e+02 - ME 3.055618730902428e-05 + ME 7.288650603673961e-06 Event 119 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -956,7 +956,7 @@ Event 119 Batch 0 2 8.009099446659010e+01 5.775399043490319e+01 -2.629604726664823e+01 4.886268393818209e+01 3 7.131140611332349e+02 2.472685400460709e+02 -2.870014097539109e+02 -6.041689532644716e+02 4 7.067949444001758e+02 -3.050225304809738e+02 3.132974570205592e+02 5.553062693262896e+02 - ME 6.861262467765907e-04 + ME 2.815424392761942e-04 Event 120 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -964,7 +964,7 @@ Event 120 Batch 0 2 5.007248873753321e+02 2.708997263130530e+02 -3.880896283797751e+02 1.634784128397387e+02 3 7.413897277398672e+02 -4.257033276374029e+02 5.921425482134987e+02 -1.334264135464211e+02 4 2.578853848848011e+02 1.548036013243502e+02 -2.040529198337238e+02 -3.005199929331748e+01 - ME 1.034513276694145e-04 + ME 
6.003662532288496e-06 Event 121 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -972,7 +972,7 @@ Event 121 Batch 0 2 5.732265116821120e+02 -1.149395375629033e+02 4.260916136383032e+02 3.658189076403451e+02 3 4.323948798659248e+02 -2.148488009071912e+01 -4.178027098651986e+02 1.092914804138530e+02 4 4.943786084519640e+02 1.364244176536226e+02 -8.288903773105691e+00 -4.751103880541979e+02 - ME 8.074833733477824e-02 + ME 7.661241871407340e-04 Event 122 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -980,7 +980,7 @@ Event 122 Batch 0 2 3.423360304412701e+02 2.648046119434483e+02 2.369247279710451e+01 -2.156644197927059e+02 3 6.059487982275789e+02 2.457729689670163e+01 -4.569077875801422e+02 3.972469964635579e+02 4 5.517151713311508e+02 -2.893819088401499e+02 4.332153147830377e+02 -1.815825766708520e+02 - ME 2.180123533398812e-04 + ME 5.274300345459390e-05 Event 123 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -988,7 +988,7 @@ Event 123 Batch 0 2 1.430133297276668e+02 -4.205671322284506e+01 3.498095937953869e+01 1.321377229770999e+02 3 7.140350670908600e+02 -2.955397919833849e+01 -6.570980288365154e+02 -2.778395577453968e+02 4 6.429516031814733e+02 7.161069242118367e+01 6.221170694569771e+02 1.457018347682969e+02 - ME 5.626335206455025e-04 + ME 2.698780233597045e-04 Event 124 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -996,7 +996,7 @@ Event 124 Batch 0 2 6.053457283343441e+02 5.458657819531910e+02 -1.853964251366731e+01 -2.610177782464909e+02 3 7.499633671623128e+02 -6.784114238502394e+02 2.145325921506613e+01 3.189713933003628e+02 4 1.446909045033435e+02 1.325456418970486e+02 -2.913616701398675e+00 -5.795361505387172e+01 - ME 4.169465060943616e-04 + ME 2.629538535113942e-05 Event 125 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -1004,7 +1004,7 @@ Event 125 Batch 0 2 6.695439244882118e+02 9.058534244088493e+01 6.586171675820721e+02 7.941529525294386e+01 3 9.341516463500346e+01 3.490868167113007e+01 5.232133368429144e+01 6.906703243419068e+01 4 7.370409108767834e+02 -1.254940241120154e+02 -7.109385012663632e+02 -1.484823276871337e+02 - ME 1.111472366347957e-02 + ME 4.436636984625360e-03 Event 126 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1012,7 +1012,7 @@ Event 126 Batch 0 2 6.465564354211967e+02 -2.094351601488127e+02 -1.930091683601272e+02 -5.804477571728034e+02 3 1.356182567235447e+02 -2.832094442380729e+01 9.735247446175231e+01 -9.007070211700794e+01 4 7.178253078552584e+02 2.377561045726200e+02 9.565669389837488e+01 6.705184592898115e+02 - ME 1.775660879411100e-03 + ME 1.230970446288030e-03 Event 127 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1020,7 +1020,7 @@ Event 127 Batch 0 2 4.508388003927651e+02 -3.846405138087858e+02 7.756355374444065e+01 2.220162025777267e+02 3 6.162879941073576e+02 2.174727303224461e+02 1.334711143222092e+02 -5.609830344035003e+02 4 4.328732054998774e+02 1.671677834863399e+02 -2.110346680666500e+02 3.389668318257735e+02 - ME 3.922171581774212e-05 + ME 2.127227557837123e-05 Event 128 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1028,7 +1028,7 @@ Event 128 Batch 0 2 7.468963146802857e+02 5.701805835528932e+02 -3.440982003215339e+02 -3.381488363986430e+02 3 1.196664332518719e+02 -9.337643239636876e+01 2.398139841985228e+01 7.089280393650260e+01 4 6.334372520678420e+02 -4.768041511565244e+02 3.201168019016817e+02 2.672560324621404e+02 - ME 2.053620454072734e-04 + ME 7.842790653965437e-05 Event 129 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1036,7 +1036,7 @@ Event 129 Batch 0 2 
4.378966182438207e+02 -4.256397208622688e+02 4.624364030548149e+01 9.190104474357973e+01 3 7.127537996732577e+02 5.790589826349546e+02 -1.369827771626340e+02 -3.923574802896586e+02 4 3.493495820829217e+02 -1.534192617726859e+02 9.073913685715252e+01 3.004564355460789e+02 - ME 1.668072874757384e-05 + ME 1.046217618618756e-05 Event 130 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1044,7 +1044,7 @@ Event 130 Batch 0 2 6.322026526626455e+02 5.905875735566585e+02 -2.387291116192753e+01 -2.243136110600485e+02 3 5.268087771404591e+02 -3.287250458747471e+02 1.913681034684307e+02 3.644798771698754e+02 4 3.409885701968954e+02 -2.618625276819114e+02 -1.674951923065032e+02 -1.401662661098267e+02 - ME 2.766647151388132e-04 + ME 3.412796728096272e-05 Event 131 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1052,7 +1052,7 @@ Event 131 Batch 0 2 2.691964685177017e+02 -2.641651354044939e+02 4.065264362900757e+01 -3.210735842607325e+01 3 5.382709487855662e+02 -3.022535437819008e+02 -4.307865739991411e+02 1.131429946566680e+02 4 6.925325826967319e+02 5.664186791863947e+02 3.901339303701337e+02 -8.103563623059465e+01 - ME 5.354423766199649e-04 + ME 1.516502654737588e-04 Event 132 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1060,7 +1060,7 @@ Event 132 Batch 0 2 1.376388194981169e+02 -2.491804956023667e+01 3.114513197621116e+01 1.317327453336230e+02 3 7.332494677489981e+02 -3.054807357444667e+02 -6.882601889638243e+00 -6.665500220046781e+02 4 6.291117127528858e+02 3.303987853047034e+02 -2.426253008657308e+01 5.348172766710551e+02 - ME 3.625143788027957e-04 + ME 2.459616839911958e-04 Event 133 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1068,7 +1068,7 @@ Event 133 Batch 0 2 5.818916885738672e+02 -3.437736592641007e+02 -2.113522447259726e+02 -4.192228966514222e+02 
3 7.075583625851592e+02 3.695171106849944e+02 9.875952986414086e+01 5.952667441040354e+02 4 2.105499488409736e+02 -2.574345142089370e+01 1.125927148618317e+02 -1.760438474526132e+02 - ME 6.644965721204062e-03 + ME 3.278402967978973e-04 Event 134 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1076,7 +1076,7 @@ Event 134 Batch 0 2 7.039051474789593e+02 -1.767404282002263e+02 5.832845063404937e+02 3.521710697233707e+02 3 6.740856043500099e+02 9.540039380435479e+01 -5.203258634262522e+02 -4.177932056695244e+02 4 1.220092481710302e+02 8.134003439587134e+01 -6.295864291424151e+01 6.562213594615410e+01 - ME 6.394436352069354e-05 + ME 3.621089826286842e-05 Event 135 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1084,7 +1084,7 @@ Event 135 Batch 0 2 7.491379873081086e+02 -6.603965492909807e+02 -9.243924572685610e+01 -3.413782470545817e+02 3 4.360367703469753e+02 3.763875731093294e+02 3.833030381995060e+01 2.167746473012021e+02 4 3.148252423449159e+02 2.840089761816513e+02 5.410894190690560e+01 1.246035997533796e+02 - ME 3.729096801849378e-05 + ME 1.170602675185252e-05 Event 136 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1092,7 +1092,7 @@ Event 136 Batch 0 2 6.907976432034611e+02 -8.965778913807024e+01 -5.375684903631193e+02 -4.244796613161184e+02 3 4.317447428217263e+02 2.541758793770707e+02 2.501815833403360e+02 2.433255445990286e+02 4 3.774576139748129e+02 -1.645180902390004e+02 2.873869070227833e+02 1.811541167170898e+02 - ME 3.295715598818487e-05 + ME 1.221598515374744e-05 Event 137 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1100,7 +1100,7 @@ Event 137 Batch 0 2 5.927917878715718e+02 -5.453882061843875e+02 -2.239274061847312e+02 6.172783069514800e+01 3 3.718333194205911e+02 2.859809174201715e+02 -2.363544177495510e+02 2.472896101988843e+01 4 
5.353748927078371e+02 2.594072887642160e+02 4.602818239342820e+02 -8.645679171503701e+01 - ME 1.267334233155001e-04 + ME 2.222722395048600e-05 Event 138 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1108,7 +1108,7 @@ Event 138 Batch 0 2 1.164849493482387e+02 2.012854405109472e+01 -2.573298799707043e+01 -1.118096528381494e+02 3 7.481698498358139e+02 -1.044692284663333e+02 -4.003634472873074e+00 7.408294509656059e+02 4 6.353452008159477e+02 8.434068441523856e+01 2.973662246994375e+01 -6.290197981274564e+02 - ME 3.545594402685597e+00 + ME 1.183014588836486e-01 Event 139 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1116,7 +1116,7 @@ Event 139 Batch 0 2 3.415587822283577e+02 -2.468214832259765e+02 1.926082427237748e+02 1.365416492148350e+02 3 5.828887331044928e+02 -1.023403009989268e+02 -5.561813319045077e+02 1.412376154306548e+02 4 5.755524846671491e+02 3.491617842249035e+02 3.635730891807333e+02 -2.777792646454897e+02 - ME 4.142320485322521e-04 + ME 5.213154494000113e-05 Event 140 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1124,7 +1124,7 @@ Event 140 Batch 0 2 4.395392082109443e+02 -3.037880820376849e+02 -2.455930383243060e+02 -2.014735126343029e+02 3 4.709796125547878e+02 -2.826270024952004e+02 2.984919122515593e+02 2.298833426397907e+02 4 5.894811792342680e+02 5.864150845328855e+02 -5.289887392725340e+01 -2.840983000548780e+01 - ME 1.220048440917972e-04 + ME 2.990357782498624e-05 Event 141 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1132,7 +1132,7 @@ Event 141 Batch 0 2 3.025838986653694e+02 -2.680006525137058e+02 -6.218827689980458e+01 -1.259574698062632e+02 3 5.104624598690772e+02 -2.829910827131053e+02 4.173533268753467e+02 -7.939880721102661e+01 4 6.869536414655528e+02 5.509917352268112e+02 -3.551650499755422e+02 2.053562770172896e+02 - 
ME 3.735313583347012e-04 + ME 7.151804808113674e-05 Event 142 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1140,7 +1140,7 @@ Event 142 Batch 0 2 4.390011511178412e+02 -3.153925512561953e+02 3.992377088505197e+01 -3.027468279160259e+02 3 4.597282536099518e+02 2.984856708041211e+02 -2.221794712617382e+02 -2.699863960308454e+02 4 6.012705952722066e+02 1.690688045207421e+01 1.822557003766862e+02 5.727332239468712e+02 - ME 1.630913878361870e-04 + ME 8.945447985744934e-05 Event 143 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1148,7 +1148,7 @@ Event 143 Batch 0 2 7.103308443495001e+02 -3.626595603160224e+02 2.462759922459802e+02 5.589240443825270e+02 3 3.424564807343295e+02 4.507572778536915e+01 -2.357842367637252e+02 -2.442343416788665e+02 4 4.472126749161695e+02 3.175838325306533e+02 -1.049175548225529e+01 -3.146897027036604e+02 - ME 1.304325296055160e-03 + ME 1.789392510542836e-04 Event 144 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1156,7 +1156,7 @@ Event 144 Batch 0 2 6.893886390440568e+02 -2.470805413393656e+02 1.331686162420120e+02 6.296618309717105e+02 3 7.132719020730987e+02 2.482972988978650e+02 -2.304803220538649e+02 -6.276815106349294e+02 4 9.733945888284487e+01 -1.216757558499225e+00 9.731170581185302e+01 -1.980320336781234e+00 - ME 3.769348793094523e-04 + ME 1.486904409371019e-04 Event 145 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1164,7 +1164,7 @@ Event 145 Batch 0 2 3.784954309743686e+02 2.391836032855264e+02 1.115572896135236e+01 -2.931305935912622e+02 3 7.389406222827198e+02 -4.231861417520660e+02 1.513250860114713e+02 5.865555822189353e+02 4 3.825639467429113e+02 1.840025384665394e+02 -1.624808149728234e+02 -2.934249886276727e+02 - ME 2.193982780219728e-03 + ME 2.016505354100400e-04 Event 146 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1172,7 +1172,7 @@ Event 146 Batch 0 2 4.681255842987410e+02 -3.253195724522379e+01 1.754808059398437e+02 -4.327698247100133e+02 3 2.875849079819393e+02 2.091841587061404e+01 1.879781824316579e+02 -2.166372592748876e+02 4 7.442895077193195e+02 1.161354137460973e+01 -3.634589883715017e+02 6.494070839849006e+02 - ME 5.347932692815789e-02 + ME 1.210467216316050e-02 Event 147 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1180,7 +1180,7 @@ Event 147 Batch 0 2 2.442136391928777e+02 -1.784444843977844e+02 -1.666832492802189e+02 -3.816014311599316e+00 3 5.551361515401285e+02 1.378338123621512e+02 -5.199472642306259e+02 1.372327560591401e+02 4 7.006502092669938e+02 4.061067203563306e+01 6.866305135108448e+02 -1.334167417475408e+02 - ME 7.450632204513606e-04 + ME 2.360352365747709e-04 Event 148 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1188,7 +1188,7 @@ Event 148 Batch 0 2 4.547263863263726e+02 3.928375677411887e+02 5.145105706241225e+01 2.231759855356057e+02 3 7.397285466814292e+02 -5.611511356388266e+02 -1.533645573573770e+02 -4.569322031694095e+02 4 3.055450669921979e+02 1.683135678976379e+02 1.019135002949646e+02 2.337562176338038e+02 - ME 1.440225905683450e-05 + ME 6.307552439231181e-06 Event 149 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1196,7 +1196,7 @@ Event 149 Batch 0 2 2.343018799311635e+02 9.853424545130945e+01 1.924850318874441e+02 -9.021023174733594e+01 3 7.291173748950658e+02 3.429747374294529e+01 -5.990516617369192e+02 4.142136359886766e+02 4 5.365807451737705e+02 -1.328317191942547e+02 4.065666298494750e+02 -3.240034042413406e+02 - ME 8.405553848068603e-04 + ME 8.298171355094406e-05 Event 150 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1204,7 +1204,7 @@ Event 
150 Batch 0 2 4.707648023587808e+02 -8.969278865174961e+01 -3.008719699078221e+02 3.507859183712497e+02 3 6.876639918976698e+02 3.906111988928598e+02 4.609284537794546e+02 -3.284046551871671e+02 4 3.415712057435500e+02 -3.009184102411105e+02 -1.600564838716325e+02 -2.238126318408256e+01 - ME 1.070125715137075e-04 + ME 1.887585788236135e-05 Event 151 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1212,7 +1212,7 @@ Event 151 Batch 0 2 6.503034458278056e+02 -1.575298496674962e+02 -3.658248853789647e+01 -6.298735108350154e+02 3 6.998690336552314e+02 1.302751858829802e+02 -1.019415103826456e+02 6.800389464387812e+02 4 1.498275205169629e+02 2.725466378451580e+01 1.385239989205421e+02 -5.016543560376590e+01 - ME 6.663776898009472e-04 + ME 4.060174493404880e-04 Event 152 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1220,7 +1220,7 @@ Event 152 Batch 0 2 7.401192382353395e+02 1.493701961830190e+02 6.288419447382046e+02 3.605867993093739e+02 3 7.332111095478891e+02 -1.230079111936445e+02 -6.287602831147091e+02 -3.565502647954901e+02 4 2.666965221677112e+01 -2.636228498937447e+01 -8.166162349550861e-02 -4.036534513883709e+00 - ME 8.446403371723604e-04 + ME 1.210964379505254e-04 Event 153 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1228,7 +1228,7 @@ Event 153 Batch 0 2 5.645797071775899e+02 7.941901905692946e+01 3.691428696980725e+02 -4.197337333594241e+02 3 6.079979027943974e+02 1.021455738177839e+02 -5.566920170809548e+02 2.220849604771994e+02 4 3.274223900280123e+02 -1.815645928747133e+02 1.875491473828823e+02 1.976487728822249e+02 - ME 2.846663840296023e-05 + ME 9.895323747190810e-06 Event 154 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1236,7 +1236,7 @@ Event 154 Batch 0 2 6.022174885419887e+02 -5.152457849782368e+02 -1.493252664732707e+02 
-2.736597328082223e+02 3 3.617627670199851e+02 1.925398333816265e+02 -2.626238171638091e+02 1.575736108034646e+02 4 5.360197444380261e+02 3.227059515966102e+02 4.119490836370796e+02 1.160861220047577e+02 - ME 6.437319974597944e-05 + ME 1.660411512586943e-05 Event 155 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1244,7 +1244,7 @@ Event 155 Batch 0 2 6.202229507100907e+02 -2.107861924791831e+02 -3.212541876154504e+02 4.868690137883067e+02 3 2.943040328093193e+02 2.940980302320592e+02 1.073731199058907e+01 2.433613089266508e+00 4 5.854730164805898e+02 -8.331183775287627e+01 3.105168756248616e+02 -4.893026268775732e+02 - ME 5.904510654775639e-03 + ME 4.918845171174253e-04 Event 156 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1252,7 +1252,7 @@ Event 156 Batch 0 2 4.945486805149833e+02 4.540818864859257e+02 -1.431706201593249e+02 -1.337542944644701e+02 3 5.997303202813281e+02 -3.624214233270367e+02 -5.726286247273350e+01 4.743923835389624e+02 4 4.057209992036886e+02 -9.166046315888883e+01 2.004334826320584e+02 -3.406380890744924e+02 - ME 4.701306652347430e-03 + ME 1.986837824231628e-04 Event 157 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1260,7 +1260,7 @@ Event 157 Batch 0 2 4.617003083190191e+02 3.118400043328062e+02 3.404502064148864e+02 -4.079626411035589e+00 3 5.720097526413113e+02 -4.999240316044806e+01 -4.329264075474301e+02 -3.705005295422582e+02 4 4.662899390396696e+02 -2.618476011723578e+02 9.247620113254365e+01 3.745801559532937e+02 - ME 3.907978340087068e-05 + ME 1.403598809900552e-05 Event 158 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1268,7 +1268,7 @@ Event 158 Batch 0 2 6.784877363061535e+02 -5.707102180762959e+02 -3.102223423027389e+02 -1.959529373021938e+02 3 5.650909444059712e+02 5.525284805868615e+02 7.765167789879932e+01 
8.950011457818250e+01 4 2.564213192878751e+02 1.818173748943443e+01 2.325706644039396e+02 1.064528227240114e+02 - ME 3.503179830087694e-05 + ME 8.470133063482862e-06 Event 159 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1276,7 +1276,7 @@ Event 159 Batch 0 2 5.369491563274252e+02 2.154713482252002e+02 -2.912667909729743e+02 3.962955349875316e+02 3 6.066564496499102e+02 -4.020061311781470e+01 5.572389608252350e+02 -2.364332868806716e+02 4 3.563943940226648e+02 -1.752707351073854e+02 -2.659721698522608e+02 -1.598622481068599e+02 - ME 3.198473025834927e-04 + ME 3.562393617300492e-05 Event 160 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1284,7 +1284,7 @@ Event 160 Batch 0 2 6.492474755438517e+02 3.490068395973682e+02 1.460348644657111e+02 -5.276270735801970e+02 3 2.857818814470013e+02 -2.550253586192556e+02 1.227259509083862e+02 3.964456076362119e+01 4 5.649706430091471e+02 -9.398148097811273e+01 -2.687608153740973e+02 4.879825128165764e+02 - ME 6.719464076924620e-05 + ME 3.516238941302227e-05 Event 161 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1292,7 +1292,7 @@ Event 161 Batch 0 2 6.770282049439580e+02 -2.863253153105184e+02 -4.911270786072976e+02 -3.676672364525180e+02 3 1.598243093356544e+02 -7.505362471426160e+01 1.299195075310522e+02 -5.506073768810752e+01 4 6.631474857203874e+02 3.613789400247800e+02 3.612075710762453e+02 4.227279741406256e+02 - ME 1.577168105051119e-04 + ME 5.970757951131334e-05 Event 162 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1300,7 +1300,7 @@ Event 162 Batch 0 2 5.178592782584632e+02 -3.271131571456631e+02 3.943743741889439e+02 -7.512700901574514e+01 3 3.730686930366258e+02 -2.885924195736573e+01 -1.360208443078026e+02 -3.461874113706257e+02 4 6.090720287049110e+02 3.559723991030290e+02 -2.583535298811414e+02 
4.213144203863710e+02 - ME 1.031749267713353e-04 + ME 2.768303103320498e-05 Event 163 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1308,7 +1308,7 @@ Event 163 Batch 0 2 5.388642316037673e+02 3.152159924116781e+02 3.539969933522669e+01 -4.356149670486711e+02 3 5.364171791816749e+02 -5.299694218906361e+02 3.369785517714305e+01 7.576448071880543e+01 4 4.247185892145582e+02 2.147534294789580e+02 -6.909755451236977e+01 3.598504863298658e+02 - ME 3.508094027565679e-05 + ME 1.485600561394433e-05 Event 164 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1316,7 +1316,7 @@ Event 164 Batch 0 2 6.862697092177667e+02 4.132218376422068e+02 1.310202162324327e+02 -5.320221138485150e+02 3 4.476895523579005e+02 -2.769046850483522e+02 1.374187337517142e+02 3.238299280529301e+02 4 3.660407384243329e+02 -1.363171525938544e+02 -2.684389499841469e+02 2.081921857955847e+02 - ME 3.375894779915149e-05 + ME 1.755563256840939e-05 Event 165 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1324,7 +1324,7 @@ Event 165 Batch 0 2 2.382444910715278e+02 -2.158277263671036e+02 -9.471372817531817e+00 -1.004446273032522e+02 3 7.304591383576048e+02 4.619003715882296e+02 -1.223345688256177e+02 5.524969256086772e+02 4 5.312963705708673e+02 -2.460726452211260e+02 1.318059416431495e+02 -4.520522983054250e+02 - ME 6.966498968932957e-03 + ME 4.549138184301779e-04 Event 166 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1332,7 +1332,7 @@ Event 166 Batch 0 2 2.131352071380649e+02 -7.633553084455029e+01 -1.899581415396244e+02 5.929087379418958e+01 3 7.305557876753161e+02 8.980971292745940e+01 7.136333043711877e+02 1.279589045828712e+02 4 5.563090051866194e+02 -1.347418208290915e+01 -5.236751628315633e+02 -1.872497783770607e+02 - ME 3.314006956523505e-04 + ME 3.352199959657985e-05 Event 167 Batch 0 
0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1340,7 +1340,7 @@ Event 167 Batch 0 2 4.122964103002419e+02 -3.405127102276982e+02 6.366431608201744e+01 2.235761145061386e+02 3 4.697083356610920e+02 -2.521100678451879e+02 -2.856113063438232e+01 -3.952855880214881e+02 4 6.179952540386658e+02 5.926227780728861e+02 -3.510318544763516e+01 1.717094735153495e+02 - ME 1.146777177775239e-04 + ME 3.829535931496594e-05 Event 168 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1348,7 +1348,7 @@ Event 168 Batch 0 2 7.156643283953484e+02 -3.999734570317170e+02 4.816586825103861e+02 3.467009924560655e+02 3 6.192344221355605e+02 2.722545660880235e+02 -4.999454120042317e+02 -2.436869012025525e+02 4 1.651012494690919e+02 1.277188909436936e+02 1.828672949384504e+01 -1.030140912535133e+02 - ME 1.017624049822302e-03 + ME 5.027887292283473e-05 Event 169 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1356,7 +1356,7 @@ Event 169 Batch 0 2 3.626022684949455e+02 7.511110909567982e+01 -2.030941161665286e+02 -2.908461902563517e+02 3 5.580565590514408e+02 -2.529981754432838e+02 -3.439969378312538e+02 3.592842232626199e+02 4 5.793411724536141e+02 1.778870663476037e+02 5.470910539977822e+02 -6.843803300626824e+01 - ME 1.371698416063432e-04 + ME 4.350242525242475e-05 Event 170 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1364,7 +1364,7 @@ Event 170 Batch 0 2 6.602909342483501e+02 4.699653539595539e+02 -3.020118498241596e+02 3.520021683086903e+02 3 1.039297502933440e+02 3.247420585022842e+01 -9.851348423194945e+01 6.473976746580508e+00 4 7.357793154583061e+02 -5.024395598097824e+02 4.005253340561092e+02 -3.584761450552709e+02 - ME 1.673719496447659e-02 + ME 9.967260301798612e-03 Event 171 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-1372,7 +1372,7 @@ Event 171 Batch 0 2 1.506693011949600e+02 -3.657300520509282e+01 -1.244227366169959e+02 -7.669834565089053e+01 3 6.344013325830570e+02 -2.026333084464634e+02 -4.956100871165362e+02 3.402578943089165e+02 4 7.149293662219835e+02 2.392063136515561e+02 6.200328237335323e+02 -2.635595486580261e+02 - ME 2.133207113512388e-03 + ME 9.157902172934166e-04 Event 172 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1380,7 +1380,7 @@ Event 172 Batch 0 2 5.352445157558213e+02 -2.018352690102651e+02 3.892440882325296e+02 -3.069825004886504e+02 3 6.716112180685394e+02 2.825227203806547e+02 -5.978593235713698e+02 1.175022124175027e+02 4 2.931442661756383e+02 -8.068745137038898e+01 2.086152353388391e+02 1.894802880711483e+02 - ME 2.630379932615259e-05 + ME 8.067092159940342e-06 Event 173 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1388,7 +1388,7 @@ Event 173 Batch 0 2 6.571348515648592e+02 -2.769863586381786e+02 5.805753619381593e+02 1.343019708712704e+02 3 5.332990408103321e+02 1.871824832342877e+02 -4.782426732337677e+02 1.437168410371092e+02 4 3.095661076248081e+02 8.980387540389081e+01 -1.023326887043915e+02 -2.780188119083794e+02 - ME 9.985413945498126e-03 + ME 1.269359653092767e-04 Event 174 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1396,7 +1396,7 @@ Event 174 Batch 0 2 6.091496911716730e+02 -4.752584064243671e+02 3.135726231883978e+01 -3.797492797588730e+02 3 6.417481529658018e+02 3.309293137608124e+02 9.015643604119191e+01 5.424004960996682e+02 4 2.491021558625255e+02 1.443290926635548e+02 -1.215136983600317e+02 -1.626512163407953e+02 - ME 1.319192968737130e-03 + ME 1.362612102685676e-04 Event 175 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1404,7 +1404,7 @@ Event 175 Batch 0 2 5.399801778396885e+02 1.966672297646830e+02 
2.343185748302537e+02 -4.449667388535759e+02 3 6.987953575798327e+02 -1.857207036318898e+02 -9.664246188148675e+01 6.666955876403318e+02 4 2.612244645804785e+02 -1.094652613279307e+01 -1.376761129487668e+02 -2.217288487867561e+02 - ME 9.528877211334405e-03 + ME 9.613528518728674e-04 Event 176 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1412,7 +1412,7 @@ Event 176 Batch 0 2 6.615757321243968e+02 -4.129469954321281e+02 4.686878756164518e+02 -2.179194886871010e+02 3 1.607981401590110e+02 -6.355407199259605e+01 7.929314438200207e+00 1.474925346731048e+02 4 6.776261277165921e+02 4.765010674247242e+02 -4.766171900546519e+02 7.042695401399614e+01 - ME 6.965204353376922e-04 + ME 3.097907077728356e-04 Event 177 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1420,7 +1420,7 @@ Event 177 Batch 0 2 4.314334067424883e+02 -3.493619040652741e+02 -2.026482683689240e+01 -2.523299055494341e+02 3 4.840006500668400e+02 -1.846595828310067e+02 -1.450727057198388e+02 4.232155216776995e+02 4 5.845659431906716e+02 5.340214868962809e+02 1.653375325567312e+02 -1.708856161282654e+02 - ME 2.160100049311594e-04 + ME 1.084300812640113e-04 Event 178 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1428,7 +1428,7 @@ Event 178 Batch 0 2 4.528135981327372e+02 -2.544528544607913e+02 1.436928116455424e+02 3.458992272209776e+02 3 3.053350882587867e+02 -1.380299578048218e+02 2.072032295570572e+02 1.767599177741536e+02 4 7.418513136084770e+02 3.924828122656132e+02 -3.508960412025996e+02 -5.226591449951313e+02 - ME 7.384409254828141e-02 + ME 5.382438151181503e-02 Event 179 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1436,7 +1436,7 @@ Event 179 Batch 0 2 7.433145319259943e+02 -2.538538580850882e+02 -6.778753511348521e+02 -1.689962142519080e+02 3 1.647945947160298e+02 
1.009041857568576e+02 1.171651165877689e+02 5.699069397138987e+01 4 5.918908733579761e+02 1.529496723282306e+02 5.607102345470832e+02 1.120055202805181e+02 - ME 1.335347052581446e-04 + ME 3.739915465576335e-05 Event 180 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1444,7 +1444,7 @@ Event 180 Batch 0 2 2.396120216689867e+02 1.204528233788652e+02 -1.081248155319049e+02 1.766750195544080e+02 3 5.541470271917004e+02 2.767127195685322e+02 2.999096875483201e+02 3.749175614572557e+02 4 7.062409511393131e+02 -3.971655429473975e+02 -1.917848720164151e+02 -5.515925810116636e+02 - ME 1.316593054412419e-02 + ME 2.792447184071457e-03 Event 181 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1452,7 +1452,7 @@ Event 181 Batch 0 2 2.165494222755782e+02 1.336973493521793e+02 -1.495065670853883e+02 -8.164837697364385e+01 3 6.960869932595207e+02 -2.848973600545249e+02 2.209041937252092e+01 6.347303441548928e+02 4 5.873635844649011e+02 1.512000107023455e+02 1.274161477128675e+02 -5.530819671812490e+02 - ME 6.164296623062663e-02 + ME 3.488874737600980e-03 Event 182 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1460,7 +1460,7 @@ Event 182 Batch 0 2 6.472681881349898e+02 4.279258056181361e+02 3.994050733201775e+02 -2.762448183472868e+02 3 5.337197582091030e+02 -3.479343829022644e+02 -4.034091782989213e+02 -3.254965992745409e+01 4 3.190120536559070e+02 -7.999142271587166e+01 4.004104978744005e+00 3.087944782747408e+02 - ME 6.393158381765308e-05 + ME 5.523679400573375e-05 Event 183 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1468,7 +1468,7 @@ Event 183 Batch 0 2 6.165307808531154e+02 -3.276949594572818e+02 8.808524820164887e+01 -5.147496540405800e+02 3 2.975460412740734e+02 -1.030095950018341e+02 -2.375020297789284e+02 1.466814775843215e+02 4 5.859231778728107e+02 
4.307045544591158e+02 1.494167815772794e+02 3.680681764562588e+02 - ME 6.887775529805495e-05 + ME 2.562496117427957e-05 Event 184 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1476,7 +1476,7 @@ Event 184 Batch 0 2 5.645337360463252e+02 -3.940276919793660e+02 3.776398996283964e+02 1.443212503288767e+02 3 5.368100353438223e+02 2.392766596964613e+02 -1.719264331693737e+02 -4.487237410122139e+02 4 3.986562286098531e+02 1.547510322829050e+02 -2.057134664590229e+02 3.044024906833372e+02 - ME 3.553984578535888e-05 + ME 1.712138666139329e-05 Event 185 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1484,7 +1484,7 @@ Event 185 Batch 0 2 6.347397779710931e+02 2.522092504724420e+02 -1.599825720327363e+02 5.600809373302327e+02 3 4.566768168089404e+02 -3.359958684022406e+02 -1.272903681003782e+02 -2.818823400219340e+02 4 4.085834052199659e+02 8.378661792979838e+01 2.872729401331145e+02 -2.781985973082986e+02 - ME 1.184197550833168e-03 + ME 1.836859309200860e-04 Event 186 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1492,7 +1492,7 @@ Event 186 Batch 0 2 7.089823220133230e+02 -5.197119220861886e+02 4.248734840868308e+02 -2.281183322067745e+02 3 5.364076825758043e+02 3.588264146200084e+02 -3.973752875032956e+02 3.270606945152315e+01 4 2.546099954108725e+02 1.608855074661802e+02 -2.749819658353518e+01 1.954122627552515e+02 - ME 2.583895514537347e-05 + ME 1.318469173008218e-05 Event 187 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1500,7 +1500,7 @@ Event 187 Batch 0 2 4.835105223217566e+02 -2.128653471696258e+02 1.375287019182911e+02 -4.117725407538514e+02 3 7.240136612790383e+02 4.407273454759851e+02 -4.896543389042274e+01 5.723264583716990e+02 4 2.924758163992057e+02 -2.278619983063593e+02 -8.856326802786833e+01 -1.605539176178473e+02 - ME 5.307563978210835e-04 
+ ME 9.185777086042985e-05 Event 188 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1508,7 +1508,7 @@ Event 188 Batch 0 2 6.611118500396009e+02 3.502021063704277e+02 -2.011693879247277e+02 -5.234102027267809e+02 3 3.072944371702247e+02 -6.894916504330918e+01 -1.599953986835475e+02 2.531350551695447e+02 4 5.315937127901742e+02 -2.812529413271184e+02 3.611647866082752e+02 2.702751475572362e+02 - ME 6.863567490702385e-05 + ME 3.862980709292737e-05 Event 189 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1516,7 +1516,7 @@ Event 189 Batch 0 2 7.498478362545707e+02 6.780504955298834e+02 -3.199144947524264e+02 -1.319162971889924e+01 3 3.253008430749361e+02 -2.985087551774363e+02 1.291384938207140e+02 6.034152914782593e+00 4 4.248513206704935e+02 -3.795417403524470e+02 1.907760009317124e+02 7.157476804116639e+00 - ME 8.583750584152986e-05 + ME 1.504471760657040e-05 Event 190 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1524,7 +1524,7 @@ Event 190 Batch 0 2 4.938867893347995e+02 3.689671478502748e+02 -1.218724623869293e+02 3.048516153777389e+02 3 5.264063001598521e+02 6.631942569346465e+01 1.276367949726208e+02 -5.063735530147588e+02 4 4.797069105053494e+02 -4.352865735437401e+02 -5.764332585691415e+00 2.015219376370201e+02 - ME 4.759343488474735e-05 + ME 2.269926034328256e-05 Event 191 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1532,7 +1532,7 @@ Event 191 Batch 0 2 3.681793141805986e+02 -3.225132888415706e+02 1.579589482507471e+02 -8.117977937027918e+01 3 5.431126642386394e+02 4.058413736814005e+01 9.147123993851424e+01 5.338139246166097e+02 4 5.887080215807621e+02 2.819291514734305e+02 -2.494301881892614e+02 -4.526341452463304e+02 - ME 4.908990110546420e-03 + ME 1.427494731558637e-03 Event 192 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -1540,7 +1540,7 @@ Event 192 Batch 0 2 6.054165399887861e+02 1.497087111729466e+02 8.905021611535379e+01 5.798159601983524e+02 3 2.106656439489222e+02 1.451894976721945e+02 -1.487249448604451e+02 3.436443048222171e+01 4 6.839178160622922e+02 -2.948982088451411e+02 5.967472874509133e+01 -6.141803906805740e+02 - ME 4.294450320853435e-02 + ME 6.984876913518998e-03 Event 193 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1548,7 +1548,7 @@ Event 193 Batch 0 2 2.753169163933055e+02 -1.695475157411122e+02 -2.139406274107579e+02 3.581134319495643e+01 3 5.760219428901971e+02 -3.264616044953138e+02 1.527507522369444e+02 -4.493231656306969e+02 4 6.486611407164972e+02 4.960091202364260e+02 6.118987517381347e+01 4.135118224357404e+02 - ME 1.537583375796735e-04 + ME 4.273063058931925e-05 Event 194 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1556,7 +1556,7 @@ Event 194 Batch 0 2 3.445934948105150e+02 -2.970257025567896e+02 -8.183019525038441e+01 1.543509890854414e+02 3 7.485441862377920e+02 6.623797851941252e+02 1.083400559332054e+02 -3.314119056355291e+02 4 4.068623189516925e+02 -3.653540826373358e+02 -2.650986068282081e+01 1.770609165500877e+02 - ME 3.024610065690235e-05 + ME 4.921158833271929e-06 Event 195 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1564,7 +1564,7 @@ Event 195 Batch 0 2 2.012122274303647e+02 -5.190018365965096e+01 1.322177369426910e+02 -1.425173724194237e+02 3 7.122630330184543e+02 -3.054768058087834e+02 -2.528097616133813e+02 5.916838461125119e+02 4 5.865247395511832e+02 3.573769894684365e+02 1.205920246706904e+02 -4.491664736930883e+02 - ME 3.011639483286710e-03 + ME 4.696445912229638e-04 Event 196 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1572,7 +1572,7 @@ Event 196 Batch 0 2 
4.490485793345989e+02 3.485190427929747e+02 -2.661098616642627e+01 -2.819059396826192e+02 3 5.531554978829222e+02 -3.330165694254377e+02 4.416170126965178e+02 7.442003978758296e+00 4 4.977959227824785e+02 -1.550247336753688e+01 -4.150060265300915e+02 2.744639357038610e+02 - ME 4.340266456570635e-05 + ME 9.363355109875406e-06 Event 197 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1580,7 +1580,7 @@ Event 197 Batch 0 2 3.951249254444253e+02 -2.278358800090239e+02 3.101157211704546e+02 -8.968142489336992e+01 3 3.607080640108546e+02 -2.889948719219027e+02 2.155030307719242e+02 -1.227661082778765e+01 4 7.441670105447209e+02 5.168307519309257e+02 -5.256187519423792e+02 1.019580357211576e+02 - ME 3.377741088449004e-02 + ME 6.597373610109231e-03 Event 198 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1588,7 +1588,7 @@ Event 198 Batch 0 2 3.750236904637998e+02 1.183014344420310e+02 -1.005952209347265e+02 -3.413621838211424e+02 3 4.381296266085964e+02 -2.726825461625328e+02 1.003845461170281e+02 -3.279096546785175e+02 4 6.868466829276033e+02 1.543811117205018e+02 2.106748176980602e-01 6.692718384996598e+02 - ME 9.606390506705955e-04 + ME 6.145502577419889e-04 Event 199 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1596,7 +1596,7 @@ Event 199 Batch 0 2 2.454478562244572e+02 -2.058455361543722e+02 -1.131056012155068e+02 -7.126982772660261e+01 3 5.321797086694488e+02 -9.806778012582416e+01 -4.820333037417012e+02 -2.030808875905193e+02 4 7.223724351060940e+02 3.039133162801963e+02 5.951389049572081e+02 2.743507153171219e+02 - ME 1.577081887352965e-03 + ME 3.088173795554332e-04 Event 200 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1604,7 +1604,7 @@ Event 200 Batch 0 2 3.952431318363244e+02 3.031309873729303e+02 9.337877017948550e+01 2.358159092128122e+02 3 
6.094031244332663e+02 -7.796753338981905e+01 -5.315426896439308e+02 -2.876727322709444e+02 4 4.953537437304092e+02 -2.251634539831113e+02 4.381639194644453e+02 5.185682305813224e+01 - ME 6.703240553489506e-05 + ME 1.668296552597111e-05 Event 201 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1612,7 +1612,7 @@ Event 201 Batch 0 2 6.497938633639732e+02 3.771120671245744e+02 3.553445817627057e+02 -3.921081252746440e+02 3 3.369790646193914e+02 -2.140351778515325e+02 1.061239955238163e+02 2.376584318047305e+02 4 5.132270720166357e+02 -1.630768892730420e+02 -4.614685772865220e+02 1.544496934699135e+02 - ME 6.283412004793947e-05 + ME 2.404518058628388e-05 Event 202 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1620,7 +1620,7 @@ Event 202 Batch 0 2 7.267802742470179e+02 6.523432021666289e+02 -1.481957728499301e+02 2.840702844913056e+02 3 3.546086620137576e+02 -3.102429173963679e+02 -5.939291787501398e+01 -1.611493614224694e+02 4 4.186110637392242e+02 -3.421002847702610e+02 2.075886907249440e+02 -1.229209230688360e+02 - ME 1.894138330341389e-04 + ME 2.830403199974809e-05 Event 203 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1628,7 +1628,7 @@ Event 203 Batch 0 2 4.830190702985662e+02 2.789429895135886e+02 -3.943102945050296e+02 -4.197918611657844e+00 3 5.247163710833165e+02 -4.266462829986153e+02 3.263988520595893e+01 3.037019215942698e+02 4 4.922645586181170e+02 1.477032934850268e+02 3.616704092990706e+02 -2.995040029826120e+02 - ME 5.831910678002871e-04 + ME 5.153190919865371e-05 Event 204 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1636,7 +1636,7 @@ Event 204 Batch 0 2 6.952375769935185e+02 3.823764713153302e+01 6.531840992713522e+02 -2.350397908115460e+02 3 6.250862947179036e+02 1.031861473443961e+02 -5.506835576815644e+02 2.771878679515999e+02 4 
1.796761282885781e+02 -1.414237944759291e+02 -1.025005415897879e+02 -4.214807714005369e+01 - ME 1.802858800889920e-04 + ME 1.903000177287069e-05 Event 205 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1644,7 +1644,7 @@ Event 205 Batch 0 2 5.625197268936781e+02 2.955060596751036e+02 4.395356105446072e+02 -1.895074112086703e+02 3 3.144813194259642e+02 -1.941101430078122e+02 -7.073026664887073e+00 -2.473251401357733e+02 4 6.229989536803572e+02 -1.013959166672914e+02 -4.324625838797200e+02 4.368325513444433e+02 - ME 1.140145509231641e-04 + ME 3.163472493443465e-05 Event 206 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1652,7 +1652,7 @@ Event 206 Batch 0 2 5.487698581700869e+02 -4.771827558939671e+02 -2.639484985605369e+02 6.145050708573941e+01 3 4.357856725513919e+02 1.877155863290790e+02 1.701172104948722e+02 3.545872893148349e+02 4 5.154444692785200e+02 2.894671695648880e+02 9.383128806566407e+01 -4.160377964005746e+02 - ME 4.167786087259531e-03 + ME 3.341888001113221e-04 Event 207 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1660,7 +1660,7 @@ Event 207 Batch 0 2 5.289473514933904e+02 -3.230637718239221e+02 -3.258094337294262e+02 2.631792409740627e+02 3 3.730441408755686e+02 -1.145152671243400e+02 -7.298530142052728e+01 -3.474497523579300e+02 4 5.980085076310412e+02 4.375790389482623e+02 3.987947351499535e+02 8.427051138386733e+01 - ME 1.161501350367753e-04 + ME 3.789028948405571e-05 Event 208 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1668,7 +1668,7 @@ Event 208 Batch 0 2 3.144460531270953e+02 3.105028133645123e+02 -3.495125011961062e+01 3.525242310830974e+01 3 7.230517599976935e+02 -6.554206809343713e+02 2.220922910679198e+02 2.095294558946058e+02 4 4.625021868752117e+02 3.449178675698588e+02 -1.871410409483092e+02 -2.447818790029155e+02 - 
ME 4.858457850437588e-04 + ME 2.941989209837521e-05 Event 209 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1676,7 +1676,7 @@ Event 209 Batch 0 2 2.827014058170527e+02 -6.682954863774688e+01 -1.958656753088385e+02 -1.925890275057887e+02 3 5.969812148172332e+02 5.625717004655273e+02 1.060136244597389e+02 -1.692949027847388e+02 4 6.203173793657136e+02 -4.957421518277804e+02 8.985205084909943e+01 3.618839302905275e+02 - ME 1.004351001266980e-04 + ME 2.261939336541961e-05 Event 210 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1684,7 +1684,7 @@ Event 210 Batch 0 2 3.369223392964550e+02 -2.366581006943837e+02 8.850719545688517e+01 -2.228813191927023e+02 3 6.926279093100447e+02 9.835546321295956e+01 -1.581805884470998e+02 6.671120783270956e+02 4 4.704497513935005e+02 1.383026374814242e+02 6.967339299021461e+01 -4.442307591343933e+02 - ME 5.974710408786874e-02 + ME 3.044010300440331e-03 Event 211 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1692,7 +1692,7 @@ Event 211 Batch 0 2 5.754314663824422e+02 -1.965408456680789e+02 -5.399725108422632e+02 3.037689947684008e+01 3 6.656941886103589e+02 4.112771407945243e+02 5.114655840792436e+02 1.113679599883347e+02 4 2.588743450071987e+02 -2.147362951264454e+02 2.850692676301957e+01 -1.417448594651748e+02 - ME 4.382347812376007e-04 + ME 1.754510489093768e-05 Event 212 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1700,7 +1700,7 @@ Event 212 Batch 0 2 5.922157374848572e+02 8.073316194509509e+00 4.947261155542873e+02 -3.254233732830556e+02 3 3.635572903001510e+02 8.951663862813328e+01 4.011175755255380e+01 3.500738802669425e+02 4 5.442269722149914e+02 -9.758995482264278e+01 -5.348378731068407e+02 -2.465050698388706e+01 - ME 3.041427876287276e-04 + ME 1.919214373141161e-04 Event 213 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1708,7 +1708,7 @@ Event 213 Batch 0 2 7.434820262506830e+02 2.991548764052629e+02 2.111623598614188e+02 -6.470566753063675e+02 3 5.607612173038236e+02 -2.664197873565705e+02 -1.905271140771768e+02 4.551626726109781e+02 4 1.957567564454930e+02 -3.273508904869271e+01 -2.063524578424195e+01 1.918940026953895e+02 - ME 1.827786070323022e-04 + ME 1.896082550340891e-04 Event 214 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1716,7 +1716,7 @@ Event 214 Batch 0 2 5.400874280734793e+02 3.457358963402696e+02 2.445843697627679e+02 -3.351710101016577e+02 3 3.400793067879315e+02 1.482066942304564e+02 1.256466447865830e+02 2.791086371729012e+02 4 6.198332651385892e+02 -4.939425905707261e+02 -3.702310145493508e+02 5.606237292875651e+01 - ME 1.356968066378560e-04 + ME 6.515553919952984e-05 Event 215 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1724,7 +1724,7 @@ Event 215 Batch 0 2 3.916345321859864e+02 3.271767110560381e+02 -1.945589530122144e+02 9.208594000107233e+01 3 6.136750729169615e+02 -1.269585669220027e+02 2.644680756040779e+02 -5.390132228350478e+02 4 4.946903948970534e+02 -2.002181441340350e+02 -6.990912259186331e+01 4.469272828339764e+02 - ME 6.207321332343461e-05 + ME 3.427926940877871e-05 Event 216 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1732,7 +1732,7 @@ Event 216 Batch 0 2 3.767411090262154e+02 1.602503356822860e+02 2.758455349572533e+02 -2.004069210086422e+02 3 4.061922956351256e+02 3.340053729931861e+02 2.237650079776778e+02 5.798114391563544e+01 4 7.170665953386593e+02 -4.942557086754721e+02 -4.996105429349309e+02 1.424257770930068e+02 - ME 1.232271832865728e-03 + ME 2.360785017217177e-04 Event 217 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1740,7 +1740,7 @@ Event 
217 Batch 0 2 6.474118977458852e+02 -5.378641111590873e+02 -3.279650037002520e+02 1.492759847325320e+02 3 5.088298200539713e+02 3.261878344469131e+02 1.555821256186315e+02 -3.581947579501665e+02 4 3.437582822001433e+02 2.116762767121744e+02 1.723828780816206e+02 2.089187732176345e+02 - ME 3.357118960820415e-05 + ME 1.388331578224744e-05 Event 218 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1748,7 +1748,7 @@ Event 218 Batch 0 2 6.658501161076259e+02 -6.577627036244854e+02 -3.020200479570956e+01 9.895676706252418e+01 3 2.516345839620714e+02 1.565221509782131e+02 -1.156477271957936e+02 1.595192254662914e+02 4 5.825152999303023e+02 5.012405526462722e+02 1.458497319915031e+02 -2.584759925288157e+02 - ME 5.956187308313417e-04 + ME 1.036808356896783e-04 Event 219 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1756,7 +1756,7 @@ Event 219 Batch 0 2 4.328556070633435e+02 6.122246558068494e+01 -1.687441385117925e+02 3.938796795879554e+02 3 6.500677455605621e+02 -3.703058656885360e+02 4.356876543064814e+02 -3.092537914719426e+02 4 4.170766473760945e+02 3.090834001078509e+02 -2.669435157946888e+02 -8.462588811601287e+01 - ME 2.797067114354785e-04 + ME 9.046106878448173e-05 Event 220 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1764,7 +1764,7 @@ Event 220 Batch 0 2 3.686297280598666e+02 -3.497113779929074e+02 -8.765282776369953e+01 7.685577594963354e+01 3 4.155522773953191e+02 -1.777404948015450e+02 -1.525848366500187e+02 3.432344379292750e+02 4 7.158179945448145e+02 5.274518727944524e+02 2.402376644137182e+02 -4.200902138789084e+02 - ME 3.485410710153060e-03 + ME 1.676729229638681e-03 Event 221 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1772,7 +1772,7 @@ Event 221 Batch 0 2 5.295220830718469e+02 3.654688468413813e+01 4.204675060608333e+02 
3.197890523886257e+02 3 7.127556392876786e+02 -1.727486268095863e+02 -4.342549693537605e+02 -5.381460163035255e+02 4 2.577222776404743e+02 1.362017421254481e+02 1.378746329292729e+01 2.183569639148998e+02 - ME 2.819264207321091e-05 + ME 2.031931825964470e-05 Event 222 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1780,7 +1780,7 @@ Event 222 Batch 0 2 2.464305981122427e+02 -2.054199106396077e+02 6.127423271580306e+01 1.215572638876956e+02 3 6.926647117218595e+02 4.702892479611936e+02 3.872350261814336e+02 -3.296383785530530e+02 4 5.609046901658980e+02 -2.648693373215859e+02 -4.485092588972366e+02 2.080811146653574e+02 - ME 6.319142394583372e-05 + ME 1.678695785515194e-05 Event 223 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1788,7 +1788,7 @@ Event 223 Batch 0 2 2.463384302181125e+02 -1.209251938955738e+02 -2.140981972257043e+02 -1.488897673935926e+01 3 6.819620845265065e+02 -2.400891875757811e+02 5.819023806457059e+02 2.623339210620683e+02 4 5.716994852553812e+02 3.610143814713547e+02 -3.678041834200016e+02 -2.474449443227091e+02 - ME 3.931927185620913e-04 + ME 4.810915220985587e-05 Event 224 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1796,7 +1796,7 @@ Event 224 Batch 0 2 2.236851263016067e+02 -8.671871524968952e+01 1.717231909970332e+02 1.141317038679677e+02 3 5.308972974363861e+02 -3.715833295102001e+01 4.680039348616383e+02 2.478780257941054e+02 4 7.454175762620068e+02 1.238770482007099e+02 -6.397271258586715e+02 -3.620097296620728e+02 - ME 8.708656265179471e-02 + ME 6.017706528853119e-02 Event 225 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1804,7 +1804,7 @@ Event 225 Batch 0 2 5.094176014319268e+02 1.569347096242780e+02 -1.561291130928888e+00 -4.846394040251013e+02 3 7.252311334449815e+02 -3.845161955462210e+02 -4.374219820797174e+01 
6.133466494377277e+02 4 2.653512651230916e+02 2.275814859219426e+02 4.530348933890067e+01 -1.287072454126262e+02 - ME 3.974215742688118e-04 + ME 1.151501859389029e-04 Event 226 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1812,7 +1812,7 @@ Event 226 Batch 0 2 6.863217264048350e+02 -2.391756120967483e+02 -6.171186323675804e+02 1.816511279850093e+02 3 5.332348374442744e+02 1.096335504493486e+02 4.112484130583279e+02 -3.212391931833643e+02 4 2.804434361508906e+02 1.295420616473995e+02 2.058702193092524e+02 1.395880651983551e+02 - ME 3.797053871351767e-05 + ME 1.438206074993319e-05 Event 227 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1820,7 +1820,7 @@ Event 227 Batch 0 2 7.243206345463230e+02 -5.280189925476210e+02 -1.406011303275692e+02 4.754657162080069e+02 3 5.487499634657129e+02 3.840442912861271e+02 -1.353123555187442e+01 -3.917312987222202e+02 4 2.269294019879644e+02 1.439747012614939e+02 1.541323658794436e+02 -8.373441748578679e+01 - ME 2.903986554770466e-04 + ME 5.165623507180856e-05 Event 228 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1828,7 +1828,7 @@ Event 228 Batch 0 2 2.119578664379945e+02 1.625437651479949e+01 -1.806612394559917e+02 1.096514885776142e+02 3 6.254097456672617e+02 -3.200704000326812e+01 3.158243706171928e+02 5.388579277416935e+02 4 6.626323878947439e+02 1.575266348846865e+01 -1.351631311612011e+02 -6.485094163193077e+02 - ME 8.951233069377997e-01 + ME 3.800526374221887e-02 Event 229 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1836,7 +1836,7 @@ Event 229 Batch 0 2 5.921227120343664e+02 -3.877491982207575e+02 4.449193714386763e+02 -4.802726626309342e+01 3 4.688278331283221e+02 3.470549659129084e+02 -1.517581364471262e+02 -2.762641051115459e+02 4 4.390494548373113e+02 4.069423230784909e+01 -2.931612349915501e+02 
3.242913713746393e+02 - ME 3.492131538818778e-05 + ME 1.250052930035257e-05 Event 230 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1844,7 +1844,7 @@ Event 230 Batch 0 2 4.261952284727868e+02 2.153699775439378e+02 -1.171086083390750e+02 3.486312082969335e+02 3 3.540619701921573e+02 3.070144260847319e+01 1.307424531367546e+02 3.276029778648147e+02 4 7.197428013350559e+02 -2.460714201524109e+02 -1.363384479767965e+01 -6.762341861617483e+02 - ME 3.186738302883428e-01 + ME 4.711214236813061e-02 Event 231 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1852,7 +1852,7 @@ Event 231 Batch 0 2 4.205236024420392e+02 7.533931576750228e+01 -3.260217181731272e+02 -2.547036061581322e+02 3 5.397543491930860e+02 8.423195081267914e+01 -1.158376015978276e+02 5.204050211049134e+02 4 5.397220483648740e+02 -1.595712665801811e+02 4.418593197709548e+02 -2.657014149467809e+02 - ME 5.532186388062512e-04 + ME 3.265984123744224e-04 Event 232 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1860,7 +1860,7 @@ Event 232 Batch 0 2 4.295782852421121e+02 3.239064445356881e+02 9.240815775655221e-01 2.821724019337124e+02 3 7.183371274312143e+02 -6.155391061575082e+02 -1.955291718271078e+02 -3.144649112405858e+02 4 3.520845873266736e+02 2.916326616218201e+02 1.946050902495422e+02 3.229250930687335e+01 - ME 6.730603828970119e-05 + ME 1.049779024540051e-05 Event 233 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1868,7 +1868,7 @@ Event 233 Batch 0 2 3.640046126075324e+02 -2.220120664068515e+02 -1.165482463207536e+02 2.638683509799470e+02 3 4.682121509308883e+02 -1.009786196736112e+02 3.762431872847591e+02 2.597441061312976e+02 4 6.677832364615790e+02 3.229906860804628e+02 -2.596949409640055e+02 -5.236124571112447e+02 - ME 5.385640989777132e-03 + ME 7.598357868514145e-04 Event 234 Batch 0 
0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1876,7 +1876,7 @@ Event 234 Batch 0 2 8.690043548936441e+01 -2.607433849884744e+01 -7.258333015587984e+01 4.004341073848801e+01 3 6.785651905172676e+02 -3.574930335951373e+02 -4.725723606052789e+01 5.748184081539155e+02 4 7.345343739933678e+02 3.835673720939847e+02 1.198405662164078e+02 -6.148618188924036e+02 - ME 1.962113644780599e-01 + ME 8.152211059226219e-02 Event 235 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1884,7 +1884,7 @@ Event 235 Batch 0 2 3.000566282865331e+02 1.219146462304108e+01 -2.126850238006026e+02 2.113064812540423e+02 3 7.160981218147422e+02 2.575873756248088e+02 2.779062108697769e+02 -6.076293293985470e+02 4 4.838452498987246e+02 -2.697788402478500e+02 -6.522118706917435e+01 3.963228481445046e+02 - ME 3.940402333844027e-05 + ME 2.498899672933017e-05 Event 236 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1892,7 +1892,7 @@ Event 236 Batch 0 2 1.510518772182422e+02 -9.497518588910037e+01 1.467158067736534e+01 1.165380984781943e+02 3 6.955499852411461e+02 5.933480346078575e+02 3.495450158124774e+02 9.770452249822526e+01 4 6.533981375406115e+02 -4.983728487187572e+02 -3.642165964898426e+02 -2.142426209764196e+02 - ME 1.121647028585911e-03 + ME 2.623118294900277e-04 Event 237 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1900,7 +1900,7 @@ Event 237 Batch 0 2 2.173874152942701e+02 2.069918593916189e+02 -3.850229167793934e+01 -5.412237993169356e+01 3 7.305677895866185e+02 -6.701932224704495e+02 -2.421540700080861e+02 1.610333695687662e+02 4 5.520447951191120e+02 4.632013630788306e+02 2.806563616860255e+02 -1.069109896370727e+02 - ME 1.822378225061386e-04 + ME 2.170005261464319e-05 Event 238 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-1908,7 +1908,7 @@ Event 238 Batch 0 2 6.349573912113930e+02 -3.336495545457479e+02 -4.785400196851591e+02 2.506956580500139e+02 3 5.768887318987100e+02 4.812119270965607e+02 2.334547330568691e+02 -2.161818165921041e+02 4 2.881538768898968e+02 -1.475623725508129e+02 2.450852866282900e+02 -3.451384145790988e+01 - ME 9.810731053503000e-05 + ME 1.383744831772315e-05 Event 239 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1916,7 +1916,7 @@ Event 239 Batch 0 2 5.349076725903783e+02 -5.331874414268931e+02 1.887721601290929e+01 -3.848403846142781e+01 3 3.658437465440003e+02 8.335465236419728e+01 1.670818061666301e+01 -3.558292926602242e+02 4 5.992485808656214e+02 4.498327890626960e+02 -3.558539662957234e+01 3.943133311216517e+02 - ME 9.226736931333760e-05 + ME 2.560110521983184e-05 Event 240 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1924,7 +1924,7 @@ Event 240 Batch 0 2 2.870582387324442e+02 1.830793600232297e+02 -1.562409872742485e+02 1.564389154054251e+02 3 6.007192677438852e+02 3.433229388031108e+02 4.688113613010560e+02 -1.523446941819630e+02 4 6.122224935236703e+02 -5.264022988263405e+02 -3.125703740268075e+02 -4.094221223461989e+00 - ME 1.424405912705748e-04 + ME 3.548113744927254e-05 Event 241 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1932,7 +1932,7 @@ Event 241 Batch 0 2 7.424696267657401e+02 4.823783107714221e+02 2.498315161211407e+02 5.061190823507636e+02 3 2.455726236162737e+02 -1.827879695947952e+02 -1.199757723946156e+02 -1.118046764652876e+02 4 5.119577496179861e+02 -2.995903411766270e+02 -1.298557437265251e+02 -3.943144058854759e+02 - ME 2.705973755259623e-03 + ME 2.366266620918590e-04 Event 242 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1940,7 +1940,7 @@ Event 242 Batch 0 2 7.249130370348905e+02 1.676828147928013e+02 
6.059046362201677e+02 -3.609168279440810e+02 3 6.240672718074169e+02 -4.529413961306761e+01 -5.490982345027019e+02 2.930862151720549e+02 4 1.510196911576933e+02 -1.223886751797337e+02 -5.680640171746593e+01 6.783061277202641e+01 - ME 4.587322306592483e-05 + ME 1.668420503127583e-05 Event 243 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1948,7 +1948,7 @@ Event 243 Batch 0 2 4.655090712555229e+02 2.096323612054770e+02 2.113490506800235e+02 3.578890153850057e+02 3 5.764797256412519e+02 6.697224883641857e+01 -5.382210340689440e+02 -1.953502251008744e+02 4 4.580112031032257e+02 -2.766046100418949e+02 3.268719833889206e+02 -1.625387902841314e+02 - ME 2.309042201876567e-04 + ME 3.999521919602606e-05 Event 244 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1956,7 +1956,7 @@ Event 244 Batch 0 2 5.237109195354749e+02 1.305098338947756e+02 -4.868141165486322e+02 -1.423106687020528e+02 3 5.804450110242352e+02 -4.045654344879671e+02 2.643676733537771e+02 3.214855413949400e+02 4 3.958440694402901e+02 2.740556005931916e+02 2.224464431948551e+02 -1.791748726928872e+02 - ME 2.644202232750943e-04 + ME 2.634847163425152e-05 Event 245 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1964,7 +1964,7 @@ Event 245 Batch 0 2 2.629169357520612e+02 2.457511487795889e+02 -4.402365929491729e+01 -8.242333044139184e+01 3 6.931386101565748e+02 -5.195573187661655e+02 4.004017488088275e+02 -2.240084037645317e+02 4 5.439444540913644e+02 2.738061699865766e+02 -3.563780895139104e+02 3.064317342059234e+02 - ME 4.288053786412853e-05 + ME 1.052590061693975e-05 Event 246 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1972,7 +1972,7 @@ Event 246 Batch 0 2 6.300937687157445e+02 -5.459948028041557e+02 3.085954426748102e+02 6.063567799240802e+01 3 1.673910408536145e+02 -3.546130270298926e+01 
7.662824936562275e+01 -1.445350060290698e+02 4 7.025151904306430e+02 5.814561055071442e+02 -3.852236920404341e+02 8.389932803666261e+01 - ME 6.282756509154168e-04 + ME 1.915763997923398e-04 Event 247 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1980,7 +1980,7 @@ Event 247 Batch 0 2 2.577847506495701e+02 2.418237207037818e+02 -8.449121421856779e+01 2.890502538162603e+01 3 5.130193185035739e+02 4.381905811488919e+02 1.366496386102691e+02 2.291390669832418e+02 4 7.291959308468561e+02 -6.800143018526737e+02 -5.215842439170134e+01 -2.580440923648679e+02 - ME 4.005872724472581e-03 + ME 1.831864018495938e-03 Event 248 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1988,7 +1988,7 @@ Event 248 Batch 0 2 7.033207479153643e+02 -5.040306065309413e+02 -2.020637997366072e+02 4.469714117975369e+02 3 1.758360012551320e+02 -1.471306652922549e+01 -4.035460943683606e+00 -1.751728862172264e+02 4 6.208432508295037e+02 5.187436730601667e+02 2.060992606802909e+02 -2.717985255803103e+02 - ME 5.592865021063005e-04 + ME 1.512538512828554e-04 Event 249 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1996,7 +1996,7 @@ Event 249 Batch 0 2 3.018816177222694e+02 5.523075638651412e+01 1.752331212074551e+02 2.395316845419020e+02 3 6.597415560701297e+02 6.315352823685419e+01 -6.561001191322722e+02 -2.834054254405022e+01 4 5.383768262076012e+02 -1.183842846233684e+02 4.808669979248172e+02 -2.111911419978518e+02 - ME 4.868100986861644e-04 + ME 9.225490912808109e-05 Event 250 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2004,7 +2004,7 @@ Event 250 Batch 0 2 2.166381935101301e+02 -1.289072913913530e+02 -1.189615590004073e+02 -1.271344351215279e+02 3 6.815426093761062e+02 -2.511966318704653e+02 5.323234433390903e+02 3.435583388650892e+02 4 6.018191971137635e+02 3.801039232618182e+02 
-4.133618843386827e+02 -2.164239037435611e+02 - ME 3.468666532553966e-04 + ME 6.586594805989363e-05 Event 251 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2012,7 +2012,7 @@ Event 251 Batch 0 2 6.676961532387151e+02 -3.991265595084280e+01 -4.419965947723094e+02 4.988628500443886e+02 3 7.150412702460949e+02 3.921851524844908e+01 5.505653759000154e+02 -4.545587894617490e+02 4 1.172625765151894e+02 6.941407023942340e-01 -1.085687811277060e+02 -4.430406058263954e+01 - ME 5.615833562023813e-04 + ME 4.930952510857648e-05 Event 252 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2020,7 +2020,7 @@ Event 252 Batch 0 2 2.112668789066533e+02 -1.147554660376938e+02 3.364589711187055e+01 -1.741632301749357e+02 3 7.393007599584276e+02 2.529046383258835e+02 -3.593132473314827e+02 5.945576909606565e+02 4 5.494323611349191e+02 -1.381491722881897e+02 3.256673502196121e+02 -4.203944607857206e+02 - ME 2.709805393201018e-03 + ME 3.541023077707110e-04 Event 253 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2028,7 +2028,7 @@ Event 253 Batch 0 2 7.299659304470913e+01 -4.405884533650594e+01 -5.451291667290519e+01 2.038780663930336e+01 3 7.253475305576840e+02 3.245698054519170e+02 -1.402290280555607e+02 -6.333397991328418e+02 4 7.016558763976062e+02 -2.805109601154107e+02 1.947419447284657e+02 6.129519924935382e+02 - ME 6.484723438037138e-04 + ME 3.511004874943257e-04 Event 254 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2036,7 +2036,7 @@ Event 254 Batch 0 2 1.982520535096858e+02 -6.164633378269741e+01 1.773450413210087e+02 -6.365801262063783e+01 3 7.183815394471145e+02 -1.984891252513599e+02 -6.893152145826987e+02 -3.896971029099802e+01 4 5.833664070431995e+02 2.601354590340572e+02 5.119701732616900e+02 1.026277229116358e+02 - ME 9.210498573936143e-05 + ME 
1.539519794804785e-05 Event 255 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2044,7 +2044,7 @@ Event 255 Batch 0 2 5.347080663542586e+02 -5.063606624096446e+02 1.592577719822621e+02 6.440929941880935e+01 3 2.475406015289465e+02 -1.856063881081879e+02 3.468010668896048e+00 -1.637516137347836e+02 4 7.177513321167953e+02 6.919670505178326e+02 -1.627257826511582e+02 9.934231431597431e+01 - ME 1.305481727349711e-03 + ME 3.137689362725149e-04 Event 0 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2052,7 +2052,7 @@ Event 0 Batch 1 2 5.775677821222389e+02 4.314431287975208e+02 -2.652567205762379e+02 -2.776332864556192e+02 3 6.023469575940325e+02 -3.228069847179709e+02 5.005558924007591e+02 8.978477890465942e+01 4 3.200852602837275e+02 -1.086361440795499e+02 -2.352991718245218e+02 1.878485075509607e+02 - ME 2.846168667868940e-05 + ME 7.533072458757011e-06 Event 1 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2060,7 +2060,7 @@ Event 1 Batch 1 2 7.241206267812560e+02 3.541578305635416e+02 -4.894807402105655e+02 3.991635230623179e+02 3 7.375567605136832e+02 -3.903081173548693e+02 4.920451519627784e+02 -3.867054653560791e+02 4 3.832261270506111e+01 3.615028679132773e+01 -2.564411752212873e+00 -1.245805770623896e+01 - ME 1.002871021831580e-03 + ME 7.043932941624384e-05 Event 2 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2068,7 +2068,7 @@ Event 2 Batch 1 2 4.849204091734790e+02 2.108660079931152e+02 4.054727376659824e+02 1.620962335024329e+02 3 2.728468517759738e+02 4.961449545460115e+01 2.005017763154939e+02 1.782774356422519e+02 4 7.422327390505470e+02 -2.604805034477164e+02 -6.059745139814763e+02 -3.403736691446848e+02 - ME 2.729395913593408e-02 + ME 1.721146206228212e-02 Event 3 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 
7.500000000000000e+02 @@ -2076,7 +2076,7 @@ Event 3 Batch 1 2 4.264155576764489e+02 -4.170952165204416e+02 -7.054834331799705e+01 5.370977042744418e+01 3 7.108631972082329e+02 6.832597695609467e+02 -1.727180704166534e+02 -9.301097030017993e+01 4 3.627212451153183e+02 -2.661645530405051e+02 2.432664137346505e+02 3.930119987273574e+01 - ME 5.466137525204964e-05 + ME 5.739226791327231e-06 Event 4 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2084,7 +2084,7 @@ Event 4 Batch 1 2 7.183269968238449e+02 -3.584978055671311e+02 -5.048824553914336e+02 -3.640971079361008e+02 3 7.387431276480253e+02 4.013538934928407e+02 5.036810263913359e+02 3.618865629982628e+02 4 4.292987552812846e+01 -4.285608792570924e+01 1.201429000097643e+00 2.210544937839338e+00 - ME 3.145606575501715e-04 + ME 5.884725836744927e-05 Event 5 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2092,7 +2092,7 @@ Event 5 Batch 1 2 4.529780005473896e+02 -8.443182436392424e+01 4.445408460134587e+02 -2.106590230986445e+01 3 4.683757780543924e+02 -6.076819021151039e+01 -1.335482427838441e+02 -4.448010379662153e+02 4 5.786462213982179e+02 1.452000145754347e+02 -3.109926032296145e+02 4.658669402760799e+02 - ME 8.481958952475706e-05 + ME 2.851579396246287e-05 Event 6 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2100,7 +2100,7 @@ Event 6 Batch 1 2 6.238848262005389e+02 -1.065131260140052e+02 -4.741487807795934e+02 -3.912418229627633e+02 3 1.729069432107234e+02 -1.460869767542721e+02 -8.199113358821990e+01 4.281191710484079e+01 4 7.032082305887380e+02 2.526001027682771e+02 5.561399143678132e+02 3.484299058579224e+02 - ME 4.868510537699180e-04 + ME 1.468701510222534e-04 Event 7 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2108,7 +2108,7 @@ Event 7 Batch 1 2 6.977203086376783e+02 -6.126072843634399e+02 
-1.744636661244187e+02 2.847602033865263e+02 3 1.614193396272251e+02 -4.571584237043670e+00 8.497734613495712e+01 -1.371646983269120e+02 4 6.408603517350967e+02 6.171788686004836e+02 8.948631998946138e+01 -1.475955050596143e+02 - ME 3.540796080305845e-04 + ME 9.523334397108766e-05 Event 8 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2116,7 +2116,7 @@ Event 8 Batch 1 2 6.871091945484288e+02 4.059708628308462e+02 2.886614153103366e+02 4.732666173272762e+02 3 5.653302025665631e+02 -2.838835484844413e+02 -7.353399035097291e+01 -4.833229987253825e+02 4 2.475606028850081e+02 -1.220873143464048e+02 -2.151274249593637e+02 1.005638139810634e+01 - ME 8.785466054587446e-05 + ME 3.726341895116938e-05 Event 9 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2124,7 +2124,7 @@ Event 9 Batch 1 2 1.618579955503452e+02 1.385215220188489e+01 1.601201234527701e+02 -1.917484467788566e+01 3 7.196660585644588e+02 -4.527189715496824e+02 -4.214090439733052e+02 3.679391067910628e+02 4 6.184759458851959e+02 4.388668193477974e+02 2.612889205205349e+02 -3.487642621131772e+02 - ME 1.054640649369016e-03 + ME 1.276556148007894e-04 Event 10 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2132,7 +2132,7 @@ Event 10 Batch 1 2 7.832785200561162e+01 1.027681340851886e+01 -7.242726264265977e+01 -2.799877018853974e+01 3 7.448007230566494e+02 2.520540107528716e+02 6.813719334665398e+02 1.641011304445167e+02 4 6.768714249377393e+02 -2.623308241613905e+02 -6.089446708238800e+02 -1.361023602559769e+02 - ME 5.876642887714617e-04 + ME 1.087112534498832e-04 Event 11 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2140,7 +2140,7 @@ Event 11 Batch 1 2 5.478627446486676e+02 2.070882322301630e+02 -4.708081692757452e+02 1.887000762823861e+02 3 6.997827604382593e+02 -4.209013422316021e+02 
4.569873120768409e+02 -3.220257264800591e+02 4 2.523544949130733e+02 2.138131100014392e+02 1.382085719890436e+01 1.333256501976729e+02 - ME 2.703695959900953e-05 + ME 7.092902148917371e-06 Event 12 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2148,7 +2148,7 @@ Event 12 Batch 1 2 5.802868936311938e+02 -4.467002255894120e+01 5.211262762381961e+02 -2.513262266832405e+02 3 5.208038834706859e+02 2.151797013176283e+01 -4.993650129388666e+02 -1.463155694111945e+02 4 3.989092228981199e+02 2.315205242717860e+01 -2.176126329932955e+01 3.976417960944350e+02 - ME 5.046437564325244e-04 + ME 4.980323856672599e-04 Event 13 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2156,7 +2156,7 @@ Event 13 Batch 1 2 5.774880087360024e+02 1.576445054854711e+02 5.481077151088400e+02 -9.065617884226717e+01 3 5.915098138161557e+02 -3.018001633277128e+02 -3.808656371901898e+02 3.372564123391869e+02 4 3.310021774478421e+02 1.441556578422419e+02 -1.672420779186502e+02 -2.466002334969197e+02 - ME 1.505341700965184e-03 + ME 5.587942683639647e-05 Event 14 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2164,7 +2164,7 @@ Event 14 Batch 1 2 2.531797527967491e+02 -8.400833666640553e+01 -2.384535242035555e+02 -1.350938161690895e+01 3 5.261064571264828e+02 -1.751971590790252e+02 -3.334570051994592e+02 3.672878780523887e+02 4 7.207137900767681e+02 2.592054957454308e+02 5.719105294030147e+02 -3.537784964354798e+02 - ME 3.373121845959189e-03 + ME 1.659114310450813e-03 Event 15 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2172,7 +2172,7 @@ Event 15 Batch 1 2 4.605848765362425e+02 3.563504404614684e+02 1.735853700506503e+02 2.345653669687875e+02 3 4.216445088607453e+02 1.370719005416187e+02 -3.933730877164850e+02 6.521502736890037e+01 4 6.177706146030118e+02 -4.934223410030871e+02 
2.197877176658347e+02 -2.997803943376878e+02 - ME 4.613631402771334e-04 + ME 9.110622752737525e-05 Event 16 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2180,7 +2180,7 @@ Event 16 Batch 1 2 4.972484926572777e+02 -1.474122335888775e+02 -4.748950276275915e+02 -6.399787981958280e-01 3 5.072511849723048e+02 4.846784046822065e+02 1.224000792205880e+02 -8.607455661990267e+01 4 4.955003223704169e+02 -3.372661710933285e+02 3.524949484070036e+02 8.671453541809866e+01 - ME 5.856804747367533e-05 + ME 1.035537635543116e-05 Event 17 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2188,7 +2188,7 @@ Event 17 Batch 1 2 3.182636773520259e+02 -9.176062613973060e+01 -1.890905041641619e+02 2.389906630959087e+02 3 6.376303990615819e+02 -4.240378519397394e+02 2.706855745366566e+02 -3.917827786765570e+02 4 5.441059235863918e+02 5.157984780794702e+02 -8.159507037249479e+01 1.527921155806483e+02 - ME 7.445984612273079e-05 + ME 2.964570775197734e-05 Event 18 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2196,7 +2196,7 @@ Event 18 Batch 1 2 5.532560008158404e+02 -4.148613005881325e+02 1.689647846464811e+02 -3.247047971041214e+02 3 3.650144721835348e+02 -1.597348634907620e+02 -2.160675866909894e+02 2.470529017650751e+02 4 5.817295270006244e+02 5.745961640788944e+02 4.710280204450838e+01 7.765189533904635e+01 - ME 9.119298978738387e-05 + ME 3.148325734685632e-05 Event 19 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2204,7 +2204,7 @@ Event 19 Batch 1 2 3.263687475619531e+02 -1.904667433734991e+02 2.390747946355329e+02 -1.143775398573919e+02 3 7.331345945903582e+02 2.597391859223821e+02 -6.739404183465077e+02 1.258022320965774e+02 4 4.404966578476884e+02 -6.927244254888298e+01 4.348656237109747e+02 -1.142469223918529e+01 - ME 8.793129888044293e-05 + ME 9.665339952809457e-06 
Event 20 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2212,7 +2212,7 @@ Event 20 Batch 1 2 9.588718605412237e+01 4.259536217794532e+01 8.056474827260676e+01 -2.982128277051557e+01 3 7.250265356668370e+02 3.120913743414047e+02 -4.446787057645155e+02 4.801284204484703e+02 4 6.790862782790414e+02 -3.546867365193502e+02 3.641139574919093e+02 -4.503071376779550e+02 - ME 3.686389281265799e-03 + ME 6.402422614019696e-04 Event 21 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2220,7 +2220,7 @@ Event 21 Batch 1 2 1.825278201605081e+02 -1.533737674675502e+02 8.574830442242751e+01 4.939757963742074e+01 3 7.183016103669913e+02 1.713205736990392e+02 -6.275703015775031e+02 -3.045685162014731e+02 4 5.991705694725008e+02 -1.794680623148897e+01 5.418219971550755e+02 2.551709365640523e+02 - ME 7.470861105912214e-05 + ME 1.806434468406198e-05 Event 22 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2228,7 +2228,7 @@ Event 22 Batch 1 2 2.349542451120770e+02 9.235159917618290e+01 -2.156570331301489e+02 -1.291214495308476e+01 3 7.360601907662837e+02 -2.182033070539752e+02 6.568866822530020e+02 -2.503433799808774e+02 4 5.289855641216395e+02 1.258517078777923e+02 -4.412296491228531e+02 2.632555249339621e+02 - ME 3.893602972207037e-05 + ME 8.007442232312076e-06 Event 23 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2236,7 +2236,7 @@ Event 23 Batch 1 2 2.350908908124364e+02 -7.377772511691019e+00 -2.298431804723787e+02 -4.884063683135331e+01 3 6.797114625392685e+02 -5.485955088721076e+02 3.603976926464840e+02 1.765336882516069e+02 4 5.851976466482949e+02 5.559732813837987e+02 -1.305545121741055e+02 -1.276930514202538e+02 - ME 2.057468423101862e-04 + ME 3.185713653214173e-05 Event 24 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 
7.500000000000000e+02 @@ -2244,7 +2244,7 @@ Event 24 Batch 1 2 4.355364173804401e+02 2.538053291625626e+02 -2.665393838801487e+02 -2.328767540869265e+02 3 4.093863144993796e+02 -1.953012891316528e+02 -3.573484670764558e+02 4.191221827828568e+01 4 6.550772681201798e+02 -5.850404003090968e+01 6.238878509566048e+02 1.909645358086408e+02 - ME 1.895168702655672e-04 + ME 3.721637657688893e-05 Event 25 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2252,7 +2252,7 @@ Event 25 Batch 1 2 7.365386968907909e+02 3.875876454009267e+02 3.151568854896985e+02 5.412404333367775e+02 3 5.208510884285567e+02 -2.430585576296288e+02 -1.518636440371932e+02 -4.349089876054084e+02 4 2.426102146806534e+02 -1.445290877712977e+02 -1.632932414525050e+02 -1.063314457313693e+02 - ME 3.717867207603688e-04 + ME 7.982561935336398e-05 Event 26 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2260,7 +2260,7 @@ Event 26 Batch 1 2 7.198867014174701e+02 5.189601929589824e+02 4.797253921416957e+02 -1.370428003807496e+02 3 3.889101953712928e+02 -1.847394503243419e+02 -2.837815501141775e+02 1.912864537085460e+02 4 3.912031032112371e+02 -3.342207426346404e+02 -1.959438420275183e+02 -5.424365332779646e+01 - ME 1.222836766708484e-04 + ME 1.928349098758061e-05 Event 27 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2268,7 +2268,7 @@ Event 27 Batch 1 2 6.732032222628646e+02 5.870808395006010e+02 -9.126179303429218e+01 3.165595544104447e+02 3 1.177373967283342e+02 7.847176641415683e+01 5.304379211899001e+00 -8.761358356661104e+01 4 7.090593810088013e+02 -6.655526059147578e+02 8.595741382239324e+01 -2.289459708438336e+02 - ME 1.603290018002586e-03 + ME 6.795383824785976e-04 Event 28 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2276,7 +2276,7 @@ Event 28 Batch 1 2 6.475300414228806e+02 
3.136396845517189e+02 3.816259196370642e+02 -4.186728559156669e+02 3 7.290923529036073e+02 -2.791764769994177e+02 -4.112865540505715e+02 5.333662195995520e+02 4 1.233776056735125e+02 -3.446320755230100e+01 2.966063441350738e+01 -1.146933636838856e+02 - ME 5.037107889244314e-02 + ME 6.311296815400830e-04 Event 29 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2284,7 +2284,7 @@ Event 29 Batch 1 2 3.156754590345620e+02 -2.870540678871016e+02 4.159516713841874e+01 -1.245825012466667e+02 3 4.770060274033896e+02 -2.355061130652810e+02 -3.231858413754910e+02 -2.600433287405434e+02 4 7.073185135620483e+02 5.225601809523826e+02 2.815906742370723e+02 3.846258299872100e+02 - ME 7.956699356695784e-04 + ME 1.321807869823317e-04 Event 30 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2292,7 +2292,7 @@ Event 30 Batch 1 2 6.091290614220995e+02 1.543004089904798e+02 4.216196287493766e+00 -5.892468251447810e+02 3 2.079357839022729e+02 2.034647466922837e+02 4.185675980476618e+01 9.348729279626889e+00 4 6.829351546756266e+02 -3.577651556827627e+02 -4.607295609226003e+01 5.798980958651539e+02 - ME 3.902231064020147e-04 + ME 1.448382779935031e-04 Event 31 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2300,7 +2300,7 @@ Event 31 Batch 1 2 6.901710072855793e+02 1.433309098684656e+01 6.447948515477649e+02 -2.457034416076623e+02 3 5.898919363861644e+02 1.120085307876391e+02 -4.815950471622465e+02 3.217029626736535e+02 4 2.199370563282564e+02 -1.263416217744856e+02 -1.631998043855182e+02 -7.599952106599136e+01 - ME 2.415465849322543e-04 + ME 2.376400497996635e-05 Event 32 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2308,7 +2308,7 @@ Event 32 Batch 1 2 6.144498311923271e+02 5.832947925341469e+02 -1.925283703230110e+02 1.576726595169125e+01 3 2.478450424037004e+02 
5.004284035329792e+01 2.389954177960992e+02 4.247433867565734e+01 4 6.377051264039724e+02 -6.333376328874447e+02 -4.646704747308818e+01 -5.824160462734862e+01 - ME 2.160220890176678e-04 + ME 5.390650629646604e-05 Event 33 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2316,7 +2316,7 @@ Event 33 Batch 1 2 6.134536717469736e+02 -1.625429495269566e+02 -1.853973484494194e+02 5.617232593785355e+02 3 5.361644687950269e+02 -3.755831293394986e+01 -9.992652347025609e+01 -5.254297294928764e+02 4 3.503818594579993e+02 2.001012624609065e+02 2.853238719196754e+02 -3.629352988565911e+01 - ME 1.224582992507153e-04 + ME 1.005452860076771e-04 Event 34 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2324,7 +2324,7 @@ Event 34 Batch 1 2 3.840838099420727e+02 -2.442269925519278e+02 -3.827314394217582e+01 -2.939535943332559e+02 3 6.022630974514659e+02 3.956891925431131e+01 5.086724982658299e+02 3.200116071158652e+02 4 5.136530926064613e+02 2.046580732976165e+02 -4.703993543236541e+02 -2.605801278260916e+01 - ME 9.608243105510499e-05 + ME 2.313941306740064e-05 Event 35 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2332,7 +2332,7 @@ Event 35 Batch 1 2 3.454350783663418e+02 -3.439607925797615e+02 2.363778141880094e+01 -2.139209721976717e+01 3 6.705698302143294e+02 5.215327591153251e+02 4.060443141865528e+02 -1.131171661597076e+02 4 4.839950914193290e+02 -1.775719665355635e+02 -4.296820956053536e+02 1.345092633794747e+02 - ME 4.862206803317224e-05 + ME 7.982017052260048e-06 Event 36 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2340,7 +2340,7 @@ Event 36 Batch 1 2 7.098652154429357e+02 2.489290984574327e+02 -1.674080692141068e+02 -6.433641786725617e+02 3 6.178479130357197e+02 -1.435715807033598e+02 2.588953561477193e+02 5.423065917191846e+02 4 1.722868715213448e+02 
-1.053575177540730e+02 -9.148728693361247e+01 1.010575869533772e+02 - ME 6.680529568232270e-05 + ME 5.562249548714765e-05 Event 37 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2348,7 +2348,7 @@ Event 37 Batch 1 2 6.906872786346031e+02 1.495946561071237e+02 1.712833879510068e+02 6.521750966909805e+02 3 3.682276595245592e+02 -1.358558710218083e+02 1.194309698061993e+02 -3.207351477449753e+02 4 4.410850618408380e+02 -1.373878508531530e+01 -2.907143577572061e+02 -3.314399489460051e+02 - ME 2.014943348935539e-03 + ME 5.542438863722841e-04 Event 38 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2356,7 +2356,7 @@ Event 38 Batch 1 2 6.131720166645955e+02 -5.222102655174087e+02 6.340623138461877e+00 3.213038392347352e+02 3 4.540063357567760e+02 2.932429176443922e+02 -3.207297067242505e+02 -1.313879727496968e+02 4 4.328216475786277e+02 2.289673478730168e+02 3.143890835857886e+02 -1.899158664850380e+02 - ME 2.589645049118943e-04 + ME 3.150821423911933e-05 Event 39 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2364,7 +2364,7 @@ Event 39 Batch 1 2 2.929747896182304e+02 2.510117592312210e+02 -1.378648144805472e+02 6.181113983529403e+01 3 6.287164314722783e+02 3.864928360025993e+01 6.254120614625328e+02 5.148142827864510e+01 4 5.783087789094894e+02 -2.896610428314818e+02 -4.875472469819856e+02 -1.132925681139394e+02 - ME 1.708238325115053e-04 + ME 2.723120294663496e-05 Event 40 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2372,7 +2372,7 @@ Event 40 Batch 1 2 1.143487538112954e+02 -3.203572478439017e+01 1.022340126870988e+02 3.996944439980560e+01 3 7.361483923235807e+02 5.924235295921244e+02 -3.838567751530157e+02 -2.088128187524163e+02 4 6.495028538651248e+02 -5.603878048077345e+02 2.816227624659169e+02 1.688433743526105e+02 - ME 2.026369815874481e-04 + ME 
4.279185076498264e-05 Event 41 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2380,7 +2380,7 @@ Event 41 Batch 1 2 6.384898508133350e+02 5.540399192408263e+02 -3.014826159773289e+02 -9.908223727147148e+01 3 3.510407251698805e+02 -1.719168197014114e+02 2.065966849440144e+02 -2.258140996521069e+02 4 5.104694240167846e+02 -3.821230995394149e+02 9.488593103331458e+01 3.248963369235784e+02 - ME 4.455092331482675e-05 + ME 1.488395965626735e-05 Event 42 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2388,7 +2388,7 @@ Event 42 Batch 1 2 3.291654598309212e+02 -1.090829060981258e+02 2.972891943885482e+02 -8.983292515941632e+01 3 6.884965239796815e+02 4.933628807557017e+02 -2.919492821202986e+02 3.812953554581829e+02 4 4.823380161893969e+02 -3.842799746575757e+02 -5.339912268249619e+00 -2.914624302987665e+02 - ME 6.690811667999076e-04 + ME 5.767145017550451e-05 Event 43 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2396,7 +2396,7 @@ Event 43 Batch 1 2 3.674173006007981e+02 2.791827424102563e+02 1.079644067383057e+02 2.130637369397045e+02 3 7.392205647816575e+02 -6.110484627794917e+02 -4.247874240022372e+01 -4.138385868609020e+02 4 3.933621346175442e+02 3.318657203692355e+02 -6.548566433808202e+01 2.007748499211975e+02 - ME 2.734436884563990e-05 + ME 6.513986915725277e-06 Event 44 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2404,7 +2404,7 @@ Event 44 Batch 1 2 2.081359682230012e+02 -1.082501549908087e+02 1.771964605001424e+02 1.427934167997762e+01 3 7.449563315308093e+02 5.092828751965591e+02 -5.388739609944279e+02 7.215083562608928e+01 4 5.469077002461893e+02 -4.010327202057504e+02 3.616775004942854e+02 -8.643017730606689e+01 - ME 1.760644262839344e-04 + ME 1.838899544278803e-05 Event 45 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -2412,7 +2412,7 @@ Event 45 Batch 1 2 5.180982465404422e+02 4.470261481799612e+02 -3.368837017252423e+01 -2.597277606009553e+02 3 3.377595659674062e+02 -7.316527185649456e+01 2.454727770679006e+02 -2.201624016839132e+02 4 6.441421874921515e+02 -3.738608763234666e+02 -2.117844068953763e+02 4.798901622848684e+02 - ME 1.645403798734011e-04 + ME 4.091340785269233e-05 Event 46 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2420,7 +2420,7 @@ Event 46 Batch 1 2 6.296560291524888e+02 2.172411497655985e+02 5.821614514430422e+02 -1.017892054705761e+02 3 6.224001894826197e+02 1.405102091633609e+01 -6.218608257778048e+02 2.176414579432105e+01 4 2.479437813648912e+02 -2.312921706819346e+02 3.969937433476264e+01 8.002505967625511e+01 - ME 4.041878897626609e-05 + ME 7.434320230190137e-06 Event 47 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2428,7 +2428,7 @@ Event 47 Batch 1 2 5.458843469271557e+02 -1.019033861791133e+02 -1.559739004096151e+02 5.131058004898495e+02 3 2.573134207008558e+02 6.791700498899543e+01 -2.412204887508016e+02 5.839651284901167e+01 4 6.968022323719882e+02 3.398638119011781e+01 3.971943891604168e+02 -5.715023133388611e+02 - ME 1.408798022766008e-02 + ME 4.005478861198618e-03 Event 48 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2436,7 +2436,7 @@ Event 48 Batch 1 2 6.623920218006384e+02 -6.284562032939594e+02 -1.837527125398962e+02 -1.002044496053409e+02 3 1.251779629744606e+02 -7.502448682133647e+01 9.550779386908961e+01 3.031682869117444e+01 4 7.124300152249010e+02 7.034806901152959e+02 8.824491867080658e+01 6.988762091416655e+01 - ME 8.682321044518227e-04 + ME 3.004757451335502e-04 Event 49 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2444,7 +2444,7 @@ Event 49 Batch 1 2 
2.397494808364364e+02 2.393958238941666e+02 -4.144666783354266e+00 -1.233996761053010e+01 3 6.782491241100328e+02 -3.516321535544010e+02 -2.705899831712919e+02 5.129890485673947e+02 4 5.820013950535307e+02 1.122363296602344e+02 2.747346499546462e+02 -5.006490809568646e+02 - ME 9.041285542966720e-03 + ME 6.040872325723622e-04 Event 50 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2452,7 +2452,7 @@ Event 50 Batch 1 2 4.764898792162554e+02 4.667163214316568e+02 5.900817880915086e+01 -7.573978570375913e+01 3 5.114228101321805e+02 -2.035689445851523e+02 -4.549677995197112e+02 -1.145306811477843e+02 4 5.120873106515638e+02 -2.631473768465044e+02 3.959596207105603e+02 1.902704668515434e+02 - ME 5.157319121365441e-05 + ME 9.692662313613028e-06 Event 51 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2460,7 +2460,7 @@ Event 51 Batch 1 2 4.678795643859630e+02 4.629737719234085e+02 5.365495313512251e+01 4.108186077915564e+01 3 6.311645871918951e+02 -4.500610707732837e+02 -4.345770688214700e+02 8.340587481742408e+01 4 4.009558484221416e+02 -1.291270115012470e+01 3.809221156863474e+02 -1.244877355965797e+02 - ME 1.517985021504320e-04 + ME 1.293558494013996e-05 Event 52 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2468,7 +2468,7 @@ Event 52 Batch 1 2 3.696230029266819e+02 2.516704934433110e+02 2.514038675722595e+02 1.003953305301004e+02 3 6.696174214325739e+02 -2.754912388418390e+01 -6.493999246431116e+02 -1.609604756850079e+02 4 4.607595756407442e+02 -2.241213695591271e+02 3.979960570708519e+02 6.056514515490756e+01 - ME 5.727699238559496e-05 + ME 8.655753222194317e-06 Event 53 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2476,7 +2476,7 @@ Event 53 Batch 1 2 7.284624742442375e+01 -4.271742504396477e+01 -2.683807109937144e+01 -5.255012179908527e+01 3 
7.493542950735829e+02 3.356513586119740e+02 2.501807367708783e+02 6.215139772812374e+02 4 6.777994575019936e+02 -2.929339335680093e+02 -2.233426656715069e+02 -5.689638554821522e+02 - ME 1.612275481129464e-02 + ME 2.372423861687152e-03 Event 54 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2484,7 +2484,7 @@ Event 54 Batch 1 2 7.460259847230064e+02 2.055186857047568e+01 6.233229443227743e+02 4.093908861479223e+02 3 5.756222844616437e+02 2.606063779094539e+01 -4.696411468594731e+02 -3.318117699890848e+02 4 1.783517308153497e+02 -4.661250636142109e+01 -1.536817974633012e+02 -7.757911615883735e+01 - ME 4.374243668355642e-04 + ME 5.046268590690708e-05 Event 55 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2492,7 +2492,7 @@ Event 55 Batch 1 2 5.967428482894213e+02 -8.165820254184375e+01 5.098287527914877e+02 -2.991798919868828e+02 3 5.942526243827265e+02 5.606061544962815e+01 -2.905196430116550e+02 5.153559216750568e+02 4 3.090045273278509e+02 2.559758709221549e+01 -2.193091097798325e+02 -2.161760296881746e+02 - ME 1.779007466146034e-03 + ME 1.849048785615045e-04 Event 56 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2500,7 +2500,7 @@ Event 56 Batch 1 2 5.610874267302015e+02 -4.199055433713192e+02 3.580252469767042e+02 1.015694718309908e+02 3 6.303091265298390e+02 2.130872195586830e+02 -5.453843477211296e+02 -2.333224059286980e+02 4 3.086034467399593e+02 2.068183238126362e+02 1.873591007444254e+02 1.317529340977073e+02 - ME 3.258989367177766e-05 + ME 7.213009143835112e-06 Event 57 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2508,7 +2508,7 @@ Event 57 Batch 1 2 6.552053965855981e+02 4.516249927537604e+02 7.110694105335197e+00 4.746350341729917e+02 3 6.035190443408458e+02 -3.717228873476765e+02 2.148772607224587e+02 -4.241286299324850e+02 4 
2.412755590735562e+02 -7.990210540608396e+01 -2.219879548277939e+02 -5.050640424050685e+01 - ME 1.623545585873121e-04 + ME 3.752873989265266e-05 Event 58 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2516,7 +2516,7 @@ Event 58 Batch 1 2 2.959982971085279e+02 1.850007048157144e+02 -2.304987961744356e+02 1.612563397119956e+01 3 7.018897389129390e+02 -3.764226030262936e+02 4.376344751014918e+02 3.992884868423144e+02 4 5.021119639785326e+02 1.914218982105791e+02 -2.071356789270567e+02 -4.154141208135139e+02 - ME 4.558573859477246e-03 + ME 1.901193343270815e-04 Event 59 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2524,7 +2524,7 @@ Event 59 Batch 1 2 5.521089721327345e+02 1.223876815062619e+02 -3.629066091228882e+01 -5.371485459866160e+02 3 4.098988410471214e+02 -5.841964900319319e+01 -3.626461945087767e+02 1.819119075553315e+02 4 5.379921868201441e+02 -6.396803250306872e+01 3.989368554210655e+02 3.552366384312845e+02 - ME 5.148841296796537e-05 + ME 1.780280399801712e-05 Event 60 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2532,7 +2532,7 @@ Event 60 Batch 1 2 7.143828168925960e+02 -4.584044193456332e+02 -2.419772079280938e+02 -4.915844060170314e+02 3 1.284110307517517e+02 8.324300347118127e+01 -7.889851197070540e+01 5.774963203893758e+01 4 6.572061523556514e+02 3.751614158744520e+02 3.208757198987992e+02 4.338347739780938e+02 - ME 1.673517837789511e-04 + ME 7.144001898958308e-05 Event 61 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2540,7 +2540,7 @@ Event 61 Batch 1 2 4.394390210968651e+02 -2.137451655543886e+02 -3.779414621253704e+02 -6.767502250635177e+01 3 4.431311911324728e+02 3.845666395406355e+02 -2.150363068358313e+02 4.725610065709574e+01 4 6.174297877706618e+02 -1.708214739862469e+02 5.929777689612018e+02 2.041892184925626e+01 - ME 
1.368591177943825e-04 + ME 2.870354731125455e-05 Event 62 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2548,7 +2548,7 @@ Event 62 Batch 1 2 7.301725729481176e+02 4.281927891852710e+02 5.652737593150771e+02 -1.739784429324868e+02 3 7.567373964415995e+01 2.589885732647599e+01 -5.696550981957816e+01 4.255225906941358e+01 4 6.941536874077224e+02 -4.540916465117469e+02 -5.083082494954988e+02 1.314261838630732e+02 - ME 8.513592598060080e-04 + ME 2.379197431250548e-04 Event 63 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2556,7 +2556,7 @@ Event 63 Batch 1 2 4.361152320236988e+02 -3.738769057978321e+02 1.427754799584550e+02 -1.732850750548248e+02 3 5.817148313055657e+02 5.081993893256957e+02 2.829214478037172e+02 -8.998890070513914e+00 4 4.821699366707353e+02 -1.343224835278637e+02 -4.256969277621721e+02 1.822839651253387e+02 - ME 4.544766189571194e-05 + ME 8.350404272725701e-06 Event 64 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2564,7 +2564,7 @@ Event 64 Batch 1 2 6.097675704107204e+02 3.288514690970509e+02 4.971291587853200e+02 -1.285916042465611e+02 3 5.709532610348123e+02 -6.501292612520263e+01 -4.768258747557200e+02 3.072426254385416e+02 4 3.192791685544673e+02 -2.638385429718484e+02 -2.030328402960006e+01 -1.786510211919805e+02 - ME 4.598138986874043e-04 + ME 3.000969253297957e-05 Event 65 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2572,7 +2572,7 @@ Event 65 Batch 1 2 6.258641293880484e+02 3.743515439843765e+02 -1.622018320411498e+02 -4.746128903155367e+02 3 7.438702198751357e+02 -4.029113627030089e+02 2.325939036896868e+02 5.804355380128616e+02 4 1.302656507368158e+02 2.855981871863233e+01 -7.039207164853700e+01 -1.058226476973252e+02 - ME 6.427333508548903e-03 + ME 3.162776051460646e-04 Event 66 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2580,7 +2580,7 @@ Event 66 Batch 1 2 3.731957242404369e+02 1.596860493342637e+01 -3.714568973276624e+02 3.224632809376674e+01 3 6.079923612940432e+02 4.451199598539357e+02 3.189341902600864e+02 -2.642043054431177e+02 4 5.188119144655197e+02 -4.610885647873621e+02 5.252270706757586e+01 2.319579773493509e+02 - ME 4.681392980523237e-05 + ME 1.034065067393998e-05 Event 67 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2588,7 +2588,7 @@ Event 67 Batch 1 2 7.084256499213539e+02 6.318790977834966e+02 -2.229764540025608e+02 2.299504472951746e+02 3 5.168612394424738e+01 1.130069959366449e+01 -1.428140623590627e+01 4.837138651102398e+01 4 7.398882261343989e+02 -6.431797973771612e+02 2.372578602384670e+02 -2.783218338061985e+02 - ME 5.878400132197954e-02 + ME 1.479715191731530e-02 Event 68 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2596,7 +2596,7 @@ Event 68 Batch 1 2 5.644037677826096e+02 -7.446914007305443e+01 3.170710956176409e+02 4.609467220707991e+02 3 4.303832728799333e+02 -1.588265612792408e+02 -3.994808673830752e+02 -2.046757440246668e+01 4 5.052129593374568e+02 2.332957013522950e+02 8.240977176543441e+01 -4.404791476683325e+02 - ME 8.108482137897523e-03 + ME 3.274273226082449e-04 Event 69 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2604,7 +2604,7 @@ Event 69 Batch 1 2 2.379282923937934e+02 -4.413455715133102e+01 1.058497776082811e+02 -2.084654354245804e+02 3 5.822935131976616e+02 -5.806422676829345e+02 4.095409019445288e+01 -1.559022092337181e+01 4 6.797781944085444e+02 6.247768248342655e+02 -1.468038678027338e+02 2.240556563479522e+02 - ME 3.039802585689931e-04 + ME 6.379305675073031e-05 Event 70 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2612,7 +2612,7 @@ Event 70 Batch 
1 2 5.861861307468000e+02 1.831219916849830e+02 2.904683423406074e+02 -4.750880530376756e+02 3 4.633200606614189e+02 -4.245314712871158e+02 -1.339518705596282e+02 1.284344380284135e+02 4 4.504938085917810e+02 2.414094796021329e+02 -1.565164717809791e+02 3.466536150092620e+02 - ME 3.530491740557932e-05 + ME 1.325653453486623e-05 Event 71 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2620,7 +2620,7 @@ Event 71 Batch 1 2 7.383412459951699e+02 5.748049255568963e+02 -1.639684737984460e+02 -4.334298474879633e+02 3 3.973981306646684e+02 -3.228684354469153e+02 -4.837114091238284e+00 2.316416412804533e+02 4 3.642606233401616e+02 -2.519364901099809e+02 1.688055878896842e+02 2.017882062075102e+02 - ME 3.103530482016079e-05 + ME 1.333441808219846e-05 Event 72 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2628,7 +2628,7 @@ Event 72 Batch 1 2 3.538199915090663e+02 3.512029503136998e+02 -6.467835580753929e+00 -4.246458742680748e+01 3 5.344234504985296e+02 1.310173344785605e+01 3.836805260246265e+01 5.328833470497182e+02 4 6.117565579924039e+02 -3.643046837615559e+02 -3.190021702170876e+01 -4.904187596229107e+02 - ME 9.376669006106200e-03 + ME 2.994704399169685e-03 Event 73 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2636,7 +2636,7 @@ Event 73 Batch 1 2 4.694927197571710e+02 1.451947293992222e+02 -1.807863847612341e+02 4.082379055705570e+02 3 5.537325951281179e+02 -5.796379956652479e+01 5.401382741253894e+02 -1.072876026015002e+02 4 4.767746851147115e+02 -8.723092983269744e+01 -3.593518893641554e+02 -3.009503029690568e+02 - ME 1.077472469645428e-03 + ME 1.535829386616431e-04 Event 74 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2644,7 +2644,7 @@ Event 74 Batch 1 2 6.258444305735198e+02 -3.349227552763227e+02 4.941036656040852e+02 1.880679848209580e+02 3 
5.555040664889822e+02 3.765538795180102e+01 -5.474422011270130e+02 -8.645158222500005e+01 4 3.186515029374982e+02 2.972673673245214e+02 5.333853552292791e+01 -1.016164025959578e+02 - ME 1.623439923565115e-04 + ME 1.487896902219418e-05 Event 75 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2652,7 +2652,7 @@ Event 75 Batch 1 2 3.943316317993887e+02 5.588489849751632e+01 -2.552251009651266e+02 -2.953548066221912e+02 3 5.467466262348042e+02 -3.021648543602057e+02 -2.377479281839000e+02 3.887212326756534e+02 4 5.589217419658066e+02 2.462799558626894e+02 4.929730291490265e+02 -9.336642605346221e+01 - ME 1.348649436679123e-04 + ME 4.632408498797698e-05 Event 76 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2660,7 +2660,7 @@ Event 76 Batch 1 2 5.517772830004059e+02 2.282681125856672e+02 -4.885490190451381e+02 -1.169260227747471e+02 3 4.245403880864563e+02 -2.793100283061228e+02 1.521744876196477e+02 -2.811821020654221e+02 4 5.236823289131380e+02 5.104191572045557e+01 3.363745314254903e+02 3.981081248401691e+02 - ME 5.074216551061466e-05 + ME 1.645260485784409e-05 Event 77 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2668,7 +2668,7 @@ Event 77 Batch 1 2 3.781543446472003e+02 -5.926925448310480e+01 -1.775497893613220e+02 3.285786605157444e+02 3 6.702964816234122e+02 -6.066564226432872e+01 -1.057468051743550e+02 -6.591165802199176e+02 4 4.515491737293867e+02 1.199348967474336e+02 2.832965945356770e+02 3.305379197041734e+02 - ME 6.321080405055773e-05 + ME 5.041095643414513e-05 Event 78 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2676,7 +2676,7 @@ Event 78 Batch 1 2 4.564262045363139e+02 1.882572856930395e+02 1.751822011208171e+02 -3.770878823051468e+02 3 3.809544602625751e+02 -2.816334489555117e+02 1.992812047321844e+02 -1.615422627793184e+02 4 
6.626193352011103e+02 9.337616326247226e+01 -3.744634058530013e+02 5.386301450844651e+02 - ME 2.572921643188974e-04 + ME 6.222463480998997e-05 Event 79 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2684,7 +2684,7 @@ Event 79 Batch 1 2 6.126536521478922e+02 6.075062399138452e+02 -4.178945028651393e+01 6.733726903166659e+01 3 2.872846052831658e+02 -1.084163947926161e+02 2.139961846825774e+01 2.651799127051085e+02 4 6.000617425689430e+02 -4.990898451212283e+02 2.038983181825616e+01 -3.325171817367756e+02 - ME 1.996659951821530e-03 + ME 6.289823950094716e-04 Event 80 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2692,7 +2692,7 @@ Event 80 Batch 1 2 4.171281258707700e+02 -2.756641813219371e+02 1.445082905894664e+01 3.127240094205691e+02 3 3.805235327384960e+02 -2.955852199231463e+02 2.395269588958384e+02 7.373784162959287e+00 4 7.023483413907342e+02 5.712494012450838e+02 -2.539777879547846e+02 -3.200977935835284e+02 - ME 1.297520069620947e-03 + ME 5.629434448779270e-04 Event 81 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2700,7 +2700,7 @@ Event 81 Batch 1 2 7.471091333863935e+02 -9.753029041192970e+01 7.407154559164039e+02 -7.162458282065091e-01 3 6.775352561453885e+02 9.550863422814814e+01 -6.702673865908516e+02 -2.595678293896889e+01 4 7.535561046821789e+01 2.021656183781575e+00 -7.044806932555213e+01 2.667302876717550e+01 - ME 1.022399816924924e-04 + ME 2.904529061551848e-05 Event 82 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2708,7 +2708,7 @@ Event 82 Batch 1 2 4.309094465924175e+02 3.042233433179616e+02 2.799835808203350e+02 -1.214096495919827e+02 3 5.540384887187945e+02 -4.824447657759213e+02 1.988969596446625e+02 1.861335391629672e+02 4 5.150520646887885e+02 1.782214224579596e+02 -4.788805404649973e+02 -6.472388957098450e+01 - ME 
1.053635072607165e-04 + ME 1.778678120024833e-05 Event 83 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2716,7 +2716,7 @@ Event 83 Batch 1 2 4.869534474909295e+02 -4.727010820510885e+02 1.062322962656182e+02 4.890855018466118e+01 3 3.520990385354405e+02 -1.437544586613779e+02 -3.142298368411062e+02 6.758696761482639e+01 4 6.609475139736298e+02 6.164555407124665e+02 2.079975405754878e+02 -1.164955177994876e+02 - ME 2.998516055200512e-04 + ME 7.948516811691567e-05 Event 84 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2724,7 +2724,7 @@ Event 84 Batch 1 2 1.391975815431583e+01 -3.682657486111166e-01 -1.138840508663312e+01 -7.995516055627093e+00 3 7.493632094786751e+02 -3.452281541586202e+01 3.833012084573049e+02 6.429880080772211e+02 4 7.367170323670085e+02 3.489108116447313e+01 -3.719128033706718e+02 -6.349924920215940e+02 - ME 3.806217512266510e-01 + ME 8.671177508029917e-02 Event 85 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2732,7 +2732,7 @@ Event 85 Batch 1 2 7.362448947738020e+02 6.409220704967113e+02 3.243429451315054e+02 1.614840505254833e+02 3 1.517836214454495e+02 -1.266859291808411e+02 -6.780846852200752e+01 4.889738933094901e+01 4 6.119714837807480e+02 -5.142361413158706e+02 -2.565344766094980e+02 -2.103814398564324e+02 - ME 5.694785892689211e-04 + ME 1.062305495679385e-04 Event 86 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2740,7 +2740,7 @@ Event 86 Batch 1 2 5.451728369778392e+02 -6.605005893803180e+01 1.066920544886257e+02 -5.305352178712969e+02 3 3.158718592284829e+02 -1.755596039144849e+02 2.550395858012225e+02 6.251932981237656e+01 4 6.389553037936773e+02 2.416096628525165e+02 -3.617316402898481e+02 4.680158880589203e+02 - ME 1.469986179099727e-04 + ME 4.057626974930324e-05 Event 87 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2748,7 +2748,7 @@ Event 87 Batch 1 2 3.414211232216659e+02 1.437256906952883e+02 1.534640422371205e+02 -2.689983214749668e+02 3 5.081668091119999e+02 4.794742948200324e+02 -1.464748766741243e+02 8.296394996143997e+01 4 6.504120676663341e+02 -6.231999855153207e+02 -6.989165562996117e+00 1.860343715135268e+02 - ME 1.823135893899652e-04 + ME 3.656584417835253e-05 Event 88 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2756,7 +2756,7 @@ Event 88 Batch 1 2 2.925516585730864e+02 1.655911293372511e+01 2.598275245766865e+02 -1.334238591297045e+02 3 7.159840369510271e+02 -1.056844973272874e+02 -3.694097043713192e+02 6.041526284885822e+02 4 4.914643044758866e+02 8.912538439356234e+01 1.095821797946327e+02 -4.707287693588777e+02 - ME 8.728488941697977e-02 + ME 2.327745727475104e-03 Event 89 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2764,7 +2764,7 @@ Event 89 Batch 1 2 6.333634651097186e+02 1.209853522660007e+02 5.372166546881791e+02 -3.129058794565919e+02 3 6.221307427802806e+02 5.757192259699385e+01 -4.327483989541182e+02 4.432391657372765e+02 4 2.445057921100010e+02 -1.785572748629945e+02 -1.044682557340609e+02 -1.303332862806847e+02 - ME 5.497507832908574e-04 + ME 5.047204144927262e-05 Event 90 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2772,7 +2772,7 @@ Event 90 Batch 1 2 3.111538587406461e+02 2.628215106651484e+02 -6.985334981761831e+01 -1.512021390726355e+02 3 5.216486323898988e+02 1.252715366480781e+02 4.457714554600226e+02 -2.402335265468457e+02 4 6.671975088694549e+02 -3.880930473132266e+02 -3.759181056424042e+02 3.914356656194811e+02 - ME 2.329075524537458e-04 + ME 4.503542584588689e-05 Event 91 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2780,7 +2780,7 @@ Event 91 
Batch 1 2 3.007803348469016e+02 8.390513937949677e+01 2.884042062049404e+02 -1.586667134655829e+01 3 6.256884422056424e+02 2.364580673743878e+02 -3.590826126759745e+02 -4.545693416378727e+02 4 5.735312229474563e+02 -3.203632067538847e+02 7.067840647103421e+01 4.704360129844310e+02 - ME 6.478111274774788e-05 + ME 2.635583378174906e-05 Event 92 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2788,7 +2788,7 @@ Event 92 Batch 1 2 6.843865618656529e+02 -2.264962467301474e+02 -5.909185329480341e+02 2.605757158639088e+02 3 6.645516272550811e+02 3.453347116263074e+02 4.983670680340538e+02 -2.720350487207341e+02 4 1.510618108792659e+02 -1.188384648961601e+02 9.255146491398015e+01 1.145933285682523e+01 - ME 9.365402433981294e-05 + ME 1.711437740567050e-05 Event 93 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2796,7 +2796,7 @@ Event 93 Batch 1 2 5.579763469381434e+02 2.180908585044468e+02 5.135246110359701e+02 8.151996049100932e+00 3 3.333821836060117e+02 1.681122988324202e+02 -1.261705574188212e+02 2.587719570738210e+02 4 6.086414694558448e+02 -3.862031573368670e+02 -3.873540536171486e+02 -2.669239531229223e+02 - ME 5.183695239236329e-04 + ME 1.157787815150910e-04 Event 94 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2804,7 +2804,7 @@ Event 94 Batch 1 2 4.534979734151987e+02 1.139662723650677e+02 2.686183171543304e+01 4.381216071501101e+02 3 3.856184698299744e+02 1.545134372854228e+02 -3.452526490806396e+02 7.501873282757614e+01 4 6.608835567548277e+02 -2.684797096504910e+02 3.183908173652065e+02 -5.131403399776862e+02 - ME 6.944325623628402e-03 + ME 1.545010233607317e-03 Event 95 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2812,7 +2812,7 @@ Event 95 Batch 1 2 2.828073115974175e+02 -5.711637476392460e+01 5.915078172645698e+01 -2.705898746219725e+02 3 
6.809618671276158e+02 3.772100991821226e+02 3.247893528880094e+02 4.646864338535512e+02 4 5.362308212749670e+02 -3.200937244181981e+02 -3.839401346144663e+02 -1.940965592315787e+02 - ME 2.560512106670314e-04 + ME 6.408796328924562e-05 Event 96 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2820,7 +2820,7 @@ Event 96 Batch 1 2 4.639832102051440e+02 -4.275497908582962e+02 -1.317248975374901e+02 -1.230046627491649e+02 3 7.474114851375481e+02 6.594176555428718e+02 2.654537688070380e+02 2.309254864669502e+02 4 2.886053046573076e+02 -2.318678646845757e+02 -1.337288712695479e+02 -1.079208237177853e+02 - ME 2.440162169445852e-04 + ME 1.445191791082226e-05 Event 97 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2828,7 +2828,7 @@ Event 97 Batch 1 2 5.095921959312568e+02 3.190102848863560e+02 3.100341192456060e+02 2.485869851668986e+02 3 4.555541331018014e+02 -2.788120391899956e+02 2.221549471930723e+02 -2.836205112936887e+02 4 5.348536709669415e+02 -4.019824569636059e+01 -5.321890664386783e+02 3.503352612679014e+01 - ME 8.198891770965733e-05 + ME 2.250661525403011e-05 Event 98 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2836,7 +2836,7 @@ Event 98 Batch 1 2 5.299941952467790e+02 -2.570048161992350e+02 -4.630296380940593e+02 -2.111695271961878e+01 3 7.352146396921255e+02 2.361229278157243e+02 6.962552486063584e+02 3.893348873424185e+00 4 2.347911650610957e+02 2.088188838351074e+01 -2.332256105122990e+02 1.722360384619465e+01 - ME 6.760444392591968e-05 + ME 5.654417419793765e-06 Event 99 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2844,7 +2844,7 @@ Event 99 Batch 1 2 4.290897291078425e+02 3.747236205606835e+02 2.040795775432686e+02 -4.529602465443949e+01 3 6.438744429739487e+02 -5.215755139094103e+02 2.133414139578182e+01 3.769325350988583e+02 4 
4.270358279182090e+02 1.468518933487271e+02 -2.254137189390505e+02 -3.316365104444187e+02 - ME 2.024851967866169e-03 + ME 8.457850707842401e-05 Event 100 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2852,7 +2852,7 @@ Event 100 Batch 1 2 5.119062275524872e+02 -4.721600394809319e+02 -1.845880136125884e+02 7.099400083769524e+01 3 4.523854579707449e+02 2.836789572262426e+02 -3.060214184981774e+02 -1.747276258374610e+02 4 5.357083144767672e+02 1.884810822546894e+02 4.906094321107658e+02 1.037336249997658e+02 - ME 6.898305006855298e-05 + ME 1.420495101373495e-05 Event 101 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2860,7 +2860,7 @@ Event 101 Batch 1 2 6.024072815192737e+02 -3.080418730730875e+02 -4.692284526425155e+02 2.186993289696520e+02 3 3.347434020484399e+02 8.940653726951260e+01 -3.939923552329941e+01 -3.201676381969582e+02 4 5.628493164322859e+02 2.186353358035749e+02 5.086276881658150e+02 1.014683092273061e+02 - ME 9.290725627447436e-05 + ME 2.743452031293993e-05 Event 102 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2868,7 +2868,7 @@ Event 102 Batch 1 2 5.910857738801296e+02 3.707548039128416e+02 -7.516477307090547e+01 -4.541734518311494e+02 3 2.311218706704979e+02 4.536804143672514e+01 -2.262982016400413e+02 1.217307902336991e+01 4 6.777923554493723e+02 -4.161228453495667e+02 3.014629747109467e+02 4.420003728077793e+02 - ME 2.633339755449651e-04 + ME 7.158169676479796e-05 Event 103 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2876,7 +2876,7 @@ Event 103 Batch 1 2 6.627949406417042e+02 7.189602123685950e+01 -6.391860825813610e+02 -1.599038689489492e+02 3 5.519979886399102e+02 1.442810582977179e+02 4.734454174874869e+02 2.444057944057306e+02 4 2.852070707183856e+02 -2.161770795345774e+02 1.657406650938741e+02 -8.450192545678139e+01 - ME 
1.652798222861839e-04 + ME 1.658567428345252e-05 Event 104 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2884,7 +2884,7 @@ Event 104 Batch 1 2 4.368180791462563e+02 -3.483499330357901e+02 -2.596280064690262e+02 4.533935023690698e+01 3 4.635715977792429e+02 1.873023362819025e+02 -2.251347602994603e+02 -3.593477435519053e+02 4 5.996103230745010e+02 1.610475967538876e+02 4.847627667684865e+02 3.140083933149983e+02 - ME 9.158171748371188e-05 + ME 2.162124469235967e-05 Event 105 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2892,7 +2892,7 @@ Event 105 Batch 1 2 5.701708357490469e+02 2.288495716262106e+02 -4.521314661478370e+02 -2.613422905391967e+02 3 3.711008490497917e+02 -3.362590561223710e+02 -8.126001400906793e+01 1.343223639771668e+02 4 5.587283152011612e+02 1.074094844961603e+02 5.333914801569049e+02 1.270199265620299e+02 - ME 7.043372303967046e-05 + ME 1.720246557093887e-05 Event 106 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2900,7 +2900,7 @@ Event 106 Batch 1 2 6.775588183099673e+02 5.149765831731705e+02 3.445381345095063e+02 -2.741870619150275e+02 3 7.044100837534635e+02 -4.546975847980706e+02 -4.392260662935809e+02 3.106833358270535e+02 4 1.180310979365712e+02 -6.027899837509908e+01 9.468793178407486e+01 -3.649627391202603e+01 - ME 3.259673897057837e-04 + ME 2.786544600802367e-05 Event 107 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2908,7 +2908,7 @@ Event 107 Batch 1 2 6.046880513041550e+02 2.289413119004024e+02 -5.349774474143721e+02 -1.644160754103499e+02 3 3.366746442316215e+02 -7.166101576320902e+01 2.452245434825371e+01 3.280444544890399e+02 4 5.586373044642238e+02 -1.572802961371935e+02 5.104549930661184e+02 -1.636283790786902e+02 - ME 8.859556065170558e-04 + ME 4.667002706670146e-04 Event 108 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2916,7 +2916,7 @@ Event 108 Batch 1 2 6.239206451413978e+02 -2.218030564243363e+02 5.011455197099735e+02 -2.982172759400455e+02 3 2.841199272340513e+02 1.209406641294798e+02 7.967327320293104e+01 2.444374323800143e+02 4 5.919594276245514e+02 1.008623922948564e+02 -5.808187929129044e+02 5.377984356003120e+01 - ME 1.727643234936365e-04 + ME 7.961277501126149e-05 Event 109 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2924,7 +2924,7 @@ Event 109 Batch 1 2 3.093404598873124e+02 1.546999830656544e+02 1.629193992247174e+02 2.126421988200774e+02 3 5.287372542258961e+02 -2.136116696975048e+02 -1.865832176193536e+02 4.462284633214169e+02 4 6.619222858867909e+02 5.891168663185049e+01 2.366381839463621e+01 -6.588706621414941e+02 - ME 1.686695657867669e+01 + ME 2.902408960420708e-01 Event 110 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2932,7 +2932,7 @@ Event 110 Batch 1 2 4.920948406187608e+02 -8.595212543403569e+01 -4.824913009925944e+02 -4.440392734262522e+01 3 4.634042325716594e+02 -2.085760624772916e+00 1.255608851371819e+02 4.460645653843308e+02 4 5.445009268095798e+02 8.803788605880843e+01 3.569304158554124e+02 -4.016606380417056e+02 - ME 4.151412887207382e-03 + ME 1.043536440561108e-03 Event 111 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2940,7 +2940,7 @@ Event 111 Batch 1 2 4.637454700443120e+02 1.543048221589588e+02 -4.372769385391800e+02 6.225902899506631e+00 3 3.246747011850293e+02 -5.128652792678845e+01 -2.274142471268230e+02 2.259781269206006e+02 4 7.115798287706589e+02 -1.030182942321705e+02 6.646911856660031e+02 -2.322040298201072e+02 - ME 1.240833065187375e-03 + ME 5.219332617201280e-04 Event 112 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2948,7 +2948,7 @@ Event 
112 Batch 1 2 6.923761777814550e+02 3.939190124845535e+02 4.398224952082178e+01 -5.676954684419625e+02 3 5.277418353503033e+02 -4.270527740856185e+02 4.970714905179168e+01 3.060499505927539e+02 4 2.798819868682421e+02 3.313376160106501e+01 -9.368939857261346e+01 2.616455178492087e+02 - ME 5.385735959435035e-05 + ME 4.381536575941429e-05 Event 113 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2956,7 +2956,7 @@ Event 113 Batch 1 2 7.174898838850694e+02 -6.130145063482008e+02 3.726797356942233e+02 1.071275347265524e+01 3 1.705115822510491e+02 3.993583199494100e+01 -1.624320619120163e+02 3.309311510932528e+01 4 6.119985338638814e+02 5.730786743532599e+02 -2.102476737822071e+02 -4.380586858198049e+01 - ME 2.197559713387976e-04 + ME 4.914674319256647e-05 Event 114 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2964,7 +2964,7 @@ Event 114 Batch 1 2 6.772826088252357e+02 -1.430288042596954e+02 -3.410390118171982e+02 5.674036356844296e+02 3 6.725037798358682e+02 3.626161999767239e+01 2.510744134018114e+02 -6.228226615527174e+02 4 1.502136113388951e+02 1.067671842620232e+02 8.996459841538707e+01 5.541902586828807e+01 - ME 8.926156406775035e-05 + ME 7.986648389935193e-05 Event 115 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2972,7 +2972,7 @@ Event 115 Batch 1 2 9.320551230331124e+01 1.288474310894606e+01 -2.581623869377880e+01 8.862715576190526e+01 3 6.672654287607164e+02 1.525114284892182e+02 2.829200767588875e+02 5.847560574856374e+02 4 7.395290589359720e+02 -1.653961715981643e+02 -2.571038380651088e+02 -6.733832132475428e+02 - ME 1.800237703627863e+00 + ME 4.304938165075599e-01 Event 116 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2980,7 +2980,7 @@ Event 116 Batch 1 2 4.951202926530015e+02 -4.575339943514647e+02 4.220102313368785e+01 
1.844608951947751e+02 3 3.101750696753587e+02 -4.711582585559527e+01 2.172188132736168e+02 2.163438466008694e+02 4 6.947046376716394e+02 5.046498202070600e+02 -2.594198364073050e+02 -4.008047417956444e+02 - ME 1.933367100533606e-03 + ME 5.988625984136040e-04 Event 117 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2988,7 +2988,7 @@ Event 117 Batch 1 2 6.543248494478489e+02 1.390926466871539e+02 9.107024539473488e+01 6.328510524967589e+02 3 5.040443237953712e+02 6.874740772121054e+01 1.336336536624387e+02 -4.811200690999848e+02 4 3.416308267567792e+02 -2.078400544083643e+02 -2.247038990571737e+02 -1.517309833967742e+02 - ME 4.207453923038474e-04 + ME 3.026560085299302e-04 Event 118 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2996,7 +2996,7 @@ Event 118 Batch 1 2 5.829230400014206e+02 5.307803371482089e+02 -3.192285892796672e+01 2.388565162167381e+02 3 3.965113090906140e+02 -5.470249758902820e+01 2.256187790844517e+02 -3.214420966810604e+02 4 5.205656509079653e+02 -4.760778395591807e+02 -1.936959201564850e+02 8.258558046432242e+01 - ME 7.464562943747175e-05 + ME 2.168340782914014e-05 Event 119 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3004,7 +3004,7 @@ Event 119 Batch 1 2 3.549567073991255e+02 2.281637891139605e+02 1.474502150787006e+02 2.284600261271838e+02 3 4.727085372220640e+02 7.463684946128350e+01 -3.092948822053327e+02 3.495988811576870e+02 4 6.723347553788102e+02 -3.028006385752440e+02 1.618446671266322e+02 -5.780589072848707e+02 - ME 1.455012849105755e-02 + ME 1.664672733965846e-03 Event 120 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3012,7 +3012,7 @@ Event 120 Batch 1 2 7.192117275853698e+02 4.094232477570927e+02 -5.552624156333899e+02 -2.032775518283800e+02 3 3.685061529232585e+02 -2.522084621786424e+02 1.741347663658646e+02 
2.046087962197375e+02 4 4.122821194913712e+02 -1.572147855784500e+02 3.811276492675253e+02 -1.331244391357209e+00 - ME 9.281995463485567e-05 + ME 1.900262756274459e-05 Event 121 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3020,7 +3020,7 @@ Event 121 Batch 1 2 1.923953846467517e+02 -5.182078839520096e+01 -1.486351786617837e+02 -1.106262789198433e+02 3 6.582127150877787e+02 -3.509182841037630e+02 -1.191939510078701e+02 5.439606035624541e+02 4 6.493919002654695e+02 4.027390724989639e+02 2.678291296696539e+02 -4.333343246426108e+02 - ME 1.925188892577692e-03 + ME 5.360055113881300e-04 Event 122 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3028,7 +3028,7 @@ Event 122 Batch 1 2 6.905732817636248e+02 3.462508192534570e+02 -5.375670569609784e+02 -2.608131264380775e+02 3 7.097575386120018e+02 -2.677396278645660e+02 5.849221766424142e+02 2.998954860604125e+02 4 9.966917962437387e+01 -7.851119138889094e+01 -4.735511968143584e+01 -3.908235962233509e+01 - ME 5.007312135859238e-04 + ME 3.451011759976180e-05 Event 123 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3036,7 +3036,7 @@ Event 123 Batch 1 2 4.035126033432560e+02 2.481103298242076e+01 -3.878573016343356e+02 -1.085059780294573e+02 3 3.541388771651666e+02 1.572344474048876e+02 -3.105653677404273e+02 -6.512161875550808e+01 4 7.423485194915780e+02 -1.820454803873083e+02 6.984226693747627e+02 1.736275967849660e+02 - ME 2.043564129780385e-02 + ME 3.471230489499830e-03 Event 124 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3044,7 +3044,7 @@ Event 124 Batch 1 2 5.353042728143347e+02 -4.785252055946481e+02 -2.279396245170433e+02 7.488537693644093e+01 3 7.454081943698113e+02 6.785307544150930e+02 3.069354144183444e+02 -3.193811081429426e+01 4 2.192875328158541e+02 -2.000055488204448e+02 
-7.899578990130104e+01 -4.294726612214667e+01 - ME 1.399009675490331e-04 + ME 6.765427234678898e-06 Event 125 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3052,7 +3052,7 @@ Event 125 Batch 1 2 7.351681880566981e+02 -1.932492970253984e+01 -4.393064933429818e+02 -5.891592456452273e+02 3 6.537497908129355e+02 -2.883189353576726e+01 3.454898907503182e+02 5.542510679217788e+02 4 1.110820211303664e+02 4.815682323830688e+01 9.381660259266363e+01 3.490817772344844e+01 - ME 1.431077255619906e-04 + ME 6.639428548470109e-05 Event 126 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3060,7 +3060,7 @@ Event 126 Batch 1 2 5.568747108147126e+02 1.149185667256990e+02 4.264979152236775e+02 -3.391204725116689e+02 3 6.934211462641822e+02 -1.939160042589616e+02 -6.294239612595663e+02 2.169215212257340e+02 4 2.497041429211053e+02 7.899743753326281e+01 2.029260460358889e+02 1.221989512859350e+02 - ME 3.344185566612618e-05 + ME 9.143592130512915e-06 Event 127 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3068,7 +3068,7 @@ Event 127 Batch 1 2 7.108931196972316e+02 4.270547743949553e+02 5.664613189451065e+02 -4.598718776252147e+01 3 4.445675167124290e+02 -1.247884466860518e+02 -4.129475031266345e+02 1.074359351009545e+02 4 3.445393635903407e+02 -3.022663277089035e+02 -1.535138158184720e+02 -6.144874733843321e+01 - ME 1.180920695556687e-04 + ME 1.427738327825488e-05 Event 128 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3076,7 +3076,7 @@ Event 128 Batch 1 2 5.312407894292422e+02 -7.192118124205533e+01 -4.398126160332176e+02 -2.891521793453568e+02 3 5.717192413787027e+02 3.434745903572437e+02 1.811915566412192e+02 4.195923218357252e+02 4 3.970399691920551e+02 -2.715534091151883e+02 2.586210593919984e+02 -1.304401424903685e+02 - ME 1.848006274423395e-04 + ME 
3.532660248239223e-05 Event 129 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3084,7 +3084,7 @@ Event 129 Batch 1 2 6.644129951428383e+02 -3.595672586482287e+02 4.645590915434784e+02 3.103882489514914e+02 3 1.967652372382455e+02 -5.204943416929049e+01 8.794498000645085e+00 -1.895522930301724e+02 4 6.388217676189169e+02 4.116166928175192e+02 -4.733535895441232e+02 -1.208359559213191e+02 - ME 3.082956717278722e-04 + ME 9.192558188476414e-05 Event 130 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3092,7 +3092,7 @@ Event 130 Batch 1 2 7.302263990443511e+02 -1.919590472356484e+02 3.836584700935805e+02 -5.909217345563752e+02 3 4.156541164903923e+02 2.203243106780774e+02 -1.767969453775071e+02 3.049071707664833e+02 4 3.541194844652567e+02 -2.836526344242890e+01 -2.068615247160734e+02 2.860145637898919e+02 - ME 3.110012368642411e-05 + ME 2.258971422042701e-05 Event 131 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3100,7 +3100,7 @@ Event 131 Batch 1 2 2.308323688168238e+02 -1.780469473698228e+02 1.469011263880862e+02 1.710582294195638e+00 3 7.308075033948297e+02 5.219262643529272e+02 -3.840435213624620e+02 3.379099810545737e+02 4 5.383601277883465e+02 -3.438793169831044e+02 2.371423949743758e+02 -3.396205633487694e+02 - ME 1.061667055612532e-03 + ME 7.770640764079256e-05 Event 132 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3108,7 +3108,7 @@ Event 132 Batch 1 2 5.909630762789660e+02 -4.293852116769707e+02 -3.988922148105424e+02 7.583335995300355e+01 3 5.415993952096327e+02 2.260703809971038e+02 3.221145619770360e+02 -3.721079100067703e+02 4 3.674375285114020e+02 2.033148306798666e+02 7.677765283350686e+01 2.962745500537670e+02 - ME 3.321676569401813e-05 + ME 1.628447412544396e-05 Event 133 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -3116,7 +3116,7 @@ Event 133 Batch 1 2 4.506052863582997e+02 2.189991325227701e+02 -3.914006430783634e+02 -4.347459771134355e+01 3 4.043998006859111e+02 3.160348074769272e+02 8.738893432792010e+01 2.366946839598570e+02 4 6.449949129557901e+02 -5.350339399996973e+02 3.040117087504433e+02 -1.932200862485142e+02 - ME 3.121497332919934e-04 + ME 8.705579101282482e-05 Event 134 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3124,7 +3124,7 @@ Event 134 Batch 1 2 7.151470882937614e+02 -1.041377497037516e+01 -4.186394096729767e+01 7.138447461686595e+02 3 3.416424731356660e+02 1.638631808685801e+02 3.081581136487586e+01 -2.981925940995343e+02 4 4.432104385705719e+02 -1.534494058982047e+02 1.104812960242199e+01 -4.156521520691248e+02 - ME 5.534325530265236e-02 + ME 6.342792451335309e-03 Event 135 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3132,7 +3132,7 @@ Event 135 Batch 1 2 7.115730144432832e+02 -3.219296530898238e+02 2.184242454110169e+02 -5.958089478700319e+02 3 1.627059459894212e+02 -6.880794311551747e+01 -3.259803939022061e+01 1.437917231708342e+02 4 6.257210395672955e+02 3.907375962053413e+02 -1.858262060207963e+02 4.520172246991979e+02 - ME 2.112989182930814e-04 + ME 1.277979532321233e-04 Event 136 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3140,7 +3140,7 @@ Event 136 Batch 1 2 7.195404287114588e+02 -4.369992732083461e+02 -4.270318019286997e+02 3.800182941743402e+02 3 6.668605996318223e+02 3.634158794560479e+02 4.690430049045651e+02 -3.043527845290675e+02 4 1.135989716567186e+02 7.358339375229815e+01 -4.201120297586535e+01 -7.566550964527264e+01 - ME 1.804344388349211e-03 + ME 7.515399240093053e-05 Event 137 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3148,7 +3148,7 @@ Event 137 Batch 1 2 
6.722782806744999e+02 -6.045581260407005e+02 -2.538460778300668e+02 1.484241478840623e+02 3 6.869263774705689e+02 6.661257235671316e+02 1.481819739565761e+02 -7.865412297735662e+01 4 1.407953418549304e+02 -6.156759752643097e+01 1.056641038734908e+02 -6.977002490670534e+01 - ME 5.192812231664224e-04 + ME 2.119149330726453e-05 Event 138 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3156,7 +3156,7 @@ Event 138 Batch 1 2 6.463287544295633e+02 8.684709774942756e+01 2.409249839962013e+02 -5.934253049048401e+02 3 3.917330799270068e+02 1.767690441671677e+02 4.696120064017492e+01 3.464132742372293e+02 4 4.619381656434300e+02 -2.636161419165952e+02 -2.878861846363762e+02 2.470120306676108e+02 - ME 5.804753959762886e-05 + ME 4.203806696206548e-05 Event 139 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3164,7 +3164,7 @@ Event 139 Batch 1 2 2.994802063237944e+02 -1.272876183039153e+02 6.552211336810879e+00 2.710042891410713e+02 3 7.257546970836092e+02 -8.848613612326799e+00 5.127896146768584e+00 -7.256826352181574e+02 4 4.747650965925943e+02 1.361362319162416e+02 -1.168010748357900e+01 4.546783460770868e+02 - ME 1.724196014694060e-04 + ME 1.500396153249019e-04 Event 140 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3172,7 +3172,7 @@ Event 140 Batch 1 2 7.326756101999780e+02 5.655005379385240e+02 4.343799907428446e+02 1.683351270988810e+02 3 7.428339005597779e+02 -5.680473426214219e+02 -4.534832054058505e+02 -1.532233754243464e+02 4 2.449048924024402e+01 2.546804682897962e+00 1.910321466300584e+01 -1.511175167453447e+01 - ME 4.669436438173466e-03 + ME 1.024603362434272e-04 Event 141 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3180,7 +3180,7 @@ Event 141 Batch 1 2 7.363238871411332e+02 -6.772722174663238e+02 -2.824373475598683e+02 -6.086341204880675e+01 3 
5.504260535970963e+02 4.650298533191528e+02 2.914345410616540e+02 4.221355560271704e+01 4 2.132500592617708e+02 2.122423641471711e+02 -8.997193501785816e+00 1.864985644608987e+01 - ME 7.300791864660033e-05 + ME 1.166401869382226e-05 Event 142 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3188,7 +3188,7 @@ Event 142 Batch 1 2 5.862280565156834e+02 4.248793793115829e+01 -2.479279504752411e+02 -5.295184989682986e+02 3 4.287264749982929e+02 -3.025296967755320e+02 2.785471849307642e+02 1.212173201341831e+02 4 4.850454684860405e+02 2.600417588443628e+02 -3.061923445551928e+01 4.083011788341197e+02 - ME 4.569028399965169e-05 + ME 1.949810022878841e-05 Event 143 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3196,7 +3196,7 @@ Event 143 Batch 1 2 2.464531733710510e+02 4.046044690030688e+01 -2.103865804466287e+02 1.218179201483223e+02 3 5.378449948854583e+02 4.607829603950880e+02 -2.747641700963839e+02 3.822241180409925e+01 4 7.157018317434903e+02 -5.012434072953949e+02 4.851507505430126e+02 -1.600403319524219e+02 - ME 1.284493741497843e-03 + ME 4.863434295951330e-04 Event 144 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3204,7 +3204,7 @@ Event 144 Batch 1 2 5.367418008803521e+02 -1.343004856786532e+02 -4.048537736989352e+02 -3.258044847458254e+02 3 6.294877130859599e+02 3.313530054622211e+02 5.282137272543231e+02 8.631468610520756e+01 4 3.337704860336884e+02 -1.970525197835678e+02 -1.233599535553879e+02 2.394897986406179e+02 - ME 2.612855607885159e-05 + ME 8.754930746282009e-06 Event 145 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3212,7 +3212,7 @@ Event 145 Batch 1 2 6.805380148481771e+01 -3.411514819754512e+01 -4.339750646760406e+01 -3.980116822894492e+01 3 6.831461500979880e+02 -3.834019790669201e+02 -2.756424954453614e+02 -4.936727656514237e+02 4 
7.488000484171945e+02 4.175171272644653e+02 3.190400019129655e+02 5.334739338803686e+02 - ME 4.832444287218038e-01 + ME 4.117012994651258e-01 Event 146 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3220,7 +3220,7 @@ Event 146 Batch 1 2 5.031746658797123e+02 4.202301876294930e+02 2.767377273314875e+02 2.750283520766640e+00 3 4.317115817339341e+02 -1.098088257924671e+02 -5.455162180567243e+01 4.139336083717602e+02 4 5.651137523863538e+02 -3.104213618370259e+02 -2.221861055258150e+02 -4.166838918925268e+02 - ME 4.446377084117306e-03 + ME 1.122040831263755e-03 Event 147 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3228,7 +3228,7 @@ Event 147 Batch 1 2 4.251223043705630e+02 -4.223502783198938e+02 -4.694338569631599e+01 1.206377286808446e+01 3 5.457819748703678e+02 2.791608945230574e+02 -4.384138579515959e+02 -1.665546403390879e+02 4 5.290957207590696e+02 1.431893837968364e+02 4.853572436479118e+02 1.544908674710035e+02 - ME 5.820013407126093e-05 + ME 1.117959404473985e-05 Event 148 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3236,7 +3236,7 @@ Event 148 Batch 1 2 6.905785821272525e+02 6.249608768654489e+02 -6.243387159972350e+01 -2.870970082698929e+02 3 1.361638260920089e+02 2.862044352088506e+01 1.704210379179796e+01 1.320266050727362e+02 4 6.732575917807402e+02 -6.535813203863343e+02 4.539176780792534e+01 1.550704031971573e+02 - ME 9.573948308169230e-04 + ME 5.047601105033982e-04 Event 149 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3244,7 +3244,7 @@ Event 149 Batch 1 2 6.694705528096943e+02 -5.216497821741067e+02 -3.785079074709545e+02 1.811189935345937e+02 3 2.821401257551277e+02 1.148500354702071e-01 2.786662494166578e+02 -4.413795199872407e+01 4 5.483893214351779e+02 5.215349321386365e+02 9.984165805429673e+01 -1.369810415358697e+02 - ME 
1.943324414096923e-04 + ME 3.486097449584098e-05 Event 150 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3252,7 +3252,7 @@ Event 150 Batch 1 2 4.637486188995366e+02 -4.033412855298819e+02 -2.279949807412008e+02 -1.992178895453991e+01 3 3.756800751656199e+02 6.230662615514293e+01 -2.632310737913946e+02 -2.606967683041707e+02 4 6.605713059348438e+02 3.410346593747391e+02 4.912260545325952e+02 2.806185572587107e+02 - ME 2.156945366470290e-04 + ME 4.211370643652993e-05 Event 151 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3260,7 +3260,7 @@ Event 151 Batch 1 2 3.821954355913596e+02 -2.528320044280690e+02 2.861764538722267e+02 1.588602445142563e+01 3 6.796189325418250e+02 2.911670128135291e+02 -4.900375979142738e+02 3.700902818893582e+02 4 4.381856318668152e+02 -3.833500838546018e+01 2.038611440420471e+02 -3.859763063407838e+02 - ME 8.197229841786387e-03 + ME 1.923941526207248e-04 Event 152 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3268,7 +3268,7 @@ Event 152 Batch 1 2 6.751133298339792e+02 -2.999578895043981e+02 -2.855974213275218e+02 -5.331391803034741e+02 3 4.976977783498468e+02 -3.003988119418482e+00 1.843802943840355e+02 4.622747685874795e+02 4 3.271888918161745e+02 3.029618776238166e+02 1.012171269434863e+02 7.086441171599445e+01 - ME 1.204579535049519e-04 + ME 6.977738125195056e-05 Event 153 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3276,7 +3276,7 @@ Event 153 Batch 1 2 1.729293620257127e+02 1.558357805102956e+02 -7.193392860849491e+01 2.110174585940510e+01 3 6.524550819255464e+02 2.410158908712478e+02 5.786677971610501e+02 1.809766692333240e+02 4 6.746155560487412e+02 -3.968516713815435e+02 -5.067338685525552e+02 -2.020784150927291e+02 - ME 5.985591428637023e-04 + ME 1.391654510317005e-04 Event 154 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3284,7 +3284,7 @@ Event 154 Batch 1 2 6.585658455851002e+02 -2.410305357139302e+02 -2.116446673272157e+02 -5.751693564652295e+02 3 5.764400833248005e+02 3.388133979948972e+02 3.092747322371399e+02 3.490527051926400e+02 4 2.649940710900988e+02 -9.778286228096688e+01 -9.763006490992416e+01 2.261166512725894e+02 - ME 3.655181799213059e-05 + ME 2.686434432328395e-05 Event 155 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3292,7 +3292,7 @@ Event 155 Batch 1 2 5.686586231936359e+02 -1.693366246265498e+02 -1.542203680657918e+02 5.204938187588979e+02 3 1.882190564276536e+02 -1.089234770645493e+02 -9.145416397064866e+01 1.232810822434430e+02 4 7.431223203787102e+02 2.782601016910992e+02 2.456745320364404e+02 -6.437749010023409e+02 - ME 6.696396361607482e-01 + ME 4.701119881405690e-01 Event 156 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3300,7 +3300,7 @@ Event 156 Batch 1 2 6.143652095725128e+02 2.879464601546110e+02 5.379391909976823e+02 -7.178351904348040e+01 3 6.287751645293085e+02 -4.584164185734781e+02 -4.225140875260598e+02 -8.181956094447702e+01 4 2.568596258981782e+02 1.704699584188668e+02 -1.154251034716223e+02 1.536030799879581e+02 - ME 2.899571701789112e-05 + ME 7.769660148731367e-06 Event 157 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3308,7 +3308,7 @@ Event 157 Batch 1 2 5.050842109798973e+02 4.185498850973046e+02 -1.305174306570672e+02 -2.507812875014723e+02 3 5.170424494038050e+02 -3.084595065654854e+02 3.930456446728388e+02 -1.330441599566699e+02 4 4.778733396162975e+02 -1.100903785318191e+02 -2.625282140157716e+02 3.838254474581424e+02 - ME 4.033251359625283e-05 + ME 1.243977993100618e-05 Event 158 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3316,7 +3316,7 @@ 
Event 158 Batch 1 2 4.312542366204098e+02 -3.114503370626313e+02 2.737030704635235e+02 1.185982013584742e+02 3 6.944315393047829e+02 2.166643175309468e+02 -6.173965008138002e+02 -2.326226495269423e+02 4 3.743142240748070e+02 9.478601953168439e+01 3.436934303502764e+02 1.140244481684682e+02 - ME 3.680357310121394e-05 + ME 5.864250821924803e-06 Event 159 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3324,7 +3324,7 @@ Event 159 Batch 1 2 5.860112473308646e+02 -1.581297551692178e+02 4.935632758462007e+02 2.734948907463652e+02 3 3.772013313646349e+02 -2.371132827856262e+02 -1.305099443644436e+02 -2.627266448837395e+02 4 5.367874213045002e+02 3.952430379548442e+02 -3.630533314817573e+02 -1.076824586262577e+01 - ME 1.030382455754272e-04 + ME 2.805189658646002e-05 Event 160 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3332,7 +3332,7 @@ Event 160 Batch 1 2 5.883409724804535e+02 -3.739819298758817e+02 -2.887651121595530e+02 3.505671490956299e+02 3 4.300332553173178e+02 1.788055146224819e+02 3.829208006453583e+02 7.955406370837679e+01 4 4.816257722022287e+02 1.951764152533999e+02 -9.415568848580530e+01 -4.301212128040066e+02 - ME 9.797271586219467e-03 + ME 2.307516153071828e-04 Event 161 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3340,7 +3340,7 @@ Event 161 Batch 1 2 6.868305165969147e+02 4.119610488151656e+00 5.515184990814985e+02 4.093244831537709e+02 3 3.260821955312833e+02 -1.956999890649130e+02 -2.483451099187458e+02 -7.972338993006402e+01 4 4.870872878718022e+02 1.915803785767614e+02 -3.031733891627526e+02 -3.296010932237070e+02 - ME 1.075603053132144e-03 + ME 9.860610555787331e-05 Event 162 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3348,7 +3348,7 @@ Event 162 Batch 1 2 2.159818802305119e+02 -2.018126805027919e+02 4.096951387107715e+01 
-6.512536763314942e+01 3 6.870078865581224e+02 4.896730732821633e+02 -2.356527215298929e+02 -4.203188222421333e+02 4 5.970102332113654e+02 -2.878603927793715e+02 1.946832076588156e+02 4.854441898752826e+02 - ME 5.344822454174306e-05 + ME 2.809071549115161e-05 Event 163 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3356,7 +3356,7 @@ Event 163 Batch 1 2 4.889699854403287e+02 -4.067839821807834e+01 -2.740835242435768e+02 4.028835269878222e+02 3 4.282392920294498e+02 4.007468150560176e+02 -8.832740907173851e+01 -1.224301852772270e+02 4 5.827907225302220e+02 -3.600684168379390e+02 3.624109333153153e+02 -2.804533417105952e+02 - ME 4.336231422638298e-04 + ME 1.173701793303044e-04 Event 164 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3364,7 +3364,7 @@ Event 164 Batch 1 2 6.224346677404150e+02 -1.282049393554146e+02 5.480608628970117e+02 -2.657399098565701e+02 3 7.444531740822750e+02 1.794330131141779e+02 -6.708967511266460e+02 2.681638893170603e+02 4 1.331121581773107e+02 -5.122807375876333e+01 1.228358882296343e+02 -2.423979460490191e+00 - ME 1.368953177788070e-04 + ME 1.571413941583783e-05 Event 165 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3372,7 +3372,7 @@ Event 165 Batch 1 2 6.980339706506675e+02 -5.154669325341684e+01 -4.947847840614098e+02 4.896757907618869e+02 3 1.362964882116331e+02 4.252532371924361e+01 -5.641238783031591e+01 -1.165588780002596e+02 4 6.656695411377010e+02 9.021369534174053e+00 5.511971718917263e+02 -3.731169127616273e+02 - ME 1.450267418906797e-03 + ME 4.238311927693088e-04 Event 166 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3380,7 +3380,7 @@ Event 166 Batch 1 2 3.060640747281171e+02 -1.981167412190918e+02 -9.095380261170779e+01 -2.148310510107333e+02 3 5.580104478575086e+02 -3.585720992432471e+02 
-1.558095186186280e+02 3.981521109704927e+02 4 6.359254774143739e+02 5.566888404623389e+02 2.467633212303362e+02 -1.833210599597597e+02 - ME 3.000804338470548e-04 + ME 1.099447007687216e-04 Event 167 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3388,7 +3388,7 @@ Event 167 Batch 1 2 2.833153623322893e+02 2.526850217013923e+02 8.687924899084067e+01 9.417998957332070e+01 3 6.595685044563415e+02 -8.780626893611850e+01 -2.875856231737449e+02 -5.870393347553995e+02 4 5.571161332113688e+02 -1.648787527652738e+02 2.007063741829043e+02 4.928593451820789e+02 - ME 7.367447958524992e-05 + ME 4.244421486768831e-05 Event 168 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3396,7 +3396,7 @@ Event 168 Batch 1 2 6.026267479353969e+02 -5.987968578530475e+02 5.775180228477150e+00 6.758674164241529e+01 3 4.991211680715713e+02 3.812575567959843e+02 3.220701575873951e+02 -5.952259631185711e+00 4 3.982520839930309e+02 2.175393010570631e+02 -3.278453378158730e+02 -6.163448201122968e+01 - ME 9.606399998327532e-05 + ME 1.203107058680061e-05 Event 169 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3404,7 +3404,7 @@ Event 169 Batch 1 2 5.510662376679772e+02 -9.251111075413947e+01 -5.291920243323356e+02 -1.227660134875281e+02 3 5.034535790022877e+02 -2.816014265681677e+02 3.283802195198170e+02 2.575511098657944e+02 4 4.454801833297348e+02 3.741125373223072e+02 2.008118048125185e+02 -1.347850963782663e+02 - ME 1.532484123791625e-04 + ME 2.085195230877358e-05 Event 170 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3412,7 +3412,7 @@ Event 170 Batch 1 2 2.814808559369750e+02 3.658097943502287e+01 -1.412301634042880e+02 -2.407225480659935e+02 3 6.646522150540470e+02 2.753499086551696e+02 -1.631412967142655e+02 5.825203104495404e+02 4 5.538669290089779e+02 -3.119308880901926e+02 
3.043714601185535e+02 -3.417977623835468e+02 - ME 7.823510217753851e-04 + ME 2.587160315460459e-04 Event 171 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3420,7 +3420,7 @@ Event 171 Batch 1 2 1.777965289077954e+02 -6.143496808852239e+01 -1.603735842336773e+00 1.668375809551635e+02 3 7.439290290569696e+02 2.163074211412066e+01 -1.907051550939623e+01 -7.433699124308462e+02 4 5.782744420352348e+02 3.980422597440174e+01 2.067425135173305e+01 5.765323314756826e+02 - ME 2.063755640794395e-03 + ME 1.981167274383509e-03 Event 172 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3428,7 +3428,7 @@ Event 172 Batch 1 2 1.369499454750680e+02 -1.250080331667568e+01 -3.518152151649629e+01 -1.317622025690455e+02 3 6.692885586315896e+02 -2.346283187163472e+02 -6.130705295376303e+02 1.305421486874673e+02 4 6.937614958933425e+02 2.471291220330227e+02 6.482520510541266e+02 1.220053881578238e+00 - ME 5.039586079692636e-04 + ME 1.548169060571347e-04 Event 173 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3436,7 +3436,7 @@ Event 173 Batch 1 2 7.088772083623137e+02 4.973951266878932e+01 3.171232495758680e+01 -7.064185769505260e+02 3 5.785136264307895e+02 8.584813303397833e+01 5.766505028397120e+01 5.691949191590089e+02 4 2.126091652068944e+02 -1.355876457027672e+02 -8.937737524155732e+01 1.372236577915166e+02 - ME 1.743760900867476e-04 + ME 1.732961413682620e-04 Event 174 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3444,7 +3444,7 @@ Event 174 Batch 1 2 4.367208701713482e+02 -3.923163287174704e+01 4.325755195957351e+02 -4.543585887727652e+01 3 3.528978856725088e+02 9.622572295106905e+01 1.987077746703234e+02 -2.753048278549415e+02 4 7.103812441561454e+02 -5.699409007932221e+01 -6.312832942660567e+02 3.207406867322186e+02 - ME 9.353677491192390e-04 + ME 
1.541208918572365e-04 Event 175 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3452,7 +3452,7 @@ Event 175 Batch 1 2 6.418562164876806e+02 1.962785648722137e+02 -6.110736372974047e+02 -6.567908015856712e+00 3 4.843421844702149e+02 -1.886631806266161e+02 3.569879071908527e+02 -2.674942804112337e+02 4 3.738015990421035e+02 -7.615384245597569e+00 2.540857301065516e+02 2.740621884270906e+02 - ME 3.029111560812189e-05 + ME 1.279055979705581e-05 Event 176 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3460,7 +3460,7 @@ Event 176 Batch 1 2 6.288652703123263e+02 4.005522031116294e+02 3.691482793515075e+02 3.142594606996526e+02 3 7.209127580467475e+02 -4.124575135572966e+02 -5.165298058232565e+02 -2.877341896975221e+02 4 1.502219716409257e+02 1.190531044566666e+01 1.473815264717492e+02 -2.652527100213051e+01 - ME 1.719274466020296e-04 + ME 1.300720357566141e-05 Event 177 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3468,7 +3468,7 @@ Event 177 Batch 1 2 4.716578040000077e+02 -4.521622645932388e+02 -1.012739918234145e+01 1.338200520767543e+02 3 3.021382980750606e+02 -2.714821202364266e+02 6.773215888881064e+01 -1.140059832109250e+02 4 7.262038979249317e+02 7.236443848296653e+02 -5.760475970646905e+01 -1.981406886582933e+01 - ME 2.354271252348000e-03 + ME 6.442260552556652e-04 Event 178 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3476,7 +3476,7 @@ Event 178 Batch 1 2 7.350088877399502e+02 -3.684484945749095e+02 -2.561732769425163e+02 -5.821159885132296e+02 3 1.415495174310248e+02 7.181268644032879e+01 1.095010133995263e+02 5.374692563910759e+01 4 6.234415948290248e+02 2.966358081345808e+02 1.466722635429900e+02 5.283690628741219e+02 - ME 1.035408980291912e-04 + ME 6.828487731379645e-05 Event 179 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -3484,7 +3484,7 @@ Event 179 Batch 1 2 7.426064621425413e+02 6.748632301344054e+01 7.201624948975951e+02 -1.681544967131679e+02 3 5.821031882499326e+02 8.394276920418550e-01 -5.588194474899291e+02 1.629854049874919e+02 4 1.752903496075256e+02 -6.832575070548241e+01 -1.613430474076661e+02 5.169091725675888e+00 - ME 9.197132478706931e-05 + ME 1.412410550503903e-05 Event 180 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3492,7 +3492,7 @@ Event 180 Batch 1 2 6.099515195485484e+02 2.272495331206023e+02 1.762692760011278e+02 -5.378918555193875e+02 3 5.718889655176699e+02 4.324570510796980e+01 -3.278409766521432e+02 4.665909256493895e+02 4 3.181595149337819e+02 -2.704952382285720e+02 1.515717006510154e+02 7.130092986999803e+01 - ME 5.401477812349802e-05 + ME 3.043963963928669e-05 Event 181 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3500,7 +3500,7 @@ Event 181 Batch 1 2 1.206370886915177e+02 -8.151225636567759e+01 1.767749325039422e+01 8.715827822142556e+01 3 6.451493408002739e+02 -6.748216257939080e+01 4.373428479320614e+02 4.694625256943417e+02 4 7.342135705082084e+02 1.489944189450684e+02 -4.550203411824557e+02 -5.566208039157672e+02 - ME 7.131653341377736e-02 + ME 2.625479922313071e-02 Event 182 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3508,7 +3508,7 @@ Event 182 Batch 1 2 4.626866082364760e+02 -3.084610429505738e+02 3.306629079434072e+02 9.794245113140897e+01 3 4.974966719253473e+02 3.582955998671217e+02 1.664640547097976e+02 -3.023523113558579e+02 4 5.398167198381765e+02 -4.983455691654795e+01 -4.971269626532048e+02 2.044098602244489e+02 - ME 5.959042767905828e-05 + ME 1.414799589613471e-05 Event 183 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3516,7 +3516,7 @@ Event 183 Batch 1 2 
3.304723045950491e+02 3.244647182058462e+00 3.209425641774955e+02 7.872284845075714e+01 3 4.379804819457451e+02 2.312428523500660e+02 3.131807483468383e+02 2.006775141049615e+02 4 7.315472134592065e+02 -2.344874995321247e+02 -6.341233125243344e+02 -2.794003625557186e+02 - ME 4.899988668912175e-03 + ME 2.330806393221907e-03 Event 184 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3524,7 +3524,7 @@ Event 184 Batch 1 2 7.470051035005908e+02 -4.953964753944513e+02 -4.028924750569613e+02 3.876552725878485e+02 3 2.183325716323390e+02 1.119040172022777e+02 1.451703047217021e+02 -1.186262424448778e+02 4 5.346623248670695e+02 3.834924581921736e+02 2.577221703352594e+02 -2.690290301429710e+02 - ME 5.441344453720516e-04 + ME 7.987999480474686e-05 Event 185 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3532,7 +3532,7 @@ Event 185 Batch 1 2 4.448583927494090e+02 2.810173563272025e+02 -3.384637477435971e+02 6.610995769032235e+01 3 6.236443795626774e+02 -1.690803760724666e+02 5.125139620028374e+02 3.125277225134823e+02 4 4.314972276879136e+02 -1.119369802547359e+02 -1.740502142592404e+02 -3.786376802038046e+02 - ME 6.949230823829164e-03 + ME 1.405605442011058e-04 Event 186 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3540,7 +3540,7 @@ Event 186 Batch 1 2 6.802792190696962e+02 -1.681815241656754e+02 5.427923640013703e+02 3.739936368565512e+02 3 6.331554869749547e+02 3.172201723440435e+02 -4.588808692389625e+02 -2.994755095011972e+02 4 1.865652939553488e+02 -1.490386481783679e+02 -8.391149476240778e+01 -7.451812735535422e+01 - ME 3.276943053321406e-04 + ME 3.045129627255903e-05 Event 187 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3548,7 +3548,7 @@ Event 187 Batch 1 2 7.472897115267965e+02 -6.988402471604775e+02 -2.391684329048669e+02 1.134137672609268e+02 3 
6.826908170748527e+02 6.328852277257668e+02 2.212839847556716e+02 -1.286718241709738e+02 4 7.001947139835140e+01 6.595501943471052e+01 1.788444814919547e+01 1.525805691004725e+01 - ME 1.461490870437387e-04 + ME 3.485925693242860e-05 Event 188 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3556,7 +3556,7 @@ Event 188 Batch 1 2 6.496068877140275e+02 -5.024316730938291e+02 -3.980061777252906e+02 -1.055585379310702e+02 3 4.885976180718368e+02 4.424928723138696e+02 1.459942636040002e+02 -1.470148473169288e+02 4 3.617954942141354e+02 5.993880077995960e+01 2.520119141212904e+02 2.525733852479991e+02 - ME 2.843805826594158e-05 + ME 1.006519408431335e-05 Event 189 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3564,7 +3564,7 @@ Event 189 Batch 1 2 4.082379946778654e+02 2.679237131173331e+02 -7.718184435750955e+01 2.981913934867987e+02 3 5.864211573889181e+02 -5.780822197382728e+02 -6.394893886953379e+01 7.497502433004084e+01 4 5.053408479332167e+02 3.101585066209396e+02 1.411307832270433e+02 -3.731664178168398e+02 - ME 1.937644878671120e-03 + ME 1.322787627040098e-04 Event 190 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3572,7 +3572,7 @@ Event 190 Batch 1 2 6.472516823166364e+02 6.463779961822676e+02 -3.289365889632791e+01 6.945035458816692e+00 3 4.318767277050750e+02 -3.286790725415815e+02 -7.183748821760624e+00 -2.800642229191639e+02 4 4.208715899782885e+02 -3.176989236406859e+02 4.007740771808847e+01 2.731191874603472e+02 - ME 3.409584379294133e-05 + ME 1.272332211942340e-05 Event 191 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3580,7 +3580,7 @@ Event 191 Batch 1 2 6.757500036387052e+02 6.222744522021635e+02 -2.261571472854044e+02 1.351499844096745e+02 3 3.644673602666567e+02 -2.020102809038697e+02 1.114149692296405e+02 -2.821613151026251e+02 4 
4.597826360946380e+02 -4.202641712982938e+02 1.147421780557637e+02 1.470113306929507e+02 - ME 5.389305783035389e-05 + ME 1.560703181590231e-05 Event 192 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3588,7 +3588,7 @@ Event 192 Batch 1 2 7.394562478491531e+02 -7.307873850878615e+02 3.988568028534699e+01 1.056147375500683e+02 3 8.098058518630978e+01 5.419286926826393e+01 4.244928426361276e+00 -6.002473390399248e+01 4 6.795631669645365e+02 6.765945158195976e+02 -4.413060871170821e+01 -4.559000364607596e+01 - ME 4.204295748489254e-04 + ME 1.231033846344155e-04 Event 193 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3596,7 +3596,7 @@ Event 193 Batch 1 2 5.607395612273153e+02 -3.164229781907934e+02 -3.517992386171808e+02 -3.009030576558548e+02 3 3.741643617741927e+02 -2.156271676189966e+02 1.666697084176705e+02 2.563690747778811e+02 4 5.650960769984922e+02 5.320501458097899e+02 1.851295301995104e+02 4.453398287797368e+01 - ME 9.141090879934244e-05 + ME 3.026844143728605e-05 Event 194 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3604,7 +3604,7 @@ Event 194 Batch 1 2 5.729373416862012e+02 -2.155045544874616e+02 -1.679805246197324e+02 5.035846779262559e+02 3 2.831035485618876e+02 -2.543279085173982e+02 1.042261812492671e+02 -6.783684323208054e+01 4 6.439591097519118e+02 4.698324630048598e+02 6.375434337046515e+01 -4.357478346941756e+02 - ME 1.781231321893996e-03 + ME 5.497724763810379e-04 Event 195 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3612,7 +3612,7 @@ Event 195 Batch 1 2 5.572874060171201e+02 -5.433144409127298e+02 3.646295232533866e+01 1.185290019729285e+02 3 6.765845568040619e+02 5.574999049241243e+02 -1.212989803269169e+01 -3.831623469093195e+02 4 2.661280371788181e+02 -1.418546401139455e+01 -2.433305429264712e+01 2.646333449363910e+02 - ME 
3.395618115588225e-04 + ME 3.378534889977447e-04 Event 196 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3620,7 +3620,7 @@ Event 196 Batch 1 2 5.405888343305829e+02 3.940239871950471e+02 -8.826690628749978e+01 -3.594305754554688e+02 3 6.983754392688073e+02 -3.888370902622853e+02 -5.513072771506098e+01 5.774898910559966e+02 4 2.610357264006097e+02 -5.186896932761887e+00 1.433976340025607e+02 -2.180593156005277e+02 - ME 5.539073969003598e-03 + ME 2.676929502290073e-04 Event 197 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3628,7 +3628,7 @@ Event 197 Batch 1 2 2.783346334111661e+02 2.282410890438732e+02 -1.474467226896361e+02 6.029624695020830e+01 3 6.434654504578666e+02 1.172104173128919e+01 6.205939438823057e+02 1.696277097949658e+02 4 5.781999161309674e+02 -2.399621307751624e+02 -4.731472211926695e+02 -2.299239567451741e+02 - ME 3.321087064690878e-04 + ME 4.280180350752636e-05 Event 198 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3636,7 +3636,7 @@ Event 198 Batch 1 2 4.349536439683943e+02 1.774777254208009e+02 -9.709992209949135e+01 3.850427697141142e+02 3 4.134500153047116e+02 7.095914770071803e+01 -4.041194890923881e+02 -5.092301099466194e+01 4 6.515963407268921e+02 -2.484368731215197e+02 5.012194111918782e+02 -3.341197587194521e+02 - ME 7.849443582399766e-04 + ME 2.926862112764983e-04 Event 199 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3644,7 +3644,7 @@ Event 199 Batch 1 2 6.682109290882580e+02 2.136897997740939e+02 -5.035763266519416e+02 3.837361052354048e+02 3 1.424120473397155e+02 8.952788458880865e+01 -4.686863299276860e+01 -1.003458038481504e+02 4 6.893770235720265e+02 -3.032176843629025e+02 5.504449596447103e+02 -2.833903013872543e+02 - ME 1.167594898598604e-03 + ME 4.183851150998592e-04 Event 200 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3652,7 +3652,7 @@ Event 200 Batch 1 2 5.959952693237885e+02 -4.878566955018547e+02 -2.510837703973929e+01 -3.414319479966339e+02 3 4.479637599869168e+02 4.499951041477978e+01 7.146287716862105e+01 4.399313940955211e+02 4 4.560409706892941e+02 4.428571850870749e+02 -4.635450012888173e+01 -9.849944609888662e+01 - ME 5.545496796633981e-04 + ME 3.228844805909175e-04 Event 201 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3660,7 +3660,7 @@ Event 201 Batch 1 2 5.203096708642927e+02 -1.112696379946441e+02 1.367824427202020e+02 4.895219960522141e+02 3 2.871951825199399e+02 -2.582762312778227e+02 1.200876310962787e+02 3.678888524092984e+01 4 6.924951466157675e+02 3.695458692724667e+02 -2.568700738164807e+02 -5.263108812931440e+02 - ME 6.577575910850049e-03 + ME 2.285182473348715e-03 Event 202 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3668,7 +3668,7 @@ Event 202 Batch 1 2 2.158792376054218e+02 2.112389782008981e+01 -7.195062193526132e+01 -2.024369881546198e+02 3 5.463652944256570e+02 2.787950008966254e+02 -3.108926376755554e+02 -3.523267663221479e+02 4 7.377554679689213e+02 -2.999188987167153e+02 3.828432596108168e+02 5.547637544767679e+02 - ME 8.695282964050810e-03 + ME 1.952686275320307e-03 Event 203 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3676,7 +3676,7 @@ Event 203 Batch 1 2 7.124273471334275e+02 4.879265047129839e+02 -1.059167473143779e+02 -5.081949365946950e+02 3 6.746108110440506e+02 -5.248642991835990e+02 4.352799102536777e+01 4.215714978711400e+02 4 1.129618418225217e+02 3.693779447061509e+01 6.238875628901040e+01 8.662343872355494e+01 - ME 5.361938367485652e-05 + ME 4.211918129012132e-05 Event 204 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3684,7 +3684,7 @@ Event 
204 Batch 1 2 7.084787759842808e+02 4.992472551829619e+02 -4.528122431715626e+02 -2.183012291454193e+02 3 1.034373169902747e+02 -8.959882065299325e+01 -3.938861547415055e+01 -3.346441176487074e+01 4 6.880839070254444e+02 -4.096484345299685e+02 4.922008586457131e+02 2.517656409102901e+02 - ME 2.988048706021647e-04 + ME 1.033102023766027e-04 Event 205 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3692,7 +3692,7 @@ Event 205 Batch 1 2 6.496569846879349e+02 -5.869603795046561e+02 -2.345911576090251e+02 1.499956646614410e+02 3 2.543878192344406e+02 -1.851019090219859e+00 2.474675926596849e+02 -5.890268997594536e+01 4 5.959551960776247e+02 5.888113985948760e+02 -1.287643505065981e+01 -9.109297468549572e+01 - ME 1.871447246980874e-04 + ME 4.134215827558992e-05 Event 206 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3700,7 +3700,7 @@ Event 206 Batch 1 2 6.172060642836410e+02 2.978040691523503e+02 4.166709400833434e+02 3.444435946201744e+02 3 7.205754982426181e+02 -2.468045809177361e+02 -5.690387091428452e+02 -3.667580878490107e+02 4 1.622184374737409e+02 -5.099948823461420e+01 1.523677690595017e+02 2.231449322883641e+01 - ME 7.356489425273393e-05 + ME 1.138691716042452e-05 Event 207 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3708,7 +3708,7 @@ Event 207 Batch 1 2 5.250113096394139e+02 -1.091977068802181e+02 -4.322753509449321e+02 2.772196909074646e+02 3 5.240251005653129e+02 3.541948269240045e+02 3.738549241960732e+02 9.685466564450643e+01 4 4.509635897952731e+02 -2.449971200437864e+02 5.842042674885889e+01 -3.740743565519710e+02 - ME 3.378615964480245e-03 + ME 9.518274156960593e-05 Event 208 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3716,7 +3716,7 @@ Event 208 Batch 1 2 4.449444343820048e+02 1.928662436733418e+02 -3.595193210859464e+02 
1.775500478872298e+02 3 4.894053462810564e+02 -2.195789585225567e+02 2.295326432211599e+02 3.723136307450180e+02 4 5.656502193369389e+02 2.671271484921488e+01 1.299866778647865e+02 -5.498636786322478e+02 - ME 2.068943926258950e-01 + ME 2.179806976662403e-03 Event 209 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3724,7 +3724,7 @@ Event 209 Batch 1 2 4.949423498078044e+02 -2.830370809537592e+02 -1.684680620467476e+02 -3.694271951395289e+02 3 6.326444171345161e+02 3.898538983719823e+02 -1.748162179498052e+02 4.665749526039372e+02 4 3.724132330576786e+02 -1.068168174182231e+02 3.432842799965525e+02 -9.714775746440780e+01 - ME 1.473942246791387e-04 + ME 3.638076645868775e-05 Event 210 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3732,7 +3732,7 @@ Event 210 Batch 1 2 5.469464199121014e+02 -4.947084169679945e+02 2.319240083666633e+02 -2.500445517953792e+01 3 2.929141603572806e+02 -5.602902696925145e+01 2.099470855189298e+01 2.867379913571110e+02 4 6.601394197306178e+02 5.507374439372461e+02 -2.529187169185561e+02 -2.617335361775729e+02 - ME 1.577330101330874e-03 + ME 7.792286450853471e-04 Event 211 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3740,7 +3740,7 @@ Event 211 Batch 1 2 5.484404249965427e+02 1.659778109685243e+01 3.514591842057613e+02 -4.206992456262192e+02 3 4.635537606517395e+02 -3.607884938122542e+02 -3.140996451540818e+01 2.893564685231623e+02 4 4.880058143517181e+02 3.441907127154018e+02 -3.200492196903532e+02 1.313427771030569e+02 - ME 4.999214184618137e-05 + ME 1.717788621912363e-05 Event 212 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3748,7 +3748,7 @@ Event 212 Batch 1 2 6.930853388432640e+02 -3.424793196872474e+02 -8.152110066892747e+01 5.970171795281683e+02 3 9.131624224772825e+01 6.738328155058525e+01 1.365968298972706e+01 
6.009627714210347e+01 4 7.155984189090078e+02 2.750960381366621e+02 6.786141767920034e+01 -6.571134566702718e+02 - ME 3.224436999651524e-01 + ME 4.440767413899675e-02 Event 213 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3756,7 +3756,7 @@ Event 213 Batch 1 2 7.316448870278512e+02 4.203233031264803e+02 4.913598772661251e+02 -3.423419819067778e+02 3 4.750162603483208e+02 -1.726357548525294e+02 -3.708603862154638e+02 2.414537588813190e+02 4 2.933388526238279e+02 -2.476875482739507e+02 -1.204994910506614e+02 1.008882230254589e+02 - ME 4.008080891216109e-05 + ME 1.166473784051930e-05 Event 214 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3764,7 +3764,7 @@ Event 214 Batch 1 2 4.805779599533694e+02 3.904513572450257e+02 -1.742898429406511e+02 2.193763065287195e+02 3 6.164938851206517e+02 -5.563771061772993e+02 2.227142270499353e+02 1.445946028815716e+02 4 4.029281549259790e+02 1.659257489322735e+02 -4.842438410928419e+01 -3.639709094102910e+02 - ME 1.130096726278085e-02 + ME 1.644694060635318e-04 Event 215 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3772,7 +3772,7 @@ Event 215 Batch 1 2 4.610896439725640e+02 -3.106576460930037e+02 -3.050258363865880e+02 -1.518378274323046e+02 3 7.153470686812809e+02 2.726436938726979e+02 6.046054769368644e+02 2.680280994976061e+02 4 3.235632873461531e+02 3.801395222030658e+01 -2.995796405502758e+02 -1.161902720653026e+02 - ME 2.130646114222361e-04 + ME 1.638803663744001e-05 Event 216 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3780,7 +3780,7 @@ Event 216 Batch 1 2 5.309452696424389e+02 -4.912950836090372e+02 -3.608909251460832e+01 -1.980646298023531e+02 3 6.627369363365399e+02 4.479096066616000e+02 2.308759280187052e+02 4.304573578259469e+02 4 3.063177940210212e+02 4.338547694743724e+01 -1.947868355040969e+02 
-2.323927280235938e+02 - ME 1.881406502208647e-03 + ME 7.684209531203918e-05 Event 217 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3788,7 +3788,7 @@ Event 217 Batch 1 2 4.608032244164870e+02 2.215832851737383e+02 3.318832460795877e+02 -2.304212888079594e+02 3 3.107022283044695e+02 -4.724697178681157e+01 2.830528592337836e+02 -1.190994425256424e+02 4 7.284945472790432e+02 -1.743363133869267e+02 -6.149361053133712e+02 3.495207313336019e+02 - ME 2.894775763457067e-03 + ME 4.426756984161849e-04 Event 218 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3796,7 +3796,7 @@ Event 218 Batch 1 2 6.336891602166270e+02 5.249943224110900e+02 1.648031440577737e+02 -3.142973702098814e+02 3 5.195346944320743e+02 -3.655895580768890e+02 -3.610279413409480e+02 7.693763263116504e+01 4 3.467761453512956e+02 -1.594047643342018e+02 1.962247972831736e+02 2.373597375787177e+02 - ME 2.703962034458943e-05 + ME 8.957256945094420e-06 Event 219 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3804,7 +3804,7 @@ Event 219 Batch 1 2 2.579228498517417e+02 -4.166553381892272e+01 1.191899344508913e+02 2.249042891828000e+02 3 7.453266221408651e+02 -3.354388163550532e+01 -3.947818065141064e+02 -6.312954196904914e+02 4 4.967505280073930e+02 7.520941545442813e+01 2.755918720632151e+02 4.063911305076915e+02 - ME 6.103184694489295e-05 + ME 4.019449398167179e-05 Event 220 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3812,7 +3812,7 @@ Event 220 Batch 1 2 4.940336288355577e+02 -2.383755021420815e+02 -2.918661661143953e+02 3.194690712363630e+02 3 7.129224521449780e+02 2.727447507998269e+02 2.535039959962389e+02 -6.079510240944473e+02 4 2.930439190194635e+02 -3.436924865774512e+01 3.836217011815621e+01 2.884819528580837e+02 - ME 1.761519882509421e-04 + ME 1.677977866215262e-04 Event 221 Batch 1 
0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3820,7 +3820,7 @@ Event 221 Batch 1 2 3.305414381337777e+02 -2.712796684963201e+02 -1.199910663213094e+02 -1.458325333632650e+02 3 7.388441803280767e+02 5.510455284380058e+02 4.375213740715825e+02 2.254209298704556e+02 4 4.306143815381457e+02 -2.797658599416856e+02 -3.175303077502730e+02 -7.958839650719051e+01 - ME 1.338118621913618e-04 + ME 1.392897982206581e-05 Event 222 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3828,7 +3828,7 @@ Event 222 Batch 1 2 4.657562074797755e+02 2.823280548971349e+02 2.956503281023745e+02 2.231828795335844e+02 3 4.791948192186352e+02 -3.228825926298714e+02 2.575611801233854e+02 -2.429747818931873e+02 4 5.550489733015891e+02 4.055453773273638e+01 -5.532115082257600e+02 1.979190235960287e+01 - ME 9.040551632672907e-05 + ME 2.328731171682892e-05 Event 223 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3836,7 +3836,7 @@ Event 223 Batch 1 2 1.612164685986321e+02 -4.527922182271191e+01 -1.095260585492910e+01 1.543391792239740e+02 3 6.984218503485876e+02 -4.629950983513680e+02 2.605715575888556e+02 -4.533553609726805e+02 4 6.403616810527805e+02 5.082743201740799e+02 -2.496189517339264e+02 2.990161817487066e+02 - ME 4.148580235863498e-04 + ME 2.446487784841432e-04 Event 224 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3844,7 +3844,7 @@ Event 224 Batch 1 2 1.663853414671972e+02 -1.350882138037309e+02 9.706071747767010e+01 3.804401292344658e+00 3 6.436745581417563e+02 -4.469273298203079e+02 -4.412749113764766e+02 -1.408877256838118e+02 4 6.899401003910457e+02 5.820155436240389e+02 3.442141938988058e+02 1.370833243914657e+02 - ME 3.449215697364171e-04 + ME 9.431632941984795e-05 Event 225 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-3852,7 +3852,7 @@ Event 225 Batch 1 2 6.702356777533546e+02 6.117158080352369e+02 -2.649249521350114e+02 -6.952987609335720e+01 3 6.901224376513153e+02 -6.564819557015361e+02 1.560869289536550e+02 1.446972404640001e+02 4 1.396418845953297e+02 4.476614766629927e+01 1.088380231813564e+02 -7.516736437064299e+01 - ME 6.407468428023662e-04 + ME 2.456039108263569e-05 Event 226 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3860,7 +3860,7 @@ Event 226 Batch 1 2 7.307777643673112e+02 -4.569648094661606e+02 4.416236342013199e+02 -3.608155616351098e+02 3 1.446420186345137e+02 4.133161435221925e+01 -3.411742569426914e+01 1.343466131828505e+02 4 6.245802169981752e+02 4.156331951139413e+02 -4.075062085070508e+02 2.264689484522593e+02 - ME 4.858390443010437e-04 + ME 2.774761612267077e-04 Event 227 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3868,7 +3868,7 @@ Event 227 Batch 1 2 7.408615397889290e+02 -4.398089081634772e+02 -5.325812259979131e+02 2.679574278743413e+02 3 4.035753807128123e+02 3.000971513323747e+02 2.468113220276344e+02 -1.090823496201683e+02 4 3.555630794982585e+02 1.397117568311025e+02 2.857699039702786e+02 -1.588750782541728e+02 - ME 3.215647103618368e-04 + ME 3.077346064218035e-05 Event 228 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3876,7 +3876,7 @@ Event 228 Batch 1 2 5.775455372723294e+02 -3.656199842755111e+02 -6.289501053880601e+01 4.426342647953073e+02 3 3.247306314578497e+02 8.776645762339835e+01 3.116872137482897e+02 2.445634292125525e+01 4 5.977238312698206e+02 2.778535266521127e+02 -2.487922032094836e+02 -4.670906077165625e+02 - ME 3.156934429573604e-03 + ME 3.399241079583280e-04 Event 229 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3884,7 +3884,7 @@ Event 229 Batch 1 2 3.665477125629453e+02 -2.081014917770363e+02 
2.317985113364040e+02 -1.931850016112187e+02 3 6.187040836990479e+02 -2.134593092471877e+02 -3.484367286517815e+02 4.645661552545953e+02 4 5.147482037380067e+02 4.215608010242241e+02 1.166382173153775e+02 -2.713811536433765e+02 - ME 4.392210547845218e-04 + ME 8.330968691049859e-05 Event 230 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3892,7 +3892,7 @@ Event 230 Batch 1 2 5.913978529013565e+02 -4.986092821675885e+02 -3.028328044703767e+02 9.712104143419764e+01 3 3.439186614041002e+02 -6.573524045766426e+01 3.216488491089061e+02 -1.024741025375549e+02 4 5.646834856945436e+02 5.643445226252528e+02 -1.881604463852933e+01 5.353061103357447e+00 - ME 1.067159092411647e-04 + ME 2.296146042402505e-05 Event 231 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3900,7 +3900,7 @@ Event 231 Batch 1 2 5.760768557894827e+02 -7.075794524290799e+01 5.609870884449791e+02 1.102331327656218e+02 3 6.038619762337338e+02 -2.467027894308989e+02 -5.464177649873398e+02 -7.221250677108812e+01 4 3.200611679767834e+02 3.174607346738069e+02 -1.456932345763944e+01 -3.802062599453370e+01 - ME 8.750887998909065e-05 + ME 9.438631267217403e-06 Event 232 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3908,7 +3908,7 @@ Event 232 Batch 1 2 7.230187249684843e+02 -2.426041066061352e+02 1.884455685697195e+02 -6.545132479937492e+02 3 4.821326920133732e+02 2.438648429837413e+02 -1.563760752388986e+01 4.156168142598493e+02 4 2.948485830181424e+02 -1.260736377606032e+00 -1.728079610458298e+02 2.388964337338999e+02 - ME 4.549716999825542e-05 + ME 3.745272037455064e-05 Event 233 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3916,7 +3916,7 @@ Event 233 Batch 1 2 3.540260977608100e+02 -1.904526694678991e+02 -1.042089619355360e+02 -2.796475475319170e+02 3 4.925592302096041e+02 
1.195034224421750e+02 3.554637678715695e+02 -3.193415679485398e+02 4 6.534146720295859e+02 7.094924702572415e+01 -2.512548059360335e+02 5.989891154804569e+02 - ME 2.494643034161164e-04 + ME 1.035644942794080e-04 Event 234 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3924,7 +3924,7 @@ Event 234 Batch 1 2 1.866526101194276e+02 7.776953530733704e+01 -1.047503781897390e+01 1.693557493124073e+02 3 6.012752698516817e+02 5.974840035795012e+02 -4.570329760029643e+01 4.955829083294186e+01 4 7.120721200288899e+02 -6.752535388868379e+02 5.617833541927040e+01 -2.189140401453492e+02 - ME 2.154454342135980e-03 + ME 6.655948749153013e-04 Event 235 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3932,7 +3932,7 @@ Event 235 Batch 1 2 5.032945404607945e+02 1.612889276925247e+02 2.561838854094329e+02 -4.020710050699558e+02 3 7.153634726767370e+02 -3.739069589148947e+02 -1.979140468542061e+02 5.768609140624169e+02 4 2.813419868624690e+02 2.126180312223700e+02 -5.826983855522722e+01 -1.747899089924609e+02 - ME 8.184939555880423e-04 + ME 1.137471703441233e-04 Event 236 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3940,7 +3940,7 @@ Event 236 Batch 1 2 6.980797829886610e+02 -9.803971882836288e+00 4.740144261428889e+02 5.123764137440797e+02 3 5.519387921056282e+02 -1.638876688381594e+02 -3.209728652821290e+02 -4.180355032606608e+02 4 2.499814249057108e+02 1.736916407209956e+02 -1.530415608607599e+02 -9.434091048341891e+01 - ME 2.813360227943072e-04 + ME 5.842524801707843e-05 Event 237 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3948,7 +3948,7 @@ Event 237 Batch 1 2 1.604490925133743e+02 6.212857081252698e+01 9.075394990141041e+01 1.168232534834160e+02 3 6.578242662283152e+02 5.348507070161563e+02 -3.810396531957998e+02 3.842224792439630e+01 4 6.817266412583107e+02 
-5.969792778286832e+02 2.902857032943894e+02 -1.552455014078122e+02 - ME 8.205069948818567e-04 + ME 1.834055676127939e-04 Event 238 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3956,7 +3956,7 @@ Event 238 Batch 1 2 2.789018340499539e+02 1.069933592962543e+02 -2.572713415352736e+02 1.225197647611563e+01 3 4.761759619803052e+02 7.755191627191856e+01 -4.591043622469822e+02 -9.976187456245104e+01 4 7.449222039697408e+02 -1.845452755681728e+02 7.163757037822556e+02 8.750989808633538e+01 - ME 4.130258343824905e-02 + ME 9.445005309896021e-03 Event 239 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3964,7 +3964,7 @@ Event 239 Batch 1 2 4.581461811054764e+02 -3.899520773556200e+02 2.006122777919944e+02 1.326273524830990e+02 3 3.013476461129690e+02 -2.996604136348060e+02 3.145663680794619e+01 4.951799549362093e+00 4 7.405061727815548e+02 6.896124909904260e+02 -2.320689145999406e+02 -1.375791520324611e+02 - ME 1.351152256907066e-02 + ME 4.970363634614722e-03 Event 240 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3972,7 +3972,7 @@ Event 240 Batch 1 2 5.932490652975304e+02 -4.094504138983958e+01 -3.300190662632461e+02 4.912793227530680e+02 3 3.147487537014150e+02 3.081803657249563e+02 4.097350029662016e+01 -4.912038692507519e+01 4 5.920021810010543e+02 -2.672353243351168e+02 2.890455659666260e+02 -4.421589358279927e+02 - ME 2.300291351402201e-03 + ME 3.420638167820422e-04 Event 241 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3980,7 +3980,7 @@ Event 241 Batch 1 2 4.438703186026563e+01 1.425431959717181e+01 -4.430288595443099e+00 -4.180186016371768e+01 3 7.139617398095604e+02 -8.415544716076485e+01 -5.657765076565163e+02 -4.272659242311072e+02 4 7.416512283301737e+02 6.990112756359306e+01 5.702067962519594e+02 4.690677843948249e+02 - ME 
9.657825758456334e-03 + ME 9.983667466725972e-03 Event 242 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3988,7 +3988,7 @@ Event 242 Batch 1 2 3.798759956195423e+02 -1.259218082844715e+02 -3.429343473884153e+02 1.041417477651927e+02 3 6.208895880511435e+02 5.354328139337265e+02 1.248673426784089e+02 -2.884852319370315e+02 4 4.992344163293142e+02 -4.095110056492549e+02 2.180670047100064e+02 1.843434841718389e+02 - ME 4.523810239016752e-05 + ME 1.030886114253601e-05 Event 243 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3996,7 +3996,7 @@ Event 243 Batch 1 2 2.320641800899440e+02 1.658639294991472e+02 7.783463994856535e+01 1.424243988788334e+02 3 6.251485586341132e+02 -2.328139095298017e+02 -4.262931976140131e+02 3.935511574875350e+02 4 6.427872612759426e+02 6.694998003065477e+01 3.484585576654476e+02 -5.359755563663684e+02 - ME 1.068434238404496e-02 + ME 8.493072129055412e-04 Event 244 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4004,7 +4004,7 @@ Event 244 Batch 1 2 6.609991843787810e+02 -2.293678857540617e+02 -4.971623496474938e+02 -3.703240376037023e+02 3 1.091403980947070e+02 1.154537470975927e+01 -9.115666825632124e+00 -1.081445118228680e+02 4 7.298604175265119e+02 2.178225110443025e+02 5.062780164731259e+02 4.784685494265703e+02 - ME 2.129811247265830e-03 + ME 9.635755455313371e-04 Event 245 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4012,7 +4012,7 @@ Event 245 Batch 1 2 4.893629130846664e+02 -3.546974954177181e+02 3.112856868655738e+02 -1.294873298810978e+02 3 7.129026631852477e+02 5.703735458058533e+02 -4.257115617679147e+02 -4.091322034012423e+01 4 2.977344237300874e+02 -2.156760503881352e+02 1.144258749023406e+02 1.704005502212233e+02 - ME 2.548352504440589e-05 + ME 5.312368446054512e-06 Event 246 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4020,7 +4020,7 @@ Event 246 Batch 1 2 3.999457395350199e+02 9.605025124341067e+01 9.072234098128430e+01 3.774922524438975e+02 3 3.675469088581873e+02 -1.615841482674670e+01 2.570183669846762e+02 2.622426259669196e+02 4 7.325073516067924e+02 -7.989183641666393e+01 -3.477407079659604e+02 -6.397348784108170e+02 - ME 1.294421983622042e-01 + ME 5.023802198964801e-02 Event 247 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4028,7 +4028,7 @@ Event 247 Batch 1 2 6.711864521923226e+02 3.763073240556692e+02 5.338170415278108e+02 1.546719678644905e+02 3 5.231557804938882e+02 -1.057595517177888e+02 -5.121603131388773e+02 -1.409615302513522e+01 4 3.056577673137891e+02 -2.705477723378804e+02 -2.165672838893370e+01 -1.405758148393554e+02 - ME 2.873345328272106e-04 + ME 1.980507958825256e-05 Event 248 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4036,7 +4036,7 @@ Event 248 Batch 1 2 6.307803946875938e+02 -6.240065811552291e+01 -3.654556314590158e+02 5.103256270499047e+02 3 3.935347424219227e+02 -2.188782290807617e+02 2.916853933646314e+01 -3.257470040392325e+02 4 4.756848628904837e+02 2.812788871962847e+02 3.362870921225527e+02 -1.845786230106721e+02 - ME 2.418190194667681e-04 + ME 8.712398839363553e-05 Event 249 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4044,7 +4044,7 @@ Event 249 Batch 1 2 4.326970760901858e+02 -4.070406664121577e+02 -1.467447404863359e+02 3.261392852829594e+00 3 4.839435229991528e+02 2.335311811831339e+01 2.018595963184923e+02 -4.392136936630267e+02 4 5.833594009106607e+02 3.836875482938447e+02 -5.511485583215654e+01 4.359523008101972e+02 - ME 8.354140201035124e-05 + ME 2.487145538635957e-05 Event 250 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4052,7 +4052,7 @@ 
Event 250 Batch 1 2 7.010671671345858e+02 -6.122994886156980e+02 -2.473946684860857e+02 2.353303785738851e+02 3 5.574643785654457e+02 3.902114201641945e+02 2.260985614407801e+02 -3.276904354069721e+02 4 2.414684542999681e+02 2.220880684515034e+02 2.129610704530562e+01 9.236005683308701e+01 - ME 4.704118057291807e-05 + ME 1.645582299148298e-05 Event 251 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4060,7 +4060,7 @@ Event 251 Batch 1 2 7.364006127103795e+02 5.379960890463808e+02 4.302640987755426e+02 2.602285070392761e+02 3 3.051282143252570e+01 -2.901685968644106e+00 1.337962970917706e+01 -2.726899336532026e+01 4 7.330865658570956e+02 -5.350944030777371e+02 -4.436437284847198e+02 -2.329595136739561e+02 - ME 8.340546584740779e-03 + ME 6.389613086136084e-03 Event 252 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4068,7 +4068,7 @@ Event 252 Batch 1 2 5.965625584838610e+02 -7.369842915522101e+01 -5.671364104158780e+02 -1.697401534860145e+02 3 6.549338760881149e+02 -1.514014639568436e+02 6.313240788068730e+02 8.628954906696529e+01 4 2.485035654280235e+02 2.250998931120648e+02 -6.418766839099484e+01 8.345060441904938e+01 - ME 3.985162011735342e-05 + ME 7.225550854378042e-06 Event 253 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4076,7 +4076,7 @@ Event 253 Batch 1 2 5.728678540484714e+02 3.212236187283236e+01 -4.622666283104808e+02 -3.368312580807653e+02 3 7.160302400837320e+02 1.132435775281999e+02 5.206369974620781e+02 4.783433011307397e+02 4 2.111019058677967e+02 -1.453659394010323e+02 -5.837036915159722e+01 -1.415120430499744e+02 - ME 1.248429186447426e-03 + ME 7.499676590470843e-05 Event 254 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4084,7 +4084,7 @@ Event 254 Batch 1 2 5.579357369440610e+02 1.333150067790222e+02 -6.785864805882139e+01 
5.375077668373273e+02 3 6.202682598689536e+02 -4.039338689731095e+02 2.012068793592834e+02 -4.255419314189536e+02 4 3.217960031869852e+02 2.706188621940872e+02 -1.333482313004621e+02 -1.119658354183736e+02 - ME 6.088720978226072e-04 + ME 2.226893396847405e-04 Event 255 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4092,5 +4092,5 @@ Event 255 Batch 1 2 7.263612771087843e+02 3.396063850675520e+02 -6.401091575508393e+02 5.028393902637355e+01 3 1.540578578981475e+02 -3.080387127739228e+01 1.060177193258910e+02 -1.074485378375538e+02 4 6.195808649930684e+02 -3.088025137901597e+02 5.340914382249483e+02 5.716459881118030e+01 - ME 1.547064591142216e-04 + ME 4.003666322732326e-05 diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index f398fe393a..8b87565b21 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0053250789642333984  +DEBUG: model prefixing takes 0.005353450775146484  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.077 s +8 processes with 40 diagrams generated in 0.076 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -258,7 +258,7 @@ Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.141 s +ALOHA: aloha creates 2 routines in 0.153 s FFV1 FFV1 FFV1 @@ -281,6 +281,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.730s -user 0m0.666s -sys 0m0.058s +real 0m1.348s +user 0m0.696s +sys 0m0.062s diff --git a/epochX/cudacpp/gq_ttq.sa/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt b/epochX/cudacpp/gq_ttq.sa/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt index dd90c94acf..d596b33ae7 100644 --- a/epochX/cudacpp/gq_ttq.sa/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt +++ b/epochX/cudacpp/gq_ttq.sa/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt @@ -4,7 +4,7 @@ Event 0 Batch 0 2 2.647483690509011e+02 7.527657265342380e+01 -2.528976247704283e+02 -2.163164141117315e+01 3 6.252973211776936e+02 -5.721080498766041e+02 -1.578766990348905e+01 2.518727230515587e+02 4 6.099543097714056e+02 4.968314772231802e+02 2.686852946739174e+02 -2.302410816403857e+02 - ME 3.498510462248670e-04 + ME 6.254927412618323e-05 Event 1 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -12,7 +12,7 @@ Event 1 Batch 0 2 2.542827954151951e+02 1.482213322085297e+02 -1.988618298139058e+02 -5.607271498295615e+01 3 6.883656117507998e+02 1.265478873489434e+02 5.602777828023585e+02 3.793700749224233e+02 4 5.573515928340058e+02 -2.747692195574731e+02 -3.614159529884527e+02 -3.232973599394667e+02 - ME 
7.257243108248426e-04 + ME 8.120933129385430e-05 Event 2 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -20,7 +20,7 @@ Event 2 Batch 0 2 4.301460683791099e+02 -3.656995432079240e+02 -2.257802895903974e+02 -1.768459985405173e+01 3 5.058528987551350e+02 2.755467101243707e+02 -2.034821274188550e+02 3.722313656043856e+02 4 5.640010328657550e+02 9.015283308355326e+01 4.292624170092524e+02 -3.545467657503340e+02 - ME 8.130044127338102e-04 + ME 1.104115154253218e-04 Event 3 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -28,7 +28,7 @@ Event 3 Batch 0 2 6.758793342627306e+02 1.455349847705337e+02 4.360940220328824e+02 -4.954335945799966e+02 3 3.008019460079605e+02 -1.607139834787174e+02 2.732727402256846e+01 2.527964523704278e+02 4 5.233187197293092e+02 1.517899870818368e+01 -4.634212960554508e+02 2.426371422095687e+02 - ME 7.753277710143621e-05 + ME 4.288074098478053e-05 Event 4 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -36,7 +36,7 @@ Event 4 Batch 0 2 3.540811678028369e+02 5.414642718170588e+01 -3.497885023717100e+02 -9.467915537920108e+00 3 7.415000547748695e+02 1.453779348794601e+00 7.277337852109665e+02 1.422102514562805e+02 4 4.044187774222938e+02 -5.560020653050046e+01 -3.779452828392566e+02 -1.327423359183605e+02 - ME 2.015528729476554e-04 + ME 1.304731284254719e-05 Event 5 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -44,7 +44,7 @@ Event 5 Batch 0 2 4.747467875786874e+02 2.462969907607520e+02 3.713870243947702e+02 1.636886763636381e+02 3 3.438196236093862e+02 -2.056491112573935e+02 2.636029701703988e+02 8.021128807897365e+01 4 6.814335888119255e+02 -4.064787950335840e+01 -6.349899945651691e+02 -2.438999644426124e+02 - ME 6.140777519977192e-04 + ME 1.932390649640220e-04 Event 6 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -52,7 +52,7 @@ Event 6 Batch 0 2 5.623951200922340e+02 4.644673798421034e+02 3.089047820108764e+02 -7.166700647426805e+01 3 2.268243199894467e+02 1.761899852590787e+02 -7.114332369064562e+01 -1.238748914321566e+02 4 7.107805599183188e+02 -6.406573651011822e+02 -2.377614583202307e+02 1.955418979064247e+02 - ME 8.375373201653861e-04 + ME 1.929702539767979e-04 Event 7 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -60,7 +60,7 @@ Event 7 Batch 0 2 4.922243378496302e+02 2.878585072835456e+02 -1.441537488072182e+02 -3.723465794939189e+02 3 2.873990637609374e+02 -5.400981623596619e+01 -8.913204919452846e+01 -2.678369642286231e+02 4 7.203765983894325e+02 -2.338486910475794e+02 2.332857980017467e+02 6.401835437225419e+02 - ME 2.045598717079573e-03 + ME 6.280412585349807e-04 Event 8 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -68,7 +68,7 @@ Event 8 Batch 0 2 3.353309706037128e+02 -7.529439061162444e+01 -4.917829145606096e+01 -3.230466069128648e+02 3 7.169322705461503e+02 -1.597426278178964e+02 -1.460012137440150e+01 6.987567601563110e+02 4 4.477367588501368e+02 2.350370184295208e+02 6.377841283046249e+01 -3.757101532434461e+02 - ME 5.176104304710922e-03 + ME 1.424871539111113e-03 Event 9 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -76,7 +76,7 @@ Event 9 Batch 0 2 2.557626120875720e+02 2.000882245504951e+02 -5.276260741790070e+01 -1.503174088272977e+02 3 7.044202058180884e+02 -6.969679478438196e+02 -1.019614549623775e+02 6.882422911146106e+00 4 5.398171820943397e+02 4.968797232933244e+02 1.547240623802783e+02 1.434349859161515e+02 - ME 6.498215193902510e-05 + ME 1.126010180174107e-05 Event 10 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -84,7 +84,7 @@ Event 10 Batch 0 2 3.466796552973448e+02 
1.172124288883391e+02 -1.804077050554743e+02 2.718475489457261e+02 3 5.174471655316495e+02 -1.610456139025784e+02 -4.497410659869822e+02 -1.988689340353916e+02 4 6.358731791710053e+02 4.383318501423926e+01 6.301487710424565e+02 -7.297861491033444e+01 - ME 2.111165581639245e-04 + ME 8.292383053707579e-05 Event 11 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -92,7 +92,7 @@ Event 11 Batch 0 2 5.730783827248506e+02 -3.059484875398849e+01 3.466457017175528e+02 -4.553235612803233e+02 3 4.410994673708892e+02 -3.026218886155176e+02 -1.990641070399019e+01 3.203005892260318e+02 4 4.858221499042607e+02 3.332167373695061e+02 -3.267392910135624e+02 1.350229720542913e+02 - ME 5.129802099928076e-05 + ME 2.195851954305949e-05 Event 12 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -100,7 +100,7 @@ Event 12 Batch 0 2 2.275003875859171e+02 -1.247450244086003e+02 1.654605359856639e+02 9.390376067217456e+01 3 6.138170466352969e+02 3.363961838598331e+02 -2.139358085817026e+01 5.129827374509639e+02 4 6.586825657787861e+02 -2.116511594512328e+02 -1.440669551274935e+02 -6.068864981231385e+02 - ME 5.249882090061186e-02 + ME 3.843244876666358e-03 Event 13 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -108,7 +108,7 @@ Event 13 Batch 0 2 2.867684047377951e+02 7.055192702127012e+01 -2.028354730671929e+02 1.900429278217245e+02 3 6.990707050557395e+02 -5.605742285334717e+02 2.413419117565430e+02 -3.408965629057132e+02 4 5.141608902064654e+02 4.900223015122016e+02 -3.850643868935023e+01 1.508536350839886e+02 - ME 6.422048006176975e-05 + ME 1.780264803426774e-05 Event 14 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -116,7 +116,7 @@ Event 14 Batch 0 2 3.551549262960330e+02 1.090410064132905e+02 3.205839746298526e+02 1.071027348074892e+02 3 5.276349775014137e+02 
3.895763694332612e+02 -2.529209653865598e+02 2.503196099590423e+02 4 6.172100962025531e+02 -4.986173758465519e+02 -6.766300924329285e+01 -3.574223447665315e+02 - ME 7.422587439250419e-04 + ME 1.172793340377339e-04 Event 15 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -124,7 +124,7 @@ Event 15 Batch 0 2 5.846731991828425e+02 7.106081559720657e+01 3.900476102503054e+02 4.297161529048979e+02 3 2.829885923647302e+02 -2.767806781033229e+02 5.223342094943639e+01 -2.732525156618249e+01 4 6.323382084524278e+02 2.057198625061163e+02 -4.422810311997417e+02 -4.023909013387152e+02 - ME 1.255922738422332e-03 + ME 2.768931482482754e-04 Event 16 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -132,7 +132,7 @@ Event 16 Batch 0 2 7.471577506095512e+02 1.666056475215676e+02 -5.784682380714994e+02 -4.425627187781379e+02 3 6.589296733908160e+02 -1.235441202519038e+02 5.251239647671507e+02 3.783780998595698e+02 4 9.391257599963087e+01 -4.306152726966400e+01 5.334427330434855e+01 6.418461891856485e+01 - ME 5.526726502577864e-05 + ME 3.619360847906487e-05 Event 17 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -140,7 +140,7 @@ Event 17 Batch 0 2 3.567490993131759e+02 3.856364495163717e+01 -1.708845728849435e+02 -3.107752047682324e+02 3 6.453207560475681e+02 4.468356462873772e+02 2.282834847349605e+02 4.057874246326636e+02 4 4.979301446392561e+02 -4.853992912390142e+02 -5.739891185001719e+01 -9.501221986443127e+01 - ME 1.327369996555111e-04 + ME 3.400819398697452e-05 Event 18 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -148,7 +148,7 @@ Event 18 Batch 0 2 4.856701782481425e+02 2.509110753153842e+02 -3.498523763974107e+02 -2.247720379690150e+02 3 3.014847498930008e+02 -1.059425909901355e+02 -2.435847754696140e+02 -1.426032222348426e+02 4 7.128450718588564e+02 
-1.449684843252488e+02 5.934371518670247e+02 3.673752602038576e+02 - ME 1.018512933050835e-03 + ME 1.704840743724005e-04 Event 19 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -156,7 +156,7 @@ Event 19 Batch 0 2 5.848213503304410e+02 -3.141116763848333e+02 -1.950442390378232e+02 4.531088295091878e+02 3 5.769300027107226e+02 5.020221748138873e+02 2.252239828724832e+02 -1.734823378963534e+02 4 3.382486469588368e+02 -1.879104984290540e+02 -3.017974383465995e+01 -2.796264916128346e+02 - ME 4.267017342507976e-03 + ME 1.566312636528492e-04 Event 20 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -164,7 +164,7 @@ Event 20 Batch 0 2 5.550938429889906e+02 -4.478597170519693e+02 -1.958065402362923e+02 -2.630791652090858e+02 3 5.585686897587655e+02 3.351111310173187e+02 -1.360174455686903e+02 4.256744830831253e+02 4 3.863374672522434e+02 1.127485860346507e+02 3.318239858049826e+02 -1.625953178740396e+02 - ME 2.768271682113988e-04 + ME 4.443882992804106e-05 Event 21 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -172,7 +172,7 @@ Event 21 Batch 0 2 6.296556563991993e+02 -3.477135312394776e+02 -1.376147989324512e+02 -5.065804111325866e+02 3 3.137568007204202e+02 1.080474571851863e+02 -2.382188236683311e+02 1.732653140250679e+02 4 5.565875428803801e+02 2.396660740542913e+02 3.758336226007823e+02 3.333150971075189e+02 - ME 5.519034669639832e-05 + ME 2.195742323347977e-05 Event 22 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -180,7 +180,7 @@ Event 22 Batch 0 2 5.583338925767162e+02 2.471586228668332e+02 -1.597599499756147e+02 -4.744745610949311e+02 3 5.378723432497920e+02 9.149532098241385e+00 4.314513680009925e+02 3.210493120152684e+02 4 4.037937641734921e+02 -2.563081549650745e+02 -2.716914180253778e+02 1.534252490796627e+02 - ME 3.705224437539572e-05 + ME 
1.393143104564022e-05 Event 23 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -188,7 +188,7 @@ Event 23 Batch 0 2 6.057340011976822e+02 6.848115528115159e+01 -5.207204912425279e+02 -3.017849923015605e+02 3 6.884459352783615e+02 -2.949639632364767e+01 6.680977958792448e+02 1.635026102131439e+02 4 2.058200635239559e+02 -3.898475895750391e+01 -1.473773046367171e+02 1.382823820884168e+02 - ME 2.946248744974782e-05 + ME 1.074117284514867e-05 Event 24 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -196,7 +196,7 @@ Event 24 Batch 0 2 4.702316790647315e+02 -1.210575128627593e+02 4.313728504035306e+02 -1.427598490831810e+02 3 7.180482366151732e+02 1.040047389253588e+02 -7.104588047260974e+02 4.956931953573291e+00 4 3.117200843200960e+02 1.705277393740069e+01 2.790859543225674e+02 1.378029171296075e+02 - ME 3.146557994448562e-05 + ME 5.213387311993420e-06 Event 25 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -204,7 +204,7 @@ Event 25 Batch 0 2 6.261365010744016e+02 -5.354018140499276e+02 -2.095559720530078e+02 2.479477970595020e+02 3 5.483958991041942e+02 5.199465180092641e+02 -9.843995208133505e+01 -1.438862620216537e+02 4 3.254675998214045e+02 1.545529604066345e+01 3.079959241343431e+02 -1.040615350378483e+02 - ME 1.657640191611339e-04 + ME 1.695323153210731e-05 Event 26 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -212,7 +212,7 @@ Event 26 Batch 0 2 4.635816356180677e+02 1.904702824079147e+02 -2.351549941335565e+02 -3.511853259118595e+02 3 3.686385821486527e+02 -2.712527815845713e+02 -6.015354190959191e+01 -2.422764621809819e+02 4 6.677797822332798e+02 8.078249917665664e+01 2.953085360431485e+02 5.934617880928415e+02 - ME 3.250975879010065e-04 + ME 1.052251904460155e-04 Event 27 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 
7.500000000000000e+02 @@ -220,7 +220,7 @@ Event 27 Batch 0 2 2.851713673150520e+02 1.387976072955998e+02 1.520424011317634e+02 -1.973348453858079e+02 3 6.747356481771329e+02 2.426633222154767e+02 -4.300238522839811e+02 4.598501858640580e+02 4 5.400929845078149e+02 -3.814609295110765e+02 2.779814511522176e+02 -2.625153404782502e+02 - ME 4.155279516527712e-04 + ME 7.957109124083736e-05 Event 28 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -228,7 +228,7 @@ Event 28 Batch 0 2 1.977804200471008e+02 -1.803202618401224e+02 -8.082809162516925e+01 -8.277519444290659e+00 3 7.197523834069627e+02 3.152541965091956e+02 6.467033971658861e+02 -2.080867841663842e+01 4 5.824671965459364e+02 -1.349339346690732e+02 -5.658753055407169e+02 2.908619786092899e+01 - ME 1.172809031809504e-04 + ME 1.748013159755222e-05 Event 29 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -236,7 +236,7 @@ Event 29 Batch 0 2 6.123364628491765e+02 -3.746492624245139e+02 3.785128791537567e+02 -3.021950929683376e+02 3 4.056577755659300e+02 1.796205570313495e+00 -8.781658530568643e+01 3.960344074293251e+02 4 4.820057615848937e+02 3.728530568542006e+02 -2.906962938480702e+02 -9.383931446098750e+01 - ME 5.496242925842306e-04 + ME 3.085570985177973e-04 Event 30 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -244,7 +244,7 @@ Event 30 Batch 0 2 7.349194950356053e+02 7.241679607953656e+02 1.425637322816703e+01 1.244354634469208e+02 3 7.321421454671275e+02 -7.253765693071590e+02 -2.895970851972107e+01 -9.498573130653318e+01 4 3.293835949726734e+01 1.208608511793152e+00 1.470333529155409e+01 -2.944973214038765e+01 - ME 5.147061682527938e-02 + ME 3.267107835672361e-04 Event 31 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -252,7 +252,7 @@ Event 31 Batch 0 2 1.718338270585457e+02 -1.344914872264095e+02 
-1.021614404532311e+02 3.165350011824393e+01 3 6.313115253715935e+02 -2.849940593920691e+02 -7.916450257599642e+01 -5.577325610990745e+02 4 6.968546475698608e+02 4.194855466184786e+02 1.813259430292275e+02 5.260790609808306e+02 - ME 4.645345268703414e-04 + ME 1.685680846028125e-04 Event 32 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -260,7 +260,7 @@ Event 32 Batch 0 2 7.235176898898732e+02 -4.762113006241282e+02 -2.880822916693121e+01 5.439400065022983e+02 3 6.603902828461299e+02 4.672103814637360e+02 1.031050210016798e+02 -4.551913221650266e+02 4 1.160920272639969e+02 9.000919160392018e+00 -7.429679183474862e+01 -8.874868433727177e+01 - ME 4.476006843186700e-03 + ME 2.173072900368875e-04 Event 33 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -268,7 +268,7 @@ Event 33 Batch 0 2 4.786737271642286e+02 2.009638309376703e+02 4.090184839380260e+02 1.464443769121513e+02 3 3.795793219608408e+02 -6.057523839522271e+00 -8.244277697544294e+01 3.704685635647950e+02 4 6.417469508749314e+02 -1.949063070981495e+02 -3.265757069625828e+02 -5.169129404769461e+02 - ME 1.351709676586880e-02 + ME 3.322437827682699e-03 Event 34 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -276,7 +276,7 @@ Event 34 Batch 0 2 6.621583515140109e+02 -5.051303032557109e+02 -1.429543729176959e+02 4.035605363216953e+02 3 3.008522892707525e+02 8.677543723835062e+01 2.726747894692539e+02 -9.290092916351111e+01 4 5.369893592152367e+02 4.183548660173603e+02 -1.297204165515579e+02 -3.106596071581844e+02 - ME 6.460854093057828e-04 + ME 9.294666462955388e-05 Event 35 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -284,7 +284,7 @@ Event 35 Batch 0 2 6.158114977149372e+02 2.502256147979830e+02 4.233348779616202e+00 5.626659943296695e+02 3 1.476397433483021e+02 -1.670550278282843e+01 
-6.055370982200890e+01 1.336101351676488e+02 4 7.365487589367605e+02 -2.335201120151546e+02 5.632036104239269e+01 -6.962761294973184e+02 - ME 2.101231899117793e+00 + ME 5.450893768264864e-01 Event 36 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -292,7 +292,7 @@ Event 36 Batch 0 2 7.182456511154913e+02 -7.463771462544163e+01 -6.667773110518942e+02 2.563475070450518e+02 3 4.860008755751825e+02 -7.840660561780868e+01 4.141081959217036e+02 -2.419992919944378e+02 4 2.957534733093268e+02 1.530443202432501e+02 2.526691151301903e+02 -1.434821505061448e+01 - ME 9.644531209480271e-05 + ME 1.793136635525090e-05 Event 37 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -300,7 +300,7 @@ Event 37 Batch 0 2 5.672182018814327e+02 -2.031706828392718e+00 -5.267408190306547e+02 2.104197478372323e+02 3 4.664069288608281e+02 3.712365792892206e+02 2.604523782658950e+02 -1.090109358856581e+02 4 4.663748692577387e+02 -3.692048724608279e+02 2.662884407647597e+02 -1.014088119515743e+02 - ME 1.216876552012178e-04 + ME 1.885829354904198e-05 Event 38 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -308,7 +308,7 @@ Event 38 Batch 0 2 5.068057345787187e+02 4.883513201966852e+02 -7.570036138649985e+01 -1.124032737511800e+02 3 3.871140338254017e+02 -1.153787089711745e+02 -3.599073977747533e+02 -8.373585688177315e+01 4 6.060802315958797e+02 -3.729726112255107e+02 4.356077591612532e+02 1.961391306329531e+02 - ME 1.006736553113524e-04 + ME 2.004468492837133e-05 Event 39 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -316,7 +316,7 @@ Event 39 Batch 0 2 4.960337392567769e+02 -3.669089247616476e+02 2.651961920161227e+02 -2.027271347192069e+02 3 2.837821967046824e+02 -2.822567153069604e+02 -2.935613327724534e+01 -1.303560381865560e+00 4 7.201840640385411e+02 6.491656400686079e+02 
-2.358400587388775e+02 2.040306951010725e+02 - ME 1.372807525012575e-03 + ME 2.738639406673165e-04 Event 40 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -324,7 +324,7 @@ Event 40 Batch 0 2 3.080730228651936e+02 -3.065830270999447e+02 -2.484308296331460e+01 1.728167064871203e+01 3 6.842346640746094e+02 4.630487823766367e+02 8.554554725666550e+01 -4.964321303112498e+02 4 5.076923130601962e+02 -1.564657552766919e+02 -6.070246429335075e+01 4.791504596625378e+02 - ME 4.192363154074847e-05 + ME 4.316353181637933e-05 Event 41 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -332,7 +332,7 @@ Event 41 Batch 0 2 1.602650851118221e+02 -1.258781096038287e+02 -9.817642232798531e+01 1.417706342452912e+01 3 7.146392966623014e+02 6.799675591776853e+02 -1.019163870176435e+02 1.948499239342933e+02 4 6.250956182258764e+02 -5.540894495738563e+02 2.000928093456288e+02 -2.090269873588226e+02 - ME 4.523507186168379e-04 + ME 6.118266190948034e-05 Event 42 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -340,7 +340,7 @@ Event 42 Batch 0 2 1.687893235969910e+02 1.289401357197518e+02 4.788693514682045e+01 9.783209393213438e+01 3 7.042017295435162e+02 -1.022058447296739e+02 -6.640064324330017e+02 -2.110675220936915e+02 4 6.270089468594927e+02 -2.673429099007782e+01 6.161194972861812e+02 1.132354281615572e+02 - ME 1.686356189272381e-04 + ME 4.091574289077424e-05 Event 43 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -348,7 +348,7 @@ Event 43 Batch 0 2 4.729783670130408e+02 -7.983817933050123e+01 9.052957805204315e+01 4.573169538528310e+02 3 5.638402597824536e+02 4.785250044669658e+02 7.435095949863268e+01 -2.887933404236804e+02 4 4.631813732045056e+02 -3.986868251364646e+02 -1.648805375506758e+02 -1.685236134291506e+02 - ME 5.938757690519573e-04 + ME 2.654067897204875e-04 Event 44 
Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -356,7 +356,7 @@ Event 44 Batch 0 2 1.774791104122977e+02 -1.952605982635784e+01 6.371003613266313e+01 1.644949814321787e+02 3 7.194816205691247e+02 -3.678871192485065e+02 2.644831693887214e+01 -6.177486190667772e+02 4 6.030392690185777e+02 3.874131790748646e+02 -9.015835307153536e+01 4.532536376345985e+02 - ME 2.092333697371024e-04 + ME 1.390282437939369e-04 Event 45 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -364,7 +364,7 @@ Event 45 Batch 0 2 7.477488480180839e+02 -3.787655987618923e+02 1.634662296474455e+02 6.236535517992064e+02 3 7.458113398274099e+02 3.819163358711198e+02 -1.661042992235261e+02 -6.186952632673017e+02 4 6.439812154506046e+00 -3.150737109227506e+00 2.638069576080606e+00 -4.958288531904773e+00 - ME 9.377954359926730e-02 + ME 4.591622113024210e-03 Event 46 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -372,7 +372,7 @@ Event 46 Batch 0 2 3.243146757688279e+02 -4.392587631431587e+00 -2.496903827548322e+02 -2.069188895501946e+02 3 5.341608950426614e+02 -2.704482657861201e+02 2.711825143656835e+02 -3.723515022507137e+02 4 6.415244291885106e+02 2.748408534175518e+02 -2.149213161085120e+01 5.792703918009084e+02 - ME 1.879047912263320e-04 + ME 7.845213441237594e-05 Event 47 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -380,7 +380,7 @@ Event 47 Batch 0 2 6.742198761450968e+02 -3.282965096491567e+02 5.301803926793563e+02 -2.563251730900704e+02 3 6.484148720042493e+02 3.527030795571956e+02 -3.975273148506379e+02 3.715029176935211e+02 4 1.773652518506536e+02 -2.440656990803885e+01 -1.326530778287185e+02 -1.151777446034508e+02 - ME 1.136665455996279e-03 + ME 5.254395938575492e-05 Event 48 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-388,7 +388,7 @@ Event 48 Batch 0 2 7.321401810535270e+02 -1.843482647928687e+02 4.412348098999295e+02 5.543976952635381e+02 3 7.293058265076229e+02 2.182722651304250e+02 -4.435200216702997e+02 -5.362221528717154e+02 4 3.855399243885009e+01 -3.392400033755636e+01 2.285211770370227e+00 -1.817554239182278e+01 - ME 2.278442596973106e-03 + ME 2.330290263553363e-04 Event 49 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -396,7 +396,7 @@ Event 49 Batch 0 2 3.511117284856090e+02 -3.272266866652174e+02 5.199533974843238e+01 1.161835877338140e+02 3 7.326526490901410e+02 6.615045961628415e+02 -2.993354007364775e+02 -9.792799058578566e+01 4 4.162356224242500e+02 -3.342779094976241e+02 2.473400609880451e+02 -1.825559714802838e+01 - ME 8.806759903737244e-05 + ME 7.863589115869630e-06 Event 50 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -404,7 +404,7 @@ Event 50 Batch 0 2 7.322170903075255e+02 2.740692406080844e+02 1.952596610981929e+01 -6.787095515302592e+02 3 3.078559130669522e+02 -1.663333363406682e+02 8.625456119089935e+01 2.442716420418760e+02 4 4.599269966255216e+02 -1.077359042674159e+02 -1.057805273007185e+02 4.344379094883832e+02 - ME 7.579426018596712e-05 + ME 6.765758192049922e-05 Event 51 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -412,7 +412,7 @@ Event 51 Batch 0 2 3.473696038265160e+02 -2.922314643158454e+02 -6.759614889845234e+01 -1.752060888796554e+02 3 5.389399151999496e+02 -2.449040872454050e+02 9.346474502284556e+01 4.708954891311219e+02 4 6.136904809735339e+02 5.371355515612503e+02 -2.586859612439322e+01 -2.956894002514666e+02 - ME 4.687828430739845e-04 + ME 2.035652280642710e-04 Event 52 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -420,7 +420,7 @@ Event 52 Batch 0 2 6.818614816439094e+02 5.970116833066725e+02 3.013730734325877e+02 
1.329902280423528e+02 3 2.108623144448950e+02 -4.198344769951654e+00 -1.698802183673395e+02 -1.248439063859965e+02 4 6.072762039111957e+02 -5.928133385367207e+02 -1.314928550652483e+02 -8.146321656356344e+00 - ME 1.636869658416981e-04 + ME 4.047005152694340e-05 Event 53 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -428,7 +428,7 @@ Event 53 Batch 0 2 5.157714002491656e+02 -5.140718537651751e+02 -4.182413977701254e+01 1.003899065692042e+00 3 5.148181840855221e+02 2.868792199999327e+02 1.974924151010656e+02 3.791237552236646e+02 4 4.694104156653124e+02 2.271926337652422e+02 -1.556682753240530e+02 -3.801276542893567e+02 - ME 3.182294022992135e-03 + ME 1.547751010871262e-04 Event 54 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -436,7 +436,7 @@ Event 54 Batch 0 2 6.433410767101752e+02 2.586883950027282e+02 -5.809813083922761e+02 9.710187728524583e+01 3 6.928799734080563e+02 -1.579832568796111e+02 6.405510983559769e+02 -2.117031848853746e+02 4 1.637789498817686e+02 -1.007051381231171e+02 -5.956978996370073e+01 1.146013076001288e+02 - ME 3.280140142776471e-05 + ME 1.302720215079095e-05 Event 55 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -444,7 +444,7 @@ Event 55 Batch 0 2 7.193759752058201e+02 -3.536444481659258e+02 -7.212523476050659e+01 -6.222823703878202e+02 3 5.307053661742267e+02 2.409461639849982e+02 1.900944302490854e+02 4.329633233142391e+02 4 2.499186586199529e+02 1.126982841809279e+02 -1.179691954885788e+02 1.893190470735813e+02 - ME 3.939174164528502e-05 + ME 3.087450123310173e-05 Event 56 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -452,7 +452,7 @@ Event 56 Batch 0 2 3.858864959547013e+02 1.815174721437793e+02 3.218581876578407e+02 -1.112074732396182e+02 3 4.484505297447187e+02 -3.244105157450006e+02 2.934585578803474e+02 
-9.873079412811623e+01 4 6.656629743005793e+02 1.428930436012212e+02 -6.153167455381879e+02 2.099382673677345e+02 - ME 2.326138625268126e-04 + ME 4.275995533811995e-05 Event 57 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -460,7 +460,7 @@ Event 57 Batch 0 2 5.284589752749192e+02 3.868194647882293e+02 -1.709996888155517e+02 3.168575336559793e+02 3 6.299868555278971e+02 -1.587414880613579e+02 2.327134172236622e+02 -5.634971548731005e+02 4 3.415541691971835e+02 -2.280779767268714e+02 -6.171372840811043e+01 2.466396212171210e+02 - ME 3.474853710074164e-05 + ME 2.211478424702745e-05 Event 58 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -468,7 +468,7 @@ Event 58 Batch 0 2 6.172037319760957e+02 -2.246119436411400e+02 -2.286037628748728e+01 5.744278237820342e+02 3 5.117934503257735e+02 1.262762853074207e+02 3.215736628881853e+02 -3.775939815489577e+02 4 3.710028176981306e+02 9.833565833371921e+01 -2.987132866006979e+02 -1.968338422330765e+02 - ME 6.183305374210038e-04 + ME 1.857727050583390e-04 Event 59 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -476,7 +476,7 @@ Event 59 Batch 0 2 7.388935626701858e+02 -3.912134623809441e+02 -5.457789630286015e+02 3.082872805076099e+02 3 1.936051438730608e+02 1.561492575196544e+02 8.304673385628061e+01 -7.876294246644987e+01 4 5.675012934567535e+02 2.350642048612896e+02 4.627322291723209e+02 -2.295243380411600e+02 - ME 4.116991424436793e-04 + ME 6.745345781245190e-05 Event 60 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -484,7 +484,7 @@ Event 60 Batch 0 2 7.258141426633659e+02 -5.584991156701968e+02 1.635894950857984e+02 4.337319270970709e+02 3 2.789580074371136e+02 2.331554478032953e+02 6.512410160032128e+01 -1.386180308029247e+02 4 4.952278498995201e+02 3.253436678669015e+02 -2.287135966861195e+02 
-2.951138962941461e+02 - ME 7.295672680059989e-04 + ME 9.170244877267536e-05 Event 61 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -492,15 +492,15 @@ Event 61 Batch 0 2 5.906141202026897e+02 4.485275282318680e+02 -2.043613424290570e+02 3.253990429020988e+02 3 4.163572165237975e+02 -4.021600557528675e+02 -4.112755461437413e+01 9.964509802161204e+01 4 4.930286632735124e+02 -4.636747247900051e+01 2.454888970434311e+02 -4.250441409237108e+02 - ME 5.845307122272604e-03 + ME 1.836685601489136e-04 Event 62 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 1 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 -7.500000000000000e+02 2 7.346180891175762e+02 3.693463141798367e+02 7.549194961263061e+01 -6.305140780380819e+02 3 4.420621433230785e+02 -2.806743363126464e+02 3.467380983154045e+01 3.397625382625571e+02 - 4 3.233197675593453e+02 -8.867197786719018e+01 -1.101657594441711e+02 2.907515397755249e+02 - ME 3.963631774242112e-05 + 4 3.233197675593452e+02 -8.867197786719018e+01 -1.101657594441711e+02 2.907515397755248e+02 + ME 3.490896135533686e-05 Event 63 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -508,7 +508,7 @@ Event 63 Batch 0 2 6.451039732729313e+02 -2.415045377667665e+02 1.990362537024482e+02 -5.641092662620230e+02 3 3.260870385294104e+02 2.061141051805976e+02 -2.496695602716584e+02 3.892098426606745e+01 4 5.288089881976584e+02 3.539043258616898e+01 5.063330656921013e+01 5.251882819959555e+02 - ME 4.832224458906289e-04 + ME 4.428689394331114e-04 Event 64 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -516,7 +516,7 @@ Event 64 Batch 0 2 5.275973380665291e+02 -6.064553482667328e+01 4.309976929667101e+02 -2.981980196075213e+02 3 5.799838776791826e+02 3.279821268626862e+02 -1.824214634122377e+02 4.421893627315650e+02 4 3.924187842542880e+02 
-2.673365920360130e+02 -2.485762295544724e+02 -1.439913431240437e+02 - ME 2.175617604507715e-04 + ME 4.205989960223865e-05 Event 65 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -524,7 +524,7 @@ Event 65 Batch 0 2 6.480172869826541e+02 2.720879118036237e+02 -5.153900904044360e+02 -2.833154199679406e+02 3 7.075023253568394e+02 -3.440299289242928e+02 4.709796137500282e+02 4.004761563708322e+02 4 1.444803876605064e+02 7.194201712066916e+01 4.441047665440794e+01 -1.171607364028916e+02 - ME 4.989956280474397e-03 + ME 1.103463366798231e-04 Event 66 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -532,7 +532,7 @@ Event 66 Batch 0 2 5.472978185025795e+02 4.857452785131266e+02 -2.223654169683454e+02 -1.189119332799752e+02 3 3.203062148499983e+02 1.169702135976477e+02 2.922172461416276e+02 -5.935588816501102e+01 4 6.323959666474225e+02 -6.027154921107744e+02 -6.985182917328234e+01 1.782678214449862e+02 - ME 1.346850069104626e-04 + ME 2.913920636000223e-05 Event 67 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -540,7 +540,7 @@ Event 67 Batch 0 2 4.264671493042950e+02 1.195959046886511e+02 -2.647539231733031e+02 3.122121220929446e+02 3 5.059969655247565e+02 3.777175441887567e+02 -7.608313561896731e+00 -3.366073372596325e+02 4 5.675358851709483e+02 -4.973134488774080e+02 2.723622367352000e+02 2.439521516668857e+01 - ME 9.763221977220593e-05 + ME 4.009347519102052e-05 Event 68 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -548,7 +548,7 @@ Event 68 Batch 0 2 5.996105691520872e+02 -3.814725562071957e+02 -3.417794545715573e+02 3.117664637712124e+02 3 2.164196744806214e+02 1.292759463548889e+02 -1.184749651041615e+02 1.268419798013013e+02 4 6.839697563672917e+02 2.521966098523068e+02 4.602544196757188e+02 -4.386084435725137e+02 - ME 2.936083529685707e-03 + ME 
6.175473672610461e-04 Event 69 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -556,7 +556,7 @@ Event 69 Batch 0 2 4.950546755511076e+02 -1.873718558932053e+02 -4.578972175289678e+02 -1.735101101888631e+01 3 4.768584394819691e+02 -1.830244097668608e+02 2.985566003539791e+02 -3.236664843936508e+02 4 5.280868849669230e+02 3.703962656600661e+02 1.593406171749887e+02 3.410174954125370e+02 - ME 5.234212626720279e-05 + ME 1.367292435278724e-05 Event 70 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -564,7 +564,7 @@ Event 70 Batch 0 2 6.918343395272258e+02 6.895733556028865e+02 -5.391072441382606e+01 -1.473005040127906e+01 3 2.169590284692678e+02 -1.127375202028747e+02 1.807969800614662e+02 4.091361110301506e+01 4 5.912066320035063e+02 -5.768358354000119e+02 -1.268862556476402e+02 -2.618356070173603e+01 - ME 1.591740981760110e-04 + ME 3.526540789264872e-05 Event 71 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -572,7 +572,7 @@ Event 71 Batch 0 2 5.156371334918733e+02 1.547202099034306e+02 -4.807172487652236e+02 1.041836686949964e+02 3 3.718518305526428e+02 -8.969821893462726e+01 -7.521366892975188e+01 -3.529460545344468e+02 4 6.125110359554843e+02 -6.502199096880338e+01 5.559309176949756e+02 2.487623858394504e+02 - ME 1.125100552069616e-04 + ME 2.860782472746935e-05 Event 72 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -580,7 +580,7 @@ Event 72 Batch 0 2 2.110577464974889e+02 5.009520239746097e+01 -1.453533690489527e+02 -1.445968227848547e+02 3 7.317124633441161e+02 -4.429659627226336e+02 5.264774879404380e+02 2.490095170354977e+02 4 5.572297901583943e+02 3.928707603251725e+02 -3.811241188914850e+02 -1.044126942506430e+02 - ME 1.823320413479066e-04 + ME 2.666441446531882e-05 Event 73 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 
7.500000000000000e+02 @@ -588,7 +588,7 @@ Event 73 Batch 0 2 3.932257450488246e+02 3.105005764664288e+01 -2.932679039283983e+02 2.601082794045340e+02 3 5.658879124646472e+02 3.645905401293642e+02 4.244364556305355e+02 8.459646951004230e+01 4 5.408863424865281e+02 -3.956405977760074e+02 -1.311685517021372e+02 -3.447047489145762e+02 - ME 8.953763196089171e-04 + ME 7.825486685913998e-05 Event 74 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -596,7 +596,7 @@ Event 74 Batch 0 2 1.374854102925440e+02 7.785209805930555e+01 4.289805712042688e+01 1.048858692406466e+02 3 6.381281910764947e+02 -1.004137270491618e+02 -1.591026937267357e+02 6.097630724433484e+02 4 7.243863986309617e+02 2.256162898985645e+01 1.162046366063089e+02 -7.146489416839951e+02 - ME 1.395531292378326e+01 + ME 1.919068868336380e+00 Event 75 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -604,7 +604,7 @@ Event 75 Batch 0 2 5.936883054156938e+02 -3.438525101293572e+00 -2.706855443967301e+02 5.283780053968293e+02 3 5.912298912592892e+02 1.109657062166288e+02 4.832067437414102e+02 -3.221034603433170e+02 4 3.150818033250173e+02 -1.075271811153352e+02 -2.125211993446803e+02 -2.062745450535123e+02 - ME 1.379908325625592e-03 + ME 1.642862842910461e-04 Event 76 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -612,7 +612,7 @@ Event 76 Batch 0 2 6.619486867997672e+02 2.801967015359571e+01 2.136411519593737e+02 6.258980909300584e+02 3 1.201252731414031e+02 2.274423842261747e+01 -8.754996679960182e+01 7.904292618103446e+01 4 7.179260400588295e+02 -5.076390857621322e+01 -1.260911851597719e+02 -7.049410171110928e+02 - ME 5.870483941147637e+00 + ME 7.362202483972824e-01 Event 77 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -620,7 +620,7 @@ Event 77 Batch 0 2 7.456676259451606e+02 -7.346624001550109e+02 
6.511229493320701e+01 -1.097804865615983e+02 3 1.284204120828029e+02 1.251494694834492e+02 2.867183268690428e+01 2.708973588335753e+00 4 6.259119619720373e+02 6.095129306715618e+02 -9.378412762011118e+01 1.070715129732624e+02 - ME 1.662775178233579e-04 + ME 4.400761364703354e-05 Event 78 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -628,7 +628,7 @@ Event 78 Batch 0 2 7.040158920877628e+02 6.911264613612161e+02 -6.659640240533211e+01 -1.163937709034254e+02 3 5.185438503615327e+02 -4.976050220224222e+02 -1.270913363611937e+02 7.158742227342900e+01 4 2.774402575507044e+02 -1.935214393387939e+02 1.936877387665258e+02 4.480634862999637e+01 - ME 5.328004946641866e-05 + ME 9.352750539306009e-06 Event 79 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -636,7 +636,7 @@ Event 79 Batch 0 2 6.777589592768838e+02 1.742725197144059e+02 -4.776543849198212e+01 6.532264221831092e+02 3 5.725002211294488e+02 -1.786302554544233e+02 -1.627852110918317e+02 -5.189881598643107e+02 4 2.497408195936665e+02 4.357735740017474e+00 2.105506495838138e+02 -1.342382623187985e+02 - ME 9.179311580246363e-04 + ME 3.598558866345749e-04 Event 80 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -644,7 +644,7 @@ Event 80 Batch 0 2 6.240819586861880e+02 4.679310297228965e+02 -4.118464023828053e+02 -3.002304821964348e+01 3 6.688675489057649e+02 -5.494372353172420e+02 3.251429131208653e+02 1.994607943266771e+02 4 2.070504924080468e+02 8.150620559434545e+01 8.670348926194001e+01 -1.694377461070337e+02 - ME 3.575286400583300e-03 + ME 5.382869847396148e-05 Event 81 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -652,7 +652,7 @@ Event 81 Batch 0 2 5.198056748722776e+02 1.034797897616987e+02 -2.885605608993972e+02 4.197888462474007e+02 3 5.672098642055398e+02 -4.160331805498524e+02 
2.087659545613757e+01 -3.849773895903518e+02 4 4.129844609221831e+02 3.125533907881537e+02 2.676839654432596e+02 -3.481145665704891e+01 - ME 1.018936778946332e-04 + ME 3.612255741613163e-05 Event 82 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -660,7 +660,7 @@ Event 82 Batch 0 2 2.057598609140514e+02 6.385349666266659e+01 -2.765433460911293e+01 1.936364870179372e+02 3 6.235840147705873e+02 4.654039114453895e+02 -3.828889383639962e+02 -1.601633028106901e+02 4 6.706561243153629e+02 -5.292574081080552e+02 4.105432729731107e+02 -3.347318420724690e+01 - ME 6.930850923220120e-04 + ME 3.172622561805068e-04 Event 83 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -668,7 +668,7 @@ Event 83 Batch 0 2 6.583322583736492e+02 1.865539504254553e+02 -1.926584839569474e+02 6.012334775737429e+02 3 3.620902826842561e+02 -3.107067244571256e+02 -1.177956631152976e+01 -1.855584705935048e+02 4 4.795774589420946e+02 1.241527740316703e+02 2.044380502684771e+02 -4.156750069802382e+02 - ME 8.385116111585099e-03 + ME 6.756528802944365e-04 Event 84 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -676,7 +676,7 @@ Event 84 Batch 0 2 4.849329564663161e+02 -2.622178945286150e+02 4.068620488841210e+02 -2.941124332559817e+01 3 4.737588937677760e+02 6.014532316188546e+01 -1.333934272225749e+02 4.505954095412368e+02 4 5.413081497659077e+02 2.020725713667296e+02 -2.734686216615461e+02 -4.211841662156386e+02 - ME 5.162990427398554e-03 + ME 1.017468409980153e-03 Event 85 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -684,7 +684,7 @@ Event 85 Batch 0 2 7.085742632080854e+02 -2.174614026040270e+02 -5.283468657604088e+02 -4.190914152061853e+02 3 5.315764222715953e+02 8.528530557199829e+00 3.820092234108129e+02 3.695533927738615e+02 4 2.598493145203187e+02 2.089328720468272e+02 
1.463376423495959e+02 4.953802243232388e+01 - ME 6.335517668355978e-05 + ME 1.894143727100354e-05 Event 86 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -692,7 +692,7 @@ Event 86 Batch 0 2 1.724500140939190e+02 1.231518677708316e+02 -1.121928207497684e+01 1.201946443701656e+02 3 7.028475062724231e+02 -6.467096040851287e+01 -4.553168759141600e+02 -5.315061866629339e+02 4 6.247024796336580e+02 -5.848090736231883e+01 4.665361579891369e+02 4.113115422927684e+02 - ME 1.165531323127631e-04 + ME 5.311384036847167e-05 Event 87 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -700,7 +700,7 @@ Event 87 Batch 0 2 1.942099203196796e+02 -7.751148196958454e+01 -1.356691819650310e+02 -1.153400900745028e+02 3 7.314670447251594e+02 1.724617634710876e+02 7.020747158546045e+02 1.113196793791551e+02 4 5.743230349551606e+02 -9.495028150150301e+01 -5.664055338895735e+02 4.020410695347637e+00 - ME 1.237609879052555e-04 + ME 1.874087134673149e-05 Event 88 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -708,7 +708,7 @@ Event 88 Batch 0 2 6.382497024023744e+02 2.632142028760094e+02 -5.613974181649784e+02 1.513733956108635e+02 3 3.997044228265544e+02 -5.264940326118349e+01 3.435187961344461e+02 1.974500004195773e+02 4 4.620458747710724e+02 -2.105647996148253e+02 2.178786220305324e+02 -3.488233960304407e+02 - ME 1.863821317258467e-03 + ME 9.699609186666195e-05 Event 89 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -716,7 +716,7 @@ Event 89 Batch 0 2 1.419006640093282e+02 -8.677155154367878e+01 6.457545216231642e+01 -9.185046144153740e+01 3 7.131224514048055e+02 5.460003286026870e+02 -4.154556538506974e+02 -1.944836022569670e+02 4 6.449768845858670e+02 -4.592287770590082e+02 3.508802016883808e+02 2.863340636985044e+02 - ME 1.136115495374629e-04 + ME 2.974199953519439e-05 Event 90 
Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -724,7 +724,7 @@ Event 90 Batch 0 2 5.730615760623938e+02 -6.017783679015001e+01 -5.202921970507185e+02 -2.325386583054727e+02 3 5.389913703864468e+02 -6.302812531165206e+01 2.446311215742109e+02 4.761247390423042e+02 4 3.879470535511588e+02 1.232059621018019e+02 2.756610754765076e+02 -2.435860807368315e+02 - ME 1.094721025518881e-03 + ME 1.667772733247344e-04 Event 91 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -732,7 +732,7 @@ Event 91 Batch 0 2 4.546745139784350e+02 -1.470341619195494e+02 -1.726383255301703e+02 -3.940886669878754e+02 3 5.110976540119647e+02 -2.482119727393537e+02 -1.865817698532448e+02 4.059542728975803e+02 4 5.342278320096005e+02 3.952461346589030e+02 3.592200953834151e+02 -1.186560590970480e+01 - ME 8.789722587847313e-05 + ME 4.420313882846059e-05 Event 92 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -740,7 +740,7 @@ Event 92 Batch 0 2 6.683728375977241e+02 -1.148152650923627e+02 3.458291789782991e+02 5.603051703379153e+02 3 2.872567998557088e+02 1.635098024620329e+02 7.847331657016402e+01 -2.227620976482501e+02 4 5.443703625465666e+02 -4.869453736967034e+01 -4.243024955484631e+02 -3.375430726896653e+02 - ME 8.270083568815311e-04 + ME 2.265252332392545e-04 Event 93 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -748,7 +748,7 @@ Event 93 Batch 0 2 5.666948073002088e+02 5.408074886689032e+01 5.639942928586390e+02 -1.134525653745258e+01 3 6.168025492529713e+02 2.439040545997395e+02 -5.541969602989467e+02 1.175666879272316e+02 4 3.165026434468199e+02 -2.979848034666298e+02 -9.797332559692304e+00 -1.062214313897791e+02 - ME 1.664960428447917e-04 + ME 1.251778043268437e-05 Event 94 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-756,7 +756,7 @@ Event 94 Batch 0 2 4.964349376711385e+02 8.445930034540567e+01 -2.409007074648561e+02 -4.257712097695705e+02 3 5.660980232871289e+02 1.373833465612049e+02 5.210669225216058e+02 1.734417778711397e+02 4 4.374670390417324e+02 -2.218426469066104e+02 -2.801662150567495e+02 2.523294318984307e+02 - ME 3.431641292834382e-05 + ME 1.007141026120618e-05 Event 95 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -764,7 +764,7 @@ Event 95 Batch 0 2 7.117074025057361e+02 -3.227984571262278e+02 4.276971164854593e+02 -4.684055501468919e+02 3 1.264078228725325e+02 8.675876182178401e+01 5.074873328843479e+01 7.665781760618943e+01 4 6.618847746217315e+02 2.360396953044439e+02 -4.784458497738940e+02 3.917477325407025e+02 - ME 2.121249861094822e-04 + ME 8.653822330208906e-05 Event 96 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -772,7 +772,7 @@ Event 96 Batch 0 2 7.329769441659936e+02 -9.642859092211874e+01 6.903981466332597e+02 -2.265107649915406e+02 3 3.937873938465678e+02 -4.837693103302091e+01 -3.847118583018795e+02 6.873841850241256e+01 4 3.732356619874385e+02 1.448055219551397e+02 -3.056862883313802e+02 1.577723464891279e+02 - ME 3.473186069800973e-05 + ME 9.822975749896163e-06 Event 97 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -780,7 +780,7 @@ Event 97 Batch 0 2 3.394989963266853e+01 6.003767577498499e+00 -2.078495220615399e+01 2.616364312804199e+01 3 7.377311980366451e+02 -5.308290258162607e+02 4.681853362634530e+02 2.080152802450354e+02 4 7.283189023306861e+02 5.248252582387622e+02 -4.474003840572991e+02 -2.341789233730774e+02 - ME 2.063600678642283e-02 + ME 2.729355315721549e-03 Event 98 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -788,7 +788,7 @@ Event 98 Batch 0 2 2.496912687496082e+02 -2.485814905959506e+02 -5.435228288348340e-01 
-2.350907922099247e+01 3 7.458289852530976e+02 7.373315781279124e+02 9.801365830907572e+01 -5.473885205171283e+01 4 5.044797459972945e+02 -4.887500875319618e+02 -9.747013548024091e+01 7.824793127270530e+01 - ME 6.800308216903296e-05 + ME 8.091578731489026e-06 Event 99 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -796,7 +796,7 @@ Event 99 Batch 0 2 1.698125854886770e+02 8.336002034290719e+01 8.774494220182726e+01 -1.191144253093525e+02 3 6.496622934125946e+02 5.714329899004554e+02 -6.230613627727958e+01 3.027265745152471e+02 4 6.805251210987285e+02 -6.547930102433627e+02 -2.543880592454771e+01 -1.836121492058947e+02 - ME 6.115029137493471e-04 + ME 1.856310681395454e-04 Event 100 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -804,7 +804,7 @@ Event 100 Batch 0 2 6.141460480129781e+02 -5.842473718080511e+02 -5.092222124447417e+01 1.823110095657221e+02 3 3.909476383151783e+02 2.539115798088024e+02 -2.930333502072385e+02 -5.000421191795168e+01 4 4.949063136718440e+02 3.303357919992488e+02 3.439555714517127e+02 -1.323067976477707e+02 - ME 1.550407956048336e-04 + ME 2.380755205932631e-05 Event 101 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -812,7 +812,7 @@ Event 101 Batch 0 2 7.469346538870473e+02 3.524232024688497e+02 -1.488240016505349e+02 -6.415299525912136e+02 3 6.502268999047169e+02 -2.777200960400715e+02 1.351761574712158e+02 5.721835160737410e+02 4 1.028384462082358e+02 -7.470310642877820e+01 1.364784417931910e+01 6.934643651747267e+01 - ME 1.080054053054822e-04 + ME 7.777208667430486e-05 Event 102 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -820,7 +820,7 @@ Event 102 Batch 0 2 7.426790432885583e+02 -3.141071077544728e+02 6.615000409077074e+02 1.238005738162371e+02 3 6.735764515788642e+01 -4.139700837311957e+00 -5.533298776898177e+01 
-3.818606686673834e+01 4 6.899633115535552e+02 3.182468085917849e+02 -6.061670531387255e+02 -8.561450694949879e+01 - ME 6.292262541994918e-04 + ME 1.796768498680773e-04 Event 103 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -828,7 +828,7 @@ Event 103 Batch 0 2 4.837874798175253e+02 -2.731724972668680e+02 1.247027290420595e+02 -3.793103501549069e+02 3 4.466406321977809e+02 -2.904538080082218e+02 -1.536665846758871e+02 3.025078850172422e+02 4 5.695718879846930e+02 5.636263052750895e+02 2.896385563382777e+01 7.680246513766473e+01 - ME 8.140894767450013e-05 + ME 2.998858312831636e-05 Event 104 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -836,7 +836,7 @@ Event 104 Batch 0 2 5.788466572679498e+02 3.572346730226224e+02 -3.682137844992378e+02 2.680773207965347e+02 3 2.925711988065158e+02 2.155069407513812e+02 1.697995838195863e+02 -1.016010147279926e+02 4 6.285821439255348e+02 -5.727416137740034e+02 1.984142006796517e+02 -1.664763060685422e+02 - ME 2.849770726480251e-04 + ME 7.634200862908681e-05 Event 105 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -844,7 +844,7 @@ Event 105 Batch 0 2 3.361125455083114e+02 2.619004058447622e+02 4.338373361330959e+01 -2.061496357605196e+02 3 5.299016201311088e+02 2.892532450564946e+02 2.091058919093095e+02 3.916669672191841e+02 4 6.339858343605800e+02 -5.511536509012568e+02 -2.524896255226191e+02 -1.855173314586645e+02 - ME 2.866662317167052e-04 + ME 1.089382545947932e-04 Event 106 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -852,7 +852,7 @@ Event 106 Batch 0 2 3.578050478863485e+02 -2.265838270225943e+02 2.740910124726658e+02 -3.947579646386072e+01 3 5.202885196186892e+02 1.412729374205232e+02 1.631578432376887e+02 4.734148487210871e+02 4 6.219064324949621e+02 8.531088960207101e+01 -4.372488557103545e+02 
-4.339390522572265e+02 - ME 1.912263829178338e-03 + ME 4.548955126640399e-04 Event 107 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -860,7 +860,7 @@ Event 107 Batch 0 2 5.409822745993889e+02 9.278463733038997e+01 5.102180459532771e+02 -1.540466750365499e+02 3 2.501852297905710e+02 1.682301834486207e+02 1.474652503315489e+02 1.120056004263085e+02 4 7.088324956100398e+02 -2.610148207790107e+02 -6.576832962848259e+02 4.204107461024153e+01 - ME 7.096163321035572e-04 + ME 2.159102073406285e-04 Event 108 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -868,7 +868,7 @@ Event 108 Batch 0 2 6.835202199428555e+02 6.670011709444186e+02 6.653656309718588e+01 1.337243986739828e+02 3 2.377887385005082e+02 -1.098327419601477e+02 7.667443498831059e+01 -1.964720946353502e+02 4 5.786910415566365e+02 -5.571684289842709e+02 -1.432109980854965e+02 6.274769596136723e+01 - ME 1.143500637563713e-04 + ME 2.960130886583330e-05 Event 109 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -876,7 +876,7 @@ Event 109 Batch 0 2 5.978180281189351e+02 4.291222314737005e+02 2.249703559956599e+02 3.501840146583366e+02 3 3.585061336071061e+02 -3.227227650115256e+02 1.541688059097761e+02 2.467071262824850e+01 4 5.436758382739589e+02 -1.063994664621746e+02 -3.791391619054360e+02 -3.748547272865851e+02 - ME 1.159187207430584e-03 + ME 1.100286424576873e-04 Event 110 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -884,7 +884,7 @@ Event 110 Batch 0 2 7.073952645543156e+01 -4.753982451958468e+01 4.872856968801237e+01 -1.922426029646691e+01 3 7.438039776014969e+02 1.707202332282495e+02 -7.225114374584515e+02 4.556513803361385e+01 4 6.854564959430718e+02 -1.231804087086648e+02 6.737828677704391e+02 -2.634087773714689e+01 - ME 5.177444310012934e-04 + ME 1.052942530962122e-04 Event 111 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -892,7 +892,7 @@ Event 111 Batch 0 2 5.206822291802364e+02 -3.873336848644893e+02 2.415505427333673e+02 -2.504714268307115e+02 3 5.478000561519707e+02 4.687653961676166e+02 -2.245690260344170e+02 -1.729527606656598e+02 4 4.315177146677929e+02 -8.143171130312743e+01 -1.698151669895031e+01 4.234241874963712e+02 - ME 1.041517236520828e-04 + ME 8.545692640795734e-05 Event 112 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -900,7 +900,7 @@ Event 112 Batch 0 2 3.610471238372959e+02 2.563298943277285e+02 9.635756626046441e+01 -2.352981732387216e+02 3 6.139063356201009e+02 1.031778254919422e+02 -4.257030126280926e+02 4.301305270271111e+02 4 5.250465405426031e+02 -3.595077198196707e+02 3.293454463676283e+02 -1.948323537883896e+02 - ME 2.333567140730066e-04 + ME 5.572029836371622e-05 Event 113 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -908,7 +908,7 @@ Event 113 Batch 0 2 5.886653054136124e+02 3.035646198144377e+02 3.278619896967805e+02 -3.832517176826292e+02 3 5.420023902452333e+02 -3.658357535838290e+02 -3.990519958595696e+02 2.623541560166928e+01 4 3.693323043411537e+02 6.227113376939163e+01 7.119000616278893e+01 3.570163020809600e+02 - ME 6.906402420910258e-05 + ME 4.986188449478774e-05 Event 114 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -916,7 +916,7 @@ Event 114 Batch 0 2 5.165204340356855e+02 2.346362244736889e+01 6.298471388966840e+00 5.159487827839334e+02 3 5.932916594323345e+02 3.608814360715946e+02 -5.336137507463695e+01 -4.678804824963537e+02 4 3.901879065319798e+02 -3.843450585189634e+02 4.706290368567026e+01 -4.806830028757967e+01 - ME 5.363382776736297e-04 + ME 4.029549711869195e-04 Event 115 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -924,7 
+924,7 @@ Event 115 Batch 0 2 5.432307281524777e+02 2.250327918244370e+02 4.870559856477670e+02 -8.506664127290338e+01 3 4.265243530840496e+02 2.057819224248363e+02 -2.472237669715339e+02 2.801021835354204e+02 4 5.302449187634726e+02 -4.308147142492733e+02 -2.398322186762331e+02 -1.950355422625171e+02 - ME 2.364149932043149e-04 + ME 4.159321993514108e-05 Event 116 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -932,7 +932,7 @@ Event 116 Batch 0 2 4.402635748890415e+02 -4.240500842615081e+02 -5.733358735035193e+01 -1.035683405941509e+02 3 4.399967684638562e+02 1.183617589007452e+02 -1.041572505293867e+02 -4.107784286579766e+02 4 6.197396566471035e+02 3.056883253607625e+02 1.614908378797388e+02 5.143467692521278e+02 - ME 1.343295643586522e-04 + ME 4.172733678506819e-05 Event 117 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -940,7 +940,7 @@ Event 117 Batch 0 2 3.074085311587982e+02 -4.270248480828711e+01 -3.034838508096459e+02 2.395944736750828e+01 3 5.360984061023379e+02 3.510554986169303e+02 -1.596589010508530e+02 -3.723849798683070e+02 4 6.564930627388640e+02 -3.083530138086433e+02 4.631427518604987e+02 3.484255325007987e+02 - ME 1.795895763168496e-04 + ME 4.142391000026985e-05 Event 118 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -948,7 +948,7 @@ Event 118 Batch 0 2 5.403602961735903e+02 4.471526113902045e+02 -1.804334130868151e+02 -2.439007487679592e+02 3 5.654623567965698e+02 -5.534570111367966e+02 -1.157195831079003e+02 6.480112868522320e+00 4 3.941773470298406e+02 1.063043997465919e+02 2.961529961947150e+02 2.374206358994370e+02 - ME 3.055618730902428e-05 + ME 7.288650603673961e-06 Event 119 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -956,7 +956,7 @@ Event 119 Batch 0 2 8.009099446659010e+01 5.775399043490319e+01 -2.629604726664823e+01 
4.886268393818209e+01 3 7.131140611332349e+02 2.472685400460709e+02 -2.870014097539109e+02 -6.041689532644716e+02 4 7.067949444001758e+02 -3.050225304809738e+02 3.132974570205592e+02 5.553062693262896e+02 - ME 6.861262467765907e-04 + ME 2.815424392761942e-04 Event 120 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -964,7 +964,7 @@ Event 120 Batch 0 2 5.007248873753321e+02 2.708997263130530e+02 -3.880896283797751e+02 1.634784128397387e+02 3 7.413897277398672e+02 -4.257033276374029e+02 5.921425482134987e+02 -1.334264135464211e+02 4 2.578853848848011e+02 1.548036013243502e+02 -2.040529198337238e+02 -3.005199929331748e+01 - ME 1.034513276694145e-04 + ME 6.003662532288496e-06 Event 121 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -972,7 +972,7 @@ Event 121 Batch 0 2 5.732265116821120e+02 -1.149395375629033e+02 4.260916136383032e+02 3.658189076403451e+02 3 4.323948798659248e+02 -2.148488009071912e+01 -4.178027098651986e+02 1.092914804138530e+02 4 4.943786084519640e+02 1.364244176536226e+02 -8.288903773105691e+00 -4.751103880541979e+02 - ME 8.074833733477824e-02 + ME 7.661241871407340e-04 Event 122 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -980,7 +980,7 @@ Event 122 Batch 0 2 3.423360304412701e+02 2.648046119434483e+02 2.369247279710451e+01 -2.156644197927059e+02 3 6.059487982275789e+02 2.457729689670163e+01 -4.569077875801422e+02 3.972469964635579e+02 4 5.517151713311508e+02 -2.893819088401499e+02 4.332153147830377e+02 -1.815825766708520e+02 - ME 2.180123533398812e-04 + ME 5.274300345459390e-05 Event 123 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -988,7 +988,7 @@ Event 123 Batch 0 2 1.430133297276668e+02 -4.205671322284506e+01 3.498095937953869e+01 1.321377229770999e+02 3 7.140350670908600e+02 -2.955397919833849e+01 -6.570980288365154e+02 
-2.778395577453968e+02 4 6.429516031814733e+02 7.161069242118367e+01 6.221170694569771e+02 1.457018347682969e+02 - ME 5.626335206455025e-04 + ME 2.698780233597045e-04 Event 124 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -996,7 +996,7 @@ Event 124 Batch 0 2 6.053457283343441e+02 5.458657819531910e+02 -1.853964251366731e+01 -2.610177782464909e+02 3 7.499633671623128e+02 -6.784114238502394e+02 2.145325921506613e+01 3.189713933003628e+02 4 1.446909045033435e+02 1.325456418970486e+02 -2.913616701398675e+00 -5.795361505387172e+01 - ME 4.169465060943616e-04 + ME 2.629538535113942e-05 Event 125 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1004,7 +1004,7 @@ Event 125 Batch 0 2 6.695439244882118e+02 9.058534244088493e+01 6.586171675820721e+02 7.941529525294386e+01 3 9.341516463500346e+01 3.490868167113007e+01 5.232133368429144e+01 6.906703243419068e+01 4 7.370409108767834e+02 -1.254940241120154e+02 -7.109385012663632e+02 -1.484823276871337e+02 - ME 1.111472366347957e-02 + ME 4.436636984625360e-03 Event 126 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1012,7 +1012,7 @@ Event 126 Batch 0 2 6.465564354211967e+02 -2.094351601488127e+02 -1.930091683601272e+02 -5.804477571728034e+02 3 1.356182567235447e+02 -2.832094442380729e+01 9.735247446175231e+01 -9.007070211700794e+01 4 7.178253078552584e+02 2.377561045726200e+02 9.565669389837488e+01 6.705184592898115e+02 - ME 1.775660879411100e-03 + ME 1.230970446288030e-03 Event 127 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1020,7 +1020,7 @@ Event 127 Batch 0 2 4.508388003927651e+02 -3.846405138087858e+02 7.756355374444065e+01 2.220162025777267e+02 3 6.162879941073576e+02 2.174727303224461e+02 1.334711143222092e+02 -5.609830344035003e+02 4 4.328732054998774e+02 1.671677834863399e+02 -2.110346680666500e+02 
3.389668318257735e+02 - ME 3.922171581774212e-05 + ME 2.127227557837123e-05 Event 128 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1028,7 +1028,7 @@ Event 128 Batch 0 2 7.468963146802857e+02 5.701805835528932e+02 -3.440982003215339e+02 -3.381488363986430e+02 3 1.196664332518719e+02 -9.337643239636876e+01 2.398139841985228e+01 7.089280393650260e+01 4 6.334372520678420e+02 -4.768041511565244e+02 3.201168019016817e+02 2.672560324621404e+02 - ME 2.053620454072734e-04 + ME 7.842790653965437e-05 Event 129 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1036,7 +1036,7 @@ Event 129 Batch 0 2 4.378966182438207e+02 -4.256397208622688e+02 4.624364030548149e+01 9.190104474357973e+01 3 7.127537996732577e+02 5.790589826349546e+02 -1.369827771626340e+02 -3.923574802896586e+02 4 3.493495820829217e+02 -1.534192617726859e+02 9.073913685715252e+01 3.004564355460789e+02 - ME 1.668072874757384e-05 + ME 1.046217618618756e-05 Event 130 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1044,7 +1044,7 @@ Event 130 Batch 0 2 6.322026526626455e+02 5.905875735566585e+02 -2.387291116192753e+01 -2.243136110600485e+02 3 5.268087771404591e+02 -3.287250458747471e+02 1.913681034684307e+02 3.644798771698754e+02 4 3.409885701968954e+02 -2.618625276819114e+02 -1.674951923065032e+02 -1.401662661098267e+02 - ME 2.766647151388132e-04 + ME 3.412796728096272e-05 Event 131 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1052,7 +1052,7 @@ Event 131 Batch 0 2 2.691964685177017e+02 -2.641651354044939e+02 4.065264362900757e+01 -3.210735842607325e+01 3 5.382709487855662e+02 -3.022535437819008e+02 -4.307865739991411e+02 1.131429946566680e+02 4 6.925325826967319e+02 5.664186791863947e+02 3.901339303701337e+02 -8.103563623059465e+01 - ME 5.354423766199649e-04 + ME 1.516502654737588e-04 Event 132 Batch 
0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1060,7 +1060,7 @@ Event 132 Batch 0 2 1.376388194981169e+02 -2.491804956023667e+01 3.114513197621116e+01 1.317327453336230e+02 3 7.332494677489981e+02 -3.054807357444667e+02 -6.882601889638243e+00 -6.665500220046781e+02 4 6.291117127528858e+02 3.303987853047034e+02 -2.426253008657308e+01 5.348172766710551e+02 - ME 3.625143788027957e-04 + ME 2.459616839911958e-04 Event 133 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1068,7 +1068,7 @@ Event 133 Batch 0 2 5.818916885738672e+02 -3.437736592641007e+02 -2.113522447259726e+02 -4.192228966514222e+02 3 7.075583625851592e+02 3.695171106849944e+02 9.875952986414086e+01 5.952667441040354e+02 4 2.105499488409736e+02 -2.574345142089370e+01 1.125927148618317e+02 -1.760438474526132e+02 - ME 6.644965721204062e-03 + ME 3.278402967978973e-04 Event 134 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1076,7 +1076,7 @@ Event 134 Batch 0 2 7.039051474789593e+02 -1.767404282002263e+02 5.832845063404937e+02 3.521710697233707e+02 3 6.740856043500099e+02 9.540039380435479e+01 -5.203258634262522e+02 -4.177932056695244e+02 4 1.220092481710302e+02 8.134003439587134e+01 -6.295864291424151e+01 6.562213594615410e+01 - ME 6.394436352069354e-05 + ME 3.621089826286842e-05 Event 135 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1084,7 +1084,7 @@ Event 135 Batch 0 2 7.491379873081086e+02 -6.603965492909807e+02 -9.243924572685610e+01 -3.413782470545817e+02 3 4.360367703469753e+02 3.763875731093294e+02 3.833030381995060e+01 2.167746473012021e+02 4 3.148252423449159e+02 2.840089761816513e+02 5.410894190690560e+01 1.246035997533796e+02 - ME 3.729096801849378e-05 + ME 1.170602675185252e-05 Event 136 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-1092,7 +1092,7 @@ Event 136 Batch 0 2 6.907976432034611e+02 -8.965778913807024e+01 -5.375684903631193e+02 -4.244796613161184e+02 3 4.317447428217263e+02 2.541758793770707e+02 2.501815833403360e+02 2.433255445990286e+02 4 3.774576139748129e+02 -1.645180902390004e+02 2.873869070227833e+02 1.811541167170898e+02 - ME 3.295715598818487e-05 + ME 1.221598515374744e-05 Event 137 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1100,7 +1100,7 @@ Event 137 Batch 0 2 5.927917878715718e+02 -5.453882061843875e+02 -2.239274061847312e+02 6.172783069514800e+01 3 3.718333194205911e+02 2.859809174201715e+02 -2.363544177495510e+02 2.472896101988843e+01 4 5.353748927078371e+02 2.594072887642160e+02 4.602818239342820e+02 -8.645679171503701e+01 - ME 1.267334233155001e-04 + ME 2.222722395048600e-05 Event 138 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1108,7 +1108,7 @@ Event 138 Batch 0 2 1.164849493482387e+02 2.012854405109472e+01 -2.573298799707043e+01 -1.118096528381494e+02 3 7.481698498358139e+02 -1.044692284663333e+02 -4.003634472873074e+00 7.408294509656059e+02 4 6.353452008159477e+02 8.434068441523856e+01 2.973662246994375e+01 -6.290197981274564e+02 - ME 3.545594402685597e+00 + ME 1.183014588836486e-01 Event 139 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1116,7 +1116,7 @@ Event 139 Batch 0 2 3.415587822283577e+02 -2.468214832259765e+02 1.926082427237748e+02 1.365416492148350e+02 3 5.828887331044928e+02 -1.023403009989268e+02 -5.561813319045077e+02 1.412376154306548e+02 4 5.755524846671491e+02 3.491617842249035e+02 3.635730891807333e+02 -2.777792646454897e+02 - ME 4.142320485322521e-04 + ME 5.213154494000113e-05 Event 140 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1124,7 +1124,7 @@ Event 140 Batch 0 2 4.395392082109443e+02 -3.037880820376849e+02 
-2.455930383243060e+02 -2.014735126343029e+02 3 4.709796125547878e+02 -2.826270024952004e+02 2.984919122515593e+02 2.298833426397907e+02 4 5.894811792342680e+02 5.864150845328855e+02 -5.289887392725340e+01 -2.840983000548780e+01 - ME 1.220048440917972e-04 + ME 2.990357782498624e-05 Event 141 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1132,7 +1132,7 @@ Event 141 Batch 0 2 3.025838986653694e+02 -2.680006525137058e+02 -6.218827689980458e+01 -1.259574698062632e+02 3 5.104624598690772e+02 -2.829910827131053e+02 4.173533268753467e+02 -7.939880721102661e+01 4 6.869536414655528e+02 5.509917352268112e+02 -3.551650499755422e+02 2.053562770172896e+02 - ME 3.735313583347012e-04 + ME 7.151804808113674e-05 Event 142 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1140,7 +1140,7 @@ Event 142 Batch 0 2 4.390011511178412e+02 -3.153925512561953e+02 3.992377088505197e+01 -3.027468279160259e+02 3 4.597282536099518e+02 2.984856708041211e+02 -2.221794712617382e+02 -2.699863960308454e+02 4 6.012705952722066e+02 1.690688045207421e+01 1.822557003766862e+02 5.727332239468712e+02 - ME 1.630913878361870e-04 + ME 8.945447985744934e-05 Event 143 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1148,7 +1148,7 @@ Event 143 Batch 0 2 7.103308443495001e+02 -3.626595603160224e+02 2.462759922459802e+02 5.589240443825270e+02 3 3.424564807343295e+02 4.507572778536915e+01 -2.357842367637252e+02 -2.442343416788665e+02 4 4.472126749161695e+02 3.175838325306533e+02 -1.049175548225529e+01 -3.146897027036604e+02 - ME 1.304325296055160e-03 + ME 1.789392510542836e-04 Event 144 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1156,7 +1156,7 @@ Event 144 Batch 0 2 6.893886390440568e+02 -2.470805413393656e+02 1.331686162420120e+02 6.296618309717105e+02 3 7.132719020730987e+02 2.482972988978650e+02 
-2.304803220538649e+02 -6.276815106349294e+02 4 9.733945888284487e+01 -1.216757558499225e+00 9.731170581185302e+01 -1.980320336781234e+00 - ME 3.769348793094523e-04 + ME 1.486904409371019e-04 Event 145 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1164,7 +1164,7 @@ Event 145 Batch 0 2 3.784954309743686e+02 2.391836032855264e+02 1.115572896135236e+01 -2.931305935912622e+02 3 7.389406222827198e+02 -4.231861417520660e+02 1.513250860114713e+02 5.865555822189353e+02 4 3.825639467429113e+02 1.840025384665394e+02 -1.624808149728234e+02 -2.934249886276727e+02 - ME 2.193982780219728e-03 + ME 2.016505354100400e-04 Event 146 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1172,7 +1172,7 @@ Event 146 Batch 0 2 4.681255842987410e+02 -3.253195724522379e+01 1.754808059398437e+02 -4.327698247100133e+02 3 2.875849079819393e+02 2.091841587061404e+01 1.879781824316579e+02 -2.166372592748876e+02 4 7.442895077193195e+02 1.161354137460973e+01 -3.634589883715017e+02 6.494070839849006e+02 - ME 5.347932692815789e-02 + ME 1.210467216316050e-02 Event 147 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1180,7 +1180,7 @@ Event 147 Batch 0 2 2.442136391928777e+02 -1.784444843977844e+02 -1.666832492802189e+02 -3.816014311599316e+00 3 5.551361515401285e+02 1.378338123621512e+02 -5.199472642306259e+02 1.372327560591401e+02 4 7.006502092669938e+02 4.061067203563306e+01 6.866305135108448e+02 -1.334167417475408e+02 - ME 7.450632204513606e-04 + ME 2.360352365747709e-04 Event 148 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1188,7 +1188,7 @@ Event 148 Batch 0 2 4.547263863263726e+02 3.928375677411887e+02 5.145105706241225e+01 2.231759855356057e+02 3 7.397285466814292e+02 -5.611511356388266e+02 -1.533645573573770e+02 -4.569322031694095e+02 4 3.055450669921979e+02 1.683135678976379e+02 
1.019135002949646e+02 2.337562176338038e+02 - ME 1.440225905683450e-05 + ME 6.307552439231181e-06 Event 149 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1196,7 +1196,7 @@ Event 149 Batch 0 2 2.343018799311635e+02 9.853424545130945e+01 1.924850318874441e+02 -9.021023174733594e+01 3 7.291173748950658e+02 3.429747374294529e+01 -5.990516617369192e+02 4.142136359886766e+02 4 5.365807451737705e+02 -1.328317191942547e+02 4.065666298494750e+02 -3.240034042413406e+02 - ME 8.405553848068603e-04 + ME 8.298171355094406e-05 Event 150 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1204,7 +1204,7 @@ Event 150 Batch 0 2 4.707648023587808e+02 -8.969278865174961e+01 -3.008719699078221e+02 3.507859183712497e+02 3 6.876639918976698e+02 3.906111988928598e+02 4.609284537794546e+02 -3.284046551871671e+02 4 3.415712057435500e+02 -3.009184102411105e+02 -1.600564838716325e+02 -2.238126318408256e+01 - ME 1.070125715137075e-04 + ME 1.887585788236135e-05 Event 151 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1212,7 +1212,7 @@ Event 151 Batch 0 2 6.503034458278056e+02 -1.575298496674962e+02 -3.658248853789647e+01 -6.298735108350154e+02 3 6.998690336552314e+02 1.302751858829802e+02 -1.019415103826456e+02 6.800389464387812e+02 4 1.498275205169629e+02 2.725466378451580e+01 1.385239989205421e+02 -5.016543560376590e+01 - ME 6.663776898009472e-04 + ME 4.060174493404880e-04 Event 152 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1220,7 +1220,7 @@ Event 152 Batch 0 2 7.401192382353395e+02 1.493701961830190e+02 6.288419447382046e+02 3.605867993093739e+02 3 7.332111095478891e+02 -1.230079111936445e+02 -6.287602831147091e+02 -3.565502647954901e+02 4 2.666965221677112e+01 -2.636228498937447e+01 -8.166162349550861e-02 -4.036534513883709e+00 - ME 8.446403371723604e-04 + ME 
1.210964379505254e-04 Event 153 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1228,7 +1228,7 @@ Event 153 Batch 0 2 5.645797071775899e+02 7.941901905692946e+01 3.691428696980725e+02 -4.197337333594241e+02 3 6.079979027943974e+02 1.021455738177839e+02 -5.566920170809548e+02 2.220849604771994e+02 4 3.274223900280123e+02 -1.815645928747133e+02 1.875491473828823e+02 1.976487728822249e+02 - ME 2.846663840296023e-05 + ME 9.895323747190810e-06 Event 154 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1236,7 +1236,7 @@ Event 154 Batch 0 2 6.022174885419887e+02 -5.152457849782368e+02 -1.493252664732707e+02 -2.736597328082223e+02 3 3.617627670199851e+02 1.925398333816265e+02 -2.626238171638091e+02 1.575736108034646e+02 4 5.360197444380261e+02 3.227059515966102e+02 4.119490836370796e+02 1.160861220047577e+02 - ME 6.437319974597944e-05 + ME 1.660411512586943e-05 Event 155 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1244,7 +1244,7 @@ Event 155 Batch 0 2 6.202229507100907e+02 -2.107861924791831e+02 -3.212541876154504e+02 4.868690137883067e+02 3 2.943040328093193e+02 2.940980302320592e+02 1.073731199058907e+01 2.433613089266508e+00 4 5.854730164805898e+02 -8.331183775287627e+01 3.105168756248616e+02 -4.893026268775732e+02 - ME 5.904510654775639e-03 + ME 4.918845171174253e-04 Event 156 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1252,7 +1252,7 @@ Event 156 Batch 0 2 4.945486805149833e+02 4.540818864859257e+02 -1.431706201593249e+02 -1.337542944644701e+02 3 5.997303202813281e+02 -3.624214233270367e+02 -5.726286247273350e+01 4.743923835389624e+02 4 4.057209992036886e+02 -9.166046315888883e+01 2.004334826320584e+02 -3.406380890744924e+02 - ME 4.701306652347430e-03 + ME 1.986837824231628e-04 Event 157 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -1260,7 +1260,7 @@ Event 157 Batch 0 2 4.617003083190191e+02 3.118400043328062e+02 3.404502064148864e+02 -4.079626411035589e+00 3 5.720097526413113e+02 -4.999240316044806e+01 -4.329264075474301e+02 -3.705005295422582e+02 4 4.662899390396696e+02 -2.618476011723578e+02 9.247620113254365e+01 3.745801559532937e+02 - ME 3.907978340087068e-05 + ME 1.403598809900552e-05 Event 158 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1268,7 +1268,7 @@ Event 158 Batch 0 2 6.784877363061535e+02 -5.707102180762959e+02 -3.102223423027389e+02 -1.959529373021938e+02 3 5.650909444059712e+02 5.525284805868615e+02 7.765167789879932e+01 8.950011457818250e+01 4 2.564213192878751e+02 1.818173748943443e+01 2.325706644039396e+02 1.064528227240114e+02 - ME 3.503179830087694e-05 + ME 8.470133063482862e-06 Event 159 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1276,7 +1276,7 @@ Event 159 Batch 0 2 5.369491563274252e+02 2.154713482252002e+02 -2.912667909729743e+02 3.962955349875316e+02 3 6.066564496499102e+02 -4.020061311781470e+01 5.572389608252350e+02 -2.364332868806716e+02 4 3.563943940226648e+02 -1.752707351073854e+02 -2.659721698522608e+02 -1.598622481068599e+02 - ME 3.198473025834927e-04 + ME 3.562393617300492e-05 Event 160 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1284,7 +1284,7 @@ Event 160 Batch 0 2 6.492474755438517e+02 3.490068395973682e+02 1.460348644657111e+02 -5.276270735801970e+02 3 2.857818814470013e+02 -2.550253586192556e+02 1.227259509083862e+02 3.964456076362119e+01 4 5.649706430091471e+02 -9.398148097811273e+01 -2.687608153740973e+02 4.879825128165764e+02 - ME 6.719464076924620e-05 + ME 3.516238941302227e-05 Event 161 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1292,7 +1292,7 @@ Event 161 Batch 0 2 
6.770282049439580e+02 -2.863253153105184e+02 -4.911270786072976e+02 -3.676672364525180e+02 3 1.598243093356544e+02 -7.505362471426160e+01 1.299195075310522e+02 -5.506073768810752e+01 4 6.631474857203874e+02 3.613789400247800e+02 3.612075710762453e+02 4.227279741406256e+02 - ME 1.577168105051119e-04 + ME 5.970757951131334e-05 Event 162 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1300,7 +1300,7 @@ Event 162 Batch 0 2 5.178592782584632e+02 -3.271131571456631e+02 3.943743741889439e+02 -7.512700901574514e+01 3 3.730686930366258e+02 -2.885924195736573e+01 -1.360208443078026e+02 -3.461874113706257e+02 4 6.090720287049110e+02 3.559723991030290e+02 -2.583535298811414e+02 4.213144203863710e+02 - ME 1.031749267713353e-04 + ME 2.768303103320498e-05 Event 163 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1308,7 +1308,7 @@ Event 163 Batch 0 2 5.388642316037673e+02 3.152159924116781e+02 3.539969933522669e+01 -4.356149670486711e+02 3 5.364171791816749e+02 -5.299694218906361e+02 3.369785517714305e+01 7.576448071880543e+01 4 4.247185892145582e+02 2.147534294789580e+02 -6.909755451236977e+01 3.598504863298658e+02 - ME 3.508094027565679e-05 + ME 1.485600561394433e-05 Event 164 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1316,7 +1316,7 @@ Event 164 Batch 0 2 6.862697092177667e+02 4.132218376422068e+02 1.310202162324327e+02 -5.320221138485150e+02 3 4.476895523579005e+02 -2.769046850483522e+02 1.374187337517142e+02 3.238299280529301e+02 4 3.660407384243329e+02 -1.363171525938544e+02 -2.684389499841469e+02 2.081921857955847e+02 - ME 3.375894779915149e-05 + ME 1.755563256840939e-05 Event 165 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1324,7 +1324,7 @@ Event 165 Batch 0 2 2.382444910715278e+02 -2.158277263671036e+02 -9.471372817531817e+00 -1.004446273032522e+02 3 
7.304591383576048e+02 4.619003715882296e+02 -1.223345688256177e+02 5.524969256086772e+02 4 5.312963705708673e+02 -2.460726452211260e+02 1.318059416431495e+02 -4.520522983054250e+02 - ME 6.966498968932957e-03 + ME 4.549138184301779e-04 Event 166 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1332,7 +1332,7 @@ Event 166 Batch 0 2 2.131352071380649e+02 -7.633553084455029e+01 -1.899581415396244e+02 5.929087379418958e+01 3 7.305557876753161e+02 8.980971292745940e+01 7.136333043711877e+02 1.279589045828712e+02 4 5.563090051866194e+02 -1.347418208290915e+01 -5.236751628315633e+02 -1.872497783770607e+02 - ME 3.314006956523505e-04 + ME 3.352199959657985e-05 Event 167 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1340,7 +1340,7 @@ Event 167 Batch 0 2 4.122964103002419e+02 -3.405127102276982e+02 6.366431608201744e+01 2.235761145061386e+02 3 4.697083356610920e+02 -2.521100678451879e+02 -2.856113063438232e+01 -3.952855880214881e+02 4 6.179952540386658e+02 5.926227780728861e+02 -3.510318544763516e+01 1.717094735153495e+02 - ME 1.146777177775239e-04 + ME 3.829535931496594e-05 Event 168 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1348,7 +1348,7 @@ Event 168 Batch 0 2 7.156643283953484e+02 -3.999734570317170e+02 4.816586825103861e+02 3.467009924560655e+02 3 6.192344221355605e+02 2.722545660880235e+02 -4.999454120042317e+02 -2.436869012025525e+02 4 1.651012494690919e+02 1.277188909436936e+02 1.828672949384504e+01 -1.030140912535133e+02 - ME 1.017624049822302e-03 + ME 5.027887292283473e-05 Event 169 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1356,7 +1356,7 @@ Event 169 Batch 0 2 3.626022684949455e+02 7.511110909567982e+01 -2.030941161665286e+02 -2.908461902563517e+02 3 5.580565590514408e+02 -2.529981754432838e+02 -3.439969378312538e+02 3.592842232626199e+02 4 
5.793411724536141e+02 1.778870663476037e+02 5.470910539977822e+02 -6.843803300626824e+01 - ME 1.371698416063432e-04 + ME 4.350242525242475e-05 Event 170 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1364,7 +1364,7 @@ Event 170 Batch 0 2 6.602909342483501e+02 4.699653539595539e+02 -3.020118498241596e+02 3.520021683086903e+02 3 1.039297502933440e+02 3.247420585022842e+01 -9.851348423194945e+01 6.473976746580508e+00 4 7.357793154583061e+02 -5.024395598097824e+02 4.005253340561092e+02 -3.584761450552709e+02 - ME 1.673719496447659e-02 + ME 9.967260301798612e-03 Event 171 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1372,7 +1372,7 @@ Event 171 Batch 0 2 1.506693011949600e+02 -3.657300520509282e+01 -1.244227366169959e+02 -7.669834565089053e+01 3 6.344013325830570e+02 -2.026333084464634e+02 -4.956100871165362e+02 3.402578943089165e+02 4 7.149293662219835e+02 2.392063136515561e+02 6.200328237335323e+02 -2.635595486580261e+02 - ME 2.133207113512388e-03 + ME 9.157902172934166e-04 Event 172 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1380,7 +1380,7 @@ Event 172 Batch 0 2 5.352445157558213e+02 -2.018352690102651e+02 3.892440882325296e+02 -3.069825004886504e+02 3 6.716112180685394e+02 2.825227203806547e+02 -5.978593235713698e+02 1.175022124175027e+02 4 2.931442661756383e+02 -8.068745137038898e+01 2.086152353388391e+02 1.894802880711483e+02 - ME 2.630379932615259e-05 + ME 8.067092159940342e-06 Event 173 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1388,7 +1388,7 @@ Event 173 Batch 0 2 6.571348515648592e+02 -2.769863586381786e+02 5.805753619381593e+02 1.343019708712704e+02 3 5.332990408103321e+02 1.871824832342877e+02 -4.782426732337677e+02 1.437168410371092e+02 4 3.095661076248081e+02 8.980387540389081e+01 -1.023326887043915e+02 -2.780188119083794e+02 - ME 
9.985413945498126e-03 + ME 1.269359653092767e-04 Event 174 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1396,7 +1396,7 @@ Event 174 Batch 0 2 6.091496911716730e+02 -4.752584064243671e+02 3.135726231883978e+01 -3.797492797588730e+02 3 6.417481529658018e+02 3.309293137608124e+02 9.015643604119191e+01 5.424004960996682e+02 4 2.491021558625255e+02 1.443290926635548e+02 -1.215136983600317e+02 -1.626512163407953e+02 - ME 1.319192968737130e-03 + ME 1.362612102685676e-04 Event 175 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1404,7 +1404,7 @@ Event 175 Batch 0 2 5.399801778396885e+02 1.966672297646830e+02 2.343185748302537e+02 -4.449667388535759e+02 3 6.987953575798327e+02 -1.857207036318898e+02 -9.664246188148675e+01 6.666955876403318e+02 4 2.612244645804785e+02 -1.094652613279307e+01 -1.376761129487668e+02 -2.217288487867561e+02 - ME 9.528877211334405e-03 + ME 9.613528518728674e-04 Event 176 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1412,7 +1412,7 @@ Event 176 Batch 0 2 6.615757321243968e+02 -4.129469954321281e+02 4.686878756164518e+02 -2.179194886871010e+02 3 1.607981401590110e+02 -6.355407199259605e+01 7.929314438200207e+00 1.474925346731048e+02 4 6.776261277165921e+02 4.765010674247242e+02 -4.766171900546519e+02 7.042695401399614e+01 - ME 6.965204353376922e-04 + ME 3.097907077728356e-04 Event 177 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1420,7 +1420,7 @@ Event 177 Batch 0 2 4.314334067424883e+02 -3.493619040652741e+02 -2.026482683689240e+01 -2.523299055494341e+02 3 4.840006500668400e+02 -1.846595828310067e+02 -1.450727057198388e+02 4.232155216776995e+02 4 5.845659431906716e+02 5.340214868962809e+02 1.653375325567312e+02 -1.708856161282654e+02 - ME 2.160100049311594e-04 + ME 1.084300812640113e-04 Event 178 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1428,7 +1428,7 @@ Event 178 Batch 0 2 4.528135981327372e+02 -2.544528544607913e+02 1.436928116455424e+02 3.458992272209776e+02 3 3.053350882587867e+02 -1.380299578048218e+02 2.072032295570572e+02 1.767599177741536e+02 4 7.418513136084770e+02 3.924828122656132e+02 -3.508960412025996e+02 -5.226591449951313e+02 - ME 7.384409254828141e-02 + ME 5.382438151181503e-02 Event 179 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1436,7 +1436,7 @@ Event 179 Batch 0 2 7.433145319259943e+02 -2.538538580850882e+02 -6.778753511348521e+02 -1.689962142519080e+02 3 1.647945947160298e+02 1.009041857568576e+02 1.171651165877689e+02 5.699069397138987e+01 4 5.918908733579761e+02 1.529496723282306e+02 5.607102345470832e+02 1.120055202805181e+02 - ME 1.335347052581446e-04 + ME 3.739915465576335e-05 Event 180 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1444,7 +1444,7 @@ Event 180 Batch 0 2 2.396120216689867e+02 1.204528233788652e+02 -1.081248155319049e+02 1.766750195544080e+02 3 5.541470271917004e+02 2.767127195685322e+02 2.999096875483201e+02 3.749175614572557e+02 4 7.062409511393131e+02 -3.971655429473975e+02 -1.917848720164151e+02 -5.515925810116636e+02 - ME 1.316593054412419e-02 + ME 2.792447184071457e-03 Event 181 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1452,7 +1452,7 @@ Event 181 Batch 0 2 2.165494222755782e+02 1.336973493521793e+02 -1.495065670853883e+02 -8.164837697364385e+01 3 6.960869932595207e+02 -2.848973600545249e+02 2.209041937252092e+01 6.347303441548928e+02 4 5.873635844649011e+02 1.512000107023455e+02 1.274161477128675e+02 -5.530819671812490e+02 - ME 6.164296623062663e-02 + ME 3.488874737600980e-03 Event 182 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1460,7 +1460,7 @@ Event 
182 Batch 0 2 6.472681881349898e+02 4.279258056181361e+02 3.994050733201775e+02 -2.762448183472868e+02 3 5.337197582091030e+02 -3.479343829022644e+02 -4.034091782989213e+02 -3.254965992745409e+01 4 3.190120536559070e+02 -7.999142271587166e+01 4.004104978744005e+00 3.087944782747408e+02 - ME 6.393158381765308e-05 + ME 5.523679400573375e-05 Event 183 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1468,7 +1468,7 @@ Event 183 Batch 0 2 6.165307808531154e+02 -3.276949594572818e+02 8.808524820164887e+01 -5.147496540405800e+02 3 2.975460412740734e+02 -1.030095950018341e+02 -2.375020297789284e+02 1.466814775843215e+02 4 5.859231778728107e+02 4.307045544591158e+02 1.494167815772794e+02 3.680681764562588e+02 - ME 6.887775529805495e-05 + ME 2.562496117427957e-05 Event 184 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1476,7 +1476,7 @@ Event 184 Batch 0 2 5.645337360463252e+02 -3.940276919793660e+02 3.776398996283964e+02 1.443212503288767e+02 3 5.368100353438223e+02 2.392766596964613e+02 -1.719264331693737e+02 -4.487237410122139e+02 4 3.986562286098531e+02 1.547510322829050e+02 -2.057134664590229e+02 3.044024906833372e+02 - ME 3.553984578535888e-05 + ME 1.712138666139329e-05 Event 185 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1484,7 +1484,7 @@ Event 185 Batch 0 2 6.347397779710931e+02 2.522092504724420e+02 -1.599825720327363e+02 5.600809373302327e+02 3 4.566768168089404e+02 -3.359958684022406e+02 -1.272903681003782e+02 -2.818823400219340e+02 4 4.085834052199659e+02 8.378661792979838e+01 2.872729401331145e+02 -2.781985973082986e+02 - ME 1.184197550833168e-03 + ME 1.836859309200860e-04 Event 186 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1492,7 +1492,7 @@ Event 186 Batch 0 2 7.089823220133230e+02 -5.197119220861886e+02 4.248734840868308e+02 
-2.281183322067745e+02 3 5.364076825758043e+02 3.588264146200084e+02 -3.973752875032956e+02 3.270606945152315e+01 4 2.546099954108725e+02 1.608855074661802e+02 -2.749819658353518e+01 1.954122627552515e+02 - ME 2.583895514537347e-05 + ME 1.318469173008218e-05 Event 187 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1500,7 +1500,7 @@ Event 187 Batch 0 2 4.835105223217566e+02 -2.128653471696258e+02 1.375287019182911e+02 -4.117725407538514e+02 3 7.240136612790383e+02 4.407273454759851e+02 -4.896543389042274e+01 5.723264583716990e+02 4 2.924758163992057e+02 -2.278619983063593e+02 -8.856326802786833e+01 -1.605539176178473e+02 - ME 5.307563978210835e-04 + ME 9.185777086042985e-05 Event 188 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1508,7 +1508,7 @@ Event 188 Batch 0 2 6.611118500396009e+02 3.502021063704277e+02 -2.011693879247277e+02 -5.234102027267809e+02 3 3.072944371702247e+02 -6.894916504330918e+01 -1.599953986835475e+02 2.531350551695447e+02 4 5.315937127901742e+02 -2.812529413271184e+02 3.611647866082752e+02 2.702751475572362e+02 - ME 6.863567490702385e-05 + ME 3.862980709292737e-05 Event 189 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1516,7 +1516,7 @@ Event 189 Batch 0 2 7.498478362545707e+02 6.780504955298834e+02 -3.199144947524264e+02 -1.319162971889924e+01 3 3.253008430749361e+02 -2.985087551774363e+02 1.291384938207140e+02 6.034152914782593e+00 4 4.248513206704935e+02 -3.795417403524470e+02 1.907760009317124e+02 7.157476804116639e+00 - ME 8.583750584152986e-05 + ME 1.504471760657040e-05 Event 190 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1524,7 +1524,7 @@ Event 190 Batch 0 2 4.938867893347995e+02 3.689671478502748e+02 -1.218724623869293e+02 3.048516153777389e+02 3 5.264063001598521e+02 6.631942569346465e+01 1.276367949726208e+02 
-5.063735530147588e+02 4 4.797069105053494e+02 -4.352865735437401e+02 -5.764332585691415e+00 2.015219376370201e+02 - ME 4.759343488474735e-05 + ME 2.269926034328256e-05 Event 191 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1532,7 +1532,7 @@ Event 191 Batch 0 2 3.681793141805986e+02 -3.225132888415706e+02 1.579589482507471e+02 -8.117977937027918e+01 3 5.431126642386394e+02 4.058413736814005e+01 9.147123993851424e+01 5.338139246166097e+02 4 5.887080215807621e+02 2.819291514734305e+02 -2.494301881892614e+02 -4.526341452463304e+02 - ME 4.908990110546420e-03 + ME 1.427494731558637e-03 Event 192 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1540,7 +1540,7 @@ Event 192 Batch 0 2 6.054165399887861e+02 1.497087111729466e+02 8.905021611535379e+01 5.798159601983524e+02 3 2.106656439489222e+02 1.451894976721945e+02 -1.487249448604451e+02 3.436443048222171e+01 4 6.839178160622922e+02 -2.948982088451411e+02 5.967472874509133e+01 -6.141803906805740e+02 - ME 4.294450320853435e-02 + ME 6.984876913518998e-03 Event 193 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1548,7 +1548,7 @@ Event 193 Batch 0 2 2.753169163933055e+02 -1.695475157411122e+02 -2.139406274107579e+02 3.581134319495643e+01 3 5.760219428901971e+02 -3.264616044953138e+02 1.527507522369444e+02 -4.493231656306969e+02 4 6.486611407164972e+02 4.960091202364260e+02 6.118987517381347e+01 4.135118224357404e+02 - ME 1.537583375796735e-04 + ME 4.273063058931925e-05 Event 194 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1556,7 +1556,7 @@ Event 194 Batch 0 2 3.445934948105150e+02 -2.970257025567896e+02 -8.183019525038441e+01 1.543509890854414e+02 3 7.485441862377920e+02 6.623797851941252e+02 1.083400559332054e+02 -3.314119056355291e+02 4 4.068623189516925e+02 -3.653540826373358e+02 -2.650986068282081e+01 
1.770609165500877e+02 - ME 3.024610065690235e-05 + ME 4.921158833271929e-06 Event 195 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1564,7 +1564,7 @@ Event 195 Batch 0 2 2.012122274303647e+02 -5.190018365965096e+01 1.322177369426910e+02 -1.425173724194237e+02 3 7.122630330184543e+02 -3.054768058087834e+02 -2.528097616133813e+02 5.916838461125119e+02 4 5.865247395511832e+02 3.573769894684365e+02 1.205920246706904e+02 -4.491664736930883e+02 - ME 3.011639483286710e-03 + ME 4.696445912229638e-04 Event 196 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1572,7 +1572,7 @@ Event 196 Batch 0 2 4.490485793345989e+02 3.485190427929747e+02 -2.661098616642627e+01 -2.819059396826192e+02 3 5.531554978829222e+02 -3.330165694254377e+02 4.416170126965178e+02 7.442003978758296e+00 4 4.977959227824785e+02 -1.550247336753688e+01 -4.150060265300915e+02 2.744639357038610e+02 - ME 4.340266456570635e-05 + ME 9.363355109875406e-06 Event 197 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1580,7 +1580,7 @@ Event 197 Batch 0 2 3.951249254444253e+02 -2.278358800090239e+02 3.101157211704546e+02 -8.968142489336992e+01 3 3.607080640108546e+02 -2.889948719219027e+02 2.155030307719242e+02 -1.227661082778765e+01 4 7.441670105447209e+02 5.168307519309257e+02 -5.256187519423792e+02 1.019580357211576e+02 - ME 3.377741088449004e-02 + ME 6.597373610109231e-03 Event 198 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1588,7 +1588,7 @@ Event 198 Batch 0 2 3.750236904637998e+02 1.183014344420310e+02 -1.005952209347265e+02 -3.413621838211424e+02 3 4.381296266085964e+02 -2.726825461625328e+02 1.003845461170281e+02 -3.279096546785175e+02 4 6.868466829276033e+02 1.543811117205018e+02 2.106748176980602e-01 6.692718384996598e+02 - ME 9.606390506705955e-04 + ME 6.145502577419889e-04 Event 199 Batch 
0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1596,7 +1596,7 @@ Event 199 Batch 0 2 2.454478562244572e+02 -2.058455361543722e+02 -1.131056012155068e+02 -7.126982772660261e+01 3 5.321797086694488e+02 -9.806778012582416e+01 -4.820333037417012e+02 -2.030808875905193e+02 4 7.223724351060940e+02 3.039133162801963e+02 5.951389049572081e+02 2.743507153171219e+02 - ME 1.577081887352965e-03 + ME 3.088173795554332e-04 Event 200 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1604,7 +1604,7 @@ Event 200 Batch 0 2 3.952431318363244e+02 3.031309873729303e+02 9.337877017948550e+01 2.358159092128122e+02 3 6.094031244332663e+02 -7.796753338981905e+01 -5.315426896439308e+02 -2.876727322709444e+02 4 4.953537437304092e+02 -2.251634539831113e+02 4.381639194644453e+02 5.185682305813224e+01 - ME 6.703240553489506e-05 + ME 1.668296552597111e-05 Event 201 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1612,7 +1612,7 @@ Event 201 Batch 0 2 6.497938633639732e+02 3.771120671245744e+02 3.553445817627057e+02 -3.921081252746440e+02 3 3.369790646193914e+02 -2.140351778515325e+02 1.061239955238163e+02 2.376584318047305e+02 4 5.132270720166357e+02 -1.630768892730420e+02 -4.614685772865220e+02 1.544496934699135e+02 - ME 6.283412004793947e-05 + ME 2.404518058628388e-05 Event 202 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1620,7 +1620,7 @@ Event 202 Batch 0 2 7.267802742470179e+02 6.523432021666289e+02 -1.481957728499301e+02 2.840702844913056e+02 3 3.546086620137576e+02 -3.102429173963679e+02 -5.939291787501398e+01 -1.611493614224694e+02 4 4.186110637392242e+02 -3.421002847702610e+02 2.075886907249440e+02 -1.229209230688360e+02 - ME 1.894138330341389e-04 + ME 2.830403199974809e-05 Event 203 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 
@@ -1628,7 +1628,7 @@ Event 203 Batch 0 2 4.830190702985662e+02 2.789429895135886e+02 -3.943102945050296e+02 -4.197918611657844e+00 3 5.247163710833165e+02 -4.266462829986153e+02 3.263988520595893e+01 3.037019215942698e+02 4 4.922645586181170e+02 1.477032934850268e+02 3.616704092990706e+02 -2.995040029826120e+02 - ME 5.831910678002871e-04 + ME 5.153190919865371e-05 Event 204 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1636,7 +1636,7 @@ Event 204 Batch 0 2 6.952375769935185e+02 3.823764713153302e+01 6.531840992713522e+02 -2.350397908115460e+02 3 6.250862947179036e+02 1.031861473443961e+02 -5.506835576815644e+02 2.771878679515999e+02 4 1.796761282885781e+02 -1.414237944759291e+02 -1.025005415897879e+02 -4.214807714005369e+01 - ME 1.802858800889920e-04 + ME 1.903000177287069e-05 Event 205 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1644,7 +1644,7 @@ Event 205 Batch 0 2 5.625197268936781e+02 2.955060596751036e+02 4.395356105446072e+02 -1.895074112086703e+02 3 3.144813194259642e+02 -1.941101430078122e+02 -7.073026664887073e+00 -2.473251401357733e+02 4 6.229989536803572e+02 -1.013959166672914e+02 -4.324625838797200e+02 4.368325513444433e+02 - ME 1.140145509231641e-04 + ME 3.163472493443465e-05 Event 206 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1652,7 +1652,7 @@ Event 206 Batch 0 2 5.487698581700869e+02 -4.771827558939671e+02 -2.639484985605369e+02 6.145050708573941e+01 3 4.357856725513919e+02 1.877155863290790e+02 1.701172104948722e+02 3.545872893148349e+02 4 5.154444692785200e+02 2.894671695648880e+02 9.383128806566407e+01 -4.160377964005746e+02 - ME 4.167786087259531e-03 + ME 3.341888001113221e-04 Event 207 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1660,7 +1660,7 @@ Event 207 Batch 0 2 5.289473514933904e+02 -3.230637718239221e+02 
-3.258094337294262e+02 2.631792409740627e+02 3 3.730441408755686e+02 -1.145152671243400e+02 -7.298530142052728e+01 -3.474497523579300e+02 4 5.980085076310412e+02 4.375790389482623e+02 3.987947351499535e+02 8.427051138386733e+01 - ME 1.161501350367753e-04 + ME 3.789028948405571e-05 Event 208 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1668,7 +1668,7 @@ Event 208 Batch 0 2 3.144460531270953e+02 3.105028133645123e+02 -3.495125011961062e+01 3.525242310830974e+01 3 7.230517599976935e+02 -6.554206809343713e+02 2.220922910679198e+02 2.095294558946058e+02 4 4.625021868752117e+02 3.449178675698588e+02 -1.871410409483092e+02 -2.447818790029155e+02 - ME 4.858457850437588e-04 + ME 2.941989209837521e-05 Event 209 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1676,7 +1676,7 @@ Event 209 Batch 0 2 2.827014058170527e+02 -6.682954863774688e+01 -1.958656753088385e+02 -1.925890275057887e+02 3 5.969812148172332e+02 5.625717004655273e+02 1.060136244597389e+02 -1.692949027847388e+02 4 6.203173793657136e+02 -4.957421518277804e+02 8.985205084909943e+01 3.618839302905275e+02 - ME 1.004351001266980e-04 + ME 2.261939336541961e-05 Event 210 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1684,7 +1684,7 @@ Event 210 Batch 0 2 3.369223392964550e+02 -2.366581006943837e+02 8.850719545688517e+01 -2.228813191927023e+02 3 6.926279093100447e+02 9.835546321295956e+01 -1.581805884470998e+02 6.671120783270956e+02 4 4.704497513935005e+02 1.383026374814242e+02 6.967339299021461e+01 -4.442307591343933e+02 - ME 5.974710408786874e-02 + ME 3.044010300440331e-03 Event 211 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1692,7 +1692,7 @@ Event 211 Batch 0 2 5.754314663824422e+02 -1.965408456680789e+02 -5.399725108422632e+02 3.037689947684008e+01 3 6.656941886103589e+02 4.112771407945243e+02 
5.114655840792436e+02 1.113679599883347e+02 4 2.588743450071987e+02 -2.147362951264454e+02 2.850692676301957e+01 -1.417448594651748e+02 - ME 4.382347812376007e-04 + ME 1.754510489093768e-05 Event 212 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1700,7 +1700,7 @@ Event 212 Batch 0 2 5.922157374848572e+02 8.073316194509509e+00 4.947261155542873e+02 -3.254233732830556e+02 3 3.635572903001510e+02 8.951663862813328e+01 4.011175755255380e+01 3.500738802669425e+02 4 5.442269722149914e+02 -9.758995482264278e+01 -5.348378731068407e+02 -2.465050698388706e+01 - ME 3.041427876287276e-04 + ME 1.919214373141161e-04 Event 213 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1708,7 +1708,7 @@ Event 213 Batch 0 2 7.434820262506830e+02 2.991548764052629e+02 2.111623598614188e+02 -6.470566753063675e+02 3 5.607612173038236e+02 -2.664197873565705e+02 -1.905271140771768e+02 4.551626726109781e+02 4 1.957567564454930e+02 -3.273508904869271e+01 -2.063524578424195e+01 1.918940026953895e+02 - ME 1.827786070323022e-04 + ME 1.896082550340891e-04 Event 214 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1716,7 +1716,7 @@ Event 214 Batch 0 2 5.400874280734793e+02 3.457358963402696e+02 2.445843697627679e+02 -3.351710101016577e+02 3 3.400793067879315e+02 1.482066942304564e+02 1.256466447865830e+02 2.791086371729012e+02 4 6.198332651385892e+02 -4.939425905707261e+02 -3.702310145493508e+02 5.606237292875651e+01 - ME 1.356968066378560e-04 + ME 6.515553919952984e-05 Event 215 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1724,7 +1724,7 @@ Event 215 Batch 0 2 3.916345321859864e+02 3.271767110560381e+02 -1.945589530122144e+02 9.208594000107233e+01 3 6.136750729169615e+02 -1.269585669220027e+02 2.644680756040779e+02 -5.390132228350478e+02 4 4.946903948970534e+02 -2.002181441340350e+02 
-6.990912259186331e+01 4.469272828339764e+02 - ME 6.207321332343461e-05 + ME 3.427926940877871e-05 Event 216 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1732,7 +1732,7 @@ Event 216 Batch 0 2 3.767411090262154e+02 1.602503356822860e+02 2.758455349572533e+02 -2.004069210086422e+02 3 4.061922956351256e+02 3.340053729931861e+02 2.237650079776778e+02 5.798114391563544e+01 4 7.170665953386593e+02 -4.942557086754721e+02 -4.996105429349309e+02 1.424257770930068e+02 - ME 1.232271832865728e-03 + ME 2.360785017217177e-04 Event 217 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1740,7 +1740,7 @@ Event 217 Batch 0 2 6.474118977458852e+02 -5.378641111590873e+02 -3.279650037002520e+02 1.492759847325320e+02 3 5.088298200539713e+02 3.261878344469131e+02 1.555821256186315e+02 -3.581947579501665e+02 4 3.437582822001433e+02 2.116762767121744e+02 1.723828780816206e+02 2.089187732176345e+02 - ME 3.357118960820415e-05 + ME 1.388331578224744e-05 Event 218 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1748,7 +1748,7 @@ Event 218 Batch 0 2 6.658501161076259e+02 -6.577627036244854e+02 -3.020200479570956e+01 9.895676706252418e+01 3 2.516345839620714e+02 1.565221509782131e+02 -1.156477271957936e+02 1.595192254662914e+02 4 5.825152999303023e+02 5.012405526462722e+02 1.458497319915031e+02 -2.584759925288157e+02 - ME 5.956187308313417e-04 + ME 1.036808356896783e-04 Event 219 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1756,7 +1756,7 @@ Event 219 Batch 0 2 4.328556070633435e+02 6.122246558068494e+01 -1.687441385117925e+02 3.938796795879554e+02 3 6.500677455605621e+02 -3.703058656885360e+02 4.356876543064814e+02 -3.092537914719426e+02 4 4.170766473760945e+02 3.090834001078509e+02 -2.669435157946888e+02 -8.462588811601287e+01 - ME 2.797067114354785e-04 + ME 
9.046106878448173e-05 Event 220 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1764,7 +1764,7 @@ Event 220 Batch 0 2 3.686297280598666e+02 -3.497113779929074e+02 -8.765282776369953e+01 7.685577594963354e+01 3 4.155522773953191e+02 -1.777404948015450e+02 -1.525848366500187e+02 3.432344379292750e+02 4 7.158179945448145e+02 5.274518727944524e+02 2.402376644137182e+02 -4.200902138789084e+02 - ME 3.485410710153060e-03 + ME 1.676729229638681e-03 Event 221 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1772,7 +1772,7 @@ Event 221 Batch 0 2 5.295220830718469e+02 3.654688468413813e+01 4.204675060608333e+02 3.197890523886257e+02 3 7.127556392876786e+02 -1.727486268095863e+02 -4.342549693537605e+02 -5.381460163035255e+02 4 2.577222776404743e+02 1.362017421254481e+02 1.378746329292729e+01 2.183569639148998e+02 - ME 2.819264207321091e-05 + ME 2.031931825964470e-05 Event 222 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1780,7 +1780,7 @@ Event 222 Batch 0 2 2.464305981122427e+02 -2.054199106396077e+02 6.127423271580306e+01 1.215572638876956e+02 3 6.926647117218595e+02 4.702892479611936e+02 3.872350261814336e+02 -3.296383785530530e+02 4 5.609046901658980e+02 -2.648693373215859e+02 -4.485092588972366e+02 2.080811146653574e+02 - ME 6.319142394583372e-05 + ME 1.678695785515194e-05 Event 223 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1788,7 +1788,7 @@ Event 223 Batch 0 2 2.463384302181125e+02 -1.209251938955738e+02 -2.140981972257043e+02 -1.488897673935926e+01 3 6.819620845265065e+02 -2.400891875757811e+02 5.819023806457059e+02 2.623339210620683e+02 4 5.716994852553812e+02 3.610143814713547e+02 -3.678041834200016e+02 -2.474449443227091e+02 - ME 3.931927185620913e-04 + ME 4.810915220985587e-05 Event 224 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -1796,7 +1796,7 @@ Event 224 Batch 0 2 2.236851263016067e+02 -8.671871524968952e+01 1.717231909970332e+02 1.141317038679677e+02 3 5.308972974363861e+02 -3.715833295102001e+01 4.680039348616383e+02 2.478780257941054e+02 4 7.454175762620068e+02 1.238770482007099e+02 -6.397271258586715e+02 -3.620097296620728e+02 - ME 8.708656265179471e-02 + ME 6.017706528853119e-02 Event 225 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1804,7 +1804,7 @@ Event 225 Batch 0 2 5.094176014319268e+02 1.569347096242780e+02 -1.561291130928888e+00 -4.846394040251013e+02 3 7.252311334449815e+02 -3.845161955462210e+02 -4.374219820797174e+01 6.133466494377277e+02 4 2.653512651230916e+02 2.275814859219426e+02 4.530348933890067e+01 -1.287072454126262e+02 - ME 3.974215742688118e-04 + ME 1.151501859389029e-04 Event 226 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1812,7 +1812,7 @@ Event 226 Batch 0 2 6.863217264048350e+02 -2.391756120967483e+02 -6.171186323675804e+02 1.816511279850093e+02 3 5.332348374442744e+02 1.096335504493486e+02 4.112484130583279e+02 -3.212391931833643e+02 4 2.804434361508906e+02 1.295420616473995e+02 2.058702193092524e+02 1.395880651983551e+02 - ME 3.797053871351767e-05 + ME 1.438206074993319e-05 Event 227 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1820,7 +1820,7 @@ Event 227 Batch 0 2 7.243206345463230e+02 -5.280189925476210e+02 -1.406011303275692e+02 4.754657162080069e+02 3 5.487499634657129e+02 3.840442912861271e+02 -1.353123555187442e+01 -3.917312987222202e+02 4 2.269294019879644e+02 1.439747012614939e+02 1.541323658794436e+02 -8.373441748578679e+01 - ME 2.903986554770466e-04 + ME 5.165623507180856e-05 Event 228 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1828,7 +1828,7 @@ Event 228 Batch 0 2 
2.119578664379945e+02 1.625437651479949e+01 -1.806612394559917e+02 1.096514885776142e+02 3 6.254097456672617e+02 -3.200704000326812e+01 3.158243706171928e+02 5.388579277416935e+02 4 6.626323878947439e+02 1.575266348846865e+01 -1.351631311612011e+02 -6.485094163193077e+02 - ME 8.951233069377997e-01 + ME 3.800526374221887e-02 Event 229 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1836,7 +1836,7 @@ Event 229 Batch 0 2 5.921227120343664e+02 -3.877491982207575e+02 4.449193714386763e+02 -4.802726626309342e+01 3 4.688278331283221e+02 3.470549659129084e+02 -1.517581364471262e+02 -2.762641051115459e+02 4 4.390494548373113e+02 4.069423230784909e+01 -2.931612349915501e+02 3.242913713746393e+02 - ME 3.492131538818778e-05 + ME 1.250052930035257e-05 Event 230 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1844,7 +1844,7 @@ Event 230 Batch 0 2 4.261952284727868e+02 2.153699775439378e+02 -1.171086083390750e+02 3.486312082969335e+02 3 3.540619701921573e+02 3.070144260847319e+01 1.307424531367546e+02 3.276029778648147e+02 4 7.197428013350559e+02 -2.460714201524109e+02 -1.363384479767965e+01 -6.762341861617483e+02 - ME 3.186738302883428e-01 + ME 4.711214236813061e-02 Event 231 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1852,7 +1852,7 @@ Event 231 Batch 0 2 4.205236024420392e+02 7.533931576750228e+01 -3.260217181731272e+02 -2.547036061581322e+02 3 5.397543491930860e+02 8.423195081267914e+01 -1.158376015978276e+02 5.204050211049134e+02 4 5.397220483648740e+02 -1.595712665801811e+02 4.418593197709548e+02 -2.657014149467809e+02 - ME 5.532186388062512e-04 + ME 3.265984123744224e-04 Event 232 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1860,7 +1860,7 @@ Event 232 Batch 0 2 4.295782852421121e+02 3.239064445356881e+02 9.240815775655221e-01 2.821724019337124e+02 3 
7.183371274312143e+02 -6.155391061575082e+02 -1.955291718271078e+02 -3.144649112405858e+02 4 3.520845873266736e+02 2.916326616218201e+02 1.946050902495422e+02 3.229250930687335e+01 - ME 6.730603828970119e-05 + ME 1.049779024540051e-05 Event 233 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1868,7 +1868,7 @@ Event 233 Batch 0 2 3.640046126075324e+02 -2.220120664068515e+02 -1.165482463207536e+02 2.638683509799470e+02 3 4.682121509308883e+02 -1.009786196736112e+02 3.762431872847591e+02 2.597441061312976e+02 4 6.677832364615790e+02 3.229906860804628e+02 -2.596949409640055e+02 -5.236124571112447e+02 - ME 5.385640989777132e-03 + ME 7.598357868514145e-04 Event 234 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1876,7 +1876,7 @@ Event 234 Batch 0 2 8.690043548936441e+01 -2.607433849884744e+01 -7.258333015587984e+01 4.004341073848801e+01 3 6.785651905172676e+02 -3.574930335951373e+02 -4.725723606052789e+01 5.748184081539155e+02 4 7.345343739933678e+02 3.835673720939847e+02 1.198405662164078e+02 -6.148618188924036e+02 - ME 1.962113644780599e-01 + ME 8.152211059226219e-02 Event 235 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1884,7 +1884,7 @@ Event 235 Batch 0 2 3.000566282865331e+02 1.219146462304108e+01 -2.126850238006026e+02 2.113064812540423e+02 3 7.160981218147422e+02 2.575873756248088e+02 2.779062108697769e+02 -6.076293293985470e+02 4 4.838452498987246e+02 -2.697788402478500e+02 -6.522118706917435e+01 3.963228481445046e+02 - ME 3.940402333844027e-05 + ME 2.498899672933017e-05 Event 236 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1892,7 +1892,7 @@ Event 236 Batch 0 2 1.510518772182422e+02 -9.497518588910037e+01 1.467158067736534e+01 1.165380984781943e+02 3 6.955499852411461e+02 5.933480346078575e+02 3.495450158124774e+02 9.770452249822526e+01 4 
6.533981375406115e+02 -4.983728487187572e+02 -3.642165964898426e+02 -2.142426209764196e+02 - ME 1.121647028585911e-03 + ME 2.623118294900277e-04 Event 237 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1900,7 +1900,7 @@ Event 237 Batch 0 2 2.173874152942701e+02 2.069918593916189e+02 -3.850229167793934e+01 -5.412237993169356e+01 3 7.305677895866185e+02 -6.701932224704495e+02 -2.421540700080861e+02 1.610333695687662e+02 4 5.520447951191120e+02 4.632013630788306e+02 2.806563616860255e+02 -1.069109896370727e+02 - ME 1.822378225061386e-04 + ME 2.170005261464319e-05 Event 238 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1908,7 +1908,7 @@ Event 238 Batch 0 2 6.349573912113930e+02 -3.336495545457479e+02 -4.785400196851591e+02 2.506956580500139e+02 3 5.768887318987100e+02 4.812119270965607e+02 2.334547330568691e+02 -2.161818165921041e+02 4 2.881538768898968e+02 -1.475623725508129e+02 2.450852866282900e+02 -3.451384145790988e+01 - ME 9.810731053503000e-05 + ME 1.383744831772315e-05 Event 239 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1916,7 +1916,7 @@ Event 239 Batch 0 2 5.349076725903783e+02 -5.331874414268931e+02 1.887721601290929e+01 -3.848403846142781e+01 3 3.658437465440003e+02 8.335465236419728e+01 1.670818061666301e+01 -3.558292926602242e+02 4 5.992485808656214e+02 4.498327890626960e+02 -3.558539662957234e+01 3.943133311216517e+02 - ME 9.226736931333760e-05 + ME 2.560110521983184e-05 Event 240 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1924,7 +1924,7 @@ Event 240 Batch 0 2 2.870582387324442e+02 1.830793600232297e+02 -1.562409872742485e+02 1.564389154054251e+02 3 6.007192677438852e+02 3.433229388031108e+02 4.688113613010560e+02 -1.523446941819630e+02 4 6.122224935236703e+02 -5.264022988263405e+02 -3.125703740268075e+02 -4.094221223461989e+00 - 
ME 1.424405912705748e-04 + ME 3.548113744927254e-05 Event 241 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1932,7 +1932,7 @@ Event 241 Batch 0 2 7.424696267657401e+02 4.823783107714221e+02 2.498315161211407e+02 5.061190823507636e+02 3 2.455726236162737e+02 -1.827879695947952e+02 -1.199757723946156e+02 -1.118046764652876e+02 4 5.119577496179861e+02 -2.995903411766270e+02 -1.298557437265251e+02 -3.943144058854759e+02 - ME 2.705973755259623e-03 + ME 2.366266620918590e-04 Event 242 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1940,7 +1940,7 @@ Event 242 Batch 0 2 7.249130370348905e+02 1.676828147928013e+02 6.059046362201677e+02 -3.609168279440810e+02 3 6.240672718074169e+02 -4.529413961306761e+01 -5.490982345027019e+02 2.930862151720549e+02 4 1.510196911576933e+02 -1.223886751797337e+02 -5.680640171746593e+01 6.783061277202641e+01 - ME 4.587322306592483e-05 + ME 1.668420503127583e-05 Event 243 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1948,7 +1948,7 @@ Event 243 Batch 0 2 4.655090712555229e+02 2.096323612054770e+02 2.113490506800235e+02 3.578890153850057e+02 3 5.764797256412519e+02 6.697224883641857e+01 -5.382210340689440e+02 -1.953502251008744e+02 4 4.580112031032257e+02 -2.766046100418949e+02 3.268719833889206e+02 -1.625387902841314e+02 - ME 2.309042201876567e-04 + ME 3.999521919602606e-05 Event 244 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1956,7 +1956,7 @@ Event 244 Batch 0 2 5.237109195354749e+02 1.305098338947756e+02 -4.868141165486322e+02 -1.423106687020528e+02 3 5.804450110242352e+02 -4.045654344879671e+02 2.643676733537771e+02 3.214855413949400e+02 4 3.958440694402901e+02 2.740556005931916e+02 2.224464431948551e+02 -1.791748726928872e+02 - ME 2.644202232750943e-04 + ME 2.634847163425152e-05 Event 245 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1964,7 +1964,7 @@ Event 245 Batch 0 2 2.629169357520612e+02 2.457511487795889e+02 -4.402365929491729e+01 -8.242333044139184e+01 3 6.931386101565748e+02 -5.195573187661655e+02 4.004017488088275e+02 -2.240084037645317e+02 4 5.439444540913644e+02 2.738061699865766e+02 -3.563780895139104e+02 3.064317342059234e+02 - ME 4.288053786412853e-05 + ME 1.052590061693975e-05 Event 246 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1972,7 +1972,7 @@ Event 246 Batch 0 2 6.300937687157445e+02 -5.459948028041557e+02 3.085954426748102e+02 6.063567799240802e+01 3 1.673910408536145e+02 -3.546130270298926e+01 7.662824936562275e+01 -1.445350060290698e+02 4 7.025151904306430e+02 5.814561055071442e+02 -3.852236920404341e+02 8.389932803666261e+01 - ME 6.282756509154168e-04 + ME 1.915763997923398e-04 Event 247 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1980,7 +1980,7 @@ Event 247 Batch 0 2 2.577847506495701e+02 2.418237207037818e+02 -8.449121421856779e+01 2.890502538162603e+01 3 5.130193185035739e+02 4.381905811488919e+02 1.366496386102691e+02 2.291390669832418e+02 4 7.291959308468561e+02 -6.800143018526737e+02 -5.215842439170134e+01 -2.580440923648679e+02 - ME 4.005872724472581e-03 + ME 1.831864018495938e-03 Event 248 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1988,7 +1988,7 @@ Event 248 Batch 0 2 7.033207479153643e+02 -5.040306065309413e+02 -2.020637997366072e+02 4.469714117975369e+02 3 1.758360012551320e+02 -1.471306652922549e+01 -4.035460943683606e+00 -1.751728862172264e+02 4 6.208432508295037e+02 5.187436730601667e+02 2.060992606802909e+02 -2.717985255803103e+02 - ME 5.592865021063005e-04 + ME 1.512538512828554e-04 Event 249 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-1996,7 +1996,7 @@ Event 249 Batch 0 2 3.018816177222694e+02 5.523075638651412e+01 1.752331212074551e+02 2.395316845419020e+02 3 6.597415560701297e+02 6.315352823685419e+01 -6.561001191322722e+02 -2.834054254405022e+01 4 5.383768262076012e+02 -1.183842846233684e+02 4.808669979248172e+02 -2.111911419978518e+02 - ME 4.868100986861644e-04 + ME 9.225490912808109e-05 Event 250 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2004,7 +2004,7 @@ Event 250 Batch 0 2 2.166381935101301e+02 -1.289072913913530e+02 -1.189615590004073e+02 -1.271344351215279e+02 3 6.815426093761062e+02 -2.511966318704653e+02 5.323234433390903e+02 3.435583388650892e+02 4 6.018191971137635e+02 3.801039232618182e+02 -4.133618843386827e+02 -2.164239037435611e+02 - ME 3.468666532553966e-04 + ME 6.586594805989363e-05 Event 251 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2012,7 +2012,7 @@ Event 251 Batch 0 2 6.676961532387151e+02 -3.991265595084280e+01 -4.419965947723094e+02 4.988628500443886e+02 3 7.150412702460949e+02 3.921851524844908e+01 5.505653759000154e+02 -4.545587894617490e+02 4 1.172625765151894e+02 6.941407023942340e-01 -1.085687811277060e+02 -4.430406058263954e+01 - ME 5.615833562023813e-04 + ME 4.930952510857648e-05 Event 252 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2020,7 +2020,7 @@ Event 252 Batch 0 2 2.112668789066533e+02 -1.147554660376938e+02 3.364589711187055e+01 -1.741632301749357e+02 3 7.393007599584276e+02 2.529046383258835e+02 -3.593132473314827e+02 5.945576909606565e+02 4 5.494323611349191e+02 -1.381491722881897e+02 3.256673502196121e+02 -4.203944607857206e+02 - ME 2.709805393201018e-03 + ME 3.541023077707110e-04 Event 253 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2028,7 +2028,7 @@ Event 253 Batch 0 2 7.299659304470913e+01 -4.405884533650594e+01 
-5.451291667290519e+01 2.038780663930336e+01 3 7.253475305576840e+02 3.245698054519170e+02 -1.402290280555607e+02 -6.333397991328418e+02 4 7.016558763976062e+02 -2.805109601154107e+02 1.947419447284657e+02 6.129519924935382e+02 - ME 6.484723438037138e-04 + ME 3.511004874943257e-04 Event 254 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2036,7 +2036,7 @@ Event 254 Batch 0 2 1.982520535096858e+02 -6.164633378269741e+01 1.773450413210087e+02 -6.365801262063783e+01 3 7.183815394471145e+02 -1.984891252513599e+02 -6.893152145826987e+02 -3.896971029099802e+01 4 5.833664070431995e+02 2.601354590340572e+02 5.119701732616900e+02 1.026277229116358e+02 - ME 9.210498573936143e-05 + ME 1.539519794804785e-05 Event 255 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2044,7 +2044,7 @@ Event 255 Batch 0 2 5.347080663542586e+02 -5.063606624096446e+02 1.592577719822621e+02 6.440929941880935e+01 3 2.475406015289465e+02 -1.856063881081879e+02 3.468010668896048e+00 -1.637516137347836e+02 4 7.177513321167953e+02 6.919670505178326e+02 -1.627257826511582e+02 9.934231431597431e+01 - ME 1.305481727349711e-03 + ME 3.137689362725149e-04 Event 0 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2052,7 +2052,7 @@ Event 0 Batch 1 2 5.775677821222389e+02 4.314431287975208e+02 -2.652567205762379e+02 -2.776332864556192e+02 3 6.023469575940325e+02 -3.228069847179709e+02 5.005558924007591e+02 8.978477890465942e+01 4 3.200852602837275e+02 -1.086361440795499e+02 -2.352991718245218e+02 1.878485075509607e+02 - ME 2.846168667868940e-05 + ME 7.533072458757011e-06 Event 1 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2060,7 +2060,7 @@ Event 1 Batch 1 2 7.241206267812560e+02 3.541578305635416e+02 -4.894807402105655e+02 3.991635230623179e+02 3 7.375567605136832e+02 -3.903081173548693e+02 
4.920451519627784e+02 -3.867054653560791e+02 4 3.832261270506111e+01 3.615028679132773e+01 -2.564411752212873e+00 -1.245805770623896e+01 - ME 1.002871021831580e-03 + ME 7.043932941624384e-05 Event 2 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2068,7 +2068,7 @@ Event 2 Batch 1 2 4.849204091734790e+02 2.108660079931152e+02 4.054727376659824e+02 1.620962335024329e+02 3 2.728468517759738e+02 4.961449545460115e+01 2.005017763154939e+02 1.782774356422519e+02 4 7.422327390505470e+02 -2.604805034477164e+02 -6.059745139814763e+02 -3.403736691446848e+02 - ME 2.729395913593408e-02 + ME 1.721146206228212e-02 Event 3 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2076,7 +2076,7 @@ Event 3 Batch 1 2 4.264155576764489e+02 -4.170952165204416e+02 -7.054834331799705e+01 5.370977042744418e+01 3 7.108631972082329e+02 6.832597695609467e+02 -1.727180704166534e+02 -9.301097030017993e+01 4 3.627212451153183e+02 -2.661645530405051e+02 2.432664137346505e+02 3.930119987273574e+01 - ME 5.466137525204964e-05 + ME 5.739226791327231e-06 Event 4 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2084,7 +2084,7 @@ Event 4 Batch 1 2 7.183269968238449e+02 -3.584978055671311e+02 -5.048824553914336e+02 -3.640971079361008e+02 3 7.387431276480253e+02 4.013538934928407e+02 5.036810263913359e+02 3.618865629982628e+02 4 4.292987552812846e+01 -4.285608792570924e+01 1.201429000097643e+00 2.210544937839338e+00 - ME 3.145606575501715e-04 + ME 5.884725836744927e-05 Event 5 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2092,7 +2092,7 @@ Event 5 Batch 1 2 4.529780005473896e+02 -8.443182436392424e+01 4.445408460134587e+02 -2.106590230986445e+01 3 4.683757780543924e+02 -6.076819021151039e+01 -1.335482427838441e+02 -4.448010379662153e+02 4 5.786462213982179e+02 1.452000145754347e+02 
-3.109926032296145e+02 4.658669402760799e+02 - ME 8.481958952475706e-05 + ME 2.851579396246287e-05 Event 6 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2100,7 +2100,7 @@ Event 6 Batch 1 2 6.238848262005389e+02 -1.065131260140052e+02 -4.741487807795934e+02 -3.912418229627633e+02 3 1.729069432107234e+02 -1.460869767542721e+02 -8.199113358821990e+01 4.281191710484079e+01 4 7.032082305887380e+02 2.526001027682771e+02 5.561399143678132e+02 3.484299058579224e+02 - ME 4.868510537699180e-04 + ME 1.468701510222534e-04 Event 7 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2108,7 +2108,7 @@ Event 7 Batch 1 2 6.977203086376783e+02 -6.126072843634399e+02 -1.744636661244187e+02 2.847602033865263e+02 3 1.614193396272251e+02 -4.571584237043670e+00 8.497734613495712e+01 -1.371646983269120e+02 4 6.408603517350967e+02 6.171788686004836e+02 8.948631998946138e+01 -1.475955050596143e+02 - ME 3.540796080305845e-04 + ME 9.523334397108766e-05 Event 8 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2116,7 +2116,7 @@ Event 8 Batch 1 2 6.871091945484288e+02 4.059708628308462e+02 2.886614153103366e+02 4.732666173272762e+02 3 5.653302025665631e+02 -2.838835484844413e+02 -7.353399035097291e+01 -4.833229987253825e+02 4 2.475606028850081e+02 -1.220873143464048e+02 -2.151274249593637e+02 1.005638139810634e+01 - ME 8.785466054587446e-05 + ME 3.726341895116938e-05 Event 9 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2124,7 +2124,7 @@ Event 9 Batch 1 2 1.618579955503452e+02 1.385215220188489e+01 1.601201234527701e+02 -1.917484467788566e+01 3 7.196660585644588e+02 -4.527189715496824e+02 -4.214090439733052e+02 3.679391067910628e+02 4 6.184759458851959e+02 4.388668193477974e+02 2.612889205205349e+02 -3.487642621131772e+02 - ME 1.054640649369016e-03 + ME 1.276556148007894e-04 Event 10 
Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2132,7 +2132,7 @@ Event 10 Batch 1 2 7.832785200561162e+01 1.027681340851886e+01 -7.242726264265977e+01 -2.799877018853974e+01 3 7.448007230566494e+02 2.520540107528716e+02 6.813719334665398e+02 1.641011304445167e+02 4 6.768714249377393e+02 -2.623308241613905e+02 -6.089446708238800e+02 -1.361023602559769e+02 - ME 5.876642887714617e-04 + ME 1.087112534498832e-04 Event 11 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2140,7 +2140,7 @@ Event 11 Batch 1 2 5.478627446486676e+02 2.070882322301630e+02 -4.708081692757452e+02 1.887000762823861e+02 3 6.997827604382593e+02 -4.209013422316021e+02 4.569873120768409e+02 -3.220257264800591e+02 4 2.523544949130733e+02 2.138131100014392e+02 1.382085719890436e+01 1.333256501976729e+02 - ME 2.703695959900953e-05 + ME 7.092902148917371e-06 Event 12 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2148,7 +2148,7 @@ Event 12 Batch 1 2 5.802868936311938e+02 -4.467002255894120e+01 5.211262762381961e+02 -2.513262266832405e+02 3 5.208038834706859e+02 2.151797013176283e+01 -4.993650129388666e+02 -1.463155694111945e+02 4 3.989092228981199e+02 2.315205242717860e+01 -2.176126329932955e+01 3.976417960944350e+02 - ME 5.046437564325244e-04 + ME 4.980323856672599e-04 Event 13 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2156,7 +2156,7 @@ Event 13 Batch 1 2 5.774880087360024e+02 1.576445054854711e+02 5.481077151088400e+02 -9.065617884226717e+01 3 5.915098138161557e+02 -3.018001633277128e+02 -3.808656371901898e+02 3.372564123391869e+02 4 3.310021774478421e+02 1.441556578422419e+02 -1.672420779186502e+02 -2.466002334969197e+02 - ME 1.505341700965184e-03 + ME 5.587942683639647e-05 Event 14 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-2164,7 +2164,7 @@ Event 14 Batch 1 2 2.531797527967491e+02 -8.400833666640553e+01 -2.384535242035555e+02 -1.350938161690895e+01 3 5.261064571264828e+02 -1.751971590790252e+02 -3.334570051994592e+02 3.672878780523887e+02 4 7.207137900767681e+02 2.592054957454308e+02 5.719105294030147e+02 -3.537784964354798e+02 - ME 3.373121845959189e-03 + ME 1.659114310450813e-03 Event 15 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2172,7 +2172,7 @@ Event 15 Batch 1 2 4.605848765362425e+02 3.563504404614684e+02 1.735853700506503e+02 2.345653669687875e+02 3 4.216445088607453e+02 1.370719005416187e+02 -3.933730877164850e+02 6.521502736890037e+01 4 6.177706146030118e+02 -4.934223410030871e+02 2.197877176658347e+02 -2.997803943376878e+02 - ME 4.613631402771334e-04 + ME 9.110622752737525e-05 Event 16 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2180,7 +2180,7 @@ Event 16 Batch 1 2 4.972484926572777e+02 -1.474122335888775e+02 -4.748950276275915e+02 -6.399787981958280e-01 3 5.072511849723048e+02 4.846784046822065e+02 1.224000792205880e+02 -8.607455661990267e+01 4 4.955003223704169e+02 -3.372661710933285e+02 3.524949484070036e+02 8.671453541809866e+01 - ME 5.856804747367533e-05 + ME 1.035537635543116e-05 Event 17 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2188,7 +2188,7 @@ Event 17 Batch 1 2 3.182636773520259e+02 -9.176062613973060e+01 -1.890905041641619e+02 2.389906630959087e+02 3 6.376303990615819e+02 -4.240378519397394e+02 2.706855745366566e+02 -3.917827786765570e+02 4 5.441059235863918e+02 5.157984780794702e+02 -8.159507037249479e+01 1.527921155806483e+02 - ME 7.445984612273079e-05 + ME 2.964570775197734e-05 Event 18 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2196,7 +2196,7 @@ Event 18 Batch 1 2 5.532560008158404e+02 -4.148613005881325e+02 
1.689647846464811e+02 -3.247047971041214e+02 3 3.650144721835348e+02 -1.597348634907620e+02 -2.160675866909894e+02 2.470529017650751e+02 4 5.817295270006244e+02 5.745961640788944e+02 4.710280204450838e+01 7.765189533904635e+01 - ME 9.119298978738387e-05 + ME 3.148325734685632e-05 Event 19 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2204,7 +2204,7 @@ Event 19 Batch 1 2 3.263687475619531e+02 -1.904667433734991e+02 2.390747946355329e+02 -1.143775398573919e+02 3 7.331345945903582e+02 2.597391859223821e+02 -6.739404183465077e+02 1.258022320965774e+02 4 4.404966578476884e+02 -6.927244254888298e+01 4.348656237109747e+02 -1.142469223918529e+01 - ME 8.793129888044293e-05 + ME 9.665339952809457e-06 Event 20 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2212,7 +2212,7 @@ Event 20 Batch 1 2 9.588718605412237e+01 4.259536217794532e+01 8.056474827260676e+01 -2.982128277051557e+01 3 7.250265356668370e+02 3.120913743414047e+02 -4.446787057645155e+02 4.801284204484703e+02 4 6.790862782790414e+02 -3.546867365193502e+02 3.641139574919093e+02 -4.503071376779550e+02 - ME 3.686389281265799e-03 + ME 6.402422614019696e-04 Event 21 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2220,7 +2220,7 @@ Event 21 Batch 1 2 1.825278201605081e+02 -1.533737674675502e+02 8.574830442242751e+01 4.939757963742074e+01 3 7.183016103669913e+02 1.713205736990392e+02 -6.275703015775031e+02 -3.045685162014731e+02 4 5.991705694725008e+02 -1.794680623148897e+01 5.418219971550755e+02 2.551709365640523e+02 - ME 7.470861105912214e-05 + ME 1.806434468406198e-05 Event 22 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2228,7 +2228,7 @@ Event 22 Batch 1 2 2.349542451120770e+02 9.235159917618290e+01 -2.156570331301489e+02 -1.291214495308476e+01 3 7.360601907662837e+02 -2.182033070539752e+02 
6.568866822530020e+02 -2.503433799808774e+02 4 5.289855641216395e+02 1.258517078777923e+02 -4.412296491228531e+02 2.632555249339621e+02 - ME 3.893602972207037e-05 + ME 8.007442232312076e-06 Event 23 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2236,7 +2236,7 @@ Event 23 Batch 1 2 2.350908908124364e+02 -7.377772511691019e+00 -2.298431804723787e+02 -4.884063683135331e+01 3 6.797114625392685e+02 -5.485955088721076e+02 3.603976926464840e+02 1.765336882516069e+02 4 5.851976466482949e+02 5.559732813837987e+02 -1.305545121741055e+02 -1.276930514202538e+02 - ME 2.057468423101862e-04 + ME 3.185713653214173e-05 Event 24 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2244,7 +2244,7 @@ Event 24 Batch 1 2 4.355364173804401e+02 2.538053291625626e+02 -2.665393838801487e+02 -2.328767540869265e+02 3 4.093863144993796e+02 -1.953012891316528e+02 -3.573484670764558e+02 4.191221827828568e+01 4 6.550772681201798e+02 -5.850404003090968e+01 6.238878509566048e+02 1.909645358086408e+02 - ME 1.895168702655672e-04 + ME 3.721637657688893e-05 Event 25 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2252,7 +2252,7 @@ Event 25 Batch 1 2 7.365386968907909e+02 3.875876454009267e+02 3.151568854896985e+02 5.412404333367775e+02 3 5.208510884285567e+02 -2.430585576296288e+02 -1.518636440371932e+02 -4.349089876054084e+02 4 2.426102146806534e+02 -1.445290877712977e+02 -1.632932414525050e+02 -1.063314457313693e+02 - ME 3.717867207603688e-04 + ME 7.982561935336398e-05 Event 26 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2260,7 +2260,7 @@ Event 26 Batch 1 2 7.198867014174701e+02 5.189601929589824e+02 4.797253921416957e+02 -1.370428003807496e+02 3 3.889101953712928e+02 -1.847394503243419e+02 -2.837815501141775e+02 1.912864537085460e+02 4 3.912031032112371e+02 -3.342207426346404e+02 
-1.959438420275183e+02 -5.424365332779646e+01 - ME 1.222836766708484e-04 + ME 1.928349098758061e-05 Event 27 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2268,7 +2268,7 @@ Event 27 Batch 1 2 6.732032222628646e+02 5.870808395006010e+02 -9.126179303429218e+01 3.165595544104447e+02 3 1.177373967283342e+02 7.847176641415683e+01 5.304379211899001e+00 -8.761358356661104e+01 4 7.090593810088013e+02 -6.655526059147578e+02 8.595741382239324e+01 -2.289459708438336e+02 - ME 1.603290018002586e-03 + ME 6.795383824785976e-04 Event 28 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2276,7 +2276,7 @@ Event 28 Batch 1 2 6.475300414228806e+02 3.136396845517189e+02 3.816259196370642e+02 -4.186728559156669e+02 3 7.290923529036073e+02 -2.791764769994177e+02 -4.112865540505715e+02 5.333662195995520e+02 4 1.233776056735125e+02 -3.446320755230100e+01 2.966063441350738e+01 -1.146933636838856e+02 - ME 5.037107889244314e-02 + ME 6.311296815400830e-04 Event 29 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2284,7 +2284,7 @@ Event 29 Batch 1 2 3.156754590345620e+02 -2.870540678871016e+02 4.159516713841874e+01 -1.245825012466667e+02 3 4.770060274033896e+02 -2.355061130652810e+02 -3.231858413754910e+02 -2.600433287405434e+02 4 7.073185135620483e+02 5.225601809523826e+02 2.815906742370723e+02 3.846258299872100e+02 - ME 7.956699356695784e-04 + ME 1.321807869823317e-04 Event 30 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2292,7 +2292,7 @@ Event 30 Batch 1 2 6.091290614220995e+02 1.543004089904798e+02 4.216196287493766e+00 -5.892468251447810e+02 3 2.079357839022729e+02 2.034647466922837e+02 4.185675980476618e+01 9.348729279626889e+00 4 6.829351546756266e+02 -3.577651556827627e+02 -4.607295609226003e+01 5.798980958651539e+02 - ME 3.902231064020147e-04 + ME 1.448382779935031e-04 
Event 31 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2300,7 +2300,7 @@ Event 31 Batch 1 2 6.901710072855793e+02 1.433309098684656e+01 6.447948515477649e+02 -2.457034416076623e+02 3 5.898919363861644e+02 1.120085307876391e+02 -4.815950471622465e+02 3.217029626736535e+02 4 2.199370563282564e+02 -1.263416217744856e+02 -1.631998043855182e+02 -7.599952106599136e+01 - ME 2.415465849322543e-04 + ME 2.376400497996635e-05 Event 32 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2308,7 +2308,7 @@ Event 32 Batch 1 2 6.144498311923271e+02 5.832947925341469e+02 -1.925283703230110e+02 1.576726595169125e+01 3 2.478450424037004e+02 5.004284035329792e+01 2.389954177960992e+02 4.247433867565734e+01 4 6.377051264039724e+02 -6.333376328874447e+02 -4.646704747308818e+01 -5.824160462734862e+01 - ME 2.160220890176678e-04 + ME 5.390650629646604e-05 Event 33 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2316,7 +2316,7 @@ Event 33 Batch 1 2 6.134536717469736e+02 -1.625429495269566e+02 -1.853973484494194e+02 5.617232593785355e+02 3 5.361644687950269e+02 -3.755831293394986e+01 -9.992652347025609e+01 -5.254297294928764e+02 4 3.503818594579993e+02 2.001012624609065e+02 2.853238719196754e+02 -3.629352988565911e+01 - ME 1.224582992507153e-04 + ME 1.005452860076771e-04 Event 34 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2324,7 +2324,7 @@ Event 34 Batch 1 2 3.840838099420727e+02 -2.442269925519278e+02 -3.827314394217582e+01 -2.939535943332559e+02 3 6.022630974514659e+02 3.956891925431131e+01 5.086724982658299e+02 3.200116071158652e+02 4 5.136530926064613e+02 2.046580732976165e+02 -4.703993543236541e+02 -2.605801278260916e+01 - ME 9.608243105510499e-05 + ME 2.313941306740064e-05 Event 35 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 
7.500000000000000e+02 @@ -2332,7 +2332,7 @@ Event 35 Batch 1 2 3.454350783663418e+02 -3.439607925797615e+02 2.363778141880094e+01 -2.139209721976717e+01 3 6.705698302143294e+02 5.215327591153251e+02 4.060443141865528e+02 -1.131171661597076e+02 4 4.839950914193290e+02 -1.775719665355635e+02 -4.296820956053536e+02 1.345092633794747e+02 - ME 4.862206803317224e-05 + ME 7.982017052260048e-06 Event 36 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2340,7 +2340,7 @@ Event 36 Batch 1 2 7.098652154429357e+02 2.489290984574327e+02 -1.674080692141068e+02 -6.433641786725617e+02 3 6.178479130357197e+02 -1.435715807033598e+02 2.588953561477193e+02 5.423065917191846e+02 4 1.722868715213448e+02 -1.053575177540730e+02 -9.148728693361247e+01 1.010575869533772e+02 - ME 6.680529568232270e-05 + ME 5.562249548714765e-05 Event 37 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2348,7 +2348,7 @@ Event 37 Batch 1 2 6.906872786346031e+02 1.495946561071237e+02 1.712833879510068e+02 6.521750966909805e+02 3 3.682276595245592e+02 -1.358558710218083e+02 1.194309698061993e+02 -3.207351477449753e+02 4 4.410850618408380e+02 -1.373878508531530e+01 -2.907143577572061e+02 -3.314399489460051e+02 - ME 2.014943348935539e-03 + ME 5.542438863722841e-04 Event 38 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2356,7 +2356,7 @@ Event 38 Batch 1 2 6.131720166645955e+02 -5.222102655174087e+02 6.340623138461877e+00 3.213038392347352e+02 3 4.540063357567760e+02 2.932429176443922e+02 -3.207297067242505e+02 -1.313879727496968e+02 4 4.328216475786277e+02 2.289673478730168e+02 3.143890835857886e+02 -1.899158664850380e+02 - ME 2.589645049118943e-04 + ME 3.150821423911933e-05 Event 39 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2364,7 +2364,7 @@ Event 39 Batch 1 2 2.929747896182304e+02 
2.510117592312210e+02 -1.378648144805472e+02 6.181113983529403e+01 3 6.287164314722783e+02 3.864928360025993e+01 6.254120614625328e+02 5.148142827864510e+01 4 5.783087789094894e+02 -2.896610428314818e+02 -4.875472469819856e+02 -1.132925681139394e+02 - ME 1.708238325115053e-04 + ME 2.723120294663496e-05 Event 40 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2372,7 +2372,7 @@ Event 40 Batch 1 2 1.143487538112954e+02 -3.203572478439017e+01 1.022340126870988e+02 3.996944439980560e+01 3 7.361483923235807e+02 5.924235295921244e+02 -3.838567751530157e+02 -2.088128187524163e+02 4 6.495028538651248e+02 -5.603878048077345e+02 2.816227624659169e+02 1.688433743526105e+02 - ME 2.026369815874481e-04 + ME 4.279185076498264e-05 Event 41 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2380,7 +2380,7 @@ Event 41 Batch 1 2 6.384898508133350e+02 5.540399192408263e+02 -3.014826159773289e+02 -9.908223727147148e+01 3 3.510407251698805e+02 -1.719168197014114e+02 2.065966849440144e+02 -2.258140996521069e+02 4 5.104694240167846e+02 -3.821230995394149e+02 9.488593103331458e+01 3.248963369235784e+02 - ME 4.455092331482675e-05 + ME 1.488395965626735e-05 Event 42 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2388,7 +2388,7 @@ Event 42 Batch 1 2 3.291654598309212e+02 -1.090829060981258e+02 2.972891943885482e+02 -8.983292515941632e+01 3 6.884965239796815e+02 4.933628807557017e+02 -2.919492821202986e+02 3.812953554581829e+02 4 4.823380161893969e+02 -3.842799746575757e+02 -5.339912268249619e+00 -2.914624302987665e+02 - ME 6.690811667999076e-04 + ME 5.767145017550451e-05 Event 43 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2396,7 +2396,7 @@ Event 43 Batch 1 2 3.674173006007981e+02 2.791827424102563e+02 1.079644067383057e+02 2.130637369397045e+02 3 7.392205647816575e+02 
-6.110484627794917e+02 -4.247874240022372e+01 -4.138385868609020e+02 4 3.933621346175442e+02 3.318657203692355e+02 -6.548566433808202e+01 2.007748499211975e+02 - ME 2.734436884563990e-05 + ME 6.513986915725277e-06 Event 44 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2404,7 +2404,7 @@ Event 44 Batch 1 2 2.081359682230012e+02 -1.082501549908087e+02 1.771964605001424e+02 1.427934167997762e+01 3 7.449563315308093e+02 5.092828751965591e+02 -5.388739609944279e+02 7.215083562608928e+01 4 5.469077002461893e+02 -4.010327202057504e+02 3.616775004942854e+02 -8.643017730606689e+01 - ME 1.760644262839344e-04 + ME 1.838899544278803e-05 Event 45 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2412,7 +2412,7 @@ Event 45 Batch 1 2 5.180982465404422e+02 4.470261481799612e+02 -3.368837017252423e+01 -2.597277606009553e+02 3 3.377595659674062e+02 -7.316527185649456e+01 2.454727770679006e+02 -2.201624016839132e+02 4 6.441421874921515e+02 -3.738608763234666e+02 -2.117844068953763e+02 4.798901622848684e+02 - ME 1.645403798734011e-04 + ME 4.091340785269233e-05 Event 46 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2420,7 +2420,7 @@ Event 46 Batch 1 2 6.296560291524888e+02 2.172411497655985e+02 5.821614514430422e+02 -1.017892054705761e+02 3 6.224001894826197e+02 1.405102091633609e+01 -6.218608257778048e+02 2.176414579432105e+01 4 2.479437813648912e+02 -2.312921706819346e+02 3.969937433476264e+01 8.002505967625511e+01 - ME 4.041878897626609e-05 + ME 7.434320230190137e-06 Event 47 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2428,7 +2428,7 @@ Event 47 Batch 1 2 5.458843469271557e+02 -1.019033861791133e+02 -1.559739004096151e+02 5.131058004898495e+02 3 2.573134207008558e+02 6.791700498899543e+01 -2.412204887508016e+02 5.839651284901167e+01 4 6.968022323719882e+02 
3.398638119011781e+01 3.971943891604168e+02 -5.715023133388611e+02 - ME 1.408798022766008e-02 + ME 4.005478861198618e-03 Event 48 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2436,7 +2436,7 @@ Event 48 Batch 1 2 6.623920218006384e+02 -6.284562032939594e+02 -1.837527125398962e+02 -1.002044496053409e+02 3 1.251779629744606e+02 -7.502448682133647e+01 9.550779386908961e+01 3.031682869117444e+01 4 7.124300152249010e+02 7.034806901152959e+02 8.824491867080658e+01 6.988762091416655e+01 - ME 8.682321044518227e-04 + ME 3.004757451335502e-04 Event 49 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2444,7 +2444,7 @@ Event 49 Batch 1 2 2.397494808364364e+02 2.393958238941666e+02 -4.144666783354266e+00 -1.233996761053010e+01 3 6.782491241100328e+02 -3.516321535544010e+02 -2.705899831712919e+02 5.129890485673947e+02 4 5.820013950535307e+02 1.122363296602344e+02 2.747346499546462e+02 -5.006490809568646e+02 - ME 9.041285542966720e-03 + ME 6.040872325723622e-04 Event 50 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2452,7 +2452,7 @@ Event 50 Batch 1 2 4.764898792162554e+02 4.667163214316568e+02 5.900817880915086e+01 -7.573978570375913e+01 3 5.114228101321805e+02 -2.035689445851523e+02 -4.549677995197112e+02 -1.145306811477843e+02 4 5.120873106515638e+02 -2.631473768465044e+02 3.959596207105603e+02 1.902704668515434e+02 - ME 5.157319121365441e-05 + ME 9.692662313613028e-06 Event 51 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2460,7 +2460,7 @@ Event 51 Batch 1 2 4.678795643859630e+02 4.629737719234085e+02 5.365495313512251e+01 4.108186077915564e+01 3 6.311645871918951e+02 -4.500610707732837e+02 -4.345770688214700e+02 8.340587481742408e+01 4 4.009558484221416e+02 -1.291270115012470e+01 3.809221156863474e+02 -1.244877355965797e+02 - ME 1.517985021504320e-04 + ME 
1.293558494013996e-05 Event 52 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2468,7 +2468,7 @@ Event 52 Batch 1 2 3.696230029266819e+02 2.516704934433110e+02 2.514038675722595e+02 1.003953305301004e+02 3 6.696174214325739e+02 -2.754912388418390e+01 -6.493999246431116e+02 -1.609604756850079e+02 4 4.607595756407442e+02 -2.241213695591271e+02 3.979960570708519e+02 6.056514515490756e+01 - ME 5.727699238559496e-05 + ME 8.655753222194317e-06 Event 53 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2476,7 +2476,7 @@ Event 53 Batch 1 2 7.284624742442375e+01 -4.271742504396477e+01 -2.683807109937144e+01 -5.255012179908527e+01 3 7.493542950735829e+02 3.356513586119740e+02 2.501807367708783e+02 6.215139772812374e+02 4 6.777994575019936e+02 -2.929339335680093e+02 -2.233426656715069e+02 -5.689638554821522e+02 - ME 1.612275481129464e-02 + ME 2.372423861687152e-03 Event 54 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2484,7 +2484,7 @@ Event 54 Batch 1 2 7.460259847230064e+02 2.055186857047568e+01 6.233229443227743e+02 4.093908861479223e+02 3 5.756222844616437e+02 2.606063779094539e+01 -4.696411468594731e+02 -3.318117699890848e+02 4 1.783517308153497e+02 -4.661250636142109e+01 -1.536817974633012e+02 -7.757911615883735e+01 - ME 4.374243668355642e-04 + ME 5.046268590690708e-05 Event 55 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2492,7 +2492,7 @@ Event 55 Batch 1 2 5.967428482894213e+02 -8.165820254184375e+01 5.098287527914877e+02 -2.991798919868828e+02 3 5.942526243827265e+02 5.606061544962815e+01 -2.905196430116550e+02 5.153559216750568e+02 4 3.090045273278509e+02 2.559758709221549e+01 -2.193091097798325e+02 -2.161760296881746e+02 - ME 1.779007466146034e-03 + ME 1.849048785615045e-04 Event 56 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -2500,7 +2500,7 @@ Event 56 Batch 1 2 5.610874267302015e+02 -4.199055433713192e+02 3.580252469767042e+02 1.015694718309908e+02 3 6.303091265298390e+02 2.130872195586830e+02 -5.453843477211296e+02 -2.333224059286980e+02 4 3.086034467399593e+02 2.068183238126362e+02 1.873591007444254e+02 1.317529340977073e+02 - ME 3.258989367177766e-05 + ME 7.213009143835112e-06 Event 57 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2508,7 +2508,7 @@ Event 57 Batch 1 2 6.552053965855981e+02 4.516249927537604e+02 7.110694105335197e+00 4.746350341729917e+02 3 6.035190443408458e+02 -3.717228873476765e+02 2.148772607224587e+02 -4.241286299324850e+02 4 2.412755590735562e+02 -7.990210540608396e+01 -2.219879548277939e+02 -5.050640424050685e+01 - ME 1.623545585873121e-04 + ME 3.752873989265266e-05 Event 58 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2516,7 +2516,7 @@ Event 58 Batch 1 2 2.959982971085279e+02 1.850007048157144e+02 -2.304987961744356e+02 1.612563397119956e+01 3 7.018897389129390e+02 -3.764226030262936e+02 4.376344751014918e+02 3.992884868423144e+02 4 5.021119639785326e+02 1.914218982105791e+02 -2.071356789270567e+02 -4.154141208135139e+02 - ME 4.558573859477246e-03 + ME 1.901193343270815e-04 Event 59 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2524,7 +2524,7 @@ Event 59 Batch 1 2 5.521089721327345e+02 1.223876815062619e+02 -3.629066091228882e+01 -5.371485459866160e+02 3 4.098988410471214e+02 -5.841964900319319e+01 -3.626461945087767e+02 1.819119075553315e+02 4 5.379921868201441e+02 -6.396803250306872e+01 3.989368554210655e+02 3.552366384312845e+02 - ME 5.148841296796537e-05 + ME 1.780280399801712e-05 Event 60 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2532,7 +2532,7 @@ Event 60 Batch 1 2 
7.143828168925960e+02 -4.584044193456332e+02 -2.419772079280938e+02 -4.915844060170314e+02 3 1.284110307517517e+02 8.324300347118127e+01 -7.889851197070540e+01 5.774963203893758e+01 4 6.572061523556514e+02 3.751614158744520e+02 3.208757198987992e+02 4.338347739780938e+02 - ME 1.673517837789511e-04 + ME 7.144001898958308e-05 Event 61 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2540,7 +2540,7 @@ Event 61 Batch 1 2 4.394390210968651e+02 -2.137451655543886e+02 -3.779414621253704e+02 -6.767502250635177e+01 3 4.431311911324728e+02 3.845666395406355e+02 -2.150363068358313e+02 4.725610065709574e+01 4 6.174297877706618e+02 -1.708214739862469e+02 5.929777689612018e+02 2.041892184925626e+01 - ME 1.368591177943825e-04 + ME 2.870354731125455e-05 Event 62 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2548,7 +2548,7 @@ Event 62 Batch 1 2 7.301725729481176e+02 4.281927891852710e+02 5.652737593150771e+02 -1.739784429324868e+02 3 7.567373964415995e+01 2.589885732647599e+01 -5.696550981957816e+01 4.255225906941358e+01 4 6.941536874077224e+02 -4.540916465117469e+02 -5.083082494954988e+02 1.314261838630732e+02 - ME 8.513592598060080e-04 + ME 2.379197431250548e-04 Event 63 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2556,7 +2556,7 @@ Event 63 Batch 1 2 4.361152320236988e+02 -3.738769057978321e+02 1.427754799584550e+02 -1.732850750548248e+02 3 5.817148313055657e+02 5.081993893256957e+02 2.829214478037172e+02 -8.998890070513914e+00 4 4.821699366707353e+02 -1.343224835278637e+02 -4.256969277621721e+02 1.822839651253387e+02 - ME 4.544766189571194e-05 + ME 8.350404272725701e-06 Event 64 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2564,7 +2564,7 @@ Event 64 Batch 1 2 6.097675704107204e+02 3.288514690970509e+02 4.971291587853200e+02 -1.285916042465611e+02 3 
5.709532610348123e+02 -6.501292612520263e+01 -4.768258747557200e+02 3.072426254385416e+02 4 3.192791685544673e+02 -2.638385429718484e+02 -2.030328402960006e+01 -1.786510211919805e+02 - ME 4.598138986874043e-04 + ME 3.000969253297957e-05 Event 65 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2572,7 +2572,7 @@ Event 65 Batch 1 2 6.258641293880484e+02 3.743515439843765e+02 -1.622018320411498e+02 -4.746128903155367e+02 3 7.438702198751357e+02 -4.029113627030089e+02 2.325939036896868e+02 5.804355380128616e+02 4 1.302656507368158e+02 2.855981871863233e+01 -7.039207164853700e+01 -1.058226476973252e+02 - ME 6.427333508548903e-03 + ME 3.162776051460646e-04 Event 66 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2580,7 +2580,7 @@ Event 66 Batch 1 2 3.731957242404369e+02 1.596860493342637e+01 -3.714568973276624e+02 3.224632809376674e+01 3 6.079923612940432e+02 4.451199598539357e+02 3.189341902600864e+02 -2.642043054431177e+02 4 5.188119144655197e+02 -4.610885647873621e+02 5.252270706757586e+01 2.319579773493509e+02 - ME 4.681392980523237e-05 + ME 1.034065067393998e-05 Event 67 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2588,7 +2588,7 @@ Event 67 Batch 1 2 7.084256499213539e+02 6.318790977834966e+02 -2.229764540025608e+02 2.299504472951746e+02 3 5.168612394424738e+01 1.130069959366449e+01 -1.428140623590627e+01 4.837138651102398e+01 4 7.398882261343989e+02 -6.431797973771612e+02 2.372578602384670e+02 -2.783218338061985e+02 - ME 5.878400132197954e-02 + ME 1.479715191731530e-02 Event 68 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2596,7 +2596,7 @@ Event 68 Batch 1 2 5.644037677826096e+02 -7.446914007305443e+01 3.170710956176409e+02 4.609467220707991e+02 3 4.303832728799333e+02 -1.588265612792408e+02 -3.994808673830752e+02 -2.046757440246668e+01 4 
5.052129593374568e+02 2.332957013522950e+02 8.240977176543441e+01 -4.404791476683325e+02 - ME 8.108482137897523e-03 + ME 3.274273226082449e-04 Event 69 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2604,7 +2604,7 @@ Event 69 Batch 1 2 2.379282923937934e+02 -4.413455715133102e+01 1.058497776082811e+02 -2.084654354245804e+02 3 5.822935131976616e+02 -5.806422676829345e+02 4.095409019445288e+01 -1.559022092337181e+01 4 6.797781944085444e+02 6.247768248342655e+02 -1.468038678027338e+02 2.240556563479522e+02 - ME 3.039802585689931e-04 + ME 6.379305675073031e-05 Event 70 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2612,7 +2612,7 @@ Event 70 Batch 1 2 5.861861307468000e+02 1.831219916849830e+02 2.904683423406074e+02 -4.750880530376756e+02 3 4.633200606614189e+02 -4.245314712871158e+02 -1.339518705596282e+02 1.284344380284135e+02 4 4.504938085917810e+02 2.414094796021329e+02 -1.565164717809791e+02 3.466536150092620e+02 - ME 3.530491740557932e-05 + ME 1.325653453486623e-05 Event 71 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2620,7 +2620,7 @@ Event 71 Batch 1 2 7.383412459951699e+02 5.748049255568963e+02 -1.639684737984460e+02 -4.334298474879633e+02 3 3.973981306646684e+02 -3.228684354469153e+02 -4.837114091238284e+00 2.316416412804533e+02 4 3.642606233401616e+02 -2.519364901099809e+02 1.688055878896842e+02 2.017882062075102e+02 - ME 3.103530482016079e-05 + ME 1.333441808219846e-05 Event 72 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2628,7 +2628,7 @@ Event 72 Batch 1 2 3.538199915090663e+02 3.512029503136998e+02 -6.467835580753929e+00 -4.246458742680748e+01 3 5.344234504985296e+02 1.310173344785605e+01 3.836805260246265e+01 5.328833470497182e+02 4 6.117565579924039e+02 -3.643046837615559e+02 -3.190021702170876e+01 -4.904187596229107e+02 - ME 
9.376669006106200e-03 + ME 2.994704399169685e-03 Event 73 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2636,7 +2636,7 @@ Event 73 Batch 1 2 4.694927197571710e+02 1.451947293992222e+02 -1.807863847612341e+02 4.082379055705570e+02 3 5.537325951281179e+02 -5.796379956652479e+01 5.401382741253894e+02 -1.072876026015002e+02 4 4.767746851147115e+02 -8.723092983269744e+01 -3.593518893641554e+02 -3.009503029690568e+02 - ME 1.077472469645428e-03 + ME 1.535829386616431e-04 Event 74 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2644,7 +2644,7 @@ Event 74 Batch 1 2 6.258444305735198e+02 -3.349227552763227e+02 4.941036656040852e+02 1.880679848209580e+02 3 5.555040664889822e+02 3.765538795180102e+01 -5.474422011270130e+02 -8.645158222500005e+01 4 3.186515029374982e+02 2.972673673245214e+02 5.333853552292791e+01 -1.016164025959578e+02 - ME 1.623439923565115e-04 + ME 1.487896902219418e-05 Event 75 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2652,7 +2652,7 @@ Event 75 Batch 1 2 3.943316317993887e+02 5.588489849751632e+01 -2.552251009651266e+02 -2.953548066221912e+02 3 5.467466262348042e+02 -3.021648543602057e+02 -2.377479281839000e+02 3.887212326756534e+02 4 5.589217419658066e+02 2.462799558626894e+02 4.929730291490265e+02 -9.336642605346221e+01 - ME 1.348649436679123e-04 + ME 4.632408498797698e-05 Event 76 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2660,7 +2660,7 @@ Event 76 Batch 1 2 5.517772830004059e+02 2.282681125856672e+02 -4.885490190451381e+02 -1.169260227747471e+02 3 4.245403880864563e+02 -2.793100283061228e+02 1.521744876196477e+02 -2.811821020654221e+02 4 5.236823289131380e+02 5.104191572045557e+01 3.363745314254903e+02 3.981081248401691e+02 - ME 5.074216551061466e-05 + ME 1.645260485784409e-05 Event 77 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2668,7 +2668,7 @@ Event 77 Batch 1 2 3.781543446472003e+02 -5.926925448310480e+01 -1.775497893613220e+02 3.285786605157444e+02 3 6.702964816234122e+02 -6.066564226432872e+01 -1.057468051743550e+02 -6.591165802199176e+02 4 4.515491737293867e+02 1.199348967474336e+02 2.832965945356770e+02 3.305379197041734e+02 - ME 6.321080405055773e-05 + ME 5.041095643414513e-05 Event 78 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2676,7 +2676,7 @@ Event 78 Batch 1 2 4.564262045363139e+02 1.882572856930395e+02 1.751822011208171e+02 -3.770878823051468e+02 3 3.809544602625751e+02 -2.816334489555117e+02 1.992812047321844e+02 -1.615422627793184e+02 4 6.626193352011103e+02 9.337616326247226e+01 -3.744634058530013e+02 5.386301450844651e+02 - ME 2.572921643188974e-04 + ME 6.222463480998997e-05 Event 79 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2684,7 +2684,7 @@ Event 79 Batch 1 2 6.126536521478922e+02 6.075062399138452e+02 -4.178945028651393e+01 6.733726903166659e+01 3 2.872846052831658e+02 -1.084163947926161e+02 2.139961846825774e+01 2.651799127051085e+02 4 6.000617425689430e+02 -4.990898451212283e+02 2.038983181825616e+01 -3.325171817367756e+02 - ME 1.996659951821530e-03 + ME 6.289823950094716e-04 Event 80 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2692,7 +2692,7 @@ Event 80 Batch 1 2 4.171281258707700e+02 -2.756641813219371e+02 1.445082905894664e+01 3.127240094205691e+02 3 3.805235327384960e+02 -2.955852199231463e+02 2.395269588958384e+02 7.373784162959287e+00 4 7.023483413907342e+02 5.712494012450838e+02 -2.539777879547846e+02 -3.200977935835284e+02 - ME 1.297520069620947e-03 + ME 5.629434448779270e-04 Event 81 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2700,7 +2700,7 @@ Event 81 Batch 
1 2 7.471091333863935e+02 -9.753029041192970e+01 7.407154559164039e+02 -7.162458282065091e-01 3 6.775352561453885e+02 9.550863422814814e+01 -6.702673865908516e+02 -2.595678293896889e+01 4 7.535561046821789e+01 2.021656183781575e+00 -7.044806932555213e+01 2.667302876717550e+01 - ME 1.022399816924924e-04 + ME 2.904529061551848e-05 Event 82 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2708,7 +2708,7 @@ Event 82 Batch 1 2 4.309094465924175e+02 3.042233433179616e+02 2.799835808203350e+02 -1.214096495919827e+02 3 5.540384887187945e+02 -4.824447657759213e+02 1.988969596446625e+02 1.861335391629672e+02 4 5.150520646887885e+02 1.782214224579596e+02 -4.788805404649973e+02 -6.472388957098450e+01 - ME 1.053635072607165e-04 + ME 1.778678120024833e-05 Event 83 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2716,7 +2716,7 @@ Event 83 Batch 1 2 4.869534474909295e+02 -4.727010820510885e+02 1.062322962656182e+02 4.890855018466118e+01 3 3.520990385354405e+02 -1.437544586613779e+02 -3.142298368411062e+02 6.758696761482639e+01 4 6.609475139736298e+02 6.164555407124665e+02 2.079975405754878e+02 -1.164955177994876e+02 - ME 2.998516055200512e-04 + ME 7.948516811691567e-05 Event 84 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2724,7 +2724,7 @@ Event 84 Batch 1 2 1.391975815431583e+01 -3.682657486111166e-01 -1.138840508663312e+01 -7.995516055627093e+00 3 7.493632094786751e+02 -3.452281541586202e+01 3.833012084573049e+02 6.429880080772211e+02 4 7.367170323670085e+02 3.489108116447313e+01 -3.719128033706718e+02 -6.349924920215940e+02 - ME 3.806217512266510e-01 + ME 8.671177508029917e-02 Event 85 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2732,7 +2732,7 @@ Event 85 Batch 1 2 7.362448947738020e+02 6.409220704967113e+02 3.243429451315054e+02 1.614840505254833e+02 3 
1.517836214454495e+02 -1.266859291808411e+02 -6.780846852200752e+01 4.889738933094901e+01 4 6.119714837807480e+02 -5.142361413158706e+02 -2.565344766094980e+02 -2.103814398564324e+02 - ME 5.694785892689211e-04 + ME 1.062305495679385e-04 Event 86 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2740,7 +2740,7 @@ Event 86 Batch 1 2 5.451728369778392e+02 -6.605005893803180e+01 1.066920544886257e+02 -5.305352178712969e+02 3 3.158718592284829e+02 -1.755596039144849e+02 2.550395858012225e+02 6.251932981237656e+01 4 6.389553037936773e+02 2.416096628525165e+02 -3.617316402898481e+02 4.680158880589203e+02 - ME 1.469986179099727e-04 + ME 4.057626974930324e-05 Event 87 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2748,7 +2748,7 @@ Event 87 Batch 1 2 3.414211232216659e+02 1.437256906952883e+02 1.534640422371205e+02 -2.689983214749668e+02 3 5.081668091119999e+02 4.794742948200324e+02 -1.464748766741243e+02 8.296394996143997e+01 4 6.504120676663341e+02 -6.231999855153207e+02 -6.989165562996117e+00 1.860343715135268e+02 - ME 1.823135893899652e-04 + ME 3.656584417835253e-05 Event 88 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2756,7 +2756,7 @@ Event 88 Batch 1 2 2.925516585730864e+02 1.655911293372511e+01 2.598275245766865e+02 -1.334238591297045e+02 3 7.159840369510271e+02 -1.056844973272874e+02 -3.694097043713192e+02 6.041526284885822e+02 4 4.914643044758866e+02 8.912538439356234e+01 1.095821797946327e+02 -4.707287693588777e+02 - ME 8.728488941697977e-02 + ME 2.327745727475104e-03 Event 89 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2764,7 +2764,7 @@ Event 89 Batch 1 2 6.333634651097186e+02 1.209853522660007e+02 5.372166546881791e+02 -3.129058794565919e+02 3 6.221307427802806e+02 5.757192259699385e+01 -4.327483989541182e+02 4.432391657372765e+02 4 
2.445057921100010e+02 -1.785572748629945e+02 -1.044682557340609e+02 -1.303332862806847e+02 - ME 5.497507832908574e-04 + ME 5.047204144927262e-05 Event 90 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2772,7 +2772,7 @@ Event 90 Batch 1 2 3.111538587406461e+02 2.628215106651484e+02 -6.985334981761831e+01 -1.512021390726355e+02 3 5.216486323898988e+02 1.252715366480781e+02 4.457714554600226e+02 -2.402335265468457e+02 4 6.671975088694549e+02 -3.880930473132266e+02 -3.759181056424042e+02 3.914356656194811e+02 - ME 2.329075524537458e-04 + ME 4.503542584588689e-05 Event 91 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2780,7 +2780,7 @@ Event 91 Batch 1 2 3.007803348469016e+02 8.390513937949677e+01 2.884042062049404e+02 -1.586667134655829e+01 3 6.256884422056424e+02 2.364580673743878e+02 -3.590826126759745e+02 -4.545693416378727e+02 4 5.735312229474563e+02 -3.203632067538847e+02 7.067840647103421e+01 4.704360129844310e+02 - ME 6.478111274774788e-05 + ME 2.635583378174906e-05 Event 92 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2788,7 +2788,7 @@ Event 92 Batch 1 2 6.843865618656529e+02 -2.264962467301474e+02 -5.909185329480341e+02 2.605757158639088e+02 3 6.645516272550811e+02 3.453347116263074e+02 4.983670680340538e+02 -2.720350487207341e+02 4 1.510618108792659e+02 -1.188384648961601e+02 9.255146491398015e+01 1.145933285682523e+01 - ME 9.365402433981294e-05 + ME 1.711437740567050e-05 Event 93 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2796,7 +2796,7 @@ Event 93 Batch 1 2 5.579763469381434e+02 2.180908585044468e+02 5.135246110359701e+02 8.151996049100932e+00 3 3.333821836060117e+02 1.681122988324202e+02 -1.261705574188212e+02 2.587719570738210e+02 4 6.086414694558448e+02 -3.862031573368670e+02 -3.873540536171486e+02 -2.669239531229223e+02 - ME 
5.183695239236329e-04 + ME 1.157787815150910e-04 Event 94 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2804,7 +2804,7 @@ Event 94 Batch 1 2 4.534979734151987e+02 1.139662723650677e+02 2.686183171543304e+01 4.381216071501101e+02 3 3.856184698299744e+02 1.545134372854228e+02 -3.452526490806396e+02 7.501873282757614e+01 4 6.608835567548277e+02 -2.684797096504910e+02 3.183908173652065e+02 -5.131403399776862e+02 - ME 6.944325623628402e-03 + ME 1.545010233607317e-03 Event 95 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2812,7 +2812,7 @@ Event 95 Batch 1 2 2.828073115974175e+02 -5.711637476392460e+01 5.915078172645698e+01 -2.705898746219725e+02 3 6.809618671276158e+02 3.772100991821226e+02 3.247893528880094e+02 4.646864338535512e+02 4 5.362308212749670e+02 -3.200937244181981e+02 -3.839401346144663e+02 -1.940965592315787e+02 - ME 2.560512106670314e-04 + ME 6.408796328924562e-05 Event 96 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2820,7 +2820,7 @@ Event 96 Batch 1 2 4.639832102051440e+02 -4.275497908582962e+02 -1.317248975374901e+02 -1.230046627491649e+02 3 7.474114851375481e+02 6.594176555428718e+02 2.654537688070380e+02 2.309254864669502e+02 4 2.886053046573076e+02 -2.318678646845757e+02 -1.337288712695479e+02 -1.079208237177853e+02 - ME 2.440162169445852e-04 + ME 1.445191791082226e-05 Event 97 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2828,7 +2828,7 @@ Event 97 Batch 1 2 5.095921959312568e+02 3.190102848863560e+02 3.100341192456060e+02 2.485869851668986e+02 3 4.555541331018014e+02 -2.788120391899956e+02 2.221549471930723e+02 -2.836205112936887e+02 4 5.348536709669415e+02 -4.019824569636059e+01 -5.321890664386783e+02 3.503352612679014e+01 - ME 8.198891770965733e-05 + ME 2.250661525403011e-05 Event 98 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2836,7 +2836,7 @@ Event 98 Batch 1 2 5.299941952467790e+02 -2.570048161992350e+02 -4.630296380940593e+02 -2.111695271961878e+01 3 7.352146396921255e+02 2.361229278157243e+02 6.962552486063584e+02 3.893348873424185e+00 4 2.347911650610957e+02 2.088188838351074e+01 -2.332256105122990e+02 1.722360384619465e+01 - ME 6.760444392591968e-05 + ME 5.654417419793765e-06 Event 99 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2844,7 +2844,7 @@ Event 99 Batch 1 2 4.290897291078425e+02 3.747236205606835e+02 2.040795775432686e+02 -4.529602465443949e+01 3 6.438744429739487e+02 -5.215755139094103e+02 2.133414139578182e+01 3.769325350988583e+02 4 4.270358279182090e+02 1.468518933487271e+02 -2.254137189390505e+02 -3.316365104444187e+02 - ME 2.024851967866169e-03 + ME 8.457850707842401e-05 Event 100 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2852,7 +2852,7 @@ Event 100 Batch 1 2 5.119062275524872e+02 -4.721600394809319e+02 -1.845880136125884e+02 7.099400083769524e+01 3 4.523854579707449e+02 2.836789572262426e+02 -3.060214184981774e+02 -1.747276258374610e+02 4 5.357083144767672e+02 1.884810822546894e+02 4.906094321107658e+02 1.037336249997658e+02 - ME 6.898305006855298e-05 + ME 1.420495101373495e-05 Event 101 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2860,7 +2860,7 @@ Event 101 Batch 1 2 6.024072815192737e+02 -3.080418730730875e+02 -4.692284526425155e+02 2.186993289696520e+02 3 3.347434020484399e+02 8.940653726951260e+01 -3.939923552329941e+01 -3.201676381969582e+02 4 5.628493164322859e+02 2.186353358035749e+02 5.086276881658150e+02 1.014683092273061e+02 - ME 9.290725627447436e-05 + ME 2.743452031293993e-05 Event 102 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2868,7 +2868,7 @@ Event 102 
Batch 1 2 5.910857738801296e+02 3.707548039128416e+02 -7.516477307090547e+01 -4.541734518311494e+02 3 2.311218706704979e+02 4.536804143672514e+01 -2.262982016400413e+02 1.217307902336991e+01 4 6.777923554493723e+02 -4.161228453495667e+02 3.014629747109467e+02 4.420003728077793e+02 - ME 2.633339755449651e-04 + ME 7.158169676479796e-05 Event 103 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2876,7 +2876,7 @@ Event 103 Batch 1 2 6.627949406417042e+02 7.189602123685950e+01 -6.391860825813610e+02 -1.599038689489492e+02 3 5.519979886399102e+02 1.442810582977179e+02 4.734454174874869e+02 2.444057944057306e+02 4 2.852070707183856e+02 -2.161770795345774e+02 1.657406650938741e+02 -8.450192545678139e+01 - ME 1.652798222861839e-04 + ME 1.658567428345252e-05 Event 104 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2884,7 +2884,7 @@ Event 104 Batch 1 2 4.368180791462563e+02 -3.483499330357901e+02 -2.596280064690262e+02 4.533935023690698e+01 3 4.635715977792429e+02 1.873023362819025e+02 -2.251347602994603e+02 -3.593477435519053e+02 4 5.996103230745010e+02 1.610475967538876e+02 4.847627667684865e+02 3.140083933149983e+02 - ME 9.158171748371188e-05 + ME 2.162124469235967e-05 Event 105 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2892,7 +2892,7 @@ Event 105 Batch 1 2 5.701708357490469e+02 2.288495716262106e+02 -4.521314661478370e+02 -2.613422905391967e+02 3 3.711008490497917e+02 -3.362590561223710e+02 -8.126001400906793e+01 1.343223639771668e+02 4 5.587283152011612e+02 1.074094844961603e+02 5.333914801569049e+02 1.270199265620299e+02 - ME 7.043372303967046e-05 + ME 1.720246557093887e-05 Event 106 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2900,7 +2900,7 @@ Event 106 Batch 1 2 6.775588183099673e+02 5.149765831731705e+02 3.445381345095063e+02 
-2.741870619150275e+02 3 7.044100837534635e+02 -4.546975847980706e+02 -4.392260662935809e+02 3.106833358270535e+02 4 1.180310979365712e+02 -6.027899837509908e+01 9.468793178407486e+01 -3.649627391202603e+01 - ME 3.259673897057837e-04 + ME 2.786544600802367e-05 Event 107 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2908,7 +2908,7 @@ Event 107 Batch 1 2 6.046880513041550e+02 2.289413119004024e+02 -5.349774474143721e+02 -1.644160754103499e+02 3 3.366746442316215e+02 -7.166101576320902e+01 2.452245434825371e+01 3.280444544890399e+02 4 5.586373044642238e+02 -1.572802961371935e+02 5.104549930661184e+02 -1.636283790786902e+02 - ME 8.859556065170558e-04 + ME 4.667002706670146e-04 Event 108 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2916,7 +2916,7 @@ Event 108 Batch 1 2 6.239206451413978e+02 -2.218030564243363e+02 5.011455197099735e+02 -2.982172759400455e+02 3 2.841199272340513e+02 1.209406641294798e+02 7.967327320293104e+01 2.444374323800143e+02 4 5.919594276245514e+02 1.008623922948564e+02 -5.808187929129044e+02 5.377984356003120e+01 - ME 1.727643234936365e-04 + ME 7.961277501126149e-05 Event 109 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2924,7 +2924,7 @@ Event 109 Batch 1 2 3.093404598873124e+02 1.546999830656544e+02 1.629193992247174e+02 2.126421988200774e+02 3 5.287372542258961e+02 -2.136116696975048e+02 -1.865832176193536e+02 4.462284633214169e+02 4 6.619222858867909e+02 5.891168663185049e+01 2.366381839463621e+01 -6.588706621414941e+02 - ME 1.686695657867669e+01 + ME 2.902408960420708e-01 Event 110 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2932,7 +2932,7 @@ Event 110 Batch 1 2 4.920948406187608e+02 -8.595212543403569e+01 -4.824913009925944e+02 -4.440392734262522e+01 3 4.634042325716594e+02 -2.085760624772916e+00 1.255608851371819e+02 
4.460645653843308e+02 4 5.445009268095798e+02 8.803788605880843e+01 3.569304158554124e+02 -4.016606380417056e+02 - ME 4.151412887207382e-03 + ME 1.043536440561108e-03 Event 111 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2940,7 +2940,7 @@ Event 111 Batch 1 2 4.637454700443120e+02 1.543048221589588e+02 -4.372769385391800e+02 6.225902899506631e+00 3 3.246747011850293e+02 -5.128652792678845e+01 -2.274142471268230e+02 2.259781269206006e+02 4 7.115798287706589e+02 -1.030182942321705e+02 6.646911856660031e+02 -2.322040298201072e+02 - ME 1.240833065187375e-03 + ME 5.219332617201280e-04 Event 112 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2948,7 +2948,7 @@ Event 112 Batch 1 2 6.923761777814550e+02 3.939190124845535e+02 4.398224952082178e+01 -5.676954684419625e+02 3 5.277418353503033e+02 -4.270527740856185e+02 4.970714905179168e+01 3.060499505927539e+02 4 2.798819868682421e+02 3.313376160106501e+01 -9.368939857261346e+01 2.616455178492087e+02 - ME 5.385735959435035e-05 + ME 4.381536575941429e-05 Event 113 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2956,7 +2956,7 @@ Event 113 Batch 1 2 7.174898838850694e+02 -6.130145063482008e+02 3.726797356942233e+02 1.071275347265524e+01 3 1.705115822510491e+02 3.993583199494100e+01 -1.624320619120163e+02 3.309311510932528e+01 4 6.119985338638814e+02 5.730786743532599e+02 -2.102476737822071e+02 -4.380586858198049e+01 - ME 2.197559713387976e-04 + ME 4.914674319256647e-05 Event 114 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2964,7 +2964,7 @@ Event 114 Batch 1 2 6.772826088252357e+02 -1.430288042596954e+02 -3.410390118171982e+02 5.674036356844296e+02 3 6.725037798358682e+02 3.626161999767239e+01 2.510744134018114e+02 -6.228226615527174e+02 4 1.502136113388951e+02 1.067671842620232e+02 8.996459841538707e+01 
5.541902586828807e+01 - ME 8.926156406775035e-05 + ME 7.986648389935193e-05 Event 115 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2972,7 +2972,7 @@ Event 115 Batch 1 2 9.320551230331124e+01 1.288474310894606e+01 -2.581623869377880e+01 8.862715576190526e+01 3 6.672654287607164e+02 1.525114284892182e+02 2.829200767588875e+02 5.847560574856374e+02 4 7.395290589359720e+02 -1.653961715981643e+02 -2.571038380651088e+02 -6.733832132475428e+02 - ME 1.800237703627863e+00 + ME 4.304938165075599e-01 Event 116 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2980,7 +2980,7 @@ Event 116 Batch 1 2 4.951202926530015e+02 -4.575339943514647e+02 4.220102313368785e+01 1.844608951947751e+02 3 3.101750696753587e+02 -4.711582585559527e+01 2.172188132736168e+02 2.163438466008694e+02 4 6.947046376716394e+02 5.046498202070600e+02 -2.594198364073050e+02 -4.008047417956444e+02 - ME 1.933367100533606e-03 + ME 5.988625984136040e-04 Event 117 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2988,7 +2988,7 @@ Event 117 Batch 1 2 6.543248494478489e+02 1.390926466871539e+02 9.107024539473488e+01 6.328510524967589e+02 3 5.040443237953712e+02 6.874740772121054e+01 1.336336536624387e+02 -4.811200690999848e+02 4 3.416308267567792e+02 -2.078400544083643e+02 -2.247038990571737e+02 -1.517309833967742e+02 - ME 4.207453923038474e-04 + ME 3.026560085299302e-04 Event 118 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2996,7 +2996,7 @@ Event 118 Batch 1 2 5.829230400014206e+02 5.307803371482089e+02 -3.192285892796672e+01 2.388565162167381e+02 3 3.965113090906140e+02 -5.470249758902820e+01 2.256187790844517e+02 -3.214420966810604e+02 4 5.205656509079653e+02 -4.760778395591807e+02 -1.936959201564850e+02 8.258558046432242e+01 - ME 7.464562943747175e-05 + ME 2.168340782914014e-05 Event 119 Batch 1 
0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3004,7 +3004,7 @@ Event 119 Batch 1 2 3.549567073991255e+02 2.281637891139605e+02 1.474502150787006e+02 2.284600261271838e+02 3 4.727085372220640e+02 7.463684946128350e+01 -3.092948822053327e+02 3.495988811576870e+02 4 6.723347553788102e+02 -3.028006385752440e+02 1.618446671266322e+02 -5.780589072848707e+02 - ME 1.455012849105755e-02 + ME 1.664672733965846e-03 Event 120 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3012,7 +3012,7 @@ Event 120 Batch 1 2 7.192117275853698e+02 4.094232477570927e+02 -5.552624156333899e+02 -2.032775518283800e+02 3 3.685061529232585e+02 -2.522084621786424e+02 1.741347663658646e+02 2.046087962197375e+02 4 4.122821194913712e+02 -1.572147855784500e+02 3.811276492675253e+02 -1.331244391357209e+00 - ME 9.281995463485567e-05 + ME 1.900262756274459e-05 Event 121 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3020,7 +3020,7 @@ Event 121 Batch 1 2 1.923953846467517e+02 -5.182078839520096e+01 -1.486351786617837e+02 -1.106262789198433e+02 3 6.582127150877787e+02 -3.509182841037630e+02 -1.191939510078701e+02 5.439606035624541e+02 4 6.493919002654695e+02 4.027390724989639e+02 2.678291296696539e+02 -4.333343246426108e+02 - ME 1.925188892577692e-03 + ME 5.360055113881300e-04 Event 122 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3028,7 +3028,7 @@ Event 122 Batch 1 2 6.905732817636248e+02 3.462508192534570e+02 -5.375670569609784e+02 -2.608131264380775e+02 3 7.097575386120018e+02 -2.677396278645660e+02 5.849221766424142e+02 2.998954860604125e+02 4 9.966917962437387e+01 -7.851119138889094e+01 -4.735511968143584e+01 -3.908235962233509e+01 - ME 5.007312135859238e-04 + ME 3.451011759976180e-05 Event 123 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 
@@ -3036,7 +3036,7 @@ Event 123 Batch 1 2 4.035126033432560e+02 2.481103298242076e+01 -3.878573016343356e+02 -1.085059780294573e+02 3 3.541388771651666e+02 1.572344474048876e+02 -3.105653677404273e+02 -6.512161875550808e+01 4 7.423485194915780e+02 -1.820454803873083e+02 6.984226693747627e+02 1.736275967849660e+02 - ME 2.043564129780385e-02 + ME 3.471230489499830e-03 Event 124 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3044,7 +3044,7 @@ Event 124 Batch 1 2 5.353042728143347e+02 -4.785252055946481e+02 -2.279396245170433e+02 7.488537693644093e+01 3 7.454081943698113e+02 6.785307544150930e+02 3.069354144183444e+02 -3.193811081429426e+01 4 2.192875328158541e+02 -2.000055488204448e+02 -7.899578990130104e+01 -4.294726612214667e+01 - ME 1.399009675490331e-04 + ME 6.765427234678898e-06 Event 125 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3052,7 +3052,7 @@ Event 125 Batch 1 2 7.351681880566981e+02 -1.932492970253984e+01 -4.393064933429818e+02 -5.891592456452273e+02 3 6.537497908129355e+02 -2.883189353576726e+01 3.454898907503182e+02 5.542510679217788e+02 4 1.110820211303664e+02 4.815682323830688e+01 9.381660259266363e+01 3.490817772344844e+01 - ME 1.431077255619906e-04 + ME 6.639428548470109e-05 Event 126 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3060,7 +3060,7 @@ Event 126 Batch 1 2 5.568747108147126e+02 1.149185667256990e+02 4.264979152236775e+02 -3.391204725116689e+02 3 6.934211462641822e+02 -1.939160042589616e+02 -6.294239612595663e+02 2.169215212257340e+02 4 2.497041429211053e+02 7.899743753326281e+01 2.029260460358889e+02 1.221989512859350e+02 - ME 3.344185566612618e-05 + ME 9.143592130512915e-06 Event 127 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3068,7 +3068,7 @@ Event 127 Batch 1 2 7.108931196972316e+02 4.270547743949553e+02 
5.664613189451065e+02 -4.598718776252147e+01 3 4.445675167124290e+02 -1.247884466860518e+02 -4.129475031266345e+02 1.074359351009545e+02 4 3.445393635903407e+02 -3.022663277089035e+02 -1.535138158184720e+02 -6.144874733843321e+01 - ME 1.180920695556687e-04 + ME 1.427738327825488e-05 Event 128 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3076,7 +3076,7 @@ Event 128 Batch 1 2 5.312407894292422e+02 -7.192118124205533e+01 -4.398126160332176e+02 -2.891521793453568e+02 3 5.717192413787027e+02 3.434745903572437e+02 1.811915566412192e+02 4.195923218357252e+02 4 3.970399691920551e+02 -2.715534091151883e+02 2.586210593919984e+02 -1.304401424903685e+02 - ME 1.848006274423395e-04 + ME 3.532660248239223e-05 Event 129 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3084,7 +3084,7 @@ Event 129 Batch 1 2 6.644129951428383e+02 -3.595672586482287e+02 4.645590915434784e+02 3.103882489514914e+02 3 1.967652372382455e+02 -5.204943416929049e+01 8.794498000645085e+00 -1.895522930301724e+02 4 6.388217676189169e+02 4.116166928175192e+02 -4.733535895441232e+02 -1.208359559213191e+02 - ME 3.082956717278722e-04 + ME 9.192558188476414e-05 Event 130 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3092,7 +3092,7 @@ Event 130 Batch 1 2 7.302263990443511e+02 -1.919590472356484e+02 3.836584700935805e+02 -5.909217345563752e+02 3 4.156541164903923e+02 2.203243106780774e+02 -1.767969453775071e+02 3.049071707664833e+02 4 3.541194844652567e+02 -2.836526344242890e+01 -2.068615247160734e+02 2.860145637898919e+02 - ME 3.110012368642411e-05 + ME 2.258971422042701e-05 Event 131 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3100,7 +3100,7 @@ Event 131 Batch 1 2 2.308323688168238e+02 -1.780469473698228e+02 1.469011263880862e+02 1.710582294195638e+00 3 7.308075033948297e+02 5.219262643529272e+02 
-3.840435213624620e+02 3.379099810545737e+02 4 5.383601277883465e+02 -3.438793169831044e+02 2.371423949743758e+02 -3.396205633487694e+02 - ME 1.061667055612532e-03 + ME 7.770640764079256e-05 Event 132 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3108,7 +3108,7 @@ Event 132 Batch 1 2 5.909630762789660e+02 -4.293852116769707e+02 -3.988922148105424e+02 7.583335995300355e+01 3 5.415993952096327e+02 2.260703809971038e+02 3.221145619770360e+02 -3.721079100067703e+02 4 3.674375285114020e+02 2.033148306798666e+02 7.677765283350686e+01 2.962745500537670e+02 - ME 3.321676569401813e-05 + ME 1.628447412544396e-05 Event 133 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3116,7 +3116,7 @@ Event 133 Batch 1 2 4.506052863582997e+02 2.189991325227701e+02 -3.914006430783634e+02 -4.347459771134355e+01 3 4.043998006859111e+02 3.160348074769272e+02 8.738893432792010e+01 2.366946839598570e+02 4 6.449949129557901e+02 -5.350339399996973e+02 3.040117087504433e+02 -1.932200862485142e+02 - ME 3.121497332919934e-04 + ME 8.705579101282482e-05 Event 134 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3124,7 +3124,7 @@ Event 134 Batch 1 2 7.151470882937614e+02 -1.041377497037516e+01 -4.186394096729767e+01 7.138447461686595e+02 3 3.416424731356660e+02 1.638631808685801e+02 3.081581136487586e+01 -2.981925940995343e+02 4 4.432104385705719e+02 -1.534494058982047e+02 1.104812960242199e+01 -4.156521520691248e+02 - ME 5.534325530265236e-02 + ME 6.342792451335309e-03 Event 135 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3132,7 +3132,7 @@ Event 135 Batch 1 2 7.115730144432832e+02 -3.219296530898238e+02 2.184242454110169e+02 -5.958089478700319e+02 3 1.627059459894212e+02 -6.880794311551747e+01 -3.259803939022061e+01 1.437917231708342e+02 4 6.257210395672955e+02 3.907375962053413e+02 
-1.858262060207963e+02 4.520172246991979e+02 - ME 2.112989182930814e-04 + ME 1.277979532321233e-04 Event 136 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3140,7 +3140,7 @@ Event 136 Batch 1 2 7.195404287114588e+02 -4.369992732083461e+02 -4.270318019286997e+02 3.800182941743402e+02 3 6.668605996318223e+02 3.634158794560479e+02 4.690430049045651e+02 -3.043527845290675e+02 4 1.135989716567186e+02 7.358339375229815e+01 -4.201120297586535e+01 -7.566550964527264e+01 - ME 1.804344388349211e-03 + ME 7.515399240093053e-05 Event 137 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3148,7 +3148,7 @@ Event 137 Batch 1 2 6.722782806744999e+02 -6.045581260407005e+02 -2.538460778300668e+02 1.484241478840623e+02 3 6.869263774705689e+02 6.661257235671316e+02 1.481819739565761e+02 -7.865412297735662e+01 4 1.407953418549304e+02 -6.156759752643097e+01 1.056641038734908e+02 -6.977002490670534e+01 - ME 5.192812231664224e-04 + ME 2.119149330726453e-05 Event 138 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3156,7 +3156,7 @@ Event 138 Batch 1 2 6.463287544295633e+02 8.684709774942756e+01 2.409249839962013e+02 -5.934253049048401e+02 3 3.917330799270068e+02 1.767690441671677e+02 4.696120064017492e+01 3.464132742372293e+02 4 4.619381656434300e+02 -2.636161419165952e+02 -2.878861846363762e+02 2.470120306676108e+02 - ME 5.804753959762886e-05 + ME 4.203806696206548e-05 Event 139 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3164,7 +3164,7 @@ Event 139 Batch 1 2 2.994802063237944e+02 -1.272876183039153e+02 6.552211336810879e+00 2.710042891410713e+02 3 7.257546970836092e+02 -8.848613612326799e+00 5.127896146768584e+00 -7.256826352181574e+02 4 4.747650965925943e+02 1.361362319162416e+02 -1.168010748357900e+01 4.546783460770868e+02 - ME 1.724196014694060e-04 + ME 
1.500396153249019e-04 Event 140 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3172,7 +3172,7 @@ Event 140 Batch 1 2 7.326756101999780e+02 5.655005379385240e+02 4.343799907428446e+02 1.683351270988810e+02 3 7.428339005597779e+02 -5.680473426214219e+02 -4.534832054058505e+02 -1.532233754243464e+02 4 2.449048924024402e+01 2.546804682897962e+00 1.910321466300584e+01 -1.511175167453447e+01 - ME 4.669436438173466e-03 + ME 1.024603362434272e-04 Event 141 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3180,7 +3180,7 @@ Event 141 Batch 1 2 7.363238871411332e+02 -6.772722174663238e+02 -2.824373475598683e+02 -6.086341204880675e+01 3 5.504260535970963e+02 4.650298533191528e+02 2.914345410616540e+02 4.221355560271704e+01 4 2.132500592617708e+02 2.122423641471711e+02 -8.997193501785816e+00 1.864985644608987e+01 - ME 7.300791864660033e-05 + ME 1.166401869382226e-05 Event 142 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3188,7 +3188,7 @@ Event 142 Batch 1 2 5.862280565156834e+02 4.248793793115829e+01 -2.479279504752411e+02 -5.295184989682986e+02 3 4.287264749982929e+02 -3.025296967755320e+02 2.785471849307642e+02 1.212173201341831e+02 4 4.850454684860405e+02 2.600417588443628e+02 -3.061923445551928e+01 4.083011788341197e+02 - ME 4.569028399965169e-05 + ME 1.949810022878841e-05 Event 143 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3196,7 +3196,7 @@ Event 143 Batch 1 2 2.464531733710510e+02 4.046044690030688e+01 -2.103865804466287e+02 1.218179201483223e+02 3 5.378449948854583e+02 4.607829603950880e+02 -2.747641700963839e+02 3.822241180409925e+01 4 7.157018317434903e+02 -5.012434072953949e+02 4.851507505430126e+02 -1.600403319524219e+02 - ME 1.284493741497843e-03 + ME 4.863434295951330e-04 Event 144 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -3204,7 +3204,7 @@ Event 144 Batch 1 2 5.367418008803521e+02 -1.343004856786532e+02 -4.048537736989352e+02 -3.258044847458254e+02 3 6.294877130859599e+02 3.313530054622211e+02 5.282137272543231e+02 8.631468610520756e+01 4 3.337704860336884e+02 -1.970525197835678e+02 -1.233599535553879e+02 2.394897986406179e+02 - ME 2.612855607885159e-05 + ME 8.754930746282009e-06 Event 145 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3212,7 +3212,7 @@ Event 145 Batch 1 2 6.805380148481771e+01 -3.411514819754512e+01 -4.339750646760406e+01 -3.980116822894492e+01 3 6.831461500979880e+02 -3.834019790669201e+02 -2.756424954453614e+02 -4.936727656514237e+02 4 7.488000484171945e+02 4.175171272644653e+02 3.190400019129655e+02 5.334739338803686e+02 - ME 4.832444287218038e-01 + ME 4.117012994651258e-01 Event 146 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3220,7 +3220,7 @@ Event 146 Batch 1 2 5.031746658797123e+02 4.202301876294930e+02 2.767377273314875e+02 2.750283520766640e+00 3 4.317115817339341e+02 -1.098088257924671e+02 -5.455162180567243e+01 4.139336083717602e+02 4 5.651137523863538e+02 -3.104213618370259e+02 -2.221861055258150e+02 -4.166838918925268e+02 - ME 4.446377084117306e-03 + ME 1.122040831263755e-03 Event 147 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3228,7 +3228,7 @@ Event 147 Batch 1 2 4.251223043705630e+02 -4.223502783198938e+02 -4.694338569631599e+01 1.206377286808446e+01 3 5.457819748703678e+02 2.791608945230574e+02 -4.384138579515959e+02 -1.665546403390879e+02 4 5.290957207590696e+02 1.431893837968364e+02 4.853572436479118e+02 1.544908674710035e+02 - ME 5.820013407126093e-05 + ME 1.117959404473985e-05 Event 148 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3236,7 +3236,7 @@ Event 148 Batch 1 2 
6.905785821272525e+02 6.249608768654489e+02 -6.243387159972350e+01 -2.870970082698929e+02 3 1.361638260920089e+02 2.862044352088506e+01 1.704210379179796e+01 1.320266050727362e+02 4 6.732575917807402e+02 -6.535813203863343e+02 4.539176780792534e+01 1.550704031971573e+02 - ME 9.573948308169230e-04 + ME 5.047601105033982e-04 Event 149 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3244,7 +3244,7 @@ Event 149 Batch 1 2 6.694705528096943e+02 -5.216497821741067e+02 -3.785079074709545e+02 1.811189935345937e+02 3 2.821401257551277e+02 1.148500354702071e-01 2.786662494166578e+02 -4.413795199872407e+01 4 5.483893214351779e+02 5.215349321386365e+02 9.984165805429673e+01 -1.369810415358697e+02 - ME 1.943324414096923e-04 + ME 3.486097449584098e-05 Event 150 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3252,7 +3252,7 @@ Event 150 Batch 1 2 4.637486188995366e+02 -4.033412855298819e+02 -2.279949807412008e+02 -1.992178895453991e+01 3 3.756800751656199e+02 6.230662615514293e+01 -2.632310737913946e+02 -2.606967683041707e+02 4 6.605713059348438e+02 3.410346593747391e+02 4.912260545325952e+02 2.806185572587107e+02 - ME 2.156945366470290e-04 + ME 4.211370643652993e-05 Event 151 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3260,7 +3260,7 @@ Event 151 Batch 1 2 3.821954355913596e+02 -2.528320044280690e+02 2.861764538722267e+02 1.588602445142563e+01 3 6.796189325418250e+02 2.911670128135291e+02 -4.900375979142738e+02 3.700902818893582e+02 4 4.381856318668152e+02 -3.833500838546018e+01 2.038611440420471e+02 -3.859763063407838e+02 - ME 8.197229841786387e-03 + ME 1.923941526207248e-04 Event 152 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3268,7 +3268,7 @@ Event 152 Batch 1 2 6.751133298339792e+02 -2.999578895043981e+02 -2.855974213275218e+02 -5.331391803034741e+02 3 
4.976977783498468e+02 -3.003988119418482e+00 1.843802943840355e+02 4.622747685874795e+02 4 3.271888918161745e+02 3.029618776238166e+02 1.012171269434863e+02 7.086441171599445e+01 - ME 1.204579535049519e-04 + ME 6.977738125195056e-05 Event 153 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3276,7 +3276,7 @@ Event 153 Batch 1 2 1.729293620257127e+02 1.558357805102956e+02 -7.193392860849491e+01 2.110174585940510e+01 3 6.524550819255464e+02 2.410158908712478e+02 5.786677971610501e+02 1.809766692333240e+02 4 6.746155560487412e+02 -3.968516713815435e+02 -5.067338685525552e+02 -2.020784150927291e+02 - ME 5.985591428637023e-04 + ME 1.391654510317005e-04 Event 154 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3284,7 +3284,7 @@ Event 154 Batch 1 2 6.585658455851002e+02 -2.410305357139302e+02 -2.116446673272157e+02 -5.751693564652295e+02 3 5.764400833248005e+02 3.388133979948972e+02 3.092747322371399e+02 3.490527051926400e+02 4 2.649940710900988e+02 -9.778286228096688e+01 -9.763006490992416e+01 2.261166512725894e+02 - ME 3.655181799213059e-05 + ME 2.686434432328395e-05 Event 155 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3292,7 +3292,7 @@ Event 155 Batch 1 2 5.686586231936359e+02 -1.693366246265498e+02 -1.542203680657918e+02 5.204938187588979e+02 3 1.882190564276536e+02 -1.089234770645493e+02 -9.145416397064866e+01 1.232810822434430e+02 4 7.431223203787102e+02 2.782601016910992e+02 2.456745320364404e+02 -6.437749010023409e+02 - ME 6.696396361607482e-01 + ME 4.701119881405690e-01 Event 156 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3300,7 +3300,7 @@ Event 156 Batch 1 2 6.143652095725128e+02 2.879464601546110e+02 5.379391909976823e+02 -7.178351904348040e+01 3 6.287751645293085e+02 -4.584164185734781e+02 -4.225140875260598e+02 -8.181956094447702e+01 4 
2.568596258981782e+02 1.704699584188668e+02 -1.154251034716223e+02 1.536030799879581e+02 - ME 2.899571701789112e-05 + ME 7.769660148731367e-06 Event 157 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3308,7 +3308,7 @@ Event 157 Batch 1 2 5.050842109798973e+02 4.185498850973046e+02 -1.305174306570672e+02 -2.507812875014723e+02 3 5.170424494038050e+02 -3.084595065654854e+02 3.930456446728388e+02 -1.330441599566699e+02 4 4.778733396162975e+02 -1.100903785318191e+02 -2.625282140157716e+02 3.838254474581424e+02 - ME 4.033251359625283e-05 + ME 1.243977993100618e-05 Event 158 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3316,7 +3316,7 @@ Event 158 Batch 1 2 4.312542366204098e+02 -3.114503370626313e+02 2.737030704635235e+02 1.185982013584742e+02 3 6.944315393047829e+02 2.166643175309468e+02 -6.173965008138002e+02 -2.326226495269423e+02 4 3.743142240748070e+02 9.478601953168439e+01 3.436934303502764e+02 1.140244481684682e+02 - ME 3.680357310121394e-05 + ME 5.864250821924803e-06 Event 159 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3324,7 +3324,7 @@ Event 159 Batch 1 2 5.860112473308646e+02 -1.581297551692178e+02 4.935632758462007e+02 2.734948907463652e+02 3 3.772013313646349e+02 -2.371132827856262e+02 -1.305099443644436e+02 -2.627266448837395e+02 4 5.367874213045002e+02 3.952430379548442e+02 -3.630533314817573e+02 -1.076824586262577e+01 - ME 1.030382455754272e-04 + ME 2.805189658646002e-05 Event 160 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3332,7 +3332,7 @@ Event 160 Batch 1 2 5.883409724804535e+02 -3.739819298758817e+02 -2.887651121595530e+02 3.505671490956299e+02 3 4.300332553173178e+02 1.788055146224819e+02 3.829208006453583e+02 7.955406370837679e+01 4 4.816257722022287e+02 1.951764152533999e+02 -9.415568848580530e+01 -4.301212128040066e+02 - 
ME 9.797271586219467e-03 + ME 2.307516153071828e-04 Event 161 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3340,7 +3340,7 @@ Event 161 Batch 1 2 6.868305165969147e+02 4.119610488151656e+00 5.515184990814985e+02 4.093244831537709e+02 3 3.260821955312833e+02 -1.956999890649130e+02 -2.483451099187458e+02 -7.972338993006402e+01 4 4.870872878718022e+02 1.915803785767614e+02 -3.031733891627526e+02 -3.296010932237070e+02 - ME 1.075603053132144e-03 + ME 9.860610555787331e-05 Event 162 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3348,7 +3348,7 @@ Event 162 Batch 1 2 2.159818802305119e+02 -2.018126805027919e+02 4.096951387107715e+01 -6.512536763314942e+01 3 6.870078865581224e+02 4.896730732821633e+02 -2.356527215298929e+02 -4.203188222421333e+02 4 5.970102332113654e+02 -2.878603927793715e+02 1.946832076588156e+02 4.854441898752826e+02 - ME 5.344822454174306e-05 + ME 2.809071549115161e-05 Event 163 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3356,7 +3356,7 @@ Event 163 Batch 1 2 4.889699854403287e+02 -4.067839821807834e+01 -2.740835242435768e+02 4.028835269878222e+02 3 4.282392920294498e+02 4.007468150560176e+02 -8.832740907173851e+01 -1.224301852772270e+02 4 5.827907225302220e+02 -3.600684168379390e+02 3.624109333153153e+02 -2.804533417105952e+02 - ME 4.336231422638298e-04 + ME 1.173701793303044e-04 Event 164 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3364,7 +3364,7 @@ Event 164 Batch 1 2 6.224346677404150e+02 -1.282049393554146e+02 5.480608628970117e+02 -2.657399098565701e+02 3 7.444531740822750e+02 1.794330131141779e+02 -6.708967511266460e+02 2.681638893170603e+02 4 1.331121581773107e+02 -5.122807375876333e+01 1.228358882296343e+02 -2.423979460490191e+00 - ME 1.368953177788070e-04 + ME 1.571413941583783e-05 Event 165 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3372,7 +3372,7 @@ Event 165 Batch 1 2 6.980339706506675e+02 -5.154669325341684e+01 -4.947847840614098e+02 4.896757907618869e+02 3 1.362964882116331e+02 4.252532371924361e+01 -5.641238783031591e+01 -1.165588780002596e+02 4 6.656695411377010e+02 9.021369534174053e+00 5.511971718917263e+02 -3.731169127616273e+02 - ME 1.450267418906797e-03 + ME 4.238311927693088e-04 Event 166 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3380,7 +3380,7 @@ Event 166 Batch 1 2 3.060640747281171e+02 -1.981167412190918e+02 -9.095380261170779e+01 -2.148310510107333e+02 3 5.580104478575086e+02 -3.585720992432471e+02 -1.558095186186280e+02 3.981521109704927e+02 4 6.359254774143739e+02 5.566888404623389e+02 2.467633212303362e+02 -1.833210599597597e+02 - ME 3.000804338470548e-04 + ME 1.099447007687216e-04 Event 167 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3388,7 +3388,7 @@ Event 167 Batch 1 2 2.833153623322893e+02 2.526850217013923e+02 8.687924899084067e+01 9.417998957332070e+01 3 6.595685044563415e+02 -8.780626893611850e+01 -2.875856231737449e+02 -5.870393347553995e+02 4 5.571161332113688e+02 -1.648787527652738e+02 2.007063741829043e+02 4.928593451820789e+02 - ME 7.367447958524992e-05 + ME 4.244421486768831e-05 Event 168 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3396,7 +3396,7 @@ Event 168 Batch 1 2 6.026267479353969e+02 -5.987968578530475e+02 5.775180228477150e+00 6.758674164241529e+01 3 4.991211680715713e+02 3.812575567959843e+02 3.220701575873951e+02 -5.952259631185711e+00 4 3.982520839930309e+02 2.175393010570631e+02 -3.278453378158730e+02 -6.163448201122968e+01 - ME 9.606399998327532e-05 + ME 1.203107058680061e-05 Event 169 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-3404,7 +3404,7 @@ Event 169 Batch 1 2 5.510662376679772e+02 -9.251111075413947e+01 -5.291920243323356e+02 -1.227660134875281e+02 3 5.034535790022877e+02 -2.816014265681677e+02 3.283802195198170e+02 2.575511098657944e+02 4 4.454801833297348e+02 3.741125373223072e+02 2.008118048125185e+02 -1.347850963782663e+02 - ME 1.532484123791625e-04 + ME 2.085195230877358e-05 Event 170 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3412,7 +3412,7 @@ Event 170 Batch 1 2 2.814808559369750e+02 3.658097943502287e+01 -1.412301634042880e+02 -2.407225480659935e+02 3 6.646522150540470e+02 2.753499086551696e+02 -1.631412967142655e+02 5.825203104495404e+02 4 5.538669290089779e+02 -3.119308880901926e+02 3.043714601185535e+02 -3.417977623835468e+02 - ME 7.823510217753851e-04 + ME 2.587160315460459e-04 Event 171 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3420,7 +3420,7 @@ Event 171 Batch 1 2 1.777965289077954e+02 -6.143496808852239e+01 -1.603735842336773e+00 1.668375809551635e+02 3 7.439290290569696e+02 2.163074211412066e+01 -1.907051550939623e+01 -7.433699124308462e+02 4 5.782744420352348e+02 3.980422597440174e+01 2.067425135173305e+01 5.765323314756826e+02 - ME 2.063755640794395e-03 + ME 1.981167274383509e-03 Event 172 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3428,7 +3428,7 @@ Event 172 Batch 1 2 1.369499454750680e+02 -1.250080331667568e+01 -3.518152151649629e+01 -1.317622025690455e+02 3 6.692885586315896e+02 -2.346283187163472e+02 -6.130705295376303e+02 1.305421486874673e+02 4 6.937614958933425e+02 2.471291220330227e+02 6.482520510541266e+02 1.220053881578238e+00 - ME 5.039586079692636e-04 + ME 1.548169060571347e-04 Event 173 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3436,7 +3436,7 @@ Event 173 Batch 1 2 7.088772083623137e+02 4.973951266878932e+01 
3.171232495758680e+01 -7.064185769505260e+02 3 5.785136264307895e+02 8.584813303397833e+01 5.766505028397120e+01 5.691949191590089e+02 4 2.126091652068944e+02 -1.355876457027672e+02 -8.937737524155732e+01 1.372236577915166e+02 - ME 1.743760900867476e-04 + ME 1.732961413682620e-04 Event 174 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3444,7 +3444,7 @@ Event 174 Batch 1 2 4.367208701713482e+02 -3.923163287174704e+01 4.325755195957351e+02 -4.543585887727652e+01 3 3.528978856725088e+02 9.622572295106905e+01 1.987077746703234e+02 -2.753048278549415e+02 4 7.103812441561454e+02 -5.699409007932221e+01 -6.312832942660567e+02 3.207406867322186e+02 - ME 9.353677491192390e-04 + ME 1.541208918572365e-04 Event 175 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3452,7 +3452,7 @@ Event 175 Batch 1 2 6.418562164876806e+02 1.962785648722137e+02 -6.110736372974047e+02 -6.567908015856712e+00 3 4.843421844702149e+02 -1.886631806266161e+02 3.569879071908527e+02 -2.674942804112337e+02 4 3.738015990421035e+02 -7.615384245597569e+00 2.540857301065516e+02 2.740621884270906e+02 - ME 3.029111560812189e-05 + ME 1.279055979705581e-05 Event 176 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3460,7 +3460,7 @@ Event 176 Batch 1 2 6.288652703123263e+02 4.005522031116294e+02 3.691482793515075e+02 3.142594606996526e+02 3 7.209127580467475e+02 -4.124575135572966e+02 -5.165298058232565e+02 -2.877341896975221e+02 4 1.502219716409257e+02 1.190531044566666e+01 1.473815264717492e+02 -2.652527100213051e+01 - ME 1.719274466020296e-04 + ME 1.300720357566141e-05 Event 177 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3468,7 +3468,7 @@ Event 177 Batch 1 2 4.716578040000077e+02 -4.521622645932388e+02 -1.012739918234145e+01 1.338200520767543e+02 3 3.021382980750606e+02 -2.714821202364266e+02 
6.773215888881064e+01 -1.140059832109250e+02 4 7.262038979249317e+02 7.236443848296653e+02 -5.760475970646905e+01 -1.981406886582933e+01 - ME 2.354271252348000e-03 + ME 6.442260552556652e-04 Event 178 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3476,7 +3476,7 @@ Event 178 Batch 1 2 7.350088877399502e+02 -3.684484945749095e+02 -2.561732769425163e+02 -5.821159885132296e+02 3 1.415495174310248e+02 7.181268644032879e+01 1.095010133995263e+02 5.374692563910759e+01 4 6.234415948290248e+02 2.966358081345808e+02 1.466722635429900e+02 5.283690628741219e+02 - ME 1.035408980291912e-04 + ME 6.828487731379645e-05 Event 179 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3484,7 +3484,7 @@ Event 179 Batch 1 2 7.426064621425413e+02 6.748632301344054e+01 7.201624948975951e+02 -1.681544967131679e+02 3 5.821031882499326e+02 8.394276920418550e-01 -5.588194474899291e+02 1.629854049874919e+02 4 1.752903496075256e+02 -6.832575070548241e+01 -1.613430474076661e+02 5.169091725675888e+00 - ME 9.197132478706931e-05 + ME 1.412410550503903e-05 Event 180 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3492,7 +3492,7 @@ Event 180 Batch 1 2 6.099515195485484e+02 2.272495331206023e+02 1.762692760011278e+02 -5.378918555193875e+02 3 5.718889655176699e+02 4.324570510796980e+01 -3.278409766521432e+02 4.665909256493895e+02 4 3.181595149337819e+02 -2.704952382285720e+02 1.515717006510154e+02 7.130092986999803e+01 - ME 5.401477812349802e-05 + ME 3.043963963928669e-05 Event 181 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3500,7 +3500,7 @@ Event 181 Batch 1 2 1.206370886915177e+02 -8.151225636567759e+01 1.767749325039422e+01 8.715827822142556e+01 3 6.451493408002739e+02 -6.748216257939080e+01 4.373428479320614e+02 4.694625256943417e+02 4 7.342135705082084e+02 1.489944189450684e+02 
-4.550203411824557e+02 -5.566208039157672e+02 - ME 7.131653341377736e-02 + ME 2.625479922313071e-02 Event 182 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3508,7 +3508,7 @@ Event 182 Batch 1 2 4.626866082364760e+02 -3.084610429505738e+02 3.306629079434072e+02 9.794245113140897e+01 3 4.974966719253473e+02 3.582955998671217e+02 1.664640547097976e+02 -3.023523113558579e+02 4 5.398167198381765e+02 -4.983455691654795e+01 -4.971269626532048e+02 2.044098602244489e+02 - ME 5.959042767905828e-05 + ME 1.414799589613471e-05 Event 183 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3516,7 +3516,7 @@ Event 183 Batch 1 2 3.304723045950491e+02 3.244647182058462e+00 3.209425641774955e+02 7.872284845075714e+01 3 4.379804819457451e+02 2.312428523500660e+02 3.131807483468383e+02 2.006775141049615e+02 4 7.315472134592065e+02 -2.344874995321247e+02 -6.341233125243344e+02 -2.794003625557186e+02 - ME 4.899988668912175e-03 + ME 2.330806393221907e-03 Event 184 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3524,7 +3524,7 @@ Event 184 Batch 1 2 7.470051035005908e+02 -4.953964753944513e+02 -4.028924750569613e+02 3.876552725878485e+02 3 2.183325716323390e+02 1.119040172022777e+02 1.451703047217021e+02 -1.186262424448778e+02 4 5.346623248670695e+02 3.834924581921736e+02 2.577221703352594e+02 -2.690290301429710e+02 - ME 5.441344453720516e-04 + ME 7.987999480474686e-05 Event 185 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3532,7 +3532,7 @@ Event 185 Batch 1 2 4.448583927494090e+02 2.810173563272025e+02 -3.384637477435971e+02 6.610995769032235e+01 3 6.236443795626774e+02 -1.690803760724666e+02 5.125139620028374e+02 3.125277225134823e+02 4 4.314972276879136e+02 -1.119369802547359e+02 -1.740502142592404e+02 -3.786376802038046e+02 - ME 6.949230823829164e-03 + ME 
1.405605442011058e-04 Event 186 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3540,7 +3540,7 @@ Event 186 Batch 1 2 6.802792190696962e+02 -1.681815241656754e+02 5.427923640013703e+02 3.739936368565512e+02 3 6.331554869749547e+02 3.172201723440435e+02 -4.588808692389625e+02 -2.994755095011972e+02 4 1.865652939553488e+02 -1.490386481783679e+02 -8.391149476240778e+01 -7.451812735535422e+01 - ME 3.276943053321406e-04 + ME 3.045129627255903e-05 Event 187 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3548,7 +3548,7 @@ Event 187 Batch 1 2 7.472897115267965e+02 -6.988402471604775e+02 -2.391684329048669e+02 1.134137672609268e+02 3 6.826908170748527e+02 6.328852277257668e+02 2.212839847556716e+02 -1.286718241709738e+02 4 7.001947139835140e+01 6.595501943471052e+01 1.788444814919547e+01 1.525805691004725e+01 - ME 1.461490870437387e-04 + ME 3.485925693242860e-05 Event 188 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3556,7 +3556,7 @@ Event 188 Batch 1 2 6.496068877140275e+02 -5.024316730938291e+02 -3.980061777252906e+02 -1.055585379310702e+02 3 4.885976180718368e+02 4.424928723138696e+02 1.459942636040002e+02 -1.470148473169288e+02 4 3.617954942141354e+02 5.993880077995960e+01 2.520119141212904e+02 2.525733852479991e+02 - ME 2.843805826594158e-05 + ME 1.006519408431335e-05 Event 189 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3564,7 +3564,7 @@ Event 189 Batch 1 2 4.082379946778654e+02 2.679237131173331e+02 -7.718184435750955e+01 2.981913934867987e+02 3 5.864211573889181e+02 -5.780822197382728e+02 -6.394893886953379e+01 7.497502433004084e+01 4 5.053408479332167e+02 3.101585066209396e+02 1.411307832270433e+02 -3.731664178168398e+02 - ME 1.937644878671120e-03 + ME 1.322787627040098e-04 Event 190 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -3572,7 +3572,7 @@ Event 190 Batch 1 2 6.472516823166364e+02 6.463779961822676e+02 -3.289365889632791e+01 6.945035458816692e+00 3 4.318767277050750e+02 -3.286790725415815e+02 -7.183748821760624e+00 -2.800642229191639e+02 4 4.208715899782885e+02 -3.176989236406859e+02 4.007740771808847e+01 2.731191874603472e+02 - ME 3.409584379294133e-05 + ME 1.272332211942340e-05 Event 191 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3580,7 +3580,7 @@ Event 191 Batch 1 2 6.757500036387052e+02 6.222744522021635e+02 -2.261571472854044e+02 1.351499844096745e+02 3 3.644673602666567e+02 -2.020102809038697e+02 1.114149692296405e+02 -2.821613151026251e+02 4 4.597826360946380e+02 -4.202641712982938e+02 1.147421780557637e+02 1.470113306929507e+02 - ME 5.389305783035389e-05 + ME 1.560703181590231e-05 Event 192 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3588,7 +3588,7 @@ Event 192 Batch 1 2 7.394562478491531e+02 -7.307873850878615e+02 3.988568028534699e+01 1.056147375500683e+02 3 8.098058518630978e+01 5.419286926826393e+01 4.244928426361276e+00 -6.002473390399248e+01 4 6.795631669645365e+02 6.765945158195976e+02 -4.413060871170821e+01 -4.559000364607596e+01 - ME 4.204295748489254e-04 + ME 1.231033846344155e-04 Event 193 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3596,7 +3596,7 @@ Event 193 Batch 1 2 5.607395612273153e+02 -3.164229781907934e+02 -3.517992386171808e+02 -3.009030576558548e+02 3 3.741643617741927e+02 -2.156271676189966e+02 1.666697084176705e+02 2.563690747778811e+02 4 5.650960769984922e+02 5.320501458097899e+02 1.851295301995104e+02 4.453398287797368e+01 - ME 9.141090879934244e-05 + ME 3.026844143728605e-05 Event 194 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3604,7 +3604,7 @@ Event 194 Batch 1 2 
5.729373416862012e+02 -2.155045544874616e+02 -1.679805246197324e+02 5.035846779262559e+02 3 2.831035485618876e+02 -2.543279085173982e+02 1.042261812492671e+02 -6.783684323208054e+01 4 6.439591097519118e+02 4.698324630048598e+02 6.375434337046515e+01 -4.357478346941756e+02 - ME 1.781231321893996e-03 + ME 5.497724763810379e-04 Event 195 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3612,7 +3612,7 @@ Event 195 Batch 1 2 5.572874060171201e+02 -5.433144409127298e+02 3.646295232533866e+01 1.185290019729285e+02 3 6.765845568040619e+02 5.574999049241243e+02 -1.212989803269169e+01 -3.831623469093195e+02 4 2.661280371788181e+02 -1.418546401139455e+01 -2.433305429264712e+01 2.646333449363910e+02 - ME 3.395618115588225e-04 + ME 3.378534889977447e-04 Event 196 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3620,7 +3620,7 @@ Event 196 Batch 1 2 5.405888343305829e+02 3.940239871950471e+02 -8.826690628749978e+01 -3.594305754554688e+02 3 6.983754392688073e+02 -3.888370902622853e+02 -5.513072771506098e+01 5.774898910559966e+02 4 2.610357264006097e+02 -5.186896932761887e+00 1.433976340025607e+02 -2.180593156005277e+02 - ME 5.539073969003598e-03 + ME 2.676929502290073e-04 Event 197 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3628,7 +3628,7 @@ Event 197 Batch 1 2 2.783346334111661e+02 2.282410890438732e+02 -1.474467226896361e+02 6.029624695020830e+01 3 6.434654504578666e+02 1.172104173128919e+01 6.205939438823057e+02 1.696277097949658e+02 4 5.781999161309674e+02 -2.399621307751624e+02 -4.731472211926695e+02 -2.299239567451741e+02 - ME 3.321087064690878e-04 + ME 4.280180350752636e-05 Event 198 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3636,7 +3636,7 @@ Event 198 Batch 1 2 4.349536439683943e+02 1.774777254208009e+02 -9.709992209949135e+01 3.850427697141142e+02 3 
4.134500153047116e+02 7.095914770071803e+01 -4.041194890923881e+02 -5.092301099466194e+01 4 6.515963407268921e+02 -2.484368731215197e+02 5.012194111918782e+02 -3.341197587194521e+02 - ME 7.849443582399766e-04 + ME 2.926862112764983e-04 Event 199 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3644,7 +3644,7 @@ Event 199 Batch 1 2 6.682109290882580e+02 2.136897997740939e+02 -5.035763266519416e+02 3.837361052354048e+02 3 1.424120473397155e+02 8.952788458880865e+01 -4.686863299276860e+01 -1.003458038481504e+02 4 6.893770235720265e+02 -3.032176843629025e+02 5.504449596447103e+02 -2.833903013872543e+02 - ME 1.167594898598604e-03 + ME 4.183851150998592e-04 Event 200 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3652,7 +3652,7 @@ Event 200 Batch 1 2 5.959952693237885e+02 -4.878566955018547e+02 -2.510837703973929e+01 -3.414319479966339e+02 3 4.479637599869168e+02 4.499951041477978e+01 7.146287716862105e+01 4.399313940955211e+02 4 4.560409706892941e+02 4.428571850870749e+02 -4.635450012888173e+01 -9.849944609888662e+01 - ME 5.545496796633981e-04 + ME 3.228844805909175e-04 Event 201 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3660,7 +3660,7 @@ Event 201 Batch 1 2 5.203096708642927e+02 -1.112696379946441e+02 1.367824427202020e+02 4.895219960522141e+02 3 2.871951825199399e+02 -2.582762312778227e+02 1.200876310962787e+02 3.678888524092984e+01 4 6.924951466157675e+02 3.695458692724667e+02 -2.568700738164807e+02 -5.263108812931440e+02 - ME 6.577575910850049e-03 + ME 2.285182473348715e-03 Event 202 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3668,7 +3668,7 @@ Event 202 Batch 1 2 2.158792376054218e+02 2.112389782008981e+01 -7.195062193526132e+01 -2.024369881546198e+02 3 5.463652944256570e+02 2.787950008966254e+02 -3.108926376755554e+02 -3.523267663221479e+02 4 
7.377554679689213e+02 -2.999188987167153e+02 3.828432596108168e+02 5.547637544767679e+02 - ME 8.695282964050810e-03 + ME 1.952686275320307e-03 Event 203 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3676,7 +3676,7 @@ Event 203 Batch 1 2 7.124273471334275e+02 4.879265047129839e+02 -1.059167473143779e+02 -5.081949365946950e+02 3 6.746108110440506e+02 -5.248642991835990e+02 4.352799102536777e+01 4.215714978711400e+02 4 1.129618418225217e+02 3.693779447061509e+01 6.238875628901040e+01 8.662343872355494e+01 - ME 5.361938367485652e-05 + ME 4.211918129012132e-05 Event 204 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3684,7 +3684,7 @@ Event 204 Batch 1 2 7.084787759842808e+02 4.992472551829619e+02 -4.528122431715626e+02 -2.183012291454193e+02 3 1.034373169902747e+02 -8.959882065299325e+01 -3.938861547415055e+01 -3.346441176487074e+01 4 6.880839070254444e+02 -4.096484345299685e+02 4.922008586457131e+02 2.517656409102901e+02 - ME 2.988048706021647e-04 + ME 1.033102023766027e-04 Event 205 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3692,7 +3692,7 @@ Event 205 Batch 1 2 6.496569846879349e+02 -5.869603795046561e+02 -2.345911576090251e+02 1.499956646614410e+02 3 2.543878192344406e+02 -1.851019090219859e+00 2.474675926596849e+02 -5.890268997594536e+01 4 5.959551960776247e+02 5.888113985948760e+02 -1.287643505065981e+01 -9.109297468549572e+01 - ME 1.871447246980874e-04 + ME 4.134215827558992e-05 Event 206 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3700,7 +3700,7 @@ Event 206 Batch 1 2 6.172060642836410e+02 2.978040691523503e+02 4.166709400833434e+02 3.444435946201744e+02 3 7.205754982426181e+02 -2.468045809177361e+02 -5.690387091428452e+02 -3.667580878490107e+02 4 1.622184374737409e+02 -5.099948823461420e+01 1.523677690595017e+02 2.231449322883641e+01 - 
ME 7.356489425273393e-05 + ME 1.138691716042452e-05 Event 207 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3708,7 +3708,7 @@ Event 207 Batch 1 2 5.250113096394139e+02 -1.091977068802181e+02 -4.322753509449321e+02 2.772196909074646e+02 3 5.240251005653129e+02 3.541948269240045e+02 3.738549241960732e+02 9.685466564450643e+01 4 4.509635897952731e+02 -2.449971200437864e+02 5.842042674885889e+01 -3.740743565519710e+02 - ME 3.378615964480245e-03 + ME 9.518274156960593e-05 Event 208 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3716,7 +3716,7 @@ Event 208 Batch 1 2 4.449444343820048e+02 1.928662436733418e+02 -3.595193210859464e+02 1.775500478872298e+02 3 4.894053462810564e+02 -2.195789585225567e+02 2.295326432211599e+02 3.723136307450180e+02 4 5.656502193369389e+02 2.671271484921488e+01 1.299866778647865e+02 -5.498636786322478e+02 - ME 2.068943926258950e-01 + ME 2.179806976662403e-03 Event 209 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3724,7 +3724,7 @@ Event 209 Batch 1 2 4.949423498078044e+02 -2.830370809537592e+02 -1.684680620467476e+02 -3.694271951395289e+02 3 6.326444171345161e+02 3.898538983719823e+02 -1.748162179498052e+02 4.665749526039372e+02 4 3.724132330576786e+02 -1.068168174182231e+02 3.432842799965525e+02 -9.714775746440780e+01 - ME 1.473942246791387e-04 + ME 3.638076645868775e-05 Event 210 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3732,7 +3732,7 @@ Event 210 Batch 1 2 5.469464199121014e+02 -4.947084169679945e+02 2.319240083666633e+02 -2.500445517953792e+01 3 2.929141603572806e+02 -5.602902696925145e+01 2.099470855189298e+01 2.867379913571110e+02 4 6.601394197306178e+02 5.507374439372461e+02 -2.529187169185561e+02 -2.617335361775729e+02 - ME 1.577330101330874e-03 + ME 7.792286450853471e-04 Event 211 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3740,7 +3740,7 @@ Event 211 Batch 1 2 5.484404249965427e+02 1.659778109685243e+01 3.514591842057613e+02 -4.206992456262192e+02 3 4.635537606517395e+02 -3.607884938122542e+02 -3.140996451540818e+01 2.893564685231623e+02 4 4.880058143517181e+02 3.441907127154018e+02 -3.200492196903532e+02 1.313427771030569e+02 - ME 4.999214184618137e-05 + ME 1.717788621912363e-05 Event 212 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3748,7 +3748,7 @@ Event 212 Batch 1 2 6.930853388432640e+02 -3.424793196872474e+02 -8.152110066892747e+01 5.970171795281683e+02 3 9.131624224772825e+01 6.738328155058525e+01 1.365968298972706e+01 6.009627714210347e+01 4 7.155984189090078e+02 2.750960381366621e+02 6.786141767920034e+01 -6.571134566702718e+02 - ME 3.224436999651524e-01 + ME 4.440767413899675e-02 Event 213 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3756,7 +3756,7 @@ Event 213 Batch 1 2 7.316448870278512e+02 4.203233031264803e+02 4.913598772661251e+02 -3.423419819067778e+02 3 4.750162603483208e+02 -1.726357548525294e+02 -3.708603862154638e+02 2.414537588813190e+02 4 2.933388526238279e+02 -2.476875482739507e+02 -1.204994910506614e+02 1.008882230254589e+02 - ME 4.008080891216109e-05 + ME 1.166473784051930e-05 Event 214 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3764,7 +3764,7 @@ Event 214 Batch 1 2 4.805779599533694e+02 3.904513572450257e+02 -1.742898429406511e+02 2.193763065287195e+02 3 6.164938851206517e+02 -5.563771061772993e+02 2.227142270499353e+02 1.445946028815716e+02 4 4.029281549259790e+02 1.659257489322735e+02 -4.842438410928419e+01 -3.639709094102910e+02 - ME 1.130096726278085e-02 + ME 1.644694060635318e-04 Event 215 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3772,7 +3772,7 @@ Event 
215 Batch 1 2 4.610896439725640e+02 -3.106576460930037e+02 -3.050258363865880e+02 -1.518378274323046e+02 3 7.153470686812809e+02 2.726436938726979e+02 6.046054769368644e+02 2.680280994976061e+02 4 3.235632873461531e+02 3.801395222030658e+01 -2.995796405502758e+02 -1.161902720653026e+02 - ME 2.130646114222361e-04 + ME 1.638803663744001e-05 Event 216 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3780,7 +3780,7 @@ Event 216 Batch 1 2 5.309452696424389e+02 -4.912950836090372e+02 -3.608909251460832e+01 -1.980646298023531e+02 3 6.627369363365399e+02 4.479096066616000e+02 2.308759280187052e+02 4.304573578259469e+02 4 3.063177940210212e+02 4.338547694743724e+01 -1.947868355040969e+02 -2.323927280235938e+02 - ME 1.881406502208647e-03 + ME 7.684209531203918e-05 Event 217 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3788,7 +3788,7 @@ Event 217 Batch 1 2 4.608032244164870e+02 2.215832851737383e+02 3.318832460795877e+02 -2.304212888079594e+02 3 3.107022283044695e+02 -4.724697178681157e+01 2.830528592337836e+02 -1.190994425256424e+02 4 7.284945472790432e+02 -1.743363133869267e+02 -6.149361053133712e+02 3.495207313336019e+02 - ME 2.894775763457067e-03 + ME 4.426756984161849e-04 Event 218 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3796,7 +3796,7 @@ Event 218 Batch 1 2 6.336891602166270e+02 5.249943224110900e+02 1.648031440577737e+02 -3.142973702098814e+02 3 5.195346944320743e+02 -3.655895580768890e+02 -3.610279413409480e+02 7.693763263116504e+01 4 3.467761453512956e+02 -1.594047643342018e+02 1.962247972831736e+02 2.373597375787177e+02 - ME 2.703962034458943e-05 + ME 8.957256945094420e-06 Event 219 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3804,7 +3804,7 @@ Event 219 Batch 1 2 2.579228498517417e+02 -4.166553381892272e+01 1.191899344508913e+02 
2.249042891828000e+02 3 7.453266221408651e+02 -3.354388163550532e+01 -3.947818065141064e+02 -6.312954196904914e+02 4 4.967505280073930e+02 7.520941545442813e+01 2.755918720632151e+02 4.063911305076915e+02 - ME 6.103184694489295e-05 + ME 4.019449398167179e-05 Event 220 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3812,7 +3812,7 @@ Event 220 Batch 1 2 4.940336288355577e+02 -2.383755021420815e+02 -2.918661661143953e+02 3.194690712363630e+02 3 7.129224521449780e+02 2.727447507998269e+02 2.535039959962389e+02 -6.079510240944473e+02 4 2.930439190194635e+02 -3.436924865774512e+01 3.836217011815621e+01 2.884819528580837e+02 - ME 1.761519882509421e-04 + ME 1.677977866215262e-04 Event 221 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3820,7 +3820,7 @@ Event 221 Batch 1 2 3.305414381337777e+02 -2.712796684963201e+02 -1.199910663213094e+02 -1.458325333632650e+02 3 7.388441803280767e+02 5.510455284380058e+02 4.375213740715825e+02 2.254209298704556e+02 4 4.306143815381457e+02 -2.797658599416856e+02 -3.175303077502730e+02 -7.958839650719051e+01 - ME 1.338118621913618e-04 + ME 1.392897982206581e-05 Event 222 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3828,7 +3828,7 @@ Event 222 Batch 1 2 4.657562074797755e+02 2.823280548971349e+02 2.956503281023745e+02 2.231828795335844e+02 3 4.791948192186352e+02 -3.228825926298714e+02 2.575611801233854e+02 -2.429747818931873e+02 4 5.550489733015891e+02 4.055453773273638e+01 -5.532115082257600e+02 1.979190235960287e+01 - ME 9.040551632672907e-05 + ME 2.328731171682892e-05 Event 223 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3836,7 +3836,7 @@ Event 223 Batch 1 2 1.612164685986321e+02 -4.527922182271191e+01 -1.095260585492910e+01 1.543391792239740e+02 3 6.984218503485876e+02 -4.629950983513680e+02 2.605715575888556e+02 
-4.533553609726805e+02 4 6.403616810527805e+02 5.082743201740799e+02 -2.496189517339264e+02 2.990161817487066e+02 - ME 4.148580235863498e-04 + ME 2.446487784841432e-04 Event 224 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3844,7 +3844,7 @@ Event 224 Batch 1 2 1.663853414671972e+02 -1.350882138037309e+02 9.706071747767010e+01 3.804401292344658e+00 3 6.436745581417563e+02 -4.469273298203079e+02 -4.412749113764766e+02 -1.408877256838118e+02 4 6.899401003910457e+02 5.820155436240389e+02 3.442141938988058e+02 1.370833243914657e+02 - ME 3.449215697364171e-04 + ME 9.431632941984795e-05 Event 225 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3852,7 +3852,7 @@ Event 225 Batch 1 2 6.702356777533546e+02 6.117158080352369e+02 -2.649249521350114e+02 -6.952987609335720e+01 3 6.901224376513153e+02 -6.564819557015361e+02 1.560869289536550e+02 1.446972404640001e+02 4 1.396418845953297e+02 4.476614766629927e+01 1.088380231813564e+02 -7.516736437064299e+01 - ME 6.407468428023662e-04 + ME 2.456039108263569e-05 Event 226 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3860,7 +3860,7 @@ Event 226 Batch 1 2 7.307777643673112e+02 -4.569648094661606e+02 4.416236342013199e+02 -3.608155616351098e+02 3 1.446420186345137e+02 4.133161435221925e+01 -3.411742569426914e+01 1.343466131828505e+02 4 6.245802169981752e+02 4.156331951139413e+02 -4.075062085070508e+02 2.264689484522593e+02 - ME 4.858390443010437e-04 + ME 2.774761612267077e-04 Event 227 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3868,7 +3868,7 @@ Event 227 Batch 1 2 7.408615397889290e+02 -4.398089081634772e+02 -5.325812259979131e+02 2.679574278743413e+02 3 4.035753807128123e+02 3.000971513323747e+02 2.468113220276344e+02 -1.090823496201683e+02 4 3.555630794982585e+02 1.397117568311025e+02 2.857699039702786e+02 
-1.588750782541728e+02 - ME 3.215647103618368e-04 + ME 3.077346064218035e-05 Event 228 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3876,7 +3876,7 @@ Event 228 Batch 1 2 5.775455372723294e+02 -3.656199842755111e+02 -6.289501053880601e+01 4.426342647953073e+02 3 3.247306314578497e+02 8.776645762339835e+01 3.116872137482897e+02 2.445634292125525e+01 4 5.977238312698206e+02 2.778535266521127e+02 -2.487922032094836e+02 -4.670906077165625e+02 - ME 3.156934429573604e-03 + ME 3.399241079583280e-04 Event 229 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3884,7 +3884,7 @@ Event 229 Batch 1 2 3.665477125629453e+02 -2.081014917770363e+02 2.317985113364040e+02 -1.931850016112187e+02 3 6.187040836990479e+02 -2.134593092471877e+02 -3.484367286517815e+02 4.645661552545953e+02 4 5.147482037380067e+02 4.215608010242241e+02 1.166382173153775e+02 -2.713811536433765e+02 - ME 4.392210547845218e-04 + ME 8.330968691049859e-05 Event 230 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3892,7 +3892,7 @@ Event 230 Batch 1 2 5.913978529013565e+02 -4.986092821675885e+02 -3.028328044703767e+02 9.712104143419764e+01 3 3.439186614041002e+02 -6.573524045766426e+01 3.216488491089061e+02 -1.024741025375549e+02 4 5.646834856945436e+02 5.643445226252528e+02 -1.881604463852933e+01 5.353061103357447e+00 - ME 1.067159092411647e-04 + ME 2.296146042402505e-05 Event 231 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3900,7 +3900,7 @@ Event 231 Batch 1 2 5.760768557894827e+02 -7.075794524290799e+01 5.609870884449791e+02 1.102331327656218e+02 3 6.038619762337338e+02 -2.467027894308989e+02 -5.464177649873398e+02 -7.221250677108812e+01 4 3.200611679767834e+02 3.174607346738069e+02 -1.456932345763944e+01 -3.802062599453370e+01 - ME 8.750887998909065e-05 + ME 9.438631267217403e-06 Event 232 
Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3908,7 +3908,7 @@ Event 232 Batch 1 2 7.230187249684843e+02 -2.426041066061352e+02 1.884455685697195e+02 -6.545132479937492e+02 3 4.821326920133732e+02 2.438648429837413e+02 -1.563760752388986e+01 4.156168142598493e+02 4 2.948485830181424e+02 -1.260736377606032e+00 -1.728079610458298e+02 2.388964337338999e+02 - ME 4.549716999825542e-05 + ME 3.745272037455064e-05 Event 233 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3916,7 +3916,7 @@ Event 233 Batch 1 2 3.540260977608100e+02 -1.904526694678991e+02 -1.042089619355360e+02 -2.796475475319170e+02 3 4.925592302096041e+02 1.195034224421750e+02 3.554637678715695e+02 -3.193415679485398e+02 4 6.534146720295859e+02 7.094924702572415e+01 -2.512548059360335e+02 5.989891154804569e+02 - ME 2.494643034161164e-04 + ME 1.035644942794080e-04 Event 234 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3924,7 +3924,7 @@ Event 234 Batch 1 2 1.866526101194276e+02 7.776953530733704e+01 -1.047503781897390e+01 1.693557493124073e+02 3 6.012752698516817e+02 5.974840035795012e+02 -4.570329760029643e+01 4.955829083294186e+01 4 7.120721200288899e+02 -6.752535388868379e+02 5.617833541927040e+01 -2.189140401453492e+02 - ME 2.154454342135980e-03 + ME 6.655948749153013e-04 Event 235 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3932,7 +3932,7 @@ Event 235 Batch 1 2 5.032945404607945e+02 1.612889276925247e+02 2.561838854094329e+02 -4.020710050699558e+02 3 7.153634726767370e+02 -3.739069589148947e+02 -1.979140468542061e+02 5.768609140624169e+02 4 2.813419868624690e+02 2.126180312223700e+02 -5.826983855522722e+01 -1.747899089924609e+02 - ME 8.184939555880423e-04 + ME 1.137471703441233e-04 Event 236 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 
7.500000000000000e+02 @@ -3940,7 +3940,7 @@ Event 236 Batch 1 2 6.980797829886610e+02 -9.803971882836288e+00 4.740144261428889e+02 5.123764137440797e+02 3 5.519387921056282e+02 -1.638876688381594e+02 -3.209728652821290e+02 -4.180355032606608e+02 4 2.499814249057108e+02 1.736916407209956e+02 -1.530415608607599e+02 -9.434091048341891e+01 - ME 2.813360227943072e-04 + ME 5.842524801707843e-05 Event 237 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3948,7 +3948,7 @@ Event 237 Batch 1 2 1.604490925133743e+02 6.212857081252698e+01 9.075394990141041e+01 1.168232534834160e+02 3 6.578242662283152e+02 5.348507070161563e+02 -3.810396531957998e+02 3.842224792439630e+01 4 6.817266412583107e+02 -5.969792778286832e+02 2.902857032943894e+02 -1.552455014078122e+02 - ME 8.205069948818567e-04 + ME 1.834055676127939e-04 Event 238 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3956,7 +3956,7 @@ Event 238 Batch 1 2 2.789018340499539e+02 1.069933592962543e+02 -2.572713415352736e+02 1.225197647611563e+01 3 4.761759619803052e+02 7.755191627191856e+01 -4.591043622469822e+02 -9.976187456245104e+01 4 7.449222039697408e+02 -1.845452755681728e+02 7.163757037822556e+02 8.750989808633538e+01 - ME 4.130258343824905e-02 + ME 9.445005309896021e-03 Event 239 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3964,7 +3964,7 @@ Event 239 Batch 1 2 4.581461811054764e+02 -3.899520773556200e+02 2.006122777919944e+02 1.326273524830990e+02 3 3.013476461129690e+02 -2.996604136348060e+02 3.145663680794619e+01 4.951799549362093e+00 4 7.405061727815548e+02 6.896124909904260e+02 -2.320689145999406e+02 -1.375791520324611e+02 - ME 1.351152256907066e-02 + ME 4.970363634614722e-03 Event 240 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3972,7 +3972,7 @@ Event 240 Batch 1 2 5.932490652975304e+02 
-4.094504138983958e+01 -3.300190662632461e+02 4.912793227530680e+02 3 3.147487537014150e+02 3.081803657249563e+02 4.097350029662016e+01 -4.912038692507519e+01 4 5.920021810010543e+02 -2.672353243351168e+02 2.890455659666260e+02 -4.421589358279927e+02 - ME 2.300291351402201e-03 + ME 3.420638167820422e-04 Event 241 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3980,7 +3980,7 @@ Event 241 Batch 1 2 4.438703186026563e+01 1.425431959717181e+01 -4.430288595443099e+00 -4.180186016371768e+01 3 7.139617398095604e+02 -8.415544716076485e+01 -5.657765076565163e+02 -4.272659242311072e+02 4 7.416512283301737e+02 6.990112756359306e+01 5.702067962519594e+02 4.690677843948249e+02 - ME 9.657825758456334e-03 + ME 9.983667466725972e-03 Event 242 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3988,7 +3988,7 @@ Event 242 Batch 1 2 3.798759956195423e+02 -1.259218082844715e+02 -3.429343473884153e+02 1.041417477651927e+02 3 6.208895880511435e+02 5.354328139337265e+02 1.248673426784089e+02 -2.884852319370315e+02 4 4.992344163293142e+02 -4.095110056492549e+02 2.180670047100064e+02 1.843434841718389e+02 - ME 4.523810239016752e-05 + ME 1.030886114253601e-05 Event 243 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3996,7 +3996,7 @@ Event 243 Batch 1 2 2.320641800899440e+02 1.658639294991472e+02 7.783463994856535e+01 1.424243988788334e+02 3 6.251485586341132e+02 -2.328139095298017e+02 -4.262931976140131e+02 3.935511574875350e+02 4 6.427872612759426e+02 6.694998003065477e+01 3.484585576654476e+02 -5.359755563663684e+02 - ME 1.068434238404496e-02 + ME 8.493072129055412e-04 Event 244 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4004,7 +4004,7 @@ Event 244 Batch 1 2 6.609991843787810e+02 -2.293678857540617e+02 -4.971623496474938e+02 -3.703240376037023e+02 3 1.091403980947070e+02 
1.154537470975927e+01 -9.115666825632124e+00 -1.081445118228680e+02 4 7.298604175265119e+02 2.178225110443025e+02 5.062780164731259e+02 4.784685494265703e+02 - ME 2.129811247265830e-03 + ME 9.635755455313371e-04 Event 245 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4012,7 +4012,7 @@ Event 245 Batch 1 2 4.893629130846664e+02 -3.546974954177181e+02 3.112856868655738e+02 -1.294873298810978e+02 3 7.129026631852477e+02 5.703735458058533e+02 -4.257115617679147e+02 -4.091322034012423e+01 4 2.977344237300874e+02 -2.156760503881352e+02 1.144258749023406e+02 1.704005502212233e+02 - ME 2.548352504440589e-05 + ME 5.312368446054512e-06 Event 246 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4020,7 +4020,7 @@ Event 246 Batch 1 2 3.999457395350199e+02 9.605025124341067e+01 9.072234098128430e+01 3.774922524438975e+02 3 3.675469088581873e+02 -1.615841482674670e+01 2.570183669846762e+02 2.622426259669196e+02 4 7.325073516067924e+02 -7.989183641666393e+01 -3.477407079659604e+02 -6.397348784108170e+02 - ME 1.294421983622042e-01 + ME 5.023802198964801e-02 Event 247 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4028,7 +4028,7 @@ Event 247 Batch 1 2 6.711864521923226e+02 3.763073240556692e+02 5.338170415278108e+02 1.546719678644905e+02 3 5.231557804938882e+02 -1.057595517177888e+02 -5.121603131388773e+02 -1.409615302513522e+01 4 3.056577673137891e+02 -2.705477723378804e+02 -2.165672838893370e+01 -1.405758148393554e+02 - ME 2.873345328272106e-04 + ME 1.980507958825256e-05 Event 248 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4036,7 +4036,7 @@ Event 248 Batch 1 2 6.307803946875938e+02 -6.240065811552291e+01 -3.654556314590158e+02 5.103256270499047e+02 3 3.935347424219227e+02 -2.188782290807617e+02 2.916853933646314e+01 -3.257470040392325e+02 4 4.756848628904837e+02 
2.812788871962847e+02 3.362870921225527e+02 -1.845786230106721e+02 - ME 2.418190194667681e-04 + ME 8.712398839363553e-05 Event 249 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4044,7 +4044,7 @@ Event 249 Batch 1 2 4.326970760901858e+02 -4.070406664121577e+02 -1.467447404863359e+02 3.261392852829594e+00 3 4.839435229991528e+02 2.335311811831339e+01 2.018595963184923e+02 -4.392136936630267e+02 4 5.833594009106607e+02 3.836875482938447e+02 -5.511485583215654e+01 4.359523008101972e+02 - ME 8.354140201035124e-05 + ME 2.487145538635957e-05 Event 250 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4052,7 +4052,7 @@ Event 250 Batch 1 2 7.010671671345858e+02 -6.122994886156980e+02 -2.473946684860857e+02 2.353303785738851e+02 3 5.574643785654457e+02 3.902114201641945e+02 2.260985614407801e+02 -3.276904354069721e+02 4 2.414684542999681e+02 2.220880684515034e+02 2.129610704530562e+01 9.236005683308701e+01 - ME 4.704118057291807e-05 + ME 1.645582299148298e-05 Event 251 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4060,7 +4060,7 @@ Event 251 Batch 1 2 7.364006127103795e+02 5.379960890463808e+02 4.302640987755426e+02 2.602285070392761e+02 3 3.051282143252570e+01 -2.901685968644106e+00 1.337962970917706e+01 -2.726899336532026e+01 4 7.330865658570956e+02 -5.350944030777371e+02 -4.436437284847198e+02 -2.329595136739561e+02 - ME 8.340546584740779e-03 + ME 6.389613086136084e-03 Event 252 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4068,7 +4068,7 @@ Event 252 Batch 1 2 5.965625584838610e+02 -7.369842915522101e+01 -5.671364104158780e+02 -1.697401534860145e+02 3 6.549338760881149e+02 -1.514014639568436e+02 6.313240788068730e+02 8.628954906696529e+01 4 2.485035654280235e+02 2.250998931120648e+02 -6.418766839099484e+01 8.345060441904938e+01 - ME 3.985162011735342e-05 
+ ME 7.225550854378042e-06 Event 253 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4076,7 +4076,7 @@ Event 253 Batch 1 2 5.728678540484714e+02 3.212236187283236e+01 -4.622666283104808e+02 -3.368312580807653e+02 3 7.160302400837320e+02 1.132435775281999e+02 5.206369974620781e+02 4.783433011307397e+02 4 2.111019058677967e+02 -1.453659394010323e+02 -5.837036915159722e+01 -1.415120430499744e+02 - ME 1.248429186447426e-03 + ME 7.499676590470843e-05 Event 254 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4084,7 +4084,7 @@ Event 254 Batch 1 2 5.579357369440610e+02 1.333150067790222e+02 -6.785864805882139e+01 5.375077668373273e+02 3 6.202682598689536e+02 -4.039338689731095e+02 2.012068793592834e+02 -4.255419314189536e+02 4 3.217960031869852e+02 2.706188621940872e+02 -1.333482313004621e+02 -1.119658354183736e+02 - ME 6.088720978226072e-04 + ME 2.226893396847405e-04 Event 255 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4092,5 +4092,5 @@ Event 255 Batch 1 2 7.263612771087843e+02 3.396063850675520e+02 -6.401091575508393e+02 5.028393902637355e+01 3 1.540578578981475e+02 -3.080387127739228e+01 1.060177193258910e+02 -1.074485378375538e+02 4 6.195808649930684e+02 -3.088025137901597e+02 5.340914382249483e+02 5.716459881118030e+01 - ME 1.547064591142216e-04 + ME 4.003666322732326e-05 From 77cd619023ac756854c3a3b9025b36fcb0412328 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 17:29:23 +0200 Subject: [PATCH 020/119] [oct23av] rerun 8 tput tests for gqttq - now all ok after fixing the ref file for runTest ./tput/teeThroughputX.sh -mix -hrd -makej -gqttq -makeclean ./tput/teeThroughputX.sh -makej -gqttq -flt -bridge -makeclean --- .../log_gqttq_mad_d_inl0_hrd0.txt | 160 ++++++++++++++--- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 164 +++++++++++++++--- .../log_gqttq_mad_d_inl0_hrd1.txt | 160 
++++++++++++++--- .../log_gqttq_mad_f_inl0_hrd0.txt | 160 ++++++++++++++--- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 164 +++++++++++++++--- .../log_gqttq_mad_f_inl0_hrd1.txt | 160 ++++++++++++++--- .../log_gqttq_mad_m_inl0_hrd0.txt | 160 ++++++++++++++--- .../log_gqttq_mad_m_inl0_hrd1.txt | 160 ++++++++++++++--- 8 files changed, 1104 insertions(+), 184 deletions(-) diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 4d32261a0d..40bad5a7b8 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-26_16:49:38 +DATE: 2023-10-26_17:25:04 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.729631e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.511740e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.903280e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.792703e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.539913e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.920245e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.442673 sec - 1,967,879,288 cycles # 3.002 GHz - 2,802,500,107 instructions # 1.42 insn per cycle - 0.712646529 seconds time elapsed +TOTAL : 0.447647 sec + 
1,899,649,593 cycles # 2.843 GHz + 2,640,133,591 instructions # 1.39 insn per cycle + 0.725424703 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.471854e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.643008e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.105940e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.554122e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.661192e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.088747e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.519141 sec - 2,281,793,183 cycles # 3.034 GHz - 3,260,400,346 instructions # 1.43 insn per cycle - 0.810273095 seconds time elapsed +TOTAL : 0.525959 sec + 2,274,866,242 cycles # 2.999 GHz + 3,244,651,641 instructions # 1.43 insn per cycle + 0.817590808 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,15 +82,129 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 1.079444e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.102859e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.102859e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.135936e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.160836e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.160836e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.540557 sec - 4,536,431,435 cycles # 2.951 GHz - 12,817,408,182 instructions # 2.83 insn per cycle - 1.544699619 seconds time elapsed +TOTAL : 1.464575 sec + 4,532,434,513 cycles # 3.088 GHz + 12,813,606,654 instructions # 2.83 insn per cycle + 1.468861702 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 730) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe - 2 FAILED TESTS +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499475 +Relative difference = 5.286896515331313e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP 
threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.041215e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.121607e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.121607e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.824113 sec + 2,544,894,419 cycles # 3.076 GHz + 7,194,033,887 instructions # 2.83 insn per cycle + 0.828181787 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3149) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499475 +Relative difference = 5.286896515331313e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.619791e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.876953e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.876953e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.474444 sec + 
1,367,981,707 cycles # 2.862 GHz + 2,963,399,076 instructions # 2.17 insn per cycle + 0.478653969 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3017) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.039550e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.354962e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.354962e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.426894 sec + 1,250,191,222 cycles # 2.906 GHz + 2,816,864,379 instructions # 2.25 insn per cycle + 0.430913373 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2780) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.854127e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.012217e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.012217e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.597649 sec + 1,200,960,787 cycles # 1.998 GHz + 1,804,460,217 instructions # 1.50 insn per cycle + 0.601806763 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1380) (512y: 106) (512z: 2270) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index 4753c307e1..b0a19a8e4d 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-26_17:13:31 +DATE: 2023-10-26_17:27:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.688403e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.196209e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.196209e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.649597e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.202624e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.202624e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.466261 sec - 2,061,394,039 cycles # 3.003 GHz - 3,032,478,993 instructions # 1.47 insn per cycle - 0.743734864 seconds time elapsed +TOTAL : 0.468562 sec + 2,056,154,943 cycles # 3.011 GHz + 3,068,292,883 instructions # 1.49 insn per cycle + 0.742344173 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,14 +72,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.365371e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.438222e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.438222e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.351177e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.446309e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.446309e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.745064 sec - 2,974,326,147 cycles # 3.010 GHz - 4,582,543,246 instructions # 1.54 insn per cycle - 1.047030310 seconds time elapsed +TOTAL : 0.735555 sec + 2,957,630,146 cycles # 3.014 GHz + 4,532,632,907 instructions # 1.53 insn per cycle + 1.038724962 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -95,15 +95,133 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.137225e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.161884e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.161884e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.132503e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.156927e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.156927e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.467678 sec - 4,553,662,396 cycles # 3.096 GHz - 12,819,000,364 
instructions # 2.82 insn per cycle - 1.471758386 seconds time elapsed +TOTAL : 1.474566 sec + 4,557,819,252 cycles # 3.083 GHz + 12,820,933,724 instructions # 2.81 insn per cycle + 1.478788241 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 730) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe - 2 FAILED TESTS +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499475 +Relative difference = 5.286896515331313e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.048121e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.128402e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.128402e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.826995 sec + 2,572,087,066 cycles # 3.097 GHz + 7,242,131,193 instructions # 2.82 insn per cycle + 0.831269361 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3149) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499475 +Relative difference = 5.286896515331313e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.651556e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.911752e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.911752e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.476303 sec + 1,398,837,886 cycles # 2.914 GHz + 3,011,956,162 instructions # 2.15 insn per cycle + 0.480642444 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3017) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.989902e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.299552e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.299552e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.438250 sec + 1,282,387,350 cycles # 2.901 GHz + 2,865,664,755 instructions # 2.23 insn per cycle + 0.442674280 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2780) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.848300e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.002921e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.002921e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.604375 sec + 1,228,098,241 cycles # 2.021 GHz + 1,842,126,953 instructions # 1.50 insn per cycle + 0.608691916 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1380) (512y: 106) (512z: 2270) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index a9a8163b9d..d6b5e0aad5 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-26_16:49:49 +DATE: 2023-10-26_17:25:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.695969e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.369756e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.733382e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.773700e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.355869e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.712058e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.440487 sec - 1,973,277,409 cycles # 3.017 GHz - 2,804,504,699 instructions # 1.42 insn per cycle - 0.710712051 seconds time elapsed +TOTAL : 0.446838 sec + 1,946,931,730 cycles # 2.934 GHz + 2,811,438,850 instructions # 1.44 insn per cycle + 0.720668390 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.435954e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 7.475347e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.911743e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.498955e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.548357e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.964288e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.519269 sec - 2,277,391,096 cycles # 3.024 GHz - 3,254,209,570 instructions # 1.43 insn per cycle - 0.810433700 seconds time elapsed +TOTAL : 0.520214 sec + 2,265,273,066 cycles # 3.003 GHz + 3,228,567,893 instructions # 1.43 insn per cycle + 0.811558050 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,15 +82,129 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.154348e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.180003e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.180003e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.142548e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.167865e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.167865e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.441187 sec - 4,472,630,063 cycles # 3.096 GHz - 12,692,291,111 instructions # 2.84 insn per cycle - 1.445245845 seconds time elapsed +TOTAL : 1.455879 sec + 4,479,902,790 cycles # 3.070 GHz + 12,692,171,530 instructions # 2.83 insn per cycle + 1.459907347 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 685) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest.exe - 2 FAILED TESTS +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499475 +Relative difference = 5.286896515331313e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.092357e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.175485e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.175485e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.803799 sec + 2,492,455,684 cycles # 3.088 GHz + 7,047,759,878 instructions # 2.83 insn per cycle + 0.807789046 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2965) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499475 +Relative difference = 5.286896515331313e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.451621e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.678035e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.678035e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.496055 sec + 1,458,437,445 cycles # 2.919 GHz + 3,195,504,881 instructions # 2.19 insn per cycle + 0.500214979 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3078) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.580569e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.825485e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.825485e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.478720 sec + 1,397,857,315 cycles # 2.899 GHz + 3,099,350,808 instructions # 2.22 insn per cycle + 0.482782307 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2785) (512y: 257) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.737963e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.886358e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.886358e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.621958 sec + 1,247,171,087 cycles # 1.996 GHz + 2,070,041,670 instructions # 1.66 insn per cycle + 0.626017428 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1197) (512y: 194) (512z: 2426) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index e57eccf909..f5a2639771 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-26_16:49:59 +DATE: 2023-10-26_17:25:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.343265e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.226255e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.365776e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.424188e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.234933e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.376824e+08 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.438160 sec - 1,929,021,635 cycles # 2.968 GHz - 2,682,927,208 instructions # 1.39 insn per cycle - 0.709175123 seconds time elapsed +TOTAL : 0.436480 sec + 1,949,958,184 cycles # 3.004 GHz + 2,721,614,771 instructions # 1.40 insn per cycle + 0.705846857 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 168 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.496674e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
1.838576e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.980756e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.521674e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.845752e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.989370e+08 ) sec^-1 MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.471866 sec - 2,133,132,090 cycles # 3.020 GHz - 3,018,979,515 instructions # 1.42 insn per cycle - 0.764992270 seconds time elapsed +TOTAL : 0.473781 sec + 2,121,776,214 cycles # 3.012 GHz + 3,003,472,057 instructions # 1.42 insn per cycle + 0.763259437 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,15 +82,129 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.177979e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.204724e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.204724e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.161603e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.188157e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.188157e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.410662 sec - 4,391,627,324 cycles # 3.106 GHz - 12,756,963,824 instructions # 2.90 insn per cycle - 1.414598719 seconds time elapsed +TOTAL : 1.430760 sec + 4,393,282,749 cycles # 3.064 GHz + 12,757,139,178 instructions # 2.90 insn per cycle + 1.434699006 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe - 2 FAILED TESTS +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246861269162636 +Relative difference = 8.908367430417796e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.243958e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.460072e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.460072e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 0.524329 sec + 1,617,157,255 cycles # 3.064 GHz + 4,232,194,294 instructions # 2.62 insn per cycle + 0.528384284 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3708) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246862328158905 +Relative difference = 1.6341558101589185e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.556556e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.447650e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.447650e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429923e+01 ) GeV^-2 +TOTAL : 0.269642 sec + 793,561,755 cycles # 2.906 GHz + 1,796,372,240 instructions # 2.26 insn per cycle + 0.273627491 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3614) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247490055046619 +Relative difference = 3.863601240637253e-09 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.066042e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.112073e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.112073e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429923e+01 ) GeV^-2 +TOTAL : 0.251284 sec + 744,296,871 cycles # 2.924 GHz + 1,717,805,428 instructions # 2.31 insn per cycle + 0.255106971 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3443) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247490055046619 +Relative difference = 3.863601240637253e-09 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.420901e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.019958e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.019958e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429923e+01 ) GeV^-2 +TOTAL : 0.324025 sec + 679,485,098 cycles # 2.076 GHz + 1,206,749,407 instructions # 1.78 insn per cycle + 0.327968867 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2007) (512y: 32) (512z: 2493) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247490833793641 +Relative difference = 5.8522142669546946e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index b53646f0f6..dcad28db83 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-26_17:13:41 +DATE: 2023-10-26_17:27:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.754275e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.055276e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.055276e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.698357e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.094772e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.094772e+07 ) sec^-1 MeanMatrixElemValue = ( 2.017654e+01 +- 1.429184e+01 ) GeV^-2 -TOTAL : 0.450227 sec - 1,995,210,844 cycles # 3.000 GHz - 2,942,950,603 instructions # 1.48 insn per cycle - 0.724345916 seconds time elapsed +TOTAL : 0.449674 sec + 2,000,719,723 cycles # 3.012 GHz + 2,923,895,808 instructions # 1.46 insn per cycle + 0.723109219 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,14 +72,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.066363e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.838148e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.838148e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.069628e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.803608e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.803608e+07 ) sec^-1 MeanMatrixElemValue = ( 2.609942e+02 +- 2.115590e+02 ) GeV^-2 -TOTAL : 0.621172 sec - 2,468,425,638 cycles # 2.898 GHz - 3,821,933,035 instructions # 1.55 insn per cycle - 0.908341434 seconds time elapsed +TOTAL : 0.616457 sec + 2,529,130,127 cycles # 2.992 GHz + 3,855,862,028 instructions # 1.52 insn per cycle + 0.905495583 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -95,15 +95,133 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.168769e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.195472e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.195472e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.170511e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.197246e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.197246e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.424772 sec - 4,406,963,349 cycles # 3.086 GHz - 12,761,462,870 
instructions # 2.90 insn per cycle - 1.428790731 seconds time elapsed +TOTAL : 1.422269 sec + 4,408,284,699 cycles # 3.092 GHz + 12,761,377,566 instructions # 2.89 insn per cycle + 1.426267924 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 690) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe - 2 FAILED TESTS +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246861269162636 +Relative difference = 8.908367430417796e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.242542e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.458214e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.458214e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 0.528863 sec + 1,637,240,219 cycles # 3.076 GHz + 4,280,743,566 instructions # 2.61 insn per cycle + 0.532859494 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3708) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246862328158905 +Relative difference = 1.6341558101589185e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.405455e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.295574e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.295574e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429923e+01 ) GeV^-2 +TOTAL : 0.280318 sec + 819,157,481 cycles # 2.885 GHz + 1,833,479,182 instructions # 2.24 insn per cycle + 0.284787120 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3614) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247490055046619 +Relative difference = 3.863601240637253e-09 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.993629e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.028908e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.028908e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429923e+01 ) GeV^-2 +TOTAL : 0.257906 sec + 762,645,935 cycles # 2.918 GHz + 1,754,864,061 instructions # 2.30 insn per cycle + 0.261947181 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3443) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247490055046619 +Relative difference = 3.863601240637253e-09 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.338183e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.942282e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.942282e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429923e+01 ) GeV^-2 +TOTAL : 0.332959 sec + 700,752,007 cycles # 2.083 GHz + 1,248,201,481 instructions # 1.78 insn per cycle + 0.336970237 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2007) (512y: 32) (512z: 2493) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247490833793641 +Relative difference = 5.8522142669546946e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index 519fa897c6..0b14ac2732 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-26_16:50:09 +DATE: 2023-10-26_17:25:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.456844e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.249867e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.389367e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.400429e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.233953e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.375182e+08 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.437190 sec - 1,933,120,640 cycles # 2.988 GHz - 2,727,535,334 instructions # 1.41 insn per cycle - 0.706391886 seconds time elapsed +TOTAL : 0.442376 sec + 1,895,015,408 cycles # 2.892 GHz + 2,747,268,890 instructions # 1.45 insn per cycle + 0.712246758 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 162 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.471256e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
1.820345e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.967386e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.493600e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.828411e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.970176e+08 ) sec^-1 MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.470786 sec - 2,090,124,938 cycles # 3.012 GHz - 2,993,688,215 instructions # 1.43 insn per cycle - 0.750906447 seconds time elapsed +TOTAL : 0.474454 sec + 2,110,192,824 cycles # 2.992 GHz + 2,947,596,873 instructions # 1.40 insn per cycle + 0.764332989 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,15 +82,129 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.185910e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.213603e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.213603e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.158986e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.186080e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.186080e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.401164 sec - 4,362,926,983 cycles # 3.107 GHz - 12,656,529,955 instructions # 2.90 insn per cycle - 1.405093731 seconds time elapsed +TOTAL : 1.434486 sec + 4,371,663,810 cycles # 3.041 GHz + 12,657,634,893 instructions # 2.90 insn per cycle + 1.438463868 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 641) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest.exe - 2 FAILED TESTS +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246861269162636 +Relative difference = 8.908367430417796e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.446741e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.701875e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.701875e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 0.495234 sec + 1,479,620,343 cycles # 2.968 GHz + 4,121,009,416 instructions # 2.79 insn per cycle + 0.499281460 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3413) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246862328158905 +Relative difference = 1.6341558101589185e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.995783e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.515263e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.515263e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429923e+01 ) GeV^-2 +TOTAL : 0.348418 sec + 1,011,991,272 cycles # 2.876 GHz + 2,124,849,284 instructions # 2.10 insn per cycle + 0.352463895 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4206) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247490055046619 +Relative difference = 3.863601240637253e-09 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.230105e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.786980e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.786980e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429923e+01 ) GeV^-2 +TOTAL : 0.333455 sec + 970,827,927 cycles # 2.882 GHz + 2,043,935,074 instructions # 2.11 insn per cycle + 0.337479099 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4013) (512y: 9) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247490055046619 +Relative difference = 3.863601240637253e-09 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.954154e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.276017e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.276017e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429923e+01 ) GeV^-2 +TOTAL : 0.437007 sec + 857,837,231 cycles # 1.948 GHz + 1,573,448,481 instructions # 1.83 insn per cycle + 0.440984330 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2446) (512y: 16) (512z: 2998) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247490833793641 +Relative difference = 5.8522142669546946e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 9491ee780f..d8b101c2fc 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-26_16:50:19 +DATE: 2023-10-26_17:26:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.725041e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.498321e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.893455e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.458886e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.465906e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.924587e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.443565 sec - 1,967,428,977 cycles # 2.989 GHz - 2,773,135,186 instructions # 1.41 insn per cycle - 0.717269875 seconds time elapsed +TOTAL : 0.444243 sec + 2,002,421,839 cycles # 3.013 GHz + 2,741,217,649 instructions # 1.37 insn per cycle + 0.723275667 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.464031e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
7.628890e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.088772e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.489884e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.660066e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.119815e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.528891 sec - 2,186,923,468 cycles # 2.854 GHz - 3,145,929,563 instructions # 1.44 insn per cycle - 0.823288676 seconds time elapsed +TOTAL : 0.519948 sec + 2,276,444,720 cycles # 3.018 GHz + 3,229,105,748 instructions # 1.42 insn per cycle + 0.811078605 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,15 +82,129 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.138698e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.163005e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.163005e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.134733e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.158878e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.158878e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.460325 sec - 4,538,591,528 cycles # 3.101 GHz - 12,784,673,558 instructions # 2.82 insn per cycle - 1.464322080 seconds time elapsed +TOTAL : 1.465494 sec + 4,537,654,473 cycles # 3.089 GHz + 12,784,835,228 instructions # 2.82 insn per cycle + 1.469495110 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 705) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest.exe - 2 FAILED TESTS +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482734618697 +Relative difference = 5.099411406595165e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.036919e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.117824e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.117824e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.825266 sec + 2,551,760,245 cycles # 3.080 GHz + 7,116,326,213 instructions # 2.79 insn per cycle + 0.829268114 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3214) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482734618697 +Relative difference = 5.099411406595165e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.663407e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.932756e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.932756e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.469135 sec + 1,350,418,007 cycles # 2.856 GHz + 2,936,810,576 instructions # 2.17 insn per cycle + 0.473330080 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3174) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.883756e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.195233e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.195233e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.444372 sec + 1,218,686,581 cycles # 2.721 GHz + 2,790,949,615 instructions # 2.29 insn per cycle + 0.448527550 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2938) (512y: 110) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.750037e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.895766e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.895766e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.619232 sec + 1,238,990,774 cycles # 1.989 GHz + 1,831,327,319 instructions # 1.48 insn per cycle + 0.623295439 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1728) (512y: 114) (512z: 2312) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index cfbd0f5b0a..3d4a565469 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-26_16:50:29 +DATE: 2023-10-26_17:26:26 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.707059e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.441351e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.838155e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.459192e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.306531e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.735602e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.440265 sec - 1,976,352,261 cycles # 3.014 GHz - 2,799,587,867 instructions # 1.42 insn per cycle - 0.712947532 seconds time elapsed +TOTAL : 0.444146 sec + 2,013,998,621 cycles # 3.016 GHz + 2,802,542,390 instructions # 1.39 insn per cycle + 0.726529857 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.441177e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
7.496551e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.947056e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.467057e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.546570e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.958148e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.520648 sec - 2,308,723,413 cycles # 3.014 GHz - 3,274,512,364 instructions # 1.42 insn per cycle - 0.823500826 seconds time elapsed +TOTAL : 0.519273 sec + 2,271,413,411 cycles # 3.015 GHz + 3,255,005,016 instructions # 1.43 insn per cycle + 0.810510365 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,15 +82,129 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.146871e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.171774e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.171774e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.146828e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.171967e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.171967e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.450328 sec - 4,495,331,122 cycles # 3.093 GHz - 12,668,907,573 instructions # 2.82 insn per cycle - 1.454308496 seconds time elapsed +TOTAL : 1.450294 sec + 4,498,136,326 cycles # 3.095 GHz + 12,668,545,692 instructions # 2.82 insn per cycle + 1.454272739 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 657) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest.exe - 2 FAILED TESTS +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482734618697 +Relative difference = 5.099411406595165e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.099313e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.184687e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.184687e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.801124 sec + 2,487,092,001 cycles # 3.092 GHz + 6,905,671,869 instructions # 2.78 insn per cycle + 0.805195084 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3035) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482734618697 +Relative difference = 5.099411406595165e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.381230e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.602049e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.602049e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.505477 sec + 1,479,054,792 cycles # 2.908 GHz + 3,168,233,594 instructions # 2.14 insn per cycle + 0.509509291 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3284) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.629283e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.881809e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.881809e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.472539 sec + 1,380,679,448 cycles # 2.901 GHz + 3,040,117,670 instructions # 2.20 insn per cycle + 0.476625589 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2936) (512y: 265) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.708530e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.847600e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.847600e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.628483 sec + 1,253,413,112 cycles # 1.987 GHz + 2,004,133,246 instructions # 1.60 insn per cycle + 0.632568870 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1520) (512y: 202) (512z: 2499) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED From c01ca6f4bf3523f24f2a896a55b159efe913ff21 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 17:58:55 +0200 Subject: [PATCH 021/119] [oct23av] in CODEGEN MatrixElementKernels.cc, fix clang-format after cherry-picking Olivier's 9fc9873d0 --- .../iolibs/template_files/gpu/MatrixElementKernels.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc index cfed7d2af5..91c46ebef1 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc @@ -112,12 +112,12 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! 
const std::string tag = "arm neon (128bit as in SSE4.2)"; -#elif defined(__x86_64__) || defined(__i386__) +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; #else - bool known = false; // __builtin_cpu_supports is not supported + bool known = false; // __builtin_cpu_supports is not supported // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html // See https://stackoverflow.com/q/62783908 // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu From d8834781eecbb25e9caf1cc24e1c24537090d8c1 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 18:48:23 +0200 Subject: [PATCH 022/119] [oct23av] TEMPORARELY UNDO Olivier's changes to CODEGEN in 9fc9873d0 (keep only the mg5amcnlo update) git checkout ee9cee387c0aa4117080ec312e3f37d32c670370 CODEGEN/ --- .../MG5aMC_patches/PROD/patch.common | 12 ++-- .../gpu/MatrixElementKernels.cc | 9 +-- .../iolibs/template_files/gpu/cudacpp.mk | 5 +- .../CUDACPP_SA_OUTPUT/model_handling.py | 70 ++++++++++--------- .../PLUGIN/CUDACPP_SA_OUTPUT/output.py | 31 +------- .../PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh | 52 ++++++++------ 6 files changed, 79 insertions(+), 100 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common index 559fc54270..2e272b419b 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common @@ -78,12 +78,12 @@ index 348c283be..74db44d84 100644 +CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") +###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) +###$(info 
$(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -+CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>/dev/null | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) -+#ifeq ($(CUDACPP_BUILDDIR),) -+#$(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) -+#else ++CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ++ifeq ($(CUDACPP_BUILDDIR),) ++$(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) ++else +$(info CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)') -+#endif ++endif +CUDACPP_COMMONLIB=mg5amc_common +CUDACPP_CXXLIB=mg5amc_$(processid_short)_cpp +CUDACPP_CULIB=mg5amc_$(processid_short)_cuda @@ -110,7 +110,7 @@ index 348c283be..74db44d84 100644 -$(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX) - $(FC) -o $(PROG) $(PROCESS) $(MATRIX) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp -+#LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (flag not universal -> skip?) 
++LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 -$(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL) - $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc index 91c46ebef1..30257195b6 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc @@ -112,17 +112,10 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#elif defined( __x86_64__ ) || defined( __i386__ ) +#else bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; -#else - bool known = false; // __builtin_cpu_supports is not supported - // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html - // See https://stackoverflow.com/q/62783908 - // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu - bool ok = true; // this is just an assumption! 
- const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index 49a5856085..222d75f846 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -27,7 +27,6 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -include ../../Source/make_opts #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -221,7 +220,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = -fopenmp # disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,7 +554,7 @@ endif $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) - $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp + $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py 
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index c450898a5d..3c00046c6e 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -726,6 +726,7 @@ def write_hardcoded_parameters(self, params): for par in pars.split(','): ###print(len(pardef_lines), par) # for debugging pardef_lines[par] = ( 'constexpr ' + type + ' ' + par ) + misc.sprint( 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) ) ###print( pardef_lines ) # for debugging ###for line in pardef_lines: misc.sprint(line) # for debugging parset_pars = [] @@ -1041,14 +1042,14 @@ class PLUGIN_OneProcessExporter(PLUGIN_export_cpp.OneProcessExporterGPU): # AV - overload export_cpp.OneProcessExporterGPU constructor (rename gCPPProcess to CPPProcess, set include_multi_channel) def __init__(self, *args, **kwargs): -# misc.sprint('Entering PLUGIN_OneProcessExporter.__init__') + misc.sprint('Entering PLUGIN_OneProcessExporter.__init__') for kwarg in kwargs: misc.sprint( 'kwargs[%s] = %s' %( kwarg, kwargs[kwarg] ) ) super().__init__(*args, **kwargs) self.process_class = 'CPPProcess' ###if self.in_madevent_mode: proc_id = kwargs['prefix']+1 # madevent+cudacpp (NB: HERE SELF.IN_MADEVENT_MODE DOES NOT WORK!) 
if 'prefix' in kwargs: proc_id = kwargs['prefix']+1 # madevent+cudacpp (ime+1 from ProcessExporterFortranMEGroup.generate_subprocess_directory) else: proc_id = 0 # standalone_cudacpp -# misc.sprint(proc_id) + misc.sprint(proc_id) self.proc_id = proc_id # AV - overload export_cpp.OneProcessExporterGPU method (indent comments in process_lines) @@ -1146,9 +1147,9 @@ def get_process_function_definitions(self, write=True): # AV - modify export_cpp.OneProcessExporterGPU method (add debug printouts for multichannel #342) def get_sigmaKin_lines(self, color_amplitudes, write=True): -# misc.sprint('Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines') -# misc.sprint(self.include_multi_channel) -# misc.sprint(self.support_multichannel) + misc.sprint('Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines') + misc.sprint(self.include_multi_channel) + misc.sprint(self.support_multichannel) replace_dict = super().get_sigmaKin_lines(color_amplitudes, write=False) replace_dict['proc_id'] = self.proc_id if self.proc_id>0 else 1 replace_dict['proc_id_source'] = 'madevent + cudacpp exporter' if self.proc_id>0 else 'standalone_cudacpp' # FIXME? use self.in_madevent_mode instead? 
@@ -1172,14 +1173,14 @@ def get_all_sigmaKin_lines(self, color_amplitudes, class_name): ret_lines = [] if self.single_helicities: ###assert self.include_multi_channel # remove this assert: must handle both cases and produce two different code bases (#473) -# misc.sprint(type(self.helas_call_writer)) -# misc.sprint(self.support_multichannel, self.include_multi_channel) + misc.sprint(type(self.helas_call_writer)) + misc.sprint(self.support_multichannel, self.include_multi_channel) multi_channel = None if self.include_multi_channel: if not self.support_multichannel: raise Exception("link with madevent not supported") multi_channel = self.get_multi_channel_dictionary(self.matrix_elements[0].get('diagrams'), self.include_multi_channel) -# misc.sprint(multi_channel) + misc.sprint(multi_channel) ###misc.sprint( 'before get_matrix_element_calls', self.matrix_elements[0].get_number_of_wavefunctions() ) # WRONG value of nwf, eg 7 for gg_tt helas_calls = self.helas_call_writer.get_matrix_element_calls(\ self.matrix_elements[0], @@ -1307,9 +1308,12 @@ def get_process_info_lines(self, matrix_element): # AV - replace the export_cpp.OneProcessExporterGPU method (invert .cc/.cu, add debug printouts) def generate_process_files(self): """Generate mgOnGpuConfig.h, CPPProcess.cc, CPPProcess.h, check_sa.cc, gXXX.cu links""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.generate_process_files') - if not self.include_multi_channel: - #this condition is likely wrong and need to be removed + misc.sprint('Entering PLUGIN_OneProcessExporter.generate_process_files') + if self.include_multi_channel: + misc.sprint('self.include_multi_channel is already defined: this is madevent+second_exporter mode') # FIXME? use self.in_madevent_mode instead? + else: + misc.sprint('self.include_multi_channel is not yet defined: this is standalone_cudacpp mode') # see issue #473 + # I move those line to standalone_cudacpp mode (but do we need those at all???) 
if self.matrix_elements[0].get('has_mirror_process'): self.matrix_elements[0].set('has_mirror_process', False) self.nprocesses/=2 @@ -1343,15 +1347,15 @@ def generate_process_files(self): ###template_ref = 'dump_CPUTest.'+self.process_name+'.txt' template_ref = self.template_path + '/../../../test/ref/' + 'dump_CPUTest.' + self.process_name + '.txt' if os.path.exists( template_ref ): -# misc.sprint( 'Copying test reference file: ', template_ref ) + misc.sprint( 'Copying test reference file: ', template_ref ) PLUGIN_export_cpp.cp( template_ref, self.path + '/../../test/ref' ) -# else: -# misc.sprint( 'Test reference file does not exist and will not be copied: ', template_ref ) + else: + misc.sprint( 'Test reference file does not exist and will not be copied: ', template_ref ) # SR - generate CMakeLists.txt file inside the P* directory def edit_CMakeLists(self): """Generate CMakeLists.txt""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_CMakeLists') + misc.sprint('Entering PLUGIN_OneProcessExporter.edit_CMakeLists') template = open(pjoin(self.template_path,'CMake/SubProcesses/CMakeLists_P.txt'),'r').read() ff = open(pjoin(self.path, 'CMakeLists.txt'),'w') ff.write(template) @@ -1360,7 +1364,7 @@ def edit_CMakeLists(self): # AV - replace the export_cpp.OneProcessExporterGPU method (invert .cc/.cu, add debug printouts) def edit_check_sa(self): """Generate check_sa.cc and fcheck_sa.f""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_check_sa') + misc.sprint('Entering PLUGIN_OneProcessExporter.edit_check_sa') ff = open(pjoin(self.path, 'check_sa.cc'),'w') template = open(pjoin(self.template_path,'gpu','check_sa.cc'),'r').read() ff.write(template) # nothing to replace in check_sa.cc @@ -1377,7 +1381,7 @@ def edit_check_sa(self): # AV - replace the export_cpp.OneProcessExporterGPU method (add debug printouts and multichannel handling #473) def edit_mgonGPU(self): """Generate mgOnGpuConfig.h""" -# misc.sprint('Entering 
PLUGIN_OneProcessExporter.edit_mgonGPU') + misc.sprint('Entering PLUGIN_OneProcessExporter.edit_mgonGPU') template = open(pjoin(self.template_path,'gpu','mgOnGpuConfig.h'),'r').read() replace_dict = {} nexternal, nincoming = self.matrix_elements[0].get_nexternal_ninitial() @@ -1397,7 +1401,7 @@ def edit_mgonGPU(self): # AV - new method def edit_processidfile(self): """Generate epoch_process_id.h""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_processidfile') + misc.sprint('Entering PLUGIN_OneProcessExporter.edit_processidfile') template = open(pjoin(self.template_path,'gpu','epoch_process_id.h'),'r').read() replace_dict = {} replace_dict['processid'] = self.get_process_name() @@ -1409,7 +1413,7 @@ def edit_processidfile(self): # AV - new method def edit_coloramps(self): """Generate coloramps.h""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_coloramps') + misc.sprint('Entering PLUGIN_OneProcessExporter.edit_coloramps') template = open(pjoin(self.template_path,'gpu','coloramps.h'),'r').read() ff = open(pjoin(self.path, 'coloramps.h'),'w') # The following five lines from OneProcessExporterCPP.get_sigmaKin_lines (using OneProcessExporterCPP.get_icolamp_lines) @@ -1427,7 +1431,7 @@ def edit_coloramps(self): # AV - new method def edit_testxxx(self): """Generate testxxx.cc""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_testxxx') + misc.sprint('Entering PLUGIN_OneProcessExporter.edit_testxxx') template = open(pjoin(self.template_path,'gpu','testxxx.cc'),'r').read() replace_dict = {} replace_dict['model_name'] = self.model_name @@ -1438,7 +1442,7 @@ def edit_testxxx(self): # AV - new method def edit_memorybuffers(self): """Generate MemoryBuffers.h""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_memorybuffers') + misc.sprint('Entering PLUGIN_OneProcessExporter.edit_memorybuffers') template = open(pjoin(self.template_path,'gpu','MemoryBuffers.h'),'r').read() replace_dict = {} replace_dict['model_name'] = self.model_name @@ -1449,7 
+1453,7 @@ def edit_memorybuffers(self): # AV - new method def edit_memoryaccesscouplings(self): """Generate MemoryAccessCouplings.h""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings') + misc.sprint('Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings') template = open(pjoin(self.template_path,'gpu','MemoryAccessCouplings.h'),'r').read() replace_dict = {} replace_dict['model_name'] = self.model_name @@ -1461,7 +1465,7 @@ def edit_memoryaccesscouplings(self): # [*NB export_cpp.UFOModelConverterGPU.write_process_h_file is not called!*] def write_process_h_file(self, writer): """Generate final gCPPProcess.h""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.write_process_h_file') + misc.sprint('Entering PLUGIN_OneProcessExporter.write_process_h_file') out = super().write_process_h_file(writer) writer.seek(-1, os.SEEK_CUR) writer.truncate() @@ -1483,7 +1487,7 @@ def super_write_process_cc_file(self, writer): # AV - overload the export_cpp.OneProcessExporterGPU method (add debug printout and truncate last \n) def write_process_cc_file(self, writer): """Generate CPPProcess.cc""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.write_process_cc_file') + misc.sprint('Entering PLUGIN_OneProcessExporter.write_process_cc_file') ###out = super().write_process_cc_file(writer) out = self.super_write_process_cc_file(writer) writer.seek(-1, os.SEEK_CUR) @@ -1662,7 +1666,7 @@ def super_get_matrix_element_calls(self, matrix_element, color_amplitudes, multi color[namp][njamp] = coeff me = matrix_element.get('diagrams') matrix_element.reuse_outdated_wavefunctions(me) -# misc.sprint(multi_channel_map) + misc.sprint(multi_channel_map) res = [] ###res.append('for(int i=0;i<%s;i++){jamp[i] = cxtype(0.,0.);}' % len(color_amplitudes)) res.append("""constexpr size_t nxcoup = ndcoup + nicoup; // both dependent and independent couplings @@ -1717,7 +1721,7 @@ def super_get_matrix_element_calls(self, matrix_element, color_amplitudes, multi 
sum([diagrams[idiag].get('amplitudes') for \ idiag in multi_channel_map[config]], [])] diag_to_config[amp[0]] = config -# misc.sprint(diag_to_config) + misc.sprint(diag_to_config) id_amp = 0 for diagram in matrix_element.get('diagrams'): ###print('DIAGRAM %3d: #wavefunctions=%3d, #diagrams=%3d' % @@ -1829,13 +1833,13 @@ def get_external_line(self, wf, argument): wf.get('me_id')-1, wf.get('number_external')-1) elif argument.is_boson(): -# misc.sprint(call) -# misc.sprint( (wf.get('mass'), -# wf.get('number_external')-1, -# # For boson, need initial/final here -# (-1) ** (wf.get('state') == 'initial'), -# wf.get('me_id')-1, -# wf.get('number_external')-1)) + misc.sprint(call) + misc.sprint( (wf.get('mass'), + wf.get('number_external')-1, + # For boson, need initial/final here + (-1) ** (wf.get('state') == 'initial'), + wf.get('me_id')-1, + wf.get('number_external')-1)) return self.format_coupling(call % \ (wf.get('mass'), wf.get('number_external')-1, diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index e261f08057..aebab6f1a7 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -4,7 +4,6 @@ # Further modified by: O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2021-2023) for the MG5aMC CUDACPP plugin. 
import os -import subprocess # AV - load an independent 2nd copy of the export_cpp module (as PLUGIN_export_cpp) and use that within the plugin (workaround for #341) # See https://stackoverflow.com/a/11285504 @@ -35,7 +34,6 @@ from os.path import join as pjoin import madgraph.various.misc as misc -import madgraph.iolibs.files as files # AV - define the plugin's process exporter # (NB: this is the plugin's main class, enabled in the new_output dictionary in __init__.py) @@ -208,17 +206,9 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag): self.add_input_for_banner() if 'CUDACPP_CODEGEN_PATCHLEVEL' in os.environ: patchlevel = os.environ['CUDACPP_CODEGEN_PATCHLEVEL'] else: patchlevel = '' - plugin_path = os.path.dirname(os.path.realpath( __file__ )) -# path = os.path.realpath(os.curdir + os.sep + 'PLUGIN' + os.sep + 'CUDACPP_OUTPUT') -# misc.sprint(path) - p = subprocess.Popen([pjoin(plugin_path, 'patchMad.sh'), self.dir_path , 'PROD', str(patchlevel)]) - stdout, stderr = p.communicate() - if not p.returncode: - logger.debug("####### \n stdout is \n %s", stdout) - logger.info("####### \n stderr is \n %s", stderr) + path = os.path.realpath(os.curdir + os.sep + 'PLUGIN' + os.sep + 'CUDACPP_OUTPUT') + if os.system(path + os.sep + 'patchMad.sh ' + self.dir_path + ' PROD ' + patchlevel) != 0: raise Exception('ERROR! 
the O/S call to patchMad.sh failed') - - self.add_madevent_plugin_fct() return super().finalize(matrix_element, cmdhistory, MG5options, outputflag) # AV (default from OM's tutorial) - overload settings and add a debug printout @@ -239,21 +229,4 @@ def add_input_for_banner(self): for entry in new_parameters: finput.write(entry) - # OM adding a new way to "patch" python file such that the launch command of MG5aMC is working - # this consist in a file plugin_interface.py - # which contains a series of functions and one dictionary variable TO_OVERWRITE - # that will be used to have temporary overwrite of all the key variable passed as string by their value. - # all variable that are file related should be called as madgraph.dir.file.variable - def add_madevent_plugin_fct(self): - """this consist in a file plugin_interface.py - which contains a series of functions and one dictionary variable TO_OVERWRITE - that will be used to have temporary overwrite of all the key variable passed as string by their value. - all variable that are file related should be called as madgraph.dir.file.variable - """ - - plugin_path = os.path.dirname(os.path.realpath( __file__ )) - files.cp(pjoin(plugin_path, 'plugin_interface.py'), pjoin(self.dir_path, 'bin', 'internal')) - files.cp(pjoin(plugin_path, 'launch_plugin.py'), pjoin(self.dir_path, 'bin', 'internal')) - - #------------------------------------------------------------------------------------ diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh index aa553100e0..8ac638a193 100755 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh @@ -41,49 +41,59 @@ dir_patches=$2 if [ ! -e ${dir} ]; then echo "ERROR! 
Directory $dir does not exist"; exit 1; fi # These two steps are part of "cd Source; make" but they actually are code-generating steps -#${dir}/bin/madevent treatcards run -#${dir}/bin/madevent treatcards param +${dir}/bin/madevent treatcards run +${dir}/bin/madevent treatcards param # Cleanup -#\rm -f ${dir}/crossx.html -#\rm -f ${dir}/index.html -#\rm -f ${dir}/madevent.tar.gz -#\rm -f ${dir}/Cards/delphes_trigger.dat -#\rm -f ${dir}/Cards/plot_card.dat -#\rm -f ${dir}/bin/internal/run_plot* -#\rm -f ${dir}/HTML/* -#\rm -rf ${dir}/bin/internal/__pycache__ -#\rm -rf ${dir}/bin/internal/ufomodel/__pycache__ -#touch ${dir}/HTML/.keep # new file +\rm -f ${dir}/crossx.html +\rm -f ${dir}/index.html +\rm -f ${dir}/madevent.tar.gz +\rm -f ${dir}/Cards/delphes_trigger.dat +\rm -f ${dir}/Cards/plot_card.dat +\rm -f ${dir}/bin/internal/run_plot* +\rm -f ${dir}/HTML/* +\rm -rf ${dir}/bin/internal/__pycache__ +\rm -rf ${dir}/bin/internal/ufomodel/__pycache__ +touch ${dir}/HTML/.keep # new file # Exit here for patchlevel 0 (--upstream) if [ "${patchlevel}" == "0" ]; then exit $status; fi # Add global flag '-O3 -ffast-math -fbounds-check' as in previous gridpacks -#echo "GLOBAL_FLAG=-O3 -ffast-math -fbounds-check" > ${dir}/Source/make_opts.new -#cat ${dir}/Source/make_opts >> ${dir}/Source/make_opts.new -#\mv ${dir}/Source/make_opts.new ${dir}/Source/make_opts +echo "GLOBAL_FLAG=-O3 -ffast-math -fbounds-check" > ${dir}/Source/make_opts.new +cat ${dir}/Source/make_opts >> ${dir}/Source/make_opts.new +\mv ${dir}/Source/make_opts.new ${dir}/Source/make_opts # Patch the default Fortran code to provide the integration with the cudacpp plugin # (1) Process-independent patches touch ${dir}/Events/.keep # this file should already be present (mg5amcnlo copies it from Template/LO/Events/.keep) -cp -pr ${scrdir}/MG5aMC_patches/${dir_patches}/fbridge_common.inc ${dir}/SubProcesses # new file -#sed -i 's/2 = sde_strategy/1 = sde_strategy/' ${dir}/Cards/run_card.dat # use strategy SDE=1 in 
multichannel mode (see #419) -#sed -i 's/SDE_STRAT = 2/SDE_STRAT = 1/' ${dir}/Source/run_card.inc # use strategy SDE=1 in multichannel mode (see #419) +\cp -dpr ${scrdir}/MG5aMC_patches/${dir_patches}/fbridge_common.inc ${dir}/SubProcesses # new file +sed -i 's/2 = sde_strategy/1 = sde_strategy/' ${dir}/Cards/run_card.dat # use strategy SDE=1 in multichannel mode (see #419) +sed -i 's/SDE_STRAT = 2/SDE_STRAT = 1/' ${dir}/Source/run_card.inc # use strategy SDE=1 in multichannel mode (see #419) if [ "${patchlevel}" == "2" ]; then cd ${dir} - #sed -i 's/DEFAULT_F2PY_COMPILER=f2py3.*/DEFAULT_F2PY_COMPILER=f2py3/' Source/make_opts + sed -i 's/DEFAULT_F2PY_COMPILER=f2py3.*/DEFAULT_F2PY_COMPILER=f2py3/' Source/make_opts echo "DEBUG: cd ${PWD}; patch -p4 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.common" if ! patch -p4 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.common; then status=1; fi \rm -f Source/*.orig \rm -f bin/internal/*.orig + echo " +#********************************************************************* +# Options for the cudacpp plugin +#********************************************************************* + +# Set cudacpp-specific values of non-cudacpp-specific options +-O3 -ffast-math -fbounds-check = global_flag ! build flags for Fortran code (for a fair comparison to cudacpp) + +# New cudacpp-specific options (default values are defined in banner.py) +CPP = cudacpp_backend ! valid backends are FORTRAN, CPP, CUDA" >> Cards/run_card.dat cd - > /dev/null fi for p1dir in ${dir}/SubProcesses/P*; do cd $p1dir ln -sf ../fbridge_common.inc . # new file - cp -pr ${scrdir}/MG5aMC_patches/${dir_patches}/counters.cc . # new file - cp -pr ${scrdir}/MG5aMC_patches/${dir_patches}/ompnumthreads.cc . # new file + \cp -dpr ${scrdir}/MG5aMC_patches/${dir_patches}/counters.cc . # new file + \cp -dpr ${scrdir}/MG5aMC_patches/${dir_patches}/ompnumthreads.cc . 
# new file if [ "${patchlevel}" == "2" ]; then echo "DEBUG: cd ${PWD}; patch -p6 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.P1" if ! patch -p6 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.P1; then status=1; fi From 88f45f2779251271a856c575555c6e98a627d653 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 18:54:06 +0200 Subject: [PATCH 023/119] [oct23av] regenerate 7 processes mad and 6 sa (all except 2x eemumu) - no significant changes in code (note that my new gqttq ref also affected pptt012j) --- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 18 +- .../cudacpp/gg_tt.mad/bin/internal/banner.py | 3 +- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 14 +- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 22 +- .../gg_tt01g.mad/bin/internal/banner.py | 3 +- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 20 +- .../cudacpp/gg_ttg.mad/bin/internal/banner.py | 3 +- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 12 +- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 20 +- .../gg_ttgg.mad/bin/internal/banner.py | 3 +- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 14 +- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 22 +- .../gg_ttggg.mad/bin/internal/banner.py | 3 +- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 14 +- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 24 +- .../cudacpp/gq_ttq.mad/bin/internal/banner.py | 3 +- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 12 +- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 6 +- .../CODEGEN_mad_pp_tt012j_log.txt | 56 +- .../pp_tt012j.mad/bin/internal/banner.py | 3 +- .../ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt | 1026 ++++++++--------- 21 files changed, 661 insertions(+), 640 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 104ac89849..ce5cfd8c71 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the 
Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005397319793701172  +DEBUG: model prefixing takes 0.005429983139038086  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,7 +177,7 @@ INFO: Creating files in directory P1_gg_ttx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -215,16 +215,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.110 s +Wrote files for 10 helas calls in 0.112 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.145 s +ALOHA: aloha creates 2 routines in 0.143 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.130 s +ALOHA: aloha creates 4 routines in 0.131 s VVV1 FFV1 FFV1 @@ -278,6 +278,7 @@ Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration. No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -317,6 +318,7 @@ patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py +Hunk #1 succeeded at 4188 (offset 1 line). patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 @@ -329,6 +331,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.336s -user 0m2.056s -sys 0m0.274s +real 0m2.594s +user 0m2.110s +sys 0m0.299s diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py b/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py index 7624b9f557..e9f421ae5f 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py @@ -1002,13 +1002,14 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() + self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - self.plugin_input(finput) + def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 2bd9dd5346..a1ba7b16b6 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load 
particles INFO: load vertices -DEBUG: model prefixing takes 0.0055425167083740234  +DEBUG: model prefixing takes 0.005548000335693359  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.008 s +1 processes with 3 diagrams generated in 0.009 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -201,12 +201,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/G DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1350]  -Generated helas calls for 1 subprocesses (3 diagrams) in 0.005 s +Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.141 s VVV1 FFV1 FFV1 @@ -228,6 +228,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering 
PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.593s -user 0m0.522s -sys 0m0.063s +real 0m1.311s +user 0m0.535s +sys 0m0.058s diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index ec552e612b..c40317e7b5 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~; add process g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005248308181762695  +DEBUG: model prefixing takes 0.005491971969604492  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -162,7 +162,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.019 s +1 processes with 16 diagrams generated in 0.020 s Total: 2 processes with 19 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -186,7 +186,7 @@ INFO: Creating files in directory P2_gg_ttxg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -229,7 +229,7 @@ INFO: Creating files in directory P1_gg_ttx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -267,14 +267,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s -Wrote files for 46 helas calls in 0.265 s +Wrote files for 46 helas calls in 0.269 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.329 s +ALOHA: aloha creates 5 routines in 0.324 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -282,7 +282,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 
ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.319 s +ALOHA: aloha creates 10 routines in 0.310 s VVV1 VVV1 FFV1 @@ -341,6 +341,7 @@ Using default eps viewer "evince". Set another one in ./input/mg5_configuration. No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -380,6 +381,7 @@ patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py +Hunk #1 succeeded at 4188 (offset 1 line). patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 @@ -402,6 +404,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.053s -user 0m2.731s -sys 0m0.311s +real 0m4.183s +user 0m2.629s +sys 0m0.332s diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py index 7624b9f557..e9f421ae5f 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py @@ -1002,13 +1002,14 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() + self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - self.plugin_input(finput) + def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index a080c269b8..46cb300397 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005520343780517578  +DEBUG: model prefixing takes 0.005367279052734375  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.022 s +1 processes with 16 diagrams generated in 0.021 s Total: 1 processes with 16 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -177,7 +177,7 @@ INFO: Creating files in directory P1_gg_ttxg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -217,14 +217,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s -Wrote files for 36 helas calls in 0.165 s +Wrote files for 36 helas calls in 0.163 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.321 s +ALOHA: aloha creates 5 routines in 0.325 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -232,7 +232,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.307 s +ALOHA: aloha creates 10 routines in 0.310 s VVV1 VVV1 FFV1 @@ -291,6 +291,7 @@ Using default eps viewer "evince". Set another one in ./input/mg5_configuration. No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -330,6 +331,7 @@ patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py +Hunk #1 succeeded at 4188 (offset 1 line). 
patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 @@ -348,6 +350,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.825s -user 0m2.539s -sys 0m0.271s +real 0m3.040s +user 0m2.538s +sys 0m0.303s diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py index 7624b9f557..e9f421ae5f 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py @@ -1002,13 +1002,14 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() + self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - self.plugin_input(finput) + def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 04729cf52e..cd7321fc3d 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0057621002197265625  +DEBUG: model prefixing takes 0.005832672119140625  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.021 s +1 processes with 16 diagrams generated in 0.022 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT @@ -211,7 +211,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.318 s +ALOHA: aloha creates 5 routines in 0.325 s VVV1 VVV1 FFV1 @@ -238,6 +238,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.839s -user 0m0.765s -sys 0m0.061s +real 0m0.927s +user 0m0.790s +sys 0m0.053s diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index e46a2aad90..ffe22b60ed 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005277156829833984  +DEBUG: model prefixing takes 0.005362749099731445  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,7 +177,7 @@ INFO: Creating files in directory P1_gg_ttxgg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -218,15 +218,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.419 s -Wrote files for 222 helas calls in 0.724 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.423 s +Wrote files for 222 helas calls in 0.735 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.322 s +ALOHA: aloha creates 5 routines in 0.330 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -234,7 +234,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.307 s +ALOHA: aloha creates 10 routines in 0.314 s VVV1 
VVV1 FFV1 @@ -296,6 +296,7 @@ Using default eps viewer "evince". Set another one in ./input/mg5_configuration. No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -335,6 +336,7 @@ patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py +Hunk #1 succeeded at 4188 (offset 1 line). patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 @@ -353,6 +355,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m4.067s -user 0m3.594s -sys 0m0.311s +real 0m3.973s +user 0m3.649s +sys 0m0.299s diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py index 7624b9f557..e9f421ae5f 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py @@ -1002,13 +1002,14 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() + self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - self.plugin_input(finput) + def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 4886319fa1..1dd6886647 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005265235900878906  +DEBUG: model prefixing takes 0.00539708137512207  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.155 s +1 processes with 123 diagrams generated in 0.156 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -205,7 +205,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/G DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1350]  -Generated helas calls for 1 subprocesses (123 diagrams) in 0.417 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.420 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -213,7 +213,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.314 s +ALOHA: aloha creates 5 routines in 0.315 s VVV1 VVV1 FFV1 @@ -243,6 +243,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m1.539s -user 0m1.427s -sys 0m0.051s +real 0m1.499s +user 0m1.428s +sys 0m0.057s diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt 
b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index f94189e829..bd0958642c 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005403995513916016  +DEBUG: model prefixing takes 0.005446672439575195  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.849 s +1 processes with 1240 diagrams generated in 1.874 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -179,7 +179,7 @@ INFO: Color-Flow passed to 1592 term in 36s. Introduce 2768 contraction DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -222,15 +222,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.434 s -Wrote files for 2281 helas calls in 46.235 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.505 s +Wrote files for 2281 helas calls in 47.140 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.315 s +ALOHA: aloha creates 5 routines in 0.314 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -238,7 +238,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.307 s +ALOHA: aloha creates 10 routines in 0.308 s VVV1 VVV1 FFV1 @@ -300,6 +300,7 @@ Using default eps viewer "evince". Set another one in ./input/mg5_configuration. No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -339,6 +340,7 @@ patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py +Hunk #1 succeeded at 4188 (offset 1 line). 
patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 @@ -357,6 +359,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m57.407s -user 0m56.369s -sys 0m0.817s +real 0m58.428s +user 0m57.226s +sys 0m0.949s diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py index 7624b9f557..e9f421ae5f 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py @@ -1002,13 +1002,14 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() + self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - self.plugin_input(finput) + def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 41f7811981..3587866b17 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005392789840698242  +DEBUG: model prefixing takes 0.005717754364013672  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.829 s +1 processes with 1240 diagrams generated in 1.849 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -207,7 +207,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/G DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1350]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.427 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.583 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -215,7 +215,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.341 s +ALOHA: aloha creates 5 routines in 0.346 s VVV1 VVV1 FFV1 @@ -245,6 +245,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m12.777s -user 0m12.625s -sys 0m0.105s +real 0m13.619s +user 0m12.877s +sys 0m0.114s diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt 
b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 7b525500f0..08e638f93f 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0057849884033203125  +DEBUG: model prefixing takes 0.005480527877807617  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.082 s +8 processes with 40 diagrams generated in 0.078 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -199,7 +199,7 @@ INFO: Creating files in directory P1_gu_ttxu DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -238,7 +238,7 @@ INFO: Creating files in directory P1_gux_ttxux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -273,17 +273,17 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s -Wrote files for 32 helas calls in 0.240 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s +Wrote files for 32 helas calls in 0.231 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.147 s +ALOHA: aloha creates 2 routines in 0.145 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.141 s +ALOHA: aloha creates 4 routines in 0.132 s FFV1 FFV1 FFV1 @@ -338,6 +338,7 @@ Using default eps viewer "evince". Set another one in ./input/mg5_configuration. 
No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -377,6 +378,7 @@ patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py +Hunk #1 succeeded at 4188 (offset 1 line). patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 @@ -409,6 +411,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.720s -user 0m2.351s -sys 0m0.337s +real 0m3.070s +user 0m2.298s +sys 0m0.288s diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py index 7624b9f557..e9f421ae5f 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py @@ -1002,13 +1002,14 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() + self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - self.plugin_input(finput) + def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 8b87565b21..9d11057ab5 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices 
-DEBUG: model prefixing takes 0.005353450775146484  +DEBUG: model prefixing takes 0.00550079345703125  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.076 s +8 processes with 40 diagrams generated in 0.077 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -258,7 +258,7 @@ Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.153 s +ALOHA: aloha creates 2 routines in 0.145 s FFV1 FFV1 FFV1 @@ -281,6 +281,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m1.348s -user 0m0.696s -sys 0m0.062s +real 0m0.980s +user 0m0.677s +sys 0m0.055s diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index 45f905069b..58c713c07f 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -177,7 +177,7 @@ Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines 
-ALOHA: aloha creates 1 routines in 0.060 s +ALOHA: aloha creates 1 routines in 0.061 s VVS3 FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. @@ -196,6 +196,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.480s -user 0m0.423s +real 0m0.569s +user 0m0.428s sys 0m0.052s diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 47ee63c762..e2de004e5e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005712747573852539  +DEBUG: model prefixing takes 0.005529165267944336  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.791 s +65 processes with 1119 diagrams generated in 1.916 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -499,7 +499,7 @@ INFO: Creating files in directory P2_gg_ttxgg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -544,7 +544,7 @@ INFO: Creating files in directory P2_gg_ttxuux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -585,7 +585,7 @@ INFO: Creating files in directory P2_gu_ttxgu DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -626,7 +626,7 @@ INFO: Creating files in directory P2_gux_ttxgux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -667,7 +667,7 @@ INFO: Creating files in directory P2_uux_ttxgg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -708,7 +708,7 @@ INFO: Creating files in directory P1_gg_ttxg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -751,7 +751,7 @@ INFO: Creating files in directory P2_uu_ttxuu DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -788,7 +788,7 @@ INFO: Creating files in directory P2_uux_ttxuux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -825,7 +825,7 @@ INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -862,7 +862,7 @@ INFO: Creating files in directory P2_uc_ttxuc DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -899,7 +899,7 @@ INFO: Creating files in directory P2_uux_ttxccx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -936,7 +936,7 @@ INFO: Creating files in directory P2_ucx_ttxucx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -973,7 +973,7 @@ INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -1010,7 +1010,7 @@ INFO: Creating files in directory P1_gu_ttxu DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -1049,7 +1049,7 @@ INFO: Creating files in directory P1_gux_ttxux DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -1088,7 +1088,7 @@ INFO: Creating files in directory P1_uux_ttxg DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -1127,7 +1127,7 @@ INFO: Creating files in directory P0_gg_ttx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -1168,7 +1168,7 @@ INFO: Creating files in directory P0_uux_ttx DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  @@ -1201,15 +1201,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.262 s -Wrote files for 810 helas calls in 3.360 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.276 s +Wrote files for 810 helas calls in 3.348 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.331 s +ALOHA: aloha creates 5 routines in 0.333 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -1217,7 +1217,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines 
ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.311 s +ALOHA: aloha creates 10 routines in 0.312 s VVV1 VVV1 FFV1 @@ -1279,6 +1279,7 @@ Using default eps viewer "evince". Set another one in ./input/mg5_configuration. No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -1318,6 +1319,7 @@ patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py +Hunk #1 succeeded at 4188 (offset 1 line). patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 @@ -1530,6 +1532,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m9.518s -user 0m8.931s -sys 0m0.545s +real 0m9.679s +user 0m9.072s +sys 0m0.546s diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py index 7624b9f557..e9f421ae5f 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py @@ -1002,13 +1002,14 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() + self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - self.plugin_input(finput) + def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/pp_tt012j.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt b/epochX/cudacpp/pp_tt012j.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt index dd90c94acf..d596b33ae7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt +++ b/epochX/cudacpp/pp_tt012j.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt @@ -4,7 +4,7 @@ Event 0 Batch 0 2 2.647483690509011e+02 7.527657265342380e+01 -2.528976247704283e+02 -2.163164141117315e+01 3 6.252973211776936e+02 -5.721080498766041e+02 -1.578766990348905e+01 2.518727230515587e+02 4 6.099543097714056e+02 4.968314772231802e+02 2.686852946739174e+02 -2.302410816403857e+02 - ME 3.498510462248670e-04 + ME 6.254927412618323e-05 Event 1 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -12,7 +12,7 @@ Event 1 Batch 0 2 2.542827954151951e+02 1.482213322085297e+02 -1.988618298139058e+02 -5.607271498295615e+01 3 6.883656117507998e+02 1.265478873489434e+02 5.602777828023585e+02 3.793700749224233e+02 4 5.573515928340058e+02 -2.747692195574731e+02 -3.614159529884527e+02 -3.232973599394667e+02 - ME 7.257243108248426e-04 + ME 8.120933129385430e-05 Event 2 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -20,7 +20,7 @@ 
Event 2 Batch 0 2 4.301460683791099e+02 -3.656995432079240e+02 -2.257802895903974e+02 -1.768459985405173e+01 3 5.058528987551350e+02 2.755467101243707e+02 -2.034821274188550e+02 3.722313656043856e+02 4 5.640010328657550e+02 9.015283308355326e+01 4.292624170092524e+02 -3.545467657503340e+02 - ME 8.130044127338102e-04 + ME 1.104115154253218e-04 Event 3 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -28,7 +28,7 @@ Event 3 Batch 0 2 6.758793342627306e+02 1.455349847705337e+02 4.360940220328824e+02 -4.954335945799966e+02 3 3.008019460079605e+02 -1.607139834787174e+02 2.732727402256846e+01 2.527964523704278e+02 4 5.233187197293092e+02 1.517899870818368e+01 -4.634212960554508e+02 2.426371422095687e+02 - ME 7.753277710143621e-05 + ME 4.288074098478053e-05 Event 4 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -36,7 +36,7 @@ Event 4 Batch 0 2 3.540811678028369e+02 5.414642718170588e+01 -3.497885023717100e+02 -9.467915537920108e+00 3 7.415000547748695e+02 1.453779348794601e+00 7.277337852109665e+02 1.422102514562805e+02 4 4.044187774222938e+02 -5.560020653050046e+01 -3.779452828392566e+02 -1.327423359183605e+02 - ME 2.015528729476554e-04 + ME 1.304731284254719e-05 Event 5 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -44,7 +44,7 @@ Event 5 Batch 0 2 4.747467875786874e+02 2.462969907607520e+02 3.713870243947702e+02 1.636886763636381e+02 3 3.438196236093862e+02 -2.056491112573935e+02 2.636029701703988e+02 8.021128807897365e+01 4 6.814335888119255e+02 -4.064787950335840e+01 -6.349899945651691e+02 -2.438999644426124e+02 - ME 6.140777519977192e-04 + ME 1.932390649640220e-04 Event 6 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -52,7 +52,7 @@ Event 6 Batch 0 2 5.623951200922340e+02 4.644673798421034e+02 3.089047820108764e+02 -7.166700647426805e+01 3 
2.268243199894467e+02 1.761899852590787e+02 -7.114332369064562e+01 -1.238748914321566e+02 4 7.107805599183188e+02 -6.406573651011822e+02 -2.377614583202307e+02 1.955418979064247e+02 - ME 8.375373201653861e-04 + ME 1.929702539767979e-04 Event 7 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -60,7 +60,7 @@ Event 7 Batch 0 2 4.922243378496302e+02 2.878585072835456e+02 -1.441537488072182e+02 -3.723465794939189e+02 3 2.873990637609374e+02 -5.400981623596619e+01 -8.913204919452846e+01 -2.678369642286231e+02 4 7.203765983894325e+02 -2.338486910475794e+02 2.332857980017467e+02 6.401835437225419e+02 - ME 2.045598717079573e-03 + ME 6.280412585349807e-04 Event 8 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -68,7 +68,7 @@ Event 8 Batch 0 2 3.353309706037128e+02 -7.529439061162444e+01 -4.917829145606096e+01 -3.230466069128648e+02 3 7.169322705461503e+02 -1.597426278178964e+02 -1.460012137440150e+01 6.987567601563110e+02 4 4.477367588501368e+02 2.350370184295208e+02 6.377841283046249e+01 -3.757101532434461e+02 - ME 5.176104304710922e-03 + ME 1.424871539111113e-03 Event 9 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -76,7 +76,7 @@ Event 9 Batch 0 2 2.557626120875720e+02 2.000882245504951e+02 -5.276260741790070e+01 -1.503174088272977e+02 3 7.044202058180884e+02 -6.969679478438196e+02 -1.019614549623775e+02 6.882422911146106e+00 4 5.398171820943397e+02 4.968797232933244e+02 1.547240623802783e+02 1.434349859161515e+02 - ME 6.498215193902510e-05 + ME 1.126010180174107e-05 Event 10 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -84,7 +84,7 @@ Event 10 Batch 0 2 3.466796552973448e+02 1.172124288883391e+02 -1.804077050554743e+02 2.718475489457261e+02 3 5.174471655316495e+02 -1.610456139025784e+02 -4.497410659869822e+02 -1.988689340353916e+02 4 6.358731791710053e+02 
4.383318501423926e+01 6.301487710424565e+02 -7.297861491033444e+01 - ME 2.111165581639245e-04 + ME 8.292383053707579e-05 Event 11 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -92,7 +92,7 @@ Event 11 Batch 0 2 5.730783827248506e+02 -3.059484875398849e+01 3.466457017175528e+02 -4.553235612803233e+02 3 4.410994673708892e+02 -3.026218886155176e+02 -1.990641070399019e+01 3.203005892260318e+02 4 4.858221499042607e+02 3.332167373695061e+02 -3.267392910135624e+02 1.350229720542913e+02 - ME 5.129802099928076e-05 + ME 2.195851954305949e-05 Event 12 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -100,7 +100,7 @@ Event 12 Batch 0 2 2.275003875859171e+02 -1.247450244086003e+02 1.654605359856639e+02 9.390376067217456e+01 3 6.138170466352969e+02 3.363961838598331e+02 -2.139358085817026e+01 5.129827374509639e+02 4 6.586825657787861e+02 -2.116511594512328e+02 -1.440669551274935e+02 -6.068864981231385e+02 - ME 5.249882090061186e-02 + ME 3.843244876666358e-03 Event 13 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -108,7 +108,7 @@ Event 13 Batch 0 2 2.867684047377951e+02 7.055192702127012e+01 -2.028354730671929e+02 1.900429278217245e+02 3 6.990707050557395e+02 -5.605742285334717e+02 2.413419117565430e+02 -3.408965629057132e+02 4 5.141608902064654e+02 4.900223015122016e+02 -3.850643868935023e+01 1.508536350839886e+02 - ME 6.422048006176975e-05 + ME 1.780264803426774e-05 Event 14 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -116,7 +116,7 @@ Event 14 Batch 0 2 3.551549262960330e+02 1.090410064132905e+02 3.205839746298526e+02 1.071027348074892e+02 3 5.276349775014137e+02 3.895763694332612e+02 -2.529209653865598e+02 2.503196099590423e+02 4 6.172100962025531e+02 -4.986173758465519e+02 -6.766300924329285e+01 -3.574223447665315e+02 - ME 7.422587439250419e-04 + ME 
1.172793340377339e-04 Event 15 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -124,7 +124,7 @@ Event 15 Batch 0 2 5.846731991828425e+02 7.106081559720657e+01 3.900476102503054e+02 4.297161529048979e+02 3 2.829885923647302e+02 -2.767806781033229e+02 5.223342094943639e+01 -2.732525156618249e+01 4 6.323382084524278e+02 2.057198625061163e+02 -4.422810311997417e+02 -4.023909013387152e+02 - ME 1.255922738422332e-03 + ME 2.768931482482754e-04 Event 16 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -132,7 +132,7 @@ Event 16 Batch 0 2 7.471577506095512e+02 1.666056475215676e+02 -5.784682380714994e+02 -4.425627187781379e+02 3 6.589296733908160e+02 -1.235441202519038e+02 5.251239647671507e+02 3.783780998595698e+02 4 9.391257599963087e+01 -4.306152726966400e+01 5.334427330434855e+01 6.418461891856485e+01 - ME 5.526726502577864e-05 + ME 3.619360847906487e-05 Event 17 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -140,7 +140,7 @@ Event 17 Batch 0 2 3.567490993131759e+02 3.856364495163717e+01 -1.708845728849435e+02 -3.107752047682324e+02 3 6.453207560475681e+02 4.468356462873772e+02 2.282834847349605e+02 4.057874246326636e+02 4 4.979301446392561e+02 -4.853992912390142e+02 -5.739891185001719e+01 -9.501221986443127e+01 - ME 1.327369996555111e-04 + ME 3.400819398697452e-05 Event 18 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -148,7 +148,7 @@ Event 18 Batch 0 2 4.856701782481425e+02 2.509110753153842e+02 -3.498523763974107e+02 -2.247720379690150e+02 3 3.014847498930008e+02 -1.059425909901355e+02 -2.435847754696140e+02 -1.426032222348426e+02 4 7.128450718588564e+02 -1.449684843252488e+02 5.934371518670247e+02 3.673752602038576e+02 - ME 1.018512933050835e-03 + ME 1.704840743724005e-04 Event 19 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 
7.500000000000000e+02 @@ -156,7 +156,7 @@ Event 19 Batch 0 2 5.848213503304410e+02 -3.141116763848333e+02 -1.950442390378232e+02 4.531088295091878e+02 3 5.769300027107226e+02 5.020221748138873e+02 2.252239828724832e+02 -1.734823378963534e+02 4 3.382486469588368e+02 -1.879104984290540e+02 -3.017974383465995e+01 -2.796264916128346e+02 - ME 4.267017342507976e-03 + ME 1.566312636528492e-04 Event 20 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -164,7 +164,7 @@ Event 20 Batch 0 2 5.550938429889906e+02 -4.478597170519693e+02 -1.958065402362923e+02 -2.630791652090858e+02 3 5.585686897587655e+02 3.351111310173187e+02 -1.360174455686903e+02 4.256744830831253e+02 4 3.863374672522434e+02 1.127485860346507e+02 3.318239858049826e+02 -1.625953178740396e+02 - ME 2.768271682113988e-04 + ME 4.443882992804106e-05 Event 21 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -172,7 +172,7 @@ Event 21 Batch 0 2 6.296556563991993e+02 -3.477135312394776e+02 -1.376147989324512e+02 -5.065804111325866e+02 3 3.137568007204202e+02 1.080474571851863e+02 -2.382188236683311e+02 1.732653140250679e+02 4 5.565875428803801e+02 2.396660740542913e+02 3.758336226007823e+02 3.333150971075189e+02 - ME 5.519034669639832e-05 + ME 2.195742323347977e-05 Event 22 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -180,7 +180,7 @@ Event 22 Batch 0 2 5.583338925767162e+02 2.471586228668332e+02 -1.597599499756147e+02 -4.744745610949311e+02 3 5.378723432497920e+02 9.149532098241385e+00 4.314513680009925e+02 3.210493120152684e+02 4 4.037937641734921e+02 -2.563081549650745e+02 -2.716914180253778e+02 1.534252490796627e+02 - ME 3.705224437539572e-05 + ME 1.393143104564022e-05 Event 23 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -188,7 +188,7 @@ Event 23 Batch 0 2 6.057340011976822e+02 6.848115528115159e+01 
-5.207204912425279e+02 -3.017849923015605e+02 3 6.884459352783615e+02 -2.949639632364767e+01 6.680977958792448e+02 1.635026102131439e+02 4 2.058200635239559e+02 -3.898475895750391e+01 -1.473773046367171e+02 1.382823820884168e+02 - ME 2.946248744974782e-05 + ME 1.074117284514867e-05 Event 24 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -196,7 +196,7 @@ Event 24 Batch 0 2 4.702316790647315e+02 -1.210575128627593e+02 4.313728504035306e+02 -1.427598490831810e+02 3 7.180482366151732e+02 1.040047389253588e+02 -7.104588047260974e+02 4.956931953573291e+00 4 3.117200843200960e+02 1.705277393740069e+01 2.790859543225674e+02 1.378029171296075e+02 - ME 3.146557994448562e-05 + ME 5.213387311993420e-06 Event 25 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -204,7 +204,7 @@ Event 25 Batch 0 2 6.261365010744016e+02 -5.354018140499276e+02 -2.095559720530078e+02 2.479477970595020e+02 3 5.483958991041942e+02 5.199465180092641e+02 -9.843995208133505e+01 -1.438862620216537e+02 4 3.254675998214045e+02 1.545529604066345e+01 3.079959241343431e+02 -1.040615350378483e+02 - ME 1.657640191611339e-04 + ME 1.695323153210731e-05 Event 26 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -212,7 +212,7 @@ Event 26 Batch 0 2 4.635816356180677e+02 1.904702824079147e+02 -2.351549941335565e+02 -3.511853259118595e+02 3 3.686385821486527e+02 -2.712527815845713e+02 -6.015354190959191e+01 -2.422764621809819e+02 4 6.677797822332798e+02 8.078249917665664e+01 2.953085360431485e+02 5.934617880928415e+02 - ME 3.250975879010065e-04 + ME 1.052251904460155e-04 Event 27 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -220,7 +220,7 @@ Event 27 Batch 0 2 2.851713673150520e+02 1.387976072955998e+02 1.520424011317634e+02 -1.973348453858079e+02 3 6.747356481771329e+02 2.426633222154767e+02 
-4.300238522839811e+02 4.598501858640580e+02 4 5.400929845078149e+02 -3.814609295110765e+02 2.779814511522176e+02 -2.625153404782502e+02 - ME 4.155279516527712e-04 + ME 7.957109124083736e-05 Event 28 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -228,7 +228,7 @@ Event 28 Batch 0 2 1.977804200471008e+02 -1.803202618401224e+02 -8.082809162516925e+01 -8.277519444290659e+00 3 7.197523834069627e+02 3.152541965091956e+02 6.467033971658861e+02 -2.080867841663842e+01 4 5.824671965459364e+02 -1.349339346690732e+02 -5.658753055407169e+02 2.908619786092899e+01 - ME 1.172809031809504e-04 + ME 1.748013159755222e-05 Event 29 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -236,7 +236,7 @@ Event 29 Batch 0 2 6.123364628491765e+02 -3.746492624245139e+02 3.785128791537567e+02 -3.021950929683376e+02 3 4.056577755659300e+02 1.796205570313495e+00 -8.781658530568643e+01 3.960344074293251e+02 4 4.820057615848937e+02 3.728530568542006e+02 -2.906962938480702e+02 -9.383931446098750e+01 - ME 5.496242925842306e-04 + ME 3.085570985177973e-04 Event 30 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -244,7 +244,7 @@ Event 30 Batch 0 2 7.349194950356053e+02 7.241679607953656e+02 1.425637322816703e+01 1.244354634469208e+02 3 7.321421454671275e+02 -7.253765693071590e+02 -2.895970851972107e+01 -9.498573130653318e+01 4 3.293835949726734e+01 1.208608511793152e+00 1.470333529155409e+01 -2.944973214038765e+01 - ME 5.147061682527938e-02 + ME 3.267107835672361e-04 Event 31 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -252,7 +252,7 @@ Event 31 Batch 0 2 1.718338270585457e+02 -1.344914872264095e+02 -1.021614404532311e+02 3.165350011824393e+01 3 6.313115253715935e+02 -2.849940593920691e+02 -7.916450257599642e+01 -5.577325610990745e+02 4 6.968546475698608e+02 4.194855466184786e+02 
1.813259430292275e+02 5.260790609808306e+02 - ME 4.645345268703414e-04 + ME 1.685680846028125e-04 Event 32 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -260,7 +260,7 @@ Event 32 Batch 0 2 7.235176898898732e+02 -4.762113006241282e+02 -2.880822916693121e+01 5.439400065022983e+02 3 6.603902828461299e+02 4.672103814637360e+02 1.031050210016798e+02 -4.551913221650266e+02 4 1.160920272639969e+02 9.000919160392018e+00 -7.429679183474862e+01 -8.874868433727177e+01 - ME 4.476006843186700e-03 + ME 2.173072900368875e-04 Event 33 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -268,7 +268,7 @@ Event 33 Batch 0 2 4.786737271642286e+02 2.009638309376703e+02 4.090184839380260e+02 1.464443769121513e+02 3 3.795793219608408e+02 -6.057523839522271e+00 -8.244277697544294e+01 3.704685635647950e+02 4 6.417469508749314e+02 -1.949063070981495e+02 -3.265757069625828e+02 -5.169129404769461e+02 - ME 1.351709676586880e-02 + ME 3.322437827682699e-03 Event 34 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -276,7 +276,7 @@ Event 34 Batch 0 2 6.621583515140109e+02 -5.051303032557109e+02 -1.429543729176959e+02 4.035605363216953e+02 3 3.008522892707525e+02 8.677543723835062e+01 2.726747894692539e+02 -9.290092916351111e+01 4 5.369893592152367e+02 4.183548660173603e+02 -1.297204165515579e+02 -3.106596071581844e+02 - ME 6.460854093057828e-04 + ME 9.294666462955388e-05 Event 35 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -284,7 +284,7 @@ Event 35 Batch 0 2 6.158114977149372e+02 2.502256147979830e+02 4.233348779616202e+00 5.626659943296695e+02 3 1.476397433483021e+02 -1.670550278282843e+01 -6.055370982200890e+01 1.336101351676488e+02 4 7.365487589367605e+02 -2.335201120151546e+02 5.632036104239269e+01 -6.962761294973184e+02 - ME 2.101231899117793e+00 + ME 5.450893768264864e-01 Event 36 
Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -292,7 +292,7 @@ Event 36 Batch 0 2 7.182456511154913e+02 -7.463771462544163e+01 -6.667773110518942e+02 2.563475070450518e+02 3 4.860008755751825e+02 -7.840660561780868e+01 4.141081959217036e+02 -2.419992919944378e+02 4 2.957534733093268e+02 1.530443202432501e+02 2.526691151301903e+02 -1.434821505061448e+01 - ME 9.644531209480271e-05 + ME 1.793136635525090e-05 Event 37 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -300,7 +300,7 @@ Event 37 Batch 0 2 5.672182018814327e+02 -2.031706828392718e+00 -5.267408190306547e+02 2.104197478372323e+02 3 4.664069288608281e+02 3.712365792892206e+02 2.604523782658950e+02 -1.090109358856581e+02 4 4.663748692577387e+02 -3.692048724608279e+02 2.662884407647597e+02 -1.014088119515743e+02 - ME 1.216876552012178e-04 + ME 1.885829354904198e-05 Event 38 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -308,7 +308,7 @@ Event 38 Batch 0 2 5.068057345787187e+02 4.883513201966852e+02 -7.570036138649985e+01 -1.124032737511800e+02 3 3.871140338254017e+02 -1.153787089711745e+02 -3.599073977747533e+02 -8.373585688177315e+01 4 6.060802315958797e+02 -3.729726112255107e+02 4.356077591612532e+02 1.961391306329531e+02 - ME 1.006736553113524e-04 + ME 2.004468492837133e-05 Event 39 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -316,7 +316,7 @@ Event 39 Batch 0 2 4.960337392567769e+02 -3.669089247616476e+02 2.651961920161227e+02 -2.027271347192069e+02 3 2.837821967046824e+02 -2.822567153069604e+02 -2.935613327724534e+01 -1.303560381865560e+00 4 7.201840640385411e+02 6.491656400686079e+02 -2.358400587388775e+02 2.040306951010725e+02 - ME 1.372807525012575e-03 + ME 2.738639406673165e-04 Event 40 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-324,7 +324,7 @@ Event 40 Batch 0 2 3.080730228651936e+02 -3.065830270999447e+02 -2.484308296331460e+01 1.728167064871203e+01 3 6.842346640746094e+02 4.630487823766367e+02 8.554554725666550e+01 -4.964321303112498e+02 4 5.076923130601962e+02 -1.564657552766919e+02 -6.070246429335075e+01 4.791504596625378e+02 - ME 4.192363154074847e-05 + ME 4.316353181637933e-05 Event 41 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -332,7 +332,7 @@ Event 41 Batch 0 2 1.602650851118221e+02 -1.258781096038287e+02 -9.817642232798531e+01 1.417706342452912e+01 3 7.146392966623014e+02 6.799675591776853e+02 -1.019163870176435e+02 1.948499239342933e+02 4 6.250956182258764e+02 -5.540894495738563e+02 2.000928093456288e+02 -2.090269873588226e+02 - ME 4.523507186168379e-04 + ME 6.118266190948034e-05 Event 42 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -340,7 +340,7 @@ Event 42 Batch 0 2 1.687893235969910e+02 1.289401357197518e+02 4.788693514682045e+01 9.783209393213438e+01 3 7.042017295435162e+02 -1.022058447296739e+02 -6.640064324330017e+02 -2.110675220936915e+02 4 6.270089468594927e+02 -2.673429099007782e+01 6.161194972861812e+02 1.132354281615572e+02 - ME 1.686356189272381e-04 + ME 4.091574289077424e-05 Event 43 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -348,7 +348,7 @@ Event 43 Batch 0 2 4.729783670130408e+02 -7.983817933050123e+01 9.052957805204315e+01 4.573169538528310e+02 3 5.638402597824536e+02 4.785250044669658e+02 7.435095949863268e+01 -2.887933404236804e+02 4 4.631813732045056e+02 -3.986868251364646e+02 -1.648805375506758e+02 -1.685236134291506e+02 - ME 5.938757690519573e-04 + ME 2.654067897204875e-04 Event 44 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -356,7 +356,7 @@ Event 44 Batch 0 2 1.774791104122977e+02 -1.952605982635784e+01 6.371003613266313e+01 
1.644949814321787e+02 3 7.194816205691247e+02 -3.678871192485065e+02 2.644831693887214e+01 -6.177486190667772e+02 4 6.030392690185777e+02 3.874131790748646e+02 -9.015835307153536e+01 4.532536376345985e+02 - ME 2.092333697371024e-04 + ME 1.390282437939369e-04 Event 45 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -364,7 +364,7 @@ Event 45 Batch 0 2 7.477488480180839e+02 -3.787655987618923e+02 1.634662296474455e+02 6.236535517992064e+02 3 7.458113398274099e+02 3.819163358711198e+02 -1.661042992235261e+02 -6.186952632673017e+02 4 6.439812154506046e+00 -3.150737109227506e+00 2.638069576080606e+00 -4.958288531904773e+00 - ME 9.377954359926730e-02 + ME 4.591622113024210e-03 Event 46 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -372,7 +372,7 @@ Event 46 Batch 0 2 3.243146757688279e+02 -4.392587631431587e+00 -2.496903827548322e+02 -2.069188895501946e+02 3 5.341608950426614e+02 -2.704482657861201e+02 2.711825143656835e+02 -3.723515022507137e+02 4 6.415244291885106e+02 2.748408534175518e+02 -2.149213161085120e+01 5.792703918009084e+02 - ME 1.879047912263320e-04 + ME 7.845213441237594e-05 Event 47 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -380,7 +380,7 @@ Event 47 Batch 0 2 6.742198761450968e+02 -3.282965096491567e+02 5.301803926793563e+02 -2.563251730900704e+02 3 6.484148720042493e+02 3.527030795571956e+02 -3.975273148506379e+02 3.715029176935211e+02 4 1.773652518506536e+02 -2.440656990803885e+01 -1.326530778287185e+02 -1.151777446034508e+02 - ME 1.136665455996279e-03 + ME 5.254395938575492e-05 Event 48 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -388,7 +388,7 @@ Event 48 Batch 0 2 7.321401810535270e+02 -1.843482647928687e+02 4.412348098999295e+02 5.543976952635381e+02 3 7.293058265076229e+02 2.182722651304250e+02 -4.435200216702997e+02 
-5.362221528717154e+02 4 3.855399243885009e+01 -3.392400033755636e+01 2.285211770370227e+00 -1.817554239182278e+01 - ME 2.278442596973106e-03 + ME 2.330290263553363e-04 Event 49 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -396,7 +396,7 @@ Event 49 Batch 0 2 3.511117284856090e+02 -3.272266866652174e+02 5.199533974843238e+01 1.161835877338140e+02 3 7.326526490901410e+02 6.615045961628415e+02 -2.993354007364775e+02 -9.792799058578566e+01 4 4.162356224242500e+02 -3.342779094976241e+02 2.473400609880451e+02 -1.825559714802838e+01 - ME 8.806759903737244e-05 + ME 7.863589115869630e-06 Event 50 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -404,7 +404,7 @@ Event 50 Batch 0 2 7.322170903075255e+02 2.740692406080844e+02 1.952596610981929e+01 -6.787095515302592e+02 3 3.078559130669522e+02 -1.663333363406682e+02 8.625456119089935e+01 2.442716420418760e+02 4 4.599269966255216e+02 -1.077359042674159e+02 -1.057805273007185e+02 4.344379094883832e+02 - ME 7.579426018596712e-05 + ME 6.765758192049922e-05 Event 51 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -412,7 +412,7 @@ Event 51 Batch 0 2 3.473696038265160e+02 -2.922314643158454e+02 -6.759614889845234e+01 -1.752060888796554e+02 3 5.389399151999496e+02 -2.449040872454050e+02 9.346474502284556e+01 4.708954891311219e+02 4 6.136904809735339e+02 5.371355515612503e+02 -2.586859612439322e+01 -2.956894002514666e+02 - ME 4.687828430739845e-04 + ME 2.035652280642710e-04 Event 52 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -420,7 +420,7 @@ Event 52 Batch 0 2 6.818614816439094e+02 5.970116833066725e+02 3.013730734325877e+02 1.329902280423528e+02 3 2.108623144448950e+02 -4.198344769951654e+00 -1.698802183673395e+02 -1.248439063859965e+02 4 6.072762039111957e+02 -5.928133385367207e+02 -1.314928550652483e+02 
-8.146321656356344e+00 - ME 1.636869658416981e-04 + ME 4.047005152694340e-05 Event 53 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -428,7 +428,7 @@ Event 53 Batch 0 2 5.157714002491656e+02 -5.140718537651751e+02 -4.182413977701254e+01 1.003899065692042e+00 3 5.148181840855221e+02 2.868792199999327e+02 1.974924151010656e+02 3.791237552236646e+02 4 4.694104156653124e+02 2.271926337652422e+02 -1.556682753240530e+02 -3.801276542893567e+02 - ME 3.182294022992135e-03 + ME 1.547751010871262e-04 Event 54 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -436,7 +436,7 @@ Event 54 Batch 0 2 6.433410767101752e+02 2.586883950027282e+02 -5.809813083922761e+02 9.710187728524583e+01 3 6.928799734080563e+02 -1.579832568796111e+02 6.405510983559769e+02 -2.117031848853746e+02 4 1.637789498817686e+02 -1.007051381231171e+02 -5.956978996370073e+01 1.146013076001288e+02 - ME 3.280140142776471e-05 + ME 1.302720215079095e-05 Event 55 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -444,7 +444,7 @@ Event 55 Batch 0 2 7.193759752058201e+02 -3.536444481659258e+02 -7.212523476050659e+01 -6.222823703878202e+02 3 5.307053661742267e+02 2.409461639849982e+02 1.900944302490854e+02 4.329633233142391e+02 4 2.499186586199529e+02 1.126982841809279e+02 -1.179691954885788e+02 1.893190470735813e+02 - ME 3.939174164528502e-05 + ME 3.087450123310173e-05 Event 56 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -452,7 +452,7 @@ Event 56 Batch 0 2 3.858864959547013e+02 1.815174721437793e+02 3.218581876578407e+02 -1.112074732396182e+02 3 4.484505297447187e+02 -3.244105157450006e+02 2.934585578803474e+02 -9.873079412811623e+01 4 6.656629743005793e+02 1.428930436012212e+02 -6.153167455381879e+02 2.099382673677345e+02 - ME 2.326138625268126e-04 + ME 4.275995533811995e-05 Event 57 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -460,7 +460,7 @@ Event 57 Batch 0 2 5.284589752749192e+02 3.868194647882293e+02 -1.709996888155517e+02 3.168575336559793e+02 3 6.299868555278971e+02 -1.587414880613579e+02 2.327134172236622e+02 -5.634971548731005e+02 4 3.415541691971835e+02 -2.280779767268714e+02 -6.171372840811043e+01 2.466396212171210e+02 - ME 3.474853710074164e-05 + ME 2.211478424702745e-05 Event 58 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -468,7 +468,7 @@ Event 58 Batch 0 2 6.172037319760957e+02 -2.246119436411400e+02 -2.286037628748728e+01 5.744278237820342e+02 3 5.117934503257735e+02 1.262762853074207e+02 3.215736628881853e+02 -3.775939815489577e+02 4 3.710028176981306e+02 9.833565833371921e+01 -2.987132866006979e+02 -1.968338422330765e+02 - ME 6.183305374210038e-04 + ME 1.857727050583390e-04 Event 59 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -476,7 +476,7 @@ Event 59 Batch 0 2 7.388935626701858e+02 -3.912134623809441e+02 -5.457789630286015e+02 3.082872805076099e+02 3 1.936051438730608e+02 1.561492575196544e+02 8.304673385628061e+01 -7.876294246644987e+01 4 5.675012934567535e+02 2.350642048612896e+02 4.627322291723209e+02 -2.295243380411600e+02 - ME 4.116991424436793e-04 + ME 6.745345781245190e-05 Event 60 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -484,7 +484,7 @@ Event 60 Batch 0 2 7.258141426633659e+02 -5.584991156701968e+02 1.635894950857984e+02 4.337319270970709e+02 3 2.789580074371136e+02 2.331554478032953e+02 6.512410160032128e+01 -1.386180308029247e+02 4 4.952278498995201e+02 3.253436678669015e+02 -2.287135966861195e+02 -2.951138962941461e+02 - ME 7.295672680059989e-04 + ME 9.170244877267536e-05 Event 61 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -492,15 +492,15 @@ 
Event 61 Batch 0 2 5.906141202026897e+02 4.485275282318680e+02 -2.043613424290570e+02 3.253990429020988e+02 3 4.163572165237975e+02 -4.021600557528675e+02 -4.112755461437413e+01 9.964509802161204e+01 4 4.930286632735124e+02 -4.636747247900051e+01 2.454888970434311e+02 -4.250441409237108e+02 - ME 5.845307122272604e-03 + ME 1.836685601489136e-04 Event 62 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 1 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 -7.500000000000000e+02 2 7.346180891175762e+02 3.693463141798367e+02 7.549194961263061e+01 -6.305140780380819e+02 3 4.420621433230785e+02 -2.806743363126464e+02 3.467380983154045e+01 3.397625382625571e+02 - 4 3.233197675593453e+02 -8.867197786719018e+01 -1.101657594441711e+02 2.907515397755249e+02 - ME 3.963631774242112e-05 + 4 3.233197675593452e+02 -8.867197786719018e+01 -1.101657594441711e+02 2.907515397755248e+02 + ME 3.490896135533686e-05 Event 63 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -508,7 +508,7 @@ Event 63 Batch 0 2 6.451039732729313e+02 -2.415045377667665e+02 1.990362537024482e+02 -5.641092662620230e+02 3 3.260870385294104e+02 2.061141051805976e+02 -2.496695602716584e+02 3.892098426606745e+01 4 5.288089881976584e+02 3.539043258616898e+01 5.063330656921013e+01 5.251882819959555e+02 - ME 4.832224458906289e-04 + ME 4.428689394331114e-04 Event 64 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -516,7 +516,7 @@ Event 64 Batch 0 2 5.275973380665291e+02 -6.064553482667328e+01 4.309976929667101e+02 -2.981980196075213e+02 3 5.799838776791826e+02 3.279821268626862e+02 -1.824214634122377e+02 4.421893627315650e+02 4 3.924187842542880e+02 -2.673365920360130e+02 -2.485762295544724e+02 -1.439913431240437e+02 - ME 2.175617604507715e-04 + ME 4.205989960223865e-05 Event 65 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 
7.500000000000000e+02 @@ -524,7 +524,7 @@ Event 65 Batch 0 2 6.480172869826541e+02 2.720879118036237e+02 -5.153900904044360e+02 -2.833154199679406e+02 3 7.075023253568394e+02 -3.440299289242928e+02 4.709796137500282e+02 4.004761563708322e+02 4 1.444803876605064e+02 7.194201712066916e+01 4.441047665440794e+01 -1.171607364028916e+02 - ME 4.989956280474397e-03 + ME 1.103463366798231e-04 Event 66 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -532,7 +532,7 @@ Event 66 Batch 0 2 5.472978185025795e+02 4.857452785131266e+02 -2.223654169683454e+02 -1.189119332799752e+02 3 3.203062148499983e+02 1.169702135976477e+02 2.922172461416276e+02 -5.935588816501102e+01 4 6.323959666474225e+02 -6.027154921107744e+02 -6.985182917328234e+01 1.782678214449862e+02 - ME 1.346850069104626e-04 + ME 2.913920636000223e-05 Event 67 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -540,7 +540,7 @@ Event 67 Batch 0 2 4.264671493042950e+02 1.195959046886511e+02 -2.647539231733031e+02 3.122121220929446e+02 3 5.059969655247565e+02 3.777175441887567e+02 -7.608313561896731e+00 -3.366073372596325e+02 4 5.675358851709483e+02 -4.973134488774080e+02 2.723622367352000e+02 2.439521516668857e+01 - ME 9.763221977220593e-05 + ME 4.009347519102052e-05 Event 68 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -548,7 +548,7 @@ Event 68 Batch 0 2 5.996105691520872e+02 -3.814725562071957e+02 -3.417794545715573e+02 3.117664637712124e+02 3 2.164196744806214e+02 1.292759463548889e+02 -1.184749651041615e+02 1.268419798013013e+02 4 6.839697563672917e+02 2.521966098523068e+02 4.602544196757188e+02 -4.386084435725137e+02 - ME 2.936083529685707e-03 + ME 6.175473672610461e-04 Event 69 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -556,7 +556,7 @@ Event 69 Batch 0 2 4.950546755511076e+02 -1.873718558932053e+02 
-4.578972175289678e+02 -1.735101101888631e+01 3 4.768584394819691e+02 -1.830244097668608e+02 2.985566003539791e+02 -3.236664843936508e+02 4 5.280868849669230e+02 3.703962656600661e+02 1.593406171749887e+02 3.410174954125370e+02 - ME 5.234212626720279e-05 + ME 1.367292435278724e-05 Event 70 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -564,7 +564,7 @@ Event 70 Batch 0 2 6.918343395272258e+02 6.895733556028865e+02 -5.391072441382606e+01 -1.473005040127906e+01 3 2.169590284692678e+02 -1.127375202028747e+02 1.807969800614662e+02 4.091361110301506e+01 4 5.912066320035063e+02 -5.768358354000119e+02 -1.268862556476402e+02 -2.618356070173603e+01 - ME 1.591740981760110e-04 + ME 3.526540789264872e-05 Event 71 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -572,7 +572,7 @@ Event 71 Batch 0 2 5.156371334918733e+02 1.547202099034306e+02 -4.807172487652236e+02 1.041836686949964e+02 3 3.718518305526428e+02 -8.969821893462726e+01 -7.521366892975188e+01 -3.529460545344468e+02 4 6.125110359554843e+02 -6.502199096880338e+01 5.559309176949756e+02 2.487623858394504e+02 - ME 1.125100552069616e-04 + ME 2.860782472746935e-05 Event 72 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -580,7 +580,7 @@ Event 72 Batch 0 2 2.110577464974889e+02 5.009520239746097e+01 -1.453533690489527e+02 -1.445968227848547e+02 3 7.317124633441161e+02 -4.429659627226336e+02 5.264774879404380e+02 2.490095170354977e+02 4 5.572297901583943e+02 3.928707603251725e+02 -3.811241188914850e+02 -1.044126942506430e+02 - ME 1.823320413479066e-04 + ME 2.666441446531882e-05 Event 73 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -588,7 +588,7 @@ Event 73 Batch 0 2 3.932257450488246e+02 3.105005764664288e+01 -2.932679039283983e+02 2.601082794045340e+02 3 5.658879124646472e+02 3.645905401293642e+02 
4.244364556305355e+02 8.459646951004230e+01 4 5.408863424865281e+02 -3.956405977760074e+02 -1.311685517021372e+02 -3.447047489145762e+02 - ME 8.953763196089171e-04 + ME 7.825486685913998e-05 Event 74 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -596,7 +596,7 @@ Event 74 Batch 0 2 1.374854102925440e+02 7.785209805930555e+01 4.289805712042688e+01 1.048858692406466e+02 3 6.381281910764947e+02 -1.004137270491618e+02 -1.591026937267357e+02 6.097630724433484e+02 4 7.243863986309617e+02 2.256162898985645e+01 1.162046366063089e+02 -7.146489416839951e+02 - ME 1.395531292378326e+01 + ME 1.919068868336380e+00 Event 75 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -604,7 +604,7 @@ Event 75 Batch 0 2 5.936883054156938e+02 -3.438525101293572e+00 -2.706855443967301e+02 5.283780053968293e+02 3 5.912298912592892e+02 1.109657062166288e+02 4.832067437414102e+02 -3.221034603433170e+02 4 3.150818033250173e+02 -1.075271811153352e+02 -2.125211993446803e+02 -2.062745450535123e+02 - ME 1.379908325625592e-03 + ME 1.642862842910461e-04 Event 76 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -612,7 +612,7 @@ Event 76 Batch 0 2 6.619486867997672e+02 2.801967015359571e+01 2.136411519593737e+02 6.258980909300584e+02 3 1.201252731414031e+02 2.274423842261747e+01 -8.754996679960182e+01 7.904292618103446e+01 4 7.179260400588295e+02 -5.076390857621322e+01 -1.260911851597719e+02 -7.049410171110928e+02 - ME 5.870483941147637e+00 + ME 7.362202483972824e-01 Event 77 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -620,7 +620,7 @@ Event 77 Batch 0 2 7.456676259451606e+02 -7.346624001550109e+02 6.511229493320701e+01 -1.097804865615983e+02 3 1.284204120828029e+02 1.251494694834492e+02 2.867183268690428e+01 2.708973588335753e+00 4 6.259119619720373e+02 6.095129306715618e+02 
-9.378412762011118e+01 1.070715129732624e+02 - ME 1.662775178233579e-04 + ME 4.400761364703354e-05 Event 78 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -628,7 +628,7 @@ Event 78 Batch 0 2 7.040158920877628e+02 6.911264613612161e+02 -6.659640240533211e+01 -1.163937709034254e+02 3 5.185438503615327e+02 -4.976050220224222e+02 -1.270913363611937e+02 7.158742227342900e+01 4 2.774402575507044e+02 -1.935214393387939e+02 1.936877387665258e+02 4.480634862999637e+01 - ME 5.328004946641866e-05 + ME 9.352750539306009e-06 Event 79 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -636,7 +636,7 @@ Event 79 Batch 0 2 6.777589592768838e+02 1.742725197144059e+02 -4.776543849198212e+01 6.532264221831092e+02 3 5.725002211294488e+02 -1.786302554544233e+02 -1.627852110918317e+02 -5.189881598643107e+02 4 2.497408195936665e+02 4.357735740017474e+00 2.105506495838138e+02 -1.342382623187985e+02 - ME 9.179311580246363e-04 + ME 3.598558866345749e-04 Event 80 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -644,7 +644,7 @@ Event 80 Batch 0 2 6.240819586861880e+02 4.679310297228965e+02 -4.118464023828053e+02 -3.002304821964348e+01 3 6.688675489057649e+02 -5.494372353172420e+02 3.251429131208653e+02 1.994607943266771e+02 4 2.070504924080468e+02 8.150620559434545e+01 8.670348926194001e+01 -1.694377461070337e+02 - ME 3.575286400583300e-03 + ME 5.382869847396148e-05 Event 81 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -652,7 +652,7 @@ Event 81 Batch 0 2 5.198056748722776e+02 1.034797897616987e+02 -2.885605608993972e+02 4.197888462474007e+02 3 5.672098642055398e+02 -4.160331805498524e+02 2.087659545613757e+01 -3.849773895903518e+02 4 4.129844609221831e+02 3.125533907881537e+02 2.676839654432596e+02 -3.481145665704891e+01 - ME 1.018936778946332e-04 + ME 3.612255741613163e-05 Event 82 
Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -660,7 +660,7 @@ Event 82 Batch 0 2 2.057598609140514e+02 6.385349666266659e+01 -2.765433460911293e+01 1.936364870179372e+02 3 6.235840147705873e+02 4.654039114453895e+02 -3.828889383639962e+02 -1.601633028106901e+02 4 6.706561243153629e+02 -5.292574081080552e+02 4.105432729731107e+02 -3.347318420724690e+01 - ME 6.930850923220120e-04 + ME 3.172622561805068e-04 Event 83 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -668,7 +668,7 @@ Event 83 Batch 0 2 6.583322583736492e+02 1.865539504254553e+02 -1.926584839569474e+02 6.012334775737429e+02 3 3.620902826842561e+02 -3.107067244571256e+02 -1.177956631152976e+01 -1.855584705935048e+02 4 4.795774589420946e+02 1.241527740316703e+02 2.044380502684771e+02 -4.156750069802382e+02 - ME 8.385116111585099e-03 + ME 6.756528802944365e-04 Event 84 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -676,7 +676,7 @@ Event 84 Batch 0 2 4.849329564663161e+02 -2.622178945286150e+02 4.068620488841210e+02 -2.941124332559817e+01 3 4.737588937677760e+02 6.014532316188546e+01 -1.333934272225749e+02 4.505954095412368e+02 4 5.413081497659077e+02 2.020725713667296e+02 -2.734686216615461e+02 -4.211841662156386e+02 - ME 5.162990427398554e-03 + ME 1.017468409980153e-03 Event 85 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -684,7 +684,7 @@ Event 85 Batch 0 2 7.085742632080854e+02 -2.174614026040270e+02 -5.283468657604088e+02 -4.190914152061853e+02 3 5.315764222715953e+02 8.528530557199829e+00 3.820092234108129e+02 3.695533927738615e+02 4 2.598493145203187e+02 2.089328720468272e+02 1.463376423495959e+02 4.953802243232388e+01 - ME 6.335517668355978e-05 + ME 1.894143727100354e-05 Event 86 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -692,7 
+692,7 @@ Event 86 Batch 0 2 1.724500140939190e+02 1.231518677708316e+02 -1.121928207497684e+01 1.201946443701656e+02 3 7.028475062724231e+02 -6.467096040851287e+01 -4.553168759141600e+02 -5.315061866629339e+02 4 6.247024796336580e+02 -5.848090736231883e+01 4.665361579891369e+02 4.113115422927684e+02 - ME 1.165531323127631e-04 + ME 5.311384036847167e-05 Event 87 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -700,7 +700,7 @@ Event 87 Batch 0 2 1.942099203196796e+02 -7.751148196958454e+01 -1.356691819650310e+02 -1.153400900745028e+02 3 7.314670447251594e+02 1.724617634710876e+02 7.020747158546045e+02 1.113196793791551e+02 4 5.743230349551606e+02 -9.495028150150301e+01 -5.664055338895735e+02 4.020410695347637e+00 - ME 1.237609879052555e-04 + ME 1.874087134673149e-05 Event 88 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -708,7 +708,7 @@ Event 88 Batch 0 2 6.382497024023744e+02 2.632142028760094e+02 -5.613974181649784e+02 1.513733956108635e+02 3 3.997044228265544e+02 -5.264940326118349e+01 3.435187961344461e+02 1.974500004195773e+02 4 4.620458747710724e+02 -2.105647996148253e+02 2.178786220305324e+02 -3.488233960304407e+02 - ME 1.863821317258467e-03 + ME 9.699609186666195e-05 Event 89 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -716,7 +716,7 @@ Event 89 Batch 0 2 1.419006640093282e+02 -8.677155154367878e+01 6.457545216231642e+01 -9.185046144153740e+01 3 7.131224514048055e+02 5.460003286026870e+02 -4.154556538506974e+02 -1.944836022569670e+02 4 6.449768845858670e+02 -4.592287770590082e+02 3.508802016883808e+02 2.863340636985044e+02 - ME 1.136115495374629e-04 + ME 2.974199953519439e-05 Event 90 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -724,7 +724,7 @@ Event 90 Batch 0 2 5.730615760623938e+02 -6.017783679015001e+01 -5.202921970507185e+02 
-2.325386583054727e+02 3 5.389913703864468e+02 -6.302812531165206e+01 2.446311215742109e+02 4.761247390423042e+02 4 3.879470535511588e+02 1.232059621018019e+02 2.756610754765076e+02 -2.435860807368315e+02 - ME 1.094721025518881e-03 + ME 1.667772733247344e-04 Event 91 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -732,7 +732,7 @@ Event 91 Batch 0 2 4.546745139784350e+02 -1.470341619195494e+02 -1.726383255301703e+02 -3.940886669878754e+02 3 5.110976540119647e+02 -2.482119727393537e+02 -1.865817698532448e+02 4.059542728975803e+02 4 5.342278320096005e+02 3.952461346589030e+02 3.592200953834151e+02 -1.186560590970480e+01 - ME 8.789722587847313e-05 + ME 4.420313882846059e-05 Event 92 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -740,7 +740,7 @@ Event 92 Batch 0 2 6.683728375977241e+02 -1.148152650923627e+02 3.458291789782991e+02 5.603051703379153e+02 3 2.872567998557088e+02 1.635098024620329e+02 7.847331657016402e+01 -2.227620976482501e+02 4 5.443703625465666e+02 -4.869453736967034e+01 -4.243024955484631e+02 -3.375430726896653e+02 - ME 8.270083568815311e-04 + ME 2.265252332392545e-04 Event 93 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -748,7 +748,7 @@ Event 93 Batch 0 2 5.666948073002088e+02 5.408074886689032e+01 5.639942928586390e+02 -1.134525653745258e+01 3 6.168025492529713e+02 2.439040545997395e+02 -5.541969602989467e+02 1.175666879272316e+02 4 3.165026434468199e+02 -2.979848034666298e+02 -9.797332559692304e+00 -1.062214313897791e+02 - ME 1.664960428447917e-04 + ME 1.251778043268437e-05 Event 94 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -756,7 +756,7 @@ Event 94 Batch 0 2 4.964349376711385e+02 8.445930034540567e+01 -2.409007074648561e+02 -4.257712097695705e+02 3 5.660980232871289e+02 1.373833465612049e+02 5.210669225216058e+02 
1.734417778711397e+02 4 4.374670390417324e+02 -2.218426469066104e+02 -2.801662150567495e+02 2.523294318984307e+02 - ME 3.431641292834382e-05 + ME 1.007141026120618e-05 Event 95 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -764,7 +764,7 @@ Event 95 Batch 0 2 7.117074025057361e+02 -3.227984571262278e+02 4.276971164854593e+02 -4.684055501468919e+02 3 1.264078228725325e+02 8.675876182178401e+01 5.074873328843479e+01 7.665781760618943e+01 4 6.618847746217315e+02 2.360396953044439e+02 -4.784458497738940e+02 3.917477325407025e+02 - ME 2.121249861094822e-04 + ME 8.653822330208906e-05 Event 96 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -772,7 +772,7 @@ Event 96 Batch 0 2 7.329769441659936e+02 -9.642859092211874e+01 6.903981466332597e+02 -2.265107649915406e+02 3 3.937873938465678e+02 -4.837693103302091e+01 -3.847118583018795e+02 6.873841850241256e+01 4 3.732356619874385e+02 1.448055219551397e+02 -3.056862883313802e+02 1.577723464891279e+02 - ME 3.473186069800973e-05 + ME 9.822975749896163e-06 Event 97 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -780,7 +780,7 @@ Event 97 Batch 0 2 3.394989963266853e+01 6.003767577498499e+00 -2.078495220615399e+01 2.616364312804199e+01 3 7.377311980366451e+02 -5.308290258162607e+02 4.681853362634530e+02 2.080152802450354e+02 4 7.283189023306861e+02 5.248252582387622e+02 -4.474003840572991e+02 -2.341789233730774e+02 - ME 2.063600678642283e-02 + ME 2.729355315721549e-03 Event 98 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -788,7 +788,7 @@ Event 98 Batch 0 2 2.496912687496082e+02 -2.485814905959506e+02 -5.435228288348340e-01 -2.350907922099247e+01 3 7.458289852530976e+02 7.373315781279124e+02 9.801365830907572e+01 -5.473885205171283e+01 4 5.044797459972945e+02 -4.887500875319618e+02 -9.747013548024091e+01 
7.824793127270530e+01 - ME 6.800308216903296e-05 + ME 8.091578731489026e-06 Event 99 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -796,7 +796,7 @@ Event 99 Batch 0 2 1.698125854886770e+02 8.336002034290719e+01 8.774494220182726e+01 -1.191144253093525e+02 3 6.496622934125946e+02 5.714329899004554e+02 -6.230613627727958e+01 3.027265745152471e+02 4 6.805251210987285e+02 -6.547930102433627e+02 -2.543880592454771e+01 -1.836121492058947e+02 - ME 6.115029137493471e-04 + ME 1.856310681395454e-04 Event 100 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -804,7 +804,7 @@ Event 100 Batch 0 2 6.141460480129781e+02 -5.842473718080511e+02 -5.092222124447417e+01 1.823110095657221e+02 3 3.909476383151783e+02 2.539115798088024e+02 -2.930333502072385e+02 -5.000421191795168e+01 4 4.949063136718440e+02 3.303357919992488e+02 3.439555714517127e+02 -1.323067976477707e+02 - ME 1.550407956048336e-04 + ME 2.380755205932631e-05 Event 101 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -812,7 +812,7 @@ Event 101 Batch 0 2 7.469346538870473e+02 3.524232024688497e+02 -1.488240016505349e+02 -6.415299525912136e+02 3 6.502268999047169e+02 -2.777200960400715e+02 1.351761574712158e+02 5.721835160737410e+02 4 1.028384462082358e+02 -7.470310642877820e+01 1.364784417931910e+01 6.934643651747267e+01 - ME 1.080054053054822e-04 + ME 7.777208667430486e-05 Event 102 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -820,7 +820,7 @@ Event 102 Batch 0 2 7.426790432885583e+02 -3.141071077544728e+02 6.615000409077074e+02 1.238005738162371e+02 3 6.735764515788642e+01 -4.139700837311957e+00 -5.533298776898177e+01 -3.818606686673834e+01 4 6.899633115535552e+02 3.182468085917849e+02 -6.061670531387255e+02 -8.561450694949879e+01 - ME 6.292262541994918e-04 + ME 1.796768498680773e-04 Event 103 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -828,7 +828,7 @@ Event 103 Batch 0 2 4.837874798175253e+02 -2.731724972668680e+02 1.247027290420595e+02 -3.793103501549069e+02 3 4.466406321977809e+02 -2.904538080082218e+02 -1.536665846758871e+02 3.025078850172422e+02 4 5.695718879846930e+02 5.636263052750895e+02 2.896385563382777e+01 7.680246513766473e+01 - ME 8.140894767450013e-05 + ME 2.998858312831636e-05 Event 104 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -836,7 +836,7 @@ Event 104 Batch 0 2 5.788466572679498e+02 3.572346730226224e+02 -3.682137844992378e+02 2.680773207965347e+02 3 2.925711988065158e+02 2.155069407513812e+02 1.697995838195863e+02 -1.016010147279926e+02 4 6.285821439255348e+02 -5.727416137740034e+02 1.984142006796517e+02 -1.664763060685422e+02 - ME 2.849770726480251e-04 + ME 7.634200862908681e-05 Event 105 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -844,7 +844,7 @@ Event 105 Batch 0 2 3.361125455083114e+02 2.619004058447622e+02 4.338373361330959e+01 -2.061496357605196e+02 3 5.299016201311088e+02 2.892532450564946e+02 2.091058919093095e+02 3.916669672191841e+02 4 6.339858343605800e+02 -5.511536509012568e+02 -2.524896255226191e+02 -1.855173314586645e+02 - ME 2.866662317167052e-04 + ME 1.089382545947932e-04 Event 106 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -852,7 +852,7 @@ Event 106 Batch 0 2 3.578050478863485e+02 -2.265838270225943e+02 2.740910124726658e+02 -3.947579646386072e+01 3 5.202885196186892e+02 1.412729374205232e+02 1.631578432376887e+02 4.734148487210871e+02 4 6.219064324949621e+02 8.531088960207101e+01 -4.372488557103545e+02 -4.339390522572265e+02 - ME 1.912263829178338e-03 + ME 4.548955126640399e-04 Event 107 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -860,7 
+860,7 @@ Event 107 Batch 0 2 5.409822745993889e+02 9.278463733038997e+01 5.102180459532771e+02 -1.540466750365499e+02 3 2.501852297905710e+02 1.682301834486207e+02 1.474652503315489e+02 1.120056004263085e+02 4 7.088324956100398e+02 -2.610148207790107e+02 -6.576832962848259e+02 4.204107461024153e+01 - ME 7.096163321035572e-04 + ME 2.159102073406285e-04 Event 108 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -868,7 +868,7 @@ Event 108 Batch 0 2 6.835202199428555e+02 6.670011709444186e+02 6.653656309718588e+01 1.337243986739828e+02 3 2.377887385005082e+02 -1.098327419601477e+02 7.667443498831059e+01 -1.964720946353502e+02 4 5.786910415566365e+02 -5.571684289842709e+02 -1.432109980854965e+02 6.274769596136723e+01 - ME 1.143500637563713e-04 + ME 2.960130886583330e-05 Event 109 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -876,7 +876,7 @@ Event 109 Batch 0 2 5.978180281189351e+02 4.291222314737005e+02 2.249703559956599e+02 3.501840146583366e+02 3 3.585061336071061e+02 -3.227227650115256e+02 1.541688059097761e+02 2.467071262824850e+01 4 5.436758382739589e+02 -1.063994664621746e+02 -3.791391619054360e+02 -3.748547272865851e+02 - ME 1.159187207430584e-03 + ME 1.100286424576873e-04 Event 110 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -884,7 +884,7 @@ Event 110 Batch 0 2 7.073952645543156e+01 -4.753982451958468e+01 4.872856968801237e+01 -1.922426029646691e+01 3 7.438039776014969e+02 1.707202332282495e+02 -7.225114374584515e+02 4.556513803361385e+01 4 6.854564959430718e+02 -1.231804087086648e+02 6.737828677704391e+02 -2.634087773714689e+01 - ME 5.177444310012934e-04 + ME 1.052942530962122e-04 Event 111 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -892,7 +892,7 @@ Event 111 Batch 0 2 5.206822291802364e+02 -3.873336848644893e+02 2.415505427333673e+02 
-2.504714268307115e+02 3 5.478000561519707e+02 4.687653961676166e+02 -2.245690260344170e+02 -1.729527606656598e+02 4 4.315177146677929e+02 -8.143171130312743e+01 -1.698151669895031e+01 4.234241874963712e+02 - ME 1.041517236520828e-04 + ME 8.545692640795734e-05 Event 112 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -900,7 +900,7 @@ Event 112 Batch 0 2 3.610471238372959e+02 2.563298943277285e+02 9.635756626046441e+01 -2.352981732387216e+02 3 6.139063356201009e+02 1.031778254919422e+02 -4.257030126280926e+02 4.301305270271111e+02 4 5.250465405426031e+02 -3.595077198196707e+02 3.293454463676283e+02 -1.948323537883896e+02 - ME 2.333567140730066e-04 + ME 5.572029836371622e-05 Event 113 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -908,7 +908,7 @@ Event 113 Batch 0 2 5.886653054136124e+02 3.035646198144377e+02 3.278619896967805e+02 -3.832517176826292e+02 3 5.420023902452333e+02 -3.658357535838290e+02 -3.990519958595696e+02 2.623541560166928e+01 4 3.693323043411537e+02 6.227113376939163e+01 7.119000616278893e+01 3.570163020809600e+02 - ME 6.906402420910258e-05 + ME 4.986188449478774e-05 Event 114 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -916,7 +916,7 @@ Event 114 Batch 0 2 5.165204340356855e+02 2.346362244736889e+01 6.298471388966840e+00 5.159487827839334e+02 3 5.932916594323345e+02 3.608814360715946e+02 -5.336137507463695e+01 -4.678804824963537e+02 4 3.901879065319798e+02 -3.843450585189634e+02 4.706290368567026e+01 -4.806830028757967e+01 - ME 5.363382776736297e-04 + ME 4.029549711869195e-04 Event 115 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -924,7 +924,7 @@ Event 115 Batch 0 2 5.432307281524777e+02 2.250327918244370e+02 4.870559856477670e+02 -8.506664127290338e+01 3 4.265243530840496e+02 2.057819224248363e+02 -2.472237669715339e+02 
2.801021835354204e+02 4 5.302449187634726e+02 -4.308147142492733e+02 -2.398322186762331e+02 -1.950355422625171e+02 - ME 2.364149932043149e-04 + ME 4.159321993514108e-05 Event 116 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -932,7 +932,7 @@ Event 116 Batch 0 2 4.402635748890415e+02 -4.240500842615081e+02 -5.733358735035193e+01 -1.035683405941509e+02 3 4.399967684638562e+02 1.183617589007452e+02 -1.041572505293867e+02 -4.107784286579766e+02 4 6.197396566471035e+02 3.056883253607625e+02 1.614908378797388e+02 5.143467692521278e+02 - ME 1.343295643586522e-04 + ME 4.172733678506819e-05 Event 117 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -940,7 +940,7 @@ Event 117 Batch 0 2 3.074085311587982e+02 -4.270248480828711e+01 -3.034838508096459e+02 2.395944736750828e+01 3 5.360984061023379e+02 3.510554986169303e+02 -1.596589010508530e+02 -3.723849798683070e+02 4 6.564930627388640e+02 -3.083530138086433e+02 4.631427518604987e+02 3.484255325007987e+02 - ME 1.795895763168496e-04 + ME 4.142391000026985e-05 Event 118 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -948,7 +948,7 @@ Event 118 Batch 0 2 5.403602961735903e+02 4.471526113902045e+02 -1.804334130868151e+02 -2.439007487679592e+02 3 5.654623567965698e+02 -5.534570111367966e+02 -1.157195831079003e+02 6.480112868522320e+00 4 3.941773470298406e+02 1.063043997465919e+02 2.961529961947150e+02 2.374206358994370e+02 - ME 3.055618730902428e-05 + ME 7.288650603673961e-06 Event 119 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -956,7 +956,7 @@ Event 119 Batch 0 2 8.009099446659010e+01 5.775399043490319e+01 -2.629604726664823e+01 4.886268393818209e+01 3 7.131140611332349e+02 2.472685400460709e+02 -2.870014097539109e+02 -6.041689532644716e+02 4 7.067949444001758e+02 -3.050225304809738e+02 3.132974570205592e+02 
5.553062693262896e+02 - ME 6.861262467765907e-04 + ME 2.815424392761942e-04 Event 120 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -964,7 +964,7 @@ Event 120 Batch 0 2 5.007248873753321e+02 2.708997263130530e+02 -3.880896283797751e+02 1.634784128397387e+02 3 7.413897277398672e+02 -4.257033276374029e+02 5.921425482134987e+02 -1.334264135464211e+02 4 2.578853848848011e+02 1.548036013243502e+02 -2.040529198337238e+02 -3.005199929331748e+01 - ME 1.034513276694145e-04 + ME 6.003662532288496e-06 Event 121 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -972,7 +972,7 @@ Event 121 Batch 0 2 5.732265116821120e+02 -1.149395375629033e+02 4.260916136383032e+02 3.658189076403451e+02 3 4.323948798659248e+02 -2.148488009071912e+01 -4.178027098651986e+02 1.092914804138530e+02 4 4.943786084519640e+02 1.364244176536226e+02 -8.288903773105691e+00 -4.751103880541979e+02 - ME 8.074833733477824e-02 + ME 7.661241871407340e-04 Event 122 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -980,7 +980,7 @@ Event 122 Batch 0 2 3.423360304412701e+02 2.648046119434483e+02 2.369247279710451e+01 -2.156644197927059e+02 3 6.059487982275789e+02 2.457729689670163e+01 -4.569077875801422e+02 3.972469964635579e+02 4 5.517151713311508e+02 -2.893819088401499e+02 4.332153147830377e+02 -1.815825766708520e+02 - ME 2.180123533398812e-04 + ME 5.274300345459390e-05 Event 123 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -988,7 +988,7 @@ Event 123 Batch 0 2 1.430133297276668e+02 -4.205671322284506e+01 3.498095937953869e+01 1.321377229770999e+02 3 7.140350670908600e+02 -2.955397919833849e+01 -6.570980288365154e+02 -2.778395577453968e+02 4 6.429516031814733e+02 7.161069242118367e+01 6.221170694569771e+02 1.457018347682969e+02 - ME 5.626335206455025e-04 + ME 2.698780233597045e-04 Event 124 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -996,7 +996,7 @@ Event 124 Batch 0 2 6.053457283343441e+02 5.458657819531910e+02 -1.853964251366731e+01 -2.610177782464909e+02 3 7.499633671623128e+02 -6.784114238502394e+02 2.145325921506613e+01 3.189713933003628e+02 4 1.446909045033435e+02 1.325456418970486e+02 -2.913616701398675e+00 -5.795361505387172e+01 - ME 4.169465060943616e-04 + ME 2.629538535113942e-05 Event 125 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1004,7 +1004,7 @@ Event 125 Batch 0 2 6.695439244882118e+02 9.058534244088493e+01 6.586171675820721e+02 7.941529525294386e+01 3 9.341516463500346e+01 3.490868167113007e+01 5.232133368429144e+01 6.906703243419068e+01 4 7.370409108767834e+02 -1.254940241120154e+02 -7.109385012663632e+02 -1.484823276871337e+02 - ME 1.111472366347957e-02 + ME 4.436636984625360e-03 Event 126 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1012,7 +1012,7 @@ Event 126 Batch 0 2 6.465564354211967e+02 -2.094351601488127e+02 -1.930091683601272e+02 -5.804477571728034e+02 3 1.356182567235447e+02 -2.832094442380729e+01 9.735247446175231e+01 -9.007070211700794e+01 4 7.178253078552584e+02 2.377561045726200e+02 9.565669389837488e+01 6.705184592898115e+02 - ME 1.775660879411100e-03 + ME 1.230970446288030e-03 Event 127 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1020,7 +1020,7 @@ Event 127 Batch 0 2 4.508388003927651e+02 -3.846405138087858e+02 7.756355374444065e+01 2.220162025777267e+02 3 6.162879941073576e+02 2.174727303224461e+02 1.334711143222092e+02 -5.609830344035003e+02 4 4.328732054998774e+02 1.671677834863399e+02 -2.110346680666500e+02 3.389668318257735e+02 - ME 3.922171581774212e-05 + ME 2.127227557837123e-05 Event 128 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-1028,7 +1028,7 @@ Event 128 Batch 0 2 7.468963146802857e+02 5.701805835528932e+02 -3.440982003215339e+02 -3.381488363986430e+02 3 1.196664332518719e+02 -9.337643239636876e+01 2.398139841985228e+01 7.089280393650260e+01 4 6.334372520678420e+02 -4.768041511565244e+02 3.201168019016817e+02 2.672560324621404e+02 - ME 2.053620454072734e-04 + ME 7.842790653965437e-05 Event 129 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1036,7 +1036,7 @@ Event 129 Batch 0 2 4.378966182438207e+02 -4.256397208622688e+02 4.624364030548149e+01 9.190104474357973e+01 3 7.127537996732577e+02 5.790589826349546e+02 -1.369827771626340e+02 -3.923574802896586e+02 4 3.493495820829217e+02 -1.534192617726859e+02 9.073913685715252e+01 3.004564355460789e+02 - ME 1.668072874757384e-05 + ME 1.046217618618756e-05 Event 130 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1044,7 +1044,7 @@ Event 130 Batch 0 2 6.322026526626455e+02 5.905875735566585e+02 -2.387291116192753e+01 -2.243136110600485e+02 3 5.268087771404591e+02 -3.287250458747471e+02 1.913681034684307e+02 3.644798771698754e+02 4 3.409885701968954e+02 -2.618625276819114e+02 -1.674951923065032e+02 -1.401662661098267e+02 - ME 2.766647151388132e-04 + ME 3.412796728096272e-05 Event 131 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1052,7 +1052,7 @@ Event 131 Batch 0 2 2.691964685177017e+02 -2.641651354044939e+02 4.065264362900757e+01 -3.210735842607325e+01 3 5.382709487855662e+02 -3.022535437819008e+02 -4.307865739991411e+02 1.131429946566680e+02 4 6.925325826967319e+02 5.664186791863947e+02 3.901339303701337e+02 -8.103563623059465e+01 - ME 5.354423766199649e-04 + ME 1.516502654737588e-04 Event 132 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1060,7 +1060,7 @@ Event 132 Batch 0 2 1.376388194981169e+02 -2.491804956023667e+01 
3.114513197621116e+01 1.317327453336230e+02 3 7.332494677489981e+02 -3.054807357444667e+02 -6.882601889638243e+00 -6.665500220046781e+02 4 6.291117127528858e+02 3.303987853047034e+02 -2.426253008657308e+01 5.348172766710551e+02 - ME 3.625143788027957e-04 + ME 2.459616839911958e-04 Event 133 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1068,7 +1068,7 @@ Event 133 Batch 0 2 5.818916885738672e+02 -3.437736592641007e+02 -2.113522447259726e+02 -4.192228966514222e+02 3 7.075583625851592e+02 3.695171106849944e+02 9.875952986414086e+01 5.952667441040354e+02 4 2.105499488409736e+02 -2.574345142089370e+01 1.125927148618317e+02 -1.760438474526132e+02 - ME 6.644965721204062e-03 + ME 3.278402967978973e-04 Event 134 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1076,7 +1076,7 @@ Event 134 Batch 0 2 7.039051474789593e+02 -1.767404282002263e+02 5.832845063404937e+02 3.521710697233707e+02 3 6.740856043500099e+02 9.540039380435479e+01 -5.203258634262522e+02 -4.177932056695244e+02 4 1.220092481710302e+02 8.134003439587134e+01 -6.295864291424151e+01 6.562213594615410e+01 - ME 6.394436352069354e-05 + ME 3.621089826286842e-05 Event 135 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1084,7 +1084,7 @@ Event 135 Batch 0 2 7.491379873081086e+02 -6.603965492909807e+02 -9.243924572685610e+01 -3.413782470545817e+02 3 4.360367703469753e+02 3.763875731093294e+02 3.833030381995060e+01 2.167746473012021e+02 4 3.148252423449159e+02 2.840089761816513e+02 5.410894190690560e+01 1.246035997533796e+02 - ME 3.729096801849378e-05 + ME 1.170602675185252e-05 Event 136 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1092,7 +1092,7 @@ Event 136 Batch 0 2 6.907976432034611e+02 -8.965778913807024e+01 -5.375684903631193e+02 -4.244796613161184e+02 3 4.317447428217263e+02 2.541758793770707e+02 
2.501815833403360e+02 2.433255445990286e+02 4 3.774576139748129e+02 -1.645180902390004e+02 2.873869070227833e+02 1.811541167170898e+02 - ME 3.295715598818487e-05 + ME 1.221598515374744e-05 Event 137 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1100,7 +1100,7 @@ Event 137 Batch 0 2 5.927917878715718e+02 -5.453882061843875e+02 -2.239274061847312e+02 6.172783069514800e+01 3 3.718333194205911e+02 2.859809174201715e+02 -2.363544177495510e+02 2.472896101988843e+01 4 5.353748927078371e+02 2.594072887642160e+02 4.602818239342820e+02 -8.645679171503701e+01 - ME 1.267334233155001e-04 + ME 2.222722395048600e-05 Event 138 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1108,7 +1108,7 @@ Event 138 Batch 0 2 1.164849493482387e+02 2.012854405109472e+01 -2.573298799707043e+01 -1.118096528381494e+02 3 7.481698498358139e+02 -1.044692284663333e+02 -4.003634472873074e+00 7.408294509656059e+02 4 6.353452008159477e+02 8.434068441523856e+01 2.973662246994375e+01 -6.290197981274564e+02 - ME 3.545594402685597e+00 + ME 1.183014588836486e-01 Event 139 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1116,7 +1116,7 @@ Event 139 Batch 0 2 3.415587822283577e+02 -2.468214832259765e+02 1.926082427237748e+02 1.365416492148350e+02 3 5.828887331044928e+02 -1.023403009989268e+02 -5.561813319045077e+02 1.412376154306548e+02 4 5.755524846671491e+02 3.491617842249035e+02 3.635730891807333e+02 -2.777792646454897e+02 - ME 4.142320485322521e-04 + ME 5.213154494000113e-05 Event 140 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1124,7 +1124,7 @@ Event 140 Batch 0 2 4.395392082109443e+02 -3.037880820376849e+02 -2.455930383243060e+02 -2.014735126343029e+02 3 4.709796125547878e+02 -2.826270024952004e+02 2.984919122515593e+02 2.298833426397907e+02 4 5.894811792342680e+02 5.864150845328855e+02 
-5.289887392725340e+01 -2.840983000548780e+01 - ME 1.220048440917972e-04 + ME 2.990357782498624e-05 Event 141 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1132,7 +1132,7 @@ Event 141 Batch 0 2 3.025838986653694e+02 -2.680006525137058e+02 -6.218827689980458e+01 -1.259574698062632e+02 3 5.104624598690772e+02 -2.829910827131053e+02 4.173533268753467e+02 -7.939880721102661e+01 4 6.869536414655528e+02 5.509917352268112e+02 -3.551650499755422e+02 2.053562770172896e+02 - ME 3.735313583347012e-04 + ME 7.151804808113674e-05 Event 142 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1140,7 +1140,7 @@ Event 142 Batch 0 2 4.390011511178412e+02 -3.153925512561953e+02 3.992377088505197e+01 -3.027468279160259e+02 3 4.597282536099518e+02 2.984856708041211e+02 -2.221794712617382e+02 -2.699863960308454e+02 4 6.012705952722066e+02 1.690688045207421e+01 1.822557003766862e+02 5.727332239468712e+02 - ME 1.630913878361870e-04 + ME 8.945447985744934e-05 Event 143 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1148,7 +1148,7 @@ Event 143 Batch 0 2 7.103308443495001e+02 -3.626595603160224e+02 2.462759922459802e+02 5.589240443825270e+02 3 3.424564807343295e+02 4.507572778536915e+01 -2.357842367637252e+02 -2.442343416788665e+02 4 4.472126749161695e+02 3.175838325306533e+02 -1.049175548225529e+01 -3.146897027036604e+02 - ME 1.304325296055160e-03 + ME 1.789392510542836e-04 Event 144 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1156,7 +1156,7 @@ Event 144 Batch 0 2 6.893886390440568e+02 -2.470805413393656e+02 1.331686162420120e+02 6.296618309717105e+02 3 7.132719020730987e+02 2.482972988978650e+02 -2.304803220538649e+02 -6.276815106349294e+02 4 9.733945888284487e+01 -1.216757558499225e+00 9.731170581185302e+01 -1.980320336781234e+00 - ME 3.769348793094523e-04 + ME 
1.486904409371019e-04 Event 145 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1164,7 +1164,7 @@ Event 145 Batch 0 2 3.784954309743686e+02 2.391836032855264e+02 1.115572896135236e+01 -2.931305935912622e+02 3 7.389406222827198e+02 -4.231861417520660e+02 1.513250860114713e+02 5.865555822189353e+02 4 3.825639467429113e+02 1.840025384665394e+02 -1.624808149728234e+02 -2.934249886276727e+02 - ME 2.193982780219728e-03 + ME 2.016505354100400e-04 Event 146 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1172,7 +1172,7 @@ Event 146 Batch 0 2 4.681255842987410e+02 -3.253195724522379e+01 1.754808059398437e+02 -4.327698247100133e+02 3 2.875849079819393e+02 2.091841587061404e+01 1.879781824316579e+02 -2.166372592748876e+02 4 7.442895077193195e+02 1.161354137460973e+01 -3.634589883715017e+02 6.494070839849006e+02 - ME 5.347932692815789e-02 + ME 1.210467216316050e-02 Event 147 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1180,7 +1180,7 @@ Event 147 Batch 0 2 2.442136391928777e+02 -1.784444843977844e+02 -1.666832492802189e+02 -3.816014311599316e+00 3 5.551361515401285e+02 1.378338123621512e+02 -5.199472642306259e+02 1.372327560591401e+02 4 7.006502092669938e+02 4.061067203563306e+01 6.866305135108448e+02 -1.334167417475408e+02 - ME 7.450632204513606e-04 + ME 2.360352365747709e-04 Event 148 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1188,7 +1188,7 @@ Event 148 Batch 0 2 4.547263863263726e+02 3.928375677411887e+02 5.145105706241225e+01 2.231759855356057e+02 3 7.397285466814292e+02 -5.611511356388266e+02 -1.533645573573770e+02 -4.569322031694095e+02 4 3.055450669921979e+02 1.683135678976379e+02 1.019135002949646e+02 2.337562176338038e+02 - ME 1.440225905683450e-05 + ME 6.307552439231181e-06 Event 149 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -1196,7 +1196,7 @@ Event 149 Batch 0 2 2.343018799311635e+02 9.853424545130945e+01 1.924850318874441e+02 -9.021023174733594e+01 3 7.291173748950658e+02 3.429747374294529e+01 -5.990516617369192e+02 4.142136359886766e+02 4 5.365807451737705e+02 -1.328317191942547e+02 4.065666298494750e+02 -3.240034042413406e+02 - ME 8.405553848068603e-04 + ME 8.298171355094406e-05 Event 150 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1204,7 +1204,7 @@ Event 150 Batch 0 2 4.707648023587808e+02 -8.969278865174961e+01 -3.008719699078221e+02 3.507859183712497e+02 3 6.876639918976698e+02 3.906111988928598e+02 4.609284537794546e+02 -3.284046551871671e+02 4 3.415712057435500e+02 -3.009184102411105e+02 -1.600564838716325e+02 -2.238126318408256e+01 - ME 1.070125715137075e-04 + ME 1.887585788236135e-05 Event 151 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1212,7 +1212,7 @@ Event 151 Batch 0 2 6.503034458278056e+02 -1.575298496674962e+02 -3.658248853789647e+01 -6.298735108350154e+02 3 6.998690336552314e+02 1.302751858829802e+02 -1.019415103826456e+02 6.800389464387812e+02 4 1.498275205169629e+02 2.725466378451580e+01 1.385239989205421e+02 -5.016543560376590e+01 - ME 6.663776898009472e-04 + ME 4.060174493404880e-04 Event 152 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1220,7 +1220,7 @@ Event 152 Batch 0 2 7.401192382353395e+02 1.493701961830190e+02 6.288419447382046e+02 3.605867993093739e+02 3 7.332111095478891e+02 -1.230079111936445e+02 -6.287602831147091e+02 -3.565502647954901e+02 4 2.666965221677112e+01 -2.636228498937447e+01 -8.166162349550861e-02 -4.036534513883709e+00 - ME 8.446403371723604e-04 + ME 1.210964379505254e-04 Event 153 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1228,7 +1228,7 @@ Event 153 Batch 0 2 
5.645797071775899e+02 7.941901905692946e+01 3.691428696980725e+02 -4.197337333594241e+02 3 6.079979027943974e+02 1.021455738177839e+02 -5.566920170809548e+02 2.220849604771994e+02 4 3.274223900280123e+02 -1.815645928747133e+02 1.875491473828823e+02 1.976487728822249e+02 - ME 2.846663840296023e-05 + ME 9.895323747190810e-06 Event 154 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1236,7 +1236,7 @@ Event 154 Batch 0 2 6.022174885419887e+02 -5.152457849782368e+02 -1.493252664732707e+02 -2.736597328082223e+02 3 3.617627670199851e+02 1.925398333816265e+02 -2.626238171638091e+02 1.575736108034646e+02 4 5.360197444380261e+02 3.227059515966102e+02 4.119490836370796e+02 1.160861220047577e+02 - ME 6.437319974597944e-05 + ME 1.660411512586943e-05 Event 155 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1244,7 +1244,7 @@ Event 155 Batch 0 2 6.202229507100907e+02 -2.107861924791831e+02 -3.212541876154504e+02 4.868690137883067e+02 3 2.943040328093193e+02 2.940980302320592e+02 1.073731199058907e+01 2.433613089266508e+00 4 5.854730164805898e+02 -8.331183775287627e+01 3.105168756248616e+02 -4.893026268775732e+02 - ME 5.904510654775639e-03 + ME 4.918845171174253e-04 Event 156 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1252,7 +1252,7 @@ Event 156 Batch 0 2 4.945486805149833e+02 4.540818864859257e+02 -1.431706201593249e+02 -1.337542944644701e+02 3 5.997303202813281e+02 -3.624214233270367e+02 -5.726286247273350e+01 4.743923835389624e+02 4 4.057209992036886e+02 -9.166046315888883e+01 2.004334826320584e+02 -3.406380890744924e+02 - ME 4.701306652347430e-03 + ME 1.986837824231628e-04 Event 157 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1260,7 +1260,7 @@ Event 157 Batch 0 2 4.617003083190191e+02 3.118400043328062e+02 3.404502064148864e+02 -4.079626411035589e+00 3 
5.720097526413113e+02 -4.999240316044806e+01 -4.329264075474301e+02 -3.705005295422582e+02 4 4.662899390396696e+02 -2.618476011723578e+02 9.247620113254365e+01 3.745801559532937e+02 - ME 3.907978340087068e-05 + ME 1.403598809900552e-05 Event 158 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1268,7 +1268,7 @@ Event 158 Batch 0 2 6.784877363061535e+02 -5.707102180762959e+02 -3.102223423027389e+02 -1.959529373021938e+02 3 5.650909444059712e+02 5.525284805868615e+02 7.765167789879932e+01 8.950011457818250e+01 4 2.564213192878751e+02 1.818173748943443e+01 2.325706644039396e+02 1.064528227240114e+02 - ME 3.503179830087694e-05 + ME 8.470133063482862e-06 Event 159 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1276,7 +1276,7 @@ Event 159 Batch 0 2 5.369491563274252e+02 2.154713482252002e+02 -2.912667909729743e+02 3.962955349875316e+02 3 6.066564496499102e+02 -4.020061311781470e+01 5.572389608252350e+02 -2.364332868806716e+02 4 3.563943940226648e+02 -1.752707351073854e+02 -2.659721698522608e+02 -1.598622481068599e+02 - ME 3.198473025834927e-04 + ME 3.562393617300492e-05 Event 160 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1284,7 +1284,7 @@ Event 160 Batch 0 2 6.492474755438517e+02 3.490068395973682e+02 1.460348644657111e+02 -5.276270735801970e+02 3 2.857818814470013e+02 -2.550253586192556e+02 1.227259509083862e+02 3.964456076362119e+01 4 5.649706430091471e+02 -9.398148097811273e+01 -2.687608153740973e+02 4.879825128165764e+02 - ME 6.719464076924620e-05 + ME 3.516238941302227e-05 Event 161 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1292,7 +1292,7 @@ Event 161 Batch 0 2 6.770282049439580e+02 -2.863253153105184e+02 -4.911270786072976e+02 -3.676672364525180e+02 3 1.598243093356544e+02 -7.505362471426160e+01 1.299195075310522e+02 -5.506073768810752e+01 4 
6.631474857203874e+02 3.613789400247800e+02 3.612075710762453e+02 4.227279741406256e+02 - ME 1.577168105051119e-04 + ME 5.970757951131334e-05 Event 162 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1300,7 +1300,7 @@ Event 162 Batch 0 2 5.178592782584632e+02 -3.271131571456631e+02 3.943743741889439e+02 -7.512700901574514e+01 3 3.730686930366258e+02 -2.885924195736573e+01 -1.360208443078026e+02 -3.461874113706257e+02 4 6.090720287049110e+02 3.559723991030290e+02 -2.583535298811414e+02 4.213144203863710e+02 - ME 1.031749267713353e-04 + ME 2.768303103320498e-05 Event 163 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1308,7 +1308,7 @@ Event 163 Batch 0 2 5.388642316037673e+02 3.152159924116781e+02 3.539969933522669e+01 -4.356149670486711e+02 3 5.364171791816749e+02 -5.299694218906361e+02 3.369785517714305e+01 7.576448071880543e+01 4 4.247185892145582e+02 2.147534294789580e+02 -6.909755451236977e+01 3.598504863298658e+02 - ME 3.508094027565679e-05 + ME 1.485600561394433e-05 Event 164 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1316,7 +1316,7 @@ Event 164 Batch 0 2 6.862697092177667e+02 4.132218376422068e+02 1.310202162324327e+02 -5.320221138485150e+02 3 4.476895523579005e+02 -2.769046850483522e+02 1.374187337517142e+02 3.238299280529301e+02 4 3.660407384243329e+02 -1.363171525938544e+02 -2.684389499841469e+02 2.081921857955847e+02 - ME 3.375894779915149e-05 + ME 1.755563256840939e-05 Event 165 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1324,7 +1324,7 @@ Event 165 Batch 0 2 2.382444910715278e+02 -2.158277263671036e+02 -9.471372817531817e+00 -1.004446273032522e+02 3 7.304591383576048e+02 4.619003715882296e+02 -1.223345688256177e+02 5.524969256086772e+02 4 5.312963705708673e+02 -2.460726452211260e+02 1.318059416431495e+02 -4.520522983054250e+02 - ME 
6.966498968932957e-03 + ME 4.549138184301779e-04 Event 166 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1332,7 +1332,7 @@ Event 166 Batch 0 2 2.131352071380649e+02 -7.633553084455029e+01 -1.899581415396244e+02 5.929087379418958e+01 3 7.305557876753161e+02 8.980971292745940e+01 7.136333043711877e+02 1.279589045828712e+02 4 5.563090051866194e+02 -1.347418208290915e+01 -5.236751628315633e+02 -1.872497783770607e+02 - ME 3.314006956523505e-04 + ME 3.352199959657985e-05 Event 167 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1340,7 +1340,7 @@ Event 167 Batch 0 2 4.122964103002419e+02 -3.405127102276982e+02 6.366431608201744e+01 2.235761145061386e+02 3 4.697083356610920e+02 -2.521100678451879e+02 -2.856113063438232e+01 -3.952855880214881e+02 4 6.179952540386658e+02 5.926227780728861e+02 -3.510318544763516e+01 1.717094735153495e+02 - ME 1.146777177775239e-04 + ME 3.829535931496594e-05 Event 168 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1348,7 +1348,7 @@ Event 168 Batch 0 2 7.156643283953484e+02 -3.999734570317170e+02 4.816586825103861e+02 3.467009924560655e+02 3 6.192344221355605e+02 2.722545660880235e+02 -4.999454120042317e+02 -2.436869012025525e+02 4 1.651012494690919e+02 1.277188909436936e+02 1.828672949384504e+01 -1.030140912535133e+02 - ME 1.017624049822302e-03 + ME 5.027887292283473e-05 Event 169 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1356,7 +1356,7 @@ Event 169 Batch 0 2 3.626022684949455e+02 7.511110909567982e+01 -2.030941161665286e+02 -2.908461902563517e+02 3 5.580565590514408e+02 -2.529981754432838e+02 -3.439969378312538e+02 3.592842232626199e+02 4 5.793411724536141e+02 1.778870663476037e+02 5.470910539977822e+02 -6.843803300626824e+01 - ME 1.371698416063432e-04 + ME 4.350242525242475e-05 Event 170 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1364,7 +1364,7 @@ Event 170 Batch 0 2 6.602909342483501e+02 4.699653539595539e+02 -3.020118498241596e+02 3.520021683086903e+02 3 1.039297502933440e+02 3.247420585022842e+01 -9.851348423194945e+01 6.473976746580508e+00 4 7.357793154583061e+02 -5.024395598097824e+02 4.005253340561092e+02 -3.584761450552709e+02 - ME 1.673719496447659e-02 + ME 9.967260301798612e-03 Event 171 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1372,7 +1372,7 @@ Event 171 Batch 0 2 1.506693011949600e+02 -3.657300520509282e+01 -1.244227366169959e+02 -7.669834565089053e+01 3 6.344013325830570e+02 -2.026333084464634e+02 -4.956100871165362e+02 3.402578943089165e+02 4 7.149293662219835e+02 2.392063136515561e+02 6.200328237335323e+02 -2.635595486580261e+02 - ME 2.133207113512388e-03 + ME 9.157902172934166e-04 Event 172 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1380,7 +1380,7 @@ Event 172 Batch 0 2 5.352445157558213e+02 -2.018352690102651e+02 3.892440882325296e+02 -3.069825004886504e+02 3 6.716112180685394e+02 2.825227203806547e+02 -5.978593235713698e+02 1.175022124175027e+02 4 2.931442661756383e+02 -8.068745137038898e+01 2.086152353388391e+02 1.894802880711483e+02 - ME 2.630379932615259e-05 + ME 8.067092159940342e-06 Event 173 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1388,7 +1388,7 @@ Event 173 Batch 0 2 6.571348515648592e+02 -2.769863586381786e+02 5.805753619381593e+02 1.343019708712704e+02 3 5.332990408103321e+02 1.871824832342877e+02 -4.782426732337677e+02 1.437168410371092e+02 4 3.095661076248081e+02 8.980387540389081e+01 -1.023326887043915e+02 -2.780188119083794e+02 - ME 9.985413945498126e-03 + ME 1.269359653092767e-04 Event 174 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1396,7 +1396,7 @@ Event 
174 Batch 0 2 6.091496911716730e+02 -4.752584064243671e+02 3.135726231883978e+01 -3.797492797588730e+02 3 6.417481529658018e+02 3.309293137608124e+02 9.015643604119191e+01 5.424004960996682e+02 4 2.491021558625255e+02 1.443290926635548e+02 -1.215136983600317e+02 -1.626512163407953e+02 - ME 1.319192968737130e-03 + ME 1.362612102685676e-04 Event 175 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1404,7 +1404,7 @@ Event 175 Batch 0 2 5.399801778396885e+02 1.966672297646830e+02 2.343185748302537e+02 -4.449667388535759e+02 3 6.987953575798327e+02 -1.857207036318898e+02 -9.664246188148675e+01 6.666955876403318e+02 4 2.612244645804785e+02 -1.094652613279307e+01 -1.376761129487668e+02 -2.217288487867561e+02 - ME 9.528877211334405e-03 + ME 9.613528518728674e-04 Event 176 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1412,7 +1412,7 @@ Event 176 Batch 0 2 6.615757321243968e+02 -4.129469954321281e+02 4.686878756164518e+02 -2.179194886871010e+02 3 1.607981401590110e+02 -6.355407199259605e+01 7.929314438200207e+00 1.474925346731048e+02 4 6.776261277165921e+02 4.765010674247242e+02 -4.766171900546519e+02 7.042695401399614e+01 - ME 6.965204353376922e-04 + ME 3.097907077728356e-04 Event 177 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1420,7 +1420,7 @@ Event 177 Batch 0 2 4.314334067424883e+02 -3.493619040652741e+02 -2.026482683689240e+01 -2.523299055494341e+02 3 4.840006500668400e+02 -1.846595828310067e+02 -1.450727057198388e+02 4.232155216776995e+02 4 5.845659431906716e+02 5.340214868962809e+02 1.653375325567312e+02 -1.708856161282654e+02 - ME 2.160100049311594e-04 + ME 1.084300812640113e-04 Event 178 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1428,7 +1428,7 @@ Event 178 Batch 0 2 4.528135981327372e+02 -2.544528544607913e+02 1.436928116455424e+02 
3.458992272209776e+02 3 3.053350882587867e+02 -1.380299578048218e+02 2.072032295570572e+02 1.767599177741536e+02 4 7.418513136084770e+02 3.924828122656132e+02 -3.508960412025996e+02 -5.226591449951313e+02 - ME 7.384409254828141e-02 + ME 5.382438151181503e-02 Event 179 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1436,7 +1436,7 @@ Event 179 Batch 0 2 7.433145319259943e+02 -2.538538580850882e+02 -6.778753511348521e+02 -1.689962142519080e+02 3 1.647945947160298e+02 1.009041857568576e+02 1.171651165877689e+02 5.699069397138987e+01 4 5.918908733579761e+02 1.529496723282306e+02 5.607102345470832e+02 1.120055202805181e+02 - ME 1.335347052581446e-04 + ME 3.739915465576335e-05 Event 180 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1444,7 +1444,7 @@ Event 180 Batch 0 2 2.396120216689867e+02 1.204528233788652e+02 -1.081248155319049e+02 1.766750195544080e+02 3 5.541470271917004e+02 2.767127195685322e+02 2.999096875483201e+02 3.749175614572557e+02 4 7.062409511393131e+02 -3.971655429473975e+02 -1.917848720164151e+02 -5.515925810116636e+02 - ME 1.316593054412419e-02 + ME 2.792447184071457e-03 Event 181 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1452,7 +1452,7 @@ Event 181 Batch 0 2 2.165494222755782e+02 1.336973493521793e+02 -1.495065670853883e+02 -8.164837697364385e+01 3 6.960869932595207e+02 -2.848973600545249e+02 2.209041937252092e+01 6.347303441548928e+02 4 5.873635844649011e+02 1.512000107023455e+02 1.274161477128675e+02 -5.530819671812490e+02 - ME 6.164296623062663e-02 + ME 3.488874737600980e-03 Event 182 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1460,7 +1460,7 @@ Event 182 Batch 0 2 6.472681881349898e+02 4.279258056181361e+02 3.994050733201775e+02 -2.762448183472868e+02 3 5.337197582091030e+02 -3.479343829022644e+02 -4.034091782989213e+02 
-3.254965992745409e+01 4 3.190120536559070e+02 -7.999142271587166e+01 4.004104978744005e+00 3.087944782747408e+02 - ME 6.393158381765308e-05 + ME 5.523679400573375e-05 Event 183 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1468,7 +1468,7 @@ Event 183 Batch 0 2 6.165307808531154e+02 -3.276949594572818e+02 8.808524820164887e+01 -5.147496540405800e+02 3 2.975460412740734e+02 -1.030095950018341e+02 -2.375020297789284e+02 1.466814775843215e+02 4 5.859231778728107e+02 4.307045544591158e+02 1.494167815772794e+02 3.680681764562588e+02 - ME 6.887775529805495e-05 + ME 2.562496117427957e-05 Event 184 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1476,7 +1476,7 @@ Event 184 Batch 0 2 5.645337360463252e+02 -3.940276919793660e+02 3.776398996283964e+02 1.443212503288767e+02 3 5.368100353438223e+02 2.392766596964613e+02 -1.719264331693737e+02 -4.487237410122139e+02 4 3.986562286098531e+02 1.547510322829050e+02 -2.057134664590229e+02 3.044024906833372e+02 - ME 3.553984578535888e-05 + ME 1.712138666139329e-05 Event 185 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1484,7 +1484,7 @@ Event 185 Batch 0 2 6.347397779710931e+02 2.522092504724420e+02 -1.599825720327363e+02 5.600809373302327e+02 3 4.566768168089404e+02 -3.359958684022406e+02 -1.272903681003782e+02 -2.818823400219340e+02 4 4.085834052199659e+02 8.378661792979838e+01 2.872729401331145e+02 -2.781985973082986e+02 - ME 1.184197550833168e-03 + ME 1.836859309200860e-04 Event 186 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1492,7 +1492,7 @@ Event 186 Batch 0 2 7.089823220133230e+02 -5.197119220861886e+02 4.248734840868308e+02 -2.281183322067745e+02 3 5.364076825758043e+02 3.588264146200084e+02 -3.973752875032956e+02 3.270606945152315e+01 4 2.546099954108725e+02 1.608855074661802e+02 -2.749819658353518e+01 
1.954122627552515e+02 - ME 2.583895514537347e-05 + ME 1.318469173008218e-05 Event 187 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1500,7 +1500,7 @@ Event 187 Batch 0 2 4.835105223217566e+02 -2.128653471696258e+02 1.375287019182911e+02 -4.117725407538514e+02 3 7.240136612790383e+02 4.407273454759851e+02 -4.896543389042274e+01 5.723264583716990e+02 4 2.924758163992057e+02 -2.278619983063593e+02 -8.856326802786833e+01 -1.605539176178473e+02 - ME 5.307563978210835e-04 + ME 9.185777086042985e-05 Event 188 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1508,7 +1508,7 @@ Event 188 Batch 0 2 6.611118500396009e+02 3.502021063704277e+02 -2.011693879247277e+02 -5.234102027267809e+02 3 3.072944371702247e+02 -6.894916504330918e+01 -1.599953986835475e+02 2.531350551695447e+02 4 5.315937127901742e+02 -2.812529413271184e+02 3.611647866082752e+02 2.702751475572362e+02 - ME 6.863567490702385e-05 + ME 3.862980709292737e-05 Event 189 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1516,7 +1516,7 @@ Event 189 Batch 0 2 7.498478362545707e+02 6.780504955298834e+02 -3.199144947524264e+02 -1.319162971889924e+01 3 3.253008430749361e+02 -2.985087551774363e+02 1.291384938207140e+02 6.034152914782593e+00 4 4.248513206704935e+02 -3.795417403524470e+02 1.907760009317124e+02 7.157476804116639e+00 - ME 8.583750584152986e-05 + ME 1.504471760657040e-05 Event 190 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1524,7 +1524,7 @@ Event 190 Batch 0 2 4.938867893347995e+02 3.689671478502748e+02 -1.218724623869293e+02 3.048516153777389e+02 3 5.264063001598521e+02 6.631942569346465e+01 1.276367949726208e+02 -5.063735530147588e+02 4 4.797069105053494e+02 -4.352865735437401e+02 -5.764332585691415e+00 2.015219376370201e+02 - ME 4.759343488474735e-05 + ME 2.269926034328256e-05 Event 191 Batch 
0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1532,7 +1532,7 @@ Event 191 Batch 0 2 3.681793141805986e+02 -3.225132888415706e+02 1.579589482507471e+02 -8.117977937027918e+01 3 5.431126642386394e+02 4.058413736814005e+01 9.147123993851424e+01 5.338139246166097e+02 4 5.887080215807621e+02 2.819291514734305e+02 -2.494301881892614e+02 -4.526341452463304e+02 - ME 4.908990110546420e-03 + ME 1.427494731558637e-03 Event 192 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1540,7 +1540,7 @@ Event 192 Batch 0 2 6.054165399887861e+02 1.497087111729466e+02 8.905021611535379e+01 5.798159601983524e+02 3 2.106656439489222e+02 1.451894976721945e+02 -1.487249448604451e+02 3.436443048222171e+01 4 6.839178160622922e+02 -2.948982088451411e+02 5.967472874509133e+01 -6.141803906805740e+02 - ME 4.294450320853435e-02 + ME 6.984876913518998e-03 Event 193 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1548,7 +1548,7 @@ Event 193 Batch 0 2 2.753169163933055e+02 -1.695475157411122e+02 -2.139406274107579e+02 3.581134319495643e+01 3 5.760219428901971e+02 -3.264616044953138e+02 1.527507522369444e+02 -4.493231656306969e+02 4 6.486611407164972e+02 4.960091202364260e+02 6.118987517381347e+01 4.135118224357404e+02 - ME 1.537583375796735e-04 + ME 4.273063058931925e-05 Event 194 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1556,7 +1556,7 @@ Event 194 Batch 0 2 3.445934948105150e+02 -2.970257025567896e+02 -8.183019525038441e+01 1.543509890854414e+02 3 7.485441862377920e+02 6.623797851941252e+02 1.083400559332054e+02 -3.314119056355291e+02 4 4.068623189516925e+02 -3.653540826373358e+02 -2.650986068282081e+01 1.770609165500877e+02 - ME 3.024610065690235e-05 + ME 4.921158833271929e-06 Event 195 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-1564,7 +1564,7 @@ Event 195 Batch 0 2 2.012122274303647e+02 -5.190018365965096e+01 1.322177369426910e+02 -1.425173724194237e+02 3 7.122630330184543e+02 -3.054768058087834e+02 -2.528097616133813e+02 5.916838461125119e+02 4 5.865247395511832e+02 3.573769894684365e+02 1.205920246706904e+02 -4.491664736930883e+02 - ME 3.011639483286710e-03 + ME 4.696445912229638e-04 Event 196 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1572,7 +1572,7 @@ Event 196 Batch 0 2 4.490485793345989e+02 3.485190427929747e+02 -2.661098616642627e+01 -2.819059396826192e+02 3 5.531554978829222e+02 -3.330165694254377e+02 4.416170126965178e+02 7.442003978758296e+00 4 4.977959227824785e+02 -1.550247336753688e+01 -4.150060265300915e+02 2.744639357038610e+02 - ME 4.340266456570635e-05 + ME 9.363355109875406e-06 Event 197 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1580,7 +1580,7 @@ Event 197 Batch 0 2 3.951249254444253e+02 -2.278358800090239e+02 3.101157211704546e+02 -8.968142489336992e+01 3 3.607080640108546e+02 -2.889948719219027e+02 2.155030307719242e+02 -1.227661082778765e+01 4 7.441670105447209e+02 5.168307519309257e+02 -5.256187519423792e+02 1.019580357211576e+02 - ME 3.377741088449004e-02 + ME 6.597373610109231e-03 Event 198 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1588,7 +1588,7 @@ Event 198 Batch 0 2 3.750236904637998e+02 1.183014344420310e+02 -1.005952209347265e+02 -3.413621838211424e+02 3 4.381296266085964e+02 -2.726825461625328e+02 1.003845461170281e+02 -3.279096546785175e+02 4 6.868466829276033e+02 1.543811117205018e+02 2.106748176980602e-01 6.692718384996598e+02 - ME 9.606390506705955e-04 + ME 6.145502577419889e-04 Event 199 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1596,7 +1596,7 @@ Event 199 Batch 0 2 2.454478562244572e+02 -2.058455361543722e+02 
-1.131056012155068e+02 -7.126982772660261e+01 3 5.321797086694488e+02 -9.806778012582416e+01 -4.820333037417012e+02 -2.030808875905193e+02 4 7.223724351060940e+02 3.039133162801963e+02 5.951389049572081e+02 2.743507153171219e+02 - ME 1.577081887352965e-03 + ME 3.088173795554332e-04 Event 200 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1604,7 +1604,7 @@ Event 200 Batch 0 2 3.952431318363244e+02 3.031309873729303e+02 9.337877017948550e+01 2.358159092128122e+02 3 6.094031244332663e+02 -7.796753338981905e+01 -5.315426896439308e+02 -2.876727322709444e+02 4 4.953537437304092e+02 -2.251634539831113e+02 4.381639194644453e+02 5.185682305813224e+01 - ME 6.703240553489506e-05 + ME 1.668296552597111e-05 Event 201 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1612,7 +1612,7 @@ Event 201 Batch 0 2 6.497938633639732e+02 3.771120671245744e+02 3.553445817627057e+02 -3.921081252746440e+02 3 3.369790646193914e+02 -2.140351778515325e+02 1.061239955238163e+02 2.376584318047305e+02 4 5.132270720166357e+02 -1.630768892730420e+02 -4.614685772865220e+02 1.544496934699135e+02 - ME 6.283412004793947e-05 + ME 2.404518058628388e-05 Event 202 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1620,7 +1620,7 @@ Event 202 Batch 0 2 7.267802742470179e+02 6.523432021666289e+02 -1.481957728499301e+02 2.840702844913056e+02 3 3.546086620137576e+02 -3.102429173963679e+02 -5.939291787501398e+01 -1.611493614224694e+02 4 4.186110637392242e+02 -3.421002847702610e+02 2.075886907249440e+02 -1.229209230688360e+02 - ME 1.894138330341389e-04 + ME 2.830403199974809e-05 Event 203 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1628,7 +1628,7 @@ Event 203 Batch 0 2 4.830190702985662e+02 2.789429895135886e+02 -3.943102945050296e+02 -4.197918611657844e+00 3 5.247163710833165e+02 -4.266462829986153e+02 
3.263988520595893e+01 3.037019215942698e+02 4 4.922645586181170e+02 1.477032934850268e+02 3.616704092990706e+02 -2.995040029826120e+02 - ME 5.831910678002871e-04 + ME 5.153190919865371e-05 Event 204 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1636,7 +1636,7 @@ Event 204 Batch 0 2 6.952375769935185e+02 3.823764713153302e+01 6.531840992713522e+02 -2.350397908115460e+02 3 6.250862947179036e+02 1.031861473443961e+02 -5.506835576815644e+02 2.771878679515999e+02 4 1.796761282885781e+02 -1.414237944759291e+02 -1.025005415897879e+02 -4.214807714005369e+01 - ME 1.802858800889920e-04 + ME 1.903000177287069e-05 Event 205 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1644,7 +1644,7 @@ Event 205 Batch 0 2 5.625197268936781e+02 2.955060596751036e+02 4.395356105446072e+02 -1.895074112086703e+02 3 3.144813194259642e+02 -1.941101430078122e+02 -7.073026664887073e+00 -2.473251401357733e+02 4 6.229989536803572e+02 -1.013959166672914e+02 -4.324625838797200e+02 4.368325513444433e+02 - ME 1.140145509231641e-04 + ME 3.163472493443465e-05 Event 206 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1652,7 +1652,7 @@ Event 206 Batch 0 2 5.487698581700869e+02 -4.771827558939671e+02 -2.639484985605369e+02 6.145050708573941e+01 3 4.357856725513919e+02 1.877155863290790e+02 1.701172104948722e+02 3.545872893148349e+02 4 5.154444692785200e+02 2.894671695648880e+02 9.383128806566407e+01 -4.160377964005746e+02 - ME 4.167786087259531e-03 + ME 3.341888001113221e-04 Event 207 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1660,7 +1660,7 @@ Event 207 Batch 0 2 5.289473514933904e+02 -3.230637718239221e+02 -3.258094337294262e+02 2.631792409740627e+02 3 3.730441408755686e+02 -1.145152671243400e+02 -7.298530142052728e+01 -3.474497523579300e+02 4 5.980085076310412e+02 4.375790389482623e+02 
3.987947351499535e+02 8.427051138386733e+01 - ME 1.161501350367753e-04 + ME 3.789028948405571e-05 Event 208 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1668,7 +1668,7 @@ Event 208 Batch 0 2 3.144460531270953e+02 3.105028133645123e+02 -3.495125011961062e+01 3.525242310830974e+01 3 7.230517599976935e+02 -6.554206809343713e+02 2.220922910679198e+02 2.095294558946058e+02 4 4.625021868752117e+02 3.449178675698588e+02 -1.871410409483092e+02 -2.447818790029155e+02 - ME 4.858457850437588e-04 + ME 2.941989209837521e-05 Event 209 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1676,7 +1676,7 @@ Event 209 Batch 0 2 2.827014058170527e+02 -6.682954863774688e+01 -1.958656753088385e+02 -1.925890275057887e+02 3 5.969812148172332e+02 5.625717004655273e+02 1.060136244597389e+02 -1.692949027847388e+02 4 6.203173793657136e+02 -4.957421518277804e+02 8.985205084909943e+01 3.618839302905275e+02 - ME 1.004351001266980e-04 + ME 2.261939336541961e-05 Event 210 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1684,7 +1684,7 @@ Event 210 Batch 0 2 3.369223392964550e+02 -2.366581006943837e+02 8.850719545688517e+01 -2.228813191927023e+02 3 6.926279093100447e+02 9.835546321295956e+01 -1.581805884470998e+02 6.671120783270956e+02 4 4.704497513935005e+02 1.383026374814242e+02 6.967339299021461e+01 -4.442307591343933e+02 - ME 5.974710408786874e-02 + ME 3.044010300440331e-03 Event 211 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1692,7 +1692,7 @@ Event 211 Batch 0 2 5.754314663824422e+02 -1.965408456680789e+02 -5.399725108422632e+02 3.037689947684008e+01 3 6.656941886103589e+02 4.112771407945243e+02 5.114655840792436e+02 1.113679599883347e+02 4 2.588743450071987e+02 -2.147362951264454e+02 2.850692676301957e+01 -1.417448594651748e+02 - ME 4.382347812376007e-04 + ME 
1.754510489093768e-05 Event 212 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1700,7 +1700,7 @@ Event 212 Batch 0 2 5.922157374848572e+02 8.073316194509509e+00 4.947261155542873e+02 -3.254233732830556e+02 3 3.635572903001510e+02 8.951663862813328e+01 4.011175755255380e+01 3.500738802669425e+02 4 5.442269722149914e+02 -9.758995482264278e+01 -5.348378731068407e+02 -2.465050698388706e+01 - ME 3.041427876287276e-04 + ME 1.919214373141161e-04 Event 213 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1708,7 +1708,7 @@ Event 213 Batch 0 2 7.434820262506830e+02 2.991548764052629e+02 2.111623598614188e+02 -6.470566753063675e+02 3 5.607612173038236e+02 -2.664197873565705e+02 -1.905271140771768e+02 4.551626726109781e+02 4 1.957567564454930e+02 -3.273508904869271e+01 -2.063524578424195e+01 1.918940026953895e+02 - ME 1.827786070323022e-04 + ME 1.896082550340891e-04 Event 214 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1716,7 +1716,7 @@ Event 214 Batch 0 2 5.400874280734793e+02 3.457358963402696e+02 2.445843697627679e+02 -3.351710101016577e+02 3 3.400793067879315e+02 1.482066942304564e+02 1.256466447865830e+02 2.791086371729012e+02 4 6.198332651385892e+02 -4.939425905707261e+02 -3.702310145493508e+02 5.606237292875651e+01 - ME 1.356968066378560e-04 + ME 6.515553919952984e-05 Event 215 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1724,7 +1724,7 @@ Event 215 Batch 0 2 3.916345321859864e+02 3.271767110560381e+02 -1.945589530122144e+02 9.208594000107233e+01 3 6.136750729169615e+02 -1.269585669220027e+02 2.644680756040779e+02 -5.390132228350478e+02 4 4.946903948970534e+02 -2.002181441340350e+02 -6.990912259186331e+01 4.469272828339764e+02 - ME 6.207321332343461e-05 + ME 3.427926940877871e-05 Event 216 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -1732,7 +1732,7 @@ Event 216 Batch 0 2 3.767411090262154e+02 1.602503356822860e+02 2.758455349572533e+02 -2.004069210086422e+02 3 4.061922956351256e+02 3.340053729931861e+02 2.237650079776778e+02 5.798114391563544e+01 4 7.170665953386593e+02 -4.942557086754721e+02 -4.996105429349309e+02 1.424257770930068e+02 - ME 1.232271832865728e-03 + ME 2.360785017217177e-04 Event 217 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1740,7 +1740,7 @@ Event 217 Batch 0 2 6.474118977458852e+02 -5.378641111590873e+02 -3.279650037002520e+02 1.492759847325320e+02 3 5.088298200539713e+02 3.261878344469131e+02 1.555821256186315e+02 -3.581947579501665e+02 4 3.437582822001433e+02 2.116762767121744e+02 1.723828780816206e+02 2.089187732176345e+02 - ME 3.357118960820415e-05 + ME 1.388331578224744e-05 Event 218 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1748,7 +1748,7 @@ Event 218 Batch 0 2 6.658501161076259e+02 -6.577627036244854e+02 -3.020200479570956e+01 9.895676706252418e+01 3 2.516345839620714e+02 1.565221509782131e+02 -1.156477271957936e+02 1.595192254662914e+02 4 5.825152999303023e+02 5.012405526462722e+02 1.458497319915031e+02 -2.584759925288157e+02 - ME 5.956187308313417e-04 + ME 1.036808356896783e-04 Event 219 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1756,7 +1756,7 @@ Event 219 Batch 0 2 4.328556070633435e+02 6.122246558068494e+01 -1.687441385117925e+02 3.938796795879554e+02 3 6.500677455605621e+02 -3.703058656885360e+02 4.356876543064814e+02 -3.092537914719426e+02 4 4.170766473760945e+02 3.090834001078509e+02 -2.669435157946888e+02 -8.462588811601287e+01 - ME 2.797067114354785e-04 + ME 9.046106878448173e-05 Event 220 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1764,7 +1764,7 @@ Event 220 Batch 0 2 
3.686297280598666e+02 -3.497113779929074e+02 -8.765282776369953e+01 7.685577594963354e+01 3 4.155522773953191e+02 -1.777404948015450e+02 -1.525848366500187e+02 3.432344379292750e+02 4 7.158179945448145e+02 5.274518727944524e+02 2.402376644137182e+02 -4.200902138789084e+02 - ME 3.485410710153060e-03 + ME 1.676729229638681e-03 Event 221 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1772,7 +1772,7 @@ Event 221 Batch 0 2 5.295220830718469e+02 3.654688468413813e+01 4.204675060608333e+02 3.197890523886257e+02 3 7.127556392876786e+02 -1.727486268095863e+02 -4.342549693537605e+02 -5.381460163035255e+02 4 2.577222776404743e+02 1.362017421254481e+02 1.378746329292729e+01 2.183569639148998e+02 - ME 2.819264207321091e-05 + ME 2.031931825964470e-05 Event 222 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1780,7 +1780,7 @@ Event 222 Batch 0 2 2.464305981122427e+02 -2.054199106396077e+02 6.127423271580306e+01 1.215572638876956e+02 3 6.926647117218595e+02 4.702892479611936e+02 3.872350261814336e+02 -3.296383785530530e+02 4 5.609046901658980e+02 -2.648693373215859e+02 -4.485092588972366e+02 2.080811146653574e+02 - ME 6.319142394583372e-05 + ME 1.678695785515194e-05 Event 223 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1788,7 +1788,7 @@ Event 223 Batch 0 2 2.463384302181125e+02 -1.209251938955738e+02 -2.140981972257043e+02 -1.488897673935926e+01 3 6.819620845265065e+02 -2.400891875757811e+02 5.819023806457059e+02 2.623339210620683e+02 4 5.716994852553812e+02 3.610143814713547e+02 -3.678041834200016e+02 -2.474449443227091e+02 - ME 3.931927185620913e-04 + ME 4.810915220985587e-05 Event 224 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1796,7 +1796,7 @@ Event 224 Batch 0 2 2.236851263016067e+02 -8.671871524968952e+01 1.717231909970332e+02 1.141317038679677e+02 3 
5.308972974363861e+02 -3.715833295102001e+01 4.680039348616383e+02 2.478780257941054e+02 4 7.454175762620068e+02 1.238770482007099e+02 -6.397271258586715e+02 -3.620097296620728e+02 - ME 8.708656265179471e-02 + ME 6.017706528853119e-02 Event 225 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1804,7 +1804,7 @@ Event 225 Batch 0 2 5.094176014319268e+02 1.569347096242780e+02 -1.561291130928888e+00 -4.846394040251013e+02 3 7.252311334449815e+02 -3.845161955462210e+02 -4.374219820797174e+01 6.133466494377277e+02 4 2.653512651230916e+02 2.275814859219426e+02 4.530348933890067e+01 -1.287072454126262e+02 - ME 3.974215742688118e-04 + ME 1.151501859389029e-04 Event 226 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1812,7 +1812,7 @@ Event 226 Batch 0 2 6.863217264048350e+02 -2.391756120967483e+02 -6.171186323675804e+02 1.816511279850093e+02 3 5.332348374442744e+02 1.096335504493486e+02 4.112484130583279e+02 -3.212391931833643e+02 4 2.804434361508906e+02 1.295420616473995e+02 2.058702193092524e+02 1.395880651983551e+02 - ME 3.797053871351767e-05 + ME 1.438206074993319e-05 Event 227 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1820,7 +1820,7 @@ Event 227 Batch 0 2 7.243206345463230e+02 -5.280189925476210e+02 -1.406011303275692e+02 4.754657162080069e+02 3 5.487499634657129e+02 3.840442912861271e+02 -1.353123555187442e+01 -3.917312987222202e+02 4 2.269294019879644e+02 1.439747012614939e+02 1.541323658794436e+02 -8.373441748578679e+01 - ME 2.903986554770466e-04 + ME 5.165623507180856e-05 Event 228 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1828,7 +1828,7 @@ Event 228 Batch 0 2 2.119578664379945e+02 1.625437651479949e+01 -1.806612394559917e+02 1.096514885776142e+02 3 6.254097456672617e+02 -3.200704000326812e+01 3.158243706171928e+02 5.388579277416935e+02 4 
6.626323878947439e+02 1.575266348846865e+01 -1.351631311612011e+02 -6.485094163193077e+02 - ME 8.951233069377997e-01 + ME 3.800526374221887e-02 Event 229 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1836,7 +1836,7 @@ Event 229 Batch 0 2 5.921227120343664e+02 -3.877491982207575e+02 4.449193714386763e+02 -4.802726626309342e+01 3 4.688278331283221e+02 3.470549659129084e+02 -1.517581364471262e+02 -2.762641051115459e+02 4 4.390494548373113e+02 4.069423230784909e+01 -2.931612349915501e+02 3.242913713746393e+02 - ME 3.492131538818778e-05 + ME 1.250052930035257e-05 Event 230 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1844,7 +1844,7 @@ Event 230 Batch 0 2 4.261952284727868e+02 2.153699775439378e+02 -1.171086083390750e+02 3.486312082969335e+02 3 3.540619701921573e+02 3.070144260847319e+01 1.307424531367546e+02 3.276029778648147e+02 4 7.197428013350559e+02 -2.460714201524109e+02 -1.363384479767965e+01 -6.762341861617483e+02 - ME 3.186738302883428e-01 + ME 4.711214236813061e-02 Event 231 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1852,7 +1852,7 @@ Event 231 Batch 0 2 4.205236024420392e+02 7.533931576750228e+01 -3.260217181731272e+02 -2.547036061581322e+02 3 5.397543491930860e+02 8.423195081267914e+01 -1.158376015978276e+02 5.204050211049134e+02 4 5.397220483648740e+02 -1.595712665801811e+02 4.418593197709548e+02 -2.657014149467809e+02 - ME 5.532186388062512e-04 + ME 3.265984123744224e-04 Event 232 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1860,7 +1860,7 @@ Event 232 Batch 0 2 4.295782852421121e+02 3.239064445356881e+02 9.240815775655221e-01 2.821724019337124e+02 3 7.183371274312143e+02 -6.155391061575082e+02 -1.955291718271078e+02 -3.144649112405858e+02 4 3.520845873266736e+02 2.916326616218201e+02 1.946050902495422e+02 3.229250930687335e+01 - ME 
6.730603828970119e-05 + ME 1.049779024540051e-05 Event 233 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1868,7 +1868,7 @@ Event 233 Batch 0 2 3.640046126075324e+02 -2.220120664068515e+02 -1.165482463207536e+02 2.638683509799470e+02 3 4.682121509308883e+02 -1.009786196736112e+02 3.762431872847591e+02 2.597441061312976e+02 4 6.677832364615790e+02 3.229906860804628e+02 -2.596949409640055e+02 -5.236124571112447e+02 - ME 5.385640989777132e-03 + ME 7.598357868514145e-04 Event 234 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1876,7 +1876,7 @@ Event 234 Batch 0 2 8.690043548936441e+01 -2.607433849884744e+01 -7.258333015587984e+01 4.004341073848801e+01 3 6.785651905172676e+02 -3.574930335951373e+02 -4.725723606052789e+01 5.748184081539155e+02 4 7.345343739933678e+02 3.835673720939847e+02 1.198405662164078e+02 -6.148618188924036e+02 - ME 1.962113644780599e-01 + ME 8.152211059226219e-02 Event 235 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1884,7 +1884,7 @@ Event 235 Batch 0 2 3.000566282865331e+02 1.219146462304108e+01 -2.126850238006026e+02 2.113064812540423e+02 3 7.160981218147422e+02 2.575873756248088e+02 2.779062108697769e+02 -6.076293293985470e+02 4 4.838452498987246e+02 -2.697788402478500e+02 -6.522118706917435e+01 3.963228481445046e+02 - ME 3.940402333844027e-05 + ME 2.498899672933017e-05 Event 236 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1892,7 +1892,7 @@ Event 236 Batch 0 2 1.510518772182422e+02 -9.497518588910037e+01 1.467158067736534e+01 1.165380984781943e+02 3 6.955499852411461e+02 5.933480346078575e+02 3.495450158124774e+02 9.770452249822526e+01 4 6.533981375406115e+02 -4.983728487187572e+02 -3.642165964898426e+02 -2.142426209764196e+02 - ME 1.121647028585911e-03 + ME 2.623118294900277e-04 Event 237 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1900,7 +1900,7 @@ Event 237 Batch 0 2 2.173874152942701e+02 2.069918593916189e+02 -3.850229167793934e+01 -5.412237993169356e+01 3 7.305677895866185e+02 -6.701932224704495e+02 -2.421540700080861e+02 1.610333695687662e+02 4 5.520447951191120e+02 4.632013630788306e+02 2.806563616860255e+02 -1.069109896370727e+02 - ME 1.822378225061386e-04 + ME 2.170005261464319e-05 Event 238 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1908,7 +1908,7 @@ Event 238 Batch 0 2 6.349573912113930e+02 -3.336495545457479e+02 -4.785400196851591e+02 2.506956580500139e+02 3 5.768887318987100e+02 4.812119270965607e+02 2.334547330568691e+02 -2.161818165921041e+02 4 2.881538768898968e+02 -1.475623725508129e+02 2.450852866282900e+02 -3.451384145790988e+01 - ME 9.810731053503000e-05 + ME 1.383744831772315e-05 Event 239 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1916,7 +1916,7 @@ Event 239 Batch 0 2 5.349076725903783e+02 -5.331874414268931e+02 1.887721601290929e+01 -3.848403846142781e+01 3 3.658437465440003e+02 8.335465236419728e+01 1.670818061666301e+01 -3.558292926602242e+02 4 5.992485808656214e+02 4.498327890626960e+02 -3.558539662957234e+01 3.943133311216517e+02 - ME 9.226736931333760e-05 + ME 2.560110521983184e-05 Event 240 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1924,7 +1924,7 @@ Event 240 Batch 0 2 2.870582387324442e+02 1.830793600232297e+02 -1.562409872742485e+02 1.564389154054251e+02 3 6.007192677438852e+02 3.433229388031108e+02 4.688113613010560e+02 -1.523446941819630e+02 4 6.122224935236703e+02 -5.264022988263405e+02 -3.125703740268075e+02 -4.094221223461989e+00 - ME 1.424405912705748e-04 + ME 3.548113744927254e-05 Event 241 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1932,7 +1932,7 @@ 
Event 241 Batch 0 2 7.424696267657401e+02 4.823783107714221e+02 2.498315161211407e+02 5.061190823507636e+02 3 2.455726236162737e+02 -1.827879695947952e+02 -1.199757723946156e+02 -1.118046764652876e+02 4 5.119577496179861e+02 -2.995903411766270e+02 -1.298557437265251e+02 -3.943144058854759e+02 - ME 2.705973755259623e-03 + ME 2.366266620918590e-04 Event 242 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1940,7 +1940,7 @@ Event 242 Batch 0 2 7.249130370348905e+02 1.676828147928013e+02 6.059046362201677e+02 -3.609168279440810e+02 3 6.240672718074169e+02 -4.529413961306761e+01 -5.490982345027019e+02 2.930862151720549e+02 4 1.510196911576933e+02 -1.223886751797337e+02 -5.680640171746593e+01 6.783061277202641e+01 - ME 4.587322306592483e-05 + ME 1.668420503127583e-05 Event 243 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1948,7 +1948,7 @@ Event 243 Batch 0 2 4.655090712555229e+02 2.096323612054770e+02 2.113490506800235e+02 3.578890153850057e+02 3 5.764797256412519e+02 6.697224883641857e+01 -5.382210340689440e+02 -1.953502251008744e+02 4 4.580112031032257e+02 -2.766046100418949e+02 3.268719833889206e+02 -1.625387902841314e+02 - ME 2.309042201876567e-04 + ME 3.999521919602606e-05 Event 244 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1956,7 +1956,7 @@ Event 244 Batch 0 2 5.237109195354749e+02 1.305098338947756e+02 -4.868141165486322e+02 -1.423106687020528e+02 3 5.804450110242352e+02 -4.045654344879671e+02 2.643676733537771e+02 3.214855413949400e+02 4 3.958440694402901e+02 2.740556005931916e+02 2.224464431948551e+02 -1.791748726928872e+02 - ME 2.644202232750943e-04 + ME 2.634847163425152e-05 Event 245 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1964,7 +1964,7 @@ Event 245 Batch 0 2 2.629169357520612e+02 2.457511487795889e+02 -4.402365929491729e+01 
-8.242333044139184e+01 3 6.931386101565748e+02 -5.195573187661655e+02 4.004017488088275e+02 -2.240084037645317e+02 4 5.439444540913644e+02 2.738061699865766e+02 -3.563780895139104e+02 3.064317342059234e+02 - ME 4.288053786412853e-05 + ME 1.052590061693975e-05 Event 246 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1972,7 +1972,7 @@ Event 246 Batch 0 2 6.300937687157445e+02 -5.459948028041557e+02 3.085954426748102e+02 6.063567799240802e+01 3 1.673910408536145e+02 -3.546130270298926e+01 7.662824936562275e+01 -1.445350060290698e+02 4 7.025151904306430e+02 5.814561055071442e+02 -3.852236920404341e+02 8.389932803666261e+01 - ME 6.282756509154168e-04 + ME 1.915763997923398e-04 Event 247 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1980,7 +1980,7 @@ Event 247 Batch 0 2 2.577847506495701e+02 2.418237207037818e+02 -8.449121421856779e+01 2.890502538162603e+01 3 5.130193185035739e+02 4.381905811488919e+02 1.366496386102691e+02 2.291390669832418e+02 4 7.291959308468561e+02 -6.800143018526737e+02 -5.215842439170134e+01 -2.580440923648679e+02 - ME 4.005872724472581e-03 + ME 1.831864018495938e-03 Event 248 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1988,7 +1988,7 @@ Event 248 Batch 0 2 7.033207479153643e+02 -5.040306065309413e+02 -2.020637997366072e+02 4.469714117975369e+02 3 1.758360012551320e+02 -1.471306652922549e+01 -4.035460943683606e+00 -1.751728862172264e+02 4 6.208432508295037e+02 5.187436730601667e+02 2.060992606802909e+02 -2.717985255803103e+02 - ME 5.592865021063005e-04 + ME 1.512538512828554e-04 Event 249 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1996,7 +1996,7 @@ Event 249 Batch 0 2 3.018816177222694e+02 5.523075638651412e+01 1.752331212074551e+02 2.395316845419020e+02 3 6.597415560701297e+02 6.315352823685419e+01 -6.561001191322722e+02 
-2.834054254405022e+01 4 5.383768262076012e+02 -1.183842846233684e+02 4.808669979248172e+02 -2.111911419978518e+02 - ME 4.868100986861644e-04 + ME 9.225490912808109e-05 Event 250 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2004,7 +2004,7 @@ Event 250 Batch 0 2 2.166381935101301e+02 -1.289072913913530e+02 -1.189615590004073e+02 -1.271344351215279e+02 3 6.815426093761062e+02 -2.511966318704653e+02 5.323234433390903e+02 3.435583388650892e+02 4 6.018191971137635e+02 3.801039232618182e+02 -4.133618843386827e+02 -2.164239037435611e+02 - ME 3.468666532553966e-04 + ME 6.586594805989363e-05 Event 251 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2012,7 +2012,7 @@ Event 251 Batch 0 2 6.676961532387151e+02 -3.991265595084280e+01 -4.419965947723094e+02 4.988628500443886e+02 3 7.150412702460949e+02 3.921851524844908e+01 5.505653759000154e+02 -4.545587894617490e+02 4 1.172625765151894e+02 6.941407023942340e-01 -1.085687811277060e+02 -4.430406058263954e+01 - ME 5.615833562023813e-04 + ME 4.930952510857648e-05 Event 252 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2020,7 +2020,7 @@ Event 252 Batch 0 2 2.112668789066533e+02 -1.147554660376938e+02 3.364589711187055e+01 -1.741632301749357e+02 3 7.393007599584276e+02 2.529046383258835e+02 -3.593132473314827e+02 5.945576909606565e+02 4 5.494323611349191e+02 -1.381491722881897e+02 3.256673502196121e+02 -4.203944607857206e+02 - ME 2.709805393201018e-03 + ME 3.541023077707110e-04 Event 253 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2028,7 +2028,7 @@ Event 253 Batch 0 2 7.299659304470913e+01 -4.405884533650594e+01 -5.451291667290519e+01 2.038780663930336e+01 3 7.253475305576840e+02 3.245698054519170e+02 -1.402290280555607e+02 -6.333397991328418e+02 4 7.016558763976062e+02 -2.805109601154107e+02 
1.947419447284657e+02 6.129519924935382e+02 - ME 6.484723438037138e-04 + ME 3.511004874943257e-04 Event 254 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2036,7 +2036,7 @@ Event 254 Batch 0 2 1.982520535096858e+02 -6.164633378269741e+01 1.773450413210087e+02 -6.365801262063783e+01 3 7.183815394471145e+02 -1.984891252513599e+02 -6.893152145826987e+02 -3.896971029099802e+01 4 5.833664070431995e+02 2.601354590340572e+02 5.119701732616900e+02 1.026277229116358e+02 - ME 9.210498573936143e-05 + ME 1.539519794804785e-05 Event 255 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2044,7 +2044,7 @@ Event 255 Batch 0 2 5.347080663542586e+02 -5.063606624096446e+02 1.592577719822621e+02 6.440929941880935e+01 3 2.475406015289465e+02 -1.856063881081879e+02 3.468010668896048e+00 -1.637516137347836e+02 4 7.177513321167953e+02 6.919670505178326e+02 -1.627257826511582e+02 9.934231431597431e+01 - ME 1.305481727349711e-03 + ME 3.137689362725149e-04 Event 0 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2052,7 +2052,7 @@ Event 0 Batch 1 2 5.775677821222389e+02 4.314431287975208e+02 -2.652567205762379e+02 -2.776332864556192e+02 3 6.023469575940325e+02 -3.228069847179709e+02 5.005558924007591e+02 8.978477890465942e+01 4 3.200852602837275e+02 -1.086361440795499e+02 -2.352991718245218e+02 1.878485075509607e+02 - ME 2.846168667868940e-05 + ME 7.533072458757011e-06 Event 1 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2060,7 +2060,7 @@ Event 1 Batch 1 2 7.241206267812560e+02 3.541578305635416e+02 -4.894807402105655e+02 3.991635230623179e+02 3 7.375567605136832e+02 -3.903081173548693e+02 4.920451519627784e+02 -3.867054653560791e+02 4 3.832261270506111e+01 3.615028679132773e+01 -2.564411752212873e+00 -1.245805770623896e+01 - ME 1.002871021831580e-03 + ME 7.043932941624384e-05 
Event 2 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2068,7 +2068,7 @@ Event 2 Batch 1 2 4.849204091734790e+02 2.108660079931152e+02 4.054727376659824e+02 1.620962335024329e+02 3 2.728468517759738e+02 4.961449545460115e+01 2.005017763154939e+02 1.782774356422519e+02 4 7.422327390505470e+02 -2.604805034477164e+02 -6.059745139814763e+02 -3.403736691446848e+02 - ME 2.729395913593408e-02 + ME 1.721146206228212e-02 Event 3 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2076,7 +2076,7 @@ Event 3 Batch 1 2 4.264155576764489e+02 -4.170952165204416e+02 -7.054834331799705e+01 5.370977042744418e+01 3 7.108631972082329e+02 6.832597695609467e+02 -1.727180704166534e+02 -9.301097030017993e+01 4 3.627212451153183e+02 -2.661645530405051e+02 2.432664137346505e+02 3.930119987273574e+01 - ME 5.466137525204964e-05 + ME 5.739226791327231e-06 Event 4 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2084,7 +2084,7 @@ Event 4 Batch 1 2 7.183269968238449e+02 -3.584978055671311e+02 -5.048824553914336e+02 -3.640971079361008e+02 3 7.387431276480253e+02 4.013538934928407e+02 5.036810263913359e+02 3.618865629982628e+02 4 4.292987552812846e+01 -4.285608792570924e+01 1.201429000097643e+00 2.210544937839338e+00 - ME 3.145606575501715e-04 + ME 5.884725836744927e-05 Event 5 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2092,7 +2092,7 @@ Event 5 Batch 1 2 4.529780005473896e+02 -8.443182436392424e+01 4.445408460134587e+02 -2.106590230986445e+01 3 4.683757780543924e+02 -6.076819021151039e+01 -1.335482427838441e+02 -4.448010379662153e+02 4 5.786462213982179e+02 1.452000145754347e+02 -3.109926032296145e+02 4.658669402760799e+02 - ME 8.481958952475706e-05 + ME 2.851579396246287e-05 Event 6 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-2100,7 +2100,7 @@ Event 6 Batch 1 2 6.238848262005389e+02 -1.065131260140052e+02 -4.741487807795934e+02 -3.912418229627633e+02 3 1.729069432107234e+02 -1.460869767542721e+02 -8.199113358821990e+01 4.281191710484079e+01 4 7.032082305887380e+02 2.526001027682771e+02 5.561399143678132e+02 3.484299058579224e+02 - ME 4.868510537699180e-04 + ME 1.468701510222534e-04 Event 7 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2108,7 +2108,7 @@ Event 7 Batch 1 2 6.977203086376783e+02 -6.126072843634399e+02 -1.744636661244187e+02 2.847602033865263e+02 3 1.614193396272251e+02 -4.571584237043670e+00 8.497734613495712e+01 -1.371646983269120e+02 4 6.408603517350967e+02 6.171788686004836e+02 8.948631998946138e+01 -1.475955050596143e+02 - ME 3.540796080305845e-04 + ME 9.523334397108766e-05 Event 8 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2116,7 +2116,7 @@ Event 8 Batch 1 2 6.871091945484288e+02 4.059708628308462e+02 2.886614153103366e+02 4.732666173272762e+02 3 5.653302025665631e+02 -2.838835484844413e+02 -7.353399035097291e+01 -4.833229987253825e+02 4 2.475606028850081e+02 -1.220873143464048e+02 -2.151274249593637e+02 1.005638139810634e+01 - ME 8.785466054587446e-05 + ME 3.726341895116938e-05 Event 9 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2124,7 +2124,7 @@ Event 9 Batch 1 2 1.618579955503452e+02 1.385215220188489e+01 1.601201234527701e+02 -1.917484467788566e+01 3 7.196660585644588e+02 -4.527189715496824e+02 -4.214090439733052e+02 3.679391067910628e+02 4 6.184759458851959e+02 4.388668193477974e+02 2.612889205205349e+02 -3.487642621131772e+02 - ME 1.054640649369016e-03 + ME 1.276556148007894e-04 Event 10 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2132,7 +2132,7 @@ Event 10 Batch 1 2 7.832785200561162e+01 1.027681340851886e+01 -7.242726264265977e+01 
-2.799877018853974e+01 3 7.448007230566494e+02 2.520540107528716e+02 6.813719334665398e+02 1.641011304445167e+02 4 6.768714249377393e+02 -2.623308241613905e+02 -6.089446708238800e+02 -1.361023602559769e+02 - ME 5.876642887714617e-04 + ME 1.087112534498832e-04 Event 11 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2140,7 +2140,7 @@ Event 11 Batch 1 2 5.478627446486676e+02 2.070882322301630e+02 -4.708081692757452e+02 1.887000762823861e+02 3 6.997827604382593e+02 -4.209013422316021e+02 4.569873120768409e+02 -3.220257264800591e+02 4 2.523544949130733e+02 2.138131100014392e+02 1.382085719890436e+01 1.333256501976729e+02 - ME 2.703695959900953e-05 + ME 7.092902148917371e-06 Event 12 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2148,7 +2148,7 @@ Event 12 Batch 1 2 5.802868936311938e+02 -4.467002255894120e+01 5.211262762381961e+02 -2.513262266832405e+02 3 5.208038834706859e+02 2.151797013176283e+01 -4.993650129388666e+02 -1.463155694111945e+02 4 3.989092228981199e+02 2.315205242717860e+01 -2.176126329932955e+01 3.976417960944350e+02 - ME 5.046437564325244e-04 + ME 4.980323856672599e-04 Event 13 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2156,7 +2156,7 @@ Event 13 Batch 1 2 5.774880087360024e+02 1.576445054854711e+02 5.481077151088400e+02 -9.065617884226717e+01 3 5.915098138161557e+02 -3.018001633277128e+02 -3.808656371901898e+02 3.372564123391869e+02 4 3.310021774478421e+02 1.441556578422419e+02 -1.672420779186502e+02 -2.466002334969197e+02 - ME 1.505341700965184e-03 + ME 5.587942683639647e-05 Event 14 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2164,7 +2164,7 @@ Event 14 Batch 1 2 2.531797527967491e+02 -8.400833666640553e+01 -2.384535242035555e+02 -1.350938161690895e+01 3 5.261064571264828e+02 -1.751971590790252e+02 -3.334570051994592e+02 
3.672878780523887e+02 4 7.207137900767681e+02 2.592054957454308e+02 5.719105294030147e+02 -3.537784964354798e+02 - ME 3.373121845959189e-03 + ME 1.659114310450813e-03 Event 15 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2172,7 +2172,7 @@ Event 15 Batch 1 2 4.605848765362425e+02 3.563504404614684e+02 1.735853700506503e+02 2.345653669687875e+02 3 4.216445088607453e+02 1.370719005416187e+02 -3.933730877164850e+02 6.521502736890037e+01 4 6.177706146030118e+02 -4.934223410030871e+02 2.197877176658347e+02 -2.997803943376878e+02 - ME 4.613631402771334e-04 + ME 9.110622752737525e-05 Event 16 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2180,7 +2180,7 @@ Event 16 Batch 1 2 4.972484926572777e+02 -1.474122335888775e+02 -4.748950276275915e+02 -6.399787981958280e-01 3 5.072511849723048e+02 4.846784046822065e+02 1.224000792205880e+02 -8.607455661990267e+01 4 4.955003223704169e+02 -3.372661710933285e+02 3.524949484070036e+02 8.671453541809866e+01 - ME 5.856804747367533e-05 + ME 1.035537635543116e-05 Event 17 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2188,7 +2188,7 @@ Event 17 Batch 1 2 3.182636773520259e+02 -9.176062613973060e+01 -1.890905041641619e+02 2.389906630959087e+02 3 6.376303990615819e+02 -4.240378519397394e+02 2.706855745366566e+02 -3.917827786765570e+02 4 5.441059235863918e+02 5.157984780794702e+02 -8.159507037249479e+01 1.527921155806483e+02 - ME 7.445984612273079e-05 + ME 2.964570775197734e-05 Event 18 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2196,7 +2196,7 @@ Event 18 Batch 1 2 5.532560008158404e+02 -4.148613005881325e+02 1.689647846464811e+02 -3.247047971041214e+02 3 3.650144721835348e+02 -1.597348634907620e+02 -2.160675866909894e+02 2.470529017650751e+02 4 5.817295270006244e+02 5.745961640788944e+02 4.710280204450838e+01 
7.765189533904635e+01 - ME 9.119298978738387e-05 + ME 3.148325734685632e-05 Event 19 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2204,7 +2204,7 @@ Event 19 Batch 1 2 3.263687475619531e+02 -1.904667433734991e+02 2.390747946355329e+02 -1.143775398573919e+02 3 7.331345945903582e+02 2.597391859223821e+02 -6.739404183465077e+02 1.258022320965774e+02 4 4.404966578476884e+02 -6.927244254888298e+01 4.348656237109747e+02 -1.142469223918529e+01 - ME 8.793129888044293e-05 + ME 9.665339952809457e-06 Event 20 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2212,7 +2212,7 @@ Event 20 Batch 1 2 9.588718605412237e+01 4.259536217794532e+01 8.056474827260676e+01 -2.982128277051557e+01 3 7.250265356668370e+02 3.120913743414047e+02 -4.446787057645155e+02 4.801284204484703e+02 4 6.790862782790414e+02 -3.546867365193502e+02 3.641139574919093e+02 -4.503071376779550e+02 - ME 3.686389281265799e-03 + ME 6.402422614019696e-04 Event 21 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2220,7 +2220,7 @@ Event 21 Batch 1 2 1.825278201605081e+02 -1.533737674675502e+02 8.574830442242751e+01 4.939757963742074e+01 3 7.183016103669913e+02 1.713205736990392e+02 -6.275703015775031e+02 -3.045685162014731e+02 4 5.991705694725008e+02 -1.794680623148897e+01 5.418219971550755e+02 2.551709365640523e+02 - ME 7.470861105912214e-05 + ME 1.806434468406198e-05 Event 22 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2228,7 +2228,7 @@ Event 22 Batch 1 2 2.349542451120770e+02 9.235159917618290e+01 -2.156570331301489e+02 -1.291214495308476e+01 3 7.360601907662837e+02 -2.182033070539752e+02 6.568866822530020e+02 -2.503433799808774e+02 4 5.289855641216395e+02 1.258517078777923e+02 -4.412296491228531e+02 2.632555249339621e+02 - ME 3.893602972207037e-05 + ME 8.007442232312076e-06 Event 23 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2236,7 +2236,7 @@ Event 23 Batch 1 2 2.350908908124364e+02 -7.377772511691019e+00 -2.298431804723787e+02 -4.884063683135331e+01 3 6.797114625392685e+02 -5.485955088721076e+02 3.603976926464840e+02 1.765336882516069e+02 4 5.851976466482949e+02 5.559732813837987e+02 -1.305545121741055e+02 -1.276930514202538e+02 - ME 2.057468423101862e-04 + ME 3.185713653214173e-05 Event 24 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2244,7 +2244,7 @@ Event 24 Batch 1 2 4.355364173804401e+02 2.538053291625626e+02 -2.665393838801487e+02 -2.328767540869265e+02 3 4.093863144993796e+02 -1.953012891316528e+02 -3.573484670764558e+02 4.191221827828568e+01 4 6.550772681201798e+02 -5.850404003090968e+01 6.238878509566048e+02 1.909645358086408e+02 - ME 1.895168702655672e-04 + ME 3.721637657688893e-05 Event 25 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2252,7 +2252,7 @@ Event 25 Batch 1 2 7.365386968907909e+02 3.875876454009267e+02 3.151568854896985e+02 5.412404333367775e+02 3 5.208510884285567e+02 -2.430585576296288e+02 -1.518636440371932e+02 -4.349089876054084e+02 4 2.426102146806534e+02 -1.445290877712977e+02 -1.632932414525050e+02 -1.063314457313693e+02 - ME 3.717867207603688e-04 + ME 7.982561935336398e-05 Event 26 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2260,7 +2260,7 @@ Event 26 Batch 1 2 7.198867014174701e+02 5.189601929589824e+02 4.797253921416957e+02 -1.370428003807496e+02 3 3.889101953712928e+02 -1.847394503243419e+02 -2.837815501141775e+02 1.912864537085460e+02 4 3.912031032112371e+02 -3.342207426346404e+02 -1.959438420275183e+02 -5.424365332779646e+01 - ME 1.222836766708484e-04 + ME 1.928349098758061e-05 Event 27 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-2268,7 +2268,7 @@ Event 27 Batch 1 2 6.732032222628646e+02 5.870808395006010e+02 -9.126179303429218e+01 3.165595544104447e+02 3 1.177373967283342e+02 7.847176641415683e+01 5.304379211899001e+00 -8.761358356661104e+01 4 7.090593810088013e+02 -6.655526059147578e+02 8.595741382239324e+01 -2.289459708438336e+02 - ME 1.603290018002586e-03 + ME 6.795383824785976e-04 Event 28 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2276,7 +2276,7 @@ Event 28 Batch 1 2 6.475300414228806e+02 3.136396845517189e+02 3.816259196370642e+02 -4.186728559156669e+02 3 7.290923529036073e+02 -2.791764769994177e+02 -4.112865540505715e+02 5.333662195995520e+02 4 1.233776056735125e+02 -3.446320755230100e+01 2.966063441350738e+01 -1.146933636838856e+02 - ME 5.037107889244314e-02 + ME 6.311296815400830e-04 Event 29 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2284,7 +2284,7 @@ Event 29 Batch 1 2 3.156754590345620e+02 -2.870540678871016e+02 4.159516713841874e+01 -1.245825012466667e+02 3 4.770060274033896e+02 -2.355061130652810e+02 -3.231858413754910e+02 -2.600433287405434e+02 4 7.073185135620483e+02 5.225601809523826e+02 2.815906742370723e+02 3.846258299872100e+02 - ME 7.956699356695784e-04 + ME 1.321807869823317e-04 Event 30 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2292,7 +2292,7 @@ Event 30 Batch 1 2 6.091290614220995e+02 1.543004089904798e+02 4.216196287493766e+00 -5.892468251447810e+02 3 2.079357839022729e+02 2.034647466922837e+02 4.185675980476618e+01 9.348729279626889e+00 4 6.829351546756266e+02 -3.577651556827627e+02 -4.607295609226003e+01 5.798980958651539e+02 - ME 3.902231064020147e-04 + ME 1.448382779935031e-04 Event 31 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2300,7 +2300,7 @@ Event 31 Batch 1 2 6.901710072855793e+02 1.433309098684656e+01 
6.447948515477649e+02 -2.457034416076623e+02 3 5.898919363861644e+02 1.120085307876391e+02 -4.815950471622465e+02 3.217029626736535e+02 4 2.199370563282564e+02 -1.263416217744856e+02 -1.631998043855182e+02 -7.599952106599136e+01 - ME 2.415465849322543e-04 + ME 2.376400497996635e-05 Event 32 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2308,7 +2308,7 @@ Event 32 Batch 1 2 6.144498311923271e+02 5.832947925341469e+02 -1.925283703230110e+02 1.576726595169125e+01 3 2.478450424037004e+02 5.004284035329792e+01 2.389954177960992e+02 4.247433867565734e+01 4 6.377051264039724e+02 -6.333376328874447e+02 -4.646704747308818e+01 -5.824160462734862e+01 - ME 2.160220890176678e-04 + ME 5.390650629646604e-05 Event 33 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2316,7 +2316,7 @@ Event 33 Batch 1 2 6.134536717469736e+02 -1.625429495269566e+02 -1.853973484494194e+02 5.617232593785355e+02 3 5.361644687950269e+02 -3.755831293394986e+01 -9.992652347025609e+01 -5.254297294928764e+02 4 3.503818594579993e+02 2.001012624609065e+02 2.853238719196754e+02 -3.629352988565911e+01 - ME 1.224582992507153e-04 + ME 1.005452860076771e-04 Event 34 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2324,7 +2324,7 @@ Event 34 Batch 1 2 3.840838099420727e+02 -2.442269925519278e+02 -3.827314394217582e+01 -2.939535943332559e+02 3 6.022630974514659e+02 3.956891925431131e+01 5.086724982658299e+02 3.200116071158652e+02 4 5.136530926064613e+02 2.046580732976165e+02 -4.703993543236541e+02 -2.605801278260916e+01 - ME 9.608243105510499e-05 + ME 2.313941306740064e-05 Event 35 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2332,7 +2332,7 @@ Event 35 Batch 1 2 3.454350783663418e+02 -3.439607925797615e+02 2.363778141880094e+01 -2.139209721976717e+01 3 6.705698302143294e+02 5.215327591153251e+02 
4.060443141865528e+02 -1.131171661597076e+02 4 4.839950914193290e+02 -1.775719665355635e+02 -4.296820956053536e+02 1.345092633794747e+02 - ME 4.862206803317224e-05 + ME 7.982017052260048e-06 Event 36 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2340,7 +2340,7 @@ Event 36 Batch 1 2 7.098652154429357e+02 2.489290984574327e+02 -1.674080692141068e+02 -6.433641786725617e+02 3 6.178479130357197e+02 -1.435715807033598e+02 2.588953561477193e+02 5.423065917191846e+02 4 1.722868715213448e+02 -1.053575177540730e+02 -9.148728693361247e+01 1.010575869533772e+02 - ME 6.680529568232270e-05 + ME 5.562249548714765e-05 Event 37 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2348,7 +2348,7 @@ Event 37 Batch 1 2 6.906872786346031e+02 1.495946561071237e+02 1.712833879510068e+02 6.521750966909805e+02 3 3.682276595245592e+02 -1.358558710218083e+02 1.194309698061993e+02 -3.207351477449753e+02 4 4.410850618408380e+02 -1.373878508531530e+01 -2.907143577572061e+02 -3.314399489460051e+02 - ME 2.014943348935539e-03 + ME 5.542438863722841e-04 Event 38 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2356,7 +2356,7 @@ Event 38 Batch 1 2 6.131720166645955e+02 -5.222102655174087e+02 6.340623138461877e+00 3.213038392347352e+02 3 4.540063357567760e+02 2.932429176443922e+02 -3.207297067242505e+02 -1.313879727496968e+02 4 4.328216475786277e+02 2.289673478730168e+02 3.143890835857886e+02 -1.899158664850380e+02 - ME 2.589645049118943e-04 + ME 3.150821423911933e-05 Event 39 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2364,7 +2364,7 @@ Event 39 Batch 1 2 2.929747896182304e+02 2.510117592312210e+02 -1.378648144805472e+02 6.181113983529403e+01 3 6.287164314722783e+02 3.864928360025993e+01 6.254120614625328e+02 5.148142827864510e+01 4 5.783087789094894e+02 -2.896610428314818e+02 
-4.875472469819856e+02 -1.132925681139394e+02 - ME 1.708238325115053e-04 + ME 2.723120294663496e-05 Event 40 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2372,7 +2372,7 @@ Event 40 Batch 1 2 1.143487538112954e+02 -3.203572478439017e+01 1.022340126870988e+02 3.996944439980560e+01 3 7.361483923235807e+02 5.924235295921244e+02 -3.838567751530157e+02 -2.088128187524163e+02 4 6.495028538651248e+02 -5.603878048077345e+02 2.816227624659169e+02 1.688433743526105e+02 - ME 2.026369815874481e-04 + ME 4.279185076498264e-05 Event 41 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2380,7 +2380,7 @@ Event 41 Batch 1 2 6.384898508133350e+02 5.540399192408263e+02 -3.014826159773289e+02 -9.908223727147148e+01 3 3.510407251698805e+02 -1.719168197014114e+02 2.065966849440144e+02 -2.258140996521069e+02 4 5.104694240167846e+02 -3.821230995394149e+02 9.488593103331458e+01 3.248963369235784e+02 - ME 4.455092331482675e-05 + ME 1.488395965626735e-05 Event 42 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2388,7 +2388,7 @@ Event 42 Batch 1 2 3.291654598309212e+02 -1.090829060981258e+02 2.972891943885482e+02 -8.983292515941632e+01 3 6.884965239796815e+02 4.933628807557017e+02 -2.919492821202986e+02 3.812953554581829e+02 4 4.823380161893969e+02 -3.842799746575757e+02 -5.339912268249619e+00 -2.914624302987665e+02 - ME 6.690811667999076e-04 + ME 5.767145017550451e-05 Event 43 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2396,7 +2396,7 @@ Event 43 Batch 1 2 3.674173006007981e+02 2.791827424102563e+02 1.079644067383057e+02 2.130637369397045e+02 3 7.392205647816575e+02 -6.110484627794917e+02 -4.247874240022372e+01 -4.138385868609020e+02 4 3.933621346175442e+02 3.318657203692355e+02 -6.548566433808202e+01 2.007748499211975e+02 - ME 2.734436884563990e-05 + ME 6.513986915725277e-06 
Event 44 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2404,7 +2404,7 @@ Event 44 Batch 1 2 2.081359682230012e+02 -1.082501549908087e+02 1.771964605001424e+02 1.427934167997762e+01 3 7.449563315308093e+02 5.092828751965591e+02 -5.388739609944279e+02 7.215083562608928e+01 4 5.469077002461893e+02 -4.010327202057504e+02 3.616775004942854e+02 -8.643017730606689e+01 - ME 1.760644262839344e-04 + ME 1.838899544278803e-05 Event 45 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2412,7 +2412,7 @@ Event 45 Batch 1 2 5.180982465404422e+02 4.470261481799612e+02 -3.368837017252423e+01 -2.597277606009553e+02 3 3.377595659674062e+02 -7.316527185649456e+01 2.454727770679006e+02 -2.201624016839132e+02 4 6.441421874921515e+02 -3.738608763234666e+02 -2.117844068953763e+02 4.798901622848684e+02 - ME 1.645403798734011e-04 + ME 4.091340785269233e-05 Event 46 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2420,7 +2420,7 @@ Event 46 Batch 1 2 6.296560291524888e+02 2.172411497655985e+02 5.821614514430422e+02 -1.017892054705761e+02 3 6.224001894826197e+02 1.405102091633609e+01 -6.218608257778048e+02 2.176414579432105e+01 4 2.479437813648912e+02 -2.312921706819346e+02 3.969937433476264e+01 8.002505967625511e+01 - ME 4.041878897626609e-05 + ME 7.434320230190137e-06 Event 47 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2428,7 +2428,7 @@ Event 47 Batch 1 2 5.458843469271557e+02 -1.019033861791133e+02 -1.559739004096151e+02 5.131058004898495e+02 3 2.573134207008558e+02 6.791700498899543e+01 -2.412204887508016e+02 5.839651284901167e+01 4 6.968022323719882e+02 3.398638119011781e+01 3.971943891604168e+02 -5.715023133388611e+02 - ME 1.408798022766008e-02 + ME 4.005478861198618e-03 Event 48 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 
7.500000000000000e+02 @@ -2436,7 +2436,7 @@ Event 48 Batch 1 2 6.623920218006384e+02 -6.284562032939594e+02 -1.837527125398962e+02 -1.002044496053409e+02 3 1.251779629744606e+02 -7.502448682133647e+01 9.550779386908961e+01 3.031682869117444e+01 4 7.124300152249010e+02 7.034806901152959e+02 8.824491867080658e+01 6.988762091416655e+01 - ME 8.682321044518227e-04 + ME 3.004757451335502e-04 Event 49 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2444,7 +2444,7 @@ Event 49 Batch 1 2 2.397494808364364e+02 2.393958238941666e+02 -4.144666783354266e+00 -1.233996761053010e+01 3 6.782491241100328e+02 -3.516321535544010e+02 -2.705899831712919e+02 5.129890485673947e+02 4 5.820013950535307e+02 1.122363296602344e+02 2.747346499546462e+02 -5.006490809568646e+02 - ME 9.041285542966720e-03 + ME 6.040872325723622e-04 Event 50 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2452,7 +2452,7 @@ Event 50 Batch 1 2 4.764898792162554e+02 4.667163214316568e+02 5.900817880915086e+01 -7.573978570375913e+01 3 5.114228101321805e+02 -2.035689445851523e+02 -4.549677995197112e+02 -1.145306811477843e+02 4 5.120873106515638e+02 -2.631473768465044e+02 3.959596207105603e+02 1.902704668515434e+02 - ME 5.157319121365441e-05 + ME 9.692662313613028e-06 Event 51 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2460,7 +2460,7 @@ Event 51 Batch 1 2 4.678795643859630e+02 4.629737719234085e+02 5.365495313512251e+01 4.108186077915564e+01 3 6.311645871918951e+02 -4.500610707732837e+02 -4.345770688214700e+02 8.340587481742408e+01 4 4.009558484221416e+02 -1.291270115012470e+01 3.809221156863474e+02 -1.244877355965797e+02 - ME 1.517985021504320e-04 + ME 1.293558494013996e-05 Event 52 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2468,7 +2468,7 @@ Event 52 Batch 1 2 3.696230029266819e+02 
2.516704934433110e+02 2.514038675722595e+02 1.003953305301004e+02 3 6.696174214325739e+02 -2.754912388418390e+01 -6.493999246431116e+02 -1.609604756850079e+02 4 4.607595756407442e+02 -2.241213695591271e+02 3.979960570708519e+02 6.056514515490756e+01 - ME 5.727699238559496e-05 + ME 8.655753222194317e-06 Event 53 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2476,7 +2476,7 @@ Event 53 Batch 1 2 7.284624742442375e+01 -4.271742504396477e+01 -2.683807109937144e+01 -5.255012179908527e+01 3 7.493542950735829e+02 3.356513586119740e+02 2.501807367708783e+02 6.215139772812374e+02 4 6.777994575019936e+02 -2.929339335680093e+02 -2.233426656715069e+02 -5.689638554821522e+02 - ME 1.612275481129464e-02 + ME 2.372423861687152e-03 Event 54 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2484,7 +2484,7 @@ Event 54 Batch 1 2 7.460259847230064e+02 2.055186857047568e+01 6.233229443227743e+02 4.093908861479223e+02 3 5.756222844616437e+02 2.606063779094539e+01 -4.696411468594731e+02 -3.318117699890848e+02 4 1.783517308153497e+02 -4.661250636142109e+01 -1.536817974633012e+02 -7.757911615883735e+01 - ME 4.374243668355642e-04 + ME 5.046268590690708e-05 Event 55 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2492,7 +2492,7 @@ Event 55 Batch 1 2 5.967428482894213e+02 -8.165820254184375e+01 5.098287527914877e+02 -2.991798919868828e+02 3 5.942526243827265e+02 5.606061544962815e+01 -2.905196430116550e+02 5.153559216750568e+02 4 3.090045273278509e+02 2.559758709221549e+01 -2.193091097798325e+02 -2.161760296881746e+02 - ME 1.779007466146034e-03 + ME 1.849048785615045e-04 Event 56 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2500,7 +2500,7 @@ Event 56 Batch 1 2 5.610874267302015e+02 -4.199055433713192e+02 3.580252469767042e+02 1.015694718309908e+02 3 6.303091265298390e+02 
2.130872195586830e+02 -5.453843477211296e+02 -2.333224059286980e+02 4 3.086034467399593e+02 2.068183238126362e+02 1.873591007444254e+02 1.317529340977073e+02 - ME 3.258989367177766e-05 + ME 7.213009143835112e-06 Event 57 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2508,7 +2508,7 @@ Event 57 Batch 1 2 6.552053965855981e+02 4.516249927537604e+02 7.110694105335197e+00 4.746350341729917e+02 3 6.035190443408458e+02 -3.717228873476765e+02 2.148772607224587e+02 -4.241286299324850e+02 4 2.412755590735562e+02 -7.990210540608396e+01 -2.219879548277939e+02 -5.050640424050685e+01 - ME 1.623545585873121e-04 + ME 3.752873989265266e-05 Event 58 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2516,7 +2516,7 @@ Event 58 Batch 1 2 2.959982971085279e+02 1.850007048157144e+02 -2.304987961744356e+02 1.612563397119956e+01 3 7.018897389129390e+02 -3.764226030262936e+02 4.376344751014918e+02 3.992884868423144e+02 4 5.021119639785326e+02 1.914218982105791e+02 -2.071356789270567e+02 -4.154141208135139e+02 - ME 4.558573859477246e-03 + ME 1.901193343270815e-04 Event 59 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2524,7 +2524,7 @@ Event 59 Batch 1 2 5.521089721327345e+02 1.223876815062619e+02 -3.629066091228882e+01 -5.371485459866160e+02 3 4.098988410471214e+02 -5.841964900319319e+01 -3.626461945087767e+02 1.819119075553315e+02 4 5.379921868201441e+02 -6.396803250306872e+01 3.989368554210655e+02 3.552366384312845e+02 - ME 5.148841296796537e-05 + ME 1.780280399801712e-05 Event 60 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2532,7 +2532,7 @@ Event 60 Batch 1 2 7.143828168925960e+02 -4.584044193456332e+02 -2.419772079280938e+02 -4.915844060170314e+02 3 1.284110307517517e+02 8.324300347118127e+01 -7.889851197070540e+01 5.774963203893758e+01 4 6.572061523556514e+02 
3.751614158744520e+02 3.208757198987992e+02 4.338347739780938e+02 - ME 1.673517837789511e-04 + ME 7.144001898958308e-05 Event 61 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2540,7 +2540,7 @@ Event 61 Batch 1 2 4.394390210968651e+02 -2.137451655543886e+02 -3.779414621253704e+02 -6.767502250635177e+01 3 4.431311911324728e+02 3.845666395406355e+02 -2.150363068358313e+02 4.725610065709574e+01 4 6.174297877706618e+02 -1.708214739862469e+02 5.929777689612018e+02 2.041892184925626e+01 - ME 1.368591177943825e-04 + ME 2.870354731125455e-05 Event 62 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2548,7 +2548,7 @@ Event 62 Batch 1 2 7.301725729481176e+02 4.281927891852710e+02 5.652737593150771e+02 -1.739784429324868e+02 3 7.567373964415995e+01 2.589885732647599e+01 -5.696550981957816e+01 4.255225906941358e+01 4 6.941536874077224e+02 -4.540916465117469e+02 -5.083082494954988e+02 1.314261838630732e+02 - ME 8.513592598060080e-04 + ME 2.379197431250548e-04 Event 63 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2556,7 +2556,7 @@ Event 63 Batch 1 2 4.361152320236988e+02 -3.738769057978321e+02 1.427754799584550e+02 -1.732850750548248e+02 3 5.817148313055657e+02 5.081993893256957e+02 2.829214478037172e+02 -8.998890070513914e+00 4 4.821699366707353e+02 -1.343224835278637e+02 -4.256969277621721e+02 1.822839651253387e+02 - ME 4.544766189571194e-05 + ME 8.350404272725701e-06 Event 64 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2564,7 +2564,7 @@ Event 64 Batch 1 2 6.097675704107204e+02 3.288514690970509e+02 4.971291587853200e+02 -1.285916042465611e+02 3 5.709532610348123e+02 -6.501292612520263e+01 -4.768258747557200e+02 3.072426254385416e+02 4 3.192791685544673e+02 -2.638385429718484e+02 -2.030328402960006e+01 -1.786510211919805e+02 - ME 4.598138986874043e-04 + ME 
3.000969253297957e-05 Event 65 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2572,7 +2572,7 @@ Event 65 Batch 1 2 6.258641293880484e+02 3.743515439843765e+02 -1.622018320411498e+02 -4.746128903155367e+02 3 7.438702198751357e+02 -4.029113627030089e+02 2.325939036896868e+02 5.804355380128616e+02 4 1.302656507368158e+02 2.855981871863233e+01 -7.039207164853700e+01 -1.058226476973252e+02 - ME 6.427333508548903e-03 + ME 3.162776051460646e-04 Event 66 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2580,7 +2580,7 @@ Event 66 Batch 1 2 3.731957242404369e+02 1.596860493342637e+01 -3.714568973276624e+02 3.224632809376674e+01 3 6.079923612940432e+02 4.451199598539357e+02 3.189341902600864e+02 -2.642043054431177e+02 4 5.188119144655197e+02 -4.610885647873621e+02 5.252270706757586e+01 2.319579773493509e+02 - ME 4.681392980523237e-05 + ME 1.034065067393998e-05 Event 67 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2588,7 +2588,7 @@ Event 67 Batch 1 2 7.084256499213539e+02 6.318790977834966e+02 -2.229764540025608e+02 2.299504472951746e+02 3 5.168612394424738e+01 1.130069959366449e+01 -1.428140623590627e+01 4.837138651102398e+01 4 7.398882261343989e+02 -6.431797973771612e+02 2.372578602384670e+02 -2.783218338061985e+02 - ME 5.878400132197954e-02 + ME 1.479715191731530e-02 Event 68 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2596,7 +2596,7 @@ Event 68 Batch 1 2 5.644037677826096e+02 -7.446914007305443e+01 3.170710956176409e+02 4.609467220707991e+02 3 4.303832728799333e+02 -1.588265612792408e+02 -3.994808673830752e+02 -2.046757440246668e+01 4 5.052129593374568e+02 2.332957013522950e+02 8.240977176543441e+01 -4.404791476683325e+02 - ME 8.108482137897523e-03 + ME 3.274273226082449e-04 Event 69 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -2604,7 +2604,7 @@ Event 69 Batch 1 2 2.379282923937934e+02 -4.413455715133102e+01 1.058497776082811e+02 -2.084654354245804e+02 3 5.822935131976616e+02 -5.806422676829345e+02 4.095409019445288e+01 -1.559022092337181e+01 4 6.797781944085444e+02 6.247768248342655e+02 -1.468038678027338e+02 2.240556563479522e+02 - ME 3.039802585689931e-04 + ME 6.379305675073031e-05 Event 70 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2612,7 +2612,7 @@ Event 70 Batch 1 2 5.861861307468000e+02 1.831219916849830e+02 2.904683423406074e+02 -4.750880530376756e+02 3 4.633200606614189e+02 -4.245314712871158e+02 -1.339518705596282e+02 1.284344380284135e+02 4 4.504938085917810e+02 2.414094796021329e+02 -1.565164717809791e+02 3.466536150092620e+02 - ME 3.530491740557932e-05 + ME 1.325653453486623e-05 Event 71 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2620,7 +2620,7 @@ Event 71 Batch 1 2 7.383412459951699e+02 5.748049255568963e+02 -1.639684737984460e+02 -4.334298474879633e+02 3 3.973981306646684e+02 -3.228684354469153e+02 -4.837114091238284e+00 2.316416412804533e+02 4 3.642606233401616e+02 -2.519364901099809e+02 1.688055878896842e+02 2.017882062075102e+02 - ME 3.103530482016079e-05 + ME 1.333441808219846e-05 Event 72 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2628,7 +2628,7 @@ Event 72 Batch 1 2 3.538199915090663e+02 3.512029503136998e+02 -6.467835580753929e+00 -4.246458742680748e+01 3 5.344234504985296e+02 1.310173344785605e+01 3.836805260246265e+01 5.328833470497182e+02 4 6.117565579924039e+02 -3.643046837615559e+02 -3.190021702170876e+01 -4.904187596229107e+02 - ME 9.376669006106200e-03 + ME 2.994704399169685e-03 Event 73 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2636,7 +2636,7 @@ Event 73 Batch 1 2 
4.694927197571710e+02 1.451947293992222e+02 -1.807863847612341e+02 4.082379055705570e+02 3 5.537325951281179e+02 -5.796379956652479e+01 5.401382741253894e+02 -1.072876026015002e+02 4 4.767746851147115e+02 -8.723092983269744e+01 -3.593518893641554e+02 -3.009503029690568e+02 - ME 1.077472469645428e-03 + ME 1.535829386616431e-04 Event 74 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2644,7 +2644,7 @@ Event 74 Batch 1 2 6.258444305735198e+02 -3.349227552763227e+02 4.941036656040852e+02 1.880679848209580e+02 3 5.555040664889822e+02 3.765538795180102e+01 -5.474422011270130e+02 -8.645158222500005e+01 4 3.186515029374982e+02 2.972673673245214e+02 5.333853552292791e+01 -1.016164025959578e+02 - ME 1.623439923565115e-04 + ME 1.487896902219418e-05 Event 75 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2652,7 +2652,7 @@ Event 75 Batch 1 2 3.943316317993887e+02 5.588489849751632e+01 -2.552251009651266e+02 -2.953548066221912e+02 3 5.467466262348042e+02 -3.021648543602057e+02 -2.377479281839000e+02 3.887212326756534e+02 4 5.589217419658066e+02 2.462799558626894e+02 4.929730291490265e+02 -9.336642605346221e+01 - ME 1.348649436679123e-04 + ME 4.632408498797698e-05 Event 76 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2660,7 +2660,7 @@ Event 76 Batch 1 2 5.517772830004059e+02 2.282681125856672e+02 -4.885490190451381e+02 -1.169260227747471e+02 3 4.245403880864563e+02 -2.793100283061228e+02 1.521744876196477e+02 -2.811821020654221e+02 4 5.236823289131380e+02 5.104191572045557e+01 3.363745314254903e+02 3.981081248401691e+02 - ME 5.074216551061466e-05 + ME 1.645260485784409e-05 Event 77 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2668,7 +2668,7 @@ Event 77 Batch 1 2 3.781543446472003e+02 -5.926925448310480e+01 -1.775497893613220e+02 3.285786605157444e+02 3 
6.702964816234122e+02 -6.066564226432872e+01 -1.057468051743550e+02 -6.591165802199176e+02 4 4.515491737293867e+02 1.199348967474336e+02 2.832965945356770e+02 3.305379197041734e+02 - ME 6.321080405055773e-05 + ME 5.041095643414513e-05 Event 78 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2676,7 +2676,7 @@ Event 78 Batch 1 2 4.564262045363139e+02 1.882572856930395e+02 1.751822011208171e+02 -3.770878823051468e+02 3 3.809544602625751e+02 -2.816334489555117e+02 1.992812047321844e+02 -1.615422627793184e+02 4 6.626193352011103e+02 9.337616326247226e+01 -3.744634058530013e+02 5.386301450844651e+02 - ME 2.572921643188974e-04 + ME 6.222463480998997e-05 Event 79 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2684,7 +2684,7 @@ Event 79 Batch 1 2 6.126536521478922e+02 6.075062399138452e+02 -4.178945028651393e+01 6.733726903166659e+01 3 2.872846052831658e+02 -1.084163947926161e+02 2.139961846825774e+01 2.651799127051085e+02 4 6.000617425689430e+02 -4.990898451212283e+02 2.038983181825616e+01 -3.325171817367756e+02 - ME 1.996659951821530e-03 + ME 6.289823950094716e-04 Event 80 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2692,7 +2692,7 @@ Event 80 Batch 1 2 4.171281258707700e+02 -2.756641813219371e+02 1.445082905894664e+01 3.127240094205691e+02 3 3.805235327384960e+02 -2.955852199231463e+02 2.395269588958384e+02 7.373784162959287e+00 4 7.023483413907342e+02 5.712494012450838e+02 -2.539777879547846e+02 -3.200977935835284e+02 - ME 1.297520069620947e-03 + ME 5.629434448779270e-04 Event 81 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2700,7 +2700,7 @@ Event 81 Batch 1 2 7.471091333863935e+02 -9.753029041192970e+01 7.407154559164039e+02 -7.162458282065091e-01 3 6.775352561453885e+02 9.550863422814814e+01 -6.702673865908516e+02 -2.595678293896889e+01 4 
7.535561046821789e+01 2.021656183781575e+00 -7.044806932555213e+01 2.667302876717550e+01 - ME 1.022399816924924e-04 + ME 2.904529061551848e-05 Event 82 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2708,7 +2708,7 @@ Event 82 Batch 1 2 4.309094465924175e+02 3.042233433179616e+02 2.799835808203350e+02 -1.214096495919827e+02 3 5.540384887187945e+02 -4.824447657759213e+02 1.988969596446625e+02 1.861335391629672e+02 4 5.150520646887885e+02 1.782214224579596e+02 -4.788805404649973e+02 -6.472388957098450e+01 - ME 1.053635072607165e-04 + ME 1.778678120024833e-05 Event 83 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2716,7 +2716,7 @@ Event 83 Batch 1 2 4.869534474909295e+02 -4.727010820510885e+02 1.062322962656182e+02 4.890855018466118e+01 3 3.520990385354405e+02 -1.437544586613779e+02 -3.142298368411062e+02 6.758696761482639e+01 4 6.609475139736298e+02 6.164555407124665e+02 2.079975405754878e+02 -1.164955177994876e+02 - ME 2.998516055200512e-04 + ME 7.948516811691567e-05 Event 84 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2724,7 +2724,7 @@ Event 84 Batch 1 2 1.391975815431583e+01 -3.682657486111166e-01 -1.138840508663312e+01 -7.995516055627093e+00 3 7.493632094786751e+02 -3.452281541586202e+01 3.833012084573049e+02 6.429880080772211e+02 4 7.367170323670085e+02 3.489108116447313e+01 -3.719128033706718e+02 -6.349924920215940e+02 - ME 3.806217512266510e-01 + ME 8.671177508029917e-02 Event 85 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2732,7 +2732,7 @@ Event 85 Batch 1 2 7.362448947738020e+02 6.409220704967113e+02 3.243429451315054e+02 1.614840505254833e+02 3 1.517836214454495e+02 -1.266859291808411e+02 -6.780846852200752e+01 4.889738933094901e+01 4 6.119714837807480e+02 -5.142361413158706e+02 -2.565344766094980e+02 -2.103814398564324e+02 - ME 
5.694785892689211e-04 + ME 1.062305495679385e-04 Event 86 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2740,7 +2740,7 @@ Event 86 Batch 1 2 5.451728369778392e+02 -6.605005893803180e+01 1.066920544886257e+02 -5.305352178712969e+02 3 3.158718592284829e+02 -1.755596039144849e+02 2.550395858012225e+02 6.251932981237656e+01 4 6.389553037936773e+02 2.416096628525165e+02 -3.617316402898481e+02 4.680158880589203e+02 - ME 1.469986179099727e-04 + ME 4.057626974930324e-05 Event 87 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2748,7 +2748,7 @@ Event 87 Batch 1 2 3.414211232216659e+02 1.437256906952883e+02 1.534640422371205e+02 -2.689983214749668e+02 3 5.081668091119999e+02 4.794742948200324e+02 -1.464748766741243e+02 8.296394996143997e+01 4 6.504120676663341e+02 -6.231999855153207e+02 -6.989165562996117e+00 1.860343715135268e+02 - ME 1.823135893899652e-04 + ME 3.656584417835253e-05 Event 88 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2756,7 +2756,7 @@ Event 88 Batch 1 2 2.925516585730864e+02 1.655911293372511e+01 2.598275245766865e+02 -1.334238591297045e+02 3 7.159840369510271e+02 -1.056844973272874e+02 -3.694097043713192e+02 6.041526284885822e+02 4 4.914643044758866e+02 8.912538439356234e+01 1.095821797946327e+02 -4.707287693588777e+02 - ME 8.728488941697977e-02 + ME 2.327745727475104e-03 Event 89 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2764,7 +2764,7 @@ Event 89 Batch 1 2 6.333634651097186e+02 1.209853522660007e+02 5.372166546881791e+02 -3.129058794565919e+02 3 6.221307427802806e+02 5.757192259699385e+01 -4.327483989541182e+02 4.432391657372765e+02 4 2.445057921100010e+02 -1.785572748629945e+02 -1.044682557340609e+02 -1.303332862806847e+02 - ME 5.497507832908574e-04 + ME 5.047204144927262e-05 Event 90 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2772,7 +2772,7 @@ Event 90 Batch 1 2 3.111538587406461e+02 2.628215106651484e+02 -6.985334981761831e+01 -1.512021390726355e+02 3 5.216486323898988e+02 1.252715366480781e+02 4.457714554600226e+02 -2.402335265468457e+02 4 6.671975088694549e+02 -3.880930473132266e+02 -3.759181056424042e+02 3.914356656194811e+02 - ME 2.329075524537458e-04 + ME 4.503542584588689e-05 Event 91 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2780,7 +2780,7 @@ Event 91 Batch 1 2 3.007803348469016e+02 8.390513937949677e+01 2.884042062049404e+02 -1.586667134655829e+01 3 6.256884422056424e+02 2.364580673743878e+02 -3.590826126759745e+02 -4.545693416378727e+02 4 5.735312229474563e+02 -3.203632067538847e+02 7.067840647103421e+01 4.704360129844310e+02 - ME 6.478111274774788e-05 + ME 2.635583378174906e-05 Event 92 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2788,7 +2788,7 @@ Event 92 Batch 1 2 6.843865618656529e+02 -2.264962467301474e+02 -5.909185329480341e+02 2.605757158639088e+02 3 6.645516272550811e+02 3.453347116263074e+02 4.983670680340538e+02 -2.720350487207341e+02 4 1.510618108792659e+02 -1.188384648961601e+02 9.255146491398015e+01 1.145933285682523e+01 - ME 9.365402433981294e-05 + ME 1.711437740567050e-05 Event 93 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2796,7 +2796,7 @@ Event 93 Batch 1 2 5.579763469381434e+02 2.180908585044468e+02 5.135246110359701e+02 8.151996049100932e+00 3 3.333821836060117e+02 1.681122988324202e+02 -1.261705574188212e+02 2.587719570738210e+02 4 6.086414694558448e+02 -3.862031573368670e+02 -3.873540536171486e+02 -2.669239531229223e+02 - ME 5.183695239236329e-04 + ME 1.157787815150910e-04 Event 94 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2804,7 +2804,7 @@ Event 94 Batch 
1 2 4.534979734151987e+02 1.139662723650677e+02 2.686183171543304e+01 4.381216071501101e+02 3 3.856184698299744e+02 1.545134372854228e+02 -3.452526490806396e+02 7.501873282757614e+01 4 6.608835567548277e+02 -2.684797096504910e+02 3.183908173652065e+02 -5.131403399776862e+02 - ME 6.944325623628402e-03 + ME 1.545010233607317e-03 Event 95 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2812,7 +2812,7 @@ Event 95 Batch 1 2 2.828073115974175e+02 -5.711637476392460e+01 5.915078172645698e+01 -2.705898746219725e+02 3 6.809618671276158e+02 3.772100991821226e+02 3.247893528880094e+02 4.646864338535512e+02 4 5.362308212749670e+02 -3.200937244181981e+02 -3.839401346144663e+02 -1.940965592315787e+02 - ME 2.560512106670314e-04 + ME 6.408796328924562e-05 Event 96 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2820,7 +2820,7 @@ Event 96 Batch 1 2 4.639832102051440e+02 -4.275497908582962e+02 -1.317248975374901e+02 -1.230046627491649e+02 3 7.474114851375481e+02 6.594176555428718e+02 2.654537688070380e+02 2.309254864669502e+02 4 2.886053046573076e+02 -2.318678646845757e+02 -1.337288712695479e+02 -1.079208237177853e+02 - ME 2.440162169445852e-04 + ME 1.445191791082226e-05 Event 97 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2828,7 +2828,7 @@ Event 97 Batch 1 2 5.095921959312568e+02 3.190102848863560e+02 3.100341192456060e+02 2.485869851668986e+02 3 4.555541331018014e+02 -2.788120391899956e+02 2.221549471930723e+02 -2.836205112936887e+02 4 5.348536709669415e+02 -4.019824569636059e+01 -5.321890664386783e+02 3.503352612679014e+01 - ME 8.198891770965733e-05 + ME 2.250661525403011e-05 Event 98 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2836,7 +2836,7 @@ Event 98 Batch 1 2 5.299941952467790e+02 -2.570048161992350e+02 -4.630296380940593e+02 -2.111695271961878e+01 3 
7.352146396921255e+02 2.361229278157243e+02 6.962552486063584e+02 3.893348873424185e+00 4 2.347911650610957e+02 2.088188838351074e+01 -2.332256105122990e+02 1.722360384619465e+01 - ME 6.760444392591968e-05 + ME 5.654417419793765e-06 Event 99 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2844,7 +2844,7 @@ Event 99 Batch 1 2 4.290897291078425e+02 3.747236205606835e+02 2.040795775432686e+02 -4.529602465443949e+01 3 6.438744429739487e+02 -5.215755139094103e+02 2.133414139578182e+01 3.769325350988583e+02 4 4.270358279182090e+02 1.468518933487271e+02 -2.254137189390505e+02 -3.316365104444187e+02 - ME 2.024851967866169e-03 + ME 8.457850707842401e-05 Event 100 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2852,7 +2852,7 @@ Event 100 Batch 1 2 5.119062275524872e+02 -4.721600394809319e+02 -1.845880136125884e+02 7.099400083769524e+01 3 4.523854579707449e+02 2.836789572262426e+02 -3.060214184981774e+02 -1.747276258374610e+02 4 5.357083144767672e+02 1.884810822546894e+02 4.906094321107658e+02 1.037336249997658e+02 - ME 6.898305006855298e-05 + ME 1.420495101373495e-05 Event 101 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2860,7 +2860,7 @@ Event 101 Batch 1 2 6.024072815192737e+02 -3.080418730730875e+02 -4.692284526425155e+02 2.186993289696520e+02 3 3.347434020484399e+02 8.940653726951260e+01 -3.939923552329941e+01 -3.201676381969582e+02 4 5.628493164322859e+02 2.186353358035749e+02 5.086276881658150e+02 1.014683092273061e+02 - ME 9.290725627447436e-05 + ME 2.743452031293993e-05 Event 102 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2868,7 +2868,7 @@ Event 102 Batch 1 2 5.910857738801296e+02 3.707548039128416e+02 -7.516477307090547e+01 -4.541734518311494e+02 3 2.311218706704979e+02 4.536804143672514e+01 -2.262982016400413e+02 1.217307902336991e+01 4 
6.777923554493723e+02 -4.161228453495667e+02 3.014629747109467e+02 4.420003728077793e+02 - ME 2.633339755449651e-04 + ME 7.158169676479796e-05 Event 103 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2876,7 +2876,7 @@ Event 103 Batch 1 2 6.627949406417042e+02 7.189602123685950e+01 -6.391860825813610e+02 -1.599038689489492e+02 3 5.519979886399102e+02 1.442810582977179e+02 4.734454174874869e+02 2.444057944057306e+02 4 2.852070707183856e+02 -2.161770795345774e+02 1.657406650938741e+02 -8.450192545678139e+01 - ME 1.652798222861839e-04 + ME 1.658567428345252e-05 Event 104 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2884,7 +2884,7 @@ Event 104 Batch 1 2 4.368180791462563e+02 -3.483499330357901e+02 -2.596280064690262e+02 4.533935023690698e+01 3 4.635715977792429e+02 1.873023362819025e+02 -2.251347602994603e+02 -3.593477435519053e+02 4 5.996103230745010e+02 1.610475967538876e+02 4.847627667684865e+02 3.140083933149983e+02 - ME 9.158171748371188e-05 + ME 2.162124469235967e-05 Event 105 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2892,7 +2892,7 @@ Event 105 Batch 1 2 5.701708357490469e+02 2.288495716262106e+02 -4.521314661478370e+02 -2.613422905391967e+02 3 3.711008490497917e+02 -3.362590561223710e+02 -8.126001400906793e+01 1.343223639771668e+02 4 5.587283152011612e+02 1.074094844961603e+02 5.333914801569049e+02 1.270199265620299e+02 - ME 7.043372303967046e-05 + ME 1.720246557093887e-05 Event 106 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2900,7 +2900,7 @@ Event 106 Batch 1 2 6.775588183099673e+02 5.149765831731705e+02 3.445381345095063e+02 -2.741870619150275e+02 3 7.044100837534635e+02 -4.546975847980706e+02 -4.392260662935809e+02 3.106833358270535e+02 4 1.180310979365712e+02 -6.027899837509908e+01 9.468793178407486e+01 -3.649627391202603e+01 - ME 
3.259673897057837e-04 + ME 2.786544600802367e-05 Event 107 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2908,7 +2908,7 @@ Event 107 Batch 1 2 6.046880513041550e+02 2.289413119004024e+02 -5.349774474143721e+02 -1.644160754103499e+02 3 3.366746442316215e+02 -7.166101576320902e+01 2.452245434825371e+01 3.280444544890399e+02 4 5.586373044642238e+02 -1.572802961371935e+02 5.104549930661184e+02 -1.636283790786902e+02 - ME 8.859556065170558e-04 + ME 4.667002706670146e-04 Event 108 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2916,7 +2916,7 @@ Event 108 Batch 1 2 6.239206451413978e+02 -2.218030564243363e+02 5.011455197099735e+02 -2.982172759400455e+02 3 2.841199272340513e+02 1.209406641294798e+02 7.967327320293104e+01 2.444374323800143e+02 4 5.919594276245514e+02 1.008623922948564e+02 -5.808187929129044e+02 5.377984356003120e+01 - ME 1.727643234936365e-04 + ME 7.961277501126149e-05 Event 109 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2924,7 +2924,7 @@ Event 109 Batch 1 2 3.093404598873124e+02 1.546999830656544e+02 1.629193992247174e+02 2.126421988200774e+02 3 5.287372542258961e+02 -2.136116696975048e+02 -1.865832176193536e+02 4.462284633214169e+02 4 6.619222858867909e+02 5.891168663185049e+01 2.366381839463621e+01 -6.588706621414941e+02 - ME 1.686695657867669e+01 + ME 2.902408960420708e-01 Event 110 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2932,7 +2932,7 @@ Event 110 Batch 1 2 4.920948406187608e+02 -8.595212543403569e+01 -4.824913009925944e+02 -4.440392734262522e+01 3 4.634042325716594e+02 -2.085760624772916e+00 1.255608851371819e+02 4.460645653843308e+02 4 5.445009268095798e+02 8.803788605880843e+01 3.569304158554124e+02 -4.016606380417056e+02 - ME 4.151412887207382e-03 + ME 1.043536440561108e-03 Event 111 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2940,7 +2940,7 @@ Event 111 Batch 1 2 4.637454700443120e+02 1.543048221589588e+02 -4.372769385391800e+02 6.225902899506631e+00 3 3.246747011850293e+02 -5.128652792678845e+01 -2.274142471268230e+02 2.259781269206006e+02 4 7.115798287706589e+02 -1.030182942321705e+02 6.646911856660031e+02 -2.322040298201072e+02 - ME 1.240833065187375e-03 + ME 5.219332617201280e-04 Event 112 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2948,7 +2948,7 @@ Event 112 Batch 1 2 6.923761777814550e+02 3.939190124845535e+02 4.398224952082178e+01 -5.676954684419625e+02 3 5.277418353503033e+02 -4.270527740856185e+02 4.970714905179168e+01 3.060499505927539e+02 4 2.798819868682421e+02 3.313376160106501e+01 -9.368939857261346e+01 2.616455178492087e+02 - ME 5.385735959435035e-05 + ME 4.381536575941429e-05 Event 113 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2956,7 +2956,7 @@ Event 113 Batch 1 2 7.174898838850694e+02 -6.130145063482008e+02 3.726797356942233e+02 1.071275347265524e+01 3 1.705115822510491e+02 3.993583199494100e+01 -1.624320619120163e+02 3.309311510932528e+01 4 6.119985338638814e+02 5.730786743532599e+02 -2.102476737822071e+02 -4.380586858198049e+01 - ME 2.197559713387976e-04 + ME 4.914674319256647e-05 Event 114 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2964,7 +2964,7 @@ Event 114 Batch 1 2 6.772826088252357e+02 -1.430288042596954e+02 -3.410390118171982e+02 5.674036356844296e+02 3 6.725037798358682e+02 3.626161999767239e+01 2.510744134018114e+02 -6.228226615527174e+02 4 1.502136113388951e+02 1.067671842620232e+02 8.996459841538707e+01 5.541902586828807e+01 - ME 8.926156406775035e-05 + ME 7.986648389935193e-05 Event 115 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2972,7 +2972,7 @@ Event 
115 Batch 1 2 9.320551230331124e+01 1.288474310894606e+01 -2.581623869377880e+01 8.862715576190526e+01 3 6.672654287607164e+02 1.525114284892182e+02 2.829200767588875e+02 5.847560574856374e+02 4 7.395290589359720e+02 -1.653961715981643e+02 -2.571038380651088e+02 -6.733832132475428e+02 - ME 1.800237703627863e+00 + ME 4.304938165075599e-01 Event 116 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2980,7 +2980,7 @@ Event 116 Batch 1 2 4.951202926530015e+02 -4.575339943514647e+02 4.220102313368785e+01 1.844608951947751e+02 3 3.101750696753587e+02 -4.711582585559527e+01 2.172188132736168e+02 2.163438466008694e+02 4 6.947046376716394e+02 5.046498202070600e+02 -2.594198364073050e+02 -4.008047417956444e+02 - ME 1.933367100533606e-03 + ME 5.988625984136040e-04 Event 117 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2988,7 +2988,7 @@ Event 117 Batch 1 2 6.543248494478489e+02 1.390926466871539e+02 9.107024539473488e+01 6.328510524967589e+02 3 5.040443237953712e+02 6.874740772121054e+01 1.336336536624387e+02 -4.811200690999848e+02 4 3.416308267567792e+02 -2.078400544083643e+02 -2.247038990571737e+02 -1.517309833967742e+02 - ME 4.207453923038474e-04 + ME 3.026560085299302e-04 Event 118 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2996,7 +2996,7 @@ Event 118 Batch 1 2 5.829230400014206e+02 5.307803371482089e+02 -3.192285892796672e+01 2.388565162167381e+02 3 3.965113090906140e+02 -5.470249758902820e+01 2.256187790844517e+02 -3.214420966810604e+02 4 5.205656509079653e+02 -4.760778395591807e+02 -1.936959201564850e+02 8.258558046432242e+01 - ME 7.464562943747175e-05 + ME 2.168340782914014e-05 Event 119 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3004,7 +3004,7 @@ Event 119 Batch 1 2 3.549567073991255e+02 2.281637891139605e+02 1.474502150787006e+02 
2.284600261271838e+02 3 4.727085372220640e+02 7.463684946128350e+01 -3.092948822053327e+02 3.495988811576870e+02 4 6.723347553788102e+02 -3.028006385752440e+02 1.618446671266322e+02 -5.780589072848707e+02 - ME 1.455012849105755e-02 + ME 1.664672733965846e-03 Event 120 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3012,7 +3012,7 @@ Event 120 Batch 1 2 7.192117275853698e+02 4.094232477570927e+02 -5.552624156333899e+02 -2.032775518283800e+02 3 3.685061529232585e+02 -2.522084621786424e+02 1.741347663658646e+02 2.046087962197375e+02 4 4.122821194913712e+02 -1.572147855784500e+02 3.811276492675253e+02 -1.331244391357209e+00 - ME 9.281995463485567e-05 + ME 1.900262756274459e-05 Event 121 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3020,7 +3020,7 @@ Event 121 Batch 1 2 1.923953846467517e+02 -5.182078839520096e+01 -1.486351786617837e+02 -1.106262789198433e+02 3 6.582127150877787e+02 -3.509182841037630e+02 -1.191939510078701e+02 5.439606035624541e+02 4 6.493919002654695e+02 4.027390724989639e+02 2.678291296696539e+02 -4.333343246426108e+02 - ME 1.925188892577692e-03 + ME 5.360055113881300e-04 Event 122 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3028,7 +3028,7 @@ Event 122 Batch 1 2 6.905732817636248e+02 3.462508192534570e+02 -5.375670569609784e+02 -2.608131264380775e+02 3 7.097575386120018e+02 -2.677396278645660e+02 5.849221766424142e+02 2.998954860604125e+02 4 9.966917962437387e+01 -7.851119138889094e+01 -4.735511968143584e+01 -3.908235962233509e+01 - ME 5.007312135859238e-04 + ME 3.451011759976180e-05 Event 123 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3036,7 +3036,7 @@ Event 123 Batch 1 2 4.035126033432560e+02 2.481103298242076e+01 -3.878573016343356e+02 -1.085059780294573e+02 3 3.541388771651666e+02 1.572344474048876e+02 
-3.105653677404273e+02 -6.512161875550808e+01 4 7.423485194915780e+02 -1.820454803873083e+02 6.984226693747627e+02 1.736275967849660e+02 - ME 2.043564129780385e-02 + ME 3.471230489499830e-03 Event 124 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3044,7 +3044,7 @@ Event 124 Batch 1 2 5.353042728143347e+02 -4.785252055946481e+02 -2.279396245170433e+02 7.488537693644093e+01 3 7.454081943698113e+02 6.785307544150930e+02 3.069354144183444e+02 -3.193811081429426e+01 4 2.192875328158541e+02 -2.000055488204448e+02 -7.899578990130104e+01 -4.294726612214667e+01 - ME 1.399009675490331e-04 + ME 6.765427234678898e-06 Event 125 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3052,7 +3052,7 @@ Event 125 Batch 1 2 7.351681880566981e+02 -1.932492970253984e+01 -4.393064933429818e+02 -5.891592456452273e+02 3 6.537497908129355e+02 -2.883189353576726e+01 3.454898907503182e+02 5.542510679217788e+02 4 1.110820211303664e+02 4.815682323830688e+01 9.381660259266363e+01 3.490817772344844e+01 - ME 1.431077255619906e-04 + ME 6.639428548470109e-05 Event 126 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3060,7 +3060,7 @@ Event 126 Batch 1 2 5.568747108147126e+02 1.149185667256990e+02 4.264979152236775e+02 -3.391204725116689e+02 3 6.934211462641822e+02 -1.939160042589616e+02 -6.294239612595663e+02 2.169215212257340e+02 4 2.497041429211053e+02 7.899743753326281e+01 2.029260460358889e+02 1.221989512859350e+02 - ME 3.344185566612618e-05 + ME 9.143592130512915e-06 Event 127 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3068,7 +3068,7 @@ Event 127 Batch 1 2 7.108931196972316e+02 4.270547743949553e+02 5.664613189451065e+02 -4.598718776252147e+01 3 4.445675167124290e+02 -1.247884466860518e+02 -4.129475031266345e+02 1.074359351009545e+02 4 3.445393635903407e+02 -3.022663277089035e+02 
-1.535138158184720e+02 -6.144874733843321e+01 - ME 1.180920695556687e-04 + ME 1.427738327825488e-05 Event 128 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3076,7 +3076,7 @@ Event 128 Batch 1 2 5.312407894292422e+02 -7.192118124205533e+01 -4.398126160332176e+02 -2.891521793453568e+02 3 5.717192413787027e+02 3.434745903572437e+02 1.811915566412192e+02 4.195923218357252e+02 4 3.970399691920551e+02 -2.715534091151883e+02 2.586210593919984e+02 -1.304401424903685e+02 - ME 1.848006274423395e-04 + ME 3.532660248239223e-05 Event 129 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3084,7 +3084,7 @@ Event 129 Batch 1 2 6.644129951428383e+02 -3.595672586482287e+02 4.645590915434784e+02 3.103882489514914e+02 3 1.967652372382455e+02 -5.204943416929049e+01 8.794498000645085e+00 -1.895522930301724e+02 4 6.388217676189169e+02 4.116166928175192e+02 -4.733535895441232e+02 -1.208359559213191e+02 - ME 3.082956717278722e-04 + ME 9.192558188476414e-05 Event 130 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3092,7 +3092,7 @@ Event 130 Batch 1 2 7.302263990443511e+02 -1.919590472356484e+02 3.836584700935805e+02 -5.909217345563752e+02 3 4.156541164903923e+02 2.203243106780774e+02 -1.767969453775071e+02 3.049071707664833e+02 4 3.541194844652567e+02 -2.836526344242890e+01 -2.068615247160734e+02 2.860145637898919e+02 - ME 3.110012368642411e-05 + ME 2.258971422042701e-05 Event 131 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3100,7 +3100,7 @@ Event 131 Batch 1 2 2.308323688168238e+02 -1.780469473698228e+02 1.469011263880862e+02 1.710582294195638e+00 3 7.308075033948297e+02 5.219262643529272e+02 -3.840435213624620e+02 3.379099810545737e+02 4 5.383601277883465e+02 -3.438793169831044e+02 2.371423949743758e+02 -3.396205633487694e+02 - ME 1.061667055612532e-03 + ME 
7.770640764079256e-05 Event 132 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3108,7 +3108,7 @@ Event 132 Batch 1 2 5.909630762789660e+02 -4.293852116769707e+02 -3.988922148105424e+02 7.583335995300355e+01 3 5.415993952096327e+02 2.260703809971038e+02 3.221145619770360e+02 -3.721079100067703e+02 4 3.674375285114020e+02 2.033148306798666e+02 7.677765283350686e+01 2.962745500537670e+02 - ME 3.321676569401813e-05 + ME 1.628447412544396e-05 Event 133 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3116,7 +3116,7 @@ Event 133 Batch 1 2 4.506052863582997e+02 2.189991325227701e+02 -3.914006430783634e+02 -4.347459771134355e+01 3 4.043998006859111e+02 3.160348074769272e+02 8.738893432792010e+01 2.366946839598570e+02 4 6.449949129557901e+02 -5.350339399996973e+02 3.040117087504433e+02 -1.932200862485142e+02 - ME 3.121497332919934e-04 + ME 8.705579101282482e-05 Event 134 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3124,7 +3124,7 @@ Event 134 Batch 1 2 7.151470882937614e+02 -1.041377497037516e+01 -4.186394096729767e+01 7.138447461686595e+02 3 3.416424731356660e+02 1.638631808685801e+02 3.081581136487586e+01 -2.981925940995343e+02 4 4.432104385705719e+02 -1.534494058982047e+02 1.104812960242199e+01 -4.156521520691248e+02 - ME 5.534325530265236e-02 + ME 6.342792451335309e-03 Event 135 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3132,7 +3132,7 @@ Event 135 Batch 1 2 7.115730144432832e+02 -3.219296530898238e+02 2.184242454110169e+02 -5.958089478700319e+02 3 1.627059459894212e+02 -6.880794311551747e+01 -3.259803939022061e+01 1.437917231708342e+02 4 6.257210395672955e+02 3.907375962053413e+02 -1.858262060207963e+02 4.520172246991979e+02 - ME 2.112989182930814e-04 + ME 1.277979532321233e-04 Event 136 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -3140,7 +3140,7 @@ Event 136 Batch 1 2 7.195404287114588e+02 -4.369992732083461e+02 -4.270318019286997e+02 3.800182941743402e+02 3 6.668605996318223e+02 3.634158794560479e+02 4.690430049045651e+02 -3.043527845290675e+02 4 1.135989716567186e+02 7.358339375229815e+01 -4.201120297586535e+01 -7.566550964527264e+01 - ME 1.804344388349211e-03 + ME 7.515399240093053e-05 Event 137 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3148,7 +3148,7 @@ Event 137 Batch 1 2 6.722782806744999e+02 -6.045581260407005e+02 -2.538460778300668e+02 1.484241478840623e+02 3 6.869263774705689e+02 6.661257235671316e+02 1.481819739565761e+02 -7.865412297735662e+01 4 1.407953418549304e+02 -6.156759752643097e+01 1.056641038734908e+02 -6.977002490670534e+01 - ME 5.192812231664224e-04 + ME 2.119149330726453e-05 Event 138 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3156,7 +3156,7 @@ Event 138 Batch 1 2 6.463287544295633e+02 8.684709774942756e+01 2.409249839962013e+02 -5.934253049048401e+02 3 3.917330799270068e+02 1.767690441671677e+02 4.696120064017492e+01 3.464132742372293e+02 4 4.619381656434300e+02 -2.636161419165952e+02 -2.878861846363762e+02 2.470120306676108e+02 - ME 5.804753959762886e-05 + ME 4.203806696206548e-05 Event 139 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3164,7 +3164,7 @@ Event 139 Batch 1 2 2.994802063237944e+02 -1.272876183039153e+02 6.552211336810879e+00 2.710042891410713e+02 3 7.257546970836092e+02 -8.848613612326799e+00 5.127896146768584e+00 -7.256826352181574e+02 4 4.747650965925943e+02 1.361362319162416e+02 -1.168010748357900e+01 4.546783460770868e+02 - ME 1.724196014694060e-04 + ME 1.500396153249019e-04 Event 140 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3172,7 +3172,7 @@ Event 140 Batch 1 2 
7.326756101999780e+02 5.655005379385240e+02 4.343799907428446e+02 1.683351270988810e+02 3 7.428339005597779e+02 -5.680473426214219e+02 -4.534832054058505e+02 -1.532233754243464e+02 4 2.449048924024402e+01 2.546804682897962e+00 1.910321466300584e+01 -1.511175167453447e+01 - ME 4.669436438173466e-03 + ME 1.024603362434272e-04 Event 141 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3180,7 +3180,7 @@ Event 141 Batch 1 2 7.363238871411332e+02 -6.772722174663238e+02 -2.824373475598683e+02 -6.086341204880675e+01 3 5.504260535970963e+02 4.650298533191528e+02 2.914345410616540e+02 4.221355560271704e+01 4 2.132500592617708e+02 2.122423641471711e+02 -8.997193501785816e+00 1.864985644608987e+01 - ME 7.300791864660033e-05 + ME 1.166401869382226e-05 Event 142 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3188,7 +3188,7 @@ Event 142 Batch 1 2 5.862280565156834e+02 4.248793793115829e+01 -2.479279504752411e+02 -5.295184989682986e+02 3 4.287264749982929e+02 -3.025296967755320e+02 2.785471849307642e+02 1.212173201341831e+02 4 4.850454684860405e+02 2.600417588443628e+02 -3.061923445551928e+01 4.083011788341197e+02 - ME 4.569028399965169e-05 + ME 1.949810022878841e-05 Event 143 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3196,7 +3196,7 @@ Event 143 Batch 1 2 2.464531733710510e+02 4.046044690030688e+01 -2.103865804466287e+02 1.218179201483223e+02 3 5.378449948854583e+02 4.607829603950880e+02 -2.747641700963839e+02 3.822241180409925e+01 4 7.157018317434903e+02 -5.012434072953949e+02 4.851507505430126e+02 -1.600403319524219e+02 - ME 1.284493741497843e-03 + ME 4.863434295951330e-04 Event 144 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3204,7 +3204,7 @@ Event 144 Batch 1 2 5.367418008803521e+02 -1.343004856786532e+02 -4.048537736989352e+02 -3.258044847458254e+02 3 
6.294877130859599e+02 3.313530054622211e+02 5.282137272543231e+02 8.631468610520756e+01 4 3.337704860336884e+02 -1.970525197835678e+02 -1.233599535553879e+02 2.394897986406179e+02 - ME 2.612855607885159e-05 + ME 8.754930746282009e-06 Event 145 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3212,7 +3212,7 @@ Event 145 Batch 1 2 6.805380148481771e+01 -3.411514819754512e+01 -4.339750646760406e+01 -3.980116822894492e+01 3 6.831461500979880e+02 -3.834019790669201e+02 -2.756424954453614e+02 -4.936727656514237e+02 4 7.488000484171945e+02 4.175171272644653e+02 3.190400019129655e+02 5.334739338803686e+02 - ME 4.832444287218038e-01 + ME 4.117012994651258e-01 Event 146 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3220,7 +3220,7 @@ Event 146 Batch 1 2 5.031746658797123e+02 4.202301876294930e+02 2.767377273314875e+02 2.750283520766640e+00 3 4.317115817339341e+02 -1.098088257924671e+02 -5.455162180567243e+01 4.139336083717602e+02 4 5.651137523863538e+02 -3.104213618370259e+02 -2.221861055258150e+02 -4.166838918925268e+02 - ME 4.446377084117306e-03 + ME 1.122040831263755e-03 Event 147 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3228,7 +3228,7 @@ Event 147 Batch 1 2 4.251223043705630e+02 -4.223502783198938e+02 -4.694338569631599e+01 1.206377286808446e+01 3 5.457819748703678e+02 2.791608945230574e+02 -4.384138579515959e+02 -1.665546403390879e+02 4 5.290957207590696e+02 1.431893837968364e+02 4.853572436479118e+02 1.544908674710035e+02 - ME 5.820013407126093e-05 + ME 1.117959404473985e-05 Event 148 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3236,7 +3236,7 @@ Event 148 Batch 1 2 6.905785821272525e+02 6.249608768654489e+02 -6.243387159972350e+01 -2.870970082698929e+02 3 1.361638260920089e+02 2.862044352088506e+01 1.704210379179796e+01 1.320266050727362e+02 4 
6.732575917807402e+02 -6.535813203863343e+02 4.539176780792534e+01 1.550704031971573e+02 - ME 9.573948308169230e-04 + ME 5.047601105033982e-04 Event 149 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3244,7 +3244,7 @@ Event 149 Batch 1 2 6.694705528096943e+02 -5.216497821741067e+02 -3.785079074709545e+02 1.811189935345937e+02 3 2.821401257551277e+02 1.148500354702071e-01 2.786662494166578e+02 -4.413795199872407e+01 4 5.483893214351779e+02 5.215349321386365e+02 9.984165805429673e+01 -1.369810415358697e+02 - ME 1.943324414096923e-04 + ME 3.486097449584098e-05 Event 150 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3252,7 +3252,7 @@ Event 150 Batch 1 2 4.637486188995366e+02 -4.033412855298819e+02 -2.279949807412008e+02 -1.992178895453991e+01 3 3.756800751656199e+02 6.230662615514293e+01 -2.632310737913946e+02 -2.606967683041707e+02 4 6.605713059348438e+02 3.410346593747391e+02 4.912260545325952e+02 2.806185572587107e+02 - ME 2.156945366470290e-04 + ME 4.211370643652993e-05 Event 151 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3260,7 +3260,7 @@ Event 151 Batch 1 2 3.821954355913596e+02 -2.528320044280690e+02 2.861764538722267e+02 1.588602445142563e+01 3 6.796189325418250e+02 2.911670128135291e+02 -4.900375979142738e+02 3.700902818893582e+02 4 4.381856318668152e+02 -3.833500838546018e+01 2.038611440420471e+02 -3.859763063407838e+02 - ME 8.197229841786387e-03 + ME 1.923941526207248e-04 Event 152 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3268,7 +3268,7 @@ Event 152 Batch 1 2 6.751133298339792e+02 -2.999578895043981e+02 -2.855974213275218e+02 -5.331391803034741e+02 3 4.976977783498468e+02 -3.003988119418482e+00 1.843802943840355e+02 4.622747685874795e+02 4 3.271888918161745e+02 3.029618776238166e+02 1.012171269434863e+02 7.086441171599445e+01 - ME 
1.204579535049519e-04 + ME 6.977738125195056e-05 Event 153 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3276,7 +3276,7 @@ Event 153 Batch 1 2 1.729293620257127e+02 1.558357805102956e+02 -7.193392860849491e+01 2.110174585940510e+01 3 6.524550819255464e+02 2.410158908712478e+02 5.786677971610501e+02 1.809766692333240e+02 4 6.746155560487412e+02 -3.968516713815435e+02 -5.067338685525552e+02 -2.020784150927291e+02 - ME 5.985591428637023e-04 + ME 1.391654510317005e-04 Event 154 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3284,7 +3284,7 @@ Event 154 Batch 1 2 6.585658455851002e+02 -2.410305357139302e+02 -2.116446673272157e+02 -5.751693564652295e+02 3 5.764400833248005e+02 3.388133979948972e+02 3.092747322371399e+02 3.490527051926400e+02 4 2.649940710900988e+02 -9.778286228096688e+01 -9.763006490992416e+01 2.261166512725894e+02 - ME 3.655181799213059e-05 + ME 2.686434432328395e-05 Event 155 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3292,7 +3292,7 @@ Event 155 Batch 1 2 5.686586231936359e+02 -1.693366246265498e+02 -1.542203680657918e+02 5.204938187588979e+02 3 1.882190564276536e+02 -1.089234770645493e+02 -9.145416397064866e+01 1.232810822434430e+02 4 7.431223203787102e+02 2.782601016910992e+02 2.456745320364404e+02 -6.437749010023409e+02 - ME 6.696396361607482e-01 + ME 4.701119881405690e-01 Event 156 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3300,7 +3300,7 @@ Event 156 Batch 1 2 6.143652095725128e+02 2.879464601546110e+02 5.379391909976823e+02 -7.178351904348040e+01 3 6.287751645293085e+02 -4.584164185734781e+02 -4.225140875260598e+02 -8.181956094447702e+01 4 2.568596258981782e+02 1.704699584188668e+02 -1.154251034716223e+02 1.536030799879581e+02 - ME 2.899571701789112e-05 + ME 7.769660148731367e-06 Event 157 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3308,7 +3308,7 @@ Event 157 Batch 1 2 5.050842109798973e+02 4.185498850973046e+02 -1.305174306570672e+02 -2.507812875014723e+02 3 5.170424494038050e+02 -3.084595065654854e+02 3.930456446728388e+02 -1.330441599566699e+02 4 4.778733396162975e+02 -1.100903785318191e+02 -2.625282140157716e+02 3.838254474581424e+02 - ME 4.033251359625283e-05 + ME 1.243977993100618e-05 Event 158 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3316,7 +3316,7 @@ Event 158 Batch 1 2 4.312542366204098e+02 -3.114503370626313e+02 2.737030704635235e+02 1.185982013584742e+02 3 6.944315393047829e+02 2.166643175309468e+02 -6.173965008138002e+02 -2.326226495269423e+02 4 3.743142240748070e+02 9.478601953168439e+01 3.436934303502764e+02 1.140244481684682e+02 - ME 3.680357310121394e-05 + ME 5.864250821924803e-06 Event 159 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3324,7 +3324,7 @@ Event 159 Batch 1 2 5.860112473308646e+02 -1.581297551692178e+02 4.935632758462007e+02 2.734948907463652e+02 3 3.772013313646349e+02 -2.371132827856262e+02 -1.305099443644436e+02 -2.627266448837395e+02 4 5.367874213045002e+02 3.952430379548442e+02 -3.630533314817573e+02 -1.076824586262577e+01 - ME 1.030382455754272e-04 + ME 2.805189658646002e-05 Event 160 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3332,7 +3332,7 @@ Event 160 Batch 1 2 5.883409724804535e+02 -3.739819298758817e+02 -2.887651121595530e+02 3.505671490956299e+02 3 4.300332553173178e+02 1.788055146224819e+02 3.829208006453583e+02 7.955406370837679e+01 4 4.816257722022287e+02 1.951764152533999e+02 -9.415568848580530e+01 -4.301212128040066e+02 - ME 9.797271586219467e-03 + ME 2.307516153071828e-04 Event 161 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3340,7 +3340,7 @@ 
Event 161 Batch 1 2 6.868305165969147e+02 4.119610488151656e+00 5.515184990814985e+02 4.093244831537709e+02 3 3.260821955312833e+02 -1.956999890649130e+02 -2.483451099187458e+02 -7.972338993006402e+01 4 4.870872878718022e+02 1.915803785767614e+02 -3.031733891627526e+02 -3.296010932237070e+02 - ME 1.075603053132144e-03 + ME 9.860610555787331e-05 Event 162 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3348,7 +3348,7 @@ Event 162 Batch 1 2 2.159818802305119e+02 -2.018126805027919e+02 4.096951387107715e+01 -6.512536763314942e+01 3 6.870078865581224e+02 4.896730732821633e+02 -2.356527215298929e+02 -4.203188222421333e+02 4 5.970102332113654e+02 -2.878603927793715e+02 1.946832076588156e+02 4.854441898752826e+02 - ME 5.344822454174306e-05 + ME 2.809071549115161e-05 Event 163 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3356,7 +3356,7 @@ Event 163 Batch 1 2 4.889699854403287e+02 -4.067839821807834e+01 -2.740835242435768e+02 4.028835269878222e+02 3 4.282392920294498e+02 4.007468150560176e+02 -8.832740907173851e+01 -1.224301852772270e+02 4 5.827907225302220e+02 -3.600684168379390e+02 3.624109333153153e+02 -2.804533417105952e+02 - ME 4.336231422638298e-04 + ME 1.173701793303044e-04 Event 164 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3364,7 +3364,7 @@ Event 164 Batch 1 2 6.224346677404150e+02 -1.282049393554146e+02 5.480608628970117e+02 -2.657399098565701e+02 3 7.444531740822750e+02 1.794330131141779e+02 -6.708967511266460e+02 2.681638893170603e+02 4 1.331121581773107e+02 -5.122807375876333e+01 1.228358882296343e+02 -2.423979460490191e+00 - ME 1.368953177788070e-04 + ME 1.571413941583783e-05 Event 165 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3372,7 +3372,7 @@ Event 165 Batch 1 2 6.980339706506675e+02 -5.154669325341684e+01 -4.947847840614098e+02 
4.896757907618869e+02 3 1.362964882116331e+02 4.252532371924361e+01 -5.641238783031591e+01 -1.165588780002596e+02 4 6.656695411377010e+02 9.021369534174053e+00 5.511971718917263e+02 -3.731169127616273e+02 - ME 1.450267418906797e-03 + ME 4.238311927693088e-04 Event 166 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3380,7 +3380,7 @@ Event 166 Batch 1 2 3.060640747281171e+02 -1.981167412190918e+02 -9.095380261170779e+01 -2.148310510107333e+02 3 5.580104478575086e+02 -3.585720992432471e+02 -1.558095186186280e+02 3.981521109704927e+02 4 6.359254774143739e+02 5.566888404623389e+02 2.467633212303362e+02 -1.833210599597597e+02 - ME 3.000804338470548e-04 + ME 1.099447007687216e-04 Event 167 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3388,7 +3388,7 @@ Event 167 Batch 1 2 2.833153623322893e+02 2.526850217013923e+02 8.687924899084067e+01 9.417998957332070e+01 3 6.595685044563415e+02 -8.780626893611850e+01 -2.875856231737449e+02 -5.870393347553995e+02 4 5.571161332113688e+02 -1.648787527652738e+02 2.007063741829043e+02 4.928593451820789e+02 - ME 7.367447958524992e-05 + ME 4.244421486768831e-05 Event 168 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3396,7 +3396,7 @@ Event 168 Batch 1 2 6.026267479353969e+02 -5.987968578530475e+02 5.775180228477150e+00 6.758674164241529e+01 3 4.991211680715713e+02 3.812575567959843e+02 3.220701575873951e+02 -5.952259631185711e+00 4 3.982520839930309e+02 2.175393010570631e+02 -3.278453378158730e+02 -6.163448201122968e+01 - ME 9.606399998327532e-05 + ME 1.203107058680061e-05 Event 169 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3404,7 +3404,7 @@ Event 169 Batch 1 2 5.510662376679772e+02 -9.251111075413947e+01 -5.291920243323356e+02 -1.227660134875281e+02 3 5.034535790022877e+02 -2.816014265681677e+02 3.283802195198170e+02 
2.575511098657944e+02 4 4.454801833297348e+02 3.741125373223072e+02 2.008118048125185e+02 -1.347850963782663e+02 - ME 1.532484123791625e-04 + ME 2.085195230877358e-05 Event 170 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3412,7 +3412,7 @@ Event 170 Batch 1 2 2.814808559369750e+02 3.658097943502287e+01 -1.412301634042880e+02 -2.407225480659935e+02 3 6.646522150540470e+02 2.753499086551696e+02 -1.631412967142655e+02 5.825203104495404e+02 4 5.538669290089779e+02 -3.119308880901926e+02 3.043714601185535e+02 -3.417977623835468e+02 - ME 7.823510217753851e-04 + ME 2.587160315460459e-04 Event 171 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3420,7 +3420,7 @@ Event 171 Batch 1 2 1.777965289077954e+02 -6.143496808852239e+01 -1.603735842336773e+00 1.668375809551635e+02 3 7.439290290569696e+02 2.163074211412066e+01 -1.907051550939623e+01 -7.433699124308462e+02 4 5.782744420352348e+02 3.980422597440174e+01 2.067425135173305e+01 5.765323314756826e+02 - ME 2.063755640794395e-03 + ME 1.981167274383509e-03 Event 172 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3428,7 +3428,7 @@ Event 172 Batch 1 2 1.369499454750680e+02 -1.250080331667568e+01 -3.518152151649629e+01 -1.317622025690455e+02 3 6.692885586315896e+02 -2.346283187163472e+02 -6.130705295376303e+02 1.305421486874673e+02 4 6.937614958933425e+02 2.471291220330227e+02 6.482520510541266e+02 1.220053881578238e+00 - ME 5.039586079692636e-04 + ME 1.548169060571347e-04 Event 173 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3436,7 +3436,7 @@ Event 173 Batch 1 2 7.088772083623137e+02 4.973951266878932e+01 3.171232495758680e+01 -7.064185769505260e+02 3 5.785136264307895e+02 8.584813303397833e+01 5.766505028397120e+01 5.691949191590089e+02 4 2.126091652068944e+02 -1.355876457027672e+02 -8.937737524155732e+01 
1.372236577915166e+02 - ME 1.743760900867476e-04 + ME 1.732961413682620e-04 Event 174 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3444,7 +3444,7 @@ Event 174 Batch 1 2 4.367208701713482e+02 -3.923163287174704e+01 4.325755195957351e+02 -4.543585887727652e+01 3 3.528978856725088e+02 9.622572295106905e+01 1.987077746703234e+02 -2.753048278549415e+02 4 7.103812441561454e+02 -5.699409007932221e+01 -6.312832942660567e+02 3.207406867322186e+02 - ME 9.353677491192390e-04 + ME 1.541208918572365e-04 Event 175 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3452,7 +3452,7 @@ Event 175 Batch 1 2 6.418562164876806e+02 1.962785648722137e+02 -6.110736372974047e+02 -6.567908015856712e+00 3 4.843421844702149e+02 -1.886631806266161e+02 3.569879071908527e+02 -2.674942804112337e+02 4 3.738015990421035e+02 -7.615384245597569e+00 2.540857301065516e+02 2.740621884270906e+02 - ME 3.029111560812189e-05 + ME 1.279055979705581e-05 Event 176 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3460,7 +3460,7 @@ Event 176 Batch 1 2 6.288652703123263e+02 4.005522031116294e+02 3.691482793515075e+02 3.142594606996526e+02 3 7.209127580467475e+02 -4.124575135572966e+02 -5.165298058232565e+02 -2.877341896975221e+02 4 1.502219716409257e+02 1.190531044566666e+01 1.473815264717492e+02 -2.652527100213051e+01 - ME 1.719274466020296e-04 + ME 1.300720357566141e-05 Event 177 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3468,7 +3468,7 @@ Event 177 Batch 1 2 4.716578040000077e+02 -4.521622645932388e+02 -1.012739918234145e+01 1.338200520767543e+02 3 3.021382980750606e+02 -2.714821202364266e+02 6.773215888881064e+01 -1.140059832109250e+02 4 7.262038979249317e+02 7.236443848296653e+02 -5.760475970646905e+01 -1.981406886582933e+01 - ME 2.354271252348000e-03 + ME 6.442260552556652e-04 Event 178 Batch 
1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3476,7 +3476,7 @@ Event 178 Batch 1 2 7.350088877399502e+02 -3.684484945749095e+02 -2.561732769425163e+02 -5.821159885132296e+02 3 1.415495174310248e+02 7.181268644032879e+01 1.095010133995263e+02 5.374692563910759e+01 4 6.234415948290248e+02 2.966358081345808e+02 1.466722635429900e+02 5.283690628741219e+02 - ME 1.035408980291912e-04 + ME 6.828487731379645e-05 Event 179 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3484,7 +3484,7 @@ Event 179 Batch 1 2 7.426064621425413e+02 6.748632301344054e+01 7.201624948975951e+02 -1.681544967131679e+02 3 5.821031882499326e+02 8.394276920418550e-01 -5.588194474899291e+02 1.629854049874919e+02 4 1.752903496075256e+02 -6.832575070548241e+01 -1.613430474076661e+02 5.169091725675888e+00 - ME 9.197132478706931e-05 + ME 1.412410550503903e-05 Event 180 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3492,7 +3492,7 @@ Event 180 Batch 1 2 6.099515195485484e+02 2.272495331206023e+02 1.762692760011278e+02 -5.378918555193875e+02 3 5.718889655176699e+02 4.324570510796980e+01 -3.278409766521432e+02 4.665909256493895e+02 4 3.181595149337819e+02 -2.704952382285720e+02 1.515717006510154e+02 7.130092986999803e+01 - ME 5.401477812349802e-05 + ME 3.043963963928669e-05 Event 181 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3500,7 +3500,7 @@ Event 181 Batch 1 2 1.206370886915177e+02 -8.151225636567759e+01 1.767749325039422e+01 8.715827822142556e+01 3 6.451493408002739e+02 -6.748216257939080e+01 4.373428479320614e+02 4.694625256943417e+02 4 7.342135705082084e+02 1.489944189450684e+02 -4.550203411824557e+02 -5.566208039157672e+02 - ME 7.131653341377736e-02 + ME 2.625479922313071e-02 Event 182 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-3508,7 +3508,7 @@ Event 182 Batch 1 2 4.626866082364760e+02 -3.084610429505738e+02 3.306629079434072e+02 9.794245113140897e+01 3 4.974966719253473e+02 3.582955998671217e+02 1.664640547097976e+02 -3.023523113558579e+02 4 5.398167198381765e+02 -4.983455691654795e+01 -4.971269626532048e+02 2.044098602244489e+02 - ME 5.959042767905828e-05 + ME 1.414799589613471e-05 Event 183 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3516,7 +3516,7 @@ Event 183 Batch 1 2 3.304723045950491e+02 3.244647182058462e+00 3.209425641774955e+02 7.872284845075714e+01 3 4.379804819457451e+02 2.312428523500660e+02 3.131807483468383e+02 2.006775141049615e+02 4 7.315472134592065e+02 -2.344874995321247e+02 -6.341233125243344e+02 -2.794003625557186e+02 - ME 4.899988668912175e-03 + ME 2.330806393221907e-03 Event 184 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3524,7 +3524,7 @@ Event 184 Batch 1 2 7.470051035005908e+02 -4.953964753944513e+02 -4.028924750569613e+02 3.876552725878485e+02 3 2.183325716323390e+02 1.119040172022777e+02 1.451703047217021e+02 -1.186262424448778e+02 4 5.346623248670695e+02 3.834924581921736e+02 2.577221703352594e+02 -2.690290301429710e+02 - ME 5.441344453720516e-04 + ME 7.987999480474686e-05 Event 185 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3532,7 +3532,7 @@ Event 185 Batch 1 2 4.448583927494090e+02 2.810173563272025e+02 -3.384637477435971e+02 6.610995769032235e+01 3 6.236443795626774e+02 -1.690803760724666e+02 5.125139620028374e+02 3.125277225134823e+02 4 4.314972276879136e+02 -1.119369802547359e+02 -1.740502142592404e+02 -3.786376802038046e+02 - ME 6.949230823829164e-03 + ME 1.405605442011058e-04 Event 186 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3540,7 +3540,7 @@ Event 186 Batch 1 2 6.802792190696962e+02 -1.681815241656754e+02 
5.427923640013703e+02 3.739936368565512e+02 3 6.331554869749547e+02 3.172201723440435e+02 -4.588808692389625e+02 -2.994755095011972e+02 4 1.865652939553488e+02 -1.490386481783679e+02 -8.391149476240778e+01 -7.451812735535422e+01 - ME 3.276943053321406e-04 + ME 3.045129627255903e-05 Event 187 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3548,7 +3548,7 @@ Event 187 Batch 1 2 7.472897115267965e+02 -6.988402471604775e+02 -2.391684329048669e+02 1.134137672609268e+02 3 6.826908170748527e+02 6.328852277257668e+02 2.212839847556716e+02 -1.286718241709738e+02 4 7.001947139835140e+01 6.595501943471052e+01 1.788444814919547e+01 1.525805691004725e+01 - ME 1.461490870437387e-04 + ME 3.485925693242860e-05 Event 188 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3556,7 +3556,7 @@ Event 188 Batch 1 2 6.496068877140275e+02 -5.024316730938291e+02 -3.980061777252906e+02 -1.055585379310702e+02 3 4.885976180718368e+02 4.424928723138696e+02 1.459942636040002e+02 -1.470148473169288e+02 4 3.617954942141354e+02 5.993880077995960e+01 2.520119141212904e+02 2.525733852479991e+02 - ME 2.843805826594158e-05 + ME 1.006519408431335e-05 Event 189 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3564,7 +3564,7 @@ Event 189 Batch 1 2 4.082379946778654e+02 2.679237131173331e+02 -7.718184435750955e+01 2.981913934867987e+02 3 5.864211573889181e+02 -5.780822197382728e+02 -6.394893886953379e+01 7.497502433004084e+01 4 5.053408479332167e+02 3.101585066209396e+02 1.411307832270433e+02 -3.731664178168398e+02 - ME 1.937644878671120e-03 + ME 1.322787627040098e-04 Event 190 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3572,7 +3572,7 @@ Event 190 Batch 1 2 6.472516823166364e+02 6.463779961822676e+02 -3.289365889632791e+01 6.945035458816692e+00 3 4.318767277050750e+02 -3.286790725415815e+02 
-7.183748821760624e+00 -2.800642229191639e+02 4 4.208715899782885e+02 -3.176989236406859e+02 4.007740771808847e+01 2.731191874603472e+02 - ME 3.409584379294133e-05 + ME 1.272332211942340e-05 Event 191 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3580,7 +3580,7 @@ Event 191 Batch 1 2 6.757500036387052e+02 6.222744522021635e+02 -2.261571472854044e+02 1.351499844096745e+02 3 3.644673602666567e+02 -2.020102809038697e+02 1.114149692296405e+02 -2.821613151026251e+02 4 4.597826360946380e+02 -4.202641712982938e+02 1.147421780557637e+02 1.470113306929507e+02 - ME 5.389305783035389e-05 + ME 1.560703181590231e-05 Event 192 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3588,7 +3588,7 @@ Event 192 Batch 1 2 7.394562478491531e+02 -7.307873850878615e+02 3.988568028534699e+01 1.056147375500683e+02 3 8.098058518630978e+01 5.419286926826393e+01 4.244928426361276e+00 -6.002473390399248e+01 4 6.795631669645365e+02 6.765945158195976e+02 -4.413060871170821e+01 -4.559000364607596e+01 - ME 4.204295748489254e-04 + ME 1.231033846344155e-04 Event 193 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3596,7 +3596,7 @@ Event 193 Batch 1 2 5.607395612273153e+02 -3.164229781907934e+02 -3.517992386171808e+02 -3.009030576558548e+02 3 3.741643617741927e+02 -2.156271676189966e+02 1.666697084176705e+02 2.563690747778811e+02 4 5.650960769984922e+02 5.320501458097899e+02 1.851295301995104e+02 4.453398287797368e+01 - ME 9.141090879934244e-05 + ME 3.026844143728605e-05 Event 194 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3604,7 +3604,7 @@ Event 194 Batch 1 2 5.729373416862012e+02 -2.155045544874616e+02 -1.679805246197324e+02 5.035846779262559e+02 3 2.831035485618876e+02 -2.543279085173982e+02 1.042261812492671e+02 -6.783684323208054e+01 4 6.439591097519118e+02 4.698324630048598e+02 
6.375434337046515e+01 -4.357478346941756e+02 - ME 1.781231321893996e-03 + ME 5.497724763810379e-04 Event 195 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3612,7 +3612,7 @@ Event 195 Batch 1 2 5.572874060171201e+02 -5.433144409127298e+02 3.646295232533866e+01 1.185290019729285e+02 3 6.765845568040619e+02 5.574999049241243e+02 -1.212989803269169e+01 -3.831623469093195e+02 4 2.661280371788181e+02 -1.418546401139455e+01 -2.433305429264712e+01 2.646333449363910e+02 - ME 3.395618115588225e-04 + ME 3.378534889977447e-04 Event 196 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3620,7 +3620,7 @@ Event 196 Batch 1 2 5.405888343305829e+02 3.940239871950471e+02 -8.826690628749978e+01 -3.594305754554688e+02 3 6.983754392688073e+02 -3.888370902622853e+02 -5.513072771506098e+01 5.774898910559966e+02 4 2.610357264006097e+02 -5.186896932761887e+00 1.433976340025607e+02 -2.180593156005277e+02 - ME 5.539073969003598e-03 + ME 2.676929502290073e-04 Event 197 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3628,7 +3628,7 @@ Event 197 Batch 1 2 2.783346334111661e+02 2.282410890438732e+02 -1.474467226896361e+02 6.029624695020830e+01 3 6.434654504578666e+02 1.172104173128919e+01 6.205939438823057e+02 1.696277097949658e+02 4 5.781999161309674e+02 -2.399621307751624e+02 -4.731472211926695e+02 -2.299239567451741e+02 - ME 3.321087064690878e-04 + ME 4.280180350752636e-05 Event 198 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3636,7 +3636,7 @@ Event 198 Batch 1 2 4.349536439683943e+02 1.774777254208009e+02 -9.709992209949135e+01 3.850427697141142e+02 3 4.134500153047116e+02 7.095914770071803e+01 -4.041194890923881e+02 -5.092301099466194e+01 4 6.515963407268921e+02 -2.484368731215197e+02 5.012194111918782e+02 -3.341197587194521e+02 - ME 7.849443582399766e-04 + ME 
2.926862112764983e-04 Event 199 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3644,7 +3644,7 @@ Event 199 Batch 1 2 6.682109290882580e+02 2.136897997740939e+02 -5.035763266519416e+02 3.837361052354048e+02 3 1.424120473397155e+02 8.952788458880865e+01 -4.686863299276860e+01 -1.003458038481504e+02 4 6.893770235720265e+02 -3.032176843629025e+02 5.504449596447103e+02 -2.833903013872543e+02 - ME 1.167594898598604e-03 + ME 4.183851150998592e-04 Event 200 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3652,7 +3652,7 @@ Event 200 Batch 1 2 5.959952693237885e+02 -4.878566955018547e+02 -2.510837703973929e+01 -3.414319479966339e+02 3 4.479637599869168e+02 4.499951041477978e+01 7.146287716862105e+01 4.399313940955211e+02 4 4.560409706892941e+02 4.428571850870749e+02 -4.635450012888173e+01 -9.849944609888662e+01 - ME 5.545496796633981e-04 + ME 3.228844805909175e-04 Event 201 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3660,7 +3660,7 @@ Event 201 Batch 1 2 5.203096708642927e+02 -1.112696379946441e+02 1.367824427202020e+02 4.895219960522141e+02 3 2.871951825199399e+02 -2.582762312778227e+02 1.200876310962787e+02 3.678888524092984e+01 4 6.924951466157675e+02 3.695458692724667e+02 -2.568700738164807e+02 -5.263108812931440e+02 - ME 6.577575910850049e-03 + ME 2.285182473348715e-03 Event 202 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3668,7 +3668,7 @@ Event 202 Batch 1 2 2.158792376054218e+02 2.112389782008981e+01 -7.195062193526132e+01 -2.024369881546198e+02 3 5.463652944256570e+02 2.787950008966254e+02 -3.108926376755554e+02 -3.523267663221479e+02 4 7.377554679689213e+02 -2.999188987167153e+02 3.828432596108168e+02 5.547637544767679e+02 - ME 8.695282964050810e-03 + ME 1.952686275320307e-03 Event 203 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -3676,7 +3676,7 @@ Event 203 Batch 1 2 7.124273471334275e+02 4.879265047129839e+02 -1.059167473143779e+02 -5.081949365946950e+02 3 6.746108110440506e+02 -5.248642991835990e+02 4.352799102536777e+01 4.215714978711400e+02 4 1.129618418225217e+02 3.693779447061509e+01 6.238875628901040e+01 8.662343872355494e+01 - ME 5.361938367485652e-05 + ME 4.211918129012132e-05 Event 204 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3684,7 +3684,7 @@ Event 204 Batch 1 2 7.084787759842808e+02 4.992472551829619e+02 -4.528122431715626e+02 -2.183012291454193e+02 3 1.034373169902747e+02 -8.959882065299325e+01 -3.938861547415055e+01 -3.346441176487074e+01 4 6.880839070254444e+02 -4.096484345299685e+02 4.922008586457131e+02 2.517656409102901e+02 - ME 2.988048706021647e-04 + ME 1.033102023766027e-04 Event 205 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3692,7 +3692,7 @@ Event 205 Batch 1 2 6.496569846879349e+02 -5.869603795046561e+02 -2.345911576090251e+02 1.499956646614410e+02 3 2.543878192344406e+02 -1.851019090219859e+00 2.474675926596849e+02 -5.890268997594536e+01 4 5.959551960776247e+02 5.888113985948760e+02 -1.287643505065981e+01 -9.109297468549572e+01 - ME 1.871447246980874e-04 + ME 4.134215827558992e-05 Event 206 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3700,7 +3700,7 @@ Event 206 Batch 1 2 6.172060642836410e+02 2.978040691523503e+02 4.166709400833434e+02 3.444435946201744e+02 3 7.205754982426181e+02 -2.468045809177361e+02 -5.690387091428452e+02 -3.667580878490107e+02 4 1.622184374737409e+02 -5.099948823461420e+01 1.523677690595017e+02 2.231449322883641e+01 - ME 7.356489425273393e-05 + ME 1.138691716042452e-05 Event 207 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3708,7 +3708,7 @@ Event 207 Batch 1 2 
5.250113096394139e+02 -1.091977068802181e+02 -4.322753509449321e+02 2.772196909074646e+02 3 5.240251005653129e+02 3.541948269240045e+02 3.738549241960732e+02 9.685466564450643e+01 4 4.509635897952731e+02 -2.449971200437864e+02 5.842042674885889e+01 -3.740743565519710e+02 - ME 3.378615964480245e-03 + ME 9.518274156960593e-05 Event 208 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3716,7 +3716,7 @@ Event 208 Batch 1 2 4.449444343820048e+02 1.928662436733418e+02 -3.595193210859464e+02 1.775500478872298e+02 3 4.894053462810564e+02 -2.195789585225567e+02 2.295326432211599e+02 3.723136307450180e+02 4 5.656502193369389e+02 2.671271484921488e+01 1.299866778647865e+02 -5.498636786322478e+02 - ME 2.068943926258950e-01 + ME 2.179806976662403e-03 Event 209 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3724,7 +3724,7 @@ Event 209 Batch 1 2 4.949423498078044e+02 -2.830370809537592e+02 -1.684680620467476e+02 -3.694271951395289e+02 3 6.326444171345161e+02 3.898538983719823e+02 -1.748162179498052e+02 4.665749526039372e+02 4 3.724132330576786e+02 -1.068168174182231e+02 3.432842799965525e+02 -9.714775746440780e+01 - ME 1.473942246791387e-04 + ME 3.638076645868775e-05 Event 210 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3732,7 +3732,7 @@ Event 210 Batch 1 2 5.469464199121014e+02 -4.947084169679945e+02 2.319240083666633e+02 -2.500445517953792e+01 3 2.929141603572806e+02 -5.602902696925145e+01 2.099470855189298e+01 2.867379913571110e+02 4 6.601394197306178e+02 5.507374439372461e+02 -2.529187169185561e+02 -2.617335361775729e+02 - ME 1.577330101330874e-03 + ME 7.792286450853471e-04 Event 211 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3740,7 +3740,7 @@ Event 211 Batch 1 2 5.484404249965427e+02 1.659778109685243e+01 3.514591842057613e+02 -4.206992456262192e+02 3 
4.635537606517395e+02 -3.607884938122542e+02 -3.140996451540818e+01 2.893564685231623e+02 4 4.880058143517181e+02 3.441907127154018e+02 -3.200492196903532e+02 1.313427771030569e+02 - ME 4.999214184618137e-05 + ME 1.717788621912363e-05 Event 212 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3748,7 +3748,7 @@ Event 212 Batch 1 2 6.930853388432640e+02 -3.424793196872474e+02 -8.152110066892747e+01 5.970171795281683e+02 3 9.131624224772825e+01 6.738328155058525e+01 1.365968298972706e+01 6.009627714210347e+01 4 7.155984189090078e+02 2.750960381366621e+02 6.786141767920034e+01 -6.571134566702718e+02 - ME 3.224436999651524e-01 + ME 4.440767413899675e-02 Event 213 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3756,7 +3756,7 @@ Event 213 Batch 1 2 7.316448870278512e+02 4.203233031264803e+02 4.913598772661251e+02 -3.423419819067778e+02 3 4.750162603483208e+02 -1.726357548525294e+02 -3.708603862154638e+02 2.414537588813190e+02 4 2.933388526238279e+02 -2.476875482739507e+02 -1.204994910506614e+02 1.008882230254589e+02 - ME 4.008080891216109e-05 + ME 1.166473784051930e-05 Event 214 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3764,7 +3764,7 @@ Event 214 Batch 1 2 4.805779599533694e+02 3.904513572450257e+02 -1.742898429406511e+02 2.193763065287195e+02 3 6.164938851206517e+02 -5.563771061772993e+02 2.227142270499353e+02 1.445946028815716e+02 4 4.029281549259790e+02 1.659257489322735e+02 -4.842438410928419e+01 -3.639709094102910e+02 - ME 1.130096726278085e-02 + ME 1.644694060635318e-04 Event 215 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3772,7 +3772,7 @@ Event 215 Batch 1 2 4.610896439725640e+02 -3.106576460930037e+02 -3.050258363865880e+02 -1.518378274323046e+02 3 7.153470686812809e+02 2.726436938726979e+02 6.046054769368644e+02 2.680280994976061e+02 4 
3.235632873461531e+02 3.801395222030658e+01 -2.995796405502758e+02 -1.161902720653026e+02 - ME 2.130646114222361e-04 + ME 1.638803663744001e-05 Event 216 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3780,7 +3780,7 @@ Event 216 Batch 1 2 5.309452696424389e+02 -4.912950836090372e+02 -3.608909251460832e+01 -1.980646298023531e+02 3 6.627369363365399e+02 4.479096066616000e+02 2.308759280187052e+02 4.304573578259469e+02 4 3.063177940210212e+02 4.338547694743724e+01 -1.947868355040969e+02 -2.323927280235938e+02 - ME 1.881406502208647e-03 + ME 7.684209531203918e-05 Event 217 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3788,7 +3788,7 @@ Event 217 Batch 1 2 4.608032244164870e+02 2.215832851737383e+02 3.318832460795877e+02 -2.304212888079594e+02 3 3.107022283044695e+02 -4.724697178681157e+01 2.830528592337836e+02 -1.190994425256424e+02 4 7.284945472790432e+02 -1.743363133869267e+02 -6.149361053133712e+02 3.495207313336019e+02 - ME 2.894775763457067e-03 + ME 4.426756984161849e-04 Event 218 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3796,7 +3796,7 @@ Event 218 Batch 1 2 6.336891602166270e+02 5.249943224110900e+02 1.648031440577737e+02 -3.142973702098814e+02 3 5.195346944320743e+02 -3.655895580768890e+02 -3.610279413409480e+02 7.693763263116504e+01 4 3.467761453512956e+02 -1.594047643342018e+02 1.962247972831736e+02 2.373597375787177e+02 - ME 2.703962034458943e-05 + ME 8.957256945094420e-06 Event 219 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3804,7 +3804,7 @@ Event 219 Batch 1 2 2.579228498517417e+02 -4.166553381892272e+01 1.191899344508913e+02 2.249042891828000e+02 3 7.453266221408651e+02 -3.354388163550532e+01 -3.947818065141064e+02 -6.312954196904914e+02 4 4.967505280073930e+02 7.520941545442813e+01 2.755918720632151e+02 4.063911305076915e+02 - 
ME 6.103184694489295e-05 + ME 4.019449398167179e-05 Event 220 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3812,7 +3812,7 @@ Event 220 Batch 1 2 4.940336288355577e+02 -2.383755021420815e+02 -2.918661661143953e+02 3.194690712363630e+02 3 7.129224521449780e+02 2.727447507998269e+02 2.535039959962389e+02 -6.079510240944473e+02 4 2.930439190194635e+02 -3.436924865774512e+01 3.836217011815621e+01 2.884819528580837e+02 - ME 1.761519882509421e-04 + ME 1.677977866215262e-04 Event 221 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3820,7 +3820,7 @@ Event 221 Batch 1 2 3.305414381337777e+02 -2.712796684963201e+02 -1.199910663213094e+02 -1.458325333632650e+02 3 7.388441803280767e+02 5.510455284380058e+02 4.375213740715825e+02 2.254209298704556e+02 4 4.306143815381457e+02 -2.797658599416856e+02 -3.175303077502730e+02 -7.958839650719051e+01 - ME 1.338118621913618e-04 + ME 1.392897982206581e-05 Event 222 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3828,7 +3828,7 @@ Event 222 Batch 1 2 4.657562074797755e+02 2.823280548971349e+02 2.956503281023745e+02 2.231828795335844e+02 3 4.791948192186352e+02 -3.228825926298714e+02 2.575611801233854e+02 -2.429747818931873e+02 4 5.550489733015891e+02 4.055453773273638e+01 -5.532115082257600e+02 1.979190235960287e+01 - ME 9.040551632672907e-05 + ME 2.328731171682892e-05 Event 223 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3836,7 +3836,7 @@ Event 223 Batch 1 2 1.612164685986321e+02 -4.527922182271191e+01 -1.095260585492910e+01 1.543391792239740e+02 3 6.984218503485876e+02 -4.629950983513680e+02 2.605715575888556e+02 -4.533553609726805e+02 4 6.403616810527805e+02 5.082743201740799e+02 -2.496189517339264e+02 2.990161817487066e+02 - ME 4.148580235863498e-04 + ME 2.446487784841432e-04 Event 224 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3844,7 +3844,7 @@ Event 224 Batch 1 2 1.663853414671972e+02 -1.350882138037309e+02 9.706071747767010e+01 3.804401292344658e+00 3 6.436745581417563e+02 -4.469273298203079e+02 -4.412749113764766e+02 -1.408877256838118e+02 4 6.899401003910457e+02 5.820155436240389e+02 3.442141938988058e+02 1.370833243914657e+02 - ME 3.449215697364171e-04 + ME 9.431632941984795e-05 Event 225 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3852,7 +3852,7 @@ Event 225 Batch 1 2 6.702356777533546e+02 6.117158080352369e+02 -2.649249521350114e+02 -6.952987609335720e+01 3 6.901224376513153e+02 -6.564819557015361e+02 1.560869289536550e+02 1.446972404640001e+02 4 1.396418845953297e+02 4.476614766629927e+01 1.088380231813564e+02 -7.516736437064299e+01 - ME 6.407468428023662e-04 + ME 2.456039108263569e-05 Event 226 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3860,7 +3860,7 @@ Event 226 Batch 1 2 7.307777643673112e+02 -4.569648094661606e+02 4.416236342013199e+02 -3.608155616351098e+02 3 1.446420186345137e+02 4.133161435221925e+01 -3.411742569426914e+01 1.343466131828505e+02 4 6.245802169981752e+02 4.156331951139413e+02 -4.075062085070508e+02 2.264689484522593e+02 - ME 4.858390443010437e-04 + ME 2.774761612267077e-04 Event 227 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3868,7 +3868,7 @@ Event 227 Batch 1 2 7.408615397889290e+02 -4.398089081634772e+02 -5.325812259979131e+02 2.679574278743413e+02 3 4.035753807128123e+02 3.000971513323747e+02 2.468113220276344e+02 -1.090823496201683e+02 4 3.555630794982585e+02 1.397117568311025e+02 2.857699039702786e+02 -1.588750782541728e+02 - ME 3.215647103618368e-04 + ME 3.077346064218035e-05 Event 228 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3876,7 +3876,7 @@ Event 
228 Batch 1 2 5.775455372723294e+02 -3.656199842755111e+02 -6.289501053880601e+01 4.426342647953073e+02 3 3.247306314578497e+02 8.776645762339835e+01 3.116872137482897e+02 2.445634292125525e+01 4 5.977238312698206e+02 2.778535266521127e+02 -2.487922032094836e+02 -4.670906077165625e+02 - ME 3.156934429573604e-03 + ME 3.399241079583280e-04 Event 229 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3884,7 +3884,7 @@ Event 229 Batch 1 2 3.665477125629453e+02 -2.081014917770363e+02 2.317985113364040e+02 -1.931850016112187e+02 3 6.187040836990479e+02 -2.134593092471877e+02 -3.484367286517815e+02 4.645661552545953e+02 4 5.147482037380067e+02 4.215608010242241e+02 1.166382173153775e+02 -2.713811536433765e+02 - ME 4.392210547845218e-04 + ME 8.330968691049859e-05 Event 230 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3892,7 +3892,7 @@ Event 230 Batch 1 2 5.913978529013565e+02 -4.986092821675885e+02 -3.028328044703767e+02 9.712104143419764e+01 3 3.439186614041002e+02 -6.573524045766426e+01 3.216488491089061e+02 -1.024741025375549e+02 4 5.646834856945436e+02 5.643445226252528e+02 -1.881604463852933e+01 5.353061103357447e+00 - ME 1.067159092411647e-04 + ME 2.296146042402505e-05 Event 231 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3900,7 +3900,7 @@ Event 231 Batch 1 2 5.760768557894827e+02 -7.075794524290799e+01 5.609870884449791e+02 1.102331327656218e+02 3 6.038619762337338e+02 -2.467027894308989e+02 -5.464177649873398e+02 -7.221250677108812e+01 4 3.200611679767834e+02 3.174607346738069e+02 -1.456932345763944e+01 -3.802062599453370e+01 - ME 8.750887998909065e-05 + ME 9.438631267217403e-06 Event 232 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3908,7 +3908,7 @@ Event 232 Batch 1 2 7.230187249684843e+02 -2.426041066061352e+02 1.884455685697195e+02 
-6.545132479937492e+02 3 4.821326920133732e+02 2.438648429837413e+02 -1.563760752388986e+01 4.156168142598493e+02 4 2.948485830181424e+02 -1.260736377606032e+00 -1.728079610458298e+02 2.388964337338999e+02 - ME 4.549716999825542e-05 + ME 3.745272037455064e-05 Event 233 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3916,7 +3916,7 @@ Event 233 Batch 1 2 3.540260977608100e+02 -1.904526694678991e+02 -1.042089619355360e+02 -2.796475475319170e+02 3 4.925592302096041e+02 1.195034224421750e+02 3.554637678715695e+02 -3.193415679485398e+02 4 6.534146720295859e+02 7.094924702572415e+01 -2.512548059360335e+02 5.989891154804569e+02 - ME 2.494643034161164e-04 + ME 1.035644942794080e-04 Event 234 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3924,7 +3924,7 @@ Event 234 Batch 1 2 1.866526101194276e+02 7.776953530733704e+01 -1.047503781897390e+01 1.693557493124073e+02 3 6.012752698516817e+02 5.974840035795012e+02 -4.570329760029643e+01 4.955829083294186e+01 4 7.120721200288899e+02 -6.752535388868379e+02 5.617833541927040e+01 -2.189140401453492e+02 - ME 2.154454342135980e-03 + ME 6.655948749153013e-04 Event 235 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3932,7 +3932,7 @@ Event 235 Batch 1 2 5.032945404607945e+02 1.612889276925247e+02 2.561838854094329e+02 -4.020710050699558e+02 3 7.153634726767370e+02 -3.739069589148947e+02 -1.979140468542061e+02 5.768609140624169e+02 4 2.813419868624690e+02 2.126180312223700e+02 -5.826983855522722e+01 -1.747899089924609e+02 - ME 8.184939555880423e-04 + ME 1.137471703441233e-04 Event 236 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3940,7 +3940,7 @@ Event 236 Batch 1 2 6.980797829886610e+02 -9.803971882836288e+00 4.740144261428889e+02 5.123764137440797e+02 3 5.519387921056282e+02 -1.638876688381594e+02 -3.209728652821290e+02 
-4.180355032606608e+02 4 2.499814249057108e+02 1.736916407209956e+02 -1.530415608607599e+02 -9.434091048341891e+01 - ME 2.813360227943072e-04 + ME 5.842524801707843e-05 Event 237 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3948,7 +3948,7 @@ Event 237 Batch 1 2 1.604490925133743e+02 6.212857081252698e+01 9.075394990141041e+01 1.168232534834160e+02 3 6.578242662283152e+02 5.348507070161563e+02 -3.810396531957998e+02 3.842224792439630e+01 4 6.817266412583107e+02 -5.969792778286832e+02 2.902857032943894e+02 -1.552455014078122e+02 - ME 8.205069948818567e-04 + ME 1.834055676127939e-04 Event 238 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3956,7 +3956,7 @@ Event 238 Batch 1 2 2.789018340499539e+02 1.069933592962543e+02 -2.572713415352736e+02 1.225197647611563e+01 3 4.761759619803052e+02 7.755191627191856e+01 -4.591043622469822e+02 -9.976187456245104e+01 4 7.449222039697408e+02 -1.845452755681728e+02 7.163757037822556e+02 8.750989808633538e+01 - ME 4.130258343824905e-02 + ME 9.445005309896021e-03 Event 239 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3964,7 +3964,7 @@ Event 239 Batch 1 2 4.581461811054764e+02 -3.899520773556200e+02 2.006122777919944e+02 1.326273524830990e+02 3 3.013476461129690e+02 -2.996604136348060e+02 3.145663680794619e+01 4.951799549362093e+00 4 7.405061727815548e+02 6.896124909904260e+02 -2.320689145999406e+02 -1.375791520324611e+02 - ME 1.351152256907066e-02 + ME 4.970363634614722e-03 Event 240 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3972,7 +3972,7 @@ Event 240 Batch 1 2 5.932490652975304e+02 -4.094504138983958e+01 -3.300190662632461e+02 4.912793227530680e+02 3 3.147487537014150e+02 3.081803657249563e+02 4.097350029662016e+01 -4.912038692507519e+01 4 5.920021810010543e+02 -2.672353243351168e+02 2.890455659666260e+02 
-4.421589358279927e+02 - ME 2.300291351402201e-03 + ME 3.420638167820422e-04 Event 241 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3980,7 +3980,7 @@ Event 241 Batch 1 2 4.438703186026563e+01 1.425431959717181e+01 -4.430288595443099e+00 -4.180186016371768e+01 3 7.139617398095604e+02 -8.415544716076485e+01 -5.657765076565163e+02 -4.272659242311072e+02 4 7.416512283301737e+02 6.990112756359306e+01 5.702067962519594e+02 4.690677843948249e+02 - ME 9.657825758456334e-03 + ME 9.983667466725972e-03 Event 242 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3988,7 +3988,7 @@ Event 242 Batch 1 2 3.798759956195423e+02 -1.259218082844715e+02 -3.429343473884153e+02 1.041417477651927e+02 3 6.208895880511435e+02 5.354328139337265e+02 1.248673426784089e+02 -2.884852319370315e+02 4 4.992344163293142e+02 -4.095110056492549e+02 2.180670047100064e+02 1.843434841718389e+02 - ME 4.523810239016752e-05 + ME 1.030886114253601e-05 Event 243 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3996,7 +3996,7 @@ Event 243 Batch 1 2 2.320641800899440e+02 1.658639294991472e+02 7.783463994856535e+01 1.424243988788334e+02 3 6.251485586341132e+02 -2.328139095298017e+02 -4.262931976140131e+02 3.935511574875350e+02 4 6.427872612759426e+02 6.694998003065477e+01 3.484585576654476e+02 -5.359755563663684e+02 - ME 1.068434238404496e-02 + ME 8.493072129055412e-04 Event 244 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4004,7 +4004,7 @@ Event 244 Batch 1 2 6.609991843787810e+02 -2.293678857540617e+02 -4.971623496474938e+02 -3.703240376037023e+02 3 1.091403980947070e+02 1.154537470975927e+01 -9.115666825632124e+00 -1.081445118228680e+02 4 7.298604175265119e+02 2.178225110443025e+02 5.062780164731259e+02 4.784685494265703e+02 - ME 2.129811247265830e-03 + ME 9.635755455313371e-04 Event 245 Batch 1 
0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4012,7 +4012,7 @@ Event 245 Batch 1 2 4.893629130846664e+02 -3.546974954177181e+02 3.112856868655738e+02 -1.294873298810978e+02 3 7.129026631852477e+02 5.703735458058533e+02 -4.257115617679147e+02 -4.091322034012423e+01 4 2.977344237300874e+02 -2.156760503881352e+02 1.144258749023406e+02 1.704005502212233e+02 - ME 2.548352504440589e-05 + ME 5.312368446054512e-06 Event 246 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4020,7 +4020,7 @@ Event 246 Batch 1 2 3.999457395350199e+02 9.605025124341067e+01 9.072234098128430e+01 3.774922524438975e+02 3 3.675469088581873e+02 -1.615841482674670e+01 2.570183669846762e+02 2.622426259669196e+02 4 7.325073516067924e+02 -7.989183641666393e+01 -3.477407079659604e+02 -6.397348784108170e+02 - ME 1.294421983622042e-01 + ME 5.023802198964801e-02 Event 247 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4028,7 +4028,7 @@ Event 247 Batch 1 2 6.711864521923226e+02 3.763073240556692e+02 5.338170415278108e+02 1.546719678644905e+02 3 5.231557804938882e+02 -1.057595517177888e+02 -5.121603131388773e+02 -1.409615302513522e+01 4 3.056577673137891e+02 -2.705477723378804e+02 -2.165672838893370e+01 -1.405758148393554e+02 - ME 2.873345328272106e-04 + ME 1.980507958825256e-05 Event 248 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4036,7 +4036,7 @@ Event 248 Batch 1 2 6.307803946875938e+02 -6.240065811552291e+01 -3.654556314590158e+02 5.103256270499047e+02 3 3.935347424219227e+02 -2.188782290807617e+02 2.916853933646314e+01 -3.257470040392325e+02 4 4.756848628904837e+02 2.812788871962847e+02 3.362870921225527e+02 -1.845786230106721e+02 - ME 2.418190194667681e-04 + ME 8.712398839363553e-05 Event 249 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 
@@ -4044,7 +4044,7 @@ Event 249 Batch 1 2 4.326970760901858e+02 -4.070406664121577e+02 -1.467447404863359e+02 3.261392852829594e+00 3 4.839435229991528e+02 2.335311811831339e+01 2.018595963184923e+02 -4.392136936630267e+02 4 5.833594009106607e+02 3.836875482938447e+02 -5.511485583215654e+01 4.359523008101972e+02 - ME 8.354140201035124e-05 + ME 2.487145538635957e-05 Event 250 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4052,7 +4052,7 @@ Event 250 Batch 1 2 7.010671671345858e+02 -6.122994886156980e+02 -2.473946684860857e+02 2.353303785738851e+02 3 5.574643785654457e+02 3.902114201641945e+02 2.260985614407801e+02 -3.276904354069721e+02 4 2.414684542999681e+02 2.220880684515034e+02 2.129610704530562e+01 9.236005683308701e+01 - ME 4.704118057291807e-05 + ME 1.645582299148298e-05 Event 251 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4060,7 +4060,7 @@ Event 251 Batch 1 2 7.364006127103795e+02 5.379960890463808e+02 4.302640987755426e+02 2.602285070392761e+02 3 3.051282143252570e+01 -2.901685968644106e+00 1.337962970917706e+01 -2.726899336532026e+01 4 7.330865658570956e+02 -5.350944030777371e+02 -4.436437284847198e+02 -2.329595136739561e+02 - ME 8.340546584740779e-03 + ME 6.389613086136084e-03 Event 252 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4068,7 +4068,7 @@ Event 252 Batch 1 2 5.965625584838610e+02 -7.369842915522101e+01 -5.671364104158780e+02 -1.697401534860145e+02 3 6.549338760881149e+02 -1.514014639568436e+02 6.313240788068730e+02 8.628954906696529e+01 4 2.485035654280235e+02 2.250998931120648e+02 -6.418766839099484e+01 8.345060441904938e+01 - ME 3.985162011735342e-05 + ME 7.225550854378042e-06 Event 253 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4076,7 +4076,7 @@ Event 253 Batch 1 2 5.728678540484714e+02 3.212236187283236e+01 
-4.622666283104808e+02 -3.368312580807653e+02 3 7.160302400837320e+02 1.132435775281999e+02 5.206369974620781e+02 4.783433011307397e+02 4 2.111019058677967e+02 -1.453659394010323e+02 -5.837036915159722e+01 -1.415120430499744e+02 - ME 1.248429186447426e-03 + ME 7.499676590470843e-05 Event 254 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4084,7 +4084,7 @@ Event 254 Batch 1 2 5.579357369440610e+02 1.333150067790222e+02 -6.785864805882139e+01 5.375077668373273e+02 3 6.202682598689536e+02 -4.039338689731095e+02 2.012068793592834e+02 -4.255419314189536e+02 4 3.217960031869852e+02 2.706188621940872e+02 -1.333482313004621e+02 -1.119658354183736e+02 - ME 6.088720978226072e-04 + ME 2.226893396847405e-04 Event 255 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4092,5 +4092,5 @@ Event 255 Batch 1 2 7.263612771087843e+02 3.396063850675520e+02 -6.401091575508393e+02 5.028393902637355e+01 3 1.540578578981475e+02 -3.080387127739228e+01 1.060177193258910e+02 -1.074485378375538e+02 4 6.195808649930684e+02 -3.088025137901597e+02 5.340914382249483e+02 5.716459881118030e+01 - ME 1.547064591142216e-04 + ME 4.003666322732326e-05 From 6d449e990f5474ed26061c3bc604e9b29bbe539b Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 26 Oct 2023 19:58:41 +0200 Subject: [PATCH 024/119] [oct23av] reapply previous patches by Olivier, otherwise the following merges will create conflicts Revert "[oct23av] TEMPORARELY UNDO Olivier's changes to CODEGEN in 9fc9873d0 (keep only the mg5amcnlo update)" This reverts commit d8834781eecbb25e9caf1cc24e1c24537090d8c1. 
--- .../MG5aMC_patches/PROD/patch.common | 12 ++-- .../gpu/MatrixElementKernels.cc | 9 ++- .../iolibs/template_files/gpu/cudacpp.mk | 5 +- .../CUDACPP_SA_OUTPUT/model_handling.py | 70 +++++++++---------- .../PLUGIN/CUDACPP_SA_OUTPUT/output.py | 31 +++++++- .../PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh | 52 ++++++-------- 6 files changed, 100 insertions(+), 79 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common index 2e272b419b..559fc54270 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common @@ -78,12 +78,12 @@ index 348c283be..74db44d84 100644 +CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") +###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) +###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -+CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) -+ifeq ($(CUDACPP_BUILDDIR),) -+$(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) -+else ++CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>/dev/null | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ++#ifeq ($(CUDACPP_BUILDDIR),) ++#$(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) 
++#else +$(info CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)') -+endif ++#endif +CUDACPP_COMMONLIB=mg5amc_common +CUDACPP_CXXLIB=mg5amc_$(processid_short)_cpp +CUDACPP_CULIB=mg5amc_$(processid_short)_cuda @@ -110,7 +110,7 @@ index 348c283be..74db44d84 100644 -$(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX) - $(FC) -o $(PROG) $(PROCESS) $(MATRIX) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp -+LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 ++#LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (flag not universal -> skip?) -$(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL) - $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc index 30257195b6..91c46ebef1 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! 
const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! + const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index 222d75f846..49a5856085 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -27,6 +27,7 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +include ../../Source/make_opts #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +221,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # disable OpenMP MT on Apple clang (builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) 
@@ -554,7 +555,7 @@ endif $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) - $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) + $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index 3c00046c6e..c450898a5d 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -726,7 +726,6 @@ def write_hardcoded_parameters(self, params): for par in pars.split(','): ###print(len(pardef_lines), par) # for debugging pardef_lines[par] = ( 'constexpr ' + type + ' ' + par ) - misc.sprint( 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) ) ###print( pardef_lines ) # for debugging ###for line in pardef_lines: misc.sprint(line) # for debugging parset_pars = [] @@ -1042,14 +1041,14 @@ class PLUGIN_OneProcessExporter(PLUGIN_export_cpp.OneProcessExporterGPU): # AV - overload export_cpp.OneProcessExporterGPU constructor (rename gCPPProcess to CPPProcess, set include_multi_channel) def __init__(self, *args, **kwargs): - misc.sprint('Entering PLUGIN_OneProcessExporter.__init__') +# misc.sprint('Entering PLUGIN_OneProcessExporter.__init__') for kwarg in kwargs: misc.sprint( 'kwargs[%s] = %s' %( kwarg, kwargs[kwarg] ) ) super().__init__(*args, **kwargs) self.process_class = 'CPPProcess' ###if self.in_madevent_mode: proc_id = kwargs['prefix']+1 # madevent+cudacpp (NB: HERE SELF.IN_MADEVENT_MODE DOES NOT WORK!) 
if 'prefix' in kwargs: proc_id = kwargs['prefix']+1 # madevent+cudacpp (ime+1 from ProcessExporterFortranMEGroup.generate_subprocess_directory) else: proc_id = 0 # standalone_cudacpp - misc.sprint(proc_id) +# misc.sprint(proc_id) self.proc_id = proc_id # AV - overload export_cpp.OneProcessExporterGPU method (indent comments in process_lines) @@ -1147,9 +1146,9 @@ def get_process_function_definitions(self, write=True): # AV - modify export_cpp.OneProcessExporterGPU method (add debug printouts for multichannel #342) def get_sigmaKin_lines(self, color_amplitudes, write=True): - misc.sprint('Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines') - misc.sprint(self.include_multi_channel) - misc.sprint(self.support_multichannel) +# misc.sprint('Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines') +# misc.sprint(self.include_multi_channel) +# misc.sprint(self.support_multichannel) replace_dict = super().get_sigmaKin_lines(color_amplitudes, write=False) replace_dict['proc_id'] = self.proc_id if self.proc_id>0 else 1 replace_dict['proc_id_source'] = 'madevent + cudacpp exporter' if self.proc_id>0 else 'standalone_cudacpp' # FIXME? use self.in_madevent_mode instead? 
@@ -1173,14 +1172,14 @@ def get_all_sigmaKin_lines(self, color_amplitudes, class_name): ret_lines = [] if self.single_helicities: ###assert self.include_multi_channel # remove this assert: must handle both cases and produce two different code bases (#473) - misc.sprint(type(self.helas_call_writer)) - misc.sprint(self.support_multichannel, self.include_multi_channel) +# misc.sprint(type(self.helas_call_writer)) +# misc.sprint(self.support_multichannel, self.include_multi_channel) multi_channel = None if self.include_multi_channel: if not self.support_multichannel: raise Exception("link with madevent not supported") multi_channel = self.get_multi_channel_dictionary(self.matrix_elements[0].get('diagrams'), self.include_multi_channel) - misc.sprint(multi_channel) +# misc.sprint(multi_channel) ###misc.sprint( 'before get_matrix_element_calls', self.matrix_elements[0].get_number_of_wavefunctions() ) # WRONG value of nwf, eg 7 for gg_tt helas_calls = self.helas_call_writer.get_matrix_element_calls(\ self.matrix_elements[0], @@ -1308,12 +1307,9 @@ def get_process_info_lines(self, matrix_element): # AV - replace the export_cpp.OneProcessExporterGPU method (invert .cc/.cu, add debug printouts) def generate_process_files(self): """Generate mgOnGpuConfig.h, CPPProcess.cc, CPPProcess.h, check_sa.cc, gXXX.cu links""" - misc.sprint('Entering PLUGIN_OneProcessExporter.generate_process_files') - if self.include_multi_channel: - misc.sprint('self.include_multi_channel is already defined: this is madevent+second_exporter mode') # FIXME? use self.in_madevent_mode instead? - else: - misc.sprint('self.include_multi_channel is not yet defined: this is standalone_cudacpp mode') # see issue #473 - # I move those line to standalone_cudacpp mode (but do we need those at all???) 
+# misc.sprint('Entering PLUGIN_OneProcessExporter.generate_process_files') + if not self.include_multi_channel: + #this condition is likely wrong and need to be removed if self.matrix_elements[0].get('has_mirror_process'): self.matrix_elements[0].set('has_mirror_process', False) self.nprocesses/=2 @@ -1347,15 +1343,15 @@ def generate_process_files(self): ###template_ref = 'dump_CPUTest.'+self.process_name+'.txt' template_ref = self.template_path + '/../../../test/ref/' + 'dump_CPUTest.' + self.process_name + '.txt' if os.path.exists( template_ref ): - misc.sprint( 'Copying test reference file: ', template_ref ) +# misc.sprint( 'Copying test reference file: ', template_ref ) PLUGIN_export_cpp.cp( template_ref, self.path + '/../../test/ref' ) - else: - misc.sprint( 'Test reference file does not exist and will not be copied: ', template_ref ) +# else: +# misc.sprint( 'Test reference file does not exist and will not be copied: ', template_ref ) # SR - generate CMakeLists.txt file inside the P* directory def edit_CMakeLists(self): """Generate CMakeLists.txt""" - misc.sprint('Entering PLUGIN_OneProcessExporter.edit_CMakeLists') +# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_CMakeLists') template = open(pjoin(self.template_path,'CMake/SubProcesses/CMakeLists_P.txt'),'r').read() ff = open(pjoin(self.path, 'CMakeLists.txt'),'w') ff.write(template) @@ -1364,7 +1360,7 @@ def edit_CMakeLists(self): # AV - replace the export_cpp.OneProcessExporterGPU method (invert .cc/.cu, add debug printouts) def edit_check_sa(self): """Generate check_sa.cc and fcheck_sa.f""" - misc.sprint('Entering PLUGIN_OneProcessExporter.edit_check_sa') +# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_check_sa') ff = open(pjoin(self.path, 'check_sa.cc'),'w') template = open(pjoin(self.template_path,'gpu','check_sa.cc'),'r').read() ff.write(template) # nothing to replace in check_sa.cc @@ -1381,7 +1377,7 @@ def edit_check_sa(self): # AV - replace the export_cpp.OneProcessExporterGPU method 
(add debug printouts and multichannel handling #473) def edit_mgonGPU(self): """Generate mgOnGpuConfig.h""" - misc.sprint('Entering PLUGIN_OneProcessExporter.edit_mgonGPU') +# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_mgonGPU') template = open(pjoin(self.template_path,'gpu','mgOnGpuConfig.h'),'r').read() replace_dict = {} nexternal, nincoming = self.matrix_elements[0].get_nexternal_ninitial() @@ -1401,7 +1397,7 @@ def edit_mgonGPU(self): # AV - new method def edit_processidfile(self): """Generate epoch_process_id.h""" - misc.sprint('Entering PLUGIN_OneProcessExporter.edit_processidfile') +# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_processidfile') template = open(pjoin(self.template_path,'gpu','epoch_process_id.h'),'r').read() replace_dict = {} replace_dict['processid'] = self.get_process_name() @@ -1413,7 +1409,7 @@ def edit_processidfile(self): # AV - new method def edit_coloramps(self): """Generate coloramps.h""" - misc.sprint('Entering PLUGIN_OneProcessExporter.edit_coloramps') +# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_coloramps') template = open(pjoin(self.template_path,'gpu','coloramps.h'),'r').read() ff = open(pjoin(self.path, 'coloramps.h'),'w') # The following five lines from OneProcessExporterCPP.get_sigmaKin_lines (using OneProcessExporterCPP.get_icolamp_lines) @@ -1431,7 +1427,7 @@ def edit_coloramps(self): # AV - new method def edit_testxxx(self): """Generate testxxx.cc""" - misc.sprint('Entering PLUGIN_OneProcessExporter.edit_testxxx') +# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_testxxx') template = open(pjoin(self.template_path,'gpu','testxxx.cc'),'r').read() replace_dict = {} replace_dict['model_name'] = self.model_name @@ -1442,7 +1438,7 @@ def edit_testxxx(self): # AV - new method def edit_memorybuffers(self): """Generate MemoryBuffers.h""" - misc.sprint('Entering PLUGIN_OneProcessExporter.edit_memorybuffers') +# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_memorybuffers') template = 
open(pjoin(self.template_path,'gpu','MemoryBuffers.h'),'r').read() replace_dict = {} replace_dict['model_name'] = self.model_name @@ -1453,7 +1449,7 @@ def edit_memorybuffers(self): # AV - new method def edit_memoryaccesscouplings(self): """Generate MemoryAccessCouplings.h""" - misc.sprint('Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings') +# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings') template = open(pjoin(self.template_path,'gpu','MemoryAccessCouplings.h'),'r').read() replace_dict = {} replace_dict['model_name'] = self.model_name @@ -1465,7 +1461,7 @@ def edit_memoryaccesscouplings(self): # [*NB export_cpp.UFOModelConverterGPU.write_process_h_file is not called!*] def write_process_h_file(self, writer): """Generate final gCPPProcess.h""" - misc.sprint('Entering PLUGIN_OneProcessExporter.write_process_h_file') +# misc.sprint('Entering PLUGIN_OneProcessExporter.write_process_h_file') out = super().write_process_h_file(writer) writer.seek(-1, os.SEEK_CUR) writer.truncate() @@ -1487,7 +1483,7 @@ def super_write_process_cc_file(self, writer): # AV - overload the export_cpp.OneProcessExporterGPU method (add debug printout and truncate last \n) def write_process_cc_file(self, writer): """Generate CPPProcess.cc""" - misc.sprint('Entering PLUGIN_OneProcessExporter.write_process_cc_file') +# misc.sprint('Entering PLUGIN_OneProcessExporter.write_process_cc_file') ###out = super().write_process_cc_file(writer) out = self.super_write_process_cc_file(writer) writer.seek(-1, os.SEEK_CUR) @@ -1666,7 +1662,7 @@ def super_get_matrix_element_calls(self, matrix_element, color_amplitudes, multi color[namp][njamp] = coeff me = matrix_element.get('diagrams') matrix_element.reuse_outdated_wavefunctions(me) - misc.sprint(multi_channel_map) +# misc.sprint(multi_channel_map) res = [] ###res.append('for(int i=0;i<%s;i++){jamp[i] = cxtype(0.,0.);}' % len(color_amplitudes)) res.append("""constexpr size_t nxcoup = ndcoup + nicoup; // both 
dependent and independent couplings @@ -1721,7 +1717,7 @@ def super_get_matrix_element_calls(self, matrix_element, color_amplitudes, multi sum([diagrams[idiag].get('amplitudes') for \ idiag in multi_channel_map[config]], [])] diag_to_config[amp[0]] = config - misc.sprint(diag_to_config) +# misc.sprint(diag_to_config) id_amp = 0 for diagram in matrix_element.get('diagrams'): ###print('DIAGRAM %3d: #wavefunctions=%3d, #diagrams=%3d' % @@ -1833,13 +1829,13 @@ def get_external_line(self, wf, argument): wf.get('me_id')-1, wf.get('number_external')-1) elif argument.is_boson(): - misc.sprint(call) - misc.sprint( (wf.get('mass'), - wf.get('number_external')-1, - # For boson, need initial/final here - (-1) ** (wf.get('state') == 'initial'), - wf.get('me_id')-1, - wf.get('number_external')-1)) +# misc.sprint(call) +# misc.sprint( (wf.get('mass'), +# wf.get('number_external')-1, +# # For boson, need initial/final here +# (-1) ** (wf.get('state') == 'initial'), +# wf.get('me_id')-1, +# wf.get('number_external')-1)) return self.format_coupling(call % \ (wf.get('mass'), wf.get('number_external')-1, diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index aebab6f1a7..e261f08057 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -4,6 +4,7 @@ # Further modified by: O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2021-2023) for the MG5aMC CUDACPP plugin. 
import os +import subprocess # AV - load an independent 2nd copy of the export_cpp module (as PLUGIN_export_cpp) and use that within the plugin (workaround for #341) # See https://stackoverflow.com/a/11285504 @@ -34,6 +35,7 @@ from os.path import join as pjoin import madgraph.various.misc as misc +import madgraph.iolibs.files as files # AV - define the plugin's process exporter # (NB: this is the plugin's main class, enabled in the new_output dictionary in __init__.py) @@ -206,9 +208,17 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag): self.add_input_for_banner() if 'CUDACPP_CODEGEN_PATCHLEVEL' in os.environ: patchlevel = os.environ['CUDACPP_CODEGEN_PATCHLEVEL'] else: patchlevel = '' - path = os.path.realpath(os.curdir + os.sep + 'PLUGIN' + os.sep + 'CUDACPP_OUTPUT') - if os.system(path + os.sep + 'patchMad.sh ' + self.dir_path + ' PROD ' + patchlevel) != 0: + plugin_path = os.path.dirname(os.path.realpath( __file__ )) +# path = os.path.realpath(os.curdir + os.sep + 'PLUGIN' + os.sep + 'CUDACPP_OUTPUT') +# misc.sprint(path) + p = subprocess.Popen([pjoin(plugin_path, 'patchMad.sh'), self.dir_path , 'PROD', str(patchlevel)]) + stdout, stderr = p.communicate() + if not p.returncode: + logger.debug("####### \n stdout is \n %s", stdout) + logger.info("####### \n stderr is \n %s", stderr) raise Exception('ERROR! 
the O/S call to patchMad.sh failed') + + self.add_madevent_plugin_fct() return super().finalize(matrix_element, cmdhistory, MG5options, outputflag) # AV (default from OM's tutorial) - overload settings and add a debug printout @@ -229,4 +239,21 @@ def add_input_for_banner(self): for entry in new_parameters: finput.write(entry) + # OM adding a new way to "patch" python file such that the launch command of MG5aMC is working + # this consist in a file plugin_interface.py + # which contains a series of functions and one dictionary variable TO_OVERWRITE + # that will be used to have temporary overwrite of all the key variable passed as string by their value. + # all variable that are file related should be called as madgraph.dir.file.variable + def add_madevent_plugin_fct(self): + """this consist in a file plugin_interface.py + which contains a series of functions and one dictionary variable TO_OVERWRITE + that will be used to have temporary overwrite of all the key variable passed as string by their value. + all variable that are file related should be called as madgraph.dir.file.variable + """ + + plugin_path = os.path.dirname(os.path.realpath( __file__ )) + files.cp(pjoin(plugin_path, 'plugin_interface.py'), pjoin(self.dir_path, 'bin', 'internal')) + files.cp(pjoin(plugin_path, 'launch_plugin.py'), pjoin(self.dir_path, 'bin', 'internal')) + + #------------------------------------------------------------------------------------ diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh index 8ac638a193..aa553100e0 100755 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh @@ -41,59 +41,49 @@ dir_patches=$2 if [ ! -e ${dir} ]; then echo "ERROR! 
Directory $dir does not exist"; exit 1; fi # These two steps are part of "cd Source; make" but they actually are code-generating steps -${dir}/bin/madevent treatcards run -${dir}/bin/madevent treatcards param +#${dir}/bin/madevent treatcards run +#${dir}/bin/madevent treatcards param # Cleanup -\rm -f ${dir}/crossx.html -\rm -f ${dir}/index.html -\rm -f ${dir}/madevent.tar.gz -\rm -f ${dir}/Cards/delphes_trigger.dat -\rm -f ${dir}/Cards/plot_card.dat -\rm -f ${dir}/bin/internal/run_plot* -\rm -f ${dir}/HTML/* -\rm -rf ${dir}/bin/internal/__pycache__ -\rm -rf ${dir}/bin/internal/ufomodel/__pycache__ -touch ${dir}/HTML/.keep # new file +#\rm -f ${dir}/crossx.html +#\rm -f ${dir}/index.html +#\rm -f ${dir}/madevent.tar.gz +#\rm -f ${dir}/Cards/delphes_trigger.dat +#\rm -f ${dir}/Cards/plot_card.dat +#\rm -f ${dir}/bin/internal/run_plot* +#\rm -f ${dir}/HTML/* +#\rm -rf ${dir}/bin/internal/__pycache__ +#\rm -rf ${dir}/bin/internal/ufomodel/__pycache__ +#touch ${dir}/HTML/.keep # new file # Exit here for patchlevel 0 (--upstream) if [ "${patchlevel}" == "0" ]; then exit $status; fi # Add global flag '-O3 -ffast-math -fbounds-check' as in previous gridpacks -echo "GLOBAL_FLAG=-O3 -ffast-math -fbounds-check" > ${dir}/Source/make_opts.new -cat ${dir}/Source/make_opts >> ${dir}/Source/make_opts.new -\mv ${dir}/Source/make_opts.new ${dir}/Source/make_opts +#echo "GLOBAL_FLAG=-O3 -ffast-math -fbounds-check" > ${dir}/Source/make_opts.new +#cat ${dir}/Source/make_opts >> ${dir}/Source/make_opts.new +#\mv ${dir}/Source/make_opts.new ${dir}/Source/make_opts # Patch the default Fortran code to provide the integration with the cudacpp plugin # (1) Process-independent patches touch ${dir}/Events/.keep # this file should already be present (mg5amcnlo copies it from Template/LO/Events/.keep) -\cp -dpr ${scrdir}/MG5aMC_patches/${dir_patches}/fbridge_common.inc ${dir}/SubProcesses # new file -sed -i 's/2 = sde_strategy/1 = sde_strategy/' ${dir}/Cards/run_card.dat # use strategy SDE=1 in 
multichannel mode (see #419) -sed -i 's/SDE_STRAT = 2/SDE_STRAT = 1/' ${dir}/Source/run_card.inc # use strategy SDE=1 in multichannel mode (see #419) +cp -pr ${scrdir}/MG5aMC_patches/${dir_patches}/fbridge_common.inc ${dir}/SubProcesses # new file +#sed -i 's/2 = sde_strategy/1 = sde_strategy/' ${dir}/Cards/run_card.dat # use strategy SDE=1 in multichannel mode (see #419) +#sed -i 's/SDE_STRAT = 2/SDE_STRAT = 1/' ${dir}/Source/run_card.inc # use strategy SDE=1 in multichannel mode (see #419) if [ "${patchlevel}" == "2" ]; then cd ${dir} - sed -i 's/DEFAULT_F2PY_COMPILER=f2py3.*/DEFAULT_F2PY_COMPILER=f2py3/' Source/make_opts + #sed -i 's/DEFAULT_F2PY_COMPILER=f2py3.*/DEFAULT_F2PY_COMPILER=f2py3/' Source/make_opts echo "DEBUG: cd ${PWD}; patch -p4 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.common" if ! patch -p4 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.common; then status=1; fi \rm -f Source/*.orig \rm -f bin/internal/*.orig - echo " -#********************************************************************* -# Options for the cudacpp plugin -#********************************************************************* - -# Set cudacpp-specific values of non-cudacpp-specific options --O3 -ffast-math -fbounds-check = global_flag ! build flags for Fortran code (for a fair comparison to cudacpp) - -# New cudacpp-specific options (default values are defined in banner.py) -CPP = cudacpp_backend ! valid backends are FORTRAN, CPP, CUDA" >> Cards/run_card.dat cd - > /dev/null fi for p1dir in ${dir}/SubProcesses/P*; do cd $p1dir ln -sf ../fbridge_common.inc . # new file - \cp -dpr ${scrdir}/MG5aMC_patches/${dir_patches}/counters.cc . # new file - \cp -dpr ${scrdir}/MG5aMC_patches/${dir_patches}/ompnumthreads.cc . # new file + cp -pr ${scrdir}/MG5aMC_patches/${dir_patches}/counters.cc . # new file + cp -pr ${scrdir}/MG5aMC_patches/${dir_patches}/ompnumthreads.cc . 
# new file if [ "${patchlevel}" == "2" ]; then echo "DEBUG: cd ${PWD}; patch -p6 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.P1" if ! patch -p6 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.P1; then status=1; fi From d5b9d55a9978089a86411b9ef7ea730f962da403 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 09:25:43 +0200 Subject: [PATCH 025/119] [oct23av] in CODEGEN, try to recover my 'tmad mode' in patchMad.sh, after Olivier's changes that commented it out A few specific points that need to be checked - will this build OK on Mac? on Olivier's system and on the CI? check CUDACPP_BUILDDIR and OpenMP - I removed plugin_interface.py as the file was missing - I went back to my implementation to locate the plugin path as the previous one was failing - before my changes , some hunks were silently failing in the patch command, this should break code generation with an error instead --- .../MG5aMC_patches/PROD/patch.common | 12 ++-- .../gpu/MatrixElementKernels.cc | 2 +- .../iolibs/template_files/gpu/cudacpp.mk | 9 ++- .../CUDACPP_SA_OUTPUT/model_handling.py | 69 ++++++++++--------- .../PLUGIN/CUDACPP_SA_OUTPUT/output.py | 31 +++++---- .../PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh | 67 +++++++++++++----- epochX/cudacpp/CODEGEN/generateAndCompare.sh | 3 + 7 files changed, 118 insertions(+), 75 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common index 559fc54270..1b9b1eabed 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common @@ -78,12 +78,12 @@ index 348c283be..74db44d84 100644 +CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") +###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) +###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) 
-+CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>/dev/null | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) -+#ifeq ($(CUDACPP_BUILDDIR),) -+#$(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) -+#else ++CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ++ifeq ($(CUDACPP_BUILDDIR),) ++$(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) ++else +$(info CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)') -+#endif ++endif +CUDACPP_COMMONLIB=mg5amc_common +CUDACPP_CXXLIB=mg5amc_$(processid_short)_cpp +CUDACPP_CULIB=mg5amc_$(processid_short)_cuda @@ -110,7 +110,7 @@ index 348c283be..74db44d84 100644 -$(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX) - $(FC) -o $(PROG) $(PROCESS) $(MATRIX) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp -+#LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (flag not universal -> skip?) ++LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
-$(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL) - $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc index 91c46ebef1..74b5239ebf 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc @@ -116,7 +116,7 @@ namespace mg5amcCpu bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; -#else +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted bool known = false; // __builtin_cpu_supports is not supported // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html // See https://stackoverflow.com/q/62783908 diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index 49a5856085..6ad4e44620 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -27,7 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -include ../../Source/make_opts +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -221,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) 
override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = -fopenmp # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,7 +557,8 @@ endif $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) - $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp + $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index 345e307bb0..2393b152ce 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -726,6 +726,7 @@ def write_hardcoded_parameters(self, params): for par in pars.split(','): ###print(len(pardef_lines), par) # for debugging pardef_lines[par] = ( 'constexpr ' + type + ' ' + par ) + ###misc.sprint( 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) ) ###print( pardef_lines ) # for debugging ###for line in pardef_lines: misc.sprint(line) # for 
debugging parset_pars = [] @@ -1041,14 +1042,14 @@ class PLUGIN_OneProcessExporter(PLUGIN_export_cpp.OneProcessExporterGPU): # AV - overload export_cpp.OneProcessExporterGPU constructor (rename gCPPProcess to CPPProcess, set include_multi_channel) def __init__(self, *args, **kwargs): -# misc.sprint('Entering PLUGIN_OneProcessExporter.__init__') + ###misc.sprint('Entering PLUGIN_OneProcessExporter.__init__') for kwarg in kwargs: misc.sprint( 'kwargs[%s] = %s' %( kwarg, kwargs[kwarg] ) ) super().__init__(*args, **kwargs) self.process_class = 'CPPProcess' ###if self.in_madevent_mode: proc_id = kwargs['prefix']+1 # madevent+cudacpp (NB: HERE SELF.IN_MADEVENT_MODE DOES NOT WORK!) if 'prefix' in kwargs: proc_id = kwargs['prefix']+1 # madevent+cudacpp (ime+1 from ProcessExporterFortranMEGroup.generate_subprocess_directory) else: proc_id = 0 # standalone_cudacpp -# misc.sprint(proc_id) + ###misc.sprint(proc_id) self.proc_id = proc_id # AV - overload export_cpp.OneProcessExporterGPU method (indent comments in process_lines) @@ -1146,9 +1147,9 @@ def get_process_function_definitions(self, write=True): # AV - modify export_cpp.OneProcessExporterGPU method (add debug printouts for multichannel #342) def get_sigmaKin_lines(self, color_amplitudes, write=True): -# misc.sprint('Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines') -# misc.sprint(self.include_multi_channel) -# misc.sprint(self.support_multichannel) + ###misc.sprint('Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines') + ###misc.sprint(self.include_multi_channel) + ###misc.sprint(self.support_multichannel) replace_dict = super().get_sigmaKin_lines(color_amplitudes, write=False) replace_dict['proc_id'] = self.proc_id if self.proc_id>0 else 1 replace_dict['proc_id_source'] = 'madevent + cudacpp exporter' if self.proc_id>0 else 'standalone_cudacpp' # FIXME? use self.in_madevent_mode instead? 
@@ -1172,14 +1173,14 @@ def get_all_sigmaKin_lines(self, color_amplitudes, class_name): ret_lines = [] if self.single_helicities: ###assert self.include_multi_channel # remove this assert: must handle both cases and produce two different code bases (#473) -# misc.sprint(type(self.helas_call_writer)) -# misc.sprint(self.support_multichannel, self.include_multi_channel) + ###misc.sprint(type(self.helas_call_writer)) + ###misc.sprint(self.support_multichannel, self.include_multi_channel) multi_channel = None if self.include_multi_channel: if not self.support_multichannel: raise Exception("link with madevent not supported") multi_channel = self.get_multi_channel_dictionary(self.matrix_elements[0].get('diagrams'), self.include_multi_channel) -# misc.sprint(multi_channel) + ###misc.sprint(multi_channel) ###misc.sprint( 'before get_matrix_element_calls', self.matrix_elements[0].get_number_of_wavefunctions() ) # WRONG value of nwf, eg 7 for gg_tt helas_calls = self.helas_call_writer.get_matrix_element_calls(\ self.matrix_elements[0], @@ -1307,9 +1308,13 @@ def get_process_info_lines(self, matrix_element): # AV - replace the export_cpp.OneProcessExporterGPU method (invert .cc/.cu, add debug printouts) def generate_process_files(self): """Generate mgOnGpuConfig.h, CPPProcess.cc, CPPProcess.h, check_sa.cc, gXXX.cu links""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.generate_process_files') + ###misc.sprint('Entering PLUGIN_OneProcessExporter.generate_process_files') + ###if self.include_multi_channel: + ### misc.sprint('self.include_multi_channel is already defined: this is madevent+second_exporter mode') # FIXME? use self.in_madevent_mode instead? if not self.include_multi_channel: - #this condition is likely wrong and need to be removed + ###misc.sprint('self.include_multi_channel is not yet defined: this is standalone_cudacpp mode') # see issue #473 + # AV: needed for (moved to?) standalone_cudacpp mode (but do we need those lines at all???) 
+ # OM: this condition is likely wrong and need to be removed if self.matrix_elements[0].get('has_mirror_process'): self.matrix_elements[0].set('has_mirror_process', False) self.nprocesses/=2 @@ -1343,15 +1348,15 @@ def generate_process_files(self): ###template_ref = 'dump_CPUTest.'+self.process_name+'.txt' template_ref = self.template_path + '/../../../test/ref/' + 'dump_CPUTest.' + self.process_name + '.txt' if os.path.exists( template_ref ): -# misc.sprint( 'Copying test reference file: ', template_ref ) + ###misc.sprint( 'Copying test reference file: ', template_ref ) PLUGIN_export_cpp.cp( template_ref, self.path + '/../../test/ref' ) -# else: -# misc.sprint( 'Test reference file does not exist and will not be copied: ', template_ref ) + ###else: + ###misc.sprint( 'Test reference file does not exist and will not be copied: ', template_ref ) # SR - generate CMakeLists.txt file inside the P* directory def edit_CMakeLists(self): """Generate CMakeLists.txt""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_CMakeLists') + ###misc.sprint('Entering PLUGIN_OneProcessExporter.edit_CMakeLists') template = open(pjoin(self.template_path,'CMake/SubProcesses/CMakeLists_P.txt'),'r').read() ff = open(pjoin(self.path, 'CMakeLists.txt'),'w') ff.write(template) @@ -1360,7 +1365,7 @@ def edit_CMakeLists(self): # AV - replace the export_cpp.OneProcessExporterGPU method (invert .cc/.cu, add debug printouts) def edit_check_sa(self): """Generate check_sa.cc and fcheck_sa.f""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_check_sa') + ###misc.sprint('Entering PLUGIN_OneProcessExporter.edit_check_sa') ff = open(pjoin(self.path, 'check_sa.cc'),'w') template = open(pjoin(self.template_path,'gpu','check_sa.cc'),'r').read() ff.write(template) # nothing to replace in check_sa.cc @@ -1377,7 +1382,7 @@ def edit_check_sa(self): # AV - replace the export_cpp.OneProcessExporterGPU method (add debug printouts and multichannel handling #473) def edit_mgonGPU(self): """Generate 
mgOnGpuConfig.h""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_mgonGPU') + ###misc.sprint('Entering PLUGIN_OneProcessExporter.edit_mgonGPU') template = open(pjoin(self.template_path,'gpu','mgOnGpuConfig.h'),'r').read() replace_dict = {} nexternal, nincoming = self.matrix_elements[0].get_nexternal_ninitial() @@ -1397,7 +1402,7 @@ def edit_mgonGPU(self): # AV - new method def edit_processidfile(self): """Generate epoch_process_id.h""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_processidfile') + ###misc.sprint('Entering PLUGIN_OneProcessExporter.edit_processidfile') template = open(pjoin(self.template_path,'gpu','epoch_process_id.h'),'r').read() replace_dict = {} replace_dict['processid'] = self.get_process_name() @@ -1409,7 +1414,7 @@ def edit_processidfile(self): # AV - new method def edit_coloramps(self): """Generate coloramps.h""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_coloramps') + ###misc.sprint('Entering PLUGIN_OneProcessExporter.edit_coloramps') template = open(pjoin(self.template_path,'gpu','coloramps.h'),'r').read() ff = open(pjoin(self.path, 'coloramps.h'),'w') # The following five lines from OneProcessExporterCPP.get_sigmaKin_lines (using OneProcessExporterCPP.get_icolamp_lines) @@ -1427,7 +1432,7 @@ def edit_coloramps(self): # AV - new method def edit_testxxx(self): """Generate testxxx.cc""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_testxxx') + ###misc.sprint('Entering PLUGIN_OneProcessExporter.edit_testxxx') template = open(pjoin(self.template_path,'gpu','testxxx.cc'),'r').read() replace_dict = {} replace_dict['model_name'] = self.model_name @@ -1438,7 +1443,7 @@ def edit_testxxx(self): # AV - new method def edit_memorybuffers(self): """Generate MemoryBuffers.h""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_memorybuffers') + ###misc.sprint('Entering PLUGIN_OneProcessExporter.edit_memorybuffers') template = open(pjoin(self.template_path,'gpu','MemoryBuffers.h'),'r').read() replace_dict = 
{} replace_dict['model_name'] = self.model_name @@ -1449,7 +1454,7 @@ def edit_memorybuffers(self): # AV - new method def edit_memoryaccesscouplings(self): """Generate MemoryAccessCouplings.h""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings') + ###misc.sprint('Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings') template = open(pjoin(self.template_path,'gpu','MemoryAccessCouplings.h'),'r').read() replace_dict = {} replace_dict['model_name'] = self.model_name @@ -1461,7 +1466,7 @@ def edit_memoryaccesscouplings(self): # [*NB export_cpp.UFOModelConverterGPU.write_process_h_file is not called!*] def write_process_h_file(self, writer): """Generate final gCPPProcess.h""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.write_process_h_file') + ###misc.sprint('Entering PLUGIN_OneProcessExporter.write_process_h_file') out = super().write_process_h_file(writer) writer.seek(-1, os.SEEK_CUR) writer.truncate() @@ -1483,7 +1488,7 @@ def super_write_process_cc_file(self, writer): # AV - overload the export_cpp.OneProcessExporterGPU method (add debug printout and truncate last \n) def write_process_cc_file(self, writer): """Generate CPPProcess.cc""" -# misc.sprint('Entering PLUGIN_OneProcessExporter.write_process_cc_file') + ###misc.sprint('Entering PLUGIN_OneProcessExporter.write_process_cc_file') ###out = super().write_process_cc_file(writer) out = self.super_write_process_cc_file(writer) writer.seek(-1, os.SEEK_CUR) @@ -1662,7 +1667,7 @@ def super_get_matrix_element_calls(self, matrix_element, color_amplitudes, multi color[namp][njamp] = coeff me = matrix_element.get('diagrams') matrix_element.reuse_outdated_wavefunctions(me) -# misc.sprint(multi_channel_map) + ###misc.sprint(multi_channel_map) res = [] ###res.append('for(int i=0;i<%s;i++){jamp[i] = cxtype(0.,0.);}' % len(color_amplitudes)) res.append("""constexpr size_t nxcoup = ndcoup + nicoup; // both dependent and independent couplings @@ -1717,7 +1722,7 @@ def 
super_get_matrix_element_calls(self, matrix_element, color_amplitudes, multi sum([diagrams[idiag].get('amplitudes') for \ idiag in multi_channel_map[config]], [])] diag_to_config[amp[0]] = config -# misc.sprint(diag_to_config) + ###misc.sprint(diag_to_config) id_amp = 0 for diagram in matrix_element.get('diagrams'): ###print('DIAGRAM %3d: #wavefunctions=%3d, #diagrams=%3d' % @@ -1828,13 +1833,13 @@ def get_external_line(self, wf, argument): wf.get('me_id')-1, wf.get('number_external')-1) elif argument.is_boson(): -# misc.sprint(call) -# misc.sprint( (wf.get('mass'), -# wf.get('number_external')-1, -# # For boson, need initial/final here -# (-1) ** (wf.get('state') == 'initial'), -# wf.get('me_id')-1, -# wf.get('number_external')-1)) + ###misc.sprint(call) + ###misc.sprint( (wf.get('mass'), + ### wf.get('number_external')-1, + ### # For boson, need initial/final here + ### (-1) ** (wf.get('state') == 'initial'), + ### wf.get('me_id')-1, + ### wf.get('number_external')-1)) return self.format_coupling(call % \ (wf.get('mass'), wf.get('number_external')-1, diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index 1caabb1e56..465bb6971e 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -34,8 +34,8 @@ #------------------------------------------------------------------------------------ from os.path import join as pjoin -import madgraph.various.misc as misc import madgraph.iolibs.files as files +import madgraph.various.misc as misc # AV - define the plugin's process exporter # (NB: this is the plugin's main class, enabled in the new_output dictionary in __init__.py) @@ -208,17 +208,22 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag): self.add_input_for_banner() if 'CUDACPP_CODEGEN_PATCHLEVEL' in os.environ: patchlevel = os.environ['CUDACPP_CODEGEN_PATCHLEVEL'] else: patchlevel = '' - 
plugin_path = os.path.dirname(os.path.realpath( __file__ )) -# path = os.path.realpath(os.curdir + os.sep + 'PLUGIN' + os.sep + 'CUDACPP_OUTPUT') -# misc.sprint(path) - p = subprocess.Popen([pjoin(plugin_path, 'patchMad.sh'), self.dir_path , 'PROD', str(patchlevel)]) - stdout, stderr = p.communicate() - if not p.returncode: + # OLD implementation (AV) + path = os.path.realpath(os.curdir + os.sep + 'PLUGIN' + os.sep + 'CUDACPP_OUTPUT') + misc.sprint(path) + if os.system(path + os.sep + 'patchMad.sh ' + self.dir_path + ' PROD ' + patchlevel) != 0: logger.debug("####### \n stdout is \n %s", stdout) logger.info("####### \n stderr is \n %s", stderr) - raise Exception('ERROR! the O/S call to patchMad.sh failed') - - self.add_madevent_plugin_fct() + raise Exception('ERROR! the O/S call to patchMad.sh failed') + # NEW implementation (OM) + #plugin_path = os.path.dirname(os.path.realpath( __file__ )) + #p = subprocess.Popen([pjoin(plugin_path, 'patchMad.sh'), self.dir_path , 'PROD', str(patchlevel)]) + #stdout, stderr = p.communicate() + #if not p.returncode: + # logger.debug("####### \n stdout is \n %s", stdout) + # logger.info("####### \n stderr is \n %s", stderr) + # raise Exception('ERROR! the O/S call to patchMad.sh failed') + self.add_madevent_plugin_fct() # Added by OM return super().finalize(matrix_element, cmdhistory, MG5options, outputflag) # AV (default from OM's tutorial) - overload settings and add a debug printout @@ -249,12 +254,10 @@ def add_madevent_plugin_fct(self): which contains a series of functions and one dictionary variable TO_OVERWRITE that will be used to have temporary overwrite of all the key variable passed as string by their value. 
all variable that are file related should be called as madgraph.dir.file.variable - """ - + """ plugin_path = os.path.dirname(os.path.realpath( __file__ )) - files.cp(pjoin(plugin_path, 'plugin_interface.py'), pjoin(self.dir_path, 'bin', 'internal')) + ###files.cp(pjoin(plugin_path, 'plugin_interface.py'), pjoin(self.dir_path, 'bin', 'internal')) # AV FIXME (added by OM, but file is missing?) files.cp(pjoin(plugin_path, 'launch_plugin.py'), pjoin(self.dir_path, 'bin', 'internal')) files.ln( pjoin(self.dir_path, 'lib'), pjoin(self.dir_path, 'SubProcesses')) - #------------------------------------------------------------------------------------ diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh index aa553100e0..5654f4503b 100755 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh @@ -2,7 +2,7 @@ # Copyright (C) 2020-2023 CERN and UCLouvain. # Licensed under the GNU Lesser General Public License (version 3 or later). # Created by: A. Valassi (Mar 2022) for the MG5aMC CUDACPP plugin. -# Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +# Further modified by: O. Mattelaer, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. set -e # immediate exit on error @@ -40,43 +40,72 @@ dir_patches=$2 if [ ! -e ${dir} ]; then echo "ERROR! Directory $dir does not exist"; exit 1; fi +# AV Recover special 'tmad' mode used by generateAndCompare.sh, after OM's changes that commented this out in patchMad.sh +tmadmode=0 +if [ "${MG5AMC_TMADMODE}" != "" ]; then + tmadmode=1 + echo "DEBUG! 
Switching on tmad mode (MG5AMC_TMADMODE=${MG5AMC_TMADMODE})" +fi + # These two steps are part of "cd Source; make" but they actually are code-generating steps -#${dir}/bin/madevent treatcards run -#${dir}/bin/madevent treatcards param +if [ "${tmadmode}" != "0" ]; then + ${dir}/bin/madevent treatcards run + ${dir}/bin/madevent treatcards param +fi # Cleanup -#\rm -f ${dir}/crossx.html -#\rm -f ${dir}/index.html -#\rm -f ${dir}/madevent.tar.gz -#\rm -f ${dir}/Cards/delphes_trigger.dat -#\rm -f ${dir}/Cards/plot_card.dat -#\rm -f ${dir}/bin/internal/run_plot* -#\rm -f ${dir}/HTML/* -#\rm -rf ${dir}/bin/internal/__pycache__ -#\rm -rf ${dir}/bin/internal/ufomodel/__pycache__ -#touch ${dir}/HTML/.keep # new file +if [ "${tmadmode}" != "0" ]; then + \rm -f ${dir}/crossx.html + \rm -f ${dir}/index.html + \rm -f ${dir}/madevent.tar.gz + \rm -f ${dir}/Cards/delphes_trigger.dat + \rm -f ${dir}/Cards/plot_card.dat + \rm -f ${dir}/bin/internal/run_plot* + \rm -f ${dir}/HTML/* + \rm -rf ${dir}/bin/internal/__pycache__ + \rm -rf ${dir}/bin/internal/ufomodel/__pycache__ + touch ${dir}/HTML/.keep # new file +fi # Exit here for patchlevel 0 (--upstream) if [ "${patchlevel}" == "0" ]; then exit $status; fi # Add global flag '-O3 -ffast-math -fbounds-check' as in previous gridpacks -#echo "GLOBAL_FLAG=-O3 -ffast-math -fbounds-check" > ${dir}/Source/make_opts.new -#cat ${dir}/Source/make_opts >> ${dir}/Source/make_opts.new -#\mv ${dir}/Source/make_opts.new ${dir}/Source/make_opts +if [ "${tmadmode}" != "0" ]; then + echo "GLOBAL_FLAG=-O3 -ffast-math -fbounds-check" > ${dir}/Source/make_opts.new + cat ${dir}/Source/make_opts >> ${dir}/Source/make_opts.new + \mv ${dir}/Source/make_opts.new ${dir}/Source/make_opts +fi # Patch the default Fortran code to provide the integration with the cudacpp plugin # (1) Process-independent patches touch ${dir}/Events/.keep # this file should already be present (mg5amcnlo copies it from Template/LO/Events/.keep) cp -pr 
${scrdir}/MG5aMC_patches/${dir_patches}/fbridge_common.inc ${dir}/SubProcesses # new file -#sed -i 's/2 = sde_strategy/1 = sde_strategy/' ${dir}/Cards/run_card.dat # use strategy SDE=1 in multichannel mode (see #419) -#sed -i 's/SDE_STRAT = 2/SDE_STRAT = 1/' ${dir}/Source/run_card.inc # use strategy SDE=1 in multichannel mode (see #419) +if [ "${tmadmode}" != "0" ]; then + sed -i 's/2 = sde_strategy/1 = sde_strategy/' ${dir}/Cards/run_card.dat # use strategy SDE=1 in multichannel mode (see #419) + sed -i 's/SDE_STRAT = 2/SDE_STRAT = 1/' ${dir}/Source/run_card.inc # use strategy SDE=1 in multichannel mode (see #419) +fi if [ "${patchlevel}" == "2" ]; then cd ${dir} - #sed -i 's/DEFAULT_F2PY_COMPILER=f2py3.*/DEFAULT_F2PY_COMPILER=f2py3/' Source/make_opts + if [ "${tmadmode}" != "0" ]; then + sed -i 's/DEFAULT_F2PY_COMPILER=f2py3.*/DEFAULT_F2PY_COMPILER=f2py3/' Source/make_opts + fi echo "DEBUG: cd ${PWD}; patch -p4 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.common" if ! patch -p4 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.common; then status=1; fi \rm -f Source/*.orig \rm -f bin/internal/*.orig + if [ "${tmadmode}" != "0" ]; then + echo " +#********************************************************************* +# Options for the cudacpp plugin +#********************************************************************* + +# Set cudacpp-specific values of non-cudacpp-specific options +-O3 -ffast-math -fbounds-check = global_flag ! build flags for Fortran code (for a fair comparison to cudacpp) + +# New cudacpp-specific options (default values are defined in banner.py) +CPP = cudacpp_backend ! 
valid backends are FORTRAN, CPP, CUDA" >> Cards/run_card.dat + fi cd - > /dev/null fi for p1dir in ${dir}/SubProcesses/P*; do diff --git a/epochX/cudacpp/CODEGEN/generateAndCompare.sh b/epochX/cudacpp/CODEGEN/generateAndCompare.sh index aab3b3944d..33e7b07669 100755 --- a/epochX/cudacpp/CODEGEN/generateAndCompare.sh +++ b/epochX/cudacpp/CODEGEN/generateAndCompare.sh @@ -6,6 +6,9 @@ set -e # fail on error +# AV Recover special 'tmad' mode used by generateAndCompare.sh, after OM's changes that commented this out in patchMad.sh +export MG5AMC_TMADMODE=1 + #-------------------------------------------------------------------------------------- function codeGenAndDiff() From 5720491949142ba0613b50f18662e2874780bfeb Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 09:44:07 +0200 Subject: [PATCH 026/119] [oct23av] regenerate 7 mad and 6 sa processes (all but 2x eemumu where codegen fails) after my 'tmadmode' changes over Olivier's patch All code generation logs are shorter without debug printouts. In most processes there are only minor code changes (mainly #ifdefs/bash/makefile changes for Mac). The only substantial code difference is that ixxx is used instead of imzx in gq_tttq and pp_tt012j. I checked with a quick test that tput and tmad tests look ok for gq_ttq (logs not saved) ./tput/teeThroughputX.sh -gqttq -makeclean -makej ./tmad/teeMadX.sh -gqttq +10x There appears to be a slight degradation in performance in gqttq due to the use of ixxx instead of imzx. I also made a similar quick test of ggtt, all ok with nothing to note. 
--- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 59 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 6 +- .../cudacpp/gg_tt.mad/SubProcesses/makefile | 4 +- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 59 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk | 6 +- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 92 +-- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../gg_tt01g.mad/SubProcesses/cudacpp.mk | 6 +- .../gg_tt01g.mad/SubProcesses/makefile | 4 +- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 63 +-- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../gg_ttg.mad/SubProcesses/cudacpp.mk | 6 +- .../cudacpp/gg_ttg.mad/SubProcesses/makefile | 4 +- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 61 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk | 6 +- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 67 +-- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../gg_ttgg.mad/SubProcesses/cudacpp.mk | 6 +- .../cudacpp/gg_ttgg.mad/SubProcesses/makefile | 4 +- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 63 +-- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../gg_ttgg.sa/SubProcesses/cudacpp.mk | 6 +- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 69 +-- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../gg_ttggg.mad/SubProcesses/cudacpp.mk | 6 +- .../gg_ttggg.mad/SubProcesses/makefile | 4 +- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 67 +-- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../gg_ttggg.sa/SubProcesses/cudacpp.mk | 6 +- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 88 +-- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../SubProcesses/P1_gu_ttxu/CPPProcess.cc | 11 +- .../SubProcesses/P1_gux_ttxux/CPPProcess.cc | 4 +- .../gq_ttq.mad/SubProcesses/cudacpp.mk | 6 +- .../cudacpp/gq_ttq.mad/SubProcesses/makefile | 4 +- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 86 +-- .../SubProcesses/MatrixElementKernels.cc | 9 +- 
.../P1_Sigma_sm_gu_ttxu/CPPProcess.cc | 11 +- .../P1_Sigma_sm_gux_ttxux/CPPProcess.cc | 4 +- .../cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk | 6 +- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 55 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../heft_gg_h.sa/SubProcesses/cudacpp.mk | 6 +- .../CODEGEN_mad_pp_tt012j_log.txt | 526 ++---------------- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../SubProcesses/P0_uux_ttx/CPPProcess.cc | 4 +- .../SubProcesses/P1_gu_ttxu/CPPProcess.cc | 4 +- .../SubProcesses/P1_gux_ttxux/CPPProcess.cc | 4 +- .../SubProcesses/P1_uux_ttxg/CPPProcess.cc | 4 +- .../SubProcesses/P2_gg_ttxuux/CPPProcess.cc | 11 +- .../SubProcesses/P2_gu_ttxgu/CPPProcess.cc | 4 +- .../SubProcesses/P2_gux_ttxgux/CPPProcess.cc | 4 +- .../SubProcesses/P2_uc_ttxuc/CPPProcess.cc | 8 +- .../SubProcesses/P2_ucx_ttxucx/CPPProcess.cc | 8 +- .../SubProcesses/P2_uu_ttxuu/CPPProcess.cc | 8 +- .../SubProcesses/P2_uux_ttxccx/CPPProcess.cc | 8 +- .../SubProcesses/P2_uux_ttxgg/CPPProcess.cc | 4 +- .../SubProcesses/P2_uux_ttxuux/CPPProcess.cc | 8 +- .../P2_uxcx_ttxuxcx/CPPProcess.cc | 8 +- .../P2_uxux_ttxuxux/CPPProcess.cc | 8 +- .../pp_tt012j.mad/SubProcesses/cudacpp.mk | 6 +- .../pp_tt012j.mad/SubProcesses/makefile | 4 +- 65 files changed, 449 insertions(+), 1254 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index ce5cfd8c71..43fc9daf0f 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005429983139038086  +DEBUG: model prefixing takes 0.0053479671478271484  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -162,10 +162,10 @@ Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  INFO: initialize a new directory: CODEGEN_mad_gg_tt INFO: remove old information in CODEGEN_mad_gg_tt -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt  INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards  @@ -174,40 +174,13 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -215,16 +188,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.112 s +Wrote files for 10 helas calls in 0.070 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.143 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +ALOHA: aloha creates 2 routines in 0.142 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.131 s +ALOHA: aloha creates 4 routines in 0.130 s VVV1 FFV1 FFV1 @@ -232,13 +205,7 @@ ALOHA: aloha creates 4 routines in 0.131 s FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory @@ -249,7 +216,9 @@ save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CO INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG: path =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT [output.py at line 
213]  +DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * * W E L C O M E to * @@ -331,6 +300,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.594s -user 0m2.110s -sys 0m0.299s +real 0m2.280s +user 0m1.984s +sys 0m0.287s diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! 
+ const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile index 
74db44d848..74b19033a8 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 +LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index a1ba7b16b6..58d59778d2 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005548000335693359  +DEBUG: model prefixing takes 0.005294084548950195  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,58 +155,33 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.009 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  -DEBUG: type(subproc_group)= [output.py at line 188]  -DEBUG: type(fortran_model)= [output.py at line 189]  -DEBUG: type(me)= me=0 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  -DEBUG: proc_id =  0 [model_handling.py at line 1052]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  +DEBUG: type(subproc_group)= [output.py at line 190]  +DEBUG: type(fortran_model)= [output.py at line 191]  +DEBUG: type(me)= me=0 [output.py at line 192]  INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1315]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1350]  Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.141 s +ALOHA: aloha creates 2 routines in 0.145 s VVV1 FFV1 FFV1 @@ -214,20 +189,14 @@ ALOHA: aloha creates 2 routines in 0.141 s FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./HelAmps_sm.h INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
-DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m1.311s -user 0m0.535s -sys 0m0.058s +real 0m0.537s +user 0m0.477s +sys 0m0.055s diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! 
+ const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt 
index c40317e7b5..f32dfe22ac 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~; add process g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005491971969604492  +DEBUG: model prefixing takes 0.005346536636352539  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -162,17 +162,17 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.020 s +1 processes with 16 diagrams generated in 0.019 s Total: 2 processes with 19 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  INFO: initialize a new directory: CODEGEN_mad_gg_tt01g INFO: remove old information in CODEGEN_mad_gg_tt01g -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g  INFO: Creating subdirectories in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards  @@ -183,42 +183,13 @@ INFO: Processing color information for process: g g > t t~ g @2 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: 
[10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -226,40 +197,13 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  @@ -267,22 +211,22 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s -Wrote files for 46 helas calls in 0.269 s +Wrote files for 46 helas calls in 0.180 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.324 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +ALOHA: aloha creates 5 routines in 0.320 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.310 s +ALOHA: aloha creates 10 routines in 0.305 s VVV1 VVV1 FFV1 @@ -295,13 +239,7 @@ ALOHA: aloha creates 10 routines in 0.310 s FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./HelAmps_sm.h INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory @@ -312,7 +250,9 @@ save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CO INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG: path =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT [output.py at 
line 213]  +DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * * W E L C O M E to * @@ -404,6 +344,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m4.183s -user 0m2.629s -sys 0m0.332s +real 0m2.911s +user 0m2.516s +sys 0m0.311s diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! 
+ const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile 
b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile index 74db44d848..74b19033a8 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 +LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index 46cb300397..a0950268a2 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005367279052734375  +DEBUG: model prefixing takes 0.005325794219970703  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,17 +155,17 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.021 s +1 processes with 16 diagrams generated in 0.023 s Total: 1 processes with 16 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  INFO: initialize a new directory: CODEGEN_mad_gg_ttg INFO: remove old information in CODEGEN_mad_gg_ttg -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg  INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards  @@ -174,42 +174,13 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  
-DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 
0) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -217,22 +188,22 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s -Wrote files for 36 helas calls in 0.163 s +Wrote files for 36 helas calls in 0.118 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.325 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +ALOHA: aloha creates 5 routines in 0.326 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.310 s +ALOHA: aloha creates 10 routines in 0.311 s VVV1 VVV1 FFV1 @@ -245,13 +216,7 @@ ALOHA: aloha creates 10 routines in 0.310 s FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./HelAmps_sm.h INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory @@ -262,7 +227,9 @@ save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CO INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG: path =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT [output.py at line 
213]  +DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * * W E L C O M E to * @@ -350,6 +317,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.040s -user 0m2.538s -sys 0m0.303s +real 0m2.814s +user 0m2.484s +sys 0m0.289s diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! 
+ const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile b/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile index 
74db44d848..74b19033a8 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 +LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index cd7321fc3d..2633865772 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005832672119140625  +DEBUG: model prefixing takes 0.005397796630859375  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,63 +155,36 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.022 s +1 processes with 16 diagrams generated in 0.021 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  -DEBUG: type(subproc_group)= [output.py at line 188]  -DEBUG: type(fortran_model)= [output.py at line 189]  -DEBUG: type(me)= me=0 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  -DEBUG: proc_id =  0 [model_handling.py at line 1052]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  +DEBUG: type(subproc_group)= [output.py at line 190]  +DEBUG: type(fortran_model)= [output.py at line 191]  +DEBUG: type(me)= me=0 [output.py at line 192]  INFO: Creating files in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1315]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  INFO: Created files 
CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1350]  Generated helas calls for 1 subprocesses (16 diagrams) in 0.036 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.325 s +ALOHA: aloha creates 5 routines in 0.323 s VVV1 VVV1 FFV1 @@ -224,20 +197,14 @@ ALOHA: aloha creates 5 routines in 0.325 s FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./HelAmps_sm.h INFO: Created file HelAmps_sm.h in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
-DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.927s -user 0m0.790s -sys 0m0.053s +real 0m0.961s +user 0m0.719s +sys 0m0.054s diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! 
+ const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt 
index ffe22b60ed..55d49a2465 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005362749099731445  +DEBUG: model prefixing takes 0.0052874088287353516  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,17 +155,17 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.155 s +1 processes with 123 diagrams generated in 0.157 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  INFO: initialize a new directory: CODEGEN_mad_gg_ttgg INFO: remove old information in CODEGEN_mad_gg_ttgg -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg  INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg WARNING: 
File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards  @@ -174,67 +174,36 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1161]  -DEBUG: 
type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: 
[24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 10, 14: 11, 15: 12, 16: 13, 17: 14, 18: 15, 19: 16, 20: 17, 21: 18, 22: 19, 23: 20, 24: 21, 25: 22, 26: 23, 27: 24, 28: 25, 29: 26, 30: 27, 31: 28, 32: 29, 33: 30, 37: 31, 38: 32, 39: 33, 40: 34, 41: 35, 42: 36, 43: 37, 44: 38, 45: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 55: 46, 56: 47, 57: 48, 58: 49, 59: 50, 60: 51, 61: 52, 62: 53, 63: 54, 67: 55, 68: 56, 69: 57, 70: 58, 71: 59, 72: 60, 73: 61, 74: 62, 75: 63, 76: 64, 77: 65, 78: 66, 79: 67, 80: 68, 81: 69, 85: 70, 86: 71, 87: 72, 88: 73, 89: 74, 90: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 100: 85, 101: 86, 102: 87, 106: 88, 107: 89, 108: 90, 109: 91, 110: 92, 111: 93, 115: 94, 116: 95, 117: 96, 118: 97, 119: 98, 120: 99, 124: 100, 125: 101, 126: 102, 127: 103, 128: 104, 129: 105} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( 
momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 
79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.423 s -Wrote files for 222 helas calls in 0.735 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.420 s +Wrote files for 222 helas calls in 0.679 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.330 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +ALOHA: aloha creates 5 routines in 0.327 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.314 s +ALOHA: aloha creates 10 routines in 0.309 s VVV1 VVV1 FFV1 @@ -250,13 +219,7 @@ ALOHA: aloha creates 10 routines in 0.314 s FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./HelAmps_sm.h INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory @@ -267,7 +230,9 @@ save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CO INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG: path =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT [output.py at 
line 213]  +DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * * W E L C O M E to * @@ -355,6 +320,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.973s -user 0m3.649s -sys 0m0.299s +real 0m3.934s +user 0m3.533s +sys 0m0.319s diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! 
+ const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile 
index 74db44d848..74b19033a8 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 +LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 1dd6886647..7c54519581 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00539708137512207  +DEBUG: model prefixing takes 0.00528264045715332  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,58 +155,29 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.156 s +1 processes with 123 diagrams generated in 0.155 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  -DEBUG: type(subproc_group)= [output.py at line 188]  -DEBUG: type(fortran_model)= [output.py at line 189]  -DEBUG: type(me)= me=0 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  -DEBUG: proc_id =  0 [model_handling.py at line 1052]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  +DEBUG: type(subproc_group)= [output.py at line 190]  +DEBUG: type(fortran_model)= [output.py at line 191]  +DEBUG: type(me)= me=0 [output.py at line 192]  INFO: Creating files in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1315]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  -DEBUG: call =  
vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1350]  -Generated helas calls for 1 subprocesses (123 diagrams) in 0.420 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +Generated helas calls for 1 subprocesses (123 diagrams) in 0.429 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines @@ -229,20 +200,14 @@ ALOHA: aloha creates 5 routines in 0.315 s FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./HelAmps_sm.h INFO: Created file HelAmps_sm.h in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
-DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m1.499s -user 0m1.428s -sys 0m0.057s +real 0m1.441s +user 0m1.382s +sys 0m0.046s diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! 
+ const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt 
b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index bd0958642c..d5271622a9 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005446672439575195  +DEBUG: model prefixing takes 0.005560636520385742  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,17 +155,17 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.874 s +1 processes with 1240 diagrams generated in 1.838 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  INFO: initialize a new directory: CODEGEN_mad_gg_ttggg INFO: remove old information in CODEGEN_mad_gg_ttggg -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg  INFO: Creating subdirectories in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards  @@ -176,69 +176,36 @@ INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1592 term in 36s. Introduce 2768 contraction -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 
127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 
489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 
848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  1536 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 
158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 
519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 
0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [3], 4: [4], 5: [6], 6: [7], 7: [13], 8: [14], 9: [15], 10: [17], 11: [18], 12: [19], 13: [21], 14: [22], 15: [23], 16: [25], 17: [26], 18: [27], 19: [28], 20: [29], 21: [30], 22: [32], 23: [33], 24: [34], 25: [35], 26: [36], 27: [37], 28: [38], 29: [39], 30: [40], 31: [41], 32: [42], 33: [43], 34: [44], 35: [45], 36: [46], 37: [48], 38: [49], 39: [50], 40: [51], 41: [52], 42: [53], 43: [54], 44: [55], 45: [56], 46: [57], 47: [58], 48: [59], 49: [60], 50: [61], 51: [62], 52: [64], 53: [65], 54: [66], 55: [67], 56: [68], 57: [69], 58: [70], 59: [71], 60: [72], 61: [73], 62: [74], 63: [75], 64: [76], 65: [77], 66: [78], 67: [80], 68: [81], 69: [82], 70: [83], 71: [84], 72: [85], 73: [86], 74: [87], 75: [88], 76: [90], 77: [91], 78: [92], 79: [93], 80: [94], 81: [95], 82: [96], 83: [97], 84: [98], 85: [100], 86: [101], 87: [102], 88: [103], 89: [104], 90: [105], 91: [106], 92: [107], 93: [108], 94: [109], 95: [110], 96: [111], 97: [112], 98: [113], 99: [114], 100: [115], 101: [116], 102: [117], 103: [118], 104: [119], 105: [120], 106: [123], 107: [124], 108: [125], 109: [126], 110: [127], 111: [128], 112: [129], 113: [130], 114: [131], 115: 
[132], 116: [133], 117: [134], 118: [135], 119: [136], 120: [137], 121: [139], 122: [140], 123: [142], 124: [143], 125: [144], 126: [145], 127: [146], 128: [147], 129: [148], 130: [149], 131: [150], 132: [151], 133: [152], 134: [153], 135: [154], 136: [155], 137: [156], 138: [158], 139: [159], 140: [160], 141: [161], 142: [162], 143: [163], 144: [164], 145: [165], 146: [166], 147: [167], 148: [168], 149: [169], 150: [170], 151: [171], 152: [172], 153: [174], 154: [175], 155: [176], 156: [177], 157: [178], 158: [179], 159: [180], 160: [181], 161: [182], 162: [183], 163: [184], 164: [185], 165: [186], 166: [187], 167: [188], 168: [189], 169: [190], 170: [191], 171: [192], 172: [193], 173: [194], 174: [195], 175: [196], 176: [197], 177: [198], 178: [199], 179: [200], 180: [201], 181: [202], 182: [203], 183: [204], 184: [205], 185: [206], 186: [207], 187: [208], 188: [209], 189: [210], 190: [211], 191: [212], 192: [213], 193: [214], 194: [215], 195: [216], 196: [217], 197: [219], 198: [220], 199: [221], 200: [222], 201: [223], 202: [224], 203: [226], 204: [227], 205: [228], 206: [229], 207: [230], 208: [231], 209: [233], 210: [234], 211: [246], 212: [247], 213: [248], 214: [249], 215: [250], 216: [251], 217: [252], 218: [253], 219: [254], 220: [255], 221: [256], 222: [257], 223: [258], 224: [259], 225: [260], 226: [262], 227: [263], 228: [265], 229: [266], 230: [267], 231: [268], 232: [269], 233: [270], 234: [271], 235: [272], 236: [273], 237: [274], 238: [275], 239: [276], 240: [277], 241: [278], 242: [279], 243: [281], 244: [282], 245: [283], 246: [284], 247: [285], 248: [286], 249: [287], 250: [288], 251: [289], 252: [290], 253: [291], 254: [292], 255: [293], 256: [294], 257: [295], 258: [297], 259: [298], 260: [299], 261: [300], 262: [301], 263: [302], 264: [303], 265: [304], 266: [305], 267: [306], 268: [307], 269: [308], 270: [309], 271: [310], 272: [311], 273: [312], 274: [313], 275: [314], 276: [315], 277: [316], 278: [317], 279: [318], 280: [319], 281: [320], 
282: [321], 283: [322], 284: [323], 285: [324], 286: [325], 287: [326], 288: [327], 289: [328], 290: [329], 291: [330], 292: [331], 293: [332], 294: [333], 295: [334], 296: [335], 297: [336], 298: [337], 299: [338], 300: [339], 301: [340], 302: [342], 303: [343], 304: [344], 305: [345], 306: [346], 307: [347], 308: [349], 309: [350], 310: [351], 311: [352], 312: [353], 313: [354], 314: [356], 315: [357], 316: [369], 317: [370], 318: [371], 319: [372], 320: [373], 321: [374], 322: [376], 323: [377], 324: [378], 325: [379], 326: [380], 327: [381], 328: [382], 329: [383], 330: [384], 331: [385], 332: [386], 333: [387], 334: [388], 335: [389], 336: [390], 337: [392], 338: [393], 339: [394], 340: [395], 341: [396], 342: [397], 343: [398], 344: [399], 345: [400], 346: [401], 347: [402], 348: [403], 349: [404], 350: [405], 351: [406], 352: [408], 353: [409], 354: [410], 355: [411], 356: [412], 357: [413], 358: [414], 359: [415], 360: [416], 361: [417], 362: [418], 363: [419], 364: [420], 365: [421], 366: [422], 367: [424], 368: [425], 369: [426], 370: [427], 371: [428], 372: [429], 373: [430], 374: [431], 375: [432], 376: [433], 377: [434], 378: [436], 379: [437], 380: [439], 381: [440], 382: [446], 383: [447], 384: [448], 385: [449], 386: [450], 387: [451], 388: [452], 389: [453], 390: [454], 391: [456], 392: [457], 393: [458], 394: [459], 395: [460], 396: [461], 397: [462], 398: [463], 399: [464], 400: [466], 401: [467], 402: [468], 403: [469], 404: [470], 405: [471], 406: [472], 407: [473], 408: [474], 409: [476], 410: [477], 411: [478], 412: [479], 413: [480], 414: [481], 415: [483], 416: [484], 417: [485], 418: [486], 419: [487], 420: [488], 421: [492], 422: [493], 423: [494], 424: [495], 425: [496], 426: [497], 427: [499], 428: [500], 429: [501], 430: [502], 431: [503], 432: [504], 433: [505], 434: [506], 435: [507], 436: [508], 437: [509], 438: [510], 439: [511], 440: [512], 441: [513], 442: [515], 443: [516], 444: [517], 445: [518], 446: [519], 447: [520], 448: 
[521], 449: [522], 450: [523], 451: [524], 452: [525], 453: [526], 454: [527], 455: [528], 456: [529], 457: [531], 458: [532], 459: [533], 460: [534], 461: [535], 462: [536], 463: [537], 464: [538], 465: [539], 466: [540], 467: [541], 468: [542], 469: [543], 470: [544], 471: [545], 472: [547], 473: [548], 474: [549], 475: [550], 476: [551], 477: [552], 478: [553], 479: [554], 480: [555], 481: [556], 482: [557], 483: [559], 484: [560], 485: [562], 486: [563], 487: [569], 488: [570], 489: [571], 490: [572], 491: [573], 492: [574], 493: [575], 494: [576], 495: [577], 496: [579], 497: [580], 498: [581], 499: [582], 500: [583], 501: [584], 502: [585], 503: [586], 504: [587], 505: [589], 506: [590], 507: [591], 508: [592], 509: [593], 510: [594], 511: [595], 512: [596], 513: [597], 514: [599], 515: [600], 516: [601], 517: [602], 518: [603], 519: [604], 520: [606], 521: [607], 522: [608], 523: [609], 524: [610], 525: [611], 526: [615], 527: [616], 528: [617], 529: [618], 530: [619], 531: [620], 532: [622], 533: [623], 534: [624], 535: [625], 536: [626], 537: [627], 538: [628], 539: [629], 540: [630], 541: [631], 542: [632], 543: [633], 544: [634], 545: [635], 546: [636], 547: [638], 548: [639], 549: [640], 550: [641], 551: [642], 552: [643], 553: [644], 554: [645], 555: [646], 556: [647], 557: [648], 558: [649], 559: [650], 560: [651], 561: [652], 562: [654], 563: [655], 564: [656], 565: [657], 566: [658], 567: [659], 568: [660], 569: [661], 570: [662], 571: [663], 572: [664], 573: [665], 574: [666], 575: [667], 576: [668], 577: [670], 578: [671], 579: [672], 580: [673], 581: [674], 582: [675], 583: [676], 584: [677], 585: [678], 586: [679], 587: [680], 588: [682], 589: [683], 590: [685], 591: [686], 592: [692], 593: [693], 594: [694], 595: [695], 596: [696], 597: [697], 598: [698], 599: [699], 600: [700], 601: [702], 602: [703], 603: [704], 604: [705], 605: [706], 606: [707], 607: [708], 608: [709], 609: [710], 610: [712], 611: [713], 612: [714], 613: [715], 614: [716], 
615: [717], 616: [718], 617: [719], 618: [720], 619: [722], 620: [723], 621: [724], 622: [725], 623: [726], 624: [727], 625: [729], 626: [730], 627: [731], 628: [732], 629: [733], 630: [734], 631: [738], 632: [739], 633: [740], 634: [741], 635: [742], 636: [743], 637: [744], 638: [745], 639: [746], 640: [747], 641: [748], 642: [749], 643: [750], 644: [751], 645: [752], 646: [753], 647: [754], 648: [755], 649: [756], 650: [757], 651: [758], 652: [759], 653: [760], 654: [761], 655: [762], 656: [763], 657: [764], 658: [765], 659: [766], 660: [767], 661: [768], 662: [769], 663: [770], 664: [772], 665: [773], 666: [774], 667: [775], 668: [776], 669: [777], 670: [779], 671: [780], 672: [781], 673: [782], 674: [783], 675: [784], 676: [788], 677: [789], 678: [790], 679: [791], 680: [792], 681: [793], 682: [794], 683: [795], 684: [796], 685: [797], 686: [798], 687: [799], 688: [800], 689: [801], 690: [802], 691: [803], 692: [804], 693: [805], 694: [806], 695: [807], 696: [808], 697: [809], 698: [810], 699: [811], 700: [812], 701: [813], 702: [814], 703: [815], 704: [816], 705: [817], 706: [818], 707: [819], 708: [820], 709: [822], 710: [823], 711: [824], 712: [825], 713: [826], 714: [827], 715: [829], 716: [830], 717: [831], 718: [832], 719: [833], 720: [834], 721: [838], 722: [839], 723: [841], 724: [842], 725: [844], 726: [845], 727: [851], 728: [852], 729: [853], 730: [854], 731: [855], 732: [856], 733: [857], 734: [858], 735: [859], 736: [861], 737: [862], 738: [863], 739: [864], 740: [865], 741: [866], 742: [867], 743: [868], 744: [869], 745: [871], 746: [872], 747: [873], 748: [874], 749: [875], 750: [876], 751: [877], 752: [878], 753: [879], 754: [881], 755: [882], 756: [883], 757: [884], 758: [885], 759: [886], 760: [888], 761: [889], 762: [890], 763: [891], 764: [892], 765: [893], 766: [894], 767: [895], 768: [897], 769: [898], 770: [900], 771: [901], 772: [907], 773: [908], 774: [909], 775: [910], 776: [911], 777: [912], 778: [913], 779: [914], 780: [915], 781: 
[917], 782: [918], 783: [919], 784: [920], 785: [921], 786: [922], 787: [923], 788: [924], 789: [925], 790: [927], 791: [928], 792: [929], 793: [930], 794: [931], 795: [932], 796: [933], 797: [934], 798: [935], 799: [937], 800: [938], 801: [939], 802: [940], 803: [941], 804: [942], 805: [944], 806: [945], 807: [946], 808: [947], 809: [948], 810: [949], 811: [950], 812: [951], 813: [953], 814: [954], 815: [956], 816: [957], 817: [963], 818: [964], 819: [965], 820: [966], 821: [967], 822: [968], 823: [969], 824: [970], 825: [971], 826: [973], 827: [974], 828: [975], 829: [976], 830: [977], 831: [978], 832: [979], 833: [980], 834: [981], 835: [983], 836: [984], 837: [985], 838: [986], 839: [987], 840: [988], 841: [989], 842: [990], 843: [991], 844: [993], 845: [994], 846: [995], 847: [996], 848: [997], 849: [998], 850: [1000], 851: [1001], 852: [1002], 853: [1003], 854: [1004], 855: [1005], 856: [1006], 857: [1007], 858: [1009], 859: [1010], 860: [1012], 861: [1013], 862: [1018], 863: [1019], 864: [1021], 865: [1022], 866: [1024], 867: [1025], 868: [1030], 869: [1031], 870: [1033], 871: [1034], 872: [1036], 873: [1037], 874: [1045], 875: [1046], 876: [1047], 877: [1048], 878: [1049], 879: [1050], 880: [1051], 881: [1052], 882: [1053], 883: [1054], 884: [1055], 885: [1056], 886: [1057], 887: [1058], 888: [1059], 889: [1060], 890: [1061], 891: [1062], 892: [1064], 893: [1065], 894: [1066], 895: [1067], 896: [1068], 897: [1069], 898: [1070], 899: [1071], 900: [1072], 901: [1073], 902: [1074], 903: [1075], 904: [1076], 905: [1077], 906: [1078], 907: [1079], 908: [1080], 909: [1081], 910: [1083], 911: [1084], 912: [1085], 913: [1086], 914: [1087], 915: [1088], 916: [1089], 917: [1090], 918: [1091], 919: [1092], 920: [1093], 921: [1094], 922: [1095], 923: [1096], 924: [1097], 925: [1098], 926: [1099], 927: [1100], 928: [1102], 929: [1103], 930: [1104], 931: [1105], 932: [1106], 933: [1107], 934: [1109], 935: [1110], 936: [1111], 937: [1112], 938: [1113], 939: [1114], 940: 
[1116], 941: [1117], 942: [1118], 943: [1119], 944: [1120], 945: [1121]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [3], 4: [4], 5: [6], 6: [7], 7: [13], 8: [14], 9: [15], 10: [17], 11: [18], 12: [19], 13: [21], 14: [22], 15: [23], 16: [25], 17: [26], 18: [27], 19: [28], 20: [29], 21: [30], 22: [32], 23: [33], 24: [34], 25: [35], 26: [36], 27: [37], 28: [38], 29: [39], 30: [40], 31: [41], 32: [42], 33: [43], 34: [44], 35: [45], 36: [46], 37: [48], 38: [49], 39: [50], 40: [51], 41: [52], 42: [53], 43: [54], 44: [55], 45: [56], 46: [57], 47: [58], 48: [59], 49: [60], 50: [61], 51: [62], 52: [64], 53: [65], 54: [66], 55: [67], 56: [68], 57: [69], 58: [70], 59: [71], 60: [72], 61: [73], 62: [74], 63: [75], 64: [76], 65: [77], 66: [78], 67: [80], 68: [81], 69: [82], 70: [83], 71: [84], 72: [85], 73: [86], 74: [87], 75: [88], 76: [90], 77: [91], 78: [92], 79: [93], 80: [94], 81: [95], 82: [96], 83: [97], 84: [98], 85: [100], 86: [101], 87: [102], 88: [103], 89: [104], 90: [105], 91: [106], 92: [107], 93: [108], 94: [109], 95: [110], 96: [111], 97: [112], 98: [113], 99: [114], 100: [115], 101: [116], 102: [117], 103: [118], 104: [119], 105: [120], 106: [123], 107: [124], 108: [125], 109: [126], 110: [127], 111: [128], 112: [129], 113: [130], 114: [131], 115: [132], 116: [133], 117: [134], 118: [135], 119: [136], 120: [137], 121: [139], 122: [140], 123: [142], 124: [143], 125: [144], 126: [145], 127: [146], 128: [147], 129: [148], 130: [149], 131: [150], 132: [151], 133: [152], 134: [153], 135: [154], 136: [155], 137: [156], 138: [158], 139: [159], 140: [160], 141: [161], 142: [162], 143: [163], 144: [164], 145: [165], 146: [166], 147: [167], 148: [168], 149: [169], 150: [170], 151: [171], 152: [172], 153: [174], 154: [175], 155: [176], 156: [177], 157: [178], 158: [179], 159: [180], 160: [181], 161: [182], 162: [183], 163: [184], 164: [185], 165: [186], 166: [187], 167: [188], 168: [189], 169: [190], 170: [191], 171: [192], 172: 
[193], 173: [194], 174: [195], 175: [196], 176: [197], 177: [198], 178: [199], 179: [200], 180: [201], 181: [202], 182: [203], 183: [204], 184: [205], 185: [206], 186: [207], 187: [208], 188: [209], 189: [210], 190: [211], 191: [212], 192: [213], 193: [214], 194: [215], 195: [216], 196: [217], 197: [219], 198: [220], 199: [221], 200: [222], 201: [223], 202: [224], 203: [226], 204: [227], 205: [228], 206: [229], 207: [230], 208: [231], 209: [233], 210: [234], 211: [246], 212: [247], 213: [248], 214: [249], 215: [250], 216: [251], 217: [252], 218: [253], 219: [254], 220: [255], 221: [256], 222: [257], 223: [258], 224: [259], 225: [260], 226: [262], 227: [263], 228: [265], 229: [266], 230: [267], 231: [268], 232: [269], 233: [270], 234: [271], 235: [272], 236: [273], 237: [274], 238: [275], 239: [276], 240: [277], 241: [278], 242: [279], 243: [281], 244: [282], 245: [283], 246: [284], 247: [285], 248: [286], 249: [287], 250: [288], 251: [289], 252: [290], 253: [291], 254: [292], 255: [293], 256: [294], 257: [295], 258: [297], 259: [298], 260: [299], 261: [300], 262: [301], 263: [302], 264: [303], 265: [304], 266: [305], 267: [306], 268: [307], 269: [308], 270: [309], 271: [310], 272: [311], 273: [312], 274: [313], 275: [314], 276: [315], 277: [316], 278: [317], 279: [318], 280: [319], 281: [320], 282: [321], 283: [322], 284: [323], 285: [324], 286: [325], 287: [326], 288: [327], 289: [328], 290: [329], 291: [330], 292: [331], 293: [332], 294: [333], 295: [334], 296: [335], 297: [336], 298: [337], 299: [338], 300: [339], 301: [340], 302: [342], 303: [343], 304: [344], 305: [345], 306: [346], 307: [347], 308: [349], 309: [350], 310: [351], 311: [352], 312: [353], 313: [354], 314: [356], 315: [357], 316: [369], 317: [370], 318: [371], 319: [372], 320: [373], 321: [374], 322: [376], 323: [377], 324: [378], 325: [379], 326: [380], 327: [381], 328: [382], 329: [383], 330: [384], 331: [385], 332: [386], 333: [387], 334: [388], 335: [389], 336: [390], 337: [392], 338: [393], 
339: [394], 340: [395], 341: [396], 342: [397], 343: [398], 344: [399], 345: [400], 346: [401], 347: [402], 348: [403], 349: [404], 350: [405], 351: [406], 352: [408], 353: [409], 354: [410], 355: [411], 356: [412], 357: [413], 358: [414], 359: [415], 360: [416], 361: [417], 362: [418], 363: [419], 364: [420], 365: [421], 366: [422], 367: [424], 368: [425], 369: [426], 370: [427], 371: [428], 372: [429], 373: [430], 374: [431], 375: [432], 376: [433], 377: [434], 378: [436], 379: [437], 380: [439], 381: [440], 382: [446], 383: [447], 384: [448], 385: [449], 386: [450], 387: [451], 388: [452], 389: [453], 390: [454], 391: [456], 392: [457], 393: [458], 394: [459], 395: [460], 396: [461], 397: [462], 398: [463], 399: [464], 400: [466], 401: [467], 402: [468], 403: [469], 404: [470], 405: [471], 406: [472], 407: [473], 408: [474], 409: [476], 410: [477], 411: [478], 412: [479], 413: [480], 414: [481], 415: [483], 416: [484], 417: [485], 418: [486], 419: [487], 420: [488], 421: [492], 422: [493], 423: [494], 424: [495], 425: [496], 426: [497], 427: [499], 428: [500], 429: [501], 430: [502], 431: [503], 432: [504], 433: [505], 434: [506], 435: [507], 436: [508], 437: [509], 438: [510], 439: [511], 440: [512], 441: [513], 442: [515], 443: [516], 444: [517], 445: [518], 446: [519], 447: [520], 448: [521], 449: [522], 450: [523], 451: [524], 452: [525], 453: [526], 454: [527], 455: [528], 456: [529], 457: [531], 458: [532], 459: [533], 460: [534], 461: [535], 462: [536], 463: [537], 464: [538], 465: [539], 466: [540], 467: [541], 468: [542], 469: [543], 470: [544], 471: [545], 472: [547], 473: [548], 474: [549], 475: [550], 476: [551], 477: [552], 478: [553], 479: [554], 480: [555], 481: [556], 482: [557], 483: [559], 484: [560], 485: [562], 486: [563], 487: [569], 488: [570], 489: [571], 490: [572], 491: [573], 492: [574], 493: [575], 494: [576], 495: [577], 496: [579], 497: [580], 498: [581], 499: [582], 500: [583], 501: [584], 502: [585], 503: [586], 504: [587], 505: 
[589], 506: [590], 507: [591], 508: [592], 509: [593], 510: [594], 511: [595], 512: [596], 513: [597], 514: [599], 515: [600], 516: [601], 517: [602], 518: [603], 519: [604], 520: [606], 521: [607], 522: [608], 523: [609], 524: [610], 525: [611], 526: [615], 527: [616], 528: [617], 529: [618], 530: [619], 531: [620], 532: [622], 533: [623], 534: [624], 535: [625], 536: [626], 537: [627], 538: [628], 539: [629], 540: [630], 541: [631], 542: [632], 543: [633], 544: [634], 545: [635], 546: [636], 547: [638], 548: [639], 549: [640], 550: [641], 551: [642], 552: [643], 553: [644], 554: [645], 555: [646], 556: [647], 557: [648], 558: [649], 559: [650], 560: [651], 561: [652], 562: [654], 563: [655], 564: [656], 565: [657], 566: [658], 567: [659], 568: [660], 569: [661], 570: [662], 571: [663], 572: [664], 573: [665], 574: [666], 575: [667], 576: [668], 577: [670], 578: [671], 579: [672], 580: [673], 581: [674], 582: [675], 583: [676], 584: [677], 585: [678], 586: [679], 587: [680], 588: [682], 589: [683], 590: [685], 591: [686], 592: [692], 593: [693], 594: [694], 595: [695], 596: [696], 597: [697], 598: [698], 599: [699], 600: [700], 601: [702], 602: [703], 603: [704], 604: [705], 605: [706], 606: [707], 607: [708], 608: [709], 609: [710], 610: [712], 611: [713], 612: [714], 613: [715], 614: [716], 615: [717], 616: [718], 617: [719], 618: [720], 619: [722], 620: [723], 621: [724], 622: [725], 623: [726], 624: [727], 625: [729], 626: [730], 627: [731], 628: [732], 629: [733], 630: [734], 631: [738], 632: [739], 633: [740], 634: [741], 635: [742], 636: [743], 637: [744], 638: [745], 639: [746], 640: [747], 641: [748], 642: [749], 643: [750], 644: [751], 645: [752], 646: [753], 647: [754], 648: [755], 649: [756], 650: [757], 651: [758], 652: [759], 653: [760], 654: [761], 655: [762], 656: [763], 657: [764], 658: [765], 659: [766], 660: [767], 661: [768], 662: [769], 663: [770], 664: [772], 665: [773], 666: [774], 667: [775], 668: [776], 669: [777], 670: [779], 671: [780], 
672: [781], 673: [782], 674: [783], 675: [784], 676: [788], 677: [789], 678: [790], 679: [791], 680: [792], 681: [793], 682: [794], 683: [795], 684: [796], 685: [797], 686: [798], 687: [799], 688: [800], 689: [801], 690: [802], 691: [803], 692: [804], 693: [805], 694: [806], 695: [807], 696: [808], 697: [809], 698: [810], 699: [811], 700: [812], 701: [813], 702: [814], 703: [815], 704: [816], 705: [817], 706: [818], 707: [819], 708: [820], 709: [822], 710: [823], 711: [824], 712: [825], 713: [826], 714: [827], 715: [829], 716: [830], 717: [831], 718: [832], 719: [833], 720: [834], 721: [838], 722: [839], 723: [841], 724: [842], 725: [844], 726: [845], 727: [851], 728: [852], 729: [853], 730: [854], 731: [855], 732: [856], 733: [857], 734: [858], 735: [859], 736: [861], 737: [862], 738: [863], 739: [864], 740: [865], 741: [866], 742: [867], 743: [868], 744: [869], 745: [871], 746: [872], 747: [873], 748: [874], 749: [875], 750: [876], 751: [877], 752: [878], 753: [879], 754: [881], 755: [882], 756: [883], 757: [884], 758: [885], 759: [886], 760: [888], 761: [889], 762: [890], 763: [891], 764: [892], 765: [893], 766: [894], 767: [895], 768: [897], 769: [898], 770: [900], 771: [901], 772: [907], 773: [908], 774: [909], 775: [910], 776: [911], 777: [912], 778: [913], 779: [914], 780: [915], 781: [917], 782: [918], 783: [919], 784: [920], 785: [921], 786: [922], 787: [923], 788: [924], 789: [925], 790: [927], 791: [928], 792: [929], 793: [930], 794: [931], 795: [932], 796: [933], 797: [934], 798: [935], 799: [937], 800: [938], 801: [939], 802: [940], 803: [941], 804: [942], 805: [944], 806: [945], 807: [946], 808: [947], 809: [948], 810: [949], 811: [950], 812: [951], 813: [953], 814: [954], 815: [956], 816: [957], 817: [963], 818: [964], 819: [965], 820: [966], 821: [967], 822: [968], 823: [969], 824: [970], 825: [971], 826: [973], 827: [974], 828: [975], 829: [976], 830: [977], 831: [978], 832: [979], 833: [980], 834: [981], 835: [983], 836: [984], 837: [985], 838: 
[986], 839: [987], 840: [988], 841: [989], 842: [990], 843: [991], 844: [993], 845: [994], 846: [995], 847: [996], 848: [997], 849: [998], 850: [1000], 851: [1001], 852: [1002], 853: [1003], 854: [1004], 855: [1005], 856: [1006], 857: [1007], 858: [1009], 859: [1010], 860: [1012], 861: [1013], 862: [1018], 863: [1019], 864: [1021], 865: [1022], 866: [1024], 867: [1025], 868: [1030], 869: [1031], 870: [1033], 871: [1034], 872: [1036], 873: [1037], 874: [1045], 875: [1046], 876: [1047], 877: [1048], 878: [1049], 879: [1050], 880: [1051], 881: [1052], 882: [1053], 883: [1054], 884: [1055], 885: [1056], 886: [1057], 887: [1058], 888: [1059], 889: [1060], 890: [1061], 891: [1062], 892: [1064], 893: [1065], 894: [1066], 895: [1067], 896: [1068], 897: [1069], 898: [1070], 899: [1071], 900: [1072], 901: [1073], 902: [1074], 903: [1075], 904: [1076], 905: [1077], 906: [1078], 907: [1079], 908: [1080], 909: [1081], 910: [1083], 911: [1084], 912: [1085], 913: [1086], 914: [1087], 915: [1088], 916: [1089], 917: [1090], 918: [1091], 919: [1092], 920: [1093], 921: [1094], 922: [1095], 923: [1096], 924: [1097], 925: [1098], 926: [1099], 927: [1100], 928: [1102], 929: [1103], 930: [1104], 931: [1105], 932: [1106], 933: [1107], 934: [1109], 935: [1110], 936: [1111], 937: [1112], 938: [1113], 939: [1114], 940: [1116], 941: [1117], 942: [1118], 943: [1119], 944: [1120], 945: [1121]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 6: 3, 7: 4, 11: 5, 12: 6, 28: 7, 29: 8, 30: 9, 34: 10, 35: 11, 36: 12, 40: 13, 41: 14, 42: 15, 46: 16, 47: 17, 48: 18, 49: 19, 50: 20, 51: 21, 55: 22, 56: 23, 57: 24, 58: 25, 59: 26, 60: 27, 61: 28, 62: 29, 63: 30, 64: 31, 65: 32, 66: 33, 67: 34, 68: 35, 69: 36, 73: 37, 74: 38, 75: 39, 76: 40, 77: 41, 78: 42, 79: 43, 80: 44, 81: 45, 82: 46, 83: 47, 84: 48, 85: 49, 86: 50, 87: 51, 91: 52, 92: 53, 93: 54, 94: 55, 95: 56, 96: 57, 97: 58, 98: 59, 99: 60, 100: 61, 101: 62, 102: 63, 103: 64, 104: 65, 105: 66, 109: 67, 110: 68, 111: 69, 
112: 70, 113: 71, 114: 72, 115: 73, 116: 74, 117: 75, 121: 76, 122: 77, 123: 78, 124: 79, 125: 80, 126: 81, 127: 82, 128: 83, 129: 84, 133: 85, 134: 86, 135: 87, 136: 88, 137: 89, 138: 90, 139: 91, 140: 92, 141: 93, 142: 94, 143: 95, 144: 96, 145: 97, 146: 98, 147: 99, 148: 100, 149: 101, 150: 102, 151: 103, 152: 104, 153: 105, 160: 106, 161: 107, 162: 108, 163: 109, 164: 110, 165: 111, 166: 112, 167: 113, 168: 114, 169: 115, 170: 116, 171: 117, 172: 118, 173: 119, 174: 120, 178: 121, 179: 122, 183: 123, 184: 124, 185: 125, 186: 126, 187: 127, 188: 128, 189: 129, 190: 130, 191: 131, 192: 132, 193: 133, 194: 134, 195: 135, 196: 136, 197: 137, 201: 138, 202: 139, 203: 140, 204: 141, 205: 142, 206: 143, 207: 144, 208: 145, 209: 146, 210: 147, 211: 148, 212: 149, 213: 150, 214: 151, 215: 152, 219: 153, 220: 154, 221: 155, 222: 156, 223: 157, 224: 158, 225: 159, 226: 160, 227: 161, 228: 162, 229: 163, 230: 164, 231: 165, 232: 166, 233: 167, 234: 168, 235: 169, 236: 170, 237: 171, 238: 172, 239: 173, 240: 174, 241: 175, 242: 176, 243: 177, 244: 178, 245: 179, 246: 180, 247: 181, 248: 182, 249: 183, 250: 184, 251: 185, 252: 186, 253: 187, 254: 188, 255: 189, 256: 190, 257: 191, 258: 192, 259: 193, 260: 194, 261: 195, 262: 196, 266: 197, 267: 198, 268: 199, 269: 200, 270: 201, 271: 202, 275: 203, 276: 204, 277: 205, 278: 206, 279: 207, 280: 208, 284: 209, 285: 210, 319: 211, 320: 212, 321: 213, 322: 214, 323: 215, 324: 216, 325: 217, 326: 218, 327: 219, 328: 220, 329: 221, 330: 222, 331: 223, 332: 224, 333: 225, 337: 226, 338: 227, 342: 228, 343: 229, 344: 230, 345: 231, 346: 232, 347: 233, 348: 234, 349: 235, 350: 236, 351: 237, 352: 238, 353: 239, 354: 240, 355: 241, 356: 242, 360: 243, 361: 244, 362: 245, 363: 246, 364: 247, 365: 248, 366: 249, 367: 250, 368: 251, 369: 252, 370: 253, 371: 254, 372: 255, 373: 256, 374: 257, 378: 258, 379: 259, 380: 260, 381: 261, 382: 262, 383: 263, 384: 264, 385: 265, 386: 266, 387: 267, 388: 268, 389: 269, 390: 270, 391: 271, 392: 272, 
393: 273, 394: 274, 395: 275, 396: 276, 397: 277, 398: 278, 399: 279, 400: 280, 401: 281, 402: 282, 403: 283, 404: 284, 405: 285, 406: 286, 407: 287, 408: 288, 409: 289, 410: 290, 411: 291, 412: 292, 413: 293, 414: 294, 415: 295, 416: 296, 417: 297, 418: 298, 419: 299, 420: 300, 421: 301, 425: 302, 426: 303, 427: 304, 428: 305, 429: 306, 430: 307, 434: 308, 435: 309, 436: 310, 437: 311, 438: 312, 439: 313, 443: 314, 444: 315, 478: 316, 479: 317, 480: 318, 481: 319, 482: 320, 483: 321, 487: 322, 488: 323, 489: 324, 490: 325, 491: 326, 492: 327, 493: 328, 494: 329, 495: 330, 496: 331, 497: 332, 498: 333, 499: 334, 500: 335, 501: 336, 505: 337, 506: 338, 507: 339, 508: 340, 509: 341, 510: 342, 511: 343, 512: 344, 513: 345, 514: 346, 515: 347, 516: 348, 517: 349, 518: 350, 519: 351, 523: 352, 524: 353, 525: 354, 526: 355, 527: 356, 528: 357, 529: 358, 530: 359, 531: 360, 532: 361, 533: 362, 534: 363, 535: 364, 536: 365, 537: 366, 541: 367, 542: 368, 543: 369, 544: 370, 545: 371, 546: 372, 547: 373, 548: 374, 549: 375, 550: 376, 551: 377, 555: 378, 556: 379, 560: 380, 561: 381, 577: 382, 578: 383, 579: 384, 580: 385, 581: 386, 582: 387, 583: 388, 584: 389, 585: 390, 589: 391, 590: 392, 591: 393, 592: 394, 593: 395, 594: 396, 595: 397, 596: 398, 597: 399, 601: 400, 602: 401, 603: 402, 604: 403, 605: 404, 606: 405, 607: 406, 608: 407, 609: 408, 613: 409, 614: 410, 615: 411, 616: 412, 617: 413, 618: 414, 622: 415, 623: 416, 624: 417, 625: 418, 626: 419, 627: 420, 637: 421, 638: 422, 639: 423, 640: 424, 641: 425, 642: 426, 646: 427, 647: 428, 648: 429, 649: 430, 650: 431, 651: 432, 652: 433, 653: 434, 654: 435, 655: 436, 656: 437, 657: 438, 658: 439, 659: 440, 660: 441, 664: 442, 665: 443, 666: 444, 667: 445, 668: 446, 669: 447, 670: 448, 671: 449, 672: 450, 673: 451, 674: 452, 675: 453, 676: 454, 677: 455, 678: 456, 682: 457, 683: 458, 684: 459, 685: 460, 686: 461, 687: 462, 688: 463, 689: 464, 690: 465, 691: 466, 692: 467, 693: 468, 694: 469, 695: 470, 696: 471, 700: 472, 
701: 473, 702: 474, 703: 475, 704: 476, 705: 477, 706: 478, 707: 479, 708: 480, 709: 481, 710: 482, 714: 483, 715: 484, 719: 485, 720: 486, 736: 487, 737: 488, 738: 489, 739: 490, 740: 491, 741: 492, 742: 493, 743: 494, 744: 495, 748: 496, 749: 497, 750: 498, 751: 499, 752: 500, 753: 501, 754: 502, 755: 503, 756: 504, 760: 505, 761: 506, 762: 507, 763: 508, 764: 509, 765: 510, 766: 511, 767: 512, 768: 513, 772: 514, 773: 515, 774: 516, 775: 517, 776: 518, 777: 519, 781: 520, 782: 521, 783: 522, 784: 523, 785: 524, 786: 525, 796: 526, 797: 527, 798: 528, 799: 529, 800: 530, 801: 531, 805: 532, 806: 533, 807: 534, 808: 535, 809: 536, 810: 537, 811: 538, 812: 539, 813: 540, 814: 541, 815: 542, 816: 543, 817: 544, 818: 545, 819: 546, 823: 547, 824: 548, 825: 549, 826: 550, 827: 551, 828: 552, 829: 553, 830: 554, 831: 555, 832: 556, 833: 557, 834: 558, 835: 559, 836: 560, 837: 561, 841: 562, 842: 563, 843: 564, 844: 565, 845: 566, 846: 567, 847: 568, 848: 569, 849: 570, 850: 571, 851: 572, 852: 573, 853: 574, 854: 575, 855: 576, 859: 577, 860: 578, 861: 579, 862: 580, 863: 581, 864: 582, 865: 583, 866: 584, 867: 585, 868: 586, 869: 587, 873: 588, 874: 589, 878: 590, 879: 591, 895: 592, 896: 593, 897: 594, 898: 595, 899: 596, 900: 597, 901: 598, 902: 599, 903: 600, 907: 601, 908: 602, 909: 603, 910: 604, 911: 605, 912: 606, 913: 607, 914: 608, 915: 609, 919: 610, 920: 611, 921: 612, 922: 613, 923: 614, 924: 615, 925: 616, 926: 617, 927: 618, 931: 619, 932: 620, 933: 621, 934: 622, 935: 623, 936: 624, 940: 625, 941: 626, 942: 627, 943: 628, 944: 629, 945: 630, 955: 631, 956: 632, 957: 633, 958: 634, 959: 635, 960: 636, 961: 637, 962: 638, 963: 639, 964: 640, 965: 641, 966: 642, 967: 643, 968: 644, 969: 645, 970: 646, 971: 647, 972: 648, 973: 649, 974: 650, 975: 651, 976: 652, 977: 653, 978: 654, 979: 655, 980: 656, 981: 657, 982: 658, 983: 659, 984: 660, 985: 661, 986: 662, 987: 663, 991: 664, 992: 665, 993: 666, 994: 667, 995: 668, 996: 669, 1000: 670, 1001: 671, 1002: 
672, 1003: 673, 1004: 674, 1005: 675, 1015: 676, 1016: 677, 1017: 678, 1018: 679, 1019: 680, 1020: 681, 1021: 682, 1022: 683, 1023: 684, 1024: 685, 1025: 686, 1026: 687, 1027: 688, 1028: 689, 1029: 690, 1030: 691, 1031: 692, 1032: 693, 1033: 694, 1034: 695, 1035: 696, 1036: 697, 1037: 698, 1038: 699, 1039: 700, 1040: 701, 1041: 702, 1042: 703, 1043: 704, 1044: 705, 1045: 706, 1046: 707, 1047: 708, 1051: 709, 1052: 710, 1053: 711, 1054: 712, 1055: 713, 1056: 714, 1060: 715, 1061: 716, 1062: 717, 1063: 718, 1064: 719, 1065: 720, 1075: 721, 1076: 722, 1080: 723, 1081: 724, 1085: 725, 1086: 726, 1102: 727, 1103: 728, 1104: 729, 1105: 730, 1106: 731, 1107: 732, 1108: 733, 1109: 734, 1110: 735, 1114: 736, 1115: 737, 1116: 738, 1117: 739, 1118: 740, 1119: 741, 1120: 742, 1121: 743, 1122: 744, 1126: 745, 1127: 746, 1128: 747, 1129: 748, 1130: 749, 1131: 750, 1132: 751, 1133: 752, 1134: 753, 1138: 754, 1139: 755, 1140: 756, 1141: 757, 1142: 758, 1143: 759, 1147: 760, 1148: 761, 1149: 762, 1150: 763, 1151: 764, 1152: 765, 1153: 766, 1154: 767, 1158: 768, 1159: 769, 1163: 770, 1164: 771, 1180: 772, 1181: 773, 1182: 774, 1183: 775, 1184: 776, 1185: 777, 1186: 778, 1187: 779, 1188: 780, 1192: 781, 1193: 782, 1194: 783, 1195: 784, 1196: 785, 1197: 786, 1198: 787, 1199: 788, 1200: 789, 1204: 790, 1205: 791, 1206: 792, 1207: 793, 1208: 794, 1209: 795, 1210: 796, 1211: 797, 1212: 798, 1216: 799, 1217: 800, 1218: 801, 1219: 802, 1220: 803, 1221: 804, 1225: 805, 1226: 806, 1227: 807, 1228: 808, 1229: 809, 1230: 810, 1231: 811, 1232: 812, 1236: 813, 1237: 814, 1241: 815, 1242: 816, 1258: 817, 1259: 818, 1260: 819, 1261: 820, 1262: 821, 1263: 822, 1264: 823, 1265: 824, 1266: 825, 1270: 826, 1271: 827, 1272: 828, 1273: 829, 1274: 830, 1275: 831, 1276: 832, 1277: 833, 1278: 834, 1282: 835, 1283: 836, 1284: 837, 1285: 838, 1286: 839, 1287: 840, 1288: 841, 1289: 842, 1290: 843, 1294: 844, 1295: 845, 1296: 846, 1297: 847, 1298: 848, 1299: 849, 1303: 850, 1304: 851, 1305: 852, 1306: 853, 
1307: 854, 1308: 855, 1309: 856, 1310: 857, 1314: 858, 1315: 859, 1319: 860, 1320: 861, 1333: 862, 1334: 863, 1338: 864, 1339: 865, 1343: 866, 1344: 867, 1357: 868, 1358: 869, 1362: 870, 1363: 871, 1367: 872, 1368: 873, 1396: 874, 1397: 875, 1398: 876, 1399: 877, 1400: 878, 1401: 879, 1402: 880, 1403: 881, 1404: 882, 1405: 883, 1406: 884, 1407: 885, 1408: 886, 1409: 887, 1410: 888, 1411: 889, 1412: 890, 1413: 891, 1417: 892, 1418: 893, 1419: 894, 1420: 895, 1421: 896, 1422: 897, 1423: 898, 1424: 899, 1425: 900, 1426: 901, 1427: 902, 1428: 903, 1429: 904, 1430: 905, 1431: 906, 1432: 907, 1433: 908, 1434: 909, 1438: 910, 1439: 911, 1440: 912, 1441: 913, 1442: 914, 1443: 915, 1444: 916, 1445: 917, 1446: 918, 1447: 919, 1448: 920, 1449: 921, 1450: 922, 1451: 923, 1452: 924, 1453: 925, 1454: 926, 1455: 927, 1459: 928, 1460: 929, 1461: 930, 1462: 931, 1463: 932, 1464: 933, 1468: 934, 1469: 935, 1470: 936, 1471: 937, 1472: 938, 1473: 939, 1477: 940, 1478: 941, 1479: 942, 1480: 943, 1481: 944, 1482: 945} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory 
./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 
187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 
545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 
895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.505 s -Wrote files for 2281 helas calls in 47.140 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.442 s +Wrote files for 2281 helas calls in 46.519 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.314 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +ALOHA: aloha creates 5 routines in 0.315 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.308 s +ALOHA: aloha creates 10 routines in 0.306 s VVV1 VVV1 FFV1 @@ -254,13 +221,7 @@ ALOHA: 
aloha creates 10 routines in 0.308 s FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./HelAmps_sm.h INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory @@ -271,7 +232,9 @@ save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CO INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at 
line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG: path =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT [output.py at line 213]  +DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * * W E L C O M E to * @@ -359,6 +322,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m58.428s -user 0m57.226s -sys 0m0.949s +real 0m57.762s +user 0m56.504s +sys 0m0.940s diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! 
+ const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile 
b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile index 74db44d848..74b19033a8 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 +LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 3587866b17..c782410e30 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005717754364013672  +DEBUG: model prefixing takes 0.005506038665771484  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,67 +155,36 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.849 s +1 processes with 1240 diagrams generated in 1.841 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  -DEBUG: type(subproc_group)= [output.py at line 188]  -DEBUG: type(fortran_model)= [output.py at line 189]  -DEBUG: type(me)= me=0 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  -DEBUG: proc_id =  0 [model_handling.py at line 1052]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  +DEBUG: type(subproc_group)= [output.py at line 190]  +DEBUG: type(fortran_model)= 
[output.py at line 191]  +DEBUG: type(me)= me=0 [output.py at line 192]  INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1315]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  1536 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); 
[model_handling.py at line 1836]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1350]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.583 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.445 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: 
aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.346 s +ALOHA: aloha creates 5 routines in 0.341 s VVV1 VVV1 FFV1 @@ -231,20 +200,14 @@ ALOHA: aloha creates 5 routines in 0.346 s FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./HelAmps_sm.h INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m13.619s -user 0m12.877s -sys 0m0.114s +real 0m12.751s +user 0m12.589s +sys 0m0.108s diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! 
+ const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt 
b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 08e638f93f..ef4699c04f 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005480527877807617  +DEBUG: model prefixing takes 0.0054738521575927734  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,17 +169,17 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.078 s +8 processes with 40 diagrams generated in 0.077 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  INFO: initialize a new directory: CODEGEN_mad_gq_ttq INFO: remove old information in CODEGEN_mad_gq_ttq -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq  INFO: Creating subdirectories in 
directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards  @@ -196,38 +196,13 @@ INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1183]  
-DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -235,55 +210,30 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) 
[model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s -Wrote files for 32 helas calls in 0.231 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s +Wrote files for 32 helas calls in 0.157 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.145 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +ALOHA: aloha creates 2 
routines in 0.143 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.132 s +ALOHA: aloha creates 4 routines in 0.129 s FFV1 FFV1 FFV1 @@ -292,13 +242,7 @@ ALOHA: aloha creates 4 routines in 0.132 s FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./HelAmps_sm.h INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.cc INFO: Created files 
Parameters_sm.h and Parameters_sm.cc in directory @@ -309,7 +253,9 @@ save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CO INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG: path =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT [output.py at line 213]  +DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * * W E L C O M E to * @@ -411,6 +357,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.070s -user 0m2.298s -sys 0m0.288s +real 0m2.537s +user 0m2.168s +sys 0m0.315s diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! 
Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! + const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index dcfa5aa1cf..7aeea1667e 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -243,20 +243,13 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); -#if not( defined __CUDACC__ and defined MGONGPU_TEST_DIVERGENCE ) - imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz -#else - if( ( blockDim.x * blockIdx.x + threadIdx.x ) % 2 == 0 ) - imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz - else - ixxxxx( momenta, 0, cHel[ihel][1], +1, w_fp[1], 1 ); -#endif + ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); + oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); FFV1_2( w_fp[1], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 0., 0., w_fp[6] ); diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index 8565935186..ec65d2ccae 100644 --- 
a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -243,13 +243,13 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - ixzxxx( momenta, cHel[ihel][4], -1, w_fp[4], 4 ); + ixxxxx( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 ); FFV1_2( w_fp[4], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 0., 0., w_fp[6] ); diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ 
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile b/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile index 74db44d848..74b19033a8 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 +LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 9d11057ab5..7f29ac4333 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00550079345703125  +DEBUG: model prefixing takes 0.0052490234375  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,8 +175,8 @@ output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g u > t t~ u WEIGHTED<=3 @1 @@ -189,76 +189,30 @@ INFO: Processing color information for process: g u~ > t t~ u~ @1 INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with 
process g u~ > t t~ u~ WEIGHTED<=3 @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  -DEBUG: type(subproc_group)= [output.py at line 188]  -DEBUG: type(fortran_model)= [output.py at line 189]  -DEBUG: type(me)= me=0 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  -DEBUG: proc_id =  0 [model_handling.py at line 1052]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  +DEBUG: type(subproc_group)= [output.py at line 190]  +DEBUG: type(fortran_model)= [output.py at line 191]  +DEBUG: type(me)= me=0 [output.py at line 192]  INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1315]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, 
self.include_multi_channel =  True False [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  -DEBUG: type(subproc_group)= [output.py at line 188]  -DEBUG: type(fortran_model)= [output.py at line 189]  -DEBUG: type(me)= me=1 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  -DEBUG: proc_id =  0 [model_handling.py at line 1052]  +DEBUG: Entering 
PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  +DEBUG: type(subproc_group)= [output.py at line 190]  +DEBUG: type(fortran_model)= [output.py at line 191]  +DEBUG: type(me)= me=1 [output.py at line 192]  INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1315]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1350]  Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.145 s +ALOHA: aloha creates 2 routines in 0.147 s FFV1 FFV1 FFV1 @@ -267,20 +221,14 @@ ALOHA: aloha creates 2 routines in 0.145 s FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./HelAmps_sm.h INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
-DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.980s -user 0m0.677s -sys 0m0.055s +real 0m0.659s +user 0m0.603s +sys 0m0.042s diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! 
+ const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc index 7c85fe10ef..184b12db36 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc @@ -243,20 +243,13 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); -#if not( defined __CUDACC__ and defined MGONGPU_TEST_DIVERGENCE ) - imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz -#else - if( ( blockDim.x * blockIdx.x + threadIdx.x ) % 2 == 0 ) - imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz - else - ixxxxx( momenta, 0, cHel[ihel][1], +1, w_fp[1], 1 ); -#endif + ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); + oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); FFV1_2( w_fp[1], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 0., 0., w_fp[6] ); diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc index ddf0e84af6..a641bc4240 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc @@ -243,13 +243,13 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], 
cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - ixzxxx( momenta, cHel[ihel][4], -1, w_fp[4], 4 ); + ixxxxx( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 ); FFV1_2( w_fp[4], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 0., 0., w_fp[6] ); diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV 
remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index 58c713c07f..28c0d86a0b 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -133,69 +133,38 @@ output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_heft_gg_h Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > h HIG<=1 HIW<=1 WEIGHTED<=2 @1 INFO: Processing color information for process: g g > h HIG<=1 HIW<=1 @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  -DEBUG: type(subproc_group)= [output.py at line 188]  -DEBUG: type(fortran_model)= [output.py at line 189]  -DEBUG: type(me)= me=0 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  -DEBUG: proc_id =  0 [model_handling.py at line 1052]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  +DEBUG: type(subproc_group)= [output.py at line 190]  +DEBUG: type(fortran_model)= [output.py at line 191]  
+DEBUG: type(me)= me=0 [output.py at line 192]  INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1315]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_heft_gg_h.txt [model_handling.py at line 1350]  Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines -ALOHA: aloha creates 1 routines in 0.061 s +ALOHA: aloha creates 1 routines in 0.060 s VVS3 FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 59 , keys size = 59 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 5 , keys size = 5 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 1 , keys size = 1 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 5 , keys size = 5 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 1 , keys size = 1 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 1 , keys size = 1 [model_handling.py at line 729]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./Parameters_heft.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./Parameters_heft.cc INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. 
-DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.569s -user 0m0.428s -sys 0m0.052s +real 0m0.462s +user 0m0.381s +sys 0m0.050s diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! 
+ const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt 
b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index e2de004e5e..85441a5025 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005529165267944336  +DEBUG: model prefixing takes 0.005836009979248047  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.029 s +5 processes with 7 diagrams generated in 0.031 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.135 s +13 processes with 76 diagrams generated in 0.141 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,17 +378,17 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.916 s +65 processes with 1119 diagrams generated in 1.806 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  INFO: initialize a new directory: CODEGEN_mad_pp_tt012j INFO: remove old information in CODEGEN_mad_pp_tt012j -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j  INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards  @@ -496,44 +496,13 @@ INFO: Combined process c c~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: 
[6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: 
[101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 10, 14: 11, 15: 12, 16: 13, 17: 14, 18: 15, 19: 16, 20: 17, 21: 18, 22: 19, 23: 20, 24: 21, 25: 22, 26: 23, 27: 24, 28: 25, 29: 26, 30: 27, 31: 28, 32: 29, 33: 30, 37: 31, 38: 32, 39: 33, 40: 34, 41: 35, 42: 36, 43: 37, 44: 38, 45: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 55: 46, 56: 47, 57: 48, 58: 49, 59: 50, 60: 51, 61: 52, 62: 53, 63: 54, 67: 55, 68: 56, 69: 57, 70: 58, 71: 59, 72: 60, 73: 61, 74: 62, 75: 63, 76: 64, 77: 65, 78: 66, 79: 67, 80: 68, 81: 69, 85: 70, 86: 71, 87: 72, 88: 73, 89: 74, 90: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 100: 85, 101: 86, 102: 87, 106: 88, 107: 89, 108: 90, 109: 91, 110: 92, 111: 93, 115: 94, 116: 95, 117: 96, 118: 97, 119: 98, 120: 99, 124: 100, 125: 101, 126: 102, 127: 103, 128: 104, 129: 105} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  @@ -541,40 +510,13 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 
20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxuux.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  @@ -582,40 +524,13 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 
20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxgu.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  2 [export_cpp.py at line 712]  @@ -623,40 +538,13 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 
20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxgux.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  3 [export_cpp.py at line 712]  @@ -664,40 +552,13 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  72 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: 
[18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxgg.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  4 [export_cpp.py at line 712]  @@ -705,42 +566,13 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} 
[model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: 
subproc_number =  5 [export_cpp.py at line 712]  @@ -748,36 +580,13 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  72 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 
4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1724]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uu_ttxuu.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  6 [export_cpp.py at line 712]  @@ -785,36 +594,13 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1669]  
-DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1724]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxuux.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  7 [export_cpp.py at line 712]  @@ -822,36 +608,13 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  72 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1669] 
 -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1724]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxux_ttxuxux.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  8 [export_cpp.py at line 712]  @@ -859,36 +622,13 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1724]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uc_ttxuc.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  9 [export_cpp.py at line 712]  @@ -896,36 +636,13 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1724]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxccx.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  10 [export_cpp.py at line 712]  @@ -933,36 +650,13 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1724]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_ucx_ttxucx.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  11 [export_cpp.py at line 712]  @@ -970,36 +664,13 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1724]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxcx_ttxuxcx.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  12 [export_cpp.py at line 712]  @@ -1007,38 +678,13 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  13 [export_cpp.py at line 712]  @@ -1046,38 +692,13 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  14 [export_cpp.py at line 712]  @@ -1085,38 +706,13 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxg.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  15 [export_cpp.py at line 712]  @@ -1124,40 +720,13 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1724]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1837]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1836]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1837]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1350]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  16 [export_cpp.py at line 712]  @@ -1165,59 +734,36 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1045]  DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: proc_id =  1 [model_handling.py at line 1052]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1311]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1313]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1468]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1490]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1150]  -DEBUG: self.include_multi_channel =  [1] [model_handling.py at line 1151]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1152]  DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1176]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1] [model_handling.py at line 1177]  -DEBUG: multi_channel =  {1: [0]} [model_handling.py at line 1183]  -DEBUG: multi_channel_map =  {1: [0]} [model_handling.py at line 1669]  -DEBUG: diag_to_config =  {1: 1} [model_handling.py at line 1724]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1358]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1367]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1384]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1404]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1416]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1434]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1445]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1456]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttx.txt [model_handling.py at line 1353]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1] [export_cpp.py at line 711]  DEBUG: subproc_number =  17 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.276 s -Wrote files for 810 helas calls in 3.348 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.266 s +Wrote files for 810 helas calls in 3.032 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.333 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) 
[output.py at line 195]  +ALOHA: aloha creates 5 routines in 0.332 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.312 s +ALOHA: aloha creates 10 routines in 0.307 s VVV1 VVV1 FFV1 @@ -1233,13 +779,7 @@ ALOHA: aloha creates 10 routines in 0.312 s FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./HelAmps_sm.h INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  FileWriter for 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory @@ -1250,7 +790,9 @@ save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CO INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG: path =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT [output.py at line 213]  +DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * * W E L C O M E to * @@ -1532,6 +1074,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m9.679s -user 0m9.072s +real 0m9.245s +user 0m8.325s sys 0m0.546s diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! 
const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! + const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc index 419ca8dad1..3d4424d157 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc @@ -241,9 +241,9 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 1 *** // Wavefunction(s) for diagram number 1 - ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index fdd4c7ce0d..91ca3b410f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -243,13 +243,13 @@ namespace mg5amcCpu // Wavefunction(s) for 
diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); + oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); FFV1_2( w_fp[1], w_fp[0], COUPs[1], 0., 0., w_fp[5] ); FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index f2854178a3..fc53543eb6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -243,13 +243,13 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - ixzxxx( momenta, cHel[ihel][4], -1, w_fp[4], 4 ); + ixxxxx( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 ); FFV1_2( w_fp[4], w_fp[0], COUPs[1], 0., 0., w_fp[5] ); FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc index 99ca7b88a2..4951b43b8d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc @@ -241,9 +241,9 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 5 *** // Wavefunction(s) for diagram number 1 - ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx 
only uses pz + ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc index c1788e814a..4d62df6c3a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc @@ -249,16 +249,9 @@ namespace mg5amcCpu ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); -#if not( defined __CUDACC__ and defined MGONGPU_TEST_DIVERGENCE ) - oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); -#else - if( ( blockDim.x * blockIdx.x + threadIdx.x ) % 2 == 0 ) - oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); - else - oxxxxx( momenta, 0, cHel[ihel][4], +1, w_fp[4], 4 ) -#endif + oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); + ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[6] ); FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc index abc5ef9719..2307f25625 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc @@ -243,7 +243,7 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); @@ -251,7 +251,7 @@ namespace mg5amcCpu vxxxxx( 
momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - oxzxxx( momenta, cHel[ihel][5], +1, w_fp[5], 5 ); + oxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[6] ); FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc index 2656f0ca15..305ce2fd5e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc @@ -243,7 +243,7 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); @@ -251,7 +251,7 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); + ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[6] ); FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc index d1e9379b52..54ac36d31c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc @@ -243,17 +243,17 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 7 *** // Wavefunction(s) for diagram number 1 - ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); - imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], 
cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); + oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - oxzxxx( momenta, cHel[ihel][5], +1, w_fp[5], 5 ); + oxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[6] ); FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 0., 0., w_fp[7] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc index 274c259bc6..34ff4139ab 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc @@ -249,17 +249,17 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 7 *** // Wavefunction(s) for diagram number 1 - ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); + oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); + ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[6] ); FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 0., 0., w_fp[7] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc index cab6dd62ef..9dd3b1764a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc @@ -241,17 +241,17 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 14 *** // 
Wavefunction(s) for diagram number 1 - ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); - imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); + oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - oxzxxx( momenta, cHel[ihel][5], +1, w_fp[5], 5 ); + oxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[6] ); FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 0., 0., w_fp[7] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc index 60c41f8fb1..8e57cf0896 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc @@ -249,17 +249,17 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 7 *** // Wavefunction(s) for diagram number 1 - ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); + oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); + ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 0., 0., w_fp[6] ); FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 0., 0., w_fp[7] ); diff --git 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc index 71dbdec476..13d360e5ad 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc @@ -241,9 +241,9 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 36 *** // Wavefunction(s) for diagram number 1 - ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc index bc743c56c3..dd25c56cba 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc @@ -241,17 +241,17 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 14 *** // Wavefunction(s) for diagram number 1 - ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); + oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); + ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 0., 0., w_fp[6] ); FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 0., 0., w_fp[7] ); diff --git 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc index c09a29e015..61f388d16b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc @@ -243,17 +243,17 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 7 *** // Wavefunction(s) for diagram number 1 - opzxxx( momenta, cHel[ihel][0], -1, w_fp[0], 0 ); // NB: opzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - ixzxxx( momenta, cHel[ihel][4], -1, w_fp[4], 4 ); + ixxxxx( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 ); - ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); + ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); FFV1P0_3( w_fp[4], w_fp[0], COUPs[1], 0., 0., w_fp[6] ); FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 0., 0., w_fp[7] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc index d4a4794688..f0f57381de 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc @@ -241,17 +241,17 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 14 *** // Wavefunction(s) for diagram number 1 - opzxxx( momenta, cHel[ihel][0], -1, w_fp[0], 0 ); // NB: opzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, 
w_fp[3], 3 ); - ixzxxx( momenta, cHel[ihel][4], -1, w_fp[4], 4 ); + ixxxxx( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 ); - ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); + ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); FFV1P0_3( w_fp[4], w_fp[0], COUPs[1], 0., 0., w_fp[6] ); FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 0., 0., w_fp[7] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) 
-l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile index 74db44d848..74b19033a8 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 +LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) From 99f00081c05767427afe3bd05723ffb340fceec3 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 10:30:05 +0200 Subject: [PATCH 027/119] [oct23av] in CODEGEN, minor improvements in comments and verbosity of model_handling.py --- .../CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index c0ff7468a5..e4e1d68e95 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -196,7 +196,7 @@ def get_header_txt(self, name=None, couplings=None,mode=''): list_arg = '[]' # AV from cxtype_sv to fptype array (running alphas #373) point = self.type2def['pointer_coup'] args.append('%s %s%s%s'% (type, point, argname, list_arg)) - args.append('double Ccoeff%s'% argname[7:]) + args.append('double Ccoeff%s'% argname[7:]) # OM for 'unary minus' #628 else: args.append('%s %s%s'% (type, argname, list_arg)) if not self.offshell: @@ -535,12 +535,13 @@ def write_MultContainer(self, obj, prefactor=True): text = '%(factors)s' return text % data + # OM - overload aloha_writers.WriteALOHA and ALOHAWriterForCPP methods (handle 'unary minus' #628) def change_var_format(self, obj): """ """ if obj.startswith('COUP'): out = super().change_var_format(obj) postfix = out[4:] - return "Ccoeff%s*%s" % (postfix, out) + return "Ccoeff%s*%s" % (postfix, out) # OM for 'unary minus' #628 else: return super().change_var_format(obj) @@ -1642,13 +1643,14 @@ def format_coupling(self, call): alias[coup] = len(alias) if name == 'cIPD': call = call.replace('m_pars->%s%s' % (sign, coup), - '%s%s[%s]' % (sign, name, alias[coup])) + '%s%s[%s]' % (sign, name, alias[coup])) else: ###call = 
call.replace('m_pars->%s%s' % (sign, coup), ### '%scxmake( cIPC[%s], cIPC[%s] )' % ### (sign, 2*alias[coup],2*alias[coup]+1)) - misc.sprint(name, alias[coup]) + ###misc.sprint(name, alias[coup]) # AV from cIPCs to COUP array (running alphas #373) + # OM fix handling of 'unary minus' #628 call = call.replace('m_pars->%s%s' % (sign, coup), 'COUPs[%s], %s' % (alias[coup], '1.0' if not sign else '-1.0')) return call From b7122bf13cc3e471a4b0d35f7f20f34244afc6c4 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 10:36:28 +0200 Subject: [PATCH 028/119] [oct23av] regenerate 7 mad and 6 sa processes (all but eemumu) after Olivier's patch for unary minus #628 All FFV functions now have an extra argument Ccoeff in their signature. I checked with a quick test that tput and tmad tests look ok for gg_tt (logs not saved) ./tput/teeThroughputX.sh -ggtt -makeclean -makej ./tmad/teeMadX.sh -ggtt +10x There appears to be a ~10% improvement in performance in ggtt tests? To be tested on the other processes... NB: I also quickly tried to see if the unary minus issue itself has disappeared. This was failing with 'unary minus' issues before Olivier's patch, it now fails elsewhere after the patch. It seems that the 'unary minus' issue #628 itself has been fixed. 
./CODEGEN/generateAndCompare.sh susy_gg_tt cd susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/ make -j HRDCOD=1 |& grep unary --- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 16 +- .../SubProcesses/P1_gg_ttx/CPPProcess.cc | 12 +- epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h | 8 + .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 12 +- .../P1_Sigma_sm_gg_ttx/CPPProcess.cc | 12 +- epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h | 8 + .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 26 +- .../SubProcesses/P1_gg_ttx/CPPProcess.cc | 12 +- .../SubProcesses/P2_gg_ttxg/CPPProcess.cc | 62 +- epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h | 18 + .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 24 +- .../SubProcesses/P1_gg_ttxg/CPPProcess.cc | 62 +- epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h | 18 + .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 14 +- .../P1_Sigma_sm_gg_ttxg/CPPProcess.cc | 62 +- epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h | 18 + .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 24 +- .../SubProcesses/P1_gg_ttxgg/CPPProcess.cc | 432 +- epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h | 24 + .../CODEGEN_cudacpp_gg_ttgg_log.txt | 16 +- .../P1_Sigma_sm_gg_ttxgg/CPPProcess.cc | 432 +- epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h | 24 + .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 22 +- .../SubProcesses/P1_gg_ttxggg/CPPProcess.cc | 4548 ++++++++--------- epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h | 24 + .../CODEGEN_cudacpp_gg_ttggg_log.txt | 16 +- .../P1_Sigma_sm_gg_ttxggg/CPPProcess.cc | 4548 ++++++++--------- epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h | 24 + .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 28 +- .../SubProcesses/P1_gu_ttxu/CPPProcess.cc | 22 +- .../SubProcesses/P1_gux_ttxux/CPPProcess.cc | 22 +- epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h | 10 + .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 16 +- .../P1_Sigma_sm_gu_ttxu/CPPProcess.cc | 22 +- .../P1_Sigma_sm_gux_ttxux/CPPProcess.cc | 22 +- epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h | 10 + .../CODEGEN_cudacpp_heft_gg_h_log.txt | 8 
+- .../P1_Sigma_heft_gg_h/CPPProcess.cc | 2 +- .../cudacpp/heft_gg_h.sa/src/HelAmps_heft.h | 2 + .../CODEGEN_mad_pp_tt012j_log.txt | 128 +- .../SubProcesses/P0_gg_ttx/CPPProcess.cc | 12 +- .../SubProcesses/P0_uux_ttx/CPPProcess.cc | 4 +- .../SubProcesses/P1_gg_ttxg/CPPProcess.cc | 62 +- .../SubProcesses/P1_gu_ttxu/CPPProcess.cc | 22 +- .../SubProcesses/P1_gux_ttxux/CPPProcess.cc | 22 +- .../SubProcesses/P1_uux_ttxg/CPPProcess.cc | 22 +- .../SubProcesses/P2_gg_ttxgg/CPPProcess.cc | 432 +- .../SubProcesses/P2_gg_ttxuux/CPPProcess.cc | 134 +- .../SubProcesses/P2_gu_ttxgu/CPPProcess.cc | 134 +- .../SubProcesses/P2_gux_ttxgux/CPPProcess.cc | 134 +- .../SubProcesses/P2_uc_ttxuc/CPPProcess.cc | 32 +- .../SubProcesses/P2_ucx_ttxucx/CPPProcess.cc | 32 +- .../SubProcesses/P2_uu_ttxuu/CPPProcess.cc | 60 +- .../SubProcesses/P2_uux_ttxccx/CPPProcess.cc | 32 +- .../SubProcesses/P2_uux_ttxgg/CPPProcess.cc | 134 +- .../SubProcesses/P2_uux_ttxuux/CPPProcess.cc | 60 +- .../P2_uxcx_ttxuxcx/CPPProcess.cc | 32 +- .../P2_uxux_ttxuxux/CPPProcess.cc | 60 +- epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h | 24 + 59 files changed, 6233 insertions(+), 6021 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 43fc9daf0f..7ea852db91 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0053479671478271484  +DEBUG: model prefixing takes 0.0053539276123046875  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,12 +174,12 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  @@ -192,12 +192,12 @@ Wrote files for 10 helas calls in 0.070 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.145 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.130 s +ALOHA: aloha creates 4 routines in 0.131 s VVV1 FFV1 FFV1 @@ -300,6 +300,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.280s +real 0m2.347s user 0m1.984s -sys 0m0.287s +sys 0m0.295s diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index 8d41a93302..02f655f48c 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -246,10 +246,10 @@ namespace mg5amcCpu ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[4] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -260,10 +260,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 3 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -273,10 +273,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 3 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h index 94bf8aca52..07d0bfa887 100644 --- a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h @@ -862,6 +862,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -875,6 +876,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -885,6 +887,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -897,6 +900,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -909,6 +913,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -947,6 +952,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -970,6 +976,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1001,6 +1008,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt 
b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 58d59778d2..814a040ad0 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005294084548950195  +DEBUG: model prefixing takes 0.005364656448364258  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,14 +174,14 @@ INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. 
Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.145 s +ALOHA: aloha creates 2 routines in 0.141 s VVV1 FFV1 FFV1 @@ -197,6 +197,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.537s -user 0m0.477s -sys 0m0.055s +real 0m0.541s +user 0m0.476s +sys 0m0.054s diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc index 528f0c80d9..141d1f24ac 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc @@ -246,10 +246,10 @@ namespace mg5amcCpu ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[4] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -259,10 +259,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 3 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], 
w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 3 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif diff --git a/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h index 94bf8aca52..07d0bfa887 100644 --- a/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h @@ -862,6 +862,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -875,6 +876,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -885,6 +887,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -897,6 +900,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -909,6 +913,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const 
fptype W1, fptype allV1[] ) @@ -947,6 +952,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -970,6 +976,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1001,6 +1008,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index f32dfe22ac..82fd1eb100 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~; add process g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005346536636352539  +DEBUG: model prefixing takes 0.005370140075683594  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -183,12 +183,12 @@ INFO: Processing color information for process: g g > t t~ g @2 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  @@ -197,12 +197,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  @@ -210,7 +210,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.041 s Wrote files for 46 helas calls in 0.180 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -218,7 +218,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.320 s +ALOHA: aloha creates 5 routines in 0.318 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -226,7 +226,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.305 s +ALOHA: aloha creates 10 routines in 0.306 s VVV1 VVV1 FFV1 @@ -344,6 +344,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.911s -user 0m2.516s -sys 0m0.311s +real 0m2.881s +user 0m2.519s +sys 0m0.314s diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index 8d41a93302..02f655f48c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -246,10 +246,10 @@ namespace mg5amcCpu ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[4] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -260,10 +260,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 3 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -273,10 +273,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 3 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += 
cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc index 32555ba549..ce1badffca 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc @@ -248,11 +248,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -265,10 +265,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 16 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[7] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -279,10 +279,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 16 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], 
w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -293,11 +293,11 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 16 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -307,10 +307,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 16 *** // Wavefunction(s) for diagram number 5 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -324,7 +324,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -334,11 +334,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 16 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1_1( 
w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,7 +351,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -365,7 +365,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -375,10 +375,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 16 *** // Wavefunction(s) for diagram number 10 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[5] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -392,7 +392,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -406,7 +406,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 12 - VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -422,7 +422,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -435,7 +435,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -448,7 +448,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -461,22 +461,22 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 16 *** // Wavefunction(s) for diagram number 16 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[10] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[6] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[9] ); + VVVV1P0_1( w_fp[0], w_fp[1], 
w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; diff --git a/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h index 4a326fae62..8995b15c82 100644 --- a/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -886,6 +888,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -896,6 +899,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] 
) ALWAYS_INLINE; @@ -908,6 +912,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -920,6 +925,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -933,6 +939,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -946,6 +953,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -959,6 +967,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -972,6 +981,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1006,6 +1016,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1044,6 +1055,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1067,6 +1079,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1098,6 +1111,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1129,6 +1143,7 @@ namespace 
mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1160,6 +1175,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1194,6 +1210,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1228,6 +1245,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index a0950268a2..56445aa2f6 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005325794219970703  +DEBUG: model prefixing takes 0.0054433345794677734  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.023 s +1 processes with 16 diagrams generated in 0.022 s Total: 1 processes with 16 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -174,12 +174,12 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  @@ -187,15 +187,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s -Wrote files for 36 helas calls in 0.118 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.040 s +Wrote files for 36 helas calls in 0.124 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.326 s +ALOHA: aloha creates 5 routines in 0.344 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -203,7 +203,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.311 s +ALOHA: aloha creates 10 routines in 0.328 s VVV1 VVV1 FFV1 @@ -317,6 +317,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.814s -user 0m2.484s -sys 0m0.289s +real 0m3.355s +user 0m2.552s +sys 0m0.301s diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc index 62e8d65a7d..f7f5899260 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc @@ -248,11 +248,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -265,10 +265,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 16 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[7] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -279,10 +279,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 16 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], 
w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -293,11 +293,11 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 16 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -307,10 +307,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 16 *** // Wavefunction(s) for diagram number 5 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -324,7 +324,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -334,11 +334,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 16 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - 
FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,7 +351,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -365,7 +365,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -375,10 +375,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 16 *** // Wavefunction(s) for diagram number 10 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[5] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -392,7 +392,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[5], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -406,7 +406,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 12 - VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -422,7 +422,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -435,7 +435,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -448,7 +448,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -461,22 +461,22 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 16 *** // Wavefunction(s) for diagram number 16 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[10] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[6] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[9] ); + VVVV1P0_1( w_fp[0], 
w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; diff --git a/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h index 4a326fae62..8995b15c82 100644 --- a/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -886,6 +888,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -896,6 +899,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype 
allF1[] ) ALWAYS_INLINE; @@ -908,6 +912,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -920,6 +925,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -933,6 +939,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -946,6 +953,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -959,6 +967,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -972,6 +981,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1006,6 +1016,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1044,6 +1055,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1067,6 +1079,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1098,6 +1111,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1129,6 +1143,7 @@ 
namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1160,6 +1175,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1194,6 +1210,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1228,6 +1245,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 2633865772..f14e3d6d27 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005397796630859375  +DEBUG: model prefixing takes 0.005347490310668945  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,9 +174,9 @@ INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -Generated helas calls for 1 subprocesses (16 diagrams) in 0.036 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -184,7 +184,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.323 s +ALOHA: aloha creates 5 routines in 0.320 s VVV1 VVV1 FFV1 @@ -205,6 +205,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.961s -user 0m0.719s -sys 0m0.054s +real 0m0.782s +user 0m0.723s +sys 0m0.049s diff --git 
a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc index 56af4b74c1..9393033e26 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc @@ -248,11 +248,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -264,10 +264,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 16 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[7] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -277,10 +277,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 16 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here 
the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -290,11 +290,11 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 16 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -303,10 +303,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 16 *** // Wavefunction(s) for diagram number 5 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -319,7 +319,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -328,11 +328,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 16 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, 
cIPD[0], cIPD[1], w_fp[5] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -344,7 +344,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -357,7 +357,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -366,10 +366,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 16 *** // Wavefunction(s) for diagram number 10 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[5] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -382,7 +382,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -395,7 +395,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 12 - VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -410,7 +410,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -422,7 +422,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -434,7 +434,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -446,12 +446,12 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 16 *** // Wavefunction(s) for diagram number 16 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[10] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[6] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[9] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], 
COUPs[2], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -459,7 +459,7 @@ namespace mg5amcCpu jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -467,7 +467,7 @@ namespace mg5amcCpu jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif diff --git a/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h index 4a326fae62..8995b15c82 100644 --- a/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -886,6 +888,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; 
//-------------------------------------------------------------------------- @@ -896,6 +899,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -908,6 +912,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -920,6 +925,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -933,6 +939,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -946,6 +953,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -959,6 +967,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -972,6 +981,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1006,6 +1016,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1044,6 +1055,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1067,6 +1079,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const 
fptype M1, const fptype W1, fptype allF1[] ) @@ -1098,6 +1111,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1129,6 +1143,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1160,6 +1175,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1194,6 +1210,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1228,6 +1245,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 55d49a2465..8f353c4129 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0052874088287353516  +DEBUG: model prefixing takes 0.0053937435150146484  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.157 s +1 processes with 123 diagrams generated in 0.155 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -174,12 +174,12 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  @@ -187,15 +187,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.420 s -Wrote files for 222 helas calls in 0.679 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.422 s +Wrote files for 222 helas calls in 0.674 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.327 s +ALOHA: aloha creates 5 routines in 0.325 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -203,7 +203,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.309 s +ALOHA: aloha creates 10 routines in 0.307 s VVV1 VVV1 FFV1 @@ -320,6 +320,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.934s -user 0m3.533s -sys 0m0.319s +real 0m3.945s +user 0m3.541s +sys 0m0.300s diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc index 71c4eee18c..896d64343e 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc @@ -250,11 +250,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 1 - VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -263,7 +263,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -272,7 +272,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -285,10 +285,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 
OF 123 *** // Wavefunction(s) for diagram number 2 - VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 2 - VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -305,10 +305,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 123 *** // Wavefunction(s) for diagram number 3 - VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 0., 0., w_fp[9] ); + VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -325,10 +325,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 123 *** // Wavefunction(s) for diagram number 4 - VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 4 - VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -345,11 +345,11 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 123 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) 
for diagram number 5 - FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -363,7 +363,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -376,10 +376,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 123 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -390,10 +390,10 @@ namespace mg5amcCpu // *** DIAGRAM 8 OF 123 *** // Wavefunction(s) for diagram number 8 - FFV1_1( w_fp[2], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); + FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -407,7 +407,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[8], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -420,10 +420,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 123 *** // Wavefunction(s) for diagram number 10 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[15] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -434,10 +434,10 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 123 *** // Wavefunction(s) for diagram number 11 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -451,7 +451,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -467,7 +467,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -481,7 +481,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -497,7 +497,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -513,7 +513,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -526,12 +526,12 @@ namespace mg5amcCpu // *** DIAGRAM 17 OF 123 *** // Wavefunction(s) for diagram number 17 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - FFV1_1( w_fp[12], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -541,10 +541,10 @@ namespace mg5amcCpu // *** DIAGRAM 18 OF 123 *** // Wavefunction(s) for diagram number 18 - FFV1_1( w_fp[12], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -557,7 +557,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -568,11 +568,11 @@ namespace mg5amcCpu // *** DIAGRAM 20 OF 123 *** // Wavefunction(s) for diagram number 20 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] ); // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -588,7 +588,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -602,7 +602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -613,10 +613,10 @@ namespace mg5amcCpu // *** DIAGRAM 23 OF 123 *** // Wavefunction(s) for diagram number 23 - VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 0., 0., w_fp[18] ); + VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] ); // Amplitude(s) for diagram number 23 - VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -632,7 +632,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -646,7 +646,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 25 - FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -657,10 +657,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 123 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[12], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[19] ); + FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] ); // Amplitude(s) for diagram 
number 26 - FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -673,7 +673,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -686,7 +686,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 28 - FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -699,7 +699,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -712,7 +712,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 30 - FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -726,7 +726,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 31 - VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -739,22 +739,22 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 123 *** // Wavefunction(s) for diagram number 32 - VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[17] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[19] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[8] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -763,12 +763,12 @@ namespace mg5amcCpu // *** DIAGRAM 33 OF 123 *** // Wavefunction(s) for diagram number 33 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[12], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[20], w_fp[9], w_fp[5], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -778,10 +778,10 @@ namespace mg5amcCpu // *** DIAGRAM 34 OF 123 *** // Wavefunction(s) for diagram number 34 - FFV1_2( w_fp[12], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 34 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 34 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -794,7 +794,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -805,10 +805,10 @@ namespace mg5amcCpu // *** DIAGRAM 36 OF 123 *** // Wavefunction(s) for diagram number 36 - FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 0., 0., w_fp[22] ); + FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -824,7 +824,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 37 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( 
channelId == 37 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -838,7 +838,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 38 - FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 38 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -852,7 +852,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 39 - VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 39 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -868,7 +868,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 40 - FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 40 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -882,7 +882,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 41 - FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 41 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -893,10 +893,10 @@ namespace mg5amcCpu // *** DIAGRAM 42 OF 123 *** // Wavefunction(s) for diagram number 42 - FFV1_2( w_fp[12], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_2( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 42 - FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[23], w_fp[11], w_fp[5], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 42 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -909,7 +909,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 43 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 43 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -922,7 +922,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 44 - FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 44 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -935,7 +935,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 45 - FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 45 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -948,7 +948,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 46 - FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 46 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -962,7 +962,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 47 - VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 47 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -978,17 +978,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 48 - FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[11] -= amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; - FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -997,11 +997,11 @@ namespace mg5amcCpu // *** DIAGRAM 49 OF 123 *** // Wavefunction(s) for diagram number 49 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[12] ); - FFV1_2( w_fp[3], w_fp[12], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] ); + FFV1_2( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 49 - FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 49 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1012,10 +1012,10 @@ namespace mg5amcCpu // *** DIAGRAM 50 OF 123 *** // Wavefunction(s) for diagram number 50 - VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 50 - FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 50 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId 
!= 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1031,7 +1031,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 51 - FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 51 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1042,10 +1042,10 @@ namespace mg5amcCpu // *** DIAGRAM 52 OF 123 *** // Wavefunction(s) for diagram number 52 - FFV1_1( w_fp[2], w_fp[12], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); + FFV1_1( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 52 - FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 52 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1059,7 +1059,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 53 - FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 53 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1075,7 +1075,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 54 - FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 54 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1089,7 +1089,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 55 - FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
if( channelId == 55 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1105,7 +1105,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 56 - FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 56 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1121,7 +1121,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 57 - VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 57 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1141,7 +1141,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 58 - VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1150,7 +1150,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1159,7 +1159,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1172,10 +1172,10 @@ namespace mg5amcCpu // *** DIAGRAM 59 OF 123 *** // Wavefunction(s) for diagram number 59 - VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 59 - VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 59 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1195,7 +1195,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 60 - VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 60 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1215,7 +1215,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 61 - FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 61 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1231,7 +1231,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 62 - FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 62 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1245,7 +1245,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 63 - FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 63 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1261,7 +1261,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 64 - FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 64 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1272,11 +1272,11 @@ namespace mg5amcCpu // *** DIAGRAM 65 OF 123 *** // Wavefunction(s) for diagram number 65 - VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 0., 0., w_fp[20] ); - FFV1_2( w_fp[3], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] ); + FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 65 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 65 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1287,10 +1287,10 @@ namespace mg5amcCpu // *** DIAGRAM 66 OF 123 *** // Wavefunction(s) for diagram number 66 - VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 0., 0., w_fp[22] ); + VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 66 - FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 66 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1306,7 +1306,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 67 - FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[9], w_fp[20], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 67 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1317,10 +1317,10 @@ namespace mg5amcCpu // *** DIAGRAM 68 OF 123 *** // Wavefunction(s) for diagram number 68 - FFV1_1( w_fp[2], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 68 - FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 68 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1334,7 +1334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 69 - FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 69 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1350,7 +1350,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 70 - FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 70 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1364,7 +1364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 71 - FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 71 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1380,7 +1380,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 72 - FFV1_0( w_fp[21], w_fp[2], 
w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 72 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1396,7 +1396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 73 - VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 73 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1416,7 +1416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 74 - VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1425,7 +1425,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1434,7 +1434,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1447,10 +1447,10 @@ namespace mg5amcCpu // *** DIAGRAM 75 OF 123 *** // Wavefunction(s) for diagram number 75 - VVV1P0_1( w_fp[20], 
w_fp[1], COUPs[0], 0., 0., w_fp[12] ); + VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 75 - VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 75 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1470,7 +1470,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 76 - VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 76 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1490,7 +1490,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 77 - FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 77 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1506,7 +1506,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 78 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 78 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1520,7 +1520,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 79 - FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 79 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1536,7 +1536,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 80 - FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 80 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1547,10 +1547,10 @@ namespace mg5amcCpu // *** DIAGRAM 81 OF 123 *** // Wavefunction(s) for diagram number 81 - FFV1_1( w_fp[9], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 81 - FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 81 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1560,10 +1560,10 @@ namespace mg5amcCpu // *** DIAGRAM 82 OF 123 *** // Wavefunction(s) for diagram number 82 - FFV1_2( w_fp[15], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 82 - FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 82 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1576,7 +1576,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 83 - FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 83 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1586,10 +1586,10 @@ namespace mg5amcCpu // *** DIAGRAM 84 OF 123 *** // Wavefunction(s) for diagram number 84 - FFV1_2( w_fp[13], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[21] 
); + FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 84 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 84 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1602,7 +1602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 85 - FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 85 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1613,10 +1613,10 @@ namespace mg5amcCpu // *** DIAGRAM 86 OF 123 *** // Wavefunction(s) for diagram number 86 - VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 86 - FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 86 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1629,10 +1629,10 @@ namespace mg5amcCpu // *** DIAGRAM 87 OF 123 *** // Wavefunction(s) for diagram number 87 - FFV1_2( w_fp[16], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); + FFV1_2( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 87 - FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 87 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1642,10 +1642,10 @@ namespace mg5amcCpu // *** DIAGRAM 88 OF 123 *** // Wavefunction(s) 
for diagram number 88 - FFV1_1( w_fp[11], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); + FFV1_1( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 88 - FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 88 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1658,7 +1658,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 89 - FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 89 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1668,10 +1668,10 @@ namespace mg5amcCpu // *** DIAGRAM 90 OF 123 *** // Wavefunction(s) for diagram number 90 - FFV1_1( w_fp[14], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[24] ); + FFV1_1( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] ); // Amplitude(s) for diagram number 90 - FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 90 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1684,7 +1684,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 91 - FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 91 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1698,7 +1698,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 92 - FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[23], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 92 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1714,7 +1714,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 93 - VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1723,7 +1723,7 @@ namespace mg5amcCpu jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1732,7 +1732,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1745,10 +1745,10 @@ namespace mg5amcCpu // *** DIAGRAM 94 OF 123 *** // Wavefunction(s) for diagram number 94 - VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 0., 0., w_fp[22] ); + VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 94 - VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 94 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1765,10 +1765,10 @@ 
namespace mg5amcCpu // *** DIAGRAM 95 OF 123 *** // Wavefunction(s) for diagram number 95 - VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 0., 0., w_fp[25] ); + VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 95 - VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 95 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1788,7 +1788,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 96 - FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 96 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1804,7 +1804,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 97 - FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 97 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1818,7 +1818,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 98 - FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 98 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1834,7 +1834,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 99 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 99 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv 
+= cxabs2( amp_sv[0] ); @@ -1848,7 +1848,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 100 - VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1857,7 +1857,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1866,7 +1866,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1879,10 +1879,10 @@ namespace mg5amcCpu // *** DIAGRAM 101 OF 123 *** // Wavefunction(s) for diagram number 101 - VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 101 - VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 101 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1902,7 +1902,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 102 - VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[18], 
w_fp[4], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 102 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1922,7 +1922,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 103 - FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 103 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1938,7 +1938,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 104 - FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 104 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1952,7 +1952,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 105 - FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 105 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1968,7 +1968,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 106 - FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 106 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1982,7 +1982,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 107 - VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 
0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1991,7 +1991,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2000,7 +2000,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2016,7 +2016,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 108 - VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 108 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2036,7 +2036,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 109 - VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 109 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2056,7 +2056,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 110 - FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 110 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2069,7 +2069,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 111 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 111 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2082,7 +2082,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 112 - FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 112 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2095,7 +2095,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 113 - FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 113 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2105,12 +2105,12 @@ namespace mg5amcCpu // *** DIAGRAM 114 OF 123 *** // Wavefunction(s) for diagram number 114 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[12] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[24] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 114 - VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2119,7 +2119,7 @@ namespace mg5amcCpu jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2128,7 +2128,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2144,17 +2144,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 115 - FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -2166,17 +2166,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 116 - FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; - FFV1_0( w_fp[13], w_fp[2], w_fp[24], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[12] += amp_sv[0]; - FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -2185,12 +2185,12 @@ namespace mg5amcCpu // *** DIAGRAM 117 OF 123 *** // Wavefunction(s) for diagram number 117 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[13] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 117 - VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2199,7 +2199,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2208,7 +2208,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2224,17 +2224,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 118 - FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[16] += amp_sv[0]; @@ -2246,17 +2246,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 119 - FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; - FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; - FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[18] += amp_sv[0]; @@ -2265,22 +2265,22 @@ namespace mg5amcCpu // *** DIAGRAM 120 OF 123 *** // Wavefunction(s) for diagram number 120 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[15] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[13] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); + VVVV3P0_1( w_fp[0], 
w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] ); // Amplitude(s) for diagram number 120 - FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -2292,17 +2292,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 121 - FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; - FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[19] += amp_sv[0]; @@ -2314,7 +2314,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 122 - VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2323,7 +2323,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2332,7 +2332,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2348,7 +2348,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 123 - VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2357,7 +2357,7 @@ namespace mg5amcCpu jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2366,7 +2366,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; diff --git a/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h 
b/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h index 9cea8bcbe7..9b946c21e1 100644 --- a/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -886,6 +888,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -896,6 +899,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -908,6 +912,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -920,6 +925,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -934,6 +940,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -945,6 +952,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -959,6 +967,7 @@ namespace mg5amcCpu const fptype 
allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -970,6 +979,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -984,6 +994,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -995,6 +1006,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -1008,6 +1020,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1042,6 +1055,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1080,6 +1094,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1103,6 +1118,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1134,6 +1150,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1165,6 +1182,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1197,6 +1215,7 @@ 
namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1225,6 +1244,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1260,6 +1280,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1288,6 +1309,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1323,6 +1345,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1351,6 +1374,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 7c54519581..37ebefdc12 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00528264045715332  +DEBUG: model prefixing takes 0.005511760711669922  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.155 s +1 processes with 123 diagrams generated in 0.156 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -174,9 +174,9 @@ INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.429 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.430 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -184,7 +184,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.315 s +ALOHA: aloha creates 5 routines in 0.316 s VVV1 VVV1 FFV1 @@ -208,6 +208,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m1.441s -user 0m1.382s -sys 0m0.046s +real 0m1.541s +user 0m1.391s +sys 0m0.043s diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc index 55877e70c4..927a19a802 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc @@ -250,11 +250,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 1 - VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -266,7 +266,7 @@ namespace mg5amcCpu 
jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -278,7 +278,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -294,10 +294,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 123 *** // Wavefunction(s) for diagram number 2 - VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 2 - VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -313,10 +313,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 123 *** // Wavefunction(s) for diagram number 3 - VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 0., 0., w_fp[9] ); + VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -332,10 +332,10 @@ namespace 
mg5amcCpu // *** DIAGRAM 4 OF 123 *** // Wavefunction(s) for diagram number 4 - VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 4 - VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -351,11 +351,11 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 123 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -368,7 +368,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -380,10 +380,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 123 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -393,10 +393,10 @@ namespace mg5amcCpu // *** DIAGRAM 8 OF 123 *** // Wavefunction(s) for diagram number 8 - FFV1_1( w_fp[2], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); + FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -409,7 +409,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -421,10 +421,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 123 *** // Wavefunction(s) for diagram number 10 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[15] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -434,10 +434,10 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 123 *** // Wavefunction(s) for diagram number 11 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -450,7 +450,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -465,7 +465,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -478,7 +478,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -493,7 +493,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -508,7 +508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -520,12 +520,12 @@ namespace mg5amcCpu // *** DIAGRAM 17 OF 123 *** // Wavefunction(s) for diagram number 17 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - FFV1_1( w_fp[12], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -534,10 +534,10 @@ namespace mg5amcCpu // *** DIAGRAM 18 OF 123 *** // Wavefunction(s) for diagram number 18 - FFV1_1( w_fp[12], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -549,7 +549,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -559,11 +559,11 @@ namespace mg5amcCpu // *** DIAGRAM 20 OF 123 *** // Wavefunction(s) for diagram number 20 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], 
w_fp[12], COUPs[1], 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] ); // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -578,7 +578,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -591,7 +591,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -601,10 +601,10 @@ namespace mg5amcCpu // *** DIAGRAM 23 OF 123 *** // Wavefunction(s) for diagram number 23 - VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 0., 0., w_fp[18] ); + VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] ); // Amplitude(s) for diagram number 23 - VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -619,7 +619,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -632,7 +632,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 25 - FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -642,10 +642,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 123 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[12], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[19] ); + FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -657,7 +657,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -669,7 +669,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 28 - FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -681,7 +681,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -693,7 +693,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 30 - FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -706,7 +706,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 31 - VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -718,12 +718,12 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 123 *** // Wavefunction(s) for diagram number 32 - VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[17] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[19] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[8] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -731,7 +731,7 @@ namespace mg5amcCpu jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -739,7 +739,7 @@ namespace mg5amcCpu jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -751,12 +751,12 @@ namespace mg5amcCpu // *** DIAGRAM 33 OF 123 *** // Wavefunction(s) for diagram number 33 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[12], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -765,10 +765,10 @@ namespace mg5amcCpu // *** DIAGRAM 34 OF 123 *** // Wavefunction(s) for diagram number 34 - FFV1_2( w_fp[12], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 34 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -780,7 +780,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 35 - FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -790,10 +790,10 @@ namespace mg5amcCpu // *** DIAGRAM 36 OF 123 *** // Wavefunction(s) for diagram number 36 - FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 0., 0., w_fp[22] ); + FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -808,7 +808,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 37 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -821,7 +821,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 38 - FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -834,7 +834,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 39 - VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -849,7 +849,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 40 - FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -862,7 +862,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 41 - FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -872,10 +872,10 @@ namespace mg5amcCpu // *** DIAGRAM 42 OF 123 *** // Wavefunction(s) for diagram number 42 - FFV1_2( w_fp[12], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_2( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 42 - FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -887,7 +887,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 43 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -899,7 +899,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 44 - FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-911,7 +911,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 45 - FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -923,7 +923,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 46 - FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -936,7 +936,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 47 - VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -951,7 +951,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 48 - FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -959,7 +959,7 @@ namespace mg5amcCpu jamp_sv[11] -= amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -967,7 +967,7 @@ namespace mg5amcCpu jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; - FFV1_0( w_fp[12], w_fp[2], 
w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -979,11 +979,11 @@ namespace mg5amcCpu // *** DIAGRAM 49 OF 123 *** // Wavefunction(s) for diagram number 49 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[12] ); - FFV1_2( w_fp[3], w_fp[12], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] ); + FFV1_2( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 49 - FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -993,10 +993,10 @@ namespace mg5amcCpu // *** DIAGRAM 50 OF 123 *** // Wavefunction(s) for diagram number 50 - VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 50 - FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1011,7 +1011,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 51 - FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1021,10 +1021,10 @@ namespace mg5amcCpu // *** DIAGRAM 52 OF 123 *** // Wavefunction(s) for diagram number 52 - FFV1_1( w_fp[2], w_fp[12], 
COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); + FFV1_1( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 52 - FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1037,7 +1037,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 53 - FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1052,7 +1052,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 54 - FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1065,7 +1065,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 55 - FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1080,7 +1080,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 56 - FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1095,7 +1095,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 57 - VVV1_0( 
w_fp[12], w_fp[18], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1114,7 +1114,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 58 - VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1126,7 +1126,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1138,7 +1138,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1154,10 +1154,10 @@ namespace mg5amcCpu // *** DIAGRAM 59 OF 123 *** // Wavefunction(s) for diagram number 59 - VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 59 - VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1176,7 +1176,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 60 - VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1195,7 +1195,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 61 - FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1210,7 +1210,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 62 - FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1223,7 +1223,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 63 - FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1238,7 +1238,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 64 - FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1248,11 +1248,11 @@ 
namespace mg5amcCpu // *** DIAGRAM 65 OF 123 *** // Wavefunction(s) for diagram number 65 - VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 0., 0., w_fp[20] ); - FFV1_2( w_fp[3], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] ); + FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 65 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1262,10 +1262,10 @@ namespace mg5amcCpu // *** DIAGRAM 66 OF 123 *** // Wavefunction(s) for diagram number 66 - VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 0., 0., w_fp[22] ); + VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 66 - FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1280,7 +1280,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 67 - FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1290,10 +1290,10 @@ namespace mg5amcCpu // *** DIAGRAM 68 OF 123 *** // Wavefunction(s) for diagram number 68 - FFV1_1( w_fp[2], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 68 - FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1306,7 +1306,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 69 - FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1321,7 +1321,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 70 - FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1334,7 +1334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 71 - FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1349,7 +1349,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 72 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1364,7 +1364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 73 - VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1383,7 
+1383,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 74 - VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1395,7 +1395,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1407,7 +1407,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1423,10 +1423,10 @@ namespace mg5amcCpu // *** DIAGRAM 75 OF 123 *** // Wavefunction(s) for diagram number 75 - VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 0., 0., w_fp[12] ); + VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 75 - VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1445,7 +1445,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 76 - VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 
&_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1464,7 +1464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 77 - FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1479,7 +1479,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 78 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1492,7 +1492,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 79 - FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1507,7 +1507,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 80 - FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1517,10 +1517,10 @@ namespace mg5amcCpu // *** DIAGRAM 81 OF 123 *** // Wavefunction(s) for diagram number 81 - FFV1_1( w_fp[9], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 81 - FFV1_0( 
w_fp[15], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1529,10 +1529,10 @@ namespace mg5amcCpu // *** DIAGRAM 82 OF 123 *** // Wavefunction(s) for diagram number 82 - FFV1_2( w_fp[15], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 82 - FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1544,7 +1544,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 83 - FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1553,10 +1553,10 @@ namespace mg5amcCpu // *** DIAGRAM 84 OF 123 *** // Wavefunction(s) for diagram number 84 - FFV1_2( w_fp[13], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 84 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1568,7 +1568,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 85 - FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1578,10 +1578,10 @@ namespace mg5amcCpu // *** DIAGRAM 86 OF 123 *** // Wavefunction(s) for diagram number 86 - VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 86 - FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1593,10 +1593,10 @@ namespace mg5amcCpu // *** DIAGRAM 87 OF 123 *** // Wavefunction(s) for diagram number 87 - FFV1_2( w_fp[16], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); + FFV1_2( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 87 - FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1605,10 +1605,10 @@ namespace mg5amcCpu // *** DIAGRAM 88 OF 123 *** // Wavefunction(s) for diagram number 88 - FFV1_1( w_fp[11], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); + FFV1_1( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 88 - FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1620,7 +1620,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 89 - FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1629,10 +1629,10 @@ namespace mg5amcCpu // *** DIAGRAM 90 OF 123 *** // Wavefunction(s) for diagram number 90 - FFV1_1( w_fp[14], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[24] ); + FFV1_1( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] ); // Amplitude(s) for diagram number 90 - FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1644,7 +1644,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 91 - FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1657,7 +1657,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 92 - FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1672,7 +1672,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 93 - VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1684,7 +1684,7 @@ namespace mg5amcCpu jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 
0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1696,7 +1696,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1712,10 +1712,10 @@ namespace mg5amcCpu // *** DIAGRAM 94 OF 123 *** // Wavefunction(s) for diagram number 94 - VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 0., 0., w_fp[22] ); + VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 94 - VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1731,10 +1731,10 @@ namespace mg5amcCpu // *** DIAGRAM 95 OF 123 *** // Wavefunction(s) for diagram number 95 - VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 0., 0., w_fp[25] ); + VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 95 - VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1753,7 +1753,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 96 - FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1768,7 +1768,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 97 - FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1781,7 +1781,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 98 - FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1796,7 +1796,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 99 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1809,7 +1809,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 100 - VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1821,7 +1821,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 
&_fp[0] ); + VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1833,7 +1833,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1849,10 +1849,10 @@ namespace mg5amcCpu // *** DIAGRAM 101 OF 123 *** // Wavefunction(s) for diagram number 101 - VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 101 - VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1871,7 +1871,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 102 - VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1890,7 +1890,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 103 - FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1905,7 
+1905,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 104 - FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1918,7 +1918,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 105 - FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1933,7 +1933,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 106 - FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1946,7 +1946,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 107 - VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1958,7 +1958,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1970,7 +1970,7 @@ namespace mg5amcCpu jamp_sv[11] += 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1989,7 +1989,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 108 - VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2008,7 +2008,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 109 - VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2027,7 +2027,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 110 - FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2039,7 +2039,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 111 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2051,7 +2051,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 112 - FFV1_0( w_fp[15], w_fp[24], 
w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2063,7 +2063,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 113 - FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2072,12 +2072,12 @@ namespace mg5amcCpu // *** DIAGRAM 114 OF 123 *** // Wavefunction(s) for diagram number 114 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[12] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[24] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 114 - VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2089,7 +2089,7 @@ namespace mg5amcCpu jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2101,7 +2101,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * 
amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2120,7 +2120,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 115 - FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2128,7 +2128,7 @@ namespace mg5amcCpu jamp_sv[19] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2136,7 +2136,7 @@ namespace mg5amcCpu jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2151,7 +2151,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 116 - FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2159,7 +2159,7 @@ namespace mg5amcCpu jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; 
jamp_sv[14] += amp_sv[0]; - FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2167,7 +2167,7 @@ namespace mg5amcCpu jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[12] += amp_sv[0]; - FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2179,12 +2179,12 @@ namespace mg5amcCpu // *** DIAGRAM 117 OF 123 *** // Wavefunction(s) for diagram number 117 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[13] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 117 - VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2196,7 +2196,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -2208,7 +2208,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2227,7 +2227,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 118 - FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2235,7 +2235,7 @@ namespace mg5amcCpu jamp_sv[13] -= amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2243,7 +2243,7 @@ namespace mg5amcCpu jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2258,7 +2258,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 119 - FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2266,7 +2266,7 @@ 
namespace mg5amcCpu jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; - FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2274,7 +2274,7 @@ namespace mg5amcCpu jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; - FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2286,12 +2286,12 @@ namespace mg5amcCpu // *** DIAGRAM 120 OF 123 *** // Wavefunction(s) for diagram number 120 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[15] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[13] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] ); // Amplitude(s) for diagram number 120 - FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2299,7 +2299,7 @@ namespace mg5amcCpu jamp_sv[7] -= amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -2307,7 +2307,7 @@ namespace mg5amcCpu jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2322,7 +2322,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 121 - FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2330,7 +2330,7 @@ namespace mg5amcCpu jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2338,7 +2338,7 @@ namespace mg5amcCpu jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; - FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2353,7 +2353,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 122 - VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2365,7 +2365,7 @@ namespace mg5amcCpu jamp_sv[11] 
+= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2377,7 +2377,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2396,7 +2396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 123 - VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2408,7 +2408,7 @@ namespace mg5amcCpu jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2420,7 +2420,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif diff --git a/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h index 9cea8bcbe7..9b946c21e1 100644 --- a/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -886,6 +888,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -896,6 +899,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -908,6 +912,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -920,6 +925,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -934,6 +940,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -945,6 +952,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + 
const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -959,6 +967,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -970,6 +979,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -984,6 +994,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -995,6 +1006,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -1008,6 +1020,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1042,6 +1055,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1080,6 +1094,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1103,6 +1118,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1134,6 +1150,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1165,6 +1182,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], 
const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1197,6 +1215,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1225,6 +1244,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1260,6 +1280,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1288,6 +1309,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1323,6 +1345,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1351,6 +1374,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index d5271622a9..15a90b20cb 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005560636520385742  +DEBUG: model prefixing takes 0.005498170852661133  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.838 s +1 processes with 1240 diagrams generated in 1.836 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -176,12 +176,12 @@ INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1592 term in 36s. Introduce 2768 contraction -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  1536 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  1536 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 
373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 
732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  @@ -189,15 +189,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.442 s -Wrote files for 2281 helas calls in 46.519 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.483 s +Wrote files for 2281 helas calls in 46.276 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.315 s +ALOHA: aloha creates 5 routines in 0.310 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -322,6 +322,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m57.762s -user 0m56.504s -sys 0m0.940s +real 0m57.424s +user 0m56.469s +sys 0m0.771s diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc index ca9e346bf8..a525c4ba3f 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc @@ -252,13 +252,13 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][6], +1, w_fp[6], 6 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[7] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); - VVV1P0_1( w_fp[7], w_fp[4], COUPs[0], 0., 0., w_fp[9] ); - VVV1P0_1( w_fp[8], w_fp[5], COUPs[0], 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[7], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[9] ); + VVV1P0_1( w_fp[8], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 1 - VVV1_0( w_fp[9], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -283,10 +283,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 1240 *** // Wavefunction(s) for diagram number 2 - VVV1P0_1( w_fp[8], w_fp[6], COUPs[0], 0., 0., w_fp[11] ); + VVV1P0_1( w_fp[8], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 2 - VVV1_0( w_fp[9], w_fp[11], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[11], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -314,7 +314,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 3 - VVVV1_0( w_fp[8], 
w_fp[5], w_fp[6], w_fp[9], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -331,7 +331,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -348,7 +348,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -369,11 +369,11 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 1240 *** // Wavefunction(s) for diagram number 4 - VVV1P0_1( w_fp[7], w_fp[5], COUPs[0], 0., 0., w_fp[12] ); - VVV1P0_1( w_fp[8], w_fp[4], COUPs[0], 0., 0., w_fp[13] ); + VVV1P0_1( w_fp[7], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[12] ); + VVV1P0_1( w_fp[8], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[13] ); // Amplitude(s) for diagram number 4 - VVV1_0( w_fp[12], w_fp[13], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[13], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -401,7 +401,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[12], w_fp[11], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[11], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -429,7 +429,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 6 - VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -446,7 +446,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -463,7 +463,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[3] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -484,10 +484,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 1240 *** // Wavefunction(s) for diagram number 7 - VVV1P0_1( w_fp[7], w_fp[6], COUPs[0], 0., 0., w_fp[14] ); + VVV1P0_1( w_fp[7], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[14] ); // Amplitude(s) for diagram number 7 - VVV1_0( w_fp[14], w_fp[13], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[13], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -515,7 +515,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - VVV1_0( w_fp[14], w_fp[10], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[10], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -543,7 +543,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], &_fp[0] ); + 
VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -560,7 +560,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -577,7 +577,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -598,12 +598,12 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 1240 *** // Wavefunction(s) for diagram number 10 - VVVV1P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[15] ); - VVVV3P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[16] ); - VVVV4P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[17] ); + VVVV1P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] ); + VVVV3P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[16] ); + VVVV4P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] ); // Amplitude(s) for diagram number 10 - VVV1_0( w_fp[8], w_fp[6], w_fp[15], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[15], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -620,7 +620,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -637,7 +637,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; 
jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[17], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -658,12 +658,12 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 1240 *** // Wavefunction(s) for diagram number 11 - VVVV1P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[18] ); - VVVV3P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[19] ); - VVVV4P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[20] ); + VVVV1P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[18] ); + VVVV3P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[19] ); + VVVV4P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[20] ); // Amplitude(s) for diagram number 11 - VVV1_0( w_fp[8], w_fp[5], w_fp[18], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[18], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[25] -= amp_sv[0]; @@ -680,7 +680,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[19], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -697,7 +697,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[20], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -718,12 +718,12 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 1240 *** // Wavefunction(s) for diagram number 12 - VVVV1P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[22] ); - VVVV4P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[23] 
); + VVVV1P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] ); + VVVV4P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 12 - VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; @@ -740,7 +740,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -757,7 +757,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -778,10 +778,10 @@ namespace mg5amcCpu // *** DIAGRAM 13 OF 1240 *** // Wavefunction(s) for diagram number 13 - VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 0., 0., w_fp[24] ); + VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 13 - VVVV1_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[4] -= amp_sv[0]; @@ -798,7 +798,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -815,7 +815,7 @@ namespace mg5amcCpu jamp_sv[113] -= 
amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[28] -= amp_sv[0]; @@ -836,10 +836,10 @@ namespace mg5amcCpu // *** DIAGRAM 14 OF 1240 *** // Wavefunction(s) for diagram number 14 - VVV1P0_1( w_fp[7], w_fp[8], COUPs[0], 0., 0., w_fp[25] ); + VVV1P0_1( w_fp[7], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 14 - VVV1_0( w_fp[24], w_fp[6], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -864,10 +864,10 @@ namespace mg5amcCpu // *** DIAGRAM 15 OF 1240 *** // Wavefunction(s) for diagram number 15 - VVV1P0_1( w_fp[7], w_fp[24], COUPs[0], 0., 0., w_fp[26] ); + VVV1P0_1( w_fp[7], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[26] ); // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[8], w_fp[6], w_fp[26], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[26], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -895,7 +895,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 16 - VVV1_0( w_fp[8], w_fp[24], w_fp[14], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[24], w_fp[14], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -920,10 +920,10 @@ namespace mg5amcCpu // *** DIAGRAM 17 OF 1240 *** // Wavefunction(s) for diagram number 17 - VVV1P0_1( w_fp[4], w_fp[6], COUPs[0], 0., 0., w_fp[27] ); + VVV1P0_1( w_fp[4], w_fp[6], COUPs[0], 1.0, 0., 0., 
w_fp[27] ); // Amplitude(s) for diagram number 17 - VVVV1_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[3] += amp_sv[0]; @@ -940,7 +940,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; - VVVV3_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[25] -= amp_sv[0]; @@ -957,7 +957,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVVV4_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[26] -= amp_sv[0]; @@ -981,7 +981,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 18 - VVV1_0( w_fp[27], w_fp[5], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1006,10 +1006,10 @@ namespace mg5amcCpu // *** DIAGRAM 19 OF 1240 *** // Wavefunction(s) for diagram number 19 - VVV1P0_1( w_fp[7], w_fp[27], COUPs[0], 0., 0., w_fp[28] ); + VVV1P0_1( w_fp[7], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[28] ); // Amplitude(s) for diagram number 19 - VVV1_0( w_fp[8], w_fp[5], w_fp[28], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[28], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1037,7 +1037,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[8], w_fp[27], 
w_fp[12], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[27], w_fp[12], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1062,10 +1062,10 @@ namespace mg5amcCpu // *** DIAGRAM 21 OF 1240 *** // Wavefunction(s) for diagram number 21 - VVV1P0_1( w_fp[5], w_fp[6], COUPs[0], 0., 0., w_fp[29] ); + VVV1P0_1( w_fp[5], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] ); // Amplitude(s) for diagram number 21 - VVVV1_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -1082,7 +1082,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -1099,7 +1099,7 @@ namespace mg5amcCpu jamp_sv[116] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; @@ -1123,7 +1123,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - VVV1_0( w_fp[4], w_fp[29], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1151,7 +1151,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 23 - VVV1_0( w_fp[8], w_fp[29], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[29], w_fp[9], 
COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1176,10 +1176,10 @@ namespace mg5amcCpu // *** DIAGRAM 24 OF 1240 *** // Wavefunction(s) for diagram number 24 - VVV1P0_1( w_fp[7], w_fp[29], COUPs[0], 0., 0., w_fp[25] ); + VVV1P0_1( w_fp[7], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 24 - VVV1_0( w_fp[8], w_fp[4], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1204,12 +1204,12 @@ namespace mg5amcCpu // *** DIAGRAM 25 OF 1240 *** // Wavefunction(s) for diagram number 25 - VVVV1P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[30] ); - VVVV3P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[31] ); - VVVV4P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[32] ); + VVVV1P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[30] ); + VVVV3P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[31] ); + VVVV4P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[32] ); // Amplitude(s) for diagram number 25 - VVV1_0( w_fp[7], w_fp[8], w_fp[30], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[8], w_fp[30], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -1226,7 +1226,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[7], w_fp[8], w_fp[31], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[8], w_fp[31], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -1243,7 +1243,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[7], 
w_fp[8], w_fp[32], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[8], w_fp[32], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -1264,12 +1264,12 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 1240 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[33] ); - FFV1_2( w_fp[3], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[34] ); - FFV1_1( w_fp[33], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[35] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[33] ); + FFV1_2( w_fp[3], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] ); + FFV1_1( w_fp[33], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[35] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[34], w_fp[35], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[35], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1280,10 +1280,10 @@ namespace mg5amcCpu // *** DIAGRAM 27 OF 1240 *** // Wavefunction(s) for diagram number 27 - FFV1_1( w_fp[33], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[36] ); + FFV1_1( w_fp[33], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[36] ); // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[34], w_fp[36], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[36], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1294,10 +1294,10 @@ namespace mg5amcCpu // *** DIAGRAM 28 OF 1240 *** // Wavefunction(s) for diagram number 28 - FFV1P0_3( w_fp[3], w_fp[33], COUPs[1], 0., 0., w_fp[37] ); + FFV1P0_3( w_fp[3], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[37] ); // Amplitude(s) for diagram number 28 - VVV1_0( w_fp[12], w_fp[37], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[37], w_fp[6], 
COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1317,7 +1317,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[3], w_fp[36], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[36], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1333,7 +1333,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 30 - VVV1_0( w_fp[14], w_fp[37], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[37], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1353,7 +1353,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 31 - FFV1_0( w_fp[3], w_fp[35], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[35], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1369,7 +1369,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1378,7 +1378,7 @@ namespace mg5amcCpu jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &_fp[0] ); jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] 
-= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1387,7 +1387,7 @@ namespace mg5amcCpu jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1400,11 +1400,11 @@ namespace mg5amcCpu // *** DIAGRAM 33 OF 1240 *** // Wavefunction(s) for diagram number 33 - FFV1_2( w_fp[3], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[38] ); - FFV1_1( w_fp[33], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[39] ); + FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[38] ); + FFV1_1( w_fp[33], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] ); // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[38], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1415,10 +1415,10 @@ namespace mg5amcCpu // *** DIAGRAM 34 OF 1240 *** // Wavefunction(s) for diagram number 34 - FFV1_2( w_fp[38], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[40] ); + FFV1_2( w_fp[38], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] ); // Amplitude(s) for diagram number 34 - FFV1_0( w_fp[40], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 34 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1432,7 +1432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - FFV1_0( w_fp[38], w_fp[33], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[14], COUPs[1], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1445,10 +1445,10 @@ namespace mg5amcCpu // *** DIAGRAM 36 OF 1240 *** // Wavefunction(s) for diagram number 36 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[41] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[41] ); // Amplitude(s) for diagram number 36 - FFV1_0( w_fp[41], w_fp[39], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1459,10 +1459,10 @@ namespace mg5amcCpu // *** DIAGRAM 37 OF 1240 *** // Wavefunction(s) for diagram number 37 - FFV1_2( w_fp[41], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[42] ); + FFV1_2( w_fp[41], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[42] ); // Amplitude(s) for diagram number 37 - FFV1_0( w_fp[42], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[42], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 37 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1476,7 +1476,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 38 - FFV1_0( w_fp[41], w_fp[33], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 38 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1492,7 +1492,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 39 - FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 39 ) numerators_sv += 
cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1508,7 +1508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 40 - FFV1_0( w_fp[34], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 40 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1524,7 +1524,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 41 - FFV1_0( w_fp[3], w_fp[33], w_fp[25], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[25], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 41 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1541,11 +1541,11 @@ namespace mg5amcCpu // *** DIAGRAM 42 OF 1240 *** // Wavefunction(s) for diagram number 42 - FFV1_1( w_fp[2], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[39] ); - FFV1_1( w_fp[39], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[43] ); + FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] ); + FFV1_1( w_fp[39], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[43] ); // Amplitude(s) for diagram number 42 - FFV1_0( w_fp[34], w_fp[43], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[43], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 42 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1556,10 +1556,10 @@ namespace mg5amcCpu // *** DIAGRAM 43 OF 1240 *** // Wavefunction(s) for diagram number 43 - FFV1_1( w_fp[39], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[44] ); + FFV1_1( w_fp[39], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[44] ); // Amplitude(s) for diagram number 43 - FFV1_0( w_fp[34], w_fp[44], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[44], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 43 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1570,10 +1570,10 @@ namespace mg5amcCpu // *** DIAGRAM 44 OF 1240 *** // Wavefunction(s) for diagram number 44 - FFV1P0_3( w_fp[3], w_fp[39], COUPs[1], 0., 0., w_fp[45] ); + FFV1P0_3( w_fp[3], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[45] ); // Amplitude(s) for diagram number 44 - VVV1_0( w_fp[9], w_fp[45], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[45], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 44 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1593,7 +1593,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 45 - FFV1_0( w_fp[3], w_fp[44], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[44], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 45 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1609,7 +1609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 46 - VVV1_0( w_fp[14], w_fp[45], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[45], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 46 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1629,7 +1629,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 47 - FFV1_0( w_fp[3], w_fp[43], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[43], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 47 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1645,7 +1645,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 48 - FFV1_0( w_fp[3], w_fp[39], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], 
w_fp[39], w_fp[18], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1654,7 +1654,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[19], COUPs[1], 1.0, &_fp[0] ); jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1663,7 +1663,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[20], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1676,11 +1676,11 @@ namespace mg5amcCpu // *** DIAGRAM 49 OF 1240 *** // Wavefunction(s) for diagram number 49 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[46] ); - FFV1_1( w_fp[39], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[47] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[46] ); + FFV1_1( w_fp[39], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] ); // Amplitude(s) for diagram number 49 - FFV1_0( w_fp[46], w_fp[47], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 49 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1691,10 +1691,10 @@ namespace mg5amcCpu // *** DIAGRAM 50 OF 1240 *** // Wavefunction(s) for diagram number 50 - FFV1_2( w_fp[46], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[48] ); + FFV1_2( w_fp[46], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] 
); // Amplitude(s) for diagram number 50 - FFV1_0( w_fp[48], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 50 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1708,7 +1708,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 51 - FFV1_0( w_fp[46], w_fp[39], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 51 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1724,7 +1724,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 52 - FFV1_0( w_fp[41], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 52 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1738,7 +1738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 53 - FFV1_0( w_fp[42], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[42], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 53 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1752,7 +1752,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 54 - FFV1_0( w_fp[41], w_fp[39], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 54 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1768,7 +1768,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 55 - FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], 
w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 55 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1784,7 +1784,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 56 - FFV1_0( w_fp[34], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 56 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1800,7 +1800,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 57 - FFV1_0( w_fp[3], w_fp[39], w_fp[28], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[28], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 57 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1817,11 +1817,11 @@ namespace mg5amcCpu // *** DIAGRAM 58 OF 1240 *** // Wavefunction(s) for diagram number 58 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[47] ); - FFV1_1( w_fp[47], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[49] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] ); + FFV1_1( w_fp[47], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[49] ); // Amplitude(s) for diagram number 58 - FFV1_0( w_fp[34], w_fp[49], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[49], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 58 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1832,10 +1832,10 @@ namespace mg5amcCpu // *** DIAGRAM 59 OF 1240 *** // Wavefunction(s) for diagram number 59 - FFV1_1( w_fp[47], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[50] ); + FFV1_1( w_fp[47], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[50] ); // Amplitude(s) for diagram number 59 - FFV1_0( w_fp[34], w_fp[50], w_fp[4], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[50], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 59 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1846,10 +1846,10 @@ namespace mg5amcCpu // *** DIAGRAM 60 OF 1240 *** // Wavefunction(s) for diagram number 60 - FFV1P0_3( w_fp[3], w_fp[47], COUPs[1], 0., 0., w_fp[51] ); + FFV1P0_3( w_fp[3], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[51] ); // Amplitude(s) for diagram number 60 - VVV1_0( w_fp[9], w_fp[51], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[51], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 60 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1869,7 +1869,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 61 - FFV1_0( w_fp[3], w_fp[50], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[50], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 61 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1885,7 +1885,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 62 - VVV1_0( w_fp[12], w_fp[51], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[51], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 62 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1905,7 +1905,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 63 - FFV1_0( w_fp[3], w_fp[49], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[49], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 63 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1921,7 +1921,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 64 - FFV1_0( w_fp[3], w_fp[47], w_fp[15], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[15], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1930,7 +1930,7 @@ namespace mg5amcCpu jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &_fp[0] ); jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1939,7 +1939,7 @@ namespace mg5amcCpu jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[17], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1952,10 +1952,10 @@ namespace mg5amcCpu // *** DIAGRAM 65 OF 1240 *** // Wavefunction(s) for diagram number 65 - FFV1_1( w_fp[47], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[52] ); + FFV1_1( w_fp[47], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] ); // Amplitude(s) for diagram number 65 - FFV1_0( w_fp[46], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 65 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1969,7 +1969,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 66 - FFV1_0( w_fp[48], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 66 ) numerators_sv 
+= cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1983,7 +1983,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 67 - FFV1_0( w_fp[46], w_fp[47], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 67 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1999,7 +1999,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 68 - FFV1_0( w_fp[38], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 68 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2013,7 +2013,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 69 - FFV1_0( w_fp[40], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 69 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2027,7 +2027,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 70 - FFV1_0( w_fp[38], w_fp[47], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 70 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2043,7 +2043,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 71 - FFV1_0( w_fp[3], w_fp[52], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 71 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2059,7 +2059,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 72 - FFV1_0( w_fp[34], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 72 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2075,7 +2075,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 73 - FFV1_0( w_fp[3], w_fp[47], w_fp[26], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[26], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 73 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2092,11 +2092,11 @@ namespace mg5amcCpu // *** DIAGRAM 74 OF 1240 *** // Wavefunction(s) for diagram number 74 - FFV1_1( w_fp[2], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[52] ); - FFV1_2( w_fp[46], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[7] ); + FFV1_1( w_fp[2], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] ); + FFV1_2( w_fp[46], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] ); // Amplitude(s) for diagram number 74 - FFV1_0( w_fp[7], w_fp[52], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[52], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 74 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2107,10 +2107,10 @@ namespace mg5amcCpu // *** DIAGRAM 75 OF 1240 *** // Wavefunction(s) for diagram number 75 - FFV1_2( w_fp[46], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[53] ); + FFV1_2( w_fp[46], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[53] ); // Amplitude(s) for diagram number 75 - FFV1_0( w_fp[53], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 75 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); 
@@ -2121,10 +2121,10 @@ namespace mg5amcCpu // *** DIAGRAM 76 OF 1240 *** // Wavefunction(s) for diagram number 76 - FFV1P0_3( w_fp[46], w_fp[2], COUPs[1], 0., 0., w_fp[54] ); + FFV1P0_3( w_fp[46], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[54] ); // Amplitude(s) for diagram number 76 - VVV1_0( w_fp[12], w_fp[54], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[54], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 76 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2144,7 +2144,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 77 - FFV1_0( w_fp[53], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 77 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2160,7 +2160,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 78 - VVV1_0( w_fp[14], w_fp[54], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[54], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 78 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2180,7 +2180,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 79 - FFV1_0( w_fp[7], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 79 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2196,7 +2196,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 80 - FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] += 
cxtype( 0, 1 ) * amp_sv[0]; @@ -2205,7 +2205,7 @@ namespace mg5amcCpu jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2214,7 +2214,7 @@ namespace mg5amcCpu jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2230,7 +2230,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 81 - FFV1_0( w_fp[46], w_fp[52], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[52], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 81 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2246,7 +2246,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 82 - FFV1_0( w_fp[48], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 82 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2262,7 +2262,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 83 - FFV1_0( w_fp[46], w_fp[2], w_fp[25], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[25], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 83 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); 
@@ -2279,10 +2279,10 @@ namespace mg5amcCpu // *** DIAGRAM 84 OF 1240 *** // Wavefunction(s) for diagram number 84 - FFV1_2( w_fp[38], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[25] ); + FFV1_2( w_fp[38], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[25] ); // Amplitude(s) for diagram number 84 - FFV1_0( w_fp[25], w_fp[52], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[52], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 84 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2293,10 +2293,10 @@ namespace mg5amcCpu // *** DIAGRAM 85 OF 1240 *** // Wavefunction(s) for diagram number 85 - FFV1_2( w_fp[38], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[48] ); + FFV1_2( w_fp[38], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] ); // Amplitude(s) for diagram number 85 - FFV1_0( w_fp[48], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 85 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2307,10 +2307,10 @@ namespace mg5amcCpu // *** DIAGRAM 86 OF 1240 *** // Wavefunction(s) for diagram number 86 - FFV1P0_3( w_fp[38], w_fp[2], COUPs[1], 0., 0., w_fp[23] ); + FFV1P0_3( w_fp[38], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 86 - VVV1_0( w_fp[9], w_fp[23], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[23], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 86 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2330,7 +2330,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 87 - FFV1_0( w_fp[48], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 87 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2346,7 +2346,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 88 - VVV1_0( w_fp[14], w_fp[23], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[23], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 88 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2366,7 +2366,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 89 - FFV1_0( w_fp[25], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 89 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2382,7 +2382,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 90 - FFV1_0( w_fp[38], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2391,7 +2391,7 @@ namespace mg5amcCpu jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2400,7 +2400,7 @@ namespace mg5amcCpu jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[20], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2416,7 +2416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 91 - FFV1_0( w_fp[38], w_fp[52], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[52], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 91 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2432,7 +2432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 92 - FFV1_0( w_fp[40], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 92 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2448,7 +2448,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 93 - FFV1_0( w_fp[38], w_fp[2], w_fp[28], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[28], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 93 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2465,10 +2465,10 @@ namespace mg5amcCpu // *** DIAGRAM 94 OF 1240 *** // Wavefunction(s) for diagram number 94 - FFV1_2( w_fp[41], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[28] ); + FFV1_2( w_fp[41], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[28] ); // Amplitude(s) for diagram number 94 - FFV1_0( w_fp[28], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 94 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2479,10 +2479,10 @@ namespace mg5amcCpu // *** DIAGRAM 95 OF 1240 *** // Wavefunction(s) for diagram number 95 - FFV1_2( w_fp[41], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[40] ); + FFV1_2( w_fp[41], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] 
); // Amplitude(s) for diagram number 95 - FFV1_0( w_fp[40], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 95 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2493,10 +2493,10 @@ namespace mg5amcCpu // *** DIAGRAM 96 OF 1240 *** // Wavefunction(s) for diagram number 96 - FFV1P0_3( w_fp[41], w_fp[2], COUPs[1], 0., 0., w_fp[20] ); + FFV1P0_3( w_fp[41], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[20] ); // Amplitude(s) for diagram number 96 - VVV1_0( w_fp[9], w_fp[20], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[20], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 96 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2516,7 +2516,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 97 - FFV1_0( w_fp[40], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 97 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2532,7 +2532,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 98 - VVV1_0( w_fp[12], w_fp[20], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[20], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 98 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2552,7 +2552,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 99 - FFV1_0( w_fp[28], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 99 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] 
); @@ -2568,7 +2568,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 100 - FFV1_0( w_fp[41], w_fp[2], w_fp[15], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[15], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2577,7 +2577,7 @@ namespace mg5amcCpu jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2586,7 +2586,7 @@ namespace mg5amcCpu jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2602,7 +2602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 101 - FFV1_0( w_fp[41], w_fp[52], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[52], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 101 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2618,7 +2618,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 102 - FFV1_0( w_fp[42], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[42], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 102 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2634,7 +2634,7 @@ namespace mg5amcCpu // (none) 
// Amplitude(s) for diagram number 103 - FFV1_0( w_fp[41], w_fp[2], w_fp[26], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[26], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 103 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2651,10 +2651,10 @@ namespace mg5amcCpu // *** DIAGRAM 104 OF 1240 *** // Wavefunction(s) for diagram number 104 - FFV1_2( w_fp[3], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[26] ); + FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[26] ); // Amplitude(s) for diagram number 104 - FFV1_0( w_fp[26], w_fp[52], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[52], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 104 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2667,10 +2667,10 @@ namespace mg5amcCpu // *** DIAGRAM 105 OF 1240 *** // Wavefunction(s) for diagram number 105 - VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 0., 0., w_fp[42] ); + VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[42] ); // Amplitude(s) for diagram number 105 - FFV1_0( w_fp[3], w_fp[52], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 105 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2687,10 +2687,10 @@ namespace mg5amcCpu // *** DIAGRAM 106 OF 1240 *** // Wavefunction(s) for diagram number 106 - FFV1_1( w_fp[2], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[17] ); + FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] ); // Amplitude(s) for diagram number 106 - FFV1_0( w_fp[34], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 106 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2706,7 +2706,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 107 - FFV1_0( w_fp[34], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 107 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2726,7 +2726,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 108 - FFV1_0( w_fp[3], w_fp[17], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 108 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2746,7 +2746,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 109 - FFV1_0( w_fp[26], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 109 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2763,10 +2763,10 @@ namespace mg5amcCpu // *** DIAGRAM 110 OF 1240 *** // Wavefunction(s) for diagram number 110 - FFV1_2( w_fp[3], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); + FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 110 - FFV1_0( w_fp[14], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 110 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2779,10 +2779,10 @@ namespace mg5amcCpu // *** DIAGRAM 111 OF 1240 *** // Wavefunction(s) for diagram number 111 - VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., 
w_fp[16] ); // Amplitude(s) for diagram number 111 - FFV1_0( w_fp[3], w_fp[52], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 111 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2799,10 +2799,10 @@ namespace mg5amcCpu // *** DIAGRAM 112 OF 1240 *** // Wavefunction(s) for diagram number 112 - FFV1_1( w_fp[2], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[15] ); + FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] ); // Amplitude(s) for diagram number 112 - FFV1_0( w_fp[34], w_fp[15], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[15], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 112 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2818,7 +2818,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 113 - FFV1_0( w_fp[34], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 113 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2838,7 +2838,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 114 - FFV1_0( w_fp[3], w_fp[15], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[15], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 114 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2858,7 +2858,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 115 - FFV1_0( w_fp[14], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 115 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 
0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2875,10 +2875,10 @@ namespace mg5amcCpu // *** DIAGRAM 116 OF 1240 *** // Wavefunction(s) for diagram number 116 - FFV1_2( w_fp[3], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 116 - FFV1_0( w_fp[12], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 116 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2891,10 +2891,10 @@ namespace mg5amcCpu // *** DIAGRAM 117 OF 1240 *** // Wavefunction(s) for diagram number 117 - VVV1P0_1( w_fp[4], w_fp[29], COUPs[0], 0., 0., w_fp[19] ); + VVV1P0_1( w_fp[4], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[19] ); // Amplitude(s) for diagram number 117 - FFV1_0( w_fp[3], w_fp[52], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 117 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2911,10 +2911,10 @@ namespace mg5amcCpu // *** DIAGRAM 118 OF 1240 *** // Wavefunction(s) for diagram number 118 - FFV1_1( w_fp[2], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[18] ); + FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[18] ); // Amplitude(s) for diagram number 118 - FFV1_0( w_fp[34], w_fp[18], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[18], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 118 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2930,7 +2930,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 119 - FFV1_0( w_fp[34], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 119 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2950,7 +2950,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 120 - FFV1_0( w_fp[3], w_fp[18], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[18], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 120 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2970,7 +2970,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 121 - FFV1_0( w_fp[12], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 121 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2990,7 +2990,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 122 - FFV1_0( w_fp[3], w_fp[52], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[30], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2999,7 +2999,7 @@ namespace mg5amcCpu jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[52], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[31], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -3008,7 +3008,7 @@ namespace mg5amcCpu jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[52], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[32], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3024,7 +3024,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 123 - FFV1_0( w_fp[34], w_fp[2], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[30], COUPs[1], 1.0, &_fp[0] ); jamp_sv[64] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; @@ -3033,7 +3033,7 @@ namespace mg5amcCpu jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[31], COUPs[1], 1.0, &_fp[0] ); jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3042,7 +3042,7 @@ namespace mg5amcCpu jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[32], COUPs[1], 1.0, &_fp[0] ); jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3055,13 +3055,13 @@ namespace mg5amcCpu // *** DIAGRAM 124 OF 1240 *** // Wavefunction(s) for diagram number 124 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[34] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[52] ); - FFV1_1( w_fp[34], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[52], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] ); + FFV1_1( w_fp[34], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[52], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram 
number 124 - FFV1_0( w_fp[22], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 124 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3071,10 +3071,10 @@ namespace mg5amcCpu // *** DIAGRAM 125 OF 1240 *** // Wavefunction(s) for diagram number 125 - FFV1_2( w_fp[52], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[52], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 125 - FFV1_0( w_fp[21], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 125 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3084,11 +3084,11 @@ namespace mg5amcCpu // *** DIAGRAM 126 OF 1240 *** // Wavefunction(s) for diagram number 126 - FFV1_1( w_fp[34], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[55] ); - FFV1_2( w_fp[52], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[56] ); + FFV1_1( w_fp[34], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[55] ); + FFV1_2( w_fp[52], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[56] ); // Amplitude(s) for diagram number 126 - FFV1_0( w_fp[56], w_fp[55], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[55], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 126 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3101,7 +3101,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 127 - FFV1_0( w_fp[21], w_fp[55], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[55], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 127 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3111,10 +3111,10 @@ 
namespace mg5amcCpu // *** DIAGRAM 128 OF 1240 *** // Wavefunction(s) for diagram number 128 - FFV1_1( w_fp[34], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[57] ); + FFV1_1( w_fp[34], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[57] ); // Amplitude(s) for diagram number 128 - FFV1_0( w_fp[56], w_fp[57], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[57], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 128 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3127,7 +3127,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 129 - FFV1_0( w_fp[22], w_fp[57], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[57], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 129 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3137,10 +3137,10 @@ namespace mg5amcCpu // *** DIAGRAM 130 OF 1240 *** // Wavefunction(s) for diagram number 130 - FFV1P0_3( w_fp[52], w_fp[34], COUPs[1], 0., 0., w_fp[58] ); + FFV1P0_3( w_fp[52], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[58] ); // Amplitude(s) for diagram number 130 - VVV1_0( w_fp[24], w_fp[6], w_fp[58], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[58], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 130 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3153,10 +3153,10 @@ namespace mg5amcCpu // *** DIAGRAM 131 OF 1240 *** // Wavefunction(s) for diagram number 131 - FFV1_1( w_fp[34], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[59] ); + FFV1_1( w_fp[34], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] ); // Amplitude(s) for diagram number 131 - FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 131 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3170,7 +3170,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 132 - FFV1_0( w_fp[52], w_fp[57], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[57], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 132 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3184,7 +3184,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 133 - VVV1_0( w_fp[27], w_fp[5], w_fp[58], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[58], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 133 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3197,10 +3197,10 @@ namespace mg5amcCpu // *** DIAGRAM 134 OF 1240 *** // Wavefunction(s) for diagram number 134 - FFV1_1( w_fp[34], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); + FFV1_1( w_fp[34], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); // Amplitude(s) for diagram number 134 - FFV1_0( w_fp[52], w_fp[60], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[60], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 134 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3214,7 +3214,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 135 - FFV1_0( w_fp[52], w_fp[55], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[55], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 135 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3228,7 +3228,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 136 - VVV1_0( w_fp[4], w_fp[29], w_fp[58], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], 
w_fp[58], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 136 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3244,7 +3244,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 137 - FFV1_0( w_fp[52], w_fp[9], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[9], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 137 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3255,10 +3255,10 @@ namespace mg5amcCpu // *** DIAGRAM 138 OF 1240 *** // Wavefunction(s) for diagram number 138 - FFV1_1( w_fp[34], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[58] ); + FFV1_1( w_fp[34], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] ); // Amplitude(s) for diagram number 138 - FFV1_0( w_fp[52], w_fp[58], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[58], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 138 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3272,17 +3272,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 139 - FFV1_0( w_fp[52], w_fp[34], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[34], w_fp[30], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[11] -= amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[34], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[34], w_fp[31], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[34], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[34], w_fp[32], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -3291,12 +3291,12 @@ namespace mg5amcCpu // *** DIAGRAM 140 OF 1240 *** // 
Wavefunction(s) for diagram number 140 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[61] ); - FFV1P0_3( w_fp[3], w_fp[34], COUPs[1], 0., 0., w_fp[62] ); - VVV1P0_1( w_fp[61], w_fp[5], COUPs[0], 0., 0., w_fp[63] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[61] ); + FFV1P0_3( w_fp[3], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[62] ); + VVV1P0_1( w_fp[61], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[63] ); // Amplitude(s) for diagram number 140 - VVV1_0( w_fp[62], w_fp[63], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[63], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 140 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3313,10 +3313,10 @@ namespace mg5amcCpu // *** DIAGRAM 141 OF 1240 *** // Wavefunction(s) for diagram number 141 - VVV1P0_1( w_fp[61], w_fp[6], COUPs[0], 0., 0., w_fp[64] ); + VVV1P0_1( w_fp[61], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[64] ); // Amplitude(s) for diagram number 141 - VVV1_0( w_fp[62], w_fp[64], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[64], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 141 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3336,7 +3336,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 142 - VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -3345,7 +3345,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] 
-= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -3354,7 +3354,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3367,10 +3367,10 @@ namespace mg5amcCpu // *** DIAGRAM 143 OF 1240 *** // Wavefunction(s) for diagram number 143 - FFV1_2( w_fp[3], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[65] ); + FFV1_2( w_fp[3], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[65] ); // Amplitude(s) for diagram number 143 - FFV1_0( w_fp[65], w_fp[55], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[55], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 143 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3384,7 +3384,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 144 - FFV1_0( w_fp[3], w_fp[55], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 144 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3400,7 +3400,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 145 - FFV1_0( w_fp[65], w_fp[57], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[57], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 145 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3414,7 +3414,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 146 - FFV1_0( 
w_fp[3], w_fp[57], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 146 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3427,10 +3427,10 @@ namespace mg5amcCpu // *** DIAGRAM 147 OF 1240 *** // Wavefunction(s) for diagram number 147 - FFV1_1( w_fp[34], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[66] ); + FFV1_1( w_fp[34], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] ); // Amplitude(s) for diagram number 147 - FFV1_0( w_fp[38], w_fp[66], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[66], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 147 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3441,10 +3441,10 @@ namespace mg5amcCpu // *** DIAGRAM 148 OF 1240 *** // Wavefunction(s) for diagram number 148 - FFV1P0_3( w_fp[38], w_fp[34], COUPs[1], 0., 0., w_fp[67] ); + FFV1P0_3( w_fp[38], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[67] ); // Amplitude(s) for diagram number 148 - VVV1_0( w_fp[61], w_fp[6], w_fp[67], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[67], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 148 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3460,7 +3460,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 149 - FFV1_0( w_fp[38], w_fp[57], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[57], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 149 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3474,7 +3474,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 150 - FFV1_0( w_fp[41], w_fp[66], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[66], w_fp[5], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 150 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3485,10 +3485,10 @@ namespace mg5amcCpu // *** DIAGRAM 151 OF 1240 *** // Wavefunction(s) for diagram number 151 - FFV1P0_3( w_fp[41], w_fp[34], COUPs[1], 0., 0., w_fp[68] ); + FFV1P0_3( w_fp[41], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 151 - VVV1_0( w_fp[61], w_fp[5], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[68], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 151 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3504,7 +3504,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 152 - FFV1_0( w_fp[41], w_fp[55], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[55], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 152 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3518,7 +3518,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 153 - FFV1_0( w_fp[3], w_fp[66], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[66], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 153 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3534,7 +3534,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 154 - VVV1_0( w_fp[61], w_fp[29], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[29], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 154 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3554,7 +3554,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 155 - FFV1_0( w_fp[3], 
w_fp[58], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[58], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 155 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3567,11 +3567,11 @@ namespace mg5amcCpu // *** DIAGRAM 156 OF 1240 *** // Wavefunction(s) for diagram number 156 - VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 0., 0., w_fp[66] ); - VVV1P0_1( w_fp[66], w_fp[4], COUPs[0], 0., 0., w_fp[69] ); + VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[66] ); + VVV1P0_1( w_fp[66], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[69] ); // Amplitude(s) for diagram number 156 - VVV1_0( w_fp[62], w_fp[69], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[69], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 156 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3588,10 +3588,10 @@ namespace mg5amcCpu // *** DIAGRAM 157 OF 1240 *** // Wavefunction(s) for diagram number 157 - VVV1P0_1( w_fp[66], w_fp[6], COUPs[0], 0., 0., w_fp[70] ); + VVV1P0_1( w_fp[66], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[70] ); // Amplitude(s) for diagram number 157 - VVV1_0( w_fp[62], w_fp[70], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[70], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 157 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3611,7 +3611,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 158 - VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3620,7 +3620,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 
) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3629,7 +3629,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3642,10 +3642,10 @@ namespace mg5amcCpu // *** DIAGRAM 159 OF 1240 *** // Wavefunction(s) for diagram number 159 - FFV1_2( w_fp[3], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); + FFV1_2( w_fp[3], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); // Amplitude(s) for diagram number 159 - FFV1_0( w_fp[71], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 159 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3659,7 +3659,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 160 - FFV1_0( w_fp[3], w_fp[9], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 160 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3675,7 +3675,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 161 - FFV1_0( w_fp[71], w_fp[57], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[57], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 161 
) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3689,7 +3689,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 162 - FFV1_0( w_fp[3], w_fp[57], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 162 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3702,10 +3702,10 @@ namespace mg5amcCpu // *** DIAGRAM 163 OF 1240 *** // Wavefunction(s) for diagram number 163 - FFV1_1( w_fp[34], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[72] ); + FFV1_1( w_fp[34], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] ); // Amplitude(s) for diagram number 163 - FFV1_0( w_fp[46], w_fp[72], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[72], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 163 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3716,10 +3716,10 @@ namespace mg5amcCpu // *** DIAGRAM 164 OF 1240 *** // Wavefunction(s) for diagram number 164 - FFV1P0_3( w_fp[46], w_fp[34], COUPs[1], 0., 0., w_fp[73] ); + FFV1P0_3( w_fp[46], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[73] ); // Amplitude(s) for diagram number 164 - VVV1_0( w_fp[66], w_fp[6], w_fp[73], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[73], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 164 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3735,7 +3735,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 165 - FFV1_0( w_fp[46], w_fp[57], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[57], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 165 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( 
amp_sv[0] ); @@ -3749,7 +3749,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 166 - FFV1_0( w_fp[41], w_fp[72], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[72], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 166 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3763,7 +3763,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 167 - VVV1_0( w_fp[66], w_fp[4], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[68], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 167 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3779,7 +3779,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 168 - FFV1_0( w_fp[41], w_fp[9], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[9], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 168 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3793,7 +3793,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 169 - FFV1_0( w_fp[3], w_fp[72], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[72], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 169 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3809,7 +3809,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 170 - VVV1_0( w_fp[66], w_fp[27], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[27], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 170 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3829,7 +3829,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 171 - FFV1_0( w_fp[3], 
w_fp[60], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[60], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 171 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3842,11 +3842,11 @@ namespace mg5amcCpu // *** DIAGRAM 172 OF 1240 *** // Wavefunction(s) for diagram number 172 - VVV1P0_1( w_fp[1], w_fp[6], COUPs[0], 0., 0., w_fp[72] ); - VVV1P0_1( w_fp[72], w_fp[4], COUPs[0], 0., 0., w_fp[74] ); + VVV1P0_1( w_fp[1], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[72] ); + VVV1P0_1( w_fp[72], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[74] ); // Amplitude(s) for diagram number 172 - VVV1_0( w_fp[62], w_fp[74], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[74], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 172 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3863,10 +3863,10 @@ namespace mg5amcCpu // *** DIAGRAM 173 OF 1240 *** // Wavefunction(s) for diagram number 173 - VVV1P0_1( w_fp[72], w_fp[5], COUPs[0], 0., 0., w_fp[75] ); + VVV1P0_1( w_fp[72], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[75] ); // Amplitude(s) for diagram number 173 - VVV1_0( w_fp[62], w_fp[75], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[75], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 173 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3886,7 +3886,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 174 - VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3895,7 +3895,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 
) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3904,7 +3904,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3917,10 +3917,10 @@ namespace mg5amcCpu // *** DIAGRAM 175 OF 1240 *** // Wavefunction(s) for diagram number 175 - FFV1_2( w_fp[3], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[76] ); + FFV1_2( w_fp[3], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[76] ); // Amplitude(s) for diagram number 175 - FFV1_0( w_fp[76], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 175 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3934,7 +3934,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 176 - FFV1_0( w_fp[3], w_fp[9], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 176 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3950,7 +3950,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 177 - FFV1_0( w_fp[76], w_fp[55], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[55], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 177 
) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3964,7 +3964,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 178 - FFV1_0( w_fp[3], w_fp[55], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 178 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3977,10 +3977,10 @@ namespace mg5amcCpu // *** DIAGRAM 179 OF 1240 *** // Wavefunction(s) for diagram number 179 - FFV1_1( w_fp[34], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); + FFV1_1( w_fp[34], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); // Amplitude(s) for diagram number 179 - FFV1_0( w_fp[46], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 179 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3994,7 +3994,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 180 - VVV1_0( w_fp[72], w_fp[5], w_fp[73], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[73], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 180 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4010,7 +4010,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 181 - FFV1_0( w_fp[46], w_fp[55], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[55], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 181 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4024,7 +4024,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 182 - FFV1_0( w_fp[38], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], 
w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 182 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4038,7 +4038,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 183 - VVV1_0( w_fp[72], w_fp[4], w_fp[67], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[67], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 183 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4054,7 +4054,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 184 - FFV1_0( w_fp[38], w_fp[9], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[9], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 184 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4068,7 +4068,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 185 - FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 185 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4084,7 +4084,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 186 - VVV1_0( w_fp[72], w_fp[24], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[24], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 186 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4104,7 +4104,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 187 - FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 187 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4117,10 +4117,10 @@ namespace mg5amcCpu // *** DIAGRAM 188 OF 1240 *** // Wavefunction(s) for diagram number 188 - FFV1_1( w_fp[34], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); + FFV1_1( w_fp[34], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); // Amplitude(s) for diagram number 188 - FFV1_0( w_fp[7], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 188 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4133,7 +4133,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 189 - FFV1_0( w_fp[53], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 189 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4143,10 +4143,10 @@ namespace mg5amcCpu // *** DIAGRAM 190 OF 1240 *** // Wavefunction(s) for diagram number 190 - FFV1_2( w_fp[46], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[78] ); + FFV1_2( w_fp[46], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[78] ); // Amplitude(s) for diagram number 190 - FFV1_0( w_fp[78], w_fp[55], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[55], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 190 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4159,7 +4159,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 191 - FFV1_0( w_fp[53], w_fp[55], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[55], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 191 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4172,7 
+4172,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 192 - FFV1_0( w_fp[78], w_fp[57], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[57], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 192 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4185,7 +4185,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 193 - FFV1_0( w_fp[7], w_fp[57], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[57], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 193 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4198,7 +4198,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 194 - FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 194 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4212,7 +4212,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 195 - VVV1_0( w_fp[1], w_fp[29], w_fp[73], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[73], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 195 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4228,7 +4228,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 196 - FFV1_0( w_fp[46], w_fp[58], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[58], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 196 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4242,7 +4242,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 197 - FFV1_0( w_fp[25], w_fp[77], w_fp[6], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 197 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4255,7 +4255,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 198 - FFV1_0( w_fp[48], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 198 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4265,10 +4265,10 @@ namespace mg5amcCpu // *** DIAGRAM 199 OF 1240 *** // Wavefunction(s) for diagram number 199 - FFV1_2( w_fp[38], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[58] ); + FFV1_2( w_fp[38], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] ); // Amplitude(s) for diagram number 199 - FFV1_0( w_fp[58], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 199 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4281,7 +4281,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 200 - FFV1_0( w_fp[48], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 200 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4294,7 +4294,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 201 - FFV1_0( w_fp[58], w_fp[57], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[57], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 201 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4307,7 +4307,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 202 - FFV1_0( w_fp[25], w_fp[57], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[57], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 202 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4320,7 +4320,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 203 - FFV1_0( w_fp[38], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 203 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4334,7 +4334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 204 - VVV1_0( w_fp[1], w_fp[27], w_fp[67], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[67], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 204 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4350,7 +4350,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 205 - FFV1_0( w_fp[38], w_fp[60], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[60], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 205 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4364,7 +4364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 206 - FFV1_0( w_fp[28], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 206 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4377,7 +4377,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 207 - FFV1_0( w_fp[40], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[40], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 207 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4387,10 +4387,10 @@ namespace mg5amcCpu // *** DIAGRAM 208 OF 1240 *** // Wavefunction(s) for diagram number 208 - FFV1_2( w_fp[41], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); + FFV1_2( w_fp[41], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); // Amplitude(s) for diagram number 208 - FFV1_0( w_fp[60], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 208 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4403,7 +4403,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 209 - FFV1_0( w_fp[40], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 209 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4416,7 +4416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 210 - FFV1_0( w_fp[60], w_fp[55], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[55], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 210 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4429,7 +4429,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 211 - FFV1_0( w_fp[28], w_fp[55], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[55], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 211 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4442,7 +4442,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 212 - FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 212 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4456,7 +4456,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 213 - VVV1_0( w_fp[1], w_fp[24], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[68], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 213 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4472,7 +4472,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 214 - FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 214 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4486,7 +4486,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 215 - FFV1_0( w_fp[26], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 215 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4500,7 +4500,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 216 - FFV1_0( w_fp[3], w_fp[77], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 216 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4513,10 +4513,10 @@ namespace mg5amcCpu // *** DIAGRAM 217 OF 1240 *** // Wavefunction(s) for diagram number 217 - VVV1P0_1( w_fp[1], w_fp[24], COUPs[0], 0., 0., w_fp[59] ); + VVV1P0_1( w_fp[1], 
w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[59] ); // Amplitude(s) for diagram number 217 - VVV1_0( w_fp[62], w_fp[59], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[59], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 217 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4536,7 +4536,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 218 - VVV1_0( w_fp[62], w_fp[1], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 218 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4556,7 +4556,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 219 - VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4565,7 +4565,7 @@ namespace mg5amcCpu jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4574,7 +4574,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -4590,7 
+4590,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 220 - FFV1_0( w_fp[3], w_fp[57], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 220 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4606,7 +4606,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 221 - FFV1_0( w_fp[26], w_fp[57], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[57], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 221 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4620,7 +4620,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 222 - FFV1_0( w_fp[14], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 222 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4634,7 +4634,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 223 - FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 223 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4647,10 +4647,10 @@ namespace mg5amcCpu // *** DIAGRAM 224 OF 1240 *** // Wavefunction(s) for diagram number 224 - VVV1P0_1( w_fp[1], w_fp[27], COUPs[0], 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[1], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 224 - VVV1_0( w_fp[62], w_fp[68], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[68], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 224 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4670,7 +4670,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 225 - VVV1_0( w_fp[62], w_fp[1], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 225 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4690,7 +4690,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 226 - VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4699,7 +4699,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4708,7 +4708,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4724,7 +4724,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 227 - FFV1_0( w_fp[3], w_fp[55], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 227 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4740,7 +4740,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 228 - FFV1_0( w_fp[14], w_fp[55], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[55], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 228 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4754,7 +4754,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 229 - FFV1_0( w_fp[12], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 229 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4768,7 +4768,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 230 - FFV1_0( w_fp[3], w_fp[77], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 230 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4781,10 +4781,10 @@ namespace mg5amcCpu // *** DIAGRAM 231 OF 1240 *** // Wavefunction(s) for diagram number 231 - VVV1P0_1( w_fp[1], w_fp[29], COUPs[0], 0., 0., w_fp[67] ); + VVV1P0_1( w_fp[1], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[67] ); // Amplitude(s) for diagram number 231 - VVV1_0( w_fp[62], w_fp[67], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[67], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 231 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4804,7 +4804,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 232 - VVV1_0( w_fp[62], w_fp[1], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 232 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4824,7 +4824,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 233 - VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4833,7 +4833,7 @@ namespace mg5amcCpu jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4842,7 +4842,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -4858,7 +4858,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 234 - FFV1_0( w_fp[3], w_fp[9], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 234 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4874,7 +4874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 235 - FFV1_0( w_fp[12], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 235 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4885,12 +4885,12 @@ namespace mg5amcCpu // *** DIAGRAM 236 OF 1240 *** // Wavefunction(s) for diagram number 236 - VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[73] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[79] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[80] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[73] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[79] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[80] ); // Amplitude(s) for diagram number 236 - VVV1_0( w_fp[73], w_fp[6], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[73], w_fp[6], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4899,7 +4899,7 @@ namespace mg5amcCpu jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[79], w_fp[6], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[79], w_fp[6], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4908,7 +4908,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[80], w_fp[6], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[80], w_fp[6], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -4924,17 +4924,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 237 - FFV1_0( w_fp[3], w_fp[57], w_fp[73], COUPs[1], &_fp[0] ); + 
FFV1_0( w_fp[3], w_fp[57], w_fp[73], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[57], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[79], COUPs[1], 1.0, &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[57], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[80], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -4946,17 +4946,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 238 - FFV1_0( w_fp[41], w_fp[34], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[34], w_fp[73], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[34], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[34], w_fp[79], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[12] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[34], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[34], w_fp[80], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -4965,12 +4965,12 @@ namespace mg5amcCpu // *** DIAGRAM 239 OF 1240 *** // Wavefunction(s) for diagram number 239 - VVVV1P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[57] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[81] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[82] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[57] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[81] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[82] ); // Amplitude(s) for diagram number 239 - VVV1_0( w_fp[57], w_fp[5], w_fp[62], COUPs[0], &_fp[0] 
); + VVV1_0( w_fp[57], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4979,7 +4979,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[81], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[81], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4988,7 +4988,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[82], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[82], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -5004,17 +5004,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 240 - FFV1_0( w_fp[3], w_fp[55], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[57], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[55], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[81], COUPs[1], 1.0, &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[55], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[82], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[16] += amp_sv[0]; @@ -5026,17 +5026,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 241 - FFV1_0( w_fp[38], w_fp[34], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[34], w_fp[57], COUPs[1], 
1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[34], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[34], w_fp[81], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[34], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[34], w_fp[82], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[18] += amp_sv[0]; @@ -5045,12 +5045,12 @@ namespace mg5amcCpu // *** DIAGRAM 242 OF 1240 *** // Wavefunction(s) for diagram number 242 - VVVV1P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[55] ); - VVVV3P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[83] ); - VVVV4P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[84] ); + VVVV1P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[55] ); + VVVV3P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[83] ); + VVVV4P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[84] ); // Amplitude(s) for diagram number 242 - VVV1_0( w_fp[55], w_fp[4], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[55], w_fp[4], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -5059,7 +5059,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[83], w_fp[4], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[83], w_fp[4], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -5068,7 +5068,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( 
w_fp[84], w_fp[4], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[84], w_fp[4], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -5084,17 +5084,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 243 - FFV1_0( w_fp[3], w_fp[9], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[55], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[83], COUPs[1], 1.0, &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[84], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -5106,17 +5106,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 244 - FFV1_0( w_fp[46], w_fp[34], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[34], w_fp[55], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[34], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[34], w_fp[83], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[34], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[34], w_fp[84], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[19] += amp_sv[0]; @@ -5128,17 +5128,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 245 - FFV1_0( w_fp[3], w_fp[77], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[30], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= 
amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[31], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[32], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -5150,7 +5150,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 246 - VVV1_0( w_fp[1], w_fp[30], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[30], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -5159,7 +5159,7 @@ namespace mg5amcCpu jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[31], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[31], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -5168,7 +5168,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[32], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[32], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -5181,13 +5181,13 @@ namespace mg5amcCpu // *** DIAGRAM 247 OF 1240 *** // Wavefunction(s) for diagram number 247 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[62] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); - FFV1_2( w_fp[62], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[34] ); - 
FFV1_1( w_fp[77], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); + FFV1_2( w_fp[62], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] ); + FFV1_1( w_fp[77], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 247 - FFV1_0( w_fp[34], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 247 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5197,10 +5197,10 @@ namespace mg5amcCpu // *** DIAGRAM 248 OF 1240 *** // Wavefunction(s) for diagram number 248 - FFV1_1( w_fp[77], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[85] ); + FFV1_1( w_fp[77], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[85] ); // Amplitude(s) for diagram number 248 - FFV1_0( w_fp[34], w_fp[85], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[85], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 248 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5210,11 +5210,11 @@ namespace mg5amcCpu // *** DIAGRAM 249 OF 1240 *** // Wavefunction(s) for diagram number 249 - FFV1_2( w_fp[62], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[86] ); - FFV1_1( w_fp[77], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[87] ); + FFV1_2( w_fp[62], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] ); + FFV1_1( w_fp[77], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[87] ); // Amplitude(s) for diagram number 249 - FFV1_0( w_fp[86], w_fp[87], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[87], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 249 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5227,7 
+5227,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 250 - FFV1_0( w_fp[86], w_fp[85], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[85], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 250 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5237,10 +5237,10 @@ namespace mg5amcCpu // *** DIAGRAM 251 OF 1240 *** // Wavefunction(s) for diagram number 251 - FFV1_2( w_fp[62], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[88] ); + FFV1_2( w_fp[62], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] ); // Amplitude(s) for diagram number 251 - FFV1_0( w_fp[88], w_fp[87], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[87], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 251 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5253,7 +5253,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 252 - FFV1_0( w_fp[88], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 252 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5263,10 +5263,10 @@ namespace mg5amcCpu // *** DIAGRAM 253 OF 1240 *** // Wavefunction(s) for diagram number 253 - FFV1P0_3( w_fp[62], w_fp[77], COUPs[1], 0., 0., w_fp[89] ); + FFV1P0_3( w_fp[62], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[89] ); // Amplitude(s) for diagram number 253 - VVV1_0( w_fp[24], w_fp[6], w_fp[89], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[89], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 253 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5279,10 +5279,10 @@ namespace mg5amcCpu // *** DIAGRAM 254 OF 1240 *** // Wavefunction(s) for diagram 
number 254 - FFV1_2( w_fp[62], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[90] ); + FFV1_2( w_fp[62], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] ); // Amplitude(s) for diagram number 254 - FFV1_0( w_fp[90], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 254 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5296,7 +5296,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 255 - FFV1_0( w_fp[88], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 255 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5310,7 +5310,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 256 - VVV1_0( w_fp[27], w_fp[5], w_fp[89], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[89], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 256 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5323,10 +5323,10 @@ namespace mg5amcCpu // *** DIAGRAM 257 OF 1240 *** // Wavefunction(s) for diagram number 257 - FFV1_2( w_fp[62], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[91] ); + FFV1_2( w_fp[62], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] ); // Amplitude(s) for diagram number 257 - FFV1_0( w_fp[91], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[91], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 257 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5340,7 +5340,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 258 - FFV1_0( w_fp[86], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[77], 
w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 258 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5354,7 +5354,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 259 - VVV1_0( w_fp[4], w_fp[29], w_fp[89], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[89], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 259 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5370,7 +5370,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 260 - FFV1_0( w_fp[34], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 260 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5381,10 +5381,10 @@ namespace mg5amcCpu // *** DIAGRAM 261 OF 1240 *** // Wavefunction(s) for diagram number 261 - FFV1_2( w_fp[62], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[89] ); + FFV1_2( w_fp[62], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] ); // Amplitude(s) for diagram number 261 - FFV1_0( w_fp[89], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[89], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 261 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5398,17 +5398,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 262 - FFV1_0( w_fp[62], w_fp[77], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[30], COUPs[1], 1.0, &_fp[0] ); jamp_sv[33] += amp_sv[0]; jamp_sv[35] -= amp_sv[0]; jamp_sv[41] -= amp_sv[0]; jamp_sv[47] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[77], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[31], COUPs[1], 1.0, &_fp[0] ); jamp_sv[35] -= 
amp_sv[0]; jamp_sv[39] += amp_sv[0]; jamp_sv[41] -= amp_sv[0]; jamp_sv[45] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[77], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[32], COUPs[1], 1.0, &_fp[0] ); jamp_sv[33] -= amp_sv[0]; jamp_sv[39] += amp_sv[0]; jamp_sv[45] += amp_sv[0]; @@ -5417,10 +5417,10 @@ namespace mg5amcCpu // *** DIAGRAM 263 OF 1240 *** // Wavefunction(s) for diagram number 263 - FFV1P0_3( w_fp[62], w_fp[2], COUPs[1], 0., 0., w_fp[92] ); + FFV1P0_3( w_fp[62], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[92] ); // Amplitude(s) for diagram number 263 - VVV1_0( w_fp[92], w_fp[63], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[63], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 263 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5440,7 +5440,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 264 - VVV1_0( w_fp[92], w_fp[64], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[64], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 264 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5460,7 +5460,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 265 - VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] += cxtype( 0, 1 ) * amp_sv[0]; @@ -5469,7 +5469,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] += cxtype( 0, 1 ) * 
amp_sv[0]; jamp_sv[81] += cxtype( 0, 1 ) * amp_sv[0]; @@ -5478,7 +5478,7 @@ namespace mg5amcCpu jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -5491,10 +5491,10 @@ namespace mg5amcCpu // *** DIAGRAM 266 OF 1240 *** // Wavefunction(s) for diagram number 266 - FFV1_1( w_fp[2], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[93] ); + FFV1_1( w_fp[2], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[93] ); // Amplitude(s) for diagram number 266 - FFV1_0( w_fp[86], w_fp[93], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[93], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 266 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5508,7 +5508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 267 - FFV1_0( w_fp[86], w_fp[2], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 267 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5524,7 +5524,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 268 - FFV1_0( w_fp[88], w_fp[93], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[93], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 268 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5538,7 +5538,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 269 - FFV1_0( w_fp[88], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[88], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 269 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5551,10 +5551,10 @@ namespace mg5amcCpu // *** DIAGRAM 270 OF 1240 *** // Wavefunction(s) for diagram number 270 - FFV1_2( w_fp[62], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[94] ); + FFV1_2( w_fp[62], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] ); // Amplitude(s) for diagram number 270 - FFV1_0( w_fp[94], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[94], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 270 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5565,10 +5565,10 @@ namespace mg5amcCpu // *** DIAGRAM 271 OF 1240 *** // Wavefunction(s) for diagram number 271 - FFV1P0_3( w_fp[62], w_fp[39], COUPs[1], 0., 0., w_fp[95] ); + FFV1P0_3( w_fp[62], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[95] ); // Amplitude(s) for diagram number 271 - VVV1_0( w_fp[61], w_fp[6], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 271 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5584,7 +5584,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 272 - FFV1_0( w_fp[88], w_fp[39], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[39], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 272 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5598,7 +5598,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 273 - FFV1_0( w_fp[94], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[94], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
if( channelId == 273 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5609,10 +5609,10 @@ namespace mg5amcCpu // *** DIAGRAM 274 OF 1240 *** // Wavefunction(s) for diagram number 274 - FFV1P0_3( w_fp[62], w_fp[47], COUPs[1], 0., 0., w_fp[96] ); + FFV1P0_3( w_fp[62], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[96] ); // Amplitude(s) for diagram number 274 - VVV1_0( w_fp[61], w_fp[5], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[96], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 274 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5628,7 +5628,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 275 - FFV1_0( w_fp[86], w_fp[47], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[47], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 275 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5642,7 +5642,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 276 - FFV1_0( w_fp[94], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[94], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 276 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5658,7 +5658,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 277 - VVV1_0( w_fp[61], w_fp[29], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[29], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 277 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5678,7 +5678,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 278 - FFV1_0( w_fp[89], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[89], w_fp[2], 
w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 278 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5694,7 +5694,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 279 - VVV1_0( w_fp[92], w_fp[69], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[69], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 279 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5714,7 +5714,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 280 - VVV1_0( w_fp[92], w_fp[70], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[70], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 280 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5734,7 +5734,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 281 - VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -5743,7 +5743,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -5752,7 +5752,7 @@ namespace mg5amcCpu jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], &_fp[0] 
); + VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -5765,10 +5765,10 @@ namespace mg5amcCpu // *** DIAGRAM 282 OF 1240 *** // Wavefunction(s) for diagram number 282 - FFV1_1( w_fp[2], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[94] ); + FFV1_1( w_fp[2], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] ); // Amplitude(s) for diagram number 282 - FFV1_0( w_fp[34], w_fp[94], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[94], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 282 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5782,7 +5782,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 283 - FFV1_0( w_fp[34], w_fp[2], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 283 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5798,7 +5798,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 284 - FFV1_0( w_fp[88], w_fp[94], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[94], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 284 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5812,7 +5812,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 285 - FFV1_0( w_fp[88], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 285 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5825,10 +5825,10 @@ namespace mg5amcCpu // *** DIAGRAM 286 OF 1240 *** // 
Wavefunction(s) for diagram number 286 - FFV1_2( w_fp[62], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[97] ); + FFV1_2( w_fp[62], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] ); // Amplitude(s) for diagram number 286 - FFV1_0( w_fp[97], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[97], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 286 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5839,10 +5839,10 @@ namespace mg5amcCpu // *** DIAGRAM 287 OF 1240 *** // Wavefunction(s) for diagram number 287 - FFV1P0_3( w_fp[62], w_fp[33], COUPs[1], 0., 0., w_fp[98] ); + FFV1P0_3( w_fp[62], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[98] ); // Amplitude(s) for diagram number 287 - VVV1_0( w_fp[66], w_fp[6], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 287 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5858,7 +5858,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 288 - FFV1_0( w_fp[88], w_fp[33], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[33], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 288 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5872,7 +5872,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 289 - FFV1_0( w_fp[97], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[97], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 289 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5886,7 +5886,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 290 - VVV1_0( w_fp[66], w_fp[4], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], 
w_fp[4], w_fp[96], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 290 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5902,7 +5902,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 291 - FFV1_0( w_fp[34], w_fp[47], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[47], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 291 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5916,7 +5916,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 292 - FFV1_0( w_fp[97], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[97], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 292 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5932,7 +5932,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 293 - VVV1_0( w_fp[66], w_fp[27], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[27], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 293 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5952,7 +5952,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 294 - FFV1_0( w_fp[91], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[91], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 294 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5968,7 +5968,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 295 - VVV1_0( w_fp[92], w_fp[74], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[74], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 295 ) numerators_sv 
+= cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5988,7 +5988,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 296 - VVV1_0( w_fp[92], w_fp[75], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[75], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 296 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6008,7 +6008,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 297 - VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -6017,7 +6017,7 @@ namespace mg5amcCpu jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[47] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -6026,7 +6026,7 @@ namespace mg5amcCpu jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -6039,10 +6039,10 @@ namespace mg5amcCpu // *** DIAGRAM 298 OF 1240 *** // Wavefunction(s) for diagram number 298 - FFV1_1( w_fp[2], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[97] ); + FFV1_1( w_fp[2], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] ); // Amplitude(s) for 
diagram number 298 - FFV1_0( w_fp[34], w_fp[97], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[97], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 298 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6056,7 +6056,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 299 - FFV1_0( w_fp[34], w_fp[2], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 299 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6072,7 +6072,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 300 - FFV1_0( w_fp[86], w_fp[97], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[97], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 300 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6086,7 +6086,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 301 - FFV1_0( w_fp[86], w_fp[2], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 301 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6099,10 +6099,10 @@ namespace mg5amcCpu // *** DIAGRAM 302 OF 1240 *** // Wavefunction(s) for diagram number 302 - FFV1_2( w_fp[62], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + FFV1_2( w_fp[62], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 302 - FFV1_0( w_fp[99], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 302 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( 
amp_sv[0] ); @@ -6116,7 +6116,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 303 - VVV1_0( w_fp[72], w_fp[5], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 303 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6132,7 +6132,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 304 - FFV1_0( w_fp[86], w_fp[33], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[33], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 304 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6146,7 +6146,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 305 - FFV1_0( w_fp[99], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 305 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6160,7 +6160,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 306 - VVV1_0( w_fp[72], w_fp[4], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 306 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6176,7 +6176,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 307 - FFV1_0( w_fp[34], w_fp[39], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[39], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 307 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6190,7 +6190,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 308 - FFV1_0( 
w_fp[99], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 308 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6206,7 +6206,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 309 - VVV1_0( w_fp[72], w_fp[24], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[24], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 309 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6226,7 +6226,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 310 - FFV1_0( w_fp[90], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 310 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6239,10 +6239,10 @@ namespace mg5amcCpu // *** DIAGRAM 311 OF 1240 *** // Wavefunction(s) for diagram number 311 - FFV1_2( w_fp[62], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + FFV1_2( w_fp[62], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 311 - FFV1_0( w_fp[99], w_fp[35], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[35], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 311 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6255,7 +6255,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 312 - FFV1_0( w_fp[99], w_fp[36], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[36], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 312 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6265,10 
+6265,10 @@ namespace mg5amcCpu // *** DIAGRAM 313 OF 1240 *** // Wavefunction(s) for diagram number 313 - FFV1_1( w_fp[33], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[100] ); + FFV1_1( w_fp[33], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[100] ); // Amplitude(s) for diagram number 313 - FFV1_0( w_fp[86], w_fp[100], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[100], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 313 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6281,7 +6281,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 314 - FFV1_0( w_fp[86], w_fp[36], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[36], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 314 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6294,7 +6294,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 315 - FFV1_0( w_fp[88], w_fp[100], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[100], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 315 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6307,7 +6307,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 316 - FFV1_0( w_fp[88], w_fp[35], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[35], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 316 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6320,7 +6320,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 317 - FFV1_0( w_fp[99], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 317 ) numerators_sv += 
cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6334,7 +6334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 318 - VVV1_0( w_fp[1], w_fp[29], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 318 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6350,7 +6350,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 319 - FFV1_0( w_fp[89], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[89], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 319 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6364,7 +6364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 320 - FFV1_0( w_fp[99], w_fp[43], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[43], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 320 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6377,7 +6377,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 321 - FFV1_0( w_fp[99], w_fp[44], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[44], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 321 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6387,10 +6387,10 @@ namespace mg5amcCpu // *** DIAGRAM 322 OF 1240 *** // Wavefunction(s) for diagram number 322 - FFV1_1( w_fp[39], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[89] ); + FFV1_1( w_fp[39], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] ); // Amplitude(s) for diagram number 322 - FFV1_0( w_fp[34], w_fp[89], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[89], w_fp[6], COUPs[1], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 322 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6403,7 +6403,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 323 - FFV1_0( w_fp[34], w_fp[44], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[44], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 323 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6416,7 +6416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 324 - FFV1_0( w_fp[88], w_fp[89], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[89], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 324 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6429,7 +6429,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 325 - FFV1_0( w_fp[88], w_fp[43], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[43], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 325 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6442,7 +6442,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 326 - FFV1_0( w_fp[99], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 326 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6456,7 +6456,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 327 - VVV1_0( w_fp[1], w_fp[27], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 327 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId 
!= 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6472,7 +6472,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 328 - FFV1_0( w_fp[91], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[91], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 328 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6486,7 +6486,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 329 - FFV1_0( w_fp[99], w_fp[49], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[49], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 329 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6499,7 +6499,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 330 - FFV1_0( w_fp[99], w_fp[50], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[50], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 330 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6509,10 +6509,10 @@ namespace mg5amcCpu // *** DIAGRAM 331 OF 1240 *** // Wavefunction(s) for diagram number 331 - FFV1_1( w_fp[47], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[91] ); + FFV1_1( w_fp[47], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] ); // Amplitude(s) for diagram number 331 - FFV1_0( w_fp[34], w_fp[91], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[91], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 331 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6525,7 +6525,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 332 - FFV1_0( w_fp[34], w_fp[50], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[50], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 332 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6538,7 +6538,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 333 - FFV1_0( w_fp[86], w_fp[91], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[91], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 333 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6551,7 +6551,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 334 - FFV1_0( w_fp[86], w_fp[49], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[49], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 334 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6564,7 +6564,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 335 - FFV1_0( w_fp[99], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 335 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6578,7 +6578,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 336 - VVV1_0( w_fp[1], w_fp[24], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[96], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 336 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6594,7 +6594,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 337 - FFV1_0( w_fp[90], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 337 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -6608,7 +6608,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 338 - FFV1_0( w_fp[99], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 338 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6622,7 +6622,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 339 - FFV1_0( w_fp[99], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 339 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6638,7 +6638,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 340 - VVV1_0( w_fp[92], w_fp[59], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[59], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 340 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6658,7 +6658,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 341 - VVV1_0( w_fp[92], w_fp[1], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[1], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 341 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6678,7 +6678,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 342 - VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6687,7 +6687,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6696,7 +6696,7 @@ namespace mg5amcCpu jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -6712,7 +6712,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 343 - FFV1_0( w_fp[88], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 343 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6728,7 +6728,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 344 - FFV1_0( w_fp[88], w_fp[17], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[17], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 344 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6742,7 +6742,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 345 - FFV1_0( w_fp[99], w_fp[15], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[15], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 345 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6756,7 +6756,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 346 - FFV1_0( w_fp[99], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 346 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6772,7 +6772,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 347 - VVV1_0( w_fp[92], w_fp[68], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[68], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 347 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6792,7 +6792,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 348 - VVV1_0( w_fp[92], w_fp[1], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[1], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 348 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6812,7 +6812,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 349 - VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6821,7 +6821,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6830,7 +6830,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * 
amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6846,7 +6846,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 350 - FFV1_0( w_fp[86], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 350 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6862,7 +6862,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 351 - FFV1_0( w_fp[86], w_fp[15], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[15], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 351 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6876,7 +6876,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 352 - FFV1_0( w_fp[99], w_fp[18], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[18], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 352 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6890,7 +6890,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 353 - FFV1_0( w_fp[99], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 353 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6906,7 +6906,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 354 - VVV1_0( w_fp[92], w_fp[67], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], 
w_fp[67], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 354 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6926,7 +6926,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 355 - VVV1_0( w_fp[92], w_fp[1], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[1], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 355 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6946,7 +6946,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 356 - VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6955,7 +6955,7 @@ namespace mg5amcCpu jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6964,7 +6964,7 @@ namespace mg5amcCpu jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -6980,7 +6980,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 357 - FFV1_0( w_fp[34], w_fp[2], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[34], w_fp[2], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 357 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6996,7 +6996,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 358 - FFV1_0( w_fp[34], w_fp[18], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[18], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 358 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7010,7 +7010,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 359 - VVV1_0( w_fp[73], w_fp[6], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[73], w_fp[6], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7019,7 +7019,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[79], w_fp[6], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[79], w_fp[6], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7028,7 +7028,7 @@ namespace mg5amcCpu jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[80], w_fp[6], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[80], w_fp[6], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7044,17 +7044,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 360 - FFV1_0( w_fp[88], w_fp[2], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[73], COUPs[1], 1.0, 
&_fp[0] ); jamp_sv[33] += amp_sv[0]; jamp_sv[39] -= amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[87] += amp_sv[0]; - FFV1_0( w_fp[88], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); jamp_sv[39] -= amp_sv[0]; jamp_sv[57] += amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[81] += amp_sv[0]; - FFV1_0( w_fp[88], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); jamp_sv[33] -= amp_sv[0]; jamp_sv[57] += amp_sv[0]; jamp_sv[81] += amp_sv[0]; @@ -7066,17 +7066,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 361 - FFV1_0( w_fp[62], w_fp[47], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[47], w_fp[73], COUPs[1], 1.0, &_fp[0] ); jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[47], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[47], w_fp[79], COUPs[1], 1.0, &_fp[0] ); jamp_sv[107] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[47], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[47], w_fp[80], COUPs[1], 1.0, &_fp[0] ); jamp_sv[105] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; @@ -7088,7 +7088,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 362 - VVV1_0( w_fp[57], w_fp[5], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[57], w_fp[5], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7097,7 +7097,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[81], w_fp[5], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[81], w_fp[5], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[45] += cxtype( 0, 1 ) * 
amp_sv[0]; jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7106,7 +7106,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[82], w_fp[5], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[82], w_fp[5], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7122,17 +7122,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 363 - FFV1_0( w_fp[86], w_fp[2], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[57], COUPs[1], 1.0, &_fp[0] ); jamp_sv[35] += amp_sv[0]; jamp_sv[45] -= amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - FFV1_0( w_fp[86], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); jamp_sv[45] -= amp_sv[0]; jamp_sv[59] += amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; - FFV1_0( w_fp[86], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); jamp_sv[35] -= amp_sv[0]; jamp_sv[59] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; @@ -7144,17 +7144,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 364 - FFV1_0( w_fp[62], w_fp[39], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[57], COUPs[1], 1.0, &_fp[0] ); jamp_sv[81] += amp_sv[0]; jamp_sv[83] -= amp_sv[0]; jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[39], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[81], COUPs[1], 1.0, &_fp[0] ); jamp_sv[83] -= amp_sv[0]; jamp_sv[87] += amp_sv[0]; jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[39], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[82], COUPs[1], 1.0, &_fp[0] ); jamp_sv[81] -= amp_sv[0]; jamp_sv[87] 
+= amp_sv[0]; jamp_sv[93] += amp_sv[0]; @@ -7166,7 +7166,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 365 - VVV1_0( w_fp[55], w_fp[4], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[55], w_fp[4], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7175,7 +7175,7 @@ namespace mg5amcCpu jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[83], w_fp[4], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[83], w_fp[4], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7184,7 +7184,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[84], w_fp[4], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[84], w_fp[4], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7200,17 +7200,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 366 - FFV1_0( w_fp[34], w_fp[2], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[55], COUPs[1], 1.0, &_fp[0] ); jamp_sv[41] += amp_sv[0]; jamp_sv[47] -= amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[83], COUPs[1], 1.0, &_fp[0] ); jamp_sv[47] -= amp_sv[0]; jamp_sv[83] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[84], COUPs[1], 1.0, &_fp[0] ); jamp_sv[41] -= amp_sv[0]; jamp_sv[83] += amp_sv[0]; jamp_sv[107] += amp_sv[0]; @@ 
-7222,17 +7222,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 367 - FFV1_0( w_fp[62], w_fp[33], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[55], COUPs[1], 1.0, &_fp[0] ); jamp_sv[57] += amp_sv[0]; jamp_sv[59] -= amp_sv[0]; jamp_sv[65] -= amp_sv[0]; jamp_sv[71] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[33], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[83], COUPs[1], 1.0, &_fp[0] ); jamp_sv[59] -= amp_sv[0]; jamp_sv[63] += amp_sv[0]; jamp_sv[65] -= amp_sv[0]; jamp_sv[69] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[33], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[84], COUPs[1], 1.0, &_fp[0] ); jamp_sv[57] -= amp_sv[0]; jamp_sv[63] += amp_sv[0]; jamp_sv[69] += amp_sv[0]; @@ -7244,17 +7244,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 368 - FFV1_0( w_fp[99], w_fp[2], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[30], COUPs[1], 1.0, &_fp[0] ); jamp_sv[65] += amp_sv[0]; jamp_sv[71] -= amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[31], COUPs[1], 1.0, &_fp[0] ); jamp_sv[71] -= amp_sv[0]; jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[32], COUPs[1], 1.0, &_fp[0] ); jamp_sv[65] -= amp_sv[0]; jamp_sv[89] += amp_sv[0]; jamp_sv[113] += amp_sv[0]; @@ -7266,7 +7266,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 369 - VVV1_0( w_fp[1], w_fp[30], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[30], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7275,7 +7275,7 @@ namespace mg5amcCpu jamp_sv[71] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] 
+= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[31], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[31], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7284,7 +7284,7 @@ namespace mg5amcCpu jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[32], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[32], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7297,11 +7297,11 @@ namespace mg5amcCpu // *** DIAGRAM 370 OF 1240 *** // Wavefunction(s) for diagram number 370 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[92] ); - FFV1_2( w_fp[3], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[92] ); + FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 370 - FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 370 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7315,7 +7315,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 371 - FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 371 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7326,11 +7326,11 @@ namespace mg5amcCpu // *** DIAGRAM 372 OF 1240 *** // Wavefunction(s) for diagram number 372 - VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 0., 0., w_fp[62] ); - FFV1P0_3( w_fp[3], 
w_fp[77], COUPs[1], 0., 0., w_fp[34] ); + VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[62] ); + FFV1P0_3( w_fp[3], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[34] ); // Amplitude(s) for diagram number 372 - VVV1_0( w_fp[62], w_fp[34], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[34], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 372 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7350,7 +7350,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 373 - FFV1_0( w_fp[3], w_fp[85], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[85], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 373 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7363,10 +7363,10 @@ namespace mg5amcCpu // *** DIAGRAM 374 OF 1240 *** // Wavefunction(s) for diagram number 374 - VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 374 - VVV1_0( w_fp[86], w_fp[34], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[34], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 374 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7386,7 +7386,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 375 - FFV1_0( w_fp[3], w_fp[9], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 375 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7399,12 +7399,12 @@ namespace mg5amcCpu // *** DIAGRAM 376 OF 1240 *** // Wavefunction(s) for diagram number 376 - VVVV1P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[88] ); - 
VVVV3P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[90] ); - VVVV4P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[96] ); + VVVV1P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] ); + VVVV3P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] ); + VVVV4P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] ); // Amplitude(s) for diagram number 376 - FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7413,7 +7413,7 @@ namespace mg5amcCpu jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[90], COUPs[1], 1.0, &_fp[0] ); jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7422,7 +7422,7 @@ namespace mg5amcCpu jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[96], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7435,10 +7435,10 @@ namespace mg5amcCpu // *** DIAGRAM 377 OF 1240 *** // Wavefunction(s) for diagram number 377 - FFV1_1( w_fp[77], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[95] ); + FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[95] ); // Amplitude(s) for diagram number 377 - FFV1_0( w_fp[38], w_fp[95], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[95], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 377 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7449,10 +7449,10 @@ namespace mg5amcCpu // *** DIAGRAM 378 OF 1240 *** // Wavefunction(s) for diagram number 378 - FFV1_2( w_fp[38], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); + FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 378 - FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 378 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7466,7 +7466,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 379 - FFV1_0( w_fp[38], w_fp[77], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 379 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7482,7 +7482,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 380 - FFV1_0( w_fp[41], w_fp[95], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[95], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 380 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7493,10 +7493,10 @@ namespace mg5amcCpu // *** DIAGRAM 381 OF 1240 *** // Wavefunction(s) for diagram number 381 - FFV1_2( w_fp[41], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[101] ); + FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[101] ); // Amplitude(s) for diagram number 381 - FFV1_0( w_fp[101], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[101], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 381 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -7510,7 +7510,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 382 - FFV1_0( w_fp[41], w_fp[77], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 382 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7526,7 +7526,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 383 - FFV1_0( w_fp[3], w_fp[95], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[95], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 383 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7542,7 +7542,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 384 - FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 384 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7555,10 +7555,10 @@ namespace mg5amcCpu // *** DIAGRAM 385 OF 1240 *** // Wavefunction(s) for diagram number 385 - VVV1P0_1( w_fp[92], w_fp[29], COUPs[0], 0., 0., w_fp[95] ); + VVV1P0_1( w_fp[92], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[95] ); // Amplitude(s) for diagram number 385 - FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 385 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7575,10 +7575,10 @@ namespace mg5amcCpu // *** DIAGRAM 386 OF 1240 *** // Wavefunction(s) for diagram number 386 - FFV1_1( w_fp[2], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[102] ); + FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] ); 
// Amplitude(s) for diagram number 386 - FFV1_0( w_fp[22], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 386 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7592,7 +7592,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 387 - FFV1_0( w_fp[21], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 387 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7603,10 +7603,10 @@ namespace mg5amcCpu // *** DIAGRAM 388 OF 1240 *** // Wavefunction(s) for diagram number 388 - FFV1P0_3( w_fp[52], w_fp[2], COUPs[1], 0., 0., w_fp[103] ); + FFV1P0_3( w_fp[52], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[103] ); // Amplitude(s) for diagram number 388 - VVV1_0( w_fp[62], w_fp[103], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[103], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 388 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7626,7 +7626,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 389 - FFV1_0( w_fp[21], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 389 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7642,7 +7642,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 390 - VVV1_0( w_fp[86], w_fp[103], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[103], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 390 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -7662,7 +7662,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 391 - FFV1_0( w_fp[22], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 391 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7678,7 +7678,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 392 - FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7687,7 +7687,7 @@ namespace mg5amcCpu jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7696,7 +7696,7 @@ namespace mg5amcCpu jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7709,10 +7709,10 @@ namespace mg5amcCpu // *** DIAGRAM 393 OF 1240 *** // Wavefunction(s) for diagram number 393 - FFV1_2( w_fp[52], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[104] ); + FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] ); // Amplitude(s) for diagram number 393 - FFV1_0( w_fp[104], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], 
w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 393 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7723,10 +7723,10 @@ namespace mg5amcCpu // *** DIAGRAM 394 OF 1240 *** // Wavefunction(s) for diagram number 394 - FFV1_1( w_fp[39], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[105] ); + FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[105] ); // Amplitude(s) for diagram number 394 - FFV1_0( w_fp[52], w_fp[105], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[105], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 394 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7740,7 +7740,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 395 - FFV1_0( w_fp[52], w_fp[39], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 395 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7756,7 +7756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 396 - FFV1_0( w_fp[104], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 396 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7767,10 +7767,10 @@ namespace mg5amcCpu // *** DIAGRAM 397 OF 1240 *** // Wavefunction(s) for diagram number 397 - FFV1_1( w_fp[47], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[106] ); + FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] ); // Amplitude(s) for diagram number 397 - FFV1_0( w_fp[52], w_fp[106], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[106], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 397 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7784,7 +7784,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 398 - FFV1_0( w_fp[52], w_fp[47], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 398 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7800,7 +7800,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 399 - FFV1_0( w_fp[104], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 399 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7816,7 +7816,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 400 - FFV1_0( w_fp[52], w_fp[102], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[102], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 400 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7832,7 +7832,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 401 - FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 401 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7852,7 +7852,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 402 - FFV1_0( w_fp[71], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 402 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -7868,7 +7868,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 403 - FFV1_0( w_fp[3], w_fp[102], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 403 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7888,7 +7888,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 404 - FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 404 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7904,7 +7904,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 405 - FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 405 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7924,7 +7924,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 406 - FFV1_0( w_fp[3], w_fp[94], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[94], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 406 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7944,7 +7944,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 407 - FFV1_0( w_fp[71], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 407 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7964,7 +7964,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 408 - VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -7981,7 +7981,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -7998,7 +7998,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -8019,10 +8019,10 @@ namespace mg5amcCpu // *** DIAGRAM 409 OF 1240 *** // Wavefunction(s) for diagram number 409 - VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 409 - VVV1_0( w_fp[8], w_fp[6], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 409 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8047,10 +8047,10 @@ namespace mg5amcCpu // *** DIAGRAM 410 OF 1240 *** // Wavefunction(s) for diagram number 410 - VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 0., 0., w_fp[107] ); + VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[107] ); // Amplitude(s) for diagram number 410 - VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 410 ) numerators_sv += cxabs2( amp_sv[0] 
); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8078,7 +8078,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 411 - VVV1_0( w_fp[66], w_fp[8], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[8], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 411 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8106,7 +8106,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 412 - FFV1_0( w_fp[3], w_fp[47], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 412 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8126,7 +8126,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 413 - FFV1_0( w_fp[3], w_fp[106], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[106], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 413 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8142,7 +8142,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 414 - FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 414 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8158,7 +8158,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 415 - FFV1_0( w_fp[41], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 415 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8178,7 +8178,7 @@ namespace mg5amcCpu // (none) 
// Amplitude(s) for diagram number 416 - FFV1_0( w_fp[41], w_fp[102], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[102], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 416 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8194,7 +8194,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 417 - FFV1_0( w_fp[101], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[101], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 417 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8210,7 +8210,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 418 - FFV1_0( w_fp[76], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 418 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8226,7 +8226,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 419 - FFV1_0( w_fp[3], w_fp[102], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 419 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8246,7 +8246,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 420 - FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 420 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8262,7 +8262,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 421 - FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 421 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8282,7 +8282,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 422 - FFV1_0( w_fp[3], w_fp[97], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[97], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 422 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8302,7 +8302,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 423 - FFV1_0( w_fp[76], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 423 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8322,7 +8322,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 424 - VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -8339,7 +8339,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[7] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; @@ -8356,7 +8356,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -8377,10 +8377,10 @@ namespace 
mg5amcCpu // *** DIAGRAM 425 OF 1240 *** // Wavefunction(s) for diagram number 425 - VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 425 - VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 425 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8408,7 +8408,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 426 - VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 426 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8436,7 +8436,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 427 - VVV1_0( w_fp[72], w_fp[8], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[8], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 427 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8464,7 +8464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 428 - FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 428 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8484,7 +8484,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 429 - FFV1_0( w_fp[3], w_fp[105], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[105], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 429 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 
0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8500,7 +8500,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 430 - FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 430 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8516,7 +8516,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 431 - FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 431 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8536,7 +8536,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 432 - FFV1_0( w_fp[38], w_fp[102], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[102], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 432 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8552,7 +8552,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 433 - FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 433 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8565,10 +8565,10 @@ namespace mg5amcCpu // *** DIAGRAM 434 OF 1240 *** // Wavefunction(s) for diagram number 434 - VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 434 - VVV1_0( w_fp[104], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 434 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8596,7 +8596,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 435 - VVV1_0( w_fp[104], w_fp[11], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[11], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 435 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8624,7 +8624,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 436 - VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -8641,7 +8641,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -8658,7 +8658,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -8679,10 +8679,10 @@ namespace mg5amcCpu // *** DIAGRAM 437 OF 1240 *** // Wavefunction(s) for diagram number 437 - VVV1P0_1( w_fp[1], w_fp[8], COUPs[0], 0., 0., w_fp[108] ); + VVV1P0_1( w_fp[1], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[108] ); // Amplitude(s) for diagram number 437 - VVV1_0( w_fp[62], w_fp[108], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[108], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 437 
) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8710,7 +8710,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 438 - VVV1_0( w_fp[62], w_fp[1], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 438 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8738,7 +8738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 439 - VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[24] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -8755,7 +8755,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[115] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -8772,7 +8772,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -8796,7 +8796,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 440 - VVV1_0( w_fp[86], w_fp[108], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[108], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 440 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8824,7 +8824,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 441 - VVV1_0( w_fp[86], w_fp[1], w_fp[10], COUPs[0], &_fp[0] ); 
+ VVV1_0( w_fp[86], w_fp[1], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 441 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8852,7 +8852,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 442 - VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[25] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -8869,7 +8869,7 @@ namespace mg5amcCpu jamp_sv[94] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -8886,7 +8886,7 @@ namespace mg5amcCpu jamp_sv[99] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -8907,12 +8907,12 @@ namespace mg5amcCpu // *** DIAGRAM 443 OF 1240 *** // Wavefunction(s) for diagram number 443 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[109] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 443 - VVV1_0( w_fp[8], w_fp[6], w_fp[109], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[109], COUPs[0], 1.0, &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; 
jamp_sv[36] -= amp_sv[0]; @@ -8929,7 +8929,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[110], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[110], COUPs[0], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -8946,7 +8946,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[115] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[111], COUPs[0], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -8967,12 +8967,12 @@ namespace mg5amcCpu // *** DIAGRAM 444 OF 1240 *** // Wavefunction(s) for diagram number 444 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[112] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[113] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[114] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[113] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[114] ); // Amplitude(s) for diagram number 444 - VVV1_0( w_fp[8], w_fp[5], w_fp[112], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[112], COUPs[0], 1.0, &_fp[0] ); jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -8989,7 +8989,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[113], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[113], COUPs[0], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -9006,7 +9006,7 @@ namespace mg5amcCpu jamp_sv[94] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[114], COUPs[0], &_fp[0] ); + VVV1_0( 
w_fp[8], w_fp[5], w_fp[114], COUPs[0], 1.0, &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -9030,7 +9030,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 445 - VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -9047,7 +9047,7 @@ namespace mg5amcCpu jamp_sv[94] -= amp_sv[0]; jamp_sv[115] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[90], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[90], COUPs[0], 1.0, &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[25] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -9064,7 +9064,7 @@ namespace mg5amcCpu jamp_sv[94] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[96], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[24] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -9088,7 +9088,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 446 - VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -9105,7 +9105,7 @@ namespace mg5amcCpu jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[9] -= amp_sv[0]; @@ -9122,7 +9122,7 @@ namespace mg5amcCpu jamp_sv[93] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); jamp_sv[9] 
-= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -9146,7 +9146,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 447 - VVV1_0( w_fp[8], w_fp[29], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[29], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 447 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9174,7 +9174,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 448 - VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 448 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9202,7 +9202,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 449 - VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 449 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9230,7 +9230,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 450 - VVV1_0( w_fp[104], w_fp[45], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[45], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 450 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9250,7 +9250,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 451 - FFV1_0( w_fp[3], w_fp[44], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[44], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 451 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9266,7 +9266,7 @@ namespace mg5amcCpu // (none) 
// Amplitude(s) for diagram number 452 - FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 452 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9280,7 +9280,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 453 - FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 453 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9294,7 +9294,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 454 - FFV1_0( w_fp[3], w_fp[89], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[89], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 454 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9310,7 +9310,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 455 - VVV1_0( w_fp[86], w_fp[1], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 455 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9330,7 +9330,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 456 - FFV1_0( w_fp[3], w_fp[39], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[112], COUPs[1], 1.0, &_fp[0] ); jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9339,7 +9339,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], 
w_fp[113], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[113], COUPs[1], 1.0, &_fp[0] ); jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9348,7 +9348,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[114], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[114], COUPs[1], 1.0, &_fp[0] ); jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9364,7 +9364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 457 - FFV1_0( w_fp[41], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 457 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9380,7 +9380,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 458 - FFV1_0( w_fp[41], w_fp[105], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[105], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 458 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9394,7 +9394,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 459 - FFV1_0( w_fp[101], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[101], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 459 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9408,7 +9408,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 460 - VVV1_0( w_fp[104], w_fp[51], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[51], w_fp[5], COUPs[0], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 460 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9428,7 +9428,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 461 - FFV1_0( w_fp[3], w_fp[50], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[50], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 461 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9444,7 +9444,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 462 - FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 462 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9458,7 +9458,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 463 - FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 463 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9472,7 +9472,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 464 - FFV1_0( w_fp[3], w_fp[91], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[91], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 464 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9488,7 +9488,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 465 - VVV1_0( w_fp[62], w_fp[1], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 465 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId 
!= 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9508,7 +9508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 466 - FFV1_0( w_fp[3], w_fp[47], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[109], COUPs[1], 1.0, &_fp[0] ); jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9517,7 +9517,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[110], COUPs[1], 1.0, &_fp[0] ); jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9526,7 +9526,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9542,7 +9542,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 467 - FFV1_0( w_fp[38], w_fp[47], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 467 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9558,7 +9558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 468 - FFV1_0( w_fp[38], w_fp[106], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[106], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 468 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += 
cxabs2( amp_sv[0] ); @@ -9572,7 +9572,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 469 - FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 469 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9586,7 +9586,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 470 - VVV1_0( w_fp[104], w_fp[23], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[23], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 470 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9606,7 +9606,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 471 - FFV1_0( w_fp[48], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 471 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9622,7 +9622,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 472 - FFV1_0( w_fp[58], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 472 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9636,7 +9636,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 473 - FFV1_0( w_fp[48], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 473 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9650,7 +9650,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 474 - 
FFV1_0( w_fp[58], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 474 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9666,7 +9666,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 475 - VVV1_0( w_fp[86], w_fp[1], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 475 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9686,7 +9686,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 476 - FFV1_0( w_fp[38], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9695,7 +9695,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[113], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[113], COUPs[1], 1.0, &_fp[0] ); jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9704,7 +9704,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[114], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[114], COUPs[1], 1.0, &_fp[0] ); jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9720,7 +9720,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 477 - VVV1_0( w_fp[104], w_fp[20], w_fp[5], COUPs[0], 
&_fp[0] ); + VVV1_0( w_fp[104], w_fp[20], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 477 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9740,7 +9740,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 478 - FFV1_0( w_fp[40], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 478 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9756,7 +9756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 479 - FFV1_0( w_fp[60], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 479 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9770,7 +9770,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 480 - FFV1_0( w_fp[40], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 480 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9784,7 +9784,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 481 - FFV1_0( w_fp[60], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 481 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9800,7 +9800,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 482 - VVV1_0( w_fp[62], w_fp[1], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( 
channelId == 482 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9820,7 +9820,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 483 - FFV1_0( w_fp[41], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9829,7 +9829,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9838,7 +9838,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9854,7 +9854,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 484 - FFV1_0( w_fp[3], w_fp[18], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[18], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 484 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9874,7 +9874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 485 - FFV1_0( w_fp[12], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 485 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9894,7 +9894,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 486 - FFV1_0( w_fp[3], w_fp[102], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 486 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9914,7 +9914,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 487 - FFV1_0( w_fp[12], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 487 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9930,7 +9930,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 488 - FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 488 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9950,7 +9950,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 489 - FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 489 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9966,7 +9966,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 490 - FFV1_0( w_fp[3], w_fp[102], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[55], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9975,7 +9975,7 @@ namespace mg5amcCpu jamp_sv[49] -= cxtype( 0, 1 ) * 
amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[83], COUPs[1], 1.0, &_fp[0] ); jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9984,7 +9984,7 @@ namespace mg5amcCpu jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[84], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -10000,7 +10000,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 491 - FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &_fp[0] ); jamp_sv[40] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10009,7 +10009,7 @@ namespace mg5amcCpu jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &_fp[0] ); jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -10018,7 +10018,7 @@ namespace mg5amcCpu jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &_fp[0] ); jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -10034,7 
+10034,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 492 - VVV1_0( w_fp[92], w_fp[55], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[55], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[9] += amp_sv[0]; @@ -10051,7 +10051,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[83], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[83], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[7] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; @@ -10068,7 +10068,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[84], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[84], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -10089,11 +10089,11 @@ namespace mg5amcCpu // *** DIAGRAM 493 OF 1240 *** // Wavefunction(s) for diagram number 493 - VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 0., 0., w_fp[92] ); - FFV1_2( w_fp[3], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[92] ); + FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 493 - FFV1_0( w_fp[99], w_fp[87], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[87], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 493 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10107,7 +10107,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 494 - FFV1_0( w_fp[99], w_fp[85], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[85], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 494 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += 
cxabs2( amp_sv[0] ); @@ -10118,10 +10118,10 @@ namespace mg5amcCpu // *** DIAGRAM 495 OF 1240 *** // Wavefunction(s) for diagram number 495 - VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 0., 0., w_fp[102] ); + VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[102] ); // Amplitude(s) for diagram number 495 - VVV1_0( w_fp[102], w_fp[34], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[34], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 495 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10141,7 +10141,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 496 - FFV1_0( w_fp[3], w_fp[85], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[85], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 496 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10154,10 +10154,10 @@ namespace mg5amcCpu // *** DIAGRAM 497 OF 1240 *** // Wavefunction(s) for diagram number 497 - VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 497 - VVV1_0( w_fp[104], w_fp[34], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[34], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 497 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10177,7 +10177,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 498 - FFV1_0( w_fp[3], w_fp[87], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[87], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 498 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10190,12 +10190,12 @@ namespace mg5amcCpu // *** DIAGRAM 499 OF 1240 
*** // Wavefunction(s) for diagram number 499 - VVVV1P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[111] ); - VVVV3P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[109] ); + VVVV1P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] ); + VVVV3P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[109] ); // Amplitude(s) for diagram number 499 - FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10204,7 +10204,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &_fp[0] ); jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10213,7 +10213,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &_fp[0] ); jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10226,10 +10226,10 @@ namespace mg5amcCpu // *** DIAGRAM 500 OF 1240 *** // Wavefunction(s) for diagram number 500 - FFV1_1( w_fp[77], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[62] ); + FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] ); // Amplitude(s) for diagram number 500 - FFV1_0( w_fp[46], w_fp[62], w_fp[6], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[46], w_fp[62], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 500 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10240,10 +10240,10 @@ namespace mg5amcCpu // *** DIAGRAM 501 OF 1240 *** // Wavefunction(s) for diagram number 501 - FFV1_2( w_fp[46], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[114] ); + FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] ); // Amplitude(s) for diagram number 501 - FFV1_0( w_fp[114], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[114], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 501 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10257,7 +10257,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 502 - FFV1_0( w_fp[46], w_fp[77], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 502 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10273,7 +10273,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 503 - FFV1_0( w_fp[41], w_fp[62], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[62], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 503 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10284,10 +10284,10 @@ namespace mg5amcCpu // *** DIAGRAM 504 OF 1240 *** // Wavefunction(s) for diagram number 504 - FFV1_2( w_fp[41], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[113] ); + FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] ); // Amplitude(s) for diagram number 504 - FFV1_0( w_fp[113], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[113], w_fp[77], w_fp[4], COUPs[1], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 504 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10301,7 +10301,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 505 - FFV1_0( w_fp[41], w_fp[77], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 505 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10317,7 +10317,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 506 - FFV1_0( w_fp[3], w_fp[62], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[62], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 506 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10333,7 +10333,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 507 - FFV1_0( w_fp[99], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 507 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10346,10 +10346,10 @@ namespace mg5amcCpu // *** DIAGRAM 508 OF 1240 *** // Wavefunction(s) for diagram number 508 - VVV1P0_1( w_fp[92], w_fp[27], COUPs[0], 0., 0., w_fp[62] ); + VVV1P0_1( w_fp[92], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[62] ); // Amplitude(s) for diagram number 508 - FFV1_0( w_fp[3], w_fp[77], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 508 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10366,10 +10366,10 @@ namespace mg5amcCpu // *** DIAGRAM 509 OF 1240 *** // Wavefunction(s) for diagram number 
509 - FFV1_1( w_fp[2], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[112] ); + FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[112] ); // Amplitude(s) for diagram number 509 - FFV1_0( w_fp[56], w_fp[112], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[112], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 509 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10383,7 +10383,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 510 - FFV1_0( w_fp[21], w_fp[112], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[112], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 510 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10397,7 +10397,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 511 - VVV1_0( w_fp[102], w_fp[103], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[103], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 511 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10417,7 +10417,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 512 - FFV1_0( w_fp[21], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 512 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10433,7 +10433,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 513 - VVV1_0( w_fp[104], w_fp[103], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[103], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 513 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ 
-10453,7 +10453,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 514 - FFV1_0( w_fp[56], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 514 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10469,7 +10469,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 515 - FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10478,7 +10478,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10487,7 +10487,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10500,10 +10500,10 @@ namespace mg5amcCpu // *** DIAGRAM 516 OF 1240 *** // Wavefunction(s) for diagram number 516 - FFV1_2( w_fp[52], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[86] ); + FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] ); // Amplitude(s) for diagram number 516 - FFV1_0( w_fp[86], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[33], w_fp[6], COUPs[1], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 516 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10514,10 +10514,10 @@ namespace mg5amcCpu // *** DIAGRAM 517 OF 1240 *** // Wavefunction(s) for diagram number 517 - FFV1_1( w_fp[33], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); + FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 517 - FFV1_0( w_fp[52], w_fp[98], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[98], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 517 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10531,7 +10531,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 518 - FFV1_0( w_fp[52], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 518 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10547,7 +10547,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 519 - FFV1_0( w_fp[86], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 519 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10558,10 +10558,10 @@ namespace mg5amcCpu // *** DIAGRAM 520 OF 1240 *** // Wavefunction(s) for diagram number 520 - FFV1_1( w_fp[47], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[106] ); + FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] ); // Amplitude(s) for diagram number 520 - FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( 
channelId == 520 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10575,7 +10575,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 521 - FFV1_0( w_fp[52], w_fp[47], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 521 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10591,7 +10591,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 522 - FFV1_0( w_fp[86], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 522 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10607,7 +10607,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 523 - FFV1_0( w_fp[52], w_fp[112], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[112], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 523 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10623,7 +10623,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 524 - FFV1_0( w_fp[52], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 524 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10643,7 +10643,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 525 - FFV1_0( w_fp[65], w_fp[112], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[112], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 525 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( 
amp_sv[0] ); @@ -10659,7 +10659,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 526 - FFV1_0( w_fp[3], w_fp[112], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 526 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10679,7 +10679,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 527 - FFV1_0( w_fp[99], w_fp[93], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[93], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 527 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10695,7 +10695,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 528 - FFV1_0( w_fp[99], w_fp[2], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 528 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10715,7 +10715,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 529 - FFV1_0( w_fp[3], w_fp[93], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[93], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 529 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10735,7 +10735,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 530 - FFV1_0( w_fp[65], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 530 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10755,7 +10755,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 531 - 
VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -10772,7 +10772,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -10789,7 +10789,7 @@ namespace mg5amcCpu jamp_sv[105] -= amp_sv[0]; jamp_sv[110] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -10810,10 +10810,10 @@ namespace mg5amcCpu // *** DIAGRAM 532 OF 1240 *** // Wavefunction(s) for diagram number 532 - VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 532 - VVV1_0( w_fp[8], w_fp[6], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 532 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10838,10 +10838,10 @@ namespace mg5amcCpu // *** DIAGRAM 533 OF 1240 *** // Wavefunction(s) for diagram number 533 - VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 0., 0., w_fp[101] ); + VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[101] ); // Amplitude(s) for diagram number 533 - VVV1_0( w_fp[61], w_fp[6], w_fp[101], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[101], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 533 ) numerators_sv += cxabs2( amp_sv[0] ); 
if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10869,7 +10869,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 534 - VVV1_0( w_fp[61], w_fp[8], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[8], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 534 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10897,7 +10897,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 535 - FFV1_0( w_fp[3], w_fp[47], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 535 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10917,7 +10917,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 536 - FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 536 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10933,7 +10933,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 537 - FFV1_0( w_fp[99], w_fp[47], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 537 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10949,7 +10949,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 538 - FFV1_0( w_fp[41], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 538 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10969,7 +10969,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 539 - FFV1_0( w_fp[41], w_fp[112], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[112], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 539 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10985,7 +10985,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 540 - FFV1_0( w_fp[113], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[113], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 540 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11001,7 +11001,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 541 - FFV1_0( w_fp[76], w_fp[112], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[112], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 541 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11017,7 +11017,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 542 - FFV1_0( w_fp[3], w_fp[112], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 542 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11037,7 +11037,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 543 - FFV1_0( w_fp[99], w_fp[97], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[97], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 543 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11053,7 +11053,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 544 - FFV1_0( w_fp[99], w_fp[2], w_fp[74], COUPs[1], &_fp[0] 
); + FFV1_0( w_fp[99], w_fp[2], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 544 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11073,7 +11073,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 545 - FFV1_0( w_fp[3], w_fp[97], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[97], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 545 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11093,7 +11093,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 546 - FFV1_0( w_fp[76], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 546 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11113,7 +11113,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 547 - VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -11130,7 +11130,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[103] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[13] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; @@ -11147,7 +11147,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[43] += amp_sv[0]; @@ 
-11168,10 +11168,10 @@ namespace mg5amcCpu // *** DIAGRAM 548 OF 1240 *** // Wavefunction(s) for diagram number 548 - VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 548 - VVV1_0( w_fp[8], w_fp[4], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 548 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11199,7 +11199,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 549 - VVV1_0( w_fp[72], w_fp[4], w_fp[101], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[101], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 549 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11227,7 +11227,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 550 - VVV1_0( w_fp[72], w_fp[8], w_fp[102], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[8], w_fp[102], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 550 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11255,7 +11255,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 551 - FFV1_0( w_fp[3], w_fp[33], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 551 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11275,7 +11275,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 552 - FFV1_0( w_fp[3], w_fp[98], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[98], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 552 ) numerators_sv += 
cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11291,7 +11291,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 553 - FFV1_0( w_fp[99], w_fp[33], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 553 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11307,7 +11307,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 554 - FFV1_0( w_fp[46], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 554 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11327,7 +11327,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 555 - FFV1_0( w_fp[46], w_fp[112], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[112], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 555 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11343,7 +11343,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 556 - FFV1_0( w_fp[114], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[114], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 556 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11356,10 +11356,10 @@ namespace mg5amcCpu // *** DIAGRAM 557 OF 1240 *** // Wavefunction(s) for diagram number 557 - VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 557 - VVV1_0( w_fp[86], w_fp[13], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[13], w_fp[6], COUPs[0], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 557 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11387,7 +11387,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 558 - VVV1_0( w_fp[86], w_fp[11], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[11], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 558 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11415,7 +11415,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 559 - VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -11432,7 +11432,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -11449,7 +11449,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -11473,7 +11473,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 560 - VVV1_0( w_fp[102], w_fp[108], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[108], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 560 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11501,7 +11501,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 561 - VVV1_0( w_fp[102], w_fp[1], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[1], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 561 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11529,7 +11529,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 562 - VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &_fp[0] ); jamp_sv[15] += amp_sv[0]; jamp_sv[26] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -11546,7 +11546,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[43] += amp_sv[0]; @@ -11563,7 +11563,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -11587,7 +11587,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 563 - VVV1_0( w_fp[104], w_fp[108], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[108], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 563 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11615,7 +11615,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 564 - VVV1_0( w_fp[104], w_fp[1], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[1], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 564 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11643,7 +11643,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 565 - VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -11660,7 +11660,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[101] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -11677,7 +11677,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -11698,12 +11698,12 @@ namespace mg5amcCpu // *** DIAGRAM 566 OF 1240 *** // Wavefunction(s) for diagram number 566 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[105] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[107] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] ); // Amplitude(s) for diagram number 566 - VVV1_0( w_fp[8], w_fp[6], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[105], COUPs[0], 1.0, &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -11720,7 +11720,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[114] 
+= amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[95], COUPs[0], 1.0, &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -11737,7 +11737,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[107], COUPs[0], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -11758,12 +11758,12 @@ namespace mg5amcCpu // *** DIAGRAM 567 OF 1240 *** // Wavefunction(s) for diagram number 567 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[96] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[90] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[88] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] ); // Amplitude(s) for diagram number 567 - VVV1_0( w_fp[8], w_fp[4], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[96], COUPs[0], 1.0, &_fp[0] ); jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -11780,7 +11780,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[103] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -11797,7 +11797,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[88], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[88], COUPs[0], 1.0, &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; 
jamp_sv[37] -= amp_sv[0]; @@ -11821,7 +11821,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 568 - VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &_fp[0] ); jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -11838,7 +11838,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -11855,7 +11855,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[101] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &_fp[0] ); jamp_sv[15] += amp_sv[0]; jamp_sv[26] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -11879,7 +11879,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 569 - VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[26] -= amp_sv[0]; @@ -11896,7 +11896,7 @@ namespace mg5amcCpu jamp_sv[110] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[15] -= amp_sv[0]; @@ -11913,7 +11913,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[110] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -11937,7 
+11937,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 570 - VVV1_0( w_fp[8], w_fp[27], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[27], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 570 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11965,7 +11965,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 571 - VVV1_0( w_fp[1], w_fp[27], w_fp[101], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[101], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 571 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11993,7 +11993,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 572 - VVV1_0( w_fp[1], w_fp[8], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 572 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12021,7 +12021,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 573 - VVV1_0( w_fp[86], w_fp[37], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[37], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 573 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12041,7 +12041,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 574 - FFV1_0( w_fp[3], w_fp[36], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[36], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 574 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12057,7 +12057,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 575 - FFV1_0( w_fp[99], w_fp[100], 
w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[100], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 575 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12071,7 +12071,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 576 - FFV1_0( w_fp[99], w_fp[36], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[36], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 576 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12085,7 +12085,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 577 - FFV1_0( w_fp[3], w_fp[100], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[100], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 577 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12101,7 +12101,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 578 - VVV1_0( w_fp[104], w_fp[1], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[1], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 578 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12121,7 +12121,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 579 - FFV1_0( w_fp[3], w_fp[33], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[96], COUPs[1], 1.0, &_fp[0] ); jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12130,7 +12130,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], 
w_fp[33], w_fp[90], COUPs[1], 1.0, &_fp[0] ); jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12139,7 +12139,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[88], COUPs[1], 1.0, &_fp[0] ); jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12155,7 +12155,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 580 - FFV1_0( w_fp[41], w_fp[33], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 580 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12171,7 +12171,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 581 - FFV1_0( w_fp[41], w_fp[98], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[98], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 581 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12185,7 +12185,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 582 - FFV1_0( w_fp[113], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[113], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 582 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12199,7 +12199,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 583 - VVV1_0( w_fp[86], w_fp[51], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[51], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( 
channelId == 583 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12219,7 +12219,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 584 - FFV1_0( w_fp[3], w_fp[49], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[49], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 584 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12235,7 +12235,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 585 - FFV1_0( w_fp[99], w_fp[91], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[91], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 585 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12249,7 +12249,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 586 - FFV1_0( w_fp[99], w_fp[49], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[49], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 586 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12263,7 +12263,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 587 - FFV1_0( w_fp[3], w_fp[91], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[91], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 587 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12279,7 +12279,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 588 - VVV1_0( w_fp[102], w_fp[1], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[1], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 588 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( 
amp_sv[0] ); @@ -12299,7 +12299,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 589 - FFV1_0( w_fp[3], w_fp[47], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[105], COUPs[1], 1.0, &_fp[0] ); jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12308,7 +12308,7 @@ namespace mg5amcCpu jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[95], COUPs[1], 1.0, &_fp[0] ); jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12317,7 +12317,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[107], COUPs[1], 1.0, &_fp[0] ); jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12333,7 +12333,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 590 - FFV1_0( w_fp[46], w_fp[47], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 590 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12349,7 +12349,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 591 - FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 591 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ 
-12363,7 +12363,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 592 - FFV1_0( w_fp[114], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[114], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 592 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12377,7 +12377,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 593 - VVV1_0( w_fp[86], w_fp[54], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[54], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 593 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12397,7 +12397,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 594 - FFV1_0( w_fp[53], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 594 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12413,7 +12413,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 595 - FFV1_0( w_fp[78], w_fp[112], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[112], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 595 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12427,7 +12427,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 596 - FFV1_0( w_fp[53], w_fp[112], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[112], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 596 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12441,7 +12441,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 597 - FFV1_0( 
w_fp[78], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 597 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12457,7 +12457,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 598 - VVV1_0( w_fp[104], w_fp[1], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[1], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 598 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12477,7 +12477,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 599 - FFV1_0( w_fp[46], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12486,7 +12486,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12495,7 +12495,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[88], COUPs[1], 1.0, &_fp[0] ); jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12511,7 +12511,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 600 - VVV1_0( w_fp[86], w_fp[20], w_fp[4], 
COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[20], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 600 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12531,7 +12531,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 601 - FFV1_0( w_fp[28], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 601 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12547,7 +12547,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 602 - FFV1_0( w_fp[60], w_fp[112], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[112], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 602 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12561,7 +12561,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 603 - FFV1_0( w_fp[28], w_fp[112], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[112], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 603 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12575,7 +12575,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 604 - FFV1_0( w_fp[60], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 604 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12591,7 +12591,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 605 - VVV1_0( w_fp[102], w_fp[1], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[1], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 605 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12611,7 +12611,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 606 - FFV1_0( w_fp[41], w_fp[2], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[105], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12620,7 +12620,7 @@ namespace mg5amcCpu jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12629,7 +12629,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[107], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12645,7 +12645,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 607 - FFV1_0( w_fp[3], w_fp[15], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[15], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 607 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12665,7 +12665,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 608 - FFV1_0( w_fp[14], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( 
channelId == 608 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12685,7 +12685,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 609 - FFV1_0( w_fp[3], w_fp[112], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 609 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12705,7 +12705,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 610 - FFV1_0( w_fp[14], w_fp[112], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[112], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 610 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12721,7 +12721,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 611 - FFV1_0( w_fp[99], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 611 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12741,7 +12741,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 612 - FFV1_0( w_fp[99], w_fp[15], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[15], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 612 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12757,7 +12757,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 613 - FFV1_0( w_fp[3], w_fp[112], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[57], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12766,7 +12766,7 
@@ namespace mg5amcCpu jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[112], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[81], COUPs[1], 1.0, &_fp[0] ); jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12775,7 +12775,7 @@ namespace mg5amcCpu jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[112], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[82], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -12791,7 +12791,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 614 - FFV1_0( w_fp[99], w_fp[2], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[57], COUPs[1], 1.0, &_fp[0] ); jamp_sv[34] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12800,7 +12800,7 @@ namespace mg5amcCpu jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -12809,7 +12809,7 @@ namespace mg5amcCpu jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -12825,7 +12825,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 615 - VVV1_0( w_fp[92], w_fp[57], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[57], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[15] += amp_sv[0]; @@ -12842,7 +12842,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[81], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[81], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[13] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; @@ -12859,7 +12859,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[82], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[82], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -12880,11 +12880,11 @@ namespace mg5amcCpu // *** DIAGRAM 616 OF 1240 *** // Wavefunction(s) for diagram number 616 - VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 0., 0., w_fp[92] ); - FFV1_2( w_fp[3], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[92] ); + FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 616 - FFV1_0( w_fp[99], w_fp[87], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[87], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 616 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12898,7 +12898,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 617 - FFV1_0( w_fp[99], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 617 ) numerators_sv += 
cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12909,10 +12909,10 @@ namespace mg5amcCpu // *** DIAGRAM 618 OF 1240 *** // Wavefunction(s) for diagram number 618 - VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 0., 0., w_fp[112] ); + VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[112] ); // Amplitude(s) for diagram number 618 - VVV1_0( w_fp[112], w_fp[34], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[34], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 618 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12932,7 +12932,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 619 - FFV1_0( w_fp[3], w_fp[9], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 619 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12945,10 +12945,10 @@ namespace mg5amcCpu // *** DIAGRAM 620 OF 1240 *** // Wavefunction(s) for diagram number 620 - VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 620 - VVV1_0( w_fp[86], w_fp[34], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[34], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 620 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12968,7 +12968,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 621 - FFV1_0( w_fp[3], w_fp[87], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[87], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 621 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12981,12 
+12981,12 @@ namespace mg5amcCpu // *** DIAGRAM 622 OF 1240 *** // Wavefunction(s) for diagram number 622 - VVVV1P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[107] ); - VVVV3P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[105] ); + VVVV1P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[107] ); + VVVV3P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[105] ); // Amplitude(s) for diagram number 622 - FFV1_0( w_fp[3], w_fp[77], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[107], COUPs[1], 1.0, &_fp[0] ); jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12995,7 +12995,7 @@ namespace mg5amcCpu jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &_fp[0] ); jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -13004,7 +13004,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[105], COUPs[1], 1.0, &_fp[0] ); jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0]; @@ -13017,10 +13017,10 @@ namespace mg5amcCpu // *** DIAGRAM 623 OF 1240 *** // Wavefunction(s) for diagram number 623 - FFV1_1( w_fp[77], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[102] ); + FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] ); // Amplitude(s) for diagram 
number 623 - FFV1_0( w_fp[46], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 623 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13031,10 +13031,10 @@ namespace mg5amcCpu // *** DIAGRAM 624 OF 1240 *** // Wavefunction(s) for diagram number 624 - FFV1_2( w_fp[46], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[88] ); + FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] ); // Amplitude(s) for diagram number 624 - FFV1_0( w_fp[88], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 624 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13048,7 +13048,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 625 - FFV1_0( w_fp[46], w_fp[77], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 625 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13064,7 +13064,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 626 - FFV1_0( w_fp[38], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 626 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13075,10 +13075,10 @@ namespace mg5amcCpu // *** DIAGRAM 627 OF 1240 *** // Wavefunction(s) for diagram number 627 - FFV1_2( w_fp[38], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[90] ); + FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] ); // Amplitude(s) for diagram number 627 - FFV1_0( w_fp[90], w_fp[77], w_fp[4], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[90], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 627 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13092,7 +13092,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 628 - FFV1_0( w_fp[38], w_fp[77], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 628 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13108,7 +13108,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 629 - FFV1_0( w_fp[3], w_fp[102], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 629 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13124,7 +13124,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 630 - FFV1_0( w_fp[99], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 630 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13137,10 +13137,10 @@ namespace mg5amcCpu // *** DIAGRAM 631 OF 1240 *** // Wavefunction(s) for diagram number 631 - VVV1P0_1( w_fp[92], w_fp[24], COUPs[0], 0., 0., w_fp[102] ); + VVV1P0_1( w_fp[92], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[102] ); // Amplitude(s) for diagram number 631 - FFV1_0( w_fp[3], w_fp[77], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 631 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13157,10 +13157,10 @@ namespace mg5amcCpu // 
*** DIAGRAM 632 OF 1240 *** // Wavefunction(s) for diagram number 632 - FFV1_1( w_fp[2], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[96] ); + FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[96] ); // Amplitude(s) for diagram number 632 - FFV1_0( w_fp[56], w_fp[96], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[96], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 632 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13174,7 +13174,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 633 - FFV1_0( w_fp[22], w_fp[96], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[96], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 633 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13188,7 +13188,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 634 - VVV1_0( w_fp[112], w_fp[103], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[103], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 634 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13208,7 +13208,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 635 - FFV1_0( w_fp[22], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 635 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13224,7 +13224,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 636 - VVV1_0( w_fp[86], w_fp[103], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[103], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 636 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13244,7 +13244,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 637 - FFV1_0( w_fp[56], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 637 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13260,7 +13260,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 638 - FFV1_0( w_fp[52], w_fp[2], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[107], COUPs[1], 1.0, &_fp[0] ); jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -13269,7 +13269,7 @@ namespace mg5amcCpu jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -13278,7 +13278,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[105], COUPs[1], 1.0, &_fp[0] ); jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; @@ -13291,10 +13291,10 @@ namespace mg5amcCpu // *** DIAGRAM 639 OF 1240 *** // Wavefunction(s) for diagram number 639 - FFV1_2( w_fp[52], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[104] ); + FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] ); // Amplitude(s) for diagram number 639 - FFV1_0( w_fp[104], w_fp[33], w_fp[5], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[104], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 639 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13305,10 +13305,10 @@ namespace mg5amcCpu // *** DIAGRAM 640 OF 1240 *** // Wavefunction(s) for diagram number 640 - FFV1_1( w_fp[33], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[114] ); + FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] ); // Amplitude(s) for diagram number 640 - FFV1_0( w_fp[52], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 640 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13322,7 +13322,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 641 - FFV1_0( w_fp[52], w_fp[33], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 641 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13338,7 +13338,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 642 - FFV1_0( w_fp[104], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 642 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13349,10 +13349,10 @@ namespace mg5amcCpu // *** DIAGRAM 643 OF 1240 *** // Wavefunction(s) for diagram number 643 - FFV1_1( w_fp[39], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[106] ); + FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] ); // Amplitude(s) for diagram number 643 - FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[106], w_fp[4], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 643 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13366,7 +13366,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 644 - FFV1_0( w_fp[52], w_fp[39], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 644 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13382,7 +13382,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 645 - FFV1_0( w_fp[104], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 645 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13398,7 +13398,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 646 - FFV1_0( w_fp[52], w_fp[96], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[96], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 646 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13414,7 +13414,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 647 - FFV1_0( w_fp[52], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 647 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13434,7 +13434,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 648 - FFV1_0( w_fp[65], w_fp[96], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[96], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 648 ) numerators_sv += 
cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13450,7 +13450,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 649 - FFV1_0( w_fp[3], w_fp[96], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 649 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13470,7 +13470,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 650 - FFV1_0( w_fp[99], w_fp[93], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[93], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 650 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13486,7 +13486,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 651 - FFV1_0( w_fp[99], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 651 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13506,7 +13506,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 652 - FFV1_0( w_fp[3], w_fp[93], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[93], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 652 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13526,7 +13526,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 653 - FFV1_0( w_fp[65], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 653 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13546,7 +13546,7 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 654 - VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -13563,7 +13563,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[96] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[22] -= amp_sv[0]; @@ -13580,7 +13580,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -13601,10 +13601,10 @@ namespace mg5amcCpu // *** DIAGRAM 655 OF 1240 *** // Wavefunction(s) for diagram number 655 - VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 655 - VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 655 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13629,10 +13629,10 @@ namespace mg5amcCpu // *** DIAGRAM 656 OF 1240 *** // Wavefunction(s) for diagram number 656 - VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 0., 0., w_fp[113] ); + VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[113] ); // Amplitude(s) for diagram number 656 - VVV1_0( w_fp[61], w_fp[5], w_fp[113], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[113], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 656 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13660,7 +13660,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 657 - VVV1_0( w_fp[61], w_fp[8], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[8], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 657 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13688,7 +13688,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 658 - FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 658 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13708,7 +13708,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 659 - FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 659 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13724,7 +13724,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 660 - FFV1_0( w_fp[99], w_fp[39], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 660 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13740,7 +13740,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 661 - FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 661 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId 
!= 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13760,7 +13760,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 662 - FFV1_0( w_fp[38], w_fp[96], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[96], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 662 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13776,7 +13776,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 663 - FFV1_0( w_fp[90], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 663 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13792,7 +13792,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 664 - FFV1_0( w_fp[71], w_fp[96], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[96], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 664 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13808,7 +13808,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 665 - FFV1_0( w_fp[3], w_fp[96], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 665 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13828,7 +13828,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 666 - FFV1_0( w_fp[99], w_fp[94], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[94], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 666 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13844,7 +13844,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 667 - FFV1_0( w_fp[99], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 667 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13864,7 +13864,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 668 - FFV1_0( w_fp[3], w_fp[94], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[94], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 668 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13884,7 +13884,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 669 - FFV1_0( w_fp[71], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 669 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13904,7 +13904,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 670 - VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[19] += amp_sv[0]; jamp_sv[22] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -13921,7 +13921,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[97] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[19] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -13938,7 +13938,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, 
&_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; jamp_sv[37] += amp_sv[0]; @@ -13959,10 +13959,10 @@ namespace mg5amcCpu // *** DIAGRAM 671 OF 1240 *** // Wavefunction(s) for diagram number 671 - VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 671 - VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 671 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13990,7 +13990,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 672 - VVV1_0( w_fp[66], w_fp[4], w_fp[113], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[113], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 672 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14018,7 +14018,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 673 - VVV1_0( w_fp[66], w_fp[8], w_fp[112], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[8], w_fp[112], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 673 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14046,7 +14046,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 674 - FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 674 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14066,7 +14066,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 675 - FFV1_0( w_fp[3], w_fp[114], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[66], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 675 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14082,7 +14082,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 676 - FFV1_0( w_fp[99], w_fp[33], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 676 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14098,7 +14098,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 677 - FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 677 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14118,7 +14118,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 678 - FFV1_0( w_fp[46], w_fp[96], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[96], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 678 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14134,7 +14134,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 679 - FFV1_0( w_fp[88], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 679 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14147,10 +14147,10 @@ namespace mg5amcCpu // *** DIAGRAM 680 OF 1240 *** // Wavefunction(s) for diagram number 680 - VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 680 - VVV1_0( 
w_fp[104], w_fp[13], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[13], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 680 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14178,7 +14178,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 681 - VVV1_0( w_fp[104], w_fp[10], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[10], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 681 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14206,7 +14206,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 682 - VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -14223,7 +14223,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -14240,7 +14240,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -14264,7 +14264,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 683 - VVV1_0( w_fp[112], w_fp[108], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[108], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 683 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 
) denominators_sv += cxabs2( amp_sv[0] ); @@ -14292,7 +14292,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 684 - VVV1_0( w_fp[112], w_fp[1], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[1], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 684 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14320,7 +14320,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 685 - VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &_fp[0] ); jamp_sv[21] += amp_sv[0]; jamp_sv[28] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -14337,7 +14337,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; jamp_sv[37] += amp_sv[0]; @@ -14354,7 +14354,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -14378,7 +14378,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 686 - VVV1_0( w_fp[86], w_fp[108], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[108], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 686 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14406,7 +14406,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 687 - VVV1_0( w_fp[86], w_fp[1], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[13], 
COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 687 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14434,7 +14434,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 688 - VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &_fp[0] ); jamp_sv[23] += amp_sv[0]; jamp_sv[29] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -14451,7 +14451,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -14468,7 +14468,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -14489,12 +14489,12 @@ namespace mg5amcCpu // *** DIAGRAM 689 OF 1240 *** // Wavefunction(s) for diagram number 689 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[98] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[62] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[101] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[98] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[62] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[101] ); // Amplitude(s) for diagram number 689 - VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ 
-14511,7 +14511,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[96] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -14528,7 +14528,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[101], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[101], COUPs[0], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -14549,12 +14549,12 @@ namespace mg5amcCpu // *** DIAGRAM 690 OF 1240 *** // Wavefunction(s) for diagram number 690 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[109] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 690 - VVV1_0( w_fp[8], w_fp[4], w_fp[109], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[109], COUPs[0], 1.0, &_fp[0] ); jamp_sv[19] += amp_sv[0]; jamp_sv[22] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -14571,7 +14571,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[97] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[110], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[110], COUPs[0], 1.0, &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -14588,7 +14588,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], 
w_fp[111], COUPs[0], 1.0, &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -14612,7 +14612,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 691 - VVV1_0( w_fp[1], w_fp[8], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[107], COUPs[0], 1.0, &_fp[0] ); jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -14629,7 +14629,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[99] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &_fp[0] ); jamp_sv[23] += amp_sv[0]; jamp_sv[29] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -14646,7 +14646,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[105], COUPs[0], 1.0, &_fp[0] ); jamp_sv[21] += amp_sv[0]; jamp_sv[28] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -14670,7 +14670,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 692 - VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[28] -= amp_sv[0]; @@ -14687,7 +14687,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[96] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; @@ -14704,7 +14704,7 @@ namespace mg5amcCpu jamp_sv[97] += amp_sv[0]; jamp_sv[99] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); 
jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -14728,7 +14728,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 693 - VVV1_0( w_fp[8], w_fp[24], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[24], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 693 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14756,7 +14756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 694 - VVV1_0( w_fp[1], w_fp[24], w_fp[113], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[113], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 694 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14784,7 +14784,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 695 - VVV1_0( w_fp[1], w_fp[8], w_fp[102], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[102], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 695 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14812,7 +14812,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 696 - VVV1_0( w_fp[104], w_fp[37], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[37], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 696 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14832,7 +14832,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 697 - FFV1_0( w_fp[3], w_fp[35], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[35], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 697 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14848,7 +14848,7 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 698 - FFV1_0( w_fp[99], w_fp[100], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[100], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 698 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14862,7 +14862,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 699 - FFV1_0( w_fp[99], w_fp[35], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[35], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 699 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14876,7 +14876,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 700 - FFV1_0( w_fp[3], w_fp[100], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[100], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 700 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14892,7 +14892,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 701 - VVV1_0( w_fp[86], w_fp[1], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 701 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14912,7 +14912,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 702 - FFV1_0( w_fp[3], w_fp[33], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[109], COUPs[1], 1.0, &_fp[0] ); jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -14921,7 +14921,7 @@ namespace mg5amcCpu jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * 
amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[110], COUPs[1], 1.0, &_fp[0] ); jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -14930,7 +14930,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; @@ -14946,7 +14946,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 703 - FFV1_0( w_fp[38], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 703 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14962,7 +14962,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 704 - FFV1_0( w_fp[38], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 704 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14976,7 +14976,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 705 - FFV1_0( w_fp[90], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 705 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14990,7 +14990,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 706 - VVV1_0( w_fp[104], w_fp[45], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( 
w_fp[104], w_fp[45], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 706 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15010,7 +15010,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 707 - FFV1_0( w_fp[3], w_fp[43], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[43], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 707 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15026,7 +15026,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 708 - FFV1_0( w_fp[99], w_fp[89], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[89], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 708 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15040,7 +15040,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 709 - FFV1_0( w_fp[99], w_fp[43], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[43], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 709 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15054,7 +15054,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 710 - FFV1_0( w_fp[3], w_fp[89], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[89], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 710 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15070,7 +15070,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 711 - VVV1_0( w_fp[112], w_fp[1], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[1], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId 
== 711 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15090,7 +15090,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 712 - FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &_fp[0] ); jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15099,7 +15099,7 @@ namespace mg5amcCpu jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &_fp[0] ); jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15108,7 +15108,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[101], COUPs[1], 1.0, &_fp[0] ); jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15124,7 +15124,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 713 - FFV1_0( w_fp[46], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 713 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15140,7 +15140,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 714 - FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 714 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15154,7 +15154,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 715 - FFV1_0( w_fp[88], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 715 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15168,7 +15168,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 716 - VVV1_0( w_fp[104], w_fp[54], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[54], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 716 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15188,7 +15188,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 717 - FFV1_0( w_fp[7], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 717 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15204,7 +15204,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 718 - FFV1_0( w_fp[78], w_fp[96], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[96], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 718 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15218,7 +15218,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 719 - FFV1_0( w_fp[7], w_fp[96], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[96], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 719 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15232,7 +15232,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 720 - FFV1_0( w_fp[78], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 720 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15248,7 +15248,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 721 - VVV1_0( w_fp[86], w_fp[1], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 721 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15268,7 +15268,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 722 - FFV1_0( w_fp[46], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15277,7 +15277,7 @@ namespace mg5amcCpu jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15286,7 +15286,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15302,7 +15302,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) 
for diagram number 723 - VVV1_0( w_fp[104], w_fp[23], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[23], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 723 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15322,7 +15322,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 724 - FFV1_0( w_fp[25], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 724 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15338,7 +15338,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 725 - FFV1_0( w_fp[58], w_fp[96], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[96], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 725 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15352,7 +15352,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 726 - FFV1_0( w_fp[25], w_fp[96], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[96], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 726 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15366,7 +15366,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 727 - FFV1_0( w_fp[58], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 727 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15382,7 +15382,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 728 - VVV1_0( w_fp[112], w_fp[1], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], 
w_fp[1], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 728 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15402,7 +15402,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 729 - FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15411,7 +15411,7 @@ namespace mg5amcCpu jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15420,7 +15420,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15436,7 +15436,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 730 - FFV1_0( w_fp[3], w_fp[17], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 730 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15456,7 +15456,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 731 - FFV1_0( w_fp[26], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[2], w_fp[104], COUPs[1], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 731 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15476,7 +15476,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 732 - FFV1_0( w_fp[3], w_fp[96], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 732 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15496,7 +15496,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 733 - FFV1_0( w_fp[26], w_fp[96], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[96], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 733 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15512,7 +15512,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 734 - FFV1_0( w_fp[99], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 734 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15532,7 +15532,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 735 - FFV1_0( w_fp[99], w_fp[17], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[17], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 735 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15548,7 +15548,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 736 - FFV1_0( w_fp[3], w_fp[96], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[73], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += 
cxtype( 0, 1 ) * amp_sv[0]; @@ -15557,7 +15557,7 @@ namespace mg5amcCpu jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[96], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[79], COUPs[1], 1.0, &_fp[0] ); jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15566,7 +15566,7 @@ namespace mg5amcCpu jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[96], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[80], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -15582,7 +15582,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 737 - FFV1_0( w_fp[99], w_fp[2], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[73], COUPs[1], 1.0, &_fp[0] ); jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[38] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15591,7 +15591,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); jamp_sv[38] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -15600,7 +15600,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -15616,7 +15616,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 738 - VVV1_0( w_fp[92], w_fp[73], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[73], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -15633,7 +15633,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[92], w_fp[79], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[79], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[19] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -15650,7 +15650,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[92], w_fp[80], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[80], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[22] -= amp_sv[0]; @@ -15671,10 +15671,10 @@ namespace mg5amcCpu // *** DIAGRAM 739 OF 1240 *** // Wavefunction(s) for diagram number 739 - FFV1_1( w_fp[77], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[92] ); + FFV1_1( w_fp[77], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[92] ); // Amplitude(s) for diagram number 739 - FFV1_0( w_fp[7], w_fp[92], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[92], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 739 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15687,7 +15687,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 740 - FFV1_0( w_fp[53], w_fp[92], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[92], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 740 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ 
-15697,10 +15697,10 @@ namespace mg5amcCpu // *** DIAGRAM 741 OF 1240 *** // Wavefunction(s) for diagram number 741 - FFV1_2( w_fp[46], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + FFV1_2( w_fp[46], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 741 - FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 741 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15713,7 +15713,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 742 - FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 742 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15726,7 +15726,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 743 - FFV1_0( w_fp[53], w_fp[9], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[9], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 743 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15739,7 +15739,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 744 - FFV1_0( w_fp[7], w_fp[85], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[85], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 744 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15752,7 +15752,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 745 - FFV1_0( w_fp[46], w_fp[92], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[92], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 745 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15766,7 +15766,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 746 - FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 746 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15777,10 +15777,10 @@ namespace mg5amcCpu // *** DIAGRAM 747 OF 1240 *** // Wavefunction(s) for diagram number 747 - VVV1P0_1( w_fp[0], w_fp[29], COUPs[0], 0., 0., w_fp[96] ); + VVV1P0_1( w_fp[0], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[96] ); // Amplitude(s) for diagram number 747 - FFV1_0( w_fp[46], w_fp[77], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 747 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15796,7 +15796,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 748 - FFV1_0( w_fp[25], w_fp[92], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[92], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 748 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15809,7 +15809,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 749 - FFV1_0( w_fp[48], w_fp[92], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[92], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 749 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15819,10 +15819,10 @@ namespace mg5amcCpu // *** DIAGRAM 750 OF 1240 *** // Wavefunction(s) for diagram number 750 - FFV1_2( w_fp[38], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[104] ); + 
FFV1_2( w_fp[38], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] ); // Amplitude(s) for diagram number 750 - FFV1_0( w_fp[104], w_fp[87], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[87], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 750 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15835,7 +15835,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 751 - FFV1_0( w_fp[104], w_fp[85], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[85], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 751 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15848,7 +15848,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 752 - FFV1_0( w_fp[48], w_fp[87], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[87], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 752 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15861,7 +15861,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 753 - FFV1_0( w_fp[25], w_fp[85], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[85], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 753 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15874,7 +15874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 754 - FFV1_0( w_fp[38], w_fp[92], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[92], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 754 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15888,7 +15888,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 
755 - FFV1_0( w_fp[104], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 755 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15899,10 +15899,10 @@ namespace mg5amcCpu // *** DIAGRAM 756 OF 1240 *** // Wavefunction(s) for diagram number 756 - VVV1P0_1( w_fp[0], w_fp[27], COUPs[0], 0., 0., w_fp[101] ); + VVV1P0_1( w_fp[0], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[101] ); // Amplitude(s) for diagram number 756 - FFV1_0( w_fp[38], w_fp[77], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 756 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15918,7 +15918,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 757 - FFV1_0( w_fp[28], w_fp[92], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[92], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 757 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15931,7 +15931,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 758 - FFV1_0( w_fp[40], w_fp[92], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[92], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 758 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15941,10 +15941,10 @@ namespace mg5amcCpu // *** DIAGRAM 759 OF 1240 *** // Wavefunction(s) for diagram number 759 - FFV1_2( w_fp[41], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[62] ); + FFV1_2( w_fp[41], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] ); // Amplitude(s) for diagram number 759 - FFV1_0( w_fp[62], w_fp[87], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[62], w_fp[87], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 759 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15957,7 +15957,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 760 - FFV1_0( w_fp[62], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 760 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15970,7 +15970,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 761 - FFV1_0( w_fp[40], w_fp[87], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[87], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 761 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15983,7 +15983,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 762 - FFV1_0( w_fp[28], w_fp[9], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[9], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 762 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15996,7 +15996,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 763 - FFV1_0( w_fp[41], w_fp[92], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[92], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 763 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16010,7 +16010,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 764 - FFV1_0( w_fp[62], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 764 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16021,10 +16021,10 @@ namespace mg5amcCpu // *** DIAGRAM 765 OF 1240 *** // Wavefunction(s) for diagram number 765 - VVV1P0_1( w_fp[0], w_fp[24], COUPs[0], 0., 0., w_fp[98] ); + VVV1P0_1( w_fp[0], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[98] ); // Amplitude(s) for diagram number 765 - FFV1_0( w_fp[41], w_fp[77], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 765 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16040,7 +16040,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 766 - FFV1_0( w_fp[26], w_fp[92], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[92], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 766 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16054,7 +16054,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 767 - FFV1_0( w_fp[3], w_fp[92], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 767 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16070,7 +16070,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 768 - VVV1_0( w_fp[98], w_fp[34], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[34], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 768 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16090,7 +16090,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 769 - FFV1_0( w_fp[3], w_fp[85], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[85], w_fp[98], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 769 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16106,7 +16106,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 770 - VVV1_0( w_fp[0], w_fp[34], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[34], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 770 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16126,7 +16126,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 771 - FFV1_0( w_fp[26], w_fp[85], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[85], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 771 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16137,12 +16137,12 @@ namespace mg5amcCpu // *** DIAGRAM 772 OF 1240 *** // Wavefunction(s) for diagram number 772 - VVVV1P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 0., 0., w_fp[85] ); - VVVV3P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 0., 0., w_fp[112] ); - VVVV4P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[85] ); + VVVV3P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] ); + VVVV4P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 772 - FFV1_0( w_fp[3], w_fp[77], w_fp[85], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[85], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; @@ -16151,7 +16151,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], 
w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[112], COUPs[1], 1.0, &_fp[0] ); jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16160,7 +16160,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16176,7 +16176,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 773 - FFV1_0( w_fp[14], w_fp[92], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[92], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 773 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16190,7 +16190,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 774 - FFV1_0( w_fp[3], w_fp[92], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 774 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16206,7 +16206,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 775 - VVV1_0( w_fp[101], w_fp[34], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[34], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 775 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16226,7 +16226,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 776 - FFV1_0( w_fp[3], w_fp[9], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[101], COUPs[1], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 776 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16242,7 +16242,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 777 - VVV1_0( w_fp[0], w_fp[34], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[34], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 777 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16262,7 +16262,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 778 - FFV1_0( w_fp[14], w_fp[9], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[9], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 778 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16273,12 +16273,12 @@ namespace mg5amcCpu // *** DIAGRAM 779 OF 1240 *** // Wavefunction(s) for diagram number 779 - VVVV1P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 0., 0., w_fp[9] ); - VVVV3P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 0., 0., w_fp[109] ); + VVVV1P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[9] ); + VVVV3P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] ); // Amplitude(s) for diagram number 779 - FFV1_0( w_fp[3], w_fp[77], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[9], COUPs[1], 1.0, &_fp[0] ); jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16287,7 +16287,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &_fp[0] ); jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16296,7 +16296,7 @@ namespace mg5amcCpu jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &_fp[0] ); jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16312,7 +16312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 780 - FFV1_0( w_fp[12], w_fp[92], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[92], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 780 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16326,7 +16326,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 781 - FFV1_0( w_fp[3], w_fp[92], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 781 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16342,7 +16342,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 782 - VVV1_0( w_fp[96], w_fp[34], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[34], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 782 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16362,7 +16362,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 783 - FFV1_0( w_fp[3], w_fp[87], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[87], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 783 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16378,7 +16378,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 784 - VVV1_0( w_fp[0], w_fp[34], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[34], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 784 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16398,7 +16398,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 785 - FFV1_0( w_fp[12], w_fp[87], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[87], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 785 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16409,12 +16409,12 @@ namespace mg5amcCpu // *** DIAGRAM 786 OF 1240 *** // Wavefunction(s) for diagram number 786 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 0., 0., w_fp[87] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 0., 0., w_fp[34] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 0., 0., w_fp[86] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[87] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[34] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 786 - FFV1_0( w_fp[3], w_fp[77], w_fp[87], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[87], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; @@ -16423,7 +16423,7 @@ namespace mg5amcCpu jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[34], COUPs[1], &_fp[0] ); + 
FFV1_0( w_fp[3], w_fp[77], w_fp[34], COUPs[1], 1.0, &_fp[0] ); jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16432,7 +16432,7 @@ namespace mg5amcCpu jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[86], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16448,17 +16448,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 787 - FFV1_0( w_fp[3], w_fp[92], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[30], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] += amp_sv[0]; jamp_sv[25] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[92], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[31], COUPs[1], 1.0, &_fp[0] ); jamp_sv[25] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[92], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[32], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -16467,12 +16467,12 @@ namespace mg5amcCpu // *** DIAGRAM 788 OF 1240 *** // Wavefunction(s) for diagram number 788 - VVV1P0_1( w_fp[0], w_fp[30], COUPs[0], 0., 0., w_fp[92] ); - VVV1P0_1( w_fp[0], w_fp[31], COUPs[0], 0., 0., w_fp[88] ); - VVV1P0_1( w_fp[0], w_fp[32], COUPs[0], 0., 0., w_fp[106] ); + VVV1P0_1( w_fp[0], w_fp[30], COUPs[0], 1.0, 0., 0., w_fp[92] ); + VVV1P0_1( w_fp[0], w_fp[31], COUPs[0], 1.0, 0., 0., w_fp[88] ); + VVV1P0_1( w_fp[0], w_fp[32], COUPs[0], 1.0, 0., 0., w_fp[106] ); // Amplitude(s) for diagram number 788 - FFV1_0( w_fp[3], w_fp[77], w_fp[92], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], 
w_fp[77], w_fp[92], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; @@ -16481,7 +16481,7 @@ namespace mg5amcCpu jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &_fp[0] ); jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; @@ -16490,7 +16490,7 @@ namespace mg5amcCpu jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[106], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[106], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16503,10 +16503,10 @@ namespace mg5amcCpu // *** DIAGRAM 789 OF 1240 *** // Wavefunction(s) for diagram number 789 - FFV1_2( w_fp[52], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[90] ); + FFV1_2( w_fp[52], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] ); // Amplitude(s) for diagram number 789 - FFV1_0( w_fp[90], w_fp[35], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[35], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 789 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16519,7 +16519,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 790 - FFV1_0( w_fp[90], w_fp[36], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[36], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 790 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ 
-16529,10 +16529,10 @@ namespace mg5amcCpu // *** DIAGRAM 791 OF 1240 *** // Wavefunction(s) for diagram number 791 - FFV1_1( w_fp[33], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[114] ); + FFV1_1( w_fp[33], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] ); // Amplitude(s) for diagram number 791 - FFV1_0( w_fp[22], w_fp[114], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[114], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 791 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16545,7 +16545,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 792 - FFV1_0( w_fp[21], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 792 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16558,7 +16558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 793 - FFV1_0( w_fp[22], w_fp[36], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[36], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 793 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16571,7 +16571,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 794 - FFV1_0( w_fp[21], w_fp[35], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[35], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 794 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16584,7 +16584,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 795 - FFV1_0( w_fp[90], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 795 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16598,7 +16598,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 796 - FFV1_0( w_fp[52], w_fp[114], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[114], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 796 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16612,7 +16612,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 797 - FFV1_0( w_fp[52], w_fp[33], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 797 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16628,7 +16628,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 798 - FFV1_0( w_fp[90], w_fp[43], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[43], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 798 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16641,7 +16641,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 799 - FFV1_0( w_fp[90], w_fp[44], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[44], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 799 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16651,10 +16651,10 @@ namespace mg5amcCpu // *** DIAGRAM 800 OF 1240 *** // Wavefunction(s) for diagram number 800 - FFV1_1( w_fp[39], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[102] ); + FFV1_1( w_fp[39], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] ); // Amplitude(s) for diagram number 800 - FFV1_0( w_fp[56], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], 
w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 800 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16667,7 +16667,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 801 - FFV1_0( w_fp[21], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 801 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16680,7 +16680,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 802 - FFV1_0( w_fp[56], w_fp[44], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[44], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 802 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16693,7 +16693,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 803 - FFV1_0( w_fp[21], w_fp[43], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[43], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 803 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16706,7 +16706,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 804 - FFV1_0( w_fp[90], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 804 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16720,7 +16720,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 805 - FFV1_0( w_fp[52], w_fp[102], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[102], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 805 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16734,7 +16734,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 806 - FFV1_0( w_fp[52], w_fp[39], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 806 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16750,7 +16750,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 807 - FFV1_0( w_fp[90], w_fp[49], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[49], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 807 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16763,7 +16763,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 808 - FFV1_0( w_fp[90], w_fp[50], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[50], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 808 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16773,10 +16773,10 @@ namespace mg5amcCpu // *** DIAGRAM 809 OF 1240 *** // Wavefunction(s) for diagram number 809 - FFV1_1( w_fp[47], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[113] ); + FFV1_1( w_fp[47], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] ); // Amplitude(s) for diagram number 809 - FFV1_0( w_fp[56], w_fp[113], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[113], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 809 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16789,7 +16789,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 810 - FFV1_0( w_fp[22], w_fp[113], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], 
w_fp[113], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 810 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16802,7 +16802,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 811 - FFV1_0( w_fp[56], w_fp[50], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[50], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 811 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16815,7 +16815,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 812 - FFV1_0( w_fp[22], w_fp[49], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[49], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 812 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16828,7 +16828,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 813 - FFV1_0( w_fp[90], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 813 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16842,7 +16842,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 814 - FFV1_0( w_fp[52], w_fp[113], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[113], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 814 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16856,7 +16856,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 815 - FFV1_0( w_fp[52], w_fp[47], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 815 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16872,7 +16872,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 816 - FFV1_0( w_fp[90], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 816 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16886,7 +16886,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 817 - FFV1_0( w_fp[90], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 817 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16902,7 +16902,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 818 - VVV1_0( w_fp[98], w_fp[103], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[103], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 818 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16922,7 +16922,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 819 - FFV1_0( w_fp[21], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 819 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16938,7 +16938,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 820 - VVV1_0( w_fp[0], w_fp[103], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[103], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 820 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16958,7 
+16958,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 821 - FFV1_0( w_fp[21], w_fp[17], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[17], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 821 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16972,7 +16972,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 822 - FFV1_0( w_fp[52], w_fp[2], w_fp[85], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[85], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; @@ -16981,7 +16981,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16990,7 +16990,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17006,7 +17006,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 823 - FFV1_0( w_fp[90], w_fp[15], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[15], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 823 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17020,7 +17020,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 824 - FFV1_0( w_fp[90], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 824 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17036,7 +17036,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 825 - VVV1_0( w_fp[101], w_fp[103], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[103], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 825 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17056,7 +17056,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 826 - FFV1_0( w_fp[22], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 826 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17072,7 +17072,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 827 - VVV1_0( w_fp[0], w_fp[103], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[103], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 827 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17092,7 +17092,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 828 - FFV1_0( w_fp[22], w_fp[15], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[15], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 828 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17106,7 +17106,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 829 - FFV1_0( w_fp[52], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + 
FFV1_0( w_fp[52], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17115,7 +17115,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17124,7 +17124,7 @@ namespace mg5amcCpu jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17140,7 +17140,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 830 - FFV1_0( w_fp[90], w_fp[18], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[18], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 830 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17154,7 +17154,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 831 - FFV1_0( w_fp[90], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 831 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17170,7 +17170,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 832 - VVV1_0( w_fp[96], w_fp[103], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[103], w_fp[4], 
COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 832 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17190,7 +17190,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 833 - FFV1_0( w_fp[56], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 833 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17206,7 +17206,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 834 - VVV1_0( w_fp[0], w_fp[103], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[103], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 834 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17226,7 +17226,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 835 - FFV1_0( w_fp[56], w_fp[18], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[18], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 835 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17240,7 +17240,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 836 - FFV1_0( w_fp[52], w_fp[2], w_fp[87], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[87], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; @@ -17249,7 +17249,7 @@ namespace mg5amcCpu jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[34], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[34], COUPs[1], 1.0, &_fp[0] ); jamp_sv[17] += cxtype( 0, 1 ) * 
amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17258,7 +17258,7 @@ namespace mg5amcCpu jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17274,17 +17274,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 837 - FFV1_0( w_fp[90], w_fp[2], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[30], COUPs[1], 1.0, &_fp[0] ); jamp_sv[64] += amp_sv[0]; jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - FFV1_0( w_fp[90], w_fp[2], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[31], COUPs[1], 1.0, &_fp[0] ); jamp_sv[70] -= amp_sv[0]; jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - FFV1_0( w_fp[90], w_fp[2], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[32], COUPs[1], 1.0, &_fp[0] ); jamp_sv[64] -= amp_sv[0]; jamp_sv[88] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; @@ -17296,7 +17296,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 838 - FFV1_0( w_fp[52], w_fp[2], w_fp[92], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[92], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; @@ -17305,7 +17305,7 @@ namespace mg5amcCpu jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] -= cxtype( 0, 1 
) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; @@ -17314,7 +17314,7 @@ namespace mg5amcCpu jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[106], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[106], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17327,10 +17327,10 @@ namespace mg5amcCpu // *** DIAGRAM 839 OF 1240 *** // Wavefunction(s) for diagram number 839 - VVV1P0_1( w_fp[0], w_fp[61], COUPs[0], 0., 0., w_fp[90] ); + VVV1P0_1( w_fp[0], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[90] ); // Amplitude(s) for diagram number 839 - VVV1_0( w_fp[90], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 839 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17358,7 +17358,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 840 - VVV1_0( w_fp[90], w_fp[11], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[11], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 840 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17386,7 +17386,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 841 - VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -17403,7 +17403,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 
1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -17420,7 +17420,7 @@ namespace mg5amcCpu jamp_sv[115] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[6] += amp_sv[0]; @@ -17441,10 +17441,10 @@ namespace mg5amcCpu // *** DIAGRAM 842 OF 1240 *** // Wavefunction(s) for diagram number 842 - VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 0., 0., w_fp[56] ); + VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[56] ); // Amplitude(s) for diagram number 842 - VVV1_0( w_fp[56], w_fp[63], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[63], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 842 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17472,7 +17472,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 843 - VVV1_0( w_fp[56], w_fp[64], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[64], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 843 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17500,7 +17500,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 844 - VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -17517,7 +17517,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[7] 
-= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -17534,7 +17534,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[6] += amp_sv[0]; @@ -17558,7 +17558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 845 - VVV1_0( w_fp[0], w_fp[63], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[63], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 845 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17586,7 +17586,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 846 - VVV1_0( w_fp[0], w_fp[64], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[64], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 846 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17611,12 +17611,12 @@ namespace mg5amcCpu // *** DIAGRAM 847 OF 1240 *** // Wavefunction(s) for diagram number 847 - VVVV1P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 0., 0., w_fp[103] ); - VVVV3P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 0., 0., w_fp[22] ); - VVVV4P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[103] ); + VVVV3P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[22] ); + VVVV4P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 847 - VVV1_0( w_fp[8], w_fp[6], w_fp[103], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[103], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -17633,7 +17633,7 @@ namespace mg5amcCpu jamp_sv[111] -= 
amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[22], COUPs[0], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -17650,7 +17650,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -17671,12 +17671,12 @@ namespace mg5amcCpu // *** DIAGRAM 848 OF 1240 *** // Wavefunction(s) for diagram number 848 - VVVV1P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 0., 0., w_fp[105] ); - VVVV3P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 0., 0., w_fp[107] ); + VVVV1P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[105] ); + VVVV3P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[107] ); // Amplitude(s) for diagram number 848 - VVV1_0( w_fp[8], w_fp[5], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[105], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[18] -= amp_sv[0]; @@ -17693,7 +17693,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[95], COUPs[0], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -17710,7 +17710,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[98] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[107], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; 
jamp_sv[7] += amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -17731,12 +17731,12 @@ namespace mg5amcCpu // *** DIAGRAM 849 OF 1240 *** // Wavefunction(s) for diagram number 849 - VVVV1P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 0., 0., w_fp[115] ); - VVVV3P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 0., 0., w_fp[116] ); - VVVV4P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 0., 0., w_fp[117] ); + VVVV1P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[115] ); + VVVV3P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[116] ); + VVVV4P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[117] ); // Amplitude(s) for diagram number 849 - VVV1_0( w_fp[61], w_fp[6], w_fp[115], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[115], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[18] += amp_sv[0]; @@ -17753,7 +17753,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVV1_0( w_fp[61], w_fp[6], w_fp[116], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[116], COUPs[0], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[16] += amp_sv[0]; @@ -17770,7 +17770,7 @@ namespace mg5amcCpu jamp_sv[105] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[61], w_fp[6], w_fp[117], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[117], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -17791,12 +17791,12 @@ namespace mg5amcCpu // *** DIAGRAM 850 OF 1240 *** // Wavefunction(s) for diagram number 850 - VVVV1P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 0., 0., w_fp[118] ); - VVVV3P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 0., 0., w_fp[119] ); - VVVV4P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 0., 0., w_fp[120] ); + VVVV1P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[118] ); + VVVV3P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[119] ); + 
VVVV4P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[120] ); // Amplitude(s) for diagram number 850 - VVV1_0( w_fp[61], w_fp[5], w_fp[118], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[118], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -17813,7 +17813,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[61], w_fp[5], w_fp[119], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[119], COUPs[0], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -17830,7 +17830,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[100] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[61], w_fp[5], w_fp[120], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[120], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -17854,7 +17854,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 851 - VVVV1_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -17871,7 +17871,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -17888,7 +17888,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -17912,7 +17912,7 
@@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 852 - VVV1_0( w_fp[8], w_fp[29], w_fp[90], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[29], w_fp[90], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 852 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17940,7 +17940,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 853 - VVV1_0( w_fp[61], w_fp[29], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[29], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 853 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17968,7 +17968,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 854 - VVV1_0( w_fp[61], w_fp[8], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[8], w_fp[96], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 854 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17996,7 +17996,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 855 - VVV1_0( w_fp[90], w_fp[45], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[45], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 855 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18016,7 +18016,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 856 - FFV1_0( w_fp[3], w_fp[44], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[44], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 856 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18032,7 +18032,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 857 - FFV1_0( w_fp[65], w_fp[102], w_fp[6], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 857 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18046,7 +18046,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 858 - FFV1_0( w_fp[3], w_fp[102], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 858 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18062,7 +18062,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 859 - FFV1_0( w_fp[65], w_fp[44], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[44], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 859 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18076,7 +18076,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 860 - VVV1_0( w_fp[0], w_fp[64], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[64], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 860 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18096,7 +18096,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 861 - FFV1_0( w_fp[3], w_fp[39], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[105], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; @@ -18105,7 +18105,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[95], 
COUPs[1], 1.0, &_fp[0] ); jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18114,7 +18114,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[107], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18130,7 +18130,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 862 - FFV1_0( w_fp[41], w_fp[39], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 862 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18146,7 +18146,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 863 - FFV1_0( w_fp[41], w_fp[102], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[102], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 863 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18160,7 +18160,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 864 - FFV1_0( w_fp[62], w_fp[39], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 864 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18174,7 +18174,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 865 - VVV1_0( w_fp[90], w_fp[51], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[51], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 865 
) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18194,7 +18194,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 866 - FFV1_0( w_fp[3], w_fp[50], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[50], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 866 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18210,7 +18210,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 867 - FFV1_0( w_fp[65], w_fp[113], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[113], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 867 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18224,7 +18224,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 868 - FFV1_0( w_fp[3], w_fp[113], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 868 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18240,7 +18240,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 869 - FFV1_0( w_fp[65], w_fp[50], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[50], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 869 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18254,7 +18254,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 870 - VVV1_0( w_fp[0], w_fp[63], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[63], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 870 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ 
-18274,7 +18274,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 871 - FFV1_0( w_fp[3], w_fp[47], w_fp[103], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[103], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; @@ -18283,7 +18283,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[22], COUPs[1], 1.0, &_fp[0] ); jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18292,7 +18292,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18308,7 +18308,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 872 - FFV1_0( w_fp[38], w_fp[47], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 872 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18324,7 +18324,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 873 - FFV1_0( w_fp[38], w_fp[113], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[113], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 873 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18338,7 +18338,7 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 874 - FFV1_0( w_fp[104], w_fp[47], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[47], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 874 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18352,7 +18352,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 875 - VVV1_0( w_fp[90], w_fp[23], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[23], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 875 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18372,7 +18372,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 876 - FFV1_0( w_fp[48], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 876 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18388,7 +18388,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 877 - FFV1_0( w_fp[104], w_fp[93], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[93], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 877 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18402,7 +18402,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 878 - FFV1_0( w_fp[104], w_fp[2], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 878 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18418,7 +18418,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 879 - FFV1_0( w_fp[48], w_fp[93], 
w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[93], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 879 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18432,7 +18432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 880 - VVV1_0( w_fp[0], w_fp[64], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[64], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 880 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18452,7 +18452,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 881 - FFV1_0( w_fp[38], w_fp[2], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[105], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; @@ -18461,7 +18461,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18470,7 +18470,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[107], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18486,7 +18486,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 882 - VVV1_0( w_fp[90], w_fp[20], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( 
w_fp[90], w_fp[20], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 882 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18506,7 +18506,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 883 - FFV1_0( w_fp[40], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 883 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18522,7 +18522,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 884 - FFV1_0( w_fp[62], w_fp[93], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[93], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 884 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18536,7 +18536,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 885 - FFV1_0( w_fp[62], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 885 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18552,7 +18552,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 886 - FFV1_0( w_fp[40], w_fp[93], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[93], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 886 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18566,7 +18566,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 887 - VVV1_0( w_fp[0], w_fp[63], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[63], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 887 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18586,7 +18586,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 888 - FFV1_0( w_fp[41], w_fp[2], w_fp[103], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[103], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -18595,7 +18595,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18604,7 +18604,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18620,7 +18620,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 889 - FFV1_0( w_fp[3], w_fp[18], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[18], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 889 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18640,7 +18640,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 890 - FFV1_0( w_fp[12], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 890 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18660,7 +18660,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 891 - FFV1_0( w_fp[3], w_fp[93], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[93], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 891 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18680,7 +18680,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 892 - FFV1_0( w_fp[65], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 892 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18700,7 +18700,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 893 - FFV1_0( w_fp[12], w_fp[93], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[93], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 893 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18716,7 +18716,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 894 - FFV1_0( w_fp[65], w_fp[18], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[18], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 894 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18729,10 +18729,10 @@ namespace mg5amcCpu // *** DIAGRAM 895 OF 1240 *** // Wavefunction(s) for diagram number 895 - VVV1P0_1( w_fp[0], w_fp[66], COUPs[0], 0., 0., w_fp[65] ); + VVV1P0_1( w_fp[0], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[65] ); // Amplitude(s) for diagram number 895 - VVV1_0( w_fp[65], w_fp[13], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[13], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 895 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18760,7 +18760,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 896 - VVV1_0( w_fp[65], w_fp[11], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[11], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 896 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18788,7 +18788,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 897 - VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[37] += amp_sv[0]; @@ -18805,7 +18805,7 @@ namespace mg5amcCpu jamp_sv[100] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[12] += amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -18822,7 +18822,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[3] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -18846,7 +18846,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 898 - VVV1_0( w_fp[56], w_fp[69], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[69], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 898 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18874,7 +18874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 
899 - VVV1_0( w_fp[56], w_fp[70], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[70], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 899 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18902,7 +18902,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 900 - VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -18919,7 +18919,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -18936,7 +18936,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[107] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[3] += amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -18960,7 +18960,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 901 - VVV1_0( w_fp[0], w_fp[69], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[69], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 901 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18988,7 +18988,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 902 - VVV1_0( w_fp[0], w_fp[70], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[70], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 902 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId 
!= 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19013,12 +19013,12 @@ namespace mg5amcCpu // *** DIAGRAM 903 OF 1240 *** // Wavefunction(s) for diagram number 903 - VVVV1P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 0., 0., w_fp[93] ); - VVVV3P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 0., 0., w_fp[90] ); - VVVV4P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[93] ); + VVVV3P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[90] ); + VVVV4P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 903 - VVV1_0( w_fp[8], w_fp[6], w_fp[93], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[93], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -19035,7 +19035,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[90], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[90], COUPs[0], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -19052,7 +19052,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[12] += amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -19073,12 +19073,12 @@ namespace mg5amcCpu // *** DIAGRAM 904 OF 1240 *** // Wavefunction(s) for diagram number 904 - VVVV1P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 0., 0., w_fp[22] ); - VVVV3P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 0., 0., w_fp[103] ); - VVVV4P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 0., 0., w_fp[63] ); + VVVV1P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] ); + VVVV3P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[103] ); + VVVV4P0_1( 
w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[63] ); // Amplitude(s) for diagram number 904 - VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[19] -= amp_sv[0]; @@ -19095,7 +19095,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[103], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[103], COUPs[0], 1.0, &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; jamp_sv[37] += amp_sv[0]; @@ -19112,7 +19112,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; jamp_sv[100] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[63], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[63], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[37] += amp_sv[0]; @@ -19133,12 +19133,12 @@ namespace mg5amcCpu // *** DIAGRAM 905 OF 1240 *** // Wavefunction(s) for diagram number 905 - VVVV1P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 0., 0., w_fp[107] ); - VVVV3P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 0., 0., w_fp[105] ); + VVVV1P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] ); + VVVV3P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] ); // Amplitude(s) for diagram number 905 - VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[19] += amp_sv[0]; @@ -19155,7 +19155,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVV1_0( w_fp[66], w_fp[6], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[95], COUPs[0], 1.0, &_fp[0] 
); jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -19172,7 +19172,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[66], w_fp[6], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[105], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -19196,7 +19196,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 906 - VVV1_0( w_fp[66], w_fp[4], w_fp[118], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[118], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -19213,7 +19213,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[66], w_fp[4], w_fp[119], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[119], COUPs[0], 1.0, &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; @@ -19230,7 +19230,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[99] += amp_sv[0]; jamp_sv[100] -= amp_sv[0]; - VVV1_0( w_fp[66], w_fp[4], w_fp[120], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[120], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -19254,7 +19254,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 907 - VVVV1_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -19271,7 +19271,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; 
jamp_sv[10] -= amp_sv[0]; @@ -19288,7 +19288,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[20] += amp_sv[0]; @@ -19312,7 +19312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 908 - VVV1_0( w_fp[8], w_fp[27], w_fp[65], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[27], w_fp[65], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 908 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19340,7 +19340,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 909 - VVV1_0( w_fp[66], w_fp[27], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[27], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 909 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19368,7 +19368,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 910 - VVV1_0( w_fp[66], w_fp[8], w_fp[101], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[8], w_fp[101], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 910 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19396,7 +19396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 911 - VVV1_0( w_fp[65], w_fp[37], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[37], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 911 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19416,7 +19416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 912 - FFV1_0( w_fp[3], 
w_fp[36], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[36], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 912 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19432,7 +19432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 913 - FFV1_0( w_fp[71], w_fp[114], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[114], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 913 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19446,7 +19446,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 914 - FFV1_0( w_fp[3], w_fp[114], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 914 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19462,7 +19462,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 915 - FFV1_0( w_fp[71], w_fp[36], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[36], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 915 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19476,7 +19476,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 916 - VVV1_0( w_fp[0], w_fp[70], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[70], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 916 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19496,7 +19496,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 917 - FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &_fp[0] ); 
jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; @@ -19505,7 +19505,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[103], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[103], COUPs[1], 1.0, &_fp[0] ); jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19514,7 +19514,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[63], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19530,7 +19530,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 918 - FFV1_0( w_fp[41], w_fp[33], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 918 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19546,7 +19546,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 919 - FFV1_0( w_fp[41], w_fp[114], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[114], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 919 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19560,7 +19560,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 920 - FFV1_0( w_fp[62], w_fp[33], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 920 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19574,7 +19574,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 921 - VVV1_0( w_fp[65], w_fp[51], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[51], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 921 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19594,7 +19594,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 922 - FFV1_0( w_fp[3], w_fp[49], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[49], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 922 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19610,7 +19610,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 923 - FFV1_0( w_fp[71], w_fp[113], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[113], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 923 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19624,7 +19624,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 924 - FFV1_0( w_fp[3], w_fp[113], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 924 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19640,7 +19640,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 925 - FFV1_0( w_fp[71], w_fp[49], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[49], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 925 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 
) denominators_sv += cxabs2( amp_sv[0] ); @@ -19654,7 +19654,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 926 - VVV1_0( w_fp[0], w_fp[69], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[69], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 926 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19674,7 +19674,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 927 - FFV1_0( w_fp[3], w_fp[47], w_fp[93], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[93], COUPs[1], 1.0, &_fp[0] ); jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19683,7 +19683,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[90], COUPs[1], 1.0, &_fp[0] ); jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19692,7 +19692,7 @@ namespace mg5amcCpu jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19708,7 +19708,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 928 - FFV1_0( w_fp[46], w_fp[47], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 928 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( 
amp_sv[0] ); @@ -19724,7 +19724,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 929 - FFV1_0( w_fp[46], w_fp[113], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[113], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 929 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19738,7 +19738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 930 - FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 930 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19752,7 +19752,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 931 - VVV1_0( w_fp[65], w_fp[54], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[54], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 931 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19772,7 +19772,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 932 - FFV1_0( w_fp[53], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 932 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19788,7 +19788,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 933 - FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 933 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19802,7 +19802,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 934 - 
FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 934 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19818,7 +19818,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 935 - FFV1_0( w_fp[53], w_fp[94], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[94], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 935 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19832,7 +19832,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 936 - VVV1_0( w_fp[0], w_fp[70], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[70], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 936 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19852,7 +19852,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 937 - FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; @@ -19861,7 +19861,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[103], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[103], COUPs[1], 1.0, &_fp[0] ); jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19870,7 +19870,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( 
w_fp[46], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19886,7 +19886,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 938 - VVV1_0( w_fp[65], w_fp[20], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[20], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 938 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19906,7 +19906,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 939 - FFV1_0( w_fp[28], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 939 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19922,7 +19922,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 940 - FFV1_0( w_fp[62], w_fp[94], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[94], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 940 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19936,7 +19936,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 941 - FFV1_0( w_fp[62], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 941 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19952,7 +19952,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 942 - FFV1_0( w_fp[28], w_fp[94], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[94], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 942 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19966,7 +19966,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 943 - VVV1_0( w_fp[0], w_fp[69], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[69], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 943 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19986,7 +19986,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 944 - FFV1_0( w_fp[41], w_fp[2], w_fp[93], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[93], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19995,7 +19995,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -20004,7 +20004,7 @@ namespace mg5amcCpu jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -20020,7 +20020,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 945 - FFV1_0( w_fp[3], w_fp[15], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[15], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 945 
) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20040,7 +20040,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 946 - FFV1_0( w_fp[14], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 946 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20060,7 +20060,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 947 - FFV1_0( w_fp[3], w_fp[94], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[94], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 947 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20080,7 +20080,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 948 - FFV1_0( w_fp[71], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 948 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20100,7 +20100,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 949 - FFV1_0( w_fp[14], w_fp[94], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[94], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 949 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20116,7 +20116,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 950 - FFV1_0( w_fp[71], w_fp[15], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[15], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 950 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ 
-20129,10 +20129,10 @@ namespace mg5amcCpu // *** DIAGRAM 951 OF 1240 *** // Wavefunction(s) for diagram number 951 - VVV1P0_1( w_fp[0], w_fp[72], COUPs[0], 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[0], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[71] ); // Amplitude(s) for diagram number 951 - VVV1_0( w_fp[71], w_fp[13], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[13], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 951 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20160,7 +20160,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 952 - VVV1_0( w_fp[71], w_fp[10], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[10], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 952 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20188,7 +20188,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 953 - VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[43] += amp_sv[0]; @@ -20205,7 +20205,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -20222,7 +20222,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[18] += amp_sv[0]; @@ -20246,7 +20246,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 954 - VVV1_0( w_fp[56], w_fp[74], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[74], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 954 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20274,7 +20274,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 955 - VVV1_0( w_fp[56], w_fp[75], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[75], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 955 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20302,7 +20302,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 956 - VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -20319,7 +20319,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; - VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[5] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -20336,7 +20336,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -20360,7 +20360,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 957 - VVV1_0( w_fp[0], w_fp[74], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[74], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 957 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20388,7 +20388,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 958 - VVV1_0( w_fp[0], w_fp[75], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[75], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 958 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20413,12 +20413,12 @@ namespace mg5amcCpu // *** DIAGRAM 959 OF 1240 *** // Wavefunction(s) for diagram number 959 - VVVV1P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 0., 0., w_fp[94] ); - VVVV3P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 0., 0., w_fp[65] ); - VVVV4P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[94] ); + VVVV3P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[65] ); + VVVV4P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 959 - VVV1_0( w_fp[8], w_fp[5], w_fp[94], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[94], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -20435,7 +20435,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[65], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[65], COUPs[0], 1.0, &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -20452,7 +20452,7 @@ namespace mg5amcCpu jamp_sv[93] -= amp_sv[0]; jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -20473,12 +20473,12 @@ namespace mg5amcCpu // *** DIAGRAM 960 OF 1240 *** // Wavefunction(s) for 
diagram number 960 - VVVV1P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 0., 0., w_fp[90] ); - VVVV3P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 0., 0., w_fp[93] ); - VVVV4P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 0., 0., w_fp[69] ); + VVVV1P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[90] ); + VVVV3P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[93] ); + VVVV4P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[69] ); // Amplitude(s) for diagram number 960 - VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; @@ -20495,7 +20495,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[93], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[93], COUPs[0], 1.0, &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; jamp_sv[43] += amp_sv[0]; @@ -20512,7 +20512,7 @@ namespace mg5amcCpu jamp_sv[76] -= amp_sv[0]; jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[69], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[69], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[43] += amp_sv[0]; @@ -20536,7 +20536,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 961 - VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -20553,7 +20553,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[72], w_fp[5], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[95], COUPs[0], 1.0, &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; @@ -20570,7 +20570,7 @@ 
namespace mg5amcCpu jamp_sv[93] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - VVV1_0( w_fp[72], w_fp[5], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[105], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -20594,7 +20594,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 962 - VVV1_0( w_fp[72], w_fp[4], w_fp[115], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[115], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -20611,7 +20611,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVV1_0( w_fp[72], w_fp[4], w_fp[116], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[116], COUPs[0], 1.0, &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; @@ -20628,7 +20628,7 @@ namespace mg5amcCpu jamp_sv[76] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; - VVV1_0( w_fp[72], w_fp[4], w_fp[117], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[117], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -20652,7 +20652,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 963 - VVVV1_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[18] -= amp_sv[0]; @@ -20669,7 +20669,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -20686,7 +20686,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[105] += 
amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; jamp_sv[14] += amp_sv[0]; @@ -20710,7 +20710,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 964 - VVV1_0( w_fp[8], w_fp[24], w_fp[71], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[24], w_fp[71], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 964 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20738,7 +20738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 965 - VVV1_0( w_fp[72], w_fp[24], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[24], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 965 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20766,7 +20766,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 966 - VVV1_0( w_fp[72], w_fp[8], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[8], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 966 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20794,7 +20794,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 967 - VVV1_0( w_fp[71], w_fp[37], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[37], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 967 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20814,7 +20814,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 968 - FFV1_0( w_fp[3], w_fp[35], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[35], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 968 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20830,7 +20830,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 969 - FFV1_0( w_fp[76], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 969 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20844,7 +20844,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 970 - FFV1_0( w_fp[3], w_fp[114], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 970 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20860,7 +20860,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 971 - FFV1_0( w_fp[76], w_fp[35], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[35], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 971 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20874,7 +20874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 972 - VVV1_0( w_fp[0], w_fp[75], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[75], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 972 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20894,7 +20894,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 973 - FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &_fp[0] ); jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * 
amp_sv[0]; @@ -20903,7 +20903,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[93], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[93], COUPs[1], 1.0, &_fp[0] ); jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -20912,7 +20912,7 @@ namespace mg5amcCpu jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[69], COUPs[1], 1.0, &_fp[0] ); jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -20928,7 +20928,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 974 - FFV1_0( w_fp[38], w_fp[33], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 974 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20944,7 +20944,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 975 - FFV1_0( w_fp[38], w_fp[114], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[114], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 975 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20958,7 +20958,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 976 - FFV1_0( w_fp[104], w_fp[33], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[33], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 976 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( 
amp_sv[0] ); @@ -20972,7 +20972,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 977 - VVV1_0( w_fp[71], w_fp[45], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[45], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 977 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20992,7 +20992,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 978 - FFV1_0( w_fp[3], w_fp[43], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[43], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 978 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21008,7 +21008,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 979 - FFV1_0( w_fp[76], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 979 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21022,7 +21022,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 980 - FFV1_0( w_fp[3], w_fp[102], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 980 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21038,7 +21038,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 981 - FFV1_0( w_fp[76], w_fp[43], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[43], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 981 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21052,7 +21052,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 982 - 
VVV1_0( w_fp[0], w_fp[74], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[74], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 982 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21072,7 +21072,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 983 - FFV1_0( w_fp[3], w_fp[39], w_fp[94], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[94], COUPs[1], 1.0, &_fp[0] ); jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21081,7 +21081,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[65], COUPs[1], 1.0, &_fp[0] ); jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21090,7 +21090,7 @@ namespace mg5amcCpu jamp_sv[85] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21106,7 +21106,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 984 - FFV1_0( w_fp[46], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 984 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21122,7 +21122,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 985 - FFV1_0( w_fp[46], w_fp[102], w_fp[72], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[102], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 985 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21136,7 +21136,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 986 - FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 986 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21150,7 +21150,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 987 - VVV1_0( w_fp[71], w_fp[54], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[54], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 987 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21170,7 +21170,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 988 - FFV1_0( w_fp[7], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 988 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21186,7 +21186,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 989 - FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 989 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21200,7 +21200,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 990 - FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 990 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21216,7 +21216,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 991 - FFV1_0( w_fp[7], w_fp[97], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[97], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 991 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21230,7 +21230,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 992 - VVV1_0( w_fp[0], w_fp[75], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[75], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 992 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21250,7 +21250,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 993 - FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21259,7 +21259,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[93], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[93], COUPs[1], 1.0, &_fp[0] ); jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21268,7 +21268,7 @@ namespace mg5amcCpu jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] += cxtype( 
0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21284,7 +21284,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 994 - VVV1_0( w_fp[71], w_fp[23], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[23], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 994 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21304,7 +21304,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 995 - FFV1_0( w_fp[25], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 995 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21320,7 +21320,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 996 - FFV1_0( w_fp[104], w_fp[97], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[97], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 996 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21334,7 +21334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 997 - FFV1_0( w_fp[104], w_fp[2], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 997 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21350,7 +21350,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 998 - FFV1_0( w_fp[25], w_fp[97], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[97], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 998 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ 
-21364,7 +21364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 999 - VVV1_0( w_fp[0], w_fp[74], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[74], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 999 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21384,7 +21384,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1000 - FFV1_0( w_fp[38], w_fp[2], w_fp[94], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[94], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21393,7 +21393,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21402,7 +21402,7 @@ namespace mg5amcCpu jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21418,7 +21418,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1001 - FFV1_0( w_fp[3], w_fp[17], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1001 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21438,7 +21438,7 @@ namespace mg5amcCpu 
// (none) // Amplitude(s) for diagram number 1002 - FFV1_0( w_fp[26], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1002 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21458,7 +21458,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1003 - FFV1_0( w_fp[3], w_fp[97], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[97], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1003 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21478,7 +21478,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1004 - FFV1_0( w_fp[76], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1004 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21498,7 +21498,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1005 - FFV1_0( w_fp[26], w_fp[97], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[97], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1005 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21514,7 +21514,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1006 - FFV1_0( w_fp[76], w_fp[17], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[17], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1006 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21530,7 +21530,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1007 - VVV1_0( w_fp[56], w_fp[59], w_fp[6], COUPs[0], 
&_fp[0] ); + VVV1_0( w_fp[56], w_fp[59], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1007 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21558,7 +21558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1008 - VVV1_0( w_fp[56], w_fp[1], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[1], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1008 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21586,7 +21586,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1009 - VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -21603,7 +21603,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -21620,7 +21620,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -21644,7 +21644,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1010 - VVV1_0( w_fp[98], w_fp[108], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[108], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1010 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); 
@@ -21672,7 +21672,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1011 - VVV1_0( w_fp[98], w_fp[1], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[1], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1011 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21700,7 +21700,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1012 - VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -21717,7 +21717,7 @@ namespace mg5amcCpu jamp_sv[101] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; jamp_sv[14] += amp_sv[0]; @@ -21734,7 +21734,7 @@ namespace mg5amcCpu jamp_sv[103] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -21758,7 +21758,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1013 - VVV1_0( w_fp[0], w_fp[108], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[108], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1013 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21786,7 +21786,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1014 - VVV1_0( w_fp[0], w_fp[59], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[59], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1014 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21811,12 +21811,12 @@ namespace mg5amcCpu // *** DIAGRAM 1015 OF 1240 *** // Wavefunction(s) for diagram number 1015 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 0., 0., w_fp[11] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 0., 0., w_fp[42] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 0., 0., w_fp[76] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[11] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[42] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[76] ); // Amplitude(s) for diagram number 1015 - VVV1_0( w_fp[24], w_fp[6], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[11], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; @@ -21833,7 +21833,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVV1_0( w_fp[24], w_fp[6], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[42], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -21850,7 +21850,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[24], w_fp[6], w_fp[76], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[76], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -21871,12 +21871,12 @@ namespace mg5amcCpu // *** DIAGRAM 1016 OF 1240 *** // Wavefunction(s) for diagram number 1016 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 0., 0., w_fp[97] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 0., 0., w_fp[71] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., 
w_fp[97] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[71] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 1016 - VVV1_0( w_fp[8], w_fp[6], w_fp[97], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[97], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -21893,7 +21893,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -21910,7 +21910,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -21934,7 +21934,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1017 - VVV1_0( w_fp[1], w_fp[24], w_fp[118], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[118], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -21951,7 +21951,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[24], w_fp[119], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[119], COUPs[0], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -21968,7 +21968,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[24], w_fp[120], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[120], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= 
amp_sv[0]; @@ -21992,7 +21992,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1018 - VVV1_0( w_fp[1], w_fp[8], w_fp[85], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[85], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -22009,7 +22009,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[112], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[112], COUPs[0], 1.0, &_fp[0] ); jamp_sv[21] += amp_sv[0]; jamp_sv[23] -= amp_sv[0]; jamp_sv[28] -= amp_sv[0]; @@ -22026,7 +22026,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -22050,7 +22050,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1019 - VVV1_0( w_fp[56], w_fp[68], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[68], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1019 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22078,7 +22078,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1020 - VVV1_0( w_fp[56], w_fp[1], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[1], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1020 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22106,7 +22106,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1021 - VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= 
amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -22123,7 +22123,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -22140,7 +22140,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -22164,7 +22164,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1022 - VVV1_0( w_fp[101], w_fp[108], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[108], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1022 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22192,7 +22192,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1023 - VVV1_0( w_fp[101], w_fp[1], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[1], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1023 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22220,7 +22220,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1024 - VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[25] -= amp_sv[0]; @@ -22237,7 +22237,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], &_fp[0] ); 
+ VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[20] += amp_sv[0]; @@ -22254,7 +22254,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[25] += amp_sv[0]; @@ -22278,7 +22278,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1025 - VVV1_0( w_fp[0], w_fp[108], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[108], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1025 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22306,7 +22306,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1026 - VVV1_0( w_fp[0], w_fp[68], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[68], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1026 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22334,7 +22334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1027 - VVV1_0( w_fp[27], w_fp[5], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[11], COUPs[0], 1.0, &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; @@ -22351,7 +22351,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[27], w_fp[5], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[42], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -22368,7 +22368,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += 
amp_sv[0]; - VVV1_0( w_fp[27], w_fp[5], w_fp[76], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[76], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -22389,12 +22389,12 @@ namespace mg5amcCpu // *** DIAGRAM 1028 OF 1240 *** // Wavefunction(s) for diagram number 1028 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 0., 0., w_fp[10] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 0., 0., w_fp[16] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[10] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[16] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 1028 - VVV1_0( w_fp[8], w_fp[5], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[10], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -22411,7 +22411,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[16], COUPs[0], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[25] += amp_sv[0]; @@ -22428,7 +22428,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; jamp_sv[25] += amp_sv[0]; @@ -22452,7 +22452,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1029 - VVV1_0( w_fp[1], w_fp[27], w_fp[115], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[115], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -22469,7 +22469,7 @@ namespace 
mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[27], w_fp[116], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[116], COUPs[0], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[15] += amp_sv[0]; @@ -22486,7 +22486,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[27], w_fp[117], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[117], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -22510,7 +22510,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1030 - VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; @@ -22527,7 +22527,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &_fp[0] ); jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[26] -= amp_sv[0]; @@ -22544,7 +22544,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[25] -= amp_sv[0]; @@ -22568,7 +22568,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1031 - VVV1_0( w_fp[56], w_fp[67], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[67], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1031 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22596,7 +22596,7 @@ namespace mg5amcCpu // (none) 
// Amplitude(s) for diagram number 1032 - VVV1_0( w_fp[56], w_fp[1], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[1], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1032 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22624,7 +22624,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1033 - VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -22641,7 +22641,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -22658,7 +22658,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[3] += amp_sv[0]; @@ -22682,7 +22682,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1034 - VVV1_0( w_fp[96], w_fp[108], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[108], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1034 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22710,7 +22710,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1035 - VVV1_0( w_fp[96], w_fp[1], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[1], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1035 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22738,7 +22738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1036 - VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[23] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; @@ -22755,7 +22755,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -22772,7 +22772,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; jamp_sv[27] += amp_sv[0]; @@ -22796,7 +22796,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1037 - VVV1_0( w_fp[0], w_fp[108], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[108], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1037 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22824,7 +22824,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1038 - VVV1_0( w_fp[0], w_fp[67], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[67], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1038 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22852,7 +22852,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1039 - VVV1_0( w_fp[4], w_fp[29], w_fp[11], 
COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[11], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[11] -= amp_sv[0]; jamp_sv[17] -= amp_sv[0]; @@ -22869,7 +22869,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVV1_0( w_fp[4], w_fp[29], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[42], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[3] += amp_sv[0]; @@ -22886,7 +22886,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[4], w_fp[29], w_fp[76], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[76], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[3] += amp_sv[0]; @@ -22907,12 +22907,12 @@ namespace mg5amcCpu // *** DIAGRAM 1040 OF 1240 *** // Wavefunction(s) for diagram number 1040 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 0., 0., w_fp[76] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 0., 0., w_fp[42] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 0., 0., w_fp[11] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[76] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[42] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 1040 - VVV1_0( w_fp[8], w_fp[4], w_fp[76], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[76], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -22929,7 +22929,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[42], COUPs[0], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; jamp_sv[27] += amp_sv[0]; @@ -22946,7 +22946,7 @@ namespace mg5amcCpu jamp_sv[70] += 
amp_sv[0]; jamp_sv[90] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[11], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[27] += amp_sv[0]; @@ -22970,7 +22970,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1041 - VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[16] += amp_sv[0]; @@ -22987,7 +22987,7 @@ namespace mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[29], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[95], COUPs[0], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[9] += amp_sv[0]; @@ -23004,7 +23004,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[29], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[105], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -23028,7 +23028,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1042 - VVV1_0( w_fp[1], w_fp[8], w_fp[87], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[87], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[17] += amp_sv[0]; @@ -23045,7 +23045,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[34], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[34], COUPs[0], 1.0, &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[23] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; @@ -23062,7 +23062,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[86], COUPs[0], 
&_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[86], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[11] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -23086,7 +23086,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1043 - VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -23103,7 +23103,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -23120,7 +23120,7 @@ namespace mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[17] += amp_sv[0]; @@ -23137,7 +23137,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -23154,7 +23154,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -23171,7 +23171,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[8], 
w_fp[31], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; @@ -23188,7 +23188,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -23205,7 +23205,7 @@ namespace mg5amcCpu jamp_sv[113] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -23222,7 +23222,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; @@ -23246,7 +23246,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1044 - VVV1_0( w_fp[1], w_fp[30], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[30], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -23263,7 +23263,7 @@ namespace mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[31], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[31], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -23280,7 +23280,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[32], w_fp[56], COUPs[0], 
&_fp[0] ); + VVV1_0( w_fp[1], w_fp[32], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -23304,7 +23304,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1045 - VVV1_0( w_fp[1], w_fp[8], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[17] += amp_sv[0]; @@ -23321,7 +23321,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; @@ -23338,7 +23338,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[106], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[106], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; @@ -23362,7 +23362,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1046 - FFV1_0( w_fp[58], w_fp[114], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[114], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1046 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23375,7 +23375,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1047 - FFV1_0( w_fp[48], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1047 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23388,7 +23388,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1048 - FFV1_0( w_fp[104], w_fp[100], w_fp[6], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[100], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1048 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23401,7 +23401,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1049 - FFV1_0( w_fp[104], w_fp[36], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[36], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1049 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23414,7 +23414,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1050 - FFV1_0( w_fp[48], w_fp[100], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[100], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1050 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23427,7 +23427,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1051 - FFV1_0( w_fp[58], w_fp[36], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[36], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1051 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23440,7 +23440,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1052 - FFV1_0( w_fp[60], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1052 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23453,7 +23453,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1053 - FFV1_0( w_fp[40], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1053 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23466,7 +23466,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1054 - FFV1_0( w_fp[62], w_fp[100], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[100], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1054 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23479,7 +23479,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1055 - FFV1_0( w_fp[62], w_fp[35], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[35], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1055 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23492,7 +23492,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1056 - FFV1_0( w_fp[40], w_fp[100], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[100], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1056 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23505,7 +23505,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1057 - FFV1_0( w_fp[60], w_fp[35], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[35], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1057 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23518,7 +23518,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1058 - FFV1_0( w_fp[3], w_fp[114], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1058 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23534,7 +23534,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1059 - FFV1_0( w_fp[12], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1059 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23548,7 +23548,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1060 - FFV1_0( w_fp[3], w_fp[100], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[100], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1060 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23564,7 +23564,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1061 - VVV1_0( w_fp[96], w_fp[1], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[1], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1061 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23584,7 +23584,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1062 - FFV1_0( w_fp[12], w_fp[100], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[100], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1062 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23598,7 +23598,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1063 - VVV1_0( w_fp[0], w_fp[67], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[67], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1063 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23618,7 +23618,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 1064 - FFV1_0( w_fp[3], w_fp[33], w_fp[76], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[76], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; @@ -23627,7 +23627,7 @@ namespace mg5amcCpu jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[42], COUPs[1], 1.0, &_fp[0] ); jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -23636,7 +23636,7 @@ namespace mg5amcCpu jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[11], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -23652,7 +23652,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1065 - FFV1_0( w_fp[78], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1065 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23665,7 +23665,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1066 - FFV1_0( w_fp[53], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1066 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23678,7 +23678,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) 
for diagram number 1067 - FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1067 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23691,7 +23691,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1068 - FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1068 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23704,7 +23704,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1069 - FFV1_0( w_fp[53], w_fp[89], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[89], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1069 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23717,7 +23717,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1070 - FFV1_0( w_fp[78], w_fp[44], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[44], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1070 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23730,7 +23730,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1071 - FFV1_0( w_fp[60], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1071 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23743,7 +23743,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1072 - FFV1_0( w_fp[28], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[28], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1072 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23756,7 +23756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1073 - FFV1_0( w_fp[62], w_fp[89], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[89], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1073 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23769,7 +23769,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1074 - FFV1_0( w_fp[62], w_fp[43], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[43], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1074 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23782,7 +23782,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1075 - FFV1_0( w_fp[28], w_fp[89], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[89], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1075 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23795,7 +23795,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1076 - FFV1_0( w_fp[60], w_fp[43], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[43], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1076 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23808,7 +23808,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1077 - FFV1_0( w_fp[3], w_fp[102], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( 
channelId == 1077 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23824,7 +23824,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1078 - FFV1_0( w_fp[14], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1078 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23838,7 +23838,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1079 - FFV1_0( w_fp[3], w_fp[89], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[89], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1079 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23854,7 +23854,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1080 - VVV1_0( w_fp[101], w_fp[1], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[1], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1080 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23874,7 +23874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1081 - FFV1_0( w_fp[14], w_fp[89], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[89], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1081 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23888,7 +23888,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1082 - VVV1_0( w_fp[0], w_fp[68], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[68], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1082 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += 
cxabs2( amp_sv[0] ); @@ -23908,7 +23908,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1083 - FFV1_0( w_fp[3], w_fp[39], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[10], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; @@ -23917,7 +23917,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[16], COUPs[1], 1.0, &_fp[0] ); jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -23926,7 +23926,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -23942,7 +23942,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1084 - FFV1_0( w_fp[78], w_fp[113], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[113], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1084 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23955,7 +23955,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1085 - FFV1_0( w_fp[7], w_fp[113], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[113], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1085 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23968,7 
+23968,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1086 - FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1086 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23981,7 +23981,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1087 - FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1087 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23994,7 +23994,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1088 - FFV1_0( w_fp[7], w_fp[91], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[91], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1088 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24007,7 +24007,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1089 - FFV1_0( w_fp[78], w_fp[50], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[50], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1089 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24020,7 +24020,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1090 - FFV1_0( w_fp[58], w_fp[113], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[113], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1090 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24033,7 +24033,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1091 - FFV1_0( w_fp[25], 
w_fp[113], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[113], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1091 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24046,7 +24046,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1092 - FFV1_0( w_fp[104], w_fp[91], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[91], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1092 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24059,7 +24059,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1093 - FFV1_0( w_fp[104], w_fp[49], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[49], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1093 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24072,7 +24072,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1094 - FFV1_0( w_fp[25], w_fp[91], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[91], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1094 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24085,7 +24085,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1095 - FFV1_0( w_fp[58], w_fp[49], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[49], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1095 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24098,7 +24098,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1096 - FFV1_0( w_fp[3], w_fp[113], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[59], COUPs[1], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1096 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24114,7 +24114,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1097 - FFV1_0( w_fp[26], w_fp[113], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[113], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1097 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24128,7 +24128,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1098 - FFV1_0( w_fp[3], w_fp[91], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[91], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1098 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24144,7 +24144,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1099 - VVV1_0( w_fp[98], w_fp[1], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[1], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1099 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24164,7 +24164,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1100 - FFV1_0( w_fp[26], w_fp[91], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[91], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1100 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24178,7 +24178,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1101 - VVV1_0( w_fp[0], w_fp[59], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[59], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1101 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24198,7 +24198,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1102 - FFV1_0( w_fp[3], w_fp[47], w_fp[97], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[97], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; @@ -24207,7 +24207,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24216,7 +24216,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24232,7 +24232,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1103 - FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1103 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24248,7 +24248,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1104 - FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1104 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 
0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24262,7 +24262,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1105 - FFV1_0( w_fp[78], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1105 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24278,7 +24278,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1106 - VVV1_0( w_fp[96], w_fp[1], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[1], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1106 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24298,7 +24298,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1107 - FFV1_0( w_fp[78], w_fp[18], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[18], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1107 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24312,7 +24312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1108 - VVV1_0( w_fp[0], w_fp[67], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[67], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1108 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24332,7 +24332,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1109 - FFV1_0( w_fp[46], w_fp[2], w_fp[76], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[76], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; @@ -24341,7 +24341,7 @@ namespace mg5amcCpu jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24350,7 +24350,7 @@ namespace mg5amcCpu jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24366,7 +24366,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1110 - FFV1_0( w_fp[104], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1110 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24382,7 +24382,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1111 - FFV1_0( w_fp[104], w_fp[15], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[15], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1111 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24396,7 +24396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1112 - FFV1_0( w_fp[58], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1112 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24412,7 +24412,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 
1113 - VVV1_0( w_fp[101], w_fp[1], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[1], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1113 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24432,7 +24432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1114 - FFV1_0( w_fp[58], w_fp[15], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[15], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1114 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24446,7 +24446,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1115 - VVV1_0( w_fp[0], w_fp[68], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[68], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1115 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24466,7 +24466,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1116 - FFV1_0( w_fp[38], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -24475,7 +24475,7 @@ namespace mg5amcCpu jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24484,7 +24484,7 @@ namespace mg5amcCpu jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0]; - 
FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24500,7 +24500,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1117 - FFV1_0( w_fp[62], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1117 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24516,7 +24516,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1118 - FFV1_0( w_fp[62], w_fp[17], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[17], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1118 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24530,7 +24530,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1119 - FFV1_0( w_fp[60], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1119 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24546,7 +24546,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1120 - VVV1_0( w_fp[98], w_fp[1], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[1], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1120 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24566,7 +24566,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1121 - FFV1_0( w_fp[60], w_fp[17], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[17], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1121 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24580,7 +24580,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1122 - VVV1_0( w_fp[0], w_fp[59], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[59], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1122 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24600,7 +24600,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1123 - FFV1_0( w_fp[41], w_fp[2], w_fp[97], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[97], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -24609,7 +24609,7 @@ namespace mg5amcCpu jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24618,7 +24618,7 @@ namespace mg5amcCpu jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24631,12 +24631,12 @@ namespace mg5amcCpu // *** DIAGRAM 1124 OF 1240 *** // Wavefunction(s) for diagram number 1124 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[71] ); - 
VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[97] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[71] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[97] ); // Amplitude(s) for diagram number 1124 - VVVV1_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -24653,7 +24653,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -24670,7 +24670,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -24687,7 +24687,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVVV1_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -24704,7 +24704,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV3_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -24721,7 +24721,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; 
jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV4_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -24738,7 +24738,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVVV1_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -24755,7 +24755,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV3_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -24772,7 +24772,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -24793,12 +24793,12 @@ namespace mg5amcCpu // *** DIAGRAM 1125 OF 1240 *** // Wavefunction(s) for diagram number 1125 - VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 0., 0., w_fp[59] ); - VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 0., 0., w_fp[20] ); - VVV1P0_1( w_fp[97], w_fp[5], COUPs[0], 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[59] ); + VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] ); + VVV1P0_1( w_fp[97], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[60] ); // Amplitude(s) for diagram number 1125 - VVV1_0( w_fp[8], w_fp[6], w_fp[59], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[59], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; 
jamp_sv[6] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -24815,7 +24815,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[20], COUPs[0], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -24832,7 +24832,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -24853,12 +24853,12 @@ namespace mg5amcCpu // *** DIAGRAM 1126 OF 1240 *** // Wavefunction(s) for diagram number 1126 - VVV1P0_1( w_fp[21], w_fp[6], COUPs[0], 0., 0., w_fp[17] ); - VVV1P0_1( w_fp[71], w_fp[6], COUPs[0], 0., 0., w_fp[98] ); - VVV1P0_1( w_fp[97], w_fp[6], COUPs[0], 0., 0., w_fp[111] ); + VVV1P0_1( w_fp[21], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[71], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[98] ); + VVV1P0_1( w_fp[97], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 1126 - VVV1_0( w_fp[8], w_fp[5], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[17], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -24875,7 +24875,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -24892,7 +24892,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[111], 
COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -24916,7 +24916,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1127 - VVV1_0( w_fp[21], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[21], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -24933,7 +24933,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[71], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -24950,7 +24950,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[97], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[97], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -24971,22 +24971,22 @@ namespace mg5amcCpu // *** DIAGRAM 1128 OF 1240 *** // Wavefunction(s) for diagram number 1128 - FFV1_2( w_fp[3], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - FFV1_2( w_fp[3], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); - FFV1_2( w_fp[3], w_fp[97], COUPs[1], cIPD[0], cIPD[1], w_fp[68] ); + FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); + FFV1_2( w_fp[3], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] ); // Amplitude(s) for diagram number 1128 - FFV1_0( w_fp[16], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[90] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; - FFV1_0( w_fp[10], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); 
jamp_sv[91] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; - FFV1_0( w_fp[68], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[68], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[90] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[94] += amp_sv[0]; @@ -24998,7 +24998,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1129 - FFV1_0( w_fp[3], w_fp[39], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[17], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25007,7 +25007,7 @@ namespace mg5amcCpu jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &_fp[0] ); jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25016,7 +25016,7 @@ namespace mg5amcCpu jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25032,17 +25032,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1130 - FFV1_0( w_fp[41], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] += amp_sv[0]; jamp_sv[74] -= amp_sv[0]; jamp_sv[80] -= amp_sv[0]; jamp_sv[86] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[74] -= amp_sv[0]; 
jamp_sv[78] += amp_sv[0]; jamp_sv[80] -= amp_sv[0]; jamp_sv[84] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[39], w_fp[97], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[97], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] -= amp_sv[0]; jamp_sv[78] += amp_sv[0]; jamp_sv[84] += amp_sv[0]; @@ -25054,17 +25054,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1131 - FFV1_0( w_fp[16], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[114] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - FFV1_0( w_fp[10], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[115] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - FFV1_0( w_fp[68], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[68], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; @@ -25076,7 +25076,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1132 - FFV1_0( w_fp[3], w_fp[47], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[59], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25085,7 +25085,7 @@ namespace mg5amcCpu jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[20], COUPs[1], 1.0, &_fp[0] ); jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25094,7 +25094,7 @@ namespace mg5amcCpu jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * 
amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25110,17 +25110,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1133 - FFV1_0( w_fp[38], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] += amp_sv[0]; jamp_sv[98] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[47], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[98] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[47], w_fp[97], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[97], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; @@ -25129,22 +25129,22 @@ namespace mg5amcCpu // *** DIAGRAM 1134 OF 1240 *** // Wavefunction(s) for diagram number 1134 - FFV1_1( w_fp[2], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); - FFV1_1( w_fp[2], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); - FFV1_1( w_fp[2], w_fp[97], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); + FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + FFV1_1( w_fp[2], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); // Amplitude(s) for diagram number 1134 - FFV1_0( w_fp[38], w_fp[23], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[23], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; jamp_sv[55] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[21], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[21], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[7] -= 
amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; jamp_sv[49] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[71], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[71], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -25156,7 +25156,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1135 - FFV1_0( w_fp[38], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25165,7 +25165,7 @@ namespace mg5amcCpu jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25174,7 +25174,7 @@ namespace mg5amcCpu jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25190,17 +25190,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1136 - FFV1_0( w_fp[41], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; jamp_sv[54] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[21], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[21], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; 
jamp_sv[30] -= amp_sv[0]; jamp_sv[48] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[71], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[71], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -25212,7 +25212,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1137 - FFV1_0( w_fp[41], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25221,7 +25221,7 @@ namespace mg5amcCpu jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[20], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25230,7 +25230,7 @@ namespace mg5amcCpu jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25246,7 +25246,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1138 - FFV1_0( w_fp[3], w_fp[23], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25255,7 +25255,7 @@ namespace mg5amcCpu jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[21], 
w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[21], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25264,7 +25264,7 @@ namespace mg5amcCpu jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[71], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[71], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25280,7 +25280,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1139 - FFV1_0( w_fp[16], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25289,7 +25289,7 @@ namespace mg5amcCpu jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[10], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25298,7 +25298,7 @@ namespace mg5amcCpu jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[68], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[68], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25311,12 +25311,12 @@ namespace mg5amcCpu // *** DIAGRAM 1140 OF 1240 *** // Wavefunction(s) for diagram number 1140 - VVVV1P0_1( w_fp[0], 
w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[68] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[29] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[10] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[68] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[29] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 1140 - VVVV1_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -25333,7 +25333,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV3_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[12] -= amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -25350,7 +25350,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVVV4_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -25367,7 +25367,7 @@ namespace mg5amcCpu jamp_sv[100] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVVV1_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -25384,7 +25384,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVVV3_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[12] -= 
amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -25401,7 +25401,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV4_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -25418,7 +25418,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV1_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[3] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -25435,7 +25435,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; - VVVV3_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -25452,7 +25452,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV4_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -25473,12 +25473,12 @@ namespace mg5amcCpu // *** DIAGRAM 1141 OF 1240 *** // Wavefunction(s) for diagram number 1141 - VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 0., 0., w_fp[16] ); - VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 0., 0., w_fp[71] ); - VVV1P0_1( w_fp[10], w_fp[4], COUPs[0], 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[10], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 1141 - VVV1_0( 
w_fp[8], w_fp[6], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[12] -= amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -25495,7 +25495,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -25512,7 +25512,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -25533,12 +25533,12 @@ namespace mg5amcCpu // *** DIAGRAM 1142 OF 1240 *** // Wavefunction(s) for diagram number 1142 - VVV1P0_1( w_fp[68], w_fp[6], COUPs[0], 0., 0., w_fp[23] ); - VVV1P0_1( w_fp[29], w_fp[6], COUPs[0], 0., 0., w_fp[60] ); - VVV1P0_1( w_fp[10], w_fp[6], COUPs[0], 0., 0., w_fp[20] ); + VVV1P0_1( w_fp[68], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[29], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[10], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[20] ); // Amplitude(s) for diagram number 1142 - VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -25555,7 +25555,7 @@ namespace mg5amcCpu jamp_sv[100] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[60], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[60], COUPs[0], 1.0, &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -25572,7 +25572,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += 
amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[20], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -25596,7 +25596,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1143 - VVV1_0( w_fp[68], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[68], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -25613,7 +25613,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[29], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[29], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -25630,7 +25630,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[10], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[10], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[3] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -25651,22 +25651,22 @@ namespace mg5amcCpu // *** DIAGRAM 1144 OF 1240 *** // Wavefunction(s) for diagram number 1144 - FFV1_2( w_fp[3], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[59] ); - FFV1_2( w_fp[3], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[111] ); - FFV1_2( w_fp[3], w_fp[10], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); + FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] ); + FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[111] ); + FFV1_2( w_fp[3], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 1144 - FFV1_0( w_fp[59], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[59], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[66] += amp_sv[0]; jamp_sv[67] -= amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[71] += 
amp_sv[0]; - FFV1_0( w_fp[111], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[111], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[67] -= amp_sv[0]; jamp_sv[68] += amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[70] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[66] -= amp_sv[0]; jamp_sv[68] += amp_sv[0]; jamp_sv[70] += amp_sv[0]; @@ -25678,7 +25678,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1145 - FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25687,7 +25687,7 @@ namespace mg5amcCpu jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[60], COUPs[1], 1.0, &_fp[0] ); jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25696,7 +25696,7 @@ namespace mg5amcCpu jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[20], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25712,17 +25712,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1146 - FFV1_0( w_fp[41], w_fp[33], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[68], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] += amp_sv[0]; jamp_sv[50] -= amp_sv[0]; jamp_sv[56] -= amp_sv[0]; jamp_sv[62] += amp_sv[0]; - FFV1_0( w_fp[41], 
w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[50] -= amp_sv[0]; jamp_sv[54] += amp_sv[0]; jamp_sv[56] -= amp_sv[0]; jamp_sv[60] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[33], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[10], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] -= amp_sv[0]; jamp_sv[54] += amp_sv[0]; jamp_sv[60] += amp_sv[0]; @@ -25734,17 +25734,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1147 - FFV1_0( w_fp[59], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[59], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[108] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - FFV1_0( w_fp[111], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[111], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[109] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[108] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; @@ -25756,7 +25756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1148 - FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &_fp[0] ); jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25765,7 +25765,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25774,7 +25774,7 @@ 
namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25790,17 +25790,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1149 - FFV1_0( w_fp[46], w_fp[47], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[68], COUPs[1], 1.0, &_fp[0] ); jamp_sv[97] += amp_sv[0]; jamp_sv[100] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[47], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[100] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[47], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[10], COUPs[1], 1.0, &_fp[0] ); jamp_sv[97] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; @@ -25809,22 +25809,22 @@ namespace mg5amcCpu // *** DIAGRAM 1150 OF 1240 *** // Wavefunction(s) for diagram number 1150 - FFV1_1( w_fp[2], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[17] ); - FFV1_1( w_fp[2], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[68] ); - FFV1_1( w_fp[2], w_fp[10], COUPs[1], cIPD[0], cIPD[1], w_fp[29] ); + FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] ); + FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] ); + FFV1_1( w_fp[2], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] ); // Amplitude(s) for diagram number 1150 - FFV1_0( w_fp[46], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; jamp_sv[79] += amp_sv[0]; - 
FFV1_0( w_fp[46], w_fp[68], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[68], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; jamp_sv[73] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[29], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[29], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[73] += amp_sv[0]; @@ -25836,7 +25836,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1151 - FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25845,7 +25845,7 @@ namespace mg5amcCpu jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25854,7 +25854,7 @@ namespace mg5amcCpu jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[20], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25870,17 +25870,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1152 - FFV1_0( w_fp[41], w_fp[17], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[17], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[12] -= amp_sv[0]; jamp_sv[36] -= amp_sv[0]; jamp_sv[78] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[68], w_fp[4], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[68], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[36] -= amp_sv[0]; jamp_sv[72] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[29], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[29], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[72] += amp_sv[0]; @@ -25892,7 +25892,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1153 - FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25901,7 +25901,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25910,7 +25910,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25926,7 +25926,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1154 - FFV1_0( w_fp[3], w_fp[17], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25935,7 +25935,7 @@ namespace mg5amcCpu jamp_sv[37] -= 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[68], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[68], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25944,7 +25944,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[29], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[29], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25960,7 +25960,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1155 - FFV1_0( w_fp[59], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[59], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25969,7 +25969,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[111], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[111], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25978,7 +25978,7 @@ namespace mg5amcCpu jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[98], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] -= cxtype( 0, 1 ) * 
amp_sv[0]; @@ -25991,12 +25991,12 @@ namespace mg5amcCpu // *** DIAGRAM 1156 OF 1240 *** // Wavefunction(s) for diagram number 1156 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[98] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[27] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[98] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[27] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 1156 - VVVV1_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[18] -= amp_sv[0]; @@ -26013,7 +26013,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[18] -= amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -26030,7 +26030,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; - VVVV4_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[5] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -26047,7 +26047,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - VVVV1_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -26064,7 +26064,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[97] -= amp_sv[0]; - VVVV3_0( 
w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -26081,7 +26081,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - VVVV4_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -26098,7 +26098,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVVV1_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -26115,7 +26115,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[102] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - VVVV3_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -26132,7 +26132,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVVV4_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -26153,12 +26153,12 @@ namespace mg5amcCpu // *** DIAGRAM 1157 OF 1240 *** // Wavefunction(s) for diagram number 1157 - VVV1P0_1( w_fp[98], w_fp[4], COUPs[0], 0., 0., w_fp[59] ); - VVV1P0_1( w_fp[27], w_fp[4], COUPs[0], 0., 0., w_fp[29] ); - VVV1P0_1( w_fp[111], w_fp[4], COUPs[0], 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[98], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[59] ); + VVV1P0_1( w_fp[27], w_fp[4], COUPs[0], 
1.0, 0., 0., w_fp[29] ); + VVV1P0_1( w_fp[111], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 1157 - VVV1_0( w_fp[8], w_fp[5], w_fp[59], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[59], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[18] -= amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -26175,7 +26175,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[29], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[29], COUPs[0], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -26192,7 +26192,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[68], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -26213,12 +26213,12 @@ namespace mg5amcCpu // *** DIAGRAM 1158 OF 1240 *** // Wavefunction(s) for diagram number 1158 - VVV1P0_1( w_fp[98], w_fp[5], COUPs[0], 0., 0., w_fp[17] ); - VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 0., 0., w_fp[21] ); - VVV1P0_1( w_fp[111], w_fp[5], COUPs[0], 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[98], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[111], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[71] ); // Amplitude(s) for diagram number 1158 - VVV1_0( w_fp[8], w_fp[4], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[17], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -26235,7 +26235,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &_fp[0] ); jamp_sv[19] -= amp_sv[0]; 
jamp_sv[29] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -26252,7 +26252,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[71], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[71], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -26276,7 +26276,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1159 - VVV1_0( w_fp[98], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[18] -= amp_sv[0]; @@ -26293,7 +26293,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVV1_0( w_fp[27], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -26310,7 +26310,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[97] -= amp_sv[0]; - VVV1_0( w_fp[111], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[111], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -26331,22 +26331,22 @@ namespace mg5amcCpu // *** DIAGRAM 1160 OF 1240 *** // Wavefunction(s) for diagram number 1160 - FFV1_2( w_fp[3], w_fp[98], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - FFV1_2( w_fp[3], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); - FFV1_2( w_fp[3], w_fp[111], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); + FFV1_2( w_fp[3], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); + FFV1_2( w_fp[3], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); // Amplitude(s) for diagram number 1160 - FFV1_0( w_fp[16], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], 
w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[60] += amp_sv[0]; jamp_sv[61] -= amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[65] += amp_sv[0]; - FFV1_0( w_fp[20], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[61] -= amp_sv[0]; jamp_sv[62] += amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[64] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[60] -= amp_sv[0]; jamp_sv[62] += amp_sv[0]; jamp_sv[64] += amp_sv[0]; @@ -26358,7 +26358,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1161 - FFV1_0( w_fp[3], w_fp[33], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[17], COUPs[1], 1.0, &_fp[0] ); jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26367,7 +26367,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26376,7 +26376,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26392,17 +26392,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1162 - FFV1_0( w_fp[38], w_fp[33], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[98], COUPs[1], 1.0, 
&_fp[0] ); jamp_sv[49] += amp_sv[0]; jamp_sv[52] -= amp_sv[0]; jamp_sv[58] -= amp_sv[0]; jamp_sv[68] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[33], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[52] -= amp_sv[0]; jamp_sv[55] += amp_sv[0]; jamp_sv[58] -= amp_sv[0]; jamp_sv[66] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[33], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[49] -= amp_sv[0]; jamp_sv[55] += amp_sv[0]; jamp_sv[66] += amp_sv[0]; @@ -26414,17 +26414,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1163 - FFV1_0( w_fp[16], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[84] += amp_sv[0]; jamp_sv[85] -= amp_sv[0]; jamp_sv[87] -= amp_sv[0]; jamp_sv[89] += amp_sv[0]; - FFV1_0( w_fp[20], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[85] -= amp_sv[0]; jamp_sv[86] += amp_sv[0]; jamp_sv[87] -= amp_sv[0]; jamp_sv[88] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[84] -= amp_sv[0]; jamp_sv[86] += amp_sv[0]; jamp_sv[88] += amp_sv[0]; @@ -26436,7 +26436,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1164 - FFV1_0( w_fp[3], w_fp[39], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[59], COUPs[1], 1.0, &_fp[0] ); jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26445,7 +26445,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26454,7 +26454,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[68], COUPs[1], 1.0, &_fp[0] ); jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26470,17 +26470,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1165 - FFV1_0( w_fp[46], w_fp[39], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[98], COUPs[1], 1.0, &_fp[0] ); jamp_sv[73] += amp_sv[0]; jamp_sv[76] -= amp_sv[0]; jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[76] -= amp_sv[0]; jamp_sv[79] += amp_sv[0]; jamp_sv[82] -= amp_sv[0]; jamp_sv[90] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[39], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[73] -= amp_sv[0]; jamp_sv[79] += amp_sv[0]; jamp_sv[90] += amp_sv[0]; @@ -26489,22 +26489,22 @@ namespace mg5amcCpu // *** DIAGRAM 1166 OF 1240 *** // Wavefunction(s) for diagram number 1166 - FFV1_1( w_fp[2], w_fp[98], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); - FFV1_1( w_fp[2], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); - FFV1_1( w_fp[2], w_fp[111], COUPs[1], cIPD[0], cIPD[1], w_fp[27] ); + FFV1_1( w_fp[2], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); + FFV1_1( w_fp[2], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] ); // Amplitude(s) for diagram number 1166 - FFV1_0( w_fp[46], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); 
jamp_sv[5] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[98], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[98], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[27], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[27], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; @@ -26516,7 +26516,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1167 - FFV1_0( w_fp[46], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26525,7 +26525,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26534,7 +26534,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26550,17 +26550,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1168 - FFV1_0( w_fp[38], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[18] -= 
amp_sv[0]; jamp_sv[42] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[98], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[98], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[42] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[27], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[27], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; @@ -26572,7 +26572,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1169 - FFV1_0( w_fp[38], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26581,7 +26581,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26590,7 +26590,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26606,7 +26606,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1170 - FFV1_0( w_fp[3], w_fp[23], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26615,7 +26615,7 @@ namespace mg5amcCpu jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[98], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[98], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26624,7 +26624,7 @@ namespace mg5amcCpu jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[27], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[27], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26640,7 +26640,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1171 - FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26649,7 +26649,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[20], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26658,7 +26658,7 @@ namespace mg5amcCpu jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[60], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[60] += cxtype( 0, 1 ) 
* amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26671,25 +26671,25 @@ namespace mg5amcCpu // *** DIAGRAM 1172 OF 1240 *** // Wavefunction(s) for diagram number 1172 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[60] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[20] ); - FFV1_2( w_fp[3], w_fp[60], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - FFV1_2( w_fp[3], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[27] ); - FFV1_2( w_fp[3], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[60] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[20] ); + FFV1_2( w_fp[3], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] ); + FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 1172 - FFV1_0( w_fp[16], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[42] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; jamp_sv[45] -= amp_sv[0]; jamp_sv[47] += amp_sv[0]; - FFV1_0( w_fp[27], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[27], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[43] -= amp_sv[0]; jamp_sv[44] += amp_sv[0]; jamp_sv[45] -= amp_sv[0]; jamp_sv[46] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[42] -= amp_sv[0]; jamp_sv[44] += amp_sv[0]; jamp_sv[46] += amp_sv[0]; @@ -26698,12 +26698,12 @@ namespace mg5amcCpu // *** DIAGRAM 1173 OF 1240 *** // Wavefunction(s) for diagram number 1173 - VVV1P0_1( w_fp[60], w_fp[6], COUPs[0], 0., 0., w_fp[23] ); - VVV1P0_1( w_fp[24], w_fp[6], 
COUPs[0], 0., 0., w_fp[68] ); - VVV1P0_1( w_fp[20], w_fp[6], COUPs[0], 0., 0., w_fp[29] ); + VVV1P0_1( w_fp[60], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[20], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] ); // Amplitude(s) for diagram number 1173 - FFV1_0( w_fp[3], w_fp[77], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[23], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26712,7 +26712,7 @@ namespace mg5amcCpu jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[68], COUPs[1], 1.0, &_fp[0] ); jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26721,7 +26721,7 @@ namespace mg5amcCpu jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26737,17 +26737,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1174 - FFV1_0( w_fp[41], w_fp[77], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[60], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] += amp_sv[0]; jamp_sv[26] -= amp_sv[0]; jamp_sv[32] -= amp_sv[0]; jamp_sv[38] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[26] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; jamp_sv[32] -= amp_sv[0]; jamp_sv[36] += amp_sv[0]; - 
FFV1_0( w_fp[41], w_fp[77], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[20], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -26756,22 +26756,22 @@ namespace mg5amcCpu // *** DIAGRAM 1175 OF 1240 *** // Wavefunction(s) for diagram number 1175 - FFV1_1( w_fp[2], w_fp[60], COUPs[1], cIPD[0], cIPD[1], w_fp[59] ); - FFV1_1( w_fp[2], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); - FFV1_1( w_fp[2], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_1( w_fp[2], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] ); + FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); + FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 1175 - FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[61] -= amp_sv[0]; jamp_sv[85] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[71], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[71], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[15] -= amp_sv[0]; jamp_sv[51] += amp_sv[0]; jamp_sv[61] -= amp_sv[0]; jamp_sv[75] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[21], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[21], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[51] += amp_sv[0]; jamp_sv[75] += amp_sv[0]; @@ -26783,7 +26783,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1176 - FFV1_0( w_fp[52], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26792,7 +26792,7 @@ namespace mg5amcCpu jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], 
w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26801,7 +26801,7 @@ namespace mg5amcCpu jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26817,17 +26817,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1177 - FFV1_0( w_fp[52], w_fp[47], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[60], COUPs[1], 1.0, &_fp[0] ); jamp_sv[99] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[101] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[47], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[20], COUPs[1], 1.0, &_fp[0] ); jamp_sv[99] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; jamp_sv[115] += amp_sv[0]; @@ -26839,7 +26839,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1178 - FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &_fp[0] ); jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26848,7 +26848,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[85] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[71], w_fp[72], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[71], w_fp[72], COUPs[1], 1.0, &_fp[0] ); jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26857,7 +26857,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[21], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[21], w_fp[72], COUPs[1], 1.0, &_fp[0] ); jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26873,7 +26873,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1179 - FFV1_0( w_fp[16], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26882,7 +26882,7 @@ namespace mg5amcCpu jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[27], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[27], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26891,7 +26891,7 @@ namespace mg5amcCpu jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26907,7 +26907,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1180 - VVV1_0( w_fp[60], w_fp[72], w_fp[8], COUPs[0], &_fp[0] 
); + VVV1_0( w_fp[60], w_fp[72], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; jamp_sv[14] += amp_sv[0]; @@ -26924,7 +26924,7 @@ namespace mg5amcCpu jamp_sv[103] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVV1_0( w_fp[24], w_fp[72], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[72], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -26941,7 +26941,7 @@ namespace mg5amcCpu jamp_sv[104] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[20], w_fp[72], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[20], w_fp[72], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -26965,7 +26965,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1181 - VVVV1_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[8] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -26982,7 +26982,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[14] -= amp_sv[0]; @@ -26999,7 +26999,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - VVVV4_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -27016,7 +27016,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV1_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + 
VVVV1_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -27033,7 +27033,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV3_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[43] += amp_sv[0]; @@ -27050,7 +27050,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVVV4_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[15] += amp_sv[0]; jamp_sv[26] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -27067,7 +27067,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV1_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -27084,7 +27084,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[115] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV3_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -27101,7 +27101,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[24] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -27122,12 +27122,12 @@ namespace mg5amcCpu // *** DIAGRAM 1182 OF 1240 *** // Wavefunction(s) for diagram number 1182 - 
VVV1P0_1( w_fp[60], w_fp[1], COUPs[0], 0., 0., w_fp[72] ); - VVV1P0_1( w_fp[24], w_fp[1], COUPs[0], 0., 0., w_fp[60] ); - VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 0., 0., w_fp[24] ); + VVV1P0_1( w_fp[60], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[72] ); + VVV1P0_1( w_fp[24], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 1182 - VVV1_0( w_fp[8], w_fp[6], w_fp[72], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[72], COUPs[0], 1.0, &_fp[0] ); jamp_sv[8] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -27144,7 +27144,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -27161,7 +27161,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[24], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[24], COUPs[0], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -27185,7 +27185,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1183 - VVV1_0( w_fp[1], w_fp[8], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[23], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -27202,7 +27202,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[68], COUPs[0], 1.0, &_fp[0] ); jamp_sv[15] += amp_sv[0]; jamp_sv[26] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -27219,7 +27219,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; 
jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[24] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -27243,7 +27243,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1184 - FFV1_0( w_fp[3], w_fp[47], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[72], COUPs[1], 1.0, &_fp[0] ); jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27252,7 +27252,7 @@ namespace mg5amcCpu jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &_fp[0] ); jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27261,7 +27261,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27277,17 +27277,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1185 - FFV1_0( w_fp[16], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - FFV1_0( w_fp[27], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[27], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[103] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[106] += 
amp_sv[0]; - FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; @@ -27299,7 +27299,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1186 - FFV1_0( w_fp[41], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27308,7 +27308,7 @@ namespace mg5amcCpu jamp_sv[38] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27317,7 +27317,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27333,17 +27333,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1187 - FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[8] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[60] -= amp_sv[0]; jamp_sv[84] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[71], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[71], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[50] += amp_sv[0]; jamp_sv[60] -= amp_sv[0]; jamp_sv[74] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[21], 
w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[21], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[50] += amp_sv[0]; jamp_sv[74] += amp_sv[0]; @@ -27352,25 +27352,25 @@ namespace mg5amcCpu // *** DIAGRAM 1188 OF 1240 *** // Wavefunction(s) for diagram number 1188 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[71] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[59] ); - FFV1_2( w_fp[3], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[24] ); - FFV1_2( w_fp[3], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); - FFV1_2( w_fp[3], w_fp[59], COUPs[1], cIPD[0], cIPD[1], w_fp[72] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[71] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[59] ); + FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] ); + FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); + FFV1_2( w_fp[3], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] ); // Amplitude(s) for diagram number 1188 - FFV1_0( w_fp[24], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[24], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[36] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; jamp_sv[39] -= amp_sv[0]; jamp_sv[41] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[37] -= amp_sv[0]; jamp_sv[38] += amp_sv[0]; jamp_sv[39] -= amp_sv[0]; jamp_sv[40] += amp_sv[0]; - FFV1_0( w_fp[72], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[72], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[36] -= amp_sv[0]; jamp_sv[38] += amp_sv[0]; jamp_sv[40] += amp_sv[0]; @@ -27379,12 +27379,12 @@ namespace mg5amcCpu // *** DIAGRAM 1189 OF 1240 *** // Wavefunction(s) for diagram number 1189 - VVV1P0_1( 
w_fp[21], w_fp[5], COUPs[0], 0., 0., w_fp[98] ); - VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 0., 0., w_fp[27] ); - VVV1P0_1( w_fp[59], w_fp[5], COUPs[0], 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[98] ); + VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[27] ); + VVV1P0_1( w_fp[59], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] ); // Amplitude(s) for diagram number 1189 - FFV1_0( w_fp[3], w_fp[77], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[98], COUPs[1], 1.0, &_fp[0] ); jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27393,7 +27393,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27402,7 +27402,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &_fp[0] ); jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -27418,17 +27418,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1190 - FFV1_0( w_fp[38], w_fp[77], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[25] += amp_sv[0]; jamp_sv[28] -= amp_sv[0]; jamp_sv[34] -= amp_sv[0]; jamp_sv[44] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[77], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[28] -= amp_sv[0]; 
jamp_sv[31] += amp_sv[0]; jamp_sv[34] -= amp_sv[0]; jamp_sv[42] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[77], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[59], COUPs[1], 1.0, &_fp[0] ); jamp_sv[25] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -27437,22 +27437,22 @@ namespace mg5amcCpu // *** DIAGRAM 1191 OF 1240 *** // Wavefunction(s) for diagram number 1191 - FFV1_1( w_fp[2], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[29] ); - FFV1_1( w_fp[2], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[68] ); - FFV1_1( w_fp[2], w_fp[59], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] ); + FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] ); + FFV1_1( w_fp[2], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 1191 - FFV1_0( w_fp[52], w_fp[29], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[29], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[67] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[68], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[68], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[21] -= amp_sv[0]; jamp_sv[53] += amp_sv[0]; jamp_sv[67] -= amp_sv[0]; jamp_sv[99] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[53] += amp_sv[0]; jamp_sv[99] += amp_sv[0]; @@ -27464,7 +27464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1192 - FFV1_0( w_fp[52], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27473,7 +27473,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) 
* amp_sv[0]; jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27482,7 +27482,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -27498,17 +27498,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1193 - FFV1_0( w_fp[52], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[75] += amp_sv[0]; jamp_sv[77] -= amp_sv[0]; jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[77] -= amp_sv[0]; jamp_sv[85] += amp_sv[0]; jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[39], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[59], COUPs[1], 1.0, &_fp[0] ); jamp_sv[75] -= amp_sv[0]; jamp_sv[85] += amp_sv[0]; jamp_sv[91] += amp_sv[0]; @@ -27520,7 +27520,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1194 - FFV1_0( w_fp[3], w_fp[29], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[29], w_fp[66], COUPs[1], 1.0, &_fp[0] ); jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27529,7 +27529,7 @@ namespace mg5amcCpu jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] += 
cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[68], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[68], w_fp[66], COUPs[1], 1.0, &_fp[0] ); jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -27538,7 +27538,7 @@ namespace mg5amcCpu jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[23], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[66], COUPs[1], 1.0, &_fp[0] ); jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -27554,7 +27554,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1195 - FFV1_0( w_fp[24], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[24], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27563,7 +27563,7 @@ namespace mg5amcCpu jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[60], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27572,7 +27572,7 @@ namespace mg5amcCpu jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[72], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[72], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -27588,7 +27588,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 
1196 - VVV1_0( w_fp[21], w_fp[66], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[21], w_fp[66], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[20] += amp_sv[0]; @@ -27605,7 +27605,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVV1_0( w_fp[71], w_fp[66], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[66], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -27622,7 +27622,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[98] -= amp_sv[0]; jamp_sv[99] += amp_sv[0]; - VVV1_0( w_fp[59], w_fp[66], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[59], w_fp[66], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[10] += amp_sv[0]; jamp_sv[11] -= amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -27646,7 +27646,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1197 - VVVV1_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[10] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[25] -= amp_sv[0]; @@ -27663,7 +27663,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - VVVV3_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[10] += amp_sv[0]; jamp_sv[11] -= amp_sv[0]; jamp_sv[20] -= amp_sv[0]; @@ -27680,7 +27680,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; - VVVV4_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; jamp_sv[25] += amp_sv[0]; @@ -27697,7 +27697,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; - VVVV1_0( 
w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -27714,7 +27714,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVVV3_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; jamp_sv[37] += amp_sv[0]; @@ -27731,7 +27731,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV4_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[21] += amp_sv[0]; jamp_sv[28] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -27748,7 +27748,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV1_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -27765,7 +27765,7 @@ namespace mg5amcCpu jamp_sv[94] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVVV3_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -27782,7 +27782,7 @@ namespace mg5amcCpu jamp_sv[99] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV4_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[25] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -27803,12 +27803,12 @@ namespace mg5amcCpu // *** DIAGRAM 1198 OF 
1240 *** // Wavefunction(s) for diagram number 1198 - VVV1P0_1( w_fp[21], w_fp[1], COUPs[0], 0., 0., w_fp[66] ); - VVV1P0_1( w_fp[71], w_fp[1], COUPs[0], 0., 0., w_fp[21] ); - VVV1P0_1( w_fp[59], w_fp[1], COUPs[0], 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[21], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[66] ); + VVV1P0_1( w_fp[71], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[59], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[71] ); // Amplitude(s) for diagram number 1198 - VVV1_0( w_fp[8], w_fp[5], w_fp[66], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[66], COUPs[0], 1.0, &_fp[0] ); jamp_sv[10] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[25] -= amp_sv[0]; @@ -27825,7 +27825,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -27842,7 +27842,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[71], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[71], COUPs[0], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -27866,7 +27866,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1199 - VVV1_0( w_fp[1], w_fp[8], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[98], COUPs[0], 1.0, &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; jamp_sv[25] += amp_sv[0]; @@ -27883,7 +27883,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); jamp_sv[21] += amp_sv[0]; jamp_sv[28] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -27900,7 +27900,7 @@ namespace mg5amcCpu jamp_sv[88] -= 
amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[16], COUPs[0], 1.0, &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[25] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -27924,7 +27924,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1200 - FFV1_0( w_fp[3], w_fp[39], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[66], COUPs[1], 1.0, &_fp[0] ); jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27933,7 +27933,7 @@ namespace mg5amcCpu jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27942,7 +27942,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27958,17 +27958,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1201 - FFV1_0( w_fp[24], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[24], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[78] += amp_sv[0]; jamp_sv[79] -= amp_sv[0]; jamp_sv[81] -= amp_sv[0]; jamp_sv[83] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[79] -= amp_sv[0]; jamp_sv[80] += amp_sv[0]; jamp_sv[81] -= amp_sv[0]; 
jamp_sv[82] += amp_sv[0]; - FFV1_0( w_fp[72], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[72], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[78] -= amp_sv[0]; jamp_sv[80] += amp_sv[0]; jamp_sv[82] += amp_sv[0]; @@ -27980,7 +27980,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1202 - FFV1_0( w_fp[38], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27989,7 +27989,7 @@ namespace mg5amcCpu jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27998,7 +27998,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28014,17 +28014,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1203 - FFV1_0( w_fp[38], w_fp[29], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[29], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[10] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[66] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[68], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[68], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[52] += amp_sv[0]; jamp_sv[66] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - FFV1_0( 
w_fp[38], w_fp[23], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[23], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[52] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; @@ -28033,25 +28033,25 @@ namespace mg5amcCpu // *** DIAGRAM 1204 OF 1240 *** // Wavefunction(s) for diagram number 1204 - VVVV1P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[23] ); - VVVV3P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[68] ); - VVVV4P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[29] ); - FFV1_2( w_fp[3], w_fp[23], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); - FFV1_2( w_fp[3], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); - FFV1_2( w_fp[3], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[66] ); + VVVV1P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] ); + VVVV3P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[68] ); + VVVV4P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[29] ); + FFV1_2( w_fp[3], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); + FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] ); // Amplitude(s) for diagram number 1204 - FFV1_0( w_fp[71], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[30] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; jamp_sv[33] -= amp_sv[0]; jamp_sv[35] += amp_sv[0]; - FFV1_0( w_fp[21], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[31] -= amp_sv[0]; jamp_sv[32] += amp_sv[0]; jamp_sv[33] -= amp_sv[0]; jamp_sv[34] += amp_sv[0]; - FFV1_0( w_fp[66], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[66], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[30] -= amp_sv[0]; jamp_sv[32] += amp_sv[0]; jamp_sv[34] += amp_sv[0]; @@ -28060,12 +28060,12 @@ namespace mg5amcCpu // *** DIAGRAM 1205 OF 1240 *** // Wavefunction(s) for diagram number 1205 
- VVV1P0_1( w_fp[23], w_fp[4], COUPs[0], 0., 0., w_fp[72] ); - VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 0., 0., w_fp[60] ); - VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 0., 0., w_fp[24] ); + VVV1P0_1( w_fp[23], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[72] ); + VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 1205 - FFV1_0( w_fp[3], w_fp[77], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[72], COUPs[1], 1.0, &_fp[0] ); jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28074,7 +28074,7 @@ namespace mg5amcCpu jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[60], COUPs[1], 1.0, &_fp[0] ); jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28083,7 +28083,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28099,17 +28099,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1206 - FFV1_0( w_fp[46], w_fp[77], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[23], COUPs[1], 1.0, &_fp[0] ); jamp_sv[27] += amp_sv[0]; jamp_sv[29] -= amp_sv[0]; jamp_sv[40] -= amp_sv[0]; jamp_sv[46] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[77], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[68], COUPs[1], 1.0, &_fp[0] ); jamp_sv[29] -= 
amp_sv[0]; jamp_sv[37] += amp_sv[0]; jamp_sv[40] -= amp_sv[0]; jamp_sv[43] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[27] -= amp_sv[0]; jamp_sv[37] += amp_sv[0]; jamp_sv[43] += amp_sv[0]; @@ -28118,22 +28118,22 @@ namespace mg5amcCpu // *** DIAGRAM 1207 OF 1240 *** // Wavefunction(s) for diagram number 1207 - FFV1_1( w_fp[2], w_fp[23], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); - FFV1_1( w_fp[2], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - FFV1_1( w_fp[2], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[27] ); + FFV1_1( w_fp[2], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); + FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] ); // Amplitude(s) for diagram number 1207 - FFV1_0( w_fp[52], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[23] -= amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[16], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[16], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[23] -= amp_sv[0]; jamp_sv[77] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[27], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[27], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[17] -= amp_sv[0]; jamp_sv[77] += amp_sv[0]; jamp_sv[101] += amp_sv[0]; @@ -28145,7 +28145,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1208 - FFV1_0( w_fp[52], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28154,7 +28154,7 @@ namespace mg5amcCpu jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28163,7 +28163,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28179,17 +28179,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1209 - FFV1_0( w_fp[52], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); jamp_sv[51] += amp_sv[0]; jamp_sv[53] -= amp_sv[0]; jamp_sv[64] -= amp_sv[0]; jamp_sv[70] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[33], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[68], COUPs[1], 1.0, &_fp[0] ); jamp_sv[53] -= amp_sv[0]; jamp_sv[61] += amp_sv[0]; jamp_sv[64] -= amp_sv[0]; jamp_sv[67] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[51] -= amp_sv[0]; jamp_sv[61] += amp_sv[0]; jamp_sv[67] += amp_sv[0]; @@ -28201,7 +28201,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1210 - FFV1_0( w_fp[3], w_fp[77], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[61], COUPs[1], 1.0, &_fp[0] ); jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28210,7 +28210,7 @@ namespace mg5amcCpu jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[16], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[16], w_fp[61], COUPs[1], 1.0, &_fp[0] ); jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -28219,7 +28219,7 @@ namespace mg5amcCpu jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[27], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[27], w_fp[61], COUPs[1], 1.0, &_fp[0] ); jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -28235,7 +28235,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1211 - FFV1_0( w_fp[71], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28244,7 +28244,7 @@ namespace mg5amcCpu jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[21], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28253,7 +28253,7 @@ namespace mg5amcCpu jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[66], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[66], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -28269,7 +28269,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) 
for diagram number 1212 - VVV1_0( w_fp[23], w_fp[61], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[23], w_fp[61], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -28286,7 +28286,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[68], w_fp[61], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[68], w_fp[61], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[22] += amp_sv[0]; jamp_sv[23] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -28303,7 +28303,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[100] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[29], w_fp[61], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[29], w_fp[61], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[16] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -28327,7 +28327,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1213 - VVVV1_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[16] += amp_sv[0]; jamp_sv[22] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; @@ -28344,7 +28344,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - VVVV3_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[16] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[22] -= amp_sv[0]; @@ -28361,7 +28361,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV4_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[17] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[27] += amp_sv[0]; @@ -28378,7 +28378,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[115] -= 
amp_sv[0]; - VVVV1_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -28395,7 +28395,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV3_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -28412,7 +28412,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[23] += amp_sv[0]; jamp_sv[29] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -28429,7 +28429,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV1_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -28446,7 +28446,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV3_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -28463,7 +28463,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV4_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -28484,12 +28484,12 @@ namespace 
mg5amcCpu // *** DIAGRAM 1214 OF 1240 *** // Wavefunction(s) for diagram number 1214 - VVV1P0_1( w_fp[23], w_fp[1], COUPs[0], 0., 0., w_fp[61] ); - VVV1P0_1( w_fp[68], w_fp[1], COUPs[0], 0., 0., w_fp[23] ); - VVV1P0_1( w_fp[29], w_fp[1], COUPs[0], 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[23], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[61] ); + VVV1P0_1( w_fp[68], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[29], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 1214 - VVV1_0( w_fp[8], w_fp[4], w_fp[61], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[61], COUPs[0], 1.0, &_fp[0] ); jamp_sv[16] += amp_sv[0]; jamp_sv[22] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; @@ -28506,7 +28506,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -28523,7 +28523,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[68], COUPs[0], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -28547,7 +28547,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1215 - VVV1_0( w_fp[1], w_fp[8], w_fp[72], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[72], COUPs[0], 1.0, &_fp[0] ); jamp_sv[17] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[27] += amp_sv[0]; @@ -28564,7 +28564,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[60], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[60], COUPs[0], 1.0, &_fp[0] ); jamp_sv[23] += amp_sv[0]; jamp_sv[29] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -28581,7 +28581,7 @@ 
namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -28605,7 +28605,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1216 - FFV1_0( w_fp[3], w_fp[33], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[61], COUPs[1], 1.0, &_fp[0] ); jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28614,7 +28614,7 @@ namespace mg5amcCpu jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28623,7 +28623,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[68], COUPs[1], 1.0, &_fp[0] ); jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28639,17 +28639,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1217 - FFV1_0( w_fp[71], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[54] += amp_sv[0]; jamp_sv[55] -= amp_sv[0]; jamp_sv[57] -= amp_sv[0]; jamp_sv[59] += amp_sv[0]; - FFV1_0( w_fp[21], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[55] -= amp_sv[0]; jamp_sv[56] += 
amp_sv[0]; jamp_sv[57] -= amp_sv[0]; jamp_sv[58] += amp_sv[0]; - FFV1_0( w_fp[66], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[66], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[54] -= amp_sv[0]; jamp_sv[56] += amp_sv[0]; jamp_sv[58] += amp_sv[0]; @@ -28661,7 +28661,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1218 - FFV1_0( w_fp[46], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28670,7 +28670,7 @@ namespace mg5amcCpu jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28679,7 +28679,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28695,17 +28695,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1219 - FFV1_0( w_fp[46], w_fp[77], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[16] += amp_sv[0]; jamp_sv[22] -= amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[16], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[16], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; 
jamp_sv[100] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[27], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[27], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[76] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; @@ -28717,7 +28717,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1220 - VVVV1_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28734,7 +28734,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28751,7 +28751,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -28768,7 +28768,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28785,7 +28785,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28802,7 +28802,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] 
-= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[19] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -28819,7 +28819,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -28836,7 +28836,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -28853,7 +28853,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[22] -= amp_sv[0]; @@ -28874,12 +28874,12 @@ namespace mg5amcCpu // *** DIAGRAM 1221 OF 1240 *** // Wavefunction(s) for diagram number 1221 - VVV1P0_1( w_fp[0], w_fp[73], COUPs[0], 0., 0., w_fp[27] ); - VVV1P0_1( w_fp[0], w_fp[79], COUPs[0], 0., 0., w_fp[1] ); - VVV1P0_1( w_fp[0], w_fp[80], COUPs[0], 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[0], w_fp[73], COUPs[0], 1.0, 0., 0., w_fp[27] ); + VVV1P0_1( w_fp[0], w_fp[79], COUPs[0], 1.0, 0., 0., w_fp[1] ); + VVV1P0_1( w_fp[0], w_fp[80], COUPs[0], 1.0, 0., 0., w_fp[16] ); // Amplitude(s) for diagram number 1221 - VVV1_0( w_fp[8], w_fp[6], w_fp[27], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[27], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= 
amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28896,7 +28896,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[1], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[1], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28913,7 +28913,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -28937,7 +28937,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1222 - VVV1_0( w_fp[73], w_fp[6], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[73], w_fp[6], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28954,7 +28954,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[79], w_fp[6], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[79], w_fp[6], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28971,7 +28971,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[80], w_fp[6], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[80], w_fp[6], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -28995,7 +28995,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1223 - FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29004,7 +29004,7 @@ 
namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29013,7 +29013,7 @@ namespace mg5amcCpu jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -29029,17 +29029,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1224 - FFV1_0( w_fp[3], w_fp[113], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[73], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] += amp_sv[0]; jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[113], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[79], COUPs[1], 1.0, &_fp[0] ); jamp_sv[97] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[113], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[80], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; @@ -29051,7 +29051,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1225 - FFV1_0( w_fp[41], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29060,7 +29060,7 @@ namespace mg5amcCpu 
jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29069,7 +29069,7 @@ namespace mg5amcCpu jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -29085,17 +29085,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1226 - FFV1_0( w_fp[62], w_fp[2], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[73], COUPs[1], 1.0, &_fp[0] ); jamp_sv[32] += amp_sv[0]; jamp_sv[38] -= amp_sv[0]; jamp_sv[62] -= amp_sv[0]; jamp_sv[86] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); jamp_sv[38] -= amp_sv[0]; jamp_sv[56] += amp_sv[0]; jamp_sv[62] -= amp_sv[0]; jamp_sv[80] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); jamp_sv[32] -= amp_sv[0]; jamp_sv[56] += amp_sv[0]; jamp_sv[80] += amp_sv[0]; @@ -29107,7 +29107,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1227 - VVVV1_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29124,7 +29124,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - 
VVVV3_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29141,7 +29141,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[15] += amp_sv[0]; @@ -29158,7 +29158,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29175,7 +29175,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29192,7 +29192,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[13] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; @@ -29209,7 +29209,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[18] += amp_sv[0]; @@ -29226,7 +29226,7 @@ namespace mg5amcCpu jamp_sv[95] -= 
amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -29243,7 +29243,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -29264,12 +29264,12 @@ namespace mg5amcCpu // *** DIAGRAM 1228 OF 1240 *** // Wavefunction(s) for diagram number 1228 - VVV1P0_1( w_fp[0], w_fp[57], COUPs[0], 0., 0., w_fp[62] ); - VVV1P0_1( w_fp[0], w_fp[81], COUPs[0], 0., 0., w_fp[80] ); - VVV1P0_1( w_fp[0], w_fp[82], COUPs[0], 0., 0., w_fp[79] ); + VVV1P0_1( w_fp[0], w_fp[57], COUPs[0], 1.0, 0., 0., w_fp[62] ); + VVV1P0_1( w_fp[0], w_fp[81], COUPs[0], 1.0, 0., 0., w_fp[80] ); + VVV1P0_1( w_fp[0], w_fp[82], COUPs[0], 1.0, 0., 0., w_fp[79] ); // Amplitude(s) for diagram number 1228 - VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29286,7 +29286,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[80], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[80], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29303,7 +29303,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[79], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[79], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[18] 
+= amp_sv[0]; @@ -29327,7 +29327,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1229 - VVV1_0( w_fp[57], w_fp[5], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[57], w_fp[5], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29344,7 +29344,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[81], w_fp[5], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[81], w_fp[5], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29361,7 +29361,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVV1_0( w_fp[82], w_fp[5], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[82], w_fp[5], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -29385,7 +29385,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1230 - FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29394,7 +29394,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[80], COUPs[1], 1.0, &_fp[0] ); jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29403,7 +29403,7 @@ namespace mg5amcCpu jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], 
w_fp[79], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -29419,17 +29419,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1231 - FFV1_0( w_fp[3], w_fp[102], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[57], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] += amp_sv[0]; jamp_sv[73] -= amp_sv[0]; jamp_sv[75] -= amp_sv[0]; jamp_sv[77] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[81], COUPs[1], 1.0, &_fp[0] ); jamp_sv[73] -= amp_sv[0]; jamp_sv[74] += amp_sv[0]; jamp_sv[75] -= amp_sv[0]; jamp_sv[76] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[82], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] -= amp_sv[0]; jamp_sv[74] += amp_sv[0]; jamp_sv[76] += amp_sv[0]; @@ -29441,7 +29441,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1232 - FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29450,7 +29450,7 @@ namespace mg5amcCpu jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29459,7 +29459,7 @@ namespace mg5amcCpu jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); 
jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -29475,17 +29475,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1233 - FFV1_0( w_fp[104], w_fp[2], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[57], COUPs[1], 1.0, &_fp[0] ); jamp_sv[34] += amp_sv[0]; jamp_sv[44] -= amp_sv[0]; jamp_sv[68] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - FFV1_0( w_fp[104], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); jamp_sv[44] -= amp_sv[0]; jamp_sv[58] += amp_sv[0]; jamp_sv[68] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; - FFV1_0( w_fp[104], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); jamp_sv[34] -= amp_sv[0]; jamp_sv[58] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; @@ -29497,7 +29497,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1234 - VVVV1_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -29514,7 +29514,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -29531,7 +29531,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[9] += amp_sv[0]; @@ -29548,7 +29548,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] 
-= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -29565,7 +29565,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -29582,7 +29582,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[7] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; @@ -29599,7 +29599,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[19] += amp_sv[0]; @@ -29616,7 +29616,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -29633,7 +29633,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[107] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29654,12 +29654,12 @@ namespace mg5amcCpu // *** 
DIAGRAM 1235 OF 1240 *** // Wavefunction(s) for diagram number 1235 - VVV1P0_1( w_fp[0], w_fp[55], COUPs[0], 0., 0., w_fp[104] ); - VVV1P0_1( w_fp[0], w_fp[83], COUPs[0], 0., 0., w_fp[82] ); - VVV1P0_1( w_fp[0], w_fp[84], COUPs[0], 0., 0., w_fp[81] ); + VVV1P0_1( w_fp[0], w_fp[55], COUPs[0], 1.0, 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[0], w_fp[83], COUPs[0], 1.0, 0., 0., w_fp[82] ); + VVV1P0_1( w_fp[0], w_fp[84], COUPs[0], 1.0, 0., 0., w_fp[81] ); // Amplitude(s) for diagram number 1235 - VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -29676,7 +29676,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[82], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[82], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -29693,7 +29693,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[81], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[81], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[19] += amp_sv[0]; @@ -29717,7 +29717,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1236 - VVV1_0( w_fp[55], w_fp[4], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[55], w_fp[4], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -29734,7 +29734,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[83], w_fp[4], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[83], w_fp[4], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -29751,7 +29751,7 @@ namespace mg5amcCpu 
jamp_sv[83] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVV1_0( w_fp[84], w_fp[4], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[84], w_fp[4], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -29775,7 +29775,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1237 - FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29784,7 +29784,7 @@ namespace mg5amcCpu jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[82], COUPs[1], 1.0, &_fp[0] ); jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29793,7 +29793,7 @@ namespace mg5amcCpu jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[81], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -29809,17 +29809,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1238 - FFV1_0( w_fp[3], w_fp[114], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[55], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] += amp_sv[0]; jamp_sv[49] -= amp_sv[0]; jamp_sv[51] -= amp_sv[0]; jamp_sv[53] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[114], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[83], COUPs[1], 1.0, &_fp[0] ); jamp_sv[49] -= amp_sv[0]; jamp_sv[50] += amp_sv[0]; 
jamp_sv[51] -= amp_sv[0]; jamp_sv[52] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[114], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[84], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] -= amp_sv[0]; jamp_sv[50] += amp_sv[0]; jamp_sv[52] += amp_sv[0]; @@ -29831,7 +29831,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1239 - FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29840,7 +29840,7 @@ namespace mg5amcCpu jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29849,7 +29849,7 @@ namespace mg5amcCpu jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -29865,17 +29865,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1240 - FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &_fp[0] ); jamp_sv[40] += amp_sv[0]; jamp_sv[46] -= amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &_fp[0] ); jamp_sv[46] -= amp_sv[0]; jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; 
jamp_sv[106] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &_fp[0] ); jamp_sv[40] -= amp_sv[0]; jamp_sv[82] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; diff --git a/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h index 9cea8bcbe7..9b946c21e1 100644 --- a/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -886,6 +888,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -896,6 +899,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -908,6 +912,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -920,6 +925,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -934,6 +940,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; 
//-------------------------------------------------------------------------- @@ -945,6 +952,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -959,6 +967,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -970,6 +979,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -984,6 +994,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -995,6 +1006,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -1008,6 +1020,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1042,6 +1055,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1080,6 +1094,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1103,6 +1118,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1134,6 +1150,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const 
fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1165,6 +1182,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1197,6 +1215,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1225,6 +1244,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1260,6 +1280,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1288,6 +1309,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1323,6 +1345,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1351,6 +1374,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index c782410e30..2ecbe5782c 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005506038665771484  +DEBUG: model prefixing takes 0.005434513092041016  INFO: Restrict model sm with 
file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.841 s +1 processes with 1240 diagrams generated in 1.849 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -174,9 +174,9 @@ INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  1536 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  1536 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.445 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.440 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -184,7 +184,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.341 s +ALOHA: aloha creates 5 routines in 0.340 s VVV1 VVV1 FFV1 @@ -208,6 +208,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m12.751s -user 0m12.589s -sys 0m0.108s +real 0m12.780s +user 0m12.635s +sys 0m0.096s diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc index aa20dc7f78..a67b74e5b7 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc @@ -252,13 +252,13 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][6], +1, w_fp[6], 6 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[7] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); - VVV1P0_1( w_fp[7], w_fp[4], COUPs[0], 0., 0., w_fp[9] ); - VVV1P0_1( w_fp[8], w_fp[5], COUPs[0], 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[7], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[9] ); + VVV1P0_1( w_fp[8], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 1 - VVV1_0( w_fp[9], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( 
w_fp[9], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -282,10 +282,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 1240 *** // Wavefunction(s) for diagram number 2 - VVV1P0_1( w_fp[8], w_fp[6], COUPs[0], 0., 0., w_fp[11] ); + VVV1P0_1( w_fp[8], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 2 - VVV1_0( w_fp[9], w_fp[11], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[11], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -312,7 +312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 3 - VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -332,7 +332,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -352,7 +352,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -376,11 +376,11 @@ namespace mg5amcCpu 
// *** DIAGRAM 4 OF 1240 *** // Wavefunction(s) for diagram number 4 - VVV1P0_1( w_fp[7], w_fp[5], COUPs[0], 0., 0., w_fp[12] ); - VVV1P0_1( w_fp[8], w_fp[4], COUPs[0], 0., 0., w_fp[13] ); + VVV1P0_1( w_fp[7], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[12] ); + VVV1P0_1( w_fp[8], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[13] ); // Amplitude(s) for diagram number 4 - VVV1_0( w_fp[12], w_fp[13], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[13], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -407,7 +407,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[12], w_fp[11], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[11], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -434,7 +434,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -454,7 +454,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -474,7 +474,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], &_fp[0] ); + VVVV4_0( 
w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -498,10 +498,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 1240 *** // Wavefunction(s) for diagram number 7 - VVV1P0_1( w_fp[7], w_fp[6], COUPs[0], 0., 0., w_fp[14] ); + VVV1P0_1( w_fp[7], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[14] ); // Amplitude(s) for diagram number 7 - VVV1_0( w_fp[14], w_fp[13], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[13], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -528,7 +528,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - VVV1_0( w_fp[14], w_fp[10], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[10], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -555,7 +555,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -575,7 +575,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -595,7 +595,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[96] 
+= amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -619,12 +619,12 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 1240 *** // Wavefunction(s) for diagram number 10 - VVVV1P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[15] ); - VVVV3P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[16] ); - VVVV4P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[17] ); + VVVV1P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] ); + VVVV3P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[16] ); + VVVV4P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] ); // Amplitude(s) for diagram number 10 - VVV1_0( w_fp[8], w_fp[6], w_fp[15], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[15], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -644,7 +644,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -664,7 +664,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -688,12 
+688,12 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 1240 *** // Wavefunction(s) for diagram number 11 - VVVV1P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[18] ); - VVVV3P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[19] ); - VVVV4P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[20] ); + VVVV1P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[18] ); + VVVV3P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[19] ); + VVVV4P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[20] ); // Amplitude(s) for diagram number 11 - VVV1_0( w_fp[8], w_fp[5], w_fp[18], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[18], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -713,7 +713,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -733,7 +733,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -757,12 +757,12 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 1240 *** // Wavefunction(s) for diagram number 12 - VVVV1P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[22] ); - VVVV4P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[23] ); + VVVV1P0_1( w_fp[7], w_fp[5], w_fp[6], 
COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] ); + VVVV4P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 12 - VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -782,7 +782,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -802,7 +802,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -826,10 +826,10 @@ namespace mg5amcCpu // *** DIAGRAM 13 OF 1240 *** // Wavefunction(s) for diagram number 13 - VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 0., 0., w_fp[24] ); + VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 13 - VVVV1_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -849,7 +849,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[7], 
w_fp[8], w_fp[24], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -869,7 +869,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -893,10 +893,10 @@ namespace mg5amcCpu // *** DIAGRAM 14 OF 1240 *** // Wavefunction(s) for diagram number 14 - VVV1P0_1( w_fp[7], w_fp[8], COUPs[0], 0., 0., w_fp[25] ); + VVV1P0_1( w_fp[7], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 14 - VVV1_0( w_fp[24], w_fp[6], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -920,10 +920,10 @@ namespace mg5amcCpu // *** DIAGRAM 15 OF 1240 *** // Wavefunction(s) for diagram number 15 - VVV1P0_1( w_fp[7], w_fp[24], COUPs[0], 0., 0., w_fp[26] ); + VVV1P0_1( w_fp[7], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[26] ); // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[8], w_fp[6], w_fp[26], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[26], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -950,7 +950,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 16 - VVV1_0( w_fp[8], w_fp[24], w_fp[14], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[24], w_fp[14], COUPs[0], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -974,10 +974,10 @@ namespace mg5amcCpu // *** DIAGRAM 17 OF 1240 *** // Wavefunction(s) for diagram number 17 - VVV1P0_1( w_fp[4], w_fp[6], COUPs[0], 0., 0., w_fp[27] ); + VVV1P0_1( w_fp[4], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[27] ); // Amplitude(s) for diagram number 17 - VVVV1_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -997,7 +997,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; - VVVV3_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1017,7 +1017,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVVV4_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1044,7 +1044,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 18 - VVV1_0( w_fp[27], w_fp[5], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1068,10 +1068,10 @@ namespace mg5amcCpu // *** DIAGRAM 19 OF 1240 
*** // Wavefunction(s) for diagram number 19 - VVV1P0_1( w_fp[7], w_fp[27], COUPs[0], 0., 0., w_fp[28] ); + VVV1P0_1( w_fp[7], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[28] ); // Amplitude(s) for diagram number 19 - VVV1_0( w_fp[8], w_fp[5], w_fp[28], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[28], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1098,7 +1098,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[8], w_fp[27], w_fp[12], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[27], w_fp[12], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1122,10 +1122,10 @@ namespace mg5amcCpu // *** DIAGRAM 21 OF 1240 *** // Wavefunction(s) for diagram number 21 - VVV1P0_1( w_fp[5], w_fp[6], COUPs[0], 0., 0., w_fp[29] ); + VVV1P0_1( w_fp[5], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] ); // Amplitude(s) for diagram number 21 - VVVV1_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1145,7 +1145,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1165,7 +1165,7 @@ namespace mg5amcCpu jamp_sv[116] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[7], w_fp[8], w_fp[4], 
w_fp[29], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1192,7 +1192,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - VVV1_0( w_fp[4], w_fp[29], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1219,7 +1219,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 23 - VVV1_0( w_fp[8], w_fp[29], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[29], w_fp[9], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1243,10 +1243,10 @@ namespace mg5amcCpu // *** DIAGRAM 24 OF 1240 *** // Wavefunction(s) for diagram number 24 - VVV1P0_1( w_fp[7], w_fp[29], COUPs[0], 0., 0., w_fp[25] ); + VVV1P0_1( w_fp[7], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 24 - VVV1_0( w_fp[8], w_fp[4], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1270,12 +1270,12 @@ namespace mg5amcCpu // *** DIAGRAM 25 OF 1240 *** // Wavefunction(s) for diagram number 25 - VVVV1P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[30] ); - VVVV3P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[31] ); - VVVV4P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[32] ); + VVVV1P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[30] ); + VVVV3P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 
0., w_fp[31] ); + VVVV4P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[32] ); // Amplitude(s) for diagram number 25 - VVV1_0( w_fp[7], w_fp[8], w_fp[30], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[8], w_fp[30], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1295,7 +1295,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[7], w_fp[8], w_fp[31], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[8], w_fp[31], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1315,7 +1315,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[7], w_fp[8], w_fp[32], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[8], w_fp[32], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1339,12 +1339,12 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 1240 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[33] ); - FFV1_2( w_fp[3], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[34] ); - FFV1_1( w_fp[33], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[35] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[33] ); + FFV1_2( w_fp[3], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] ); + FFV1_1( w_fp[33], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[35] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[34], w_fp[35], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[35], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support 
updates numerators_sv and denominators_sv (#473) #endif @@ -1354,10 +1354,10 @@ namespace mg5amcCpu // *** DIAGRAM 27 OF 1240 *** // Wavefunction(s) for diagram number 27 - FFV1_1( w_fp[33], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[36] ); + FFV1_1( w_fp[33], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[36] ); // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[34], w_fp[36], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[36], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1367,10 +1367,10 @@ namespace mg5amcCpu // *** DIAGRAM 28 OF 1240 *** // Wavefunction(s) for diagram number 28 - FFV1P0_3( w_fp[3], w_fp[33], COUPs[1], 0., 0., w_fp[37] ); + FFV1P0_3( w_fp[3], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[37] ); // Amplitude(s) for diagram number 28 - VVV1_0( w_fp[12], w_fp[37], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[37], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1389,7 +1389,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[3], w_fp[36], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[36], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1404,7 +1404,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 30 - VVV1_0( w_fp[14], w_fp[37], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[37], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1423,7 +1423,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 31 - FFV1_0( w_fp[3], w_fp[35], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[35], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1438,7 +1438,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1450,7 +1450,7 @@ namespace mg5amcCpu jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1462,7 +1462,7 @@ namespace mg5amcCpu jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1478,11 +1478,11 @@ namespace mg5amcCpu // *** DIAGRAM 33 OF 1240 *** // Wavefunction(s) for diagram number 33 - FFV1_2( w_fp[3], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[38] ); - FFV1_1( w_fp[33], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[39] ); + FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[38] ); + FFV1_1( w_fp[33], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] ); // Amplitude(s) for diagram number 33 - FFV1_0( 
w_fp[38], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1492,10 +1492,10 @@ namespace mg5amcCpu // *** DIAGRAM 34 OF 1240 *** // Wavefunction(s) for diagram number 34 - FFV1_2( w_fp[38], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[40] ); + FFV1_2( w_fp[38], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] ); // Amplitude(s) for diagram number 34 - FFV1_0( w_fp[40], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1508,7 +1508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - FFV1_0( w_fp[38], w_fp[33], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1520,10 +1520,10 @@ namespace mg5amcCpu // *** DIAGRAM 36 OF 1240 *** // Wavefunction(s) for diagram number 36 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[41] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[41] ); // Amplitude(s) for diagram number 36 - FFV1_0( w_fp[41], w_fp[39], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1533,10 +1533,10 @@ namespace mg5amcCpu // *** DIAGRAM 37 OF 1240 *** // Wavefunction(s) for diagram number 37 - FFV1_2( w_fp[41], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[42] ); + FFV1_2( w_fp[41], w_fp[7], COUPs[1], 
1.0, cIPD[0], cIPD[1], w_fp[42] ); // Amplitude(s) for diagram number 37 - FFV1_0( w_fp[42], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[42], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1549,7 +1549,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 38 - FFV1_0( w_fp[41], w_fp[33], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1564,7 +1564,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 39 - FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1579,7 +1579,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 40 - FFV1_0( w_fp[34], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1594,7 +1594,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 41 - FFV1_0( w_fp[3], w_fp[33], w_fp[25], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[25], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1610,11 +1610,11 @@ namespace mg5amcCpu // *** DIAGRAM 42 OF 1240 *** // Wavefunction(s) for diagram number 42 - FFV1_1( w_fp[2], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[39] ); 
- FFV1_1( w_fp[39], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[43] ); + FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] ); + FFV1_1( w_fp[39], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[43] ); // Amplitude(s) for diagram number 42 - FFV1_0( w_fp[34], w_fp[43], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[43], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1624,10 +1624,10 @@ namespace mg5amcCpu // *** DIAGRAM 43 OF 1240 *** // Wavefunction(s) for diagram number 43 - FFV1_1( w_fp[39], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[44] ); + FFV1_1( w_fp[39], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[44] ); // Amplitude(s) for diagram number 43 - FFV1_0( w_fp[34], w_fp[44], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[44], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1637,10 +1637,10 @@ namespace mg5amcCpu // *** DIAGRAM 44 OF 1240 *** // Wavefunction(s) for diagram number 44 - FFV1P0_3( w_fp[3], w_fp[39], COUPs[1], 0., 0., w_fp[45] ); + FFV1P0_3( w_fp[3], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[45] ); // Amplitude(s) for diagram number 44 - VVV1_0( w_fp[9], w_fp[45], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[45], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1659,7 +1659,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 45 - FFV1_0( w_fp[3], w_fp[44], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[44], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -1674,7 +1674,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 46 - VVV1_0( w_fp[14], w_fp[45], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[45], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1693,7 +1693,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 47 - FFV1_0( w_fp[3], w_fp[43], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[43], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1708,7 +1708,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 48 - FFV1_0( w_fp[3], w_fp[39], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1720,7 +1720,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1732,7 +1732,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -1748,11 +1748,11 @@ namespace mg5amcCpu // *** DIAGRAM 49 OF 1240 *** // Wavefunction(s) for diagram number 49 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[46] ); - FFV1_1( w_fp[39], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[47] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[46] ); + FFV1_1( w_fp[39], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] ); // Amplitude(s) for diagram number 49 - FFV1_0( w_fp[46], w_fp[47], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1762,10 +1762,10 @@ namespace mg5amcCpu // *** DIAGRAM 50 OF 1240 *** // Wavefunction(s) for diagram number 50 - FFV1_2( w_fp[46], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[48] ); + FFV1_2( w_fp[46], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] ); // Amplitude(s) for diagram number 50 - FFV1_0( w_fp[48], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1778,7 +1778,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 51 - FFV1_0( w_fp[46], w_fp[39], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1793,7 +1793,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 52 - FFV1_0( w_fp[41], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support 
updates numerators_sv and denominators_sv (#473) #endif @@ -1806,7 +1806,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 53 - FFV1_0( w_fp[42], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[42], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1819,7 +1819,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 54 - FFV1_0( w_fp[41], w_fp[39], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1834,7 +1834,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 55 - FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1849,7 +1849,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 56 - FFV1_0( w_fp[34], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1864,7 +1864,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 57 - FFV1_0( w_fp[3], w_fp[39], w_fp[28], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[28], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1880,11 +1880,11 @@ namespace mg5amcCpu // *** DIAGRAM 58 OF 1240 *** // Wavefunction(s) for diagram number 
58 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[47] ); - FFV1_1( w_fp[47], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[49] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] ); + FFV1_1( w_fp[47], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[49] ); // Amplitude(s) for diagram number 58 - FFV1_0( w_fp[34], w_fp[49], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[49], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1894,10 +1894,10 @@ namespace mg5amcCpu // *** DIAGRAM 59 OF 1240 *** // Wavefunction(s) for diagram number 59 - FFV1_1( w_fp[47], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[50] ); + FFV1_1( w_fp[47], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[50] ); // Amplitude(s) for diagram number 59 - FFV1_0( w_fp[34], w_fp[50], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[50], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1907,10 +1907,10 @@ namespace mg5amcCpu // *** DIAGRAM 60 OF 1240 *** // Wavefunction(s) for diagram number 60 - FFV1P0_3( w_fp[3], w_fp[47], COUPs[1], 0., 0., w_fp[51] ); + FFV1P0_3( w_fp[3], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[51] ); // Amplitude(s) for diagram number 60 - VVV1_0( w_fp[9], w_fp[51], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[51], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1929,7 +1929,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 61 - FFV1_0( w_fp[3], w_fp[50], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[50], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1944,7 +1944,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 62 - VVV1_0( w_fp[12], w_fp[51], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[51], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1963,7 +1963,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 63 - FFV1_0( w_fp[3], w_fp[49], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[49], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1978,7 +1978,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 64 - FFV1_0( w_fp[3], w_fp[47], w_fp[15], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[15], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1990,7 +1990,7 @@ namespace mg5amcCpu jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2002,7 +2002,7 @@ namespace mg5amcCpu jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[17], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2018,10 +2018,10 @@ namespace mg5amcCpu // *** DIAGRAM 65 OF 1240 *** // Wavefunction(s) for diagram number 65 - FFV1_1( w_fp[47], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[52] ); + FFV1_1( w_fp[47], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] ); // Amplitude(s) for diagram number 65 - FFV1_0( w_fp[46], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2034,7 +2034,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 66 - FFV1_0( w_fp[48], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2047,7 +2047,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 67 - FFV1_0( w_fp[46], w_fp[47], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2062,7 +2062,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 68 - FFV1_0( w_fp[38], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2075,7 +2075,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 69 - FFV1_0( w_fp[40], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2088,7 +2088,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 70 - FFV1_0( w_fp[38], w_fp[47], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2103,7 +2103,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 71 - FFV1_0( w_fp[3], w_fp[52], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2118,7 +2118,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 72 - FFV1_0( w_fp[34], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2133,7 +2133,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 73 - FFV1_0( w_fp[3], w_fp[47], w_fp[26], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[26], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2149,11 +2149,11 @@ namespace mg5amcCpu // *** DIAGRAM 74 OF 1240 *** // Wavefunction(s) for diagram number 74 - FFV1_1( w_fp[2], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[52] ); - FFV1_2( w_fp[46], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[7] ); + FFV1_1( w_fp[2], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] ); + FFV1_2( w_fp[46], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], 
w_fp[7] ); // Amplitude(s) for diagram number 74 - FFV1_0( w_fp[7], w_fp[52], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[52], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2163,10 +2163,10 @@ namespace mg5amcCpu // *** DIAGRAM 75 OF 1240 *** // Wavefunction(s) for diagram number 75 - FFV1_2( w_fp[46], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[53] ); + FFV1_2( w_fp[46], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[53] ); // Amplitude(s) for diagram number 75 - FFV1_0( w_fp[53], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2176,10 +2176,10 @@ namespace mg5amcCpu // *** DIAGRAM 76 OF 1240 *** // Wavefunction(s) for diagram number 76 - FFV1P0_3( w_fp[46], w_fp[2], COUPs[1], 0., 0., w_fp[54] ); + FFV1P0_3( w_fp[46], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[54] ); // Amplitude(s) for diagram number 76 - VVV1_0( w_fp[12], w_fp[54], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[54], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2198,7 +2198,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 77 - FFV1_0( w_fp[53], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2213,7 +2213,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 78 - VVV1_0( w_fp[14], w_fp[54], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[54], 
w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2232,7 +2232,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 79 - FFV1_0( w_fp[7], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2247,7 +2247,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 80 - FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2259,7 +2259,7 @@ namespace mg5amcCpu jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2271,7 +2271,7 @@ namespace mg5amcCpu jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2290,7 +2290,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 81 - FFV1_0( w_fp[46], w_fp[52], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[52], 
w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2305,7 +2305,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 82 - FFV1_0( w_fp[48], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2320,7 +2320,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 83 - FFV1_0( w_fp[46], w_fp[2], w_fp[25], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[25], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2336,10 +2336,10 @@ namespace mg5amcCpu // *** DIAGRAM 84 OF 1240 *** // Wavefunction(s) for diagram number 84 - FFV1_2( w_fp[38], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[25] ); + FFV1_2( w_fp[38], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[25] ); // Amplitude(s) for diagram number 84 - FFV1_0( w_fp[25], w_fp[52], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[52], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2349,10 +2349,10 @@ namespace mg5amcCpu // *** DIAGRAM 85 OF 1240 *** // Wavefunction(s) for diagram number 85 - FFV1_2( w_fp[38], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[48] ); + FFV1_2( w_fp[38], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] ); // Amplitude(s) for diagram number 85 - FFV1_0( w_fp[48], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -2362,10 +2362,10 @@ namespace mg5amcCpu // *** DIAGRAM 86 OF 1240 *** // Wavefunction(s) for diagram number 86 - FFV1P0_3( w_fp[38], w_fp[2], COUPs[1], 0., 0., w_fp[23] ); + FFV1P0_3( w_fp[38], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 86 - VVV1_0( w_fp[9], w_fp[23], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[23], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2384,7 +2384,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 87 - FFV1_0( w_fp[48], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2399,7 +2399,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 88 - VVV1_0( w_fp[14], w_fp[23], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[23], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2418,7 +2418,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 89 - FFV1_0( w_fp[25], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2433,7 +2433,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 90 - FFV1_0( w_fp[38], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2445,7 +2445,7 @@ namespace mg5amcCpu jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2457,7 +2457,7 @@ namespace mg5amcCpu jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2476,7 +2476,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 91 - FFV1_0( w_fp[38], w_fp[52], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[52], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2491,7 +2491,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 92 - FFV1_0( w_fp[40], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2506,7 +2506,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 93 - FFV1_0( w_fp[38], w_fp[2], w_fp[28], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[28], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2522,10 +2522,10 @@ namespace mg5amcCpu // *** DIAGRAM 94 OF 1240 *** // Wavefunction(s) for diagram number 94 - FFV1_2( w_fp[41], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[28] ); + FFV1_2( w_fp[41], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[28] ); // Amplitude(s) for diagram number 94 - FFV1_0( w_fp[28], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2535,10 +2535,10 @@ namespace mg5amcCpu // *** DIAGRAM 95 OF 1240 *** // Wavefunction(s) for diagram number 95 - FFV1_2( w_fp[41], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[40] ); + FFV1_2( w_fp[41], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] ); // Amplitude(s) for diagram number 95 - FFV1_0( w_fp[40], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2548,10 +2548,10 @@ namespace mg5amcCpu // *** DIAGRAM 96 OF 1240 *** // Wavefunction(s) for diagram number 96 - FFV1P0_3( w_fp[41], w_fp[2], COUPs[1], 0., 0., w_fp[20] ); + FFV1P0_3( w_fp[41], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[20] ); // Amplitude(s) for diagram number 96 - VVV1_0( w_fp[9], w_fp[20], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[20], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2570,7 +2570,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 97 - FFV1_0( w_fp[40], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2585,7 +2585,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 98 - VVV1_0( w_fp[12], w_fp[20], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[20], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2604,7 +2604,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 99 - FFV1_0( w_fp[28], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2619,7 +2619,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 100 - FFV1_0( w_fp[41], w_fp[2], w_fp[15], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[15], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2631,7 +2631,7 @@ namespace mg5amcCpu jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2643,7 +2643,7 @@ namespace mg5amcCpu jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2662,7 +2662,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 101 - FFV1_0( w_fp[41], w_fp[52], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[52], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2677,7 +2677,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 102 - FFV1_0( w_fp[42], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[42], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2692,7 +2692,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 103 - FFV1_0( w_fp[41], w_fp[2], w_fp[26], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[26], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2708,10 +2708,10 @@ namespace mg5amcCpu // *** DIAGRAM 104 OF 1240 *** // Wavefunction(s) for diagram number 104 - FFV1_2( w_fp[3], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[26] ); + FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[26] ); // Amplitude(s) for diagram number 104 - FFV1_0( w_fp[26], w_fp[52], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[52], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2723,10 +2723,10 @@ namespace mg5amcCpu // *** DIAGRAM 105 OF 1240 *** // Wavefunction(s) for diagram number 105 - VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 0., 0., w_fp[42] ); 
+ VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[42] ); // Amplitude(s) for diagram number 105 - FFV1_0( w_fp[3], w_fp[52], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2742,10 +2742,10 @@ namespace mg5amcCpu // *** DIAGRAM 106 OF 1240 *** // Wavefunction(s) for diagram number 106 - FFV1_1( w_fp[2], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[17] ); + FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] ); // Amplitude(s) for diagram number 106 - FFV1_0( w_fp[34], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2760,7 +2760,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 107 - FFV1_0( w_fp[34], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2779,7 +2779,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 108 - FFV1_0( w_fp[3], w_fp[17], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2798,7 +2798,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 109 - FFV1_0( w_fp[26], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -2814,10 +2814,10 @@ namespace mg5amcCpu // *** DIAGRAM 110 OF 1240 *** // Wavefunction(s) for diagram number 110 - FFV1_2( w_fp[3], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); + FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 110 - FFV1_0( w_fp[14], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2829,10 +2829,10 @@ namespace mg5amcCpu // *** DIAGRAM 111 OF 1240 *** // Wavefunction(s) for diagram number 111 - VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] ); // Amplitude(s) for diagram number 111 - FFV1_0( w_fp[3], w_fp[52], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2848,10 +2848,10 @@ namespace mg5amcCpu // *** DIAGRAM 112 OF 1240 *** // Wavefunction(s) for diagram number 112 - FFV1_1( w_fp[2], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[15] ); + FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] ); // Amplitude(s) for diagram number 112 - FFV1_0( w_fp[34], w_fp[15], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[15], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2866,7 +2866,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 113 - FFV1_0( w_fp[34], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2885,7 +2885,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 114 - FFV1_0( w_fp[3], w_fp[15], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[15], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2904,7 +2904,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 115 - FFV1_0( w_fp[14], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2920,10 +2920,10 @@ namespace mg5amcCpu // *** DIAGRAM 116 OF 1240 *** // Wavefunction(s) for diagram number 116 - FFV1_2( w_fp[3], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 116 - FFV1_0( w_fp[12], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2935,10 +2935,10 @@ namespace mg5amcCpu // *** DIAGRAM 117 OF 1240 *** // Wavefunction(s) for diagram number 117 - VVV1P0_1( w_fp[4], w_fp[29], COUPs[0], 0., 0., w_fp[19] ); + VVV1P0_1( w_fp[4], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[19] ); // Amplitude(s) for diagram number 117 - FFV1_0( w_fp[3], w_fp[52], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -2954,10 +2954,10 @@ namespace mg5amcCpu // *** DIAGRAM 118 OF 1240 *** // Wavefunction(s) for diagram number 118 - FFV1_1( w_fp[2], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[18] ); + FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[18] ); // Amplitude(s) for diagram number 118 - FFV1_0( w_fp[34], w_fp[18], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[18], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2972,7 +2972,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 119 - FFV1_0( w_fp[34], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2991,7 +2991,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 120 - FFV1_0( w_fp[3], w_fp[18], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[18], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3010,7 +3010,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 121 - FFV1_0( w_fp[12], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3029,7 +3029,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 122 - FFV1_0( w_fp[3], w_fp[52], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[30], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support 
updates numerators_sv and denominators_sv (#473) #endif @@ -3041,7 +3041,7 @@ namespace mg5amcCpu jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[52], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[31], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3053,7 +3053,7 @@ namespace mg5amcCpu jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[52], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[32], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3072,7 +3072,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 123 - FFV1_0( w_fp[34], w_fp[2], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[30], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3084,7 +3084,7 @@ namespace mg5amcCpu jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[31], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3096,7 +3096,7 @@ namespace mg5amcCpu jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[32], COUPs[1], &_fp[0] ); 
+ FFV1_0( w_fp[34], w_fp[2], w_fp[32], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3112,13 +3112,13 @@ namespace mg5amcCpu // *** DIAGRAM 124 OF 1240 *** // Wavefunction(s) for diagram number 124 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[34] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[52] ); - FFV1_1( w_fp[34], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[52], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] ); + FFV1_1( w_fp[34], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[52], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 124 - FFV1_0( w_fp[22], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3127,10 +3127,10 @@ namespace mg5amcCpu // *** DIAGRAM 125 OF 1240 *** // Wavefunction(s) for diagram number 125 - FFV1_2( w_fp[52], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[52], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 125 - FFV1_0( w_fp[21], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3139,11 +3139,11 @@ namespace mg5amcCpu // *** DIAGRAM 126 OF 1240 *** // Wavefunction(s) for diagram number 126 - FFV1_1( w_fp[34], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[55] ); - FFV1_2( w_fp[52], w_fp[4], 
COUPs[1], cIPD[0], cIPD[1], w_fp[56] ); + FFV1_1( w_fp[34], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[55] ); + FFV1_2( w_fp[52], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[56] ); // Amplitude(s) for diagram number 126 - FFV1_0( w_fp[56], w_fp[55], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[55], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3155,7 +3155,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 127 - FFV1_0( w_fp[21], w_fp[55], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[55], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3164,10 +3164,10 @@ namespace mg5amcCpu // *** DIAGRAM 128 OF 1240 *** // Wavefunction(s) for diagram number 128 - FFV1_1( w_fp[34], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[57] ); + FFV1_1( w_fp[34], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[57] ); // Amplitude(s) for diagram number 128 - FFV1_0( w_fp[56], w_fp[57], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[57], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3179,7 +3179,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 129 - FFV1_0( w_fp[22], w_fp[57], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[57], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3188,10 +3188,10 @@ namespace mg5amcCpu // *** DIAGRAM 130 OF 1240 *** // Wavefunction(s) for diagram number 130 - FFV1P0_3( w_fp[52], w_fp[34], COUPs[1], 0., 0., w_fp[58] ); + 
FFV1P0_3( w_fp[52], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[58] ); // Amplitude(s) for diagram number 130 - VVV1_0( w_fp[24], w_fp[6], w_fp[58], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[58], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3203,10 +3203,10 @@ namespace mg5amcCpu // *** DIAGRAM 131 OF 1240 *** // Wavefunction(s) for diagram number 131 - FFV1_1( w_fp[34], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[59] ); + FFV1_1( w_fp[34], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] ); // Amplitude(s) for diagram number 131 - FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3219,7 +3219,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 132 - FFV1_0( w_fp[52], w_fp[57], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[57], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3232,7 +3232,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 133 - VVV1_0( w_fp[27], w_fp[5], w_fp[58], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[58], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3244,10 +3244,10 @@ namespace mg5amcCpu // *** DIAGRAM 134 OF 1240 *** // Wavefunction(s) for diagram number 134 - FFV1_1( w_fp[34], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); + FFV1_1( w_fp[34], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); // Amplitude(s) for diagram number 134 - 
FFV1_0( w_fp[52], w_fp[60], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[60], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3260,7 +3260,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 135 - FFV1_0( w_fp[52], w_fp[55], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[55], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3273,7 +3273,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 136 - VVV1_0( w_fp[4], w_fp[29], w_fp[58], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[58], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3288,7 +3288,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 137 - FFV1_0( w_fp[52], w_fp[9], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[9], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3298,10 +3298,10 @@ namespace mg5amcCpu // *** DIAGRAM 138 OF 1240 *** // Wavefunction(s) for diagram number 138 - FFV1_1( w_fp[34], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[58] ); + FFV1_1( w_fp[34], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] ); // Amplitude(s) for diagram number 138 - FFV1_0( w_fp[52], w_fp[58], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[58], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3314,7 +3314,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 139 - FFV1_0( w_fp[52], w_fp[34], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[34], w_fp[30], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3322,7 +3322,7 @@ namespace mg5amcCpu jamp_sv[11] -= amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[34], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[34], w_fp[31], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3330,7 +3330,7 @@ namespace mg5amcCpu jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[34], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[34], w_fp[32], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3342,12 +3342,12 @@ namespace mg5amcCpu // *** DIAGRAM 140 OF 1240 *** // Wavefunction(s) for diagram number 140 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[61] ); - FFV1P0_3( w_fp[3], w_fp[34], COUPs[1], 0., 0., w_fp[62] ); - VVV1P0_1( w_fp[61], w_fp[5], COUPs[0], 0., 0., w_fp[63] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[61] ); + FFV1P0_3( w_fp[3], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[62] ); + VVV1P0_1( w_fp[61], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[63] ); // Amplitude(s) for diagram number 140 - VVV1_0( w_fp[62], w_fp[63], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[63], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3363,10 +3363,10 @@ namespace mg5amcCpu // *** DIAGRAM 141 OF 
1240 *** // Wavefunction(s) for diagram number 141 - VVV1P0_1( w_fp[61], w_fp[6], COUPs[0], 0., 0., w_fp[64] ); + VVV1P0_1( w_fp[61], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[64] ); // Amplitude(s) for diagram number 141 - VVV1_0( w_fp[62], w_fp[64], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[64], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3385,7 +3385,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 142 - VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3397,7 +3397,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3409,7 +3409,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3425,10 +3425,10 @@ namespace mg5amcCpu // *** DIAGRAM 143 OF 1240 *** // Wavefunction(s) for diagram number 143 - FFV1_2( w_fp[3], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[65] ); 
+ FFV1_2( w_fp[3], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[65] ); // Amplitude(s) for diagram number 143 - FFV1_0( w_fp[65], w_fp[55], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[55], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3441,7 +3441,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 144 - FFV1_0( w_fp[3], w_fp[55], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3456,7 +3456,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 145 - FFV1_0( w_fp[65], w_fp[57], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[57], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3469,7 +3469,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 146 - FFV1_0( w_fp[3], w_fp[57], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3481,10 +3481,10 @@ namespace mg5amcCpu // *** DIAGRAM 147 OF 1240 *** // Wavefunction(s) for diagram number 147 - FFV1_1( w_fp[34], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[66] ); + FFV1_1( w_fp[34], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] ); // Amplitude(s) for diagram number 147 - FFV1_0( w_fp[38], w_fp[66], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[66], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3494,10 +3494,10 @@ namespace mg5amcCpu // *** DIAGRAM 148 OF 1240 *** // Wavefunction(s) for diagram number 148 - FFV1P0_3( w_fp[38], w_fp[34], COUPs[1], 0., 0., w_fp[67] ); + FFV1P0_3( w_fp[38], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[67] ); // Amplitude(s) for diagram number 148 - VVV1_0( w_fp[61], w_fp[6], w_fp[67], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[67], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3512,7 +3512,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 149 - FFV1_0( w_fp[38], w_fp[57], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[57], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3525,7 +3525,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 150 - FFV1_0( w_fp[41], w_fp[66], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[66], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3535,10 +3535,10 @@ namespace mg5amcCpu // *** DIAGRAM 151 OF 1240 *** // Wavefunction(s) for diagram number 151 - FFV1P0_3( w_fp[41], w_fp[34], COUPs[1], 0., 0., w_fp[68] ); + FFV1P0_3( w_fp[41], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 151 - VVV1_0( w_fp[61], w_fp[5], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[68], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3553,7 +3553,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 152 - FFV1_0( w_fp[41], w_fp[55], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[55], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3566,7 +3566,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 153 - FFV1_0( w_fp[3], w_fp[66], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[66], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3581,7 +3581,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 154 - VVV1_0( w_fp[61], w_fp[29], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[29], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3600,7 +3600,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 155 - FFV1_0( w_fp[3], w_fp[58], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[58], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3612,11 +3612,11 @@ namespace mg5amcCpu // *** DIAGRAM 156 OF 1240 *** // Wavefunction(s) for diagram number 156 - VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 0., 0., w_fp[66] ); - VVV1P0_1( w_fp[66], w_fp[4], COUPs[0], 0., 0., w_fp[69] ); + VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[66] ); + VVV1P0_1( w_fp[66], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[69] ); // Amplitude(s) for diagram number 156 - VVV1_0( w_fp[62], w_fp[69], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[69], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the 
code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3632,10 +3632,10 @@ namespace mg5amcCpu // *** DIAGRAM 157 OF 1240 *** // Wavefunction(s) for diagram number 157 - VVV1P0_1( w_fp[66], w_fp[6], COUPs[0], 0., 0., w_fp[70] ); + VVV1P0_1( w_fp[66], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[70] ); // Amplitude(s) for diagram number 157 - VVV1_0( w_fp[62], w_fp[70], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[70], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3654,7 +3654,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 158 - VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3666,7 +3666,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3678,7 +3678,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3694,10 
+3694,10 @@ namespace mg5amcCpu // *** DIAGRAM 159 OF 1240 *** // Wavefunction(s) for diagram number 159 - FFV1_2( w_fp[3], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); + FFV1_2( w_fp[3], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); // Amplitude(s) for diagram number 159 - FFV1_0( w_fp[71], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3710,7 +3710,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 160 - FFV1_0( w_fp[3], w_fp[9], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3725,7 +3725,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 161 - FFV1_0( w_fp[71], w_fp[57], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[57], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3738,7 +3738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 162 - FFV1_0( w_fp[3], w_fp[57], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3750,10 +3750,10 @@ namespace mg5amcCpu // *** DIAGRAM 163 OF 1240 *** // Wavefunction(s) for diagram number 163 - FFV1_1( w_fp[34], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[72] ); + FFV1_1( w_fp[34], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] ); // Amplitude(s) for diagram number 163 - FFV1_0( w_fp[46], w_fp[72], w_fp[6], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[72], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3763,10 +3763,10 @@ namespace mg5amcCpu // *** DIAGRAM 164 OF 1240 *** // Wavefunction(s) for diagram number 164 - FFV1P0_3( w_fp[46], w_fp[34], COUPs[1], 0., 0., w_fp[73] ); + FFV1P0_3( w_fp[46], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[73] ); // Amplitude(s) for diagram number 164 - VVV1_0( w_fp[66], w_fp[6], w_fp[73], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[73], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3781,7 +3781,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 165 - FFV1_0( w_fp[46], w_fp[57], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[57], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3794,7 +3794,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 166 - FFV1_0( w_fp[41], w_fp[72], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[72], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3807,7 +3807,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 167 - VVV1_0( w_fp[66], w_fp[4], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[68], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3822,7 +3822,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 168 - 
FFV1_0( w_fp[41], w_fp[9], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[9], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3835,7 +3835,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 169 - FFV1_0( w_fp[3], w_fp[72], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[72], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3850,7 +3850,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 170 - VVV1_0( w_fp[66], w_fp[27], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[27], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3869,7 +3869,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 171 - FFV1_0( w_fp[3], w_fp[60], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[60], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3881,11 +3881,11 @@ namespace mg5amcCpu // *** DIAGRAM 172 OF 1240 *** // Wavefunction(s) for diagram number 172 - VVV1P0_1( w_fp[1], w_fp[6], COUPs[0], 0., 0., w_fp[72] ); - VVV1P0_1( w_fp[72], w_fp[4], COUPs[0], 0., 0., w_fp[74] ); + VVV1P0_1( w_fp[1], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[72] ); + VVV1P0_1( w_fp[72], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[74] ); // Amplitude(s) for diagram number 172 - VVV1_0( w_fp[62], w_fp[74], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[74], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -3901,10 +3901,10 @@ namespace mg5amcCpu // *** DIAGRAM 173 OF 1240 *** // Wavefunction(s) for diagram number 173 - VVV1P0_1( w_fp[72], w_fp[5], COUPs[0], 0., 0., w_fp[75] ); + VVV1P0_1( w_fp[72], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[75] ); // Amplitude(s) for diagram number 173 - VVV1_0( w_fp[62], w_fp[75], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[75], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3923,7 +3923,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 174 - VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3935,7 +3935,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3947,7 +3947,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3963,10 +3963,10 @@ namespace mg5amcCpu // *** 
DIAGRAM 175 OF 1240 *** // Wavefunction(s) for diagram number 175 - FFV1_2( w_fp[3], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[76] ); + FFV1_2( w_fp[3], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[76] ); // Amplitude(s) for diagram number 175 - FFV1_0( w_fp[76], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3979,7 +3979,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 176 - FFV1_0( w_fp[3], w_fp[9], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3994,7 +3994,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 177 - FFV1_0( w_fp[76], w_fp[55], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[55], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4007,7 +4007,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 178 - FFV1_0( w_fp[3], w_fp[55], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4019,10 +4019,10 @@ namespace mg5amcCpu // *** DIAGRAM 179 OF 1240 *** // Wavefunction(s) for diagram number 179 - FFV1_1( w_fp[34], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); + FFV1_1( w_fp[34], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); // Amplitude(s) for diagram number 179 - FFV1_0( w_fp[46], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[46], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4035,7 +4035,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 180 - VVV1_0( w_fp[72], w_fp[5], w_fp[73], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[73], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4050,7 +4050,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 181 - FFV1_0( w_fp[46], w_fp[55], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[55], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4063,7 +4063,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 182 - FFV1_0( w_fp[38], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4076,7 +4076,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 183 - VVV1_0( w_fp[72], w_fp[4], w_fp[67], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[67], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4091,7 +4091,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 184 - FFV1_0( w_fp[38], w_fp[9], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[9], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -4104,7 +4104,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 185 - FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4119,7 +4119,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 186 - VVV1_0( w_fp[72], w_fp[24], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[24], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4138,7 +4138,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 187 - FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4150,10 +4150,10 @@ namespace mg5amcCpu // *** DIAGRAM 188 OF 1240 *** // Wavefunction(s) for diagram number 188 - FFV1_1( w_fp[34], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); + FFV1_1( w_fp[34], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); // Amplitude(s) for diagram number 188 - FFV1_0( w_fp[7], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4165,7 +4165,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 189 - FFV1_0( w_fp[53], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4174,10 +4174,10 @@ namespace mg5amcCpu // *** DIAGRAM 190 OF 1240 *** // Wavefunction(s) for diagram number 190 - FFV1_2( w_fp[46], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[78] ); + FFV1_2( w_fp[46], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[78] ); // Amplitude(s) for diagram number 190 - FFV1_0( w_fp[78], w_fp[55], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[55], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4189,7 +4189,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 191 - FFV1_0( w_fp[53], w_fp[55], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[55], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4201,7 +4201,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 192 - FFV1_0( w_fp[78], w_fp[57], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[57], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4213,7 +4213,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 193 - FFV1_0( w_fp[7], w_fp[57], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[57], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4225,7 +4225,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 194 - FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4238,7 +4238,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 195 - VVV1_0( w_fp[1], w_fp[29], w_fp[73], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[73], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4253,7 +4253,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 196 - FFV1_0( w_fp[46], w_fp[58], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[58], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4266,7 +4266,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 197 - FFV1_0( w_fp[25], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4278,7 +4278,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 198 - FFV1_0( w_fp[48], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4287,10 +4287,10 @@ namespace mg5amcCpu // *** DIAGRAM 199 OF 1240 *** // Wavefunction(s) for diagram number 199 - FFV1_2( w_fp[38], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[58] ); + FFV1_2( w_fp[38], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] ); // Amplitude(s) for diagram number 199 - FFV1_0( w_fp[58], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[9], 
w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4302,7 +4302,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 200 - FFV1_0( w_fp[48], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4314,7 +4314,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 201 - FFV1_0( w_fp[58], w_fp[57], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[57], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4326,7 +4326,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 202 - FFV1_0( w_fp[25], w_fp[57], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[57], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4338,7 +4338,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 203 - FFV1_0( w_fp[38], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4351,7 +4351,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 204 - VVV1_0( w_fp[1], w_fp[27], w_fp[67], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[67], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -4366,7 +4366,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 205 - FFV1_0( w_fp[38], w_fp[60], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[60], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4379,7 +4379,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 206 - FFV1_0( w_fp[28], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4391,7 +4391,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 207 - FFV1_0( w_fp[40], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4400,10 +4400,10 @@ namespace mg5amcCpu // *** DIAGRAM 208 OF 1240 *** // Wavefunction(s) for diagram number 208 - FFV1_2( w_fp[41], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); + FFV1_2( w_fp[41], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); // Amplitude(s) for diagram number 208 - FFV1_0( w_fp[60], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4415,7 +4415,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 209 - FFV1_0( w_fp[40], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4427,7 +4427,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 210 - FFV1_0( w_fp[60], w_fp[55], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[55], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4439,7 +4439,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 211 - FFV1_0( w_fp[28], w_fp[55], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[55], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4451,7 +4451,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 212 - FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4464,7 +4464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 213 - VVV1_0( w_fp[1], w_fp[24], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[68], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4479,7 +4479,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 214 - FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4492,7 +4492,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 215 - FFV1_0( w_fp[26], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4505,7 +4505,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 216 - FFV1_0( w_fp[3], w_fp[77], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4517,10 +4517,10 @@ namespace mg5amcCpu // *** DIAGRAM 217 OF 1240 *** // Wavefunction(s) for diagram number 217 - VVV1P0_1( w_fp[1], w_fp[24], COUPs[0], 0., 0., w_fp[59] ); + VVV1P0_1( w_fp[1], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[59] ); // Amplitude(s) for diagram number 217 - VVV1_0( w_fp[62], w_fp[59], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[59], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4539,7 +4539,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 218 - VVV1_0( w_fp[62], w_fp[1], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4558,7 +4558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 219 - VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4570,7 +4570,7 @@ namespace 
mg5amcCpu jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4582,7 +4582,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4601,7 +4601,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 220 - FFV1_0( w_fp[3], w_fp[57], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4616,7 +4616,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 221 - FFV1_0( w_fp[26], w_fp[57], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[57], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4629,7 +4629,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 222 - FFV1_0( w_fp[14], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -4642,7 +4642,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 223 - FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4654,10 +4654,10 @@ namespace mg5amcCpu // *** DIAGRAM 224 OF 1240 *** // Wavefunction(s) for diagram number 224 - VVV1P0_1( w_fp[1], w_fp[27], COUPs[0], 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[1], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 224 - VVV1_0( w_fp[62], w_fp[68], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[68], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4676,7 +4676,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 225 - VVV1_0( w_fp[62], w_fp[1], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4695,7 +4695,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 226 - VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4707,7 +4707,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] 
); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4719,7 +4719,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4738,7 +4738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 227 - FFV1_0( w_fp[3], w_fp[55], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4753,7 +4753,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 228 - FFV1_0( w_fp[14], w_fp[55], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[55], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4766,7 +4766,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 229 - FFV1_0( w_fp[12], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4779,7 +4779,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 230 - FFV1_0( w_fp[3], w_fp[77], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4791,10 +4791,10 @@ namespace mg5amcCpu // *** DIAGRAM 231 OF 1240 *** // Wavefunction(s) for diagram number 231 - VVV1P0_1( w_fp[1], w_fp[29], COUPs[0], 0., 0., w_fp[67] ); + VVV1P0_1( w_fp[1], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[67] ); // Amplitude(s) for diagram number 231 - VVV1_0( w_fp[62], w_fp[67], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[67], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4813,7 +4813,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 232 - VVV1_0( w_fp[62], w_fp[1], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4832,7 +4832,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 233 - VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4844,7 +4844,7 @@ namespace mg5amcCpu jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4856,7 +4856,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4875,7 +4875,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 234 - FFV1_0( w_fp[3], w_fp[9], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4890,7 +4890,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 235 - FFV1_0( w_fp[12], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4900,12 +4900,12 @@ namespace mg5amcCpu // *** DIAGRAM 236 OF 1240 *** // Wavefunction(s) for diagram number 236 - VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[73] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[79] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[80] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[73] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[79] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[80] ); // Amplitude(s) for diagram number 236 - VVV1_0( w_fp[73], w_fp[6], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[73], w_fp[6], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4917,7 +4917,7 @@ namespace mg5amcCpu jamp_sv[19] -= 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[79], w_fp[6], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[79], w_fp[6], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4929,7 +4929,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[80], w_fp[6], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[80], w_fp[6], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4948,7 +4948,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 237 - FFV1_0( w_fp[3], w_fp[57], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[73], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4956,7 +4956,7 @@ namespace mg5amcCpu jamp_sv[19] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[57], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[79], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4964,7 +4964,7 @@ namespace mg5amcCpu jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[57], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[80], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-4979,7 +4979,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 238 - FFV1_0( w_fp[41], w_fp[34], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[34], w_fp[73], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4987,7 +4987,7 @@ namespace mg5amcCpu jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[34], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[34], w_fp[79], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4995,7 +4995,7 @@ namespace mg5amcCpu jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[12] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[34], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[34], w_fp[80], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5007,12 +5007,12 @@ namespace mg5amcCpu // *** DIAGRAM 239 OF 1240 *** // Wavefunction(s) for diagram number 239 - VVVV1P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[57] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[81] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[82] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[57] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[81] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[82] ); // Amplitude(s) for diagram number 239 - VVV1_0( w_fp[57], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[57], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -5024,7 +5024,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[81], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[81], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5036,7 +5036,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[82], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[82], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5055,7 +5055,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 240 - FFV1_0( w_fp[3], w_fp[55], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[57], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5063,7 +5063,7 @@ namespace mg5amcCpu jamp_sv[13] -= amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[55], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[81], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5071,7 +5071,7 @@ namespace mg5amcCpu jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[55], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[82], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5086,7 +5086,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 241 - FFV1_0( w_fp[38], w_fp[34], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[34], w_fp[57], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5094,7 +5094,7 @@ namespace mg5amcCpu jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[34], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[34], w_fp[81], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5102,7 +5102,7 @@ namespace mg5amcCpu jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[34], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[34], w_fp[82], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5114,12 +5114,12 @@ namespace mg5amcCpu // *** DIAGRAM 242 OF 1240 *** // Wavefunction(s) for diagram number 242 - VVVV1P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[55] ); - VVVV3P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[83] ); - VVVV4P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[84] ); + VVVV1P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[55] ); + VVVV3P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[83] ); + VVVV4P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[84] ); // Amplitude(s) for diagram number 242 - VVV1_0( w_fp[55], w_fp[4], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[55], w_fp[4], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5131,7 +5131,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[83], w_fp[4], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[83], w_fp[4], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5143,7 +5143,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[84], w_fp[4], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[84], w_fp[4], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5162,7 +5162,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 243 - FFV1_0( w_fp[3], w_fp[9], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[55], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5170,7 +5170,7 @@ namespace mg5amcCpu jamp_sv[7] -= amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[83], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5178,7 +5178,7 @@ namespace mg5amcCpu jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], 
w_fp[84], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5193,7 +5193,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 244 - FFV1_0( w_fp[46], w_fp[34], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[34], w_fp[55], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5201,7 +5201,7 @@ namespace mg5amcCpu jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[34], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[34], w_fp[83], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5209,7 +5209,7 @@ namespace mg5amcCpu jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[34], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[34], w_fp[84], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5224,7 +5224,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 245 - FFV1_0( w_fp[3], w_fp[77], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[30], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5232,7 +5232,7 @@ namespace mg5amcCpu jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[31], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // 
Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5240,7 +5240,7 @@ namespace mg5amcCpu jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[32], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5255,7 +5255,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 246 - VVV1_0( w_fp[1], w_fp[30], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[30], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5267,7 +5267,7 @@ namespace mg5amcCpu jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[31], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[31], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5279,7 +5279,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[32], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[32], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5295,13 +5295,13 @@ namespace mg5amcCpu // *** DIAGRAM 247 OF 1240 *** // Wavefunction(s) for diagram number 247 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[62] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], 
cIPD[1], w_fp[77] ); - FFV1_2( w_fp[62], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[34] ); - FFV1_1( w_fp[77], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); + FFV1_2( w_fp[62], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] ); + FFV1_1( w_fp[77], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 247 - FFV1_0( w_fp[34], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5310,10 +5310,10 @@ namespace mg5amcCpu // *** DIAGRAM 248 OF 1240 *** // Wavefunction(s) for diagram number 248 - FFV1_1( w_fp[77], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[85] ); + FFV1_1( w_fp[77], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[85] ); // Amplitude(s) for diagram number 248 - FFV1_0( w_fp[34], w_fp[85], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[85], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5322,11 +5322,11 @@ namespace mg5amcCpu // *** DIAGRAM 249 OF 1240 *** // Wavefunction(s) for diagram number 249 - FFV1_2( w_fp[62], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[86] ); - FFV1_1( w_fp[77], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[87] ); + FFV1_2( w_fp[62], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] ); + FFV1_1( w_fp[77], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[87] ); // Amplitude(s) for diagram number 249 - FFV1_0( w_fp[86], w_fp[87], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[87], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -5338,7 +5338,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 250 - FFV1_0( w_fp[86], w_fp[85], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[85], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5347,10 +5347,10 @@ namespace mg5amcCpu // *** DIAGRAM 251 OF 1240 *** // Wavefunction(s) for diagram number 251 - FFV1_2( w_fp[62], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[88] ); + FFV1_2( w_fp[62], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] ); // Amplitude(s) for diagram number 251 - FFV1_0( w_fp[88], w_fp[87], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[87], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5362,7 +5362,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 252 - FFV1_0( w_fp[88], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5371,10 +5371,10 @@ namespace mg5amcCpu // *** DIAGRAM 253 OF 1240 *** // Wavefunction(s) for diagram number 253 - FFV1P0_3( w_fp[62], w_fp[77], COUPs[1], 0., 0., w_fp[89] ); + FFV1P0_3( w_fp[62], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[89] ); // Amplitude(s) for diagram number 253 - VVV1_0( w_fp[24], w_fp[6], w_fp[89], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[89], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5386,10 +5386,10 @@ namespace mg5amcCpu // *** DIAGRAM 254 OF 
1240 *** // Wavefunction(s) for diagram number 254 - FFV1_2( w_fp[62], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[90] ); + FFV1_2( w_fp[62], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] ); // Amplitude(s) for diagram number 254 - FFV1_0( w_fp[90], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5402,7 +5402,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 255 - FFV1_0( w_fp[88], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5415,7 +5415,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 256 - VVV1_0( w_fp[27], w_fp[5], w_fp[89], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[89], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5427,10 +5427,10 @@ namespace mg5amcCpu // *** DIAGRAM 257 OF 1240 *** // Wavefunction(s) for diagram number 257 - FFV1_2( w_fp[62], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[91] ); + FFV1_2( w_fp[62], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] ); // Amplitude(s) for diagram number 257 - FFV1_0( w_fp[91], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[91], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5443,7 +5443,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 258 - FFV1_0( w_fp[86], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], 
w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5456,7 +5456,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 259 - VVV1_0( w_fp[4], w_fp[29], w_fp[89], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[89], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5471,7 +5471,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 260 - FFV1_0( w_fp[34], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5481,10 +5481,10 @@ namespace mg5amcCpu // *** DIAGRAM 261 OF 1240 *** // Wavefunction(s) for diagram number 261 - FFV1_2( w_fp[62], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[89] ); + FFV1_2( w_fp[62], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] ); // Amplitude(s) for diagram number 261 - FFV1_0( w_fp[89], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[89], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5497,7 +5497,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 262 - FFV1_0( w_fp[62], w_fp[77], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[30], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5505,7 +5505,7 @@ namespace mg5amcCpu jamp_sv[35] -= amp_sv[0]; jamp_sv[41] -= amp_sv[0]; jamp_sv[47] += amp_sv[0]; 
- FFV1_0( w_fp[62], w_fp[77], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[31], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5513,7 +5513,7 @@ namespace mg5amcCpu jamp_sv[39] += amp_sv[0]; jamp_sv[41] -= amp_sv[0]; jamp_sv[45] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[77], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[32], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5525,10 +5525,10 @@ namespace mg5amcCpu // *** DIAGRAM 263 OF 1240 *** // Wavefunction(s) for diagram number 263 - FFV1P0_3( w_fp[62], w_fp[2], COUPs[1], 0., 0., w_fp[92] ); + FFV1P0_3( w_fp[62], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[92] ); // Amplitude(s) for diagram number 263 - VVV1_0( w_fp[92], w_fp[63], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[63], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5547,7 +5547,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 264 - VVV1_0( w_fp[92], w_fp[64], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[64], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5566,7 +5566,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 265 - VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5578,7 
+5578,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5590,7 +5590,7 @@ namespace mg5amcCpu jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5606,10 +5606,10 @@ namespace mg5amcCpu // *** DIAGRAM 266 OF 1240 *** // Wavefunction(s) for diagram number 266 - FFV1_1( w_fp[2], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[93] ); + FFV1_1( w_fp[2], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[93] ); // Amplitude(s) for diagram number 266 - FFV1_0( w_fp[86], w_fp[93], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[93], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5622,7 +5622,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 267 - FFV1_0( w_fp[86], w_fp[2], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5637,7 +5637,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 268 - FFV1_0( w_fp[88], w_fp[93], w_fp[5], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[93], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5650,7 +5650,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 269 - FFV1_0( w_fp[88], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5662,10 +5662,10 @@ namespace mg5amcCpu // *** DIAGRAM 270 OF 1240 *** // Wavefunction(s) for diagram number 270 - FFV1_2( w_fp[62], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[94] ); + FFV1_2( w_fp[62], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] ); // Amplitude(s) for diagram number 270 - FFV1_0( w_fp[94], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[94], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5675,10 +5675,10 @@ namespace mg5amcCpu // *** DIAGRAM 271 OF 1240 *** // Wavefunction(s) for diagram number 271 - FFV1P0_3( w_fp[62], w_fp[39], COUPs[1], 0., 0., w_fp[95] ); + FFV1P0_3( w_fp[62], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[95] ); // Amplitude(s) for diagram number 271 - VVV1_0( w_fp[61], w_fp[6], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5693,7 +5693,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 272 - FFV1_0( w_fp[88], w_fp[39], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[39], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // 
Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5706,7 +5706,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 273 - FFV1_0( w_fp[94], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[94], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5716,10 +5716,10 @@ namespace mg5amcCpu // *** DIAGRAM 274 OF 1240 *** // Wavefunction(s) for diagram number 274 - FFV1P0_3( w_fp[62], w_fp[47], COUPs[1], 0., 0., w_fp[96] ); + FFV1P0_3( w_fp[62], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[96] ); // Amplitude(s) for diagram number 274 - VVV1_0( w_fp[61], w_fp[5], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[96], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5734,7 +5734,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 275 - FFV1_0( w_fp[86], w_fp[47], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[47], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5747,7 +5747,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 276 - FFV1_0( w_fp[94], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[94], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5762,7 +5762,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 277 - VVV1_0( w_fp[61], w_fp[29], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[29], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5781,7 +5781,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 278 - FFV1_0( w_fp[89], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[89], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5796,7 +5796,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 279 - VVV1_0( w_fp[92], w_fp[69], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[69], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5815,7 +5815,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 280 - VVV1_0( w_fp[92], w_fp[70], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[70], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5834,7 +5834,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 281 - VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5846,7 +5846,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the 
code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5858,7 +5858,7 @@ namespace mg5amcCpu jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5874,10 +5874,10 @@ namespace mg5amcCpu // *** DIAGRAM 282 OF 1240 *** // Wavefunction(s) for diagram number 282 - FFV1_1( w_fp[2], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[94] ); + FFV1_1( w_fp[2], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] ); // Amplitude(s) for diagram number 282 - FFV1_0( w_fp[34], w_fp[94], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[94], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5890,7 +5890,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 283 - FFV1_0( w_fp[34], w_fp[2], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5905,7 +5905,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 284 - FFV1_0( w_fp[88], w_fp[94], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[94], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5918,7 +5918,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 285 - FFV1_0( w_fp[88], w_fp[2], 
w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5930,10 +5930,10 @@ namespace mg5amcCpu // *** DIAGRAM 286 OF 1240 *** // Wavefunction(s) for diagram number 286 - FFV1_2( w_fp[62], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[97] ); + FFV1_2( w_fp[62], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] ); // Amplitude(s) for diagram number 286 - FFV1_0( w_fp[97], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[97], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5943,10 +5943,10 @@ namespace mg5amcCpu // *** DIAGRAM 287 OF 1240 *** // Wavefunction(s) for diagram number 287 - FFV1P0_3( w_fp[62], w_fp[33], COUPs[1], 0., 0., w_fp[98] ); + FFV1P0_3( w_fp[62], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[98] ); // Amplitude(s) for diagram number 287 - VVV1_0( w_fp[66], w_fp[6], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5961,7 +5961,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 288 - FFV1_0( w_fp[88], w_fp[33], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[33], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5974,7 +5974,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 289 - FFV1_0( w_fp[97], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[97], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5987,7 +5987,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 290 - VVV1_0( w_fp[66], w_fp[4], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[96], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6002,7 +6002,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 291 - FFV1_0( w_fp[34], w_fp[47], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[47], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6015,7 +6015,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 292 - FFV1_0( w_fp[97], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[97], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6030,7 +6030,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 293 - VVV1_0( w_fp[66], w_fp[27], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[27], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6049,7 +6049,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 294 - FFV1_0( w_fp[91], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[91], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6064,7 
+6064,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 295 - VVV1_0( w_fp[92], w_fp[74], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[74], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6083,7 +6083,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 296 - VVV1_0( w_fp[92], w_fp[75], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[75], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6102,7 +6102,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 297 - VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6114,7 +6114,7 @@ namespace mg5amcCpu jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6126,7 +6126,7 @@ namespace mg5amcCpu jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -6142,10 +6142,10 @@ namespace mg5amcCpu // *** DIAGRAM 298 OF 1240 *** // Wavefunction(s) for diagram number 298 - FFV1_1( w_fp[2], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[97] ); + FFV1_1( w_fp[2], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] ); // Amplitude(s) for diagram number 298 - FFV1_0( w_fp[34], w_fp[97], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[97], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6158,7 +6158,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 299 - FFV1_0( w_fp[34], w_fp[2], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6173,7 +6173,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 300 - FFV1_0( w_fp[86], w_fp[97], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[97], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6186,7 +6186,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 301 - FFV1_0( w_fp[86], w_fp[2], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6198,10 +6198,10 @@ namespace mg5amcCpu // *** DIAGRAM 302 OF 1240 *** // Wavefunction(s) for diagram number 302 - FFV1_2( w_fp[62], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + FFV1_2( w_fp[62], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // 
Amplitude(s) for diagram number 302 - FFV1_0( w_fp[99], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6214,7 +6214,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 303 - VVV1_0( w_fp[72], w_fp[5], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6229,7 +6229,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 304 - FFV1_0( w_fp[86], w_fp[33], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[33], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6242,7 +6242,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 305 - FFV1_0( w_fp[99], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6255,7 +6255,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 306 - VVV1_0( w_fp[72], w_fp[4], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6270,7 +6270,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 307 - FFV1_0( w_fp[34], w_fp[39], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[39], w_fp[72], COUPs[1], 1.0, &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6283,7 +6283,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 308 - FFV1_0( w_fp[99], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6298,7 +6298,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 309 - VVV1_0( w_fp[72], w_fp[24], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[24], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6317,7 +6317,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 310 - FFV1_0( w_fp[90], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6329,10 +6329,10 @@ namespace mg5amcCpu // *** DIAGRAM 311 OF 1240 *** // Wavefunction(s) for diagram number 311 - FFV1_2( w_fp[62], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + FFV1_2( w_fp[62], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 311 - FFV1_0( w_fp[99], w_fp[35], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[35], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6344,7 +6344,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 312 - FFV1_0( w_fp[99], w_fp[36], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], 
w_fp[36], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6353,10 +6353,10 @@ namespace mg5amcCpu // *** DIAGRAM 313 OF 1240 *** // Wavefunction(s) for diagram number 313 - FFV1_1( w_fp[33], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[100] ); + FFV1_1( w_fp[33], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[100] ); // Amplitude(s) for diagram number 313 - FFV1_0( w_fp[86], w_fp[100], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[100], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6368,7 +6368,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 314 - FFV1_0( w_fp[86], w_fp[36], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[36], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6380,7 +6380,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 315 - FFV1_0( w_fp[88], w_fp[100], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[100], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6392,7 +6392,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 316 - FFV1_0( w_fp[88], w_fp[35], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[35], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6404,7 +6404,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 317 - FFV1_0( w_fp[99], w_fp[33], 
w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6417,7 +6417,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 318 - VVV1_0( w_fp[1], w_fp[29], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6432,7 +6432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 319 - FFV1_0( w_fp[89], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[89], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6445,7 +6445,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 320 - FFV1_0( w_fp[99], w_fp[43], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[43], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6457,7 +6457,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 321 - FFV1_0( w_fp[99], w_fp[44], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[44], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6466,10 +6466,10 @@ namespace mg5amcCpu // *** DIAGRAM 322 OF 1240 *** // Wavefunction(s) for diagram number 322 - FFV1_1( w_fp[39], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[89] ); + FFV1_1( w_fp[39], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] ); // Amplitude(s) for 
diagram number 322 - FFV1_0( w_fp[34], w_fp[89], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[89], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6481,7 +6481,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 323 - FFV1_0( w_fp[34], w_fp[44], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[44], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6493,7 +6493,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 324 - FFV1_0( w_fp[88], w_fp[89], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[89], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6505,7 +6505,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 325 - FFV1_0( w_fp[88], w_fp[43], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[43], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6517,7 +6517,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 326 - FFV1_0( w_fp[99], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6530,7 +6530,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 327 - VVV1_0( w_fp[1], w_fp[27], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6545,7 +6545,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 328 - FFV1_0( w_fp[91], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[91], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6558,7 +6558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 329 - FFV1_0( w_fp[99], w_fp[49], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[49], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6570,7 +6570,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 330 - FFV1_0( w_fp[99], w_fp[50], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[50], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6579,10 +6579,10 @@ namespace mg5amcCpu // *** DIAGRAM 331 OF 1240 *** // Wavefunction(s) for diagram number 331 - FFV1_1( w_fp[47], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[91] ); + FFV1_1( w_fp[47], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] ); // Amplitude(s) for diagram number 331 - FFV1_0( w_fp[34], w_fp[91], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[91], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6594,7 +6594,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 332 - FFV1_0( w_fp[34], w_fp[50], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], 
w_fp[50], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6606,7 +6606,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 333 - FFV1_0( w_fp[86], w_fp[91], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[91], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6618,7 +6618,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 334 - FFV1_0( w_fp[86], w_fp[49], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[49], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6630,7 +6630,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 335 - FFV1_0( w_fp[99], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6643,7 +6643,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 336 - VVV1_0( w_fp[1], w_fp[24], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[96], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6658,7 +6658,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 337 - FFV1_0( w_fp[90], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv 
and denominators_sv (#473) #endif @@ -6671,7 +6671,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 338 - FFV1_0( w_fp[99], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6684,7 +6684,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 339 - FFV1_0( w_fp[99], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6699,7 +6699,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 340 - VVV1_0( w_fp[92], w_fp[59], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[59], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6718,7 +6718,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 341 - VVV1_0( w_fp[92], w_fp[1], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[1], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6737,7 +6737,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 342 - VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6749,7 +6749,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 
0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6761,7 +6761,7 @@ namespace mg5amcCpu jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6780,7 +6780,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 343 - FFV1_0( w_fp[88], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6795,7 +6795,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 344 - FFV1_0( w_fp[88], w_fp[17], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[17], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6808,7 +6808,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 345 - FFV1_0( w_fp[99], w_fp[15], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[15], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6821,7 +6821,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) 
for diagram number 346 - FFV1_0( w_fp[99], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6836,7 +6836,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 347 - VVV1_0( w_fp[92], w_fp[68], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[68], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6855,7 +6855,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 348 - VVV1_0( w_fp[92], w_fp[1], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[1], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6874,7 +6874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 349 - VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6886,7 +6886,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6898,7 +6898,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 
1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6917,7 +6917,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 350 - FFV1_0( w_fp[86], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6932,7 +6932,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 351 - FFV1_0( w_fp[86], w_fp[15], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[15], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6945,7 +6945,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 352 - FFV1_0( w_fp[99], w_fp[18], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[18], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6958,7 +6958,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 353 - FFV1_0( w_fp[99], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6973,7 +6973,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 354 - VVV1_0( w_fp[92], w_fp[67], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], 
w_fp[67], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6992,7 +6992,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 355 - VVV1_0( w_fp[92], w_fp[1], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[1], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7011,7 +7011,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 356 - VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7023,7 +7023,7 @@ namespace mg5amcCpu jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7035,7 +7035,7 @@ namespace mg5amcCpu jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7054,7 +7054,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 357 - FFV1_0( w_fp[34], 
w_fp[2], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7069,7 +7069,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 358 - FFV1_0( w_fp[34], w_fp[18], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[18], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7082,7 +7082,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 359 - VVV1_0( w_fp[73], w_fp[6], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[73], w_fp[6], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7094,7 +7094,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[79], w_fp[6], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[79], w_fp[6], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7106,7 +7106,7 @@ namespace mg5amcCpu jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[80], w_fp[6], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[80], w_fp[6], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7125,7 +7125,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 360 - FFV1_0( 
w_fp[88], w_fp[2], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[73], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7133,7 +7133,7 @@ namespace mg5amcCpu jamp_sv[39] -= amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[87] += amp_sv[0]; - FFV1_0( w_fp[88], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7141,7 +7141,7 @@ namespace mg5amcCpu jamp_sv[57] += amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[81] += amp_sv[0]; - FFV1_0( w_fp[88], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7156,7 +7156,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 361 - FFV1_0( w_fp[62], w_fp[47], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[47], w_fp[73], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7164,7 +7164,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[47], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[47], w_fp[79], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7172,7 +7172,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[47], w_fp[80], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[47], w_fp[80], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7187,7 +7187,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 362 - VVV1_0( w_fp[57], w_fp[5], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[57], w_fp[5], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7199,7 +7199,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[81], w_fp[5], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[81], w_fp[5], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7211,7 +7211,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[82], w_fp[5], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[82], w_fp[5], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7230,7 +7230,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 363 - FFV1_0( w_fp[86], w_fp[2], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[57], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7238,7 +7238,7 @@ namespace mg5amcCpu jamp_sv[45] -= amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - FFV1_0( 
w_fp[86], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7246,7 +7246,7 @@ namespace mg5amcCpu jamp_sv[59] += amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; - FFV1_0( w_fp[86], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7261,7 +7261,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 364 - FFV1_0( w_fp[62], w_fp[39], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[57], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7269,7 +7269,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[39], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[81], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7277,7 +7277,7 @@ namespace mg5amcCpu jamp_sv[87] += amp_sv[0]; jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[39], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[82], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7292,7 +7292,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 365 - VVV1_0( w_fp[55], w_fp[4], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[55], 
w_fp[4], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7304,7 +7304,7 @@ namespace mg5amcCpu jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[83], w_fp[4], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[83], w_fp[4], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7316,7 +7316,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[84], w_fp[4], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[84], w_fp[4], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7335,7 +7335,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 366 - FFV1_0( w_fp[34], w_fp[2], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[55], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7343,7 +7343,7 @@ namespace mg5amcCpu jamp_sv[47] -= amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[83], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7351,7 +7351,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - FFV1_0( w_fp[34], 
w_fp[2], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[84], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7366,7 +7366,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 367 - FFV1_0( w_fp[62], w_fp[33], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[55], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7374,7 +7374,7 @@ namespace mg5amcCpu jamp_sv[59] -= amp_sv[0]; jamp_sv[65] -= amp_sv[0]; jamp_sv[71] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[33], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[83], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7382,7 +7382,7 @@ namespace mg5amcCpu jamp_sv[63] += amp_sv[0]; jamp_sv[65] -= amp_sv[0]; jamp_sv[69] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[33], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[84], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7397,7 +7397,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 368 - FFV1_0( w_fp[99], w_fp[2], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[30], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7405,7 +7405,7 @@ namespace mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], 
w_fp[31], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7413,7 +7413,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[32], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7428,7 +7428,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 369 - VVV1_0( w_fp[1], w_fp[30], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[30], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7440,7 +7440,7 @@ namespace mg5amcCpu jamp_sv[71] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[31], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[31], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7452,7 +7452,7 @@ namespace mg5amcCpu jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[32], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[32], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7468,11 +7468,11 @@ namespace mg5amcCpu // *** DIAGRAM 370 OF 1240 *** // Wavefunction(s) for diagram number 370 - VVV1P0_1( w_fp[0], w_fp[4], 
COUPs[0], 0., 0., w_fp[92] ); - FFV1_2( w_fp[3], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[92] ); + FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 370 - FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7485,7 +7485,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 371 - FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7495,11 +7495,11 @@ namespace mg5amcCpu // *** DIAGRAM 372 OF 1240 *** // Wavefunction(s) for diagram number 372 - VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 0., 0., w_fp[62] ); - FFV1P0_3( w_fp[3], w_fp[77], COUPs[1], 0., 0., w_fp[34] ); + VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[62] ); + FFV1P0_3( w_fp[3], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[34] ); // Amplitude(s) for diagram number 372 - VVV1_0( w_fp[62], w_fp[34], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[34], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7518,7 +7518,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 373 - FFV1_0( w_fp[3], w_fp[85], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[85], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7530,10 +7530,10 @@ 
namespace mg5amcCpu // *** DIAGRAM 374 OF 1240 *** // Wavefunction(s) for diagram number 374 - VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 374 - VVV1_0( w_fp[86], w_fp[34], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[34], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7552,7 +7552,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 375 - FFV1_0( w_fp[3], w_fp[9], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7564,12 +7564,12 @@ namespace mg5amcCpu // *** DIAGRAM 376 OF 1240 *** // Wavefunction(s) for diagram number 376 - VVVV1P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[88] ); - VVVV3P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[90] ); - VVVV4P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[96] ); + VVVV1P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] ); + VVVV3P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] ); + VVVV4P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] ); // Amplitude(s) for diagram number 376 - FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7581,7 +7581,7 @@ namespace mg5amcCpu jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[90], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7593,7 +7593,7 @@ namespace mg5amcCpu jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7609,10 +7609,10 @@ namespace mg5amcCpu // *** DIAGRAM 377 OF 1240 *** // Wavefunction(s) for diagram number 377 - FFV1_1( w_fp[77], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[95] ); + FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[95] ); // Amplitude(s) for diagram number 377 - FFV1_0( w_fp[38], w_fp[95], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[95], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7622,10 +7622,10 @@ namespace mg5amcCpu // *** DIAGRAM 378 OF 1240 *** // Wavefunction(s) for diagram number 378 - FFV1_2( w_fp[38], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); + FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 378 - FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7638,7 +7638,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 379 - FFV1_0( w_fp[38], w_fp[77], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], 
w_fp[77], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7653,7 +7653,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 380 - FFV1_0( w_fp[41], w_fp[95], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[95], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7663,10 +7663,10 @@ namespace mg5amcCpu // *** DIAGRAM 381 OF 1240 *** // Wavefunction(s) for diagram number 381 - FFV1_2( w_fp[41], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[101] ); + FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[101] ); // Amplitude(s) for diagram number 381 - FFV1_0( w_fp[101], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[101], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7679,7 +7679,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 382 - FFV1_0( w_fp[41], w_fp[77], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7694,7 +7694,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 383 - FFV1_0( w_fp[3], w_fp[95], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[95], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7709,7 +7709,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 384 - FFV1_0( w_fp[99], 
w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7721,10 +7721,10 @@ namespace mg5amcCpu // *** DIAGRAM 385 OF 1240 *** // Wavefunction(s) for diagram number 385 - VVV1P0_1( w_fp[92], w_fp[29], COUPs[0], 0., 0., w_fp[95] ); + VVV1P0_1( w_fp[92], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[95] ); // Amplitude(s) for diagram number 385 - FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7740,10 +7740,10 @@ namespace mg5amcCpu // *** DIAGRAM 386 OF 1240 *** // Wavefunction(s) for diagram number 386 - FFV1_1( w_fp[2], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[102] ); + FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] ); // Amplitude(s) for diagram number 386 - FFV1_0( w_fp[22], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7756,7 +7756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 387 - FFV1_0( w_fp[21], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7766,10 +7766,10 @@ namespace mg5amcCpu // *** DIAGRAM 388 OF 1240 *** // Wavefunction(s) for diagram number 388 - FFV1P0_3( w_fp[52], w_fp[2], COUPs[1], 0., 0., w_fp[103] ); + FFV1P0_3( w_fp[52], w_fp[2], COUPs[1], 1.0, 0., 0., 
w_fp[103] ); // Amplitude(s) for diagram number 388 - VVV1_0( w_fp[62], w_fp[103], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[103], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7788,7 +7788,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 389 - FFV1_0( w_fp[21], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7803,7 +7803,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 390 - VVV1_0( w_fp[86], w_fp[103], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[103], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7822,7 +7822,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 391 - FFV1_0( w_fp[22], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7837,7 +7837,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 392 - FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7849,7 +7849,7 @@ namespace mg5amcCpu jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], 
w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7861,7 +7861,7 @@ namespace mg5amcCpu jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7877,10 +7877,10 @@ namespace mg5amcCpu // *** DIAGRAM 393 OF 1240 *** // Wavefunction(s) for diagram number 393 - FFV1_2( w_fp[52], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[104] ); + FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] ); // Amplitude(s) for diagram number 393 - FFV1_0( w_fp[104], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7890,10 +7890,10 @@ namespace mg5amcCpu // *** DIAGRAM 394 OF 1240 *** // Wavefunction(s) for diagram number 394 - FFV1_1( w_fp[39], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[105] ); + FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[105] ); // Amplitude(s) for diagram number 394 - FFV1_0( w_fp[52], w_fp[105], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[105], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7906,7 +7906,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 395 - FFV1_0( w_fp[52], w_fp[39], w_fp[86], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7921,7 +7921,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 396 - FFV1_0( w_fp[104], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7931,10 +7931,10 @@ namespace mg5amcCpu // *** DIAGRAM 397 OF 1240 *** // Wavefunction(s) for diagram number 397 - FFV1_1( w_fp[47], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[106] ); + FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] ); // Amplitude(s) for diagram number 397 - FFV1_0( w_fp[52], w_fp[106], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[106], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7947,7 +7947,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 398 - FFV1_0( w_fp[52], w_fp[47], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7962,7 +7962,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 399 - FFV1_0( w_fp[104], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7977,7 +7977,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 400 - FFV1_0( w_fp[52], w_fp[102], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[102], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7992,7 +7992,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 401 - FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8011,7 +8011,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 402 - FFV1_0( w_fp[71], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8026,7 +8026,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 403 - FFV1_0( w_fp[3], w_fp[102], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8045,7 +8045,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 404 - FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8060,7 +8060,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 405 - FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8079,7 +8079,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 406 - FFV1_0( w_fp[3], w_fp[94], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[94], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8098,7 +8098,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 407 - FFV1_0( w_fp[71], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8117,7 +8117,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 408 - VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8137,7 +8137,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8157,7 +8157,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here 
the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8181,10 +8181,10 @@ namespace mg5amcCpu // *** DIAGRAM 409 OF 1240 *** // Wavefunction(s) for diagram number 409 - VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 409 - VVV1_0( w_fp[8], w_fp[6], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8208,10 +8208,10 @@ namespace mg5amcCpu // *** DIAGRAM 410 OF 1240 *** // Wavefunction(s) for diagram number 410 - VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 0., 0., w_fp[107] ); + VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[107] ); // Amplitude(s) for diagram number 410 - VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8238,7 +8238,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 411 - VVV1_0( w_fp[66], w_fp[8], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[8], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8265,7 +8265,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 412 - FFV1_0( w_fp[3], w_fp[47], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8284,7 +8284,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 413 - FFV1_0( w_fp[3], w_fp[106], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[106], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8299,7 +8299,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 414 - FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8314,7 +8314,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 415 - FFV1_0( w_fp[41], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8333,7 +8333,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 416 - FFV1_0( w_fp[41], w_fp[102], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[102], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8348,7 +8348,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 417 - FFV1_0( w_fp[101], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[101], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8363,7 +8363,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 418 - FFV1_0( w_fp[76], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[102], 
w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8378,7 +8378,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 419 - FFV1_0( w_fp[3], w_fp[102], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8397,7 +8397,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 420 - FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8412,7 +8412,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 421 - FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8431,7 +8431,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 422 - FFV1_0( w_fp[3], w_fp[97], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[97], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8450,7 +8450,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 423 - FFV1_0( w_fp[76], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -8469,7 +8469,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 424 - VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8489,7 +8489,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8509,7 +8509,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8533,10 +8533,10 @@ namespace mg5amcCpu // *** DIAGRAM 425 OF 1240 *** // Wavefunction(s) for diagram number 425 - VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 425 - VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8563,7 +8563,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 426 - VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], 
w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8590,7 +8590,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 427 - VVV1_0( w_fp[72], w_fp[8], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[8], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8617,7 +8617,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 428 - FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8636,7 +8636,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 429 - FFV1_0( w_fp[3], w_fp[105], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[105], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8651,7 +8651,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 430 - FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8666,7 +8666,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 431 - FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv 
and denominators_sv (#473) #endif @@ -8685,7 +8685,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 432 - FFV1_0( w_fp[38], w_fp[102], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[102], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8700,7 +8700,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 433 - FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8712,10 +8712,10 @@ namespace mg5amcCpu // *** DIAGRAM 434 OF 1240 *** // Wavefunction(s) for diagram number 434 - VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 434 - VVV1_0( w_fp[104], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8742,7 +8742,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 435 - VVV1_0( w_fp[104], w_fp[11], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[11], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8769,7 +8769,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 436 - VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8789,7 +8789,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8809,7 +8809,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8833,10 +8833,10 @@ namespace mg5amcCpu // *** DIAGRAM 437 OF 1240 *** // Wavefunction(s) for diagram number 437 - VVV1P0_1( w_fp[1], w_fp[8], COUPs[0], 0., 0., w_fp[108] ); + VVV1P0_1( w_fp[1], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[108] ); // Amplitude(s) for diagram number 437 - VVV1_0( w_fp[62], w_fp[108], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[108], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8863,7 +8863,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 438 - VVV1_0( w_fp[62], w_fp[1], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8890,7 +8890,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 439 - VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], 
COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8910,7 +8910,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[115] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8930,7 +8930,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8957,7 +8957,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 440 - VVV1_0( w_fp[86], w_fp[108], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[108], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8984,7 +8984,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 441 - VVV1_0( w_fp[86], w_fp[1], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9011,7 +9011,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 442 - VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], 
w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9031,7 +9031,7 @@ namespace mg5amcCpu jamp_sv[94] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9051,7 +9051,7 @@ namespace mg5amcCpu jamp_sv[99] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9075,12 +9075,12 @@ namespace mg5amcCpu // *** DIAGRAM 443 OF 1240 *** // Wavefunction(s) for diagram number 443 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[109] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 443 - VVV1_0( w_fp[8], w_fp[6], w_fp[109], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[109], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9100,7 +9100,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[116] -= 
amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[110], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[110], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9120,7 +9120,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[115] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[111], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9144,12 +9144,12 @@ namespace mg5amcCpu // *** DIAGRAM 444 OF 1240 *** // Wavefunction(s) for diagram number 444 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[112] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[113] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[114] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[113] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[114] ); // Amplitude(s) for diagram number 444 - VVV1_0( w_fp[8], w_fp[5], w_fp[112], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[112], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9169,7 +9169,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[113], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[113], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-9189,7 +9189,7 @@ namespace mg5amcCpu jamp_sv[94] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[114], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[114], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9216,7 +9216,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 445 - VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9236,7 +9236,7 @@ namespace mg5amcCpu jamp_sv[94] -= amp_sv[0]; jamp_sv[115] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[90], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[90], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9256,7 +9256,7 @@ namespace mg5amcCpu jamp_sv[94] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[96], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9283,7 +9283,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 446 - VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9303,7 +9303,7 @@ namespace mg5amcCpu jamp_sv[116] -= 
amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9323,7 +9323,7 @@ namespace mg5amcCpu jamp_sv[93] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9350,7 +9350,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 447 - VVV1_0( w_fp[8], w_fp[29], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[29], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9377,7 +9377,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 448 - VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9404,7 +9404,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 449 - VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9431,7 +9431,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 450 - VVV1_0( w_fp[104], 
w_fp[45], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[45], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9450,7 +9450,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 451 - FFV1_0( w_fp[3], w_fp[44], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[44], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9465,7 +9465,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 452 - FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9478,7 +9478,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 453 - FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9491,7 +9491,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 454 - FFV1_0( w_fp[3], w_fp[89], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[89], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9506,7 +9506,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 455 - VVV1_0( w_fp[86], w_fp[1], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code 
base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9525,7 +9525,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 456 - FFV1_0( w_fp[3], w_fp[39], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9537,7 +9537,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[113], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[113], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9549,7 +9549,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[114], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[114], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9568,7 +9568,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 457 - FFV1_0( w_fp[41], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9583,7 +9583,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 458 - FFV1_0( w_fp[41], w_fp[105], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[105], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here 
the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9596,7 +9596,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 459 - FFV1_0( w_fp[101], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[101], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9609,7 +9609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 460 - VVV1_0( w_fp[104], w_fp[51], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[51], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9628,7 +9628,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 461 - FFV1_0( w_fp[3], w_fp[50], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[50], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9643,7 +9643,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 462 - FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9656,7 +9656,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 463 - FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9669,7 +9669,7 @@ namespace mg5amcCpu // (none) 
// Amplitude(s) for diagram number 464 - FFV1_0( w_fp[3], w_fp[91], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[91], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9684,7 +9684,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 465 - VVV1_0( w_fp[62], w_fp[1], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9703,7 +9703,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 466 - FFV1_0( w_fp[3], w_fp[47], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[109], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9715,7 +9715,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[110], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9727,7 +9727,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9746,7 +9746,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 467 - FFV1_0( w_fp[38], w_fp[47], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9761,7 +9761,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 468 - FFV1_0( w_fp[38], w_fp[106], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[106], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9774,7 +9774,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 469 - FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9787,7 +9787,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 470 - VVV1_0( w_fp[104], w_fp[23], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[23], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9806,7 +9806,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 471 - FFV1_0( w_fp[48], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9821,7 +9821,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 472 - FFV1_0( w_fp[58], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[102], w_fp[6], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9834,7 +9834,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 473 - FFV1_0( w_fp[48], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9847,7 +9847,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 474 - FFV1_0( w_fp[58], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9862,7 +9862,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 475 - VVV1_0( w_fp[86], w_fp[1], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9881,7 +9881,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 476 - FFV1_0( w_fp[38], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9893,7 +9893,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[113], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[113], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the 
code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9905,7 +9905,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[114], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[114], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9924,7 +9924,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 477 - VVV1_0( w_fp[104], w_fp[20], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[20], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9943,7 +9943,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 478 - FFV1_0( w_fp[40], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9958,7 +9958,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 479 - FFV1_0( w_fp[60], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9971,7 +9971,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 480 - FFV1_0( w_fp[40], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -9984,7 +9984,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 481 - FFV1_0( w_fp[60], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9999,7 +9999,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 482 - VVV1_0( w_fp[62], w_fp[1], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10018,7 +10018,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 483 - FFV1_0( w_fp[41], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10030,7 +10030,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10042,7 +10042,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -10061,7 +10061,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 484 - FFV1_0( w_fp[3], w_fp[18], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[18], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10080,7 +10080,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 485 - FFV1_0( w_fp[12], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10099,7 +10099,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 486 - FFV1_0( w_fp[3], w_fp[102], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10118,7 +10118,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 487 - FFV1_0( w_fp[12], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10133,7 +10133,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 488 - FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10152,7 +10152,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 489 - 
FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10167,7 +10167,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 490 - FFV1_0( w_fp[3], w_fp[102], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[55], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10179,7 +10179,7 @@ namespace mg5amcCpu jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[83], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10191,7 +10191,7 @@ namespace mg5amcCpu jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[84], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10210,7 +10210,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 491 - FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10222,7 +10222,7 @@ namespace mg5amcCpu jamp_sv[93] -= cxtype( 0, 1 ) * 
amp_sv[0]; jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10234,7 +10234,7 @@ namespace mg5amcCpu jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10253,7 +10253,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 492 - VVV1_0( w_fp[92], w_fp[55], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[55], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10273,7 +10273,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[83], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[83], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10293,7 +10293,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[84], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[84], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-10317,11 +10317,11 @@ namespace mg5amcCpu // *** DIAGRAM 493 OF 1240 *** // Wavefunction(s) for diagram number 493 - VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 0., 0., w_fp[92] ); - FFV1_2( w_fp[3], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[92] ); + FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 493 - FFV1_0( w_fp[99], w_fp[87], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[87], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10334,7 +10334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 494 - FFV1_0( w_fp[99], w_fp[85], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[85], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10344,10 +10344,10 @@ namespace mg5amcCpu // *** DIAGRAM 495 OF 1240 *** // Wavefunction(s) for diagram number 495 - VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 0., 0., w_fp[102] ); + VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[102] ); // Amplitude(s) for diagram number 495 - VVV1_0( w_fp[102], w_fp[34], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[34], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10366,7 +10366,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 496 - FFV1_0( w_fp[3], w_fp[85], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[85], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -10378,10 +10378,10 @@ namespace mg5amcCpu // *** DIAGRAM 497 OF 1240 *** // Wavefunction(s) for diagram number 497 - VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 497 - VVV1_0( w_fp[104], w_fp[34], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[34], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10400,7 +10400,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 498 - FFV1_0( w_fp[3], w_fp[87], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[87], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10412,12 +10412,12 @@ namespace mg5amcCpu // *** DIAGRAM 499 OF 1240 *** // Wavefunction(s) for diagram number 499 - VVVV1P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[111] ); - VVVV3P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[109] ); + VVVV1P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] ); + VVVV3P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[109] ); // Amplitude(s) for diagram number 499 - FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10429,7 +10429,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; 
- FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10441,7 +10441,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10457,10 +10457,10 @@ namespace mg5amcCpu // *** DIAGRAM 500 OF 1240 *** // Wavefunction(s) for diagram number 500 - FFV1_1( w_fp[77], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[62] ); + FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] ); // Amplitude(s) for diagram number 500 - FFV1_0( w_fp[46], w_fp[62], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[62], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10470,10 +10470,10 @@ namespace mg5amcCpu // *** DIAGRAM 501 OF 1240 *** // Wavefunction(s) for diagram number 501 - FFV1_2( w_fp[46], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[114] ); + FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] ); // Amplitude(s) for diagram number 501 - FFV1_0( w_fp[114], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[114], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10486,7 +10486,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 502 - FFV1_0( 
w_fp[46], w_fp[77], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10501,7 +10501,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 503 - FFV1_0( w_fp[41], w_fp[62], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[62], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10511,10 +10511,10 @@ namespace mg5amcCpu // *** DIAGRAM 504 OF 1240 *** // Wavefunction(s) for diagram number 504 - FFV1_2( w_fp[41], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[113] ); + FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] ); // Amplitude(s) for diagram number 504 - FFV1_0( w_fp[113], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[113], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10527,7 +10527,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 505 - FFV1_0( w_fp[41], w_fp[77], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10542,7 +10542,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 506 - FFV1_0( w_fp[3], w_fp[62], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[62], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10557,7 +10557,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 507 - FFV1_0( w_fp[99], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10569,10 +10569,10 @@ namespace mg5amcCpu // *** DIAGRAM 508 OF 1240 *** // Wavefunction(s) for diagram number 508 - VVV1P0_1( w_fp[92], w_fp[27], COUPs[0], 0., 0., w_fp[62] ); + VVV1P0_1( w_fp[92], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[62] ); // Amplitude(s) for diagram number 508 - FFV1_0( w_fp[3], w_fp[77], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10588,10 +10588,10 @@ namespace mg5amcCpu // *** DIAGRAM 509 OF 1240 *** // Wavefunction(s) for diagram number 509 - FFV1_1( w_fp[2], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[112] ); + FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[112] ); // Amplitude(s) for diagram number 509 - FFV1_0( w_fp[56], w_fp[112], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[112], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10604,7 +10604,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 510 - FFV1_0( w_fp[21], w_fp[112], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[112], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10617,7 +10617,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 511 - VVV1_0( w_fp[102], w_fp[103], w_fp[6], COUPs[0], 
&_fp[0] ); + VVV1_0( w_fp[102], w_fp[103], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10636,7 +10636,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 512 - FFV1_0( w_fp[21], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10651,7 +10651,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 513 - VVV1_0( w_fp[104], w_fp[103], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[103], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10670,7 +10670,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 514 - FFV1_0( w_fp[56], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10685,7 +10685,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 515 - FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10697,7 +10697,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[110], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10709,7 +10709,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10725,10 +10725,10 @@ namespace mg5amcCpu // *** DIAGRAM 516 OF 1240 *** // Wavefunction(s) for diagram number 516 - FFV1_2( w_fp[52], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[86] ); + FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] ); // Amplitude(s) for diagram number 516 - FFV1_0( w_fp[86], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10738,10 +10738,10 @@ namespace mg5amcCpu // *** DIAGRAM 517 OF 1240 *** // Wavefunction(s) for diagram number 517 - FFV1_1( w_fp[33], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); + FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 517 - FFV1_0( w_fp[52], w_fp[98], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[98], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10754,7 +10754,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 518 - FFV1_0( w_fp[52], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[104], COUPs[1], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10769,7 +10769,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 519 - FFV1_0( w_fp[86], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10779,10 +10779,10 @@ namespace mg5amcCpu // *** DIAGRAM 520 OF 1240 *** // Wavefunction(s) for diagram number 520 - FFV1_1( w_fp[47], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[106] ); + FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] ); // Amplitude(s) for diagram number 520 - FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10795,7 +10795,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 521 - FFV1_0( w_fp[52], w_fp[47], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10810,7 +10810,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 522 - FFV1_0( w_fp[86], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10825,7 +10825,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 523 - FFV1_0( w_fp[52], w_fp[112], w_fp[27], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[52], w_fp[112], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10840,7 +10840,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 524 - FFV1_0( w_fp[52], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10859,7 +10859,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 525 - FFV1_0( w_fp[65], w_fp[112], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[112], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10874,7 +10874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 526 - FFV1_0( w_fp[3], w_fp[112], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10893,7 +10893,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 527 - FFV1_0( w_fp[99], w_fp[93], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[93], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10908,7 +10908,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 528 - FFV1_0( w_fp[99], w_fp[2], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10927,7 +10927,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 529 - FFV1_0( w_fp[3], w_fp[93], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[93], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10946,7 +10946,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 530 - FFV1_0( w_fp[65], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10965,7 +10965,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 531 - VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10985,7 +10985,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11005,7 +11005,7 @@ namespace mg5amcCpu jamp_sv[105] -= amp_sv[0]; jamp_sv[110] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support 
updates numerators_sv and denominators_sv (#473) #endif @@ -11029,10 +11029,10 @@ namespace mg5amcCpu // *** DIAGRAM 532 OF 1240 *** // Wavefunction(s) for diagram number 532 - VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 532 - VVV1_0( w_fp[8], w_fp[6], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11056,10 +11056,10 @@ namespace mg5amcCpu // *** DIAGRAM 533 OF 1240 *** // Wavefunction(s) for diagram number 533 - VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 0., 0., w_fp[101] ); + VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[101] ); // Amplitude(s) for diagram number 533 - VVV1_0( w_fp[61], w_fp[6], w_fp[101], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[101], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11086,7 +11086,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 534 - VVV1_0( w_fp[61], w_fp[8], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[8], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11113,7 +11113,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 535 - FFV1_0( w_fp[3], w_fp[47], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11132,7 +11132,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 536 - FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11147,7 +11147,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 537 - FFV1_0( w_fp[99], w_fp[47], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11162,7 +11162,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 538 - FFV1_0( w_fp[41], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11181,7 +11181,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 539 - FFV1_0( w_fp[41], w_fp[112], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[112], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11196,7 +11196,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 540 - FFV1_0( w_fp[113], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[113], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11211,7 +11211,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 541 - FFV1_0( w_fp[76], w_fp[112], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[112], w_fp[4], COUPs[1], 1.0, &_fp[0] 
); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11226,7 +11226,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 542 - FFV1_0( w_fp[3], w_fp[112], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11245,7 +11245,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 543 - FFV1_0( w_fp[99], w_fp[97], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[97], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11260,7 +11260,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 544 - FFV1_0( w_fp[99], w_fp[2], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11279,7 +11279,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 545 - FFV1_0( w_fp[3], w_fp[97], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[97], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11298,7 +11298,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 546 - FFV1_0( w_fp[76], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -11317,7 +11317,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 547 - VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11337,7 +11337,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[103] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11357,7 +11357,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11381,10 +11381,10 @@ namespace mg5amcCpu // *** DIAGRAM 548 OF 1240 *** // Wavefunction(s) for diagram number 548 - VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 548 - VVV1_0( w_fp[8], w_fp[4], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11411,7 +11411,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 549 - VVV1_0( w_fp[72], w_fp[4], w_fp[101], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[101], 
COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11438,7 +11438,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 550 - VVV1_0( w_fp[72], w_fp[8], w_fp[102], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[8], w_fp[102], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11465,7 +11465,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 551 - FFV1_0( w_fp[3], w_fp[33], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11484,7 +11484,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 552 - FFV1_0( w_fp[3], w_fp[98], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[98], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11499,7 +11499,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 553 - FFV1_0( w_fp[99], w_fp[33], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11514,7 +11514,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 554 - FFV1_0( w_fp[46], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -11533,7 +11533,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 555 - FFV1_0( w_fp[46], w_fp[112], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[112], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11548,7 +11548,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 556 - FFV1_0( w_fp[114], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[114], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11560,10 +11560,10 @@ namespace mg5amcCpu // *** DIAGRAM 557 OF 1240 *** // Wavefunction(s) for diagram number 557 - VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 557 - VVV1_0( w_fp[86], w_fp[13], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[13], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11590,7 +11590,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 558 - VVV1_0( w_fp[86], w_fp[11], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[11], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11617,7 +11617,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 559 - VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11637,7 +11637,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11657,7 +11657,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11684,7 +11684,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 560 - VVV1_0( w_fp[102], w_fp[108], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[108], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11711,7 +11711,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 561 - VVV1_0( w_fp[102], w_fp[1], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[1], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11738,7 +11738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 562 - VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -11758,7 +11758,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11778,7 +11778,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11805,7 +11805,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 563 - VVV1_0( w_fp[104], w_fp[108], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[108], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11832,7 +11832,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 564 - VVV1_0( w_fp[104], w_fp[1], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[1], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11859,7 +11859,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 565 - VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -11879,7 +11879,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[101] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11899,7 +11899,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11923,12 +11923,12 @@ namespace mg5amcCpu // *** DIAGRAM 566 OF 1240 *** // Wavefunction(s) for diagram number 566 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[105] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[107] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] ); // Amplitude(s) for diagram number 566 - VVV1_0( w_fp[8], w_fp[6], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[105], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11948,7 +11948,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[95], COUPs[0], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11968,7 +11968,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11992,12 +11992,12 @@ namespace mg5amcCpu // *** DIAGRAM 567 OF 1240 *** // Wavefunction(s) for diagram number 567 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[96] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[90] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[88] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] ); // Amplitude(s) for diagram number 567 - VVV1_0( w_fp[8], w_fp[4], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[96], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12017,7 +12017,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[103] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12037,7 +12037,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], 
w_fp[88], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[88], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12064,7 +12064,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 568 - VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12084,7 +12084,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12104,7 +12104,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[101] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12131,7 +12131,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 569 - VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12151,7 +12151,7 @@ namespace mg5amcCpu jamp_sv[110] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); 
+ VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12171,7 +12171,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[110] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12198,7 +12198,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 570 - VVV1_0( w_fp[8], w_fp[27], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[27], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12225,7 +12225,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 571 - VVV1_0( w_fp[1], w_fp[27], w_fp[101], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[101], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12252,7 +12252,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 572 - VVV1_0( w_fp[1], w_fp[8], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12279,7 +12279,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 573 - VVV1_0( w_fp[86], w_fp[37], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[37], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12298,7 +12298,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 574 - FFV1_0( w_fp[3], w_fp[36], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[36], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12313,7 +12313,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 575 - FFV1_0( w_fp[99], w_fp[100], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[100], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12326,7 +12326,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 576 - FFV1_0( w_fp[99], w_fp[36], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[36], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12339,7 +12339,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 577 - FFV1_0( w_fp[3], w_fp[100], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[100], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12354,7 +12354,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 578 - VVV1_0( w_fp[104], w_fp[1], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[1], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-12373,7 +12373,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 579 - FFV1_0( w_fp[3], w_fp[33], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12385,7 +12385,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12397,7 +12397,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[88], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12416,7 +12416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 580 - FFV1_0( w_fp[41], w_fp[33], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12431,7 +12431,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 581 - FFV1_0( w_fp[41], w_fp[98], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[98], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -12444,7 +12444,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 582 - FFV1_0( w_fp[113], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[113], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12457,7 +12457,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 583 - VVV1_0( w_fp[86], w_fp[51], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[51], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12476,7 +12476,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 584 - FFV1_0( w_fp[3], w_fp[49], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[49], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12491,7 +12491,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 585 - FFV1_0( w_fp[99], w_fp[91], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[91], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12504,7 +12504,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 586 - FFV1_0( w_fp[99], w_fp[49], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[49], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12517,7 +12517,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 587 - FFV1_0( w_fp[3], w_fp[91], w_fp[102], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[3], w_fp[91], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12532,7 +12532,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 588 - VVV1_0( w_fp[102], w_fp[1], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[1], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12551,7 +12551,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 589 - FFV1_0( w_fp[3], w_fp[47], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[105], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12563,7 +12563,7 @@ namespace mg5amcCpu jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[95], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12575,7 +12575,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[107], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12594,7 +12594,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 590 - FFV1_0( w_fp[46], w_fp[47], 
w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12609,7 +12609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 591 - FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12622,7 +12622,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 592 - FFV1_0( w_fp[114], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[114], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12635,7 +12635,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 593 - VVV1_0( w_fp[86], w_fp[54], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[54], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12654,7 +12654,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 594 - FFV1_0( w_fp[53], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12669,7 +12669,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 595 - FFV1_0( w_fp[78], w_fp[112], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[112], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the 
code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12682,7 +12682,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 596 - FFV1_0( w_fp[53], w_fp[112], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[112], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12695,7 +12695,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 597 - FFV1_0( w_fp[78], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12710,7 +12710,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 598 - VVV1_0( w_fp[104], w_fp[1], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[1], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12729,7 +12729,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 599 - FFV1_0( w_fp[46], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12741,7 +12741,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -12753,7 +12753,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[88], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12772,7 +12772,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 600 - VVV1_0( w_fp[86], w_fp[20], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[20], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12791,7 +12791,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 601 - FFV1_0( w_fp[28], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12806,7 +12806,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 602 - FFV1_0( w_fp[60], w_fp[112], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[112], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12819,7 +12819,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 603 - FFV1_0( w_fp[28], w_fp[112], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[112], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12832,7 +12832,7 
@@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 604 - FFV1_0( w_fp[60], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12847,7 +12847,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 605 - VVV1_0( w_fp[102], w_fp[1], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[1], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12866,7 +12866,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 606 - FFV1_0( w_fp[41], w_fp[2], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[105], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12878,7 +12878,7 @@ namespace mg5amcCpu jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12890,7 +12890,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[107], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-12909,7 +12909,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 607 - FFV1_0( w_fp[3], w_fp[15], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[15], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12928,7 +12928,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 608 - FFV1_0( w_fp[14], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12947,7 +12947,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 609 - FFV1_0( w_fp[3], w_fp[112], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12966,7 +12966,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 610 - FFV1_0( w_fp[14], w_fp[112], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[112], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12981,7 +12981,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 611 - FFV1_0( w_fp[99], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13000,7 +13000,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 612 - FFV1_0( w_fp[99], w_fp[15], w_fp[1], COUPs[1], &_fp[0] ); 
+ FFV1_0( w_fp[99], w_fp[15], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13015,7 +13015,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 613 - FFV1_0( w_fp[3], w_fp[112], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[57], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13027,7 +13027,7 @@ namespace mg5amcCpu jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[112], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[81], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13039,7 +13039,7 @@ namespace mg5amcCpu jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[112], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[82], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13058,7 +13058,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 614 - FFV1_0( w_fp[99], w_fp[2], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[57], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13070,7 +13070,7 @@ namespace mg5amcCpu jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13082,7 +13082,7 @@ namespace mg5amcCpu jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13101,7 +13101,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 615 - VVV1_0( w_fp[92], w_fp[57], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[57], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13121,7 +13121,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[81], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[81], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13141,7 +13141,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[82], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[82], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13165,11 +13165,11 @@ namespace mg5amcCpu // *** 
DIAGRAM 616 OF 1240 *** // Wavefunction(s) for diagram number 616 - VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 0., 0., w_fp[92] ); - FFV1_2( w_fp[3], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[92] ); + FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 616 - FFV1_0( w_fp[99], w_fp[87], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[87], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13182,7 +13182,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 617 - FFV1_0( w_fp[99], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13192,10 +13192,10 @@ namespace mg5amcCpu // *** DIAGRAM 618 OF 1240 *** // Wavefunction(s) for diagram number 618 - VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 0., 0., w_fp[112] ); + VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[112] ); // Amplitude(s) for diagram number 618 - VVV1_0( w_fp[112], w_fp[34], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[34], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13214,7 +13214,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 619 - FFV1_0( w_fp[3], w_fp[9], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13226,10 +13226,10 @@ namespace mg5amcCpu // 
*** DIAGRAM 620 OF 1240 *** // Wavefunction(s) for diagram number 620 - VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 620 - VVV1_0( w_fp[86], w_fp[34], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[34], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13248,7 +13248,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 621 - FFV1_0( w_fp[3], w_fp[87], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[87], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13260,12 +13260,12 @@ namespace mg5amcCpu // *** DIAGRAM 622 OF 1240 *** // Wavefunction(s) for diagram number 622 - VVVV1P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[107] ); - VVVV3P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[105] ); + VVVV1P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[107] ); + VVVV3P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[105] ); // Amplitude(s) for diagram number 622 - FFV1_0( w_fp[3], w_fp[77], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[107], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13277,7 +13277,7 @@ namespace mg5amcCpu jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], &_fp[0] ); + 
FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13289,7 +13289,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[105], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13305,10 +13305,10 @@ namespace mg5amcCpu // *** DIAGRAM 623 OF 1240 *** // Wavefunction(s) for diagram number 623 - FFV1_1( w_fp[77], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[102] ); + FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] ); // Amplitude(s) for diagram number 623 - FFV1_0( w_fp[46], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13318,10 +13318,10 @@ namespace mg5amcCpu // *** DIAGRAM 624 OF 1240 *** // Wavefunction(s) for diagram number 624 - FFV1_2( w_fp[46], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[88] ); + FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] ); // Amplitude(s) for diagram number 624 - FFV1_0( w_fp[88], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13334,7 +13334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 625 - FFV1_0( w_fp[46], w_fp[77], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[46], w_fp[77], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13349,7 +13349,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 626 - FFV1_0( w_fp[38], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13359,10 +13359,10 @@ namespace mg5amcCpu // *** DIAGRAM 627 OF 1240 *** // Wavefunction(s) for diagram number 627 - FFV1_2( w_fp[38], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[90] ); + FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] ); // Amplitude(s) for diagram number 627 - FFV1_0( w_fp[90], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13375,7 +13375,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 628 - FFV1_0( w_fp[38], w_fp[77], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13390,7 +13390,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 629 - FFV1_0( w_fp[3], w_fp[102], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13405,7 +13405,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 630 - 
FFV1_0( w_fp[99], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13417,10 +13417,10 @@ namespace mg5amcCpu // *** DIAGRAM 631 OF 1240 *** // Wavefunction(s) for diagram number 631 - VVV1P0_1( w_fp[92], w_fp[24], COUPs[0], 0., 0., w_fp[102] ); + VVV1P0_1( w_fp[92], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[102] ); // Amplitude(s) for diagram number 631 - FFV1_0( w_fp[3], w_fp[77], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13436,10 +13436,10 @@ namespace mg5amcCpu // *** DIAGRAM 632 OF 1240 *** // Wavefunction(s) for diagram number 632 - FFV1_1( w_fp[2], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[96] ); + FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[96] ); // Amplitude(s) for diagram number 632 - FFV1_0( w_fp[56], w_fp[96], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[96], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13452,7 +13452,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 633 - FFV1_0( w_fp[22], w_fp[96], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[96], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13465,7 +13465,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 634 - VVV1_0( w_fp[112], w_fp[103], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[103], w_fp[5], COUPs[0], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13484,7 +13484,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 635 - FFV1_0( w_fp[22], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13499,7 +13499,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 636 - VVV1_0( w_fp[86], w_fp[103], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[103], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13518,7 +13518,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 637 - FFV1_0( w_fp[56], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13533,7 +13533,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 638 - FFV1_0( w_fp[52], w_fp[2], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[107], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13545,7 +13545,7 @@ namespace mg5amcCpu jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code 
base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13557,7 +13557,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[105], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13573,10 +13573,10 @@ namespace mg5amcCpu // *** DIAGRAM 639 OF 1240 *** // Wavefunction(s) for diagram number 639 - FFV1_2( w_fp[52], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[104] ); + FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] ); // Amplitude(s) for diagram number 639 - FFV1_0( w_fp[104], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13586,10 +13586,10 @@ namespace mg5amcCpu // *** DIAGRAM 640 OF 1240 *** // Wavefunction(s) for diagram number 640 - FFV1_1( w_fp[33], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[114] ); + FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] ); // Amplitude(s) for diagram number 640 - FFV1_0( w_fp[52], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13602,7 +13602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 641 - FFV1_0( w_fp[52], w_fp[33], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13617,7 +13617,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 642 - FFV1_0( w_fp[104], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13627,10 +13627,10 @@ namespace mg5amcCpu // *** DIAGRAM 643 OF 1240 *** // Wavefunction(s) for diagram number 643 - FFV1_1( w_fp[39], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[106] ); + FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] ); // Amplitude(s) for diagram number 643 - FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13643,7 +13643,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 644 - FFV1_0( w_fp[52], w_fp[39], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13658,7 +13658,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 645 - FFV1_0( w_fp[104], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13673,7 +13673,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 646 - FFV1_0( w_fp[52], w_fp[96], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[96], w_fp[24], COUPs[1], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13688,7 +13688,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 647 - FFV1_0( w_fp[52], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13707,7 +13707,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 648 - FFV1_0( w_fp[65], w_fp[96], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[96], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13722,7 +13722,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 649 - FFV1_0( w_fp[3], w_fp[96], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13741,7 +13741,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 650 - FFV1_0( w_fp[99], w_fp[93], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[93], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13756,7 +13756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 651 - FFV1_0( w_fp[99], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -13775,7 +13775,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 652 - FFV1_0( w_fp[3], w_fp[93], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[93], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13794,7 +13794,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 653 - FFV1_0( w_fp[65], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13813,7 +13813,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 654 - VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13833,7 +13833,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[96] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13853,7 +13853,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13877,10 
+13877,10 @@ namespace mg5amcCpu // *** DIAGRAM 655 OF 1240 *** // Wavefunction(s) for diagram number 655 - VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 655 - VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13904,10 +13904,10 @@ namespace mg5amcCpu // *** DIAGRAM 656 OF 1240 *** // Wavefunction(s) for diagram number 656 - VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 0., 0., w_fp[113] ); + VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[113] ); // Amplitude(s) for diagram number 656 - VVV1_0( w_fp[61], w_fp[5], w_fp[113], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[113], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13934,7 +13934,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 657 - VVV1_0( w_fp[61], w_fp[8], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[8], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13961,7 +13961,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 658 - FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13980,7 +13980,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 659 - FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13995,7 +13995,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 660 - FFV1_0( w_fp[99], w_fp[39], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14010,7 +14010,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 661 - FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14029,7 +14029,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 662 - FFV1_0( w_fp[38], w_fp[96], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[96], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14044,7 +14044,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 663 - FFV1_0( w_fp[90], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14059,7 +14059,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 664 - FFV1_0( w_fp[71], w_fp[96], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[96], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14074,7 +14074,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 665 - FFV1_0( w_fp[3], w_fp[96], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14093,7 +14093,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 666 - FFV1_0( w_fp[99], w_fp[94], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[94], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14108,7 +14108,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 667 - FFV1_0( w_fp[99], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14127,7 +14127,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 668 - FFV1_0( w_fp[3], w_fp[94], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[94], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14146,7 +14146,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 669 - FFV1_0( w_fp[71], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14165,7 +14165,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 670 - VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14185,7 +14185,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[97] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14205,7 +14205,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14229,10 +14229,10 @@ namespace mg5amcCpu // *** DIAGRAM 671 OF 1240 *** // Wavefunction(s) for diagram number 671 - VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 671 - VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14259,7 +14259,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 672 - VVV1_0( w_fp[66], w_fp[4], w_fp[113], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[113], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here 
the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14286,7 +14286,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 673 - VVV1_0( w_fp[66], w_fp[8], w_fp[112], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[8], w_fp[112], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14313,7 +14313,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 674 - FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14332,7 +14332,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 675 - FFV1_0( w_fp[3], w_fp[114], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14347,7 +14347,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 676 - FFV1_0( w_fp[99], w_fp[33], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14362,7 +14362,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 677 - FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14381,7 +14381,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 678 - FFV1_0( w_fp[46], w_fp[96], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[96], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14396,7 +14396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 679 - FFV1_0( w_fp[88], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14408,10 +14408,10 @@ namespace mg5amcCpu // *** DIAGRAM 680 OF 1240 *** // Wavefunction(s) for diagram number 680 - VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 680 - VVV1_0( w_fp[104], w_fp[13], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[13], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14438,7 +14438,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 681 - VVV1_0( w_fp[104], w_fp[10], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[10], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14465,7 +14465,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 682 - VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv 
and denominators_sv (#473) #endif @@ -14485,7 +14485,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14505,7 +14505,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14532,7 +14532,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 683 - VVV1_0( w_fp[112], w_fp[108], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[108], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14559,7 +14559,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 684 - VVV1_0( w_fp[112], w_fp[1], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[1], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14586,7 +14586,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 685 - VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -14606,7 +14606,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14626,7 +14626,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14653,7 +14653,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 686 - VVV1_0( w_fp[86], w_fp[108], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[108], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14680,7 +14680,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 687 - VVV1_0( w_fp[86], w_fp[1], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14707,7 +14707,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 688 - VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14727,7 +14727,7 @@ 
namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14747,7 +14747,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14771,12 +14771,12 @@ namespace mg5amcCpu // *** DIAGRAM 689 OF 1240 *** // Wavefunction(s) for diagram number 689 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[98] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[62] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[101] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[98] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[62] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[101] ); // Amplitude(s) for diagram number 689 - VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14796,7 +14796,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[96] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14816,7 +14816,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[101], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[101], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14840,12 +14840,12 @@ namespace mg5amcCpu // *** DIAGRAM 690 OF 1240 *** // Wavefunction(s) for diagram number 690 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[109] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 690 - VVV1_0( w_fp[8], w_fp[4], w_fp[109], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[109], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14865,7 +14865,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[97] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[110], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[110], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14885,7 +14885,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[111], COUPs[0], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14912,7 +14912,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 691 - VVV1_0( w_fp[1], w_fp[8], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14932,7 +14932,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[99] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14952,7 +14952,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[105], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14979,7 +14979,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 692 - VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14999,7 +14999,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[96] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15019,7 +15019,7 @@ namespace mg5amcCpu jamp_sv[97] += amp_sv[0]; jamp_sv[99] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15046,7 +15046,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 693 - VVV1_0( w_fp[8], w_fp[24], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[24], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15073,7 +15073,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 694 - VVV1_0( w_fp[1], w_fp[24], w_fp[113], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[113], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15100,7 +15100,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 695 - VVV1_0( w_fp[1], w_fp[8], w_fp[102], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[102], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15127,7 +15127,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 696 - VVV1_0( w_fp[104], w_fp[37], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[37], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -15146,7 +15146,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 697 - FFV1_0( w_fp[3], w_fp[35], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[35], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15161,7 +15161,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 698 - FFV1_0( w_fp[99], w_fp[100], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[100], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15174,7 +15174,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 699 - FFV1_0( w_fp[99], w_fp[35], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[35], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15187,7 +15187,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 700 - FFV1_0( w_fp[3], w_fp[100], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[100], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15202,7 +15202,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 701 - VVV1_0( w_fp[86], w_fp[1], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15221,7 +15221,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 702 - 
FFV1_0( w_fp[3], w_fp[33], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[109], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15233,7 +15233,7 @@ namespace mg5amcCpu jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[110], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15245,7 +15245,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15264,7 +15264,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 703 - FFV1_0( w_fp[38], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15279,7 +15279,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 704 - FFV1_0( w_fp[38], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15292,7 +15292,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 705 - FFV1_0( w_fp[90], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15305,7 +15305,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 706 - VVV1_0( w_fp[104], w_fp[45], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[45], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15324,7 +15324,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 707 - FFV1_0( w_fp[3], w_fp[43], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[43], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15339,7 +15339,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 708 - FFV1_0( w_fp[99], w_fp[89], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[89], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15352,7 +15352,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 709 - FFV1_0( w_fp[99], w_fp[43], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[43], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15365,7 +15365,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 710 - FFV1_0( w_fp[3], w_fp[89], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[89], w_fp[112], COUPs[1], 1.0, &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15380,7 +15380,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 711 - VVV1_0( w_fp[112], w_fp[1], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[1], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15399,7 +15399,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 712 - FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15411,7 +15411,7 @@ namespace mg5amcCpu jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15423,7 +15423,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15442,7 +15442,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 713 - FFV1_0( w_fp[46], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[104], COUPs[1], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15457,7 +15457,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 714 - FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15470,7 +15470,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 715 - FFV1_0( w_fp[88], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15483,7 +15483,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 716 - VVV1_0( w_fp[104], w_fp[54], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[54], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15502,7 +15502,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 717 - FFV1_0( w_fp[7], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15517,7 +15517,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 718 - FFV1_0( w_fp[78], w_fp[96], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[96], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -15530,7 +15530,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 719 - FFV1_0( w_fp[7], w_fp[96], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[96], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15543,7 +15543,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 720 - FFV1_0( w_fp[78], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15558,7 +15558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 721 - VVV1_0( w_fp[86], w_fp[1], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15577,7 +15577,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 722 - FFV1_0( w_fp[46], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15589,7 +15589,7 @@ namespace mg5amcCpu jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15601,7 +15601,7 @@ namespace 
mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15620,7 +15620,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 723 - VVV1_0( w_fp[104], w_fp[23], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[23], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15639,7 +15639,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 724 - FFV1_0( w_fp[25], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15654,7 +15654,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 725 - FFV1_0( w_fp[58], w_fp[96], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[96], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15667,7 +15667,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 726 - FFV1_0( w_fp[25], w_fp[96], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[96], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15680,7 +15680,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 727 - 
FFV1_0( w_fp[58], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15695,7 +15695,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 728 - VVV1_0( w_fp[112], w_fp[1], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[1], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15714,7 +15714,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 729 - FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15726,7 +15726,7 @@ namespace mg5amcCpu jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15738,7 +15738,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15757,7 +15757,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 730 - FFV1_0( w_fp[3], w_fp[17], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15776,7 +15776,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 731 - FFV1_0( w_fp[26], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15795,7 +15795,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 732 - FFV1_0( w_fp[3], w_fp[96], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15814,7 +15814,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 733 - FFV1_0( w_fp[26], w_fp[96], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[96], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15829,7 +15829,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 734 - FFV1_0( w_fp[99], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15848,7 +15848,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 735 - FFV1_0( w_fp[99], w_fp[17], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[17], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15863,7 +15863,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 736 - FFV1_0( w_fp[3], w_fp[96], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[73], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15875,7 +15875,7 @@ namespace mg5amcCpu jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[96], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[79], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15887,7 +15887,7 @@ namespace mg5amcCpu jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[96], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[80], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15906,7 +15906,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 737 - FFV1_0( w_fp[99], w_fp[2], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[73], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15918,7 +15918,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[79], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15930,7 +15930,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15949,7 +15949,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 738 - VVV1_0( w_fp[92], w_fp[73], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[73], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15969,7 +15969,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[92], w_fp[79], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[79], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15989,7 +15989,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[92], w_fp[80], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[80], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16013,10 +16013,10 @@ namespace mg5amcCpu // *** DIAGRAM 739 OF 1240 *** // Wavefunction(s) for diagram number 739 - FFV1_1( w_fp[77], 
w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[92] ); + FFV1_1( w_fp[77], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[92] ); // Amplitude(s) for diagram number 739 - FFV1_0( w_fp[7], w_fp[92], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[92], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16028,7 +16028,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 740 - FFV1_0( w_fp[53], w_fp[92], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[92], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16037,10 +16037,10 @@ namespace mg5amcCpu // *** DIAGRAM 741 OF 1240 *** // Wavefunction(s) for diagram number 741 - FFV1_2( w_fp[46], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + FFV1_2( w_fp[46], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 741 - FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16052,7 +16052,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 742 - FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16064,7 +16064,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 743 - FFV1_0( w_fp[53], w_fp[9], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[9], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16076,7 +16076,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 744 - FFV1_0( w_fp[7], w_fp[85], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[85], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16088,7 +16088,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 745 - FFV1_0( w_fp[46], w_fp[92], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[92], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16101,7 +16101,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 746 - FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16111,10 +16111,10 @@ namespace mg5amcCpu // *** DIAGRAM 747 OF 1240 *** // Wavefunction(s) for diagram number 747 - VVV1P0_1( w_fp[0], w_fp[29], COUPs[0], 0., 0., w_fp[96] ); + VVV1P0_1( w_fp[0], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[96] ); // Amplitude(s) for diagram number 747 - FFV1_0( w_fp[46], w_fp[77], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16129,7 +16129,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 748 - FFV1_0( w_fp[25], w_fp[92], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[92], 
w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16141,7 +16141,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 749 - FFV1_0( w_fp[48], w_fp[92], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[92], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16150,10 +16150,10 @@ namespace mg5amcCpu // *** DIAGRAM 750 OF 1240 *** // Wavefunction(s) for diagram number 750 - FFV1_2( w_fp[38], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[104] ); + FFV1_2( w_fp[38], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] ); // Amplitude(s) for diagram number 750 - FFV1_0( w_fp[104], w_fp[87], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[87], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16165,7 +16165,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 751 - FFV1_0( w_fp[104], w_fp[85], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[85], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16177,7 +16177,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 752 - FFV1_0( w_fp[48], w_fp[87], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[87], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16189,7 +16189,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 753 - FFV1_0( w_fp[25], w_fp[85], 
w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[85], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16201,7 +16201,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 754 - FFV1_0( w_fp[38], w_fp[92], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[92], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16214,7 +16214,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 755 - FFV1_0( w_fp[104], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16224,10 +16224,10 @@ namespace mg5amcCpu // *** DIAGRAM 756 OF 1240 *** // Wavefunction(s) for diagram number 756 - VVV1P0_1( w_fp[0], w_fp[27], COUPs[0], 0., 0., w_fp[101] ); + VVV1P0_1( w_fp[0], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[101] ); // Amplitude(s) for diagram number 756 - FFV1_0( w_fp[38], w_fp[77], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16242,7 +16242,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 757 - FFV1_0( w_fp[28], w_fp[92], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[92], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16254,7 +16254,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 758 - FFV1_0( w_fp[40], w_fp[92], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[92], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16263,10 +16263,10 @@ namespace mg5amcCpu // *** DIAGRAM 759 OF 1240 *** // Wavefunction(s) for diagram number 759 - FFV1_2( w_fp[41], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[62] ); + FFV1_2( w_fp[41], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] ); // Amplitude(s) for diagram number 759 - FFV1_0( w_fp[62], w_fp[87], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[87], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16278,7 +16278,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 760 - FFV1_0( w_fp[62], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16290,7 +16290,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 761 - FFV1_0( w_fp[40], w_fp[87], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[87], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16302,7 +16302,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 762 - FFV1_0( w_fp[28], w_fp[9], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[9], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16314,7 +16314,7 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 763 - FFV1_0( w_fp[41], w_fp[92], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[92], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16327,7 +16327,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 764 - FFV1_0( w_fp[62], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16337,10 +16337,10 @@ namespace mg5amcCpu // *** DIAGRAM 765 OF 1240 *** // Wavefunction(s) for diagram number 765 - VVV1P0_1( w_fp[0], w_fp[24], COUPs[0], 0., 0., w_fp[98] ); + VVV1P0_1( w_fp[0], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[98] ); // Amplitude(s) for diagram number 765 - FFV1_0( w_fp[41], w_fp[77], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16355,7 +16355,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 766 - FFV1_0( w_fp[26], w_fp[92], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[92], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16368,7 +16368,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 767 - FFV1_0( w_fp[3], w_fp[92], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -16383,7 +16383,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 768 - VVV1_0( w_fp[98], w_fp[34], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[34], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16402,7 +16402,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 769 - FFV1_0( w_fp[3], w_fp[85], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[85], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16417,7 +16417,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 770 - VVV1_0( w_fp[0], w_fp[34], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[34], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16436,7 +16436,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 771 - FFV1_0( w_fp[26], w_fp[85], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[85], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16446,12 +16446,12 @@ namespace mg5amcCpu // *** DIAGRAM 772 OF 1240 *** // Wavefunction(s) for diagram number 772 - VVVV1P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 0., 0., w_fp[85] ); - VVVV3P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 0., 0., w_fp[112] ); - VVVV4P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[85] ); + VVVV3P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] ); + 
VVVV4P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 772 - FFV1_0( w_fp[3], w_fp[77], w_fp[85], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[85], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16463,7 +16463,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16475,7 +16475,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16494,7 +16494,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 773 - FFV1_0( w_fp[14], w_fp[92], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[92], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16507,7 +16507,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 774 - FFV1_0( w_fp[3], w_fp[92], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -16522,7 +16522,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 775 - VVV1_0( w_fp[101], w_fp[34], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[34], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16541,7 +16541,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 776 - FFV1_0( w_fp[3], w_fp[9], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16556,7 +16556,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 777 - VVV1_0( w_fp[0], w_fp[34], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[34], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16575,7 +16575,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 778 - FFV1_0( w_fp[14], w_fp[9], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[9], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16585,12 +16585,12 @@ namespace mg5amcCpu // *** DIAGRAM 779 OF 1240 *** // Wavefunction(s) for diagram number 779 - VVVV1P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 0., 0., w_fp[9] ); - VVVV3P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 0., 0., w_fp[109] ); + VVVV1P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[9] ); + VVVV3P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] ); + 
VVVV4P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] ); // Amplitude(s) for diagram number 779 - FFV1_0( w_fp[3], w_fp[77], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16602,7 +16602,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16614,7 +16614,7 @@ namespace mg5amcCpu jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16633,7 +16633,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 780 - FFV1_0( w_fp[12], w_fp[92], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[92], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16646,7 +16646,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 781 - FFV1_0( w_fp[3], w_fp[92], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -16661,7 +16661,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 782 - VVV1_0( w_fp[96], w_fp[34], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[34], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16680,7 +16680,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 783 - FFV1_0( w_fp[3], w_fp[87], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[87], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16695,7 +16695,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 784 - VVV1_0( w_fp[0], w_fp[34], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[34], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16714,7 +16714,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 785 - FFV1_0( w_fp[12], w_fp[87], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[87], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16724,12 +16724,12 @@ namespace mg5amcCpu // *** DIAGRAM 786 OF 1240 *** // Wavefunction(s) for diagram number 786 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 0., 0., w_fp[87] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 0., 0., w_fp[34] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 0., 0., w_fp[86] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[87] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[34] ); + 
VVVV4P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 786 - FFV1_0( w_fp[3], w_fp[77], w_fp[87], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[87], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16741,7 +16741,7 @@ namespace mg5amcCpu jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[34], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[34], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16753,7 +16753,7 @@ namespace mg5amcCpu jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16772,7 +16772,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 787 - FFV1_0( w_fp[3], w_fp[92], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[30], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16780,7 +16780,7 @@ namespace mg5amcCpu jamp_sv[25] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[92], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[31], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support 
updates numerators_sv and denominators_sv (#473) #endif @@ -16788,7 +16788,7 @@ namespace mg5amcCpu jamp_sv[26] += amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[92], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[32], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16800,12 +16800,12 @@ namespace mg5amcCpu // *** DIAGRAM 788 OF 1240 *** // Wavefunction(s) for diagram number 788 - VVV1P0_1( w_fp[0], w_fp[30], COUPs[0], 0., 0., w_fp[92] ); - VVV1P0_1( w_fp[0], w_fp[31], COUPs[0], 0., 0., w_fp[88] ); - VVV1P0_1( w_fp[0], w_fp[32], COUPs[0], 0., 0., w_fp[106] ); + VVV1P0_1( w_fp[0], w_fp[30], COUPs[0], 1.0, 0., 0., w_fp[92] ); + VVV1P0_1( w_fp[0], w_fp[31], COUPs[0], 1.0, 0., 0., w_fp[88] ); + VVV1P0_1( w_fp[0], w_fp[32], COUPs[0], 1.0, 0., 0., w_fp[106] ); // Amplitude(s) for diagram number 788 - FFV1_0( w_fp[3], w_fp[77], w_fp[92], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[92], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16817,7 +16817,7 @@ namespace mg5amcCpu jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16829,7 +16829,7 @@ namespace mg5amcCpu jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[106], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[106], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16845,10 +16845,10 @@ namespace mg5amcCpu // *** DIAGRAM 789 OF 1240 *** // Wavefunction(s) for diagram number 789 - FFV1_2( w_fp[52], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[90] ); + FFV1_2( w_fp[52], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] ); // Amplitude(s) for diagram number 789 - FFV1_0( w_fp[90], w_fp[35], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[35], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16860,7 +16860,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 790 - FFV1_0( w_fp[90], w_fp[36], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[36], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16869,10 +16869,10 @@ namespace mg5amcCpu // *** DIAGRAM 791 OF 1240 *** // Wavefunction(s) for diagram number 791 - FFV1_1( w_fp[33], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[114] ); + FFV1_1( w_fp[33], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] ); // Amplitude(s) for diagram number 791 - FFV1_0( w_fp[22], w_fp[114], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[114], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16884,7 +16884,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 792 - FFV1_0( w_fp[21], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16896,7 +16896,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 793 - FFV1_0( w_fp[22], w_fp[36], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[36], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16908,7 +16908,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 794 - FFV1_0( w_fp[21], w_fp[35], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[35], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16920,7 +16920,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 795 - FFV1_0( w_fp[90], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16933,7 +16933,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 796 - FFV1_0( w_fp[52], w_fp[114], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[114], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16946,7 +16946,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 797 - FFV1_0( w_fp[52], w_fp[33], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16961,7 +16961,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) 
for diagram number 798 - FFV1_0( w_fp[90], w_fp[43], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[43], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16973,7 +16973,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 799 - FFV1_0( w_fp[90], w_fp[44], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[44], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16982,10 +16982,10 @@ namespace mg5amcCpu // *** DIAGRAM 800 OF 1240 *** // Wavefunction(s) for diagram number 800 - FFV1_1( w_fp[39], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[102] ); + FFV1_1( w_fp[39], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] ); // Amplitude(s) for diagram number 800 - FFV1_0( w_fp[56], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16997,7 +16997,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 801 - FFV1_0( w_fp[21], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17009,7 +17009,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 802 - FFV1_0( w_fp[56], w_fp[44], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[44], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-17021,7 +17021,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 803 - FFV1_0( w_fp[21], w_fp[43], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[43], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17033,7 +17033,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 804 - FFV1_0( w_fp[90], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17046,7 +17046,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 805 - FFV1_0( w_fp[52], w_fp[102], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[102], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17059,7 +17059,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 806 - FFV1_0( w_fp[52], w_fp[39], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17074,7 +17074,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 807 - FFV1_0( w_fp[90], w_fp[49], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[49], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17086,7 +17086,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 808 - FFV1_0( w_fp[90], w_fp[50], w_fp[4], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[90], w_fp[50], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17095,10 +17095,10 @@ namespace mg5amcCpu // *** DIAGRAM 809 OF 1240 *** // Wavefunction(s) for diagram number 809 - FFV1_1( w_fp[47], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[113] ); + FFV1_1( w_fp[47], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] ); // Amplitude(s) for diagram number 809 - FFV1_0( w_fp[56], w_fp[113], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[113], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17110,7 +17110,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 810 - FFV1_0( w_fp[22], w_fp[113], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[113], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17122,7 +17122,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 811 - FFV1_0( w_fp[56], w_fp[50], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[50], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17134,7 +17134,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 812 - FFV1_0( w_fp[22], w_fp[49], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[49], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17146,7 +17146,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 813 - FFV1_0( w_fp[90], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17159,7 +17159,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 814 - FFV1_0( w_fp[52], w_fp[113], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[113], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17172,7 +17172,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 815 - FFV1_0( w_fp[52], w_fp[47], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17187,7 +17187,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 816 - FFV1_0( w_fp[90], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17200,7 +17200,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 817 - FFV1_0( w_fp[90], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17215,7 +17215,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 818 - VVV1_0( w_fp[98], w_fp[103], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[103], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17234,7 +17234,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 819 - FFV1_0( w_fp[21], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17249,7 +17249,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 820 - VVV1_0( w_fp[0], w_fp[103], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[103], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17268,7 +17268,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 821 - FFV1_0( w_fp[21], w_fp[17], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[17], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17281,7 +17281,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 822 - FFV1_0( w_fp[52], w_fp[2], w_fp[85], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[85], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17293,7 +17293,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17305,7 +17305,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17324,7 +17324,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 823 - FFV1_0( w_fp[90], w_fp[15], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[15], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17337,7 +17337,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 824 - FFV1_0( w_fp[90], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17352,7 +17352,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 825 - VVV1_0( w_fp[101], w_fp[103], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[103], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17371,7 +17371,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 826 - FFV1_0( w_fp[22], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -17386,7 +17386,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 827 - VVV1_0( w_fp[0], w_fp[103], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[103], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17405,7 +17405,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 828 - FFV1_0( w_fp[22], w_fp[15], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[15], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17418,7 +17418,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 829 - FFV1_0( w_fp[52], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17430,7 +17430,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17442,7 +17442,7 @@ namespace mg5amcCpu jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -17461,7 +17461,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 830 - FFV1_0( w_fp[90], w_fp[18], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[18], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17474,7 +17474,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 831 - FFV1_0( w_fp[90], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17489,7 +17489,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 832 - VVV1_0( w_fp[96], w_fp[103], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[103], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17508,7 +17508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 833 - FFV1_0( w_fp[56], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17523,7 +17523,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 834 - VVV1_0( w_fp[0], w_fp[103], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[103], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17542,7 +17542,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 835 - FFV1_0( w_fp[56], 
w_fp[18], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[18], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17555,7 +17555,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 836 - FFV1_0( w_fp[52], w_fp[2], w_fp[87], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[87], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17567,7 +17567,7 @@ namespace mg5amcCpu jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[34], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[34], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17579,7 +17579,7 @@ namespace mg5amcCpu jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17598,7 +17598,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 837 - FFV1_0( w_fp[90], w_fp[2], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[30], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17606,7 +17606,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; 
jamp_sv[118] += amp_sv[0]; - FFV1_0( w_fp[90], w_fp[2], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[31], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17614,7 +17614,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - FFV1_0( w_fp[90], w_fp[2], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[32], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17629,7 +17629,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 838 - FFV1_0( w_fp[52], w_fp[2], w_fp[92], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[92], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17641,7 +17641,7 @@ namespace mg5amcCpu jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17653,7 +17653,7 @@ namespace mg5amcCpu jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[106], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[106], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17669,10 +17669,10 @@ 
namespace mg5amcCpu // *** DIAGRAM 839 OF 1240 *** // Wavefunction(s) for diagram number 839 - VVV1P0_1( w_fp[0], w_fp[61], COUPs[0], 0., 0., w_fp[90] ); + VVV1P0_1( w_fp[0], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[90] ); // Amplitude(s) for diagram number 839 - VVV1_0( w_fp[90], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17699,7 +17699,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 840 - VVV1_0( w_fp[90], w_fp[11], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[11], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17726,7 +17726,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 841 - VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17746,7 +17746,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17766,7 +17766,7 @@ namespace mg5amcCpu jamp_sv[115] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17790,10 +17790,10 @@ namespace mg5amcCpu // *** DIAGRAM 842 OF 1240 *** // Wavefunction(s) for diagram number 842 - VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 0., 0., w_fp[56] ); + VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[56] ); // Amplitude(s) for diagram number 842 - VVV1_0( w_fp[56], w_fp[63], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[63], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17820,7 +17820,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 843 - VVV1_0( w_fp[56], w_fp[64], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[64], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17847,7 +17847,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 844 - VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17867,7 +17867,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17887,7 +17887,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[111] -= 
amp_sv[0]; - VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17914,7 +17914,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 845 - VVV1_0( w_fp[0], w_fp[63], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[63], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17941,7 +17941,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 846 - VVV1_0( w_fp[0], w_fp[64], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[64], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17965,12 +17965,12 @@ namespace mg5amcCpu // *** DIAGRAM 847 OF 1240 *** // Wavefunction(s) for diagram number 847 - VVVV1P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 0., 0., w_fp[103] ); - VVVV3P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 0., 0., w_fp[22] ); - VVVV4P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[103] ); + VVVV3P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[22] ); + VVVV4P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 847 - VVV1_0( w_fp[8], w_fp[6], w_fp[103], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[103], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17990,7 +17990,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; 
jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18010,7 +18010,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18034,12 +18034,12 @@ namespace mg5amcCpu // *** DIAGRAM 848 OF 1240 *** // Wavefunction(s) for diagram number 848 - VVVV1P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 0., 0., w_fp[105] ); - VVVV3P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 0., 0., w_fp[107] ); + VVVV1P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[105] ); + VVVV3P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[107] ); // Amplitude(s) for diagram number 848 - VVV1_0( w_fp[8], w_fp[5], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[105], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18059,7 +18059,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -18079,7 +18079,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[98] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18103,12 +18103,12 @@ namespace mg5amcCpu // *** DIAGRAM 849 OF 1240 *** // Wavefunction(s) for diagram number 849 - VVVV1P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 0., 0., w_fp[115] ); - VVVV3P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 0., 0., w_fp[116] ); - VVVV4P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 0., 0., w_fp[117] ); + VVVV1P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[115] ); + VVVV3P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[116] ); + VVVV4P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[117] ); // Amplitude(s) for diagram number 849 - VVV1_0( w_fp[61], w_fp[6], w_fp[115], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[115], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18128,7 +18128,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVV1_0( w_fp[61], w_fp[6], w_fp[116], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[116], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18148,7 +18148,7 @@ namespace mg5amcCpu jamp_sv[105] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[61], w_fp[6], w_fp[117], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[117], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18172,12 +18172,12 @@ namespace mg5amcCpu // *** DIAGRAM 850 OF 1240 *** // Wavefunction(s) for diagram number 850 - VVVV1P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 0., 0., w_fp[118] ); - VVVV3P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 0., 0., w_fp[119] ); - VVVV4P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 0., 0., w_fp[120] ); + VVVV1P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[118] ); + VVVV3P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[119] ); + VVVV4P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[120] ); // Amplitude(s) for diagram number 850 - VVV1_0( w_fp[61], w_fp[5], w_fp[118], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[118], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18197,7 +18197,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[61], w_fp[5], w_fp[119], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[119], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18217,7 +18217,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[100] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[61], w_fp[5], w_fp[120], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[120], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18244,7 +18244,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 851 - VVVV1_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18264,7 +18264,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18284,7 +18284,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18311,7 +18311,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 852 - VVV1_0( w_fp[8], w_fp[29], w_fp[90], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[29], w_fp[90], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18338,7 +18338,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 853 - VVV1_0( w_fp[61], w_fp[29], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[29], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18365,7 +18365,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 854 - VVV1_0( w_fp[61], w_fp[8], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[8], w_fp[96], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18392,7 +18392,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 855 - VVV1_0( w_fp[90], w_fp[45], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[45], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18411,7 +18411,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 856 - FFV1_0( w_fp[3], w_fp[44], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[44], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18426,7 +18426,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 857 - FFV1_0( w_fp[65], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18439,7 +18439,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 858 - FFV1_0( w_fp[3], w_fp[102], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18454,7 +18454,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 859 - FFV1_0( w_fp[65], w_fp[44], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[44], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-18467,7 +18467,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 860 - VVV1_0( w_fp[0], w_fp[64], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[64], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18486,7 +18486,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 861 - FFV1_0( w_fp[3], w_fp[39], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[105], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18498,7 +18498,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[95], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18510,7 +18510,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[107], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18529,7 +18529,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 862 - FFV1_0( w_fp[41], w_fp[39], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -18544,7 +18544,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 863 - FFV1_0( w_fp[41], w_fp[102], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[102], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18557,7 +18557,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 864 - FFV1_0( w_fp[62], w_fp[39], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18570,7 +18570,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 865 - VVV1_0( w_fp[90], w_fp[51], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[51], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18589,7 +18589,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 866 - FFV1_0( w_fp[3], w_fp[50], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[50], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18604,7 +18604,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 867 - FFV1_0( w_fp[65], w_fp[113], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[113], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18617,7 +18617,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 868 - FFV1_0( w_fp[3], w_fp[113], w_fp[63], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18632,7 +18632,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 869 - FFV1_0( w_fp[65], w_fp[50], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[50], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18645,7 +18645,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 870 - VVV1_0( w_fp[0], w_fp[63], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[63], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18664,7 +18664,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 871 - FFV1_0( w_fp[3], w_fp[47], w_fp[103], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[103], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18676,7 +18676,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18688,7 +18688,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], 
w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18707,7 +18707,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 872 - FFV1_0( w_fp[38], w_fp[47], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18722,7 +18722,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 873 - FFV1_0( w_fp[38], w_fp[113], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[113], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18735,7 +18735,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 874 - FFV1_0( w_fp[104], w_fp[47], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[47], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18748,7 +18748,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 875 - VVV1_0( w_fp[90], w_fp[23], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[23], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18767,7 +18767,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 876 - FFV1_0( w_fp[48], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
// Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18782,7 +18782,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 877 - FFV1_0( w_fp[104], w_fp[93], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[93], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18795,7 +18795,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 878 - FFV1_0( w_fp[104], w_fp[2], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18810,7 +18810,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 879 - FFV1_0( w_fp[48], w_fp[93], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[93], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18823,7 +18823,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 880 - VVV1_0( w_fp[0], w_fp[64], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[64], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18842,7 +18842,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 881 - FFV1_0( w_fp[38], w_fp[2], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[105], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18854,7 +18854,7 @@ namespace 
mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18866,7 +18866,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[107], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18885,7 +18885,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 882 - VVV1_0( w_fp[90], w_fp[20], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[20], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18904,7 +18904,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 883 - FFV1_0( w_fp[40], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18919,7 +18919,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 884 - FFV1_0( w_fp[62], w_fp[93], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[93], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18932,7 +18932,7 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 885 - FFV1_0( w_fp[62], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18947,7 +18947,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 886 - FFV1_0( w_fp[40], w_fp[93], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[93], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18960,7 +18960,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 887 - VVV1_0( w_fp[0], w_fp[63], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[63], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18979,7 +18979,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 888 - FFV1_0( w_fp[41], w_fp[2], w_fp[103], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[103], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18991,7 +18991,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19003,7 +19003,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19022,7 +19022,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 889 - FFV1_0( w_fp[3], w_fp[18], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[18], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19041,7 +19041,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 890 - FFV1_0( w_fp[12], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19060,7 +19060,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 891 - FFV1_0( w_fp[3], w_fp[93], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[93], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19079,7 +19079,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 892 - FFV1_0( w_fp[65], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19098,7 +19098,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 893 - FFV1_0( w_fp[12], w_fp[93], w_fp[0], COUPs[1], &_fp[0] ); + 
FFV1_0( w_fp[12], w_fp[93], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19113,7 +19113,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 894 - FFV1_0( w_fp[65], w_fp[18], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[18], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19125,10 +19125,10 @@ namespace mg5amcCpu // *** DIAGRAM 895 OF 1240 *** // Wavefunction(s) for diagram number 895 - VVV1P0_1( w_fp[0], w_fp[66], COUPs[0], 0., 0., w_fp[65] ); + VVV1P0_1( w_fp[0], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[65] ); // Amplitude(s) for diagram number 895 - VVV1_0( w_fp[65], w_fp[13], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[13], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19155,7 +19155,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 896 - VVV1_0( w_fp[65], w_fp[11], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[11], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19182,7 +19182,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 897 - VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19202,7 +19202,7 @@ namespace mg5amcCpu jamp_sv[100] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; 
jamp_sv[116] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19222,7 +19222,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19249,7 +19249,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 898 - VVV1_0( w_fp[56], w_fp[69], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[69], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19276,7 +19276,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 899 - VVV1_0( w_fp[56], w_fp[70], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[70], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19303,7 +19303,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 900 - VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19323,7 +19323,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - 
VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19343,7 +19343,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[107] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19370,7 +19370,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 901 - VVV1_0( w_fp[0], w_fp[69], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[69], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19397,7 +19397,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 902 - VVV1_0( w_fp[0], w_fp[70], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[70], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19421,12 +19421,12 @@ namespace mg5amcCpu // *** DIAGRAM 903 OF 1240 *** // Wavefunction(s) for diagram number 903 - VVVV1P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 0., 0., w_fp[93] ); - VVVV3P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 0., 0., w_fp[90] ); - VVVV4P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[93] ); + VVVV3P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[90] ); + VVVV4P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., 
w_fp[21] ); // Amplitude(s) for diagram number 903 - VVV1_0( w_fp[8], w_fp[6], w_fp[93], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[93], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19446,7 +19446,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[90], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[90], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19466,7 +19466,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19490,12 +19490,12 @@ namespace mg5amcCpu // *** DIAGRAM 904 OF 1240 *** // Wavefunction(s) for diagram number 904 - VVVV1P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 0., 0., w_fp[22] ); - VVVV3P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 0., 0., w_fp[103] ); - VVVV4P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 0., 0., w_fp[63] ); + VVVV1P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] ); + VVVV3P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[103] ); + VVVV4P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[63] ); // Amplitude(s) for diagram number 904 - VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif 
@@ -19515,7 +19515,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[103], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[103], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19535,7 +19535,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; jamp_sv[100] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[63], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[63], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19559,12 +19559,12 @@ namespace mg5amcCpu // *** DIAGRAM 905 OF 1240 *** // Wavefunction(s) for diagram number 905 - VVVV1P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 0., 0., w_fp[107] ); - VVVV3P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 0., 0., w_fp[105] ); + VVVV1P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] ); + VVVV3P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] ); // Amplitude(s) for diagram number 905 - VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19584,7 +19584,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVV1_0( w_fp[66], w_fp[6], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19604,7 +19604,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[66], w_fp[6], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[105], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19631,7 +19631,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 906 - VVV1_0( w_fp[66], w_fp[4], w_fp[118], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[118], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19651,7 +19651,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[66], w_fp[4], w_fp[119], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[119], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19671,7 +19671,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[99] += amp_sv[0]; jamp_sv[100] -= amp_sv[0]; - VVV1_0( w_fp[66], w_fp[4], w_fp[120], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[120], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19698,7 +19698,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 907 - VVVV1_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -19718,7 +19718,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19738,7 +19738,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19765,7 +19765,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 908 - VVV1_0( w_fp[8], w_fp[27], w_fp[65], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[27], w_fp[65], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19792,7 +19792,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 909 - VVV1_0( w_fp[66], w_fp[27], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[27], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19819,7 +19819,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 910 - VVV1_0( w_fp[66], w_fp[8], w_fp[101], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[8], w_fp[101], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif 
@@ -19846,7 +19846,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 911 - VVV1_0( w_fp[65], w_fp[37], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[37], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19865,7 +19865,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 912 - FFV1_0( w_fp[3], w_fp[36], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[36], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19880,7 +19880,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 913 - FFV1_0( w_fp[71], w_fp[114], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[114], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19893,7 +19893,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 914 - FFV1_0( w_fp[3], w_fp[114], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19908,7 +19908,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 915 - FFV1_0( w_fp[71], w_fp[36], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[36], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19921,7 +19921,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 916 - VVV1_0( w_fp[0], w_fp[70], w_fp[37], COUPs[0], &_fp[0] 
); + VVV1_0( w_fp[0], w_fp[70], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19940,7 +19940,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 917 - FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19952,7 +19952,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[103], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[103], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19964,7 +19964,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19983,7 +19983,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 918 - FFV1_0( w_fp[41], w_fp[33], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19998,7 +19998,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 919 - FFV1_0( w_fp[41], w_fp[114], w_fp[66], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[114], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20011,7 +20011,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 920 - FFV1_0( w_fp[62], w_fp[33], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20024,7 +20024,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 921 - VVV1_0( w_fp[65], w_fp[51], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[51], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20043,7 +20043,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 922 - FFV1_0( w_fp[3], w_fp[49], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[49], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20058,7 +20058,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 923 - FFV1_0( w_fp[71], w_fp[113], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[113], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20071,7 +20071,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 924 - FFV1_0( w_fp[3], w_fp[113], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20086,7 +20086,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 925 - FFV1_0( w_fp[71], w_fp[49], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[49], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20099,7 +20099,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 926 - VVV1_0( w_fp[0], w_fp[69], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[69], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20118,7 +20118,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 927 - FFV1_0( w_fp[3], w_fp[47], w_fp[93], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[93], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20130,7 +20130,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20142,7 +20142,7 @@ namespace mg5amcCpu jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here 
the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20161,7 +20161,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 928 - FFV1_0( w_fp[46], w_fp[47], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20176,7 +20176,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 929 - FFV1_0( w_fp[46], w_fp[113], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[113], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20189,7 +20189,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 930 - FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20202,7 +20202,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 931 - VVV1_0( w_fp[65], w_fp[54], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[54], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20221,7 +20221,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 932 - FFV1_0( w_fp[53], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20236,7 +20236,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 933 - FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20249,7 +20249,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 934 - FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20264,7 +20264,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 935 - FFV1_0( w_fp[53], w_fp[94], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[94], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20277,7 +20277,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 936 - VVV1_0( w_fp[0], w_fp[70], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[70], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20296,7 +20296,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 937 - FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20308,7 +20308,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( 
w_fp[46], w_fp[2], w_fp[103], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[103], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20320,7 +20320,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20339,7 +20339,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 938 - VVV1_0( w_fp[65], w_fp[20], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[20], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20358,7 +20358,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 939 - FFV1_0( w_fp[28], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20373,7 +20373,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 940 - FFV1_0( w_fp[62], w_fp[94], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[94], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20386,7 +20386,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 941 - FFV1_0( w_fp[62], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], 
w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20401,7 +20401,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 942 - FFV1_0( w_fp[28], w_fp[94], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[94], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20414,7 +20414,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 943 - VVV1_0( w_fp[0], w_fp[69], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[69], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20433,7 +20433,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 944 - FFV1_0( w_fp[41], w_fp[2], w_fp[93], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[93], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20445,7 +20445,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20457,7 +20457,7 @@ namespace mg5amcCpu jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20476,7 +20476,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 945 - FFV1_0( w_fp[3], w_fp[15], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[15], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20495,7 +20495,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 946 - FFV1_0( w_fp[14], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20514,7 +20514,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 947 - FFV1_0( w_fp[3], w_fp[94], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[94], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20533,7 +20533,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 948 - FFV1_0( w_fp[71], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20552,7 +20552,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 949 - FFV1_0( w_fp[14], w_fp[94], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[94], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support 
updates numerators_sv and denominators_sv (#473) #endif @@ -20567,7 +20567,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 950 - FFV1_0( w_fp[71], w_fp[15], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[15], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20579,10 +20579,10 @@ namespace mg5amcCpu // *** DIAGRAM 951 OF 1240 *** // Wavefunction(s) for diagram number 951 - VVV1P0_1( w_fp[0], w_fp[72], COUPs[0], 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[0], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[71] ); // Amplitude(s) for diagram number 951 - VVV1_0( w_fp[71], w_fp[13], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[13], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20609,7 +20609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 952 - VVV1_0( w_fp[71], w_fp[10], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[10], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20636,7 +20636,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 953 - VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20656,7 +20656,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20676,7 +20676,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20703,7 +20703,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 954 - VVV1_0( w_fp[56], w_fp[74], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[74], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20730,7 +20730,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 955 - VVV1_0( w_fp[56], w_fp[75], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[75], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20757,7 +20757,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 956 - VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20777,7 +20777,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; - VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20797,7 +20797,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20824,7 +20824,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 957 - VVV1_0( w_fp[0], w_fp[74], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[74], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20851,7 +20851,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 958 - VVV1_0( w_fp[0], w_fp[75], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[75], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20875,12 +20875,12 @@ namespace mg5amcCpu // *** DIAGRAM 959 OF 1240 *** // Wavefunction(s) for diagram number 959 - VVVV1P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 0., 0., w_fp[94] ); - VVVV3P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 0., 0., w_fp[65] ); - VVVV4P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[94] ); + VVVV3P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[65] ); + VVVV4P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 959 - VVV1_0( w_fp[8], w_fp[5], w_fp[94], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[94], 
COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20900,7 +20900,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[65], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[65], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20920,7 +20920,7 @@ namespace mg5amcCpu jamp_sv[93] -= amp_sv[0]; jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20944,12 +20944,12 @@ namespace mg5amcCpu // *** DIAGRAM 960 OF 1240 *** // Wavefunction(s) for diagram number 960 - VVVV1P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 0., 0., w_fp[90] ); - VVVV3P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 0., 0., w_fp[93] ); - VVVV4P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 0., 0., w_fp[69] ); + VVVV1P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[90] ); + VVVV3P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[93] ); + VVVV4P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[69] ); // Amplitude(s) for diagram number 960 - VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20969,7 +20969,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], 
w_fp[93], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[93], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20989,7 +20989,7 @@ namespace mg5amcCpu jamp_sv[76] -= amp_sv[0]; jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[69], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[69], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21016,7 +21016,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 961 - VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21036,7 +21036,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[72], w_fp[5], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21056,7 +21056,7 @@ namespace mg5amcCpu jamp_sv[93] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - VVV1_0( w_fp[72], w_fp[5], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[105], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21083,7 +21083,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 962 - VVV1_0( w_fp[72], w_fp[4], w_fp[115], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], 
w_fp[115], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21103,7 +21103,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVV1_0( w_fp[72], w_fp[4], w_fp[116], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[116], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21123,7 +21123,7 @@ namespace mg5amcCpu jamp_sv[76] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; - VVV1_0( w_fp[72], w_fp[4], w_fp[117], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[117], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21150,7 +21150,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 963 - VVVV1_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21170,7 +21170,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21190,7 +21190,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + 
VVVV4_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21217,7 +21217,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 964 - VVV1_0( w_fp[8], w_fp[24], w_fp[71], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[24], w_fp[71], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21244,7 +21244,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 965 - VVV1_0( w_fp[72], w_fp[24], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[24], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21271,7 +21271,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 966 - VVV1_0( w_fp[72], w_fp[8], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[8], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21298,7 +21298,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 967 - VVV1_0( w_fp[71], w_fp[37], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[37], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21317,7 +21317,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 968 - FFV1_0( w_fp[3], w_fp[35], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[35], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21332,7 +21332,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 969 - FFV1_0( w_fp[76], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21345,7 +21345,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 970 - FFV1_0( w_fp[3], w_fp[114], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21360,7 +21360,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 971 - FFV1_0( w_fp[76], w_fp[35], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[35], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21373,7 +21373,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 972 - VVV1_0( w_fp[0], w_fp[75], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[75], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21392,7 +21392,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 973 - FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21404,7 +21404,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) 
* amp_sv[0]; jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[93], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[93], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21416,7 +21416,7 @@ namespace mg5amcCpu jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21435,7 +21435,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 974 - FFV1_0( w_fp[38], w_fp[33], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21450,7 +21450,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 975 - FFV1_0( w_fp[38], w_fp[114], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[114], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21463,7 +21463,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 976 - FFV1_0( w_fp[104], w_fp[33], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[33], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21476,7 +21476,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 977 - VVV1_0( w_fp[71], w_fp[45], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[45], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21495,7 +21495,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 978 - FFV1_0( w_fp[3], w_fp[43], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[43], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21510,7 +21510,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 979 - FFV1_0( w_fp[76], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21523,7 +21523,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 980 - FFV1_0( w_fp[3], w_fp[102], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21538,7 +21538,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 981 - FFV1_0( w_fp[76], w_fp[43], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[43], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21551,7 +21551,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 982 - VVV1_0( w_fp[0], w_fp[74], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[74], w_fp[45], COUPs[0], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21570,7 +21570,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 983 - FFV1_0( w_fp[3], w_fp[39], w_fp[94], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[94], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21582,7 +21582,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21594,7 +21594,7 @@ namespace mg5amcCpu jamp_sv[85] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21613,7 +21613,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 984 - FFV1_0( w_fp[46], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21628,7 +21628,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 985 - FFV1_0( w_fp[46], w_fp[102], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[102], w_fp[72], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21641,7 +21641,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 986 - FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21654,7 +21654,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 987 - VVV1_0( w_fp[71], w_fp[54], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[54], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21673,7 +21673,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 988 - FFV1_0( w_fp[7], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21688,7 +21688,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 989 - FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21701,7 +21701,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 990 - FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -21716,7 +21716,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 991 - FFV1_0( w_fp[7], w_fp[97], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[97], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21729,7 +21729,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 992 - VVV1_0( w_fp[0], w_fp[75], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[75], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21748,7 +21748,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 993 - FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21760,7 +21760,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[93], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[93], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21772,7 +21772,7 @@ namespace mg5amcCpu jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -21791,7 +21791,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 994 - VVV1_0( w_fp[71], w_fp[23], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[23], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21810,7 +21810,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 995 - FFV1_0( w_fp[25], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21825,7 +21825,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 996 - FFV1_0( w_fp[104], w_fp[97], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[97], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21838,7 +21838,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 997 - FFV1_0( w_fp[104], w_fp[2], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21853,7 +21853,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 998 - FFV1_0( w_fp[25], w_fp[97], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[97], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21866,7 +21866,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 999 - VVV1_0( 
w_fp[0], w_fp[74], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[74], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21885,7 +21885,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1000 - FFV1_0( w_fp[38], w_fp[2], w_fp[94], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[94], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21897,7 +21897,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21909,7 +21909,7 @@ namespace mg5amcCpu jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21928,7 +21928,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1001 - FFV1_0( w_fp[3], w_fp[17], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21947,7 +21947,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 
1002 - FFV1_0( w_fp[26], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21966,7 +21966,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1003 - FFV1_0( w_fp[3], w_fp[97], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[97], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21985,7 +21985,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1004 - FFV1_0( w_fp[76], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22004,7 +22004,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1005 - FFV1_0( w_fp[26], w_fp[97], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[97], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22019,7 +22019,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1006 - FFV1_0( w_fp[76], w_fp[17], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[17], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22034,7 +22034,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1007 - VVV1_0( w_fp[56], w_fp[59], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[59], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22061,7 +22061,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1008 - VVV1_0( w_fp[56], w_fp[1], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[1], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22088,7 +22088,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1009 - VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22108,7 +22108,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22128,7 +22128,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22155,7 +22155,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1010 - VVV1_0( w_fp[98], w_fp[108], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[108], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22182,7 +22182,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1011 - VVV1_0( w_fp[98], w_fp[1], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[1], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22209,7 +22209,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1012 - VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22229,7 +22229,7 @@ namespace mg5amcCpu jamp_sv[101] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22249,7 +22249,7 @@ namespace mg5amcCpu jamp_sv[103] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22276,7 +22276,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1013 - VVV1_0( w_fp[0], w_fp[108], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[108], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
// Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22303,7 +22303,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1014 - VVV1_0( w_fp[0], w_fp[59], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[59], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22327,12 +22327,12 @@ namespace mg5amcCpu // *** DIAGRAM 1015 OF 1240 *** // Wavefunction(s) for diagram number 1015 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 0., 0., w_fp[11] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 0., 0., w_fp[42] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 0., 0., w_fp[76] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[11] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[42] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[76] ); // Amplitude(s) for diagram number 1015 - VVV1_0( w_fp[24], w_fp[6], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22352,7 +22352,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVV1_0( w_fp[24], w_fp[6], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22372,7 +22372,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[24], w_fp[6], w_fp[76], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[76], COUPs[0], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22396,12 +22396,12 @@ namespace mg5amcCpu // *** DIAGRAM 1016 OF 1240 *** // Wavefunction(s) for diagram number 1016 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 0., 0., w_fp[97] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 0., 0., w_fp[71] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[97] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[71] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 1016 - VVV1_0( w_fp[8], w_fp[6], w_fp[97], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[97], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22421,7 +22421,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22441,7 +22441,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22468,7 +22468,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1017 - VVV1_0( w_fp[1], w_fp[24], w_fp[118], COUPs[0], &_fp[0] ); + 
VVV1_0( w_fp[1], w_fp[24], w_fp[118], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22488,7 +22488,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[24], w_fp[119], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[119], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22508,7 +22508,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[24], w_fp[120], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[120], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22535,7 +22535,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1018 - VVV1_0( w_fp[1], w_fp[8], w_fp[85], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[85], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22555,7 +22555,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[112], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[112], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22575,7 +22575,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], 
w_fp[111], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22602,7 +22602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1019 - VVV1_0( w_fp[56], w_fp[68], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[68], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22629,7 +22629,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1020 - VVV1_0( w_fp[56], w_fp[1], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[1], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22656,7 +22656,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1021 - VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22676,7 +22676,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22696,7 +22696,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22723,7 +22723,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1022 - VVV1_0( w_fp[101], w_fp[108], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[108], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22750,7 +22750,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1023 - VVV1_0( w_fp[101], w_fp[1], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[1], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22777,7 +22777,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1024 - VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22797,7 +22797,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22817,7 +22817,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22844,7 +22844,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1025 - VVV1_0( w_fp[0], w_fp[108], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[108], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22871,7 +22871,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1026 - VVV1_0( w_fp[0], w_fp[68], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[68], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22898,7 +22898,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1027 - VVV1_0( w_fp[27], w_fp[5], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22918,7 +22918,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[27], w_fp[5], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22938,7 +22938,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[27], w_fp[5], w_fp[76], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[76], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support 
updates numerators_sv and denominators_sv (#473) #endif @@ -22962,12 +22962,12 @@ namespace mg5amcCpu // *** DIAGRAM 1028 OF 1240 *** // Wavefunction(s) for diagram number 1028 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 0., 0., w_fp[10] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 0., 0., w_fp[16] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[10] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[16] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 1028 - VVV1_0( w_fp[8], w_fp[5], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22987,7 +22987,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23007,7 +23007,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23034,7 +23034,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1029 - VVV1_0( w_fp[1], w_fp[27], w_fp[115], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[115], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the 
code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23054,7 +23054,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[27], w_fp[116], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[116], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23074,7 +23074,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[27], w_fp[117], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[117], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23101,7 +23101,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1030 - VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23121,7 +23121,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23141,7 +23141,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23168,7 +23168,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1031 - VVV1_0( w_fp[56], w_fp[67], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[67], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23195,7 +23195,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1032 - VVV1_0( w_fp[56], w_fp[1], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[1], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23222,7 +23222,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1033 - VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23242,7 +23242,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23262,7 +23262,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -23289,7 +23289,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1034 - VVV1_0( w_fp[96], w_fp[108], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[108], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23316,7 +23316,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1035 - VVV1_0( w_fp[96], w_fp[1], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[1], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23343,7 +23343,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1036 - VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23363,7 +23363,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23383,7 +23383,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -23410,7 +23410,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1037 - VVV1_0( w_fp[0], w_fp[108], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[108], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23437,7 +23437,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1038 - VVV1_0( w_fp[0], w_fp[67], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[67], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23464,7 +23464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1039 - VVV1_0( w_fp[4], w_fp[29], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23484,7 +23484,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVV1_0( w_fp[4], w_fp[29], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23504,7 +23504,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[4], w_fp[29], w_fp[76], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[76], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23528,12 +23528,12 @@ namespace mg5amcCpu // *** DIAGRAM 1040 OF 1240 
*** // Wavefunction(s) for diagram number 1040 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 0., 0., w_fp[76] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 0., 0., w_fp[42] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 0., 0., w_fp[11] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[76] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[42] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 1040 - VVV1_0( w_fp[8], w_fp[4], w_fp[76], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[76], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23553,7 +23553,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23573,7 +23573,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[90] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23600,7 +23600,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1041 - VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23620,7 +23620,7 @@ 
namespace mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[29], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23640,7 +23640,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[29], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[105], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23667,7 +23667,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1042 - VVV1_0( w_fp[1], w_fp[8], w_fp[87], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[87], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23687,7 +23687,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[34], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[34], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23707,7 +23707,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23734,7 +23734,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 1043 - VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23754,7 +23754,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23774,7 +23774,7 @@ namespace mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23794,7 +23794,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23814,7 +23814,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -23834,7 +23834,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23854,7 +23854,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23874,7 +23874,7 @@ namespace mg5amcCpu jamp_sv[113] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23894,7 +23894,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23921,7 +23921,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1044 - VVV1_0( w_fp[1], w_fp[30], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[30], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -23941,7 +23941,7 @@ namespace mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[31], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[31], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23961,7 +23961,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[32], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[32], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23988,7 +23988,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1045 - VVV1_0( w_fp[1], w_fp[8], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24008,7 +24008,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24028,7 +24028,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[106], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[106], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -24055,7 +24055,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1046 - FFV1_0( w_fp[58], w_fp[114], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[114], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24067,7 +24067,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1047 - FFV1_0( w_fp[48], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24079,7 +24079,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1048 - FFV1_0( w_fp[104], w_fp[100], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[100], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24091,7 +24091,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1049 - FFV1_0( w_fp[104], w_fp[36], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[36], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24103,7 +24103,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1050 - FFV1_0( w_fp[48], w_fp[100], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[100], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24115,7 +24115,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1051 - FFV1_0( w_fp[58], w_fp[36], 
w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[36], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24127,7 +24127,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1052 - FFV1_0( w_fp[60], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24139,7 +24139,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1053 - FFV1_0( w_fp[40], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24151,7 +24151,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1054 - FFV1_0( w_fp[62], w_fp[100], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[100], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24163,7 +24163,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1055 - FFV1_0( w_fp[62], w_fp[35], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[35], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24175,7 +24175,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1056 - FFV1_0( w_fp[40], w_fp[100], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[100], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here 
the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24187,7 +24187,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1057 - FFV1_0( w_fp[60], w_fp[35], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[35], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24199,7 +24199,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1058 - FFV1_0( w_fp[3], w_fp[114], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24214,7 +24214,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1059 - FFV1_0( w_fp[12], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24227,7 +24227,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1060 - FFV1_0( w_fp[3], w_fp[100], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[100], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24242,7 +24242,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1061 - VVV1_0( w_fp[96], w_fp[1], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[1], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24261,7 +24261,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 1062 - FFV1_0( w_fp[12], w_fp[100], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[100], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24274,7 +24274,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1063 - VVV1_0( w_fp[0], w_fp[67], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[67], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24293,7 +24293,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1064 - FFV1_0( w_fp[3], w_fp[33], w_fp[76], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[76], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24305,7 +24305,7 @@ namespace mg5amcCpu jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24317,7 +24317,7 @@ namespace mg5amcCpu jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24336,7 +24336,7 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1065 - FFV1_0( w_fp[78], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24348,7 +24348,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1066 - FFV1_0( w_fp[53], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24360,7 +24360,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1067 - FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24372,7 +24372,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1068 - FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24384,7 +24384,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1069 - FFV1_0( w_fp[53], w_fp[89], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[89], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24396,7 +24396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1070 - FFV1_0( w_fp[78], w_fp[44], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[78], w_fp[44], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24408,7 +24408,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1071 - FFV1_0( w_fp[60], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24420,7 +24420,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1072 - FFV1_0( w_fp[28], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24432,7 +24432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1073 - FFV1_0( w_fp[62], w_fp[89], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[89], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24444,7 +24444,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1074 - FFV1_0( w_fp[62], w_fp[43], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[43], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24456,7 +24456,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1075 - FFV1_0( w_fp[28], w_fp[89], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[89], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -24468,7 +24468,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1076 - FFV1_0( w_fp[60], w_fp[43], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[43], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24480,7 +24480,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1077 - FFV1_0( w_fp[3], w_fp[102], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24495,7 +24495,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1078 - FFV1_0( w_fp[14], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24508,7 +24508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1079 - FFV1_0( w_fp[3], w_fp[89], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[89], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24523,7 +24523,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1080 - VVV1_0( w_fp[101], w_fp[1], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[1], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24542,7 +24542,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 1081 - FFV1_0( w_fp[14], w_fp[89], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[89], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24555,7 +24555,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1082 - VVV1_0( w_fp[0], w_fp[68], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[68], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24574,7 +24574,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1083 - FFV1_0( w_fp[3], w_fp[39], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24586,7 +24586,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24598,7 +24598,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24617,7 +24617,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 1084 - FFV1_0( w_fp[78], w_fp[113], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[113], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24629,7 +24629,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1085 - FFV1_0( w_fp[7], w_fp[113], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[113], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24641,7 +24641,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1086 - FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24653,7 +24653,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1087 - FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24665,7 +24665,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1088 - FFV1_0( w_fp[7], w_fp[91], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[91], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24677,7 +24677,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1089 - FFV1_0( w_fp[78], w_fp[50], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[50], w_fp[0], COUPs[1], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24689,7 +24689,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1090 - FFV1_0( w_fp[58], w_fp[113], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[113], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24701,7 +24701,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1091 - FFV1_0( w_fp[25], w_fp[113], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[113], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24713,7 +24713,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1092 - FFV1_0( w_fp[104], w_fp[91], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[91], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24725,7 +24725,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1093 - FFV1_0( w_fp[104], w_fp[49], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[49], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24737,7 +24737,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1094 - FFV1_0( w_fp[25], w_fp[91], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[91], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -24749,7 +24749,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1095 - FFV1_0( w_fp[58], w_fp[49], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[49], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24761,7 +24761,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1096 - FFV1_0( w_fp[3], w_fp[113], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24776,7 +24776,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1097 - FFV1_0( w_fp[26], w_fp[113], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[113], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24789,7 +24789,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1098 - FFV1_0( w_fp[3], w_fp[91], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[91], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24804,7 +24804,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1099 - VVV1_0( w_fp[98], w_fp[1], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[1], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24823,7 +24823,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1100 - FFV1_0( w_fp[26], 
w_fp[91], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[91], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24836,7 +24836,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1101 - VVV1_0( w_fp[0], w_fp[59], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[59], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24855,7 +24855,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1102 - FFV1_0( w_fp[3], w_fp[47], w_fp[97], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[97], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24867,7 +24867,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24879,7 +24879,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24898,7 +24898,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1103 - 
FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24913,7 +24913,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1104 - FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24926,7 +24926,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1105 - FFV1_0( w_fp[78], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24941,7 +24941,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1106 - VVV1_0( w_fp[96], w_fp[1], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[1], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24960,7 +24960,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1107 - FFV1_0( w_fp[78], w_fp[18], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[18], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24973,7 +24973,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1108 - VVV1_0( w_fp[0], w_fp[67], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[67], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24992,7 +24992,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1109 - FFV1_0( w_fp[46], w_fp[2], w_fp[76], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[76], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25004,7 +25004,7 @@ namespace mg5amcCpu jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25016,7 +25016,7 @@ namespace mg5amcCpu jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25035,7 +25035,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1110 - FFV1_0( w_fp[104], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25050,7 +25050,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1111 - FFV1_0( w_fp[104], w_fp[15], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[15], w_fp[1], COUPs[1], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25063,7 +25063,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1112 - FFV1_0( w_fp[58], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25078,7 +25078,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1113 - VVV1_0( w_fp[101], w_fp[1], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[1], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25097,7 +25097,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1114 - FFV1_0( w_fp[58], w_fp[15], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[15], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25110,7 +25110,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1115 - VVV1_0( w_fp[0], w_fp[68], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[68], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25129,7 +25129,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1116 - FFV1_0( w_fp[38], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -25141,7 +25141,7 @@ namespace mg5amcCpu jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25153,7 +25153,7 @@ namespace mg5amcCpu jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25172,7 +25172,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1117 - FFV1_0( w_fp[62], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25187,7 +25187,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1118 - FFV1_0( w_fp[62], w_fp[17], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[17], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25200,7 +25200,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1119 - FFV1_0( w_fp[60], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -25215,7 +25215,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1120 - VVV1_0( w_fp[98], w_fp[1], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[1], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25234,7 +25234,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1121 - FFV1_0( w_fp[60], w_fp[17], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[17], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25247,7 +25247,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1122 - VVV1_0( w_fp[0], w_fp[59], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[59], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25266,7 +25266,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1123 - FFV1_0( w_fp[41], w_fp[2], w_fp[97], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[97], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25278,7 +25278,7 @@ namespace mg5amcCpu jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25290,7 +25290,7 
@@ namespace mg5amcCpu jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25306,12 +25306,12 @@ namespace mg5amcCpu // *** DIAGRAM 1124 OF 1240 *** // Wavefunction(s) for diagram number 1124 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[71] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[97] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[71] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[97] ); // Amplitude(s) for diagram number 1124 - VVVV1_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25331,7 +25331,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25351,7 +25351,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25371,7 +25371,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVVV1_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25391,7 +25391,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV3_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25411,7 +25411,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV4_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25431,7 +25431,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVVV1_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25451,7 +25451,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV3_0( w_fp[97], w_fp[8], 
w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25471,7 +25471,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25495,12 +25495,12 @@ namespace mg5amcCpu // *** DIAGRAM 1125 OF 1240 *** // Wavefunction(s) for diagram number 1125 - VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 0., 0., w_fp[59] ); - VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 0., 0., w_fp[20] ); - VVV1P0_1( w_fp[97], w_fp[5], COUPs[0], 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[59] ); + VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] ); + VVV1P0_1( w_fp[97], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[60] ); // Amplitude(s) for diagram number 1125 - VVV1_0( w_fp[8], w_fp[6], w_fp[59], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[59], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25520,7 +25520,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25540,7 +25540,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; 
jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25564,12 +25564,12 @@ namespace mg5amcCpu // *** DIAGRAM 1126 OF 1240 *** // Wavefunction(s) for diagram number 1126 - VVV1P0_1( w_fp[21], w_fp[6], COUPs[0], 0., 0., w_fp[17] ); - VVV1P0_1( w_fp[71], w_fp[6], COUPs[0], 0., 0., w_fp[98] ); - VVV1P0_1( w_fp[97], w_fp[6], COUPs[0], 0., 0., w_fp[111] ); + VVV1P0_1( w_fp[21], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[71], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[98] ); + VVV1P0_1( w_fp[97], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 1126 - VVV1_0( w_fp[8], w_fp[5], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25589,7 +25589,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25609,7 +25609,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25636,7 +25636,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 1127 - VVV1_0( w_fp[21], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[21], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25656,7 +25656,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[71], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25676,7 +25676,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[97], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[97], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25700,12 +25700,12 @@ namespace mg5amcCpu // *** DIAGRAM 1128 OF 1240 *** // Wavefunction(s) for diagram number 1128 - FFV1_2( w_fp[3], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - FFV1_2( w_fp[3], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); - FFV1_2( w_fp[3], w_fp[97], COUPs[1], cIPD[0], cIPD[1], w_fp[68] ); + FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); + FFV1_2( w_fp[3], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] ); // Amplitude(s) for diagram number 1128 - FFV1_0( w_fp[16], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25713,7 +25713,7 @@ namespace mg5amcCpu 
jamp_sv[91] -= amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; - FFV1_0( w_fp[10], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25721,7 +25721,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; - FFV1_0( w_fp[68], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[68], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25736,7 +25736,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1129 - FFV1_0( w_fp[3], w_fp[39], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[17], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25748,7 +25748,7 @@ namespace mg5amcCpu jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25760,7 +25760,7 @@ namespace mg5amcCpu jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -25779,7 +25779,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1130 - FFV1_0( w_fp[41], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25787,7 +25787,7 @@ namespace mg5amcCpu jamp_sv[74] -= amp_sv[0]; jamp_sv[80] -= amp_sv[0]; jamp_sv[86] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25795,7 +25795,7 @@ namespace mg5amcCpu jamp_sv[78] += amp_sv[0]; jamp_sv[80] -= amp_sv[0]; jamp_sv[84] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[39], w_fp[97], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[97], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25810,7 +25810,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1131 - FFV1_0( w_fp[16], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25818,7 +25818,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - FFV1_0( w_fp[10], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25826,7 +25826,7 @@ 
namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - FFV1_0( w_fp[68], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[68], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25841,7 +25841,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1132 - FFV1_0( w_fp[3], w_fp[47], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25853,7 +25853,7 @@ namespace mg5amcCpu jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25865,7 +25865,7 @@ namespace mg5amcCpu jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25884,7 +25884,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1133 - FFV1_0( w_fp[38], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -25892,7 +25892,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[47], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25900,7 +25900,7 @@ namespace mg5amcCpu jamp_sv[102] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[47], w_fp[97], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[97], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25912,12 +25912,12 @@ namespace mg5amcCpu // *** DIAGRAM 1134 OF 1240 *** // Wavefunction(s) for diagram number 1134 - FFV1_1( w_fp[2], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); - FFV1_1( w_fp[2], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); - FFV1_1( w_fp[2], w_fp[97], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); + FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + FFV1_1( w_fp[2], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); // Amplitude(s) for diagram number 1134 - FFV1_0( w_fp[38], w_fp[23], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[23], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25925,7 +25925,7 @@ namespace mg5amcCpu jamp_sv[7] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; jamp_sv[55] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[21], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[21], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the 
code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25933,7 +25933,7 @@ namespace mg5amcCpu jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; jamp_sv[49] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[71], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[71], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25948,7 +25948,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1135 - FFV1_0( w_fp[38], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25960,7 +25960,7 @@ namespace mg5amcCpu jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25972,7 +25972,7 @@ namespace mg5amcCpu jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25991,7 +25991,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1136 - FFV1_0( w_fp[41], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25999,7 +25999,7 @@ namespace mg5amcCpu jamp_sv[6] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; jamp_sv[54] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[21], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[21], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26007,7 +26007,7 @@ namespace mg5amcCpu jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; jamp_sv[48] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[71], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[71], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26022,7 +26022,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1137 - FFV1_0( w_fp[41], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26034,7 +26034,7 @@ namespace mg5amcCpu jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26046,7 +26046,7 @@ namespace mg5amcCpu jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); + 
FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26065,7 +26065,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1138 - FFV1_0( w_fp[3], w_fp[23], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26077,7 +26077,7 @@ namespace mg5amcCpu jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[21], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[21], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26089,7 +26089,7 @@ namespace mg5amcCpu jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[71], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[71], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26108,7 +26108,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1139 - FFV1_0( w_fp[16], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26120,7 +26120,7 @@ namespace mg5amcCpu jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += 
cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[10], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26132,7 +26132,7 @@ namespace mg5amcCpu jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[68], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[68], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26148,12 +26148,12 @@ namespace mg5amcCpu // *** DIAGRAM 1140 OF 1240 *** // Wavefunction(s) for diagram number 1140 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[68] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[29] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[10] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[68] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[29] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 1140 - VVVV1_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26173,7 +26173,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV3_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26193,7 +26193,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVVV4_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26213,7 +26213,7 @@ namespace mg5amcCpu jamp_sv[100] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVVV1_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26233,7 +26233,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVVV3_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26253,7 +26253,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV4_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26273,7 +26273,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV1_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], 
COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26293,7 +26293,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; - VVVV3_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26313,7 +26313,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV4_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26337,12 +26337,12 @@ namespace mg5amcCpu // *** DIAGRAM 1141 OF 1240 *** // Wavefunction(s) for diagram number 1141 - VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 0., 0., w_fp[16] ); - VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 0., 0., w_fp[71] ); - VVV1P0_1( w_fp[10], w_fp[4], COUPs[0], 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[10], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 1141 - VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26362,7 +26362,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[71], 
COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26382,7 +26382,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26406,12 +26406,12 @@ namespace mg5amcCpu // *** DIAGRAM 1142 OF 1240 *** // Wavefunction(s) for diagram number 1142 - VVV1P0_1( w_fp[68], w_fp[6], COUPs[0], 0., 0., w_fp[23] ); - VVV1P0_1( w_fp[29], w_fp[6], COUPs[0], 0., 0., w_fp[60] ); - VVV1P0_1( w_fp[10], w_fp[6], COUPs[0], 0., 0., w_fp[20] ); + VVV1P0_1( w_fp[68], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[29], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[10], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[20] ); // Amplitude(s) for diagram number 1142 - VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26431,7 +26431,7 @@ namespace mg5amcCpu jamp_sv[100] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[60], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[60], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26451,7 +26451,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], 
w_fp[4], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26478,7 +26478,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1143 - VVV1_0( w_fp[68], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[68], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26498,7 +26498,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[29], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[29], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26518,7 +26518,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[10], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[10], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26542,12 +26542,12 @@ namespace mg5amcCpu // *** DIAGRAM 1144 OF 1240 *** // Wavefunction(s) for diagram number 1144 - FFV1_2( w_fp[3], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[59] ); - FFV1_2( w_fp[3], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[111] ); - FFV1_2( w_fp[3], w_fp[10], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); + FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] ); + FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[111] ); + FFV1_2( w_fp[3], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for 
diagram number 1144 - FFV1_0( w_fp[59], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[59], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26555,7 +26555,7 @@ namespace mg5amcCpu jamp_sv[67] -= amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[71] += amp_sv[0]; - FFV1_0( w_fp[111], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[111], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26563,7 +26563,7 @@ namespace mg5amcCpu jamp_sv[68] += amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[70] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26578,7 +26578,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1145 - FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26590,7 +26590,7 @@ namespace mg5amcCpu jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[60], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26602,7 +26602,7 @@ namespace mg5amcCpu jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26621,7 +26621,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1146 - FFV1_0( w_fp[41], w_fp[33], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26629,7 +26629,7 @@ namespace mg5amcCpu jamp_sv[50] -= amp_sv[0]; jamp_sv[56] -= amp_sv[0]; jamp_sv[62] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26637,7 +26637,7 @@ namespace mg5amcCpu jamp_sv[54] += amp_sv[0]; jamp_sv[56] -= amp_sv[0]; jamp_sv[60] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[33], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26652,7 +26652,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1147 - FFV1_0( w_fp[59], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[59], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26660,7 +26660,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= 
amp_sv[0]; jamp_sv[113] += amp_sv[0]; - FFV1_0( w_fp[111], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[111], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26668,7 +26668,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26683,7 +26683,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1148 - FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26695,7 +26695,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26707,7 +26707,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-26726,7 +26726,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1149 - FFV1_0( w_fp[46], w_fp[47], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26734,7 +26734,7 @@ namespace mg5amcCpu jamp_sv[100] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[47], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26742,7 +26742,7 @@ namespace mg5amcCpu jamp_sv[103] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[47], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26754,12 +26754,12 @@ namespace mg5amcCpu // *** DIAGRAM 1150 OF 1240 *** // Wavefunction(s) for diagram number 1150 - FFV1_1( w_fp[2], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[17] ); - FFV1_1( w_fp[2], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[68] ); - FFV1_1( w_fp[2], w_fp[10], COUPs[1], cIPD[0], cIPD[1], w_fp[29] ); + FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] ); + FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] ); + FFV1_1( w_fp[2], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] ); // Amplitude(s) for diagram number 1150 - FFV1_0( w_fp[46], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -26767,7 +26767,7 @@ namespace mg5amcCpu jamp_sv[13] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; jamp_sv[79] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[68], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[68], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26775,7 +26775,7 @@ namespace mg5amcCpu jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; jamp_sv[73] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[29], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[29], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26790,7 +26790,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1151 - FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26802,7 +26802,7 @@ namespace mg5amcCpu jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26814,7 +26814,7 @@ namespace mg5amcCpu jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26833,7 +26833,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1152 - FFV1_0( w_fp[41], w_fp[17], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[17], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26841,7 +26841,7 @@ namespace mg5amcCpu jamp_sv[12] -= amp_sv[0]; jamp_sv[36] -= amp_sv[0]; jamp_sv[78] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[68], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[68], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26849,7 +26849,7 @@ namespace mg5amcCpu jamp_sv[26] += amp_sv[0]; jamp_sv[36] -= amp_sv[0]; jamp_sv[72] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[29], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[29], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26864,7 +26864,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1153 - FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26876,7 +26876,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26888,7 +26888,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26907,7 +26907,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1154 - FFV1_0( w_fp[3], w_fp[17], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26919,7 +26919,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[68], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[68], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26931,7 +26931,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[29], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[29], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26950,7 +26950,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1155 - FFV1_0( w_fp[59], w_fp[2], w_fp[27], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[59], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26962,7 +26962,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[111], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[111], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26974,7 +26974,7 @@ namespace mg5amcCpu jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[98], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26990,12 +26990,12 @@ namespace mg5amcCpu // *** DIAGRAM 1156 OF 1240 *** // Wavefunction(s) for diagram number 1156 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[98] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[27] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[98] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[27] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 1156 - VVVV1_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -27015,7 +27015,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27035,7 +27035,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; - VVVV4_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27055,7 +27055,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - VVVV1_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27075,7 +27075,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[97] -= amp_sv[0]; - VVVV3_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27095,7 +27095,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - VVVV4_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27115,7 +27115,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVVV1_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27135,7 +27135,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[102] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - VVVV3_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27155,7 +27155,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVVV4_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27179,12 +27179,12 @@ namespace mg5amcCpu // *** DIAGRAM 1157 OF 1240 *** // Wavefunction(s) for diagram number 1157 - VVV1P0_1( w_fp[98], w_fp[4], COUPs[0], 0., 0., w_fp[59] ); - VVV1P0_1( w_fp[27], w_fp[4], COUPs[0], 0., 0., w_fp[29] ); - VVV1P0_1( w_fp[111], w_fp[4], COUPs[0], 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[98], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[59] ); + VVV1P0_1( w_fp[27], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[29] ); + VVV1P0_1( w_fp[111], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 1157 - VVV1_0( w_fp[8], w_fp[5], w_fp[59], COUPs[0], 
&_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[59], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27204,7 +27204,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[29], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[29], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27224,7 +27224,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[68], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27248,12 +27248,12 @@ namespace mg5amcCpu // *** DIAGRAM 1158 OF 1240 *** // Wavefunction(s) for diagram number 1158 - VVV1P0_1( w_fp[98], w_fp[5], COUPs[0], 0., 0., w_fp[17] ); - VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 0., 0., w_fp[21] ); - VVV1P0_1( w_fp[111], w_fp[5], COUPs[0], 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[98], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[111], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[71] ); // Amplitude(s) for diagram number 1158 - VVV1_0( w_fp[8], w_fp[4], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27273,7 +27273,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[21], 
COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27293,7 +27293,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[71], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[71], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27320,7 +27320,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1159 - VVV1_0( w_fp[98], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27340,7 +27340,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVV1_0( w_fp[27], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27360,7 +27360,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[97] -= amp_sv[0]; - VVV1_0( w_fp[111], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[111], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27384,12 +27384,12 @@ namespace mg5amcCpu // *** DIAGRAM 1160 OF 1240 *** // Wavefunction(s) for diagram number 1160 - FFV1_2( w_fp[3], w_fp[98], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - 
FFV1_2( w_fp[3], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); - FFV1_2( w_fp[3], w_fp[111], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); + FFV1_2( w_fp[3], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); + FFV1_2( w_fp[3], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); // Amplitude(s) for diagram number 1160 - FFV1_0( w_fp[16], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27397,7 +27397,7 @@ namespace mg5amcCpu jamp_sv[61] -= amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[65] += amp_sv[0]; - FFV1_0( w_fp[20], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27405,7 +27405,7 @@ namespace mg5amcCpu jamp_sv[62] += amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[64] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27420,7 +27420,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1161 - FFV1_0( w_fp[3], w_fp[33], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[17], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27432,7 +27432,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 
) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27444,7 +27444,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27463,7 +27463,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1162 - FFV1_0( w_fp[38], w_fp[33], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27471,7 +27471,7 @@ namespace mg5amcCpu jamp_sv[52] -= amp_sv[0]; jamp_sv[58] -= amp_sv[0]; jamp_sv[68] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[33], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27479,7 +27479,7 @@ namespace mg5amcCpu jamp_sv[55] += amp_sv[0]; jamp_sv[58] -= amp_sv[0]; jamp_sv[66] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[33], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27494,7 +27494,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 1163 - FFV1_0( w_fp[16], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27502,7 +27502,7 @@ namespace mg5amcCpu jamp_sv[85] -= amp_sv[0]; jamp_sv[87] -= amp_sv[0]; jamp_sv[89] += amp_sv[0]; - FFV1_0( w_fp[20], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27510,7 +27510,7 @@ namespace mg5amcCpu jamp_sv[86] += amp_sv[0]; jamp_sv[87] -= amp_sv[0]; jamp_sv[88] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27525,7 +27525,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1164 - FFV1_0( w_fp[3], w_fp[39], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27537,7 +27537,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27549,7 +27549,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27568,7 +27568,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1165 - FFV1_0( w_fp[46], w_fp[39], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27576,7 +27576,7 @@ namespace mg5amcCpu jamp_sv[76] -= amp_sv[0]; jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27584,7 +27584,7 @@ namespace mg5amcCpu jamp_sv[79] += amp_sv[0]; jamp_sv[82] -= amp_sv[0]; jamp_sv[90] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[39], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27596,12 +27596,12 @@ namespace mg5amcCpu // *** DIAGRAM 1166 OF 1240 *** // Wavefunction(s) for diagram number 1166 - FFV1_1( w_fp[2], w_fp[98], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); - FFV1_1( w_fp[2], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); - FFV1_1( w_fp[2], w_fp[111], COUPs[1], cIPD[0], cIPD[1], w_fp[27] ); + FFV1_1( w_fp[2], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], 
cIPD[1], w_fp[98] ); + FFV1_1( w_fp[2], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] ); // Amplitude(s) for diagram number 1166 - FFV1_0( w_fp[46], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27609,7 +27609,7 @@ namespace mg5amcCpu jamp_sv[19] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[98], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[98], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27617,7 +27617,7 @@ namespace mg5amcCpu jamp_sv[29] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[27], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[27], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27632,7 +27632,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1167 - FFV1_0( w_fp[46], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27644,7 +27644,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -27656,7 +27656,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27675,7 +27675,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1168 - FFV1_0( w_fp[38], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27683,7 +27683,7 @@ namespace mg5amcCpu jamp_sv[18] -= amp_sv[0]; jamp_sv[42] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[98], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[98], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27691,7 +27691,7 @@ namespace mg5amcCpu jamp_sv[28] += amp_sv[0]; jamp_sv[42] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[27], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[27], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27706,7 +27706,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1169 - FFV1_0( w_fp[38], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -27718,7 +27718,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27730,7 +27730,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27749,7 +27749,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1170 - FFV1_0( w_fp[3], w_fp[23], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27761,7 +27761,7 @@ namespace mg5amcCpu jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[98], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[98], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27773,7 +27773,7 @@ namespace mg5amcCpu jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[27], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[3], w_fp[27], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27792,7 +27792,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1171 - FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27804,7 +27804,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[20], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27816,7 +27816,7 @@ namespace mg5amcCpu jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[60], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27832,15 +27832,15 @@ namespace mg5amcCpu // *** DIAGRAM 1172 OF 1240 *** // Wavefunction(s) for diagram number 1172 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[60] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[20] ); - FFV1_2( w_fp[3], w_fp[60], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - FFV1_2( w_fp[3], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[27] ); - FFV1_2( w_fp[3], w_fp[20], COUPs[1], cIPD[0], cIPD[1], 
w_fp[98] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[60] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[20] ); + FFV1_2( w_fp[3], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] ); + FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 1172 - FFV1_0( w_fp[16], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27848,7 +27848,7 @@ namespace mg5amcCpu jamp_sv[43] -= amp_sv[0]; jamp_sv[45] -= amp_sv[0]; jamp_sv[47] += amp_sv[0]; - FFV1_0( w_fp[27], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[27], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27856,7 +27856,7 @@ namespace mg5amcCpu jamp_sv[44] += amp_sv[0]; jamp_sv[45] -= amp_sv[0]; jamp_sv[46] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27868,12 +27868,12 @@ namespace mg5amcCpu // *** DIAGRAM 1173 OF 1240 *** // Wavefunction(s) for diagram number 1173 - VVV1P0_1( w_fp[60], w_fp[6], COUPs[0], 0., 0., w_fp[23] ); - VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 0., 0., w_fp[68] ); - VVV1P0_1( w_fp[20], w_fp[6], COUPs[0], 0., 0., w_fp[29] ); + VVV1P0_1( w_fp[60], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., 
w_fp[68] ); + VVV1P0_1( w_fp[20], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] ); // Amplitude(s) for diagram number 1173 - FFV1_0( w_fp[3], w_fp[77], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27885,7 +27885,7 @@ namespace mg5amcCpu jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27897,7 +27897,7 @@ namespace mg5amcCpu jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27916,7 +27916,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1174 - FFV1_0( w_fp[41], w_fp[77], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[60], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27924,7 +27924,7 @@ namespace mg5amcCpu jamp_sv[26] -= amp_sv[0]; jamp_sv[32] -= amp_sv[0]; jamp_sv[38] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -27932,7 +27932,7 @@ namespace mg5amcCpu jamp_sv[30] += amp_sv[0]; jamp_sv[32] -= amp_sv[0]; jamp_sv[36] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[77], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27944,12 +27944,12 @@ namespace mg5amcCpu // *** DIAGRAM 1175 OF 1240 *** // Wavefunction(s) for diagram number 1175 - FFV1_1( w_fp[2], w_fp[60], COUPs[1], cIPD[0], cIPD[1], w_fp[59] ); - FFV1_1( w_fp[2], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); - FFV1_1( w_fp[2], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_1( w_fp[2], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] ); + FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); + FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 1175 - FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27957,7 +27957,7 @@ namespace mg5amcCpu jamp_sv[15] -= amp_sv[0]; jamp_sv[61] -= amp_sv[0]; jamp_sv[85] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[71], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[71], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27965,7 +27965,7 @@ namespace mg5amcCpu jamp_sv[51] += amp_sv[0]; jamp_sv[61] -= amp_sv[0]; jamp_sv[75] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[21], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[21], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27980,7 +27980,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1176 - FFV1_0( w_fp[52], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27992,7 +27992,7 @@ namespace mg5amcCpu jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28004,7 +28004,7 @@ namespace mg5amcCpu jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28023,7 +28023,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1177 - FFV1_0( w_fp[52], w_fp[47], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[60], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28031,7 +28031,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], 
w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28039,7 +28039,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[47], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28054,7 +28054,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1178 - FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28066,7 +28066,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[85] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[71], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[71], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28078,7 +28078,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[21], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[21], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28097,7 +28097,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1179 - FFV1_0( w_fp[16], w_fp[2], 
w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28109,7 +28109,7 @@ namespace mg5amcCpu jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[27], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[27], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28121,7 +28121,7 @@ namespace mg5amcCpu jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28140,7 +28140,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1180 - VVV1_0( w_fp[60], w_fp[72], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[60], w_fp[72], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28160,7 +28160,7 @@ namespace mg5amcCpu jamp_sv[103] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVV1_0( w_fp[24], w_fp[72], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[72], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28180,7 +28180,7 @@ namespace mg5amcCpu jamp_sv[104] -= amp_sv[0]; 
jamp_sv[105] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[20], w_fp[72], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[20], w_fp[72], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28207,7 +28207,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1181 - VVVV1_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28227,7 +28227,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28247,7 +28247,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - VVVV4_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28267,7 +28267,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV1_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28287,7 +28287,7 @@ 
namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV3_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28307,7 +28307,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVVV4_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28327,7 +28327,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV1_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28347,7 +28347,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[115] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV3_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28367,7 +28367,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28391,12 +28391,12 @@ namespace mg5amcCpu // *** DIAGRAM 1182 OF 1240 *** // Wavefunction(s) for diagram number 1182 - VVV1P0_1( w_fp[60], w_fp[1], COUPs[0], 0., 0., w_fp[72] ); - VVV1P0_1( w_fp[24], w_fp[1], COUPs[0], 0., 0., w_fp[60] ); - VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 0., 0., w_fp[24] ); + VVV1P0_1( w_fp[60], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[72] ); + VVV1P0_1( w_fp[24], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 1182 - VVV1_0( w_fp[8], w_fp[6], w_fp[72], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[72], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28416,7 +28416,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28436,7 +28436,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[24], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[24], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28463,7 +28463,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1183 - VVV1_0( w_fp[1], w_fp[8], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -28483,7 +28483,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[68], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28503,7 +28503,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28530,7 +28530,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1184 - FFV1_0( w_fp[3], w_fp[47], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28542,7 +28542,7 @@ namespace mg5amcCpu jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28554,7 +28554,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28573,7 +28573,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1185 - FFV1_0( w_fp[16], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28581,7 +28581,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - FFV1_0( w_fp[27], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[27], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28589,7 +28589,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28604,7 +28604,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1186 - FFV1_0( w_fp[41], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28616,7 +28616,7 @@ namespace mg5amcCpu jamp_sv[38] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28628,7 +28628,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28647,7 +28647,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1187 - FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28655,7 +28655,7 @@ namespace mg5amcCpu jamp_sv[14] -= amp_sv[0]; jamp_sv[60] -= amp_sv[0]; jamp_sv[84] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[71], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[71], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28663,7 +28663,7 @@ namespace mg5amcCpu jamp_sv[50] += amp_sv[0]; jamp_sv[60] -= amp_sv[0]; jamp_sv[74] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[21], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[21], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28675,15 +28675,15 @@ namespace mg5amcCpu // *** DIAGRAM 1188 OF 1240 *** // Wavefunction(s) for diagram number 1188 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 
0., 0., w_fp[71] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[59] ); - FFV1_2( w_fp[3], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[24] ); - FFV1_2( w_fp[3], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); - FFV1_2( w_fp[3], w_fp[59], COUPs[1], cIPD[0], cIPD[1], w_fp[72] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[71] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[59] ); + FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] ); + FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); + FFV1_2( w_fp[3], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] ); // Amplitude(s) for diagram number 1188 - FFV1_0( w_fp[24], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[24], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28691,7 +28691,7 @@ namespace mg5amcCpu jamp_sv[37] -= amp_sv[0]; jamp_sv[39] -= amp_sv[0]; jamp_sv[41] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28699,7 +28699,7 @@ namespace mg5amcCpu jamp_sv[38] += amp_sv[0]; jamp_sv[39] -= amp_sv[0]; jamp_sv[40] += amp_sv[0]; - FFV1_0( w_fp[72], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[72], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28711,12 +28711,12 @@ namespace mg5amcCpu // *** DIAGRAM 1189 OF 1240 *** // Wavefunction(s) for diagram number 1189 - VVV1P0_1( w_fp[21], 
w_fp[5], COUPs[0], 0., 0., w_fp[98] ); - VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 0., 0., w_fp[27] ); - VVV1P0_1( w_fp[59], w_fp[5], COUPs[0], 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[98] ); + VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[27] ); + VVV1P0_1( w_fp[59], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] ); // Amplitude(s) for diagram number 1189 - FFV1_0( w_fp[3], w_fp[77], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28728,7 +28728,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28740,7 +28740,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28759,7 +28759,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1190 - FFV1_0( w_fp[38], w_fp[77], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28767,7 +28767,7 @@ namespace mg5amcCpu 
jamp_sv[28] -= amp_sv[0]; jamp_sv[34] -= amp_sv[0]; jamp_sv[44] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[77], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28775,7 +28775,7 @@ namespace mg5amcCpu jamp_sv[31] += amp_sv[0]; jamp_sv[34] -= amp_sv[0]; jamp_sv[42] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[77], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28787,12 +28787,12 @@ namespace mg5amcCpu // *** DIAGRAM 1191 OF 1240 *** // Wavefunction(s) for diagram number 1191 - FFV1_1( w_fp[2], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[29] ); - FFV1_1( w_fp[2], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[68] ); - FFV1_1( w_fp[2], w_fp[59], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] ); + FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] ); + FFV1_1( w_fp[2], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 1191 - FFV1_0( w_fp[52], w_fp[29], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[29], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28800,7 +28800,7 @@ namespace mg5amcCpu jamp_sv[21] -= amp_sv[0]; jamp_sv[67] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[68], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[68], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -28808,7 +28808,7 @@ namespace mg5amcCpu jamp_sv[53] += amp_sv[0]; jamp_sv[67] -= amp_sv[0]; jamp_sv[99] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28823,7 +28823,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1192 - FFV1_0( w_fp[52], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28835,7 +28835,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28847,7 +28847,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28866,7 +28866,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1193 - FFV1_0( w_fp[52], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28874,7 +28874,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28882,7 +28882,7 @@ namespace mg5amcCpu jamp_sv[85] += amp_sv[0]; jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[39], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28897,7 +28897,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1194 - FFV1_0( w_fp[3], w_fp[29], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[29], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28909,7 +28909,7 @@ namespace mg5amcCpu jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[68], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[68], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28921,7 +28921,7 @@ namespace mg5amcCpu jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[23], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[66], COUPs[1], 1.0, &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28940,7 +28940,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1195 - FFV1_0( w_fp[24], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[24], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28952,7 +28952,7 @@ namespace mg5amcCpu jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[60], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28964,7 +28964,7 @@ namespace mg5amcCpu jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[72], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[72], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28983,7 +28983,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1196 - VVV1_0( w_fp[21], w_fp[66], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[21], w_fp[66], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29003,7 +29003,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVV1_0( w_fp[71], w_fp[66], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], 
w_fp[66], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29023,7 +29023,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[98] -= amp_sv[0]; jamp_sv[99] += amp_sv[0]; - VVV1_0( w_fp[59], w_fp[66], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[59], w_fp[66], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29050,7 +29050,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1197 - VVVV1_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29070,7 +29070,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - VVVV3_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29090,7 +29090,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; - VVVV4_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29110,7 +29110,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; - VVVV1_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 
&_fp[0] ); + VVVV1_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29130,7 +29130,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVVV3_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29150,7 +29150,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV4_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29170,7 +29170,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV1_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29190,7 +29190,7 @@ namespace mg5amcCpu jamp_sv[94] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVVV3_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29210,7 +29210,7 @@ namespace mg5amcCpu jamp_sv[99] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; 
jamp_sv[109] += amp_sv[0]; - VVVV4_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29234,12 +29234,12 @@ namespace mg5amcCpu // *** DIAGRAM 1198 OF 1240 *** // Wavefunction(s) for diagram number 1198 - VVV1P0_1( w_fp[21], w_fp[1], COUPs[0], 0., 0., w_fp[66] ); - VVV1P0_1( w_fp[71], w_fp[1], COUPs[0], 0., 0., w_fp[21] ); - VVV1P0_1( w_fp[59], w_fp[1], COUPs[0], 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[21], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[66] ); + VVV1P0_1( w_fp[71], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[59], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[71] ); // Amplitude(s) for diagram number 1198 - VVV1_0( w_fp[8], w_fp[5], w_fp[66], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[66], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29259,7 +29259,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29279,7 +29279,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[71], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[71], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29306,7 +29306,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) 
for diagram number 1199 - VVV1_0( w_fp[1], w_fp[8], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29326,7 +29326,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29346,7 +29346,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29373,7 +29373,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1200 - FFV1_0( w_fp[3], w_fp[39], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29385,7 +29385,7 @@ namespace mg5amcCpu jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29397,7 +29397,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29416,7 +29416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1201 - FFV1_0( w_fp[24], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[24], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29424,7 +29424,7 @@ namespace mg5amcCpu jamp_sv[79] -= amp_sv[0]; jamp_sv[81] -= amp_sv[0]; jamp_sv[83] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29432,7 +29432,7 @@ namespace mg5amcCpu jamp_sv[80] += amp_sv[0]; jamp_sv[81] -= amp_sv[0]; jamp_sv[82] += amp_sv[0]; - FFV1_0( w_fp[72], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[72], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29447,7 +29447,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1202 - FFV1_0( w_fp[38], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29459,7 +29459,7 @@ namespace mg5amcCpu jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29471,7 +29471,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29490,7 +29490,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1203 - FFV1_0( w_fp[38], w_fp[29], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[29], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29498,7 +29498,7 @@ namespace mg5amcCpu jamp_sv[20] -= amp_sv[0]; jamp_sv[66] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[68], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[68], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29506,7 +29506,7 @@ namespace mg5amcCpu jamp_sv[52] += amp_sv[0]; jamp_sv[66] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[23], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[23], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29518,15 +29518,15 @@ namespace 
mg5amcCpu // *** DIAGRAM 1204 OF 1240 *** // Wavefunction(s) for diagram number 1204 - VVVV1P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[23] ); - VVVV3P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[68] ); - VVVV4P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[29] ); - FFV1_2( w_fp[3], w_fp[23], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); - FFV1_2( w_fp[3], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); - FFV1_2( w_fp[3], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[66] ); + VVVV1P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] ); + VVVV3P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[68] ); + VVVV4P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[29] ); + FFV1_2( w_fp[3], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); + FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] ); // Amplitude(s) for diagram number 1204 - FFV1_0( w_fp[71], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29534,7 +29534,7 @@ namespace mg5amcCpu jamp_sv[31] -= amp_sv[0]; jamp_sv[33] -= amp_sv[0]; jamp_sv[35] += amp_sv[0]; - FFV1_0( w_fp[21], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29542,7 +29542,7 @@ namespace mg5amcCpu jamp_sv[32] += amp_sv[0]; jamp_sv[33] -= amp_sv[0]; jamp_sv[34] += amp_sv[0]; - FFV1_0( w_fp[66], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[66], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -29554,12 +29554,12 @@ namespace mg5amcCpu // *** DIAGRAM 1205 OF 1240 *** // Wavefunction(s) for diagram number 1205 - VVV1P0_1( w_fp[23], w_fp[4], COUPs[0], 0., 0., w_fp[72] ); - VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 0., 0., w_fp[60] ); - VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 0., 0., w_fp[24] ); + VVV1P0_1( w_fp[23], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[72] ); + VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 1205 - FFV1_0( w_fp[3], w_fp[77], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29571,7 +29571,7 @@ namespace mg5amcCpu jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[60], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29583,7 +29583,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29602,7 +29602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1206 - FFV1_0( w_fp[46], w_fp[77], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[23], COUPs[1], 1.0, &_fp[0] 
); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29610,7 +29610,7 @@ namespace mg5amcCpu jamp_sv[29] -= amp_sv[0]; jamp_sv[40] -= amp_sv[0]; jamp_sv[46] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[77], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29618,7 +29618,7 @@ namespace mg5amcCpu jamp_sv[37] += amp_sv[0]; jamp_sv[40] -= amp_sv[0]; jamp_sv[43] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29630,12 +29630,12 @@ namespace mg5amcCpu // *** DIAGRAM 1207 OF 1240 *** // Wavefunction(s) for diagram number 1207 - FFV1_1( w_fp[2], w_fp[23], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); - FFV1_1( w_fp[2], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - FFV1_1( w_fp[2], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[27] ); + FFV1_1( w_fp[2], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); + FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] ); // Amplitude(s) for diagram number 1207 - FFV1_0( w_fp[52], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29643,7 +29643,7 @@ namespace mg5amcCpu jamp_sv[23] -= amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[16], w_fp[4], COUPs[1], &_fp[0] 
); + FFV1_0( w_fp[52], w_fp[16], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29651,7 +29651,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[27], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[27], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29666,7 +29666,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1208 - FFV1_0( w_fp[52], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29678,7 +29678,7 @@ namespace mg5amcCpu jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29690,7 +29690,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29709,7 +29709,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1209 - FFV1_0( 
w_fp[52], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29717,7 +29717,7 @@ namespace mg5amcCpu jamp_sv[53] -= amp_sv[0]; jamp_sv[64] -= amp_sv[0]; jamp_sv[70] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[33], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29725,7 +29725,7 @@ namespace mg5amcCpu jamp_sv[61] += amp_sv[0]; jamp_sv[64] -= amp_sv[0]; jamp_sv[67] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29740,7 +29740,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1210 - FFV1_0( w_fp[3], w_fp[77], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29752,7 +29752,7 @@ namespace mg5amcCpu jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[16], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[16], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29764,7 +29764,7 @@ namespace mg5amcCpu jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 
0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[27], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[27], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29783,7 +29783,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1211 - FFV1_0( w_fp[71], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29795,7 +29795,7 @@ namespace mg5amcCpu jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[21], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29807,7 +29807,7 @@ namespace mg5amcCpu jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[66], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[66], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29826,7 +29826,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1212 - VVV1_0( w_fp[23], w_fp[61], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[23], w_fp[61], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29846,7 +29846,7 
@@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[68], w_fp[61], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[68], w_fp[61], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29866,7 +29866,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[100] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[29], w_fp[61], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[29], w_fp[61], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29893,7 +29893,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1213 - VVVV1_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29913,7 +29913,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - VVVV3_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29933,7 +29933,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV4_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -29953,7 +29953,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV1_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29973,7 +29973,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV3_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29993,7 +29993,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30013,7 +30013,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV1_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30033,7 +30033,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV3_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code 
base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30053,7 +30053,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV4_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30077,12 +30077,12 @@ namespace mg5amcCpu // *** DIAGRAM 1214 OF 1240 *** // Wavefunction(s) for diagram number 1214 - VVV1P0_1( w_fp[23], w_fp[1], COUPs[0], 0., 0., w_fp[61] ); - VVV1P0_1( w_fp[68], w_fp[1], COUPs[0], 0., 0., w_fp[23] ); - VVV1P0_1( w_fp[29], w_fp[1], COUPs[0], 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[23], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[61] ); + VVV1P0_1( w_fp[68], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[29], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 1214 - VVV1_0( w_fp[8], w_fp[4], w_fp[61], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[61], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30102,7 +30102,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30122,7 +30122,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[68], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30149,7 +30149,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1215 - VVV1_0( w_fp[1], w_fp[8], w_fp[72], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[72], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30169,7 +30169,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[60], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[60], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30189,7 +30189,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30216,7 +30216,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1216 - FFV1_0( w_fp[3], w_fp[33], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30228,7 +30228,7 @@ namespace mg5amcCpu jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30240,7 +30240,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30259,7 +30259,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1217 - FFV1_0( w_fp[71], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30267,7 +30267,7 @@ namespace mg5amcCpu jamp_sv[55] -= amp_sv[0]; jamp_sv[57] -= amp_sv[0]; jamp_sv[59] += amp_sv[0]; - FFV1_0( w_fp[21], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30275,7 +30275,7 @@ namespace mg5amcCpu jamp_sv[56] += amp_sv[0]; jamp_sv[57] -= amp_sv[0]; jamp_sv[58] += amp_sv[0]; - FFV1_0( w_fp[66], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[66], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30290,7 +30290,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1218 - FFV1_0( w_fp[46], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30302,7 +30302,7 @@ namespace mg5amcCpu jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30314,7 +30314,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30333,7 +30333,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1219 - FFV1_0( w_fp[46], w_fp[77], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30341,7 +30341,7 @@ namespace mg5amcCpu jamp_sv[22] -= amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[16], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[16], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30349,7 +30349,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[27], w_fp[1], COUPs[1], &_fp[0] ); + 
FFV1_0( w_fp[46], w_fp[27], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30364,7 +30364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1220 - VVVV1_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30384,7 +30384,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30404,7 +30404,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30424,7 +30424,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30444,7 +30444,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV3_0( 
w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30464,7 +30464,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30484,7 +30484,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30504,7 +30504,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30524,7 +30524,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30548,12 +30548,12 @@ namespace 
mg5amcCpu // *** DIAGRAM 1221 OF 1240 *** // Wavefunction(s) for diagram number 1221 - VVV1P0_1( w_fp[0], w_fp[73], COUPs[0], 0., 0., w_fp[27] ); - VVV1P0_1( w_fp[0], w_fp[79], COUPs[0], 0., 0., w_fp[1] ); - VVV1P0_1( w_fp[0], w_fp[80], COUPs[0], 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[0], w_fp[73], COUPs[0], 1.0, 0., 0., w_fp[27] ); + VVV1P0_1( w_fp[0], w_fp[79], COUPs[0], 1.0, 0., 0., w_fp[1] ); + VVV1P0_1( w_fp[0], w_fp[80], COUPs[0], 1.0, 0., 0., w_fp[16] ); // Amplitude(s) for diagram number 1221 - VVV1_0( w_fp[8], w_fp[6], w_fp[27], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[27], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30573,7 +30573,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[1], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[1], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30593,7 +30593,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30620,7 +30620,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1222 - VVV1_0( w_fp[73], w_fp[6], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[73], w_fp[6], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30640,7 +30640,7 @@ namespace mg5amcCpu 
jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[79], w_fp[6], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[79], w_fp[6], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30660,7 +30660,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[80], w_fp[6], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[80], w_fp[6], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30687,7 +30687,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1223 - FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30699,7 +30699,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30711,7 +30711,7 @@ namespace mg5amcCpu jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -30730,7 +30730,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1224 - FFV1_0( w_fp[3], w_fp[113], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[73], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30738,7 +30738,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[113], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[79], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30746,7 +30746,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[113], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[80], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30761,7 +30761,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1225 - FFV1_0( w_fp[41], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30773,7 +30773,7 @@ namespace mg5amcCpu jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -30785,7 +30785,7 @@ namespace mg5amcCpu jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30804,7 +30804,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1226 - FFV1_0( w_fp[62], w_fp[2], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[73], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30812,7 +30812,7 @@ namespace mg5amcCpu jamp_sv[38] -= amp_sv[0]; jamp_sv[62] -= amp_sv[0]; jamp_sv[86] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30820,7 +30820,7 @@ namespace mg5amcCpu jamp_sv[56] += amp_sv[0]; jamp_sv[62] -= amp_sv[0]; jamp_sv[80] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30835,7 +30835,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1227 - VVVV1_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv 
and denominators_sv (#473) #endif @@ -30855,7 +30855,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30875,7 +30875,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30895,7 +30895,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30915,7 +30915,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30935,7 +30935,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30955,7 +30955,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30975,7 +30975,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30995,7 +30995,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31019,12 +31019,12 @@ namespace mg5amcCpu // *** DIAGRAM 1228 OF 1240 *** // Wavefunction(s) for diagram number 1228 - VVV1P0_1( w_fp[0], w_fp[57], COUPs[0], 0., 0., w_fp[62] ); - VVV1P0_1( w_fp[0], w_fp[81], COUPs[0], 0., 0., w_fp[80] ); - VVV1P0_1( w_fp[0], w_fp[82], COUPs[0], 0., 0., w_fp[79] ); + VVV1P0_1( w_fp[0], w_fp[57], COUPs[0], 1.0, 0., 0., w_fp[62] ); + VVV1P0_1( w_fp[0], w_fp[81], COUPs[0], 1.0, 0., 0., w_fp[80] ); + VVV1P0_1( w_fp[0], w_fp[82], COUPs[0], 1.0, 0., 0., w_fp[79] ); // Amplitude(s) for diagram number 1228 - VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); 
+ VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31044,7 +31044,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[80], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[80], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31064,7 +31064,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[79], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[79], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31091,7 +31091,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1229 - VVV1_0( w_fp[57], w_fp[5], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[57], w_fp[5], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31111,7 +31111,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[81], w_fp[5], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[81], w_fp[5], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31131,7 +31131,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVV1_0( w_fp[82], w_fp[5], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[82], w_fp[5], w_fp[56], 
COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31158,7 +31158,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1230 - FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31170,7 +31170,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[80], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31182,7 +31182,7 @@ namespace mg5amcCpu jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[79], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31201,7 +31201,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1231 - FFV1_0( w_fp[3], w_fp[102], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[57], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31209,7 +31209,7 @@ namespace mg5amcCpu jamp_sv[73] -= amp_sv[0]; jamp_sv[75] -= amp_sv[0]; jamp_sv[77] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[81], COUPs[1], &_fp[0] ); 
+ FFV1_0( w_fp[3], w_fp[102], w_fp[81], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31217,7 +31217,7 @@ namespace mg5amcCpu jamp_sv[74] += amp_sv[0]; jamp_sv[75] -= amp_sv[0]; jamp_sv[76] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[82], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31232,7 +31232,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1232 - FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31244,7 +31244,7 @@ namespace mg5amcCpu jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31256,7 +31256,7 @@ namespace mg5amcCpu jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31275,7 +31275,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1233 - FFV1_0( 
w_fp[104], w_fp[2], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[57], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31283,7 +31283,7 @@ namespace mg5amcCpu jamp_sv[44] -= amp_sv[0]; jamp_sv[68] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - FFV1_0( w_fp[104], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31291,7 +31291,7 @@ namespace mg5amcCpu jamp_sv[58] += amp_sv[0]; jamp_sv[68] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; - FFV1_0( w_fp[104], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31306,7 +31306,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1234 - VVVV1_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31326,7 +31326,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31346,7 +31346,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - 
VVVV4_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31366,7 +31366,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31386,7 +31386,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31406,7 +31406,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31426,7 +31426,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31446,7 +31446,7 @@ namespace 
mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31466,7 +31466,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[107] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31490,12 +31490,12 @@ namespace mg5amcCpu // *** DIAGRAM 1235 OF 1240 *** // Wavefunction(s) for diagram number 1235 - VVV1P0_1( w_fp[0], w_fp[55], COUPs[0], 0., 0., w_fp[104] ); - VVV1P0_1( w_fp[0], w_fp[83], COUPs[0], 0., 0., w_fp[82] ); - VVV1P0_1( w_fp[0], w_fp[84], COUPs[0], 0., 0., w_fp[81] ); + VVV1P0_1( w_fp[0], w_fp[55], COUPs[0], 1.0, 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[0], w_fp[83], COUPs[0], 1.0, 0., 0., w_fp[82] ); + VVV1P0_1( w_fp[0], w_fp[84], COUPs[0], 1.0, 0., 0., w_fp[81] ); // Amplitude(s) for diagram number 1235 - VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31515,7 +31515,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[82], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[82], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -31535,7 +31535,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[81], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[81], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31562,7 +31562,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1236 - VVV1_0( w_fp[55], w_fp[4], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[55], w_fp[4], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31582,7 +31582,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[83], w_fp[4], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[83], w_fp[4], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31602,7 +31602,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVV1_0( w_fp[84], w_fp[4], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[84], w_fp[4], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31629,7 +31629,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1237 - FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31641,7 +31641,7 @@ 
namespace mg5amcCpu jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[82], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31653,7 +31653,7 @@ namespace mg5amcCpu jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[81], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31672,7 +31672,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1238 - FFV1_0( w_fp[3], w_fp[114], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[55], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31680,7 +31680,7 @@ namespace mg5amcCpu jamp_sv[49] -= amp_sv[0]; jamp_sv[51] -= amp_sv[0]; jamp_sv[53] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[114], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[83], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31688,7 +31688,7 @@ namespace mg5amcCpu jamp_sv[50] += amp_sv[0]; jamp_sv[51] -= amp_sv[0]; jamp_sv[52] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[114], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[84], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -31703,7 +31703,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1239 - FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31715,7 +31715,7 @@ namespace mg5amcCpu jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31727,7 +31727,7 @@ namespace mg5amcCpu jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31746,7 +31746,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1240 - FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31754,7 +31754,7 @@ namespace mg5amcCpu jamp_sv[46] -= amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31762,7 +31762,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif diff --git a/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h index 9cea8bcbe7..9b946c21e1 100644 --- a/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -886,6 +888,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -896,6 +899,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -908,6 +912,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -920,6 +925,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, 
const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -934,6 +940,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -945,6 +952,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -959,6 +967,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -970,6 +979,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -984,6 +994,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -995,6 +1006,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -1008,6 +1020,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1042,6 +1055,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1080,6 +1094,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1103,6 +1118,7 @@ namespace mg5amcCpu FFV1_1( 
const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1134,6 +1150,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1165,6 +1182,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1197,6 +1215,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1225,6 +1244,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1260,6 +1280,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1288,6 +1309,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1323,6 +1345,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1351,6 +1374,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index ef4699c04f..53619cb35b 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g 
q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0054738521575927734  +DEBUG: model prefixing takes 0.005560159683227539  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.077 s +8 processes with 40 diagrams generated in 0.076 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -196,12 +196,12 @@ INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  @@ -210,12 +210,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  @@ -223,17 +223,17 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s Wrote files for 32 helas calls in 0.157 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.142 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.129 s +ALOHA: aloha creates 4 routines in 0.133 s FFV1 FFV1 FFV1 @@ -357,6 +357,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.537s -user 0m2.168s -sys 0m0.315s +real 0m2.611s +user 0m2.179s +sys 0m0.312s diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index 7aeea1667e..c526dd6b31 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -251,11 +251,11 @@ namespace mg5amcCpu oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - FFV1_2( w_fp[1], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 0., 0., w_fp[6] ); + FFV1_2( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -266,11 +266,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -281,10 +281,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); // 
Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -295,10 +295,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[4], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); + FFV1_1( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -312,7 +312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index ec65d2ccae..8d92e4e769 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -251,11 +251,11 @@ namespace mg5amcCpu ixxxxx( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 ); - FFV1_2( w_fp[4], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 0., 0., w_fp[6] ); + FFV1_2( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[5], 
w_fp[1], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -266,11 +266,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[4], w_fp[1], COUPs[0], 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[4], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -281,10 +281,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -295,10 +295,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[1], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); + FFV1_1( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -312,7 
+312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h b/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h index 901400d447..0dd5f20f71 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -885,6 +887,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -897,6 +900,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -910,6 +914,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //========================================================================== @@ -921,6 +926,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -944,6 +950,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], 
+ const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -975,6 +982,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1006,6 +1014,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1037,6 +1046,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 7f29ac4333..aa777faeb1 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0052490234375  +DEBUG: model prefixing takes 0.005236387252807617  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -196,7 +196,7 @@ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  DEBUG: type(subproc_group)= [output.py at line 190]  @@ -205,14 +205,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/G INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. 
-Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.147 s +ALOHA: aloha creates 2 routines in 0.141 s FFV1 FFV1 FFV1 @@ -229,6 +229,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.659s -user 0m0.603s -sys 0m0.042s +real 0m0.650s +user 0m0.592s +sys 0m0.050s diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc index 184b12db36..037662f7db 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc @@ -251,11 +251,11 @@ namespace mg5amcCpu oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - FFV1_2( w_fp[1], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 0., 0., w_fp[6] ); + FFV1_2( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -265,11 +265,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[1], w_fp[4], 
COUPs[0], 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -279,10 +279,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -292,10 +292,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[4], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); + FFV1_1( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -308,7 +308,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif diff --git 
a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc index a641bc4240..12179b9801 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc @@ -251,11 +251,11 @@ namespace mg5amcCpu ixxxxx( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 ); - FFV1_2( w_fp[4], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 0., 0., w_fp[6] ); + FFV1_2( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -265,11 +265,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[4], w_fp[1], COUPs[0], 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[4], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -279,10 +279,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], w_fp[2], w_fp[7], 
COUPs[0], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -292,10 +292,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[1], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); + FFV1_1( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -308,7 +308,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif diff --git a/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h b/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h index 901400d447..0dd5f20f71 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -885,6 +887,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] 
) ALWAYS_INLINE; @@ -897,6 +900,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -910,6 +914,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //========================================================================== @@ -921,6 +926,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -944,6 +950,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -975,6 +982,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1006,6 +1014,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1037,6 +1046,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index 28c0d86a0b..7824240731 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -146,7 +146,7 @@ INFO: Processing color information for process: g g > h HIG<=1 HIW<=1 @1 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h FileWriter for 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/. Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  @@ -165,6 +165,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.462s -user 0m0.381s -sys 0m0.050s +real 0m0.431s +user 0m0.365s +sys 0m0.056s diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.cc b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.cc index 6c47c93912..6cc0be1461 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.cc +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.cc @@ -245,7 +245,7 @@ namespace mg5amcCpu sxxxxx( momenta, +1, w_fp[2], 2 ); // Amplitude(s) for diagram number 1 - VVS3_0( w_fp[0], w_fp[1], w_fp[2], COUPs[0], &_fp[0] ); + VVS3_0( w_fp[0], w_fp[1], w_fp[2], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif diff --git a/epochX/cudacpp/heft_gg_h.sa/src/HelAmps_heft.h b/epochX/cudacpp/heft_gg_h.sa/src/HelAmps_heft.h index d35dba2369..a2e9b6a70c 100644 --- 
a/epochX/cudacpp/heft_gg_h.sa/src/HelAmps_heft.h +++ b/epochX/cudacpp/heft_gg_h.sa/src/HelAmps_heft.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allS3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //========================================================================== @@ -874,6 +875,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allS3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 85441a5025..416dbc561d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005836009979248047  +DEBUG: model prefixing takes 0.005372524261474609  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.031 s +5 processes with 7 diagrams generated in 0.029 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. 
@@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.141 s +13 processes with 76 diagrams generated in 0.133 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.806 s +65 processes with 1119 diagrams generated in 1.789 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -496,12 +496,12 @@ INFO: Combined process c c~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  @@ -510,12 +510,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  @@ -524,12 +524,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  @@ -538,12 +538,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  @@ -552,12 +552,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  72 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  72 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  @@ -566,12 +566,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  @@ -580,12 +580,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  72 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  72 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  @@ -594,12 +594,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  @@ -608,12 +608,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  72 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  72 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  @@ -622,12 +622,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  @@ -636,12 +636,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  @@ -650,12 +650,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  @@ -664,12 +664,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  @@ -678,12 +678,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  @@ -692,12 +692,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  @@ -706,12 +706,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  @@ -720,12 +720,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  @@ -734,12 +734,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1161]  +DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1] [export_cpp.py at line 711]  @@ -747,8 +747,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.266 s -Wrote files for 810 helas calls in 3.032 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.258 s +Wrote files for 810 helas calls in 2.693 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines @@ -763,7 +763,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.307 s +ALOHA: aloha creates 10 routines in 0.310 s VVV1 VVV1 FFV1 @@ -1074,6 +1074,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m9.245s -user 0m8.325s -sys 0m0.546s +real 0m8.831s +user 0m8.246s +sys 0m0.545s diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc index f3892b05e4..0317bbc95a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc @@ -246,10 +246,10 @@ namespace mg5amcCpu ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[4] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -260,10 +260,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 3 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -273,10 +273,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 3 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv 
+= cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc index 3d4424d157..75110e8fec 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc @@ -249,10 +249,10 @@ namespace mg5amcCpu ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 0., 0., w_fp[4] ); + FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc index 62e8d65a7d..f7f5899260 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc @@ -248,11 +248,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -265,10 +265,10 @@ namespace mg5amcCpu // *** DIAGRAM 
2 OF 16 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[7] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -279,10 +279,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 16 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -293,11 +293,11 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 16 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -307,10 +307,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 16 *** // Wavefunction(s) for diagram number 5 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., 
w_fp[10] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -324,7 +324,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -334,11 +334,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 16 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,7 +351,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -365,7 +365,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -375,10 +375,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 16 *** // Wavefunction(s) for diagram number 10 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[5] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -392,7 +392,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -406,7 +406,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 12 - VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -422,7 +422,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -435,7 +435,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -448,7 +448,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -461,22 +461,22 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 16 *** // Wavefunction(s) for diagram number 16 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[10] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[6] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[9] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index 91ca3b410f..90a457ac40 100644 --- 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -251,11 +251,11 @@ namespace mg5amcCpu oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - FFV1_2( w_fp[1], w_fp[0], COUPs[1], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); + FFV1_2( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -266,11 +266,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[1], w_fp[4], COUPs[1], 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -281,10 +281,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 
) denominators_sv += cxabs2( amp_sv[0] ); @@ -295,10 +295,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[4], w_fp[0], COUPs[1], 0., 0., w_fp[5] ); + FFV1_1( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -312,7 +312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index fc53543eb6..9a73b3ed94 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -251,11 +251,11 @@ namespace mg5amcCpu ixxxxx( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 ); - FFV1_2( w_fp[4], w_fp[0], COUPs[1], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); + FFV1_2( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -266,11 +266,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 
OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[4], w_fp[1], COUPs[1], 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -281,10 +281,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -295,10 +295,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[1], w_fp[0], COUPs[1], 0., 0., w_fp[5] ); + FFV1_1( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -312,7 +312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc index 4951b43b8d..dc1a3e9d26 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc @@ -251,11 +251,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - FFV1_2( w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); + FFV1_2( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -266,11 +266,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -281,10 +281,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], 
cIPD[1], w_fp[5] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -295,10 +295,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[1], w_fp[4], COUPs[1], 0., 0., w_fp[5] ); + FFV1_1( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[0], w_fp[5], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[0], w_fp[5], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -312,7 +312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[4], w_fp[7], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[7], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc index 7ea6145d0a..cbc45ff652 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc @@ -250,11 +250,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 1 - VVVV1_0( w_fp[6], w_fp[7], w_fp[4], 
w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -263,7 +263,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -272,7 +272,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -285,10 +285,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 123 *** // Wavefunction(s) for diagram number 2 - VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 2 - VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -305,10 +305,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 123 *** // Wavefunction(s) for diagram number 3 - VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 0., 0., w_fp[9] ); + VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], 
w_fp[4], w_fp[9], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -325,10 +325,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 123 *** // Wavefunction(s) for diagram number 4 - VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 4 - VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -345,11 +345,11 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 123 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -363,7 +363,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -376,10 +376,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 123 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[5], COUPs[1], cIPD[0], cIPD[1], 
w_fp[13] ); + FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -390,10 +390,10 @@ namespace mg5amcCpu // *** DIAGRAM 8 OF 123 *** // Wavefunction(s) for diagram number 8 - FFV1_1( w_fp[2], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); + FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -407,7 +407,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -420,10 +420,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 123 *** // Wavefunction(s) for diagram number 10 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[15] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -434,10 +434,10 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 123 *** // 
Wavefunction(s) for diagram number 11 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -451,7 +451,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -467,7 +467,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -481,7 +481,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -497,7 +497,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -513,7 +513,7 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -526,12 +526,12 @@ namespace mg5amcCpu // *** DIAGRAM 17 OF 123 *** // Wavefunction(s) for diagram number 17 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - FFV1_1( w_fp[12], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -541,10 +541,10 @@ namespace mg5amcCpu // *** DIAGRAM 18 OF 123 *** // Wavefunction(s) for diagram number 18 - FFV1_1( w_fp[12], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -557,7 +557,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -568,11 +568,11 @@ namespace mg5amcCpu // *** DIAGRAM 20 OF 123 *** // Wavefunction(s) for diagram number 20 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] ); // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -588,7 +588,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -602,7 +602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -613,10 +613,10 @@ namespace mg5amcCpu // *** DIAGRAM 23 OF 123 *** // Wavefunction(s) for diagram number 23 - VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 0., 0., w_fp[18] ); + VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] ); // Amplitude(s) for diagram number 23 - VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -632,7 +632,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -646,7 +646,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 25 - FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -657,10 +657,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 123 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[12], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[19] ); + FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -673,7 +673,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -686,7 +686,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 28 - FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -699,7 +699,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -712,7 +712,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 30 - FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -726,7 +726,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 31 - VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -739,22 +739,22 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 123 *** // Wavefunction(s) for diagram number 32 - VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[17] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[19] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[8] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], 
w_fp[17], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -763,12 +763,12 @@ namespace mg5amcCpu // *** DIAGRAM 33 OF 123 *** // Wavefunction(s) for diagram number 33 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[12], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -778,10 +778,10 @@ namespace mg5amcCpu // *** DIAGRAM 34 OF 123 *** // Wavefunction(s) for diagram number 34 - FFV1_2( w_fp[12], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 34 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 34 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -794,7 
+794,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -805,10 +805,10 @@ namespace mg5amcCpu // *** DIAGRAM 36 OF 123 *** // Wavefunction(s) for diagram number 36 - FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 0., 0., w_fp[22] ); + FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -824,7 +824,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 37 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 37 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -838,7 +838,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 38 - FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 38 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -852,7 +852,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 39 - VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 39 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -868,7 +868,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 40 - FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 40 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -882,7 +882,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 41 - FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 41 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -893,10 +893,10 @@ namespace mg5amcCpu // *** DIAGRAM 42 OF 123 *** // Wavefunction(s) for diagram number 42 - FFV1_2( w_fp[12], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_2( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 42 - FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 42 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -909,7 +909,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 43 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 43 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -922,7 +922,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 44 - FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 
44 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -935,7 +935,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 45 - FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 45 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -948,7 +948,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 46 - FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 46 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -962,7 +962,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 47 - VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 47 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -978,17 +978,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 48 - FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[11] -= amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; - FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -997,11 +997,11 @@ namespace 
mg5amcCpu // *** DIAGRAM 49 OF 123 *** // Wavefunction(s) for diagram number 49 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[12] ); - FFV1_2( w_fp[3], w_fp[12], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] ); + FFV1_2( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 49 - FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 49 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1012,10 +1012,10 @@ namespace mg5amcCpu // *** DIAGRAM 50 OF 123 *** // Wavefunction(s) for diagram number 50 - VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 50 - FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 50 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1031,7 +1031,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 51 - FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 51 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1042,10 +1042,10 @@ namespace mg5amcCpu // *** DIAGRAM 52 OF 123 *** // Wavefunction(s) for diagram number 52 - FFV1_1( w_fp[2], w_fp[12], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); + FFV1_1( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 52 - FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[20], w_fp[5], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 52 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1059,7 +1059,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 53 - FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 53 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1075,7 +1075,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 54 - FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 54 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1089,7 +1089,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 55 - FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 55 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1105,7 +1105,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 56 - FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 56 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1121,7 +1121,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 57 - VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 57 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1141,7 +1141,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 58 - VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1150,7 +1150,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1159,7 +1159,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1172,10 +1172,10 @@ namespace mg5amcCpu // *** DIAGRAM 59 OF 123 *** // Wavefunction(s) for diagram number 59 - VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 59 - VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 59 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1195,7 +1195,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 60 - VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 
&_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 60 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1215,7 +1215,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 61 - FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 61 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1231,7 +1231,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 62 - FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 62 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1245,7 +1245,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 63 - FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 63 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1261,7 +1261,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 64 - FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 64 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1272,11 +1272,11 @@ namespace mg5amcCpu // *** DIAGRAM 65 OF 123 *** // Wavefunction(s) for diagram number 65 - VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 0., 0., w_fp[20] ); - FFV1_2( w_fp[3], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + VVV1P0_1( w_fp[0], w_fp[5], 
COUPs[0], 1.0, 0., 0., w_fp[20] ); + FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 65 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 65 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1287,10 +1287,10 @@ namespace mg5amcCpu // *** DIAGRAM 66 OF 123 *** // Wavefunction(s) for diagram number 66 - VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 0., 0., w_fp[22] ); + VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 66 - FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 66 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1306,7 +1306,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 67 - FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 67 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1317,10 +1317,10 @@ namespace mg5amcCpu // *** DIAGRAM 68 OF 123 *** // Wavefunction(s) for diagram number 68 - FFV1_1( w_fp[2], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 68 - FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 68 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1334,7 +1334,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 69 - FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 69 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1350,7 +1350,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 70 - FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 70 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1364,7 +1364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 71 - FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 71 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1380,7 +1380,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 72 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 72 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1396,7 +1396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 73 - VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 73 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1416,7 +1416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 74 - VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[20], w_fp[1], 
w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1425,7 +1425,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1434,7 +1434,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1447,10 +1447,10 @@ namespace mg5amcCpu // *** DIAGRAM 75 OF 123 *** // Wavefunction(s) for diagram number 75 - VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 0., 0., w_fp[12] ); + VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 75 - VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 75 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1470,7 +1470,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 76 - VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 76 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1490,7 
+1490,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 77 - FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 77 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1506,7 +1506,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 78 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 78 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1520,7 +1520,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 79 - FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 79 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1536,7 +1536,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 80 - FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 80 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1547,10 +1547,10 @@ namespace mg5amcCpu // *** DIAGRAM 81 OF 123 *** // Wavefunction(s) for diagram number 81 - FFV1_1( w_fp[9], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 81 - FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 81 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1560,10 +1560,10 @@ namespace mg5amcCpu // *** DIAGRAM 82 OF 123 *** // Wavefunction(s) for diagram number 82 - FFV1_2( w_fp[15], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 82 - FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 82 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1576,7 +1576,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 83 - FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 83 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1586,10 +1586,10 @@ namespace mg5amcCpu // *** DIAGRAM 84 OF 123 *** // Wavefunction(s) for diagram number 84 - FFV1_2( w_fp[13], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 84 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 84 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1602,7 +1602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 85 - FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 85 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1613,10 +1613,10 @@ namespace mg5amcCpu 
// *** DIAGRAM 86 OF 123 *** // Wavefunction(s) for diagram number 86 - VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 86 - FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 86 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1629,10 +1629,10 @@ namespace mg5amcCpu // *** DIAGRAM 87 OF 123 *** // Wavefunction(s) for diagram number 87 - FFV1_2( w_fp[16], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); + FFV1_2( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 87 - FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 87 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1642,10 +1642,10 @@ namespace mg5amcCpu // *** DIAGRAM 88 OF 123 *** // Wavefunction(s) for diagram number 88 - FFV1_1( w_fp[11], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); + FFV1_1( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 88 - FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 88 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1658,7 +1658,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 89 - FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 89 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId 
!= 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1668,10 +1668,10 @@ namespace mg5amcCpu // *** DIAGRAM 90 OF 123 *** // Wavefunction(s) for diagram number 90 - FFV1_1( w_fp[14], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[24] ); + FFV1_1( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] ); // Amplitude(s) for diagram number 90 - FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 90 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1684,7 +1684,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 91 - FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 91 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1698,7 +1698,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 92 - FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 92 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1714,7 +1714,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 93 - VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1723,7 +1723,7 @@ namespace mg5amcCpu jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[6], 
w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1732,7 +1732,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1745,10 +1745,10 @@ namespace mg5amcCpu // *** DIAGRAM 94 OF 123 *** // Wavefunction(s) for diagram number 94 - VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 0., 0., w_fp[22] ); + VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 94 - VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 94 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1765,10 +1765,10 @@ namespace mg5amcCpu // *** DIAGRAM 95 OF 123 *** // Wavefunction(s) for diagram number 95 - VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 0., 0., w_fp[25] ); + VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 95 - VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 95 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1788,7 +1788,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 96 - FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 96 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1804,7 +1804,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 97 - FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 97 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1818,7 +1818,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 98 - FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 98 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1834,7 +1834,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 99 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 99 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1848,7 +1848,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 100 - VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1857,7 +1857,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 
) * amp_sv[0]; @@ -1866,7 +1866,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1879,10 +1879,10 @@ namespace mg5amcCpu // *** DIAGRAM 101 OF 123 *** // Wavefunction(s) for diagram number 101 - VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 101 - VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 101 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1902,7 +1902,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 102 - VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 102 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1922,7 +1922,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 103 - FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 103 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1938,7 +1938,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 104 - FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 104 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1952,7 +1952,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 105 - FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 105 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1968,7 +1968,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 106 - FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 106 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1982,7 +1982,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 107 - VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1991,7 +1991,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2000,7 +2000,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 
1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2016,7 +2016,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 108 - VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 108 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2036,7 +2036,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 109 - VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 109 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2056,7 +2056,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 110 - FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 110 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2069,7 +2069,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 111 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 111 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2082,7 +2082,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 112 - FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 112 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += 
cxabs2( amp_sv[0] ); @@ -2095,7 +2095,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 113 - FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 113 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2105,12 +2105,12 @@ namespace mg5amcCpu // *** DIAGRAM 114 OF 123 *** // Wavefunction(s) for diagram number 114 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[12] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[24] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 114 - VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2119,7 +2119,7 @@ namespace mg5amcCpu jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2128,7 +2128,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2144,17 +2144,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 115 - FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -2166,17 +2166,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 116 - FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; - FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[12] += amp_sv[0]; - FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -2185,12 +2185,12 @@ namespace mg5amcCpu // *** DIAGRAM 117 OF 123 *** // Wavefunction(s) for diagram number 117 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[13] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV3P0_1( 
w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 117 - VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2199,7 +2199,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2208,7 +2208,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2224,17 +2224,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 118 - FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; 
jamp_sv[16] += amp_sv[0]; @@ -2246,17 +2246,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 119 - FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; - FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; - FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[18] += amp_sv[0]; @@ -2265,22 +2265,22 @@ namespace mg5amcCpu // *** DIAGRAM 120 OF 123 *** // Wavefunction(s) for diagram number 120 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[15] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[13] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] ); // Amplitude(s) for diagram number 120 - FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[10] += 
amp_sv[0]; @@ -2292,17 +2292,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 121 - FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; - FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[19] += amp_sv[0]; @@ -2314,7 +2314,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 122 - VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2323,7 +2323,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2332,7 +2332,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2348,7 +2348,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 123 - VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2357,7 +2357,7 @@ namespace mg5amcCpu jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2366,7 +2366,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc index 4d62df6c3a..5723ed5665 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc @@ -253,12 +253,12 @@ namespace mg5amcCpu ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[4], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[8], w_fp[7], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 36 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); + FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[4], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[4], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -287,10 +287,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 36 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -303,10 +303,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 36 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -319,10 +319,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 36 *** // Wavefunction(s) for diagram 
number 5 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -335,11 +335,11 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 36 *** // Wavefunction(s) for diagram number 6 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[6] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -350,11 +350,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 36 *** // Wavefunction(s) for diagram number 7 - FFV1_1( w_fp[4], w_fp[1], COUPs[1], 0., 0., w_fp[10] ); - FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 0., 0., w_fp[11] ); + FFV1_1( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[5], w_fp[10], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -365,10 +365,10 @@ namespace mg5amcCpu // *** DIAGRAM 8 OF 36 *** // Wavefunction(s) for diagram number 8 - FFV1_2( w_fp[5], w_fp[1], COUPs[1], 0., 0., 
w_fp[12] ); + FFV1_2( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[12], w_fp[4], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[4], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -379,10 +379,10 @@ namespace mg5amcCpu // *** DIAGRAM 9 OF 36 *** // Wavefunction(s) for diagram number 9 - FFV1_1( w_fp[9], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_1( w_fp[9], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -396,7 +396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 10 - VVV1_0( w_fp[1], w_fp[8], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -407,11 +407,11 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 36 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 
0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -422,10 +422,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 36 *** // Wavefunction(s) for diagram number 12 - FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[5], w_fp[10], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -439,7 +439,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[12], w_fp[4], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[4], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -450,10 +450,10 @@ namespace mg5amcCpu // *** DIAGRAM 14 OF 36 *** // Wavefunction(s) for diagram number 14 - FFV1_2( w_fp[11], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); + FFV1_2( w_fp[11], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -467,7 +467,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -478,11 +478,11 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 36 *** // Wavefunction(s) 
for diagram number 16 - FFV1_1( w_fp[4], w_fp[0], COUPs[1], 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[5], w_fp[9], COUPs[1], 0., 0., w_fp[14] ); + FFV1_1( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -496,7 +496,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -510,7 +510,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -521,10 +521,10 @@ namespace mg5amcCpu // *** DIAGRAM 19 OF 36 *** // Wavefunction(s) for diagram number 19 - FFV1_1( w_fp[9], w_fp[1], COUPs[1], 0., 0., w_fp[11] ); + FFV1_1( w_fp[9], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[5], w_fp[11], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[11], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -538,7 +538,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[1], w_fp[7], 
w_fp[14], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[14], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -549,11 +549,11 @@ namespace mg5amcCpu // *** DIAGRAM 21 OF 36 *** // Wavefunction(s) for diagram number 21 - FFV1_2( w_fp[5], w_fp[0], COUPs[1], 0., 0., w_fp[14] ); - FFV1P0_3( w_fp[14], w_fp[4], COUPs[1], 0., 0., w_fp[11] ); + FFV1_2( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] ); + FFV1P0_3( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -567,7 +567,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -581,7 +581,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 23 - FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -592,10 +592,10 @@ namespace mg5amcCpu // *** DIAGRAM 24 OF 36 *** // Wavefunction(s) for diagram number 24 - FFV1_2( w_fp[14], w_fp[1], COUPs[1], 0., 0., w_fp[9] ); + FFV1_2( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[9], w_fp[4], w_fp[7], COUPs[1], &_fp[0] 
); + FFV1_0( w_fp[9], w_fp[4], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -609,7 +609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 25 - VVV1_0( w_fp[1], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -620,10 +620,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 36 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[13], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -634,10 +634,10 @@ namespace mg5amcCpu // *** DIAGRAM 27 OF 36 *** // Wavefunction(s) for diagram number 27 - VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 0., 0., w_fp[11] ); + VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -648,10 +648,10 @@ namespace mg5amcCpu // *** DIAGRAM 28 OF 36 *** // Wavefunction(s) for diagram number 28 - FFV1_2( w_fp[6], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for 
diagram number 28 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -665,7 +665,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -676,10 +676,10 @@ namespace mg5amcCpu // *** DIAGRAM 30 OF 36 *** // Wavefunction(s) for diagram number 30 - FFV1_1( w_fp[10], w_fp[0], COUPs[1], 0., 0., w_fp[6] ); + FFV1_1( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 30 - FFV1_0( w_fp[5], w_fp[6], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[6], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -690,10 +690,10 @@ namespace mg5amcCpu // *** DIAGRAM 31 OF 36 *** // Wavefunction(s) for diagram number 31 - VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 31 - FFV1_0( w_fp[5], w_fp[10], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -704,10 +704,10 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 36 *** // Wavefunction(s) for diagram number 32 - FFV1_2( w_fp[12], w_fp[0], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[12], w_fp[0], COUPs[1], 1.0, 0., 0., 
w_fp[10] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[10], w_fp[4], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[4], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -721,7 +721,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[12], w_fp[4], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[4], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -735,17 +735,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 34 - VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += 1. / 2. * amp_sv[0]; jamp_sv[2] -= 1. / 2. * amp_sv[0]; jamp_sv[9] -= 1. / 2. * amp_sv[0]; jamp_sv[10] += 1. / 2. * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += 1. / 2. * amp_sv[0]; jamp_sv[5] -= 1. / 2. * amp_sv[0]; jamp_sv[6] -= 1. / 2. * amp_sv[0]; jamp_sv[10] += 1. / 2. * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += 1. / 2. * amp_sv[0]; jamp_sv[5] -= 1. / 2. * amp_sv[0]; jamp_sv[6] -= 1. / 2. 
* amp_sv[0]; @@ -757,7 +757,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - VVV1_0( w_fp[1], w_fp[8], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -773,7 +773,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[1], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc index 2307f25625..b8f74ecafe 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc @@ -253,12 +253,12 @@ namespace mg5amcCpu oxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[1], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 36 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[1], w_fp[6], 
COUPs[1], 0., 0., w_fp[8] ); + FFV1_2( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[5], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[5], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -287,10 +287,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 36 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -303,10 +303,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 36 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -319,10 +319,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 36 *** // Wavefunction(s) for diagram number 5 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += 
cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -335,11 +335,11 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 36 *** // Wavefunction(s) for diagram number 6 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[6] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -350,11 +350,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 36 *** // Wavefunction(s) for diagram number 7 - FFV1_1( w_fp[5], w_fp[4], COUPs[1], 0., 0., w_fp[10] ); - FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 0., 0., w_fp[11] ); + FFV1_1( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[1], w_fp[10], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[10], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -365,10 +365,10 @@ namespace mg5amcCpu // *** DIAGRAM 8 OF 36 *** // Wavefunction(s) for diagram number 8 - FFV1_2( w_fp[1], w_fp[4], COUPs[1], 0., 0., w_fp[12] ); + FFV1_2( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[12], w_fp[5], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[5], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( 
amp_sv[0] ); @@ -379,10 +379,10 @@ namespace mg5amcCpu // *** DIAGRAM 9 OF 36 *** // Wavefunction(s) for diagram number 9 - FFV1_1( w_fp[9], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -396,7 +396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 10 - VVV1_0( w_fp[4], w_fp[8], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[8], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -407,11 +407,11 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 36 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -422,10 +422,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 36 *** // Wavefunction(s) for diagram number 12 - FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[1], w_fp[10], w_fp[9], COUPs[1], &_fp[0] ); + 
FFV1_0( w_fp[1], w_fp[10], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -439,7 +439,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[12], w_fp[5], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[5], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -450,10 +450,10 @@ namespace mg5amcCpu // *** DIAGRAM 14 OF 36 *** // Wavefunction(s) for diagram number 14 - FFV1_2( w_fp[11], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); + FFV1_2( w_fp[11], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -467,7 +467,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[4], w_fp[8], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[8], w_fp[9], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -478,11 +478,11 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 36 *** // Wavefunction(s) for diagram number 16 - FFV1_1( w_fp[5], w_fp[0], COUPs[1], 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[1], w_fp[9], COUPs[1], 0., 0., w_fp[14] ); + FFV1_1( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -496,7 +496,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -510,7 +510,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -521,10 +521,10 @@ namespace mg5amcCpu // *** DIAGRAM 19 OF 36 *** // Wavefunction(s) for diagram number 19 - FFV1_1( w_fp[9], w_fp[4], COUPs[1], 0., 0., w_fp[11] ); + FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[1], w_fp[11], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[11], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -538,7 +538,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[4], w_fp[7], w_fp[14], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[7], w_fp[14], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -549,11 +549,11 @@ namespace mg5amcCpu // *** DIAGRAM 21 OF 36 *** // Wavefunction(s) for diagram number 21 - FFV1_2( 
w_fp[1], w_fp[0], COUPs[1], 0., 0., w_fp[14] ); - FFV1P0_3( w_fp[14], w_fp[5], COUPs[1], 0., 0., w_fp[11] ); + FFV1_2( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] ); + FFV1P0_3( w_fp[14], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -567,7 +567,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -581,7 +581,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 23 - FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -592,10 +592,10 @@ namespace mg5amcCpu // *** DIAGRAM 24 OF 36 *** // Wavefunction(s) for diagram number 24 - FFV1_2( w_fp[14], w_fp[4], COUPs[1], 0., 0., w_fp[9] ); + FFV1_2( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[9], w_fp[5], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[5], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -609,7 +609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 25 - VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); + 
VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -620,10 +620,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 36 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[13], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -634,10 +634,10 @@ namespace mg5amcCpu // *** DIAGRAM 27 OF 36 *** // Wavefunction(s) for diagram number 27 - VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 0., 0., w_fp[11] ); + VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -648,10 +648,10 @@ namespace mg5amcCpu // *** DIAGRAM 28 OF 36 *** // Wavefunction(s) for diagram number 28 - FFV1_2( w_fp[6], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 28 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -665,7 +665,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 29 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -676,10 +676,10 @@ namespace mg5amcCpu // *** DIAGRAM 30 OF 36 *** // Wavefunction(s) for diagram number 30 - FFV1_1( w_fp[10], w_fp[0], COUPs[1], 0., 0., w_fp[6] ); + FFV1_1( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 30 - FFV1_0( w_fp[1], w_fp[6], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[6], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -690,10 +690,10 @@ namespace mg5amcCpu // *** DIAGRAM 31 OF 36 *** // Wavefunction(s) for diagram number 31 - VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 31 - FFV1_0( w_fp[1], w_fp[10], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -704,10 +704,10 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 36 *** // Wavefunction(s) for diagram number 32 - FFV1_2( w_fp[12], w_fp[0], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[12], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[10], w_fp[5], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[5], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -721,7 +721,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 33 - FFV1_0( w_fp[12], w_fp[5], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[5], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -735,17 +735,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 34 - VVVV1_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= 1. / 2. * amp_sv[0]; jamp_sv[5] += 1. / 2. * amp_sv[0]; jamp_sv[8] -= 1. / 2. * amp_sv[0]; jamp_sv[10] += 1. / 2. * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += 1. / 2. * amp_sv[0]; jamp_sv[2] -= 1. / 2. * amp_sv[0]; jamp_sv[7] += 1. / 2. * amp_sv[0]; jamp_sv[8] -= 1. / 2. * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += 1. / 2. * amp_sv[0]; jamp_sv[5] -= 1. / 2. * amp_sv[0]; jamp_sv[7] += 1. / 2. 
* amp_sv[0]; @@ -757,7 +757,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - VVV1_0( w_fp[4], w_fp[8], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[8], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -773,7 +773,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc index 305ce2fd5e..2495941a73 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc @@ -253,12 +253,12 @@ namespace mg5amcCpu ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 36 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[5], 
w_fp[6], COUPs[1], 0., 0., w_fp[8] ); + FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -287,10 +287,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 36 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -303,10 +303,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 36 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -319,10 +319,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 36 *** // Wavefunction(s) for diagram number 5 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -335,11 +335,11 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 36 *** // Wavefunction(s) for diagram number 6 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[6] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -350,11 +350,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 36 *** // Wavefunction(s) for diagram number 7 - FFV1_1( w_fp[1], w_fp[4], COUPs[1], 0., 0., w_fp[10] ); - FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 0., 0., w_fp[11] ); + FFV1_1( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[5], w_fp[10], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -365,10 +365,10 @@ namespace mg5amcCpu // *** DIAGRAM 8 OF 36 *** // Wavefunction(s) for diagram number 8 - FFV1_2( w_fp[5], w_fp[4], COUPs[1], 0., 0., w_fp[12] ); + FFV1_2( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[12], w_fp[1], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[1], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv 
+= cxabs2( amp_sv[0] ); @@ -379,10 +379,10 @@ namespace mg5amcCpu // *** DIAGRAM 9 OF 36 *** // Wavefunction(s) for diagram number 9 - FFV1_1( w_fp[9], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -396,7 +396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 10 - VVV1_0( w_fp[4], w_fp[8], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[8], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -407,11 +407,11 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 36 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -422,10 +422,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 36 *** // Wavefunction(s) for diagram number 12 - FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[5], w_fp[10], w_fp[9], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -439,7 +439,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[12], w_fp[1], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[1], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -450,10 +450,10 @@ namespace mg5amcCpu // *** DIAGRAM 14 OF 36 *** // Wavefunction(s) for diagram number 14 - FFV1_2( w_fp[11], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); + FFV1_2( w_fp[11], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -467,7 +467,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[4], w_fp[8], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[8], w_fp[9], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -478,11 +478,11 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 36 *** // Wavefunction(s) for diagram number 16 - FFV1_1( w_fp[1], w_fp[0], COUPs[1], 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[5], w_fp[9], COUPs[1], 0., 0., w_fp[14] ); + FFV1_1( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], &_fp[0] ); + 
FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -496,7 +496,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -510,7 +510,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -521,10 +521,10 @@ namespace mg5amcCpu // *** DIAGRAM 19 OF 36 *** // Wavefunction(s) for diagram number 19 - FFV1_1( w_fp[9], w_fp[4], COUPs[1], 0., 0., w_fp[11] ); + FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[5], w_fp[11], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[11], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -538,7 +538,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[4], w_fp[7], w_fp[14], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[7], w_fp[14], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -549,11 +549,11 @@ namespace mg5amcCpu // *** DIAGRAM 21 OF 36 *** // Wavefunction(s) for diagram number 21 - FFV1_2( 
w_fp[5], w_fp[0], COUPs[1], 0., 0., w_fp[14] ); - FFV1P0_3( w_fp[14], w_fp[1], COUPs[1], 0., 0., w_fp[11] ); + FFV1_2( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] ); + FFV1P0_3( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -567,7 +567,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -581,7 +581,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 23 - FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -592,10 +592,10 @@ namespace mg5amcCpu // *** DIAGRAM 24 OF 36 *** // Wavefunction(s) for diagram number 24 - FFV1_2( w_fp[14], w_fp[4], COUPs[1], 0., 0., w_fp[9] ); + FFV1_2( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[9], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -609,7 +609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 25 - VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); + 
VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -620,10 +620,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 36 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[13], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -634,10 +634,10 @@ namespace mg5amcCpu // *** DIAGRAM 27 OF 36 *** // Wavefunction(s) for diagram number 27 - VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 0., 0., w_fp[11] ); + VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -648,10 +648,10 @@ namespace mg5amcCpu // *** DIAGRAM 28 OF 36 *** // Wavefunction(s) for diagram number 28 - FFV1_2( w_fp[6], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 28 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -665,7 +665,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 29 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -676,10 +676,10 @@ namespace mg5amcCpu // *** DIAGRAM 30 OF 36 *** // Wavefunction(s) for diagram number 30 - FFV1_1( w_fp[10], w_fp[0], COUPs[1], 0., 0., w_fp[6] ); + FFV1_1( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 30 - FFV1_0( w_fp[5], w_fp[6], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[6], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -690,10 +690,10 @@ namespace mg5amcCpu // *** DIAGRAM 31 OF 36 *** // Wavefunction(s) for diagram number 31 - VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 31 - FFV1_0( w_fp[5], w_fp[10], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -704,10 +704,10 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 36 *** // Wavefunction(s) for diagram number 32 - FFV1_2( w_fp[12], w_fp[0], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[12], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -721,7 +721,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 33 - FFV1_0( w_fp[12], w_fp[1], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[1], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -735,17 +735,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 34 - VVVV1_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= 1. / 2. * amp_sv[0]; jamp_sv[7] += 1. / 2. * amp_sv[0]; jamp_sv[8] -= 1. / 2. * amp_sv[0]; jamp_sv[11] += 1. / 2. * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= 1. / 2. * amp_sv[0]; jamp_sv[3] -= 1. / 2. * amp_sv[0]; jamp_sv[7] += 1. / 2. * amp_sv[0]; jamp_sv[11] += 1. / 2. * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= 1. / 2. * amp_sv[0]; jamp_sv[3] -= 1. / 2. * amp_sv[0]; jamp_sv[4] += 1. / 2. 
* amp_sv[0]; @@ -757,7 +757,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - VVV1_0( w_fp[4], w_fp[8], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[8], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -773,7 +773,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc index 54ac36d31c..529477ff3e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc @@ -255,12 +255,12 @@ namespace mg5amcCpu oxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); - FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -273,10 +273,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 7 *** // Wavefunction(s) for diagram number 2 - FFV1_2( 
w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -289,10 +289,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 7 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -303,10 +303,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 7 *** // Wavefunction(s) for diagram number 4 - FFV1_2( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[5], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -319,10 +319,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 7 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); + FFV1_1( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[1], w_fp[3], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[3], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -335,10 +335,10 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 7 *** // Wavefunction(s) for diagram number 6 - FFV1_2( w_fp[0], w_fp[7], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,10 +351,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 7 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[0], w_fp[8], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[3], w_fp[4], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc index 34ff4139ab..e54a24ea57 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc @@ -261,12 +261,12 @@ namespace mg5amcCpu ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); - FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) 
for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -279,10 +279,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 7 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -295,10 +295,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 7 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -309,10 +309,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 7 *** // Wavefunction(s) for diagram number 4 - FFV1_2( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -325,10 +325,10 @@ namespace mg5amcCpu // *** 
DIAGRAM 5 OF 7 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); + FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -341,10 +341,10 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 7 *** // Wavefunction(s) for diagram number 6 - FFV1_2( w_fp[0], w_fp[7], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -357,10 +357,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 7 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[0], w_fp[8], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[3], w_fp[4], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc index 9dd3b1764a..8638bbefa2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc @@ -253,12 +253,12 @@ namespace mg5amcCpu oxxxxx( momenta, 0., cHel[ihel][5], +1, 
w_fp[5], 5 ); - FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 14 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -287,10 +287,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 14 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -301,10 +301,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 14 *** // Wavefunction(s) for diagram number 4 - FFV1_2( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[9] ); + FFV1_2( 
w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[9], w_fp[5], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[5], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -317,10 +317,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 14 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[9] ); + FFV1_1( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[1], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -333,12 +333,12 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 14 *** // Wavefunction(s) for diagram number 6 - FFV1P0_3( w_fp[0], w_fp[5], COUPs[1], 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[1], w_fp[4], COUPs[1], 0., 0., w_fp[6] ); - FFV1_1( w_fp[2], w_fp[9], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); + FFV1P0_3( w_fp[0], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1_1( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,10 +351,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 14 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[9], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); + FFV1_2( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); // Amplitude(s) for 
diagram number 7 - FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -370,7 +370,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -381,10 +381,10 @@ namespace mg5amcCpu // *** DIAGRAM 9 OF 14 *** // Wavefunction(s) for diagram number 9 - FFV1_2( w_fp[1], w_fp[9], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[10], w_fp[4], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[4], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -397,10 +397,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 14 *** // Wavefunction(s) for diagram number 10 - FFV1_1( w_fp[4], w_fp[9], COUPs[1], 0., 0., w_fp[10] ); + FFV1_1( w_fp[4], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[1], w_fp[10], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[10], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -413,10 +413,10 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 14 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[0], w_fp[6], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[10] ); // 
Amplitude(s) for diagram number 11 - FFV1_0( w_fp[10], w_fp[5], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[5], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -429,10 +429,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 14 *** // Wavefunction(s) for diagram number 12 - FFV1_2( w_fp[0], w_fp[8], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[10], w_fp[5], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[5], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -445,10 +445,10 @@ namespace mg5amcCpu // *** DIAGRAM 13 OF 14 *** // Wavefunction(s) for diagram number 13 - FFV1_2( w_fp[0], w_fp[7], COUPs[1], 0., 0., w_fp[6] ); + FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[6], w_fp[4], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[4], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -464,7 +464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[10], w_fp[4], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[4], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc index 8e57cf0896..c071cc6900 100644 --- 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc @@ -261,12 +261,12 @@ namespace mg5amcCpu ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); - FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -279,10 +279,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 7 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -295,10 +295,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 7 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -309,10 +309,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 7 *** // Wavefunction(s) for diagram number 4 - FFV1_2( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -325,10 +325,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 7 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[4], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); + FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -341,10 +341,10 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 7 *** // Wavefunction(s) for diagram number 6 - FFV1_2( w_fp[0], w_fp[7], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -357,10 +357,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 7 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[0], w_fp[8], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[3], w_fp[1], w_fp[7], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[3], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc index 13d360e5ad..2eb6b491fa 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc @@ -253,12 +253,12 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); - VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 36 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[0], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); + FFV1_2( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -287,10 +287,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 36 *** // Wavefunction(s) for diagram number 3 - 
FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -303,10 +303,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 36 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -319,10 +319,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 36 *** // Wavefunction(s) for diagram number 5 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -335,11 +335,11 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 36 *** // Wavefunction(s) for diagram number 6 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[3], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[6] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] ); // Amplitude(s) for diagram number 6 - FFV1_0( 
w_fp[6], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -350,11 +350,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 36 *** // Wavefunction(s) for diagram number 7 - FFV1_1( w_fp[1], w_fp[5], COUPs[1], 0., 0., w_fp[10] ); - FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 0., 0., w_fp[11] ); + FFV1_1( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[0], w_fp[10], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[0], w_fp[10], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -365,10 +365,10 @@ namespace mg5amcCpu // *** DIAGRAM 8 OF 36 *** // Wavefunction(s) for diagram number 8 - FFV1_2( w_fp[0], w_fp[5], COUPs[1], 0., 0., w_fp[12] ); + FFV1_2( w_fp[0], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[12], w_fp[1], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[1], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -379,10 +379,10 @@ namespace mg5amcCpu // *** DIAGRAM 9 OF 36 *** // Wavefunction(s) for diagram number 9 - FFV1_1( w_fp[9], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_1( w_fp[9], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); 
if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -396,7 +396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 10 - VVV1_0( w_fp[5], w_fp[8], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[8], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -407,11 +407,11 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 36 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); - FFV1_1( w_fp[2], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -422,10 +422,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 36 *** // Wavefunction(s) for diagram number 12 - FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[0], w_fp[10], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[0], w_fp[10], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -439,7 +439,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[12], w_fp[1], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[1], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -450,10 +450,10 @@ namespace mg5amcCpu // *** DIAGRAM 14 OF 36 *** // Wavefunction(s) for diagram number 14 - FFV1_2( w_fp[11], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); + FFV1_2( w_fp[11], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -467,7 +467,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[5], w_fp[8], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[8], w_fp[9], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -478,11 +478,11 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 36 *** // Wavefunction(s) for diagram number 16 - FFV1_1( w_fp[1], w_fp[4], COUPs[1], 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[0], w_fp[9], COUPs[1], 0., 0., w_fp[14] ); + FFV1_1( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[0], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -496,7 +496,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] ); 
if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -510,7 +510,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -521,10 +521,10 @@ namespace mg5amcCpu // *** DIAGRAM 19 OF 36 *** // Wavefunction(s) for diagram number 19 - FFV1_1( w_fp[9], w_fp[5], COUPs[1], 0., 0., w_fp[11] ); + FFV1_1( w_fp[9], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[0], w_fp[11], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[0], w_fp[11], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -538,7 +538,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[5], w_fp[7], w_fp[14], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[7], w_fp[14], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -549,11 +549,11 @@ namespace mg5amcCpu // *** DIAGRAM 21 OF 36 *** // Wavefunction(s) for diagram number 21 - FFV1_2( w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[14] ); - FFV1P0_3( w_fp[14], w_fp[1], COUPs[1], 0., 0., w_fp[11] ); + FFV1_2( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[14] ); + FFV1P0_3( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -567,7 +567,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -581,7 +581,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 23 - FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -592,10 +592,10 @@ namespace mg5amcCpu // *** DIAGRAM 24 OF 36 *** // Wavefunction(s) for diagram number 24 - FFV1_2( w_fp[14], w_fp[5], COUPs[1], 0., 0., w_fp[9] ); + FFV1_2( w_fp[14], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[9], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -609,7 +609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 25 - VVV1_0( w_fp[5], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -620,10 +620,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 36 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[13], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[13], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 26 - FFV1_0( 
w_fp[3], w_fp[11], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -634,10 +634,10 @@ namespace mg5amcCpu // *** DIAGRAM 27 OF 36 *** // Wavefunction(s) for diagram number 27 - VVV1P0_1( w_fp[4], w_fp[8], COUPs[0], 0., 0., w_fp[11] ); + VVV1P0_1( w_fp[4], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -648,10 +648,10 @@ namespace mg5amcCpu // *** DIAGRAM 28 OF 36 *** // Wavefunction(s) for diagram number 28 - FFV1_2( w_fp[6], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[6], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 28 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -665,7 +665,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -676,10 +676,10 @@ namespace mg5amcCpu // *** DIAGRAM 30 OF 36 *** // Wavefunction(s) for diagram number 30 - FFV1_1( w_fp[10], w_fp[4], COUPs[1], 0., 0., w_fp[6] ); + FFV1_1( w_fp[10], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] ); 
// Amplitude(s) for diagram number 30 - FFV1_0( w_fp[0], w_fp[6], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[0], w_fp[6], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -690,10 +690,10 @@ namespace mg5amcCpu // *** DIAGRAM 31 OF 36 *** // Wavefunction(s) for diagram number 31 - VVV1P0_1( w_fp[4], w_fp[7], COUPs[0], 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[4], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 31 - FFV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -704,10 +704,10 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 36 *** // Wavefunction(s) for diagram number 32 - FFV1_2( w_fp[12], w_fp[4], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -721,7 +721,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[12], w_fp[1], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[1], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -735,17 +735,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 34 - VVVV1_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], 
COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= 1. / 2. * amp_sv[0]; jamp_sv[3] += 1. / 2. * amp_sv[0]; jamp_sv[4] += 1. / 2. * amp_sv[0]; jamp_sv[5] -= 1. / 2. * amp_sv[0]; - VVVV3_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= 1. / 2. * amp_sv[0]; jamp_sv[5] -= 1. / 2. * amp_sv[0]; jamp_sv[9] += 1. / 2. * amp_sv[0]; jamp_sv[10] += 1. / 2. * amp_sv[0]; - VVVV4_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= 1. / 2. * amp_sv[0]; jamp_sv[4] -= 1. / 2. * amp_sv[0]; jamp_sv[9] += 1. / 2. * amp_sv[0]; @@ -757,7 +757,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - VVV1_0( w_fp[5], w_fp[8], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[8], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -773,7 +773,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[5], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc index dd25c56cba..8682128442 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc @@ -253,12 +253,12 @@ namespace mg5amcCpu ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); - FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( 
w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 14 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -287,10 +287,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 14 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -301,10 +301,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 14 *** // Wavefunction(s) for diagram number 4 - FFV1_2( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[9] ); + FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[9], w_fp[4], w_fp[8], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[9], w_fp[4], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -317,10 +317,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 14 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[4], w_fp[6], COUPs[1], 0., 0., w_fp[9] ); + FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -333,12 +333,12 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 14 *** // Wavefunction(s) for diagram number 6 - FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 0., 0., w_fp[6] ); - FFV1_1( w_fp[2], w_fp[9], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); + FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1_1( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,10 +351,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 14 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[9], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); + FFV1_2( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -370,7 +370,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -381,10 +381,10 @@ namespace mg5amcCpu // *** DIAGRAM 9 OF 14 *** // Wavefunction(s) for diagram number 9 - FFV1_2( w_fp[5], w_fp[9], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[10], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -397,10 +397,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 14 *** // Wavefunction(s) for diagram number 10 - FFV1_1( w_fp[1], w_fp[9], COUPs[1], 0., 0., w_fp[10] ); + FFV1_1( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[5], w_fp[10], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -413,10 +413,10 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 14 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[0], w_fp[6], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[10], w_fp[4], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[4], w_fp[8], COUPs[1], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -429,10 +429,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 14 *** // Wavefunction(s) for diagram number 12 - FFV1_2( w_fp[0], w_fp[8], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[10], w_fp[4], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[4], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -445,10 +445,10 @@ namespace mg5amcCpu // *** DIAGRAM 13 OF 14 *** // Wavefunction(s) for diagram number 13 - FFV1_2( w_fp[0], w_fp[7], COUPs[1], 0., 0., w_fp[6] ); + FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[6], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -464,7 +464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc index 61f388d16b..7d3141cfc4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc @@ 
-255,12 +255,12 @@ namespace mg5amcCpu ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); - FFV1P0_3( w_fp[4], w_fp[0], COUPs[1], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -273,10 +273,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 7 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -289,10 +289,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 7 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -303,10 +303,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 7 *** // Wavefunction(s) for diagram 
number 4 - FFV1_2( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -319,10 +319,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 7 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); + FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -335,10 +335,10 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 7 *** // Wavefunction(s) for diagram number 6 - FFV1_2( w_fp[4], w_fp[7], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[4], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[0], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[0], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,10 +351,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 7 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[4], w_fp[8], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[4], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[3], w_fp[0], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[0], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc index f0f57381de..6ec302f68b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc @@ -253,12 +253,12 @@ namespace mg5amcCpu ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); - FFV1P0_3( w_fp[4], w_fp[0], COUPs[1], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 14 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -287,10 +287,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 14 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 
0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -301,10 +301,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 14 *** // Wavefunction(s) for diagram number 4 - FFV1_2( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[9] ); + FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[9], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -317,10 +317,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 14 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[9] ); + FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -333,12 +333,12 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 14 *** // Wavefunction(s) for diagram number 6 - FFV1P0_3( w_fp[4], w_fp[1], COUPs[1], 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[5], w_fp[0], COUPs[1], 0., 0., w_fp[6] ); - FFV1_1( w_fp[2], w_fp[9], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); + FFV1P0_3( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1_1( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[10], w_fp[6], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,10 +351,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 14 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[9], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); + FFV1_2( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -370,7 +370,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -381,10 +381,10 @@ namespace mg5amcCpu // *** DIAGRAM 9 OF 14 *** // Wavefunction(s) for diagram number 9 - FFV1_2( w_fp[5], w_fp[9], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[10], w_fp[0], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[0], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -397,10 +397,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 14 *** // Wavefunction(s) for diagram number 10 - FFV1_1( w_fp[0], w_fp[9], COUPs[1], 0., 0., w_fp[10] ); + FFV1_1( w_fp[0], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 10 - 
FFV1_0( w_fp[5], w_fp[10], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -413,10 +413,10 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 14 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[4], w_fp[6], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[10], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -429,10 +429,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 14 *** // Wavefunction(s) for diagram number 12 - FFV1_2( w_fp[4], w_fp[8], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[4], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[10], w_fp[1], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[1], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -445,10 +445,10 @@ namespace mg5amcCpu // *** DIAGRAM 13 OF 14 *** // Wavefunction(s) for diagram number 13 - FFV1_2( w_fp[4], w_fp[7], COUPs[1], 0., 0., w_fp[6] ); + FFV1_2( w_fp[4], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[6], w_fp[0], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[0], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -464,7 +464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 14 - FFV1_0( w_fp[10], w_fp[0], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[0], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h b/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h index 9cea8bcbe7..9b946c21e1 100644 --- a/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -886,6 +888,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -896,6 +899,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -908,6 +912,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -920,6 +925,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -934,6 +940,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype 
allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -945,6 +952,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -959,6 +967,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -970,6 +979,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -984,6 +994,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -995,6 +1006,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -1008,6 +1020,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1042,6 +1055,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1080,6 +1094,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1103,6 +1118,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1134,6 +1150,7 @@ namespace mg5amcCpu FFV1_2( 
const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1165,6 +1182,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1197,6 +1215,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1225,6 +1244,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1260,6 +1280,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1288,6 +1309,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1323,6 +1345,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1351,6 +1374,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) From 328b18eb4c224e2fd567bdc863e5b22cf2008b68 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 10:46:43 +0200 Subject: [PATCH 029/119] [oct23av] in 8 mad directories (copy it manually also for eemumu), add bin/internal/launch_plugin.py to the git repo This has been introduced in Olivier's latest changes but I had forgotten to include it so far --- .../ee_mumu.mad/bin/internal/launch_plugin.py | 40 +++++++++++++++++++ .../gg_tt.mad/bin/internal/launch_plugin.py | 40 +++++++++++++++++++ .../bin/internal/launch_plugin.py | 40 
+++++++++++++++++++ .../gg_ttg.mad/bin/internal/launch_plugin.py | 40 +++++++++++++++++++ .../gg_ttgg.mad/bin/internal/launch_plugin.py | 40 +++++++++++++++++++ .../bin/internal/launch_plugin.py | 40 +++++++++++++++++++ .../gq_ttq.mad/bin/internal/launch_plugin.py | 40 +++++++++++++++++++ .../bin/internal/launch_plugin.py | 40 +++++++++++++++++++ 8 files changed, 320 insertions(+) create mode 100644 epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py create mode 100644 epochX/cudacpp/gg_tt.mad/bin/internal/launch_plugin.py create mode 100644 epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py create mode 100644 epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py create mode 100644 epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py create mode 100644 epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py create mode 100644 epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py create mode 100644 epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py new file mode 100644 index 0000000000..f4c9cb6334 --- /dev/null +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py @@ -0,0 +1,40 @@ + +import logging + +logger = logging.getLogger('cmdprint') # for stdout + +try: + import madgraph +except ImportError: + import internal.madevent_interface as madevent_interface + import internal.misc as misc + import internal.extended_cmd as extended_cmd +else: + import madgraph.interface.madevent_interface as madevent_interface + import madgraph.various.misc as misc + import madgraph.interface.extended_cmd as extended_cmd + +class CPPMEInterface(madevent_interface.MadEventCmdShell): + + def compile(self, *args, **opts): + """ """ + import multiprocessing + if not self.options['nb_core'] or self.options['nb_core'] == 'None': + self.options['nb_core'] = multiprocessing.cpu_count() + + if args and args[0][0] == 'madevent' 
and hasattr(self, 'run_card'): + cudacpp_backend = self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py + logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend == 'FORTRAN': + args[0][0] = 'madevent_fortran_link' + elif cudacpp_backend == 'CPP': + args[0][0] = 'madevent_cpp_link' + elif cudacpp_backend == 'CUDA': + args[0][0] = 'madevent_cuda_link' + else: + raise Exception("Invalid cudacpp_backend='%s': only 'FORTRAN', 'CPP', 'CUDA' are supported") + return misc.compile(nb_core=self.options['nb_core'], *args, **opts) + else: + return misc.compile(nb_core=self.options['nb_core'], *args, **opts) + +MEINTERFACE = CPPMEInterface diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_tt.mad/bin/internal/launch_plugin.py new file mode 100644 index 0000000000..f4c9cb6334 --- /dev/null +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/launch_plugin.py @@ -0,0 +1,40 @@ + +import logging + +logger = logging.getLogger('cmdprint') # for stdout + +try: + import madgraph +except ImportError: + import internal.madevent_interface as madevent_interface + import internal.misc as misc + import internal.extended_cmd as extended_cmd +else: + import madgraph.interface.madevent_interface as madevent_interface + import madgraph.various.misc as misc + import madgraph.interface.extended_cmd as extended_cmd + +class CPPMEInterface(madevent_interface.MadEventCmdShell): + + def compile(self, *args, **opts): + """ """ + import multiprocessing + if not self.options['nb_core'] or self.options['nb_core'] == 'None': + self.options['nb_core'] = multiprocessing.cpu_count() + + if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): + cudacpp_backend = self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py + logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend == 
'FORTRAN': + args[0][0] = 'madevent_fortran_link' + elif cudacpp_backend == 'CPP': + args[0][0] = 'madevent_cpp_link' + elif cudacpp_backend == 'CUDA': + args[0][0] = 'madevent_cuda_link' + else: + raise Exception("Invalid cudacpp_backend='%s': only 'FORTRAN', 'CPP', 'CUDA' are supported") + return misc.compile(nb_core=self.options['nb_core'], *args, **opts) + else: + return misc.compile(nb_core=self.options['nb_core'], *args, **opts) + +MEINTERFACE = CPPMEInterface diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py new file mode 100644 index 0000000000..f4c9cb6334 --- /dev/null +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py @@ -0,0 +1,40 @@ + +import logging + +logger = logging.getLogger('cmdprint') # for stdout + +try: + import madgraph +except ImportError: + import internal.madevent_interface as madevent_interface + import internal.misc as misc + import internal.extended_cmd as extended_cmd +else: + import madgraph.interface.madevent_interface as madevent_interface + import madgraph.various.misc as misc + import madgraph.interface.extended_cmd as extended_cmd + +class CPPMEInterface(madevent_interface.MadEventCmdShell): + + def compile(self, *args, **opts): + """ """ + import multiprocessing + if not self.options['nb_core'] or self.options['nb_core'] == 'None': + self.options['nb_core'] = multiprocessing.cpu_count() + + if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): + cudacpp_backend = self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py + logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend == 'FORTRAN': + args[0][0] = 'madevent_fortran_link' + elif cudacpp_backend == 'CPP': + args[0][0] = 'madevent_cpp_link' + elif cudacpp_backend == 'CUDA': + args[0][0] = 'madevent_cuda_link' + else: + raise Exception("Invalid cudacpp_backend='%s': only 
'FORTRAN', 'CPP', 'CUDA' are supported") + return misc.compile(nb_core=self.options['nb_core'], *args, **opts) + else: + return misc.compile(nb_core=self.options['nb_core'], *args, **opts) + +MEINTERFACE = CPPMEInterface diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py new file mode 100644 index 0000000000..f4c9cb6334 --- /dev/null +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py @@ -0,0 +1,40 @@ + +import logging + +logger = logging.getLogger('cmdprint') # for stdout + +try: + import madgraph +except ImportError: + import internal.madevent_interface as madevent_interface + import internal.misc as misc + import internal.extended_cmd as extended_cmd +else: + import madgraph.interface.madevent_interface as madevent_interface + import madgraph.various.misc as misc + import madgraph.interface.extended_cmd as extended_cmd + +class CPPMEInterface(madevent_interface.MadEventCmdShell): + + def compile(self, *args, **opts): + """ """ + import multiprocessing + if not self.options['nb_core'] or self.options['nb_core'] == 'None': + self.options['nb_core'] = multiprocessing.cpu_count() + + if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): + cudacpp_backend = self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py + logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend == 'FORTRAN': + args[0][0] = 'madevent_fortran_link' + elif cudacpp_backend == 'CPP': + args[0][0] = 'madevent_cpp_link' + elif cudacpp_backend == 'CUDA': + args[0][0] = 'madevent_cuda_link' + else: + raise Exception("Invalid cudacpp_backend='%s': only 'FORTRAN', 'CPP', 'CUDA' are supported") + return misc.compile(nb_core=self.options['nb_core'], *args, **opts) + else: + return misc.compile(nb_core=self.options['nb_core'], *args, **opts) + +MEINTERFACE = CPPMEInterface diff --git 
a/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py new file mode 100644 index 0000000000..f4c9cb6334 --- /dev/null +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py @@ -0,0 +1,40 @@ + +import logging + +logger = logging.getLogger('cmdprint') # for stdout + +try: + import madgraph +except ImportError: + import internal.madevent_interface as madevent_interface + import internal.misc as misc + import internal.extended_cmd as extended_cmd +else: + import madgraph.interface.madevent_interface as madevent_interface + import madgraph.various.misc as misc + import madgraph.interface.extended_cmd as extended_cmd + +class CPPMEInterface(madevent_interface.MadEventCmdShell): + + def compile(self, *args, **opts): + """ """ + import multiprocessing + if not self.options['nb_core'] or self.options['nb_core'] == 'None': + self.options['nb_core'] = multiprocessing.cpu_count() + + if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): + cudacpp_backend = self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py + logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend == 'FORTRAN': + args[0][0] = 'madevent_fortran_link' + elif cudacpp_backend == 'CPP': + args[0][0] = 'madevent_cpp_link' + elif cudacpp_backend == 'CUDA': + args[0][0] = 'madevent_cuda_link' + else: + raise Exception("Invalid cudacpp_backend='%s': only 'FORTRAN', 'CPP', 'CUDA' are supported") + return misc.compile(nb_core=self.options['nb_core'], *args, **opts) + else: + return misc.compile(nb_core=self.options['nb_core'], *args, **opts) + +MEINTERFACE = CPPMEInterface diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py new file mode 100644 index 0000000000..f4c9cb6334 --- /dev/null +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py @@ -0,0 +1,40 
@@ + +import logging + +logger = logging.getLogger('cmdprint') # for stdout + +try: + import madgraph +except ImportError: + import internal.madevent_interface as madevent_interface + import internal.misc as misc + import internal.extended_cmd as extended_cmd +else: + import madgraph.interface.madevent_interface as madevent_interface + import madgraph.various.misc as misc + import madgraph.interface.extended_cmd as extended_cmd + +class CPPMEInterface(madevent_interface.MadEventCmdShell): + + def compile(self, *args, **opts): + """ """ + import multiprocessing + if not self.options['nb_core'] or self.options['nb_core'] == 'None': + self.options['nb_core'] = multiprocessing.cpu_count() + + if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): + cudacpp_backend = self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py + logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend == 'FORTRAN': + args[0][0] = 'madevent_fortran_link' + elif cudacpp_backend == 'CPP': + args[0][0] = 'madevent_cpp_link' + elif cudacpp_backend == 'CUDA': + args[0][0] = 'madevent_cuda_link' + else: + raise Exception("Invalid cudacpp_backend='%s': only 'FORTRAN', 'CPP', 'CUDA' are supported") + return misc.compile(nb_core=self.options['nb_core'], *args, **opts) + else: + return misc.compile(nb_core=self.options['nb_core'], *args, **opts) + +MEINTERFACE = CPPMEInterface diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py new file mode 100644 index 0000000000..f4c9cb6334 --- /dev/null +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py @@ -0,0 +1,40 @@ + +import logging + +logger = logging.getLogger('cmdprint') # for stdout + +try: + import madgraph +except ImportError: + import internal.madevent_interface as madevent_interface + import internal.misc as misc + import internal.extended_cmd as extended_cmd 
+else: + import madgraph.interface.madevent_interface as madevent_interface + import madgraph.various.misc as misc + import madgraph.interface.extended_cmd as extended_cmd + +class CPPMEInterface(madevent_interface.MadEventCmdShell): + + def compile(self, *args, **opts): + """ """ + import multiprocessing + if not self.options['nb_core'] or self.options['nb_core'] == 'None': + self.options['nb_core'] = multiprocessing.cpu_count() + + if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): + cudacpp_backend = self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py + logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend == 'FORTRAN': + args[0][0] = 'madevent_fortran_link' + elif cudacpp_backend == 'CPP': + args[0][0] = 'madevent_cpp_link' + elif cudacpp_backend == 'CUDA': + args[0][0] = 'madevent_cuda_link' + else: + raise Exception("Invalid cudacpp_backend='%s': only 'FORTRAN', 'CPP', 'CUDA' are supported") + return misc.compile(nb_core=self.options['nb_core'], *args, **opts) + else: + return misc.compile(nb_core=self.options['nb_core'], *args, **opts) + +MEINTERFACE = CPPMEInterface diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py new file mode 100644 index 0000000000..f4c9cb6334 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py @@ -0,0 +1,40 @@ + +import logging + +logger = logging.getLogger('cmdprint') # for stdout + +try: + import madgraph +except ImportError: + import internal.madevent_interface as madevent_interface + import internal.misc as misc + import internal.extended_cmd as extended_cmd +else: + import madgraph.interface.madevent_interface as madevent_interface + import madgraph.various.misc as misc + import madgraph.interface.extended_cmd as extended_cmd + +class CPPMEInterface(madevent_interface.MadEventCmdShell): + + def compile(self, 
*args, **opts): + """ """ + import multiprocessing + if not self.options['nb_core'] or self.options['nb_core'] == 'None': + self.options['nb_core'] = multiprocessing.cpu_count() + + if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): + cudacpp_backend = self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py + logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) + if cudacpp_backend == 'FORTRAN': + args[0][0] = 'madevent_fortran_link' + elif cudacpp_backend == 'CPP': + args[0][0] = 'madevent_cpp_link' + elif cudacpp_backend == 'CUDA': + args[0][0] = 'madevent_cuda_link' + else: + raise Exception("Invalid cudacpp_backend='%s': only 'FORTRAN', 'CPP', 'CUDA' are supported") + return misc.compile(nb_core=self.options['nb_core'], *args, **opts) + else: + return misc.compile(nb_core=self.options['nb_core'], *args, **opts) + +MEINTERFACE = CPPMEInterface From eeab712daa9fd4dc9263c9c98a78f76f09ca362b Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 10:52:31 +0200 Subject: [PATCH 030/119] [oct23av] in 8 mad directories (create it manually also for eemumu), add SubProcesses/lib as symlink to ../lib This has been introduced in Olivier's latest changes but I had forgotten to include it so far --- epochX/cudacpp/ee_mumu.mad/SubProcesses/lib | 1 + epochX/cudacpp/gg_tt.mad/SubProcesses/lib | 1 + epochX/cudacpp/gg_tt01g.mad/SubProcesses/lib | 1 + epochX/cudacpp/gg_ttg.mad/SubProcesses/lib | 1 + epochX/cudacpp/gg_ttgg.mad/SubProcesses/lib | 1 + epochX/cudacpp/gg_ttggg.mad/SubProcesses/lib | 1 + epochX/cudacpp/gq_ttq.mad/SubProcesses/lib | 1 + epochX/cudacpp/pp_tt012j.mad/SubProcesses/lib | 1 + 8 files changed, 8 insertions(+) create mode 120000 epochX/cudacpp/ee_mumu.mad/SubProcesses/lib create mode 120000 epochX/cudacpp/gg_tt.mad/SubProcesses/lib create mode 120000 epochX/cudacpp/gg_tt01g.mad/SubProcesses/lib create mode 120000 epochX/cudacpp/gg_ttg.mad/SubProcesses/lib 
create mode 120000 epochX/cudacpp/gg_ttgg.mad/SubProcesses/lib create mode 120000 epochX/cudacpp/gg_ttggg.mad/SubProcesses/lib create mode 120000 epochX/cudacpp/gq_ttq.mad/SubProcesses/lib create mode 120000 epochX/cudacpp/pp_tt012j.mad/SubProcesses/lib diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/lib b/epochX/cudacpp/ee_mumu.mad/SubProcesses/lib new file mode 120000 index 0000000000..dc598c56dc --- /dev/null +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/lib @@ -0,0 +1 @@ +../lib \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/lib b/epochX/cudacpp/gg_tt.mad/SubProcesses/lib new file mode 120000 index 0000000000..dc598c56dc --- /dev/null +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/lib @@ -0,0 +1 @@ +../lib \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/lib b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/lib new file mode 120000 index 0000000000..dc598c56dc --- /dev/null +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/lib @@ -0,0 +1 @@ +../lib \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/lib b/epochX/cudacpp/gg_ttg.mad/SubProcesses/lib new file mode 120000 index 0000000000..dc598c56dc --- /dev/null +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/lib @@ -0,0 +1 @@ +../lib \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/lib b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/lib new file mode 120000 index 0000000000..dc598c56dc --- /dev/null +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/lib @@ -0,0 +1 @@ +../lib \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/lib b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/lib new file mode 120000 index 0000000000..dc598c56dc --- /dev/null +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/lib @@ -0,0 +1 @@ +../lib \ No newline at end of file diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/lib b/epochX/cudacpp/gq_ttq.mad/SubProcesses/lib new file mode 120000 index 
0000000000..dc598c56dc --- /dev/null +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/lib @@ -0,0 +1 @@ +../lib \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/lib b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/lib new file mode 120000 index 0000000000..dc598c56dc --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/lib @@ -0,0 +1 @@ +../lib \ No newline at end of file From a298ba050faf257c070d338ebccd3b137327fda1 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 12:32:19 +0200 Subject: [PATCH 031/119] [oct23av] regenerate 7 mad and 6 sa processes (all but eemumu) after Olivier's DY fixes The only code changes are auto_dsig1.f, generate_events and madevent in .mad directories I checked that ggtt tmad tests look ok. --- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 22 ++- .../SubProcesses/P1_gg_ttx/auto_dsig1.f | 18 +- epochX/cudacpp/gg_tt.mad/bin/generate_events | 22 ++- epochX/cudacpp/gg_tt.mad/bin/madevent | 20 +- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 11 +- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 32 ++-- .../SubProcesses/P1_gg_ttx/auto_dsig1.f | 18 +- .../SubProcesses/P2_gg_ttxg/auto_dsig1.f | 18 +- .../cudacpp/gg_tt01g.mad/bin/generate_events | 22 ++- epochX/cudacpp/gg_tt01g.mad/bin/madevent | 20 +- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 26 +-- .../SubProcesses/P1_gg_ttxg/auto_dsig1.f | 18 +- epochX/cudacpp/gg_ttg.mad/bin/generate_events | 22 ++- epochX/cudacpp/gg_ttg.mad/bin/madevent | 20 +- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 13 +- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 26 +-- .../SubProcesses/P1_gg_ttxgg/auto_dsig1.f | 18 +- .../cudacpp/gg_ttgg.mad/bin/generate_events | 22 ++- epochX/cudacpp/gg_ttgg.mad/bin/madevent | 20 +- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 15 +- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 26 +-- .../SubProcesses/P1_gg_ttxggg/auto_dsig1.f | 18 +- .../cudacpp/gg_ttggg.mad/bin/generate_events | 22 ++- epochX/cudacpp/gg_ttggg.mad/bin/madevent | 20 
+- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 15 +- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 34 ++-- .../SubProcesses/P1_gu_ttxu/auto_dsig1.f | 24 ++- .../SubProcesses/P1_gux_ttxux/auto_dsig1.f | 28 ++- epochX/cudacpp/gq_ttq.mad/bin/generate_events | 22 ++- epochX/cudacpp/gq_ttq.mad/bin/madevent | 20 +- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 14 +- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 7 +- .../CODEGEN_mad_pp_tt012j_log.txt | 175 ++++++++++++------ .../SubProcesses/P0_gg_ttx/auto_dsig1.f | 18 +- .../SubProcesses/P0_uux_ttx/auto_dsig1.f | 34 ++-- .../SubProcesses/P1_gg_ttxg/auto_dsig1.f | 18 +- .../SubProcesses/P1_gu_ttxu/auto_dsig1.f | 24 ++- .../SubProcesses/P1_gux_ttxux/auto_dsig1.f | 28 ++- .../SubProcesses/P1_uux_ttxg/auto_dsig1.f | 34 ++-- .../SubProcesses/P2_gg_ttxgg/auto_dsig1.f | 18 +- .../SubProcesses/P2_gg_ttxuux/auto_dsig1.f | 18 +- .../SubProcesses/P2_gu_ttxgu/auto_dsig1.f | 24 ++- .../SubProcesses/P2_gux_ttxgux/auto_dsig1.f | 28 ++- .../SubProcesses/P2_uc_ttxuc/auto_dsig1.f | 26 ++- .../SubProcesses/P2_ucx_ttxucx/auto_dsig1.f | 34 ++-- .../SubProcesses/P2_uu_ttxuu/auto_dsig1.f | 30 ++- .../SubProcesses/P2_uux_ttxccx/auto_dsig1.f | 34 ++-- .../SubProcesses/P2_uux_ttxgg/auto_dsig1.f | 34 ++-- .../SubProcesses/P2_uux_ttxuux/auto_dsig1.f | 34 ++-- .../SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f | 32 ++-- .../SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f | 38 ++-- .../cudacpp/pp_tt012j.mad/bin/generate_events | 22 ++- epochX/cudacpp/pp_tt012j.mad/bin/madevent | 20 +- 53 files changed, 983 insertions(+), 393 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 7ea852db91..28d3d7b93b 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 
0.0053539276123046875  +DEBUG: model prefixing takes 0.005661487579345703  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,29 +175,32 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.070 s +Wrote files for 10 helas calls in 0.099 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.145 s +ALOHA: aloha creates 2 routines in 0.143 s 
DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.131 s +ALOHA: aloha creates 4 routines in 0.129 s VVV1 FFV1 FFV1 @@ -292,6 +295,7 @@ patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done. @@ -300,6 +304,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.347s -user 0m1.984s -sys 0m0.295s +real 0m2.886s +user 0m2.115s +sys 0m0.311s diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index 3b24a9924c..0b493ae244 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -39,6 +39,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -126,11 +127,24 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gg_tt.mad/bin/generate_events b/epochX/cudacpp/gg_tt.mad/bin/generate_events index 107313b25d..5577cc66a0 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/generate_events +++ b/epochX/cudacpp/gg_tt.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME - +import misc as misc import logging import logging.config @@ -160,17 +160,31 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv + argument = sys.argv + + 
# check for plugin customization of the launch command + launch_interface = ME.MadEventCmdShell + if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + try: if '-h' in argument or '--help' in argument: - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/gg_tt.mad/bin/madevent b/epochX/cudacpp/gg_tt.mad/bin/madevent index c944aa1faf..10b6a71fa2 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/madevent +++ b/epochX/cudacpp/gg_tt.mad/bin/madevent @@ -32,6 +32,7 @@ except ImportError: import os +pjoin = os.path.join import optparse # Get the directory of the script real path (bin) @@ -160,10 +161,23 @@ except: pass import internal.madevent_interface as cmd_interface +# check for plugin customization of the launch command +launch_interface = cmd_interface.MadEventCmdShell +if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + # Call the cmd interface main loop try: if '-h' in 
args or '--help' in args: - launch = ME.MadEventCmdShell(me_dir=os.path.dirname(root_path), force_run=True) + launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): @@ -178,7 +192,7 @@ try: cmd_line.run_cmd('import command ' + input_file) cmd_line.run_cmd('quit') else: - cmd_line = cmd_interface.MadEventCmdShell(force_run=True) + cmd_line = launch_interface(force_run=True) cmd_line.use_rawinput = False cmd_line.haspiping = False cmd_line.run_cmd('import command ' + input_file) @@ -188,7 +202,7 @@ try: if options.web: cmd_line = cmd_interface.MadEventCmd(force_run=True) else: - cmd_line = cmd_interface.MadEventCmdShell(force_run=True) + cmd_line = launch_interface(force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print( parser_error) diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 814a040ad0..9a5217e178 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005364656448364258  +DEBUG: model prefixing takes 0.00539851188659668  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,14 +174,13 @@ INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.141 s +ALOHA: aloha creates 2 routines in 0.142 s VVV1 FFV1 FFV1 @@ -197,6 +196,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.541s -user 0m0.476s -sys 0m0.054s +real 0m0.567s +user 0m0.472s +sys 0m0.059s diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 82fd1eb100..95b0d68338 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~; add process g g > t t~ g No model currently active, so we import the Standard 
Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005370140075683594  +DEBUG: model prefixing takes 0.005623579025268555  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -184,41 +184,47 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory 
. FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 2 subprocesses (19 diagrams) in 0.041 s -Wrote files for 46 helas calls in 0.180 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s +Wrote files for 46 helas calls in 0.240 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.318 s +ALOHA: aloha creates 5 routines in 0.323 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -226,7 +232,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: 
aloha creates 10 routines in 0.306 s +ALOHA: aloha creates 10 routines in 0.307 s VVV1 VVV1 FFV1 @@ -326,10 +332,12 @@ patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 159 (offset 16 lines). @@ -344,6 +352,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.881s -user 0m2.519s -sys 0m0.314s +real 0m2.973s +user 0m2.615s +sys 0m0.296s diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index 3b24a9924c..0b493ae244 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -39,6 +39,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -126,11 +127,24 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f index 071034763a..68e664f70c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f @@ -39,6 +39,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -126,11 +127,24 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF 
(ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/generate_events b/epochX/cudacpp/gg_tt01g.mad/bin/generate_events index 107313b25d..5577cc66a0 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/generate_events +++ b/epochX/cudacpp/gg_tt01g.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME - +import misc as misc import logging import logging.config @@ -160,17 +160,31 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv + argument = sys.argv + + # check for plugin customization of the launch command + launch_interface = ME.MadEventCmdShell + if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + try: if '-h' in argument or '--help' in argument: - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif 
len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/madevent b/epochX/cudacpp/gg_tt01g.mad/bin/madevent index c944aa1faf..10b6a71fa2 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/madevent +++ b/epochX/cudacpp/gg_tt01g.mad/bin/madevent @@ -32,6 +32,7 @@ except ImportError: import os +pjoin = os.path.join import optparse # Get the directory of the script real path (bin) @@ -160,10 +161,23 @@ except: pass import internal.madevent_interface as cmd_interface +# check for plugin customization of the launch command +launch_interface = cmd_interface.MadEventCmdShell +if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + # Call the cmd interface main loop try: if '-h' in args or '--help' in args: - launch = ME.MadEventCmdShell(me_dir=os.path.dirname(root_path), force_run=True) + launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): @@ -178,7 +192,7 @@ try: cmd_line.run_cmd('import command ' + input_file) cmd_line.run_cmd('quit') else: - cmd_line = cmd_interface.MadEventCmdShell(force_run=True) + cmd_line = launch_interface(force_run=True) cmd_line.use_rawinput = False cmd_line.haspiping = False cmd_line.run_cmd('import command ' + input_file) @@ -188,7 
+202,7 @@ try: if options.web: cmd_line = cmd_interface.MadEventCmd(force_run=True) else: - cmd_line = cmd_interface.MadEventCmdShell(force_run=True) + cmd_line = launch_interface(force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print( parser_error) diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index 56445aa2f6..536d2538f3 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0054433345794677734  +DEBUG: model prefixing takes 0.005247592926025391  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.022 s +1 processes with 16 diagrams generated in 0.021 s Total: 1 processes with 16 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,27 +175,30 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.040 s -Wrote files for 36 helas calls in 0.124 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s +Wrote files for 36 helas calls in 0.148 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.344 s +ALOHA: aloha creates 5 routines in 0.322 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -203,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates 
VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.328 s +ALOHA: aloha creates 10 routines in 0.306 s VVV1 VVV1 FFV1 @@ -303,6 +306,7 @@ patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 159 (offset 16 lines). @@ -317,6 +321,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.355s -user 0m2.552s -sys 0m0.301s +real 0m2.814s +user 0m2.477s +sys 0m0.303s diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index 668cc26192..b8615bc68f 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -39,6 +39,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -126,11 +127,24 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + 
IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gg_ttg.mad/bin/generate_events b/epochX/cudacpp/gg_ttg.mad/bin/generate_events index 107313b25d..5577cc66a0 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/generate_events +++ b/epochX/cudacpp/gg_ttg.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME - +import misc as misc import logging import logging.config @@ -160,17 +160,31 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv + argument = sys.argv + + # check for plugin customization of the launch command + launch_interface = ME.MadEventCmdShell + if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + try: if '-h' in argument or '--help' in argument: - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/gg_ttg.mad/bin/madevent b/epochX/cudacpp/gg_ttg.mad/bin/madevent index c944aa1faf..10b6a71fa2 100755 
--- a/epochX/cudacpp/gg_ttg.mad/bin/madevent +++ b/epochX/cudacpp/gg_ttg.mad/bin/madevent @@ -32,6 +32,7 @@ except ImportError: import os +pjoin = os.path.join import optparse # Get the directory of the script real path (bin) @@ -160,10 +161,23 @@ except: pass import internal.madevent_interface as cmd_interface +# check for plugin customization of the launch command +launch_interface = cmd_interface.MadEventCmdShell +if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + # Call the cmd interface main loop try: if '-h' in args or '--help' in args: - launch = ME.MadEventCmdShell(me_dir=os.path.dirname(root_path), force_run=True) + launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): @@ -178,7 +192,7 @@ try: cmd_line.run_cmd('import command ' + input_file) cmd_line.run_cmd('quit') else: - cmd_line = cmd_interface.MadEventCmdShell(force_run=True) + cmd_line = launch_interface(force_run=True) cmd_line.use_rawinput = False cmd_line.haspiping = False cmd_line.run_cmd('import command ' + input_file) @@ -188,7 +202,7 @@ try: if options.web: cmd_line = cmd_interface.MadEventCmd(force_run=True) else: - cmd_line = cmd_interface.MadEventCmdShell(force_run=True) + cmd_line = launch_interface(force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print( parser_error) diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index f14e3d6d27..0c63c9d364 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ 
b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005347490310668945  +DEBUG: model prefixing takes 0.005433082580566406  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,9 +174,8 @@ INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. 
-Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.036 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -184,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.320 s +ALOHA: aloha creates 5 routines in 0.324 s VVV1 VVV1 FFV1 @@ -205,6 +204,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.782s -user 0m0.723s -sys 0m0.049s +real 0m0.868s +user 0m0.714s +sys 0m0.064s diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 8f353c4129..2643c37896 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0053937435150146484  +DEBUG: model prefixing takes 0.005459308624267578  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.155 s +1 processes with 123 diagrams generated in 0.154 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,27 +175,30 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.422 s -Wrote files for 222 helas calls in 0.674 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.418 s +Wrote files for 222 helas calls in 0.707 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.325 s +ALOHA: aloha creates 5 routines in 0.327 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -203,7 +206,7 @@ ALOHA: aloha creates FFV1 routines 
ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.307 s +ALOHA: aloha creates 10 routines in 0.311 s VVV1 VVV1 FFV1 @@ -306,6 +309,7 @@ patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 191 (offset 48 lines). @@ -320,6 +324,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.945s -user 0m3.541s -sys 0m0.300s +real 0m3.972s +user 0m3.563s +sys 0m0.318s diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f index d12d34daf6..0fa6436690 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f @@ -39,6 +39,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -126,11 +127,24 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN 
!LP=SIGN(1,LPP(IB(2))) - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/generate_events b/epochX/cudacpp/gg_ttgg.mad/bin/generate_events index 107313b25d..5577cc66a0 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/generate_events +++ b/epochX/cudacpp/gg_ttgg.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME - +import misc as misc import logging import logging.config @@ -160,17 +160,31 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv + argument = sys.argv + + # check for plugin customization of the launch command + launch_interface = ME.MadEventCmdShell + if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + try: if '-h' in argument or '--help' in argument: - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git 
a/epochX/cudacpp/gg_ttgg.mad/bin/madevent b/epochX/cudacpp/gg_ttgg.mad/bin/madevent index c944aa1faf..10b6a71fa2 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/madevent +++ b/epochX/cudacpp/gg_ttgg.mad/bin/madevent @@ -32,6 +32,7 @@ except ImportError: import os +pjoin = os.path.join import optparse # Get the directory of the script real path (bin) @@ -160,10 +161,23 @@ except: pass import internal.madevent_interface as cmd_interface +# check for plugin customization of the launch command +launch_interface = cmd_interface.MadEventCmdShell +if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + # Call the cmd interface main loop try: if '-h' in args or '--help' in args: - launch = ME.MadEventCmdShell(me_dir=os.path.dirname(root_path), force_run=True) + launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): @@ -178,7 +192,7 @@ try: cmd_line.run_cmd('import command ' + input_file) cmd_line.run_cmd('quit') else: - cmd_line = cmd_interface.MadEventCmdShell(force_run=True) + cmd_line = launch_interface(force_run=True) cmd_line.use_rawinput = False cmd_line.haspiping = False cmd_line.run_cmd('import command ' + input_file) @@ -188,7 +202,7 @@ try: if options.web: cmd_line = cmd_interface.MadEventCmd(force_run=True) else: - cmd_line = cmd_interface.MadEventCmdShell(force_run=True) + cmd_line = launch_interface(force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print( parser_error) diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 
37ebefdc12..838608f6d4 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005511760711669922  +DEBUG: model prefixing takes 0.0056304931640625  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.156 s +1 processes with 123 diagrams generated in 0.154 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -174,9 +174,8 @@ INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.430 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.416 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -184,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.316 s +ALOHA: aloha creates 5 routines in 0.313 s VVV1 VVV1 FFV1 @@ -208,6 +207,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m1.541s -user 0m1.391s -sys 0m0.043s +real 0m1.458s +user 0m1.362s +sys 0m0.057s diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 15a90b20cb..c8217e355b 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005498170852661133  +DEBUG: model prefixing takes 0.005827188491821289  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.836 s +1 processes with 1240 diagrams generated in 1.934 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -177,27 +177,30 @@ INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1592 term in 36s. Introduce 2768 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  1536 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 
373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 
732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py 
at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.483 s -Wrote files for 2281 helas calls in 46.276 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.465 s +Wrote files for 2281 helas calls in 46.250 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.310 s +ALOHA: aloha creates 5 routines in 0.313 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -205,7 +208,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.306 s +ALOHA: aloha creates 10 routines in 0.305 s VVV1 VVV1 FFV1 @@ -308,6 +311,7 @@ patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 255 (offset 112 lines). @@ -322,6 +326,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m57.424s -user 0m56.469s -sys 0m0.771s +real 0m57.556s +user 0m56.426s +sys 0m0.883s diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f index e6d2fc3099..6828f1c252 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f @@ -39,6 +39,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -126,11 +127,24 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/generate_events b/epochX/cudacpp/gg_ttggg.mad/bin/generate_events index 107313b25d..5577cc66a0 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/generate_events +++ b/epochX/cudacpp/gg_ttggg.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME - +import misc as misc import logging import logging.config @@ -160,17 +160,31 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - 
argument = sys.argv + argument = sys.argv + + # check for plugin customization of the launch command + launch_interface = ME.MadEventCmdShell + if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + try: if '-h' in argument or '--help' in argument: - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/madevent b/epochX/cudacpp/gg_ttggg.mad/bin/madevent index c944aa1faf..10b6a71fa2 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/madevent +++ b/epochX/cudacpp/gg_ttggg.mad/bin/madevent @@ -32,6 +32,7 @@ except ImportError: import os +pjoin = os.path.join import optparse # Get the directory of the script real path (bin) @@ -160,10 +161,23 @@ except: pass import internal.madevent_interface as cmd_interface +# check for plugin customization of the launch command +launch_interface = cmd_interface.MadEventCmdShell +if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE 
+ + + # Call the cmd interface main loop try: if '-h' in args or '--help' in args: - launch = ME.MadEventCmdShell(me_dir=os.path.dirname(root_path), force_run=True) + launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): @@ -178,7 +192,7 @@ try: cmd_line.run_cmd('import command ' + input_file) cmd_line.run_cmd('quit') else: - cmd_line = cmd_interface.MadEventCmdShell(force_run=True) + cmd_line = launch_interface(force_run=True) cmd_line.use_rawinput = False cmd_line.haspiping = False cmd_line.run_cmd('import command ' + input_file) @@ -188,7 +202,7 @@ try: if options.web: cmd_line = cmd_interface.MadEventCmd(force_run=True) else: - cmd_line = cmd_interface.MadEventCmdShell(force_run=True) + cmd_line = launch_interface(force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print( parser_error) diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 2ecbe5782c..0641c4cc00 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005434513092041016  +DEBUG: model prefixing takes 0.0052568912506103516  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.849 s +1 processes with 1240 diagrams generated in 1.837 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -174,9 +174,8 @@ INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  1536 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.440 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.446 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -184,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.340 s +ALOHA: aloha creates 5 routines in 0.341 s VVV1 VVV1 FFV1 @@ -208,6 +207,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m12.780s -user 0m12.635s -sys 0m0.096s +real 0m12.837s +user 0m12.595s +sys 0m0.100s diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 53619cb35b..4e69d362f6 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005560159683227539  +DEBUG: model prefixing takes 0.005332231521606445  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.076 s +8 processes with 40 diagrams generated in 0.077 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -197,43 +197,49 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s -Wrote files for 32 helas calls in 0.157 s +Wrote files for 32 helas calls in 0.212 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.148 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.133 s +ALOHA: aloha creates 4 routines in 0.130 s FFV1 FFV1 FFV1 @@ -329,7 +335,7 @@ patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching 
file auto_dsig1.f -Hunk #1 succeeded at 513 (offset 44 lines). +Hunk #1 succeeded at 527 (offset 58 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -341,7 +347,7 @@ Hunk #6 succeeded at 441 (offset 45 lines). Hunk #7 succeeded at 531 (offset 61 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 517 (offset 48 lines). +Hunk #1 succeeded at 527 (offset 58 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -357,6 +363,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.611s -user 0m2.179s -sys 0m0.312s +real 0m2.663s +user 0m2.232s +sys 0m0.316s diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index ca1b7c1dc5..81ab70f6d1 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION D2,U2,S2,C2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,14 +130,27 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF 
(ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)), QSCALE) + U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)), QSCALE) + S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)), QSCALE) + C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index 33e638e237..b58c5d70bd 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,18 +130,27 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + 
IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gq_ttq.mad/bin/generate_events b/epochX/cudacpp/gq_ttq.mad/bin/generate_events index 107313b25d..5577cc66a0 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/generate_events +++ b/epochX/cudacpp/gq_ttq.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME - +import misc as misc import logging import logging.config @@ -160,17 +160,31 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv + argument = sys.argv + + # check for plugin customization of the launch command + launch_interface = ME.MadEventCmdShell + if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + try: if '-h' in argument or '--help' in argument: - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except 
ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/gq_ttq.mad/bin/madevent b/epochX/cudacpp/gq_ttq.mad/bin/madevent index c944aa1faf..10b6a71fa2 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/madevent +++ b/epochX/cudacpp/gq_ttq.mad/bin/madevent @@ -32,6 +32,7 @@ except ImportError: import os +pjoin = os.path.join import optparse # Get the directory of the script real path (bin) @@ -160,10 +161,23 @@ except: pass import internal.madevent_interface as cmd_interface +# check for plugin customization of the launch command +launch_interface = cmd_interface.MadEventCmdShell +if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + # Call the cmd interface main loop try: if '-h' in args or '--help' in args: - launch = ME.MadEventCmdShell(me_dir=os.path.dirname(root_path), force_run=True) + launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): @@ -178,7 +192,7 @@ try: cmd_line.run_cmd('import command ' + input_file) cmd_line.run_cmd('quit') else: - cmd_line = cmd_interface.MadEventCmdShell(force_run=True) + cmd_line = launch_interface(force_run=True) cmd_line.use_rawinput = False cmd_line.haspiping = False cmd_line.run_cmd('import command ' + input_file) @@ -188,7 +202,7 @@ try: if options.web: cmd_line = cmd_interface.MadEventCmd(force_run=True) else: - cmd_line = cmd_interface.MadEventCmdShell(force_run=True) + cmd_line = launch_interface(force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print( parser_error) diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt 
b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index aa777faeb1..531ff7554c 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005236387252807617  +DEBUG: model prefixing takes 0.005487680435180664  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -196,7 +196,6 @@ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. 
DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  DEBUG: type(subproc_group)= [output.py at line 190]  @@ -205,14 +204,13 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/G INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. -Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.141 s +ALOHA: aloha creates 2 routines in 0.142 s FFV1 FFV1 FFV1 @@ -229,6 +227,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.650s -user 0m0.592s -sys 0m0.050s +real 0m0.657s +user 0m0.585s +sys 0m0.062s diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index 7824240731..86f147e75d 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ 
b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -146,7 +146,6 @@ INFO: Processing color information for process: g g > h HIG<=1 HIW<=1 @1 INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/. Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  @@ -165,6 +164,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.431s -user 0m0.365s -sys 0m0.056s +real 0m0.477s +user 0m0.368s +sys 0m0.055s diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 416dbc561d..c1cd27fa9e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005372524261474609  +DEBUG: model prefixing takes 0.005520343780517578  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.133 s +13 processes with 76 diagrams generated in 0.135 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.789 s +65 processes with 1119 diagrams generated in 1.791 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -497,258 +497,312 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  512 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  2 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  3 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  72 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  4 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  5 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  72 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  6 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  7 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  72 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  8 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  9 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  10 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  11 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  12 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  13 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  96 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  14 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  15 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  256 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  16 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc -DEBUG: replace_dict['den_factors'] =  36 [model_handling.py at line 1172]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1] [export_cpp.py at line 711]  DEBUG: subproc_number =  17 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.258 s -Wrote files for 810 helas calls in 2.693 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.259 s +Wrote files for 810 helas calls in 3.219 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines @@ -866,11 +920,12 @@ patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 528 (offset 59 lines). +Hunk #1 succeeded at 538 (offset 69 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). 
@@ -882,6 +937,7 @@ Hunk #6 succeeded at 402 (offset 6 lines). Hunk #7 succeeded at 466 (offset -4 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 159 (offset 16 lines). @@ -892,7 +948,7 @@ Hunk #6 succeeded at 434 (offset 38 lines). Hunk #7 succeeded at 588 (offset 118 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 513 (offset 44 lines). +Hunk #1 succeeded at 527 (offset 58 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -904,7 +960,7 @@ Hunk #6 succeeded at 428 (offset 32 lines). Hunk #7 succeeded at 518 (offset 48 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 517 (offset 48 lines). +Hunk #1 succeeded at 527 (offset 58 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -916,7 +972,7 @@ Hunk #6 succeeded at 428 (offset 32 lines). Hunk #7 succeeded at 518 (offset 48 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 528 (offset 59 lines). 
+Hunk #1 succeeded at 538 (offset 69 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -928,6 +984,7 @@ Hunk #6 succeeded at 428 (offset 32 lines). Hunk #7 succeeded at 518 (offset 48 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 191 (offset 48 lines). @@ -938,7 +995,7 @@ Hunk #6 succeeded at 830 (offset 434 lines). Hunk #7 succeeded at 1717 (offset 1247 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 502 (offset 33 lines). +Hunk #1 succeeded at 516 (offset 47 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -950,7 +1007,7 @@ Hunk #6 succeeded at 540 (offset 144 lines). Hunk #7 succeeded at 813 (offset 343 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 513 (offset 44 lines). +Hunk #1 succeeded at 527 (offset 58 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -962,7 +1019,7 @@ Hunk #6 succeeded at 540 (offset 144 lines). Hunk #7 succeeded at 815 (offset 345 lines). 
DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 517 (offset 48 lines). +Hunk #1 succeeded at 527 (offset 58 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -974,7 +1031,7 @@ Hunk #6 succeeded at 538 (offset 142 lines). Hunk #7 succeeded at 812 (offset 342 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 540 (offset 71 lines). +Hunk #1 succeeded at 554 (offset 85 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 77 (offset 5 lines). @@ -986,7 +1043,7 @@ Hunk #6 succeeded at 472 (offset 76 lines). Hunk #7 succeeded at 581 (offset 111 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 616 (offset 147 lines). +Hunk #1 succeeded at 626 (offset 157 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 83 (offset 11 lines). @@ -998,7 +1055,7 @@ Hunk #6 succeeded at 484 (offset 88 lines). Hunk #7 succeeded at 593 (offset 123 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 524 (offset 55 lines). +Hunk #1 succeeded at 538 (offset 69 lines). 
patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1010,7 +1067,7 @@ Hunk #6 succeeded at 468 (offset 72 lines). Hunk #7 succeeded at 620 (offset 150 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 616 (offset 147 lines). +Hunk #1 succeeded at 626 (offset 157 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 83 (offset 11 lines). @@ -1022,7 +1079,7 @@ Hunk #6 succeeded at 484 (offset 88 lines). Hunk #7 succeeded at 593 (offset 123 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 528 (offset 59 lines). +Hunk #1 succeeded at 538 (offset 69 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1034,7 +1091,7 @@ Hunk #6 succeeded at 540 (offset 144 lines). Hunk #7 succeeded at 821 (offset 351 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 528 (offset 59 lines). +Hunk #1 succeeded at 538 (offset 69 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1046,7 +1103,7 @@ Hunk #6 succeeded at 468 (offset 72 lines). Hunk #7 succeeded at 620 (offset 150 lines). 
DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 546 (offset 77 lines). +Hunk #1 succeeded at 554 (offset 85 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 77 (offset 5 lines). @@ -1058,7 +1115,7 @@ Hunk #6 succeeded at 472 (offset 76 lines). Hunk #7 succeeded at 581 (offset 111 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 532 (offset 63 lines). +Hunk #1 succeeded at 538 (offset 69 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1074,6 +1131,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m8.831s -user 0m8.246s -sys 0m0.545s +real 0m9.356s +user 0m8.764s +sys 0m0.556s diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f index b68450743c..ee723193db 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f @@ -39,6 +39,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -126,11 +127,24 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f index 8310241f21..f205954b28 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,21 +130,30 @@ DOUBLE PRECISION FUNCTION 
DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index 668cc26192..b8615bc68f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -39,6 +39,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -126,11 
+127,24 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index ca1b7c1dc5..81ab70f6d1 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION D2,U2,S2,C2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,14 +130,27 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - S2=PDG2PDF(LPP(IB(2)),3, 
IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)), QSCALE) + U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)), QSCALE) + S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)), QSCALE) + C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index 33e638e237..b58c5d70bd 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,18 +130,27 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + 
UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f index f2902c7183..d85b1143a0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,21 +130,30 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), 
QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f index 3f0b6e29c5..4d2e1b4f8c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f @@ -39,6 +39,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -126,11 +127,24 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f index 67decfd0d7..67adf83921 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION 
G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,11 +130,24 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f index 58e83991fd..83a2a24681 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION D2,U2,S2,C2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,14 +130,27 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - 
S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)), QSCALE) + U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)), QSCALE) + S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)), QSCALE) + C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f index 56e24ed83e..8cb3f9af60 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,18 +130,27 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, 
IB(2),XBK(IB(2)), QSCALE) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f index c68a9f5a67..3488dfd2e6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f @@ -44,6 +44,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,C1 DOUBLE PRECISION D2,S2,C2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -131,15 +132,28 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)), QSCALE) + S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)), QSCALE) + C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f index 1044310fc4..0b6e873ee4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f @@ -50,6 +50,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -137,21 +138,30 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) + 
DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f index 25de63622f..5ed7bc881f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION D2,U2,S2,C2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,17 +130,30 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)), QSCALE) + U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)), QSCALE) + S2=PDG2PDF(LPP(IB(2)),3, 
IB(2),XBK(IB(2)), QSCALE) + C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f index 300733b34c..a32595dce6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f @@ -50,6 +50,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -137,21 +138,30 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + 
SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f index c9d97c2911..baaee299a2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,21 +130,30 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + 
QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f index c3b97a2a87..c2206e8d5e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,21 +130,30 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, 
IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f index adb807b78c..e92ee65fd7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f @@ -44,6 +44,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION CX1,UX1,DX1 DOUBLE PRECISION CX2,SX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -131,21 +132,28 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - CX1=PDG2PDF(LPP(IB(1)),-4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) - $ )) - UX1=PDG2PDF(LPP(IB(1)),-2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) - $ )) - DX1=PDG2PDF(LPP(IB(1)),-1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) - $ )) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + CX1=PDG2PDF(LPP(IB(1)),-4, IB(1),XBK(IB(1)), QSCALE) + UX1=PDG2PDF(LPP(IB(1)),-2, IB(1),XBK(IB(1)), QSCALE) + DX1=PDG2PDF(LPP(IB(1)),-1, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + 
QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f index 68d329862c..cad7f4197d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION CX1,SX1,UX1,DX1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,25 +130,30 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - CX1=PDG2PDF(LPP(IB(1)),-4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) - $ )) - SX1=PDG2PDF(LPP(IB(1)),-3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) - $ )) - UX1=PDG2PDF(LPP(IB(1)),-2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) - $ )) - DX1=PDG2PDF(LPP(IB(1)),-1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) - $ )) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + CX1=PDG2PDF(LPP(IB(1)),-4, IB(1),XBK(IB(1)), QSCALE) + SX1=PDG2PDF(LPP(IB(1)),-3, IB(1),XBK(IB(1)), QSCALE) + UX1=PDG2PDF(LPP(IB(1)),-2, IB(1),XBK(IB(1)), QSCALE) + DX1=PDG2PDF(LPP(IB(1)),-1, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, 
IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/generate_events b/epochX/cudacpp/pp_tt012j.mad/bin/generate_events index 107313b25d..5577cc66a0 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/generate_events +++ b/epochX/cudacpp/pp_tt012j.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME - +import misc as misc import logging import logging.config @@ -160,17 +160,31 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv + argument = sys.argv + + # check for plugin customization of the launch command + launch_interface = ME.MadEventCmdShell + if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + try: if '-h' in argument or '--help' in argument: - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' 
% ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/madevent b/epochX/cudacpp/pp_tt012j.mad/bin/madevent index c944aa1faf..10b6a71fa2 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/madevent +++ b/epochX/cudacpp/pp_tt012j.mad/bin/madevent @@ -32,6 +32,7 @@ except ImportError: import os +pjoin = os.path.join import optparse # Get the directory of the script real path (bin) @@ -160,10 +161,23 @@ except: pass import internal.madevent_interface as cmd_interface +# check for plugin customization of the launch command +launch_interface = cmd_interface.MadEventCmdShell +if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + # Call the cmd interface main loop try: if '-h' in args or '--help' in args: - launch = ME.MadEventCmdShell(me_dir=os.path.dirname(root_path), force_run=True) + launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): @@ -178,7 +192,7 @@ try: cmd_line.run_cmd('import command ' + input_file) cmd_line.run_cmd('quit') else: - cmd_line = cmd_interface.MadEventCmdShell(force_run=True) + cmd_line = launch_interface(force_run=True) cmd_line.use_rawinput = False cmd_line.haspiping = False cmd_line.run_cmd('import command ' + input_file) @@ -188,7 +202,7 @@ try: if options.web: cmd_line = cmd_interface.MadEventCmd(force_run=True) else: - cmd_line = cmd_interface.MadEventCmdShell(force_run=True) + cmd_line = launch_interface(force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print( parser_error) From 
2037678d4a7bec375c42589e1e98f344fda65911 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 12:47:17 +0200 Subject: [PATCH 032/119] [oct23av] regenerate all 8 mad and 7 sa processes (now including eemumu again) after Stefan's changes for PRs #760 and #761 The only code changes are in gq_ttq and in ee_mumu. - The tput and tmad tests in gq_ttq now fail again (runTest failure and xsec mismatch). Logs not included. - Code generation in ee_mumu is now successful (but clang format must be fixed around Ccoeff). I have not run eemumu tests yet. --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 64 +++++-------- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../SubProcesses/P1_epem_mupmum/CPPProcess.cc | 23 ++--- .../SubProcesses/P1_epem_mupmum/auto_dsig1.f | 20 ++++- .../ee_mumu.mad/SubProcesses/cudacpp.mk | 6 +- .../cudacpp/ee_mumu.mad/SubProcesses/makefile | 4 +- .../cudacpp/ee_mumu.mad/bin/generate_events | 22 ++++- .../ee_mumu.mad/bin/internal/banner.py | 3 +- epochX/cudacpp/ee_mumu.mad/bin/madevent | 20 ++++- epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h | 30 +++++-- .../CODEGEN_cudacpp_ee_mumu_log.txt | 51 +++-------- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../P1_Sigma_sm_epem_mupmum/CPPProcess.cc | 23 ++--- .../ee_mumu.sa/SubProcesses/cudacpp.mk | 6 +- epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h | 30 +++++-- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 16 ++-- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 8 +- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 22 ++--- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 18 ++-- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 12 +-- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 22 ++--- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 10 +-- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 22 ++--- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 14 +-- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 22 ++--- .../cudacpp/gq_ttq.mad/src/Parameters_sm.cc | 4 +- epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h | 18 ++-- 
.../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 10 +-- epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc | 4 +- epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h | 18 ++-- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 6 +- .../CODEGEN_mad_pp_tt012j_log.txt | 90 +++++++++---------- 32 files changed, 341 insertions(+), 295 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 74e619abdf..5c2d6714bb 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00560760498046875  +DEBUG: model prefixing takes 0.005345821380615234  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -161,10 +161,10 @@ Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  INFO: initialize a new directory: CODEGEN_mad_ee_mumu INFO: remove old information in CODEGEN_mad_ee_mumu -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu  INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu WARNING: File exists 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards  @@ -173,57 +173,37 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1307]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  4 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2] [model_handling.py at line 1171]  -DEBUG: multi_channel =  {1: [0], 2: [1]} [model_handling.py at line 1177]  -DEBUG: multi_channel_map =  {1: [0], 2: [1]} [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {1: 1, 2: 2} [model_handling.py at line 1718]  INFO: 
Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1410]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_epem_mupmum.txt [model_handling.py at line 1344]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  WARNING: vector code for lepton pdf not implemented. 
We removed the option to run dressed lepton  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.099 s +Wrote files for 8 helas calls in 0.098 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.195 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +ALOHA: aloha creates 3 routines in 0.194 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.252 s +ALOHA: aloha creates 7 routines in 0.247 s FFV1 FFV1 FFV2 @@ -235,10 +215,7 @@ ALOHA: aloha creates 7 routines in 0.252 s FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory @@ -249,7 +226,9 @@ save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CO INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG: path =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT [output.py at line 213]  +DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * * W E L C O M E to * @@ -278,6 +257,8 @@ Using default eps viewer "evince". Set another one in ./input/mg5_configuration. No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. 
Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -317,11 +298,12 @@ patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py +Hunk #1 succeeded at 4188 (offset 1 line). patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). +Hunk #1 succeeded at 495 (offset 26 lines). patching file driver.f patching file matrix1.f Hunk #3 succeeded at 230 (offset 9 lines). @@ -335,6 +317,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.502s -user 0m2.200s -sys 0m0.288s +real 0m2.724s +user 0m2.192s +sys 0m0.282s diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! 
Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! + const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc index 873b8c2fd2..8d370a6b34 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc @@ -238,25 +238,18 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 2 *** // Wavefunction(s) for diagram number 1 -#if not( defined __CUDACC__ and defined MGONGPU_TEST_DIVERGENCE ) - opzxxx( momenta, cHel[ihel][0], -1, w_fp[0], 0 ); // NB: opzxxx only uses pz -#else - if( ( blockDim.x * blockIdx.x + threadIdx.x ) % 2 == 0 ) - opzxxx( momenta, cHel[ihel][0], -1, w_fp[0], 0 ); // NB: opzxxx only uses pz - else - oxxxxx( momenta, 0, cHel[ihel][0], -1, w_fp[0], 0 ); -#endif + oxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); - ixzxxx( momenta, cHel[ihel][2], -1, w_fp[2], 2 ); + ixxxxx( momenta, 0., cHel[ihel][2], -1, w_fp[2], 2 ); - oxzxxx( momenta, cHel[ihel][3], +1, w_fp[3], 3 ); + oxxxxx( momenta, 0., cHel[ihel][3], +1, w_fp[3], 3 ); - FFV1P0_3( w_fp[1], w_fp[0], COUPs[0], 0., 0., w_fp[4] ); + FFV1P0_3( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[2], w_fp[3], 
w_fp[4], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -266,10 +259,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 2 *** // Wavefunction(s) for diagram number 2 - FFV2_4_3( w_fp[1], w_fp[0], COUPs[1], COUPs[2], cIPD[0], cIPD[1], w_fp[4] ); + FFV2_4_3( w_fp[1], w_fp[0], COUPs[1], 1.0, COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 2 - FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[1], COUPs[2], &_fp[0] ); + FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[1], 1.0, COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f index b836e34865..31e7790d2d 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f @@ -39,6 +39,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION EP1 DOUBLE PRECISION EM2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,15 +130,26 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - EP1=PDG2PDF(LPP(IB(1)),-11, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1) - $ ))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + EP1=PDG2PDF(LPP(IB(1)),-11, IB(1),XBK(IB(1)), QSCALE) IF (PDLABEL.EQ.'dressed') EP1_COMPONENTS(1:4) = $ EE_COMPONENTS(1:4) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN 
!LP=SIGN(1,LPP(IB(2))) - EM2=PDG2PDF(LPP(IB(2)),11, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + EM2=PDG2PDF(LPP(IB(2)),11, IB(2),XBK(IB(2)), QSCALE) IF (PDLABEL.EQ.'dressed') EM2_COMPONENTS(1:4) = $ EE_COMPONENTS(1:4) ENDIF diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV 
remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile b/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile index 74db44d848..74b19033a8 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 +LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/ee_mumu.mad/bin/generate_events b/epochX/cudacpp/ee_mumu.mad/bin/generate_events index 107313b25d..5577cc66a0 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/generate_events +++ b/epochX/cudacpp/ee_mumu.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME - +import misc as misc import logging import logging.config @@ -160,17 +160,31 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv + argument = sys.argv + + # check for plugin customization of the launch command + launch_interface = ME.MadEventCmdShell + if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + try: if '-h' in argument or '--help' in argument: - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py index 7624b9f557..e9f421ae5f 100755 --- 
a/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py @@ -1002,13 +1002,14 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() + self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - self.plugin_input(finput) + def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/ee_mumu.mad/bin/madevent b/epochX/cudacpp/ee_mumu.mad/bin/madevent index c944aa1faf..10b6a71fa2 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/madevent +++ b/epochX/cudacpp/ee_mumu.mad/bin/madevent @@ -32,6 +32,7 @@ except ImportError: import os +pjoin = os.path.join import optparse # Get the directory of the script real path (bin) @@ -160,10 +161,23 @@ except: pass import internal.madevent_interface as cmd_interface +# check for plugin customization of the launch command +launch_interface = cmd_interface.MadEventCmdShell +if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + # Call the cmd interface main loop try: if '-h' in args or '--help' in args: - launch = ME.MadEventCmdShell(me_dir=os.path.dirname(root_path), force_run=True) + launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): @@ -178,7 +192,7 @@ try: cmd_line.run_cmd('import command ' + input_file) cmd_line.run_cmd('quit') else: - cmd_line = cmd_interface.MadEventCmdShell(force_run=True) + cmd_line = launch_interface(force_run=True) cmd_line.use_rawinput = False cmd_line.haspiping = False cmd_line.run_cmd('import command ' + 
input_file) @@ -188,7 +202,7 @@ try: if options.web: cmd_line = cmd_interface.MadEventCmd(force_run=True) else: - cmd_line = cmd_interface.MadEventCmdShell(force_run=True) + cmd_line = launch_interface(force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print( parser_error) diff --git a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h index 6a8781b113..17bcf68784 100644 --- a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -886,6 +888,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -896,6 +899,7 @@ namespace mg5amcCpu FFV2_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -909,6 +913,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -919,6 +924,7 @@ namespace mg5amcCpu FFV4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -932,7 +938,9 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype 
allCOUP1[], + const double Ccoeff1, const fptype allCOUP2[], + const double Ccoeff2, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -943,7 +951,9 @@ namespace mg5amcCpu FFV2_4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP1[], + const double Ccoeff1, const fptype allCOUP2[], + const double Ccoeff2, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -957,6 +967,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -980,6 +991,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1011,6 +1023,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1034,6 +1047,7 @@ namespace mg5amcCpu FFV2_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1067,6 +1081,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1093,6 +1108,7 @@ namespace mg5amcCpu FFV4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1129,7 +1145,9 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP1[], + const double Ccoeff1, const fptype allCOUP2[], + const double Ccoeff2, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1144,7 +1162,7 @@ namespace mg5amcCpu constexpr fptype two( 2. 
); const cxtype_sv TMP1 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) ); const cxtype_sv TMP3 = ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ); - ( *vertex ) = ( -one ) * ( COUP2 * ( +cI * TMP1 + ( two * cI ) * TMP3 ) + cI * ( TMP1 * COUP1 ) ); + ( *vertex ) = ( -one ) * ( Ccoeff2*COUP2 * ( +cI * TMP1 + ( two * cI ) * TMP3 ) + cI * ( TMP1 * Ccoeff1*COUP1 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1157,7 +1175,9 @@ namespace mg5amcCpu FFV2_4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP1[], + const double Ccoeff1, const fptype allCOUP2[], + const double Ccoeff2, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1179,10 +1199,10 @@ namespace mg5amcCpu const cxtype_sv TMP2 = ( F1[2] * ( F2[4] * ( P3[0] + P3[3] ) + F2[5] * ( P3[1] + cI * P3[2] ) ) + F1[3] * ( F2[4] * ( P3[1] - cI * P3[2] ) + F2[5] * ( P3[0] - P3[3] ) ) ); const cxtype_sv TMP4 = ( F1[4] * ( F2[2] * ( P3[0] - P3[3] ) - F2[3] * ( P3[1] + cI * P3[2] ) ) + F1[5] * ( F2[2] * ( -P3[1] + cI * P3[2] ) + F2[3] * ( P3[0] + P3[3] ) ) ); const cxtype_sv denom = one / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); - V3[2] = denom * ( -two * cI ) * ( COUP2 * ( OM3 * -half * P3[0] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] + F1[3] * F2[5] ) + F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( COUP1 * ( F1[2] * F2[4] + F1[3] * F2[5] - P3[0] * OM3 * TMP2 ) ) ); - V3[3] = denom * ( -two * cI ) * ( COUP2 * ( OM3 * -half * P3[1] * ( TMP2 + two * TMP4 ) + ( -half * ( F1[2] * F2[5] + F1[3] * F2[4] ) + F1[4] * F2[3] + F1[5] * F2[2] ) ) - half * ( COUP1 * ( F1[2] * F2[5] + F1[3] * F2[4] + P3[1] * OM3 * TMP2 ) ) ); - V3[4] = denom * cI * ( COUP2 * ( OM3 * P3[2] * ( TMP2 + two * TMP4 ) + ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] 
) + ( -two * cI ) * ( F1[4] * F2[3] ) + ( two * cI ) * ( F1[5] * F2[2] ) ) ) + COUP1 * ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + P3[2] * OM3 * TMP2 ) ); - V3[5] = denom * ( two * cI ) * ( COUP2 * ( OM3 * half * P3[3] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] ) - half * ( F1[3] * F2[5] ) - F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( COUP1 * ( F1[2] * F2[4] + P3[3] * OM3 * TMP2 - F1[3] * F2[5] ) ) ); + V3[2] = denom * ( -two * cI ) * ( Ccoeff2*COUP2 * ( OM3 * -half * P3[0] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] + F1[3] * F2[5] ) + F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( Ccoeff1*COUP1 * ( F1[2] * F2[4] + F1[3] * F2[5] - P3[0] * OM3 * TMP2 ) ) ); + V3[3] = denom * ( -two * cI ) * ( Ccoeff2*COUP2 * ( OM3 * -half * P3[1] * ( TMP2 + two * TMP4 ) + ( -half * ( F1[2] * F2[5] + F1[3] * F2[4] ) + F1[4] * F2[3] + F1[5] * F2[2] ) ) - half * ( Ccoeff1*COUP1 * ( F1[2] * F2[5] + F1[3] * F2[4] + P3[1] * OM3 * TMP2 ) ) ); + V3[4] = denom * cI * ( Ccoeff2*COUP2 * ( OM3 * P3[2] * ( TMP2 + two * TMP4 ) + ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + ( -two * cI ) * ( F1[4] * F2[3] ) + ( two * cI ) * ( F1[5] * F2[2] ) ) ) + Ccoeff1*COUP1 * ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + P3[2] * OM3 * TMP2 ) ); + V3[5] = denom * ( two * cI ) * ( Ccoeff2*COUP2 * ( OM3 * half * P3[3] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] ) - half * ( F1[3] * F2[5] ) - F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( Ccoeff1*COUP1 * ( F1[2] * F2[4] + P3[3] * OM3 * TMP2 - F1[3] * F2[5] ) ) ); mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 152aa855f1..a84feb8679 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: 
load vertices -DEBUG: model prefixing takes 0.005333423614501953  +DEBUG: model prefixing takes 0.005468606948852539  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -160,50 +160,28 @@ output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  -DEBUG: type(subproc_group)= [output.py at line 188]  -DEBUG: type(fortran_model)= [output.py at line 189]  -DEBUG: type(me)= me=0 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: proc_id =  0 [model_handling.py at line 1046]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  +DEBUG: type(subproc_group)= [output.py at line 190]  +DEBUG: type(fortran_model)= [output.py at line 191]  +DEBUG: type(me)= me=0 [output.py at line 192]  INFO: Creating files in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1305]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1309]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1462]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1484]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: replace_dict['den_factors'] =  4 [model_handling.py at line 1155]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1170]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1171]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1663]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1718]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1352]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1361]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1378]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1398]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1428]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1439]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1450]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_epem_mupmum.txt [model_handling.py at line 1344]  Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.263 s +ALOHA: aloha creates 4 routines in 0.261 s FFV1 FFV1 FFV2 @@ -215,17 +193,14 @@ ALOHA: aloha creates 4 routines in 0.263 s FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./HelAmps_sm.h INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
-DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.686s -user 0m0.622s -sys 0m0.057s +real 0m0.656s +user 0m0.586s +sys 0m0.059s diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! 
+ const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc index 53aaf5cb29..11472d834e 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc @@ -238,25 +238,18 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 2 *** // Wavefunction(s) for diagram number 1 -#if not( defined __CUDACC__ and defined MGONGPU_TEST_DIVERGENCE ) - opzxxx( momenta, cHel[ihel][0], -1, w_fp[0], 0 ); // NB: opzxxx only uses pz -#else - if( ( blockDim.x * blockIdx.x + threadIdx.x ) % 2 == 0 ) - opzxxx( momenta, cHel[ihel][0], -1, w_fp[0], 0 ); // NB: opzxxx only uses pz - else - oxxxxx( momenta, 0, cHel[ihel][0], -1, w_fp[0], 0 ); -#endif + oxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); - ixzxxx( momenta, cHel[ihel][2], -1, w_fp[2], 2 ); + ixxxxx( momenta, 0., cHel[ihel][2], -1, w_fp[2], 2 ); - oxzxxx( momenta, cHel[ihel][3], +1, w_fp[3], 3 ); + oxxxxx( momenta, 0., cHel[ihel][3], +1, w_fp[3], 3 ); - FFV1P0_3( w_fp[1], w_fp[0], COUPs[0], 0., 0., w_fp[4] ); + FFV1P0_3( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -265,10 +258,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 2 *** // Wavefunction(s) for diagram number 2 - FFV2_4_3( w_fp[1], w_fp[0], COUPs[1], COUPs[2], cIPD[0], cIPD[1], w_fp[4] ); + FFV2_4_3( w_fp[1], w_fp[0], COUPs[1], 
1.0, COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 2 - FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[1], COUPs[2], &_fp[0] ); + FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[1], 1.0, COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) 
$(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h b/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h index 6a8781b113..17bcf68784 100644 --- a/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -886,6 +888,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -896,6 +899,7 @@ namespace mg5amcCpu FFV2_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -909,6 +913,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -919,6 +924,7 @@ namespace mg5amcCpu FFV4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -932,7 +938,9 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP1[], + const double Ccoeff1, const fptype allCOUP2[], + const double Ccoeff2, fptype allvertexes[] ) ALWAYS_INLINE; 
//-------------------------------------------------------------------------- @@ -943,7 +951,9 @@ namespace mg5amcCpu FFV2_4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP1[], + const double Ccoeff1, const fptype allCOUP2[], + const double Ccoeff2, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -957,6 +967,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -980,6 +991,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1011,6 +1023,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1034,6 +1047,7 @@ namespace mg5amcCpu FFV2_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1067,6 +1081,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1093,6 +1108,7 @@ namespace mg5amcCpu FFV4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1129,7 +1145,9 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP1[], + const double Ccoeff1, const fptype allCOUP2[], + const double Ccoeff2, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1144,7 +1162,7 @@ namespace mg5amcCpu constexpr fptype two( 2. 
); const cxtype_sv TMP1 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) ); const cxtype_sv TMP3 = ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ); - ( *vertex ) = ( -one ) * ( COUP2 * ( +cI * TMP1 + ( two * cI ) * TMP3 ) + cI * ( TMP1 * COUP1 ) ); + ( *vertex ) = ( -one ) * ( Ccoeff2*COUP2 * ( +cI * TMP1 + ( two * cI ) * TMP3 ) + cI * ( TMP1 * Ccoeff1*COUP1 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1157,7 +1175,9 @@ namespace mg5amcCpu FFV2_4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP1[], + const double Ccoeff1, const fptype allCOUP2[], + const double Ccoeff2, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1179,10 +1199,10 @@ namespace mg5amcCpu const cxtype_sv TMP2 = ( F1[2] * ( F2[4] * ( P3[0] + P3[3] ) + F2[5] * ( P3[1] + cI * P3[2] ) ) + F1[3] * ( F2[4] * ( P3[1] - cI * P3[2] ) + F2[5] * ( P3[0] - P3[3] ) ) ); const cxtype_sv TMP4 = ( F1[4] * ( F2[2] * ( P3[0] - P3[3] ) - F2[3] * ( P3[1] + cI * P3[2] ) ) + F1[5] * ( F2[2] * ( -P3[1] + cI * P3[2] ) + F2[3] * ( P3[0] + P3[3] ) ) ); const cxtype_sv denom = one / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); - V3[2] = denom * ( -two * cI ) * ( COUP2 * ( OM3 * -half * P3[0] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] + F1[3] * F2[5] ) + F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( COUP1 * ( F1[2] * F2[4] + F1[3] * F2[5] - P3[0] * OM3 * TMP2 ) ) ); - V3[3] = denom * ( -two * cI ) * ( COUP2 * ( OM3 * -half * P3[1] * ( TMP2 + two * TMP4 ) + ( -half * ( F1[2] * F2[5] + F1[3] * F2[4] ) + F1[4] * F2[3] + F1[5] * F2[2] ) ) - half * ( COUP1 * ( F1[2] * F2[5] + F1[3] * F2[4] + P3[1] * OM3 * TMP2 ) ) ); - V3[4] = denom * cI * ( COUP2 * ( OM3 * P3[2] * ( TMP2 + two * TMP4 ) + ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] 
) + ( -two * cI ) * ( F1[4] * F2[3] ) + ( two * cI ) * ( F1[5] * F2[2] ) ) ) + COUP1 * ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + P3[2] * OM3 * TMP2 ) ); - V3[5] = denom * ( two * cI ) * ( COUP2 * ( OM3 * half * P3[3] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] ) - half * ( F1[3] * F2[5] ) - F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( COUP1 * ( F1[2] * F2[4] + P3[3] * OM3 * TMP2 - F1[3] * F2[5] ) ) ); + V3[2] = denom * ( -two * cI ) * ( Ccoeff2*COUP2 * ( OM3 * -half * P3[0] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] + F1[3] * F2[5] ) + F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( Ccoeff1*COUP1 * ( F1[2] * F2[4] + F1[3] * F2[5] - P3[0] * OM3 * TMP2 ) ) ); + V3[3] = denom * ( -two * cI ) * ( Ccoeff2*COUP2 * ( OM3 * -half * P3[1] * ( TMP2 + two * TMP4 ) + ( -half * ( F1[2] * F2[5] + F1[3] * F2[4] ) + F1[4] * F2[3] + F1[5] * F2[2] ) ) - half * ( Ccoeff1*COUP1 * ( F1[2] * F2[5] + F1[3] * F2[4] + P3[1] * OM3 * TMP2 ) ) ); + V3[4] = denom * cI * ( Ccoeff2*COUP2 * ( OM3 * P3[2] * ( TMP2 + two * TMP4 ) + ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + ( -two * cI ) * ( F1[4] * F2[3] ) + ( two * cI ) * ( F1[5] * F2[2] ) ) ) + Ccoeff1*COUP1 * ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + P3[2] * OM3 * TMP2 ) ); + V3[5] = denom * ( two * cI ) * ( Ccoeff2*COUP2 * ( OM3 * half * P3[3] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] ) - half * ( F1[3] * F2[5] ) - F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( Ccoeff1*COUP1 * ( F1[2] * F2[4] + P3[3] * OM3 * TMP2 - F1[3] * F2[5] ) ) ); mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 28d3d7b93b..58f1c596db 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model 
prefixing takes 0.005661487579345703  +DEBUG: model prefixing takes 0.0055713653564453125  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,8 +174,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -195,12 +195,12 @@ Wrote files for 10 helas calls in 0.099 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.141 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.129 s +ALOHA: aloha creates 4 routines in 0.130 s VVV1 FFV1 FFV1 @@ -304,6 +304,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.886s -user 0m2.115s -sys 0m0.311s +real 0m2.345s +user 0m2.010s +sys 0m0.294s diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 9a5217e178..8cdd83a795 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00539851188659668  +DEBUG: model prefixing takes 0.00533604621887207  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -196,6 +196,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.567s -user 0m0.472s -sys 0m0.059s +real 0m0.553s +user 0m0.477s +sys 0m0.054s diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 95b0d68338..0d2e3b8058 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~; add process g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005623579025268555  +DEBUG: model prefixing takes 0.0056264400482177734  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -183,8 +183,8 @@ INFO: Processing color information for process: g g > t t~ g @2 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -200,8 +200,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -217,14 +217,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s -Wrote files for 46 helas calls in 0.240 s +Wrote files for 46 helas calls in 0.238 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.323 s +ALOHA: aloha creates 5 routines in 0.318 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -232,7 +232,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.307 s +ALOHA: aloha creates 10 routines in 0.306 s VVV1 VVV1 FFV1 @@ -352,6 +352,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.973s -user 0m2.615s -sys 0m0.296s +real 0m2.905s +user 0m2.569s +sys 0m0.322s diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index 536d2538f3..a571165e79 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005247592926025391  +DEBUG: model prefixing takes 0.005511283874511719  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.021 s +1 processes with 16 diagrams generated in 0.022 s Total: 1 processes with 16 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -174,8 +174,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,7 +191,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s -Wrote files for 36 helas calls in 0.148 s +Wrote files for 36 helas calls in 0.147 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.306 s +ALOHA: aloha creates 10 routines in 0.312 s VVV1 VVV1 FFV1 @@ -321,6 +321,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.814s -user 0m2.477s -sys 0m0.303s +real 0m2.806s +user 0m2.510s +sys 0m0.288s diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 0c63c9d364..de77eb37ae 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005433082580566406  +DEBUG: model prefixing takes 0.0053822994232177734  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.021 s +1 processes with 16 diagrams generated in 0.022 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.324 s +ALOHA: aloha creates 5 routines in 0.317 s VVV1 VVV1 FFV1 @@ -204,6 +204,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.868s -user 0m0.714s -sys 0m0.064s +real 0m0.778s +user 0m0.716s +sys 0m0.051s diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 2643c37896..d00eb26f08 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005459308624267578  +DEBUG: model prefixing takes 0.005320549011230469  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.154 s +1 processes with 123 diagrams generated in 0.155 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -174,8 +174,8 @@ INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,15 +190,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.418 s -Wrote files for 222 helas calls in 0.707 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.419 s +Wrote files for 222 helas calls in 0.715 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.327 s +ALOHA: aloha creates 5 routines in 0.323 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.311 s +ALOHA: aloha creates 10 routines in 0.310 s VVV1 VVV1 FFV1 @@ -324,6 +324,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.972s -user 0m3.563s -sys 0m0.318s +real 0m3.910s +user 0m3.578s +sys 0m0.310s diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 838608f6d4..3e120ef382 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0056304931640625  +DEBUG: model prefixing takes 0.005327701568603516  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.154 s +1 processes with 123 diagrams generated in 0.155 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -207,6 +207,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m1.458s -user 0m1.362s -sys 0m0.057s +real 0m1.430s +user 0m1.360s +sys 0m0.053s diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index c8217e355b..6d04ae6f16 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load 
particles INFO: load vertices -DEBUG: model prefixing takes 0.005827188491821289  +DEBUG: model prefixing takes 0.0053408145904541016  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.934 s +1 processes with 1240 diagrams generated in 1.843 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -176,8 +176,8 @@ INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1592 term in 36s. Introduce 2768 contraction -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -192,15 +192,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.465 s -Wrote files for 2281 helas calls in 46.250 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.438 s +Wrote files for 2281 helas calls in 46.268 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.313 s +ALOHA: aloha creates 5 routines in 0.316 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -208,7 +208,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.305 s +ALOHA: aloha creates 10 routines in 0.308 s VVV1 VVV1 FFV1 @@ -326,6 +326,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m57.556s -user 0m56.426s -sys 0m0.883s +real 0m57.406s +user 0m56.359s +sys 0m0.856s diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 0641c4cc00..d6fd67c67b 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0052568912506103516  +DEBUG: model prefixing takes 0.005448818206787109  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.837 s +1 processes with 1240 diagrams generated in 1.868 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.446 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.509 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.341 s +ALOHA: aloha creates 5 routines in 0.344 s VVV1 VVV1 FFV1 @@ -207,6 +207,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m12.837s -user 0m12.595s -sys 0m0.100s +real 0m12.895s +user 0m12.714s +sys 0m0.107s diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 4e69d362f6..5e5ad94f5b 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005332231521606445  +DEBUG: model prefixing takes 0.005564689636230469  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -196,8 +196,8 @@ INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -213,8 +213,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -229,12 +229,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s -Wrote files for 32 helas calls in 0.212 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s +Wrote files for 32 helas calls in 0.216 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.148 s +ALOHA: aloha creates 2 routines in 0.142 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines @@ -363,6 +363,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.663s -user 0m2.232s -sys 0m0.316s +real 0m2.560s +user 0m2.211s +sys 0m0.324s diff --git a/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.cc b/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.cc index d5eda63ee0..3452d1e8da 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.cc +++ b/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.cc @@ -111,8 +111,8 @@ Parameters_sm::setDependentParameters() // now computed event-by-event (running void Parameters_sm::setDependentCouplings() // now computed event-by-event (running alphas #373) { - GC_11 = mdl_complexi * G; GC_10 = -G; + GC_11 = mdl_complexi * G; } */ @@ -195,7 +195,7 @@ void Parameters_sm::printDependentCouplings() // now computed event-by-event (running alphas #373) { std::cout << "sm model couplings dependent on event kinematics:" << std::endl; - std::cout << std::setw( 20 ) << "GC_11 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_11 << std::endl; std::cout << std::setw( 20 ) << "GC_10 = " << 
std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_10 << std::endl; + std::cout << std::setw( 20 ) << "GC_11 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_11 << std::endl; } */ diff --git a/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h b/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h index 0c77cf58f0..4f6f322ed9 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h +++ b/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h @@ -54,7 +54,7 @@ namespace mg5amcCpu //double mdl_sqrt__aS, G, mdl_G__exp__2; // now computed event-by-event (running alphas #373) // Model couplings dependent on aS - //cxsmpl GC_11, GC_10; // now computed event-by-event (running alphas #373) + //cxsmpl GC_10, GC_11; // now computed event-by-event (running alphas #373) // Set parameters that are unchanged during the run void setIndependentParameters( SLHAReader& slha ); @@ -194,8 +194,8 @@ namespace mg5amcCpu //constexpr double mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) // Model couplings dependent on aS - //constexpr cxsmpl GC_11 = mdl_complexi * G; // now computed event-by-event (running alphas #373) //constexpr cxsmpl GC_10 = -G; // now computed event-by-event (running alphas #373) + //constexpr cxsmpl GC_11 = mdl_complexi * G; // now computed event-by-event (running alphas #373) // Print parameters that are unchanged during the run void printIndependentParameters(); @@ -226,12 +226,12 @@ namespace mg5amcCpu namespace Parameters_sm_dependentCouplings { constexpr size_t ndcoup = 2; // #couplings that vary event by event because they depend on the running alphas QCD - constexpr size_t idcoup_GC_11 = 0; - constexpr size_t idcoup_GC_10 = 1; + constexpr size_t idcoup_GC_10 = 0; + constexpr size_t idcoup_GC_11 = 1; struct DependentCouplings_sv { - cxtype_sv GC_11; cxtype_sv GC_10; + cxtype_sv GC_11; }; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-variable" // e.g. 
<> @@ -257,8 +257,8 @@ namespace mg5amcCpu //const fptype_sv G = 2. * mdl_sqrt__aS * constexpr_sqrt( M_PI ); const fptype_sv mdl_G__exp__2 = ( ( G ) * ( G ) ); // Model couplings dependent on aS - out.GC_11 = cI * G; out.GC_10 = -G; + out.GC_11 = cI * G; } // End SM implementation - no special handling of vectors of floats as in EFT (#439) return out; @@ -293,12 +293,12 @@ namespace mg5amcCpu using namespace Parameters_sm_dependentCouplings; const fptype_sv& gs_sv = G_ACCESS::kernelAccessConst( gs ); DependentCouplings_sv couplings_sv = computeDependentCouplings_fromG( gs_sv ); - fptype* GC_11s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_11 ); fptype* GC_10s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_10 ); - cxtype_sv_ref GC_11s_sv = C_ACCESS::kernelAccess( GC_11s ); + fptype* GC_11s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_11 ); cxtype_sv_ref GC_10s_sv = C_ACCESS::kernelAccess( GC_10s ); - GC_11s_sv = couplings_sv.GC_11; + cxtype_sv_ref GC_11s_sv = C_ACCESS::kernelAccess( GC_11s ); GC_10s_sv = couplings_sv.GC_10; + GC_11s_sv = couplings_sv.GC_11; mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 531ff7554c..d98bc0f805 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005487680435180664  +DEBUG: model prefixing takes 0.0052411556243896484  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -205,12 +205,12 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. -Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.029 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.141 s FFV1 FFV1 FFV1 @@ -227,6 +227,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.657s +real 0m1.908s user 0m0.585s -sys 0m0.062s +sys 0m0.055s diff --git a/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc b/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc index d5eda63ee0..3452d1e8da 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc +++ b/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc @@ -111,8 +111,8 @@ Parameters_sm::setDependentParameters() // now computed event-by-event (running void Parameters_sm::setDependentCouplings() // now computed event-by-event (running alphas #373) { - GC_11 = mdl_complexi * G; GC_10 = -G; + GC_11 = mdl_complexi * G; } */ @@ -195,7 +195,7 @@ void 
Parameters_sm::printDependentCouplings() // now computed event-by-event (running alphas #373) { std::cout << "sm model couplings dependent on event kinematics:" << std::endl; - std::cout << std::setw( 20 ) << "GC_11 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_11 << std::endl; std::cout << std::setw( 20 ) << "GC_10 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_10 << std::endl; + std::cout << std::setw( 20 ) << "GC_11 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_11 << std::endl; } */ diff --git a/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h b/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h index 0c77cf58f0..4f6f322ed9 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h +++ b/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h @@ -54,7 +54,7 @@ namespace mg5amcCpu //double mdl_sqrt__aS, G, mdl_G__exp__2; // now computed event-by-event (running alphas #373) // Model couplings dependent on aS - //cxsmpl GC_11, GC_10; // now computed event-by-event (running alphas #373) + //cxsmpl GC_10, GC_11; // now computed event-by-event (running alphas #373) // Set parameters that are unchanged during the run void setIndependentParameters( SLHAReader& slha ); @@ -194,8 +194,8 @@ namespace mg5amcCpu //constexpr double mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) // Model couplings dependent on aS - //constexpr cxsmpl GC_11 = mdl_complexi * G; // now computed event-by-event (running alphas #373) //constexpr cxsmpl GC_10 = -G; // now computed event-by-event (running alphas #373) + //constexpr cxsmpl GC_11 = mdl_complexi * G; // now computed event-by-event (running alphas #373) // Print parameters that are unchanged during the run void printIndependentParameters(); @@ -226,12 +226,12 @@ namespace mg5amcCpu namespace Parameters_sm_dependentCouplings { constexpr size_t ndcoup = 2; // #couplings that vary event by event because they depend on the running alphas QCD - 
constexpr size_t idcoup_GC_11 = 0; - constexpr size_t idcoup_GC_10 = 1; + constexpr size_t idcoup_GC_10 = 0; + constexpr size_t idcoup_GC_11 = 1; struct DependentCouplings_sv { - cxtype_sv GC_11; cxtype_sv GC_10; + cxtype_sv GC_11; }; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-variable" // e.g. <> @@ -257,8 +257,8 @@ namespace mg5amcCpu //const fptype_sv G = 2. * mdl_sqrt__aS * constexpr_sqrt( M_PI ); const fptype_sv mdl_G__exp__2 = ( ( G ) * ( G ) ); // Model couplings dependent on aS - out.GC_11 = cI * G; out.GC_10 = -G; + out.GC_11 = cI * G; } // End SM implementation - no special handling of vectors of floats as in EFT (#439) return out; @@ -293,12 +293,12 @@ namespace mg5amcCpu using namespace Parameters_sm_dependentCouplings; const fptype_sv& gs_sv = G_ACCESS::kernelAccessConst( gs ); DependentCouplings_sv couplings_sv = computeDependentCouplings_fromG( gs_sv ); - fptype* GC_11s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_11 ); fptype* GC_10s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_10 ); - cxtype_sv_ref GC_11s_sv = C_ACCESS::kernelAccess( GC_11s ); + fptype* GC_11s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_11 ); cxtype_sv_ref GC_10s_sv = C_ACCESS::kernelAccess( GC_10s ); - GC_11s_sv = couplings_sv.GC_11; + cxtype_sv_ref GC_11s_sv = C_ACCESS::kernelAccess( GC_11s ); GC_10s_sv = couplings_sv.GC_10; + GC_11s_sv = couplings_sv.GC_11; mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index 86f147e75d..3ab0b589e3 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -164,6 +164,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False 
[output.py at line 206]  quit -real 0m0.477s -user 0m0.368s -sys 0m0.055s +real 0m0.430s +user 0m0.370s +sys 0m0.050s diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index c1cd27fa9e..8e26a769b5 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005520343780517578  +DEBUG: model prefixing takes 0.005576610565185547  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.029 s +5 processes with 7 diagrams generated in 0.028 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.791 s +65 processes with 1119 diagrams generated in 1.792 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -496,8 +496,8 @@ INFO: Combined process c c~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -513,8 +513,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -530,8 +530,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -547,8 +547,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -564,8 +564,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -581,8 +581,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -598,8 +598,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -615,8 +615,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -632,8 +632,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -649,8 +649,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -666,8 +666,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -683,8 +683,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -700,8 +700,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -717,8 +717,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -734,8 +734,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -751,8 +751,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -768,8 +768,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -785,8 +785,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -801,8 +801,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.259 s -Wrote files for 810 helas calls in 3.219 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.271 s +Wrote files for 810 helas calls in 3.238 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines @@ -817,7 +817,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.310 s +ALOHA: aloha creates 10 routines in 0.309 s VVV1 VVV1 FFV1 @@ -1131,6 +1131,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m9.356s -user 0m8.764s -sys 0m0.556s +real 0m9.513s +user 0m8.804s +sys 0m0.553s From 9fb2a07e44fc7c6be18004bf163bd287b825b285 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 15:11:31 +0200 Subject: [PATCH 033/119] [oct23av] fix clang format in eemumu after Olivier's "Ccoeff" patch for unary minus #628 --- .../cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index ae8dca9208..f06d725385 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -541,7 +541,7 @@ def change_var_format(self, obj): if obj.startswith('COUP'): out = super().change_var_format(obj) postfix = out[4:] - return "Ccoeff%s*%s" % (postfix, out) # OM for 'unary minus' #628 + return "Ccoeff%s * %s" % (postfix, out) # OM for 'unary minus' #628 else: return super().change_var_format(obj) From fbba1a805d6746af37d060bd4993b26daca8538b Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 15:14:00 +0200 Subject: [PATCH 034/119] [oct23av] regenerate all 8 mad and 7 sa processes (including eemumu) The only changes are the fixes in clang format for eemumu. I checked that tput and tmad tests for eemumu are ok (logs not included), however there is almost a factor two loss in throughput, probably due to the use of ixxx instead of imzx. 
--- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 16 +++--- epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h | 10 ++-- .../CODEGEN_cudacpp_ee_mumu_log.txt | 12 ++-- epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h | 10 ++-- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 10 ++-- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 8 +-- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 18 +++--- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 18 +++--- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 14 ++--- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 18 +++--- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 14 ++--- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 20 +++---- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 14 ++--- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 18 +++--- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 14 ++--- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 8 +-- .../CODEGEN_mad_pp_tt012j_log.txt | 56 +++++++++---------- 17 files changed, 139 insertions(+), 139 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 5c2d6714bb..14d2e02d1e 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005345821380615234  +DEBUG: model prefixing takes 0.005278587341308594  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,7 +174,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,19 +191,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.098 s +Wrote files for 8 helas calls in 0.096 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.194 s +ALOHA: aloha creates 3 routines in 0.196 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.247 s +ALOHA: aloha creates 7 routines in 0.253 s FFV1 FFV1 FFV2 @@ -317,6 +317,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.724s -user 0m2.192s -sys 0m0.282s +real 0m2.484s +user 0m2.182s +sys 0m0.291s diff --git a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h index 17bcf68784..19819e2451 100644 --- a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h @@ -1162,7 +1162,7 @@ namespace mg5amcCpu constexpr fptype two( 2. ); const cxtype_sv TMP1 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) ); const cxtype_sv TMP3 = ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ); - ( *vertex ) = ( -one ) * ( Ccoeff2*COUP2 * ( +cI * TMP1 + ( two * cI ) * TMP3 ) + cI * ( TMP1 * Ccoeff1*COUP1 ) ); + ( *vertex ) = ( -one ) * ( Ccoeff2 * COUP2 * ( +cI * TMP1 + ( two * cI ) * TMP3 ) + cI * ( TMP1 * Ccoeff1 * COUP1 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1199,10 +1199,10 @@ namespace mg5amcCpu const cxtype_sv TMP2 = ( F1[2] * ( F2[4] * ( P3[0] + P3[3] ) + F2[5] * ( P3[1] + cI * P3[2] ) ) + F1[3] * ( F2[4] * ( P3[1] - cI * P3[2] ) + F2[5] * ( P3[0] - P3[3] ) ) ); const cxtype_sv TMP4 = ( F1[4] * ( F2[2] * ( P3[0] - P3[3] ) - F2[3] * ( P3[1] + cI * P3[2] ) ) + F1[5] * ( F2[2] * ( -P3[1] + cI * P3[2] ) + F2[3] * ( P3[0] + P3[3] ) ) ); const cxtype_sv denom = one / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); - V3[2] = denom * ( -two * cI ) * ( Ccoeff2*COUP2 * ( OM3 * -half * P3[0] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] + F1[3] * F2[5] ) + F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( Ccoeff1*COUP1 * ( F1[2] * F2[4] + F1[3] * F2[5] - P3[0] * OM3 * TMP2 ) ) ); - V3[3] = denom * ( -two * cI ) * ( Ccoeff2*COUP2 * ( OM3 * -half * P3[1] * ( TMP2 + two * TMP4 ) + ( -half * ( F1[2] * F2[5] + F1[3] * F2[4] ) + F1[4] * F2[3] + F1[5] * F2[2] ) ) - 
half * ( Ccoeff1*COUP1 * ( F1[2] * F2[5] + F1[3] * F2[4] + P3[1] * OM3 * TMP2 ) ) ); - V3[4] = denom * cI * ( Ccoeff2*COUP2 * ( OM3 * P3[2] * ( TMP2 + two * TMP4 ) + ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + ( -two * cI ) * ( F1[4] * F2[3] ) + ( two * cI ) * ( F1[5] * F2[2] ) ) ) + Ccoeff1*COUP1 * ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + P3[2] * OM3 * TMP2 ) ); - V3[5] = denom * ( two * cI ) * ( Ccoeff2*COUP2 * ( OM3 * half * P3[3] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] ) - half * ( F1[3] * F2[5] ) - F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( Ccoeff1*COUP1 * ( F1[2] * F2[4] + P3[3] * OM3 * TMP2 - F1[3] * F2[5] ) ) ); + V3[2] = denom * ( -two * cI ) * ( Ccoeff2 * COUP2 * ( OM3 * -half * P3[0] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] + F1[3] * F2[5] ) + F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( Ccoeff1 * COUP1 * ( F1[2] * F2[4] + F1[3] * F2[5] - P3[0] * OM3 * TMP2 ) ) ); + V3[3] = denom * ( -two * cI ) * ( Ccoeff2 * COUP2 * ( OM3 * -half * P3[1] * ( TMP2 + two * TMP4 ) + ( -half * ( F1[2] * F2[5] + F1[3] * F2[4] ) + F1[4] * F2[3] + F1[5] * F2[2] ) ) - half * ( Ccoeff1 * COUP1 * ( F1[2] * F2[5] + F1[3] * F2[4] + P3[1] * OM3 * TMP2 ) ) ); + V3[4] = denom * cI * ( Ccoeff2 * COUP2 * ( OM3 * P3[2] * ( TMP2 + two * TMP4 ) + ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + ( -two * cI ) * ( F1[4] * F2[3] ) + ( two * cI ) * ( F1[5] * F2[2] ) ) ) + Ccoeff1 * COUP1 * ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + P3[2] * OM3 * TMP2 ) ); + V3[5] = denom * ( two * cI ) * ( Ccoeff2 * COUP2 * ( OM3 * half * P3[3] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] ) - half * ( F1[3] * F2[5] ) - F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( Ccoeff1 * COUP1 * ( F1[2] * F2[4] + P3[3] * OM3 * TMP2 - F1[3] * F2[5] ) ) ); mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index a84feb8679..37c45d8bc4 100644 --- 
a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005468606948852539  +DEBUG: model prefixing takes 0.0058443546295166016  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,14 +174,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. 
-Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s +Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.261 s +ALOHA: aloha creates 4 routines in 0.274 s FFV1 FFV1 FFV2 @@ -201,6 +201,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.656s -user 0m0.586s -sys 0m0.059s +real 0m0.765s +user 0m0.630s +sys 0m0.052s diff --git a/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h b/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h index 17bcf68784..19819e2451 100644 --- a/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h @@ -1162,7 +1162,7 @@ namespace mg5amcCpu constexpr fptype two( 2. 
); const cxtype_sv TMP1 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) ); const cxtype_sv TMP3 = ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ); - ( *vertex ) = ( -one ) * ( Ccoeff2*COUP2 * ( +cI * TMP1 + ( two * cI ) * TMP3 ) + cI * ( TMP1 * Ccoeff1*COUP1 ) ); + ( *vertex ) = ( -one ) * ( Ccoeff2 * COUP2 * ( +cI * TMP1 + ( two * cI ) * TMP3 ) + cI * ( TMP1 * Ccoeff1 * COUP1 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1199,10 +1199,10 @@ namespace mg5amcCpu const cxtype_sv TMP2 = ( F1[2] * ( F2[4] * ( P3[0] + P3[3] ) + F2[5] * ( P3[1] + cI * P3[2] ) ) + F1[3] * ( F2[4] * ( P3[1] - cI * P3[2] ) + F2[5] * ( P3[0] - P3[3] ) ) ); const cxtype_sv TMP4 = ( F1[4] * ( F2[2] * ( P3[0] - P3[3] ) - F2[3] * ( P3[1] + cI * P3[2] ) ) + F1[5] * ( F2[2] * ( -P3[1] + cI * P3[2] ) + F2[3] * ( P3[0] + P3[3] ) ) ); const cxtype_sv denom = one / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); - V3[2] = denom * ( -two * cI ) * ( Ccoeff2*COUP2 * ( OM3 * -half * P3[0] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] + F1[3] * F2[5] ) + F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( Ccoeff1*COUP1 * ( F1[2] * F2[4] + F1[3] * F2[5] - P3[0] * OM3 * TMP2 ) ) ); - V3[3] = denom * ( -two * cI ) * ( Ccoeff2*COUP2 * ( OM3 * -half * P3[1] * ( TMP2 + two * TMP4 ) + ( -half * ( F1[2] * F2[5] + F1[3] * F2[4] ) + F1[4] * F2[3] + F1[5] * F2[2] ) ) - half * ( Ccoeff1*COUP1 * ( F1[2] * F2[5] + F1[3] * F2[4] + P3[1] * OM3 * TMP2 ) ) ); - V3[4] = denom * cI * ( Ccoeff2*COUP2 * ( OM3 * P3[2] * ( TMP2 + two * TMP4 ) + ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + ( -two * cI ) * ( F1[4] * F2[3] ) + ( two * cI ) * ( F1[5] * F2[2] ) ) ) + Ccoeff1*COUP1 * ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + P3[2] * OM3 * TMP2 ) ); - V3[5] = 
denom * ( two * cI ) * ( Ccoeff2*COUP2 * ( OM3 * half * P3[3] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] ) - half * ( F1[3] * F2[5] ) - F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( Ccoeff1*COUP1 * ( F1[2] * F2[4] + P3[3] * OM3 * TMP2 - F1[3] * F2[5] ) ) ); + V3[2] = denom * ( -two * cI ) * ( Ccoeff2 * COUP2 * ( OM3 * -half * P3[0] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] + F1[3] * F2[5] ) + F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( Ccoeff1 * COUP1 * ( F1[2] * F2[4] + F1[3] * F2[5] - P3[0] * OM3 * TMP2 ) ) ); + V3[3] = denom * ( -two * cI ) * ( Ccoeff2 * COUP2 * ( OM3 * -half * P3[1] * ( TMP2 + two * TMP4 ) + ( -half * ( F1[2] * F2[5] + F1[3] * F2[4] ) + F1[4] * F2[3] + F1[5] * F2[2] ) ) - half * ( Ccoeff1 * COUP1 * ( F1[2] * F2[5] + F1[3] * F2[4] + P3[1] * OM3 * TMP2 ) ) ); + V3[4] = denom * cI * ( Ccoeff2 * COUP2 * ( OM3 * P3[2] * ( TMP2 + two * TMP4 ) + ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + ( -two * cI ) * ( F1[4] * F2[3] ) + ( two * cI ) * ( F1[5] * F2[2] ) ) ) + Ccoeff1 * COUP1 * ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + P3[2] * OM3 * TMP2 ) ); + V3[5] = denom * ( two * cI ) * ( Ccoeff2 * COUP2 * ( OM3 * half * P3[3] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] ) - half * ( F1[3] * F2[5] ) - F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( Ccoeff1 * COUP1 * ( F1[2] * F2[4] + P3[3] * OM3 * TMP2 - F1[3] * F2[5] ) ) ); mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 58f1c596db..22f5c151a1 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0055713653564453125  +DEBUG: model prefixing takes 0.005438327789306641  INFO: Restrict model sm with file 
models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -195,7 +195,7 @@ Wrote files for 10 helas calls in 0.099 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.141 s +ALOHA: aloha creates 2 routines in 0.143 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 @@ -304,6 +304,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.345s -user 0m2.010s +real 0m2.322s +user 0m2.016s sys 0m0.294s diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 8cdd83a795..5f66b6e1a2 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00533604621887207  +DEBUG: model prefixing takes 0.0058879852294921875  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -196,6 +196,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.553s -user 0m0.477s -sys 0m0.054s +real 0m0.634s +user 0m0.480s +sys 0m0.057s diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 0d2e3b8058..bcafdc7d79 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~; add process g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0056264400482177734  +DEBUG: model prefixing takes 0.0055255889892578125  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -184,7 +184,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -201,7 +201,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -217,14 +217,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s -Wrote files for 46 helas calls in 0.238 s +Wrote files for 46 helas calls in 0.239 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.318 s +ALOHA: aloha creates 5 routines in 0.320 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -232,7 +232,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.306 s +ALOHA: aloha creates 10 routines in 0.305 s VVV1 VVV1 FFV1 @@ -352,6 +352,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.905s -user 0m2.569s -sys 0m0.322s +real 0m2.921s +user 0m2.615s +sys 0m0.290s diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index a571165e79..968f457aa4 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005511283874511719  +DEBUG: model prefixing takes 0.0058557987213134766  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,15 +190,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s -Wrote files for 36 helas calls in 0.147 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s +Wrote files for 36 helas calls in 0.164 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.322 s +ALOHA: aloha creates 5 routines in 0.331 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.312 s +ALOHA: aloha creates 10 routines in 0.310 s VVV1 VVV1 FFV1 @@ -321,6 +321,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.806s -user 0m2.510s -sys 0m0.288s +real 0m2.890s +user 0m2.524s +sys 0m0.335s diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index de77eb37ae..abdb758409 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0053822994232177734  +DEBUG: model prefixing takes 0.005751371383666992  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.022 s +1 processes with 16 diagrams generated in 0.023 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. 
-Generated helas calls for 1 subprocesses (16 diagrams) in 0.036 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.317 s +ALOHA: aloha creates 5 routines in 0.325 s VVV1 VVV1 FFV1 @@ -204,6 +204,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.778s -user 0m0.716s -sys 0m0.051s +real 0m1.024s +user 0m0.735s +sys 0m0.061s diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index d00eb26f08..c86dc85abb 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005320549011230469  +DEBUG: model prefixing takes 0.0057544708251953125  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.155 s +1 processes with 123 diagrams generated in 0.166 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,8 +190,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.419 s -Wrote files for 222 helas calls in 0.715 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.420 s +Wrote files for 222 helas calls in 0.708 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.310 s +ALOHA: aloha creates 10 routines in 0.306 s VVV1 VVV1 FFV1 @@ -324,6 +324,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.910s -user 0m3.578s -sys 0m0.310s +real 0m3.918s +user 0m3.588s +sys 0m0.312s diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 3e120ef382..dc159f2ae4 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005327701568603516  +DEBUG: model prefixing takes 0.005491018295288086  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.155 s +1 processes with 123 diagrams generated in 0.157 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.416 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.419 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.313 s +ALOHA: aloha creates 5 routines in 0.311 s VVV1 VVV1 FFV1 @@ -207,6 +207,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m1.430s -user 0m1.360s -sys 0m0.053s +real 0m1.435s +user 0m1.366s +sys 0m0.052s diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 
6d04ae6f16..65af8a9ad8 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0053408145904541016  +DEBUG: model prefixing takes 0.005440235137939453  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.843 s +1 processes with 1240 diagrams generated in 1.845 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -177,7 +177,7 @@ INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1592 term in 36s. Introduce 2768 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -192,15 +192,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.438 s -Wrote files for 2281 helas calls in 46.268 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.541 s +Wrote files for 2281 helas calls in 46.423 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.316 s +ALOHA: aloha creates 5 routines in 0.313 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -208,7 +208,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.308 s +ALOHA: aloha creates 10 routines in 0.309 s VVV1 VVV1 FFV1 @@ -326,6 +326,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m57.406s -user 0m56.359s -sys 0m0.856s +real 0m57.668s +user 0m56.583s +sys 0m0.889s diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index d6fd67c67b..460b5442dd 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005448818206787109  +DEBUG: model prefixing takes 0.005622386932373047  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.868 s +1 processes with 1240 diagrams generated in 1.829 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.509 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.468 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.344 s +ALOHA: aloha creates 5 routines in 0.343 s VVV1 VVV1 FFV1 @@ -207,6 +207,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m12.895s -user 0m12.714s -sys 0m0.107s +real 0m12.761s +user 0m12.609s +sys 0m0.100s diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 5e5ad94f5b..95af969710 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005564689636230469  +DEBUG: model prefixing takes 0.005733966827392578  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -197,7 +197,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -214,7 +214,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -230,16 +230,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s -Wrote files for 32 helas calls in 0.216 s +Wrote files for 32 helas calls in 0.218 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.146 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.130 s +ALOHA: aloha creates 4 routines in 0.132 s FFV1 FFV1 FFV1 @@ -363,6 +363,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.560s -user 0m2.211s -sys 0m0.324s +real 0m2.580s +user 0m2.267s +sys 0m0.284s diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index d98bc0f805..6f123bdbc8 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0052411556243896484  +DEBUG: model prefixing takes 0.00573277473449707  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. 
INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.077 s +8 processes with 40 diagrams generated in 0.081 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -205,12 +205,12 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. -Generated helas calls for 2 subprocesses (10 diagrams) in 0.029 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.141 s +ALOHA: aloha creates 2 routines in 0.143 s FFV1 FFV1 FFV1 @@ -227,6 +227,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m1.908s -user 0m0.585s -sys 0m0.055s +real 0m0.673s +user 0m0.609s +sys 0m0.051s diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index 3ab0b589e3..864d366bdd 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -151,7 +151,7 @@ Generated helas calls for 
1 subprocesses (1 diagrams) in 0.002 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines -ALOHA: aloha creates 1 routines in 0.060 s +ALOHA: aloha creates 1 routines in 0.061 s VVS3 FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. @@ -164,6 +164,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.430s -user 0m0.370s -sys 0m0.050s +real 0m0.899s +user 0m0.362s +sys 0m0.062s diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 8e26a769b5..815b25abb0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005576610565185547  +DEBUG: model prefixing takes 0.005514860153198242  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.028 s +5 processes with 7 diagrams generated in 0.029 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.792 s +65 processes with 1119 diagrams generated in 1.797 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -497,7 +497,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -514,7 +514,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -531,7 +531,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -548,7 +548,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -565,7 +565,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -582,7 +582,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -599,7 +599,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -616,7 +616,7 @@ INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -633,7 +633,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -650,7 +650,7 @@ INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -667,7 +667,7 @@ INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -684,7 +684,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -701,7 +701,7 @@ INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -718,7 +718,7 @@ INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -735,7 +735,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -752,7 +752,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -769,7 +769,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -786,7 +786,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -801,15 +801,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.271 s -Wrote files for 810 helas calls in 3.238 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.263 s +Wrote files for 810 helas calls in 3.206 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.332 s +ALOHA: aloha creates 5 routines in 0.329 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -817,7 +817,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.309 s +ALOHA: aloha creates 10 routines in 0.310 s VVV1 VVV1 FFV1 @@ -1131,6 +1131,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about 
this process. quit -real 0m9.513s -user 0m8.804s -sys 0m0.553s +real 0m9.415s +user 0m8.785s +sys 0m0.532s From 408955ddeec59f084f3df35e0ad667f986526c22 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 15:58:09 +0200 Subject: [PATCH 035/119] [oct23av] add copyright and license to Stephan's runCodegen.sh script --- epochX/cudacpp/runCodegen.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/epochX/cudacpp/runCodegen.sh b/epochX/cudacpp/runCodegen.sh index 0a4632bb9e..83ef0f3410 100755 --- a/epochX/cudacpp/runCodegen.sh +++ b/epochX/cudacpp/runCodegen.sh @@ -1,4 +1,8 @@ #! /bin/bash +# Copyright (C) 2020-2023 CERN and UCLouvain. +# Licensed under the GNU Lesser General Public License (version 3 or later). +# Created by: S. Hageboeck (Sep 2023) for the MG5aMC CUDACPP plugin. +# Further modified by: S. Hageboeck, A. Valassi (2023) for the MG5aMC CUDACPP plugin. set -e From ba4a19eae65b42386a1381234e9b890a0dbc8499 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 16:02:22 +0200 Subject: [PATCH 036/119] [oct23AV] in CODEGEN/generateAndCompare.sh, add file mg5.in in each generated directory, as in Stephan's runCodegen.sh script --- epochX/cudacpp/CODEGEN/generateAndCompare.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/epochX/cudacpp/CODEGEN/generateAndCompare.sh b/epochX/cudacpp/CODEGEN/generateAndCompare.sh index 33e7b07669..4b07d7c23f 100755 --- a/epochX/cudacpp/CODEGEN/generateAndCompare.sh +++ b/epochX/cudacpp/CODEGEN/generateAndCompare.sh @@ -243,6 +243,8 @@ function codeGenAndDiff() if [ -d ${OUTDIR}/${proc}.${autosuffix} ]; then mv ${OUTDIR}/${proc}.${autosuffix} ${OUTDIR}/${proc}.${autosuffix}.BKP; fi cp -dpr ${outprocauto} ${OUTDIR}/${proc}.${autosuffix} echo -e "\nOutput source code has been copied to ${OUTDIR}/${proc}.${autosuffix}" + # Add file mg5.in as in Stephan's runCodegen.sh script + cat ${MG5AMC_HOME}/${outproc}.mg | sed "s|${outproc}|${proc}.${autosuffix}|" | sed "s/;/\n/g" | sed "s/ *$//" | sed 
"s/^ *//" > ${OUTDIR}/${proc}.${autosuffix}/mg5.in # Fix build errors which arise because the autogenerated directories are not relocatable (see #400) if [ "${OUTBCK}" == "madnovec" ] || [ "${OUTBCK}" == "madonly" ] || [ "${OUTBCK}" == "mad" ] || [ "${OUTBCK}" == "madcpp" ] || [ "${OUTBCK}" == "madgpu" ]; then cat ${OUTDIR}/${proc}.${autosuffix}/Cards/me5_configuration.txt | sed 's/mg5_path/#mg5_path/' > ${OUTDIR}/${proc}.${autosuffix}/Cards/me5_configuration.txt.new From 71e46f13378cfc3414c0d2b9879fea20c97b6481 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 16:33:15 +0200 Subject: [PATCH 037/119] [oct23av] in CODEGEN, fix build warning in counters.cc (improve Stephan's excellent patch in PR #762) --- .../PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/counters.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/counters.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/counters.cc index cd6ecc8acd..3bbdec9387 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/counters.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -40,7 +40,6 @@ extern "C" static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; From 216db39571b9b2036c4bb7a999675b33d55549be Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 16:48:37 +0200 Subject: [PATCH 038/119] [oct23av] in CODEGEN/generateAndCompare.sh, minor fix for code generation log comparison --- epochX/cudacpp/CODEGEN/generateAndCompare.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/CODEGEN/generateAndCompare.sh b/epochX/cudacpp/CODEGEN/generateAndCompare.sh index 4b07d7c23f..fe1bcf5981 100755 --- a/epochX/cudacpp/CODEGEN/generateAndCompare.sh +++ b/epochX/cudacpp/CODEGEN/generateAndCompare.sh @@ -294,7 +294,7 @@ EOF pushd ${OUTDIR} >& /dev/null echo -e "\n+++ Compare old and new code generation log for $proc\n" ###if diff -c ${proc}.${autosuffix}.BKP/${outproc}_log.txt ${proc}.${autosuffix}; then echo "Old and new code generation logs are identical"; fi # context diff - if diff ${proc}.${autosuffix}.BKP/${outproc}_log.txt ${proc}.${autosuffix}; then echo "Old and new code generation logs are identical"; fi # context diff + if diff ${proc}.${autosuffix}.BKP/$(basename ${outproc})_log.txt ${proc}.${autosuffix}; then echo "Old and new code generation logs are identical"; fi # context diff echo -e "\n+++ Compare old and new generated code for $proc\n" if $SCRDIR/diffCode.sh ${BRIEF} -r -c ${proc}.${autosuffix}.BKP ${proc}.${autosuffix}; then echo "Old and new generated codes are identical"; else echo -e "\nWARNING! 
Old and new generated codes differ"; fi popd >& /dev/null From 8a0c4d748d5fb49d08310efd2fd99ac4a4e0f88d Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 16:53:41 +0200 Subject: [PATCH 039/119] [oct23av] regenerate all 8 mad and 7 sa processes after including Stephan's counters.cc patch from PR #672 I checked that ggtt tput/tmad tests succeed. There is an average ~10% speedup even in the simple ggtt case! --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 17 ++-- .../SubProcesses/P1_epem_mupmum/counters.cc | 18 +--- .../SubProcesses/P1_epem_mupmum/matrix1.f | 2 - epochX/cudacpp/ee_mumu.mad/mg5.in | 2 + .../CODEGEN_cudacpp_ee_mumu_log.txt | 10 +-- epochX/cudacpp/ee_mumu.sa/mg5.in | 5 +- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 15 ++-- .../SubProcesses/P1_gg_ttx/counters.cc | 18 +--- .../SubProcesses/P1_gg_ttx/matrix1.f | 2 - epochX/cudacpp/gg_tt.mad/mg5.in | 2 + .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 10 +-- epochX/cudacpp/gg_tt.sa/mg5.in | 5 +- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 21 ++--- .../SubProcesses/P1_gg_ttx/counters.cc | 18 +--- .../SubProcesses/P1_gg_ttx/matrix1.f | 2 - .../SubProcesses/P2_gg_ttxg/counters.cc | 18 +--- .../SubProcesses/P2_gg_ttxg/matrix1.f | 2 - epochX/cudacpp/gg_tt01g.mad/mg5.in | 3 +- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 19 ++--- .../SubProcesses/P1_gg_ttxg/counters.cc | 18 +--- .../SubProcesses/P1_gg_ttxg/matrix1.f | 2 - epochX/cudacpp/gg_ttg.mad/mg5.in | 2 + .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 14 +-- epochX/cudacpp/gg_ttg.sa/mg5.in | 5 +- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 23 +++-- .../SubProcesses/P1_gg_ttxgg/counters.cc | 18 +--- .../SubProcesses/P1_gg_ttxgg/matrix1.f | 2 - epochX/cudacpp/gg_ttgg.mad/mg5.in | 2 + .../CODEGEN_cudacpp_gg_ttgg_log.txt | 12 +-- epochX/cudacpp/gg_ttgg.sa/mg5.in | 5 +- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 23 +++-- .../SubProcesses/P1_gg_ttxggg/counters.cc | 18 +--- .../SubProcesses/P1_gg_ttxggg/matrix1.f | 2 - 
epochX/cudacpp/gg_ttggg.mad/mg5.in | 3 +- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 14 +-- epochX/cudacpp/gg_ttggg.sa/mg5.in | 5 +- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 25 +++--- .../SubProcesses/P1_gu_ttxu/counters.cc | 18 +--- .../SubProcesses/P1_gu_ttxu/matrix1.f | 2 - .../SubProcesses/P1_gux_ttxux/counters.cc | 18 +--- .../SubProcesses/P1_gux_ttxux/matrix1.f | 2 - epochX/cudacpp/gq_ttq.mad/mg5.in | 2 + .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 12 +-- epochX/cudacpp/gq_ttq.sa/mg5.in | 5 +- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 8 +- .../CODEGEN_mad_pp_tt012j_log.txt | 85 ++++++------------- .../SubProcesses/P0_gg_ttx/counters.cc | 18 +--- .../SubProcesses/P0_gg_ttx/matrix1.f | 2 - .../SubProcesses/P0_uux_ttx/counters.cc | 18 +--- .../SubProcesses/P0_uux_ttx/matrix1.f | 2 - .../SubProcesses/P1_gg_ttxg/counters.cc | 18 +--- .../SubProcesses/P1_gg_ttxg/matrix1.f | 2 - .../SubProcesses/P1_gu_ttxu/counters.cc | 18 +--- .../SubProcesses/P1_gu_ttxu/matrix1.f | 2 - .../SubProcesses/P1_gux_ttxux/counters.cc | 18 +--- .../SubProcesses/P1_gux_ttxux/matrix1.f | 2 - .../SubProcesses/P1_uux_ttxg/counters.cc | 18 +--- .../SubProcesses/P1_uux_ttxg/matrix1.f | 2 - .../SubProcesses/P2_gg_ttxgg/counters.cc | 18 +--- .../SubProcesses/P2_gg_ttxgg/matrix1.f | 2 - .../SubProcesses/P2_gg_ttxuux/counters.cc | 18 +--- .../SubProcesses/P2_gg_ttxuux/matrix1.f | 2 - .../SubProcesses/P2_gu_ttxgu/counters.cc | 18 +--- .../SubProcesses/P2_gu_ttxgu/matrix1.f | 2 - .../SubProcesses/P2_gux_ttxgux/counters.cc | 18 +--- .../SubProcesses/P2_gux_ttxgux/matrix1.f | 2 - .../SubProcesses/P2_uc_ttxuc/counters.cc | 18 +--- .../SubProcesses/P2_uc_ttxuc/matrix1.f | 2 - .../SubProcesses/P2_ucx_ttxucx/counters.cc | 18 +--- .../SubProcesses/P2_ucx_ttxucx/matrix1.f | 2 - .../SubProcesses/P2_uu_ttxuu/counters.cc | 18 +--- .../SubProcesses/P2_uu_ttxuu/matrix1.f | 2 - .../SubProcesses/P2_uux_ttxccx/counters.cc | 18 +--- .../SubProcesses/P2_uux_ttxccx/matrix1.f | 2 - 
.../SubProcesses/P2_uux_ttxgg/counters.cc | 18 +--- .../SubProcesses/P2_uux_ttxgg/matrix1.f | 2 - .../SubProcesses/P2_uux_ttxuux/counters.cc | 18 +--- .../SubProcesses/P2_uux_ttxuux/matrix1.f | 2 - .../SubProcesses/P2_uxcx_ttxuxcx/counters.cc | 18 +--- .../SubProcesses/P2_uxcx_ttxuxcx/matrix1.f | 2 - .../SubProcesses/P2_uxux_ttxuxux/counters.cc | 18 +--- .../SubProcesses/P2_uxux_ttxuxux/matrix1.f | 2 - epochX/cudacpp/pp_tt012j.mad/mg5.in | 2 + 83 files changed, 187 insertions(+), 709 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 14d2e02d1e..be5cee0fb8 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005278587341308594  +DEBUG: model prefixing takes 0.005257368087768555  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,7 +174,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,19 +191,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.096 s +Wrote files for 8 helas calls in 0.097 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.196 s +ALOHA: aloha creates 3 routines in 0.197 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.253 s +ALOHA: aloha creates 7 routines in 0.251 s FFV1 FFV1 FFV2 @@ -227,7 +227,6 @@ INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG: path =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT [output.py at line 213]  DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * @@ -309,14 +308,12 @@ patching file matrix1.f Hunk #3 succeeded at 230 (offset 9 lines). Hunk #4 succeeded at 267 (offset 18 lines). Hunk #5 succeeded at 312 (offset 18 lines). -Hunk #6 succeeded at 410 (offset 14 lines). -Hunk #7 succeeded at 478 (offset 8 lines). Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done. Type "launch" to generate events from this process, or see /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README Run "open index.html" to see more information about this process. 
quit -real 0m2.484s -user 0m2.182s +real 0m2.494s +user 0m2.190s sys 0m0.291s diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/counters.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/counters.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f index e00f0e1b64..21e300b33e 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f @@ -410,7 +410,6 @@ REAL*8 FUNCTION 
MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -478,7 +477,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/ee_mumu.mad/mg5.in b/epochX/cudacpp/ee_mumu.mad/mg5.in index d868684019..12a2c58512 100644 --- a/epochX/cudacpp/ee_mumu.mad/mg5.in +++ b/epochX/cudacpp/ee_mumu.mad/mg5.in @@ -1,2 +1,4 @@ +set stdout_level DEBUG +set zerowidth_tchannel F generate e+ e- > mu+ mu- output madevent ee_mumu.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 37c45d8bc4..b13f728dee 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058443546295166016  +DEBUG: model prefixing takes 0.00538325309753418  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -181,7 +181,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.274 s +ALOHA: aloha creates 4 routines in 0.262 s FFV1 FFV1 FFV2 @@ -201,6 +201,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.765s -user 0m0.630s -sys 0m0.052s +real 0m0.651s +user 0m0.601s +sys 0m0.044s diff --git a/epochX/cudacpp/ee_mumu.sa/mg5.in b/epochX/cudacpp/ee_mumu.sa/mg5.in index 5c48dc6ef9..dcdf875dea 100644 --- a/epochX/cudacpp/ee_mumu.sa/mg5.in +++ b/epochX/cudacpp/ee_mumu.sa/mg5.in @@ -1,3 +1,4 @@ +set stdout_level DEBUG +set zerowidth_tchannel F generate e+ e- > mu+ mu- -output standalone_cudacpp ee_mumu.sa --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp - +output standalone_cudacpp ee_mumu.sa diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 22f5c151a1..b5c53c1161 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005438327789306641  +DEBUG: model prefixing takes 0.005456686019897461  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,7 +191,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.099 s +Wrote files for 10 helas calls in 0.103 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines @@ -200,7 +200,7 @@ ALOHA: aloha creates 2 routines in 0.143 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.130 s +ALOHA: aloha creates 4 routines in 0.140 s VVV1 FFV1 FFV1 @@ -220,7 +220,6 @@ INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG: path =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT [output.py at line 213]  DEBUG! 
Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * @@ -304,6 +303,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.322s -user 0m2.016s -sys 0m0.294s +real 0m2.913s +user 0m2.038s +sys 0m0.292s diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f index ef18aff221..daea73a6df 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -396,7 +396,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -470,7 +469,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/gg_tt.mad/mg5.in b/epochX/cudacpp/gg_tt.mad/mg5.in index 8b65375c7e..7859bf9b80 100644 --- a/epochX/cudacpp/gg_tt.mad/mg5.in +++ b/epochX/cudacpp/gg_tt.mad/mg5.in @@ -1,2 +1,4 @@ +set stdout_level DEBUG +set zerowidth_tchannel F generate g g > t t~ output madevent gg_tt.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 5f66b6e1a2..23c04c9100 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058879852294921875  +DEBUG: model prefixing takes 0.005602359771728516  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -180,7 +180,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.143 s VVV1 FFV1 FFV1 @@ -196,6 +196,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.634s -user 0m0.480s -sys 0m0.057s +real 0m0.539s +user 0m0.490s +sys 0m0.045s diff --git a/epochX/cudacpp/gg_tt.sa/mg5.in b/epochX/cudacpp/gg_tt.sa/mg5.in index a2f1230ada..8298656d17 100644 --- a/epochX/cudacpp/gg_tt.sa/mg5.in +++ b/epochX/cudacpp/gg_tt.sa/mg5.in @@ -1,3 +1,4 @@ +set stdout_level DEBUG +set zerowidth_tchannel F generate g g > t t~ -output standalone_cudacpp gg_tt.sa --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp - +output standalone_cudacpp gg_tt.sa diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index bcafdc7d79..b27d021202 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~; add process g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0055255889892578125  +DEBUG: model prefixing takes 0.005415201187133789  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -184,7 +184,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -201,7 +201,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -216,15 +216,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s -Wrote files for 46 helas calls in 0.239 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.041 s +Wrote files for 46 helas calls in 0.238 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.320 s +ALOHA: aloha creates 5 routines in 0.321 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -257,7 +257,6 @@ INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG: path =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT [output.py at line 213]  DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * @@ -344,14 +343,12 @@ Hunk #2 succeeded at 159 (offset 16 lines). Hunk #3 succeeded at 237 (offset 16 lines). Hunk #4 succeeded at 265 (offset 16 lines). Hunk #5 succeeded at 310 (offset 16 lines). -Hunk #6 succeeded at 434 (offset 38 lines). -Hunk #7 succeeded at 588 (offset 118 lines). Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done. 
Type "launch" to generate events from this process, or see /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/README Run "open index.html" to see more information about this process. quit -real 0m2.921s -user 0m2.615s -sys 0m0.290s +real 0m2.906s +user 0m2.575s +sys 0m0.318s diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f index 
ef18aff221..daea73a6df 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -396,7 +396,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -470,7 +469,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f index 8fa4eb7211..eb85b7ebb0 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f @@ -434,7 +434,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -588,7 +587,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/gg_tt01g.mad/mg5.in b/epochX/cudacpp/gg_tt01g.mad/mg5.in index f30dacfe93..a20e166e81 100644 --- a/epochX/cudacpp/gg_tt01g.mad/mg5.in +++ b/epochX/cudacpp/gg_tt01g.mad/mg5.in @@ -1,4 +1,5 @@ +set stdout_level DEBUG +set zerowidth_tchannel F generate g g > t t~ add process g g > t t~ g output madevent gg_tt01g.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp - diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index 968f457aa4..d367fef872 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058557987213134766  +DEBUG: model prefixing takes 0.005761861801147461  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,15 +190,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s -Wrote files for 36 helas calls in 0.164 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s +Wrote files for 36 helas calls in 0.146 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.331 s +ALOHA: aloha creates 5 routines in 0.323 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -231,7 +231,6 @@ INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG: path =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT [output.py at line 213]  DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * @@ -313,14 +312,12 @@ Hunk #2 succeeded at 159 (offset 16 lines). Hunk #3 succeeded at 237 (offset 16 lines). Hunk #4 succeeded at 265 (offset 16 lines). Hunk #5 succeeded at 310 (offset 16 lines). -Hunk #6 succeeded at 434 (offset 38 lines). -Hunk #7 succeeded at 588 (offset 118 lines). Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done. 
Type "launch" to generate events from this process, or see /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/README Run "open index.html" to see more information about this process. quit -real 0m2.890s -user 0m2.524s -sys 0m0.335s +real 0m2.947s +user 0m2.470s +sys 0m0.321s diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f index 520966d7b7..fc924825c2 
100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f @@ -434,7 +434,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -588,7 +587,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/gg_ttg.mad/mg5.in b/epochX/cudacpp/gg_ttg.mad/mg5.in index e5212c8ecf..98f53ce50d 100644 --- a/epochX/cudacpp/gg_ttg.mad/mg5.in +++ b/epochX/cudacpp/gg_ttg.mad/mg5.in @@ -1,2 +1,4 @@ +set stdout_level DEBUG +set zerowidth_tchannel F generate g g > t t~ g output madevent gg_ttg.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index abdb758409..20d22ac1c4 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005751371383666992  +DEBUG: model prefixing takes 0.005301237106323242  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.023 s +1 processes with 16 diagrams generated in 0.022 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.325 s +ALOHA: aloha creates 5 routines in 0.321 s VVV1 VVV1 FFV1 @@ -204,6 +204,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m1.024s -user 0m0.735s -sys 0m0.061s +real 0m0.792s +user 0m0.717s +sys 0m0.059s diff --git a/epochX/cudacpp/gg_ttg.sa/mg5.in b/epochX/cudacpp/gg_ttg.sa/mg5.in index 
9fd2517e11..fbf08862ec 100644 --- a/epochX/cudacpp/gg_ttg.sa/mg5.in +++ b/epochX/cudacpp/gg_ttg.sa/mg5.in @@ -1,3 +1,4 @@ +set stdout_level DEBUG +set zerowidth_tchannel F generate g g > t t~ g -output standalone_cudacpp gg_ttg.sa --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp - +output standalone_cudacpp gg_ttg.sa diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index c86dc85abb..745da9d88c 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0057544708251953125  +DEBUG: model prefixing takes 0.005432605743408203  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.166 s +1 processes with 123 diagrams generated in 0.157 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,15 +190,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.420 s -Wrote files for 222 helas calls in 0.708 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.423 s +Wrote files for 222 helas calls in 0.710 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.323 s +ALOHA: aloha creates 5 routines in 0.325 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.306 s +ALOHA: aloha creates 10 routines in 0.308 s VVV1 VVV1 FFV1 @@ -234,7 +234,6 @@ INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG: path =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT [output.py at line 213]  DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * @@ -316,14 +315,12 @@ Hunk #2 succeeded at 191 (offset 48 lines). Hunk #3 succeeded at 269 (offset 48 lines). 
Hunk #4 succeeded at 297 (offset 48 lines). Hunk #5 succeeded at 342 (offset 48 lines). -Hunk #6 succeeded at 830 (offset 434 lines). -Hunk #7 succeeded at 1717 (offset 1247 lines). Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done. Type "launch" to generate events from this process, or see /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/README Run "open index.html" to see more information about this process. quit -real 0m3.918s -user 0m3.588s -sys 0m0.312s +real 0m3.903s +user 0m3.552s +sys 0m0.335s diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f index dc6e4b80f3..77f5152327 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f @@ -830,7 +830,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -1717,7 +1716,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/gg_ttgg.mad/mg5.in b/epochX/cudacpp/gg_ttgg.mad/mg5.in index b1f4667829..e2c5858b63 100644 --- a/epochX/cudacpp/gg_ttgg.mad/mg5.in +++ b/epochX/cudacpp/gg_ttgg.mad/mg5.in @@ -1,2 +1,4 @@ +set stdout_level DEBUG +set zerowidth_tchannel F generate g g > t t~ g g output madevent gg_ttgg.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index dc159f2ae4..373a89a800 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005491018295288086  +DEBUG: model prefixing takes 0.005511283874511719  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.157 s +1 processes with 123 diagrams generated in 0.155 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.419 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.420 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.311 s +ALOHA: aloha creates 5 routines in 0.315 s VVV1 VVV1 FFV1 @@ -207,6 +207,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m1.435s +real 0m1.438s user 0m1.366s -sys 0m0.052s +sys 0m0.060s diff --git a/epochX/cudacpp/gg_ttgg.sa/mg5.in b/epochX/cudacpp/gg_ttgg.sa/mg5.in index 0ec559d9b8..5b27867642 100644 --- a/epochX/cudacpp/gg_ttgg.sa/mg5.in +++ 
b/epochX/cudacpp/gg_ttgg.sa/mg5.in @@ -1,3 +1,4 @@ +set stdout_level DEBUG +set zerowidth_tchannel F generate g g > t t~ g g -output standalone_cudacpp gg_ttgg.sa --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp - +output standalone_cudacpp gg_ttgg.sa diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 65af8a9ad8..7e024b5fd3 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005440235137939453  +DEBUG: model prefixing takes 0.005335092544555664  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.845 s +1 processes with 1240 diagrams generated in 1.895 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -177,7 +177,7 @@ INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1592 term in 36s. Introduce 2768 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -192,15 +192,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.541 s -Wrote files for 2281 helas calls in 46.423 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.515 s +Wrote files for 2281 helas calls in 46.436 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.313 s +ALOHA: aloha creates 5 routines in 0.315 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -208,7 +208,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.309 s +ALOHA: aloha creates 10 routines in 0.310 s VVV1 VVV1 FFV1 @@ -236,7 +236,6 @@ INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG: path =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT [output.py at line 213]  DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * @@ -318,14 +317,12 @@ Hunk #2 succeeded at 255 (offset 112 lines). Hunk #3 succeeded at 333 (offset 112 lines). 
Hunk #4 succeeded at 361 (offset 112 lines). Hunk #5 succeeded at 406 (offset 112 lines). -Hunk #6 succeeded at 9862 (offset 9466 lines). -Hunk #7 succeeded at 19616 (offset 19146 lines). Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done. Type "launch" to generate events from this process, or see /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/README Run "open index.html" to see more information about this process. quit -real 0m57.668s -user 0m56.583s -sys 0m0.889s +real 0m57.704s +user 0m56.670s +sys 0m0.842s diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f index 7da1a11e92..fc156798a8 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f @@ -9862,7 +9862,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -19616,7 +19615,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/gg_ttggg.mad/mg5.in b/epochX/cudacpp/gg_ttggg.mad/mg5.in index 5f9d505e5c..cdbc845cdd 100644 --- a/epochX/cudacpp/gg_ttggg.mad/mg5.in +++ b/epochX/cudacpp/gg_ttggg.mad/mg5.in @@ -1,3 +1,4 @@ +set stdout_level DEBUG +set zerowidth_tchannel F generate g g > t t~ g g g output madevent gg_ttggg.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp - diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 460b5442dd..31573e7e51 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005622386932373047  +DEBUG: model prefixing takes 0.005338430404663086  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.829 s +1 processes with 1240 diagrams generated in 1.850 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.468 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.482 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.343 s +ALOHA: aloha creates 5 routines in 0.364 s VVV1 VVV1 FFV1 @@ -207,6 +207,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m12.761s -user 0m12.609s -sys 0m0.100s +real 0m13.206s +user 0m12.699s +sys 0m0.116s diff --git a/epochX/cudacpp/gg_ttggg.sa/mg5.in b/epochX/cudacpp/gg_ttggg.sa/mg5.in index 644e3be9b4..2a135334ff 100644 --- 
a/epochX/cudacpp/gg_ttggg.sa/mg5.in +++ b/epochX/cudacpp/gg_ttggg.sa/mg5.in @@ -1,3 +1,4 @@ +set stdout_level DEBUG +set zerowidth_tchannel F generate g g > t t~ g g g -output standalone_cudacpp gg_ttggg.sa --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp - +output standalone_cudacpp gg_ttggg.sa diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 95af969710..63bb0f3c9e 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005733966827392578  +DEBUG: model prefixing takes 0.005384206771850586  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -197,7 +197,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -214,7 +214,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -229,17 +229,17 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s -Wrote files for 32 helas calls in 0.218 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s +Wrote files for 32 helas calls in 0.216 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.146 s +ALOHA: aloha creates 2 routines in 0.143 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.132 s +ALOHA: aloha creates 4 routines in 0.129 s FFV1 FFV1 FFV1 @@ -260,7 +260,6 @@ INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG: path =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT [output.py at line 213]  DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * @@ -343,8 +342,6 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 247 (offset 26 lines). Hunk #4 succeeded at 281 (offset 32 lines). Hunk #5 succeeded at 326 (offset 32 lines). -Hunk #6 succeeded at 441 (offset 45 lines). -Hunk #7 succeeded at 531 (offset 61 lines). 
DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 527 (offset 58 lines). @@ -355,14 +352,12 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 247 (offset 26 lines). Hunk #4 succeeded at 281 (offset 32 lines). Hunk #5 succeeded at 326 (offset 32 lines). -Hunk #6 succeeded at 441 (offset 45 lines). -Hunk #7 succeeded at 531 (offset 61 lines). Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done. Type "launch" to generate events from this process, or see /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/README Run "open index.html" to see more information about this process. quit -real 0m2.580s -user 0m2.267s -sys 0m0.284s +real 0m2.606s +user 0m2.256s +sys 0m0.312s diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f index a0750b5419..e6d01dad0b 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f @@ -441,7 +441,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -531,7 +530,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. 
// Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f index dfb8f9c040..7a2e329e64 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f @@ -441,7 +441,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -531,7 +530,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/gq_ttq.mad/mg5.in b/epochX/cudacpp/gq_ttq.mad/mg5.in index 904e173bf4..e93843b8cd 100644 --- a/epochX/cudacpp/gq_ttq.mad/mg5.in +++ b/epochX/cudacpp/gq_ttq.mad/mg5.in @@ -1,3 +1,5 @@ +set stdout_level DEBUG +set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ generate g q > t t~ q output madevent gq_ttq.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 6f123bdbc8..71c2006493 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00573277473449707  +DEBUG: model prefixing takes 0.005433082580566406  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.081 s +8 processes with 40 diagrams generated in 0.077 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -205,12 +205,12 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. -Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.029 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.142 s FFV1 FFV1 FFV1 @@ -227,6 +227,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.673s -user 0m0.609s +real 0m0.651s +user 0m0.593s sys 0m0.051s diff --git a/epochX/cudacpp/gq_ttq.sa/mg5.in b/epochX/cudacpp/gq_ttq.sa/mg5.in index ae4d2d2c15..c0952db410 100644 --- a/epochX/cudacpp/gq_ttq.sa/mg5.in +++ b/epochX/cudacpp/gq_ttq.sa/mg5.in @@ -1,4 +1,5 @@ +set stdout_level DEBUG +set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ generate g q > t t~ q -output standalone_cudacpp gq_ttq.sa --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp - +output 
standalone_cudacpp gq_ttq.sa diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index 864d366bdd..64ce042fd4 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -151,7 +151,7 @@ Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines -ALOHA: aloha creates 1 routines in 0.061 s +ALOHA: aloha creates 1 routines in 0.060 s VVS3 FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. @@ -164,6 +164,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.899s -user 0m0.362s -sys 0m0.062s +real 0m0.428s +user 0m0.366s +sys 0m0.055s diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 815b25abb0..e2ec882498 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005514860153198242  +DEBUG: model prefixing takes 0.005310535430908203  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -497,7 +497,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -514,7 +514,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -531,7 +531,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -548,7 +548,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -565,7 +565,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -582,7 +582,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -599,7 +599,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -616,7 +616,7 @@ INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -633,7 +633,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -650,7 +650,7 @@ INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -667,7 +667,7 @@ INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -684,7 +684,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -701,7 +701,7 @@ INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -718,7 +718,7 @@ INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -735,7 +735,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -752,7 +752,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -769,7 +769,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -786,7 +786,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -801,8 +801,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.263 s -Wrote files for 810 helas calls in 3.206 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.287 s +Wrote files for 810 helas calls in 3.227 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines @@ -817,7 +817,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.310 s +ALOHA: aloha creates 10 routines in 0.308 s VVV1 VVV1 FFV1 @@ -845,7 +845,6 @@ INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG: path =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT [output.py at line 213]  DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * @@ -933,8 +932,6 @@ Hunk #2 succeeded at 146 (offset 3 lines). Hunk #3 succeeded at 224 (offset 3 lines). Hunk #4 succeeded at 252 (offset 3 lines). Hunk #5 succeeded at 297 (offset 3 lines). -Hunk #6 succeeded at 402 (offset 6 lines). -Hunk #7 succeeded at 466 (offset -4 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 483 (offset 14 lines). @@ -944,8 +941,6 @@ Hunk #2 succeeded at 159 (offset 16 lines). 
Hunk #3 succeeded at 237 (offset 16 lines). Hunk #4 succeeded at 265 (offset 16 lines). Hunk #5 succeeded at 310 (offset 16 lines). -Hunk #6 succeeded at 434 (offset 38 lines). -Hunk #7 succeeded at 588 (offset 118 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 527 (offset 58 lines). @@ -956,8 +951,6 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 240 (offset 19 lines). Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). -Hunk #6 succeeded at 428 (offset 32 lines). -Hunk #7 succeeded at 518 (offset 48 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 527 (offset 58 lines). @@ -968,8 +961,6 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 240 (offset 19 lines). Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). -Hunk #6 succeeded at 428 (offset 32 lines). -Hunk #7 succeeded at 518 (offset 48 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 538 (offset 69 lines). @@ -980,8 +971,6 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 240 (offset 19 lines). Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). -Hunk #6 succeeded at 428 (offset 32 lines). -Hunk #7 succeeded at 518 (offset 48 lines). 
DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 483 (offset 14 lines). @@ -991,8 +980,6 @@ Hunk #2 succeeded at 191 (offset 48 lines). Hunk #3 succeeded at 269 (offset 48 lines). Hunk #4 succeeded at 297 (offset 48 lines). Hunk #5 succeeded at 342 (offset 48 lines). -Hunk #6 succeeded at 830 (offset 434 lines). -Hunk #7 succeeded at 1717 (offset 1247 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 516 (offset 47 lines). @@ -1003,8 +990,6 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -Hunk #6 succeeded at 540 (offset 144 lines). -Hunk #7 succeeded at 813 (offset 343 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 527 (offset 58 lines). @@ -1015,8 +1000,6 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -Hunk #6 succeeded at 540 (offset 144 lines). -Hunk #7 succeeded at 815 (offset 345 lines). 
DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 527 (offset 58 lines). @@ -1027,8 +1010,6 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -Hunk #6 succeeded at 538 (offset 142 lines). -Hunk #7 succeeded at 812 (offset 342 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 554 (offset 85 lines). @@ -1039,8 +1020,6 @@ Hunk #2 succeeded at 196 (offset 53 lines). Hunk #3 succeeded at 274 (offset 53 lines). Hunk #4 succeeded at 302 (offset 53 lines). Hunk #5 succeeded at 347 (offset 53 lines). -Hunk #6 succeeded at 472 (offset 76 lines). -Hunk #7 succeeded at 581 (offset 111 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 626 (offset 157 lines). @@ -1051,8 +1030,6 @@ Hunk #2 succeeded at 202 (offset 59 lines). Hunk #3 succeeded at 280 (offset 59 lines). Hunk #4 succeeded at 308 (offset 59 lines). Hunk #5 succeeded at 353 (offset 59 lines). -Hunk #6 succeeded at 484 (offset 88 lines). -Hunk #7 succeeded at 593 (offset 123 lines). 
DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 538 (offset 69 lines). @@ -1063,8 +1040,6 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -Hunk #6 succeeded at 468 (offset 72 lines). -Hunk #7 succeeded at 620 (offset 150 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 626 (offset 157 lines). @@ -1075,8 +1050,6 @@ Hunk #2 succeeded at 202 (offset 59 lines). Hunk #3 succeeded at 280 (offset 59 lines). Hunk #4 succeeded at 308 (offset 59 lines). Hunk #5 succeeded at 353 (offset 59 lines). -Hunk #6 succeeded at 484 (offset 88 lines). -Hunk #7 succeeded at 593 (offset 123 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 538 (offset 69 lines). @@ -1087,8 +1060,6 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -Hunk #6 succeeded at 540 (offset 144 lines). -Hunk #7 succeeded at 821 (offset 351 lines). 
DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 538 (offset 69 lines). @@ -1099,8 +1070,6 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -Hunk #6 succeeded at 468 (offset 72 lines). -Hunk #7 succeeded at 620 (offset 150 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 554 (offset 85 lines). @@ -1111,8 +1080,6 @@ Hunk #2 succeeded at 196 (offset 53 lines). Hunk #3 succeeded at 274 (offset 53 lines). Hunk #4 succeeded at 302 (offset 53 lines). Hunk #5 succeeded at 347 (offset 53 lines). -Hunk #6 succeeded at 472 (offset 76 lines). -Hunk #7 succeeded at 581 (offset 111 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 538 (offset 69 lines). @@ -1123,14 +1090,12 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -Hunk #6 succeeded at 468 (offset 72 lines). -Hunk #7 succeeded at 620 (offset 150 lines). Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j done. 
Type "launch" to generate events from this process, or see /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/README Run "open index.html" to see more information about this process. quit -real 0m9.415s -user 0m8.785s -sys 0m0.532s +real 0m9.420s +user 0m8.812s +sys 0m0.563s diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f index 
d81c9e86cb..d803e4f19f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f @@ -396,7 +396,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -470,7 +469,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f index 0ec17d77eb..4c21758744 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f @@ -402,7 +402,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -466,7 +465,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. 
// Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f index 520966d7b7..fc924825c2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f @@ -434,7 +434,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -588,7 +587,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f index 5847ea0f3f..d61f0e1a21 100644 --- 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f @@ -428,7 +428,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -518,7 +517,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f index 65d83f3206..b082becd2a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f @@ -428,7 +428,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -518,7 +517,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. 
// Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f index beacc34205..265f6006db 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f @@ -428,7 +428,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -518,7 +517,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f index 663b15574e..2e8e377de8 100644 --- 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f @@ -830,7 +830,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -1717,7 +1716,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f index f401e1eb21..41e5e36e39 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f @@ -540,7 +540,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -813,7 +812,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. 
// Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f index ad08208e22..a2b48f860a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f @@ -540,7 +540,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -815,7 +814,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f index 9517cf043b..7ce63300ba 100644 --- 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f @@ -538,7 +538,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -812,7 +811,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f index 6d94cf8fc6..efcaed5bd1 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f @@ -472,7 +472,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -581,7 +580,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. 
// Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f index 69472aa185..3172975ef4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f @@ -484,7 +484,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -593,7 +592,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f index 44d755483f..77fe909abc 100644 --- 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f @@ -468,7 +468,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -620,7 +619,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f index 4a4f39ba52..c5a7b6787c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f @@ -484,7 +484,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -593,7 +592,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. 
// Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f index 6fdd945c1e..80fb12abe5 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f @@ -540,7 +540,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -821,7 +820,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f index 03d576c6d5..3544d80d72 100644 --- 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f @@ -468,7 +468,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -620,7 +619,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f index 19a22be6d6..61d4e59741 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f @@ -472,7 +472,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -581,7 +580,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. 
// Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f index 65a58589dc..1b50f51264 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f @@ -468,7 +468,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -620,7 +619,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/mg5.in b/epochX/cudacpp/pp_tt012j.mad/mg5.in index 66c20a304b..91e22f5295 100644 --- a/epochX/cudacpp/pp_tt012j.mad/mg5.in +++ b/epochX/cudacpp/pp_tt012j.mad/mg5.in @@ -1,3 +1,5 @@ +set stdout_level DEBUG +set zerowidth_tchannel F define j = p generate p p > t t~ @0 add process p p > t t~ j @1 From cd444c66ed27bbc2791105e9712b66dad159640a Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 17:05:05 +0200 Subject: [PATCH 040/119] [oct23av] temporarely move to c586208a9 generated code to avoid conflicts with Stephan's PR #762 git checkout c586208a9 $(git ls-tree --name-only HEAD *.mad) git checkout c586208a9 $(git ls-tree --name-only HEAD *.sa) --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 118 +- .../ee_mumu.mad/Cards/me5_configuration.txt | 4 +- .../ee_mumu.mad/Source/DHELAS/aloha_file.inc | 2 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../SubProcesses/P1_epem_mupmum/CPPProcess.cc | 30 +- .../SubProcesses/P1_epem_mupmum/auto_dsig1.f | 20 +- .../SubProcesses/P1_epem_mupmum/counters.cc | 18 +- .../SubProcesses/P1_epem_mupmum/matrix1.f | 2 + .../ee_mumu.mad/SubProcesses/cudacpp.mk | 6 +- .../cudacpp/ee_mumu.mad/SubProcesses/makefile | 4 +- .../cudacpp/ee_mumu.mad/bin/generate_events | 22 +- .../ee_mumu.mad/bin/internal/banner.py | 3 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42831 -> 42833 bytes epochX/cudacpp/ee_mumu.mad/bin/madevent | 20 +- epochX/cudacpp/ee_mumu.mad/mg5.in | 2 - epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h | 30 +- .../CODEGEN_cudacpp_ee_mumu_log.txt | 78 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../P1_Sigma_sm_epem_mupmum/CPPProcess.cc | 30 +- .../ee_mumu.sa/SubProcesses/cudacpp.mk | 6 +- epochX/cudacpp/ee_mumu.sa/mg5.in | 5 +- 
epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h | 30 +- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 123 +- .../gg_tt.mad/Cards/me5_configuration.txt | 4 +- .../gg_tt.mad/Source/DHELAS/aloha_file.inc | 2 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../SubProcesses/P1_gg_ttx/CPPProcess.cc | 19 +- .../SubProcesses/P1_gg_ttx/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_ttx/counters.cc | 18 +- .../SubProcesses/P1_gg_ttx/matrix1.f | 2 + .../cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 6 +- .../cudacpp/gg_tt.mad/SubProcesses/makefile | 4 +- epochX/cudacpp/gg_tt.mad/bin/generate_events | 22 +- .../cudacpp/gg_tt.mad/bin/internal/banner.py | 3 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42831 -> 42833 bytes epochX/cudacpp/gg_tt.mad/bin/madevent | 20 +- epochX/cudacpp/gg_tt.mad/mg5.in | 2 - epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h | 8 - .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 87 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../P1_Sigma_sm_gg_ttx/CPPProcess.cc | 19 +- .../cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk | 6 +- epochX/cudacpp/gg_tt.sa/mg5.in | 5 +- epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h | 8 - .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 169 +- .../gg_tt01g.mad/Cards/me5_configuration.txt | 4 +- .../gg_tt01g.mad/Source/DHELAS/aloha_file.inc | 2 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../SubProcesses/P1_gg_ttx/CPPProcess.cc | 19 +- .../SubProcesses/P1_gg_ttx/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_ttx/counters.cc | 18 +- .../SubProcesses/P1_gg_ttx/matrix1.f | 2 + .../SubProcesses/P2_gg_ttxg/CPPProcess.cc | 69 +- .../SubProcesses/P2_gg_ttxg/auto_dsig1.f | 18 +- .../SubProcesses/P2_gg_ttxg/counters.cc | 18 +- .../SubProcesses/P2_gg_ttxg/matrix1.f | 2 + .../gg_tt01g.mad/SubProcesses/cudacpp.mk | 6 +- .../gg_tt01g.mad/SubProcesses/makefile | 4 +- .../cudacpp/gg_tt01g.mad/bin/generate_events | 22 +- .../gg_tt01g.mad/bin/internal/banner.py | 3 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42831 -> 42833 bytes 
epochX/cudacpp/gg_tt01g.mad/bin/madevent | 20 +- epochX/cudacpp/gg_tt01g.mad/mg5.in | 3 +- epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h | 18 - .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 129 +- .../gg_ttg.mad/Cards/me5_configuration.txt | 4 +- .../gg_ttg.mad/Source/DHELAS/aloha_file.inc | 2 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../SubProcesses/P1_gg_ttxg/CPPProcess.cc | 69 +- .../SubProcesses/P1_gg_ttxg/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_ttxg/counters.cc | 18 +- .../SubProcesses/P1_gg_ttxg/matrix1.f | 2 + .../gg_ttg.mad/SubProcesses/cudacpp.mk | 6 +- .../cudacpp/gg_ttg.mad/SubProcesses/makefile | 4 +- epochX/cudacpp/gg_ttg.mad/bin/generate_events | 22 +- .../cudacpp/gg_ttg.mad/bin/internal/banner.py | 3 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42831 -> 42833 bytes epochX/cudacpp/gg_ttg.mad/bin/madevent | 20 +- epochX/cudacpp/gg_ttg.mad/mg5.in | 2 - epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h | 18 - .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 89 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../P1_Sigma_sm_gg_ttxg/CPPProcess.cc | 69 +- .../cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk | 6 +- epochX/cudacpp/gg_ttg.sa/mg5.in | 5 +- epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h | 18 - .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 131 +- .../gg_ttgg.mad/Cards/me5_configuration.txt | 4 +- .../gg_ttgg.mad/Source/DHELAS/aloha_file.inc | 2 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../SubProcesses/P1_gg_ttxgg/CPPProcess.cc | 439 +- .../SubProcesses/P1_gg_ttxgg/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_ttxgg/counters.cc | 18 +- .../SubProcesses/P1_gg_ttxgg/matrix1.f | 2 + .../gg_ttgg.mad/SubProcesses/cudacpp.mk | 6 +- .../cudacpp/gg_ttgg.mad/SubProcesses/makefile | 4 +- .../cudacpp/gg_ttgg.mad/bin/generate_events | 22 +- .../gg_ttgg.mad/bin/internal/banner.py | 3 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42831 -> 42833 bytes epochX/cudacpp/gg_ttgg.mad/bin/madevent | 20 +- epochX/cudacpp/gg_ttgg.mad/mg5.in | 2 - 
epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h | 24 - .../CODEGEN_cudacpp_gg_ttgg_log.txt | 91 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../P1_Sigma_sm_gg_ttxgg/CPPProcess.cc | 439 +- .../gg_ttgg.sa/SubProcesses/cudacpp.mk | 6 +- epochX/cudacpp/gg_ttgg.sa/mg5.in | 5 +- epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h | 24 - .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 135 +- .../gg_ttggg.mad/Cards/me5_configuration.txt | 4 +- .../gg_ttggg.mad/Source/DHELAS/aloha_file.inc | 2 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../SubProcesses/P1_gg_ttxggg/CPPProcess.cc | 4555 +++++++++-------- .../SubProcesses/P1_gg_ttxggg/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_ttxggg/counters.cc | 18 +- .../SubProcesses/P1_gg_ttxggg/matrix1.f | 2 + .../gg_ttggg.mad/SubProcesses/cudacpp.mk | 6 +- .../gg_ttggg.mad/SubProcesses/makefile | 4 +- .../cudacpp/gg_ttggg.mad/bin/generate_events | 22 +- .../gg_ttggg.mad/bin/internal/banner.py | 3 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42831 -> 42833 bytes epochX/cudacpp/gg_ttggg.mad/bin/madevent | 20 +- epochX/cudacpp/gg_ttggg.mad/mg5.in | 3 +- epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h | 24 - .../CODEGEN_cudacpp_gg_ttggg_log.txt | 93 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../P1_Sigma_sm_gg_ttxggg/CPPProcess.cc | 4555 +++++++++-------- .../gg_ttggg.sa/SubProcesses/cudacpp.mk | 6 +- epochX/cudacpp/gg_ttggg.sa/mg5.in | 5 +- epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h | 24 - .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 165 +- .../gq_ttq.mad/Cards/me5_configuration.txt | 4 +- .../gq_ttq.mad/Source/DHELAS/aloha_file.inc | 2 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../SubProcesses/P1_gu_ttxu/CPPProcess.cc | 40 +- .../SubProcesses/P1_gu_ttxu/auto_dsig1.f | 24 +- .../SubProcesses/P1_gu_ttxu/counters.cc | 18 +- .../SubProcesses/P1_gu_ttxu/matrix1.f | 2 + .../SubProcesses/P1_gux_ttxux/CPPProcess.cc | 33 +- .../SubProcesses/P1_gux_ttxux/auto_dsig1.f | 28 +- .../SubProcesses/P1_gux_ttxux/counters.cc | 18 +- 
.../SubProcesses/P1_gux_ttxux/matrix1.f | 2 + .../gq_ttq.mad/SubProcesses/cudacpp.mk | 6 +- .../cudacpp/gq_ttq.mad/SubProcesses/makefile | 4 +- epochX/cudacpp/gq_ttq.mad/bin/generate_events | 22 +- .../cudacpp/gq_ttq.mad/bin/internal/banner.py | 3 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42831 -> 42833 bytes epochX/cudacpp/gq_ttq.mad/bin/madevent | 20 +- epochX/cudacpp/gq_ttq.mad/mg5.in | 2 - epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h | 10 - .../ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt | 1026 ++-- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 124 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../P1_Sigma_sm_gu_ttxu/CPPProcess.cc | 40 +- .../P1_Sigma_sm_gux_ttxux/CPPProcess.cc | 33 +- .../cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk | 6 +- epochX/cudacpp/gq_ttq.sa/mg5.in | 5 +- epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h | 10 - .../ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt | 1026 ++-- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 85 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../P1_Sigma_heft_gg_h/CPPProcess.cc | 9 +- .../heft_gg_h.sa/SubProcesses/cudacpp.mk | 6 +- .../cudacpp/heft_gg_h.sa/src/HelAmps_heft.h | 2 - .../CODEGEN_mad_pp_tt012j_log.txt | 792 ++- .../pp_tt012j.mad/Cards/me5_configuration.txt | 4 +- .../Source/DHELAS/aloha_file.inc | 2 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../SubProcesses/P0_gg_ttx/CPPProcess.cc | 19 +- .../SubProcesses/P0_gg_ttx/auto_dsig1.f | 18 +- .../SubProcesses/P0_gg_ttx/counters.cc | 18 +- .../SubProcesses/P0_gg_ttx/matrix1.f | 2 + .../SubProcesses/P0_uux_ttx/CPPProcess.cc | 17 +- .../SubProcesses/P0_uux_ttx/auto_dsig1.f | 34 +- .../SubProcesses/P0_uux_ttx/counters.cc | 18 +- .../SubProcesses/P0_uux_ttx/matrix1.f | 2 + .../SubProcesses/P0_uux_ttx/mirrorprocs.inc | 2 +- .../SubProcesses/P0_uux_ttx/processes.dat | 2 +- .../SubProcesses/P1_gg_ttxg/CPPProcess.cc | 69 +- .../SubProcesses/P1_gg_ttxg/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_ttxg/counters.cc | 18 +- .../SubProcesses/P1_gg_ttxg/matrix1.f | 2 + 
.../SubProcesses/P1_gu_ttxu/CPPProcess.cc | 35 +- .../SubProcesses/P1_gu_ttxu/auto_dsig1.f | 24 +- .../SubProcesses/P1_gu_ttxu/counters.cc | 18 +- .../SubProcesses/P1_gu_ttxu/matrix1.f | 2 + .../SubProcesses/P1_gu_ttxu/mirrorprocs.inc | 2 +- .../SubProcesses/P1_gu_ttxu/processes.dat | 2 +- .../SubProcesses/P1_gux_ttxux/CPPProcess.cc | 35 +- .../SubProcesses/P1_gux_ttxux/auto_dsig1.f | 28 +- .../SubProcesses/P1_gux_ttxux/counters.cc | 18 +- .../SubProcesses/P1_gux_ttxux/matrix1.f | 2 + .../SubProcesses/P1_gux_ttxux/mirrorprocs.inc | 2 +- .../SubProcesses/P1_gux_ttxux/processes.dat | 2 +- .../SubProcesses/P1_uux_ttxg/CPPProcess.cc | 35 +- .../SubProcesses/P1_uux_ttxg/auto_dsig1.f | 34 +- .../SubProcesses/P1_uux_ttxg/counters.cc | 18 +- .../SubProcesses/P1_uux_ttxg/matrix1.f | 2 + .../SubProcesses/P1_uux_ttxg/mirrorprocs.inc | 2 +- .../SubProcesses/P1_uux_ttxg/processes.dat | 2 +- .../SubProcesses/P2_gg_ttxgg/CPPProcess.cc | 439 +- .../SubProcesses/P2_gg_ttxgg/auto_dsig1.f | 18 +- .../SubProcesses/P2_gg_ttxgg/counters.cc | 18 +- .../SubProcesses/P2_gg_ttxgg/matrix1.f | 2 + .../SubProcesses/P2_gg_ttxuux/CPPProcess.cc | 152 +- .../SubProcesses/P2_gg_ttxuux/auto_dsig1.f | 18 +- .../SubProcesses/P2_gg_ttxuux/counters.cc | 18 +- .../SubProcesses/P2_gg_ttxuux/matrix1.f | 2 + .../SubProcesses/P2_gu_ttxgu/CPPProcess.cc | 147 +- .../SubProcesses/P2_gu_ttxgu/auto_dsig1.f | 24 +- .../SubProcesses/P2_gu_ttxgu/counters.cc | 18 +- .../SubProcesses/P2_gu_ttxgu/matrix1.f | 2 + .../SubProcesses/P2_gu_ttxgu/mirrorprocs.inc | 2 +- .../SubProcesses/P2_gu_ttxgu/processes.dat | 2 +- .../SubProcesses/P2_gux_ttxgux/CPPProcess.cc | 147 +- .../SubProcesses/P2_gux_ttxgux/auto_dsig1.f | 28 +- .../SubProcesses/P2_gux_ttxgux/counters.cc | 18 +- .../SubProcesses/P2_gux_ttxgux/matrix1.f | 2 + .../P2_gux_ttxgux/mirrorprocs.inc | 2 +- .../SubProcesses/P2_gux_ttxgux/processes.dat | 2 +- .../SubProcesses/P2_uc_ttxuc/CPPProcess.cc | 49 +- .../SubProcesses/P2_uc_ttxuc/auto_dsig1.f | 26 +- 
.../SubProcesses/P2_uc_ttxuc/counters.cc | 18 +- .../SubProcesses/P2_uc_ttxuc/matrix1.f | 2 + .../SubProcesses/P2_uc_ttxuc/mirrorprocs.inc | 2 +- .../SubProcesses/P2_uc_ttxuc/processes.dat | 2 +- .../SubProcesses/P2_ucx_ttxucx/CPPProcess.cc | 49 +- .../SubProcesses/P2_ucx_ttxucx/auto_dsig1.f | 34 +- .../SubProcesses/P2_ucx_ttxucx/counters.cc | 18 +- .../SubProcesses/P2_ucx_ttxucx/matrix1.f | 2 + .../P2_ucx_ttxucx/mirrorprocs.inc | 2 +- .../SubProcesses/P2_ucx_ttxucx/processes.dat | 2 +- .../SubProcesses/P2_uu_ttxuu/CPPProcess.cc | 75 +- .../SubProcesses/P2_uu_ttxuu/auto_dsig1.f | 30 +- .../SubProcesses/P2_uu_ttxuu/counters.cc | 18 +- .../SubProcesses/P2_uu_ttxuu/matrix1.f | 2 + .../SubProcesses/P2_uux_ttxccx/CPPProcess.cc | 49 +- .../SubProcesses/P2_uux_ttxccx/auto_dsig1.f | 34 +- .../SubProcesses/P2_uux_ttxccx/counters.cc | 18 +- .../SubProcesses/P2_uux_ttxccx/matrix1.f | 2 + .../P2_uux_ttxccx/mirrorprocs.inc | 2 +- .../SubProcesses/P2_uux_ttxccx/processes.dat | 2 +- .../SubProcesses/P2_uux_ttxgg/CPPProcess.cc | 147 +- .../SubProcesses/P2_uux_ttxgg/auto_dsig1.f | 34 +- .../SubProcesses/P2_uux_ttxgg/counters.cc | 18 +- .../SubProcesses/P2_uux_ttxgg/matrix1.f | 2 + .../SubProcesses/P2_uux_ttxgg/mirrorprocs.inc | 2 +- .../SubProcesses/P2_uux_ttxgg/processes.dat | 2 +- .../SubProcesses/P2_uux_ttxuux/CPPProcess.cc | 77 +- .../SubProcesses/P2_uux_ttxuux/auto_dsig1.f | 34 +- .../SubProcesses/P2_uux_ttxuux/counters.cc | 18 +- .../SubProcesses/P2_uux_ttxuux/matrix1.f | 2 + .../P2_uux_ttxuux/mirrorprocs.inc | 2 +- .../SubProcesses/P2_uux_ttxuux/processes.dat | 2 +- .../P2_uxcx_ttxuxcx/CPPProcess.cc | 49 +- .../SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f | 32 +- .../SubProcesses/P2_uxcx_ttxuxcx/counters.cc | 18 +- .../SubProcesses/P2_uxcx_ttxuxcx/matrix1.f | 2 + .../P2_uxcx_ttxuxcx/mirrorprocs.inc | 2 +- .../P2_uxcx_ttxuxcx/processes.dat | 2 +- .../P2_uxux_ttxuxux/CPPProcess.cc | 75 +- .../SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f | 38 +- 
.../SubProcesses/P2_uxux_ttxuxux/counters.cc | 18 +- .../SubProcesses/P2_uxux_ttxuxux/matrix1.f | 2 + .../pp_tt012j.mad/SubProcesses/cudacpp.mk | 6 +- .../pp_tt012j.mad/SubProcesses/makefile | 4 +- .../cudacpp/pp_tt012j.mad/bin/generate_events | 22 +- .../pp_tt012j.mad/bin/internal/banner.py | 3 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42831 -> 42833 bytes epochX/cudacpp/pp_tt012j.mad/bin/madevent | 20 +- epochX/cudacpp/pp_tt012j.mad/mg5.in | 2 - epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h | 24 - .../ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt | 1026 ++-- 273 files changed, 10168 insertions(+), 9710 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index be5cee0fb8..2d9aaf2a44 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu.mg +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005257368087768555  +DEBUG: model prefixing takes 0.004520893096923828  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -161,49 +161,68 @@ Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  INFO: initialize a new directory: CODEGEN_mad_ee_mumu INFO: remove old information in CODEGEN_mad_ee_mumu -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu  +INFO: Creating subdirectories in directory 
/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1]} 
[model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 2: 2} [model_handling.py at line 1710]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_epem_mupmum.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  WARNING: vector code for lepton pdf not implemented. 
We removed the option to run dressed lepton  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.097 s +Wrote files for 8 helas calls in 0.094 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.197 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  +ALOHA: aloha creates 3 routines in 0.174 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.251 s +ALOHA: aloha creates 7 routines in 0.217 s FFV1 FFV1 FFV2 @@ -212,22 +231,24 @@ ALOHA: aloha creates 7 routines in 0.251 s FFV4 FFV2_4 FFV2_4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.cc +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  ************************************************************ * * * W E L C O M E to * @@ -248,16 +269,14 @@ DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. 
Please set in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP -run_card missed argument cudacpp_backend. Takes default: CPP -run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -281,39 +300,40 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py -Hunk #1 succeeded at 4188 (offset 1 line). patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 495 (offset 26 lines). +Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f Hunk #3 succeeded at 230 (offset 9 lines). Hunk #4 succeeded at 267 (offset 18 lines). Hunk #5 succeeded at 312 (offset 18 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done. +Hunk #6 succeeded at 410 (offset 14 lines). +Hunk #7 succeeded at 478 (offset 8 lines). +Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done. 
Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README +/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README Run "open index.html" to see more information about this process. quit -real 0m2.494s -user 0m2.190s -sys 0m0.291s +real 0m2.433s +user 0m2.060s +sys 0m0.335s diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt b/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt index cdeedc7863..5ca005676e 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_file.inc index 738db319fd..4f385d6435 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1_0.o FFV4_3.o FFV1P0_3.o FFV2_0.o FFV4_0.o FFV2_3.o +ALOHARoutine = FFV2_3.o FFV2_0.o FFV4_0.o FFV4_3.o FFV1_0.o FFV1P0_3.o diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.cc index 74b5239ebf..30257195b6 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.cc @@ -112,17 +112,10 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! 
const std::string tag = "arm neon (128bit as in SSE4.2)"; -#elif defined( __x86_64__ ) || defined( __i386__ ) +#else bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; -#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted - bool known = false; // __builtin_cpu_supports is not supported - // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html - // See https://stackoverflow.com/q/62783908 - // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu - bool ok = true; // this is just an assumption! - const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc index 8d370a6b34..a6d90a2d1a 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc @@ -238,18 +238,25 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 2 *** // Wavefunction(s) for diagram number 1 - oxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); +#if not( defined __CUDACC__ and defined MGONGPU_TEST_DIVERGENCE ) + opzxxx( momenta, cHel[ihel][0], -1, w_fp[0], 0 ); // NB: opzxxx only uses pz +#else + if( ( blockDim.x * blockIdx.x + threadIdx.x ) % 2 == 0 ) + opzxxx( momenta, cHel[ihel][0], -1, w_fp[0], 0 ); // NB: opzxxx only uses pz + else + oxxxxx( momenta, 0, cHel[ihel][0], -1, w_fp[0], 0 ); +#endif - ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); + imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz - ixxxxx( momenta, 0., cHel[ihel][2], -1, w_fp[2], 2 ); + ixzxxx( momenta, cHel[ihel][2], -1, w_fp[2], 2 ); - oxxxxx( momenta, 0., cHel[ihel][3], +1, w_fp[3], 3 ); 
+ oxzxxx( momenta, cHel[ihel][3], +1, w_fp[3], 3 ); - FFV1P0_3( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[4] ); + FFV1P0_3( w_fp[1], w_fp[0], COUPs[0], 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -259,10 +266,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 2 *** // Wavefunction(s) for diagram number 2 - FFV2_4_3( w_fp[1], w_fp[0], COUPs[1], 1.0, COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] ); + FFV2_4_3( w_fp[1], w_fp[0], COUPs[1], COUPs[2], cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 2 - FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[1], 1.0, COUPs[2], 1.0, &_fp[0] ); + FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[1], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -779,12 +786,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 
because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f index 31e7790d2d..b836e34865 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f @@ -39,7 +39,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION EP1 DOUBLE PRECISION EM2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -130,26 +129,15 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - EP1=PDG2PDF(LPP(IB(1)),-11, IB(1),XBK(IB(1)), QSCALE) + EP1=PDG2PDF(LPP(IB(1)),-11, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1) + $ ))) IF (PDLABEL.EQ.'dressed') EP1_COMPONENTS(1:4) = $ EE_COMPONENTS(1:4) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - EM2=PDG2PDF(LPP(IB(2)),11, IB(2),XBK(IB(2)), QSCALE) + EM2=PDG2PDF(LPP(IB(2)),11, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) IF (PDLABEL.EQ.'dressed') EM2_COMPONENTS(1:4) = $ EE_COMPONENTS(1:4) ENDIF diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/counters.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/counters.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 
CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f index 21e300b33e..e00f0e1b64 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f @@ -410,6 +410,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -477,6 +478,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk index c6c1826de7..43cee0977e 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk @@ -27,8 +27,6 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) - #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -222,8 +220,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +555,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: 
$(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile b/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile index 74b19033a8..74db44d848 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
+LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/ee_mumu.mad/bin/generate_events b/epochX/cudacpp/ee_mumu.mad/bin/generate_events index 5577cc66a0..107313b25d 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/generate_events +++ b/epochX/cudacpp/ee_mumu.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME -import misc as misc + import logging import logging.config @@ -160,31 +160,17 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv - - # check for plugin customization of the launch command - launch_interface = ME.MadEventCmdShell - if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): - with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - launch_interface = launch_plugin.MEINTERFACE - - - + argument = sys.argv try: if '-h' in argument or '--help' in argument: - launch = launch_interface(me_dir=root_path, force_run=True) + launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = launch_interface(me_dir=root_path, force_run=True) + launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py 
b/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py index e9f421ae5f..7624b9f557 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py @@ -1002,14 +1002,13 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() - self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - + self.plugin_input(finput) def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/ee_mumu.mad/bin/internal/ufomodel/py3_model.pkl index 27a1caae3c115073669b90622e9351ab04166d39..dc38da0bfa76ea4206a3c5b2d34b98c606f7d044 100644 GIT binary patch delta 23 fcmX?qj_Kk#rVZZ97)vMnEK_6l^>>+EyzCADfsqPs delta 21 dcmcb3j_Le4rVZZ97>g(SEK_5Qm|VK-4ghc73FH6( diff --git a/epochX/cudacpp/ee_mumu.mad/bin/madevent b/epochX/cudacpp/ee_mumu.mad/bin/madevent index 10b6a71fa2..c944aa1faf 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/madevent +++ b/epochX/cudacpp/ee_mumu.mad/bin/madevent @@ -32,7 +32,6 @@ except ImportError: import os -pjoin = os.path.join import optparse # Get the directory of the script real path (bin) @@ -161,23 +160,10 @@ except: pass import internal.madevent_interface as cmd_interface -# check for plugin customization of the launch command -launch_interface = cmd_interface.MadEventCmdShell -if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): - with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - launch_interface = launch_plugin.MEINTERFACE - - - # Call the cmd interface main loop try: if '-h' in args or '--help' in args: - launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) + launch = ME.MadEventCmdShell(me_dir=os.path.dirname(root_path), 
force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): @@ -192,7 +178,7 @@ try: cmd_line.run_cmd('import command ' + input_file) cmd_line.run_cmd('quit') else: - cmd_line = launch_interface(force_run=True) + cmd_line = cmd_interface.MadEventCmdShell(force_run=True) cmd_line.use_rawinput = False cmd_line.haspiping = False cmd_line.run_cmd('import command ' + input_file) @@ -202,7 +188,7 @@ try: if options.web: cmd_line = cmd_interface.MadEventCmd(force_run=True) else: - cmd_line = launch_interface(force_run=True) + cmd_line = cmd_interface.MadEventCmdShell(force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print( parser_error) diff --git a/epochX/cudacpp/ee_mumu.mad/mg5.in b/epochX/cudacpp/ee_mumu.mad/mg5.in index 12a2c58512..d868684019 100644 --- a/epochX/cudacpp/ee_mumu.mad/mg5.in +++ b/epochX/cudacpp/ee_mumu.mad/mg5.in @@ -1,4 +1,2 @@ -set stdout_level DEBUG -set zerowidth_tchannel F generate e+ e- > mu+ mu- output madevent ee_mumu.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h index 19819e2451..6a8781b113 100644 --- a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h @@ -863,7 +863,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -874,7 +873,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -888,7 +886,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) 
ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -899,7 +896,6 @@ namespace mg5amcCpu FFV2_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -913,7 +909,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -924,7 +919,6 @@ namespace mg5amcCpu FFV4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -938,9 +932,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP1[], - const double Ccoeff1, const fptype allCOUP2[], - const double Ccoeff2, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -951,9 +943,7 @@ namespace mg5amcCpu FFV2_4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP1[], - const double Ccoeff1, const fptype allCOUP2[], - const double Ccoeff2, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -967,7 +957,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -991,7 +980,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1023,7 +1011,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1047,7 +1034,6 @@ namespace mg5amcCpu FFV2_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double 
Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1081,7 +1067,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1108,7 +1093,6 @@ namespace mg5amcCpu FFV4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1145,9 +1129,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP1[], - const double Ccoeff1, const fptype allCOUP2[], - const double Ccoeff2, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1162,7 +1144,7 @@ namespace mg5amcCpu constexpr fptype two( 2. ); const cxtype_sv TMP1 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) ); const cxtype_sv TMP3 = ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ); - ( *vertex ) = ( -one ) * ( Ccoeff2 * COUP2 * ( +cI * TMP1 + ( two * cI ) * TMP3 ) + cI * ( TMP1 * Ccoeff1 * COUP1 ) ); + ( *vertex ) = ( -one ) * ( COUP2 * ( +cI * TMP1 + ( two * cI ) * TMP3 ) + cI * ( TMP1 * COUP1 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1175,9 +1157,7 @@ namespace mg5amcCpu FFV2_4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP1[], - const double Ccoeff1, const fptype allCOUP2[], - const double Ccoeff2, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1199,10 +1179,10 @@ namespace mg5amcCpu const cxtype_sv TMP2 = ( F1[2] * ( F2[4] * ( P3[0] + P3[3] ) + F2[5] * ( P3[1] + cI * P3[2] ) ) + F1[3] * ( F2[4] * ( P3[1] - cI * P3[2] ) + F2[5] * ( P3[0] - P3[3] ) ) ); const cxtype_sv TMP4 = ( F1[4] * ( F2[2] * ( P3[0] - P3[3] ) - F2[3] * ( P3[1] + cI * P3[2] ) ) + F1[5] * ( F2[2] * ( -P3[1] + cI * P3[2] ) + F2[3] * ( P3[0] + P3[3] ) ) ); const cxtype_sv 
denom = one / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); - V3[2] = denom * ( -two * cI ) * ( Ccoeff2 * COUP2 * ( OM3 * -half * P3[0] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] + F1[3] * F2[5] ) + F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( Ccoeff1 * COUP1 * ( F1[2] * F2[4] + F1[3] * F2[5] - P3[0] * OM3 * TMP2 ) ) ); - V3[3] = denom * ( -two * cI ) * ( Ccoeff2 * COUP2 * ( OM3 * -half * P3[1] * ( TMP2 + two * TMP4 ) + ( -half * ( F1[2] * F2[5] + F1[3] * F2[4] ) + F1[4] * F2[3] + F1[5] * F2[2] ) ) - half * ( Ccoeff1 * COUP1 * ( F1[2] * F2[5] + F1[3] * F2[4] + P3[1] * OM3 * TMP2 ) ) ); - V3[4] = denom * cI * ( Ccoeff2 * COUP2 * ( OM3 * P3[2] * ( TMP2 + two * TMP4 ) + ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + ( -two * cI ) * ( F1[4] * F2[3] ) + ( two * cI ) * ( F1[5] * F2[2] ) ) ) + Ccoeff1 * COUP1 * ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + P3[2] * OM3 * TMP2 ) ); - V3[5] = denom * ( two * cI ) * ( Ccoeff2 * COUP2 * ( OM3 * half * P3[3] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] ) - half * ( F1[3] * F2[5] ) - F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( Ccoeff1 * COUP1 * ( F1[2] * F2[4] + P3[3] * OM3 * TMP2 - F1[3] * F2[5] ) ) ); + V3[2] = denom * ( -two * cI ) * ( COUP2 * ( OM3 * -half * P3[0] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] + F1[3] * F2[5] ) + F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( COUP1 * ( F1[2] * F2[4] + F1[3] * F2[5] - P3[0] * OM3 * TMP2 ) ) ); + V3[3] = denom * ( -two * cI ) * ( COUP2 * ( OM3 * -half * P3[1] * ( TMP2 + two * TMP4 ) + ( -half * ( F1[2] * F2[5] + F1[3] * F2[4] ) + F1[4] * F2[3] + F1[5] * F2[2] ) ) - half * ( COUP1 * ( F1[2] * F2[5] + F1[3] * F2[4] + P3[1] * OM3 * TMP2 ) ) ); + V3[4] = denom * cI * ( COUP2 * ( OM3 * P3[2] * ( TMP2 + two * TMP4 ) + ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + ( -two * cI ) * ( F1[4] * F2[3] ) + ( two * cI ) * ( F1[5] * F2[2] ) ) ) + COUP1 * ( +cI * ( F1[2] * F2[5] ) - cI * ( 
F1[3] * F2[4] ) + P3[2] * OM3 * TMP2 ) ); + V3[5] = denom * ( two * cI ) * ( COUP2 * ( OM3 * half * P3[3] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] ) - half * ( F1[3] * F2[5] ) - F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( COUP1 * ( F1[2] * F2[4] + P3[3] * OM3 * TMP2 - F1[3] * F2[5] ) ) ); mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index b13f728dee..e8795ee643 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu.mg +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00538325309753418  +DEBUG: model prefixing takes 0.004621267318725586  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -160,28 +160,49 @@ output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  -DEBUG: type(subproc_group)= [output.py at line 190]  -DEBUG: type(fortran_model)= [output.py at line 191]  -DEBUG: type(me)= me=0 [output.py at line 192]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. -Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  +DEBUG: type(subproc_group)= [output.py at line 188]  +DEBUG: type(fortran_model)= [output.py at line 189]  +DEBUG: type(me)= me=0 [output.py at line 190]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: proc_id =  0 [model_handling.py at line 1046]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  +DEBUG: 
multi_channel_map =  None [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_epem_mupmum.txt [model_handling.py at line 1336]  +Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.262 s +ALOHA: aloha creates 4 routines in 0.227 s FFV1 FFV1 FFV2 @@ -190,17 +211,20 @@ ALOHA: aloha creates 4 routines in 0.262 s FFV4 FFV2_4 FFV2_4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.cc +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
+DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.651s -user 0m0.601s -sys 0m0.044s +real 0m0.627s +user 0m0.560s +sys 0m0.060s diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/MatrixElementKernels.cc index 74b5239ebf..30257195b6 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/MatrixElementKernels.cc @@ -112,17 +112,10 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#elif defined( __x86_64__ ) || defined( __i386__ ) +#else bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; -#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted - bool known = false; // __builtin_cpu_supports is not supported - // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html - // See https://stackoverflow.com/q/62783908 - // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu - bool ok = true; // this is just an assumption! 
- const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc index 11472d834e..12a28d3f7a 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc @@ -238,18 +238,25 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 2 *** // Wavefunction(s) for diagram number 1 - oxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); +#if not( defined __CUDACC__ and defined MGONGPU_TEST_DIVERGENCE ) + opzxxx( momenta, cHel[ihel][0], -1, w_fp[0], 0 ); // NB: opzxxx only uses pz +#else + if( ( blockDim.x * blockIdx.x + threadIdx.x ) % 2 == 0 ) + opzxxx( momenta, cHel[ihel][0], -1, w_fp[0], 0 ); // NB: opzxxx only uses pz + else + oxxxxx( momenta, 0, cHel[ihel][0], -1, w_fp[0], 0 ); +#endif - ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); + imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz - ixxxxx( momenta, 0., cHel[ihel][2], -1, w_fp[2], 2 ); + ixzxxx( momenta, cHel[ihel][2], -1, w_fp[2], 2 ); - oxxxxx( momenta, 0., cHel[ihel][3], +1, w_fp[3], 3 ); + oxzxxx( momenta, cHel[ihel][3], +1, w_fp[3], 3 ); - FFV1P0_3( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[4] ); + FFV1P0_3( w_fp[1], w_fp[0], COUPs[0], 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -258,10 +265,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 2 *** // Wavefunction(s) for diagram number 2 - FFV2_4_3( w_fp[1], w_fp[0], COUPs[1], 1.0, COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] ); + FFV2_4_3( w_fp[1], w_fp[0], 
COUPs[1], COUPs[2], cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 2 - FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[1], 1.0, COUPs[2], 1.0, &_fp[0] ); + FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[1], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -777,12 +784,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk index c6c1826de7..43cee0977e 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk @@ -27,8 +27,6 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) - #------------------------------------------------------------------------------- #=== Configure common compiler 
flags for C++ and CUDA @@ -222,8 +220,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +555,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/ee_mumu.sa/mg5.in b/epochX/cudacpp/ee_mumu.sa/mg5.in index dcdf875dea..5c48dc6ef9 100644 --- a/epochX/cudacpp/ee_mumu.sa/mg5.in +++ b/epochX/cudacpp/ee_mumu.sa/mg5.in @@ -1,4 +1,3 @@ -set stdout_level DEBUG -set zerowidth_tchannel F generate e+ e- > mu+ mu- -output standalone_cudacpp ee_mumu.sa +output standalone_cudacpp ee_mumu.sa --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp + diff --git a/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h b/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h index 19819e2451..6a8781b113 100644 --- a/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h @@ -863,7 +863,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], 
const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -874,7 +873,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -888,7 +886,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -899,7 +896,6 @@ namespace mg5amcCpu FFV2_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -913,7 +909,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -924,7 +919,6 @@ namespace mg5amcCpu FFV4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -938,9 +932,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP1[], - const double Ccoeff1, const fptype allCOUP2[], - const double Ccoeff2, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -951,9 +943,7 @@ namespace mg5amcCpu FFV2_4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP1[], - const double Ccoeff1, const fptype allCOUP2[], - const double Ccoeff2, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -967,7 +957,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, 
__FUNCTION__ ); @@ -991,7 +980,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1023,7 +1011,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1047,7 +1034,6 @@ namespace mg5amcCpu FFV2_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1081,7 +1067,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1108,7 +1093,6 @@ namespace mg5amcCpu FFV4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1145,9 +1129,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP1[], - const double Ccoeff1, const fptype allCOUP2[], - const double Ccoeff2, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1162,7 +1144,7 @@ namespace mg5amcCpu constexpr fptype two( 2. 
); const cxtype_sv TMP1 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) ); const cxtype_sv TMP3 = ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ); - ( *vertex ) = ( -one ) * ( Ccoeff2 * COUP2 * ( +cI * TMP1 + ( two * cI ) * TMP3 ) + cI * ( TMP1 * Ccoeff1 * COUP1 ) ); + ( *vertex ) = ( -one ) * ( COUP2 * ( +cI * TMP1 + ( two * cI ) * TMP3 ) + cI * ( TMP1 * COUP1 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1175,9 +1157,7 @@ namespace mg5amcCpu FFV2_4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP1[], - const double Ccoeff1, const fptype allCOUP2[], - const double Ccoeff2, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1199,10 +1179,10 @@ namespace mg5amcCpu const cxtype_sv TMP2 = ( F1[2] * ( F2[4] * ( P3[0] + P3[3] ) + F2[5] * ( P3[1] + cI * P3[2] ) ) + F1[3] * ( F2[4] * ( P3[1] - cI * P3[2] ) + F2[5] * ( P3[0] - P3[3] ) ) ); const cxtype_sv TMP4 = ( F1[4] * ( F2[2] * ( P3[0] - P3[3] ) - F2[3] * ( P3[1] + cI * P3[2] ) ) + F1[5] * ( F2[2] * ( -P3[1] + cI * P3[2] ) + F2[3] * ( P3[0] + P3[3] ) ) ); const cxtype_sv denom = one / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); - V3[2] = denom * ( -two * cI ) * ( Ccoeff2 * COUP2 * ( OM3 * -half * P3[0] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] + F1[3] * F2[5] ) + F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( Ccoeff1 * COUP1 * ( F1[2] * F2[4] + F1[3] * F2[5] - P3[0] * OM3 * TMP2 ) ) ); - V3[3] = denom * ( -two * cI ) * ( Ccoeff2 * COUP2 * ( OM3 * -half * P3[1] * ( TMP2 + two * TMP4 ) + ( -half * ( F1[2] * F2[5] + F1[3] * F2[4] ) + F1[4] * F2[3] + F1[5] * F2[2] ) ) - half * ( Ccoeff1 * COUP1 * ( F1[2] * F2[5] + F1[3] * F2[4] + P3[1] * OM3 * TMP2 ) ) ); - V3[4] = denom * cI * ( Ccoeff2 * COUP2 * ( OM3 * P3[2] * ( TMP2 + two * TMP4 
) + ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + ( -two * cI ) * ( F1[4] * F2[3] ) + ( two * cI ) * ( F1[5] * F2[2] ) ) ) + Ccoeff1 * COUP1 * ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + P3[2] * OM3 * TMP2 ) ); - V3[5] = denom * ( two * cI ) * ( Ccoeff2 * COUP2 * ( OM3 * half * P3[3] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] ) - half * ( F1[3] * F2[5] ) - F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( Ccoeff1 * COUP1 * ( F1[2] * F2[4] + P3[3] * OM3 * TMP2 - F1[3] * F2[5] ) ) ); + V3[2] = denom * ( -two * cI ) * ( COUP2 * ( OM3 * -half * P3[0] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] + F1[3] * F2[5] ) + F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( COUP1 * ( F1[2] * F2[4] + F1[3] * F2[5] - P3[0] * OM3 * TMP2 ) ) ); + V3[3] = denom * ( -two * cI ) * ( COUP2 * ( OM3 * -half * P3[1] * ( TMP2 + two * TMP4 ) + ( -half * ( F1[2] * F2[5] + F1[3] * F2[4] ) + F1[4] * F2[3] + F1[5] * F2[2] ) ) - half * ( COUP1 * ( F1[2] * F2[5] + F1[3] * F2[4] + P3[1] * OM3 * TMP2 ) ) ); + V3[4] = denom * cI * ( COUP2 * ( OM3 * P3[2] * ( TMP2 + two * TMP4 ) + ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + ( -two * cI ) * ( F1[4] * F2[3] ) + ( two * cI ) * ( F1[5] * F2[2] ) ) ) + COUP1 * ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + P3[2] * OM3 * TMP2 ) ); + V3[5] = denom * ( two * cI ) * ( COUP2 * ( OM3 * half * P3[3] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] ) - half * ( F1[3] * F2[5] ) - F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( COUP1 * ( F1[2] * F2[4] + P3[3] * OM3 * TMP2 - F1[3] * F2[5] ) ) ); mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index b5c53c1161..c777d7154a 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using 
default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt.mg +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005456686019897461  +DEBUG: model prefixing takes 0.004714488983154297  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,72 +155,100 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.008 s +1 processes with 3 diagrams generated in 0.007 s Total: 1 processes with 3 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  INFO: initialize a new directory: CODEGEN_mad_gg_tt INFO: remove old information in CODEGEN_mad_gg_tt -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses  INFO: Organizing processes into subprocess groups INFO: 
Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, 
cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated 
helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.103 s +Wrote files for 10 helas calls in 0.104 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.143 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  +ALOHA: aloha creates 2 routines in 0.126 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.140 s +ALOHA: aloha creates 4 routines in 0.112 s VVV1 FFV1 FFV1 FFV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.cc +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. 
The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  ************************************************************ * * * W E L C O M E to * @@ -241,15 +269,14 @@ DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". 
Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP -run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -273,36 +300,34 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py -Hunk #1 succeeded at 4188 (offset 1 line). patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README +/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README Run "open index.html" to see more information about this process. 
quit -real 0m2.913s -user 0m2.038s -sys 0m0.292s +real 0m2.341s +user 0m1.909s +sys 0m0.352s diff --git a/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt index cdeedc7863..5ca005676e 100644 --- a/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_tt.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gg_tt.mad/Source/DHELAS/aloha_file.inc index 5597c614b0..59e590217d 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gg_tt.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1_1.o FFV1_0.o FFV1_2.o VVV1P0_1.o +ALOHARoutine = FFV1_1.o FFV1_2.o VVV1P0_1.o FFV1_0.o diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/MatrixElementKernels.cc index 74b5239ebf..30257195b6 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/MatrixElementKernels.cc @@ -112,17 +112,10 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#elif defined( __x86_64__ ) || defined( __i386__ ) +#else bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; -#else // AV FIXME! 
Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted - bool known = false; // __builtin_cpu_supports is not supported - // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html - // See https://stackoverflow.com/q/62783908 - // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu - bool ok = true; // this is just an assumption! - const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index 02f655f48c..0afa202e07 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -246,10 +246,10 @@ namespace mg5amcCpu ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -260,10 +260,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 3 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] 
); @@ -273,10 +273,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 3 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -794,12 +794,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index 0b493ae244..3b24a9924c 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -39,7 +39,6 @@ DOUBLE PRECISION FUNCTION 
DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -127,24 +126,11 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f index daea73a6df..ef18aff221 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -396,6 +396,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -469,6 +470,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index c6c1826de7..43cee0977e 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -27,8 +27,6 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) - #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -222,8 +220,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +555,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff 
--git a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile index 74b19033a8..74db44d848 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
+LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/gg_tt.mad/bin/generate_events b/epochX/cudacpp/gg_tt.mad/bin/generate_events index 5577cc66a0..107313b25d 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/generate_events +++ b/epochX/cudacpp/gg_tt.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME -import misc as misc + import logging import logging.config @@ -160,31 +160,17 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv - - # check for plugin customization of the launch command - launch_interface = ME.MadEventCmdShell - if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): - with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - launch_interface = launch_plugin.MEINTERFACE - - - + argument = sys.argv try: if '-h' in argument or '--help' in argument: - launch = launch_interface(me_dir=root_path, force_run=True) + launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = launch_interface(me_dir=root_path, force_run=True) + launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py 
b/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py index e9f421ae5f..7624b9f557 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py @@ -1002,14 +1002,13 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() - self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - + self.plugin_input(finput) def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/gg_tt.mad/bin/internal/ufomodel/py3_model.pkl index 27a1caae3c115073669b90622e9351ab04166d39..dc38da0bfa76ea4206a3c5b2d34b98c606f7d044 100644 GIT binary patch delta 23 fcmX?qj_Kk#rVZZ97)vMnEK_6l^>>+EyzCADfsqPs delta 21 dcmcb3j_Le4rVZZ97>g(SEK_5Qm|VK-4ghc73FH6( diff --git a/epochX/cudacpp/gg_tt.mad/bin/madevent b/epochX/cudacpp/gg_tt.mad/bin/madevent index 10b6a71fa2..c944aa1faf 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/madevent +++ b/epochX/cudacpp/gg_tt.mad/bin/madevent @@ -32,7 +32,6 @@ except ImportError: import os -pjoin = os.path.join import optparse # Get the directory of the script real path (bin) @@ -161,23 +160,10 @@ except: pass import internal.madevent_interface as cmd_interface -# check for plugin customization of the launch command -launch_interface = cmd_interface.MadEventCmdShell -if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): - with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - launch_interface = launch_plugin.MEINTERFACE - - - # Call the cmd interface main loop try: if '-h' in args or '--help' in args: - launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) + launch = ME.MadEventCmdShell(me_dir=os.path.dirname(root_path), force_run=True) 
launch.exec_cmd('help generate_events') sys.exit(0) with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): @@ -192,7 +178,7 @@ try: cmd_line.run_cmd('import command ' + input_file) cmd_line.run_cmd('quit') else: - cmd_line = launch_interface(force_run=True) + cmd_line = cmd_interface.MadEventCmdShell(force_run=True) cmd_line.use_rawinput = False cmd_line.haspiping = False cmd_line.run_cmd('import command ' + input_file) @@ -202,7 +188,7 @@ try: if options.web: cmd_line = cmd_interface.MadEventCmd(force_run=True) else: - cmd_line = launch_interface(force_run=True) + cmd_line = cmd_interface.MadEventCmdShell(force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print( parser_error) diff --git a/epochX/cudacpp/gg_tt.mad/mg5.in b/epochX/cudacpp/gg_tt.mad/mg5.in index 7859bf9b80..8b65375c7e 100644 --- a/epochX/cudacpp/gg_tt.mad/mg5.in +++ b/epochX/cudacpp/gg_tt.mad/mg5.in @@ -1,4 +1,2 @@ -set stdout_level DEBUG -set zerowidth_tchannel F generate g g > t t~ output madevent gg_tt.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h index 07d0bfa887..94bf8aca52 100644 --- a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h @@ -862,7 +862,6 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -876,7 +875,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -887,7 +885,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) 
ALWAYS_INLINE; @@ -900,7 +897,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -913,7 +909,6 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -952,7 +947,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -976,7 +970,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1008,7 +1001,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 23c04c9100..cb7b25ef28 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt.mg +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005602359771728516  +DEBUG: model prefixing takes 0.004621744155883789  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,47 +155,78 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.008 s +1 processes with 3 diagrams generated in 0.007 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color 
information for process: g g > t t~ @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  -DEBUG: type(subproc_group)= [output.py at line 190]  -DEBUG: type(fortran_model)= [output.py at line 191]  -DEBUG: type(me)= me=0 [output.py at line 192]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. -Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  +DEBUG: type(subproc_group)= [output.py at line 188]  +DEBUG: type(fortran_model)= [output.py at line 189]  +DEBUG: type(me)= me=0 [output.py at line 190]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: proc_id =  0 [model_handling.py at line 1046]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h +DEBUG: Entering 
PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. 
+DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1336]  +Generated helas calls for 1 subprocesses (3 diagrams) in 0.005 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.126 s VVV1 FFV1 FFV1 FFV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.cc +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
-DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.539s -user 0m0.490s -sys 0m0.045s +real 0m0.572s +user 0m0.500s +sys 0m0.057s diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/MatrixElementKernels.cc index 74b5239ebf..30257195b6 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/MatrixElementKernels.cc @@ -112,17 +112,10 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#elif defined( __x86_64__ ) || defined( __i386__ ) +#else bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; -#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted - bool known = false; // __builtin_cpu_supports is not supported - // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html - // See https://stackoverflow.com/q/62783908 - // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu - bool ok = true; // this is just an assumption! 
- const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc index 141d1f24ac..ea42fb3e96 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc @@ -246,10 +246,10 @@ namespace mg5amcCpu ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -259,10 +259,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 3 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 3 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -791,12 +791,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk index c6c1826de7..43cee0977e 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk @@ -27,8 +27,6 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) - #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -222,8 +220,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS 
= # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +555,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_tt.sa/mg5.in b/epochX/cudacpp/gg_tt.sa/mg5.in index 8298656d17..a2f1230ada 100644 --- a/epochX/cudacpp/gg_tt.sa/mg5.in +++ b/epochX/cudacpp/gg_tt.sa/mg5.in @@ -1,4 +1,3 @@ -set stdout_level DEBUG -set zerowidth_tchannel F generate g g > t t~ -output standalone_cudacpp gg_tt.sa +output standalone_cudacpp gg_tt.sa --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp + diff --git a/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h index 07d0bfa887..94bf8aca52 100644 --- a/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h @@ -862,7 +862,6 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -876,7 +875,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; 
//-------------------------------------------------------------------------- @@ -887,7 +885,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -900,7 +897,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -913,7 +909,6 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -952,7 +947,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -976,7 +970,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1008,7 +1001,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index b27d021202..666f2f1d0b 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. 
Please set in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g.mg +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~; add process g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005415201187133789  +DEBUG: model prefixing takes 0.0046291351318359375  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,84 +155,132 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.008 s +1 processes with 3 diagrams generated in 0.007 s Total: 1 processes with 3 diagrams INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.019 s +1 processes with 16 diagrams generated in 0.018 s Total: 2 processes with 19 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  INFO: initialize a new directory: CODEGEN_mad_gg_tt01g INFO: remove old information in CODEGEN_mad_gg_tt01g -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses  INFO: Organizing 
processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @2 INFO: Processing color information for process: g g > t t~ g @2 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1170]  +DEBUG: 
multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1336]  DEBUG: proc_id 
=  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
+DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
+DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 2 subprocesses (19 diagrams) in 0.041 s -Wrote files for 46 helas calls in 0.238 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.039 s +Wrote files for 46 helas calls in 
0.247 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.321 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  +ALOHA: aloha creates 5 routines in 0.276 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.305 s +ALOHA: aloha creates 10 routines in 0.263 s VVV1 VVV1 FFV1 @@ -242,22 +290,27 @@ ALOHA: aloha creates 10 routines in 0.305 s VVVV1 VVVV3 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.cc +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. 
The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  ************************************************************ * * * W E L C O M E to * @@ -278,15 +331,14 @@ DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt Using default text editor "vi". 
Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP -run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -310,45 +362,44 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py -Hunk #1 succeeded at 4188 (offset 1 line). patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). 
patching file driver.f patching file matrix1.f Hunk #2 succeeded at 159 (offset 16 lines). Hunk #3 succeeded at 237 (offset 16 lines). Hunk #4 succeeded at 265 (offset 16 lines). Hunk #5 succeeded at 310 (offset 16 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done. +Hunk #6 succeeded at 434 (offset 38 lines). +Hunk #7 succeeded at 588 (offset 118 lines). +Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/README +/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/README Run "open index.html" to see more information about this process. quit -real 0m2.906s -user 0m2.575s -sys 0m0.318s +real 0m2.847s +user 0m2.424s +sys 0m0.365s diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt index cdeedc7863..5ca005676e 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_file.inc index 50c12b0804..4f2ef3d0d8 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1_1.o VVVV4P0_1.o FFV1_0.o VVV1_0.o FFV1_2.o VVVV3P0_1.o VVVV1P0_1.o VVV1P0_1.o FFV1P0_3.o +ALOHARoutine = VVVV4P0_1.o VVVV3P0_1.o VVVV1P0_1.o FFV1_1.o FFV1_2.o 
VVV1P0_1.o VVV1_0.o FFV1_0.o FFV1P0_3.o diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MatrixElementKernels.cc index 74b5239ebf..30257195b6 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MatrixElementKernels.cc @@ -112,17 +112,10 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#elif defined( __x86_64__ ) || defined( __i386__ ) +#else bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; -#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted - bool known = false; // __builtin_cpu_supports is not supported - // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html - // See https://stackoverflow.com/q/62783908 - // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu - bool ok = true; // this is just an assumption! 
- const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index 02f655f48c..0afa202e07 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -246,10 +246,10 @@ namespace mg5amcCpu ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -260,10 +260,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 3 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -273,10 +273,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 3 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += 
cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -794,12 +794,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index 0b493ae244..3b24a9924c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -39,7 +39,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -127,24 +126,11 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - 
QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f index daea73a6df..ef18aff221 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -396,6 +396,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -469,6 +470,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc index ce1badffca..9dfd471c50 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc @@ -248,11 +248,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -265,10 +265,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 16 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -279,10 +279,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 16 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], 
w_fp[8] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -293,11 +293,11 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 16 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -307,10 +307,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 16 *** // Wavefunction(s) for diagram number 5 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -324,7 +324,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -334,11 +334,11 @@ namespace mg5amcCpu // *** 
DIAGRAM 7 OF 16 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,7 +351,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -365,7 +365,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -375,10 +375,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 16 *** // Wavefunction(s) for diagram number 10 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -392,7 +392,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 11 - FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -406,7 +406,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 12 - VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -422,7 +422,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -435,7 +435,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -448,7 +448,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -461,22 +461,22 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 16 *** // Wavefunction(s) for diagram number 16 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] ); - VVVV3P0_1( w_fp[0], 
w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[10] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[6] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -1015,12 +1015,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: 
madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f index 68e664f70c..071034763a 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f @@ -39,7 +39,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -127,24 +126,11 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f index eb85b7ebb0..8fa4eb7211 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f @@ -434,6 +434,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -587,6 +588,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk index c6c1826de7..43cee0977e 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk @@ -27,8 +27,6 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) - #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -222,8 +220,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +555,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: 
$(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile index 74b19033a8..74db44d848 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
+LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/generate_events b/epochX/cudacpp/gg_tt01g.mad/bin/generate_events index 5577cc66a0..107313b25d 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/generate_events +++ b/epochX/cudacpp/gg_tt01g.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME -import misc as misc + import logging import logging.config @@ -160,31 +160,17 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv - - # check for plugin customization of the launch command - launch_interface = ME.MadEventCmdShell - if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): - with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - launch_interface = launch_plugin.MEINTERFACE - - - + argument = sys.argv try: if '-h' in argument or '--help' in argument: - launch = launch_interface(me_dir=root_path, force_run=True) + launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = launch_interface(me_dir=root_path, force_run=True) + launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py 
b/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py index e9f421ae5f..7624b9f557 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py @@ -1002,14 +1002,13 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() - self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - + self.plugin_input(finput) def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/gg_tt01g.mad/bin/internal/ufomodel/py3_model.pkl index 27a1caae3c115073669b90622e9351ab04166d39..dc38da0bfa76ea4206a3c5b2d34b98c606f7d044 100644 GIT binary patch delta 23 fcmX?qj_Kk#rVZZ97)vMnEK_6l^>>+EyzCADfsqPs delta 21 dcmcb3j_Le4rVZZ97>g(SEK_5Qm|VK-4ghc73FH6( diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/madevent b/epochX/cudacpp/gg_tt01g.mad/bin/madevent index 10b6a71fa2..c944aa1faf 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/madevent +++ b/epochX/cudacpp/gg_tt01g.mad/bin/madevent @@ -32,7 +32,6 @@ except ImportError: import os -pjoin = os.path.join import optparse # Get the directory of the script real path (bin) @@ -161,23 +160,10 @@ except: pass import internal.madevent_interface as cmd_interface -# check for plugin customization of the launch command -launch_interface = cmd_interface.MadEventCmdShell -if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): - with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - launch_interface = launch_plugin.MEINTERFACE - - - # Call the cmd interface main loop try: if '-h' in args or '--help' in args: - launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) + launch = ME.MadEventCmdShell(me_dir=os.path.dirname(root_path), 
force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): @@ -192,7 +178,7 @@ try: cmd_line.run_cmd('import command ' + input_file) cmd_line.run_cmd('quit') else: - cmd_line = launch_interface(force_run=True) + cmd_line = cmd_interface.MadEventCmdShell(force_run=True) cmd_line.use_rawinput = False cmd_line.haspiping = False cmd_line.run_cmd('import command ' + input_file) @@ -202,7 +188,7 @@ try: if options.web: cmd_line = cmd_interface.MadEventCmd(force_run=True) else: - cmd_line = launch_interface(force_run=True) + cmd_line = cmd_interface.MadEventCmdShell(force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print( parser_error) diff --git a/epochX/cudacpp/gg_tt01g.mad/mg5.in b/epochX/cudacpp/gg_tt01g.mad/mg5.in index a20e166e81..f30dacfe93 100644 --- a/epochX/cudacpp/gg_tt01g.mad/mg5.in +++ b/epochX/cudacpp/gg_tt01g.mad/mg5.in @@ -1,5 +1,4 @@ -set stdout_level DEBUG -set zerowidth_tchannel F generate g g > t t~ add process g g > t t~ g output madevent gg_tt01g.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp + diff --git a/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h index 8995b15c82..4a326fae62 100644 --- a/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h @@ -863,7 +863,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -874,7 +873,6 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -888,7 +886,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double 
Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -899,7 +896,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -912,7 +908,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -925,7 +920,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -939,7 +933,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -953,7 +946,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -967,7 +959,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -981,7 +972,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1016,7 +1006,6 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1055,7 +1044,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1079,7 +1067,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const 
fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1111,7 +1098,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1143,7 +1129,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1175,7 +1160,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1210,7 +1194,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1245,7 +1228,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index d367fef872..eea422eba1 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg.mg +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005761861801147461  +DEBUG: model prefixing takes 0.004817008972167969  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,58 +155,83 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.022 s +1 processes with 16 diagrams generated in 0.021 s Total: 1 processes with 16 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  INFO: initialize a new directory: CODEGEN_mad_gg_ttg INFO: remove old information in CODEGEN_mad_gg_ttg -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg -WARNING: File exists 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
+DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], 
%d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group 
gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s -Wrote files for 36 helas calls in 0.146 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.035 s +Wrote files for 36 helas calls in 0.153 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.323 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  +ALOHA: aloha creates 5 routines in 0.277 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.310 s +ALOHA: aloha creates 10 routines in 0.263 s VVV1 VVV1 FFV1 @@ -216,22 +241,27 @@ ALOHA: aloha creates 10 routines in 0.310 s VVVV1 VVVV3 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.cc +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. 
The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  ************************************************************ * * * W E L C O M E to * @@ -252,15 +282,14 @@ DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt Using default text editor "vi". 
Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP -run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -284,40 +313,40 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py -Hunk #1 succeeded at 4188 (offset 1 line). patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 159 (offset 16 lines). Hunk #3 succeeded at 237 (offset 16 lines). Hunk #4 succeeded at 265 (offset 16 lines). Hunk #5 succeeded at 310 (offset 16 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done. +Hunk #6 succeeded at 434 (offset 38 lines). +Hunk #7 succeeded at 588 (offset 118 lines). +Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done. 
Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/README +/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/README Run "open index.html" to see more information about this process. quit -real 0m2.947s -user 0m2.470s -sys 0m0.321s +real 0m2.802s +user 0m2.323s +sys 0m0.356s diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt index cdeedc7863..5ca005676e 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_file.inc index 50c12b0804..4f2ef3d0d8 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1_1.o VVVV4P0_1.o FFV1_0.o VVV1_0.o FFV1_2.o VVVV3P0_1.o VVVV1P0_1.o VVV1P0_1.o FFV1P0_3.o +ALOHARoutine = VVVV4P0_1.o VVVV3P0_1.o VVVV1P0_1.o FFV1_1.o FFV1_2.o VVV1P0_1.o VVV1_0.o FFV1_0.o FFV1P0_3.o diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/MatrixElementKernels.cc index 74b5239ebf..30257195b6 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/MatrixElementKernels.cc @@ -112,17 +112,10 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu 
bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#elif defined( __x86_64__ ) || defined( __i386__ ) +#else bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; -#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted - bool known = false; // __builtin_cpu_supports is not supported - // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html - // See https://stackoverflow.com/q/62783908 - // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu - bool ok = true; // this is just an assumption! - const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc index f7f5899260..8cc007dff8 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc @@ -248,11 +248,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -265,10 +265,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 16 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[4], 
COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -279,10 +279,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 16 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -293,11 +293,11 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 16 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -307,10 +307,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 16 *** // Wavefunction(s) for diagram number 5 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[3], w_fp[5], w_fp[10], 
COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -324,7 +324,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -334,11 +334,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 16 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,7 +351,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -365,7 +365,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId 
!= 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -375,10 +375,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 16 *** // Wavefunction(s) for diagram number 10 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -392,7 +392,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -406,7 +406,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 12 - VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -422,7 +422,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -435,7 +435,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -448,7 +448,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -461,22 +461,22 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 16 *** // Wavefunction(s) for diagram number 16 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[10] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[6] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -1015,12 +1015,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in 
the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index b8615bc68f..668cc26192 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -39,7 +39,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -127,24 +126,11 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) ENDIF PD(0) = 
0D0 IPROC = 0 diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f index fc924825c2..520966d7b7 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f @@ -434,6 +434,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call 
counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -587,6 +588,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk index c6c1826de7..43cee0977e 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk @@ -27,8 +27,6 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) - #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -222,8 +220,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +555,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) 
$(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile b/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile index 74b19033a8..74db44d848 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
+LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/gg_ttg.mad/bin/generate_events b/epochX/cudacpp/gg_ttg.mad/bin/generate_events index 5577cc66a0..107313b25d 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/generate_events +++ b/epochX/cudacpp/gg_ttg.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME -import misc as misc + import logging import logging.config @@ -160,31 +160,17 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv - - # check for plugin customization of the launch command - launch_interface = ME.MadEventCmdShell - if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): - with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - launch_interface = launch_plugin.MEINTERFACE - - - + argument = sys.argv try: if '-h' in argument or '--help' in argument: - launch = launch_interface(me_dir=root_path, force_run=True) + launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = launch_interface(me_dir=root_path, force_run=True) + launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py 
b/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py index e9f421ae5f..7624b9f557 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py @@ -1002,14 +1002,13 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() - self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - + self.plugin_input(finput) def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/gg_ttg.mad/bin/internal/ufomodel/py3_model.pkl index 27a1caae3c115073669b90622e9351ab04166d39..dc38da0bfa76ea4206a3c5b2d34b98c606f7d044 100644 GIT binary patch delta 23 fcmX?qj_Kk#rVZZ97)vMnEK_6l^>>+EyzCADfsqPs delta 21 dcmcb3j_Le4rVZZ97>g(SEK_5Qm|VK-4ghc73FH6( diff --git a/epochX/cudacpp/gg_ttg.mad/bin/madevent b/epochX/cudacpp/gg_ttg.mad/bin/madevent index 10b6a71fa2..c944aa1faf 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/madevent +++ b/epochX/cudacpp/gg_ttg.mad/bin/madevent @@ -32,7 +32,6 @@ except ImportError: import os -pjoin = os.path.join import optparse # Get the directory of the script real path (bin) @@ -161,23 +160,10 @@ except: pass import internal.madevent_interface as cmd_interface -# check for plugin customization of the launch command -launch_interface = cmd_interface.MadEventCmdShell -if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): - with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - launch_interface = launch_plugin.MEINTERFACE - - - # Call the cmd interface main loop try: if '-h' in args or '--help' in args: - launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) + launch = ME.MadEventCmdShell(me_dir=os.path.dirname(root_path), force_run=True) 
launch.exec_cmd('help generate_events') sys.exit(0) with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): @@ -192,7 +178,7 @@ try: cmd_line.run_cmd('import command ' + input_file) cmd_line.run_cmd('quit') else: - cmd_line = launch_interface(force_run=True) + cmd_line = cmd_interface.MadEventCmdShell(force_run=True) cmd_line.use_rawinput = False cmd_line.haspiping = False cmd_line.run_cmd('import command ' + input_file) @@ -202,7 +188,7 @@ try: if options.web: cmd_line = cmd_interface.MadEventCmd(force_run=True) else: - cmd_line = launch_interface(force_run=True) + cmd_line = cmd_interface.MadEventCmdShell(force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print( parser_error) diff --git a/epochX/cudacpp/gg_ttg.mad/mg5.in b/epochX/cudacpp/gg_ttg.mad/mg5.in index 98f53ce50d..e5212c8ecf 100644 --- a/epochX/cudacpp/gg_ttg.mad/mg5.in +++ b/epochX/cudacpp/gg_ttg.mad/mg5.in @@ -1,4 +1,2 @@ -set stdout_level DEBUG -set zerowidth_tchannel F generate g g > t t~ g output madevent gg_ttg.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h index 8995b15c82..4a326fae62 100644 --- a/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h @@ -863,7 +863,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -874,7 +873,6 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -888,7 +886,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; 
//-------------------------------------------------------------------------- @@ -899,7 +896,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -912,7 +908,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -925,7 +920,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -939,7 +933,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -953,7 +946,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -967,7 +959,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -981,7 +972,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1016,7 +1006,6 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1055,7 +1044,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1079,7 +1067,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const 
fptype M1, const fptype W1, fptype allF1[] ) @@ -1111,7 +1098,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1143,7 +1129,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1175,7 +1160,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1210,7 +1194,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1245,7 +1228,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 20d22ac1c4..c23adaa32c 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg.mg +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005301237106323242  +DEBUG: model prefixing takes 0.005018949508666992  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,35 +155,62 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.022 s +1 processes with 16 diagrams generated in 0.020 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing 
color information for process: g g > t t~ g @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  -DEBUG: type(subproc_group)= [output.py at line 190]  -DEBUG: type(fortran_model)= [output.py at line 191]  -DEBUG: type(me)= me=0 [output.py at line 192]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  +DEBUG: type(subproc_group)= [output.py at line 188]  +DEBUG: type(fortran_model)= [output.py at line 189]  +DEBUG: type(me)= me=0 [output.py at line 190]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: proc_id =  0 [model_handling.py at line 1046]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h 
+DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. 
+DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1336]  +Generated helas calls for 1 subprocesses (16 diagrams) in 0.034 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.321 s +ALOHA: aloha creates 5 routines in 0.278 s VVV1 VVV1 FFV1 @@ -193,17 +220,23 @@ ALOHA: aloha creates 5 routines in 0.321 s VVVV1 VVVV3 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
+FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.cc +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.792s -user 0m0.717s -sys 0m0.059s +real 0m0.847s +user 0m0.711s +sys 0m0.052s diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/MatrixElementKernels.cc index 74b5239ebf..30257195b6 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/MatrixElementKernels.cc @@ -112,17 +112,10 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#elif defined( __x86_64__ ) || defined( __i386__ ) +#else bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; -#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted - bool known = false; // __builtin_cpu_supports is not supported - // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html - // See https://stackoverflow.com/q/62783908 - // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu - bool ok = true; // this is just an assumption! 
- const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc index 9393033e26..6f71af24b1 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc @@ -248,11 +248,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -264,10 +264,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 16 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -277,10 +277,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 16 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); 
+ FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -290,11 +290,11 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 16 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -303,10 +303,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 16 *** // Wavefunction(s) for diagram number 5 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -319,7 +319,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -328,11 +328,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 16 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); - FFV1_1( 
w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -344,7 +344,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -357,7 +357,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -366,10 +366,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 16 *** // Wavefunction(s) for diagram number 10 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -382,7 +382,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -395,7 +395,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 12 - VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -410,7 +410,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -422,7 +422,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -434,7 +434,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -446,12 +446,12 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 16 *** // Wavefunction(s) for diagram number 16 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[10] ); + 
VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[6] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -459,7 +459,7 @@ namespace mg5amcCpu jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -467,7 +467,7 @@ namespace mg5amcCpu jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1009,12 +1009,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, 
"Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk index c6c1826de7..43cee0977e 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk @@ -27,8 +27,6 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) - #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -222,8 +220,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +555,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) 
ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttg.sa/mg5.in b/epochX/cudacpp/gg_ttg.sa/mg5.in index fbf08862ec..9fd2517e11 100644 --- a/epochX/cudacpp/gg_ttg.sa/mg5.in +++ b/epochX/cudacpp/gg_ttg.sa/mg5.in @@ -1,4 +1,3 @@ -set stdout_level DEBUG -set zerowidth_tchannel F generate g g > t t~ g -output standalone_cudacpp gg_ttg.sa +output standalone_cudacpp gg_ttg.sa --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp + diff --git a/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h index 8995b15c82..4a326fae62 100644 --- a/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h @@ -863,7 +863,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -874,7 +873,6 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -888,7 +886,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -899,7 +896,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -912,7 +908,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -925,7 +920,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype 
M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -939,7 +933,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -953,7 +946,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -967,7 +959,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -981,7 +972,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1016,7 +1006,6 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1055,7 +1044,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1079,7 +1067,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1111,7 +1098,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1143,7 +1129,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1175,7 +1160,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1210,7 +1194,6 @@ 
namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1245,7 +1228,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 745da9d88c..0dfbe85bbc 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg.mg +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005432605743408203  +DEBUG: model prefixing takes 0.004771232604980469  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,58 +155,85 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.157 s +1 processes with 123 diagrams generated in 0.145 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  INFO: initialize a new directory: CODEGEN_mad_gg_ttgg INFO: remove old information in CODEGEN_mad_gg_ttgg -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses  INFO: Organizing processes 
into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [0, 1, 2, 3, 4, 5, 6, 
7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: 
[41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 10, 14: 11, 15: 12, 16: 13, 17: 14, 18: 15, 19: 16, 20: 17, 21: 18, 22: 19, 23: 20, 24: 21, 25: 22, 26: 23, 27: 24, 28: 25, 29: 26, 30: 27, 31: 28, 32: 29, 33: 30, 37: 31, 38: 32, 39: 33, 40: 34, 41: 35, 42: 36, 43: 37, 44: 38, 45: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 55: 46, 56: 47, 57: 48, 58: 49, 59: 50, 60: 51, 61: 52, 62: 53, 63: 54, 67: 55, 68: 56, 69: 57, 70: 58, 71: 59, 72: 60, 73: 61, 74: 62, 75: 63, 76: 64, 77: 65, 78: 66, 79: 67, 80: 68, 81: 69, 85: 70, 86: 71, 87: 72, 88: 73, 89: 74, 90: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 100: 85, 101: 86, 102: 87, 106: 88, 107: 89, 108: 90, 109: 91, 110: 92, 111: 93, 115: 94, 116: 95, 117: 96, 118: 97, 119: 98, 120: 99, 124: 100, 125: 101, 126: 102, 127: 103, 128: 104, 129: 105} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  +DEBUG: call =  
vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at 
line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.423 s -Wrote files for 222 helas calls in 0.710 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.385 s +Wrote files for 222 helas calls in 0.655 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.325 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  +ALOHA: aloha creates 5 routines in 0.285 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.308 s +ALOHA: aloha creates 10 routines in 0.268 s VVV1 VVV1 FFV1 @@ -219,22 +246,27 @@ ALOHA: aloha creates 10 routines in 0.308 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.cc +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  ************************************************************ * * * W E L C O M E to * @@ -255,15 +287,14 @@ DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP -run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -287,40 +318,40 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py -Hunk #1 succeeded at 4188 (offset 1 line). 
patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 191 (offset 48 lines). Hunk #3 succeeded at 269 (offset 48 lines). Hunk #4 succeeded at 297 (offset 48 lines). Hunk #5 succeeded at 342 (offset 48 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done. +Hunk #6 succeeded at 830 (offset 434 lines). +Hunk #7 succeeded at 1717 (offset 1247 lines). +Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/README +/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/README Run "open index.html" to see more information about this process. 
quit -real 0m3.903s -user 0m3.552s -sys 0m0.335s +real 0m3.946s +user 0m3.315s +sys 0m0.356s diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt index cdeedc7863..5ca005676e 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_file.inc index ec923afd6d..cf4ec946f8 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1_1.o VVVV4_0.o VVVV4P0_1.o FFV1_0.o VVV1_0.o FFV1_2.o VVVV3_0.o VVVV1_0.o VVVV3P0_1.o VVVV1P0_1.o VVV1P0_1.o FFV1P0_3.o +ALOHARoutine = VVVV3_0.o VVVV4P0_1.o VVVV3P0_1.o VVVV1P0_1.o FFV1_1.o FFV1_2.o VVV1P0_1.o VVV1_0.o FFV1_0.o FFV1P0_3.o VVVV1_0.o VVVV4_0.o diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MatrixElementKernels.cc index 74b5239ebf..30257195b6 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MatrixElementKernels.cc @@ -112,17 +112,10 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! 
const std::string tag = "arm neon (128bit as in SSE4.2)"; -#elif defined( __x86_64__ ) || defined( __i386__ ) +#else bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; -#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted - bool known = false; // __builtin_cpu_supports is not supported - // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html - // See https://stackoverflow.com/q/62783908 - // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu - bool ok = true; // this is just an assumption! - const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc index 896d64343e..442d769ae3 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc @@ -250,11 +250,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 1 - VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -263,7 +263,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[6], 
w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -272,7 +272,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -285,10 +285,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 123 *** // Wavefunction(s) for diagram number 2 - VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 2 - VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -305,10 +305,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 123 *** // Wavefunction(s) for diagram number 3 - VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] ); + VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -325,10 +325,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 123 *** // Wavefunction(s) for diagram number 4 - VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 0., 
0., w_fp[10] ); // Amplitude(s) for diagram number 4 - VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -345,11 +345,11 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 123 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); - FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -363,7 +363,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -376,10 +376,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 123 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[3], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -390,10 +390,10 
@@ namespace mg5amcCpu // *** DIAGRAM 8 OF 123 *** // Wavefunction(s) for diagram number 8 - FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); + FFV1_1( w_fp[2], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -407,7 +407,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -420,10 +420,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 123 *** // Wavefunction(s) for diagram number 10 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[15] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -434,10 +434,10 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 123 *** // Wavefunction(s) for diagram number 11 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -451,7 +451,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -467,7 +467,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -481,7 +481,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -497,7 +497,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -513,7 +513,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -526,12 +526,12 @@ namespace mg5amcCpu // *** DIAGRAM 17 OF 123 *** // 
Wavefunction(s) for diagram number 17 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); - FFV1_1( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[12], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -541,10 +541,10 @@ namespace mg5amcCpu // *** DIAGRAM 18 OF 123 *** // Wavefunction(s) for diagram number 18 - FFV1_1( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[12], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -557,7 +557,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -568,11 +568,11 @@ namespace mg5amcCpu // *** DIAGRAM 20 OF 123 *** // Wavefunction(s) for diagram number 20 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[1], w_fp[4], 
COUPs[0], 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 0., 0., w_fp[17] ); // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -588,7 +588,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -602,7 +602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -613,10 +613,10 @@ namespace mg5amcCpu // *** DIAGRAM 23 OF 123 *** // Wavefunction(s) for diagram number 23 - VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] ); + VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 0., 0., w_fp[18] ); // Amplitude(s) for diagram number 23 - VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -632,7 +632,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -646,7 +646,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 25 - FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -657,10 +657,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 123 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] ); + FFV1_1( w_fp[12], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[19] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -673,7 +673,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -686,7 +686,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 28 - FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -699,7 +699,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -712,7 +712,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 30 - FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -726,7 +726,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 31 - VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -739,22 +739,22 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 123 *** // Wavefunction(s) for diagram number 32 - VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[17] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[19] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( 
w_fp[3], w_fp[12], w_fp[8], COUPs[1], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -763,12 +763,12 @@ namespace mg5amcCpu // *** DIAGRAM 33 OF 123 *** // Wavefunction(s) for diagram number 33 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[12], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -778,10 +778,10 @@ namespace mg5amcCpu // *** DIAGRAM 34 OF 123 *** // Wavefunction(s) for diagram number 34 - FFV1_2( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[12], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 34 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 34 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -794,7 +794,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -805,10 +805,10 @@ namespace mg5amcCpu // *** DIAGRAM 36 OF 123 *** // 
Wavefunction(s) for diagram number 36 - FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[22] ); + FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -824,7 +824,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 37 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 37 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -838,7 +838,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 38 - FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 38 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -852,7 +852,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 39 - VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 39 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -868,7 +868,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 40 - FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 40 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -882,7 +882,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 41 - FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 41 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -893,10 +893,10 @@ namespace mg5amcCpu // *** DIAGRAM 42 OF 123 *** // Wavefunction(s) for diagram number 42 - FFV1_2( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); + FFV1_2( w_fp[12], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 42 - FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 42 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -909,7 +909,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 43 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 43 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -922,7 +922,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 44 - FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 44 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -935,7 +935,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 45 - FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 45 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -948,7 +948,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 46 - FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 46 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -962,7 +962,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 47 - VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 47 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -978,17 +978,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 48 - FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[11] -= amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; - FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -997,11 +997,11 @@ namespace mg5amcCpu // *** DIAGRAM 49 OF 123 *** // Wavefunction(s) for diagram number 49 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] ); - FFV1_2( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[12] ); + FFV1_2( w_fp[3], w_fp[12], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 49 - FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, 
&_fp[0] ); + FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 49 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1012,10 +1012,10 @@ namespace mg5amcCpu // *** DIAGRAM 50 OF 123 *** // Wavefunction(s) for diagram number 50 - VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 50 - FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 50 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1031,7 +1031,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 51 - FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 51 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1042,10 +1042,10 @@ namespace mg5amcCpu // *** DIAGRAM 52 OF 123 *** // Wavefunction(s) for diagram number 52 - FFV1_1( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); + FFV1_1( w_fp[2], w_fp[12], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 52 - FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 52 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1059,7 +1059,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 53 - FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( 
channelId == 53 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1075,7 +1075,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 54 - FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 54 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1089,7 +1089,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 55 - FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 55 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1105,7 +1105,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 56 - FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 56 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1121,7 +1121,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 57 - VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 57 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1141,7 +1141,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 58 - VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1150,7 +1150,7 @@ 
namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1159,7 +1159,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1172,10 +1172,10 @@ namespace mg5amcCpu // *** DIAGRAM 59 OF 123 *** // Wavefunction(s) for diagram number 59 - VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 59 - VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 59 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1195,7 +1195,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 60 - VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 60 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1215,7 +1215,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 61 - FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 61 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1231,7 +1231,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 62 - FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 62 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1245,7 +1245,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 63 - FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 63 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1261,7 +1261,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 64 - FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 64 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1272,11 +1272,11 @@ namespace mg5amcCpu // *** DIAGRAM 65 OF 123 *** // Wavefunction(s) for diagram number 65 - VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] ); - FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 0., 0., w_fp[20] ); + FFV1_2( w_fp[3], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 65 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 65 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ 
-1287,10 +1287,10 @@ namespace mg5amcCpu // *** DIAGRAM 66 OF 123 *** // Wavefunction(s) for diagram number 66 - VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] ); + VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 66 - FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 66 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1306,7 +1306,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 67 - FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 67 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1317,10 +1317,10 @@ namespace mg5amcCpu // *** DIAGRAM 68 OF 123 *** // Wavefunction(s) for diagram number 68 - FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 68 - FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 68 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1334,7 +1334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 69 - FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 69 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1350,7 +1350,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 70 - FFV1_0( w_fp[16], 
w_fp[11], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 70 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1364,7 +1364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 71 - FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 71 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1380,7 +1380,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 72 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 72 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1396,7 +1396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 73 - VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 73 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1416,7 +1416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 74 - VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1425,7 +1425,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( 
w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1434,7 +1434,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1447,10 +1447,10 @@ namespace mg5amcCpu // *** DIAGRAM 75 OF 123 *** // Wavefunction(s) for diagram number 75 - VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] ); + VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 75 - VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 75 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1470,7 +1470,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 76 - VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 76 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1490,7 +1490,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 77 - FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 77 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1506,7 +1506,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 78 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 78 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1520,7 +1520,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 79 - FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 79 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1536,7 +1536,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 80 - FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 80 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1547,10 +1547,10 @@ namespace mg5amcCpu // *** DIAGRAM 81 OF 123 *** // Wavefunction(s) for diagram number 81 - FFV1_1( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[9], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 81 - FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 81 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1560,10 +1560,10 @@ namespace mg5amcCpu // *** DIAGRAM 82 OF 123 *** // Wavefunction(s) for diagram number 82 - FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[15], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 82 - FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( 
w_fp[12], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 82 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1576,7 +1576,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 83 - FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 83 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1586,10 +1586,10 @@ namespace mg5amcCpu // *** DIAGRAM 84 OF 123 *** // Wavefunction(s) for diagram number 84 - FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[13], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 84 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 84 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1602,7 +1602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 85 - FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 85 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1613,10 +1613,10 @@ namespace mg5amcCpu // *** DIAGRAM 86 OF 123 *** // Wavefunction(s) for diagram number 86 - VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 86 - FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 86 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1629,10 +1629,10 @@ namespace mg5amcCpu // *** DIAGRAM 87 OF 123 *** // Wavefunction(s) for diagram number 87 - FFV1_2( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); + FFV1_2( w_fp[16], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 87 - FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 87 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1642,10 +1642,10 @@ namespace mg5amcCpu // *** DIAGRAM 88 OF 123 *** // Wavefunction(s) for diagram number 88 - FFV1_1( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); + FFV1_1( w_fp[11], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 88 - FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 88 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1658,7 +1658,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 89 - FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 89 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1668,10 +1668,10 @@ namespace mg5amcCpu // *** DIAGRAM 90 OF 123 *** // Wavefunction(s) for diagram number 90 - FFV1_1( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] ); + FFV1_1( w_fp[14], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[24] ); // Amplitude(s) for diagram number 90 - FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( 
w_fp[16], w_fp[24], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 90 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1684,7 +1684,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 91 - FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 91 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1698,7 +1698,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 92 - FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 92 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1714,7 +1714,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 93 - VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1723,7 +1723,7 @@ namespace mg5amcCpu jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1732,7 +1732,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + 
VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1745,10 +1745,10 @@ namespace mg5amcCpu // *** DIAGRAM 94 OF 123 *** // Wavefunction(s) for diagram number 94 - VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] ); + VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 94 - VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 94 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1765,10 +1765,10 @@ namespace mg5amcCpu // *** DIAGRAM 95 OF 123 *** // Wavefunction(s) for diagram number 95 - VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] ); + VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 95 - VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 95 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1788,7 +1788,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 96 - FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 96 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1804,7 +1804,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 97 - FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 97 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1818,7 +1818,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 98 - FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 98 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1834,7 +1834,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 99 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 99 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1848,7 +1848,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 100 - VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1857,7 +1857,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1866,7 +1866,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1879,10 +1879,10 @@ namespace mg5amcCpu // *** DIAGRAM 101 OF 123 *** // Wavefunction(s) for diagram number 101 - VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 101 - VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 101 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1902,7 +1902,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 102 - VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 102 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1922,7 +1922,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 103 - FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 103 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1938,7 +1938,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 104 - FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 104 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1952,7 +1952,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 105 - FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 105 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1968,7 +1968,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 106 - FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 106 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1982,7 +1982,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 107 - VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1991,7 +1991,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2000,7 +2000,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2016,7 +2016,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 108 - VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 108 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2036,7 +2036,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 109 - VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 109 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2056,7 +2056,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 110 - FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 110 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2069,7 +2069,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 111 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 111 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2082,7 +2082,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 112 - FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 112 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2095,7 +2095,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 113 - FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 113 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2105,12 +2105,12 @@ 
namespace mg5amcCpu // *** DIAGRAM 114 OF 123 *** // Wavefunction(s) for diagram number 114 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[12] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[24] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 114 - VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2119,7 +2119,7 @@ namespace mg5amcCpu jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2128,7 +2128,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2144,17 +2144,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 115 - FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[3], 
w_fp[14], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -2166,17 +2166,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 116 - FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; - FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[12] += amp_sv[0]; - FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -2185,12 +2185,12 @@ namespace mg5amcCpu // *** DIAGRAM 117 OF 123 *** // Wavefunction(s) for diagram number 117 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[13] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 117 - VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; 
@@ -2199,7 +2199,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2208,7 +2208,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2224,17 +2224,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 118 - FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[16] += amp_sv[0]; @@ -2246,17 +2246,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 119 - FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; - FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( 
w_fp[15], w_fp[2], w_fp[13], COUPs[1], &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; - FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[18] += amp_sv[0]; @@ -2265,22 +2265,22 @@ namespace mg5amcCpu // *** DIAGRAM 120 OF 123 *** // Wavefunction(s) for diagram number 120 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[15] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[13] ); // Amplitude(s) for diagram number 120 - FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -2292,17 +2292,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 121 - FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], 
w_fp[2], w_fp[15], COUPs[1], &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; - FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[19] += amp_sv[0]; @@ -2314,7 +2314,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 122 - VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2323,7 +2323,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2332,7 +2332,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2348,7 +2348,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 123 - VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2357,7 +2357,7 @@ namespace mg5amcCpu jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2366,7 +2366,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2961,12 +2961,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f index 0fa6436690..d12d34daf6 100644 --- 
a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f @@ -39,7 +39,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -127,24 +126,11 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f index 77f5152327..dc6e4b80f3 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f @@ -830,6 +830,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -1716,6 +1717,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk index c6c1826de7..43cee0977e 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk @@ -27,8 +27,6 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) - #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -222,8 +220,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +555,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: 
$(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile index 74b19033a8..74db44d848 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
+LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/generate_events b/epochX/cudacpp/gg_ttgg.mad/bin/generate_events index 5577cc66a0..107313b25d 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/generate_events +++ b/epochX/cudacpp/gg_ttgg.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME -import misc as misc + import logging import logging.config @@ -160,31 +160,17 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv - - # check for plugin customization of the launch command - launch_interface = ME.MadEventCmdShell - if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): - with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - launch_interface = launch_plugin.MEINTERFACE - - - + argument = sys.argv try: if '-h' in argument or '--help' in argument: - launch = launch_interface(me_dir=root_path, force_run=True) + launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = launch_interface(me_dir=root_path, force_run=True) + launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py 
b/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py index e9f421ae5f..7624b9f557 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py @@ -1002,14 +1002,13 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() - self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - + self.plugin_input(finput) def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/gg_ttgg.mad/bin/internal/ufomodel/py3_model.pkl index 27a1caae3c115073669b90622e9351ab04166d39..dc38da0bfa76ea4206a3c5b2d34b98c606f7d044 100644 GIT binary patch delta 23 fcmX?qj_Kk#rVZZ97)vMnEK_6l^>>+EyzCADfsqPs delta 21 dcmcb3j_Le4rVZZ97>g(SEK_5Qm|VK-4ghc73FH6( diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/madevent b/epochX/cudacpp/gg_ttgg.mad/bin/madevent index 10b6a71fa2..c944aa1faf 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/madevent +++ b/epochX/cudacpp/gg_ttgg.mad/bin/madevent @@ -32,7 +32,6 @@ except ImportError: import os -pjoin = os.path.join import optparse # Get the directory of the script real path (bin) @@ -161,23 +160,10 @@ except: pass import internal.madevent_interface as cmd_interface -# check for plugin customization of the launch command -launch_interface = cmd_interface.MadEventCmdShell -if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): - with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - launch_interface = launch_plugin.MEINTERFACE - - - # Call the cmd interface main loop try: if '-h' in args or '--help' in args: - launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) + launch = ME.MadEventCmdShell(me_dir=os.path.dirname(root_path), 
force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): @@ -192,7 +178,7 @@ try: cmd_line.run_cmd('import command ' + input_file) cmd_line.run_cmd('quit') else: - cmd_line = launch_interface(force_run=True) + cmd_line = cmd_interface.MadEventCmdShell(force_run=True) cmd_line.use_rawinput = False cmd_line.haspiping = False cmd_line.run_cmd('import command ' + input_file) @@ -202,7 +188,7 @@ try: if options.web: cmd_line = cmd_interface.MadEventCmd(force_run=True) else: - cmd_line = launch_interface(force_run=True) + cmd_line = cmd_interface.MadEventCmdShell(force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print( parser_error) diff --git a/epochX/cudacpp/gg_ttgg.mad/mg5.in b/epochX/cudacpp/gg_ttgg.mad/mg5.in index e2c5858b63..b1f4667829 100644 --- a/epochX/cudacpp/gg_ttgg.mad/mg5.in +++ b/epochX/cudacpp/gg_ttgg.mad/mg5.in @@ -1,4 +1,2 @@ -set stdout_level DEBUG -set zerowidth_tchannel F generate g g > t t~ g g output madevent gg_ttgg.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h index 9b946c21e1..9cea8bcbe7 100644 --- a/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h @@ -863,7 +863,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -874,7 +873,6 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -888,7 +886,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) 
ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -899,7 +896,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -912,7 +908,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -925,7 +920,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -940,7 +934,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -952,7 +945,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -967,7 +959,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -979,7 +970,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -994,7 +984,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -1006,7 +995,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const 
fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -1020,7 +1008,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1055,7 +1042,6 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1094,7 +1080,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1118,7 +1103,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1150,7 +1134,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1182,7 +1165,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1215,7 +1197,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1244,7 +1225,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1280,7 +1260,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1309,7 +1288,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1345,7 +1323,6 @@ namespace mg5amcCpu const fptype allV3[], const 
fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1374,7 +1351,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 373a89a800..fb29a354ab 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg.mg +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005511283874511719  +DEBUG: model prefixing takes 0.0045855045318603516  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,35 +155,64 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.155 s +1 processes with 123 diagrams generated in 0.142 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  -DEBUG: type(subproc_group)= [output.py at line 190]  -DEBUG: type(fortran_model)= [output.py at line 191]  -DEBUG: type(me)= me=0 [output.py at line 192]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h -FileWriter for 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.420 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  +DEBUG: type(subproc_group)= [output.py at line 188]  +DEBUG: type(fortran_model)= [output.py at line 189]  +DEBUG: type(me)= me=0 [output.py at line 190]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: proc_id =  0 [model_handling.py at line 1046]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: 
type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1823]  +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
+DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1336]  +Generated helas calls for 1 subprocesses (123 diagrams) in 0.382 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.315 s +ALOHA: aloha creates 5 routines in 0.601 s VVV1 VVV1 FFV1 @@ -196,17 +225,23 @@ ALOHA: aloha creates 5 routines in 0.315 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
+FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.cc +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m1.438s -user 0m1.366s -sys 0m0.060s +real 0m1.922s +user 0m1.324s +sys 0m0.063s diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MatrixElementKernels.cc index 74b5239ebf..30257195b6 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MatrixElementKernels.cc @@ -112,17 +112,10 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#elif defined( __x86_64__ ) || defined( __i386__ ) +#else bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; -#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted - bool known = false; // __builtin_cpu_supports is not supported - // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html - // See https://stackoverflow.com/q/62783908 - // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu - bool ok = true; // this is just an assumption! 
- const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc index 927a19a802..25f123c774 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc @@ -250,11 +250,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 1 - VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -266,7 +266,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -278,7 +278,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -294,10 +294,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 123 *** // Wavefunction(s) for diagram number 2 - VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 2 - VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -313,10 +313,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 123 *** // Wavefunction(s) for diagram number 3 - VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] ); + VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -332,10 +332,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 123 *** // Wavefunction(s) for diagram number 4 - VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 4 - VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -351,11 +351,11 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 123 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); - FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[11] 
); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -368,7 +368,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -380,10 +380,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 123 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[3], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -393,10 +393,10 @@ namespace mg5amcCpu // *** DIAGRAM 8 OF 123 *** // Wavefunction(s) for diagram number 8 - FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); + FFV1_1( w_fp[2], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -409,7 +409,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[14], w_fp[8], 
COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -421,10 +421,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 123 *** // Wavefunction(s) for diagram number 10 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[15] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -434,10 +434,10 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 123 *** // Wavefunction(s) for diagram number 11 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -450,7 +450,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -465,7 +465,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code 
base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -478,7 +478,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -493,7 +493,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -508,7 +508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -520,12 +520,12 @@ namespace mg5amcCpu // *** DIAGRAM 17 OF 123 *** // Wavefunction(s) for diagram number 17 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); - FFV1_1( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[12], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv 
and denominators_sv (#473) #endif @@ -534,10 +534,10 @@ namespace mg5amcCpu // *** DIAGRAM 18 OF 123 *** // Wavefunction(s) for diagram number 18 - FFV1_1( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[12], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -549,7 +549,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -559,11 +559,11 @@ namespace mg5amcCpu // *** DIAGRAM 20 OF 123 *** // Wavefunction(s) for diagram number 20 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 0., 0., w_fp[17] ); // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -578,7 +578,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif 
@@ -591,7 +591,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -601,10 +601,10 @@ namespace mg5amcCpu // *** DIAGRAM 23 OF 123 *** // Wavefunction(s) for diagram number 23 - VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] ); + VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 0., 0., w_fp[18] ); // Amplitude(s) for diagram number 23 - VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -619,7 +619,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -632,7 +632,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 25 - FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -642,10 +642,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 123 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] ); + FFV1_1( w_fp[12], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[19] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &_fp[0] 
); + FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -657,7 +657,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -669,7 +669,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 28 - FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -681,7 +681,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -693,7 +693,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 30 - FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -706,7 +706,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 31 - VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -718,12 +718,12 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 123 *** // Wavefunction(s) for diagram number 32 - VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[17] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[19] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -731,7 +731,7 @@ namespace mg5amcCpu jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -739,7 +739,7 @@ namespace mg5amcCpu jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -751,12 +751,12 @@ namespace mg5amcCpu // *** DIAGRAM 33 OF 123 *** // Wavefunction(s) for diagram number 33 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); 
+ FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[12], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -765,10 +765,10 @@ namespace mg5amcCpu // *** DIAGRAM 34 OF 123 *** // Wavefunction(s) for diagram number 34 - FFV1_2( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[12], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 34 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -780,7 +780,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -790,10 +790,10 @@ namespace mg5amcCpu // *** DIAGRAM 36 OF 123 *** // Wavefunction(s) for diagram number 36 - FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[22] ); + FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif 
@@ -808,7 +808,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 37 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -821,7 +821,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 38 - FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -834,7 +834,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 39 - VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -849,7 +849,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 40 - FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -862,7 +862,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 41 - FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -872,10 +872,10 @@ namespace mg5amcCpu // *** DIAGRAM 42 OF 123 *** // Wavefunction(s) for diagram number 42 - FFV1_2( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], 
w_fp[23] ); + FFV1_2( w_fp[12], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 42 - FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -887,7 +887,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 43 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -899,7 +899,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 44 - FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -911,7 +911,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 45 - FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -923,7 +923,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 46 - FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -936,7 +936,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 47 - VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, 
&_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -951,7 +951,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 48 - FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -959,7 +959,7 @@ namespace mg5amcCpu jamp_sv[11] -= amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -967,7 +967,7 @@ namespace mg5amcCpu jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; - FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -979,11 +979,11 @@ namespace mg5amcCpu // *** DIAGRAM 49 OF 123 *** // Wavefunction(s) for diagram number 49 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] ); - FFV1_2( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[12] ); + FFV1_2( w_fp[3], w_fp[12], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 49 - FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -993,10 +993,10 @@ namespace mg5amcCpu // *** DIAGRAM 50 OF 123 *** // Wavefunction(s) for diagram number 50 - VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 50 - FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1011,7 +1011,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 51 - FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1021,10 +1021,10 @@ namespace mg5amcCpu // *** DIAGRAM 52 OF 123 *** // Wavefunction(s) for diagram number 52 - FFV1_1( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); + FFV1_1( w_fp[2], w_fp[12], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 52 - FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1037,7 +1037,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 53 - FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1052,7 +1052,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 54 - FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1065,7 +1065,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 55 - FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1080,7 +1080,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 56 - FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1095,7 +1095,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 57 - VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1114,7 +1114,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 58 - VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1126,7 +1126,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 
1.0, &_fp[0] ); + VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1138,7 +1138,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1154,10 +1154,10 @@ namespace mg5amcCpu // *** DIAGRAM 59 OF 123 *** // Wavefunction(s) for diagram number 59 - VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 59 - VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1176,7 +1176,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 60 - VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1195,7 +1195,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 61 - FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1210,7 
+1210,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 62 - FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1223,7 +1223,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 63 - FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1238,7 +1238,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 64 - FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1248,11 +1248,11 @@ namespace mg5amcCpu // *** DIAGRAM 65 OF 123 *** // Wavefunction(s) for diagram number 65 - VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] ); - FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 0., 0., w_fp[20] ); + FFV1_2( w_fp[3], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 65 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1262,10 +1262,10 @@ namespace mg5amcCpu // *** DIAGRAM 66 OF 123 *** // Wavefunction(s) for diagram number 66 - VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] ); + VVV1P0_1( w_fp[20], w_fp[4], 
COUPs[0], 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 66 - FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1280,7 +1280,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 67 - FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1290,10 +1290,10 @@ namespace mg5amcCpu // *** DIAGRAM 68 OF 123 *** // Wavefunction(s) for diagram number 68 - FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 68 - FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1306,7 +1306,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 69 - FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1321,7 +1321,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 70 - FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -1334,7 +1334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 71 - FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1349,7 +1349,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 72 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1364,7 +1364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 73 - VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1383,7 +1383,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 74 - VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1395,7 +1395,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1407,7 +1407,7 @@ namespace mg5amcCpu 
jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1423,10 +1423,10 @@ namespace mg5amcCpu // *** DIAGRAM 75 OF 123 *** // Wavefunction(s) for diagram number 75 - VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] ); + VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 75 - VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1445,7 +1445,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 76 - VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1464,7 +1464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 77 - FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1479,7 +1479,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 78 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1492,7 +1492,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 79 - FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1507,7 +1507,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 80 - FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1517,10 +1517,10 @@ namespace mg5amcCpu // *** DIAGRAM 81 OF 123 *** // Wavefunction(s) for diagram number 81 - FFV1_1( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[9], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 81 - FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1529,10 +1529,10 @@ namespace mg5amcCpu // *** DIAGRAM 82 OF 123 *** // Wavefunction(s) for diagram number 82 - FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[15], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 82 - FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1544,7 +1544,7 @@ namespace mg5amcCpu // (none) 
// Amplitude(s) for diagram number 83 - FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1553,10 +1553,10 @@ namespace mg5amcCpu // *** DIAGRAM 84 OF 123 *** // Wavefunction(s) for diagram number 84 - FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[13], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 84 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1568,7 +1568,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 85 - FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1578,10 +1578,10 @@ namespace mg5amcCpu // *** DIAGRAM 86 OF 123 *** // Wavefunction(s) for diagram number 86 - VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 86 - FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1593,10 +1593,10 @@ namespace mg5amcCpu // *** DIAGRAM 87 OF 123 *** // Wavefunction(s) for diagram number 87 - FFV1_2( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); + FFV1_2( 
w_fp[16], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 87 - FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1605,10 +1605,10 @@ namespace mg5amcCpu // *** DIAGRAM 88 OF 123 *** // Wavefunction(s) for diagram number 88 - FFV1_1( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); + FFV1_1( w_fp[11], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 88 - FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1620,7 +1620,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 89 - FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1629,10 +1629,10 @@ namespace mg5amcCpu // *** DIAGRAM 90 OF 123 *** // Wavefunction(s) for diagram number 90 - FFV1_1( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] ); + FFV1_1( w_fp[14], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[24] ); // Amplitude(s) for diagram number 90 - FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1644,7 +1644,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 91 - FFV1_0( w_fp[22], w_fp[2], 
w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1657,7 +1657,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 92 - FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1672,7 +1672,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 93 - VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1684,7 +1684,7 @@ namespace mg5amcCpu jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1696,7 +1696,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1712,10 +1712,10 @@ namespace mg5amcCpu // *** DIAGRAM 94 OF 123 *** // 
Wavefunction(s) for diagram number 94 - VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] ); + VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 94 - VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1731,10 +1731,10 @@ namespace mg5amcCpu // *** DIAGRAM 95 OF 123 *** // Wavefunction(s) for diagram number 95 - VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] ); + VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 95 - VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1753,7 +1753,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 96 - FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1768,7 +1768,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 97 - FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1781,7 +1781,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 98 - FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1796,7 +1796,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 99 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1809,7 +1809,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 100 - VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1821,7 +1821,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1833,7 +1833,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1849,10 +1849,10 @@ namespace mg5amcCpu // *** DIAGRAM 101 OF 123 *** // Wavefunction(s) for diagram number 101 - VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 0., 
w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 101 - VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1871,7 +1871,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 102 - VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1890,7 +1890,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 103 - FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1905,7 +1905,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 104 - FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1918,7 +1918,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 105 - FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1933,7 +1933,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 106 - FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] 
); + FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1946,7 +1946,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 107 - VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1958,7 +1958,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1970,7 +1970,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1989,7 +1989,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 108 - VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2008,7 +2008,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 109 - VVV1_0( 
w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2027,7 +2027,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 110 - FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2039,7 +2039,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 111 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2051,7 +2051,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 112 - FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2063,7 +2063,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 113 - FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2072,12 +2072,12 @@ namespace mg5amcCpu // *** DIAGRAM 114 OF 123 *** // Wavefunction(s) for diagram number 114 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] ); - 
VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[12] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[24] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 114 - VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2089,7 +2089,7 @@ namespace mg5amcCpu jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2101,7 +2101,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2120,7 +2120,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 115 - FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2128,7 +2128,7 @@ namespace mg5amcCpu jamp_sv[19] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[3], 
w_fp[14], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2136,7 +2136,7 @@ namespace mg5amcCpu jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2151,7 +2151,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 116 - FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2159,7 +2159,7 @@ namespace mg5amcCpu jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; - FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2167,7 +2167,7 @@ namespace mg5amcCpu jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[12] += amp_sv[0]; - FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2179,12 +2179,12 @@ namespace mg5amcCpu // *** DIAGRAM 117 OF 123 *** // Wavefunction(s) for diagram number 117 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] ); 
- VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[13] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 117 - VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2196,7 +2196,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2208,7 +2208,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2227,7 +2227,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 118 - FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2235,7 +2235,7 @@ namespace mg5amcCpu jamp_sv[13] -= amp_sv[0]; 
jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2243,7 +2243,7 @@ namespace mg5amcCpu jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2258,7 +2258,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 119 - FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2266,7 +2266,7 @@ namespace mg5amcCpu jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; - FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2274,7 +2274,7 @@ namespace mg5amcCpu jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; - FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2286,12 +2286,12 @@ namespace mg5amcCpu // *** DIAGRAM 120 OF 123 *** // Wavefunction(s) for diagram number 120 - 
VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[15] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[13] ); // Amplitude(s) for diagram number 120 - FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2299,7 +2299,7 @@ namespace mg5amcCpu jamp_sv[7] -= amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2307,7 +2307,7 @@ namespace mg5amcCpu jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2322,7 +2322,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 121 - FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2330,7 +2330,7 @@ namespace mg5amcCpu jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; 
jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2338,7 +2338,7 @@ namespace mg5amcCpu jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; - FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2353,7 +2353,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 122 - VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2365,7 +2365,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2377,7 +2377,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2396,7 +2396,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 123 - VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2408,7 +2408,7 @@ namespace mg5amcCpu jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2420,7 +2420,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3018,12 +3018,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume 
nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk index c6c1826de7..43cee0977e 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk @@ -27,8 +27,6 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) - #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -222,8 +220,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +555,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq 
($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttgg.sa/mg5.in b/epochX/cudacpp/gg_ttgg.sa/mg5.in index 5b27867642..0ec559d9b8 100644 --- a/epochX/cudacpp/gg_ttgg.sa/mg5.in +++ b/epochX/cudacpp/gg_ttgg.sa/mg5.in @@ -1,4 +1,3 @@ -set stdout_level DEBUG -set zerowidth_tchannel F generate g g > t t~ g g -output standalone_cudacpp gg_ttgg.sa +output standalone_cudacpp gg_ttgg.sa --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp + diff --git a/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h index 9b946c21e1..9cea8bcbe7 100644 --- a/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h @@ -863,7 +863,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -874,7 +873,6 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -888,7 +886,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -899,7 +896,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -912,7 +908,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -925,7 +920,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const 
fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -940,7 +934,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -952,7 +945,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -967,7 +959,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -979,7 +970,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -994,7 +984,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -1006,7 +995,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -1020,7 +1008,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1055,7 +1042,6 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1094,7 +1080,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1118,7 +1103,6 @@ namespace mg5amcCpu 
FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1150,7 +1134,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1182,7 +1165,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1215,7 +1197,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1244,7 +1225,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1280,7 +1260,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1309,7 +1288,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1345,7 +1323,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1374,7 +1351,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 7e024b5fd3..b0f5bcbfef 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run 
aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg.mg +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005335092544555664  +DEBUG: model prefixing takes 0.0044476985931396484  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,60 +155,89 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.895 s +1 processes with 1240 diagrams generated in 1.726 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  INFO: initialize a new directory: CODEGEN_mad_gg_ttggg INFO: remove old information in CODEGEN_mad_gg_ttggg -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses  INFO: 
Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] -INFO: Color-Flow passed to 1592 term in 36s. Introduce 2768 contraction -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +INFO: Color-Flow passed to 1592 term in 30s. Introduce 2768 contraction +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 
116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 
481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 
837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 
161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 
522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 
0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [3], 4: [4], 5: [6], 6: [7], 7: [13], 8: [14], 9: [15], 10: [17], 11: [18], 12: [19], 13: [21], 14: [22], 15: [23], 16: [25], 17: [26], 18: [27], 19: [28], 20: [29], 21: [30], 22: [32], 23: [33], 24: [34], 25: [35], 26: [36], 27: [37], 28: [38], 29: [39], 30: [40], 31: [41], 32: [42], 33: [43], 34: [44], 35: [45], 36: [46], 37: [48], 38: [49], 39: [50], 40: [51], 41: [52], 42: [53], 43: [54], 44: [55], 45: [56], 46: [57], 47: [58], 48: [59], 49: [60], 50: [61], 51: [62], 52: [64], 53: [65], 54: [66], 55: [67], 56: [68], 57: [69], 58: [70], 59: [71], 60: [72], 61: [73], 62: [74], 63: [75], 64: [76], 65: [77], 66: [78], 67: [80], 68: [81], 69: [82], 70: [83], 71: [84], 72: [85], 73: [86], 74: [87], 75: [88], 76: [90], 77: [91], 78: [92], 79: [93], 80: [94], 81: [95], 82: [96], 83: [97], 84: [98], 85: [100], 86: [101], 87: [102], 88: [103], 89: [104], 90: [105], 91: [106], 92: [107], 93: [108], 94: [109], 95: [110], 96: [111], 97: [112], 98: [113], 99: [114], 100: [115], 101: [116], 102: [117], 103: [118], 104: [119], 105: [120], 106: [123], 107: [124], 108: [125], 109: [126], 110: [127], 111: [128], 112: [129], 113: [130], 114: [131], 115: [132], 116: [133], 
117: [134], 118: [135], 119: [136], 120: [137], 121: [139], 122: [140], 123: [142], 124: [143], 125: [144], 126: [145], 127: [146], 128: [147], 129: [148], 130: [149], 131: [150], 132: [151], 133: [152], 134: [153], 135: [154], 136: [155], 137: [156], 138: [158], 139: [159], 140: [160], 141: [161], 142: [162], 143: [163], 144: [164], 145: [165], 146: [166], 147: [167], 148: [168], 149: [169], 150: [170], 151: [171], 152: [172], 153: [174], 154: [175], 155: [176], 156: [177], 157: [178], 158: [179], 159: [180], 160: [181], 161: [182], 162: [183], 163: [184], 164: [185], 165: [186], 166: [187], 167: [188], 168: [189], 169: [190], 170: [191], 171: [192], 172: [193], 173: [194], 174: [195], 175: [196], 176: [197], 177: [198], 178: [199], 179: [200], 180: [201], 181: [202], 182: [203], 183: [204], 184: [205], 185: [206], 186: [207], 187: [208], 188: [209], 189: [210], 190: [211], 191: [212], 192: [213], 193: [214], 194: [215], 195: [216], 196: [217], 197: [219], 198: [220], 199: [221], 200: [222], 201: [223], 202: [224], 203: [226], 204: [227], 205: [228], 206: [229], 207: [230], 208: [231], 209: [233], 210: [234], 211: [246], 212: [247], 213: [248], 214: [249], 215: [250], 216: [251], 217: [252], 218: [253], 219: [254], 220: [255], 221: [256], 222: [257], 223: [258], 224: [259], 225: [260], 226: [262], 227: [263], 228: [265], 229: [266], 230: [267], 231: [268], 232: [269], 233: [270], 234: [271], 235: [272], 236: [273], 237: [274], 238: [275], 239: [276], 240: [277], 241: [278], 242: [279], 243: [281], 244: [282], 245: [283], 246: [284], 247: [285], 248: [286], 249: [287], 250: [288], 251: [289], 252: [290], 253: [291], 254: [292], 255: [293], 256: [294], 257: [295], 258: [297], 259: [298], 260: [299], 261: [300], 262: [301], 263: [302], 264: [303], 265: [304], 266: [305], 267: [306], 268: [307], 269: [308], 270: [309], 271: [310], 272: [311], 273: [312], 274: [313], 275: [314], 276: [315], 277: [316], 278: [317], 279: [318], 280: [319], 281: [320], 282: [321], 283: 
[322], 284: [323], 285: [324], 286: [325], 287: [326], 288: [327], 289: [328], 290: [329], 291: [330], 292: [331], 293: [332], 294: [333], 295: [334], 296: [335], 297: [336], 298: [337], 299: [338], 300: [339], 301: [340], 302: [342], 303: [343], 304: [344], 305: [345], 306: [346], 307: [347], 308: [349], 309: [350], 310: [351], 311: [352], 312: [353], 313: [354], 314: [356], 315: [357], 316: [369], 317: [370], 318: [371], 319: [372], 320: [373], 321: [374], 322: [376], 323: [377], 324: [378], 325: [379], 326: [380], 327: [381], 328: [382], 329: [383], 330: [384], 331: [385], 332: [386], 333: [387], 334: [388], 335: [389], 336: [390], 337: [392], 338: [393], 339: [394], 340: [395], 341: [396], 342: [397], 343: [398], 344: [399], 345: [400], 346: [401], 347: [402], 348: [403], 349: [404], 350: [405], 351: [406], 352: [408], 353: [409], 354: [410], 355: [411], 356: [412], 357: [413], 358: [414], 359: [415], 360: [416], 361: [417], 362: [418], 363: [419], 364: [420], 365: [421], 366: [422], 367: [424], 368: [425], 369: [426], 370: [427], 371: [428], 372: [429], 373: [430], 374: [431], 375: [432], 376: [433], 377: [434], 378: [436], 379: [437], 380: [439], 381: [440], 382: [446], 383: [447], 384: [448], 385: [449], 386: [450], 387: [451], 388: [452], 389: [453], 390: [454], 391: [456], 392: [457], 393: [458], 394: [459], 395: [460], 396: [461], 397: [462], 398: [463], 399: [464], 400: [466], 401: [467], 402: [468], 403: [469], 404: [470], 405: [471], 406: [472], 407: [473], 408: [474], 409: [476], 410: [477], 411: [478], 412: [479], 413: [480], 414: [481], 415: [483], 416: [484], 417: [485], 418: [486], 419: [487], 420: [488], 421: [492], 422: [493], 423: [494], 424: [495], 425: [496], 426: [497], 427: [499], 428: [500], 429: [501], 430: [502], 431: [503], 432: [504], 433: [505], 434: [506], 435: [507], 436: [508], 437: [509], 438: [510], 439: [511], 440: [512], 441: [513], 442: [515], 443: [516], 444: [517], 445: [518], 446: [519], 447: [520], 448: [521], 449: [522], 
450: [523], 451: [524], 452: [525], 453: [526], 454: [527], 455: [528], 456: [529], 457: [531], 458: [532], 459: [533], 460: [534], 461: [535], 462: [536], 463: [537], 464: [538], 465: [539], 466: [540], 467: [541], 468: [542], 469: [543], 470: [544], 471: [545], 472: [547], 473: [548], 474: [549], 475: [550], 476: [551], 477: [552], 478: [553], 479: [554], 480: [555], 481: [556], 482: [557], 483: [559], 484: [560], 485: [562], 486: [563], 487: [569], 488: [570], 489: [571], 490: [572], 491: [573], 492: [574], 493: [575], 494: [576], 495: [577], 496: [579], 497: [580], 498: [581], 499: [582], 500: [583], 501: [584], 502: [585], 503: [586], 504: [587], 505: [589], 506: [590], 507: [591], 508: [592], 509: [593], 510: [594], 511: [595], 512: [596], 513: [597], 514: [599], 515: [600], 516: [601], 517: [602], 518: [603], 519: [604], 520: [606], 521: [607], 522: [608], 523: [609], 524: [610], 525: [611], 526: [615], 527: [616], 528: [617], 529: [618], 530: [619], 531: [620], 532: [622], 533: [623], 534: [624], 535: [625], 536: [626], 537: [627], 538: [628], 539: [629], 540: [630], 541: [631], 542: [632], 543: [633], 544: [634], 545: [635], 546: [636], 547: [638], 548: [639], 549: [640], 550: [641], 551: [642], 552: [643], 553: [644], 554: [645], 555: [646], 556: [647], 557: [648], 558: [649], 559: [650], 560: [651], 561: [652], 562: [654], 563: [655], 564: [656], 565: [657], 566: [658], 567: [659], 568: [660], 569: [661], 570: [662], 571: [663], 572: [664], 573: [665], 574: [666], 575: [667], 576: [668], 577: [670], 578: [671], 579: [672], 580: [673], 581: [674], 582: [675], 583: [676], 584: [677], 585: [678], 586: [679], 587: [680], 588: [682], 589: [683], 590: [685], 591: [686], 592: [692], 593: [693], 594: [694], 595: [695], 596: [696], 597: [697], 598: [698], 599: [699], 600: [700], 601: [702], 602: [703], 603: [704], 604: [705], 605: [706], 606: [707], 607: [708], 608: [709], 609: [710], 610: [712], 611: [713], 612: [714], 613: [715], 614: [716], 615: [717], 616: 
[718], 617: [719], 618: [720], 619: [722], 620: [723], 621: [724], 622: [725], 623: [726], 624: [727], 625: [729], 626: [730], 627: [731], 628: [732], 629: [733], 630: [734], 631: [738], 632: [739], 633: [740], 634: [741], 635: [742], 636: [743], 637: [744], 638: [745], 639: [746], 640: [747], 641: [748], 642: [749], 643: [750], 644: [751], 645: [752], 646: [753], 647: [754], 648: [755], 649: [756], 650: [757], 651: [758], 652: [759], 653: [760], 654: [761], 655: [762], 656: [763], 657: [764], 658: [765], 659: [766], 660: [767], 661: [768], 662: [769], 663: [770], 664: [772], 665: [773], 666: [774], 667: [775], 668: [776], 669: [777], 670: [779], 671: [780], 672: [781], 673: [782], 674: [783], 675: [784], 676: [788], 677: [789], 678: [790], 679: [791], 680: [792], 681: [793], 682: [794], 683: [795], 684: [796], 685: [797], 686: [798], 687: [799], 688: [800], 689: [801], 690: [802], 691: [803], 692: [804], 693: [805], 694: [806], 695: [807], 696: [808], 697: [809], 698: [810], 699: [811], 700: [812], 701: [813], 702: [814], 703: [815], 704: [816], 705: [817], 706: [818], 707: [819], 708: [820], 709: [822], 710: [823], 711: [824], 712: [825], 713: [826], 714: [827], 715: [829], 716: [830], 717: [831], 718: [832], 719: [833], 720: [834], 721: [838], 722: [839], 723: [841], 724: [842], 725: [844], 726: [845], 727: [851], 728: [852], 729: [853], 730: [854], 731: [855], 732: [856], 733: [857], 734: [858], 735: [859], 736: [861], 737: [862], 738: [863], 739: [864], 740: [865], 741: [866], 742: [867], 743: [868], 744: [869], 745: [871], 746: [872], 747: [873], 748: [874], 749: [875], 750: [876], 751: [877], 752: [878], 753: [879], 754: [881], 755: [882], 756: [883], 757: [884], 758: [885], 759: [886], 760: [888], 761: [889], 762: [890], 763: [891], 764: [892], 765: [893], 766: [894], 767: [895], 768: [897], 769: [898], 770: [900], 771: [901], 772: [907], 773: [908], 774: [909], 775: [910], 776: [911], 777: [912], 778: [913], 779: [914], 780: [915], 781: [917], 782: [918], 
783: [919], 784: [920], 785: [921], 786: [922], 787: [923], 788: [924], 789: [925], 790: [927], 791: [928], 792: [929], 793: [930], 794: [931], 795: [932], 796: [933], 797: [934], 798: [935], 799: [937], 800: [938], 801: [939], 802: [940], 803: [941], 804: [942], 805: [944], 806: [945], 807: [946], 808: [947], 809: [948], 810: [949], 811: [950], 812: [951], 813: [953], 814: [954], 815: [956], 816: [957], 817: [963], 818: [964], 819: [965], 820: [966], 821: [967], 822: [968], 823: [969], 824: [970], 825: [971], 826: [973], 827: [974], 828: [975], 829: [976], 830: [977], 831: [978], 832: [979], 833: [980], 834: [981], 835: [983], 836: [984], 837: [985], 838: [986], 839: [987], 840: [988], 841: [989], 842: [990], 843: [991], 844: [993], 845: [994], 846: [995], 847: [996], 848: [997], 849: [998], 850: [1000], 851: [1001], 852: [1002], 853: [1003], 854: [1004], 855: [1005], 856: [1006], 857: [1007], 858: [1009], 859: [1010], 860: [1012], 861: [1013], 862: [1018], 863: [1019], 864: [1021], 865: [1022], 866: [1024], 867: [1025], 868: [1030], 869: [1031], 870: [1033], 871: [1034], 872: [1036], 873: [1037], 874: [1045], 875: [1046], 876: [1047], 877: [1048], 878: [1049], 879: [1050], 880: [1051], 881: [1052], 882: [1053], 883: [1054], 884: [1055], 885: [1056], 886: [1057], 887: [1058], 888: [1059], 889: [1060], 890: [1061], 891: [1062], 892: [1064], 893: [1065], 894: [1066], 895: [1067], 896: [1068], 897: [1069], 898: [1070], 899: [1071], 900: [1072], 901: [1073], 902: [1074], 903: [1075], 904: [1076], 905: [1077], 906: [1078], 907: [1079], 908: [1080], 909: [1081], 910: [1083], 911: [1084], 912: [1085], 913: [1086], 914: [1087], 915: [1088], 916: [1089], 917: [1090], 918: [1091], 919: [1092], 920: [1093], 921: [1094], 922: [1095], 923: [1096], 924: [1097], 925: [1098], 926: [1099], 927: [1100], 928: [1102], 929: [1103], 930: [1104], 931: [1105], 932: [1106], 933: [1107], 934: [1109], 935: [1110], 936: [1111], 937: [1112], 938: [1113], 939: [1114], 940: [1116], 941: [1117], 
942: [1118], 943: [1119], 944: [1120], 945: [1121]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [3], 4: [4], 5: [6], 6: [7], 7: [13], 8: [14], 9: [15], 10: [17], 11: [18], 12: [19], 13: [21], 14: [22], 15: [23], 16: [25], 17: [26], 18: [27], 19: [28], 20: [29], 21: [30], 22: [32], 23: [33], 24: [34], 25: [35], 26: [36], 27: [37], 28: [38], 29: [39], 30: [40], 31: [41], 32: [42], 33: [43], 34: [44], 35: [45], 36: [46], 37: [48], 38: [49], 39: [50], 40: [51], 41: [52], 42: [53], 43: [54], 44: [55], 45: [56], 46: [57], 47: [58], 48: [59], 49: [60], 50: [61], 51: [62], 52: [64], 53: [65], 54: [66], 55: [67], 56: [68], 57: [69], 58: [70], 59: [71], 60: [72], 61: [73], 62: [74], 63: [75], 64: [76], 65: [77], 66: [78], 67: [80], 68: [81], 69: [82], 70: [83], 71: [84], 72: [85], 73: [86], 74: [87], 75: [88], 76: [90], 77: [91], 78: [92], 79: [93], 80: [94], 81: [95], 82: [96], 83: [97], 84: [98], 85: [100], 86: [101], 87: [102], 88: [103], 89: [104], 90: [105], 91: [106], 92: [107], 93: [108], 94: [109], 95: [110], 96: [111], 97: [112], 98: [113], 99: [114], 100: [115], 101: [116], 102: [117], 103: [118], 104: [119], 105: [120], 106: [123], 107: [124], 108: [125], 109: [126], 110: [127], 111: [128], 112: [129], 113: [130], 114: [131], 115: [132], 116: [133], 117: [134], 118: [135], 119: [136], 120: [137], 121: [139], 122: [140], 123: [142], 124: [143], 125: [144], 126: [145], 127: [146], 128: [147], 129: [148], 130: [149], 131: [150], 132: [151], 133: [152], 134: [153], 135: [154], 136: [155], 137: [156], 138: [158], 139: [159], 140: [160], 141: [161], 142: [162], 143: [163], 144: [164], 145: [165], 146: [166], 147: [167], 148: [168], 149: [169], 150: [170], 151: [171], 152: [172], 153: [174], 154: [175], 155: [176], 156: [177], 157: [178], 158: [179], 159: [180], 160: [181], 161: [182], 162: [183], 163: [184], 164: [185], 165: [186], 166: [187], 167: [188], 168: [189], 169: [190], 170: [191], 171: [192], 172: [193], 173: [194], 174: 
[195], 175: [196], 176: [197], 177: [198], 178: [199], 179: [200], 180: [201], 181: [202], 182: [203], 183: [204], 184: [205], 185: [206], 186: [207], 187: [208], 188: [209], 189: [210], 190: [211], 191: [212], 192: [213], 193: [214], 194: [215], 195: [216], 196: [217], 197: [219], 198: [220], 199: [221], 200: [222], 201: [223], 202: [224], 203: [226], 204: [227], 205: [228], 206: [229], 207: [230], 208: [231], 209: [233], 210: [234], 211: [246], 212: [247], 213: [248], 214: [249], 215: [250], 216: [251], 217: [252], 218: [253], 219: [254], 220: [255], 221: [256], 222: [257], 223: [258], 224: [259], 225: [260], 226: [262], 227: [263], 228: [265], 229: [266], 230: [267], 231: [268], 232: [269], 233: [270], 234: [271], 235: [272], 236: [273], 237: [274], 238: [275], 239: [276], 240: [277], 241: [278], 242: [279], 243: [281], 244: [282], 245: [283], 246: [284], 247: [285], 248: [286], 249: [287], 250: [288], 251: [289], 252: [290], 253: [291], 254: [292], 255: [293], 256: [294], 257: [295], 258: [297], 259: [298], 260: [299], 261: [300], 262: [301], 263: [302], 264: [303], 265: [304], 266: [305], 267: [306], 268: [307], 269: [308], 270: [309], 271: [310], 272: [311], 273: [312], 274: [313], 275: [314], 276: [315], 277: [316], 278: [317], 279: [318], 280: [319], 281: [320], 282: [321], 283: [322], 284: [323], 285: [324], 286: [325], 287: [326], 288: [327], 289: [328], 290: [329], 291: [330], 292: [331], 293: [332], 294: [333], 295: [334], 296: [335], 297: [336], 298: [337], 299: [338], 300: [339], 301: [340], 302: [342], 303: [343], 304: [344], 305: [345], 306: [346], 307: [347], 308: [349], 309: [350], 310: [351], 311: [352], 312: [353], 313: [354], 314: [356], 315: [357], 316: [369], 317: [370], 318: [371], 319: [372], 320: [373], 321: [374], 322: [376], 323: [377], 324: [378], 325: [379], 326: [380], 327: [381], 328: [382], 329: [383], 330: [384], 331: [385], 332: [386], 333: [387], 334: [388], 335: [389], 336: [390], 337: [392], 338: [393], 339: [394], 340: [395], 
341: [396], 342: [397], 343: [398], 344: [399], 345: [400], 346: [401], 347: [402], 348: [403], 349: [404], 350: [405], 351: [406], 352: [408], 353: [409], 354: [410], 355: [411], 356: [412], 357: [413], 358: [414], 359: [415], 360: [416], 361: [417], 362: [418], 363: [419], 364: [420], 365: [421], 366: [422], 367: [424], 368: [425], 369: [426], 370: [427], 371: [428], 372: [429], 373: [430], 374: [431], 375: [432], 376: [433], 377: [434], 378: [436], 379: [437], 380: [439], 381: [440], 382: [446], 383: [447], 384: [448], 385: [449], 386: [450], 387: [451], 388: [452], 389: [453], 390: [454], 391: [456], 392: [457], 393: [458], 394: [459], 395: [460], 396: [461], 397: [462], 398: [463], 399: [464], 400: [466], 401: [467], 402: [468], 403: [469], 404: [470], 405: [471], 406: [472], 407: [473], 408: [474], 409: [476], 410: [477], 411: [478], 412: [479], 413: [480], 414: [481], 415: [483], 416: [484], 417: [485], 418: [486], 419: [487], 420: [488], 421: [492], 422: [493], 423: [494], 424: [495], 425: [496], 426: [497], 427: [499], 428: [500], 429: [501], 430: [502], 431: [503], 432: [504], 433: [505], 434: [506], 435: [507], 436: [508], 437: [509], 438: [510], 439: [511], 440: [512], 441: [513], 442: [515], 443: [516], 444: [517], 445: [518], 446: [519], 447: [520], 448: [521], 449: [522], 450: [523], 451: [524], 452: [525], 453: [526], 454: [527], 455: [528], 456: [529], 457: [531], 458: [532], 459: [533], 460: [534], 461: [535], 462: [536], 463: [537], 464: [538], 465: [539], 466: [540], 467: [541], 468: [542], 469: [543], 470: [544], 471: [545], 472: [547], 473: [548], 474: [549], 475: [550], 476: [551], 477: [552], 478: [553], 479: [554], 480: [555], 481: [556], 482: [557], 483: [559], 484: [560], 485: [562], 486: [563], 487: [569], 488: [570], 489: [571], 490: [572], 491: [573], 492: [574], 493: [575], 494: [576], 495: [577], 496: [579], 497: [580], 498: [581], 499: [582], 500: [583], 501: [584], 502: [585], 503: [586], 504: [587], 505: [589], 506: [590], 507: 
[591], 508: [592], 509: [593], 510: [594], 511: [595], 512: [596], 513: [597], 514: [599], 515: [600], 516: [601], 517: [602], 518: [603], 519: [604], 520: [606], 521: [607], 522: [608], 523: [609], 524: [610], 525: [611], 526: [615], 527: [616], 528: [617], 529: [618], 530: [619], 531: [620], 532: [622], 533: [623], 534: [624], 535: [625], 536: [626], 537: [627], 538: [628], 539: [629], 540: [630], 541: [631], 542: [632], 543: [633], 544: [634], 545: [635], 546: [636], 547: [638], 548: [639], 549: [640], 550: [641], 551: [642], 552: [643], 553: [644], 554: [645], 555: [646], 556: [647], 557: [648], 558: [649], 559: [650], 560: [651], 561: [652], 562: [654], 563: [655], 564: [656], 565: [657], 566: [658], 567: [659], 568: [660], 569: [661], 570: [662], 571: [663], 572: [664], 573: [665], 574: [666], 575: [667], 576: [668], 577: [670], 578: [671], 579: [672], 580: [673], 581: [674], 582: [675], 583: [676], 584: [677], 585: [678], 586: [679], 587: [680], 588: [682], 589: [683], 590: [685], 591: [686], 592: [692], 593: [693], 594: [694], 595: [695], 596: [696], 597: [697], 598: [698], 599: [699], 600: [700], 601: [702], 602: [703], 603: [704], 604: [705], 605: [706], 606: [707], 607: [708], 608: [709], 609: [710], 610: [712], 611: [713], 612: [714], 613: [715], 614: [716], 615: [717], 616: [718], 617: [719], 618: [720], 619: [722], 620: [723], 621: [724], 622: [725], 623: [726], 624: [727], 625: [729], 626: [730], 627: [731], 628: [732], 629: [733], 630: [734], 631: [738], 632: [739], 633: [740], 634: [741], 635: [742], 636: [743], 637: [744], 638: [745], 639: [746], 640: [747], 641: [748], 642: [749], 643: [750], 644: [751], 645: [752], 646: [753], 647: [754], 648: [755], 649: [756], 650: [757], 651: [758], 652: [759], 653: [760], 654: [761], 655: [762], 656: [763], 657: [764], 658: [765], 659: [766], 660: [767], 661: [768], 662: [769], 663: [770], 664: [772], 665: [773], 666: [774], 667: [775], 668: [776], 669: [777], 670: [779], 671: [780], 672: [781], 673: [782], 
674: [783], 675: [784], 676: [788], 677: [789], 678: [790], 679: [791], 680: [792], 681: [793], 682: [794], 683: [795], 684: [796], 685: [797], 686: [798], 687: [799], 688: [800], 689: [801], 690: [802], 691: [803], 692: [804], 693: [805], 694: [806], 695: [807], 696: [808], 697: [809], 698: [810], 699: [811], 700: [812], 701: [813], 702: [814], 703: [815], 704: [816], 705: [817], 706: [818], 707: [819], 708: [820], 709: [822], 710: [823], 711: [824], 712: [825], 713: [826], 714: [827], 715: [829], 716: [830], 717: [831], 718: [832], 719: [833], 720: [834], 721: [838], 722: [839], 723: [841], 724: [842], 725: [844], 726: [845], 727: [851], 728: [852], 729: [853], 730: [854], 731: [855], 732: [856], 733: [857], 734: [858], 735: [859], 736: [861], 737: [862], 738: [863], 739: [864], 740: [865], 741: [866], 742: [867], 743: [868], 744: [869], 745: [871], 746: [872], 747: [873], 748: [874], 749: [875], 750: [876], 751: [877], 752: [878], 753: [879], 754: [881], 755: [882], 756: [883], 757: [884], 758: [885], 759: [886], 760: [888], 761: [889], 762: [890], 763: [891], 764: [892], 765: [893], 766: [894], 767: [895], 768: [897], 769: [898], 770: [900], 771: [901], 772: [907], 773: [908], 774: [909], 775: [910], 776: [911], 777: [912], 778: [913], 779: [914], 780: [915], 781: [917], 782: [918], 783: [919], 784: [920], 785: [921], 786: [922], 787: [923], 788: [924], 789: [925], 790: [927], 791: [928], 792: [929], 793: [930], 794: [931], 795: [932], 796: [933], 797: [934], 798: [935], 799: [937], 800: [938], 801: [939], 802: [940], 803: [941], 804: [942], 805: [944], 806: [945], 807: [946], 808: [947], 809: [948], 810: [949], 811: [950], 812: [951], 813: [953], 814: [954], 815: [956], 816: [957], 817: [963], 818: [964], 819: [965], 820: [966], 821: [967], 822: [968], 823: [969], 824: [970], 825: [971], 826: [973], 827: [974], 828: [975], 829: [976], 830: [977], 831: [978], 832: [979], 833: [980], 834: [981], 835: [983], 836: [984], 837: [985], 838: [986], 839: [987], 840: 
[988], 841: [989], 842: [990], 843: [991], 844: [993], 845: [994], 846: [995], 847: [996], 848: [997], 849: [998], 850: [1000], 851: [1001], 852: [1002], 853: [1003], 854: [1004], 855: [1005], 856: [1006], 857: [1007], 858: [1009], 859: [1010], 860: [1012], 861: [1013], 862: [1018], 863: [1019], 864: [1021], 865: [1022], 866: [1024], 867: [1025], 868: [1030], 869: [1031], 870: [1033], 871: [1034], 872: [1036], 873: [1037], 874: [1045], 875: [1046], 876: [1047], 877: [1048], 878: [1049], 879: [1050], 880: [1051], 881: [1052], 882: [1053], 883: [1054], 884: [1055], 885: [1056], 886: [1057], 887: [1058], 888: [1059], 889: [1060], 890: [1061], 891: [1062], 892: [1064], 893: [1065], 894: [1066], 895: [1067], 896: [1068], 897: [1069], 898: [1070], 899: [1071], 900: [1072], 901: [1073], 902: [1074], 903: [1075], 904: [1076], 905: [1077], 906: [1078], 907: [1079], 908: [1080], 909: [1081], 910: [1083], 911: [1084], 912: [1085], 913: [1086], 914: [1087], 915: [1088], 916: [1089], 917: [1090], 918: [1091], 919: [1092], 920: [1093], 921: [1094], 922: [1095], 923: [1096], 924: [1097], 925: [1098], 926: [1099], 927: [1100], 928: [1102], 929: [1103], 930: [1104], 931: [1105], 932: [1106], 933: [1107], 934: [1109], 935: [1110], 936: [1111], 937: [1112], 938: [1113], 939: [1114], 940: [1116], 941: [1117], 942: [1118], 943: [1119], 944: [1120], 945: [1121]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 6: 3, 7: 4, 11: 5, 12: 6, 28: 7, 29: 8, 30: 9, 34: 10, 35: 11, 36: 12, 40: 13, 41: 14, 42: 15, 46: 16, 47: 17, 48: 18, 49: 19, 50: 20, 51: 21, 55: 22, 56: 23, 57: 24, 58: 25, 59: 26, 60: 27, 61: 28, 62: 29, 63: 30, 64: 31, 65: 32, 66: 33, 67: 34, 68: 35, 69: 36, 73: 37, 74: 38, 75: 39, 76: 40, 77: 41, 78: 42, 79: 43, 80: 44, 81: 45, 82: 46, 83: 47, 84: 48, 85: 49, 86: 50, 87: 51, 91: 52, 92: 53, 93: 54, 94: 55, 95: 56, 96: 57, 97: 58, 98: 59, 99: 60, 100: 61, 101: 62, 102: 63, 103: 64, 104: 65, 105: 66, 109: 67, 110: 68, 111: 69, 112: 70, 113: 71, 114: 72, 
115: 73, 116: 74, 117: 75, 121: 76, 122: 77, 123: 78, 124: 79, 125: 80, 126: 81, 127: 82, 128: 83, 129: 84, 133: 85, 134: 86, 135: 87, 136: 88, 137: 89, 138: 90, 139: 91, 140: 92, 141: 93, 142: 94, 143: 95, 144: 96, 145: 97, 146: 98, 147: 99, 148: 100, 149: 101, 150: 102, 151: 103, 152: 104, 153: 105, 160: 106, 161: 107, 162: 108, 163: 109, 164: 110, 165: 111, 166: 112, 167: 113, 168: 114, 169: 115, 170: 116, 171: 117, 172: 118, 173: 119, 174: 120, 178: 121, 179: 122, 183: 123, 184: 124, 185: 125, 186: 126, 187: 127, 188: 128, 189: 129, 190: 130, 191: 131, 192: 132, 193: 133, 194: 134, 195: 135, 196: 136, 197: 137, 201: 138, 202: 139, 203: 140, 204: 141, 205: 142, 206: 143, 207: 144, 208: 145, 209: 146, 210: 147, 211: 148, 212: 149, 213: 150, 214: 151, 215: 152, 219: 153, 220: 154, 221: 155, 222: 156, 223: 157, 224: 158, 225: 159, 226: 160, 227: 161, 228: 162, 229: 163, 230: 164, 231: 165, 232: 166, 233: 167, 234: 168, 235: 169, 236: 170, 237: 171, 238: 172, 239: 173, 240: 174, 241: 175, 242: 176, 243: 177, 244: 178, 245: 179, 246: 180, 247: 181, 248: 182, 249: 183, 250: 184, 251: 185, 252: 186, 253: 187, 254: 188, 255: 189, 256: 190, 257: 191, 258: 192, 259: 193, 260: 194, 261: 195, 262: 196, 266: 197, 267: 198, 268: 199, 269: 200, 270: 201, 271: 202, 275: 203, 276: 204, 277: 205, 278: 206, 279: 207, 280: 208, 284: 209, 285: 210, 319: 211, 320: 212, 321: 213, 322: 214, 323: 215, 324: 216, 325: 217, 326: 218, 327: 219, 328: 220, 329: 221, 330: 222, 331: 223, 332: 224, 333: 225, 337: 226, 338: 227, 342: 228, 343: 229, 344: 230, 345: 231, 346: 232, 347: 233, 348: 234, 349: 235, 350: 236, 351: 237, 352: 238, 353: 239, 354: 240, 355: 241, 356: 242, 360: 243, 361: 244, 362: 245, 363: 246, 364: 247, 365: 248, 366: 249, 367: 250, 368: 251, 369: 252, 370: 253, 371: 254, 372: 255, 373: 256, 374: 257, 378: 258, 379: 259, 380: 260, 381: 261, 382: 262, 383: 263, 384: 264, 385: 265, 386: 266, 387: 267, 388: 268, 389: 269, 390: 270, 391: 271, 392: 272, 393: 273, 394: 274, 395: 
275, 396: 276, 397: 277, 398: 278, 399: 279, 400: 280, 401: 281, 402: 282, 403: 283, 404: 284, 405: 285, 406: 286, 407: 287, 408: 288, 409: 289, 410: 290, 411: 291, 412: 292, 413: 293, 414: 294, 415: 295, 416: 296, 417: 297, 418: 298, 419: 299, 420: 300, 421: 301, 425: 302, 426: 303, 427: 304, 428: 305, 429: 306, 430: 307, 434: 308, 435: 309, 436: 310, 437: 311, 438: 312, 439: 313, 443: 314, 444: 315, 478: 316, 479: 317, 480: 318, 481: 319, 482: 320, 483: 321, 487: 322, 488: 323, 489: 324, 490: 325, 491: 326, 492: 327, 493: 328, 494: 329, 495: 330, 496: 331, 497: 332, 498: 333, 499: 334, 500: 335, 501: 336, 505: 337, 506: 338, 507: 339, 508: 340, 509: 341, 510: 342, 511: 343, 512: 344, 513: 345, 514: 346, 515: 347, 516: 348, 517: 349, 518: 350, 519: 351, 523: 352, 524: 353, 525: 354, 526: 355, 527: 356, 528: 357, 529: 358, 530: 359, 531: 360, 532: 361, 533: 362, 534: 363, 535: 364, 536: 365, 537: 366, 541: 367, 542: 368, 543: 369, 544: 370, 545: 371, 546: 372, 547: 373, 548: 374, 549: 375, 550: 376, 551: 377, 555: 378, 556: 379, 560: 380, 561: 381, 577: 382, 578: 383, 579: 384, 580: 385, 581: 386, 582: 387, 583: 388, 584: 389, 585: 390, 589: 391, 590: 392, 591: 393, 592: 394, 593: 395, 594: 396, 595: 397, 596: 398, 597: 399, 601: 400, 602: 401, 603: 402, 604: 403, 605: 404, 606: 405, 607: 406, 608: 407, 609: 408, 613: 409, 614: 410, 615: 411, 616: 412, 617: 413, 618: 414, 622: 415, 623: 416, 624: 417, 625: 418, 626: 419, 627: 420, 637: 421, 638: 422, 639: 423, 640: 424, 641: 425, 642: 426, 646: 427, 647: 428, 648: 429, 649: 430, 650: 431, 651: 432, 652: 433, 653: 434, 654: 435, 655: 436, 656: 437, 657: 438, 658: 439, 659: 440, 660: 441, 664: 442, 665: 443, 666: 444, 667: 445, 668: 446, 669: 447, 670: 448, 671: 449, 672: 450, 673: 451, 674: 452, 675: 453, 676: 454, 677: 455, 678: 456, 682: 457, 683: 458, 684: 459, 685: 460, 686: 461, 687: 462, 688: 463, 689: 464, 690: 465, 691: 466, 692: 467, 693: 468, 694: 469, 695: 470, 696: 471, 700: 472, 701: 473, 702: 474, 703: 
475, 704: 476, 705: 477, 706: 478, 707: 479, 708: 480, 709: 481, 710: 482, 714: 483, 715: 484, 719: 485, 720: 486, 736: 487, 737: 488, 738: 489, 739: 490, 740: 491, 741: 492, 742: 493, 743: 494, 744: 495, 748: 496, 749: 497, 750: 498, 751: 499, 752: 500, 753: 501, 754: 502, 755: 503, 756: 504, 760: 505, 761: 506, 762: 507, 763: 508, 764: 509, 765: 510, 766: 511, 767: 512, 768: 513, 772: 514, 773: 515, 774: 516, 775: 517, 776: 518, 777: 519, 781: 520, 782: 521, 783: 522, 784: 523, 785: 524, 786: 525, 796: 526, 797: 527, 798: 528, 799: 529, 800: 530, 801: 531, 805: 532, 806: 533, 807: 534, 808: 535, 809: 536, 810: 537, 811: 538, 812: 539, 813: 540, 814: 541, 815: 542, 816: 543, 817: 544, 818: 545, 819: 546, 823: 547, 824: 548, 825: 549, 826: 550, 827: 551, 828: 552, 829: 553, 830: 554, 831: 555, 832: 556, 833: 557, 834: 558, 835: 559, 836: 560, 837: 561, 841: 562, 842: 563, 843: 564, 844: 565, 845: 566, 846: 567, 847: 568, 848: 569, 849: 570, 850: 571, 851: 572, 852: 573, 853: 574, 854: 575, 855: 576, 859: 577, 860: 578, 861: 579, 862: 580, 863: 581, 864: 582, 865: 583, 866: 584, 867: 585, 868: 586, 869: 587, 873: 588, 874: 589, 878: 590, 879: 591, 895: 592, 896: 593, 897: 594, 898: 595, 899: 596, 900: 597, 901: 598, 902: 599, 903: 600, 907: 601, 908: 602, 909: 603, 910: 604, 911: 605, 912: 606, 913: 607, 914: 608, 915: 609, 919: 610, 920: 611, 921: 612, 922: 613, 923: 614, 924: 615, 925: 616, 926: 617, 927: 618, 931: 619, 932: 620, 933: 621, 934: 622, 935: 623, 936: 624, 940: 625, 941: 626, 942: 627, 943: 628, 944: 629, 945: 630, 955: 631, 956: 632, 957: 633, 958: 634, 959: 635, 960: 636, 961: 637, 962: 638, 963: 639, 964: 640, 965: 641, 966: 642, 967: 643, 968: 644, 969: 645, 970: 646, 971: 647, 972: 648, 973: 649, 974: 650, 975: 651, 976: 652, 977: 653, 978: 654, 979: 655, 980: 656, 981: 657, 982: 658, 983: 659, 984: 660, 985: 661, 986: 662, 987: 663, 991: 664, 992: 665, 993: 666, 994: 667, 995: 668, 996: 669, 1000: 670, 1001: 671, 1002: 672, 1003: 673, 1004: 674, 
1005: 675, 1015: 676, 1016: 677, 1017: 678, 1018: 679, 1019: 680, 1020: 681, 1021: 682, 1022: 683, 1023: 684, 1024: 685, 1025: 686, 1026: 687, 1027: 688, 1028: 689, 1029: 690, 1030: 691, 1031: 692, 1032: 693, 1033: 694, 1034: 695, 1035: 696, 1036: 697, 1037: 698, 1038: 699, 1039: 700, 1040: 701, 1041: 702, 1042: 703, 1043: 704, 1044: 705, 1045: 706, 1046: 707, 1047: 708, 1051: 709, 1052: 710, 1053: 711, 1054: 712, 1055: 713, 1056: 714, 1060: 715, 1061: 716, 1062: 717, 1063: 718, 1064: 719, 1065: 720, 1075: 721, 1076: 722, 1080: 723, 1081: 724, 1085: 725, 1086: 726, 1102: 727, 1103: 728, 1104: 729, 1105: 730, 1106: 731, 1107: 732, 1108: 733, 1109: 734, 1110: 735, 1114: 736, 1115: 737, 1116: 738, 1117: 739, 1118: 740, 1119: 741, 1120: 742, 1121: 743, 1122: 744, 1126: 745, 1127: 746, 1128: 747, 1129: 748, 1130: 749, 1131: 750, 1132: 751, 1133: 752, 1134: 753, 1138: 754, 1139: 755, 1140: 756, 1141: 757, 1142: 758, 1143: 759, 1147: 760, 1148: 761, 1149: 762, 1150: 763, 1151: 764, 1152: 765, 1153: 766, 1154: 767, 1158: 768, 1159: 769, 1163: 770, 1164: 771, 1180: 772, 1181: 773, 1182: 774, 1183: 775, 1184: 776, 1185: 777, 1186: 778, 1187: 779, 1188: 780, 1192: 781, 1193: 782, 1194: 783, 1195: 784, 1196: 785, 1197: 786, 1198: 787, 1199: 788, 1200: 789, 1204: 790, 1205: 791, 1206: 792, 1207: 793, 1208: 794, 1209: 795, 1210: 796, 1211: 797, 1212: 798, 1216: 799, 1217: 800, 1218: 801, 1219: 802, 1220: 803, 1221: 804, 1225: 805, 1226: 806, 1227: 807, 1228: 808, 1229: 809, 1230: 810, 1231: 811, 1232: 812, 1236: 813, 1237: 814, 1241: 815, 1242: 816, 1258: 817, 1259: 818, 1260: 819, 1261: 820, 1262: 821, 1263: 822, 1264: 823, 1265: 824, 1266: 825, 1270: 826, 1271: 827, 1272: 828, 1273: 829, 1274: 830, 1275: 831, 1276: 832, 1277: 833, 1278: 834, 1282: 835, 1283: 836, 1284: 837, 1285: 838, 1286: 839, 1287: 840, 1288: 841, 1289: 842, 1290: 843, 1294: 844, 1295: 845, 1296: 846, 1297: 847, 1298: 848, 1299: 849, 1303: 850, 1304: 851, 1305: 852, 1306: 853, 1307: 854, 1308: 855, 1309: 
856, 1310: 857, 1314: 858, 1315: 859, 1319: 860, 1320: 861, 1333: 862, 1334: 863, 1338: 864, 1339: 865, 1343: 866, 1344: 867, 1357: 868, 1358: 869, 1362: 870, 1363: 871, 1367: 872, 1368: 873, 1396: 874, 1397: 875, 1398: 876, 1399: 877, 1400: 878, 1401: 879, 1402: 880, 1403: 881, 1404: 882, 1405: 883, 1406: 884, 1407: 885, 1408: 886, 1409: 887, 1410: 888, 1411: 889, 1412: 890, 1413: 891, 1417: 892, 1418: 893, 1419: 894, 1420: 895, 1421: 896, 1422: 897, 1423: 898, 1424: 899, 1425: 900, 1426: 901, 1427: 902, 1428: 903, 1429: 904, 1430: 905, 1431: 906, 1432: 907, 1433: 908, 1434: 909, 1438: 910, 1439: 911, 1440: 912, 1441: 913, 1442: 914, 1443: 915, 1444: 916, 1445: 917, 1446: 918, 1447: 919, 1448: 920, 1449: 921, 1450: 922, 1451: 923, 1452: 924, 1453: 925, 1454: 926, 1455: 927, 1459: 928, 1460: 929, 1461: 930, 1462: 931, 1463: 932, 1464: 933, 1468: 934, 1469: 935, 1470: 936, 1471: 937, 1472: 938, 1473: 939, 1477: 940, 1478: 941, 1479: 942, 1480: 943, 1481: 944, 1482: 945} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
+DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 
188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 
546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 
896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.515 s -Wrote files for 2281 helas calls in 46.436 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 5.721 s +Wrote files for 2281 helas calls in 39.057 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.315 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  +ALOHA: aloha creates 5 routines in 0.276 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model 
(create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.310 s +ALOHA: aloha creates 10 routines in 0.271 s VVV1 VVV1 FFV1 @@ -221,22 +250,27 @@ ALOHA: aloha creates 10 routines in 0.310 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.cc +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size 
=', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  ************************************************************ * * * W E L C O M E to * @@ -257,15 +291,14 @@ DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP -run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -289,40 +322,40 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py -Hunk #1 succeeded at 4188 (offset 1 line). 
patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 255 (offset 112 lines). Hunk #3 succeeded at 333 (offset 112 lines). Hunk #4 succeeded at 361 (offset 112 lines). Hunk #5 succeeded at 406 (offset 112 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done. +Hunk #6 succeeded at 9862 (offset 9466 lines). +Hunk #7 succeeded at 19616 (offset 19146 lines). +Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/README +/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/README Run "open index.html" to see more information about this process. 
quit -real 0m57.704s -user 0m56.670s -sys 0m0.842s +real 0m49.632s +user 0m48.102s +sys 0m0.988s diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt index cdeedc7863..5ca005676e 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_file.inc index ec923afd6d..cf4ec946f8 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1_1.o VVVV4_0.o VVVV4P0_1.o FFV1_0.o VVV1_0.o FFV1_2.o VVVV3_0.o VVVV1_0.o VVVV3P0_1.o VVVV1P0_1.o VVV1P0_1.o FFV1P0_3.o +ALOHARoutine = VVVV3_0.o VVVV4P0_1.o VVVV3P0_1.o VVVV1P0_1.o FFV1_1.o FFV1_2.o VVV1P0_1.o VVV1_0.o FFV1_0.o FFV1P0_3.o VVVV1_0.o VVVV4_0.o diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MatrixElementKernels.cc index 74b5239ebf..30257195b6 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MatrixElementKernels.cc @@ -112,17 +112,10 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! 
const std::string tag = "arm neon (128bit as in SSE4.2)"; -#elif defined( __x86_64__ ) || defined( __i386__ ) +#else bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; -#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted - bool known = false; // __builtin_cpu_supports is not supported - // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html - // See https://stackoverflow.com/q/62783908 - // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu - bool ok = true; // this is just an assumption! - const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc index a525c4ba3f..18c4db8539 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc @@ -252,13 +252,13 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][6], +1, w_fp[6], 6 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); - VVV1P0_1( w_fp[7], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[9] ); - VVV1P0_1( w_fp[8], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[7] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[7], w_fp[4], COUPs[0], 0., 0., w_fp[9] ); + VVV1P0_1( w_fp[8], w_fp[5], COUPs[0], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 1 - VVV1_0( w_fp[9], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[9], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += 
cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -283,10 +283,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 1240 *** // Wavefunction(s) for diagram number 2 - VVV1P0_1( w_fp[8], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[11] ); + VVV1P0_1( w_fp[8], w_fp[6], COUPs[0], 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 2 - VVV1_0( w_fp[9], w_fp[11], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[9], w_fp[11], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -314,7 +314,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 3 - VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -331,7 +331,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -348,7 +348,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -369,11 +369,11 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 1240 *** // Wavefunction(s) for diagram number 4 - VVV1P0_1( w_fp[7], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[12] ); - VVV1P0_1( w_fp[8], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[13] ); + VVV1P0_1( w_fp[7], w_fp[5], COUPs[0], 0., 0., w_fp[12] ); + VVV1P0_1( w_fp[8], w_fp[4], COUPs[0], 0., 0., w_fp[13] ); // Amplitude(s) for 
diagram number 4 - VVV1_0( w_fp[12], w_fp[13], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[12], w_fp[13], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -401,7 +401,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[12], w_fp[11], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[12], w_fp[11], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -429,7 +429,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -446,7 +446,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -463,7 +463,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[3] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -484,10 +484,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 1240 *** // Wavefunction(s) for diagram number 7 - VVV1P0_1( w_fp[7], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[14] ); + VVV1P0_1( w_fp[7], w_fp[6], COUPs[0], 0., 0., w_fp[14] ); // Amplitude(s) for diagram number 7 - VVV1_0( w_fp[14], w_fp[13], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + 
VVV1_0( w_fp[14], w_fp[13], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -515,7 +515,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - VVV1_0( w_fp[14], w_fp[10], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[14], w_fp[10], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -543,7 +543,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -560,7 +560,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -577,7 +577,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -598,12 +598,12 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 1240 *** // Wavefunction(s) for diagram number 10 - VVVV1P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] ); - VVVV3P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[16] ); - VVVV4P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] ); + VVVV1P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[15] ); + VVVV3P0_1( w_fp[7], 
w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[16] ); + VVVV4P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[17] ); // Amplitude(s) for diagram number 10 - VVV1_0( w_fp[8], w_fp[6], w_fp[15], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[15], COUPs[0], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -620,7 +620,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -637,7 +637,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[17], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[17], COUPs[0], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -658,12 +658,12 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 1240 *** // Wavefunction(s) for diagram number 11 - VVVV1P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[18] ); - VVVV3P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[19] ); - VVVV4P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[20] ); + VVVV1P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[18] ); + VVVV3P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[19] ); + VVVV4P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[20] ); // Amplitude(s) for diagram number 11 - VVV1_0( w_fp[8], w_fp[5], w_fp[18], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[18], COUPs[0], &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[25] -= amp_sv[0]; @@ -680,7 +680,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[19], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], 
w_fp[19], COUPs[0], &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -697,7 +697,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[20], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[20], COUPs[0], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -718,12 +718,12 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 1240 *** // Wavefunction(s) for diagram number 12 - VVVV1P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] ); - VVVV4P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] ); + VVVV1P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[22] ); + VVVV4P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 12 - VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; @@ -740,7 +740,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -757,7 +757,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -778,10 +778,10 @@ namespace mg5amcCpu // *** DIAGRAM 13 OF 1240 *** // Wavefunction(s) for diagram number 13 - VVV1P0_1( w_fp[4], w_fp[5], 
COUPs[0], 1.0, 0., 0., w_fp[24] ); + VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 13 - VVVV1_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[4] -= amp_sv[0]; @@ -798,7 +798,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -815,7 +815,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[28] -= amp_sv[0]; @@ -836,10 +836,10 @@ namespace mg5amcCpu // *** DIAGRAM 14 OF 1240 *** // Wavefunction(s) for diagram number 14 - VVV1P0_1( w_fp[7], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[25] ); + VVV1P0_1( w_fp[7], w_fp[8], COUPs[0], 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 14 - VVV1_0( w_fp[24], w_fp[6], w_fp[25], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[25], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -864,10 +864,10 @@ namespace mg5amcCpu // *** DIAGRAM 15 OF 1240 *** // Wavefunction(s) for diagram number 15 - VVV1P0_1( w_fp[7], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[26] ); + VVV1P0_1( w_fp[7], w_fp[24], COUPs[0], 0., 0., w_fp[26] ); // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[8], w_fp[6], w_fp[26], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[26], COUPs[0], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -895,7 +895,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 16 - VVV1_0( w_fp[8], w_fp[24], w_fp[14], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[24], w_fp[14], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -920,10 +920,10 @@ namespace mg5amcCpu // *** DIAGRAM 17 OF 1240 *** // Wavefunction(s) for diagram number 17 - VVV1P0_1( w_fp[4], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[27] ); + VVV1P0_1( w_fp[4], w_fp[6], COUPs[0], 0., 0., w_fp[27] ); // Amplitude(s) for diagram number 17 - VVVV1_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[3] += amp_sv[0]; @@ -940,7 +940,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; - VVVV3_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[25] -= amp_sv[0]; @@ -957,7 +957,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVVV4_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[26] -= amp_sv[0]; @@ -981,7 +981,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 18 - VVV1_0( w_fp[27], w_fp[5], w_fp[25], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[25], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1006,10 +1006,10 @@ namespace mg5amcCpu // *** DIAGRAM 19 OF 1240 *** // Wavefunction(s) for diagram number 19 - VVV1P0_1( w_fp[7], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[28] ); + VVV1P0_1( w_fp[7], w_fp[27], COUPs[0], 0., 0., w_fp[28] ); // Amplitude(s) for diagram number 19 - VVV1_0( w_fp[8], w_fp[5], w_fp[28], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[28], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1037,7 +1037,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[8], w_fp[27], w_fp[12], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[27], w_fp[12], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1062,10 +1062,10 @@ namespace mg5amcCpu // *** DIAGRAM 21 OF 1240 *** // Wavefunction(s) for diagram number 21 - VVV1P0_1( w_fp[5], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] ); + VVV1P0_1( w_fp[5], w_fp[6], COUPs[0], 0., 0., w_fp[29] ); // Amplitude(s) for diagram number 21 - VVVV1_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -1082,7 +1082,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -1099,7 +1099,7 @@ namespace mg5amcCpu jamp_sv[116] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[7], w_fp[8], 
w_fp[4], w_fp[29], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; @@ -1123,7 +1123,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - VVV1_0( w_fp[4], w_fp[29], w_fp[25], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[25], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1151,7 +1151,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 23 - VVV1_0( w_fp[8], w_fp[29], w_fp[9], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[29], w_fp[9], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1176,10 +1176,10 @@ namespace mg5amcCpu // *** DIAGRAM 24 OF 1240 *** // Wavefunction(s) for diagram number 24 - VVV1P0_1( w_fp[7], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[25] ); + VVV1P0_1( w_fp[7], w_fp[29], COUPs[0], 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 24 - VVV1_0( w_fp[8], w_fp[4], w_fp[25], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[25], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1204,12 +1204,12 @@ namespace mg5amcCpu // *** DIAGRAM 25 OF 1240 *** // Wavefunction(s) for diagram number 25 - VVVV1P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[30] ); - VVVV3P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[31] ); - VVVV4P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[32] ); + VVVV1P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[30] ); + VVVV3P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[31] ); + VVVV4P0_1( w_fp[4], 
w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[32] ); // Amplitude(s) for diagram number 25 - VVV1_0( w_fp[7], w_fp[8], w_fp[30], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[8], w_fp[30], COUPs[0], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -1226,7 +1226,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[7], w_fp[8], w_fp[31], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[8], w_fp[31], COUPs[0], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -1243,7 +1243,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[7], w_fp[8], w_fp[32], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[8], w_fp[32], COUPs[0], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -1264,12 +1264,12 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 1240 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[33] ); - FFV1_2( w_fp[3], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] ); - FFV1_1( w_fp[33], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[35] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[33] ); + FFV1_2( w_fp[3], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[34] ); + FFV1_1( w_fp[33], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[35] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[34], w_fp[35], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[35], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1280,10 +1280,10 @@ namespace mg5amcCpu // *** DIAGRAM 27 OF 1240 *** // Wavefunction(s) for diagram number 27 - FFV1_1( w_fp[33], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[36] ); + FFV1_1( w_fp[33], 
w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[36] ); // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[34], w_fp[36], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[36], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1294,10 +1294,10 @@ namespace mg5amcCpu // *** DIAGRAM 28 OF 1240 *** // Wavefunction(s) for diagram number 28 - FFV1P0_3( w_fp[3], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[37] ); + FFV1P0_3( w_fp[3], w_fp[33], COUPs[1], 0., 0., w_fp[37] ); // Amplitude(s) for diagram number 28 - VVV1_0( w_fp[12], w_fp[37], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[12], w_fp[37], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1317,7 +1317,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[3], w_fp[36], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[36], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1333,7 +1333,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 30 - VVV1_0( w_fp[14], w_fp[37], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[14], w_fp[37], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1353,7 +1353,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 31 - FFV1_0( w_fp[3], w_fp[35], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[35], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1369,7 +1369,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1378,7 +1378,7 @@ namespace mg5amcCpu jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], &_fp[0] ); jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1387,7 +1387,7 @@ namespace mg5amcCpu jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1400,11 +1400,11 @@ namespace mg5amcCpu // *** DIAGRAM 33 OF 1240 *** // Wavefunction(s) for diagram number 33 - FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[38] ); - FFV1_1( w_fp[33], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] ); + FFV1_2( w_fp[3], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[38] ); + FFV1_1( w_fp[33], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[39] ); // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[38], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1415,10 +1415,10 @@ 
namespace mg5amcCpu // *** DIAGRAM 34 OF 1240 *** // Wavefunction(s) for diagram number 34 - FFV1_2( w_fp[38], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] ); + FFV1_2( w_fp[38], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[40] ); // Amplitude(s) for diagram number 34 - FFV1_0( w_fp[40], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 34 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1432,7 +1432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - FFV1_0( w_fp[38], w_fp[33], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1445,10 +1445,10 @@ namespace mg5amcCpu // *** DIAGRAM 36 OF 1240 *** // Wavefunction(s) for diagram number 36 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[41] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[41] ); // Amplitude(s) for diagram number 36 - FFV1_0( w_fp[41], w_fp[39], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1459,10 +1459,10 @@ namespace mg5amcCpu // *** DIAGRAM 37 OF 1240 *** // Wavefunction(s) for diagram number 37 - FFV1_2( w_fp[41], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[42] ); + FFV1_2( w_fp[41], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[42] ); // Amplitude(s) for diagram number 37 - FFV1_0( w_fp[42], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[42], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 37 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1476,7 +1476,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 38 - FFV1_0( w_fp[41], w_fp[33], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 38 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1492,7 +1492,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 39 - FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 39 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1508,7 +1508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 40 - FFV1_0( w_fp[34], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 40 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1524,7 +1524,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 41 - FFV1_0( w_fp[3], w_fp[33], w_fp[25], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[25], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 41 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1541,11 +1541,11 @@ namespace mg5amcCpu // *** DIAGRAM 42 OF 1240 *** // Wavefunction(s) for diagram number 42 - FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] ); - FFV1_1( w_fp[39], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[43] ); + FFV1_1( w_fp[2], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[39] ); + FFV1_1( w_fp[39], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[43] ); // 
Amplitude(s) for diagram number 42 - FFV1_0( w_fp[34], w_fp[43], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[43], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 42 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1556,10 +1556,10 @@ namespace mg5amcCpu // *** DIAGRAM 43 OF 1240 *** // Wavefunction(s) for diagram number 43 - FFV1_1( w_fp[39], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[44] ); + FFV1_1( w_fp[39], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[44] ); // Amplitude(s) for diagram number 43 - FFV1_0( w_fp[34], w_fp[44], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[44], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 43 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1570,10 +1570,10 @@ namespace mg5amcCpu // *** DIAGRAM 44 OF 1240 *** // Wavefunction(s) for diagram number 44 - FFV1P0_3( w_fp[3], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[45] ); + FFV1P0_3( w_fp[3], w_fp[39], COUPs[1], 0., 0., w_fp[45] ); // Amplitude(s) for diagram number 44 - VVV1_0( w_fp[9], w_fp[45], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[9], w_fp[45], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 44 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1593,7 +1593,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 45 - FFV1_0( w_fp[3], w_fp[44], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[44], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 45 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1609,7 +1609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 46 - VVV1_0( w_fp[14], w_fp[45], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( 
w_fp[14], w_fp[45], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 46 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1629,7 +1629,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 47 - FFV1_0( w_fp[3], w_fp[43], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[43], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 47 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1645,7 +1645,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 48 - FFV1_0( w_fp[3], w_fp[39], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[18], COUPs[1], &_fp[0] ); jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1654,7 +1654,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[19], COUPs[1], &_fp[0] ); jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1663,7 +1663,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[20], COUPs[1], &_fp[0] ); jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1676,11 +1676,11 @@ namespace mg5amcCpu // *** DIAGRAM 49 OF 1240 *** // Wavefunction(s) for diagram number 49 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[46] ); - FFV1_1( w_fp[39], w_fp[7], 
COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[46] ); + FFV1_1( w_fp[39], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[47] ); // Amplitude(s) for diagram number 49 - FFV1_0( w_fp[46], w_fp[47], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 49 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1691,10 +1691,10 @@ namespace mg5amcCpu // *** DIAGRAM 50 OF 1240 *** // Wavefunction(s) for diagram number 50 - FFV1_2( w_fp[46], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] ); + FFV1_2( w_fp[46], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[48] ); // Amplitude(s) for diagram number 50 - FFV1_0( w_fp[48], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 50 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1708,7 +1708,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 51 - FFV1_0( w_fp[46], w_fp[39], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 51 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1724,7 +1724,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 52 - FFV1_0( w_fp[41], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 52 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1738,7 +1738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 53 - FFV1_0( w_fp[42], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( 
w_fp[42], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 53 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1752,7 +1752,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 54 - FFV1_0( w_fp[41], w_fp[39], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 54 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1768,7 +1768,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 55 - FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 55 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1784,7 +1784,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 56 - FFV1_0( w_fp[34], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 56 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1800,7 +1800,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 57 - FFV1_0( w_fp[3], w_fp[39], w_fp[28], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[28], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 57 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1817,11 +1817,11 @@ namespace mg5amcCpu // *** DIAGRAM 58 OF 1240 *** // Wavefunction(s) for diagram number 58 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] ); - FFV1_1( w_fp[47], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[49] ); + FFV1_1( w_fp[2], w_fp[6], 
COUPs[1], cIPD[0], cIPD[1], w_fp[47] ); + FFV1_1( w_fp[47], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[49] ); // Amplitude(s) for diagram number 58 - FFV1_0( w_fp[34], w_fp[49], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[49], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 58 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1832,10 +1832,10 @@ namespace mg5amcCpu // *** DIAGRAM 59 OF 1240 *** // Wavefunction(s) for diagram number 59 - FFV1_1( w_fp[47], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[50] ); + FFV1_1( w_fp[47], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[50] ); // Amplitude(s) for diagram number 59 - FFV1_0( w_fp[34], w_fp[50], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[50], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 59 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1846,10 +1846,10 @@ namespace mg5amcCpu // *** DIAGRAM 60 OF 1240 *** // Wavefunction(s) for diagram number 60 - FFV1P0_3( w_fp[3], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[51] ); + FFV1P0_3( w_fp[3], w_fp[47], COUPs[1], 0., 0., w_fp[51] ); // Amplitude(s) for diagram number 60 - VVV1_0( w_fp[9], w_fp[51], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[9], w_fp[51], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 60 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1869,7 +1869,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 61 - FFV1_0( w_fp[3], w_fp[50], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[50], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 61 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1885,7 +1885,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 62 - VVV1_0( w_fp[12], w_fp[51], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[12], w_fp[51], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 62 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1905,7 +1905,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 63 - FFV1_0( w_fp[3], w_fp[49], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[49], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 63 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1921,7 +1921,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 64 - FFV1_0( w_fp[3], w_fp[47], w_fp[15], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[15], COUPs[1], &_fp[0] ); jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1930,7 +1930,7 @@ namespace mg5amcCpu jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], &_fp[0] ); jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1939,7 +1939,7 @@ namespace mg5amcCpu jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[17], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[17], COUPs[1], &_fp[0] ); jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1952,10 +1952,10 @@ namespace mg5amcCpu // *** DIAGRAM 65 OF 1240 *** // Wavefunction(s) for 
diagram number 65 - FFV1_1( w_fp[47], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] ); + FFV1_1( w_fp[47], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[52] ); // Amplitude(s) for diagram number 65 - FFV1_0( w_fp[46], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 65 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1969,7 +1969,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 66 - FFV1_0( w_fp[48], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 66 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1983,7 +1983,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 67 - FFV1_0( w_fp[46], w_fp[47], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 67 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1999,7 +1999,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 68 - FFV1_0( w_fp[38], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 68 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2013,7 +2013,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 69 - FFV1_0( w_fp[40], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 69 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2027,7 +2027,7 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 70 - FFV1_0( w_fp[38], w_fp[47], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 70 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2043,7 +2043,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 71 - FFV1_0( w_fp[3], w_fp[52], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 71 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2059,7 +2059,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 72 - FFV1_0( w_fp[34], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 72 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2075,7 +2075,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 73 - FFV1_0( w_fp[3], w_fp[47], w_fp[26], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[26], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 73 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2092,11 +2092,11 @@ namespace mg5amcCpu // *** DIAGRAM 74 OF 1240 *** // Wavefunction(s) for diagram number 74 - FFV1_1( w_fp[2], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] ); - FFV1_2( w_fp[46], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] ); + FFV1_1( w_fp[2], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[52] ); + FFV1_2( w_fp[46], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[7] ); // Amplitude(s) for diagram number 74 - FFV1_0( w_fp[7], w_fp[52], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[52], 
w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 74 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2107,10 +2107,10 @@ namespace mg5amcCpu // *** DIAGRAM 75 OF 1240 *** // Wavefunction(s) for diagram number 75 - FFV1_2( w_fp[46], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[53] ); + FFV1_2( w_fp[46], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[53] ); // Amplitude(s) for diagram number 75 - FFV1_0( w_fp[53], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 75 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2121,10 +2121,10 @@ namespace mg5amcCpu // *** DIAGRAM 76 OF 1240 *** // Wavefunction(s) for diagram number 76 - FFV1P0_3( w_fp[46], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[54] ); + FFV1P0_3( w_fp[46], w_fp[2], COUPs[1], 0., 0., w_fp[54] ); // Amplitude(s) for diagram number 76 - VVV1_0( w_fp[12], w_fp[54], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[12], w_fp[54], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 76 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2144,7 +2144,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 77 - FFV1_0( w_fp[53], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 77 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2160,7 +2160,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 78 - VVV1_0( w_fp[14], w_fp[54], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[14], w_fp[54], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 78 ) numerators_sv += 
cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2180,7 +2180,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 79 - FFV1_0( w_fp[7], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 79 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2196,7 +2196,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 80 - FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2205,7 +2205,7 @@ namespace mg5amcCpu jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2214,7 +2214,7 @@ namespace mg5amcCpu jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2230,7 +2230,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 81 - FFV1_0( w_fp[46], w_fp[52], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[52], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 81 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += 
cxabs2( amp_sv[0] ); @@ -2246,7 +2246,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 82 - FFV1_0( w_fp[48], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 82 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2262,7 +2262,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 83 - FFV1_0( w_fp[46], w_fp[2], w_fp[25], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[25], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 83 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2279,10 +2279,10 @@ namespace mg5amcCpu // *** DIAGRAM 84 OF 1240 *** // Wavefunction(s) for diagram number 84 - FFV1_2( w_fp[38], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[25] ); + FFV1_2( w_fp[38], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[25] ); // Amplitude(s) for diagram number 84 - FFV1_0( w_fp[25], w_fp[52], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[52], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 84 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2293,10 +2293,10 @@ namespace mg5amcCpu // *** DIAGRAM 85 OF 1240 *** // Wavefunction(s) for diagram number 85 - FFV1_2( w_fp[38], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] ); + FFV1_2( w_fp[38], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[48] ); // Amplitude(s) for diagram number 85 - FFV1_0( w_fp[48], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 85 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2307,10 +2307,10 @@ namespace mg5amcCpu // *** DIAGRAM 86 OF 1240 *** 
// Wavefunction(s) for diagram number 86 - FFV1P0_3( w_fp[38], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[23] ); + FFV1P0_3( w_fp[38], w_fp[2], COUPs[1], 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 86 - VVV1_0( w_fp[9], w_fp[23], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[9], w_fp[23], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 86 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2330,7 +2330,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 87 - FFV1_0( w_fp[48], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 87 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2346,7 +2346,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 88 - VVV1_0( w_fp[14], w_fp[23], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[14], w_fp[23], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 88 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2366,7 +2366,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 89 - FFV1_0( w_fp[25], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 89 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2382,7 +2382,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 90 - FFV1_0( w_fp[38], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2391,7 +2391,7 @@ namespace mg5amcCpu jamp_sv[68] 
-= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2400,7 +2400,7 @@ namespace mg5amcCpu jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[20], COUPs[1], &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2416,7 +2416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 91 - FFV1_0( w_fp[38], w_fp[52], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[52], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 91 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2432,7 +2432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 92 - FFV1_0( w_fp[40], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 92 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2448,7 +2448,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 93 - FFV1_0( w_fp[38], w_fp[2], w_fp[28], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[28], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 93 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2465,10 +2465,10 @@ namespace mg5amcCpu // *** DIAGRAM 94 OF 1240 *** // 
Wavefunction(s) for diagram number 94 - FFV1_2( w_fp[41], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[28] ); + FFV1_2( w_fp[41], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[28] ); // Amplitude(s) for diagram number 94 - FFV1_0( w_fp[28], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 94 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2479,10 +2479,10 @@ namespace mg5amcCpu // *** DIAGRAM 95 OF 1240 *** // Wavefunction(s) for diagram number 95 - FFV1_2( w_fp[41], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] ); + FFV1_2( w_fp[41], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[40] ); // Amplitude(s) for diagram number 95 - FFV1_0( w_fp[40], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 95 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2493,10 +2493,10 @@ namespace mg5amcCpu // *** DIAGRAM 96 OF 1240 *** // Wavefunction(s) for diagram number 96 - FFV1P0_3( w_fp[41], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[20] ); + FFV1P0_3( w_fp[41], w_fp[2], COUPs[1], 0., 0., w_fp[20] ); // Amplitude(s) for diagram number 96 - VVV1_0( w_fp[9], w_fp[20], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[9], w_fp[20], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 96 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2516,7 +2516,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 97 - FFV1_0( w_fp[40], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 97 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( 
amp_sv[0] ); @@ -2532,7 +2532,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 98 - VVV1_0( w_fp[12], w_fp[20], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[12], w_fp[20], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 98 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2552,7 +2552,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 99 - FFV1_0( w_fp[28], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 99 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2568,7 +2568,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 100 - FFV1_0( w_fp[41], w_fp[2], w_fp[15], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[15], COUPs[1], &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2577,7 +2577,7 @@ namespace mg5amcCpu jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2586,7 +2586,7 @@ namespace mg5amcCpu jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2602,7 +2602,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 101 - FFV1_0( w_fp[41], w_fp[52], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[52], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 101 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2618,7 +2618,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 102 - FFV1_0( w_fp[42], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[42], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 102 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2634,7 +2634,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 103 - FFV1_0( w_fp[41], w_fp[2], w_fp[26], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[26], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 103 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2651,10 +2651,10 @@ namespace mg5amcCpu // *** DIAGRAM 104 OF 1240 *** // Wavefunction(s) for diagram number 104 - FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[26] ); + FFV1_2( w_fp[3], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[26] ); // Amplitude(s) for diagram number 104 - FFV1_0( w_fp[26], w_fp[52], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[52], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 104 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2667,10 +2667,10 @@ namespace mg5amcCpu // *** DIAGRAM 105 OF 1240 *** // Wavefunction(s) for diagram number 105 - VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[42] ); + VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 0., 0., w_fp[42] ); // Amplitude(s) for diagram number 105 - FFV1_0( w_fp[3], w_fp[52], w_fp[42], COUPs[1], 
1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[42], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 105 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2687,10 +2687,10 @@ namespace mg5amcCpu // *** DIAGRAM 106 OF 1240 *** // Wavefunction(s) for diagram number 106 - FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] ); + FFV1_1( w_fp[2], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[17] ); // Amplitude(s) for diagram number 106 - FFV1_0( w_fp[34], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 106 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2706,7 +2706,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 107 - FFV1_0( w_fp[34], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 107 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2726,7 +2726,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 108 - FFV1_0( w_fp[3], w_fp[17], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 108 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2746,7 +2746,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 109 - FFV1_0( w_fp[26], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 109 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2763,10 +2763,10 @@ namespace mg5amcCpu // *** 
DIAGRAM 110 OF 1240 *** // Wavefunction(s) for diagram number 110 - FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); + FFV1_2( w_fp[3], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 110 - FFV1_0( w_fp[14], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 110 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2779,10 +2779,10 @@ namespace mg5amcCpu // *** DIAGRAM 111 OF 1240 *** // Wavefunction(s) for diagram number 111 - VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 0., 0., w_fp[16] ); // Amplitude(s) for diagram number 111 - FFV1_0( w_fp[3], w_fp[52], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 111 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2799,10 +2799,10 @@ namespace mg5amcCpu // *** DIAGRAM 112 OF 1240 *** // Wavefunction(s) for diagram number 112 - FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] ); + FFV1_1( w_fp[2], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[15] ); // Amplitude(s) for diagram number 112 - FFV1_0( w_fp[34], w_fp[15], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[15], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 112 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2818,7 +2818,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 113 - FFV1_0( w_fp[34], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 113 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2838,7 +2838,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 114 - FFV1_0( w_fp[3], w_fp[15], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[15], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 114 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2858,7 +2858,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 115 - FFV1_0( w_fp[14], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 115 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2875,10 +2875,10 @@ namespace mg5amcCpu // *** DIAGRAM 116 OF 1240 *** // Wavefunction(s) for diagram number 116 - FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[3], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 116 - FFV1_0( w_fp[12], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 116 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2891,10 +2891,10 @@ namespace mg5amcCpu // *** DIAGRAM 117 OF 1240 *** // Wavefunction(s) for diagram number 117 - VVV1P0_1( w_fp[4], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[19] ); + VVV1P0_1( w_fp[4], w_fp[29], COUPs[0], 0., 0., w_fp[19] ); // Amplitude(s) for diagram number 117 - FFV1_0( w_fp[3], w_fp[52], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[19], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 117 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2911,10 +2911,10 @@ namespace 
mg5amcCpu // *** DIAGRAM 118 OF 1240 *** // Wavefunction(s) for diagram number 118 - FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[18] ); + FFV1_1( w_fp[2], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[18] ); // Amplitude(s) for diagram number 118 - FFV1_0( w_fp[34], w_fp[18], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[18], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 118 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2930,7 +2930,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 119 - FFV1_0( w_fp[34], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 119 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2950,7 +2950,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 120 - FFV1_0( w_fp[3], w_fp[18], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[18], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 120 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2970,7 +2970,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 121 - FFV1_0( w_fp[12], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 121 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2990,7 +2990,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 122 - FFV1_0( w_fp[3], w_fp[52], w_fp[30], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[30], COUPs[1], &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) 
* amp_sv[0]; @@ -2999,7 +2999,7 @@ namespace mg5amcCpu jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[52], w_fp[31], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[31], COUPs[1], &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -3008,7 +3008,7 @@ namespace mg5amcCpu jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[52], w_fp[32], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[32], COUPs[1], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3024,7 +3024,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 123 - FFV1_0( w_fp[34], w_fp[2], w_fp[30], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[30], COUPs[1], &_fp[0] ); jamp_sv[64] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; @@ -3033,7 +3033,7 @@ namespace mg5amcCpu jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[31], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[31], COUPs[1], &_fp[0] ); jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3042,7 +3042,7 @@ namespace mg5amcCpu jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[32], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[32], COUPs[1], &_fp[0] ); jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) 
* amp_sv[0]; jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3055,13 +3055,13 @@ namespace mg5amcCpu // *** DIAGRAM 124 OF 1240 *** // Wavefunction(s) for diagram number 124 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] ); - FFV1_1( w_fp[34], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[52], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[34] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[52] ); + FFV1_1( w_fp[34], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[52], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 124 - FFV1_0( w_fp[22], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 124 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3071,10 +3071,10 @@ namespace mg5amcCpu // *** DIAGRAM 125 OF 1240 *** // Wavefunction(s) for diagram number 125 - FFV1_2( w_fp[52], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[52], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 125 - FFV1_0( w_fp[21], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 125 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3084,11 +3084,11 @@ namespace mg5amcCpu // *** DIAGRAM 126 OF 1240 *** // Wavefunction(s) for diagram number 126 - FFV1_1( w_fp[34], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[55] ); - FFV1_2( w_fp[52], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[56] ); + FFV1_1( w_fp[34], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[55] ); + FFV1_2( w_fp[52], 
w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[56] ); // Amplitude(s) for diagram number 126 - FFV1_0( w_fp[56], w_fp[55], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[55], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 126 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3101,7 +3101,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 127 - FFV1_0( w_fp[21], w_fp[55], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[55], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 127 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3111,10 +3111,10 @@ namespace mg5amcCpu // *** DIAGRAM 128 OF 1240 *** // Wavefunction(s) for diagram number 128 - FFV1_1( w_fp[34], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[57] ); + FFV1_1( w_fp[34], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[57] ); // Amplitude(s) for diagram number 128 - FFV1_0( w_fp[56], w_fp[57], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[57], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 128 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3127,7 +3127,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 129 - FFV1_0( w_fp[22], w_fp[57], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[57], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 129 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3137,10 +3137,10 @@ namespace mg5amcCpu // *** DIAGRAM 130 OF 1240 *** // Wavefunction(s) for diagram number 130 - FFV1P0_3( w_fp[52], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[58] ); + FFV1P0_3( w_fp[52], w_fp[34], COUPs[1], 0., 0., w_fp[58] ); // Amplitude(s) for diagram number 130 - VVV1_0( 
w_fp[24], w_fp[6], w_fp[58], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[58], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 130 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3153,10 +3153,10 @@ namespace mg5amcCpu // *** DIAGRAM 131 OF 1240 *** // Wavefunction(s) for diagram number 131 - FFV1_1( w_fp[34], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] ); + FFV1_1( w_fp[34], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[59] ); // Amplitude(s) for diagram number 131 - FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 131 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3170,7 +3170,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 132 - FFV1_0( w_fp[52], w_fp[57], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[57], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 132 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3184,7 +3184,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 133 - VVV1_0( w_fp[27], w_fp[5], w_fp[58], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[58], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 133 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3197,10 +3197,10 @@ namespace mg5amcCpu // *** DIAGRAM 134 OF 1240 *** // Wavefunction(s) for diagram number 134 - FFV1_1( w_fp[34], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); + FFV1_1( w_fp[34], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); // Amplitude(s) for diagram number 134 - FFV1_0( w_fp[52], w_fp[60], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], 
w_fp[60], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 134 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3214,7 +3214,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 135 - FFV1_0( w_fp[52], w_fp[55], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[55], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 135 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3228,7 +3228,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 136 - VVV1_0( w_fp[4], w_fp[29], w_fp[58], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[58], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 136 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3244,7 +3244,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 137 - FFV1_0( w_fp[52], w_fp[9], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[9], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 137 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3255,10 +3255,10 @@ namespace mg5amcCpu // *** DIAGRAM 138 OF 1240 *** // Wavefunction(s) for diagram number 138 - FFV1_1( w_fp[34], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] ); + FFV1_1( w_fp[34], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[58] ); // Amplitude(s) for diagram number 138 - FFV1_0( w_fp[52], w_fp[58], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[58], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 138 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3272,17 +3272,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 
139 - FFV1_0( w_fp[52], w_fp[34], w_fp[30], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[34], w_fp[30], COUPs[1], &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[11] -= amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[34], w_fp[31], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[34], w_fp[31], COUPs[1], &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[34], w_fp[32], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[34], w_fp[32], COUPs[1], &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -3291,12 +3291,12 @@ namespace mg5amcCpu // *** DIAGRAM 140 OF 1240 *** // Wavefunction(s) for diagram number 140 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[61] ); - FFV1P0_3( w_fp[3], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[62] ); - VVV1P0_1( w_fp[61], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[63] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[61] ); + FFV1P0_3( w_fp[3], w_fp[34], COUPs[1], 0., 0., w_fp[62] ); + VVV1P0_1( w_fp[61], w_fp[5], COUPs[0], 0., 0., w_fp[63] ); // Amplitude(s) for diagram number 140 - VVV1_0( w_fp[62], w_fp[63], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[63], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 140 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3313,10 +3313,10 @@ namespace mg5amcCpu // *** DIAGRAM 141 OF 1240 *** // Wavefunction(s) for diagram number 141 - VVV1P0_1( w_fp[61], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[64] ); + VVV1P0_1( w_fp[61], w_fp[6], COUPs[0], 0., 0., w_fp[64] ); // Amplitude(s) for diagram number 141 - VVV1_0( w_fp[62], w_fp[64], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[64], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 141 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3336,7 +3336,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 142 - VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -3345,7 +3345,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -3354,7 +3354,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3367,10 +3367,10 @@ namespace mg5amcCpu // *** DIAGRAM 143 OF 1240 *** // Wavefunction(s) for diagram number 143 - FFV1_2( w_fp[3], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[65] ); + FFV1_2( w_fp[3], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[65] ); // Amplitude(s) for diagram number 143 - FFV1_0( w_fp[65], w_fp[55], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[55], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 143 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3384,7 +3384,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 144 - FFV1_0( w_fp[3], 
w_fp[55], w_fp[64], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[64], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 144 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3400,7 +3400,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 145 - FFV1_0( w_fp[65], w_fp[57], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[57], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 145 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3414,7 +3414,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 146 - FFV1_0( w_fp[3], w_fp[57], w_fp[63], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[63], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 146 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3427,10 +3427,10 @@ namespace mg5amcCpu // *** DIAGRAM 147 OF 1240 *** // Wavefunction(s) for diagram number 147 - FFV1_1( w_fp[34], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] ); + FFV1_1( w_fp[34], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[66] ); // Amplitude(s) for diagram number 147 - FFV1_0( w_fp[38], w_fp[66], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[66], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 147 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3441,10 +3441,10 @@ namespace mg5amcCpu // *** DIAGRAM 148 OF 1240 *** // Wavefunction(s) for diagram number 148 - FFV1P0_3( w_fp[38], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[67] ); + FFV1P0_3( w_fp[38], w_fp[34], COUPs[1], 0., 0., w_fp[67] ); // Amplitude(s) for diagram number 148 - VVV1_0( w_fp[61], w_fp[6], w_fp[67], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[67], COUPs[0], 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 148 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3460,7 +3460,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 149 - FFV1_0( w_fp[38], w_fp[57], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[57], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 149 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3474,7 +3474,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 150 - FFV1_0( w_fp[41], w_fp[66], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[66], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 150 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3485,10 +3485,10 @@ namespace mg5amcCpu // *** DIAGRAM 151 OF 1240 *** // Wavefunction(s) for diagram number 151 - FFV1P0_3( w_fp[41], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[68] ); + FFV1P0_3( w_fp[41], w_fp[34], COUPs[1], 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 151 - VVV1_0( w_fp[61], w_fp[5], w_fp[68], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[68], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 151 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3504,7 +3504,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 152 - FFV1_0( w_fp[41], w_fp[55], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[55], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 152 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3518,7 +3518,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 153 - FFV1_0( w_fp[3], w_fp[66], w_fp[29], 
COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[66], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 153 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3534,7 +3534,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 154 - VVV1_0( w_fp[61], w_fp[29], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[29], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 154 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3554,7 +3554,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 155 - FFV1_0( w_fp[3], w_fp[58], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[58], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 155 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3567,11 +3567,11 @@ namespace mg5amcCpu // *** DIAGRAM 156 OF 1240 *** // Wavefunction(s) for diagram number 156 - VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[66] ); - VVV1P0_1( w_fp[66], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[69] ); + VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 0., 0., w_fp[66] ); + VVV1P0_1( w_fp[66], w_fp[4], COUPs[0], 0., 0., w_fp[69] ); // Amplitude(s) for diagram number 156 - VVV1_0( w_fp[62], w_fp[69], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[69], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 156 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3588,10 +3588,10 @@ namespace mg5amcCpu // *** DIAGRAM 157 OF 1240 *** // Wavefunction(s) for diagram number 157 - VVV1P0_1( w_fp[66], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[70] ); + VVV1P0_1( w_fp[66], w_fp[6], COUPs[0], 0., 0., w_fp[70] ); // Amplitude(s) for diagram number 157 - VVV1_0( w_fp[62], 
w_fp[70], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[70], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 157 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3611,7 +3611,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 158 - VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3620,7 +3620,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3629,7 +3629,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3642,10 +3642,10 @@ namespace mg5amcCpu // *** DIAGRAM 159 OF 1240 *** // Wavefunction(s) for diagram number 159 - FFV1_2( w_fp[3], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); + FFV1_2( w_fp[3], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); // Amplitude(s) for diagram number 159 - FFV1_0( w_fp[71], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 159 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3659,7 +3659,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 160 - FFV1_0( w_fp[3], w_fp[9], w_fp[70], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[70], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 160 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3675,7 +3675,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 161 - FFV1_0( w_fp[71], w_fp[57], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[57], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 161 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3689,7 +3689,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 162 - FFV1_0( w_fp[3], w_fp[57], w_fp[69], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[69], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 162 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3702,10 +3702,10 @@ namespace mg5amcCpu // *** DIAGRAM 163 OF 1240 *** // Wavefunction(s) for diagram number 163 - FFV1_1( w_fp[34], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] ); + FFV1_1( w_fp[34], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[72] ); // Amplitude(s) for diagram number 163 - FFV1_0( w_fp[46], w_fp[72], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[72], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 163 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3716,10 +3716,10 @@ namespace mg5amcCpu // *** DIAGRAM 164 OF 1240 *** // Wavefunction(s) for diagram number 164 - FFV1P0_3( w_fp[46], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[73] ); + 
FFV1P0_3( w_fp[46], w_fp[34], COUPs[1], 0., 0., w_fp[73] ); // Amplitude(s) for diagram number 164 - VVV1_0( w_fp[66], w_fp[6], w_fp[73], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[73], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 164 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3735,7 +3735,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 165 - FFV1_0( w_fp[46], w_fp[57], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[57], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 165 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3749,7 +3749,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 166 - FFV1_0( w_fp[41], w_fp[72], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[72], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 166 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3763,7 +3763,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 167 - VVV1_0( w_fp[66], w_fp[4], w_fp[68], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[68], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 167 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3779,7 +3779,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 168 - FFV1_0( w_fp[41], w_fp[9], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[9], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 168 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3793,7 +3793,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 169 - FFV1_0( w_fp[3], 
w_fp[72], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[72], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 169 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3809,7 +3809,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 170 - VVV1_0( w_fp[66], w_fp[27], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[27], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 170 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3829,7 +3829,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 171 - FFV1_0( w_fp[3], w_fp[60], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[60], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 171 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3842,11 +3842,11 @@ namespace mg5amcCpu // *** DIAGRAM 172 OF 1240 *** // Wavefunction(s) for diagram number 172 - VVV1P0_1( w_fp[1], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[72] ); - VVV1P0_1( w_fp[72], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[74] ); + VVV1P0_1( w_fp[1], w_fp[6], COUPs[0], 0., 0., w_fp[72] ); + VVV1P0_1( w_fp[72], w_fp[4], COUPs[0], 0., 0., w_fp[74] ); // Amplitude(s) for diagram number 172 - VVV1_0( w_fp[62], w_fp[74], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[74], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 172 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3863,10 +3863,10 @@ namespace mg5amcCpu // *** DIAGRAM 173 OF 1240 *** // Wavefunction(s) for diagram number 173 - VVV1P0_1( w_fp[72], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[75] ); + VVV1P0_1( w_fp[72], w_fp[5], COUPs[0], 0., 0., w_fp[75] ); // Amplitude(s) for diagram number 173 - 
VVV1_0( w_fp[62], w_fp[75], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[75], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 173 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3886,7 +3886,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 174 - VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3895,7 +3895,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3904,7 +3904,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3917,10 +3917,10 @@ namespace mg5amcCpu // *** DIAGRAM 175 OF 1240 *** // Wavefunction(s) for diagram number 175 - FFV1_2( w_fp[3], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[76] ); + FFV1_2( w_fp[3], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[76] ); // Amplitude(s) for diagram number 175 - FFV1_0( w_fp[76], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( 
channelId == 175 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3934,7 +3934,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 176 - FFV1_0( w_fp[3], w_fp[9], w_fp[75], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[75], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 176 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3950,7 +3950,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 177 - FFV1_0( w_fp[76], w_fp[55], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[55], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 177 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3964,7 +3964,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 178 - FFV1_0( w_fp[3], w_fp[55], w_fp[74], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[74], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 178 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3977,10 +3977,10 @@ namespace mg5amcCpu // *** DIAGRAM 179 OF 1240 *** // Wavefunction(s) for diagram number 179 - FFV1_1( w_fp[34], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); + FFV1_1( w_fp[34], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); // Amplitude(s) for diagram number 179 - FFV1_0( w_fp[46], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 179 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3994,7 +3994,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 180 - VVV1_0( w_fp[72], w_fp[5], w_fp[73], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( 
w_fp[72], w_fp[5], w_fp[73], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 180 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4010,7 +4010,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 181 - FFV1_0( w_fp[46], w_fp[55], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[55], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 181 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4024,7 +4024,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 182 - FFV1_0( w_fp[38], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 182 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4038,7 +4038,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 183 - VVV1_0( w_fp[72], w_fp[4], w_fp[67], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[67], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 183 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4054,7 +4054,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 184 - FFV1_0( w_fp[38], w_fp[9], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[9], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 184 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4068,7 +4068,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 185 - FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 185 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4084,7 +4084,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 186 - VVV1_0( w_fp[72], w_fp[24], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[24], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 186 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4104,7 +4104,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 187 - FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 187 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4117,10 +4117,10 @@ namespace mg5amcCpu // *** DIAGRAM 188 OF 1240 *** // Wavefunction(s) for diagram number 188 - FFV1_1( w_fp[34], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); + FFV1_1( w_fp[34], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); // Amplitude(s) for diagram number 188 - FFV1_0( w_fp[7], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 188 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4133,7 +4133,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 189 - FFV1_0( w_fp[53], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 189 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4143,10 +4143,10 @@ namespace mg5amcCpu // *** DIAGRAM 190 OF 1240 *** // Wavefunction(s) for diagram number 190 - FFV1_2( w_fp[46], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[78] ); + 
FFV1_2( w_fp[46], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[78] ); // Amplitude(s) for diagram number 190 - FFV1_0( w_fp[78], w_fp[55], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[55], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 190 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4159,7 +4159,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 191 - FFV1_0( w_fp[53], w_fp[55], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[55], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 191 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4172,7 +4172,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 192 - FFV1_0( w_fp[78], w_fp[57], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[57], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 192 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4185,7 +4185,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 193 - FFV1_0( w_fp[7], w_fp[57], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[57], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 193 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4198,7 +4198,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 194 - FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 194 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4212,7 +4212,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 195 - VVV1_0( w_fp[1], 
w_fp[29], w_fp[73], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[73], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 195 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4228,7 +4228,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 196 - FFV1_0( w_fp[46], w_fp[58], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[58], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 196 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4242,7 +4242,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 197 - FFV1_0( w_fp[25], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 197 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4255,7 +4255,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 198 - FFV1_0( w_fp[48], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 198 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4265,10 +4265,10 @@ namespace mg5amcCpu // *** DIAGRAM 199 OF 1240 *** // Wavefunction(s) for diagram number 199 - FFV1_2( w_fp[38], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] ); + FFV1_2( w_fp[38], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[58] ); // Amplitude(s) for diagram number 199 - FFV1_0( w_fp[58], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 199 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4281,7 +4281,7 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 200 - FFV1_0( w_fp[48], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 200 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4294,7 +4294,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 201 - FFV1_0( w_fp[58], w_fp[57], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[57], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 201 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4307,7 +4307,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 202 - FFV1_0( w_fp[25], w_fp[57], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[57], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 202 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4320,7 +4320,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 203 - FFV1_0( w_fp[38], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 203 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4334,7 +4334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 204 - VVV1_0( w_fp[1], w_fp[27], w_fp[67], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[67], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 204 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4350,7 +4350,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 205 - FFV1_0( w_fp[38], w_fp[60], w_fp[1], COUPs[1], 1.0, 
&_fp[0] ); + FFV1_0( w_fp[38], w_fp[60], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 205 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4364,7 +4364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 206 - FFV1_0( w_fp[28], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 206 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4377,7 +4377,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 207 - FFV1_0( w_fp[40], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 207 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4387,10 +4387,10 @@ namespace mg5amcCpu // *** DIAGRAM 208 OF 1240 *** // Wavefunction(s) for diagram number 208 - FFV1_2( w_fp[41], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); + FFV1_2( w_fp[41], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); // Amplitude(s) for diagram number 208 - FFV1_0( w_fp[60], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 208 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4403,7 +4403,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 209 - FFV1_0( w_fp[40], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 209 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4416,7 +4416,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 210 - FFV1_0( w_fp[60], w_fp[55], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[55], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 210 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4429,7 +4429,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 211 - FFV1_0( w_fp[28], w_fp[55], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[55], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 211 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4442,7 +4442,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 212 - FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 212 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4456,7 +4456,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 213 - VVV1_0( w_fp[1], w_fp[24], w_fp[68], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[68], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 213 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4472,7 +4472,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 214 - FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 214 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4486,7 +4486,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 215 - FFV1_0( w_fp[26], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], 
w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 215 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4500,7 +4500,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 216 - FFV1_0( w_fp[3], w_fp[77], w_fp[42], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[42], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 216 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4513,10 +4513,10 @@ namespace mg5amcCpu // *** DIAGRAM 217 OF 1240 *** // Wavefunction(s) for diagram number 217 - VVV1P0_1( w_fp[1], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[59] ); + VVV1P0_1( w_fp[1], w_fp[24], COUPs[0], 0., 0., w_fp[59] ); // Amplitude(s) for diagram number 217 - VVV1_0( w_fp[62], w_fp[59], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[59], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 217 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4536,7 +4536,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 218 - VVV1_0( w_fp[62], w_fp[1], w_fp[42], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[42], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 218 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4556,7 +4556,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 219 - VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4565,7 +4565,7 @@ namespace mg5amcCpu jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * 
amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4574,7 +4574,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -4590,7 +4590,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 220 - FFV1_0( w_fp[3], w_fp[57], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[59], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 220 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4606,7 +4606,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 221 - FFV1_0( w_fp[26], w_fp[57], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[57], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 221 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4620,7 +4620,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 222 - FFV1_0( w_fp[14], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 222 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4634,7 +4634,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 223 - FFV1_0( 
w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 223 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4647,10 +4647,10 @@ namespace mg5amcCpu // *** DIAGRAM 224 OF 1240 *** // Wavefunction(s) for diagram number 224 - VVV1P0_1( w_fp[1], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[1], w_fp[27], COUPs[0], 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 224 - VVV1_0( w_fp[62], w_fp[68], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[68], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 224 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4670,7 +4670,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 225 - VVV1_0( w_fp[62], w_fp[1], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[16], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 225 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4690,7 +4690,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 226 - VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4699,7 +4699,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += 
cxtype( 0, 1 ) * amp_sv[0]; @@ -4708,7 +4708,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4724,7 +4724,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 227 - FFV1_0( w_fp[3], w_fp[55], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 227 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4740,7 +4740,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 228 - FFV1_0( w_fp[14], w_fp[55], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[55], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 228 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4754,7 +4754,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 229 - FFV1_0( w_fp[12], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 229 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4768,7 +4768,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 230 - FFV1_0( w_fp[3], w_fp[77], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[19], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 230 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4781,10 +4781,10 @@ 
namespace mg5amcCpu // *** DIAGRAM 231 OF 1240 *** // Wavefunction(s) for diagram number 231 - VVV1P0_1( w_fp[1], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[67] ); + VVV1P0_1( w_fp[1], w_fp[29], COUPs[0], 0., 0., w_fp[67] ); // Amplitude(s) for diagram number 231 - VVV1_0( w_fp[62], w_fp[67], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[67], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 231 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4804,7 +4804,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 232 - VVV1_0( w_fp[62], w_fp[1], w_fp[19], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[19], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 232 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4824,7 +4824,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 233 - VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4833,7 +4833,7 @@ namespace mg5amcCpu jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4842,7 +4842,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[4], 
w_fp[29], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -4858,7 +4858,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 234 - FFV1_0( w_fp[3], w_fp[9], w_fp[67], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[67], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 234 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4874,7 +4874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 235 - FFV1_0( w_fp[12], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 235 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4885,12 +4885,12 @@ namespace mg5amcCpu // *** DIAGRAM 236 OF 1240 *** // Wavefunction(s) for diagram number 236 - VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[73] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[79] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[80] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[73] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[79] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[80] ); // Amplitude(s) for diagram number 236 - VVV1_0( w_fp[73], w_fp[6], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[73], w_fp[6], w_fp[62], COUPs[0], &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4899,7 +4899,7 @@ namespace mg5amcCpu jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[79], w_fp[6], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + 
VVV1_0( w_fp[79], w_fp[6], w_fp[62], COUPs[0], &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4908,7 +4908,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[80], w_fp[6], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[80], w_fp[6], w_fp[62], COUPs[0], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -4924,17 +4924,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 237 - FFV1_0( w_fp[3], w_fp[57], w_fp[73], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[73], COUPs[1], &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[57], w_fp[79], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[79], COUPs[1], &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[57], w_fp[80], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[80], COUPs[1], &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -4946,17 +4946,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 238 - FFV1_0( w_fp[41], w_fp[34], w_fp[73], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[34], w_fp[73], COUPs[1], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[34], w_fp[79], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[34], w_fp[79], COUPs[1], &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[12] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[34], w_fp[80], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], 
w_fp[34], w_fp[80], COUPs[1], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -4965,12 +4965,12 @@ namespace mg5amcCpu // *** DIAGRAM 239 OF 1240 *** // Wavefunction(s) for diagram number 239 - VVVV1P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[57] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[81] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[82] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[57] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[81] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[82] ); // Amplitude(s) for diagram number 239 - VVV1_0( w_fp[57], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[57], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4979,7 +4979,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[81], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[81], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4988,7 +4988,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[82], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[82], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -5004,17 +5004,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 240 - FFV1_0( w_fp[3], w_fp[55], w_fp[57], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], 
w_fp[57], COUPs[1], &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[55], w_fp[81], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[81], COUPs[1], &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[55], w_fp[82], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[82], COUPs[1], &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[16] += amp_sv[0]; @@ -5026,17 +5026,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 241 - FFV1_0( w_fp[38], w_fp[34], w_fp[57], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[34], w_fp[57], COUPs[1], &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[34], w_fp[81], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[34], w_fp[81], COUPs[1], &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[34], w_fp[82], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[34], w_fp[82], COUPs[1], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[18] += amp_sv[0]; @@ -5045,12 +5045,12 @@ namespace mg5amcCpu // *** DIAGRAM 242 OF 1240 *** // Wavefunction(s) for diagram number 242 - VVVV1P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[55] ); - VVVV3P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[83] ); - VVVV4P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[84] ); + VVVV1P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[55] ); + VVVV3P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[83] ); + VVVV4P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[84] ); // Amplitude(s) for diagram number 242 - VVV1_0( w_fp[55], w_fp[4], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[55], 
w_fp[4], w_fp[62], COUPs[0], &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -5059,7 +5059,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[83], w_fp[4], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[83], w_fp[4], w_fp[62], COUPs[0], &_fp[0] ); jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -5068,7 +5068,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[84], w_fp[4], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[84], w_fp[4], w_fp[62], COUPs[0], &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -5084,17 +5084,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 243 - FFV1_0( w_fp[3], w_fp[9], w_fp[55], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[55], COUPs[1], &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[83], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[83], COUPs[1], &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[84], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[84], COUPs[1], &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -5106,17 +5106,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 244 - FFV1_0( w_fp[46], w_fp[34], w_fp[55], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[34], w_fp[55], COUPs[1], &_fp[0] ); jamp_sv[3] += amp_sv[0]; 
jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[34], w_fp[83], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[34], w_fp[83], COUPs[1], &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[34], w_fp[84], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[34], w_fp[84], COUPs[1], &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[19] += amp_sv[0]; @@ -5128,17 +5128,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 245 - FFV1_0( w_fp[3], w_fp[77], w_fp[30], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[30], COUPs[1], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[31], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[31], COUPs[1], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[32], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[32], COUPs[1], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -5150,7 +5150,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 246 - VVV1_0( w_fp[1], w_fp[30], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[30], w_fp[62], COUPs[0], &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -5159,7 +5159,7 @@ namespace mg5amcCpu jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[31], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[31], w_fp[62], COUPs[0], &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 
0, 1 ) * amp_sv[0]; @@ -5168,7 +5168,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[32], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[32], w_fp[62], COUPs[0], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -5181,13 +5181,13 @@ namespace mg5amcCpu // *** DIAGRAM 247 OF 1240 *** // Wavefunction(s) for diagram number 247 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); - FFV1_2( w_fp[62], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] ); - FFV1_1( w_fp[77], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[62] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); + FFV1_2( w_fp[62], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[34] ); + FFV1_1( w_fp[77], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 247 - FFV1_0( w_fp[34], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 247 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5197,10 +5197,10 @@ namespace mg5amcCpu // *** DIAGRAM 248 OF 1240 *** // Wavefunction(s) for diagram number 248 - FFV1_1( w_fp[77], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[85] ); + FFV1_1( w_fp[77], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[85] ); // Amplitude(s) for diagram number 248 - FFV1_0( w_fp[34], w_fp[85], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[85], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 248 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -5210,11 +5210,11 @@ namespace mg5amcCpu // *** DIAGRAM 249 OF 1240 *** // Wavefunction(s) for diagram number 249 - FFV1_2( w_fp[62], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] ); - FFV1_1( w_fp[77], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[87] ); + FFV1_2( w_fp[62], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[86] ); + FFV1_1( w_fp[77], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[87] ); // Amplitude(s) for diagram number 249 - FFV1_0( w_fp[86], w_fp[87], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[87], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 249 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5227,7 +5227,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 250 - FFV1_0( w_fp[86], w_fp[85], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[85], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 250 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5237,10 +5237,10 @@ namespace mg5amcCpu // *** DIAGRAM 251 OF 1240 *** // Wavefunction(s) for diagram number 251 - FFV1_2( w_fp[62], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] ); + FFV1_2( w_fp[62], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[88] ); // Amplitude(s) for diagram number 251 - FFV1_0( w_fp[88], w_fp[87], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[87], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 251 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5253,7 +5253,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 252 - FFV1_0( w_fp[88], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 252 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5263,10 +5263,10 @@ namespace mg5amcCpu // *** DIAGRAM 253 OF 1240 *** // Wavefunction(s) for diagram number 253 - FFV1P0_3( w_fp[62], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[89] ); + FFV1P0_3( w_fp[62], w_fp[77], COUPs[1], 0., 0., w_fp[89] ); // Amplitude(s) for diagram number 253 - VVV1_0( w_fp[24], w_fp[6], w_fp[89], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[89], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 253 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5279,10 +5279,10 @@ namespace mg5amcCpu // *** DIAGRAM 254 OF 1240 *** // Wavefunction(s) for diagram number 254 - FFV1_2( w_fp[62], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] ); + FFV1_2( w_fp[62], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[90] ); // Amplitude(s) for diagram number 254 - FFV1_0( w_fp[90], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 254 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5296,7 +5296,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 255 - FFV1_0( w_fp[88], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 255 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5310,7 +5310,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 256 - VVV1_0( w_fp[27], w_fp[5], w_fp[89], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[89], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 256 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( 
amp_sv[0] ); @@ -5323,10 +5323,10 @@ namespace mg5amcCpu // *** DIAGRAM 257 OF 1240 *** // Wavefunction(s) for diagram number 257 - FFV1_2( w_fp[62], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] ); + FFV1_2( w_fp[62], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[91] ); // Amplitude(s) for diagram number 257 - FFV1_0( w_fp[91], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[91], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 257 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5340,7 +5340,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 258 - FFV1_0( w_fp[86], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 258 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5354,7 +5354,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 259 - VVV1_0( w_fp[4], w_fp[29], w_fp[89], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[89], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 259 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5370,7 +5370,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 260 - FFV1_0( w_fp[34], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 260 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5381,10 +5381,10 @@ namespace mg5amcCpu // *** DIAGRAM 261 OF 1240 *** // Wavefunction(s) for diagram number 261 - FFV1_2( w_fp[62], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] ); + FFV1_2( w_fp[62], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[89] ); // 
Amplitude(s) for diagram number 261 - FFV1_0( w_fp[89], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[89], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 261 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5398,17 +5398,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 262 - FFV1_0( w_fp[62], w_fp[77], w_fp[30], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[30], COUPs[1], &_fp[0] ); jamp_sv[33] += amp_sv[0]; jamp_sv[35] -= amp_sv[0]; jamp_sv[41] -= amp_sv[0]; jamp_sv[47] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[77], w_fp[31], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[31], COUPs[1], &_fp[0] ); jamp_sv[35] -= amp_sv[0]; jamp_sv[39] += amp_sv[0]; jamp_sv[41] -= amp_sv[0]; jamp_sv[45] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[77], w_fp[32], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[32], COUPs[1], &_fp[0] ); jamp_sv[33] -= amp_sv[0]; jamp_sv[39] += amp_sv[0]; jamp_sv[45] += amp_sv[0]; @@ -5417,10 +5417,10 @@ namespace mg5amcCpu // *** DIAGRAM 263 OF 1240 *** // Wavefunction(s) for diagram number 263 - FFV1P0_3( w_fp[62], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[92] ); + FFV1P0_3( w_fp[62], w_fp[2], COUPs[1], 0., 0., w_fp[92] ); // Amplitude(s) for diagram number 263 - VVV1_0( w_fp[92], w_fp[63], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[63], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 263 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5440,7 +5440,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 264 - VVV1_0( w_fp[92], w_fp[64], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[64], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 264 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv 
+= cxabs2( amp_sv[0] ); @@ -5460,7 +5460,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 265 - VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] += cxtype( 0, 1 ) * amp_sv[0]; @@ -5469,7 +5469,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] += cxtype( 0, 1 ) * amp_sv[0]; @@ -5478,7 +5478,7 @@ namespace mg5amcCpu jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -5491,10 +5491,10 @@ namespace mg5amcCpu // *** DIAGRAM 266 OF 1240 *** // Wavefunction(s) for diagram number 266 - FFV1_1( w_fp[2], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[93] ); + FFV1_1( w_fp[2], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[93] ); // Amplitude(s) for diagram number 266 - FFV1_0( w_fp[86], w_fp[93], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[93], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 266 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5508,7 +5508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 267 - FFV1_0( w_fp[86], w_fp[2], w_fp[64], 
COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[64], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 267 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5524,7 +5524,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 268 - FFV1_0( w_fp[88], w_fp[93], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[93], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 268 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5538,7 +5538,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 269 - FFV1_0( w_fp[88], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 269 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5551,10 +5551,10 @@ namespace mg5amcCpu // *** DIAGRAM 270 OF 1240 *** // Wavefunction(s) for diagram number 270 - FFV1_2( w_fp[62], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] ); + FFV1_2( w_fp[62], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[94] ); // Amplitude(s) for diagram number 270 - FFV1_0( w_fp[94], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[94], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 270 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5565,10 +5565,10 @@ namespace mg5amcCpu // *** DIAGRAM 271 OF 1240 *** // Wavefunction(s) for diagram number 271 - FFV1P0_3( w_fp[62], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[95] ); + FFV1P0_3( w_fp[62], w_fp[39], COUPs[1], 0., 0., w_fp[95] ); // Amplitude(s) for diagram number 271 - VVV1_0( w_fp[61], w_fp[6], w_fp[95], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[95], COUPs[0], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 271 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5584,7 +5584,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 272 - FFV1_0( w_fp[88], w_fp[39], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[39], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 272 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5598,7 +5598,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 273 - FFV1_0( w_fp[94], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[94], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 273 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5609,10 +5609,10 @@ namespace mg5amcCpu // *** DIAGRAM 274 OF 1240 *** // Wavefunction(s) for diagram number 274 - FFV1P0_3( w_fp[62], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[96] ); + FFV1P0_3( w_fp[62], w_fp[47], COUPs[1], 0., 0., w_fp[96] ); // Amplitude(s) for diagram number 274 - VVV1_0( w_fp[61], w_fp[5], w_fp[96], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[96], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 274 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5628,7 +5628,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 275 - FFV1_0( w_fp[86], w_fp[47], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[47], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 275 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5642,7 +5642,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 276 - FFV1_0( w_fp[94], w_fp[2], w_fp[29], COUPs[1], 1.0, 
&_fp[0] ); + FFV1_0( w_fp[94], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 276 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5658,7 +5658,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 277 - VVV1_0( w_fp[61], w_fp[29], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[29], w_fp[92], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 277 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5678,7 +5678,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 278 - FFV1_0( w_fp[89], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[89], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 278 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5694,7 +5694,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 279 - VVV1_0( w_fp[92], w_fp[69], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[69], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 279 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5714,7 +5714,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 280 - VVV1_0( w_fp[92], w_fp[70], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[70], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 280 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5734,7 +5734,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 281 - VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); jamp_sv[39] -= cxtype( 0, 1 ) * 
amp_sv[0]; jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -5743,7 +5743,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -5752,7 +5752,7 @@ namespace mg5amcCpu jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -5765,10 +5765,10 @@ namespace mg5amcCpu // *** DIAGRAM 282 OF 1240 *** // Wavefunction(s) for diagram number 282 - FFV1_1( w_fp[2], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] ); + FFV1_1( w_fp[2], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[94] ); // Amplitude(s) for diagram number 282 - FFV1_0( w_fp[34], w_fp[94], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[94], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 282 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5782,7 +5782,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 283 - FFV1_0( w_fp[34], w_fp[2], w_fp[70], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[70], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 283 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5798,7 +5798,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 284 - FFV1_0( w_fp[88], w_fp[94], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[94], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 284 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5812,7 +5812,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 285 - FFV1_0( w_fp[88], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 285 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5825,10 +5825,10 @@ namespace mg5amcCpu // *** DIAGRAM 286 OF 1240 *** // Wavefunction(s) for diagram number 286 - FFV1_2( w_fp[62], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] ); + FFV1_2( w_fp[62], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[97] ); // Amplitude(s) for diagram number 286 - FFV1_0( w_fp[97], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[97], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 286 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5839,10 +5839,10 @@ namespace mg5amcCpu // *** DIAGRAM 287 OF 1240 *** // Wavefunction(s) for diagram number 287 - FFV1P0_3( w_fp[62], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[98] ); + FFV1P0_3( w_fp[62], w_fp[33], COUPs[1], 0., 0., w_fp[98] ); // Amplitude(s) for diagram number 287 - VVV1_0( w_fp[66], w_fp[6], w_fp[98], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[98], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 287 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5858,7 +5858,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 288 - FFV1_0( w_fp[88], w_fp[33], w_fp[66], COUPs[1], 
1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[33], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 288 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5872,7 +5872,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 289 - FFV1_0( w_fp[97], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[97], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 289 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5886,7 +5886,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 290 - VVV1_0( w_fp[66], w_fp[4], w_fp[96], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[96], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 290 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5902,7 +5902,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 291 - FFV1_0( w_fp[34], w_fp[47], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[47], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 291 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5916,7 +5916,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 292 - FFV1_0( w_fp[97], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[97], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 292 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5932,7 +5932,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 293 - VVV1_0( w_fp[66], w_fp[27], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[27], w_fp[92], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( 
channelId == 293 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5952,7 +5952,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 294 - FFV1_0( w_fp[91], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[91], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 294 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5968,7 +5968,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 295 - VVV1_0( w_fp[92], w_fp[74], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[74], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 295 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5988,7 +5988,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 296 - VVV1_0( w_fp[92], w_fp[75], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[75], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 296 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6008,7 +6008,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 297 - VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -6017,7 +6017,7 @@ namespace mg5amcCpu jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); jamp_sv[47] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] += cxtype( 0, 1 ) * 
amp_sv[0]; jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -6026,7 +6026,7 @@ namespace mg5amcCpu jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -6039,10 +6039,10 @@ namespace mg5amcCpu // *** DIAGRAM 298 OF 1240 *** // Wavefunction(s) for diagram number 298 - FFV1_1( w_fp[2], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] ); + FFV1_1( w_fp[2], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[97] ); // Amplitude(s) for diagram number 298 - FFV1_0( w_fp[34], w_fp[97], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[97], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 298 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6056,7 +6056,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 299 - FFV1_0( w_fp[34], w_fp[2], w_fp[75], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[75], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 299 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6072,7 +6072,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 300 - FFV1_0( w_fp[86], w_fp[97], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[97], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 300 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6086,7 +6086,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 301 - FFV1_0( w_fp[86], w_fp[2], w_fp[74], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( 
w_fp[86], w_fp[2], w_fp[74], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 301 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6099,10 +6099,10 @@ namespace mg5amcCpu // *** DIAGRAM 302 OF 1240 *** // Wavefunction(s) for diagram number 302 - FFV1_2( w_fp[62], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); + FFV1_2( w_fp[62], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 302 - FFV1_0( w_fp[99], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 302 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6116,7 +6116,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 303 - VVV1_0( w_fp[72], w_fp[5], w_fp[98], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[98], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 303 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6132,7 +6132,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 304 - FFV1_0( w_fp[86], w_fp[33], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[33], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 304 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6146,7 +6146,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 305 - FFV1_0( w_fp[99], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 305 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6160,7 +6160,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 306 - VVV1_0( w_fp[72], w_fp[4], w_fp[95], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[95], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 306 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6176,7 +6176,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 307 - FFV1_0( w_fp[34], w_fp[39], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[39], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 307 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6190,7 +6190,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 308 - FFV1_0( w_fp[99], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 308 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6206,7 +6206,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 309 - VVV1_0( w_fp[72], w_fp[24], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[24], w_fp[92], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 309 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6226,7 +6226,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 310 - FFV1_0( w_fp[90], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 310 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6239,10 +6239,10 @@ namespace mg5amcCpu // *** DIAGRAM 311 OF 1240 *** // Wavefunction(s) for diagram number 311 - FFV1_2( w_fp[62], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); + 
FFV1_2( w_fp[62], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 311 - FFV1_0( w_fp[99], w_fp[35], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[35], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 311 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6255,7 +6255,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 312 - FFV1_0( w_fp[99], w_fp[36], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[36], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 312 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6265,10 +6265,10 @@ namespace mg5amcCpu // *** DIAGRAM 313 OF 1240 *** // Wavefunction(s) for diagram number 313 - FFV1_1( w_fp[33], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[100] ); + FFV1_1( w_fp[33], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[100] ); // Amplitude(s) for diagram number 313 - FFV1_0( w_fp[86], w_fp[100], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[100], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 313 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6281,7 +6281,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 314 - FFV1_0( w_fp[86], w_fp[36], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[36], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 314 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6294,7 +6294,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 315 - FFV1_0( w_fp[88], w_fp[100], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[100], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 
315 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6307,7 +6307,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 316 - FFV1_0( w_fp[88], w_fp[35], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[35], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 316 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6320,7 +6320,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 317 - FFV1_0( w_fp[99], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 317 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6334,7 +6334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 318 - VVV1_0( w_fp[1], w_fp[29], w_fp[98], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[98], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 318 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6350,7 +6350,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 319 - FFV1_0( w_fp[89], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[89], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 319 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6364,7 +6364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 320 - FFV1_0( w_fp[99], w_fp[43], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[43], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 320 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6377,7 
+6377,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 321 - FFV1_0( w_fp[99], w_fp[44], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[44], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 321 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6387,10 +6387,10 @@ namespace mg5amcCpu // *** DIAGRAM 322 OF 1240 *** // Wavefunction(s) for diagram number 322 - FFV1_1( w_fp[39], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] ); + FFV1_1( w_fp[39], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[89] ); // Amplitude(s) for diagram number 322 - FFV1_0( w_fp[34], w_fp[89], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[89], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 322 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6403,7 +6403,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 323 - FFV1_0( w_fp[34], w_fp[44], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[44], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 323 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6416,7 +6416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 324 - FFV1_0( w_fp[88], w_fp[89], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[89], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 324 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6429,7 +6429,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 325 - FFV1_0( w_fp[88], w_fp[43], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[43], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 325 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6442,7 +6442,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 326 - FFV1_0( w_fp[99], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 326 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6456,7 +6456,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 327 - VVV1_0( w_fp[1], w_fp[27], w_fp[95], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[95], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 327 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6472,7 +6472,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 328 - FFV1_0( w_fp[91], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[91], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 328 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6486,7 +6486,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 329 - FFV1_0( w_fp[99], w_fp[49], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[49], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 329 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6499,7 +6499,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 330 - FFV1_0( w_fp[99], w_fp[50], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[50], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 330 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6509,10 +6509,10 @@ namespace mg5amcCpu // 
*** DIAGRAM 331 OF 1240 *** // Wavefunction(s) for diagram number 331 - FFV1_1( w_fp[47], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] ); + FFV1_1( w_fp[47], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[91] ); // Amplitude(s) for diagram number 331 - FFV1_0( w_fp[34], w_fp[91], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[91], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 331 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6525,7 +6525,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 332 - FFV1_0( w_fp[34], w_fp[50], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[50], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 332 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6538,7 +6538,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 333 - FFV1_0( w_fp[86], w_fp[91], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[91], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 333 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6551,7 +6551,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 334 - FFV1_0( w_fp[86], w_fp[49], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[49], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 334 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6564,7 +6564,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 335 - FFV1_0( w_fp[99], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 335 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -6578,7 +6578,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 336 - VVV1_0( w_fp[1], w_fp[24], w_fp[96], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[96], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 336 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6594,7 +6594,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 337 - FFV1_0( w_fp[90], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 337 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6608,7 +6608,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 338 - FFV1_0( w_fp[99], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 338 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6622,7 +6622,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 339 - FFV1_0( w_fp[99], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 339 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6638,7 +6638,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 340 - VVV1_0( w_fp[92], w_fp[59], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[59], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 340 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6658,7 +6658,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 341 - VVV1_0( w_fp[92], w_fp[1], w_fp[42], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[1], w_fp[42], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 341 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6678,7 +6678,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 342 - VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6687,7 +6687,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6696,7 +6696,7 @@ namespace mg5amcCpu jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -6712,7 +6712,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 343 - FFV1_0( w_fp[88], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 343 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6728,7 +6728,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) 
for diagram number 344 - FFV1_0( w_fp[88], w_fp[17], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[17], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 344 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6742,7 +6742,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 345 - FFV1_0( w_fp[99], w_fp[15], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[15], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 345 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6756,7 +6756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 346 - FFV1_0( w_fp[99], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 346 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6772,7 +6772,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 347 - VVV1_0( w_fp[92], w_fp[68], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[68], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 347 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6792,7 +6792,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 348 - VVV1_0( w_fp[92], w_fp[1], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[1], w_fp[16], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 348 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6812,7 +6812,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 349 - VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], 
w_fp[27], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6821,7 +6821,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6830,7 +6830,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6846,7 +6846,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 350 - FFV1_0( w_fp[86], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 350 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6862,7 +6862,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 351 - FFV1_0( w_fp[86], w_fp[15], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[15], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 351 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6876,7 +6876,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 352 - FFV1_0( w_fp[99], w_fp[18], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], 
w_fp[18], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 352 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6890,7 +6890,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 353 - FFV1_0( w_fp[99], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 353 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6906,7 +6906,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 354 - VVV1_0( w_fp[92], w_fp[67], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[67], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 354 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6926,7 +6926,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 355 - VVV1_0( w_fp[92], w_fp[1], w_fp[19], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[1], w_fp[19], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 355 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6946,7 +6946,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 356 - VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], &_fp[0] ); jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6955,7 +6955,7 @@ namespace mg5amcCpu jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 
&_fp[0] ); jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6964,7 +6964,7 @@ namespace mg5amcCpu jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], &_fp[0] ); jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -6980,7 +6980,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 357 - FFV1_0( w_fp[34], w_fp[2], w_fp[67], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[67], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 357 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6996,7 +6996,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 358 - FFV1_0( w_fp[34], w_fp[18], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[18], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 358 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7010,7 +7010,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 359 - VVV1_0( w_fp[73], w_fp[6], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[73], w_fp[6], w_fp[92], COUPs[0], &_fp[0] ); jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7019,7 +7019,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[79], w_fp[6], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[79], w_fp[6], w_fp[92], COUPs[0], &_fp[0] ); jamp_sv[39] += 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7028,7 +7028,7 @@ namespace mg5amcCpu jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[80], w_fp[6], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[80], w_fp[6], w_fp[92], COUPs[0], &_fp[0] ); jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7044,17 +7044,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 360 - FFV1_0( w_fp[88], w_fp[2], w_fp[73], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[73], COUPs[1], &_fp[0] ); jamp_sv[33] += amp_sv[0]; jamp_sv[39] -= amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[87] += amp_sv[0]; - FFV1_0( w_fp[88], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); jamp_sv[39] -= amp_sv[0]; jamp_sv[57] += amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[81] += amp_sv[0]; - FFV1_0( w_fp[88], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); jamp_sv[33] -= amp_sv[0]; jamp_sv[57] += amp_sv[0]; jamp_sv[81] += amp_sv[0]; @@ -7066,17 +7066,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 361 - FFV1_0( w_fp[62], w_fp[47], w_fp[73], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[47], w_fp[73], COUPs[1], &_fp[0] ); jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[47], w_fp[79], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[47], w_fp[79], COUPs[1], &_fp[0] ); jamp_sv[107] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[47], w_fp[80], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[47], w_fp[80], COUPs[1], &_fp[0] ); jamp_sv[105] 
-= amp_sv[0]; jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; @@ -7088,7 +7088,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 362 - VVV1_0( w_fp[57], w_fp[5], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[57], w_fp[5], w_fp[92], COUPs[0], &_fp[0] ); jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7097,7 +7097,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[81], w_fp[5], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[81], w_fp[5], w_fp[92], COUPs[0], &_fp[0] ); jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7106,7 +7106,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[82], w_fp[5], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[82], w_fp[5], w_fp[92], COUPs[0], &_fp[0] ); jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7122,17 +7122,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 363 - FFV1_0( w_fp[86], w_fp[2], w_fp[57], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[57], COUPs[1], &_fp[0] ); jamp_sv[35] += amp_sv[0]; jamp_sv[45] -= amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - FFV1_0( w_fp[86], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); jamp_sv[45] -= amp_sv[0]; jamp_sv[59] += amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; - FFV1_0( w_fp[86], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); jamp_sv[35] -= amp_sv[0]; jamp_sv[59] += amp_sv[0]; 
jamp_sv[105] += amp_sv[0]; @@ -7144,17 +7144,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 364 - FFV1_0( w_fp[62], w_fp[39], w_fp[57], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[57], COUPs[1], &_fp[0] ); jamp_sv[81] += amp_sv[0]; jamp_sv[83] -= amp_sv[0]; jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[39], w_fp[81], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[81], COUPs[1], &_fp[0] ); jamp_sv[83] -= amp_sv[0]; jamp_sv[87] += amp_sv[0]; jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[39], w_fp[82], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[82], COUPs[1], &_fp[0] ); jamp_sv[81] -= amp_sv[0]; jamp_sv[87] += amp_sv[0]; jamp_sv[93] += amp_sv[0]; @@ -7166,7 +7166,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 365 - VVV1_0( w_fp[55], w_fp[4], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[55], w_fp[4], w_fp[92], COUPs[0], &_fp[0] ); jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7175,7 +7175,7 @@ namespace mg5amcCpu jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[83], w_fp[4], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[83], w_fp[4], w_fp[92], COUPs[0], &_fp[0] ); jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7184,7 +7184,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[84], w_fp[4], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[84], w_fp[4], w_fp[92], COUPs[0], &_fp[0] ); jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7200,17 
+7200,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 366 - FFV1_0( w_fp[34], w_fp[2], w_fp[55], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[55], COUPs[1], &_fp[0] ); jamp_sv[41] += amp_sv[0]; jamp_sv[47] -= amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[83], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[83], COUPs[1], &_fp[0] ); jamp_sv[47] -= amp_sv[0]; jamp_sv[83] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[84], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[84], COUPs[1], &_fp[0] ); jamp_sv[41] -= amp_sv[0]; jamp_sv[83] += amp_sv[0]; jamp_sv[107] += amp_sv[0]; @@ -7222,17 +7222,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 367 - FFV1_0( w_fp[62], w_fp[33], w_fp[55], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[55], COUPs[1], &_fp[0] ); jamp_sv[57] += amp_sv[0]; jamp_sv[59] -= amp_sv[0]; jamp_sv[65] -= amp_sv[0]; jamp_sv[71] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[33], w_fp[83], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[83], COUPs[1], &_fp[0] ); jamp_sv[59] -= amp_sv[0]; jamp_sv[63] += amp_sv[0]; jamp_sv[65] -= amp_sv[0]; jamp_sv[69] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[33], w_fp[84], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[84], COUPs[1], &_fp[0] ); jamp_sv[57] -= amp_sv[0]; jamp_sv[63] += amp_sv[0]; jamp_sv[69] += amp_sv[0]; @@ -7244,17 +7244,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 368 - FFV1_0( w_fp[99], w_fp[2], w_fp[30], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[30], COUPs[1], &_fp[0] ); jamp_sv[65] += amp_sv[0]; jamp_sv[71] -= amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[31], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[31], COUPs[1], &_fp[0] ); jamp_sv[71] -= amp_sv[0]; jamp_sv[89] += 
amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[32], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[32], COUPs[1], &_fp[0] ); jamp_sv[65] -= amp_sv[0]; jamp_sv[89] += amp_sv[0]; jamp_sv[113] += amp_sv[0]; @@ -7266,7 +7266,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 369 - VVV1_0( w_fp[1], w_fp[30], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[30], w_fp[92], COUPs[0], &_fp[0] ); jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7275,7 +7275,7 @@ namespace mg5amcCpu jamp_sv[71] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[31], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[31], w_fp[92], COUPs[0], &_fp[0] ); jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7284,7 +7284,7 @@ namespace mg5amcCpu jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[32], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[32], w_fp[92], COUPs[0], &_fp[0] ); jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7297,11 +7297,11 @@ namespace mg5amcCpu // *** DIAGRAM 370 OF 1240 *** // Wavefunction(s) for diagram number 370 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[92] ); - FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[92] ); + FFV1_2( w_fp[3], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 370 - FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[9], w_fp[6], 
COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 370 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7315,7 +7315,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 371 - FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 371 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7326,11 +7326,11 @@ namespace mg5amcCpu // *** DIAGRAM 372 OF 1240 *** // Wavefunction(s) for diagram number 372 - VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[62] ); - FFV1P0_3( w_fp[3], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[34] ); + VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 0., 0., w_fp[62] ); + FFV1P0_3( w_fp[3], w_fp[77], COUPs[1], 0., 0., w_fp[34] ); // Amplitude(s) for diagram number 372 - VVV1_0( w_fp[62], w_fp[34], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[34], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 372 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7350,7 +7350,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 373 - FFV1_0( w_fp[3], w_fp[85], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[85], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 373 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7363,10 +7363,10 @@ namespace mg5amcCpu // *** DIAGRAM 374 OF 1240 *** // Wavefunction(s) for diagram number 374 - VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 374 - VVV1_0( w_fp[86], w_fp[34], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], 
w_fp[34], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 374 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7386,7 +7386,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 375 - FFV1_0( w_fp[3], w_fp[9], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 375 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7399,12 +7399,12 @@ namespace mg5amcCpu // *** DIAGRAM 376 OF 1240 *** // Wavefunction(s) for diagram number 376 - VVVV1P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] ); - VVVV3P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] ); - VVVV4P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] ); + VVVV1P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[88] ); + VVVV3P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[90] ); + VVVV4P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[96] ); // Amplitude(s) for diagram number 376 - FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], &_fp[0] ); jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7413,7 +7413,7 @@ namespace mg5amcCpu jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[90], COUPs[1], &_fp[0] ); jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7422,7 +7422,7 @@ namespace mg5amcCpu jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] += cxtype( 
0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[96], COUPs[1], &_fp[0] ); jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7435,10 +7435,10 @@ namespace mg5amcCpu // *** DIAGRAM 377 OF 1240 *** // Wavefunction(s) for diagram number 377 - FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[95] ); + FFV1_1( w_fp[77], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[95] ); // Amplitude(s) for diagram number 377 - FFV1_0( w_fp[38], w_fp[95], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[95], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 377 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7449,10 +7449,10 @@ namespace mg5amcCpu // *** DIAGRAM 378 OF 1240 *** // Wavefunction(s) for diagram number 378 - FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); + FFV1_2( w_fp[38], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 378 - FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 378 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7466,7 +7466,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 379 - FFV1_0( w_fp[38], w_fp[77], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 379 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7482,7 +7482,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 380 - FFV1_0( w_fp[41], w_fp[95], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], 
w_fp[95], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 380 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7493,10 +7493,10 @@ namespace mg5amcCpu // *** DIAGRAM 381 OF 1240 *** // Wavefunction(s) for diagram number 381 - FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[101] ); + FFV1_2( w_fp[41], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[101] ); // Amplitude(s) for diagram number 381 - FFV1_0( w_fp[101], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[101], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 381 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7510,7 +7510,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 382 - FFV1_0( w_fp[41], w_fp[77], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 382 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7526,7 +7526,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 383 - FFV1_0( w_fp[3], w_fp[95], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[95], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 383 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7542,7 +7542,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 384 - FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 384 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7555,10 +7555,10 @@ namespace mg5amcCpu // *** DIAGRAM 385 OF 1240 *** // 
Wavefunction(s) for diagram number 385 - VVV1P0_1( w_fp[92], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[95] ); + VVV1P0_1( w_fp[92], w_fp[29], COUPs[0], 0., 0., w_fp[95] ); // Amplitude(s) for diagram number 385 - FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 385 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7575,10 +7575,10 @@ namespace mg5amcCpu // *** DIAGRAM 386 OF 1240 *** // Wavefunction(s) for diagram number 386 - FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] ); + FFV1_1( w_fp[2], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[102] ); // Amplitude(s) for diagram number 386 - FFV1_0( w_fp[22], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 386 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7592,7 +7592,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 387 - FFV1_0( w_fp[21], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 387 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7603,10 +7603,10 @@ namespace mg5amcCpu // *** DIAGRAM 388 OF 1240 *** // Wavefunction(s) for diagram number 388 - FFV1P0_3( w_fp[52], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[103] ); + FFV1P0_3( w_fp[52], w_fp[2], COUPs[1], 0., 0., w_fp[103] ); // Amplitude(s) for diagram number 388 - VVV1_0( w_fp[62], w_fp[103], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[103], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 388 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -7626,7 +7626,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 389 - FFV1_0( w_fp[21], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 389 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7642,7 +7642,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 390 - VVV1_0( w_fp[86], w_fp[103], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[103], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 390 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7662,7 +7662,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 391 - FFV1_0( w_fp[22], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 391 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7678,7 +7678,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 392 - FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], &_fp[0] ); jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7687,7 +7687,7 @@ namespace mg5amcCpu jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7696,7 +7696,7 @@ namespace mg5amcCpu jamp_sv[94] -= 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7709,10 +7709,10 @@ namespace mg5amcCpu // *** DIAGRAM 393 OF 1240 *** // Wavefunction(s) for diagram number 393 - FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] ); + FFV1_2( w_fp[52], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[104] ); // Amplitude(s) for diagram number 393 - FFV1_0( w_fp[104], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 393 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7723,10 +7723,10 @@ namespace mg5amcCpu // *** DIAGRAM 394 OF 1240 *** // Wavefunction(s) for diagram number 394 - FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[105] ); + FFV1_1( w_fp[39], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[105] ); // Amplitude(s) for diagram number 394 - FFV1_0( w_fp[52], w_fp[105], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[105], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 394 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7740,7 +7740,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 395 - FFV1_0( w_fp[52], w_fp[39], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 395 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7756,7 +7756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 396 - FFV1_0( w_fp[104], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 396 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7767,10 +7767,10 @@ namespace mg5amcCpu // *** DIAGRAM 397 OF 1240 *** // Wavefunction(s) for diagram number 397 - FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] ); + FFV1_1( w_fp[47], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[106] ); // Amplitude(s) for diagram number 397 - FFV1_0( w_fp[52], w_fp[106], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[106], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 397 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7784,7 +7784,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 398 - FFV1_0( w_fp[52], w_fp[47], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 398 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7800,7 +7800,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 399 - FFV1_0( w_fp[104], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 399 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7816,7 +7816,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 400 - FFV1_0( w_fp[52], w_fp[102], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[102], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 400 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -7832,7 +7832,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 401 - FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 401 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7852,7 +7852,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 402 - FFV1_0( w_fp[71], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 402 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7868,7 +7868,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 403 - FFV1_0( w_fp[3], w_fp[102], w_fp[70], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[70], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 403 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7888,7 +7888,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 404 - FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 404 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7904,7 +7904,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 405 - FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 405 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7924,7 +7924,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 406 - FFV1_0( w_fp[3], w_fp[94], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[94], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 406 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7944,7 +7944,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 407 - FFV1_0( w_fp[71], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 407 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7964,7 +7964,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 408 - VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -7981,7 +7981,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -7998,7 +7998,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -8019,10 +8019,10 @@ namespace mg5amcCpu // *** DIAGRAM 409 OF 1240 *** // Wavefunction(s) for diagram number 409 - VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 409 - VVV1_0( w_fp[8], w_fp[6], w_fp[104], 
COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[104], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 409 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8047,10 +8047,10 @@ namespace mg5amcCpu // *** DIAGRAM 410 OF 1240 *** // Wavefunction(s) for diagram number 410 - VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[107] ); + VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 0., 0., w_fp[107] ); // Amplitude(s) for diagram number 410 - VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 410 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8078,7 +8078,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 411 - VVV1_0( w_fp[66], w_fp[8], w_fp[86], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[8], w_fp[86], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 411 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8106,7 +8106,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 412 - FFV1_0( w_fp[3], w_fp[47], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 412 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8126,7 +8126,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 413 - FFV1_0( w_fp[3], w_fp[106], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[106], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 413 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8142,7 +8142,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 414 - FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 414 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8158,7 +8158,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 415 - FFV1_0( w_fp[41], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 415 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8178,7 +8178,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 416 - FFV1_0( w_fp[41], w_fp[102], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[102], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 416 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8194,7 +8194,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 417 - FFV1_0( w_fp[101], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[101], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 417 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8210,7 +8210,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 418 - FFV1_0( w_fp[76], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 418 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8226,7 +8226,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 419 - FFV1_0( w_fp[3], w_fp[102], w_fp[75], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( 
w_fp[3], w_fp[102], w_fp[75], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 419 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8246,7 +8246,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 420 - FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 420 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8262,7 +8262,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 421 - FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 421 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8282,7 +8282,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 422 - FFV1_0( w_fp[3], w_fp[97], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[97], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 422 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8302,7 +8302,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 423 - FFV1_0( w_fp[76], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 423 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8322,7 +8322,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 424 - VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; 
jamp_sv[42] -= amp_sv[0]; @@ -8339,7 +8339,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[7] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; @@ -8356,7 +8356,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -8377,10 +8377,10 @@ namespace mg5amcCpu // *** DIAGRAM 425 OF 1240 *** // Wavefunction(s) for diagram number 425 - VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 425 - VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 425 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8408,7 +8408,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 426 - VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 426 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8436,7 +8436,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 427 - VVV1_0( w_fp[72], w_fp[8], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[8], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 427 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -8464,7 +8464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 428 - FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 428 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8484,7 +8484,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 429 - FFV1_0( w_fp[3], w_fp[105], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[105], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 429 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8500,7 +8500,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 430 - FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 430 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8516,7 +8516,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 431 - FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 431 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8536,7 +8536,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 432 - FFV1_0( w_fp[38], w_fp[102], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[102], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 432 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8552,7 +8552,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 433 - FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 433 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8565,10 +8565,10 @@ namespace mg5amcCpu // *** DIAGRAM 434 OF 1240 *** // Wavefunction(s) for diagram number 434 - VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 434 - VVV1_0( w_fp[104], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 434 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8596,7 +8596,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 435 - VVV1_0( w_fp[104], w_fp[11], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[11], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 435 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8624,7 +8624,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 436 - VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -8641,7 +8641,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -8658,7 +8658,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= 
amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -8679,10 +8679,10 @@ namespace mg5amcCpu // *** DIAGRAM 437 OF 1240 *** // Wavefunction(s) for diagram number 437 - VVV1P0_1( w_fp[1], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[108] ); + VVV1P0_1( w_fp[1], w_fp[8], COUPs[0], 0., 0., w_fp[108] ); // Amplitude(s) for diagram number 437 - VVV1_0( w_fp[62], w_fp[108], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[108], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 437 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8710,7 +8710,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 438 - VVV1_0( w_fp[62], w_fp[1], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 438 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8738,7 +8738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 439 - VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[24] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -8755,7 +8755,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[115] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -8772,7 +8772,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[1], 
w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -8796,7 +8796,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 440 - VVV1_0( w_fp[86], w_fp[108], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[108], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 440 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8824,7 +8824,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 441 - VVV1_0( w_fp[86], w_fp[1], w_fp[10], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[10], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 441 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8852,7 +8852,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 442 - VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[25] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -8869,7 +8869,7 @@ namespace mg5amcCpu jamp_sv[94] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -8886,7 +8886,7 @@ namespace mg5amcCpu jamp_sv[99] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -8907,12 +8907,12 @@ namespace mg5amcCpu // *** 
DIAGRAM 443 OF 1240 *** // Wavefunction(s) for diagram number 443 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[109] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 443 - VVV1_0( w_fp[8], w_fp[6], w_fp[109], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[109], COUPs[0], &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -8929,7 +8929,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[110], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[110], COUPs[0], &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -8946,7 +8946,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[115] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[111], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[111], COUPs[0], &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -8967,12 +8967,12 @@ namespace mg5amcCpu // *** DIAGRAM 444 OF 1240 *** // Wavefunction(s) for diagram number 444 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[113] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[114] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[112] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[113] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[114] ); // Amplitude(s) for diagram 
number 444 - VVV1_0( w_fp[8], w_fp[5], w_fp[112], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[112], COUPs[0], &_fp[0] ); jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -8989,7 +8989,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[113], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[113], COUPs[0], &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -9006,7 +9006,7 @@ namespace mg5amcCpu jamp_sv[94] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[114], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[114], COUPs[0], &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -9030,7 +9030,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 445 - VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -9047,7 +9047,7 @@ namespace mg5amcCpu jamp_sv[94] -= amp_sv[0]; jamp_sv[115] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[90], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[90], COUPs[0], &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[25] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -9064,7 +9064,7 @@ namespace mg5amcCpu jamp_sv[94] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[96], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[96], COUPs[0], &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[24] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -9088,7 +9088,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 446 - VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[1], 
w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -9105,7 +9105,7 @@ namespace mg5amcCpu jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[9] -= amp_sv[0]; @@ -9122,7 +9122,7 @@ namespace mg5amcCpu jamp_sv[93] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -9146,7 +9146,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 447 - VVV1_0( w_fp[8], w_fp[29], w_fp[104], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[29], w_fp[104], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 447 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9174,7 +9174,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 448 - VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 448 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9202,7 +9202,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 449 - VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 449 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9230,7 +9230,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) 
for diagram number 450 - VVV1_0( w_fp[104], w_fp[45], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[45], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 450 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9250,7 +9250,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 451 - FFV1_0( w_fp[3], w_fp[44], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[44], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 451 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9266,7 +9266,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 452 - FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 452 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9280,7 +9280,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 453 - FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 453 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9294,7 +9294,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 454 - FFV1_0( w_fp[3], w_fp[89], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[89], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 454 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9310,7 +9310,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 455 - VVV1_0( w_fp[86], w_fp[1], w_fp[45], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], 
w_fp[45], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 455 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9330,7 +9330,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 456 - FFV1_0( w_fp[3], w_fp[39], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[112], COUPs[1], &_fp[0] ); jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9339,7 +9339,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[113], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[113], COUPs[1], &_fp[0] ); jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9348,7 +9348,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[114], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[114], COUPs[1], &_fp[0] ); jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9364,7 +9364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 457 - FFV1_0( w_fp[41], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 457 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9380,7 +9380,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 458 - FFV1_0( w_fp[41], w_fp[105], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[105], w_fp[1], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 458 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9394,7 +9394,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 459 - FFV1_0( w_fp[101], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[101], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 459 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9408,7 +9408,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 460 - VVV1_0( w_fp[104], w_fp[51], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[51], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 460 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9428,7 +9428,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 461 - FFV1_0( w_fp[3], w_fp[50], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[50], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 461 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9444,7 +9444,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 462 - FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 462 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9458,7 +9458,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 463 - FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 463 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -9472,7 +9472,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 464 - FFV1_0( w_fp[3], w_fp[91], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[91], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 464 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9488,7 +9488,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 465 - VVV1_0( w_fp[62], w_fp[1], w_fp[51], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[51], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 465 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9508,7 +9508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 466 - FFV1_0( w_fp[3], w_fp[47], w_fp[109], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[109], COUPs[1], &_fp[0] ); jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9517,7 +9517,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[110], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[110], COUPs[1], &_fp[0] ); jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9526,7 +9526,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[111], COUPs[1], &_fp[0] ); jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * 
amp_sv[0]; @@ -9542,7 +9542,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 467 - FFV1_0( w_fp[38], w_fp[47], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 467 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9558,7 +9558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 468 - FFV1_0( w_fp[38], w_fp[106], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[106], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 468 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9572,7 +9572,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 469 - FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 469 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9586,7 +9586,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 470 - VVV1_0( w_fp[104], w_fp[23], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[23], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 470 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9606,7 +9606,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 471 - FFV1_0( w_fp[48], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 471 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9622,7 +9622,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 472 - FFV1_0( 
w_fp[58], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 472 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9636,7 +9636,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 473 - FFV1_0( w_fp[48], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 473 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9650,7 +9650,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 474 - FFV1_0( w_fp[58], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 474 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9666,7 +9666,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 475 - VVV1_0( w_fp[86], w_fp[1], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 475 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9686,7 +9686,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 476 - FFV1_0( w_fp[38], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9695,7 +9695,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[113], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( 
w_fp[38], w_fp[2], w_fp[113], COUPs[1], &_fp[0] ); jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9704,7 +9704,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[114], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[114], COUPs[1], &_fp[0] ); jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9720,7 +9720,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 477 - VVV1_0( w_fp[104], w_fp[20], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[20], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 477 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9740,7 +9740,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 478 - FFV1_0( w_fp[40], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 478 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9756,7 +9756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 479 - FFV1_0( w_fp[60], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 479 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9770,7 +9770,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 480 - FFV1_0( w_fp[40], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( 
channelId == 480 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9784,7 +9784,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 481 - FFV1_0( w_fp[60], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 481 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9800,7 +9800,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 482 - VVV1_0( w_fp[62], w_fp[1], w_fp[20], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[20], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 482 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9820,7 +9820,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 483 - FFV1_0( w_fp[41], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9829,7 +9829,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9838,7 +9838,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9854,7 +9854,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 484 - FFV1_0( w_fp[3], w_fp[18], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[18], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 484 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9874,7 +9874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 485 - FFV1_0( w_fp[12], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 485 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9894,7 +9894,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 486 - FFV1_0( w_fp[3], w_fp[102], w_fp[67], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[67], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 486 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9914,7 +9914,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 487 - FFV1_0( w_fp[12], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 487 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9930,7 +9930,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 488 - FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 488 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9950,7 +9950,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 489 - FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 489 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9966,7 +9966,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 490 - FFV1_0( w_fp[3], w_fp[102], w_fp[55], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[55], COUPs[1], &_fp[0] ); jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9975,7 +9975,7 @@ namespace mg5amcCpu jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[83], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[83], COUPs[1], &_fp[0] ); jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9984,7 +9984,7 @@ namespace mg5amcCpu jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[84], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[84], COUPs[1], &_fp[0] ); jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -10000,7 +10000,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 491 - FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], &_fp[0] ); jamp_sv[40] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10009,7 +10009,7 @@ namespace mg5amcCpu jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] -= cxtype( 0, 1 ) * 
amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], &_fp[0] ); jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -10018,7 +10018,7 @@ namespace mg5amcCpu jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], &_fp[0] ); jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -10034,7 +10034,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 492 - VVV1_0( w_fp[92], w_fp[55], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[55], w_fp[8], COUPs[0], &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[9] += amp_sv[0]; @@ -10051,7 +10051,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[83], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[83], w_fp[8], COUPs[0], &_fp[0] ); jamp_sv[7] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; @@ -10068,7 +10068,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[84], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[84], w_fp[8], COUPs[0], &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -10089,11 +10089,11 @@ namespace mg5amcCpu // *** DIAGRAM 493 OF 1240 *** // Wavefunction(s) for diagram number 493 - VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[92] ); - FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); + VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 0., 0., w_fp[92] ); + 
FFV1_2( w_fp[3], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 493 - FFV1_0( w_fp[99], w_fp[87], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[87], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 493 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10107,7 +10107,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 494 - FFV1_0( w_fp[99], w_fp[85], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[85], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 494 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10118,10 +10118,10 @@ namespace mg5amcCpu // *** DIAGRAM 495 OF 1240 *** // Wavefunction(s) for diagram number 495 - VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[102] ); + VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 0., 0., w_fp[102] ); // Amplitude(s) for diagram number 495 - VVV1_0( w_fp[102], w_fp[34], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[102], w_fp[34], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 495 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10141,7 +10141,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 496 - FFV1_0( w_fp[3], w_fp[85], w_fp[102], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[85], w_fp[102], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 496 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10154,10 +10154,10 @@ namespace mg5amcCpu // *** DIAGRAM 497 OF 1240 *** // Wavefunction(s) for diagram number 497 - VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 0., 0., w_fp[104] ); // Amplitude(s) for diagram 
number 497 - VVV1_0( w_fp[104], w_fp[34], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[34], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 497 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10177,7 +10177,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 498 - FFV1_0( w_fp[3], w_fp[87], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[87], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 498 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10190,12 +10190,12 @@ namespace mg5amcCpu // *** DIAGRAM 499 OF 1240 *** // Wavefunction(s) for diagram number 499 - VVVV1P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] ); - VVVV3P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[109] ); + VVVV1P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[111] ); + VVVV3P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[109] ); // Amplitude(s) for diagram number 499 - FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], &_fp[0] ); jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10204,7 +10204,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], &_fp[0] ); jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10213,7 +10213,7 @@ namespace 
mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], &_fp[0] ); jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10226,10 +10226,10 @@ namespace mg5amcCpu // *** DIAGRAM 500 OF 1240 *** // Wavefunction(s) for diagram number 500 - FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] ); + FFV1_1( w_fp[77], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[62] ); // Amplitude(s) for diagram number 500 - FFV1_0( w_fp[46], w_fp[62], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[62], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 500 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10240,10 +10240,10 @@ namespace mg5amcCpu // *** DIAGRAM 501 OF 1240 *** // Wavefunction(s) for diagram number 501 - FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] ); + FFV1_2( w_fp[46], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[114] ); // Amplitude(s) for diagram number 501 - FFV1_0( w_fp[114], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[114], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 501 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10257,7 +10257,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 502 - FFV1_0( w_fp[46], w_fp[77], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 502 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10273,7 +10273,7 @@ namespace mg5amcCpu 
// (none) // Amplitude(s) for diagram number 503 - FFV1_0( w_fp[41], w_fp[62], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[62], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 503 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10284,10 +10284,10 @@ namespace mg5amcCpu // *** DIAGRAM 504 OF 1240 *** // Wavefunction(s) for diagram number 504 - FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] ); + FFV1_2( w_fp[41], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[113] ); // Amplitude(s) for diagram number 504 - FFV1_0( w_fp[113], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[113], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 504 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10301,7 +10301,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 505 - FFV1_0( w_fp[41], w_fp[77], w_fp[102], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[102], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 505 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10317,7 +10317,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 506 - FFV1_0( w_fp[3], w_fp[62], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[62], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 506 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10333,7 +10333,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 507 - FFV1_0( w_fp[99], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 507 ) numerators_sv += cxabs2( amp_sv[0] ); 
if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10346,10 +10346,10 @@ namespace mg5amcCpu // *** DIAGRAM 508 OF 1240 *** // Wavefunction(s) for diagram number 508 - VVV1P0_1( w_fp[92], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[62] ); + VVV1P0_1( w_fp[92], w_fp[27], COUPs[0], 0., 0., w_fp[62] ); // Amplitude(s) for diagram number 508 - FFV1_0( w_fp[3], w_fp[77], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 508 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10366,10 +10366,10 @@ namespace mg5amcCpu // *** DIAGRAM 509 OF 1240 *** // Wavefunction(s) for diagram number 509 - FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[112] ); + FFV1_1( w_fp[2], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[112] ); // Amplitude(s) for diagram number 509 - FFV1_0( w_fp[56], w_fp[112], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[112], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 509 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10383,7 +10383,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 510 - FFV1_0( w_fp[21], w_fp[112], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[112], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 510 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10397,7 +10397,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 511 - VVV1_0( w_fp[102], w_fp[103], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[102], w_fp[103], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 511 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10417,7 
+10417,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 512 - FFV1_0( w_fp[21], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 512 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10433,7 +10433,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 513 - VVV1_0( w_fp[104], w_fp[103], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[103], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 513 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10453,7 +10453,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 514 - FFV1_0( w_fp[56], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 514 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10469,7 +10469,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 515 - FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10478,7 +10478,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10487,7 +10487,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10500,10 +10500,10 @@ namespace mg5amcCpu // *** DIAGRAM 516 OF 1240 *** // Wavefunction(s) for diagram number 516 - FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] ); + FFV1_2( w_fp[52], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[86] ); // Amplitude(s) for diagram number 516 - FFV1_0( w_fp[86], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 516 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10514,10 +10514,10 @@ namespace mg5amcCpu // *** DIAGRAM 517 OF 1240 *** // Wavefunction(s) for diagram number 517 - FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); + FFV1_1( w_fp[33], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 517 - FFV1_0( w_fp[52], w_fp[98], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[98], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 517 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10531,7 +10531,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 518 - FFV1_0( w_fp[52], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 518 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10547,7 +10547,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 519 - 
FFV1_0( w_fp[86], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 519 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10558,10 +10558,10 @@ namespace mg5amcCpu // *** DIAGRAM 520 OF 1240 *** // Wavefunction(s) for diagram number 520 - FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] ); + FFV1_1( w_fp[47], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[106] ); // Amplitude(s) for diagram number 520 - FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 520 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10575,7 +10575,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 521 - FFV1_0( w_fp[52], w_fp[47], w_fp[102], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[102], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 521 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10591,7 +10591,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 522 - FFV1_0( w_fp[86], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 522 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10607,7 +10607,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 523 - FFV1_0( w_fp[52], w_fp[112], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[112], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 523 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( 
amp_sv[0] ); @@ -10623,7 +10623,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 524 - FFV1_0( w_fp[52], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 524 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10643,7 +10643,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 525 - FFV1_0( w_fp[65], w_fp[112], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[112], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 525 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10659,7 +10659,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 526 - FFV1_0( w_fp[3], w_fp[112], w_fp[64], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[64], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 526 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10679,7 +10679,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 527 - FFV1_0( w_fp[99], w_fp[93], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[93], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 527 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10695,7 +10695,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 528 - FFV1_0( w_fp[99], w_fp[2], w_fp[64], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[64], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 528 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10715,7 +10715,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 529 - 
FFV1_0( w_fp[3], w_fp[93], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[93], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 529 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10735,7 +10735,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 530 - FFV1_0( w_fp[65], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 530 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10755,7 +10755,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 531 - VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -10772,7 +10772,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -10789,7 +10789,7 @@ namespace mg5amcCpu jamp_sv[105] -= amp_sv[0]; jamp_sv[110] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -10810,10 +10810,10 @@ namespace mg5amcCpu // *** DIAGRAM 532 OF 1240 *** // Wavefunction(s) for diagram number 532 - VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 532 - VVV1_0( w_fp[8], w_fp[6], w_fp[86], 
COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[86], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 532 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10838,10 +10838,10 @@ namespace mg5amcCpu // *** DIAGRAM 533 OF 1240 *** // Wavefunction(s) for diagram number 533 - VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[101] ); + VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 0., 0., w_fp[101] ); // Amplitude(s) for diagram number 533 - VVV1_0( w_fp[61], w_fp[6], w_fp[101], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[101], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 533 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10869,7 +10869,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 534 - VVV1_0( w_fp[61], w_fp[8], w_fp[104], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[8], w_fp[104], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 534 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10897,7 +10897,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 535 - FFV1_0( w_fp[3], w_fp[47], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 535 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10917,7 +10917,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 536 - FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 536 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10933,7 +10933,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 537 - FFV1_0( w_fp[99], w_fp[47], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 537 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10949,7 +10949,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 538 - FFV1_0( w_fp[41], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 538 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10969,7 +10969,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 539 - FFV1_0( w_fp[41], w_fp[112], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[112], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 539 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10985,7 +10985,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 540 - FFV1_0( w_fp[113], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[113], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 540 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11001,7 +11001,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 541 - FFV1_0( w_fp[76], w_fp[112], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[112], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 541 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11017,7 +11017,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 542 - FFV1_0( w_fp[3], w_fp[112], w_fp[74], COUPs[1], 1.0, 
&_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[74], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 542 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11037,7 +11037,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 543 - FFV1_0( w_fp[99], w_fp[97], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[97], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 543 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11053,7 +11053,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 544 - FFV1_0( w_fp[99], w_fp[2], w_fp[74], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[74], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 544 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11073,7 +11073,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 545 - FFV1_0( w_fp[3], w_fp[97], w_fp[102], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[97], w_fp[102], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 545 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11093,7 +11093,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 546 - FFV1_0( w_fp[76], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 546 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11113,7 +11113,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 547 - VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[13] += 
amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -11130,7 +11130,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[103] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[13] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; @@ -11147,7 +11147,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[43] += amp_sv[0]; @@ -11168,10 +11168,10 @@ namespace mg5amcCpu // *** DIAGRAM 548 OF 1240 *** // Wavefunction(s) for diagram number 548 - VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 548 - VVV1_0( w_fp[8], w_fp[4], w_fp[86], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[86], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 548 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11199,7 +11199,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 549 - VVV1_0( w_fp[72], w_fp[4], w_fp[101], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[101], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 549 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11227,7 +11227,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 550 - VVV1_0( w_fp[72], w_fp[8], w_fp[102], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[8], w_fp[102], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 550 ) numerators_sv 
+= cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11255,7 +11255,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 551 - FFV1_0( w_fp[3], w_fp[33], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 551 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11275,7 +11275,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 552 - FFV1_0( w_fp[3], w_fp[98], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[98], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 552 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11291,7 +11291,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 553 - FFV1_0( w_fp[99], w_fp[33], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 553 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11307,7 +11307,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 554 - FFV1_0( w_fp[46], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 554 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11327,7 +11327,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 555 - FFV1_0( w_fp[46], w_fp[112], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[112], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 555 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11343,7 +11343,7 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 556 - FFV1_0( w_fp[114], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[114], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 556 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11356,10 +11356,10 @@ namespace mg5amcCpu // *** DIAGRAM 557 OF 1240 *** // Wavefunction(s) for diagram number 557 - VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 557 - VVV1_0( w_fp[86], w_fp[13], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[13], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 557 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11387,7 +11387,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 558 - VVV1_0( w_fp[86], w_fp[11], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[11], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 558 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11415,7 +11415,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 559 - VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -11432,7 +11432,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -11449,7 +11449,7 @@ namespace 
mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -11473,7 +11473,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 560 - VVV1_0( w_fp[102], w_fp[108], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[102], w_fp[108], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 560 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11501,7 +11501,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 561 - VVV1_0( w_fp[102], w_fp[1], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[102], w_fp[1], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 561 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11529,7 +11529,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 562 - VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], &_fp[0] ); jamp_sv[15] += amp_sv[0]; jamp_sv[26] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -11546,7 +11546,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[43] += amp_sv[0]; @@ -11563,7 +11563,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 
&_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -11587,7 +11587,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 563 - VVV1_0( w_fp[104], w_fp[108], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[108], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 563 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11615,7 +11615,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 564 - VVV1_0( w_fp[104], w_fp[1], w_fp[13], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[1], w_fp[13], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 564 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11643,7 +11643,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 565 - VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -11660,7 +11660,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[101] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -11677,7 +11677,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -11698,12 +11698,12 @@ namespace mg5amcCpu // *** DIAGRAM 566 OF 1240 *** // Wavefunction(s) for diagram number 566 - VVVV1P0_1( 
w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[105] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[107] ); // Amplitude(s) for diagram number 566 - VVV1_0( w_fp[8], w_fp[6], w_fp[105], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[105], COUPs[0], &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -11720,7 +11720,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[95], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[95], COUPs[0], &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -11737,7 +11737,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[107], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[107], COUPs[0], &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -11758,12 +11758,12 @@ namespace mg5amcCpu // *** DIAGRAM 567 OF 1240 *** // Wavefunction(s) for diagram number 567 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[96] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[90] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[88] ); // Amplitude(s) for diagram number 567 - VVV1_0( w_fp[8], w_fp[4], w_fp[96], COUPs[0], 1.0, &_fp[0] ); + 
VVV1_0( w_fp[8], w_fp[4], w_fp[96], COUPs[0], &_fp[0] ); jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -11780,7 +11780,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[103] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -11797,7 +11797,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[88], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[88], COUPs[0], &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -11821,7 +11821,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 568 - VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], &_fp[0] ); jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -11838,7 +11838,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -11855,7 +11855,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[101] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], &_fp[0] ); jamp_sv[15] += amp_sv[0]; jamp_sv[26] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -11879,7 +11879,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 569 - VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); jamp_sv[12] += amp_sv[0]; 
jamp_sv[13] -= amp_sv[0]; jamp_sv[26] -= amp_sv[0]; @@ -11896,7 +11896,7 @@ namespace mg5amcCpu jamp_sv[110] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[15] -= amp_sv[0]; @@ -11913,7 +11913,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[110] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -11937,7 +11937,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 570 - VVV1_0( w_fp[8], w_fp[27], w_fp[86], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[27], w_fp[86], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 570 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11965,7 +11965,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 571 - VVV1_0( w_fp[1], w_fp[27], w_fp[101], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[101], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 571 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11993,7 +11993,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 572 - VVV1_0( w_fp[1], w_fp[8], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 572 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12021,7 +12021,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 573 - VVV1_0( w_fp[86], 
w_fp[37], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[37], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 573 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12041,7 +12041,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 574 - FFV1_0( w_fp[3], w_fp[36], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[36], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 574 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12057,7 +12057,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 575 - FFV1_0( w_fp[99], w_fp[100], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[100], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 575 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12071,7 +12071,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 576 - FFV1_0( w_fp[99], w_fp[36], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[36], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 576 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12085,7 +12085,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 577 - FFV1_0( w_fp[3], w_fp[100], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[100], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 577 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12101,7 +12101,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 578 - VVV1_0( w_fp[104], w_fp[1], w_fp[37], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[1], w_fp[37], COUPs[0], &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 578 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12121,7 +12121,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 579 - FFV1_0( w_fp[3], w_fp[33], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[96], COUPs[1], &_fp[0] ); jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12130,7 +12130,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], &_fp[0] ); jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12139,7 +12139,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[88], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[88], COUPs[1], &_fp[0] ); jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12155,7 +12155,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 580 - FFV1_0( w_fp[41], w_fp[33], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 580 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12171,7 +12171,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 581 - FFV1_0( w_fp[41], w_fp[98], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[98], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( 
channelId == 581 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12185,7 +12185,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 582 - FFV1_0( w_fp[113], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[113], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 582 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12199,7 +12199,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 583 - VVV1_0( w_fp[86], w_fp[51], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[51], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 583 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12219,7 +12219,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 584 - FFV1_0( w_fp[3], w_fp[49], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[49], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 584 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12235,7 +12235,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 585 - FFV1_0( w_fp[99], w_fp[91], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[91], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 585 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12249,7 +12249,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 586 - FFV1_0( w_fp[99], w_fp[49], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[49], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 586 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( 
amp_sv[0] ); @@ -12263,7 +12263,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 587 - FFV1_0( w_fp[3], w_fp[91], w_fp[102], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[91], w_fp[102], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 587 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12279,7 +12279,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 588 - VVV1_0( w_fp[102], w_fp[1], w_fp[51], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[102], w_fp[1], w_fp[51], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 588 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12299,7 +12299,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 589 - FFV1_0( w_fp[3], w_fp[47], w_fp[105], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[105], COUPs[1], &_fp[0] ); jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12308,7 +12308,7 @@ namespace mg5amcCpu jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[95], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[95], COUPs[1], &_fp[0] ); jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12317,7 +12317,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[107], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[107], COUPs[1], &_fp[0] ); jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; @@ 
-12333,7 +12333,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 590 - FFV1_0( w_fp[46], w_fp[47], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 590 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12349,7 +12349,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 591 - FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 591 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12363,7 +12363,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 592 - FFV1_0( w_fp[114], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[114], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 592 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12377,7 +12377,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 593 - VVV1_0( w_fp[86], w_fp[54], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[54], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 593 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12397,7 +12397,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 594 - FFV1_0( w_fp[53], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 594 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12413,7 +12413,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 595 - FFV1_0( 
w_fp[78], w_fp[112], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[112], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 595 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12427,7 +12427,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 596 - FFV1_0( w_fp[53], w_fp[112], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[112], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 596 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12441,7 +12441,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 597 - FFV1_0( w_fp[78], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 597 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12457,7 +12457,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 598 - VVV1_0( w_fp[104], w_fp[1], w_fp[54], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[1], w_fp[54], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 598 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12477,7 +12477,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 599 - FFV1_0( w_fp[46], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12486,7 +12486,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); 
+ FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12495,7 +12495,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[88], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[88], COUPs[1], &_fp[0] ); jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12511,7 +12511,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 600 - VVV1_0( w_fp[86], w_fp[20], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[20], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 600 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12531,7 +12531,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 601 - FFV1_0( w_fp[28], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 601 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12547,7 +12547,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 602 - FFV1_0( w_fp[60], w_fp[112], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[112], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 602 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12561,7 +12561,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 603 - FFV1_0( w_fp[28], w_fp[112], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[112], w_fp[1], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 603 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12575,7 +12575,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 604 - FFV1_0( w_fp[60], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 604 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12591,7 +12591,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 605 - VVV1_0( w_fp[102], w_fp[1], w_fp[20], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[102], w_fp[1], w_fp[20], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 605 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12611,7 +12611,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 606 - FFV1_0( w_fp[41], w_fp[2], w_fp[105], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[105], COUPs[1], &_fp[0] ); jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12620,7 +12620,7 @@ namespace mg5amcCpu jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12629,7 +12629,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[107], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[107], COUPs[1], &_fp[0] ); jamp_sv[12] 
+= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12645,7 +12645,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 607 - FFV1_0( w_fp[3], w_fp[15], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[15], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 607 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12665,7 +12665,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 608 - FFV1_0( w_fp[14], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 608 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12685,7 +12685,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 609 - FFV1_0( w_fp[3], w_fp[112], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 609 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12705,7 +12705,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 610 - FFV1_0( w_fp[14], w_fp[112], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[112], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 610 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12721,7 +12721,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 611 - FFV1_0( w_fp[99], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 611 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( 
amp_sv[0] ); @@ -12741,7 +12741,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 612 - FFV1_0( w_fp[99], w_fp[15], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[15], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 612 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12757,7 +12757,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 613 - FFV1_0( w_fp[3], w_fp[112], w_fp[57], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[57], COUPs[1], &_fp[0] ); jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12766,7 +12766,7 @@ namespace mg5amcCpu jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[112], w_fp[81], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[81], COUPs[1], &_fp[0] ); jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12775,7 +12775,7 @@ namespace mg5amcCpu jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[112], w_fp[82], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[82], COUPs[1], &_fp[0] ); jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -12791,7 +12791,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 614 - FFV1_0( w_fp[99], w_fp[2], w_fp[57], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[57], COUPs[1], &_fp[0] ); jamp_sv[34] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12800,7 +12800,7 @@ namespace mg5amcCpu jamp_sv[69] 
-= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -12809,7 +12809,7 @@ namespace mg5amcCpu jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -12825,7 +12825,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 615 - VVV1_0( w_fp[92], w_fp[57], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[57], w_fp[8], COUPs[0], &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[15] += amp_sv[0]; @@ -12842,7 +12842,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[81], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[81], w_fp[8], COUPs[0], &_fp[0] ); jamp_sv[13] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; @@ -12859,7 +12859,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[82], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[82], w_fp[8], COUPs[0], &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -12880,11 +12880,11 @@ namespace mg5amcCpu // *** DIAGRAM 616 OF 1240 *** // Wavefunction(s) for diagram number 616 - VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[92] ); - FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], 
w_fp[99] ); + VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 0., 0., w_fp[92] ); + FFV1_2( w_fp[3], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 616 - FFV1_0( w_fp[99], w_fp[87], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[87], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 616 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12898,7 +12898,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 617 - FFV1_0( w_fp[99], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 617 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12909,10 +12909,10 @@ namespace mg5amcCpu // *** DIAGRAM 618 OF 1240 *** // Wavefunction(s) for diagram number 618 - VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[112] ); + VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 0., 0., w_fp[112] ); // Amplitude(s) for diagram number 618 - VVV1_0( w_fp[112], w_fp[34], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[112], w_fp[34], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 618 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12932,7 +12932,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 619 - FFV1_0( w_fp[3], w_fp[9], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[112], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 619 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12945,10 +12945,10 @@ namespace mg5amcCpu // *** DIAGRAM 620 OF 1240 *** // Wavefunction(s) for diagram number 620 - VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], 
w_fp[5], COUPs[0], 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 620 - VVV1_0( w_fp[86], w_fp[34], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[34], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 620 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12968,7 +12968,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 621 - FFV1_0( w_fp[3], w_fp[87], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[87], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 621 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12981,12 +12981,12 @@ namespace mg5amcCpu // *** DIAGRAM 622 OF 1240 *** // Wavefunction(s) for diagram number 622 - VVVV1P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[107] ); - VVVV3P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[105] ); + VVVV1P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[107] ); + VVVV3P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[105] ); // Amplitude(s) for diagram number 622 - FFV1_0( w_fp[3], w_fp[77], w_fp[107], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[107], COUPs[1], &_fp[0] ); jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12995,7 +12995,7 @@ namespace mg5amcCpu jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], &_fp[0] ); jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += 
cxtype( 0, 1 ) * amp_sv[0]; @@ -13004,7 +13004,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[105], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[105], COUPs[1], &_fp[0] ); jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0]; @@ -13017,10 +13017,10 @@ namespace mg5amcCpu // *** DIAGRAM 623 OF 1240 *** // Wavefunction(s) for diagram number 623 - FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] ); + FFV1_1( w_fp[77], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[102] ); // Amplitude(s) for diagram number 623 - FFV1_0( w_fp[46], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 623 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13031,10 +13031,10 @@ namespace mg5amcCpu // *** DIAGRAM 624 OF 1240 *** // Wavefunction(s) for diagram number 624 - FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] ); + FFV1_2( w_fp[46], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[88] ); // Amplitude(s) for diagram number 624 - FFV1_0( w_fp[88], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 624 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13048,7 +13048,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 625 - FFV1_0( w_fp[46], w_fp[77], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 625 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += 
cxabs2( amp_sv[0] ); @@ -13064,7 +13064,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 626 - FFV1_0( w_fp[38], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 626 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13075,10 +13075,10 @@ namespace mg5amcCpu // *** DIAGRAM 627 OF 1240 *** // Wavefunction(s) for diagram number 627 - FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] ); + FFV1_2( w_fp[38], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[90] ); // Amplitude(s) for diagram number 627 - FFV1_0( w_fp[90], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 627 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13092,7 +13092,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 628 - FFV1_0( w_fp[38], w_fp[77], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[112], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 628 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13108,7 +13108,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 629 - FFV1_0( w_fp[3], w_fp[102], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 629 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13124,7 +13124,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 630 - FFV1_0( w_fp[99], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 630 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13137,10 +13137,10 @@ namespace mg5amcCpu // *** DIAGRAM 631 OF 1240 *** // Wavefunction(s) for diagram number 631 - VVV1P0_1( w_fp[92], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[102] ); + VVV1P0_1( w_fp[92], w_fp[24], COUPs[0], 0., 0., w_fp[102] ); // Amplitude(s) for diagram number 631 - FFV1_0( w_fp[3], w_fp[77], w_fp[102], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[102], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 631 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13157,10 +13157,10 @@ namespace mg5amcCpu // *** DIAGRAM 632 OF 1240 *** // Wavefunction(s) for diagram number 632 - FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[96] ); + FFV1_1( w_fp[2], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[96] ); // Amplitude(s) for diagram number 632 - FFV1_0( w_fp[56], w_fp[96], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[96], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 632 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13174,7 +13174,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 633 - FFV1_0( w_fp[22], w_fp[96], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[96], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 633 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13188,7 +13188,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 634 - VVV1_0( w_fp[112], w_fp[103], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[112], w_fp[103], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 634 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13208,7 +13208,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 635 - FFV1_0( w_fp[22], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 635 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13224,7 +13224,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 636 - VVV1_0( w_fp[86], w_fp[103], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[103], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 636 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13244,7 +13244,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 637 - FFV1_0( w_fp[56], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 637 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13260,7 +13260,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 638 - FFV1_0( w_fp[52], w_fp[2], w_fp[107], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[107], COUPs[1], &_fp[0] ); jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -13269,7 +13269,7 @@ namespace mg5amcCpu jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -13278,7 
+13278,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[105], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[105], COUPs[1], &_fp[0] ); jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; @@ -13291,10 +13291,10 @@ namespace mg5amcCpu // *** DIAGRAM 639 OF 1240 *** // Wavefunction(s) for diagram number 639 - FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] ); + FFV1_2( w_fp[52], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[104] ); // Amplitude(s) for diagram number 639 - FFV1_0( w_fp[104], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 639 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13305,10 +13305,10 @@ namespace mg5amcCpu // *** DIAGRAM 640 OF 1240 *** // Wavefunction(s) for diagram number 640 - FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] ); + FFV1_1( w_fp[33], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[114] ); // Amplitude(s) for diagram number 640 - FFV1_0( w_fp[52], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 640 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13322,7 +13322,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 641 - FFV1_0( w_fp[52], w_fp[33], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 641 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13338,7 +13338,7 
@@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 642 - FFV1_0( w_fp[104], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 642 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13349,10 +13349,10 @@ namespace mg5amcCpu // *** DIAGRAM 643 OF 1240 *** // Wavefunction(s) for diagram number 643 - FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] ); + FFV1_1( w_fp[39], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[106] ); // Amplitude(s) for diagram number 643 - FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 643 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13366,7 +13366,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 644 - FFV1_0( w_fp[52], w_fp[39], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[112], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 644 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13382,7 +13382,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 645 - FFV1_0( w_fp[104], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 645 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13398,7 +13398,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 646 - FFV1_0( w_fp[52], w_fp[96], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[96], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 646 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13414,7 +13414,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 647 - FFV1_0( w_fp[52], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 647 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13434,7 +13434,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 648 - FFV1_0( w_fp[65], w_fp[96], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[96], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 648 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13450,7 +13450,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 649 - FFV1_0( w_fp[3], w_fp[96], w_fp[63], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[63], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 649 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13470,7 +13470,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 650 - FFV1_0( w_fp[99], w_fp[93], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[93], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 650 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13486,7 +13486,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 651 - FFV1_0( w_fp[99], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 651 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13506,7 
+13506,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 652 - FFV1_0( w_fp[3], w_fp[93], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[93], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 652 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13526,7 +13526,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 653 - FFV1_0( w_fp[65], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 653 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13546,7 +13546,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 654 - VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -13563,7 +13563,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[96] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[22] -= amp_sv[0]; @@ -13580,7 +13580,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -13601,10 +13601,10 @@ namespace mg5amcCpu // *** DIAGRAM 655 OF 1240 *** // Wavefunction(s) for diagram number 655 - VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 0., 0., w_fp[104] ); 
// Amplitude(s) for diagram number 655 - VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 655 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13629,10 +13629,10 @@ namespace mg5amcCpu // *** DIAGRAM 656 OF 1240 *** // Wavefunction(s) for diagram number 656 - VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[113] ); + VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 0., 0., w_fp[113] ); // Amplitude(s) for diagram number 656 - VVV1_0( w_fp[61], w_fp[5], w_fp[113], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[113], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 656 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13660,7 +13660,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 657 - VVV1_0( w_fp[61], w_fp[8], w_fp[86], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[8], w_fp[86], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 657 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13688,7 +13688,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 658 - FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 658 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13708,7 +13708,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 659 - FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 659 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -13724,7 +13724,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 660 - FFV1_0( w_fp[99], w_fp[39], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 660 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13740,7 +13740,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 661 - FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 661 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13760,7 +13760,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 662 - FFV1_0( w_fp[38], w_fp[96], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[96], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 662 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13776,7 +13776,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 663 - FFV1_0( w_fp[90], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 663 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13792,7 +13792,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 664 - FFV1_0( w_fp[71], w_fp[96], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[96], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 664 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13808,7 +13808,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 665 - FFV1_0( w_fp[3], w_fp[96], w_fp[69], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[69], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 665 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13828,7 +13828,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 666 - FFV1_0( w_fp[99], w_fp[94], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[94], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 666 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13844,7 +13844,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 667 - FFV1_0( w_fp[99], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 667 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13864,7 +13864,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 668 - FFV1_0( w_fp[3], w_fp[94], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[94], w_fp[112], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 668 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13884,7 +13884,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 669 - FFV1_0( w_fp[71], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 669 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13904,7 +13904,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 670 - VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); 
+ VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[19] += amp_sv[0]; jamp_sv[22] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -13921,7 +13921,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[97] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[19] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -13938,7 +13938,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; jamp_sv[37] += amp_sv[0]; @@ -13959,10 +13959,10 @@ namespace mg5amcCpu // *** DIAGRAM 671 OF 1240 *** // Wavefunction(s) for diagram number 671 - VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 671 - VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 671 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13990,7 +13990,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 672 - VVV1_0( w_fp[66], w_fp[4], w_fp[113], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[113], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 672 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14018,7 +14018,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 673 - VVV1_0( w_fp[66], w_fp[8], w_fp[112], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[8], w_fp[112], 
COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 673 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14046,7 +14046,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 674 - FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 674 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14066,7 +14066,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 675 - FFV1_0( w_fp[3], w_fp[114], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 675 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14082,7 +14082,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 676 - FFV1_0( w_fp[99], w_fp[33], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 676 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14098,7 +14098,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 677 - FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 677 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14118,7 +14118,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 678 - FFV1_0( w_fp[46], w_fp[96], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[96], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 678 ) numerators_sv += 
cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14134,7 +14134,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 679 - FFV1_0( w_fp[88], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 679 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14147,10 +14147,10 @@ namespace mg5amcCpu // *** DIAGRAM 680 OF 1240 *** // Wavefunction(s) for diagram number 680 - VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 680 - VVV1_0( w_fp[104], w_fp[13], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[13], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 680 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14178,7 +14178,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 681 - VVV1_0( w_fp[104], w_fp[10], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[10], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 681 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14206,7 +14206,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 682 - VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -14223,7 +14223,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], &_fp[0] ); 
jamp_sv[18] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -14240,7 +14240,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -14264,7 +14264,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 683 - VVV1_0( w_fp[112], w_fp[108], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[112], w_fp[108], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 683 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14292,7 +14292,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 684 - VVV1_0( w_fp[112], w_fp[1], w_fp[10], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[112], w_fp[1], w_fp[10], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 684 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14320,7 +14320,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 685 - VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], &_fp[0] ); jamp_sv[21] += amp_sv[0]; jamp_sv[28] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -14337,7 +14337,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; jamp_sv[37] += amp_sv[0]; @@ -14354,7 +14354,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], 
w_fp[5], w_fp[112], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -14378,7 +14378,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 686 - VVV1_0( w_fp[86], w_fp[108], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[108], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 686 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14406,7 +14406,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 687 - VVV1_0( w_fp[86], w_fp[1], w_fp[13], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[13], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 687 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14434,7 +14434,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 688 - VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], &_fp[0] ); jamp_sv[23] += amp_sv[0]; jamp_sv[29] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -14451,7 +14451,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -14468,7 +14468,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -14489,12 +14489,12 @@ namespace mg5amcCpu 
// *** DIAGRAM 689 OF 1240 *** // Wavefunction(s) for diagram number 689 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[98] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[62] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[101] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[98] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[62] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[101] ); // Amplitude(s) for diagram number 689 - VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -14511,7 +14511,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[96] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -14528,7 +14528,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[101], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[101], COUPs[0], &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -14549,12 +14549,12 @@ namespace mg5amcCpu // *** DIAGRAM 690 OF 1240 *** // Wavefunction(s) for diagram number 690 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[109] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[111] ); // Amplitude(s) for diagram 
number 690 - VVV1_0( w_fp[8], w_fp[4], w_fp[109], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[109], COUPs[0], &_fp[0] ); jamp_sv[19] += amp_sv[0]; jamp_sv[22] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -14571,7 +14571,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[97] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[110], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[110], COUPs[0], &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -14588,7 +14588,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[111], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[111], COUPs[0], &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -14612,7 +14612,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 691 - VVV1_0( w_fp[1], w_fp[8], w_fp[107], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[107], COUPs[0], &_fp[0] ); jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -14629,7 +14629,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[99] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], &_fp[0] ); jamp_sv[23] += amp_sv[0]; jamp_sv[29] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -14646,7 +14646,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[105], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[105], COUPs[0], &_fp[0] ); jamp_sv[21] += amp_sv[0]; jamp_sv[28] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -14670,7 +14670,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 692 - VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( 
w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[28] -= amp_sv[0]; @@ -14687,7 +14687,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[96] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; @@ -14704,7 +14704,7 @@ namespace mg5amcCpu jamp_sv[97] += amp_sv[0]; jamp_sv[99] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -14728,7 +14728,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 693 - VVV1_0( w_fp[8], w_fp[24], w_fp[104], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[24], w_fp[104], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 693 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14756,7 +14756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 694 - VVV1_0( w_fp[1], w_fp[24], w_fp[113], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[113], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 694 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14784,7 +14784,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 695 - VVV1_0( w_fp[1], w_fp[8], w_fp[102], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[102], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 695 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14812,7 +14812,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 696 - VVV1_0( w_fp[104], w_fp[37], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[37], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 696 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14832,7 +14832,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 697 - FFV1_0( w_fp[3], w_fp[35], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[35], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 697 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14848,7 +14848,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 698 - FFV1_0( w_fp[99], w_fp[100], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[100], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 698 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14862,7 +14862,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 699 - FFV1_0( w_fp[99], w_fp[35], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[35], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 699 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14876,7 +14876,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 700 - FFV1_0( w_fp[3], w_fp[100], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[100], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 700 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14892,7 +14892,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 701 - VVV1_0( w_fp[86], w_fp[1], w_fp[37], 
COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[37], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 701 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14912,7 +14912,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 702 - FFV1_0( w_fp[3], w_fp[33], w_fp[109], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[109], COUPs[1], &_fp[0] ); jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -14921,7 +14921,7 @@ namespace mg5amcCpu jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[110], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[110], COUPs[1], &_fp[0] ); jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -14930,7 +14930,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[111], COUPs[1], &_fp[0] ); jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; @@ -14946,7 +14946,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 703 - FFV1_0( w_fp[38], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 703 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14962,7 +14962,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 704 - FFV1_0( w_fp[38], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + 
FFV1_0( w_fp[38], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 704 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14976,7 +14976,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 705 - FFV1_0( w_fp[90], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 705 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14990,7 +14990,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 706 - VVV1_0( w_fp[104], w_fp[45], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[45], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 706 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15010,7 +15010,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 707 - FFV1_0( w_fp[3], w_fp[43], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[43], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 707 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15026,7 +15026,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 708 - FFV1_0( w_fp[99], w_fp[89], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[89], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 708 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15040,7 +15040,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 709 - FFV1_0( w_fp[99], w_fp[43], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[43], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId 
== 709 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15054,7 +15054,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 710 - FFV1_0( w_fp[3], w_fp[89], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[89], w_fp[112], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 710 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15070,7 +15070,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 711 - VVV1_0( w_fp[112], w_fp[1], w_fp[45], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[112], w_fp[1], w_fp[45], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 711 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15090,7 +15090,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 712 - FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], &_fp[0] ); jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15099,7 +15099,7 @@ namespace mg5amcCpu jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], &_fp[0] ); jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15108,7 +15108,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[101], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[101], COUPs[1], &_fp[0] ); jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] -= 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15124,7 +15124,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 713 - FFV1_0( w_fp[46], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 713 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15140,7 +15140,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 714 - FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 714 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15154,7 +15154,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 715 - FFV1_0( w_fp[88], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 715 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15168,7 +15168,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 716 - VVV1_0( w_fp[104], w_fp[54], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[54], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 716 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15188,7 +15188,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 717 - FFV1_0( w_fp[7], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 717 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15204,7 +15204,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 718 - FFV1_0( w_fp[78], w_fp[96], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[96], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 718 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15218,7 +15218,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 719 - FFV1_0( w_fp[7], w_fp[96], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[96], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 719 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15232,7 +15232,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 720 - FFV1_0( w_fp[78], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 720 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15248,7 +15248,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 721 - VVV1_0( w_fp[86], w_fp[1], w_fp[54], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[54], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 721 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15268,7 +15268,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 722 - FFV1_0( w_fp[46], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15277,7 +15277,7 @@ namespace mg5amcCpu jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; - 
FFV1_0( w_fp[46], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15286,7 +15286,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15302,7 +15302,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 723 - VVV1_0( w_fp[104], w_fp[23], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[23], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 723 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15322,7 +15322,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 724 - FFV1_0( w_fp[25], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 724 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15338,7 +15338,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 725 - FFV1_0( w_fp[58], w_fp[96], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[96], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 725 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15352,7 +15352,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 726 - FFV1_0( w_fp[25], w_fp[96], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], 
w_fp[96], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 726 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15366,7 +15366,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 727 - FFV1_0( w_fp[58], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 727 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15382,7 +15382,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 728 - VVV1_0( w_fp[112], w_fp[1], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[112], w_fp[1], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 728 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15402,7 +15402,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 729 - FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15411,7 +15411,7 @@ namespace mg5amcCpu jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15420,7 +15420,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], 
w_fp[101], COUPs[1], &_fp[0] ); jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15436,7 +15436,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 730 - FFV1_0( w_fp[3], w_fp[17], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 730 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15456,7 +15456,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 731 - FFV1_0( w_fp[26], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 731 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15476,7 +15476,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 732 - FFV1_0( w_fp[3], w_fp[96], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[59], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 732 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15496,7 +15496,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 733 - FFV1_0( w_fp[26], w_fp[96], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[96], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 733 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15512,7 +15512,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 734 - FFV1_0( w_fp[99], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 734 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15532,7 +15532,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 735 - FFV1_0( w_fp[99], w_fp[17], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[17], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 735 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15548,7 +15548,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 736 - FFV1_0( w_fp[3], w_fp[96], w_fp[73], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[73], COUPs[1], &_fp[0] ); jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15557,7 +15557,7 @@ namespace mg5amcCpu jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[96], w_fp[79], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[79], COUPs[1], &_fp[0] ); jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15566,7 +15566,7 @@ namespace mg5amcCpu jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[96], w_fp[80], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[80], COUPs[1], &_fp[0] ); jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -15582,7 +15582,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 737 - FFV1_0( w_fp[99], w_fp[2], w_fp[73], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[73], COUPs[1], &_fp[0] ); jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[38] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15591,7 
+15591,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); jamp_sv[38] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -15600,7 +15600,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -15616,7 +15616,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 738 - VVV1_0( w_fp[92], w_fp[73], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[73], w_fp[8], COUPs[0], &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -15633,7 +15633,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[92], w_fp[79], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[79], w_fp[8], COUPs[0], &_fp[0] ); jamp_sv[19] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -15650,7 +15650,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[92], w_fp[80], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[80], w_fp[8], COUPs[0], &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[22] -= amp_sv[0]; @@ -15671,10 +15671,10 @@ namespace mg5amcCpu // *** DIAGRAM 739 OF 1240 *** // Wavefunction(s) for diagram number 739 - FFV1_1( w_fp[77], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[92] ); + FFV1_1( 
w_fp[77], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[92] ); // Amplitude(s) for diagram number 739 - FFV1_0( w_fp[7], w_fp[92], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[92], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 739 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15687,7 +15687,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 740 - FFV1_0( w_fp[53], w_fp[92], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[92], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 740 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15697,10 +15697,10 @@ namespace mg5amcCpu // *** DIAGRAM 741 OF 1240 *** // Wavefunction(s) for diagram number 741 - FFV1_2( w_fp[46], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); + FFV1_2( w_fp[46], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 741 - FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 741 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15713,7 +15713,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 742 - FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 742 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15726,7 +15726,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 743 - FFV1_0( w_fp[53], w_fp[9], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[9], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 743 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15739,7 +15739,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 744 - FFV1_0( w_fp[7], w_fp[85], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[85], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 744 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15752,7 +15752,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 745 - FFV1_0( w_fp[46], w_fp[92], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[92], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 745 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15766,7 +15766,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 746 - FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 746 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15777,10 +15777,10 @@ namespace mg5amcCpu // *** DIAGRAM 747 OF 1240 *** // Wavefunction(s) for diagram number 747 - VVV1P0_1( w_fp[0], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[96] ); + VVV1P0_1( w_fp[0], w_fp[29], COUPs[0], 0., 0., w_fp[96] ); // Amplitude(s) for diagram number 747 - FFV1_0( w_fp[46], w_fp[77], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[96], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 747 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15796,7 +15796,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 748 - FFV1_0( w_fp[25], w_fp[92], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[92], w_fp[6], 
COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 748 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15809,7 +15809,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 749 - FFV1_0( w_fp[48], w_fp[92], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[92], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 749 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15819,10 +15819,10 @@ namespace mg5amcCpu // *** DIAGRAM 750 OF 1240 *** // Wavefunction(s) for diagram number 750 - FFV1_2( w_fp[38], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] ); + FFV1_2( w_fp[38], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[104] ); // Amplitude(s) for diagram number 750 - FFV1_0( w_fp[104], w_fp[87], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[87], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 750 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15835,7 +15835,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 751 - FFV1_0( w_fp[104], w_fp[85], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[85], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 751 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15848,7 +15848,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 752 - FFV1_0( w_fp[48], w_fp[87], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[87], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 752 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15861,7 +15861,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 753 - 
FFV1_0( w_fp[25], w_fp[85], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[85], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 753 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15874,7 +15874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 754 - FFV1_0( w_fp[38], w_fp[92], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[92], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 754 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15888,7 +15888,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 755 - FFV1_0( w_fp[104], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 755 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15899,10 +15899,10 @@ namespace mg5amcCpu // *** DIAGRAM 756 OF 1240 *** // Wavefunction(s) for diagram number 756 - VVV1P0_1( w_fp[0], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[101] ); + VVV1P0_1( w_fp[0], w_fp[27], COUPs[0], 0., 0., w_fp[101] ); // Amplitude(s) for diagram number 756 - FFV1_0( w_fp[38], w_fp[77], w_fp[101], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[101], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 756 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15918,7 +15918,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 757 - FFV1_0( w_fp[28], w_fp[92], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[92], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 757 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ 
-15931,7 +15931,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 758 - FFV1_0( w_fp[40], w_fp[92], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[92], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 758 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15941,10 +15941,10 @@ namespace mg5amcCpu // *** DIAGRAM 759 OF 1240 *** // Wavefunction(s) for diagram number 759 - FFV1_2( w_fp[41], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] ); + FFV1_2( w_fp[41], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[62] ); // Amplitude(s) for diagram number 759 - FFV1_0( w_fp[62], w_fp[87], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[87], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 759 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15957,7 +15957,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 760 - FFV1_0( w_fp[62], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 760 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15970,7 +15970,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 761 - FFV1_0( w_fp[40], w_fp[87], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[87], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 761 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15983,7 +15983,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 762 - FFV1_0( w_fp[28], w_fp[9], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[9], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 762 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15996,7 +15996,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 763 - FFV1_0( w_fp[41], w_fp[92], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[92], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 763 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16010,7 +16010,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 764 - FFV1_0( w_fp[62], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 764 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16021,10 +16021,10 @@ namespace mg5amcCpu // *** DIAGRAM 765 OF 1240 *** // Wavefunction(s) for diagram number 765 - VVV1P0_1( w_fp[0], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[98] ); + VVV1P0_1( w_fp[0], w_fp[24], COUPs[0], 0., 0., w_fp[98] ); // Amplitude(s) for diagram number 765 - FFV1_0( w_fp[41], w_fp[77], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 765 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16040,7 +16040,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 766 - FFV1_0( w_fp[26], w_fp[92], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[92], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 766 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16054,7 +16054,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 767 - FFV1_0( w_fp[3], w_fp[92], w_fp[42], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], 
w_fp[42], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 767 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16070,7 +16070,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 768 - VVV1_0( w_fp[98], w_fp[34], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[98], w_fp[34], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 768 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16090,7 +16090,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 769 - FFV1_0( w_fp[3], w_fp[85], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[85], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 769 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16106,7 +16106,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 770 - VVV1_0( w_fp[0], w_fp[34], w_fp[42], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[34], w_fp[42], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 770 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16126,7 +16126,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 771 - FFV1_0( w_fp[26], w_fp[85], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[85], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 771 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16137,12 +16137,12 @@ namespace mg5amcCpu // *** DIAGRAM 772 OF 1240 *** // Wavefunction(s) for diagram number 772 - VVVV1P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[85] ); - VVVV3P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] ); - VVVV4P0_1( w_fp[0], 
w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 0., 0., w_fp[85] ); + VVVV3P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 0., 0., w_fp[112] ); + VVVV4P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 772 - FFV1_0( w_fp[3], w_fp[77], w_fp[85], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[85], COUPs[1], &_fp[0] ); jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; @@ -16151,7 +16151,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[112], COUPs[1], &_fp[0] ); jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16160,7 +16160,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], &_fp[0] ); jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16176,7 +16176,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 773 - FFV1_0( w_fp[14], w_fp[92], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[92], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 773 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16190,7 +16190,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 774 - FFV1_0( w_fp[3], w_fp[92], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[16], 
COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 774 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16206,7 +16206,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 775 - VVV1_0( w_fp[101], w_fp[34], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[101], w_fp[34], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 775 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16226,7 +16226,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 776 - FFV1_0( w_fp[3], w_fp[9], w_fp[101], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[101], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 776 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16242,7 +16242,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 777 - VVV1_0( w_fp[0], w_fp[34], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[34], w_fp[16], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 777 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16262,7 +16262,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 778 - FFV1_0( w_fp[14], w_fp[9], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[9], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 778 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16273,12 +16273,12 @@ namespace mg5amcCpu // *** DIAGRAM 779 OF 1240 *** // Wavefunction(s) for diagram number 779 - VVVV1P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[9] ); - VVVV3P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[0], w_fp[27], w_fp[5], 
COUPs[2], 1.0, 0., 0., w_fp[109] ); + VVVV1P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 0., 0., w_fp[9] ); + VVVV3P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 0., 0., w_fp[109] ); // Amplitude(s) for diagram number 779 - FFV1_0( w_fp[3], w_fp[77], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[9], COUPs[1], &_fp[0] ); jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16287,7 +16287,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], &_fp[0] ); jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16296,7 +16296,7 @@ namespace mg5amcCpu jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], &_fp[0] ); jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16312,7 +16312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 780 - FFV1_0( w_fp[12], w_fp[92], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[92], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 780 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16326,7 +16326,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 781 - FFV1_0( w_fp[3], w_fp[92], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[19], COUPs[1], &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 781 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16342,7 +16342,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 782 - VVV1_0( w_fp[96], w_fp[34], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[96], w_fp[34], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 782 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16362,7 +16362,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 783 - FFV1_0( w_fp[3], w_fp[87], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[87], w_fp[96], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 783 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16378,7 +16378,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 784 - VVV1_0( w_fp[0], w_fp[34], w_fp[19], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[34], w_fp[19], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 784 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16398,7 +16398,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 785 - FFV1_0( w_fp[12], w_fp[87], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[87], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 785 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16409,12 +16409,12 @@ namespace mg5amcCpu // *** DIAGRAM 786 OF 1240 *** // Wavefunction(s) for diagram number 786 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[87] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[34] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 
0., w_fp[86] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 0., 0., w_fp[87] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 0., 0., w_fp[34] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 786 - FFV1_0( w_fp[3], w_fp[77], w_fp[87], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[87], COUPs[1], &_fp[0] ); jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; @@ -16423,7 +16423,7 @@ namespace mg5amcCpu jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[34], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[34], COUPs[1], &_fp[0] ); jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16432,7 +16432,7 @@ namespace mg5amcCpu jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[86], COUPs[1], &_fp[0] ); jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16448,17 +16448,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 787 - FFV1_0( w_fp[3], w_fp[92], w_fp[30], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[30], COUPs[1], &_fp[0] ); jamp_sv[24] += amp_sv[0]; jamp_sv[25] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[92], w_fp[31], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[31], COUPs[1], &_fp[0] ); jamp_sv[25] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[92], w_fp[32], COUPs[1], 1.0, &_fp[0] ); + 
FFV1_0( w_fp[3], w_fp[92], w_fp[32], COUPs[1], &_fp[0] ); jamp_sv[24] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -16467,12 +16467,12 @@ namespace mg5amcCpu // *** DIAGRAM 788 OF 1240 *** // Wavefunction(s) for diagram number 788 - VVV1P0_1( w_fp[0], w_fp[30], COUPs[0], 1.0, 0., 0., w_fp[92] ); - VVV1P0_1( w_fp[0], w_fp[31], COUPs[0], 1.0, 0., 0., w_fp[88] ); - VVV1P0_1( w_fp[0], w_fp[32], COUPs[0], 1.0, 0., 0., w_fp[106] ); + VVV1P0_1( w_fp[0], w_fp[30], COUPs[0], 0., 0., w_fp[92] ); + VVV1P0_1( w_fp[0], w_fp[31], COUPs[0], 0., 0., w_fp[88] ); + VVV1P0_1( w_fp[0], w_fp[32], COUPs[0], 0., 0., w_fp[106] ); // Amplitude(s) for diagram number 788 - FFV1_0( w_fp[3], w_fp[77], w_fp[92], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[92], COUPs[1], &_fp[0] ); jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; @@ -16481,7 +16481,7 @@ namespace mg5amcCpu jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], &_fp[0] ); jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; @@ -16490,7 +16490,7 @@ namespace mg5amcCpu jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[106], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[106], COUPs[1], &_fp[0] ); jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16503,10 +16503,10 @@ namespace mg5amcCpu // *** DIAGRAM 789 OF 1240 *** // Wavefunction(s) for diagram number 789 - FFV1_2( w_fp[52], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] ); + FFV1_2( w_fp[52], 
w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[90] ); // Amplitude(s) for diagram number 789 - FFV1_0( w_fp[90], w_fp[35], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[35], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 789 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16519,7 +16519,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 790 - FFV1_0( w_fp[90], w_fp[36], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[36], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 790 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16529,10 +16529,10 @@ namespace mg5amcCpu // *** DIAGRAM 791 OF 1240 *** // Wavefunction(s) for diagram number 791 - FFV1_1( w_fp[33], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] ); + FFV1_1( w_fp[33], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[114] ); // Amplitude(s) for diagram number 791 - FFV1_0( w_fp[22], w_fp[114], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[114], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 791 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16545,7 +16545,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 792 - FFV1_0( w_fp[21], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 792 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16558,7 +16558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 793 - FFV1_0( w_fp[22], w_fp[36], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[36], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 793 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16571,7 +16571,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 794 - FFV1_0( w_fp[21], w_fp[35], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[35], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 794 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16584,7 +16584,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 795 - FFV1_0( w_fp[90], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 795 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16598,7 +16598,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 796 - FFV1_0( w_fp[52], w_fp[114], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[114], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 796 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16612,7 +16612,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 797 - FFV1_0( w_fp[52], w_fp[33], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[96], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 797 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16628,7 +16628,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 798 - FFV1_0( w_fp[90], w_fp[43], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[43], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 798 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ 
-16641,7 +16641,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 799 - FFV1_0( w_fp[90], w_fp[44], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[44], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 799 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16651,10 +16651,10 @@ namespace mg5amcCpu // *** DIAGRAM 800 OF 1240 *** // Wavefunction(s) for diagram number 800 - FFV1_1( w_fp[39], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] ); + FFV1_1( w_fp[39], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[102] ); // Amplitude(s) for diagram number 800 - FFV1_0( w_fp[56], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 800 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16667,7 +16667,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 801 - FFV1_0( w_fp[21], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 801 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16680,7 +16680,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 802 - FFV1_0( w_fp[56], w_fp[44], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[44], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 802 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16693,7 +16693,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 803 - FFV1_0( w_fp[21], w_fp[43], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[43], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 803 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16706,7 +16706,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 804 - FFV1_0( w_fp[90], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 804 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16720,7 +16720,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 805 - FFV1_0( w_fp[52], w_fp[102], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[102], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 805 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16734,7 +16734,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 806 - FFV1_0( w_fp[52], w_fp[39], w_fp[101], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[101], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 806 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16750,7 +16750,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 807 - FFV1_0( w_fp[90], w_fp[49], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[49], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 807 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16763,7 +16763,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 808 - FFV1_0( w_fp[90], w_fp[50], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[50], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 808 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ 
-16773,10 +16773,10 @@ namespace mg5amcCpu // *** DIAGRAM 809 OF 1240 *** // Wavefunction(s) for diagram number 809 - FFV1_1( w_fp[47], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] ); + FFV1_1( w_fp[47], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[113] ); // Amplitude(s) for diagram number 809 - FFV1_0( w_fp[56], w_fp[113], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[113], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 809 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16789,7 +16789,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 810 - FFV1_0( w_fp[22], w_fp[113], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[113], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 810 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16802,7 +16802,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 811 - FFV1_0( w_fp[56], w_fp[50], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[50], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 811 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16815,7 +16815,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 812 - FFV1_0( w_fp[22], w_fp[49], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[49], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 812 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16828,7 +16828,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 813 - FFV1_0( w_fp[90], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 813 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16842,7 +16842,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 814 - FFV1_0( w_fp[52], w_fp[113], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[113], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 814 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16856,7 +16856,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 815 - FFV1_0( w_fp[52], w_fp[47], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 815 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16872,7 +16872,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 816 - FFV1_0( w_fp[90], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 816 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16886,7 +16886,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 817 - FFV1_0( w_fp[90], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 817 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16902,7 +16902,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 818 - VVV1_0( w_fp[98], w_fp[103], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[98], w_fp[103], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 818 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ 
-16922,7 +16922,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 819 - FFV1_0( w_fp[21], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 819 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16938,7 +16938,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 820 - VVV1_0( w_fp[0], w_fp[103], w_fp[42], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[103], w_fp[42], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 820 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16958,7 +16958,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 821 - FFV1_0( w_fp[21], w_fp[17], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[17], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 821 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16972,7 +16972,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 822 - FFV1_0( w_fp[52], w_fp[2], w_fp[85], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[85], COUPs[1], &_fp[0] ); jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; @@ -16981,7 +16981,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16990,7 +16990,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17006,7 +17006,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 823 - FFV1_0( w_fp[90], w_fp[15], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[15], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 823 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17020,7 +17020,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 824 - FFV1_0( w_fp[90], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 824 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17036,7 +17036,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 825 - VVV1_0( w_fp[101], w_fp[103], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[101], w_fp[103], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 825 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17056,7 +17056,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 826 - FFV1_0( w_fp[22], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 826 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17072,7 +17072,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 827 - VVV1_0( w_fp[0], w_fp[103], w_fp[16], COUPs[0], 
1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[103], w_fp[16], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 827 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17092,7 +17092,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 828 - FFV1_0( w_fp[22], w_fp[15], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[15], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 828 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17106,7 +17106,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 829 - FFV1_0( w_fp[52], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17115,7 +17115,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17124,7 +17124,7 @@ namespace mg5amcCpu jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17140,7 +17140,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 830 - FFV1_0( w_fp[90], w_fp[18], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], 
w_fp[18], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 830 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17154,7 +17154,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 831 - FFV1_0( w_fp[90], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 831 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17170,7 +17170,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 832 - VVV1_0( w_fp[96], w_fp[103], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[96], w_fp[103], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 832 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17190,7 +17190,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 833 - FFV1_0( w_fp[56], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 833 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17206,7 +17206,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 834 - VVV1_0( w_fp[0], w_fp[103], w_fp[19], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[103], w_fp[19], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 834 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17226,7 +17226,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 835 - FFV1_0( w_fp[56], w_fp[18], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[18], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 835 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17240,7 +17240,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 836 - FFV1_0( w_fp[52], w_fp[2], w_fp[87], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[87], COUPs[1], &_fp[0] ); jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; @@ -17249,7 +17249,7 @@ namespace mg5amcCpu jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[34], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[34], COUPs[1], &_fp[0] ); jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17258,7 +17258,7 @@ namespace mg5amcCpu jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17274,17 +17274,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 837 - FFV1_0( w_fp[90], w_fp[2], w_fp[30], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[30], COUPs[1], &_fp[0] ); jamp_sv[64] += amp_sv[0]; jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - FFV1_0( w_fp[90], w_fp[2], w_fp[31], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[31], COUPs[1], &_fp[0] ); jamp_sv[70] -= amp_sv[0]; jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - FFV1_0( w_fp[90], w_fp[2], w_fp[32], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[32], COUPs[1], &_fp[0] ); jamp_sv[64] -= 
amp_sv[0]; jamp_sv[88] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; @@ -17296,7 +17296,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 838 - FFV1_0( w_fp[52], w_fp[2], w_fp[92], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[92], COUPs[1], &_fp[0] ); jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; @@ -17305,7 +17305,7 @@ namespace mg5amcCpu jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], &_fp[0] ); jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; @@ -17314,7 +17314,7 @@ namespace mg5amcCpu jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[106], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[106], COUPs[1], &_fp[0] ); jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17327,10 +17327,10 @@ namespace mg5amcCpu // *** DIAGRAM 839 OF 1240 *** // Wavefunction(s) for diagram number 839 - VVV1P0_1( w_fp[0], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[90] ); + VVV1P0_1( w_fp[0], w_fp[61], COUPs[0], 0., 0., w_fp[90] ); // Amplitude(s) for diagram number 839 - VVV1_0( w_fp[90], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[90], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 839 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17358,7 +17358,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 840 - VVV1_0( w_fp[90], w_fp[11], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + 
VVV1_0( w_fp[90], w_fp[11], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 840 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17386,7 +17386,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 841 - VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -17403,7 +17403,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -17420,7 +17420,7 @@ namespace mg5amcCpu jamp_sv[115] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[6] += amp_sv[0]; @@ -17441,10 +17441,10 @@ namespace mg5amcCpu // *** DIAGRAM 842 OF 1240 *** // Wavefunction(s) for diagram number 842 - VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[56] ); + VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 0., 0., w_fp[56] ); // Amplitude(s) for diagram number 842 - VVV1_0( w_fp[56], w_fp[63], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[63], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 842 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17472,7 +17472,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 843 - VVV1_0( w_fp[56], w_fp[64], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[64], w_fp[5], COUPs[0], &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 843 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17500,7 +17500,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 844 - VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -17517,7 +17517,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -17534,7 +17534,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[6] += amp_sv[0]; @@ -17558,7 +17558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 845 - VVV1_0( w_fp[0], w_fp[63], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[63], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 845 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17586,7 +17586,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 846 - VVV1_0( w_fp[0], w_fp[64], w_fp[10], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[64], w_fp[10], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 846 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17611,12 +17611,12 @@ namespace mg5amcCpu // *** DIAGRAM 847 OF 1240 *** // 
Wavefunction(s) for diagram number 847 - VVVV1P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[103] ); - VVVV3P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[22] ); - VVVV4P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 0., 0., w_fp[103] ); + VVVV3P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 0., 0., w_fp[22] ); + VVVV4P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 847 - VVV1_0( w_fp[8], w_fp[6], w_fp[103], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[103], COUPs[0], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -17633,7 +17633,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[22], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[22], COUPs[0], &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -17650,7 +17650,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -17671,12 +17671,12 @@ namespace mg5amcCpu // *** DIAGRAM 848 OF 1240 *** // Wavefunction(s) for diagram number 848 - VVVV1P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[105] ); - VVVV3P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[107] ); + VVVV1P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 0., 0., w_fp[105] ); + VVVV3P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 0., 0., w_fp[107] ); // Amplitude(s) for diagram number 848 - VVV1_0( w_fp[8], 
w_fp[5], w_fp[105], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[105], COUPs[0], &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[18] -= amp_sv[0]; @@ -17693,7 +17693,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[95], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[95], COUPs[0], &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -17710,7 +17710,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[98] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[107], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[107], COUPs[0], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -17731,12 +17731,12 @@ namespace mg5amcCpu // *** DIAGRAM 849 OF 1240 *** // Wavefunction(s) for diagram number 849 - VVVV1P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[115] ); - VVVV3P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[116] ); - VVVV4P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[117] ); + VVVV1P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 0., 0., w_fp[115] ); + VVVV3P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 0., 0., w_fp[116] ); + VVVV4P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 0., 0., w_fp[117] ); // Amplitude(s) for diagram number 849 - VVV1_0( w_fp[61], w_fp[6], w_fp[115], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[115], COUPs[0], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[18] += amp_sv[0]; @@ -17753,7 +17753,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVV1_0( w_fp[61], w_fp[6], w_fp[116], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[116], COUPs[0], &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[16] += amp_sv[0]; @@ -17770,7 +17770,7 @@ namespace 
mg5amcCpu jamp_sv[105] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[61], w_fp[6], w_fp[117], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[117], COUPs[0], &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -17791,12 +17791,12 @@ namespace mg5amcCpu // *** DIAGRAM 850 OF 1240 *** // Wavefunction(s) for diagram number 850 - VVVV1P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[118] ); - VVVV3P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[119] ); - VVVV4P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[120] ); + VVVV1P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 0., 0., w_fp[118] ); + VVVV3P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 0., 0., w_fp[119] ); + VVVV4P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 0., 0., w_fp[120] ); // Amplitude(s) for diagram number 850 - VVV1_0( w_fp[61], w_fp[5], w_fp[118], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[118], COUPs[0], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -17813,7 +17813,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[61], w_fp[5], w_fp[119], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[119], COUPs[0], &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -17830,7 +17830,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[100] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[61], w_fp[5], w_fp[120], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[120], COUPs[0], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -17854,7 +17854,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 851 - VVVV1_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); 
jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -17871,7 +17871,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -17888,7 +17888,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -17912,7 +17912,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 852 - VVV1_0( w_fp[8], w_fp[29], w_fp[90], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[29], w_fp[90], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 852 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17940,7 +17940,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 853 - VVV1_0( w_fp[61], w_fp[29], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[29], w_fp[56], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 853 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17968,7 +17968,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 854 - VVV1_0( w_fp[61], w_fp[8], w_fp[96], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[8], w_fp[96], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 854 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17996,7 +17996,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 855 - 
VVV1_0( w_fp[90], w_fp[45], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[90], w_fp[45], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 855 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18016,7 +18016,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 856 - FFV1_0( w_fp[3], w_fp[44], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[44], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 856 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18032,7 +18032,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 857 - FFV1_0( w_fp[65], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 857 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18046,7 +18046,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 858 - FFV1_0( w_fp[3], w_fp[102], w_fp[64], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[64], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 858 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18062,7 +18062,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 859 - FFV1_0( w_fp[65], w_fp[44], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[44], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 859 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18076,7 +18076,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 860 - VVV1_0( w_fp[0], w_fp[64], w_fp[45], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[64], w_fp[45], 
COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 860 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18096,7 +18096,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 861 - FFV1_0( w_fp[3], w_fp[39], w_fp[105], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[105], COUPs[1], &_fp[0] ); jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; @@ -18105,7 +18105,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[95], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[95], COUPs[1], &_fp[0] ); jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18114,7 +18114,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[107], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[107], COUPs[1], &_fp[0] ); jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18130,7 +18130,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 862 - FFV1_0( w_fp[41], w_fp[39], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 862 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18146,7 +18146,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 863 - FFV1_0( w_fp[41], w_fp[102], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[102], w_fp[61], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 863 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18160,7 +18160,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 864 - FFV1_0( w_fp[62], w_fp[39], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 864 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18174,7 +18174,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 865 - VVV1_0( w_fp[90], w_fp[51], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[90], w_fp[51], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 865 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18194,7 +18194,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 866 - FFV1_0( w_fp[3], w_fp[50], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[50], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 866 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18210,7 +18210,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 867 - FFV1_0( w_fp[65], w_fp[113], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[113], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 867 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18224,7 +18224,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 868 - FFV1_0( w_fp[3], w_fp[113], w_fp[63], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[63], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 868 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 
0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18240,7 +18240,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 869 - FFV1_0( w_fp[65], w_fp[50], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[50], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 869 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18254,7 +18254,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 870 - VVV1_0( w_fp[0], w_fp[63], w_fp[51], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[63], w_fp[51], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 870 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18274,7 +18274,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 871 - FFV1_0( w_fp[3], w_fp[47], w_fp[103], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[103], COUPs[1], &_fp[0] ); jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; @@ -18283,7 +18283,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[22], COUPs[1], &_fp[0] ); jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18292,7 +18292,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) 
* amp_sv[0]; @@ -18308,7 +18308,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 872 - FFV1_0( w_fp[38], w_fp[47], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 872 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18324,7 +18324,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 873 - FFV1_0( w_fp[38], w_fp[113], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[113], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 873 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18338,7 +18338,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 874 - FFV1_0( w_fp[104], w_fp[47], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[47], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 874 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18352,7 +18352,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 875 - VVV1_0( w_fp[90], w_fp[23], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[90], w_fp[23], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 875 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18372,7 +18372,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 876 - FFV1_0( w_fp[48], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 876 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18388,7 +18388,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 
877 - FFV1_0( w_fp[104], w_fp[93], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[93], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 877 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18402,7 +18402,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 878 - FFV1_0( w_fp[104], w_fp[2], w_fp[64], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[64], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 878 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18418,7 +18418,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 879 - FFV1_0( w_fp[48], w_fp[93], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[93], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 879 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18432,7 +18432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 880 - VVV1_0( w_fp[0], w_fp[64], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[64], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 880 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18452,7 +18452,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 881 - FFV1_0( w_fp[38], w_fp[2], w_fp[105], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[105], COUPs[1], &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; @@ -18461,7 +18461,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[95], COUPs[1], 1.0, 
&_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18470,7 +18470,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[107], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[107], COUPs[1], &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18486,7 +18486,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 882 - VVV1_0( w_fp[90], w_fp[20], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[90], w_fp[20], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 882 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18506,7 +18506,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 883 - FFV1_0( w_fp[40], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 883 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18522,7 +18522,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 884 - FFV1_0( w_fp[62], w_fp[93], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[93], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 884 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18536,7 +18536,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 885 - FFV1_0( w_fp[62], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 885 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18552,7 +18552,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 886 - FFV1_0( w_fp[40], w_fp[93], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[93], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 886 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18566,7 +18566,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 887 - VVV1_0( w_fp[0], w_fp[63], w_fp[20], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[63], w_fp[20], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 887 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18586,7 +18586,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 888 - FFV1_0( w_fp[41], w_fp[2], w_fp[103], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[103], COUPs[1], &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -18595,7 +18595,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18604,7 +18604,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[0] += cxtype( 
0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18620,7 +18620,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 889 - FFV1_0( w_fp[3], w_fp[18], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[18], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 889 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18640,7 +18640,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 890 - FFV1_0( w_fp[12], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 890 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18660,7 +18660,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 891 - FFV1_0( w_fp[3], w_fp[93], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[93], w_fp[96], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 891 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18680,7 +18680,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 892 - FFV1_0( w_fp[65], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 892 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18700,7 +18700,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 893 - FFV1_0( w_fp[12], w_fp[93], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[93], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 893 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ 
-18716,7 +18716,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 894 - FFV1_0( w_fp[65], w_fp[18], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[18], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 894 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18729,10 +18729,10 @@ namespace mg5amcCpu // *** DIAGRAM 895 OF 1240 *** // Wavefunction(s) for diagram number 895 - VVV1P0_1( w_fp[0], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[65] ); + VVV1P0_1( w_fp[0], w_fp[66], COUPs[0], 0., 0., w_fp[65] ); // Amplitude(s) for diagram number 895 - VVV1_0( w_fp[65], w_fp[13], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[65], w_fp[13], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 895 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18760,7 +18760,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 896 - VVV1_0( w_fp[65], w_fp[11], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[65], w_fp[11], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 896 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18788,7 +18788,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 897 - VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[37] += amp_sv[0]; @@ -18805,7 +18805,7 @@ namespace mg5amcCpu jamp_sv[100] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[12] += amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -18822,7 +18822,7 
@@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[3] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -18846,7 +18846,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 898 - VVV1_0( w_fp[56], w_fp[69], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[69], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 898 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18874,7 +18874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 899 - VVV1_0( w_fp[56], w_fp[70], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[70], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 899 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18902,7 +18902,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 900 - VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -18919,7 +18919,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -18936,7 +18936,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[107] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 
&_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[3] += amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -18960,7 +18960,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 901 - VVV1_0( w_fp[0], w_fp[69], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[69], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 901 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18988,7 +18988,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 902 - VVV1_0( w_fp[0], w_fp[70], w_fp[13], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[70], w_fp[13], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 902 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19013,12 +19013,12 @@ namespace mg5amcCpu // *** DIAGRAM 903 OF 1240 *** // Wavefunction(s) for diagram number 903 - VVVV1P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[93] ); - VVVV3P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[90] ); - VVVV4P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 0., 0., w_fp[93] ); + VVVV3P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 0., 0., w_fp[90] ); + VVVV4P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 903 - VVV1_0( w_fp[8], w_fp[6], w_fp[93], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[93], COUPs[0], &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -19035,7 +19035,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[90], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[90], COUPs[0], &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -19052,7 +19052,7 @@ namespace 
mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[12] += amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -19073,12 +19073,12 @@ namespace mg5amcCpu // *** DIAGRAM 904 OF 1240 *** // Wavefunction(s) for diagram number 904 - VVVV1P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] ); - VVVV3P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[103] ); - VVVV4P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[63] ); + VVVV1P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 0., 0., w_fp[22] ); + VVVV3P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 0., 0., w_fp[103] ); + VVVV4P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 0., 0., w_fp[63] ); // Amplitude(s) for diagram number 904 - VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[19] -= amp_sv[0]; @@ -19095,7 +19095,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[103], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[103], COUPs[0], &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; jamp_sv[37] += amp_sv[0]; @@ -19112,7 +19112,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; jamp_sv[100] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[63], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[63], COUPs[0], &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[37] += amp_sv[0]; @@ -19133,12 +19133,12 @@ namespace mg5amcCpu // *** DIAGRAM 905 OF 1240 *** // Wavefunction(s) for diagram number 905 - VVVV1P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] ); - VVVV3P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., 
w_fp[95] ); - VVVV4P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] ); + VVVV1P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 0., 0., w_fp[107] ); + VVVV3P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 0., 0., w_fp[105] ); // Amplitude(s) for diagram number 905 - VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[19] += amp_sv[0]; @@ -19155,7 +19155,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVV1_0( w_fp[66], w_fp[6], w_fp[95], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[95], COUPs[0], &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -19172,7 +19172,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[66], w_fp[6], w_fp[105], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[105], COUPs[0], &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -19196,7 +19196,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 906 - VVV1_0( w_fp[66], w_fp[4], w_fp[118], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[118], COUPs[0], &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -19213,7 +19213,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[66], w_fp[4], w_fp[119], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[119], COUPs[0], &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; @@ -19230,7 +19230,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[99] += amp_sv[0]; jamp_sv[100] -= amp_sv[0]; - VVV1_0( w_fp[66], w_fp[4], w_fp[120], COUPs[0], 1.0, 
&_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[120], COUPs[0], &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -19254,7 +19254,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 907 - VVVV1_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -19271,7 +19271,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -19288,7 +19288,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[20] += amp_sv[0]; @@ -19312,7 +19312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 908 - VVV1_0( w_fp[8], w_fp[27], w_fp[65], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[27], w_fp[65], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 908 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19340,7 +19340,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 909 - VVV1_0( w_fp[66], w_fp[27], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[27], w_fp[56], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 909 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19368,7 +19368,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 
910 - VVV1_0( w_fp[66], w_fp[8], w_fp[101], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[8], w_fp[101], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 910 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19396,7 +19396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 911 - VVV1_0( w_fp[65], w_fp[37], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[65], w_fp[37], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 911 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19416,7 +19416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 912 - FFV1_0( w_fp[3], w_fp[36], w_fp[65], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[36], w_fp[65], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 912 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19432,7 +19432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 913 - FFV1_0( w_fp[71], w_fp[114], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[114], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 913 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19446,7 +19446,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 914 - FFV1_0( w_fp[3], w_fp[114], w_fp[70], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[70], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 914 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19462,7 +19462,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 915 - FFV1_0( w_fp[71], w_fp[36], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[36], w_fp[0], 
COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 915 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19476,7 +19476,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 916 - VVV1_0( w_fp[0], w_fp[70], w_fp[37], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[70], w_fp[37], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 916 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19496,7 +19496,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 917 - FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], &_fp[0] ); jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; @@ -19505,7 +19505,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[103], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[103], COUPs[1], &_fp[0] ); jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19514,7 +19514,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[63], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[63], COUPs[1], &_fp[0] ); jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19530,7 +19530,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 918 - FFV1_0( w_fp[41], w_fp[33], w_fp[65], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[65], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 918 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19546,7 +19546,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 919 - FFV1_0( w_fp[41], w_fp[114], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[114], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 919 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19560,7 +19560,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 920 - FFV1_0( w_fp[62], w_fp[33], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 920 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19574,7 +19574,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 921 - VVV1_0( w_fp[65], w_fp[51], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[65], w_fp[51], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 921 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19594,7 +19594,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 922 - FFV1_0( w_fp[3], w_fp[49], w_fp[65], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[49], w_fp[65], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 922 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19610,7 +19610,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 923 - FFV1_0( w_fp[71], w_fp[113], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[113], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 923 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId 
!= 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19624,7 +19624,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 924 - FFV1_0( w_fp[3], w_fp[113], w_fp[69], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[69], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 924 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19640,7 +19640,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 925 - FFV1_0( w_fp[71], w_fp[49], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[49], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 925 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19654,7 +19654,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 926 - VVV1_0( w_fp[0], w_fp[69], w_fp[51], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[69], w_fp[51], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 926 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19674,7 +19674,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 927 - FFV1_0( w_fp[3], w_fp[47], w_fp[93], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[93], COUPs[1], &_fp[0] ); jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19683,7 +19683,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[90], COUPs[1], &_fp[0] ); jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19692,7 +19692,7 @@ namespace 
mg5amcCpu jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19708,7 +19708,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 928 - FFV1_0( w_fp[46], w_fp[47], w_fp[65], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[65], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 928 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19724,7 +19724,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 929 - FFV1_0( w_fp[46], w_fp[113], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[113], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 929 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19738,7 +19738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 930 - FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 930 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19752,7 +19752,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 931 - VVV1_0( w_fp[65], w_fp[54], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[65], w_fp[54], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 931 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19772,7 +19772,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 932 - FFV1_0( w_fp[53], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 932 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19788,7 +19788,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 933 - FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 933 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19802,7 +19802,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 934 - FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 934 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19818,7 +19818,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 935 - FFV1_0( w_fp[53], w_fp[94], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[94], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 935 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19832,7 +19832,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 936 - VVV1_0( w_fp[0], w_fp[70], w_fp[54], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[70], w_fp[54], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 936 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19852,7 +19852,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 937 - FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[22], 
COUPs[1], &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; @@ -19861,7 +19861,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[103], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[103], COUPs[1], &_fp[0] ); jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19870,7 +19870,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19886,7 +19886,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 938 - VVV1_0( w_fp[65], w_fp[20], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[65], w_fp[20], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 938 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19906,7 +19906,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 939 - FFV1_0( w_fp[28], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 939 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19922,7 +19922,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 940 - FFV1_0( w_fp[62], w_fp[94], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[94], w_fp[4], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 940 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19936,7 +19936,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 941 - FFV1_0( w_fp[62], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 941 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19952,7 +19952,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 942 - FFV1_0( w_fp[28], w_fp[94], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[94], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 942 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19966,7 +19966,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 943 - VVV1_0( w_fp[0], w_fp[69], w_fp[20], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[69], w_fp[20], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 943 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19986,7 +19986,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 944 - FFV1_0( w_fp[41], w_fp[2], w_fp[93], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[93], COUPs[1], &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19995,7 +19995,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * 
amp_sv[0]; jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -20004,7 +20004,7 @@ namespace mg5amcCpu jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -20020,7 +20020,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 945 - FFV1_0( w_fp[3], w_fp[15], w_fp[65], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[15], w_fp[65], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 945 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20040,7 +20040,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 946 - FFV1_0( w_fp[14], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 946 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20060,7 +20060,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 947 - FFV1_0( w_fp[3], w_fp[94], w_fp[101], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[94], w_fp[101], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 947 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20080,7 +20080,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 948 - FFV1_0( w_fp[71], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 948 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ 
-20100,7 +20100,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 949 - FFV1_0( w_fp[14], w_fp[94], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[94], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 949 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20116,7 +20116,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 950 - FFV1_0( w_fp[71], w_fp[15], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[15], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 950 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20129,10 +20129,10 @@ namespace mg5amcCpu // *** DIAGRAM 951 OF 1240 *** // Wavefunction(s) for diagram number 951 - VVV1P0_1( w_fp[0], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[0], w_fp[72], COUPs[0], 0., 0., w_fp[71] ); // Amplitude(s) for diagram number 951 - VVV1_0( w_fp[71], w_fp[13], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[71], w_fp[13], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 951 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20160,7 +20160,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 952 - VVV1_0( w_fp[71], w_fp[10], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[71], w_fp[10], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 952 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20188,7 +20188,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 953 - VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[43] += 
amp_sv[0]; @@ -20205,7 +20205,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -20222,7 +20222,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[18] += amp_sv[0]; @@ -20246,7 +20246,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 954 - VVV1_0( w_fp[56], w_fp[74], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[74], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 954 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20274,7 +20274,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 955 - VVV1_0( w_fp[56], w_fp[75], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[75], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 955 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20302,7 +20302,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 956 - VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -20319,7 +20319,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; - VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[72], w_fp[4], 
w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); jamp_sv[5] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -20336,7 +20336,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -20360,7 +20360,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 957 - VVV1_0( w_fp[0], w_fp[74], w_fp[10], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[74], w_fp[10], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 957 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20388,7 +20388,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 958 - VVV1_0( w_fp[0], w_fp[75], w_fp[13], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[75], w_fp[13], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 958 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20413,12 +20413,12 @@ namespace mg5amcCpu // *** DIAGRAM 959 OF 1240 *** // Wavefunction(s) for diagram number 959 - VVVV1P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[94] ); - VVVV3P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[65] ); - VVVV4P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 0., 0., w_fp[94] ); + VVVV3P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 0., 0., w_fp[65] ); + VVVV4P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 959 - VVV1_0( w_fp[8], w_fp[5], w_fp[94], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[94], COUPs[0], &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[10] += 
amp_sv[0]; @@ -20435,7 +20435,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[65], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[65], COUPs[0], &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -20452,7 +20452,7 @@ namespace mg5amcCpu jamp_sv[93] -= amp_sv[0]; jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -20473,12 +20473,12 @@ namespace mg5amcCpu // *** DIAGRAM 960 OF 1240 *** // Wavefunction(s) for diagram number 960 - VVVV1P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[90] ); - VVVV3P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[93] ); - VVVV4P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[69] ); + VVVV1P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 0., 0., w_fp[90] ); + VVVV3P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 0., 0., w_fp[93] ); + VVVV4P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 0., 0., w_fp[69] ); // Amplitude(s) for diagram number 960 - VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], &_fp[0] ); jamp_sv[5] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; @@ -20495,7 +20495,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[93], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[93], COUPs[0], &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; jamp_sv[43] += amp_sv[0]; @@ -20512,7 +20512,7 @@ namespace mg5amcCpu jamp_sv[76] -= amp_sv[0]; jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[69], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], 
w_fp[69], COUPs[0], &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[43] += amp_sv[0]; @@ -20536,7 +20536,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 961 - VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -20553,7 +20553,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[72], w_fp[5], w_fp[95], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[95], COUPs[0], &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; @@ -20570,7 +20570,7 @@ namespace mg5amcCpu jamp_sv[93] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - VVV1_0( w_fp[72], w_fp[5], w_fp[105], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[105], COUPs[0], &_fp[0] ); jamp_sv[5] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -20594,7 +20594,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 962 - VVV1_0( w_fp[72], w_fp[4], w_fp[115], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[115], COUPs[0], &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -20611,7 +20611,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVV1_0( w_fp[72], w_fp[4], w_fp[116], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[116], COUPs[0], &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; @@ -20628,7 +20628,7 @@ namespace mg5amcCpu jamp_sv[76] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; - VVV1_0( w_fp[72], w_fp[4], w_fp[117], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[117], COUPs[0], &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[10] += 
amp_sv[0]; @@ -20652,7 +20652,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 963 - VVVV1_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[18] -= amp_sv[0]; @@ -20669,7 +20669,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -20686,7 +20686,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; jamp_sv[14] += amp_sv[0]; @@ -20710,7 +20710,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 964 - VVV1_0( w_fp[8], w_fp[24], w_fp[71], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[24], w_fp[71], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 964 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20738,7 +20738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 965 - VVV1_0( w_fp[72], w_fp[24], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[24], w_fp[56], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 965 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20766,7 +20766,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 966 - VVV1_0( w_fp[72], w_fp[8], w_fp[98], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[8], w_fp[98], COUPs[0], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 966 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20794,7 +20794,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 967 - VVV1_0( w_fp[71], w_fp[37], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[71], w_fp[37], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 967 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20814,7 +20814,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 968 - FFV1_0( w_fp[3], w_fp[35], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[35], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 968 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20830,7 +20830,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 969 - FFV1_0( w_fp[76], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 969 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20844,7 +20844,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 970 - FFV1_0( w_fp[3], w_fp[114], w_fp[75], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[75], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 970 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20860,7 +20860,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 971 - FFV1_0( w_fp[76], w_fp[35], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[35], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 971 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 
) denominators_sv += cxabs2( amp_sv[0] ); @@ -20874,7 +20874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 972 - VVV1_0( w_fp[0], w_fp[75], w_fp[37], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[75], w_fp[37], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 972 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20894,7 +20894,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 973 - FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], &_fp[0] ); jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -20903,7 +20903,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[93], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[93], COUPs[1], &_fp[0] ); jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -20912,7 +20912,7 @@ namespace mg5amcCpu jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[69], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[69], COUPs[1], &_fp[0] ); jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -20928,7 +20928,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 974 - FFV1_0( w_fp[38], w_fp[33], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 974 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); 
@@ -20944,7 +20944,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 975 - FFV1_0( w_fp[38], w_fp[114], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[114], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 975 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20958,7 +20958,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 976 - FFV1_0( w_fp[104], w_fp[33], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[33], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 976 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20972,7 +20972,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 977 - VVV1_0( w_fp[71], w_fp[45], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[71], w_fp[45], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 977 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20992,7 +20992,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 978 - FFV1_0( w_fp[3], w_fp[43], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[43], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 978 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21008,7 +21008,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 979 - FFV1_0( w_fp[76], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 979 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21022,7 +21022,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 980 - FFV1_0( 
w_fp[3], w_fp[102], w_fp[74], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[74], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 980 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21038,7 +21038,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 981 - FFV1_0( w_fp[76], w_fp[43], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[43], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 981 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21052,7 +21052,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 982 - VVV1_0( w_fp[0], w_fp[74], w_fp[45], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[74], w_fp[45], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 982 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21072,7 +21072,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 983 - FFV1_0( w_fp[3], w_fp[39], w_fp[94], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[94], COUPs[1], &_fp[0] ); jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21081,7 +21081,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[65], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[65], COUPs[1], &_fp[0] ); jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21090,7 +21090,7 @@ namespace mg5amcCpu jamp_sv[85] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], 
w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21106,7 +21106,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 984 - FFV1_0( w_fp[46], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 984 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21122,7 +21122,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 985 - FFV1_0( w_fp[46], w_fp[102], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[102], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 985 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21136,7 +21136,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 986 - FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 986 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21150,7 +21150,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 987 - VVV1_0( w_fp[71], w_fp[54], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[71], w_fp[54], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 987 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21170,7 +21170,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 988 - FFV1_0( w_fp[7], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 988 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21186,7 +21186,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 989 - FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 989 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21200,7 +21200,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 990 - FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 990 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21216,7 +21216,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 991 - FFV1_0( w_fp[7], w_fp[97], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[97], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 991 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21230,7 +21230,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 992 - VVV1_0( w_fp[0], w_fp[75], w_fp[54], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[75], w_fp[54], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 992 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21250,7 +21250,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 993 - FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * 
amp_sv[0]; @@ -21259,7 +21259,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[93], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[93], COUPs[1], &_fp[0] ); jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21268,7 +21268,7 @@ namespace mg5amcCpu jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21284,7 +21284,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 994 - VVV1_0( w_fp[71], w_fp[23], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[71], w_fp[23], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 994 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21304,7 +21304,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 995 - FFV1_0( w_fp[25], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 995 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21320,7 +21320,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 996 - FFV1_0( w_fp[104], w_fp[97], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[97], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 996 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); 
@@ -21334,7 +21334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 997 - FFV1_0( w_fp[104], w_fp[2], w_fp[74], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[74], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 997 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21350,7 +21350,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 998 - FFV1_0( w_fp[25], w_fp[97], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[97], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 998 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21364,7 +21364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 999 - VVV1_0( w_fp[0], w_fp[74], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[74], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 999 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21384,7 +21384,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1000 - FFV1_0( w_fp[38], w_fp[2], w_fp[94], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[94], COUPs[1], &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21393,7 +21393,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21402,7 +21402,7 @@ namespace mg5amcCpu jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21418,7 +21418,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1001 - FFV1_0( w_fp[3], w_fp[17], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1001 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21438,7 +21438,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1002 - FFV1_0( w_fp[26], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1002 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21458,7 +21458,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1003 - FFV1_0( w_fp[3], w_fp[97], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[97], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1003 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21478,7 +21478,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1004 - FFV1_0( w_fp[76], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1004 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21498,7 +21498,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1005 - FFV1_0( w_fp[26], w_fp[97], w_fp[0], COUPs[1], 
1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[97], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1005 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21514,7 +21514,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1006 - FFV1_0( w_fp[76], w_fp[17], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[17], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1006 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21530,7 +21530,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1007 - VVV1_0( w_fp[56], w_fp[59], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[59], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1007 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21558,7 +21558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1008 - VVV1_0( w_fp[56], w_fp[1], w_fp[42], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[1], w_fp[42], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1008 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21586,7 +21586,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1009 - VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -21603,7 +21603,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[5] -= 
amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -21620,7 +21620,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -21644,7 +21644,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1010 - VVV1_0( w_fp[98], w_fp[108], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[98], w_fp[108], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1010 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21672,7 +21672,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1011 - VVV1_0( w_fp[98], w_fp[1], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[98], w_fp[1], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1011 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21700,7 +21700,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1012 - VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -21717,7 +21717,7 @@ namespace mg5amcCpu jamp_sv[101] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; jamp_sv[14] += amp_sv[0]; @@ -21734,7 +21734,7 @@ namespace mg5amcCpu jamp_sv[103] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &_fp[0] ); + 
VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -21758,7 +21758,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1013 - VVV1_0( w_fp[0], w_fp[108], w_fp[42], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[108], w_fp[42], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1013 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21786,7 +21786,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1014 - VVV1_0( w_fp[0], w_fp[59], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[59], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1014 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21811,12 +21811,12 @@ namespace mg5amcCpu // *** DIAGRAM 1015 OF 1240 *** // Wavefunction(s) for diagram number 1015 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[11] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[42] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[76] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 0., 0., w_fp[11] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 0., 0., w_fp[42] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 0., 0., w_fp[76] ); // Amplitude(s) for diagram number 1015 - VVV1_0( w_fp[24], w_fp[6], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[11], COUPs[0], &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; @@ -21833,7 +21833,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVV1_0( w_fp[24], w_fp[6], w_fp[42], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[42], COUPs[0], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += 
amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -21850,7 +21850,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[24], w_fp[6], w_fp[76], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[76], COUPs[0], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -21871,12 +21871,12 @@ namespace mg5amcCpu // *** DIAGRAM 1016 OF 1240 *** // Wavefunction(s) for diagram number 1016 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[97] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[71] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 0., 0., w_fp[97] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 0., 0., w_fp[71] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 1016 - VVV1_0( w_fp[8], w_fp[6], w_fp[97], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[97], COUPs[0], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -21893,7 +21893,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -21910,7 +21910,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -21934,7 +21934,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1017 - VVV1_0( w_fp[1], w_fp[24], w_fp[118], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], 
w_fp[118], COUPs[0], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -21951,7 +21951,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[24], w_fp[119], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[119], COUPs[0], &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -21968,7 +21968,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[24], w_fp[120], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[120], COUPs[0], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -21992,7 +21992,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1018 - VVV1_0( w_fp[1], w_fp[8], w_fp[85], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[85], COUPs[0], &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -22009,7 +22009,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[112], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[112], COUPs[0], &_fp[0] ); jamp_sv[21] += amp_sv[0]; jamp_sv[23] -= amp_sv[0]; jamp_sv[28] -= amp_sv[0]; @@ -22026,7 +22026,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -22050,7 +22050,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1019 - VVV1_0( w_fp[56], w_fp[68], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[68], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1019 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22078,7 +22078,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1020 - VVV1_0( w_fp[56], w_fp[1], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[1], w_fp[16], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1020 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22106,7 +22106,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1021 - VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -22123,7 +22123,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -22140,7 +22140,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -22164,7 +22164,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1022 - VVV1_0( w_fp[101], w_fp[108], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[101], w_fp[108], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1022 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22192,7 +22192,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1023 - VVV1_0( w_fp[101], w_fp[1], w_fp[10], 
COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[101], w_fp[1], w_fp[10], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1023 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22220,7 +22220,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1024 - VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[25] -= amp_sv[0]; @@ -22237,7 +22237,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[20] += amp_sv[0]; @@ -22254,7 +22254,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[25] += amp_sv[0]; @@ -22278,7 +22278,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1025 - VVV1_0( w_fp[0], w_fp[108], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[108], w_fp[16], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1025 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22306,7 +22306,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1026 - VVV1_0( w_fp[0], w_fp[68], w_fp[10], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[68], w_fp[10], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1026 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += 
cxabs2( amp_sv[0] ); @@ -22334,7 +22334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1027 - VVV1_0( w_fp[27], w_fp[5], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[11], COUPs[0], &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; @@ -22351,7 +22351,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[27], w_fp[5], w_fp[42], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[42], COUPs[0], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -22368,7 +22368,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[27], w_fp[5], w_fp[76], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[76], COUPs[0], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -22389,12 +22389,12 @@ namespace mg5amcCpu // *** DIAGRAM 1028 OF 1240 *** // Wavefunction(s) for diagram number 1028 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[10] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[16] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 0., 0., w_fp[10] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 0., 0., w_fp[16] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 1028 - VVV1_0( w_fp[8], w_fp[5], w_fp[10], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[10], COUPs[0], &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -22411,7 +22411,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[16], 
COUPs[0], &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[25] += amp_sv[0]; @@ -22428,7 +22428,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; jamp_sv[25] += amp_sv[0]; @@ -22452,7 +22452,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1029 - VVV1_0( w_fp[1], w_fp[27], w_fp[115], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[115], COUPs[0], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -22469,7 +22469,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[27], w_fp[116], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[116], COUPs[0], &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[15] += amp_sv[0]; @@ -22486,7 +22486,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[27], w_fp[117], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[117], COUPs[0], &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -22510,7 +22510,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1030 - VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; @@ -22527,7 +22527,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], &_fp[0] ); jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[26] -= amp_sv[0]; @@ 
-22544,7 +22544,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[25] -= amp_sv[0]; @@ -22568,7 +22568,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1031 - VVV1_0( w_fp[56], w_fp[67], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[67], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1031 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22596,7 +22596,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1032 - VVV1_0( w_fp[56], w_fp[1], w_fp[19], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[1], w_fp[19], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1032 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22624,7 +22624,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1033 - VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -22641,7 +22641,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -22658,7 +22658,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 
&_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[3] += amp_sv[0]; @@ -22682,7 +22682,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1034 - VVV1_0( w_fp[96], w_fp[108], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[96], w_fp[108], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1034 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22710,7 +22710,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1035 - VVV1_0( w_fp[96], w_fp[1], w_fp[13], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[96], w_fp[1], w_fp[13], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1035 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22738,7 +22738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1036 - VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[23] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; @@ -22755,7 +22755,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -22772,7 +22772,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; jamp_sv[27] += amp_sv[0]; @@ -22796,7 +22796,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1037 - VVV1_0( w_fp[0], w_fp[108], w_fp[19], COUPs[0], 1.0, 
&_fp[0] ); + VVV1_0( w_fp[0], w_fp[108], w_fp[19], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1037 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22824,7 +22824,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1038 - VVV1_0( w_fp[0], w_fp[67], w_fp[13], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[67], w_fp[13], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1038 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22852,7 +22852,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1039 - VVV1_0( w_fp[4], w_fp[29], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[11], COUPs[0], &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[11] -= amp_sv[0]; jamp_sv[17] -= amp_sv[0]; @@ -22869,7 +22869,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVV1_0( w_fp[4], w_fp[29], w_fp[42], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[42], COUPs[0], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[3] += amp_sv[0]; @@ -22886,7 +22886,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[4], w_fp[29], w_fp[76], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[76], COUPs[0], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[3] += amp_sv[0]; @@ -22907,12 +22907,12 @@ namespace mg5amcCpu // *** DIAGRAM 1040 OF 1240 *** // Wavefunction(s) for diagram number 1040 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[76] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[42] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[11] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 0., 0., w_fp[76] ); + VVVV3P0_1( w_fp[0], 
w_fp[1], w_fp[29], COUPs[2], 0., 0., w_fp[42] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 1040 - VVV1_0( w_fp[8], w_fp[4], w_fp[76], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[76], COUPs[0], &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -22929,7 +22929,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[42], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[42], COUPs[0], &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; jamp_sv[27] += amp_sv[0]; @@ -22946,7 +22946,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[90] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[11], COUPs[0], &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[27] += amp_sv[0]; @@ -22970,7 +22970,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1041 - VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[16] += amp_sv[0]; @@ -22987,7 +22987,7 @@ namespace mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[29], w_fp[95], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[95], COUPs[0], &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[9] += amp_sv[0]; @@ -23004,7 +23004,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[29], w_fp[105], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[105], COUPs[0], &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -23028,7 +23028,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 1042 - VVV1_0( w_fp[1], w_fp[8], w_fp[87], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[87], COUPs[0], &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[17] += amp_sv[0]; @@ -23045,7 +23045,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[34], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[34], COUPs[0], &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[23] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; @@ -23062,7 +23062,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[86], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[86], COUPs[0], &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[11] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -23086,7 +23086,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1043 - VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -23103,7 +23103,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -23120,7 +23120,7 @@ namespace mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[17] += amp_sv[0]; @@ -23137,7 +23137,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] 
+= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -23154,7 +23154,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -23171,7 +23171,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; @@ -23188,7 +23188,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -23205,7 +23205,7 @@ namespace mg5amcCpu jamp_sv[113] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -23222,7 +23222,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; @@ -23246,7 +23246,7 @@ namespace mg5amcCpu // (none) 
// Amplitude(s) for diagram number 1044 - VVV1_0( w_fp[1], w_fp[30], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[30], w_fp[56], COUPs[0], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -23263,7 +23263,7 @@ namespace mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[31], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[31], w_fp[56], COUPs[0], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -23280,7 +23280,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[32], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[32], w_fp[56], COUPs[0], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -23304,7 +23304,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1045 - VVV1_0( w_fp[1], w_fp[8], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[92], COUPs[0], &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[17] += amp_sv[0]; @@ -23321,7 +23321,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; @@ -23338,7 +23338,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[106], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[106], COUPs[0], &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; @@ -23362,7 +23362,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1046 - FFV1_0( w_fp[58], w_fp[114], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( 
w_fp[58], w_fp[114], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1046 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23375,7 +23375,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1047 - FFV1_0( w_fp[48], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1047 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23388,7 +23388,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1048 - FFV1_0( w_fp[104], w_fp[100], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[100], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1048 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23401,7 +23401,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1049 - FFV1_0( w_fp[104], w_fp[36], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[36], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1049 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23414,7 +23414,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1050 - FFV1_0( w_fp[48], w_fp[100], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[100], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1050 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23427,7 +23427,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1051 - FFV1_0( w_fp[58], w_fp[36], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[36], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( 
channelId == 1051 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23440,7 +23440,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1052 - FFV1_0( w_fp[60], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1052 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23453,7 +23453,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1053 - FFV1_0( w_fp[40], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1053 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23466,7 +23466,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1054 - FFV1_0( w_fp[62], w_fp[100], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[100], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1054 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23479,7 +23479,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1055 - FFV1_0( w_fp[62], w_fp[35], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[35], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1055 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23492,7 +23492,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1056 - FFV1_0( w_fp[40], w_fp[100], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[100], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1056 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv 
+= cxabs2( amp_sv[0] ); @@ -23505,7 +23505,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1057 - FFV1_0( w_fp[60], w_fp[35], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[35], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1057 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23518,7 +23518,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1058 - FFV1_0( w_fp[3], w_fp[114], w_fp[67], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[67], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1058 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23534,7 +23534,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1059 - FFV1_0( w_fp[12], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1059 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23548,7 +23548,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1060 - FFV1_0( w_fp[3], w_fp[100], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[100], w_fp[96], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1060 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23564,7 +23564,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1061 - VVV1_0( w_fp[96], w_fp[1], w_fp[37], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[96], w_fp[1], w_fp[37], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1061 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23584,7 +23584,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 1062 - FFV1_0( w_fp[12], w_fp[100], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[100], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1062 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23598,7 +23598,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1063 - VVV1_0( w_fp[0], w_fp[67], w_fp[37], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[67], w_fp[37], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1063 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23618,7 +23618,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1064 - FFV1_0( w_fp[3], w_fp[33], w_fp[76], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[76], COUPs[1], &_fp[0] ); jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; @@ -23627,7 +23627,7 @@ namespace mg5amcCpu jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[42], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[42], COUPs[1], &_fp[0] ); jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -23636,7 +23636,7 @@ namespace mg5amcCpu jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[11], COUPs[1], &_fp[0] ); jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -23652,7 +23652,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1065 - FFV1_0( 
w_fp[78], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1065 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23665,7 +23665,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1066 - FFV1_0( w_fp[53], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1066 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23678,7 +23678,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1067 - FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1067 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23691,7 +23691,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1068 - FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1068 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23704,7 +23704,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1069 - FFV1_0( w_fp[53], w_fp[89], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[89], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1069 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23717,7 +23717,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1070 - FFV1_0( w_fp[78], w_fp[44], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[44], w_fp[0], 
COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1070 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23730,7 +23730,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1071 - FFV1_0( w_fp[60], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1071 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23743,7 +23743,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1072 - FFV1_0( w_fp[28], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1072 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23756,7 +23756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1073 - FFV1_0( w_fp[62], w_fp[89], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[89], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1073 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23769,7 +23769,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1074 - FFV1_0( w_fp[62], w_fp[43], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[43], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1074 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23782,7 +23782,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1075 - FFV1_0( w_fp[28], w_fp[89], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[89], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1075 ) numerators_sv += 
cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23795,7 +23795,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1076 - FFV1_0( w_fp[60], w_fp[43], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[43], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1076 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23808,7 +23808,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1077 - FFV1_0( w_fp[3], w_fp[102], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1077 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23824,7 +23824,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1078 - FFV1_0( w_fp[14], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1078 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23838,7 +23838,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1079 - FFV1_0( w_fp[3], w_fp[89], w_fp[101], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[89], w_fp[101], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1079 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23854,7 +23854,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1080 - VVV1_0( w_fp[101], w_fp[1], w_fp[45], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[101], w_fp[1], w_fp[45], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1080 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23874,7 
+23874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1081 - FFV1_0( w_fp[14], w_fp[89], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[89], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1081 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23888,7 +23888,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1082 - VVV1_0( w_fp[0], w_fp[68], w_fp[45], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[68], w_fp[45], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1082 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23908,7 +23908,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1083 - FFV1_0( w_fp[3], w_fp[39], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[10], COUPs[1], &_fp[0] ); jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; @@ -23917,7 +23917,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[16], COUPs[1], &_fp[0] ); jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -23926,7 +23926,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], &_fp[0] ); jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -23942,7 +23942,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 1084 - FFV1_0( w_fp[78], w_fp[113], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[113], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1084 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23955,7 +23955,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1085 - FFV1_0( w_fp[7], w_fp[113], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[113], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1085 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23968,7 +23968,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1086 - FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1086 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23981,7 +23981,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1087 - FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1087 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23994,7 +23994,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1088 - FFV1_0( w_fp[7], w_fp[91], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[91], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1088 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24007,7 +24007,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1089 - FFV1_0( w_fp[78], w_fp[50], w_fp[0], COUPs[1], 1.0, 
&_fp[0] ); + FFV1_0( w_fp[78], w_fp[50], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1089 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24020,7 +24020,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1090 - FFV1_0( w_fp[58], w_fp[113], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[113], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1090 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24033,7 +24033,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1091 - FFV1_0( w_fp[25], w_fp[113], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[113], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1091 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24046,7 +24046,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1092 - FFV1_0( w_fp[104], w_fp[91], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[91], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1092 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24059,7 +24059,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1093 - FFV1_0( w_fp[104], w_fp[49], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[49], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1093 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24072,7 +24072,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1094 - FFV1_0( w_fp[25], w_fp[91], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[91], w_fp[0], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1094 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24085,7 +24085,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1095 - FFV1_0( w_fp[58], w_fp[49], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[49], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1095 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24098,7 +24098,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1096 - FFV1_0( w_fp[3], w_fp[113], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[59], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1096 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24114,7 +24114,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1097 - FFV1_0( w_fp[26], w_fp[113], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[113], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1097 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24128,7 +24128,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1098 - FFV1_0( w_fp[3], w_fp[91], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[91], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1098 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24144,7 +24144,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1099 - VVV1_0( w_fp[98], w_fp[1], w_fp[51], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[98], w_fp[1], w_fp[51], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1099 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24164,7 +24164,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1100 - FFV1_0( w_fp[26], w_fp[91], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[91], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1100 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24178,7 +24178,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1101 - VVV1_0( w_fp[0], w_fp[59], w_fp[51], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[59], w_fp[51], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1101 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24198,7 +24198,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1102 - FFV1_0( w_fp[3], w_fp[47], w_fp[97], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[97], COUPs[1], &_fp[0] ); jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; @@ -24207,7 +24207,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], &_fp[0] ); jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24216,7 +24216,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= 
cxtype( 0, 1 ) * amp_sv[0]; @@ -24232,7 +24232,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1103 - FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1103 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24248,7 +24248,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1104 - FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1104 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24262,7 +24262,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1105 - FFV1_0( w_fp[78], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1105 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24278,7 +24278,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1106 - VVV1_0( w_fp[96], w_fp[1], w_fp[54], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[96], w_fp[1], w_fp[54], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1106 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24298,7 +24298,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1107 - FFV1_0( w_fp[78], w_fp[18], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[18], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1107 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24312,7 +24312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 1108 - VVV1_0( w_fp[0], w_fp[67], w_fp[54], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[67], w_fp[54], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1108 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24332,7 +24332,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1109 - FFV1_0( w_fp[46], w_fp[2], w_fp[76], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[76], COUPs[1], &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; @@ -24341,7 +24341,7 @@ namespace mg5amcCpu jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24350,7 +24350,7 @@ namespace mg5amcCpu jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24366,7 +24366,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1110 - FFV1_0( w_fp[104], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1110 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24382,7 +24382,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1111 - FFV1_0( 
w_fp[104], w_fp[15], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[15], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1111 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24396,7 +24396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1112 - FFV1_0( w_fp[58], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1112 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24412,7 +24412,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1113 - VVV1_0( w_fp[101], w_fp[1], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[101], w_fp[1], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1113 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24432,7 +24432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1114 - FFV1_0( w_fp[58], w_fp[15], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[15], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1114 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24446,7 +24446,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1115 - VVV1_0( w_fp[0], w_fp[68], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[68], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1115 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24466,7 +24466,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1116 - FFV1_0( w_fp[38], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[10], 
COUPs[1], &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -24475,7 +24475,7 @@ namespace mg5amcCpu jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24484,7 +24484,7 @@ namespace mg5amcCpu jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24500,7 +24500,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1117 - FFV1_0( w_fp[62], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1117 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24516,7 +24516,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1118 - FFV1_0( w_fp[62], w_fp[17], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[17], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1118 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24530,7 +24530,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1119 - FFV1_0( w_fp[60], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1119 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24546,7 +24546,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1120 - VVV1_0( w_fp[98], w_fp[1], w_fp[20], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[98], w_fp[1], w_fp[20], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1120 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24566,7 +24566,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1121 - FFV1_0( w_fp[60], w_fp[17], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[17], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1121 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24580,7 +24580,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1122 - VVV1_0( w_fp[0], w_fp[59], w_fp[20], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[59], w_fp[20], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1122 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24600,7 +24600,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1123 - FFV1_0( w_fp[41], w_fp[2], w_fp[97], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[97], COUPs[1], &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -24609,7 +24609,7 @@ namespace mg5amcCpu jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] -= cxtype( 
0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24618,7 +24618,7 @@ namespace mg5amcCpu jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24631,12 +24631,12 @@ namespace mg5amcCpu // *** DIAGRAM 1124 OF 1240 *** // Wavefunction(s) for diagram number 1124 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[71] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[97] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[71] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[97] ); // Amplitude(s) for diagram number 1124 - VVVV1_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -24653,7 +24653,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -24670,7 +24670,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ 
-24687,7 +24687,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVVV1_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -24704,7 +24704,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV3_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -24721,7 +24721,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV4_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -24738,7 +24738,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVVV1_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -24755,7 +24755,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV3_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -24772,7 +24772,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[1] 
-= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -24793,12 +24793,12 @@ namespace mg5amcCpu // *** DIAGRAM 1125 OF 1240 *** // Wavefunction(s) for diagram number 1125 - VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[59] ); - VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] ); - VVV1P0_1( w_fp[97], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 0., 0., w_fp[59] ); + VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 0., 0., w_fp[20] ); + VVV1P0_1( w_fp[97], w_fp[5], COUPs[0], 0., 0., w_fp[60] ); // Amplitude(s) for diagram number 1125 - VVV1_0( w_fp[8], w_fp[6], w_fp[59], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[59], COUPs[0], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -24815,7 +24815,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[20], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[20], COUPs[0], &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -24832,7 +24832,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -24853,12 +24853,12 @@ namespace mg5amcCpu // *** DIAGRAM 1126 OF 1240 *** // Wavefunction(s) for diagram number 1126 - VVV1P0_1( w_fp[21], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[17] ); - VVV1P0_1( w_fp[71], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[98] ); - VVV1P0_1( w_fp[97], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[111] ); + VVV1P0_1( w_fp[21], w_fp[6], COUPs[0], 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[71], w_fp[6], COUPs[0], 0., 0., w_fp[98] ); + VVV1P0_1( w_fp[97], w_fp[6], COUPs[0], 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 
1126 - VVV1_0( w_fp[8], w_fp[5], w_fp[17], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[17], COUPs[0], &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -24875,7 +24875,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -24892,7 +24892,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -24916,7 +24916,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1127 - VVV1_0( w_fp[21], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[21], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -24933,7 +24933,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[71], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[71], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -24950,7 +24950,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[97], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[97], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -24971,22 +24971,22 @@ namespace mg5amcCpu // *** DIAGRAM 1128 OF 1240 *** // Wavefunction(s) for diagram number 1128 - FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); - 
FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); - FFV1_2( w_fp[3], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] ); + FFV1_2( w_fp[3], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); + FFV1_2( w_fp[3], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); + FFV1_2( w_fp[3], w_fp[97], COUPs[1], cIPD[0], cIPD[1], w_fp[68] ); // Amplitude(s) for diagram number 1128 - FFV1_0( w_fp[16], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); jamp_sv[90] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; - FFV1_0( w_fp[10], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); jamp_sv[91] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; - FFV1_0( w_fp[68], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[68], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); jamp_sv[90] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[94] += amp_sv[0]; @@ -24998,7 +24998,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1129 - FFV1_0( w_fp[3], w_fp[39], w_fp[17], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[17], COUPs[1], &_fp[0] ); jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25007,7 +25007,7 @@ namespace mg5amcCpu jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], &_fp[0] ); jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25016,7 +25016,7 @@ namespace mg5amcCpu jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * 
amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], &_fp[0] ); jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25032,17 +25032,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1130 - FFV1_0( w_fp[41], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[72] += amp_sv[0]; jamp_sv[74] -= amp_sv[0]; jamp_sv[80] -= amp_sv[0]; jamp_sv[86] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); jamp_sv[74] -= amp_sv[0]; jamp_sv[78] += amp_sv[0]; jamp_sv[80] -= amp_sv[0]; jamp_sv[84] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[39], w_fp[97], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[97], COUPs[1], &_fp[0] ); jamp_sv[72] -= amp_sv[0]; jamp_sv[78] += amp_sv[0]; jamp_sv[84] += amp_sv[0]; @@ -25054,17 +25054,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1131 - FFV1_0( w_fp[16], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); jamp_sv[114] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - FFV1_0( w_fp[10], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); jamp_sv[115] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - FFV1_0( w_fp[68], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[68], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; @@ -25076,7 +25076,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1132 - FFV1_0( w_fp[3], w_fp[47], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[59], 
COUPs[1], &_fp[0] ); jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25085,7 +25085,7 @@ namespace mg5amcCpu jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[20], COUPs[1], &_fp[0] ); jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25094,7 +25094,7 @@ namespace mg5amcCpu jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], &_fp[0] ); jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25110,17 +25110,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1133 - FFV1_0( w_fp[38], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[96] += amp_sv[0]; jamp_sv[98] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[47], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[71], COUPs[1], &_fp[0] ); jamp_sv[98] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[47], w_fp[97], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[97], COUPs[1], &_fp[0] ); jamp_sv[96] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; @@ -25129,22 +25129,22 @@ namespace mg5amcCpu // *** DIAGRAM 1134 OF 1240 *** // Wavefunction(s) for diagram number 1134 - FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); - FFV1_1( w_fp[2], 
w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); - FFV1_1( w_fp[2], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); + FFV1_1( w_fp[2], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_1( w_fp[2], w_fp[97], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); // Amplitude(s) for diagram number 1134 - FFV1_0( w_fp[38], w_fp[23], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[23], w_fp[6], COUPs[1], &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; jamp_sv[55] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[21], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[21], w_fp[6], COUPs[1], &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; jamp_sv[49] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[71], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[71], w_fp[6], COUPs[1], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -25156,7 +25156,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1135 - FFV1_0( w_fp[38], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25165,7 +25165,7 @@ namespace mg5amcCpu jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25174,7 +25174,7 @@ namespace mg5amcCpu jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( 
w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25190,17 +25190,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1136 - FFV1_0( w_fp[41], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; jamp_sv[54] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[21], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[21], w_fp[5], COUPs[1], &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; jamp_sv[48] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[71], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[71], w_fp[5], COUPs[1], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -25212,7 +25212,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1137 - FFV1_0( w_fp[41], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25221,7 +25221,7 @@ namespace mg5amcCpu jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[20], COUPs[1], &_fp[0] ); jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25230,7 +25230,7 @@ namespace mg5amcCpu jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, 
&_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25246,7 +25246,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1138 - FFV1_0( w_fp[3], w_fp[23], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[29], COUPs[1], &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25255,7 +25255,7 @@ namespace mg5amcCpu jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[21], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[21], w_fp[29], COUPs[1], &_fp[0] ); jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25264,7 +25264,7 @@ namespace mg5amcCpu jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[71], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[71], w_fp[29], COUPs[1], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25280,7 +25280,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1139 - FFV1_0( w_fp[16], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25289,7 +25289,7 @@ namespace mg5amcCpu jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[10], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( 
w_fp[10], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25298,7 +25298,7 @@ namespace mg5amcCpu jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[68], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[68], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25311,12 +25311,12 @@ namespace mg5amcCpu // *** DIAGRAM 1140 OF 1240 *** // Wavefunction(s) for diagram number 1140 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[68] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[29] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[10] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[68] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[29] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 1140 - VVVV1_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -25333,7 +25333,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV3_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[12] -= amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -25350,7 +25350,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVVV4_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[68], w_fp[8], 
w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -25367,7 +25367,7 @@ namespace mg5amcCpu jamp_sv[100] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVVV1_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -25384,7 +25384,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVVV3_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -25401,7 +25401,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV4_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -25418,7 +25418,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV1_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[3] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -25435,7 +25435,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; - VVVV3_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -25452,7 +25452,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV4_0( w_fp[10], w_fp[8], 
w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -25473,12 +25473,12 @@ namespace mg5amcCpu // *** DIAGRAM 1141 OF 1240 *** // Wavefunction(s) for diagram number 1141 - VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[16] ); - VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[71] ); - VVV1P0_1( w_fp[10], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[10], w_fp[4], COUPs[0], 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 1141 - VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[12] -= amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -25495,7 +25495,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -25512,7 +25512,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -25533,12 +25533,12 @@ namespace mg5amcCpu // *** DIAGRAM 1142 OF 1240 *** // Wavefunction(s) for diagram number 1142 - VVV1P0_1( w_fp[68], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] ); - VVV1P0_1( w_fp[29], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[60] ); - VVV1P0_1( w_fp[10], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[20] ); + VVV1P0_1( w_fp[68], w_fp[6], COUPs[0], 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[29], w_fp[6], 
COUPs[0], 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[10], w_fp[6], COUPs[0], 0., 0., w_fp[20] ); // Amplitude(s) for diagram number 1142 - VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -25555,7 +25555,7 @@ namespace mg5amcCpu jamp_sv[100] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[60], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[60], COUPs[0], &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -25572,7 +25572,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[20], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[20], COUPs[0], &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -25596,7 +25596,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1143 - VVV1_0( w_fp[68], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[68], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -25613,7 +25613,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[29], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[29], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -25630,7 +25630,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[10], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[10], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[3] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -25651,22 +25651,22 @@ namespace mg5amcCpu // *** DIAGRAM 1144 OF 
1240 *** // Wavefunction(s) for diagram number 1144 - FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] ); - FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[111] ); - FFV1_2( w_fp[3], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); + FFV1_2( w_fp[3], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[59] ); + FFV1_2( w_fp[3], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[111] ); + FFV1_2( w_fp[3], w_fp[10], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 1144 - FFV1_0( w_fp[59], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[59], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); jamp_sv[66] += amp_sv[0]; jamp_sv[67] -= amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[71] += amp_sv[0]; - FFV1_0( w_fp[111], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[111], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); jamp_sv[67] -= amp_sv[0]; jamp_sv[68] += amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[70] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[98], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); jamp_sv[66] -= amp_sv[0]; jamp_sv[68] += amp_sv[0]; jamp_sv[70] += amp_sv[0]; @@ -25678,7 +25678,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1145 - FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25687,7 +25687,7 @@ namespace mg5amcCpu jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[60], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[60], COUPs[1], &_fp[0] ); jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25696,7 +25696,7 @@ namespace 
mg5amcCpu jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[20], COUPs[1], &_fp[0] ); jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25712,17 +25712,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1146 - FFV1_0( w_fp[41], w_fp[33], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[68], COUPs[1], &_fp[0] ); jamp_sv[48] += amp_sv[0]; jamp_sv[50] -= amp_sv[0]; jamp_sv[56] -= amp_sv[0]; jamp_sv[62] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); jamp_sv[50] -= amp_sv[0]; jamp_sv[54] += amp_sv[0]; jamp_sv[56] -= amp_sv[0]; jamp_sv[60] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[33], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[10], COUPs[1], &_fp[0] ); jamp_sv[48] -= amp_sv[0]; jamp_sv[54] += amp_sv[0]; jamp_sv[60] += amp_sv[0]; @@ -25734,17 +25734,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1147 - FFV1_0( w_fp[59], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[59], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); jamp_sv[108] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - FFV1_0( w_fp[111], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[111], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); jamp_sv[109] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[98], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); jamp_sv[108] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; @@ -25756,7 +25756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 1148 - FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], &_fp[0] ); jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25765,7 +25765,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], &_fp[0] ); jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25774,7 +25774,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25790,17 +25790,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1149 - FFV1_0( w_fp[46], w_fp[47], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[68], COUPs[1], &_fp[0] ); jamp_sv[97] += amp_sv[0]; jamp_sv[100] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[47], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[29], COUPs[1], &_fp[0] ); jamp_sv[100] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[47], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[10], COUPs[1], &_fp[0] ); jamp_sv[97] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; @@ -25809,22 +25809,22 @@ namespace mg5amcCpu // *** DIAGRAM 1150 OF 1240 *** // 
Wavefunction(s) for diagram number 1150 - FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] ); - FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] ); - FFV1_1( w_fp[2], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] ); + FFV1_1( w_fp[2], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[17] ); + FFV1_1( w_fp[2], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[68] ); + FFV1_1( w_fp[2], w_fp[10], COUPs[1], cIPD[0], cIPD[1], w_fp[29] ); // Amplitude(s) for diagram number 1150 - FFV1_0( w_fp[46], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; jamp_sv[79] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[68], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[68], w_fp[6], COUPs[1], &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; jamp_sv[73] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[29], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[29], w_fp[6], COUPs[1], &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[73] += amp_sv[0]; @@ -25836,7 +25836,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1151 - FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25845,7 +25845,7 @@ namespace mg5amcCpu jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25854,7 +25854,7 @@ namespace mg5amcCpu 
jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[20], COUPs[1], &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25870,17 +25870,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1152 - FFV1_0( w_fp[41], w_fp[17], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[17], w_fp[4], COUPs[1], &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[12] -= amp_sv[0]; jamp_sv[36] -= amp_sv[0]; jamp_sv[78] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[68], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[68], w_fp[4], COUPs[1], &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[36] -= amp_sv[0]; jamp_sv[72] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[29], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[29], w_fp[4], COUPs[1], &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[72] += amp_sv[0]; @@ -25892,7 +25892,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1153 - FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25901,7 +25901,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25910,7 +25910,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * 
amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25926,7 +25926,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1154 - FFV1_0( w_fp[3], w_fp[17], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[27], COUPs[1], &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25935,7 +25935,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[68], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[68], w_fp[27], COUPs[1], &_fp[0] ); jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25944,7 +25944,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[29], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[29], w_fp[27], COUPs[1], &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25960,7 +25960,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1155 - FFV1_0( w_fp[59], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[59], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25969,7 +25969,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[111], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[111], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25978,7 +25978,7 @@ namespace mg5amcCpu jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[98], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[98], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25991,12 +25991,12 @@ namespace mg5amcCpu // *** DIAGRAM 1156 OF 1240 *** // Wavefunction(s) for diagram number 1156 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[98] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[27] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[98] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[27] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 1156 - VVVV1_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[18] -= amp_sv[0]; @@ -26013,7 +26013,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[18] -= amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -26030,7 +26030,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; 
jamp_sv[92] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; - VVVV4_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[5] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -26047,7 +26047,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - VVVV1_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -26064,7 +26064,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[97] -= amp_sv[0]; - VVVV3_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -26081,7 +26081,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - VVVV4_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -26098,7 +26098,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVVV1_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -26115,7 +26115,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[102] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - VVVV3_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ 
-26132,7 +26132,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVVV4_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -26153,12 +26153,12 @@ namespace mg5amcCpu // *** DIAGRAM 1157 OF 1240 *** // Wavefunction(s) for diagram number 1157 - VVV1P0_1( w_fp[98], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[59] ); - VVV1P0_1( w_fp[27], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[29] ); - VVV1P0_1( w_fp[111], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[98], w_fp[4], COUPs[0], 0., 0., w_fp[59] ); + VVV1P0_1( w_fp[27], w_fp[4], COUPs[0], 0., 0., w_fp[29] ); + VVV1P0_1( w_fp[111], w_fp[4], COUPs[0], 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 1157 - VVV1_0( w_fp[8], w_fp[5], w_fp[59], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[59], COUPs[0], &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[18] -= amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -26175,7 +26175,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[29], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[29], COUPs[0], &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -26192,7 +26192,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[68], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[68], COUPs[0], &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -26213,12 +26213,12 @@ namespace mg5amcCpu // *** DIAGRAM 1158 OF 1240 *** // Wavefunction(s) for diagram number 1158 - VVV1P0_1( w_fp[98], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[17] ); - VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[21] ); - VVV1P0_1( 
w_fp[111], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[98], w_fp[5], COUPs[0], 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[111], w_fp[5], COUPs[0], 0., 0., w_fp[71] ); // Amplitude(s) for diagram number 1158 - VVV1_0( w_fp[8], w_fp[4], w_fp[17], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[17], COUPs[0], &_fp[0] ); jamp_sv[5] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -26235,7 +26235,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -26252,7 +26252,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[71], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[71], COUPs[0], &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -26276,7 +26276,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1159 - VVV1_0( w_fp[98], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[98], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[18] -= amp_sv[0]; @@ -26293,7 +26293,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVV1_0( w_fp[27], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[27], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -26310,7 +26310,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[97] -= amp_sv[0]; - VVV1_0( w_fp[111], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[111], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); 
jamp_sv[4] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -26331,22 +26331,22 @@ namespace mg5amcCpu // *** DIAGRAM 1160 OF 1240 *** // Wavefunction(s) for diagram number 1160 - FFV1_2( w_fp[3], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); - FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); - FFV1_2( w_fp[3], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); + FFV1_2( w_fp[3], w_fp[98], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); + FFV1_2( w_fp[3], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); + FFV1_2( w_fp[3], w_fp[111], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); // Amplitude(s) for diagram number 1160 - FFV1_0( w_fp[16], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); jamp_sv[60] += amp_sv[0]; jamp_sv[61] -= amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[65] += amp_sv[0]; - FFV1_0( w_fp[20], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[20], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); jamp_sv[61] -= amp_sv[0]; jamp_sv[62] += amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[64] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); jamp_sv[60] -= amp_sv[0]; jamp_sv[62] += amp_sv[0]; jamp_sv[64] += amp_sv[0]; @@ -26358,7 +26358,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1161 - FFV1_0( w_fp[3], w_fp[33], w_fp[17], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[17], COUPs[1], &_fp[0] ); jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26367,7 +26367,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[52] += cxtype( 
0, 1 ) * amp_sv[0]; jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26376,7 +26376,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[71], COUPs[1], &_fp[0] ); jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26392,17 +26392,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1162 - FFV1_0( w_fp[38], w_fp[33], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[98], COUPs[1], &_fp[0] ); jamp_sv[49] += amp_sv[0]; jamp_sv[52] -= amp_sv[0]; jamp_sv[58] -= amp_sv[0]; jamp_sv[68] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[33], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[27], COUPs[1], &_fp[0] ); jamp_sv[52] -= amp_sv[0]; jamp_sv[55] += amp_sv[0]; jamp_sv[58] -= amp_sv[0]; jamp_sv[66] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[33], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[111], COUPs[1], &_fp[0] ); jamp_sv[49] -= amp_sv[0]; jamp_sv[55] += amp_sv[0]; jamp_sv[66] += amp_sv[0]; @@ -26414,17 +26414,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1163 - FFV1_0( w_fp[16], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); jamp_sv[84] += amp_sv[0]; jamp_sv[85] -= amp_sv[0]; jamp_sv[87] -= amp_sv[0]; jamp_sv[89] += amp_sv[0]; - FFV1_0( w_fp[20], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[20], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); jamp_sv[85] -= amp_sv[0]; jamp_sv[86] += amp_sv[0]; jamp_sv[87] -= amp_sv[0]; jamp_sv[88] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); jamp_sv[84] -= 
amp_sv[0]; jamp_sv[86] += amp_sv[0]; jamp_sv[88] += amp_sv[0]; @@ -26436,7 +26436,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1164 - FFV1_0( w_fp[3], w_fp[39], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[59], COUPs[1], &_fp[0] ); jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26445,7 +26445,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], &_fp[0] ); jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26454,7 +26454,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[68], COUPs[1], &_fp[0] ); jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26470,17 +26470,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1165 - FFV1_0( w_fp[46], w_fp[39], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[98], COUPs[1], &_fp[0] ); jamp_sv[73] += amp_sv[0]; jamp_sv[76] -= amp_sv[0]; jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); jamp_sv[76] -= amp_sv[0]; jamp_sv[79] += amp_sv[0]; jamp_sv[82] -= amp_sv[0]; jamp_sv[90] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[39], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[111], COUPs[1], &_fp[0] ); jamp_sv[73] -= amp_sv[0]; jamp_sv[79] += 
amp_sv[0]; jamp_sv[90] += amp_sv[0]; @@ -26489,22 +26489,22 @@ namespace mg5amcCpu // *** DIAGRAM 1166 OF 1240 *** // Wavefunction(s) for diagram number 1166 - FFV1_1( w_fp[2], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); - FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); - FFV1_1( w_fp[2], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] ); + FFV1_1( w_fp[2], w_fp[98], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); + FFV1_1( w_fp[2], w_fp[111], COUPs[1], cIPD[0], cIPD[1], w_fp[27] ); // Amplitude(s) for diagram number 1166 - FFV1_0( w_fp[46], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); jamp_sv[5] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[98], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[98], w_fp[5], COUPs[1], &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[27], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[27], w_fp[5], COUPs[1], &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; @@ -26516,7 +26516,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1167 - FFV1_0( w_fp[46], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26525,7 +26525,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26534,7 +26534,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26550,17 +26550,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1168 - FFV1_0( w_fp[38], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[18] -= amp_sv[0]; jamp_sv[42] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[98], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[98], w_fp[4], COUPs[1], &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[42] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[27], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[27], w_fp[4], COUPs[1], &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; @@ -26572,7 +26572,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1169 - FFV1_0( w_fp[38], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26581,7 +26581,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] += 
cxtype( 0, 1 ) * amp_sv[0]; @@ -26590,7 +26590,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26606,7 +26606,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1170 - FFV1_0( w_fp[3], w_fp[23], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26615,7 +26615,7 @@ namespace mg5amcCpu jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[98], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[98], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26624,7 +26624,7 @@ namespace mg5amcCpu jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[27], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[27], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26640,7 +26640,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1171 - FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * 
amp_sv[0]; @@ -26649,7 +26649,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[20], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[20], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26658,7 +26658,7 @@ namespace mg5amcCpu jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[60], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26671,25 +26671,25 @@ namespace mg5amcCpu // *** DIAGRAM 1172 OF 1240 *** // Wavefunction(s) for diagram number 1172 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[60] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[20] ); - FFV1_2( w_fp[3], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); - FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] ); - FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[60] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[20] ); + FFV1_2( w_fp[3], w_fp[60], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); + FFV1_2( w_fp[3], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[27] ); + FFV1_2( w_fp[3], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 1172 - FFV1_0( w_fp[16], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[77], w_fp[6], 
COUPs[1], &_fp[0] ); jamp_sv[42] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; jamp_sv[45] -= amp_sv[0]; jamp_sv[47] += amp_sv[0]; - FFV1_0( w_fp[27], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[27], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); jamp_sv[43] -= amp_sv[0]; jamp_sv[44] += amp_sv[0]; jamp_sv[45] -= amp_sv[0]; jamp_sv[46] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); jamp_sv[42] -= amp_sv[0]; jamp_sv[44] += amp_sv[0]; jamp_sv[46] += amp_sv[0]; @@ -26698,12 +26698,12 @@ namespace mg5amcCpu // *** DIAGRAM 1173 OF 1240 *** // Wavefunction(s) for diagram number 1173 - VVV1P0_1( w_fp[60], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] ); - VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[68] ); - VVV1P0_1( w_fp[20], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] ); + VVV1P0_1( w_fp[60], w_fp[6], COUPs[0], 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[20], w_fp[6], COUPs[0], 0., 0., w_fp[29] ); // Amplitude(s) for diagram number 1173 - FFV1_0( w_fp[3], w_fp[77], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[23], COUPs[1], &_fp[0] ); jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26712,7 +26712,7 @@ namespace mg5amcCpu jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[68], COUPs[1], &_fp[0] ); jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26721,7 +26721,7 @@ namespace mg5amcCpu jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[29], 
COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26737,17 +26737,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1174 - FFV1_0( w_fp[41], w_fp[77], w_fp[60], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[60], COUPs[1], &_fp[0] ); jamp_sv[24] += amp_sv[0]; jamp_sv[26] -= amp_sv[0]; jamp_sv[32] -= amp_sv[0]; jamp_sv[38] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[26] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; jamp_sv[32] -= amp_sv[0]; jamp_sv[36] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[77], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[20], COUPs[1], &_fp[0] ); jamp_sv[24] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -26756,22 +26756,22 @@ namespace mg5amcCpu // *** DIAGRAM 1175 OF 1240 *** // Wavefunction(s) for diagram number 1175 - FFV1_1( w_fp[2], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] ); - FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); - FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + FFV1_1( w_fp[2], w_fp[60], COUPs[1], cIPD[0], cIPD[1], w_fp[59] ); + FFV1_1( w_fp[2], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); + FFV1_1( w_fp[2], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 1175 - FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[61] -= amp_sv[0]; jamp_sv[85] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[71], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[71], w_fp[6], COUPs[1], &_fp[0] ); jamp_sv[15] -= amp_sv[0]; jamp_sv[51] += amp_sv[0]; jamp_sv[61] -= amp_sv[0]; 
jamp_sv[75] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[21], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[21], w_fp[6], COUPs[1], &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[51] += amp_sv[0]; jamp_sv[75] += amp_sv[0]; @@ -26783,7 +26783,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1176 - FFV1_0( w_fp[52], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26792,7 +26792,7 @@ namespace mg5amcCpu jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26801,7 +26801,7 @@ namespace mg5amcCpu jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26817,17 +26817,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1177 - FFV1_0( w_fp[52], w_fp[47], w_fp[60], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[60], COUPs[1], &_fp[0] ); jamp_sv[99] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[101] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; 
- FFV1_0( w_fp[52], w_fp[47], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[20], COUPs[1], &_fp[0] ); jamp_sv[99] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; jamp_sv[115] += amp_sv[0]; @@ -26839,7 +26839,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1178 - FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], &_fp[0] ); jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26848,7 +26848,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[85] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[71], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[71], w_fp[72], COUPs[1], &_fp[0] ); jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26857,7 +26857,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[21], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[21], w_fp[72], COUPs[1], &_fp[0] ); jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26873,7 +26873,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1179 - FFV1_0( w_fp[16], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26882,7 +26882,7 @@ namespace mg5amcCpu jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[27], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + 
FFV1_0( w_fp[27], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26891,7 +26891,7 @@ namespace mg5amcCpu jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26907,7 +26907,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1180 - VVV1_0( w_fp[60], w_fp[72], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[60], w_fp[72], w_fp[8], COUPs[0], &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; jamp_sv[14] += amp_sv[0]; @@ -26924,7 +26924,7 @@ namespace mg5amcCpu jamp_sv[103] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVV1_0( w_fp[24], w_fp[72], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[72], w_fp[8], COUPs[0], &_fp[0] ); jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -26941,7 +26941,7 @@ namespace mg5amcCpu jamp_sv[104] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[20], w_fp[72], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[20], w_fp[72], w_fp[8], COUPs[0], &_fp[0] ); jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -26965,7 +26965,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1181 - VVVV1_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[8] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -26982,7 +26982,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= 
amp_sv[0]; - VVVV3_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[14] -= amp_sv[0]; @@ -26999,7 +26999,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - VVVV4_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -27016,7 +27016,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV1_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -27033,7 +27033,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV3_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[43] += amp_sv[0]; @@ -27050,7 +27050,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVVV4_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[15] += amp_sv[0]; jamp_sv[26] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -27067,7 +27067,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV1_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -27084,7 +27084,7 @@ namespace 
mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[115] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV3_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -27101,7 +27101,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[24] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -27122,12 +27122,12 @@ namespace mg5amcCpu // *** DIAGRAM 1182 OF 1240 *** // Wavefunction(s) for diagram number 1182 - VVV1P0_1( w_fp[60], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[72] ); - VVV1P0_1( w_fp[24], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[60] ); - VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[24] ); + VVV1P0_1( w_fp[60], w_fp[1], COUPs[0], 0., 0., w_fp[72] ); + VVV1P0_1( w_fp[24], w_fp[1], COUPs[0], 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 1182 - VVV1_0( w_fp[8], w_fp[6], w_fp[72], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[72], COUPs[0], &_fp[0] ); jamp_sv[8] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -27144,7 +27144,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -27161,7 +27161,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[24], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[24], COUPs[0], &_fp[0] ); jamp_sv[8] -= amp_sv[0]; 
jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -27185,7 +27185,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1183 - VVV1_0( w_fp[1], w_fp[8], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[23], COUPs[0], &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -27202,7 +27202,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[68], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[68], COUPs[0], &_fp[0] ); jamp_sv[15] += amp_sv[0]; jamp_sv[26] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -27219,7 +27219,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[24] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -27243,7 +27243,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1184 - FFV1_0( w_fp[3], w_fp[47], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[72], COUPs[1], &_fp[0] ); jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27252,7 +27252,7 @@ namespace mg5amcCpu jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], &_fp[0] ); jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27261,7 +27261,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[24], 
COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27277,17 +27277,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1185 - FFV1_0( w_fp[16], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - FFV1_0( w_fp[27], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[27], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); jamp_sv[103] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; @@ -27299,7 +27299,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1186 - FFV1_0( w_fp[41], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27308,7 +27308,7 @@ namespace mg5amcCpu jamp_sv[38] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27317,7 +27317,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + 
FFV1_0( w_fp[41], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27333,17 +27333,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1187 - FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], &_fp[0] ); jamp_sv[8] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[60] -= amp_sv[0]; jamp_sv[84] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[71], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[71], w_fp[1], COUPs[1], &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[50] += amp_sv[0]; jamp_sv[60] -= amp_sv[0]; jamp_sv[74] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[21], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[21], w_fp[1], COUPs[1], &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[50] += amp_sv[0]; jamp_sv[74] += amp_sv[0]; @@ -27352,25 +27352,25 @@ namespace mg5amcCpu // *** DIAGRAM 1188 OF 1240 *** // Wavefunction(s) for diagram number 1188 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[71] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[59] ); - FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] ); - FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); - FFV1_2( w_fp[3], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[71] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[59] ); + FFV1_2( w_fp[3], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[24] ); + FFV1_2( w_fp[3], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); + FFV1_2( w_fp[3], w_fp[59], COUPs[1], cIPD[0], cIPD[1], w_fp[72] ); // Amplitude(s) for diagram number 1188 - FFV1_0( w_fp[24], w_fp[77], 
w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[24], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); jamp_sv[36] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; jamp_sv[39] -= amp_sv[0]; jamp_sv[41] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); jamp_sv[37] -= amp_sv[0]; jamp_sv[38] += amp_sv[0]; jamp_sv[39] -= amp_sv[0]; jamp_sv[40] += amp_sv[0]; - FFV1_0( w_fp[72], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[72], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); jamp_sv[36] -= amp_sv[0]; jamp_sv[38] += amp_sv[0]; jamp_sv[40] += amp_sv[0]; @@ -27379,12 +27379,12 @@ namespace mg5amcCpu // *** DIAGRAM 1189 OF 1240 *** // Wavefunction(s) for diagram number 1189 - VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[98] ); - VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[27] ); - VVV1P0_1( w_fp[59], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 0., 0., w_fp[98] ); + VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 0., 0., w_fp[27] ); + VVV1P0_1( w_fp[59], w_fp[5], COUPs[0], 0., 0., w_fp[16] ); // Amplitude(s) for diagram number 1189 - FFV1_0( w_fp[3], w_fp[77], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[98], COUPs[1], &_fp[0] ); jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27393,7 +27393,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27402,7 +27402,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= 
cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], &_fp[0] ); jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -27418,17 +27418,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1190 - FFV1_0( w_fp[38], w_fp[77], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[25] += amp_sv[0]; jamp_sv[28] -= amp_sv[0]; jamp_sv[34] -= amp_sv[0]; jamp_sv[44] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[77], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[71], COUPs[1], &_fp[0] ); jamp_sv[28] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; jamp_sv[34] -= amp_sv[0]; jamp_sv[42] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[77], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[59], COUPs[1], &_fp[0] ); jamp_sv[25] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -27437,22 +27437,22 @@ namespace mg5amcCpu // *** DIAGRAM 1191 OF 1240 *** // Wavefunction(s) for diagram number 1191 - FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] ); - FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] ); - FFV1_1( w_fp[2], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[29] ); + FFV1_1( w_fp[2], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[68] ); + FFV1_1( w_fp[2], w_fp[59], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 1191 - FFV1_0( w_fp[52], w_fp[29], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[29], w_fp[5], COUPs[1], &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[67] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[68], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[68], w_fp[5], COUPs[1], &_fp[0] ); 
jamp_sv[21] -= amp_sv[0]; jamp_sv[53] += amp_sv[0]; jamp_sv[67] -= amp_sv[0]; jamp_sv[99] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[53] += amp_sv[0]; jamp_sv[99] += amp_sv[0]; @@ -27464,7 +27464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1192 - FFV1_0( w_fp[52], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27473,7 +27473,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27482,7 +27482,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -27498,17 +27498,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1193 - FFV1_0( w_fp[52], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[75] += amp_sv[0]; jamp_sv[77] -= amp_sv[0]; jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); jamp_sv[77] -= amp_sv[0]; 
jamp_sv[85] += amp_sv[0]; jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[39], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[59], COUPs[1], &_fp[0] ); jamp_sv[75] -= amp_sv[0]; jamp_sv[85] += amp_sv[0]; jamp_sv[91] += amp_sv[0]; @@ -27520,7 +27520,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1194 - FFV1_0( w_fp[3], w_fp[29], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[29], w_fp[66], COUPs[1], &_fp[0] ); jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27529,7 +27529,7 @@ namespace mg5amcCpu jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[68], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[68], w_fp[66], COUPs[1], &_fp[0] ); jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -27538,7 +27538,7 @@ namespace mg5amcCpu jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[23], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[66], COUPs[1], &_fp[0] ); jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -27554,7 +27554,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1195 - FFV1_0( w_fp[24], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[24], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27563,7 +27563,7 @@ namespace mg5amcCpu jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[83] += cxtype( 0, 1 ) * 
amp_sv[0]; - FFV1_0( w_fp[60], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27572,7 +27572,7 @@ namespace mg5amcCpu jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[72], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[72], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -27588,7 +27588,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1196 - VVV1_0( w_fp[21], w_fp[66], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[21], w_fp[66], w_fp[8], COUPs[0], &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[20] += amp_sv[0]; @@ -27605,7 +27605,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVV1_0( w_fp[71], w_fp[66], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[71], w_fp[66], w_fp[8], COUPs[0], &_fp[0] ); jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -27622,7 +27622,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[98] -= amp_sv[0]; jamp_sv[99] += amp_sv[0]; - VVV1_0( w_fp[59], w_fp[66], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[59], w_fp[66], w_fp[8], COUPs[0], &_fp[0] ); jamp_sv[10] += amp_sv[0]; jamp_sv[11] -= amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -27646,7 +27646,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1197 - VVVV1_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[10] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[25] -= amp_sv[0]; @@ -27663,7 +27663,7 @@ namespace mg5amcCpu 
jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - VVVV3_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[10] += amp_sv[0]; jamp_sv[11] -= amp_sv[0]; jamp_sv[20] -= amp_sv[0]; @@ -27680,7 +27680,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; - VVVV4_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; jamp_sv[25] += amp_sv[0]; @@ -27697,7 +27697,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; - VVVV1_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -27714,7 +27714,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVVV3_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; jamp_sv[37] += amp_sv[0]; @@ -27731,7 +27731,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV4_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[21] += amp_sv[0]; jamp_sv[28] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -27748,7 +27748,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV1_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; 
jamp_sv[31] -= amp_sv[0]; @@ -27765,7 +27765,7 @@ namespace mg5amcCpu jamp_sv[94] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVVV3_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -27782,7 +27782,7 @@ namespace mg5amcCpu jamp_sv[99] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV4_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[25] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -27803,12 +27803,12 @@ namespace mg5amcCpu // *** DIAGRAM 1198 OF 1240 *** // Wavefunction(s) for diagram number 1198 - VVV1P0_1( w_fp[21], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[66] ); - VVV1P0_1( w_fp[71], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] ); - VVV1P0_1( w_fp[59], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[21], w_fp[1], COUPs[0], 0., 0., w_fp[66] ); + VVV1P0_1( w_fp[71], w_fp[1], COUPs[0], 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[59], w_fp[1], COUPs[0], 0., 0., w_fp[71] ); // Amplitude(s) for diagram number 1198 - VVV1_0( w_fp[8], w_fp[5], w_fp[66], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[66], COUPs[0], &_fp[0] ); jamp_sv[10] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[25] -= amp_sv[0]; @@ -27825,7 +27825,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -27842,7 +27842,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[71], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], 
w_fp[71], COUPs[0], &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -27866,7 +27866,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1199 - VVV1_0( w_fp[1], w_fp[8], w_fp[98], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[98], COUPs[0], &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; jamp_sv[25] += amp_sv[0]; @@ -27883,7 +27883,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); jamp_sv[21] += amp_sv[0]; jamp_sv[28] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -27900,7 +27900,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[16], COUPs[0], &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[25] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -27924,7 +27924,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1200 - FFV1_0( w_fp[3], w_fp[39], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[66], COUPs[1], &_fp[0] ); jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27933,7 +27933,7 @@ namespace mg5amcCpu jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27942,7 +27942,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; 
- FFV1_0( w_fp[3], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27958,17 +27958,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1201 - FFV1_0( w_fp[24], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[24], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); jamp_sv[78] += amp_sv[0]; jamp_sv[79] -= amp_sv[0]; jamp_sv[81] -= amp_sv[0]; jamp_sv[83] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); jamp_sv[79] -= amp_sv[0]; jamp_sv[80] += amp_sv[0]; jamp_sv[81] -= amp_sv[0]; jamp_sv[82] += amp_sv[0]; - FFV1_0( w_fp[72], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[72], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); jamp_sv[78] -= amp_sv[0]; jamp_sv[80] += amp_sv[0]; jamp_sv[82] += amp_sv[0]; @@ -27980,7 +27980,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1202 - FFV1_0( w_fp[38], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27989,7 +27989,7 @@ namespace mg5amcCpu jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27998,7 +27998,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[71], 
COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28014,17 +28014,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1203 - FFV1_0( w_fp[38], w_fp[29], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[29], w_fp[1], COUPs[1], &_fp[0] ); jamp_sv[10] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[66] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[68], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[68], w_fp[1], COUPs[1], &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[52] += amp_sv[0]; jamp_sv[66] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[23], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[23], w_fp[1], COUPs[1], &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[52] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; @@ -28033,25 +28033,25 @@ namespace mg5amcCpu // *** DIAGRAM 1204 OF 1240 *** // Wavefunction(s) for diagram number 1204 - VVVV1P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] ); - VVVV3P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[68] ); - VVVV4P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[29] ); - FFV1_2( w_fp[3], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); - FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); - FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] ); + VVVV1P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[23] ); + VVVV3P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[68] ); + VVVV4P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[29] ); + FFV1_2( w_fp[3], w_fp[23], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); + FFV1_2( w_fp[3], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[3], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[66] ); // Amplitude(s) for diagram number 1204 - 
FFV1_0( w_fp[71], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); jamp_sv[30] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; jamp_sv[33] -= amp_sv[0]; jamp_sv[35] += amp_sv[0]; - FFV1_0( w_fp[21], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); jamp_sv[31] -= amp_sv[0]; jamp_sv[32] += amp_sv[0]; jamp_sv[33] -= amp_sv[0]; jamp_sv[34] += amp_sv[0]; - FFV1_0( w_fp[66], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[66], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); jamp_sv[30] -= amp_sv[0]; jamp_sv[32] += amp_sv[0]; jamp_sv[34] += amp_sv[0]; @@ -28060,12 +28060,12 @@ namespace mg5amcCpu // *** DIAGRAM 1205 OF 1240 *** // Wavefunction(s) for diagram number 1205 - VVV1P0_1( w_fp[23], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[72] ); - VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[60] ); - VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[24] ); + VVV1P0_1( w_fp[23], w_fp[4], COUPs[0], 0., 0., w_fp[72] ); + VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 1205 - FFV1_0( w_fp[3], w_fp[77], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[72], COUPs[1], &_fp[0] ); jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28074,7 +28074,7 @@ namespace mg5amcCpu jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[60], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[60], COUPs[1], &_fp[0] ); jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28083,7 +28083,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) 
* amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28099,17 +28099,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1206 - FFV1_0( w_fp[46], w_fp[77], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[23], COUPs[1], &_fp[0] ); jamp_sv[27] += amp_sv[0]; jamp_sv[29] -= amp_sv[0]; jamp_sv[40] -= amp_sv[0]; jamp_sv[46] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[77], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[68], COUPs[1], &_fp[0] ); jamp_sv[29] -= amp_sv[0]; jamp_sv[37] += amp_sv[0]; jamp_sv[40] -= amp_sv[0]; jamp_sv[43] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); jamp_sv[27] -= amp_sv[0]; jamp_sv[37] += amp_sv[0]; jamp_sv[43] += amp_sv[0]; @@ -28118,22 +28118,22 @@ namespace mg5amcCpu // *** DIAGRAM 1207 OF 1240 *** // Wavefunction(s) for diagram number 1207 - FFV1_1( w_fp[2], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); - FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); - FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] ); + FFV1_1( w_fp[2], w_fp[23], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); + FFV1_1( w_fp[2], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[2], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[27] ); // Amplitude(s) for diagram number 1207 - FFV1_0( w_fp[52], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[23] -= amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[16], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[16], w_fp[4], 
COUPs[1], &_fp[0] ); jamp_sv[23] -= amp_sv[0]; jamp_sv[77] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[27], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[27], w_fp[4], COUPs[1], &_fp[0] ); jamp_sv[17] -= amp_sv[0]; jamp_sv[77] += amp_sv[0]; jamp_sv[101] += amp_sv[0]; @@ -28145,7 +28145,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1208 - FFV1_0( w_fp[52], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28154,7 +28154,7 @@ namespace mg5amcCpu jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28163,7 +28163,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28179,17 +28179,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1209 - FFV1_0( w_fp[52], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); jamp_sv[51] += amp_sv[0]; jamp_sv[53] -= amp_sv[0]; jamp_sv[64] -= amp_sv[0]; jamp_sv[70] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[33], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[68], COUPs[1], &_fp[0] ); jamp_sv[53] 
-= amp_sv[0]; jamp_sv[61] += amp_sv[0]; jamp_sv[64] -= amp_sv[0]; jamp_sv[67] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); jamp_sv[51] -= amp_sv[0]; jamp_sv[61] += amp_sv[0]; jamp_sv[67] += amp_sv[0]; @@ -28201,7 +28201,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1210 - FFV1_0( w_fp[3], w_fp[77], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[61], COUPs[1], &_fp[0] ); jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28210,7 +28210,7 @@ namespace mg5amcCpu jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[16], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[16], w_fp[61], COUPs[1], &_fp[0] ); jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -28219,7 +28219,7 @@ namespace mg5amcCpu jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[27], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[27], w_fp[61], COUPs[1], &_fp[0] ); jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -28235,7 +28235,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1211 - FFV1_0( w_fp[71], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28244,7 +28244,7 @@ namespace mg5amcCpu jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] += 
cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[21], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28253,7 +28253,7 @@ namespace mg5amcCpu jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[66], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[66], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -28269,7 +28269,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1212 - VVV1_0( w_fp[23], w_fp[61], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[23], w_fp[61], w_fp[8], COUPs[0], &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -28286,7 +28286,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[68], w_fp[61], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[68], w_fp[61], w_fp[8], COUPs[0], &_fp[0] ); jamp_sv[22] += amp_sv[0]; jamp_sv[23] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -28303,7 +28303,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[100] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[29], w_fp[61], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[29], w_fp[61], w_fp[8], COUPs[0], &_fp[0] ); jamp_sv[16] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -28327,7 +28327,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1213 - VVVV1_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[16] += amp_sv[0]; jamp_sv[22] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; @@ -28344,7 +28344,7 @@ 
namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - VVVV3_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[16] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[22] -= amp_sv[0]; @@ -28361,7 +28361,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV4_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[17] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[27] += amp_sv[0]; @@ -28378,7 +28378,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV1_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -28395,7 +28395,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV3_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -28412,7 +28412,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[23] += amp_sv[0]; jamp_sv[29] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -28429,7 +28429,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV1_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[16] -= amp_sv[0]; 
jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -28446,7 +28446,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV3_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -28463,7 +28463,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV4_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -28484,12 +28484,12 @@ namespace mg5amcCpu // *** DIAGRAM 1214 OF 1240 *** // Wavefunction(s) for diagram number 1214 - VVV1P0_1( w_fp[23], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[61] ); - VVV1P0_1( w_fp[68], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[23] ); - VVV1P0_1( w_fp[29], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[23], w_fp[1], COUPs[0], 0., 0., w_fp[61] ); + VVV1P0_1( w_fp[68], w_fp[1], COUPs[0], 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[29], w_fp[1], COUPs[0], 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 1214 - VVV1_0( w_fp[8], w_fp[4], w_fp[61], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[61], COUPs[0], &_fp[0] ); jamp_sv[16] += amp_sv[0]; jamp_sv[22] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; @@ -28506,7 +28506,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -28523,7 +28523,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[68], COUPs[0], 1.0, &_fp[0] ); + 
VVV1_0( w_fp[8], w_fp[4], w_fp[68], COUPs[0], &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -28547,7 +28547,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1215 - VVV1_0( w_fp[1], w_fp[8], w_fp[72], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[72], COUPs[0], &_fp[0] ); jamp_sv[17] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[27] += amp_sv[0]; @@ -28564,7 +28564,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[60], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[60], COUPs[0], &_fp[0] ); jamp_sv[23] += amp_sv[0]; jamp_sv[29] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -28581,7 +28581,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -28605,7 +28605,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1216 - FFV1_0( w_fp[3], w_fp[33], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[61], COUPs[1], &_fp[0] ); jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28614,7 +28614,7 @@ namespace mg5amcCpu jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28623,7 +28623,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= 
cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[68], COUPs[1], &_fp[0] ); jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28639,17 +28639,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1217 - FFV1_0( w_fp[71], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); jamp_sv[54] += amp_sv[0]; jamp_sv[55] -= amp_sv[0]; jamp_sv[57] -= amp_sv[0]; jamp_sv[59] += amp_sv[0]; - FFV1_0( w_fp[21], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); jamp_sv[55] -= amp_sv[0]; jamp_sv[56] += amp_sv[0]; jamp_sv[57] -= amp_sv[0]; jamp_sv[58] += amp_sv[0]; - FFV1_0( w_fp[66], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[66], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); jamp_sv[54] -= amp_sv[0]; jamp_sv[56] += amp_sv[0]; jamp_sv[58] += amp_sv[0]; @@ -28661,7 +28661,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1218 - FFV1_0( w_fp[46], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28670,7 +28670,7 @@ namespace mg5amcCpu jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28679,7 +28679,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( 
w_fp[46], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28695,17 +28695,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1219 - FFV1_0( w_fp[46], w_fp[77], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[1], COUPs[1], &_fp[0] ); jamp_sv[16] += amp_sv[0]; jamp_sv[22] -= amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[16], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[16], w_fp[1], COUPs[1], &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[27], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[27], w_fp[1], COUPs[1], &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[76] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; @@ -28717,7 +28717,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1220 - VVVV1_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28734,7 +28734,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28751,7 +28751,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[21] += 
amp_sv[0]; @@ -28768,7 +28768,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28785,7 +28785,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28802,7 +28802,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[19] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -28819,7 +28819,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -28836,7 +28836,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -28853,7 +28853,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); 
jamp_sv[18] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[22] -= amp_sv[0]; @@ -28874,12 +28874,12 @@ namespace mg5amcCpu // *** DIAGRAM 1221 OF 1240 *** // Wavefunction(s) for diagram number 1221 - VVV1P0_1( w_fp[0], w_fp[73], COUPs[0], 1.0, 0., 0., w_fp[27] ); - VVV1P0_1( w_fp[0], w_fp[79], COUPs[0], 1.0, 0., 0., w_fp[1] ); - VVV1P0_1( w_fp[0], w_fp[80], COUPs[0], 1.0, 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[0], w_fp[73], COUPs[0], 0., 0., w_fp[27] ); + VVV1P0_1( w_fp[0], w_fp[79], COUPs[0], 0., 0., w_fp[1] ); + VVV1P0_1( w_fp[0], w_fp[80], COUPs[0], 0., 0., w_fp[16] ); // Amplitude(s) for diagram number 1221 - VVV1_0( w_fp[8], w_fp[6], w_fp[27], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[27], COUPs[0], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28896,7 +28896,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[1], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[1], COUPs[0], &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28913,7 +28913,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -28937,7 +28937,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1222 - VVV1_0( w_fp[73], w_fp[6], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[73], w_fp[6], w_fp[56], COUPs[0], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28954,7 +28954,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[79], w_fp[6], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[79], w_fp[6], w_fp[56], COUPs[0], 
&_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28971,7 +28971,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[80], w_fp[6], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[80], w_fp[6], w_fp[56], COUPs[0], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -28995,7 +28995,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1223 - FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], &_fp[0] ); jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29004,7 +29004,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29013,7 +29013,7 @@ namespace mg5amcCpu jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], &_fp[0] ); jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -29029,17 +29029,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1224 - FFV1_0( w_fp[3], w_fp[113], w_fp[73], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[73], COUPs[1], &_fp[0] ); jamp_sv[96] += amp_sv[0]; jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[113], w_fp[79], COUPs[1], 
1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[79], COUPs[1], &_fp[0] ); jamp_sv[97] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[113], w_fp[80], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[80], COUPs[1], &_fp[0] ); jamp_sv[96] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; @@ -29051,7 +29051,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1225 - FFV1_0( w_fp[41], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29060,7 +29060,7 @@ namespace mg5amcCpu jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[1], COUPs[1], &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29069,7 +29069,7 @@ namespace mg5amcCpu jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -29085,17 +29085,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1226 - FFV1_0( w_fp[62], w_fp[2], w_fp[73], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[73], COUPs[1], &_fp[0] ); jamp_sv[32] += amp_sv[0]; jamp_sv[38] -= amp_sv[0]; jamp_sv[62] -= amp_sv[0]; jamp_sv[86] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], 
w_fp[79], COUPs[1], &_fp[0] ); jamp_sv[38] -= amp_sv[0]; jamp_sv[56] += amp_sv[0]; jamp_sv[62] -= amp_sv[0]; jamp_sv[80] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); jamp_sv[32] -= amp_sv[0]; jamp_sv[56] += amp_sv[0]; jamp_sv[80] += amp_sv[0]; @@ -29107,7 +29107,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1227 - VVVV1_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29124,7 +29124,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29141,7 +29141,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[15] += amp_sv[0]; @@ -29158,7 +29158,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29175,7 +29175,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; 
jamp_sv[10] -= amp_sv[0]; @@ -29192,7 +29192,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[13] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; @@ -29209,7 +29209,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[18] += amp_sv[0]; @@ -29226,7 +29226,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -29243,7 +29243,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -29264,12 +29264,12 @@ namespace mg5amcCpu // *** DIAGRAM 1228 OF 1240 *** // Wavefunction(s) for diagram number 1228 - VVV1P0_1( w_fp[0], w_fp[57], COUPs[0], 1.0, 0., 0., w_fp[62] ); - VVV1P0_1( w_fp[0], w_fp[81], COUPs[0], 1.0, 0., 0., w_fp[80] ); - VVV1P0_1( w_fp[0], w_fp[82], COUPs[0], 1.0, 0., 0., w_fp[79] ); + VVV1P0_1( w_fp[0], w_fp[57], COUPs[0], 0., 0., w_fp[62] ); + VVV1P0_1( w_fp[0], w_fp[81], COUPs[0], 0., 0., w_fp[80] ); + VVV1P0_1( w_fp[0], w_fp[82], COUPs[0], 0., 0., w_fp[79] ); // Amplitude(s) for diagram number 1228 - VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, 
&_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29286,7 +29286,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[80], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[80], COUPs[0], &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29303,7 +29303,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[79], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[79], COUPs[0], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[18] += amp_sv[0]; @@ -29327,7 +29327,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1229 - VVV1_0( w_fp[57], w_fp[5], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[57], w_fp[5], w_fp[56], COUPs[0], &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29344,7 +29344,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[81], w_fp[5], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[81], w_fp[5], w_fp[56], COUPs[0], &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29361,7 +29361,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVV1_0( w_fp[82], w_fp[5], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[82], w_fp[5], w_fp[56], COUPs[0], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -29385,7 +29385,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1230 - FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], &_fp[0] ); jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29394,7 +29394,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[80], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[80], COUPs[1], &_fp[0] ); jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29403,7 +29403,7 @@ namespace mg5amcCpu jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[79], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[79], COUPs[1], &_fp[0] ); jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -29419,17 +29419,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1231 - FFV1_0( w_fp[3], w_fp[102], w_fp[57], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[57], COUPs[1], &_fp[0] ); jamp_sv[72] += amp_sv[0]; jamp_sv[73] -= amp_sv[0]; jamp_sv[75] -= amp_sv[0]; jamp_sv[77] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[81], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[81], COUPs[1], &_fp[0] ); jamp_sv[73] -= amp_sv[0]; jamp_sv[74] += amp_sv[0]; jamp_sv[75] -= amp_sv[0]; jamp_sv[76] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[82], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[82], COUPs[1], &_fp[0] ); jamp_sv[72] -= amp_sv[0]; jamp_sv[74] += amp_sv[0]; jamp_sv[76] += amp_sv[0]; @@ -29441,7 +29441,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1232 - FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] += cxtype( 0, 1 ) * 
amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29450,7 +29450,7 @@ namespace mg5amcCpu jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29459,7 +29459,7 @@ namespace mg5amcCpu jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -29475,17 +29475,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1233 - FFV1_0( w_fp[104], w_fp[2], w_fp[57], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[57], COUPs[1], &_fp[0] ); jamp_sv[34] += amp_sv[0]; jamp_sv[44] -= amp_sv[0]; jamp_sv[68] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - FFV1_0( w_fp[104], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); jamp_sv[44] -= amp_sv[0]; jamp_sv[58] += amp_sv[0]; jamp_sv[68] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; - FFV1_0( w_fp[104], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); jamp_sv[34] -= amp_sv[0]; jamp_sv[58] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; @@ -29497,7 +29497,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1234 - VVVV1_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -29514,7 
+29514,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -29531,7 +29531,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[9] += amp_sv[0]; @@ -29548,7 +29548,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -29565,7 +29565,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -29582,7 +29582,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[7] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; @@ -29599,7 +29599,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] 
+= amp_sv[0]; jamp_sv[19] += amp_sv[0]; @@ -29616,7 +29616,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -29633,7 +29633,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[107] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29654,12 +29654,12 @@ namespace mg5amcCpu // *** DIAGRAM 1235 OF 1240 *** // Wavefunction(s) for diagram number 1235 - VVV1P0_1( w_fp[0], w_fp[55], COUPs[0], 1.0, 0., 0., w_fp[104] ); - VVV1P0_1( w_fp[0], w_fp[83], COUPs[0], 1.0, 0., 0., w_fp[82] ); - VVV1P0_1( w_fp[0], w_fp[84], COUPs[0], 1.0, 0., 0., w_fp[81] ); + VVV1P0_1( w_fp[0], w_fp[55], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[0], w_fp[83], COUPs[0], 0., 0., w_fp[82] ); + VVV1P0_1( w_fp[0], w_fp[84], COUPs[0], 0., 0., w_fp[81] ); // Amplitude(s) for diagram number 1235 - VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -29676,7 +29676,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[82], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[82], COUPs[0], &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -29693,7 +29693,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[81], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], 
w_fp[4], w_fp[81], COUPs[0], &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[19] += amp_sv[0]; @@ -29717,7 +29717,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1236 - VVV1_0( w_fp[55], w_fp[4], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[55], w_fp[4], w_fp[56], COUPs[0], &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -29734,7 +29734,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[83], w_fp[4], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[83], w_fp[4], w_fp[56], COUPs[0], &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -29751,7 +29751,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVV1_0( w_fp[84], w_fp[4], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[84], w_fp[4], w_fp[56], COUPs[0], &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -29775,7 +29775,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1237 - FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29784,7 +29784,7 @@ namespace mg5amcCpu jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[82], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[82], COUPs[1], &_fp[0] ); jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29793,7 +29793,7 @@ namespace mg5amcCpu jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * 
amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[81], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[81], COUPs[1], &_fp[0] ); jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -29809,17 +29809,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1238 - FFV1_0( w_fp[3], w_fp[114], w_fp[55], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[55], COUPs[1], &_fp[0] ); jamp_sv[48] += amp_sv[0]; jamp_sv[49] -= amp_sv[0]; jamp_sv[51] -= amp_sv[0]; jamp_sv[53] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[114], w_fp[83], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[83], COUPs[1], &_fp[0] ); jamp_sv[49] -= amp_sv[0]; jamp_sv[50] += amp_sv[0]; jamp_sv[51] -= amp_sv[0]; jamp_sv[52] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[114], w_fp[84], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[84], COUPs[1], &_fp[0] ); jamp_sv[48] -= amp_sv[0]; jamp_sv[50] += amp_sv[0]; jamp_sv[52] += amp_sv[0]; @@ -29831,7 +29831,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1239 - FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29840,7 +29840,7 @@ namespace mg5amcCpu jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29849,7 +29849,7 @@ namespace mg5amcCpu jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], 
w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -29865,17 +29865,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1240 - FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], &_fp[0] ); jamp_sv[40] += amp_sv[0]; jamp_sv[46] -= amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], &_fp[0] ); jamp_sv[46] -= amp_sv[0]; jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], &_fp[0] ); jamp_sv[40] -= amp_sv[0]; jamp_sv[82] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; @@ -30628,12 +30628,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp 
exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f index 6828f1c252..e6d2fc3099 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f @@ -39,7 +39,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -127,24 +126,11 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f index fc156798a8..7da1a11e92 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f @@ -9862,6 +9862,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -19615,6 +19616,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk index c6c1826de7..43cee0977e 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk @@ -27,8 +27,6 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) - #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -222,8 +220,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +555,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: 
$(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile index 74b19033a8..74db44d848 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
+LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/generate_events b/epochX/cudacpp/gg_ttggg.mad/bin/generate_events index 5577cc66a0..107313b25d 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/generate_events +++ b/epochX/cudacpp/gg_ttggg.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME -import misc as misc + import logging import logging.config @@ -160,31 +160,17 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv - - # check for plugin customization of the launch command - launch_interface = ME.MadEventCmdShell - if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): - with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - launch_interface = launch_plugin.MEINTERFACE - - - + argument = sys.argv try: if '-h' in argument or '--help' in argument: - launch = launch_interface(me_dir=root_path, force_run=True) + launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = launch_interface(me_dir=root_path, force_run=True) + launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py 
b/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py index e9f421ae5f..7624b9f557 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py @@ -1002,14 +1002,13 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() - self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - + self.plugin_input(finput) def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/gg_ttggg.mad/bin/internal/ufomodel/py3_model.pkl index 27a1caae3c115073669b90622e9351ab04166d39..dc38da0bfa76ea4206a3c5b2d34b98c606f7d044 100644 GIT binary patch delta 23 fcmX?qj_Kk#rVZZ97)vMnEK_6l^>>+EyzCADfsqPs delta 21 dcmcb3j_Le4rVZZ97>g(SEK_5Qm|VK-4ghc73FH6( diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/madevent b/epochX/cudacpp/gg_ttggg.mad/bin/madevent index 10b6a71fa2..c944aa1faf 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/madevent +++ b/epochX/cudacpp/gg_ttggg.mad/bin/madevent @@ -32,7 +32,6 @@ except ImportError: import os -pjoin = os.path.join import optparse # Get the directory of the script real path (bin) @@ -161,23 +160,10 @@ except: pass import internal.madevent_interface as cmd_interface -# check for plugin customization of the launch command -launch_interface = cmd_interface.MadEventCmdShell -if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): - with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - launch_interface = launch_plugin.MEINTERFACE - - - # Call the cmd interface main loop try: if '-h' in args or '--help' in args: - launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) + launch = ME.MadEventCmdShell(me_dir=os.path.dirname(root_path), 
force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): @@ -192,7 +178,7 @@ try: cmd_line.run_cmd('import command ' + input_file) cmd_line.run_cmd('quit') else: - cmd_line = launch_interface(force_run=True) + cmd_line = cmd_interface.MadEventCmdShell(force_run=True) cmd_line.use_rawinput = False cmd_line.haspiping = False cmd_line.run_cmd('import command ' + input_file) @@ -202,7 +188,7 @@ try: if options.web: cmd_line = cmd_interface.MadEventCmd(force_run=True) else: - cmd_line = launch_interface(force_run=True) + cmd_line = cmd_interface.MadEventCmdShell(force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print( parser_error) diff --git a/epochX/cudacpp/gg_ttggg.mad/mg5.in b/epochX/cudacpp/gg_ttggg.mad/mg5.in index cdbc845cdd..5f9d505e5c 100644 --- a/epochX/cudacpp/gg_ttggg.mad/mg5.in +++ b/epochX/cudacpp/gg_ttggg.mad/mg5.in @@ -1,4 +1,3 @@ -set stdout_level DEBUG -set zerowidth_tchannel F generate g g > t t~ g g g output madevent gg_ttggg.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp + diff --git a/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h index 9b946c21e1..9cea8bcbe7 100644 --- a/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h @@ -863,7 +863,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -874,7 +873,6 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -888,7 +886,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype 
allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -899,7 +896,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -912,7 +908,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -925,7 +920,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -940,7 +934,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -952,7 +945,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -967,7 +959,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -979,7 +970,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -994,7 +984,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -1006,7 +995,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const 
fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -1020,7 +1008,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1055,7 +1042,6 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1094,7 +1080,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1118,7 +1103,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1150,7 +1134,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1182,7 +1165,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1215,7 +1197,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1244,7 +1225,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1280,7 +1260,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1309,7 +1288,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1345,7 +1323,6 @@ namespace mg5amcCpu const fptype 
allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1374,7 +1351,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 31573e7e51..9bee64b205 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg.mg +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005338430404663086  +DEBUG: model prefixing takes 0.004521608352661133  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,35 +155,66 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.850 s +1 processes with 1240 diagrams generated in 1.695 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  -DEBUG: type(subproc_group)= [output.py at line 190]  -DEBUG: type(fortran_model)= [output.py at line 191]  -DEBUG: type(me)= me=0 [output.py at line 192]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h -FileWriter for 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.482 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  +DEBUG: type(subproc_group)= [output.py at line 188]  +DEBUG: type(fortran_model)= [output.py at line 189]  +DEBUG: type(me)= me=0 [output.py at line 190]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: proc_id =  0 [model_handling.py at line 1046]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: 
type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1823]  +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
+DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1336]  +Generated helas calls for 1 subprocesses (1240 diagrams) in 5.784 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.364 s +ALOHA: aloha creates 5 routines in 0.291 s VVV1 VVV1 FFV1 @@ -196,17 +227,23 @@ ALOHA: aloha creates 5 routines in 0.364 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
+FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.cc +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m13.206s -user 0m12.699s -sys 0m0.116s +real 0m11.544s +user 0m11.267s +sys 0m0.097s diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MatrixElementKernels.cc index 74b5239ebf..30257195b6 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MatrixElementKernels.cc @@ -112,17 +112,10 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#elif defined( __x86_64__ ) || defined( __i386__ ) +#else bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; -#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted - bool known = false; // __builtin_cpu_supports is not supported - // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html - // See https://stackoverflow.com/q/62783908 - // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu - bool ok = true; // this is just an assumption! 
- const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc index a67b74e5b7..36675814b4 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc @@ -252,13 +252,13 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][6], +1, w_fp[6], 6 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); - VVV1P0_1( w_fp[7], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[9] ); - VVV1P0_1( w_fp[8], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[7] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[7], w_fp[4], COUPs[0], 0., 0., w_fp[9] ); + VVV1P0_1( w_fp[8], w_fp[5], COUPs[0], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 1 - VVV1_0( w_fp[9], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[9], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -282,10 +282,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 1240 *** // Wavefunction(s) for diagram number 2 - VVV1P0_1( w_fp[8], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[11] ); + VVV1P0_1( w_fp[8], w_fp[6], COUPs[0], 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 2 - VVV1_0( w_fp[9], w_fp[11], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[9], w_fp[11], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -312,7 +312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 3 - 
VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -332,7 +332,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -352,7 +352,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -376,11 +376,11 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 1240 *** // Wavefunction(s) for diagram number 4 - VVV1P0_1( w_fp[7], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[12] ); - VVV1P0_1( w_fp[8], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[13] ); + VVV1P0_1( w_fp[7], w_fp[5], COUPs[0], 0., 0., w_fp[12] ); + VVV1P0_1( w_fp[8], w_fp[4], COUPs[0], 0., 0., w_fp[13] ); // Amplitude(s) for diagram number 4 - VVV1_0( w_fp[12], w_fp[13], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[12], w_fp[13], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -407,7 +407,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[12], w_fp[11], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[12], w_fp[11], w_fp[4], COUPs[0], &_fp[0] 
); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -434,7 +434,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -454,7 +454,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -474,7 +474,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -498,10 +498,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 1240 *** // Wavefunction(s) for diagram number 7 - VVV1P0_1( w_fp[7], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[14] ); + VVV1P0_1( w_fp[7], w_fp[6], COUPs[0], 0., 0., w_fp[14] ); // Amplitude(s) for diagram number 7 - VVV1_0( w_fp[14], w_fp[13], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[14], w_fp[13], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -528,7 +528,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 
8 - VVV1_0( w_fp[14], w_fp[10], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[14], w_fp[10], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -555,7 +555,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -575,7 +575,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -595,7 +595,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -619,12 +619,12 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 1240 *** // Wavefunction(s) for diagram number 10 - VVVV1P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] ); - VVVV3P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[16] ); - VVVV4P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] ); + VVVV1P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[15] ); + VVVV3P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[16] ); + VVVV4P0_1( w_fp[7], w_fp[4], 
w_fp[5], COUPs[2], 0., 0., w_fp[17] ); // Amplitude(s) for diagram number 10 - VVV1_0( w_fp[8], w_fp[6], w_fp[15], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[15], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -644,7 +644,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -664,7 +664,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[17], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[17], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -688,12 +688,12 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 1240 *** // Wavefunction(s) for diagram number 11 - VVVV1P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[18] ); - VVVV3P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[19] ); - VVVV4P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[20] ); + VVVV1P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[18] ); + VVVV3P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[19] ); + VVVV4P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[20] ); // Amplitude(s) for diagram number 11 - VVV1_0( w_fp[8], w_fp[5], w_fp[18], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[18], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -713,7 +713,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[19], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[19], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -733,7 +733,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[20], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[20], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -757,12 +757,12 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 1240 *** // Wavefunction(s) for diagram number 12 - VVVV1P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] ); - VVVV4P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] ); + VVVV1P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[22] ); + VVVV4P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 12 - VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -782,7 +782,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -802,7 +802,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -826,10 +826,10 @@ namespace mg5amcCpu // *** DIAGRAM 13 OF 1240 *** // Wavefunction(s) for diagram number 13 - VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[24] ); + VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 13 - VVVV1_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -849,7 +849,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -869,7 +869,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -893,10 +893,10 @@ namespace mg5amcCpu // *** DIAGRAM 14 OF 1240 *** // Wavefunction(s) for diagram number 14 - VVV1P0_1( w_fp[7], w_fp[8], COUPs[0], 1.0, 
0., 0., w_fp[25] ); + VVV1P0_1( w_fp[7], w_fp[8], COUPs[0], 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 14 - VVV1_0( w_fp[24], w_fp[6], w_fp[25], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[25], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -920,10 +920,10 @@ namespace mg5amcCpu // *** DIAGRAM 15 OF 1240 *** // Wavefunction(s) for diagram number 15 - VVV1P0_1( w_fp[7], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[26] ); + VVV1P0_1( w_fp[7], w_fp[24], COUPs[0], 0., 0., w_fp[26] ); // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[8], w_fp[6], w_fp[26], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[26], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -950,7 +950,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 16 - VVV1_0( w_fp[8], w_fp[24], w_fp[14], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[24], w_fp[14], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -974,10 +974,10 @@ namespace mg5amcCpu // *** DIAGRAM 17 OF 1240 *** // Wavefunction(s) for diagram number 17 - VVV1P0_1( w_fp[4], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[27] ); + VVV1P0_1( w_fp[4], w_fp[6], COUPs[0], 0., 0., w_fp[27] ); // Amplitude(s) for diagram number 17 - VVVV1_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -997,7 +997,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; 
- VVVV3_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1017,7 +1017,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVVV4_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1044,7 +1044,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 18 - VVV1_0( w_fp[27], w_fp[5], w_fp[25], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[25], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1068,10 +1068,10 @@ namespace mg5amcCpu // *** DIAGRAM 19 OF 1240 *** // Wavefunction(s) for diagram number 19 - VVV1P0_1( w_fp[7], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[28] ); + VVV1P0_1( w_fp[7], w_fp[27], COUPs[0], 0., 0., w_fp[28] ); // Amplitude(s) for diagram number 19 - VVV1_0( w_fp[8], w_fp[5], w_fp[28], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[28], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1098,7 +1098,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[8], w_fp[27], w_fp[12], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[27], w_fp[12], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-1122,10 +1122,10 @@ namespace mg5amcCpu // *** DIAGRAM 21 OF 1240 *** // Wavefunction(s) for diagram number 21 - VVV1P0_1( w_fp[5], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] ); + VVV1P0_1( w_fp[5], w_fp[6], COUPs[0], 0., 0., w_fp[29] ); // Amplitude(s) for diagram number 21 - VVVV1_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1145,7 +1145,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1165,7 +1165,7 @@ namespace mg5amcCpu jamp_sv[116] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1192,7 +1192,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - VVV1_0( w_fp[4], w_fp[29], w_fp[25], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[25], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1219,7 +1219,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 23 - VVV1_0( w_fp[8], w_fp[29], w_fp[9], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[29], w_fp[9], COUPs[0], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1243,10 +1243,10 @@ namespace mg5amcCpu // *** DIAGRAM 24 OF 1240 *** // Wavefunction(s) for diagram number 24 - VVV1P0_1( w_fp[7], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[25] ); + VVV1P0_1( w_fp[7], w_fp[29], COUPs[0], 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 24 - VVV1_0( w_fp[8], w_fp[4], w_fp[25], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[25], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1270,12 +1270,12 @@ namespace mg5amcCpu // *** DIAGRAM 25 OF 1240 *** // Wavefunction(s) for diagram number 25 - VVVV1P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[30] ); - VVVV3P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[31] ); - VVVV4P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[32] ); + VVVV1P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[30] ); + VVVV3P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[31] ); + VVVV4P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[32] ); // Amplitude(s) for diagram number 25 - VVV1_0( w_fp[7], w_fp[8], w_fp[30], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[8], w_fp[30], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1295,7 +1295,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[7], w_fp[8], w_fp[31], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[8], w_fp[31], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1315,7 +1315,7 @@ namespace mg5amcCpu 
jamp_sv[95] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[7], w_fp[8], w_fp[32], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[8], w_fp[32], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1339,12 +1339,12 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 1240 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[33] ); - FFV1_2( w_fp[3], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] ); - FFV1_1( w_fp[33], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[35] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[33] ); + FFV1_2( w_fp[3], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[34] ); + FFV1_1( w_fp[33], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[35] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[34], w_fp[35], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[35], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1354,10 +1354,10 @@ namespace mg5amcCpu // *** DIAGRAM 27 OF 1240 *** // Wavefunction(s) for diagram number 27 - FFV1_1( w_fp[33], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[36] ); + FFV1_1( w_fp[33], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[36] ); // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[34], w_fp[36], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[36], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1367,10 +1367,10 @@ namespace mg5amcCpu // *** DIAGRAM 28 OF 1240 *** // Wavefunction(s) for diagram number 28 - FFV1P0_3( w_fp[3], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[37] ); + FFV1P0_3( w_fp[3], w_fp[33], 
COUPs[1], 0., 0., w_fp[37] ); // Amplitude(s) for diagram number 28 - VVV1_0( w_fp[12], w_fp[37], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[12], w_fp[37], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1389,7 +1389,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[3], w_fp[36], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[36], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1404,7 +1404,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 30 - VVV1_0( w_fp[14], w_fp[37], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[14], w_fp[37], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1423,7 +1423,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 31 - FFV1_0( w_fp[3], w_fp[35], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[35], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1438,7 +1438,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1450,7 +1450,7 @@ namespace mg5amcCpu jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], 
w_fp[33], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1462,7 +1462,7 @@ namespace mg5amcCpu jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1478,11 +1478,11 @@ namespace mg5amcCpu // *** DIAGRAM 33 OF 1240 *** // Wavefunction(s) for diagram number 33 - FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[38] ); - FFV1_1( w_fp[33], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] ); + FFV1_2( w_fp[3], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[38] ); + FFV1_1( w_fp[33], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[39] ); // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[38], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1492,10 +1492,10 @@ namespace mg5amcCpu // *** DIAGRAM 34 OF 1240 *** // Wavefunction(s) for diagram number 34 - FFV1_2( w_fp[38], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] ); + FFV1_2( w_fp[38], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[40] ); // Amplitude(s) for diagram number 34 - FFV1_0( w_fp[40], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1508,7 
+1508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - FFV1_0( w_fp[38], w_fp[33], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1520,10 +1520,10 @@ namespace mg5amcCpu // *** DIAGRAM 36 OF 1240 *** // Wavefunction(s) for diagram number 36 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[41] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[41] ); // Amplitude(s) for diagram number 36 - FFV1_0( w_fp[41], w_fp[39], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1533,10 +1533,10 @@ namespace mg5amcCpu // *** DIAGRAM 37 OF 1240 *** // Wavefunction(s) for diagram number 37 - FFV1_2( w_fp[41], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[42] ); + FFV1_2( w_fp[41], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[42] ); // Amplitude(s) for diagram number 37 - FFV1_0( w_fp[42], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[42], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1549,7 +1549,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 38 - FFV1_0( w_fp[41], w_fp[33], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1564,7 +1564,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 39 - FFV1_0( w_fp[3], w_fp[39], w_fp[29], 
COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1579,7 +1579,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 40 - FFV1_0( w_fp[34], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1594,7 +1594,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 41 - FFV1_0( w_fp[3], w_fp[33], w_fp[25], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[25], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1610,11 +1610,11 @@ namespace mg5amcCpu // *** DIAGRAM 42 OF 1240 *** // Wavefunction(s) for diagram number 42 - FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] ); - FFV1_1( w_fp[39], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[43] ); + FFV1_1( w_fp[2], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[39] ); + FFV1_1( w_fp[39], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[43] ); // Amplitude(s) for diagram number 42 - FFV1_0( w_fp[34], w_fp[43], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[43], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1624,10 +1624,10 @@ namespace mg5amcCpu // *** DIAGRAM 43 OF 1240 *** // Wavefunction(s) for diagram number 43 - FFV1_1( w_fp[39], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[44] ); + FFV1_1( w_fp[39], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[44] ); // Amplitude(s) for diagram number 43 - FFV1_0( w_fp[34], 
w_fp[44], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[44], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1637,10 +1637,10 @@ namespace mg5amcCpu // *** DIAGRAM 44 OF 1240 *** // Wavefunction(s) for diagram number 44 - FFV1P0_3( w_fp[3], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[45] ); + FFV1P0_3( w_fp[3], w_fp[39], COUPs[1], 0., 0., w_fp[45] ); // Amplitude(s) for diagram number 44 - VVV1_0( w_fp[9], w_fp[45], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[9], w_fp[45], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1659,7 +1659,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 45 - FFV1_0( w_fp[3], w_fp[44], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[44], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1674,7 +1674,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 46 - VVV1_0( w_fp[14], w_fp[45], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[14], w_fp[45], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1693,7 +1693,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 47 - FFV1_0( w_fp[3], w_fp[43], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[43], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1708,7 +1708,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 48 - 
FFV1_0( w_fp[3], w_fp[39], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1720,7 +1720,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[19], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1732,7 +1732,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[20], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1748,11 +1748,11 @@ namespace mg5amcCpu // *** DIAGRAM 49 OF 1240 *** // Wavefunction(s) for diagram number 49 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[46] ); - FFV1_1( w_fp[39], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[46] ); + FFV1_1( w_fp[39], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[47] ); // Amplitude(s) for diagram number 49 - FFV1_0( w_fp[46], w_fp[47], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1762,10 +1762,10 @@ namespace mg5amcCpu // *** DIAGRAM 50 OF 1240 *** // Wavefunction(s) for diagram number 50 - 
FFV1_2( w_fp[46], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] ); + FFV1_2( w_fp[46], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[48] ); // Amplitude(s) for diagram number 50 - FFV1_0( w_fp[48], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1778,7 +1778,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 51 - FFV1_0( w_fp[46], w_fp[39], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1793,7 +1793,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 52 - FFV1_0( w_fp[41], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1806,7 +1806,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 53 - FFV1_0( w_fp[42], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[42], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1819,7 +1819,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 54 - FFV1_0( w_fp[41], w_fp[39], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1834,7 +1834,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 55 - FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1849,7 +1849,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 56 - FFV1_0( w_fp[34], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1864,7 +1864,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 57 - FFV1_0( w_fp[3], w_fp[39], w_fp[28], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[28], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1880,11 +1880,11 @@ namespace mg5amcCpu // *** DIAGRAM 58 OF 1240 *** // Wavefunction(s) for diagram number 58 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] ); - FFV1_1( w_fp[47], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[49] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[47] ); + FFV1_1( w_fp[47], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[49] ); // Amplitude(s) for diagram number 58 - FFV1_0( w_fp[34], w_fp[49], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[49], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1894,10 +1894,10 @@ namespace mg5amcCpu // *** DIAGRAM 59 OF 1240 *** // Wavefunction(s) for diagram number 59 - FFV1_1( w_fp[47], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[50] ); + FFV1_1( w_fp[47], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[50] ); 
// Amplitude(s) for diagram number 59 - FFV1_0( w_fp[34], w_fp[50], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[50], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1907,10 +1907,10 @@ namespace mg5amcCpu // *** DIAGRAM 60 OF 1240 *** // Wavefunction(s) for diagram number 60 - FFV1P0_3( w_fp[3], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[51] ); + FFV1P0_3( w_fp[3], w_fp[47], COUPs[1], 0., 0., w_fp[51] ); // Amplitude(s) for diagram number 60 - VVV1_0( w_fp[9], w_fp[51], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[9], w_fp[51], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1929,7 +1929,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 61 - FFV1_0( w_fp[3], w_fp[50], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[50], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1944,7 +1944,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 62 - VVV1_0( w_fp[12], w_fp[51], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[12], w_fp[51], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1963,7 +1963,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 63 - FFV1_0( w_fp[3], w_fp[49], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[49], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1978,7 +1978,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 64 - FFV1_0( w_fp[3], w_fp[47], w_fp[15], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[15], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1990,7 +1990,7 @@ namespace mg5amcCpu jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2002,7 +2002,7 @@ namespace mg5amcCpu jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[17], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[17], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2018,10 +2018,10 @@ namespace mg5amcCpu // *** DIAGRAM 65 OF 1240 *** // Wavefunction(s) for diagram number 65 - FFV1_1( w_fp[47], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] ); + FFV1_1( w_fp[47], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[52] ); // Amplitude(s) for diagram number 65 - FFV1_0( w_fp[46], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2034,7 +2034,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 66 - FFV1_0( w_fp[48], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[47], w_fp[5], 
COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2047,7 +2047,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 67 - FFV1_0( w_fp[46], w_fp[47], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2062,7 +2062,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 68 - FFV1_0( w_fp[38], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2075,7 +2075,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 69 - FFV1_0( w_fp[40], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2088,7 +2088,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 70 - FFV1_0( w_fp[38], w_fp[47], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2103,7 +2103,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 71 - FFV1_0( w_fp[3], w_fp[52], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -2118,7 +2118,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 72 - FFV1_0( w_fp[34], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2133,7 +2133,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 73 - FFV1_0( w_fp[3], w_fp[47], w_fp[26], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[26], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2149,11 +2149,11 @@ namespace mg5amcCpu // *** DIAGRAM 74 OF 1240 *** // Wavefunction(s) for diagram number 74 - FFV1_1( w_fp[2], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] ); - FFV1_2( w_fp[46], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] ); + FFV1_1( w_fp[2], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[52] ); + FFV1_2( w_fp[46], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[7] ); // Amplitude(s) for diagram number 74 - FFV1_0( w_fp[7], w_fp[52], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[52], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2163,10 +2163,10 @@ namespace mg5amcCpu // *** DIAGRAM 75 OF 1240 *** // Wavefunction(s) for diagram number 75 - FFV1_2( w_fp[46], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[53] ); + FFV1_2( w_fp[46], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[53] ); // Amplitude(s) for diagram number 75 - FFV1_0( w_fp[53], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -2176,10 +2176,10 @@ namespace mg5amcCpu // *** DIAGRAM 76 OF 1240 *** // Wavefunction(s) for diagram number 76 - FFV1P0_3( w_fp[46], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[54] ); + FFV1P0_3( w_fp[46], w_fp[2], COUPs[1], 0., 0., w_fp[54] ); // Amplitude(s) for diagram number 76 - VVV1_0( w_fp[12], w_fp[54], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[12], w_fp[54], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2198,7 +2198,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 77 - FFV1_0( w_fp[53], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2213,7 +2213,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 78 - VVV1_0( w_fp[14], w_fp[54], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[14], w_fp[54], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2232,7 +2232,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 79 - FFV1_0( w_fp[7], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2247,7 +2247,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 80 - FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -2259,7 +2259,7 @@ namespace mg5amcCpu jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2271,7 +2271,7 @@ namespace mg5amcCpu jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2290,7 +2290,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 81 - FFV1_0( w_fp[46], w_fp[52], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[52], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2305,7 +2305,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 82 - FFV1_0( w_fp[48], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2320,7 +2320,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 83 - FFV1_0( w_fp[46], w_fp[2], w_fp[25], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[25], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -2336,10 +2336,10 @@ namespace mg5amcCpu // *** DIAGRAM 84 OF 1240 *** // Wavefunction(s) for diagram number 84 - FFV1_2( w_fp[38], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[25] ); + FFV1_2( w_fp[38], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[25] ); // Amplitude(s) for diagram number 84 - FFV1_0( w_fp[25], w_fp[52], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[52], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2349,10 +2349,10 @@ namespace mg5amcCpu // *** DIAGRAM 85 OF 1240 *** // Wavefunction(s) for diagram number 85 - FFV1_2( w_fp[38], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] ); + FFV1_2( w_fp[38], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[48] ); // Amplitude(s) for diagram number 85 - FFV1_0( w_fp[48], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2362,10 +2362,10 @@ namespace mg5amcCpu // *** DIAGRAM 86 OF 1240 *** // Wavefunction(s) for diagram number 86 - FFV1P0_3( w_fp[38], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[23] ); + FFV1P0_3( w_fp[38], w_fp[2], COUPs[1], 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 86 - VVV1_0( w_fp[9], w_fp[23], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[9], w_fp[23], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2384,7 +2384,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 87 - FFV1_0( w_fp[48], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // 
Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2399,7 +2399,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 88 - VVV1_0( w_fp[14], w_fp[23], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[14], w_fp[23], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2418,7 +2418,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 89 - FFV1_0( w_fp[25], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2433,7 +2433,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 90 - FFV1_0( w_fp[38], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2445,7 +2445,7 @@ namespace mg5amcCpu jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2457,7 +2457,7 @@ namespace mg5amcCpu jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[20], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here 
the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2476,7 +2476,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 91 - FFV1_0( w_fp[38], w_fp[52], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[52], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2491,7 +2491,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 92 - FFV1_0( w_fp[40], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2506,7 +2506,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 93 - FFV1_0( w_fp[38], w_fp[2], w_fp[28], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[28], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2522,10 +2522,10 @@ namespace mg5amcCpu // *** DIAGRAM 94 OF 1240 *** // Wavefunction(s) for diagram number 94 - FFV1_2( w_fp[41], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[28] ); + FFV1_2( w_fp[41], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[28] ); // Amplitude(s) for diagram number 94 - FFV1_0( w_fp[28], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2535,10 +2535,10 @@ namespace mg5amcCpu // *** DIAGRAM 95 OF 1240 *** // Wavefunction(s) for diagram number 95 - FFV1_2( w_fp[41], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] ); + FFV1_2( w_fp[41], w_fp[5], 
COUPs[1], cIPD[0], cIPD[1], w_fp[40] ); // Amplitude(s) for diagram number 95 - FFV1_0( w_fp[40], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2548,10 +2548,10 @@ namespace mg5amcCpu // *** DIAGRAM 96 OF 1240 *** // Wavefunction(s) for diagram number 96 - FFV1P0_3( w_fp[41], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[20] ); + FFV1P0_3( w_fp[41], w_fp[2], COUPs[1], 0., 0., w_fp[20] ); // Amplitude(s) for diagram number 96 - VVV1_0( w_fp[9], w_fp[20], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[9], w_fp[20], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2570,7 +2570,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 97 - FFV1_0( w_fp[40], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2585,7 +2585,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 98 - VVV1_0( w_fp[12], w_fp[20], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[12], w_fp[20], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2604,7 +2604,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 99 - FFV1_0( w_fp[28], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -2619,7 +2619,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 100 - FFV1_0( w_fp[41], w_fp[2], w_fp[15], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[15], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2631,7 +2631,7 @@ namespace mg5amcCpu jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2643,7 +2643,7 @@ namespace mg5amcCpu jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2662,7 +2662,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 101 - FFV1_0( w_fp[41], w_fp[52], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[52], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2677,7 +2677,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 102 - FFV1_0( w_fp[42], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[42], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -2692,7 +2692,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 103 - FFV1_0( w_fp[41], w_fp[2], w_fp[26], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[26], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2708,10 +2708,10 @@ namespace mg5amcCpu // *** DIAGRAM 104 OF 1240 *** // Wavefunction(s) for diagram number 104 - FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[26] ); + FFV1_2( w_fp[3], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[26] ); // Amplitude(s) for diagram number 104 - FFV1_0( w_fp[26], w_fp[52], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[52], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2723,10 +2723,10 @@ namespace mg5amcCpu // *** DIAGRAM 105 OF 1240 *** // Wavefunction(s) for diagram number 105 - VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[42] ); + VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 0., 0., w_fp[42] ); // Amplitude(s) for diagram number 105 - FFV1_0( w_fp[3], w_fp[52], w_fp[42], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[42], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2742,10 +2742,10 @@ namespace mg5amcCpu // *** DIAGRAM 106 OF 1240 *** // Wavefunction(s) for diagram number 106 - FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] ); + FFV1_1( w_fp[2], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[17] ); // Amplitude(s) for diagram number 106 - FFV1_0( w_fp[34], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2760,7 +2760,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 107 - FFV1_0( w_fp[34], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2779,7 +2779,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 108 - FFV1_0( w_fp[3], w_fp[17], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2798,7 +2798,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 109 - FFV1_0( w_fp[26], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2814,10 +2814,10 @@ namespace mg5amcCpu // *** DIAGRAM 110 OF 1240 *** // Wavefunction(s) for diagram number 110 - FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); + FFV1_2( w_fp[3], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 110 - FFV1_0( w_fp[14], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2829,10 +2829,10 @@ namespace mg5amcCpu // *** DIAGRAM 111 OF 1240 *** // Wavefunction(s) for diagram number 111 - VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 0., 0., 
w_fp[16] ); // Amplitude(s) for diagram number 111 - FFV1_0( w_fp[3], w_fp[52], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2848,10 +2848,10 @@ namespace mg5amcCpu // *** DIAGRAM 112 OF 1240 *** // Wavefunction(s) for diagram number 112 - FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] ); + FFV1_1( w_fp[2], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[15] ); // Amplitude(s) for diagram number 112 - FFV1_0( w_fp[34], w_fp[15], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[15], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2866,7 +2866,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 113 - FFV1_0( w_fp[34], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2885,7 +2885,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 114 - FFV1_0( w_fp[3], w_fp[15], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[15], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2904,7 +2904,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 115 - FFV1_0( w_fp[14], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -2920,10 +2920,10 @@ namespace mg5amcCpu // *** DIAGRAM 116 OF 1240 *** // Wavefunction(s) for diagram number 116 - FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[3], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 116 - FFV1_0( w_fp[12], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2935,10 +2935,10 @@ namespace mg5amcCpu // *** DIAGRAM 117 OF 1240 *** // Wavefunction(s) for diagram number 117 - VVV1P0_1( w_fp[4], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[19] ); + VVV1P0_1( w_fp[4], w_fp[29], COUPs[0], 0., 0., w_fp[19] ); // Amplitude(s) for diagram number 117 - FFV1_0( w_fp[3], w_fp[52], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[19], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2954,10 +2954,10 @@ namespace mg5amcCpu // *** DIAGRAM 118 OF 1240 *** // Wavefunction(s) for diagram number 118 - FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[18] ); + FFV1_1( w_fp[2], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[18] ); // Amplitude(s) for diagram number 118 - FFV1_0( w_fp[34], w_fp[18], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[18], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2972,7 +2972,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 119 - FFV1_0( w_fp[34], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2991,7 +2991,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 120 - FFV1_0( w_fp[3], w_fp[18], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[18], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3010,7 +3010,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 121 - FFV1_0( w_fp[12], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3029,7 +3029,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 122 - FFV1_0( w_fp[3], w_fp[52], w_fp[30], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[30], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3041,7 +3041,7 @@ namespace mg5amcCpu jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[52], w_fp[31], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[31], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3053,7 +3053,7 @@ namespace mg5amcCpu jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[52], w_fp[32], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[32], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3072,7 +3072,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 123 - FFV1_0( w_fp[34], w_fp[2], w_fp[30], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[30], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3084,7 +3084,7 @@ namespace mg5amcCpu jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[31], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[31], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3096,7 +3096,7 @@ namespace mg5amcCpu jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[32], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[32], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3112,13 +3112,13 @@ namespace mg5amcCpu // *** DIAGRAM 124 OF 1240 *** // Wavefunction(s) for diagram number 124 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] ); - FFV1_1( w_fp[34], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[52], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[34] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[52] ); + FFV1_1( w_fp[34], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[52], w_fp[5], 
COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 124 - FFV1_0( w_fp[22], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3127,10 +3127,10 @@ namespace mg5amcCpu // *** DIAGRAM 125 OF 1240 *** // Wavefunction(s) for diagram number 125 - FFV1_2( w_fp[52], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[52], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 125 - FFV1_0( w_fp[21], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3139,11 +3139,11 @@ namespace mg5amcCpu // *** DIAGRAM 126 OF 1240 *** // Wavefunction(s) for diagram number 126 - FFV1_1( w_fp[34], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[55] ); - FFV1_2( w_fp[52], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[56] ); + FFV1_1( w_fp[34], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[55] ); + FFV1_2( w_fp[52], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[56] ); // Amplitude(s) for diagram number 126 - FFV1_0( w_fp[56], w_fp[55], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[55], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3155,7 +3155,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 127 - FFV1_0( w_fp[21], w_fp[55], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[55], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -3164,10 +3164,10 @@ namespace mg5amcCpu // *** DIAGRAM 128 OF 1240 *** // Wavefunction(s) for diagram number 128 - FFV1_1( w_fp[34], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[57] ); + FFV1_1( w_fp[34], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[57] ); // Amplitude(s) for diagram number 128 - FFV1_0( w_fp[56], w_fp[57], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[57], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3179,7 +3179,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 129 - FFV1_0( w_fp[22], w_fp[57], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[57], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3188,10 +3188,10 @@ namespace mg5amcCpu // *** DIAGRAM 130 OF 1240 *** // Wavefunction(s) for diagram number 130 - FFV1P0_3( w_fp[52], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[58] ); + FFV1P0_3( w_fp[52], w_fp[34], COUPs[1], 0., 0., w_fp[58] ); // Amplitude(s) for diagram number 130 - VVV1_0( w_fp[24], w_fp[6], w_fp[58], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[58], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3203,10 +3203,10 @@ namespace mg5amcCpu // *** DIAGRAM 131 OF 1240 *** // Wavefunction(s) for diagram number 131 - FFV1_1( w_fp[34], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] ); + FFV1_1( w_fp[34], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[59] ); // Amplitude(s) for diagram number 131 - FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code 
base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3219,7 +3219,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 132 - FFV1_0( w_fp[52], w_fp[57], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[57], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3232,7 +3232,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 133 - VVV1_0( w_fp[27], w_fp[5], w_fp[58], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[58], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3244,10 +3244,10 @@ namespace mg5amcCpu // *** DIAGRAM 134 OF 1240 *** // Wavefunction(s) for diagram number 134 - FFV1_1( w_fp[34], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); + FFV1_1( w_fp[34], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); // Amplitude(s) for diagram number 134 - FFV1_0( w_fp[52], w_fp[60], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[60], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3260,7 +3260,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 135 - FFV1_0( w_fp[52], w_fp[55], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[55], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3273,7 +3273,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 136 - VVV1_0( w_fp[4], w_fp[29], w_fp[58], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[58], COUPs[0], &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3288,7 +3288,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 137 - FFV1_0( w_fp[52], w_fp[9], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[9], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3298,10 +3298,10 @@ namespace mg5amcCpu // *** DIAGRAM 138 OF 1240 *** // Wavefunction(s) for diagram number 138 - FFV1_1( w_fp[34], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] ); + FFV1_1( w_fp[34], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[58] ); // Amplitude(s) for diagram number 138 - FFV1_0( w_fp[52], w_fp[58], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[58], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3314,7 +3314,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 139 - FFV1_0( w_fp[52], w_fp[34], w_fp[30], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[34], w_fp[30], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3322,7 +3322,7 @@ namespace mg5amcCpu jamp_sv[11] -= amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[34], w_fp[31], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[34], w_fp[31], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3330,7 +3330,7 @@ namespace mg5amcCpu jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; - FFV1_0( 
w_fp[52], w_fp[34], w_fp[32], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[34], w_fp[32], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3342,12 +3342,12 @@ namespace mg5amcCpu // *** DIAGRAM 140 OF 1240 *** // Wavefunction(s) for diagram number 140 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[61] ); - FFV1P0_3( w_fp[3], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[62] ); - VVV1P0_1( w_fp[61], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[63] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[61] ); + FFV1P0_3( w_fp[3], w_fp[34], COUPs[1], 0., 0., w_fp[62] ); + VVV1P0_1( w_fp[61], w_fp[5], COUPs[0], 0., 0., w_fp[63] ); // Amplitude(s) for diagram number 140 - VVV1_0( w_fp[62], w_fp[63], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[63], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3363,10 +3363,10 @@ namespace mg5amcCpu // *** DIAGRAM 141 OF 1240 *** // Wavefunction(s) for diagram number 141 - VVV1P0_1( w_fp[61], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[64] ); + VVV1P0_1( w_fp[61], w_fp[6], COUPs[0], 0., 0., w_fp[64] ); // Amplitude(s) for diagram number 141 - VVV1_0( w_fp[62], w_fp[64], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[64], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3385,7 +3385,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 142 - VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -3397,7 +3397,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3409,7 +3409,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3425,10 +3425,10 @@ namespace mg5amcCpu // *** DIAGRAM 143 OF 1240 *** // Wavefunction(s) for diagram number 143 - FFV1_2( w_fp[3], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[65] ); + FFV1_2( w_fp[3], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[65] ); // Amplitude(s) for diagram number 143 - FFV1_0( w_fp[65], w_fp[55], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[55], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3441,7 +3441,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 144 - FFV1_0( w_fp[3], w_fp[55], w_fp[64], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[64], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3456,7 +3456,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 145 - 
FFV1_0( w_fp[65], w_fp[57], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[57], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3469,7 +3469,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 146 - FFV1_0( w_fp[3], w_fp[57], w_fp[63], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[63], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3481,10 +3481,10 @@ namespace mg5amcCpu // *** DIAGRAM 147 OF 1240 *** // Wavefunction(s) for diagram number 147 - FFV1_1( w_fp[34], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] ); + FFV1_1( w_fp[34], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[66] ); // Amplitude(s) for diagram number 147 - FFV1_0( w_fp[38], w_fp[66], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[66], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3494,10 +3494,10 @@ namespace mg5amcCpu // *** DIAGRAM 148 OF 1240 *** // Wavefunction(s) for diagram number 148 - FFV1P0_3( w_fp[38], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[67] ); + FFV1P0_3( w_fp[38], w_fp[34], COUPs[1], 0., 0., w_fp[67] ); // Amplitude(s) for diagram number 148 - VVV1_0( w_fp[61], w_fp[6], w_fp[67], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[67], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3512,7 +3512,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 149 - FFV1_0( w_fp[38], w_fp[57], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[57], w_fp[61], COUPs[1], &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3525,7 +3525,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 150 - FFV1_0( w_fp[41], w_fp[66], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[66], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3535,10 +3535,10 @@ namespace mg5amcCpu // *** DIAGRAM 151 OF 1240 *** // Wavefunction(s) for diagram number 151 - FFV1P0_3( w_fp[41], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[68] ); + FFV1P0_3( w_fp[41], w_fp[34], COUPs[1], 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 151 - VVV1_0( w_fp[61], w_fp[5], w_fp[68], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[68], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3553,7 +3553,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 152 - FFV1_0( w_fp[41], w_fp[55], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[55], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3566,7 +3566,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 153 - FFV1_0( w_fp[3], w_fp[66], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[66], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3581,7 +3581,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 154 - VVV1_0( w_fp[61], w_fp[29], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], 
w_fp[29], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3600,7 +3600,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 155 - FFV1_0( w_fp[3], w_fp[58], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[58], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3612,11 +3612,11 @@ namespace mg5amcCpu // *** DIAGRAM 156 OF 1240 *** // Wavefunction(s) for diagram number 156 - VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[66] ); - VVV1P0_1( w_fp[66], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[69] ); + VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 0., 0., w_fp[66] ); + VVV1P0_1( w_fp[66], w_fp[4], COUPs[0], 0., 0., w_fp[69] ); // Amplitude(s) for diagram number 156 - VVV1_0( w_fp[62], w_fp[69], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[69], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3632,10 +3632,10 @@ namespace mg5amcCpu // *** DIAGRAM 157 OF 1240 *** // Wavefunction(s) for diagram number 157 - VVV1P0_1( w_fp[66], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[70] ); + VVV1P0_1( w_fp[66], w_fp[6], COUPs[0], 0., 0., w_fp[70] ); // Amplitude(s) for diagram number 157 - VVV1_0( w_fp[62], w_fp[70], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[70], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3654,7 +3654,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 158 - VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[66], w_fp[4], 
w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3666,7 +3666,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3678,7 +3678,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3694,10 +3694,10 @@ namespace mg5amcCpu // *** DIAGRAM 159 OF 1240 *** // Wavefunction(s) for diagram number 159 - FFV1_2( w_fp[3], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); + FFV1_2( w_fp[3], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); // Amplitude(s) for diagram number 159 - FFV1_0( w_fp[71], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3710,7 +3710,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 160 - FFV1_0( w_fp[3], w_fp[9], w_fp[70], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[70], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -3725,7 +3725,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 161 - FFV1_0( w_fp[71], w_fp[57], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[57], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3738,7 +3738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 162 - FFV1_0( w_fp[3], w_fp[57], w_fp[69], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[69], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3750,10 +3750,10 @@ namespace mg5amcCpu // *** DIAGRAM 163 OF 1240 *** // Wavefunction(s) for diagram number 163 - FFV1_1( w_fp[34], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] ); + FFV1_1( w_fp[34], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[72] ); // Amplitude(s) for diagram number 163 - FFV1_0( w_fp[46], w_fp[72], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[72], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3763,10 +3763,10 @@ namespace mg5amcCpu // *** DIAGRAM 164 OF 1240 *** // Wavefunction(s) for diagram number 164 - FFV1P0_3( w_fp[46], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[73] ); + FFV1P0_3( w_fp[46], w_fp[34], COUPs[1], 0., 0., w_fp[73] ); // Amplitude(s) for diagram number 164 - VVV1_0( w_fp[66], w_fp[6], w_fp[73], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[73], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3781,7 +3781,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 165 - FFV1_0( w_fp[46], w_fp[57], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[57], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3794,7 +3794,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 166 - FFV1_0( w_fp[41], w_fp[72], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[72], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3807,7 +3807,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 167 - VVV1_0( w_fp[66], w_fp[4], w_fp[68], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[68], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3822,7 +3822,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 168 - FFV1_0( w_fp[41], w_fp[9], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[9], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3835,7 +3835,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 169 - FFV1_0( w_fp[3], w_fp[72], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[72], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3850,7 +3850,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 170 - VVV1_0( w_fp[66], w_fp[27], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[27], w_fp[62], COUPs[0], &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3869,7 +3869,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 171 - FFV1_0( w_fp[3], w_fp[60], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[60], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3881,11 +3881,11 @@ namespace mg5amcCpu // *** DIAGRAM 172 OF 1240 *** // Wavefunction(s) for diagram number 172 - VVV1P0_1( w_fp[1], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[72] ); - VVV1P0_1( w_fp[72], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[74] ); + VVV1P0_1( w_fp[1], w_fp[6], COUPs[0], 0., 0., w_fp[72] ); + VVV1P0_1( w_fp[72], w_fp[4], COUPs[0], 0., 0., w_fp[74] ); // Amplitude(s) for diagram number 172 - VVV1_0( w_fp[62], w_fp[74], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[74], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3901,10 +3901,10 @@ namespace mg5amcCpu // *** DIAGRAM 173 OF 1240 *** // Wavefunction(s) for diagram number 173 - VVV1P0_1( w_fp[72], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[75] ); + VVV1P0_1( w_fp[72], w_fp[5], COUPs[0], 0., 0., w_fp[75] ); // Amplitude(s) for diagram number 173 - VVV1_0( w_fp[62], w_fp[75], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[75], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3923,7 +3923,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 174 - VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3935,7 +3935,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3947,7 +3947,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3963,10 +3963,10 @@ namespace mg5amcCpu // *** DIAGRAM 175 OF 1240 *** // Wavefunction(s) for diagram number 175 - FFV1_2( w_fp[3], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[76] ); + FFV1_2( w_fp[3], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[76] ); // Amplitude(s) for diagram number 175 - FFV1_0( w_fp[76], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3979,7 +3979,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 176 - FFV1_0( w_fp[3], w_fp[9], w_fp[75], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[75], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -3994,7 +3994,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 177 - FFV1_0( w_fp[76], w_fp[55], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[55], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4007,7 +4007,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 178 - FFV1_0( w_fp[3], w_fp[55], w_fp[74], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[74], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4019,10 +4019,10 @@ namespace mg5amcCpu // *** DIAGRAM 179 OF 1240 *** // Wavefunction(s) for diagram number 179 - FFV1_1( w_fp[34], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); + FFV1_1( w_fp[34], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); // Amplitude(s) for diagram number 179 - FFV1_0( w_fp[46], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4035,7 +4035,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 180 - VVV1_0( w_fp[72], w_fp[5], w_fp[73], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[73], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4050,7 +4050,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 181 - FFV1_0( w_fp[46], w_fp[55], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[55], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4063,7 +4063,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 182 - FFV1_0( w_fp[38], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4076,7 +4076,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 183 - VVV1_0( w_fp[72], w_fp[4], w_fp[67], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[67], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4091,7 +4091,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 184 - FFV1_0( w_fp[38], w_fp[9], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[9], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4104,7 +4104,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 185 - FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4119,7 +4119,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 186 - VVV1_0( w_fp[72], w_fp[24], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[24], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4138,7 +4138,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 187 - FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4150,10 +4150,10 @@ namespace mg5amcCpu // *** DIAGRAM 188 OF 1240 *** // Wavefunction(s) for diagram number 188 - FFV1_1( w_fp[34], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); + FFV1_1( w_fp[34], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); // Amplitude(s) for diagram number 188 - FFV1_0( w_fp[7], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4165,7 +4165,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 189 - FFV1_0( w_fp[53], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4174,10 +4174,10 @@ namespace mg5amcCpu // *** DIAGRAM 190 OF 1240 *** // Wavefunction(s) for diagram number 190 - FFV1_2( w_fp[46], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[78] ); + FFV1_2( w_fp[46], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[78] ); // Amplitude(s) for diagram number 190 - FFV1_0( w_fp[78], w_fp[55], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[55], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4189,7 +4189,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 191 - FFV1_0( w_fp[53], w_fp[55], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[55], w_fp[1], 
COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4201,7 +4201,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 192 - FFV1_0( w_fp[78], w_fp[57], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[57], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4213,7 +4213,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 193 - FFV1_0( w_fp[7], w_fp[57], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[57], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4225,7 +4225,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 194 - FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4238,7 +4238,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 195 - VVV1_0( w_fp[1], w_fp[29], w_fp[73], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[73], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4253,7 +4253,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 196 - FFV1_0( w_fp[46], w_fp[58], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[58], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -4266,7 +4266,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 197 - FFV1_0( w_fp[25], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4278,7 +4278,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 198 - FFV1_0( w_fp[48], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4287,10 +4287,10 @@ namespace mg5amcCpu // *** DIAGRAM 199 OF 1240 *** // Wavefunction(s) for diagram number 199 - FFV1_2( w_fp[38], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] ); + FFV1_2( w_fp[38], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[58] ); // Amplitude(s) for diagram number 199 - FFV1_0( w_fp[58], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4302,7 +4302,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 200 - FFV1_0( w_fp[48], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4314,7 +4314,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 201 - FFV1_0( w_fp[58], w_fp[57], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[57], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -4326,7 +4326,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 202 - FFV1_0( w_fp[25], w_fp[57], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[57], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4338,7 +4338,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 203 - FFV1_0( w_fp[38], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4351,7 +4351,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 204 - VVV1_0( w_fp[1], w_fp[27], w_fp[67], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[67], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4366,7 +4366,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 205 - FFV1_0( w_fp[38], w_fp[60], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[60], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4379,7 +4379,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 206 - FFV1_0( w_fp[28], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4391,7 +4391,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 207 - FFV1_0( w_fp[40], 
w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4400,10 +4400,10 @@ namespace mg5amcCpu // *** DIAGRAM 208 OF 1240 *** // Wavefunction(s) for diagram number 208 - FFV1_2( w_fp[41], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); + FFV1_2( w_fp[41], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); // Amplitude(s) for diagram number 208 - FFV1_0( w_fp[60], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4415,7 +4415,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 209 - FFV1_0( w_fp[40], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4427,7 +4427,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 210 - FFV1_0( w_fp[60], w_fp[55], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[55], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4439,7 +4439,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 211 - FFV1_0( w_fp[28], w_fp[55], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[55], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4451,7 +4451,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 212 - FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4464,7 +4464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 213 - VVV1_0( w_fp[1], w_fp[24], w_fp[68], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[68], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4479,7 +4479,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 214 - FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4492,7 +4492,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 215 - FFV1_0( w_fp[26], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4505,7 +4505,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 216 - FFV1_0( w_fp[3], w_fp[77], w_fp[42], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[42], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4517,10 +4517,10 @@ namespace mg5amcCpu // *** DIAGRAM 217 OF 1240 *** // Wavefunction(s) for diagram number 217 - VVV1P0_1( w_fp[1], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[59] ); + VVV1P0_1( w_fp[1], w_fp[24], COUPs[0], 0., 0., 
w_fp[59] ); // Amplitude(s) for diagram number 217 - VVV1_0( w_fp[62], w_fp[59], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[59], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4539,7 +4539,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 218 - VVV1_0( w_fp[62], w_fp[1], w_fp[42], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[42], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4558,7 +4558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 219 - VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4570,7 +4570,7 @@ namespace mg5amcCpu jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4582,7 +4582,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -4601,7 +4601,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 220 - FFV1_0( w_fp[3], w_fp[57], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[59], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4616,7 +4616,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 221 - FFV1_0( w_fp[26], w_fp[57], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[57], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4629,7 +4629,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 222 - FFV1_0( w_fp[14], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4642,7 +4642,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 223 - FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4654,10 +4654,10 @@ namespace mg5amcCpu // *** DIAGRAM 224 OF 1240 *** // Wavefunction(s) for diagram number 224 - VVV1P0_1( w_fp[1], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[1], w_fp[27], COUPs[0], 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 224 - VVV1_0( w_fp[62], w_fp[68], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[68], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -4676,7 +4676,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 225 - VVV1_0( w_fp[62], w_fp[1], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[16], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4695,7 +4695,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 226 - VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4707,7 +4707,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4719,7 +4719,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4738,7 +4738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 227 - FFV1_0( w_fp[3], w_fp[55], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[68], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4753,7 +4753,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 228 - FFV1_0( w_fp[14], w_fp[55], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[55], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4766,7 +4766,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 229 - FFV1_0( w_fp[12], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4779,7 +4779,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 230 - FFV1_0( w_fp[3], w_fp[77], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[19], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4791,10 +4791,10 @@ namespace mg5amcCpu // *** DIAGRAM 231 OF 1240 *** // Wavefunction(s) for diagram number 231 - VVV1P0_1( w_fp[1], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[67] ); + VVV1P0_1( w_fp[1], w_fp[29], COUPs[0], 0., 0., w_fp[67] ); // Amplitude(s) for diagram number 231 - VVV1_0( w_fp[62], w_fp[67], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[67], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4813,7 +4813,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 232 - VVV1_0( w_fp[62], w_fp[1], w_fp[19], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[19], 
COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4832,7 +4832,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 233 - VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4844,7 +4844,7 @@ namespace mg5amcCpu jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4856,7 +4856,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4875,7 +4875,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 234 - FFV1_0( w_fp[3], w_fp[9], w_fp[67], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[67], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4890,7 +4890,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 235 - FFV1_0( w_fp[12], w_fp[9], w_fp[1], COUPs[1], 
1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4900,12 +4900,12 @@ namespace mg5amcCpu // *** DIAGRAM 236 OF 1240 *** // Wavefunction(s) for diagram number 236 - VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[73] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[79] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[80] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[73] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[79] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[80] ); // Amplitude(s) for diagram number 236 - VVV1_0( w_fp[73], w_fp[6], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[73], w_fp[6], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4917,7 +4917,7 @@ namespace mg5amcCpu jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[79], w_fp[6], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[79], w_fp[6], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4929,7 +4929,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[80], w_fp[6], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[80], w_fp[6], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4948,7 
+4948,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 237 - FFV1_0( w_fp[3], w_fp[57], w_fp[73], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[73], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4956,7 +4956,7 @@ namespace mg5amcCpu jamp_sv[19] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[57], w_fp[79], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[79], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4964,7 +4964,7 @@ namespace mg5amcCpu jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[57], w_fp[80], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[80], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4979,7 +4979,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 238 - FFV1_0( w_fp[41], w_fp[34], w_fp[73], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[34], w_fp[73], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4987,7 +4987,7 @@ namespace mg5amcCpu jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[34], w_fp[79], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[34], w_fp[79], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4995,7 +4995,7 @@ namespace mg5amcCpu jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= 
amp_sv[0]; jamp_sv[12] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[34], w_fp[80], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[34], w_fp[80], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5007,12 +5007,12 @@ namespace mg5amcCpu // *** DIAGRAM 239 OF 1240 *** // Wavefunction(s) for diagram number 239 - VVVV1P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[57] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[81] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[82] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[57] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[81] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[82] ); // Amplitude(s) for diagram number 239 - VVV1_0( w_fp[57], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[57], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5024,7 +5024,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[81], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[81], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5036,7 +5036,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[82], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[82], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5055,7 +5055,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 240 - FFV1_0( w_fp[3], w_fp[55], w_fp[57], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[57], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5063,7 +5063,7 @@ namespace mg5amcCpu jamp_sv[13] -= amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[55], w_fp[81], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[81], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5071,7 +5071,7 @@ namespace mg5amcCpu jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[55], w_fp[82], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[82], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5086,7 +5086,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 241 - FFV1_0( w_fp[38], w_fp[34], w_fp[57], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[34], w_fp[57], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5094,7 +5094,7 @@ namespace mg5amcCpu jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[34], w_fp[81], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[34], w_fp[81], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -5102,7 +5102,7 @@ namespace mg5amcCpu jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[34], w_fp[82], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[34], w_fp[82], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5114,12 +5114,12 @@ namespace mg5amcCpu // *** DIAGRAM 242 OF 1240 *** // Wavefunction(s) for diagram number 242 - VVVV1P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[55] ); - VVVV3P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[83] ); - VVVV4P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[84] ); + VVVV1P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[55] ); + VVVV3P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[83] ); + VVVV4P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[84] ); // Amplitude(s) for diagram number 242 - VVV1_0( w_fp[55], w_fp[4], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[55], w_fp[4], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5131,7 +5131,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[83], w_fp[4], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[83], w_fp[4], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5143,7 +5143,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[84], w_fp[4], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[84], w_fp[4], w_fp[62], 
COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5162,7 +5162,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 243 - FFV1_0( w_fp[3], w_fp[9], w_fp[55], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[55], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5170,7 +5170,7 @@ namespace mg5amcCpu jamp_sv[7] -= amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[83], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[83], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5178,7 +5178,7 @@ namespace mg5amcCpu jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[84], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[84], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5193,7 +5193,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 244 - FFV1_0( w_fp[46], w_fp[34], w_fp[55], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[34], w_fp[55], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5201,7 +5201,7 @@ namespace mg5amcCpu jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[34], w_fp[83], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[34], w_fp[83], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5209,7 +5209,7 @@ namespace mg5amcCpu jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[34], w_fp[84], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[34], w_fp[84], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5224,7 +5224,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 245 - FFV1_0( w_fp[3], w_fp[77], w_fp[30], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[30], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5232,7 +5232,7 @@ namespace mg5amcCpu jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[31], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[31], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5240,7 +5240,7 @@ namespace mg5amcCpu jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[32], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[32], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5255,7 +5255,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 246 - VVV1_0( w_fp[1], w_fp[30], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[30], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -5267,7 +5267,7 @@ namespace mg5amcCpu jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[31], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[31], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5279,7 +5279,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[32], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[32], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5295,13 +5295,13 @@ namespace mg5amcCpu // *** DIAGRAM 247 OF 1240 *** // Wavefunction(s) for diagram number 247 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); - FFV1_2( w_fp[62], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] ); - FFV1_1( w_fp[77], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[62] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); + FFV1_2( w_fp[62], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[34] ); + FFV1_1( w_fp[77], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 247 - FFV1_0( w_fp[34], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5310,10 +5310,10 @@ namespace mg5amcCpu // *** DIAGRAM 248 OF 1240 *** // Wavefunction(s) 
for diagram number 248 - FFV1_1( w_fp[77], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[85] ); + FFV1_1( w_fp[77], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[85] ); // Amplitude(s) for diagram number 248 - FFV1_0( w_fp[34], w_fp[85], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[85], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5322,11 +5322,11 @@ namespace mg5amcCpu // *** DIAGRAM 249 OF 1240 *** // Wavefunction(s) for diagram number 249 - FFV1_2( w_fp[62], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] ); - FFV1_1( w_fp[77], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[87] ); + FFV1_2( w_fp[62], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[86] ); + FFV1_1( w_fp[77], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[87] ); // Amplitude(s) for diagram number 249 - FFV1_0( w_fp[86], w_fp[87], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[87], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5338,7 +5338,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 250 - FFV1_0( w_fp[86], w_fp[85], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[85], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5347,10 +5347,10 @@ namespace mg5amcCpu // *** DIAGRAM 251 OF 1240 *** // Wavefunction(s) for diagram number 251 - FFV1_2( w_fp[62], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] ); + FFV1_2( w_fp[62], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[88] ); // Amplitude(s) for diagram number 251 - FFV1_0( w_fp[88], w_fp[87], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[87], w_fp[5], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5362,7 +5362,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 252 - FFV1_0( w_fp[88], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5371,10 +5371,10 @@ namespace mg5amcCpu // *** DIAGRAM 253 OF 1240 *** // Wavefunction(s) for diagram number 253 - FFV1P0_3( w_fp[62], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[89] ); + FFV1P0_3( w_fp[62], w_fp[77], COUPs[1], 0., 0., w_fp[89] ); // Amplitude(s) for diagram number 253 - VVV1_0( w_fp[24], w_fp[6], w_fp[89], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[89], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5386,10 +5386,10 @@ namespace mg5amcCpu // *** DIAGRAM 254 OF 1240 *** // Wavefunction(s) for diagram number 254 - FFV1_2( w_fp[62], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] ); + FFV1_2( w_fp[62], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[90] ); // Amplitude(s) for diagram number 254 - FFV1_0( w_fp[90], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5402,7 +5402,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 255 - FFV1_0( w_fp[88], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -5415,7 +5415,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 256 - VVV1_0( w_fp[27], w_fp[5], w_fp[89], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[89], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5427,10 +5427,10 @@ namespace mg5amcCpu // *** DIAGRAM 257 OF 1240 *** // Wavefunction(s) for diagram number 257 - FFV1_2( w_fp[62], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] ); + FFV1_2( w_fp[62], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[91] ); // Amplitude(s) for diagram number 257 - FFV1_0( w_fp[91], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[91], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5443,7 +5443,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 258 - FFV1_0( w_fp[86], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5456,7 +5456,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 259 - VVV1_0( w_fp[4], w_fp[29], w_fp[89], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[89], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5471,7 +5471,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 260 - FFV1_0( w_fp[34], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5481,10 +5481,10 @@ namespace mg5amcCpu // *** DIAGRAM 261 OF 1240 *** // Wavefunction(s) for diagram number 261 - FFV1_2( w_fp[62], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] ); + FFV1_2( w_fp[62], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[89] ); // Amplitude(s) for diagram number 261 - FFV1_0( w_fp[89], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[89], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5497,7 +5497,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 262 - FFV1_0( w_fp[62], w_fp[77], w_fp[30], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[30], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5505,7 +5505,7 @@ namespace mg5amcCpu jamp_sv[35] -= amp_sv[0]; jamp_sv[41] -= amp_sv[0]; jamp_sv[47] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[77], w_fp[31], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[31], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5513,7 +5513,7 @@ namespace mg5amcCpu jamp_sv[39] += amp_sv[0]; jamp_sv[41] -= amp_sv[0]; jamp_sv[45] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[77], w_fp[32], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[32], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5525,10 +5525,10 @@ namespace mg5amcCpu // *** DIAGRAM 263 OF 1240 *** // Wavefunction(s) for diagram number 263 - FFV1P0_3( w_fp[62], w_fp[2], COUPs[1], 1.0, 0., 0., 
w_fp[92] ); + FFV1P0_3( w_fp[62], w_fp[2], COUPs[1], 0., 0., w_fp[92] ); // Amplitude(s) for diagram number 263 - VVV1_0( w_fp[92], w_fp[63], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[63], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5547,7 +5547,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 264 - VVV1_0( w_fp[92], w_fp[64], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[64], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5566,7 +5566,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 265 - VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5578,7 +5578,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5590,7 +5590,7 @@ namespace mg5amcCpu jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5606,10 +5606,10 @@ namespace mg5amcCpu // *** DIAGRAM 266 OF 1240 *** // Wavefunction(s) for diagram number 266 - FFV1_1( w_fp[2], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[93] ); + FFV1_1( w_fp[2], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[93] ); // Amplitude(s) for diagram number 266 - FFV1_0( w_fp[86], w_fp[93], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[93], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5622,7 +5622,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 267 - FFV1_0( w_fp[86], w_fp[2], w_fp[64], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[64], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5637,7 +5637,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 268 - FFV1_0( w_fp[88], w_fp[93], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[93], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5650,7 +5650,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 269 - FFV1_0( w_fp[88], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5662,10 +5662,10 @@ namespace mg5amcCpu // *** DIAGRAM 270 OF 1240 *** // Wavefunction(s) for diagram number 270 - FFV1_2( w_fp[62], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] ); + FFV1_2( w_fp[62], w_fp[61], COUPs[1], cIPD[0], 
cIPD[1], w_fp[94] ); // Amplitude(s) for diagram number 270 - FFV1_0( w_fp[94], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[94], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5675,10 +5675,10 @@ namespace mg5amcCpu // *** DIAGRAM 271 OF 1240 *** // Wavefunction(s) for diagram number 271 - FFV1P0_3( w_fp[62], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[95] ); + FFV1P0_3( w_fp[62], w_fp[39], COUPs[1], 0., 0., w_fp[95] ); // Amplitude(s) for diagram number 271 - VVV1_0( w_fp[61], w_fp[6], w_fp[95], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[95], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5693,7 +5693,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 272 - FFV1_0( w_fp[88], w_fp[39], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[39], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5706,7 +5706,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 273 - FFV1_0( w_fp[94], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[94], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5716,10 +5716,10 @@ namespace mg5amcCpu // *** DIAGRAM 274 OF 1240 *** // Wavefunction(s) for diagram number 274 - FFV1P0_3( w_fp[62], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[96] ); + FFV1P0_3( w_fp[62], w_fp[47], COUPs[1], 0., 0., w_fp[96] ); // Amplitude(s) for diagram number 274 - VVV1_0( w_fp[61], w_fp[5], w_fp[96], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( 
w_fp[61], w_fp[5], w_fp[96], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5734,7 +5734,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 275 - FFV1_0( w_fp[86], w_fp[47], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[47], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5747,7 +5747,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 276 - FFV1_0( w_fp[94], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[94], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5762,7 +5762,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 277 - VVV1_0( w_fp[61], w_fp[29], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[29], w_fp[92], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5781,7 +5781,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 278 - FFV1_0( w_fp[89], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[89], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5796,7 +5796,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 279 - VVV1_0( w_fp[92], w_fp[69], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[69], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -5815,7 +5815,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 280 - VVV1_0( w_fp[92], w_fp[70], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[70], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5834,7 +5834,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 281 - VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5846,7 +5846,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5858,7 +5858,7 @@ namespace mg5amcCpu jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5874,10 +5874,10 @@ namespace mg5amcCpu // *** DIAGRAM 282 OF 1240 *** // Wavefunction(s) for diagram number 282 - FFV1_1( w_fp[2], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] ); + FFV1_1( w_fp[2], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[94] 
); // Amplitude(s) for diagram number 282 - FFV1_0( w_fp[34], w_fp[94], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[94], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5890,7 +5890,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 283 - FFV1_0( w_fp[34], w_fp[2], w_fp[70], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[70], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5905,7 +5905,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 284 - FFV1_0( w_fp[88], w_fp[94], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[94], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5918,7 +5918,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 285 - FFV1_0( w_fp[88], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5930,10 +5930,10 @@ namespace mg5amcCpu // *** DIAGRAM 286 OF 1240 *** // Wavefunction(s) for diagram number 286 - FFV1_2( w_fp[62], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] ); + FFV1_2( w_fp[62], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[97] ); // Amplitude(s) for diagram number 286 - FFV1_0( w_fp[97], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[97], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-5943,10 +5943,10 @@ namespace mg5amcCpu // *** DIAGRAM 287 OF 1240 *** // Wavefunction(s) for diagram number 287 - FFV1P0_3( w_fp[62], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[98] ); + FFV1P0_3( w_fp[62], w_fp[33], COUPs[1], 0., 0., w_fp[98] ); // Amplitude(s) for diagram number 287 - VVV1_0( w_fp[66], w_fp[6], w_fp[98], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[98], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5961,7 +5961,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 288 - FFV1_0( w_fp[88], w_fp[33], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[33], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5974,7 +5974,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 289 - FFV1_0( w_fp[97], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[97], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5987,7 +5987,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 290 - VVV1_0( w_fp[66], w_fp[4], w_fp[96], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[96], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6002,7 +6002,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 291 - FFV1_0( w_fp[34], w_fp[47], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[47], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -6015,7 +6015,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 292 - FFV1_0( w_fp[97], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[97], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6030,7 +6030,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 293 - VVV1_0( w_fp[66], w_fp[27], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[27], w_fp[92], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6049,7 +6049,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 294 - FFV1_0( w_fp[91], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[91], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6064,7 +6064,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 295 - VVV1_0( w_fp[92], w_fp[74], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[74], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6083,7 +6083,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 296 - VVV1_0( w_fp[92], w_fp[75], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[75], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6102,7 +6102,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 297 - VVVV1_0( w_fp[72], w_fp[4], w_fp[5], 
w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6114,7 +6114,7 @@ namespace mg5amcCpu jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6126,7 +6126,7 @@ namespace mg5amcCpu jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6142,10 +6142,10 @@ namespace mg5amcCpu // *** DIAGRAM 298 OF 1240 *** // Wavefunction(s) for diagram number 298 - FFV1_1( w_fp[2], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] ); + FFV1_1( w_fp[2], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[97] ); // Amplitude(s) for diagram number 298 - FFV1_0( w_fp[34], w_fp[97], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[97], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6158,7 +6158,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 299 - FFV1_0( w_fp[34], w_fp[2], w_fp[75], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[75], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6173,7 +6173,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 300 - FFV1_0( w_fp[86], w_fp[97], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[97], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6186,7 +6186,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 301 - FFV1_0( w_fp[86], w_fp[2], w_fp[74], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[74], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6198,10 +6198,10 @@ namespace mg5amcCpu // *** DIAGRAM 302 OF 1240 *** // Wavefunction(s) for diagram number 302 - FFV1_2( w_fp[62], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); + FFV1_2( w_fp[62], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 302 - FFV1_0( w_fp[99], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6214,7 +6214,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 303 - VVV1_0( w_fp[72], w_fp[5], w_fp[98], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[98], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6229,7 +6229,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 304 - FFV1_0( w_fp[86], w_fp[33], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], 
w_fp[33], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6242,7 +6242,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 305 - FFV1_0( w_fp[99], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6255,7 +6255,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 306 - VVV1_0( w_fp[72], w_fp[4], w_fp[95], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[95], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6270,7 +6270,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 307 - FFV1_0( w_fp[34], w_fp[39], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[39], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6283,7 +6283,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 308 - FFV1_0( w_fp[99], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6298,7 +6298,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 309 - VVV1_0( w_fp[72], w_fp[24], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[24], w_fp[92], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv 
and denominators_sv (#473) #endif @@ -6317,7 +6317,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 310 - FFV1_0( w_fp[90], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6329,10 +6329,10 @@ namespace mg5amcCpu // *** DIAGRAM 311 OF 1240 *** // Wavefunction(s) for diagram number 311 - FFV1_2( w_fp[62], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); + FFV1_2( w_fp[62], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 311 - FFV1_0( w_fp[99], w_fp[35], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[35], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6344,7 +6344,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 312 - FFV1_0( w_fp[99], w_fp[36], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[36], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6353,10 +6353,10 @@ namespace mg5amcCpu // *** DIAGRAM 313 OF 1240 *** // Wavefunction(s) for diagram number 313 - FFV1_1( w_fp[33], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[100] ); + FFV1_1( w_fp[33], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[100] ); // Amplitude(s) for diagram number 313 - FFV1_0( w_fp[86], w_fp[100], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[100], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6368,7 +6368,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 314 - FFV1_0( w_fp[86], w_fp[36], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[36], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6380,7 +6380,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 315 - FFV1_0( w_fp[88], w_fp[100], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[100], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6392,7 +6392,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 316 - FFV1_0( w_fp[88], w_fp[35], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[35], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6404,7 +6404,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 317 - FFV1_0( w_fp[99], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6417,7 +6417,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 318 - VVV1_0( w_fp[1], w_fp[29], w_fp[98], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[98], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6432,7 +6432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 319 - FFV1_0( w_fp[89], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[89], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6445,7 +6445,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 320 - FFV1_0( w_fp[99], w_fp[43], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[43], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6457,7 +6457,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 321 - FFV1_0( w_fp[99], w_fp[44], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[44], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6466,10 +6466,10 @@ namespace mg5amcCpu // *** DIAGRAM 322 OF 1240 *** // Wavefunction(s) for diagram number 322 - FFV1_1( w_fp[39], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] ); + FFV1_1( w_fp[39], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[89] ); // Amplitude(s) for diagram number 322 - FFV1_0( w_fp[34], w_fp[89], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[89], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6481,7 +6481,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 323 - FFV1_0( w_fp[34], w_fp[44], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[44], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6493,7 +6493,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 324 - FFV1_0( w_fp[88], w_fp[89], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], 
w_fp[89], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6505,7 +6505,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 325 - FFV1_0( w_fp[88], w_fp[43], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[43], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6517,7 +6517,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 326 - FFV1_0( w_fp[99], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6530,7 +6530,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 327 - VVV1_0( w_fp[1], w_fp[27], w_fp[95], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[95], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6545,7 +6545,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 328 - FFV1_0( w_fp[91], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[91], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6558,7 +6558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 329 - FFV1_0( w_fp[99], w_fp[49], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[49], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -6570,7 +6570,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 330 - FFV1_0( w_fp[99], w_fp[50], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[50], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6579,10 +6579,10 @@ namespace mg5amcCpu // *** DIAGRAM 331 OF 1240 *** // Wavefunction(s) for diagram number 331 - FFV1_1( w_fp[47], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] ); + FFV1_1( w_fp[47], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[91] ); // Amplitude(s) for diagram number 331 - FFV1_0( w_fp[34], w_fp[91], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[91], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6594,7 +6594,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 332 - FFV1_0( w_fp[34], w_fp[50], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[50], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6606,7 +6606,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 333 - FFV1_0( w_fp[86], w_fp[91], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[91], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6618,7 +6618,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 334 - FFV1_0( w_fp[86], w_fp[49], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[49], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6630,7 +6630,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 335 - FFV1_0( w_fp[99], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6643,7 +6643,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 336 - VVV1_0( w_fp[1], w_fp[24], w_fp[96], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[96], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6658,7 +6658,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 337 - FFV1_0( w_fp[90], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6671,7 +6671,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 338 - FFV1_0( w_fp[99], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6684,7 +6684,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 339 - FFV1_0( w_fp[99], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6699,7 +6699,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 340 - VVV1_0( w_fp[92], w_fp[59], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[59], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6718,7 +6718,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 341 - VVV1_0( w_fp[92], w_fp[1], w_fp[42], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[1], w_fp[42], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6737,7 +6737,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 342 - VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6749,7 +6749,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6761,7 +6761,7 @@ namespace mg5amcCpu jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6780,7 +6780,7 
@@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 343 - FFV1_0( w_fp[88], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6795,7 +6795,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 344 - FFV1_0( w_fp[88], w_fp[17], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[17], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6808,7 +6808,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 345 - FFV1_0( w_fp[99], w_fp[15], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[15], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6821,7 +6821,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 346 - FFV1_0( w_fp[99], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6836,7 +6836,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 347 - VVV1_0( w_fp[92], w_fp[68], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[68], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6855,7 +6855,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 348 - VVV1_0( w_fp[92], w_fp[1], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], 
w_fp[1], w_fp[16], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6874,7 +6874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 349 - VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6886,7 +6886,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6898,7 +6898,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6917,7 +6917,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 350 - FFV1_0( w_fp[86], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6932,7 +6932,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 351 - FFV1_0( w_fp[86], 
w_fp[15], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[15], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6945,7 +6945,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 352 - FFV1_0( w_fp[99], w_fp[18], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[18], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6958,7 +6958,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 353 - FFV1_0( w_fp[99], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6973,7 +6973,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 354 - VVV1_0( w_fp[92], w_fp[67], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[67], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6992,7 +6992,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 355 - VVV1_0( w_fp[92], w_fp[1], w_fp[19], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[1], w_fp[19], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7011,7 +7011,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 356 - VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // 
Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7023,7 +7023,7 @@ namespace mg5amcCpu jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7035,7 +7035,7 @@ namespace mg5amcCpu jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7054,7 +7054,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 357 - FFV1_0( w_fp[34], w_fp[2], w_fp[67], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[67], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7069,7 +7069,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 358 - FFV1_0( w_fp[34], w_fp[18], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[18], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7082,7 +7082,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 359 - VVV1_0( w_fp[73], w_fp[6], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[73], w_fp[6], w_fp[92], COUPs[0], &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7094,7 +7094,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[79], w_fp[6], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[79], w_fp[6], w_fp[92], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7106,7 +7106,7 @@ namespace mg5amcCpu jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[80], w_fp[6], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[80], w_fp[6], w_fp[92], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7125,7 +7125,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 360 - FFV1_0( w_fp[88], w_fp[2], w_fp[73], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[73], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7133,7 +7133,7 @@ namespace mg5amcCpu jamp_sv[39] -= amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[87] += amp_sv[0]; - FFV1_0( w_fp[88], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7141,7 +7141,7 @@ namespace mg5amcCpu jamp_sv[57] += amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[81] += amp_sv[0]; - FFV1_0( w_fp[88], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); 
+ FFV1_0( w_fp[88], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7156,7 +7156,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 361 - FFV1_0( w_fp[62], w_fp[47], w_fp[73], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[47], w_fp[73], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7164,7 +7164,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[47], w_fp[79], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[47], w_fp[79], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7172,7 +7172,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[47], w_fp[80], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[47], w_fp[80], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7187,7 +7187,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 362 - VVV1_0( w_fp[57], w_fp[5], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[57], w_fp[5], w_fp[92], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7199,7 +7199,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[81], w_fp[5], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( 
w_fp[81], w_fp[5], w_fp[92], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7211,7 +7211,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[82], w_fp[5], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[82], w_fp[5], w_fp[92], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7230,7 +7230,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 363 - FFV1_0( w_fp[86], w_fp[2], w_fp[57], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[57], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7238,7 +7238,7 @@ namespace mg5amcCpu jamp_sv[45] -= amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - FFV1_0( w_fp[86], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7246,7 +7246,7 @@ namespace mg5amcCpu jamp_sv[59] += amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; - FFV1_0( w_fp[86], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7261,7 +7261,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 364 - FFV1_0( w_fp[62], w_fp[39], w_fp[57], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], 
w_fp[57], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7269,7 +7269,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[39], w_fp[81], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[81], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7277,7 +7277,7 @@ namespace mg5amcCpu jamp_sv[87] += amp_sv[0]; jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[39], w_fp[82], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[82], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7292,7 +7292,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 365 - VVV1_0( w_fp[55], w_fp[4], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[55], w_fp[4], w_fp[92], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7304,7 +7304,7 @@ namespace mg5amcCpu jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[83], w_fp[4], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[83], w_fp[4], w_fp[92], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7316,7 +7316,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[84], w_fp[4], 
w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[84], w_fp[4], w_fp[92], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7335,7 +7335,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 366 - FFV1_0( w_fp[34], w_fp[2], w_fp[55], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[55], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7343,7 +7343,7 @@ namespace mg5amcCpu jamp_sv[47] -= amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[83], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[83], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7351,7 +7351,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[84], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[84], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7366,7 +7366,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 367 - FFV1_0( w_fp[62], w_fp[33], w_fp[55], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[55], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7374,7 +7374,7 @@ namespace mg5amcCpu jamp_sv[59] -= amp_sv[0]; jamp_sv[65] -= amp_sv[0]; jamp_sv[71] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[33], w_fp[83], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], 
w_fp[83], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7382,7 +7382,7 @@ namespace mg5amcCpu jamp_sv[63] += amp_sv[0]; jamp_sv[65] -= amp_sv[0]; jamp_sv[69] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[33], w_fp[84], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[84], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7397,7 +7397,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 368 - FFV1_0( w_fp[99], w_fp[2], w_fp[30], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[30], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7405,7 +7405,7 @@ namespace mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[31], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[31], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7413,7 +7413,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[32], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[32], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7428,7 +7428,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 369 - VVV1_0( w_fp[1], w_fp[30], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[30], w_fp[92], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // 
Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7440,7 +7440,7 @@ namespace mg5amcCpu jamp_sv[71] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[31], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[31], w_fp[92], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7452,7 +7452,7 @@ namespace mg5amcCpu jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[32], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[32], w_fp[92], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7468,11 +7468,11 @@ namespace mg5amcCpu // *** DIAGRAM 370 OF 1240 *** // Wavefunction(s) for diagram number 370 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[92] ); - FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[92] ); + FFV1_2( w_fp[3], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 370 - FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7485,7 +7485,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 371 - FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7495,11 +7495,11 @@ namespace mg5amcCpu // *** DIAGRAM 372 OF 1240 *** // Wavefunction(s) for diagram number 372 - VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[62] ); - FFV1P0_3( w_fp[3], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[34] ); + VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 0., 0., w_fp[62] ); + FFV1P0_3( w_fp[3], w_fp[77], COUPs[1], 0., 0., w_fp[34] ); // Amplitude(s) for diagram number 372 - VVV1_0( w_fp[62], w_fp[34], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[34], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7518,7 +7518,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 373 - FFV1_0( w_fp[3], w_fp[85], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[85], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7530,10 +7530,10 @@ namespace mg5amcCpu // *** DIAGRAM 374 OF 1240 *** // Wavefunction(s) for diagram number 374 - VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 374 - VVV1_0( w_fp[86], w_fp[34], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[34], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7552,7 +7552,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 375 - FFV1_0( w_fp[3], w_fp[9], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support 
updates numerators_sv and denominators_sv (#473) #endif @@ -7564,12 +7564,12 @@ namespace mg5amcCpu // *** DIAGRAM 376 OF 1240 *** // Wavefunction(s) for diagram number 376 - VVVV1P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] ); - VVVV3P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] ); - VVVV4P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] ); + VVVV1P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[88] ); + VVVV3P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[90] ); + VVVV4P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[96] ); // Amplitude(s) for diagram number 376 - FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7581,7 +7581,7 @@ namespace mg5amcCpu jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7593,7 +7593,7 @@ namespace mg5amcCpu jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[96], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7609,10 +7609,10 @@ namespace mg5amcCpu // *** DIAGRAM 377 OF 1240 *** // Wavefunction(s) for diagram number 377 - FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], 
cIPD[1], w_fp[95] ); + FFV1_1( w_fp[77], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[95] ); // Amplitude(s) for diagram number 377 - FFV1_0( w_fp[38], w_fp[95], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[95], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7622,10 +7622,10 @@ namespace mg5amcCpu // *** DIAGRAM 378 OF 1240 *** // Wavefunction(s) for diagram number 378 - FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); + FFV1_2( w_fp[38], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 378 - FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7638,7 +7638,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 379 - FFV1_0( w_fp[38], w_fp[77], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7653,7 +7653,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 380 - FFV1_0( w_fp[41], w_fp[95], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[95], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7663,10 +7663,10 @@ namespace mg5amcCpu // *** DIAGRAM 381 OF 1240 *** // Wavefunction(s) for diagram number 381 - FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[101] ); + FFV1_2( w_fp[41], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[101] ); // Amplitude(s) 
for diagram number 381 - FFV1_0( w_fp[101], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[101], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7679,7 +7679,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 382 - FFV1_0( w_fp[41], w_fp[77], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7694,7 +7694,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 383 - FFV1_0( w_fp[3], w_fp[95], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[95], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7709,7 +7709,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 384 - FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7721,10 +7721,10 @@ namespace mg5amcCpu // *** DIAGRAM 385 OF 1240 *** // Wavefunction(s) for diagram number 385 - VVV1P0_1( w_fp[92], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[95] ); + VVV1P0_1( w_fp[92], w_fp[29], COUPs[0], 0., 0., w_fp[95] ); // Amplitude(s) for diagram number 385 - FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7740,10 +7740,10 @@ 
namespace mg5amcCpu // *** DIAGRAM 386 OF 1240 *** // Wavefunction(s) for diagram number 386 - FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] ); + FFV1_1( w_fp[2], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[102] ); // Amplitude(s) for diagram number 386 - FFV1_0( w_fp[22], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7756,7 +7756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 387 - FFV1_0( w_fp[21], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7766,10 +7766,10 @@ namespace mg5amcCpu // *** DIAGRAM 388 OF 1240 *** // Wavefunction(s) for diagram number 388 - FFV1P0_3( w_fp[52], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[103] ); + FFV1P0_3( w_fp[52], w_fp[2], COUPs[1], 0., 0., w_fp[103] ); // Amplitude(s) for diagram number 388 - VVV1_0( w_fp[62], w_fp[103], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[103], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7788,7 +7788,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 389 - FFV1_0( w_fp[21], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7803,7 +7803,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 390 - VVV1_0( w_fp[86], w_fp[103], w_fp[5], COUPs[0], 1.0, 
&_fp[0] ); + VVV1_0( w_fp[86], w_fp[103], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7822,7 +7822,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 391 - FFV1_0( w_fp[22], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7837,7 +7837,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 392 - FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7849,7 +7849,7 @@ namespace mg5amcCpu jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7861,7 +7861,7 @@ namespace mg5amcCpu jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7877,10 +7877,10 @@ namespace mg5amcCpu // *** DIAGRAM 393 OF 1240 *** // Wavefunction(s) for diagram number 393 - FFV1_2( w_fp[52], 
w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] ); + FFV1_2( w_fp[52], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[104] ); // Amplitude(s) for diagram number 393 - FFV1_0( w_fp[104], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7890,10 +7890,10 @@ namespace mg5amcCpu // *** DIAGRAM 394 OF 1240 *** // Wavefunction(s) for diagram number 394 - FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[105] ); + FFV1_1( w_fp[39], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[105] ); // Amplitude(s) for diagram number 394 - FFV1_0( w_fp[52], w_fp[105], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[105], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7906,7 +7906,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 395 - FFV1_0( w_fp[52], w_fp[39], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7921,7 +7921,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 396 - FFV1_0( w_fp[104], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7931,10 +7931,10 @@ namespace mg5amcCpu // *** DIAGRAM 397 OF 1240 *** // Wavefunction(s) for diagram number 397 - FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] ); + FFV1_1( w_fp[47], w_fp[92], COUPs[1], 
cIPD[0], cIPD[1], w_fp[106] ); // Amplitude(s) for diagram number 397 - FFV1_0( w_fp[52], w_fp[106], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[106], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7947,7 +7947,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 398 - FFV1_0( w_fp[52], w_fp[47], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7962,7 +7962,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 399 - FFV1_0( w_fp[104], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7977,7 +7977,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 400 - FFV1_0( w_fp[52], w_fp[102], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[102], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7992,7 +7992,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 401 - FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8011,7 +8011,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 402 - FFV1_0( w_fp[71], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( 
w_fp[71], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8026,7 +8026,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 403 - FFV1_0( w_fp[3], w_fp[102], w_fp[70], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[70], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8045,7 +8045,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 404 - FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8060,7 +8060,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 405 - FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8079,7 +8079,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 406 - FFV1_0( w_fp[3], w_fp[94], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[94], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8098,7 +8098,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 407 - FFV1_0( w_fp[71], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -8117,7 +8117,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 408 - VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8137,7 +8137,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8157,7 +8157,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8181,10 +8181,10 @@ namespace mg5amcCpu // *** DIAGRAM 409 OF 1240 *** // Wavefunction(s) for diagram number 409 - VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 409 - VVV1_0( w_fp[8], w_fp[6], w_fp[104], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[104], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8208,10 +8208,10 @@ namespace mg5amcCpu // *** DIAGRAM 410 OF 1240 *** // Wavefunction(s) for diagram number 410 - VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 
0., 0., w_fp[107] ); + VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 0., 0., w_fp[107] ); // Amplitude(s) for diagram number 410 - VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8238,7 +8238,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 411 - VVV1_0( w_fp[66], w_fp[8], w_fp[86], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[8], w_fp[86], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8265,7 +8265,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 412 - FFV1_0( w_fp[3], w_fp[47], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8284,7 +8284,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 413 - FFV1_0( w_fp[3], w_fp[106], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[106], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8299,7 +8299,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 414 - FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8314,7 +8314,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 415 - FFV1_0( w_fp[41], w_fp[2], 
w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8333,7 +8333,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 416 - FFV1_0( w_fp[41], w_fp[102], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[102], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8348,7 +8348,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 417 - FFV1_0( w_fp[101], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[101], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8363,7 +8363,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 418 - FFV1_0( w_fp[76], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8378,7 +8378,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 419 - FFV1_0( w_fp[3], w_fp[102], w_fp[75], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[75], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8397,7 +8397,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 420 - FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code 
base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8412,7 +8412,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 421 - FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8431,7 +8431,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 422 - FFV1_0( w_fp[3], w_fp[97], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[97], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8450,7 +8450,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 423 - FFV1_0( w_fp[76], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8469,7 +8469,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 424 - VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8489,7 +8489,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -8509,7 +8509,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8533,10 +8533,10 @@ namespace mg5amcCpu // *** DIAGRAM 425 OF 1240 *** // Wavefunction(s) for diagram number 425 - VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 425 - VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8563,7 +8563,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 426 - VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8590,7 +8590,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 427 - VVV1_0( w_fp[72], w_fp[8], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[8], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8617,7 +8617,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 428 - FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code 
base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8636,7 +8636,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 429 - FFV1_0( w_fp[3], w_fp[105], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[105], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8651,7 +8651,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 430 - FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8666,7 +8666,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 431 - FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8685,7 +8685,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 432 - FFV1_0( w_fp[38], w_fp[102], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[102], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8700,7 +8700,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 433 - FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8712,10 +8712,10 @@ namespace mg5amcCpu // *** 
DIAGRAM 434 OF 1240 *** // Wavefunction(s) for diagram number 434 - VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 434 - VVV1_0( w_fp[104], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8742,7 +8742,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 435 - VVV1_0( w_fp[104], w_fp[11], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[11], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8769,7 +8769,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 436 - VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8789,7 +8789,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8809,7 +8809,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here 
the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8833,10 +8833,10 @@ namespace mg5amcCpu // *** DIAGRAM 437 OF 1240 *** // Wavefunction(s) for diagram number 437 - VVV1P0_1( w_fp[1], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[108] ); + VVV1P0_1( w_fp[1], w_fp[8], COUPs[0], 0., 0., w_fp[108] ); // Amplitude(s) for diagram number 437 - VVV1_0( w_fp[62], w_fp[108], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[108], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8863,7 +8863,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 438 - VVV1_0( w_fp[62], w_fp[1], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8890,7 +8890,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 439 - VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8910,7 +8910,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[115] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8930,7 +8930,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[6], 
w_fp[62], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8957,7 +8957,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 440 - VVV1_0( w_fp[86], w_fp[108], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[108], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8984,7 +8984,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 441 - VVV1_0( w_fp[86], w_fp[1], w_fp[10], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[10], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9011,7 +9011,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 442 - VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9031,7 +9031,7 @@ namespace mg5amcCpu jamp_sv[94] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9051,7 +9051,7 @@ namespace mg5amcCpu jamp_sv[99] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( 
w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9075,12 +9075,12 @@ namespace mg5amcCpu // *** DIAGRAM 443 OF 1240 *** // Wavefunction(s) for diagram number 443 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[109] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 443 - VVV1_0( w_fp[8], w_fp[6], w_fp[109], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[109], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9100,7 +9100,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[110], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[110], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9120,7 +9120,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[115] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[111], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[111], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9144,12 +9144,12 @@ namespace mg5amcCpu // *** DIAGRAM 444 OF 1240 *** // Wavefunction(s) for diagram number 
444 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[113] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[114] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[112] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[113] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[114] ); // Amplitude(s) for diagram number 444 - VVV1_0( w_fp[8], w_fp[5], w_fp[112], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[112], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9169,7 +9169,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[113], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[113], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9189,7 +9189,7 @@ namespace mg5amcCpu jamp_sv[94] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[114], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[114], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9216,7 +9216,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 445 - VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9236,7 +9236,7 @@ namespace mg5amcCpu jamp_sv[94] -= amp_sv[0]; 
jamp_sv[115] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[90], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[90], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9256,7 +9256,7 @@ namespace mg5amcCpu jamp_sv[94] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[96], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[96], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9283,7 +9283,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 446 - VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9303,7 +9303,7 @@ namespace mg5amcCpu jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9323,7 +9323,7 @@ namespace mg5amcCpu jamp_sv[93] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9350,7 +9350,7 @@ namespace mg5amcCpu // (none) 
// Amplitude(s) for diagram number 447 - VVV1_0( w_fp[8], w_fp[29], w_fp[104], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[29], w_fp[104], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9377,7 +9377,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 448 - VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9404,7 +9404,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 449 - VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9431,7 +9431,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 450 - VVV1_0( w_fp[104], w_fp[45], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[45], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9450,7 +9450,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 451 - FFV1_0( w_fp[3], w_fp[44], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[44], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9465,7 +9465,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 452 - FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9478,7 +9478,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 453 - FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9491,7 +9491,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 454 - FFV1_0( w_fp[3], w_fp[89], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[89], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9506,7 +9506,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 455 - VVV1_0( w_fp[86], w_fp[1], w_fp[45], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[45], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9525,7 +9525,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 456 - FFV1_0( w_fp[3], w_fp[39], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[112], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9537,7 +9537,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[113], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[113], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9549,7 +9549,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[114], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[114], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9568,7 +9568,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 457 - FFV1_0( w_fp[41], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9583,7 +9583,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 458 - FFV1_0( w_fp[41], w_fp[105], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[105], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9596,7 +9596,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 459 - FFV1_0( w_fp[101], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[101], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9609,7 +9609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 460 - VVV1_0( w_fp[104], w_fp[51], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[51], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -9628,7 +9628,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 461 - FFV1_0( w_fp[3], w_fp[50], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[50], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9643,7 +9643,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 462 - FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9656,7 +9656,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 463 - FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9669,7 +9669,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 464 - FFV1_0( w_fp[3], w_fp[91], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[91], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9684,7 +9684,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 465 - VVV1_0( w_fp[62], w_fp[1], w_fp[51], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[51], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9703,7 +9703,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 466 - FFV1_0( w_fp[3], w_fp[47], w_fp[109], COUPs[1], 1.0, 
&_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[109], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9715,7 +9715,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[110], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[110], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9727,7 +9727,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[111], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9746,7 +9746,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 467 - FFV1_0( w_fp[38], w_fp[47], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9761,7 +9761,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 468 - FFV1_0( w_fp[38], w_fp[106], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[106], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9774,7 +9774,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 469 - FFV1_0( w_fp[98], w_fp[47], w_fp[1], 
COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9787,7 +9787,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 470 - VVV1_0( w_fp[104], w_fp[23], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[23], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9806,7 +9806,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 471 - FFV1_0( w_fp[48], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9821,7 +9821,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 472 - FFV1_0( w_fp[58], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9834,7 +9834,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 473 - FFV1_0( w_fp[48], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9847,7 +9847,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 474 - FFV1_0( w_fp[58], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9862,7 +9862,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 475 - VVV1_0( w_fp[86], w_fp[1], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9881,7 +9881,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 476 - FFV1_0( w_fp[38], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9893,7 +9893,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[113], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[113], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9905,7 +9905,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[114], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[114], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9924,7 +9924,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 477 - VVV1_0( w_fp[104], w_fp[20], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[20], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9943,7 +9943,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 478 - FFV1_0( w_fp[40], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9958,7 +9958,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 479 - FFV1_0( w_fp[60], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9971,7 +9971,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 480 - FFV1_0( w_fp[40], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9984,7 +9984,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 481 - FFV1_0( w_fp[60], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9999,7 +9999,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 482 - VVV1_0( w_fp[62], w_fp[1], w_fp[20], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[20], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10018,7 +10018,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 483 - FFV1_0( w_fp[41], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10030,7 +10030,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10042,7 +10042,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10061,7 +10061,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 484 - FFV1_0( w_fp[3], w_fp[18], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[18], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10080,7 +10080,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 485 - FFV1_0( w_fp[12], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10099,7 +10099,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 486 - FFV1_0( w_fp[3], w_fp[102], w_fp[67], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[67], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10118,7 +10118,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 487 - FFV1_0( w_fp[12], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10133,7 +10133,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 488 - FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10152,7 +10152,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 489 - FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10167,7 +10167,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 490 - FFV1_0( w_fp[3], w_fp[102], w_fp[55], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[55], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10179,7 +10179,7 @@ namespace mg5amcCpu jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( 
w_fp[3], w_fp[102], w_fp[83], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[83], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10191,7 +10191,7 @@ namespace mg5amcCpu jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[84], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[84], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10210,7 +10210,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 491 - FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10222,7 +10222,7 @@ namespace mg5amcCpu jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10234,7 +10234,7 @@ namespace mg5amcCpu jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-10253,7 +10253,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 492 - VVV1_0( w_fp[92], w_fp[55], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[55], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10273,7 +10273,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[83], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[83], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10293,7 +10293,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[84], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[84], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10317,11 +10317,11 @@ namespace mg5amcCpu // *** DIAGRAM 493 OF 1240 *** // Wavefunction(s) for diagram number 493 - VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[92] ); - FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); + VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 0., 0., w_fp[92] ); + FFV1_2( w_fp[3], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 493 - FFV1_0( w_fp[99], w_fp[87], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[87], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10334,7 +10334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 494 - FFV1_0( w_fp[99], w_fp[85], w_fp[4], 
COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[85], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10344,10 +10344,10 @@ namespace mg5amcCpu // *** DIAGRAM 495 OF 1240 *** // Wavefunction(s) for diagram number 495 - VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[102] ); + VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 0., 0., w_fp[102] ); // Amplitude(s) for diagram number 495 - VVV1_0( w_fp[102], w_fp[34], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[102], w_fp[34], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10366,7 +10366,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 496 - FFV1_0( w_fp[3], w_fp[85], w_fp[102], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[85], w_fp[102], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10378,10 +10378,10 @@ namespace mg5amcCpu // *** DIAGRAM 497 OF 1240 *** // Wavefunction(s) for diagram number 497 - VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 497 - VVV1_0( w_fp[104], w_fp[34], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[34], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10400,7 +10400,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 498 - FFV1_0( w_fp[3], w_fp[87], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[87], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // 
Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10412,12 +10412,12 @@ namespace mg5amcCpu // *** DIAGRAM 499 OF 1240 *** // Wavefunction(s) for diagram number 499 - VVVV1P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] ); - VVVV3P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[109] ); + VVVV1P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[111] ); + VVVV3P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[109] ); // Amplitude(s) for diagram number 499 - FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10429,7 +10429,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10441,7 +10441,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10457,10 +10457,10 @@ namespace mg5amcCpu // *** DIAGRAM 500 OF 1240 *** // Wavefunction(s) for 
diagram number 500 - FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] ); + FFV1_1( w_fp[77], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[62] ); // Amplitude(s) for diagram number 500 - FFV1_0( w_fp[46], w_fp[62], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[62], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10470,10 +10470,10 @@ namespace mg5amcCpu // *** DIAGRAM 501 OF 1240 *** // Wavefunction(s) for diagram number 501 - FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] ); + FFV1_2( w_fp[46], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[114] ); // Amplitude(s) for diagram number 501 - FFV1_0( w_fp[114], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[114], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10486,7 +10486,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 502 - FFV1_0( w_fp[46], w_fp[77], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10501,7 +10501,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 503 - FFV1_0( w_fp[41], w_fp[62], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[62], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10511,10 +10511,10 @@ namespace mg5amcCpu // *** DIAGRAM 504 OF 1240 *** // Wavefunction(s) for diagram number 504 - FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] ); 
+ FFV1_2( w_fp[41], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[113] ); // Amplitude(s) for diagram number 504 - FFV1_0( w_fp[113], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[113], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10527,7 +10527,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 505 - FFV1_0( w_fp[41], w_fp[77], w_fp[102], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[102], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10542,7 +10542,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 506 - FFV1_0( w_fp[3], w_fp[62], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[62], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10557,7 +10557,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 507 - FFV1_0( w_fp[99], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10569,10 +10569,10 @@ namespace mg5amcCpu // *** DIAGRAM 508 OF 1240 *** // Wavefunction(s) for diagram number 508 - VVV1P0_1( w_fp[92], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[62] ); + VVV1P0_1( w_fp[92], w_fp[27], COUPs[0], 0., 0., w_fp[62] ); // Amplitude(s) for diagram number 508 - FFV1_0( w_fp[3], w_fp[77], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -10588,10 +10588,10 @@ namespace mg5amcCpu // *** DIAGRAM 509 OF 1240 *** // Wavefunction(s) for diagram number 509 - FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[112] ); + FFV1_1( w_fp[2], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[112] ); // Amplitude(s) for diagram number 509 - FFV1_0( w_fp[56], w_fp[112], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[112], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10604,7 +10604,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 510 - FFV1_0( w_fp[21], w_fp[112], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[112], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10617,7 +10617,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 511 - VVV1_0( w_fp[102], w_fp[103], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[102], w_fp[103], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10636,7 +10636,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 512 - FFV1_0( w_fp[21], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10651,7 +10651,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 513 - VVV1_0( w_fp[104], w_fp[103], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[103], w_fp[4], COUPs[0], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10670,7 +10670,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 514 - FFV1_0( w_fp[56], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10685,7 +10685,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 515 - FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10697,7 +10697,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10709,7 +10709,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10725,10 +10725,10 @@ namespace mg5amcCpu // *** DIAGRAM 516 OF 1240 *** // Wavefunction(s) for diagram number 516 - FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] ); + FFV1_2( 
w_fp[52], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[86] ); // Amplitude(s) for diagram number 516 - FFV1_0( w_fp[86], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10738,10 +10738,10 @@ namespace mg5amcCpu // *** DIAGRAM 517 OF 1240 *** // Wavefunction(s) for diagram number 517 - FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); + FFV1_1( w_fp[33], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 517 - FFV1_0( w_fp[52], w_fp[98], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[98], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10754,7 +10754,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 518 - FFV1_0( w_fp[52], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10769,7 +10769,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 519 - FFV1_0( w_fp[86], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10779,10 +10779,10 @@ namespace mg5amcCpu // *** DIAGRAM 520 OF 1240 *** // Wavefunction(s) for diagram number 520 - FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] ); + FFV1_1( w_fp[47], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[106] ); // Amplitude(s) for diagram number 
520 - FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10795,7 +10795,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 521 - FFV1_0( w_fp[52], w_fp[47], w_fp[102], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[102], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10810,7 +10810,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 522 - FFV1_0( w_fp[86], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10825,7 +10825,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 523 - FFV1_0( w_fp[52], w_fp[112], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[112], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10840,7 +10840,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 524 - FFV1_0( w_fp[52], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10859,7 +10859,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 525 - FFV1_0( w_fp[65], w_fp[112], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[112], w_fp[6], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10874,7 +10874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 526 - FFV1_0( w_fp[3], w_fp[112], w_fp[64], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[64], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10893,7 +10893,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 527 - FFV1_0( w_fp[99], w_fp[93], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[93], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10908,7 +10908,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 528 - FFV1_0( w_fp[99], w_fp[2], w_fp[64], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[64], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10927,7 +10927,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 529 - FFV1_0( w_fp[3], w_fp[93], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[93], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10946,7 +10946,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 530 - FFV1_0( w_fp[65], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-10965,7 +10965,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 531 - VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10985,7 +10985,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11005,7 +11005,7 @@ namespace mg5amcCpu jamp_sv[105] -= amp_sv[0]; jamp_sv[110] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11029,10 +11029,10 @@ namespace mg5amcCpu // *** DIAGRAM 532 OF 1240 *** // Wavefunction(s) for diagram number 532 - VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 532 - VVV1_0( w_fp[8], w_fp[6], w_fp[86], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[86], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11056,10 +11056,10 @@ namespace mg5amcCpu // *** DIAGRAM 533 OF 1240 *** // Wavefunction(s) for diagram number 533 - VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[101] ); + VVV1P0_1( w_fp[92], 
w_fp[8], COUPs[0], 0., 0., w_fp[101] ); // Amplitude(s) for diagram number 533 - VVV1_0( w_fp[61], w_fp[6], w_fp[101], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[101], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11086,7 +11086,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 534 - VVV1_0( w_fp[61], w_fp[8], w_fp[104], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[8], w_fp[104], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11113,7 +11113,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 535 - FFV1_0( w_fp[3], w_fp[47], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11132,7 +11132,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 536 - FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11147,7 +11147,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 537 - FFV1_0( w_fp[99], w_fp[47], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11162,7 +11162,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 538 - FFV1_0( w_fp[41], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + 
FFV1_0( w_fp[41], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11181,7 +11181,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 539 - FFV1_0( w_fp[41], w_fp[112], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[112], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11196,7 +11196,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 540 - FFV1_0( w_fp[113], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[113], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11211,7 +11211,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 541 - FFV1_0( w_fp[76], w_fp[112], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[112], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11226,7 +11226,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 542 - FFV1_0( w_fp[3], w_fp[112], w_fp[74], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[74], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11245,7 +11245,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 543 - FFV1_0( w_fp[99], w_fp[97], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[97], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -11260,7 +11260,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 544 - FFV1_0( w_fp[99], w_fp[2], w_fp[74], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[74], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11279,7 +11279,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 545 - FFV1_0( w_fp[3], w_fp[97], w_fp[102], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[97], w_fp[102], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11298,7 +11298,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 546 - FFV1_0( w_fp[76], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11317,7 +11317,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 547 - VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11337,7 +11337,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[103] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11357,7 
+11357,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11381,10 +11381,10 @@ namespace mg5amcCpu // *** DIAGRAM 548 OF 1240 *** // Wavefunction(s) for diagram number 548 - VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 548 - VVV1_0( w_fp[8], w_fp[4], w_fp[86], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[86], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11411,7 +11411,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 549 - VVV1_0( w_fp[72], w_fp[4], w_fp[101], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[101], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11438,7 +11438,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 550 - VVV1_0( w_fp[72], w_fp[8], w_fp[102], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[8], w_fp[102], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11465,7 +11465,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 551 - FFV1_0( w_fp[3], w_fp[33], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11484,7 +11484,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 552 - FFV1_0( w_fp[3], w_fp[98], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[98], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11499,7 +11499,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 553 - FFV1_0( w_fp[99], w_fp[33], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11514,7 +11514,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 554 - FFV1_0( w_fp[46], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11533,7 +11533,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 555 - FFV1_0( w_fp[46], w_fp[112], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[112], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11548,7 +11548,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 556 - FFV1_0( w_fp[114], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[114], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11560,10 +11560,10 @@ namespace mg5amcCpu // *** DIAGRAM 557 
OF 1240 *** // Wavefunction(s) for diagram number 557 - VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 557 - VVV1_0( w_fp[86], w_fp[13], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[13], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11590,7 +11590,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 558 - VVV1_0( w_fp[86], w_fp[11], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[11], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11617,7 +11617,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 559 - VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11637,7 +11637,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11657,7 +11657,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11684,7 +11684,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 560 - VVV1_0( w_fp[102], w_fp[108], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[102], w_fp[108], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11711,7 +11711,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 561 - VVV1_0( w_fp[102], w_fp[1], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[102], w_fp[1], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11738,7 +11738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 562 - VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11758,7 +11758,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11778,7 +11778,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11805,7 +11805,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 563 - VVV1_0( w_fp[104], w_fp[108], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[108], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11832,7 +11832,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 564 - VVV1_0( w_fp[104], w_fp[1], w_fp[13], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[1], w_fp[13], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11859,7 +11859,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 565 - VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11879,7 +11879,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[101] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11899,7 +11899,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support 
updates numerators_sv and denominators_sv (#473) #endif @@ -11923,12 +11923,12 @@ namespace mg5amcCpu // *** DIAGRAM 566 OF 1240 *** // Wavefunction(s) for diagram number 566 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[105] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[107] ); // Amplitude(s) for diagram number 566 - VVV1_0( w_fp[8], w_fp[6], w_fp[105], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[105], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11948,7 +11948,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[95], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[95], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11968,7 +11968,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[107], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[107], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11992,12 +11992,12 @@ namespace mg5amcCpu // *** DIAGRAM 567 OF 1240 *** // Wavefunction(s) for diagram number 567 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., 
w_fp[90] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[96] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[90] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[88] ); // Amplitude(s) for diagram number 567 - VVV1_0( w_fp[8], w_fp[4], w_fp[96], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[96], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12017,7 +12017,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[103] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12037,7 +12037,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[88], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[88], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12064,7 +12064,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 568 - VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12084,7 +12084,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], 
w_fp[110], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12104,7 +12104,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[101] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12131,7 +12131,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 569 - VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12151,7 +12151,7 @@ namespace mg5amcCpu jamp_sv[110] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12171,7 +12171,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[110] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12198,7 +12198,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 570 - VVV1_0( w_fp[8], w_fp[27], w_fp[86], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[27], 
w_fp[86], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12225,7 +12225,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 571 - VVV1_0( w_fp[1], w_fp[27], w_fp[101], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[101], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12252,7 +12252,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 572 - VVV1_0( w_fp[1], w_fp[8], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12279,7 +12279,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 573 - VVV1_0( w_fp[86], w_fp[37], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[37], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12298,7 +12298,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 574 - FFV1_0( w_fp[3], w_fp[36], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[36], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12313,7 +12313,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 575 - FFV1_0( w_fp[99], w_fp[100], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[100], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -12326,7 +12326,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 576 - FFV1_0( w_fp[99], w_fp[36], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[36], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12339,7 +12339,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 577 - FFV1_0( w_fp[3], w_fp[100], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[100], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12354,7 +12354,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 578 - VVV1_0( w_fp[104], w_fp[1], w_fp[37], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[1], w_fp[37], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12373,7 +12373,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 579 - FFV1_0( w_fp[3], w_fp[33], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[96], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12385,7 +12385,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12397,7 +12397,7 @@ namespace 
mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[88], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[88], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12416,7 +12416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 580 - FFV1_0( w_fp[41], w_fp[33], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12431,7 +12431,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 581 - FFV1_0( w_fp[41], w_fp[98], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[98], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12444,7 +12444,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 582 - FFV1_0( w_fp[113], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[113], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12457,7 +12457,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 583 - VVV1_0( w_fp[86], w_fp[51], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[51], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12476,7 +12476,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 584 - FFV1_0( 
w_fp[3], w_fp[49], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[49], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12491,7 +12491,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 585 - FFV1_0( w_fp[99], w_fp[91], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[91], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12504,7 +12504,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 586 - FFV1_0( w_fp[99], w_fp[49], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[49], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12517,7 +12517,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 587 - FFV1_0( w_fp[3], w_fp[91], w_fp[102], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[91], w_fp[102], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12532,7 +12532,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 588 - VVV1_0( w_fp[102], w_fp[1], w_fp[51], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[102], w_fp[1], w_fp[51], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12551,7 +12551,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 589 - FFV1_0( w_fp[3], w_fp[47], w_fp[105], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[105], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12563,7 +12563,7 @@ namespace mg5amcCpu jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[95], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[95], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12575,7 +12575,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[107], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[107], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12594,7 +12594,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 590 - FFV1_0( w_fp[46], w_fp[47], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12609,7 +12609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 591 - FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12622,7 +12622,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 592 - FFV1_0( w_fp[114], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[114], w_fp[47], w_fp[1], 
COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12635,7 +12635,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 593 - VVV1_0( w_fp[86], w_fp[54], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[54], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12654,7 +12654,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 594 - FFV1_0( w_fp[53], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12669,7 +12669,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 595 - FFV1_0( w_fp[78], w_fp[112], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[112], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12682,7 +12682,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 596 - FFV1_0( w_fp[53], w_fp[112], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[112], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12695,7 +12695,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 597 - FFV1_0( w_fp[78], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -12710,7 +12710,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 598 - VVV1_0( w_fp[104], w_fp[1], w_fp[54], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[1], w_fp[54], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12729,7 +12729,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 599 - FFV1_0( w_fp[46], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12741,7 +12741,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12753,7 +12753,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[88], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[88], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12772,7 +12772,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 600 - VVV1_0( w_fp[86], w_fp[20], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[20], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -12791,7 +12791,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 601 - FFV1_0( w_fp[28], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12806,7 +12806,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 602 - FFV1_0( w_fp[60], w_fp[112], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[112], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12819,7 +12819,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 603 - FFV1_0( w_fp[28], w_fp[112], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[112], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12832,7 +12832,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 604 - FFV1_0( w_fp[60], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12847,7 +12847,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 605 - VVV1_0( w_fp[102], w_fp[1], w_fp[20], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[102], w_fp[1], w_fp[20], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12866,7 +12866,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 606 - 
FFV1_0( w_fp[41], w_fp[2], w_fp[105], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[105], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12878,7 +12878,7 @@ namespace mg5amcCpu jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12890,7 +12890,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[107], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[107], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12909,7 +12909,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 607 - FFV1_0( w_fp[3], w_fp[15], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[15], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12928,7 +12928,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 608 - FFV1_0( w_fp[14], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12947,7 +12947,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 609 - FFV1_0( w_fp[3], w_fp[112], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12966,7 +12966,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 610 - FFV1_0( w_fp[14], w_fp[112], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[112], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12981,7 +12981,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 611 - FFV1_0( w_fp[99], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13000,7 +13000,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 612 - FFV1_0( w_fp[99], w_fp[15], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[15], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13015,7 +13015,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 613 - FFV1_0( w_fp[3], w_fp[112], w_fp[57], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[57], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13027,7 +13027,7 @@ namespace mg5amcCpu jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[112], w_fp[81], COUPs[1], 1.0, 
&_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[81], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13039,7 +13039,7 @@ namespace mg5amcCpu jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[112], w_fp[82], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[82], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13058,7 +13058,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 614 - FFV1_0( w_fp[99], w_fp[2], w_fp[57], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[57], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13070,7 +13070,7 @@ namespace mg5amcCpu jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13082,7 +13082,7 @@ namespace mg5amcCpu jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13101,7 +13101,7 @@ namespace mg5amcCpu // (none) 
// Amplitude(s) for diagram number 615 - VVV1_0( w_fp[92], w_fp[57], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[57], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13121,7 +13121,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[81], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[81], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13141,7 +13141,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[82], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[82], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13165,11 +13165,11 @@ namespace mg5amcCpu // *** DIAGRAM 616 OF 1240 *** // Wavefunction(s) for diagram number 616 - VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[92] ); - FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); + VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 0., 0., w_fp[92] ); + FFV1_2( w_fp[3], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 616 - FFV1_0( w_fp[99], w_fp[87], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[87], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13182,7 +13182,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 617 - FFV1_0( w_fp[99], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], 
w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13192,10 +13192,10 @@ namespace mg5amcCpu // *** DIAGRAM 618 OF 1240 *** // Wavefunction(s) for diagram number 618 - VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[112] ); + VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 0., 0., w_fp[112] ); // Amplitude(s) for diagram number 618 - VVV1_0( w_fp[112], w_fp[34], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[112], w_fp[34], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13214,7 +13214,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 619 - FFV1_0( w_fp[3], w_fp[9], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[112], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13226,10 +13226,10 @@ namespace mg5amcCpu // *** DIAGRAM 620 OF 1240 *** // Wavefunction(s) for diagram number 620 - VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 620 - VVV1_0( w_fp[86], w_fp[34], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[34], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13248,7 +13248,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 621 - FFV1_0( w_fp[3], w_fp[87], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[87], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support 
updates numerators_sv and denominators_sv (#473) #endif @@ -13260,12 +13260,12 @@ namespace mg5amcCpu // *** DIAGRAM 622 OF 1240 *** // Wavefunction(s) for diagram number 622 - VVVV1P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[107] ); - VVVV3P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[105] ); + VVVV1P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[107] ); + VVVV3P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[105] ); // Amplitude(s) for diagram number 622 - FFV1_0( w_fp[3], w_fp[77], w_fp[107], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[107], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13277,7 +13277,7 @@ namespace mg5amcCpu jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13289,7 +13289,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[105], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[105], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13305,10 +13305,10 @@ namespace mg5amcCpu // *** DIAGRAM 623 OF 1240 *** // Wavefunction(s) for diagram number 623 - FFV1_1( w_fp[77], w_fp[92], COUPs[1], 
1.0, cIPD[0], cIPD[1], w_fp[102] ); + FFV1_1( w_fp[77], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[102] ); // Amplitude(s) for diagram number 623 - FFV1_0( w_fp[46], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13318,10 +13318,10 @@ namespace mg5amcCpu // *** DIAGRAM 624 OF 1240 *** // Wavefunction(s) for diagram number 624 - FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] ); + FFV1_2( w_fp[46], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[88] ); // Amplitude(s) for diagram number 624 - FFV1_0( w_fp[88], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13334,7 +13334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 625 - FFV1_0( w_fp[46], w_fp[77], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13349,7 +13349,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 626 - FFV1_0( w_fp[38], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13359,10 +13359,10 @@ namespace mg5amcCpu // *** DIAGRAM 627 OF 1240 *** // Wavefunction(s) for diagram number 627 - FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] ); + FFV1_2( w_fp[38], w_fp[92], COUPs[1], cIPD[0], cIPD[1], 
w_fp[90] ); // Amplitude(s) for diagram number 627 - FFV1_0( w_fp[90], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13375,7 +13375,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 628 - FFV1_0( w_fp[38], w_fp[77], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[112], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13390,7 +13390,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 629 - FFV1_0( w_fp[3], w_fp[102], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13405,7 +13405,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 630 - FFV1_0( w_fp[99], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13417,10 +13417,10 @@ namespace mg5amcCpu // *** DIAGRAM 631 OF 1240 *** // Wavefunction(s) for diagram number 631 - VVV1P0_1( w_fp[92], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[102] ); + VVV1P0_1( w_fp[92], w_fp[24], COUPs[0], 0., 0., w_fp[102] ); // Amplitude(s) for diagram number 631 - FFV1_0( w_fp[3], w_fp[77], w_fp[102], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[102], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -13436,10 +13436,10 @@ namespace mg5amcCpu // *** DIAGRAM 632 OF 1240 *** // Wavefunction(s) for diagram number 632 - FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[96] ); + FFV1_1( w_fp[2], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[96] ); // Amplitude(s) for diagram number 632 - FFV1_0( w_fp[56], w_fp[96], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[96], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13452,7 +13452,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 633 - FFV1_0( w_fp[22], w_fp[96], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[96], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13465,7 +13465,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 634 - VVV1_0( w_fp[112], w_fp[103], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[112], w_fp[103], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13484,7 +13484,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 635 - FFV1_0( w_fp[22], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13499,7 +13499,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 636 - VVV1_0( w_fp[86], w_fp[103], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[103], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13518,7 +13518,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 637 - FFV1_0( w_fp[56], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13533,7 +13533,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 638 - FFV1_0( w_fp[52], w_fp[2], w_fp[107], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[107], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13545,7 +13545,7 @@ namespace mg5amcCpu jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13557,7 +13557,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[105], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[105], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13573,10 +13573,10 @@ namespace mg5amcCpu // *** DIAGRAM 639 OF 1240 *** // Wavefunction(s) for diagram number 639 - FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] ); + FFV1_2( w_fp[52], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[104] ); // 
Amplitude(s) for diagram number 639 - FFV1_0( w_fp[104], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13586,10 +13586,10 @@ namespace mg5amcCpu // *** DIAGRAM 640 OF 1240 *** // Wavefunction(s) for diagram number 640 - FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] ); + FFV1_1( w_fp[33], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[114] ); // Amplitude(s) for diagram number 640 - FFV1_0( w_fp[52], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13602,7 +13602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 641 - FFV1_0( w_fp[52], w_fp[33], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13617,7 +13617,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 642 - FFV1_0( w_fp[104], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13627,10 +13627,10 @@ namespace mg5amcCpu // *** DIAGRAM 643 OF 1240 *** // Wavefunction(s) for diagram number 643 - FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] ); + FFV1_1( w_fp[39], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[106] ); // Amplitude(s) for diagram number 643 - FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, 
&_fp[0] ); + FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13643,7 +13643,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 644 - FFV1_0( w_fp[52], w_fp[39], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[112], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13658,7 +13658,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 645 - FFV1_0( w_fp[104], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13673,7 +13673,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 646 - FFV1_0( w_fp[52], w_fp[96], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[96], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13688,7 +13688,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 647 - FFV1_0( w_fp[52], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13707,7 +13707,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 648 - FFV1_0( w_fp[65], w_fp[96], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[96], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13722,7 +13722,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 649 - FFV1_0( w_fp[3], w_fp[96], w_fp[63], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[63], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13741,7 +13741,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 650 - FFV1_0( w_fp[99], w_fp[93], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[93], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13756,7 +13756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 651 - FFV1_0( w_fp[99], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13775,7 +13775,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 652 - FFV1_0( w_fp[3], w_fp[93], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[93], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13794,7 +13794,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 653 - FFV1_0( w_fp[65], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13813,7 +13813,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) 
for diagram number 654 - VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13833,7 +13833,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[96] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13853,7 +13853,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13877,10 +13877,10 @@ namespace mg5amcCpu // *** DIAGRAM 655 OF 1240 *** // Wavefunction(s) for diagram number 655 - VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 655 - VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13904,10 +13904,10 @@ namespace mg5amcCpu // *** DIAGRAM 656 OF 1240 *** // Wavefunction(s) for diagram number 656 - VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[113] ); + VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 0., 0., w_fp[113] ); // Amplitude(s) for diagram 
number 656 - VVV1_0( w_fp[61], w_fp[5], w_fp[113], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[113], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13934,7 +13934,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 657 - VVV1_0( w_fp[61], w_fp[8], w_fp[86], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[8], w_fp[86], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13961,7 +13961,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 658 - FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13980,7 +13980,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 659 - FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13995,7 +13995,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 660 - FFV1_0( w_fp[99], w_fp[39], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14010,7 +14010,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 661 - FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14029,7 +14029,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 662 - FFV1_0( w_fp[38], w_fp[96], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[96], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14044,7 +14044,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 663 - FFV1_0( w_fp[90], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14059,7 +14059,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 664 - FFV1_0( w_fp[71], w_fp[96], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[96], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14074,7 +14074,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 665 - FFV1_0( w_fp[3], w_fp[96], w_fp[69], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[69], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14093,7 +14093,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 666 - FFV1_0( w_fp[99], w_fp[94], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[94], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-14108,7 +14108,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 667 - FFV1_0( w_fp[99], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14127,7 +14127,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 668 - FFV1_0( w_fp[3], w_fp[94], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[94], w_fp[112], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14146,7 +14146,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 669 - FFV1_0( w_fp[71], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14165,7 +14165,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 670 - VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14185,7 +14185,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[97] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14205,7 +14205,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[99] 
-= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14229,10 +14229,10 @@ namespace mg5amcCpu // *** DIAGRAM 671 OF 1240 *** // Wavefunction(s) for diagram number 671 - VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 671 - VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14259,7 +14259,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 672 - VVV1_0( w_fp[66], w_fp[4], w_fp[113], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[113], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14286,7 +14286,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 673 - VVV1_0( w_fp[66], w_fp[8], w_fp[112], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[8], w_fp[112], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14313,7 +14313,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 674 - FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -14332,7 +14332,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 675 - FFV1_0( w_fp[3], w_fp[114], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14347,7 +14347,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 676 - FFV1_0( w_fp[99], w_fp[33], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14362,7 +14362,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 677 - FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14381,7 +14381,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 678 - FFV1_0( w_fp[46], w_fp[96], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[96], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14396,7 +14396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 679 - FFV1_0( w_fp[88], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14408,10 +14408,10 @@ namespace mg5amcCpu // *** DIAGRAM 680 OF 1240 *** // Wavefunction(s) for diagram number 
680 - VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 680 - VVV1_0( w_fp[104], w_fp[13], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[13], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14438,7 +14438,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 681 - VVV1_0( w_fp[104], w_fp[10], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[10], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14465,7 +14465,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 682 - VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14485,7 +14485,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14505,7 +14505,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -14532,7 +14532,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 683 - VVV1_0( w_fp[112], w_fp[108], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[112], w_fp[108], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14559,7 +14559,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 684 - VVV1_0( w_fp[112], w_fp[1], w_fp[10], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[112], w_fp[1], w_fp[10], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14586,7 +14586,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 685 - VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14606,7 +14606,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14626,7 +14626,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -14653,7 +14653,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 686 - VVV1_0( w_fp[86], w_fp[108], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[108], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14680,7 +14680,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 687 - VVV1_0( w_fp[86], w_fp[1], w_fp[13], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[13], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14707,7 +14707,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 688 - VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14727,7 +14727,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14747,7 +14747,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-14771,12 +14771,12 @@ namespace mg5amcCpu // *** DIAGRAM 689 OF 1240 *** // Wavefunction(s) for diagram number 689 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[98] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[62] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[101] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[98] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[62] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[101] ); // Amplitude(s) for diagram number 689 - VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14796,7 +14796,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[96] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14816,7 +14816,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[101], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[101], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14840,12 +14840,12 @@ namespace mg5amcCpu // *** DIAGRAM 690 OF 1240 *** // Wavefunction(s) for diagram number 690 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 
0., w_fp[111] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[109] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 690 - VVV1_0( w_fp[8], w_fp[4], w_fp[109], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[109], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14865,7 +14865,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[97] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[110], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[110], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14885,7 +14885,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[111], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[111], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14912,7 +14912,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 691 - VVV1_0( w_fp[1], w_fp[8], w_fp[107], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[107], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14932,7 +14932,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[99] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
// Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14952,7 +14952,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[105], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[105], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14979,7 +14979,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 692 - VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14999,7 +14999,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[96] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15019,7 +15019,7 @@ namespace mg5amcCpu jamp_sv[97] += amp_sv[0]; jamp_sv[99] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15046,7 +15046,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 693 - VVV1_0( w_fp[8], w_fp[24], w_fp[104], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[24], w_fp[104], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // 
Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15073,7 +15073,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 694 - VVV1_0( w_fp[1], w_fp[24], w_fp[113], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[113], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15100,7 +15100,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 695 - VVV1_0( w_fp[1], w_fp[8], w_fp[102], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[102], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15127,7 +15127,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 696 - VVV1_0( w_fp[104], w_fp[37], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[37], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15146,7 +15146,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 697 - FFV1_0( w_fp[3], w_fp[35], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[35], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15161,7 +15161,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 698 - FFV1_0( w_fp[99], w_fp[100], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[100], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15174,7 +15174,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 699 - FFV1_0( w_fp[99], w_fp[35], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[35], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15187,7 +15187,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 700 - FFV1_0( w_fp[3], w_fp[100], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[100], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15202,7 +15202,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 701 - VVV1_0( w_fp[86], w_fp[1], w_fp[37], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[37], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15221,7 +15221,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 702 - FFV1_0( w_fp[3], w_fp[33], w_fp[109], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[109], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15233,7 +15233,7 @@ namespace mg5amcCpu jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[110], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[110], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15245,7 +15245,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[111], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15264,7 +15264,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 703 - FFV1_0( w_fp[38], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15279,7 +15279,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 704 - FFV1_0( w_fp[38], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15292,7 +15292,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 705 - FFV1_0( w_fp[90], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15305,7 +15305,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 706 - VVV1_0( w_fp[104], w_fp[45], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[45], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15324,7 +15324,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 707 - FFV1_0( w_fp[3], w_fp[43], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + 
FFV1_0( w_fp[3], w_fp[43], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15339,7 +15339,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 708 - FFV1_0( w_fp[99], w_fp[89], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[89], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15352,7 +15352,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 709 - FFV1_0( w_fp[99], w_fp[43], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[43], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15365,7 +15365,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 710 - FFV1_0( w_fp[3], w_fp[89], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[89], w_fp[112], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15380,7 +15380,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 711 - VVV1_0( w_fp[112], w_fp[1], w_fp[45], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[112], w_fp[1], w_fp[45], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15399,7 +15399,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 712 - FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -15411,7 +15411,7 @@ namespace mg5amcCpu jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15423,7 +15423,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[101], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[101], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15442,7 +15442,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 713 - FFV1_0( w_fp[46], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15457,7 +15457,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 714 - FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15470,7 +15470,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 715 - FFV1_0( w_fp[88], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[88], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15483,7 +15483,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 716 - VVV1_0( w_fp[104], w_fp[54], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[54], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15502,7 +15502,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 717 - FFV1_0( w_fp[7], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15517,7 +15517,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 718 - FFV1_0( w_fp[78], w_fp[96], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[96], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15530,7 +15530,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 719 - FFV1_0( w_fp[7], w_fp[96], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[96], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15543,7 +15543,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 720 - FFV1_0( w_fp[78], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15558,7 +15558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 721 - VVV1_0( w_fp[86], w_fp[1], w_fp[54], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[54], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15577,7 +15577,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 722 - FFV1_0( w_fp[46], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15589,7 +15589,7 @@ namespace mg5amcCpu jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15601,7 +15601,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15620,7 +15620,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 723 - VVV1_0( w_fp[104], w_fp[23], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[104], w_fp[23], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15639,7 +15639,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 724 - FFV1_0( w_fp[25], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15654,7 +15654,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 725 - FFV1_0( w_fp[58], w_fp[96], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[96], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15667,7 +15667,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 726 - FFV1_0( w_fp[25], w_fp[96], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[96], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15680,7 +15680,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 727 - FFV1_0( w_fp[58], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15695,7 +15695,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 728 - VVV1_0( w_fp[112], w_fp[1], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[112], w_fp[1], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15714,7 +15714,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 729 - FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[98], 
COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15726,7 +15726,7 @@ namespace mg5amcCpu jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15738,7 +15738,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15757,7 +15757,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 730 - FFV1_0( w_fp[3], w_fp[17], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15776,7 +15776,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 731 - FFV1_0( w_fp[26], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15795,7 +15795,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 732 - FFV1_0( w_fp[3], w_fp[96], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], 
w_fp[96], w_fp[59], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15814,7 +15814,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 733 - FFV1_0( w_fp[26], w_fp[96], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[96], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15829,7 +15829,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 734 - FFV1_0( w_fp[99], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15848,7 +15848,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 735 - FFV1_0( w_fp[99], w_fp[17], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[17], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15863,7 +15863,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 736 - FFV1_0( w_fp[3], w_fp[96], w_fp[73], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[73], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15875,7 +15875,7 @@ namespace mg5amcCpu jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[96], w_fp[79], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[79], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15887,7 +15887,7 @@ namespace mg5amcCpu jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[96], w_fp[80], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[80], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15906,7 +15906,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 737 - FFV1_0( w_fp[99], w_fp[2], w_fp[73], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[73], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15918,7 +15918,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15930,7 +15930,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15949,7 +15949,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 738 - VVV1_0( w_fp[92], w_fp[73], w_fp[8], 
COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[73], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15969,7 +15969,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[92], w_fp[79], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[79], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15989,7 +15989,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[92], w_fp[80], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[92], w_fp[80], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16013,10 +16013,10 @@ namespace mg5amcCpu // *** DIAGRAM 739 OF 1240 *** // Wavefunction(s) for diagram number 739 - FFV1_1( w_fp[77], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[92] ); + FFV1_1( w_fp[77], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[92] ); // Amplitude(s) for diagram number 739 - FFV1_0( w_fp[7], w_fp[92], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[92], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16028,7 +16028,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 740 - FFV1_0( w_fp[53], w_fp[92], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[92], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16037,10 
+16037,10 @@ namespace mg5amcCpu // *** DIAGRAM 741 OF 1240 *** // Wavefunction(s) for diagram number 741 - FFV1_2( w_fp[46], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); + FFV1_2( w_fp[46], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 741 - FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16052,7 +16052,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 742 - FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16064,7 +16064,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 743 - FFV1_0( w_fp[53], w_fp[9], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[9], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16076,7 +16076,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 744 - FFV1_0( w_fp[7], w_fp[85], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[85], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16088,7 +16088,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 745 - FFV1_0( w_fp[46], w_fp[92], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[92], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -16101,7 +16101,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 746 - FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16111,10 +16111,10 @@ namespace mg5amcCpu // *** DIAGRAM 747 OF 1240 *** // Wavefunction(s) for diagram number 747 - VVV1P0_1( w_fp[0], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[96] ); + VVV1P0_1( w_fp[0], w_fp[29], COUPs[0], 0., 0., w_fp[96] ); // Amplitude(s) for diagram number 747 - FFV1_0( w_fp[46], w_fp[77], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[96], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16129,7 +16129,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 748 - FFV1_0( w_fp[25], w_fp[92], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[92], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16141,7 +16141,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 749 - FFV1_0( w_fp[48], w_fp[92], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[92], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16150,10 +16150,10 @@ namespace mg5amcCpu // *** DIAGRAM 750 OF 1240 *** // Wavefunction(s) for diagram number 750 - FFV1_2( w_fp[38], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] ); + FFV1_2( w_fp[38], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[104] ); // Amplitude(s) 
for diagram number 750 - FFV1_0( w_fp[104], w_fp[87], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[87], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16165,7 +16165,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 751 - FFV1_0( w_fp[104], w_fp[85], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[85], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16177,7 +16177,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 752 - FFV1_0( w_fp[48], w_fp[87], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[87], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16189,7 +16189,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 753 - FFV1_0( w_fp[25], w_fp[85], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[85], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16201,7 +16201,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 754 - FFV1_0( w_fp[38], w_fp[92], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[92], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16214,7 +16214,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 755 - FFV1_0( w_fp[104], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[77], w_fp[27], COUPs[1], &_fp[0] 
); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16224,10 +16224,10 @@ namespace mg5amcCpu // *** DIAGRAM 756 OF 1240 *** // Wavefunction(s) for diagram number 756 - VVV1P0_1( w_fp[0], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[101] ); + VVV1P0_1( w_fp[0], w_fp[27], COUPs[0], 0., 0., w_fp[101] ); // Amplitude(s) for diagram number 756 - FFV1_0( w_fp[38], w_fp[77], w_fp[101], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[101], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16242,7 +16242,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 757 - FFV1_0( w_fp[28], w_fp[92], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[92], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16254,7 +16254,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 758 - FFV1_0( w_fp[40], w_fp[92], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[92], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16263,10 +16263,10 @@ namespace mg5amcCpu // *** DIAGRAM 759 OF 1240 *** // Wavefunction(s) for diagram number 759 - FFV1_2( w_fp[41], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] ); + FFV1_2( w_fp[41], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[62] ); // Amplitude(s) for diagram number 759 - FFV1_0( w_fp[62], w_fp[87], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[87], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -16278,7 +16278,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 760 - FFV1_0( w_fp[62], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16290,7 +16290,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 761 - FFV1_0( w_fp[40], w_fp[87], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[87], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16302,7 +16302,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 762 - FFV1_0( w_fp[28], w_fp[9], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[9], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16314,7 +16314,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 763 - FFV1_0( w_fp[41], w_fp[92], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[92], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16327,7 +16327,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 764 - FFV1_0( w_fp[62], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16337,10 +16337,10 @@ namespace mg5amcCpu // *** DIAGRAM 765 OF 1240 *** // Wavefunction(s) for 
diagram number 765 - VVV1P0_1( w_fp[0], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[98] ); + VVV1P0_1( w_fp[0], w_fp[24], COUPs[0], 0., 0., w_fp[98] ); // Amplitude(s) for diagram number 765 - FFV1_0( w_fp[41], w_fp[77], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16355,7 +16355,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 766 - FFV1_0( w_fp[26], w_fp[92], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[92], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16368,7 +16368,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 767 - FFV1_0( w_fp[3], w_fp[92], w_fp[42], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[42], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16383,7 +16383,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 768 - VVV1_0( w_fp[98], w_fp[34], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[98], w_fp[34], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16402,7 +16402,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 769 - FFV1_0( w_fp[3], w_fp[85], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[85], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16417,7 +16417,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 770 - VVV1_0( w_fp[0], w_fp[34], w_fp[42], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[34], w_fp[42], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16436,7 +16436,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 771 - FFV1_0( w_fp[26], w_fp[85], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[85], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16446,12 +16446,12 @@ namespace mg5amcCpu // *** DIAGRAM 772 OF 1240 *** // Wavefunction(s) for diagram number 772 - VVVV1P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[85] ); - VVVV3P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] ); - VVVV4P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 0., 0., w_fp[85] ); + VVVV3P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 0., 0., w_fp[112] ); + VVVV4P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 772 - FFV1_0( w_fp[3], w_fp[77], w_fp[85], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[85], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16463,7 +16463,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[112], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -16475,7 +16475,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16494,7 +16494,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 773 - FFV1_0( w_fp[14], w_fp[92], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[92], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16507,7 +16507,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 774 - FFV1_0( w_fp[3], w_fp[92], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16522,7 +16522,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 775 - VVV1_0( w_fp[101], w_fp[34], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[101], w_fp[34], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16541,7 +16541,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 776 - FFV1_0( w_fp[3], w_fp[9], w_fp[101], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[101], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16556,7 +16556,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 777 - VVV1_0( w_fp[0], w_fp[34], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[34], w_fp[16], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16575,7 +16575,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 778 - FFV1_0( w_fp[14], w_fp[9], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[9], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16585,12 +16585,12 @@ namespace mg5amcCpu // *** DIAGRAM 779 OF 1240 *** // Wavefunction(s) for diagram number 779 - VVVV1P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[9] ); - VVVV3P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] ); + VVVV1P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 0., 0., w_fp[9] ); + VVVV3P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 0., 0., w_fp[109] ); // Amplitude(s) for diagram number 779 - FFV1_0( w_fp[3], w_fp[77], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16602,7 +16602,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -16614,7 +16614,7 @@ namespace mg5amcCpu jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16633,7 +16633,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 780 - FFV1_0( w_fp[12], w_fp[92], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[92], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16646,7 +16646,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 781 - FFV1_0( w_fp[3], w_fp[92], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[19], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16661,7 +16661,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 782 - VVV1_0( w_fp[96], w_fp[34], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[96], w_fp[34], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16680,7 +16680,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 783 - FFV1_0( w_fp[3], w_fp[87], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[87], w_fp[96], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16695,7 +16695,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 784 - VVV1_0( w_fp[0], w_fp[34], w_fp[19], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[34], w_fp[19], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16714,7 +16714,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 785 - FFV1_0( w_fp[12], w_fp[87], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[87], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16724,12 +16724,12 @@ namespace mg5amcCpu // *** DIAGRAM 786 OF 1240 *** // Wavefunction(s) for diagram number 786 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[87] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[34] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[86] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 0., 0., w_fp[87] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 0., 0., w_fp[34] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 786 - FFV1_0( w_fp[3], w_fp[77], w_fp[87], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[87], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16741,7 +16741,7 @@ namespace mg5amcCpu jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[34], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[34], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -16753,7 +16753,7 @@ namespace mg5amcCpu jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16772,7 +16772,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 787 - FFV1_0( w_fp[3], w_fp[92], w_fp[30], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[30], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16780,7 +16780,7 @@ namespace mg5amcCpu jamp_sv[25] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[92], w_fp[31], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[31], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16788,7 +16788,7 @@ namespace mg5amcCpu jamp_sv[26] += amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[92], w_fp[32], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[32], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16800,12 +16800,12 @@ namespace mg5amcCpu // *** DIAGRAM 788 OF 1240 *** // Wavefunction(s) for diagram number 788 - VVV1P0_1( w_fp[0], w_fp[30], COUPs[0], 1.0, 0., 0., w_fp[92] ); - VVV1P0_1( w_fp[0], w_fp[31], COUPs[0], 1.0, 0., 0., w_fp[88] ); - VVV1P0_1( w_fp[0], w_fp[32], COUPs[0], 1.0, 0., 0., w_fp[106] ); + VVV1P0_1( w_fp[0], w_fp[30], COUPs[0], 0., 0., w_fp[92] ); 
+ VVV1P0_1( w_fp[0], w_fp[31], COUPs[0], 0., 0., w_fp[88] ); + VVV1P0_1( w_fp[0], w_fp[32], COUPs[0], 0., 0., w_fp[106] ); // Amplitude(s) for diagram number 788 - FFV1_0( w_fp[3], w_fp[77], w_fp[92], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[92], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16817,7 +16817,7 @@ namespace mg5amcCpu jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16829,7 +16829,7 @@ namespace mg5amcCpu jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[106], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[106], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16845,10 +16845,10 @@ namespace mg5amcCpu // *** DIAGRAM 789 OF 1240 *** // Wavefunction(s) for diagram number 789 - FFV1_2( w_fp[52], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] ); + FFV1_2( w_fp[52], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[90] ); // Amplitude(s) for diagram number 789 - FFV1_0( w_fp[90], w_fp[35], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[35], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16860,7 +16860,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 790 - FFV1_0( w_fp[90], w_fp[36], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[36], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16869,10 +16869,10 @@ namespace mg5amcCpu // *** DIAGRAM 791 OF 1240 *** // Wavefunction(s) for diagram number 791 - FFV1_1( w_fp[33], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] ); + FFV1_1( w_fp[33], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[114] ); // Amplitude(s) for diagram number 791 - FFV1_0( w_fp[22], w_fp[114], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[114], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16884,7 +16884,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 792 - FFV1_0( w_fp[21], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16896,7 +16896,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 793 - FFV1_0( w_fp[22], w_fp[36], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[36], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16908,7 +16908,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 794 - FFV1_0( w_fp[21], w_fp[35], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[35], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16920,7 +16920,7 
@@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 795 - FFV1_0( w_fp[90], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16933,7 +16933,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 796 - FFV1_0( w_fp[52], w_fp[114], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[114], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16946,7 +16946,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 797 - FFV1_0( w_fp[52], w_fp[33], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[96], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16961,7 +16961,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 798 - FFV1_0( w_fp[90], w_fp[43], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[43], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16973,7 +16973,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 799 - FFV1_0( w_fp[90], w_fp[44], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[44], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16982,10 +16982,10 @@ namespace mg5amcCpu // *** DIAGRAM 800 OF 1240 *** // Wavefunction(s) for diagram number 800 - FFV1_1( w_fp[39], w_fp[0], COUPs[1], 1.0, 
cIPD[0], cIPD[1], w_fp[102] ); + FFV1_1( w_fp[39], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[102] ); // Amplitude(s) for diagram number 800 - FFV1_0( w_fp[56], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16997,7 +16997,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 801 - FFV1_0( w_fp[21], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17009,7 +17009,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 802 - FFV1_0( w_fp[56], w_fp[44], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[44], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17021,7 +17021,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 803 - FFV1_0( w_fp[21], w_fp[43], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[43], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17033,7 +17033,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 804 - FFV1_0( w_fp[90], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17046,7 +17046,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 805 - 
FFV1_0( w_fp[52], w_fp[102], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[102], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17059,7 +17059,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 806 - FFV1_0( w_fp[52], w_fp[39], w_fp[101], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[101], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17074,7 +17074,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 807 - FFV1_0( w_fp[90], w_fp[49], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[49], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17086,7 +17086,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 808 - FFV1_0( w_fp[90], w_fp[50], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[50], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17095,10 +17095,10 @@ namespace mg5amcCpu // *** DIAGRAM 809 OF 1240 *** // Wavefunction(s) for diagram number 809 - FFV1_1( w_fp[47], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] ); + FFV1_1( w_fp[47], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[113] ); // Amplitude(s) for diagram number 809 - FFV1_0( w_fp[56], w_fp[113], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[113], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17110,7 +17110,7 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 810 - FFV1_0( w_fp[22], w_fp[113], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[113], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17122,7 +17122,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 811 - FFV1_0( w_fp[56], w_fp[50], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[50], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17134,7 +17134,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 812 - FFV1_0( w_fp[22], w_fp[49], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[49], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17146,7 +17146,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 813 - FFV1_0( w_fp[90], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17159,7 +17159,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 814 - FFV1_0( w_fp[52], w_fp[113], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[113], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17172,7 +17172,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 815 - FFV1_0( w_fp[52], w_fp[47], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( 
w_fp[52], w_fp[47], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17187,7 +17187,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 816 - FFV1_0( w_fp[90], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17200,7 +17200,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 817 - FFV1_0( w_fp[90], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17215,7 +17215,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 818 - VVV1_0( w_fp[98], w_fp[103], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[98], w_fp[103], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17234,7 +17234,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 819 - FFV1_0( w_fp[21], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17249,7 +17249,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 820 - VVV1_0( w_fp[0], w_fp[103], w_fp[42], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[103], w_fp[42], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support 
updates numerators_sv and denominators_sv (#473) #endif @@ -17268,7 +17268,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 821 - FFV1_0( w_fp[21], w_fp[17], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[17], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17281,7 +17281,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 822 - FFV1_0( w_fp[52], w_fp[2], w_fp[85], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[85], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17293,7 +17293,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17305,7 +17305,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17324,7 +17324,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 823 - FFV1_0( w_fp[90], w_fp[15], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[15], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17337,7 +17337,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 824 - FFV1_0( w_fp[90], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17352,7 +17352,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 825 - VVV1_0( w_fp[101], w_fp[103], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[101], w_fp[103], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17371,7 +17371,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 826 - FFV1_0( w_fp[22], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17386,7 +17386,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 827 - VVV1_0( w_fp[0], w_fp[103], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[103], w_fp[16], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17405,7 +17405,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 828 - FFV1_0( w_fp[22], w_fp[15], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[15], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17418,7 +17418,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) 
for diagram number 829 - FFV1_0( w_fp[52], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17430,7 +17430,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17442,7 +17442,7 @@ namespace mg5amcCpu jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17461,7 +17461,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 830 - FFV1_0( w_fp[90], w_fp[18], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[18], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17474,7 +17474,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 831 - FFV1_0( w_fp[90], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17489,7 +17489,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 832 - VVV1_0( w_fp[96], w_fp[103], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[96], w_fp[103], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17508,7 +17508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 833 - FFV1_0( w_fp[56], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17523,7 +17523,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 834 - VVV1_0( w_fp[0], w_fp[103], w_fp[19], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[103], w_fp[19], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17542,7 +17542,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 835 - FFV1_0( w_fp[56], w_fp[18], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[56], w_fp[18], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17555,7 +17555,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 836 - FFV1_0( w_fp[52], w_fp[2], w_fp[87], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[87], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17567,7 +17567,7 @@ namespace mg5amcCpu jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], 
w_fp[34], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[34], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17579,7 +17579,7 @@ namespace mg5amcCpu jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17598,7 +17598,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 837 - FFV1_0( w_fp[90], w_fp[2], w_fp[30], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[30], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17606,7 +17606,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - FFV1_0( w_fp[90], w_fp[2], w_fp[31], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[31], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17614,7 +17614,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - FFV1_0( w_fp[90], w_fp[2], w_fp[32], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[32], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17629,7 +17629,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 838 - FFV1_0( w_fp[52], w_fp[2], w_fp[92], 
COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[92], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17641,7 +17641,7 @@ namespace mg5amcCpu jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17653,7 +17653,7 @@ namespace mg5amcCpu jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[106], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[106], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17669,10 +17669,10 @@ namespace mg5amcCpu // *** DIAGRAM 839 OF 1240 *** // Wavefunction(s) for diagram number 839 - VVV1P0_1( w_fp[0], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[90] ); + VVV1P0_1( w_fp[0], w_fp[61], COUPs[0], 0., 0., w_fp[90] ); // Amplitude(s) for diagram number 839 - VVV1_0( w_fp[90], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[90], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17699,7 +17699,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 840 - VVV1_0( w_fp[90], w_fp[11], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[90], w_fp[11], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17726,7 +17726,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 841 - VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17746,7 +17746,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17766,7 +17766,7 @@ namespace mg5amcCpu jamp_sv[115] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17790,10 +17790,10 @@ namespace mg5amcCpu // *** DIAGRAM 842 OF 1240 *** // Wavefunction(s) for diagram number 842 - VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[56] ); + VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 0., 0., w_fp[56] ); // Amplitude(s) for diagram number 842 - VVV1_0( w_fp[56], w_fp[63], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[63], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17820,7 +17820,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 843 - VVV1_0( w_fp[56], w_fp[64], w_fp[5], 
COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[64], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17847,7 +17847,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 844 - VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17867,7 +17867,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17887,7 +17887,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17914,7 +17914,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 845 - VVV1_0( w_fp[0], w_fp[63], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[63], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17941,7 +17941,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 846 - VVV1_0( w_fp[0], w_fp[64], w_fp[10], COUPs[0], 1.0, &_fp[0] ); + 
VVV1_0( w_fp[0], w_fp[64], w_fp[10], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17965,12 +17965,12 @@ namespace mg5amcCpu // *** DIAGRAM 847 OF 1240 *** // Wavefunction(s) for diagram number 847 - VVVV1P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[103] ); - VVVV3P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[22] ); - VVVV4P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 0., 0., w_fp[103] ); + VVVV3P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 0., 0., w_fp[22] ); + VVVV4P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 847 - VVV1_0( w_fp[8], w_fp[6], w_fp[103], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[103], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17990,7 +17990,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[22], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[22], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18010,7 +18010,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18034,12 +18034,12 @@ namespace mg5amcCpu // *** DIAGRAM 848 OF 1240 *** // Wavefunction(s) for diagram number 
848 - VVVV1P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[105] ); - VVVV3P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[107] ); + VVVV1P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 0., 0., w_fp[105] ); + VVVV3P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 0., 0., w_fp[107] ); // Amplitude(s) for diagram number 848 - VVV1_0( w_fp[8], w_fp[5], w_fp[105], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[105], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18059,7 +18059,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[95], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[95], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18079,7 +18079,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[98] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[107], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[107], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18103,12 +18103,12 @@ namespace mg5amcCpu // *** DIAGRAM 849 OF 1240 *** // Wavefunction(s) for diagram number 849 - VVVV1P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[115] ); - VVVV3P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[116] ); - VVVV4P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[117] ); + VVVV1P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 0., 0., w_fp[115] ); + VVVV3P0_1( w_fp[0], 
w_fp[8], w_fp[5], COUPs[2], 0., 0., w_fp[116] ); + VVVV4P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 0., 0., w_fp[117] ); // Amplitude(s) for diagram number 849 - VVV1_0( w_fp[61], w_fp[6], w_fp[115], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[115], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18128,7 +18128,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVV1_0( w_fp[61], w_fp[6], w_fp[116], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[116], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18148,7 +18148,7 @@ namespace mg5amcCpu jamp_sv[105] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[61], w_fp[6], w_fp[117], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[117], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18172,12 +18172,12 @@ namespace mg5amcCpu // *** DIAGRAM 850 OF 1240 *** // Wavefunction(s) for diagram number 850 - VVVV1P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[118] ); - VVVV3P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[119] ); - VVVV4P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[120] ); + VVVV1P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 0., 0., w_fp[118] ); + VVVV3P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 0., 0., w_fp[119] ); + VVVV4P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 0., 0., w_fp[120] ); // Amplitude(s) for diagram number 850 - VVV1_0( w_fp[61], w_fp[5], w_fp[118], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[118], COUPs[0], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18197,7 +18197,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[61], w_fp[5], w_fp[119], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[119], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18217,7 +18217,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[100] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[61], w_fp[5], w_fp[120], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[120], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18244,7 +18244,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 851 - VVVV1_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18264,7 +18264,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18284,7 +18284,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[61], w_fp[8], 
w_fp[29], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18311,7 +18311,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 852 - VVV1_0( w_fp[8], w_fp[29], w_fp[90], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[29], w_fp[90], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18338,7 +18338,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 853 - VVV1_0( w_fp[61], w_fp[29], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[29], w_fp[56], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18365,7 +18365,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 854 - VVV1_0( w_fp[61], w_fp[8], w_fp[96], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[61], w_fp[8], w_fp[96], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18392,7 +18392,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 855 - VVV1_0( w_fp[90], w_fp[45], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[90], w_fp[45], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18411,7 +18411,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 856 - FFV1_0( w_fp[3], w_fp[44], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[44], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -18426,7 +18426,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 857 - FFV1_0( w_fp[65], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18439,7 +18439,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 858 - FFV1_0( w_fp[3], w_fp[102], w_fp[64], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[64], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18454,7 +18454,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 859 - FFV1_0( w_fp[65], w_fp[44], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[44], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18467,7 +18467,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 860 - VVV1_0( w_fp[0], w_fp[64], w_fp[45], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[64], w_fp[45], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18486,7 +18486,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 861 - FFV1_0( w_fp[3], w_fp[39], w_fp[105], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[105], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18498,7 +18498,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * 
amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[95], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[95], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18510,7 +18510,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[107], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[107], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18529,7 +18529,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 862 - FFV1_0( w_fp[41], w_fp[39], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18544,7 +18544,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 863 - FFV1_0( w_fp[41], w_fp[102], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[102], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18557,7 +18557,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 864 - FFV1_0( w_fp[62], w_fp[39], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18570,7 +18570,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 865 - VVV1_0( 
w_fp[90], w_fp[51], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[90], w_fp[51], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18589,7 +18589,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 866 - FFV1_0( w_fp[3], w_fp[50], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[50], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18604,7 +18604,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 867 - FFV1_0( w_fp[65], w_fp[113], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[113], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18617,7 +18617,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 868 - FFV1_0( w_fp[3], w_fp[113], w_fp[63], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[63], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18632,7 +18632,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 869 - FFV1_0( w_fp[65], w_fp[50], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[50], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18645,7 +18645,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 870 - VVV1_0( w_fp[0], w_fp[63], w_fp[51], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[63], w_fp[51], COUPs[0], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18664,7 +18664,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 871 - FFV1_0( w_fp[3], w_fp[47], w_fp[103], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[103], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18676,7 +18676,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[22], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18688,7 +18688,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18707,7 +18707,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 872 - FFV1_0( w_fp[38], w_fp[47], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18722,7 +18722,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 873 - FFV1_0( w_fp[38], w_fp[113], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[113], w_fp[61], 
COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18735,7 +18735,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 874 - FFV1_0( w_fp[104], w_fp[47], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[47], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18748,7 +18748,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 875 - VVV1_0( w_fp[90], w_fp[23], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[90], w_fp[23], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18767,7 +18767,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 876 - FFV1_0( w_fp[48], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18782,7 +18782,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 877 - FFV1_0( w_fp[104], w_fp[93], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[93], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18795,7 +18795,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 878 - FFV1_0( w_fp[104], w_fp[2], w_fp[64], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[64], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -18810,7 +18810,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 879 - FFV1_0( w_fp[48], w_fp[93], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[93], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18823,7 +18823,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 880 - VVV1_0( w_fp[0], w_fp[64], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[64], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18842,7 +18842,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 881 - FFV1_0( w_fp[38], w_fp[2], w_fp[105], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[105], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18854,7 +18854,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18866,7 +18866,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[107], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[107], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -18885,7 +18885,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 882 - VVV1_0( w_fp[90], w_fp[20], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[90], w_fp[20], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18904,7 +18904,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 883 - FFV1_0( w_fp[40], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18919,7 +18919,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 884 - FFV1_0( w_fp[62], w_fp[93], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[93], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18932,7 +18932,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 885 - FFV1_0( w_fp[62], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18947,7 +18947,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 886 - FFV1_0( w_fp[40], w_fp[93], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[93], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18960,7 +18960,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 887 - VVV1_0( 
w_fp[0], w_fp[63], w_fp[20], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[63], w_fp[20], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18979,7 +18979,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 888 - FFV1_0( w_fp[41], w_fp[2], w_fp[103], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[103], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18991,7 +18991,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19003,7 +19003,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19022,7 +19022,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 889 - FFV1_0( w_fp[3], w_fp[18], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[18], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19041,7 +19041,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 890 - 
FFV1_0( w_fp[12], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19060,7 +19060,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 891 - FFV1_0( w_fp[3], w_fp[93], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[93], w_fp[96], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19079,7 +19079,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 892 - FFV1_0( w_fp[65], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19098,7 +19098,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 893 - FFV1_0( w_fp[12], w_fp[93], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[93], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19113,7 +19113,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 894 - FFV1_0( w_fp[65], w_fp[18], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[65], w_fp[18], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19125,10 +19125,10 @@ namespace mg5amcCpu // *** DIAGRAM 895 OF 1240 *** // Wavefunction(s) for diagram number 895 - VVV1P0_1( w_fp[0], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[65] ); + VVV1P0_1( w_fp[0], w_fp[66], COUPs[0], 0., 0., w_fp[65] ); // 
Amplitude(s) for diagram number 895 - VVV1_0( w_fp[65], w_fp[13], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[65], w_fp[13], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19155,7 +19155,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 896 - VVV1_0( w_fp[65], w_fp[11], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[65], w_fp[11], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19182,7 +19182,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 897 - VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19202,7 +19202,7 @@ namespace mg5amcCpu jamp_sv[100] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19222,7 +19222,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19249,7 +19249,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 898 
- VVV1_0( w_fp[56], w_fp[69], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[69], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19276,7 +19276,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 899 - VVV1_0( w_fp[56], w_fp[70], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[70], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19303,7 +19303,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 900 - VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19323,7 +19323,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19343,7 +19343,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[107] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19370,7 +19370,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 901 - VVV1_0( w_fp[0], w_fp[69], 
w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[69], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19397,7 +19397,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 902 - VVV1_0( w_fp[0], w_fp[70], w_fp[13], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[70], w_fp[13], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19421,12 +19421,12 @@ namespace mg5amcCpu // *** DIAGRAM 903 OF 1240 *** // Wavefunction(s) for diagram number 903 - VVVV1P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[93] ); - VVVV3P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[90] ); - VVVV4P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 0., 0., w_fp[93] ); + VVVV3P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 0., 0., w_fp[90] ); + VVVV4P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 903 - VVV1_0( w_fp[8], w_fp[6], w_fp[93], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[93], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19446,7 +19446,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[90], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[90], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19466,7 +19466,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] 
-= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19490,12 +19490,12 @@ namespace mg5amcCpu // *** DIAGRAM 904 OF 1240 *** // Wavefunction(s) for diagram number 904 - VVVV1P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] ); - VVVV3P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[103] ); - VVVV4P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[63] ); + VVVV1P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 0., 0., w_fp[22] ); + VVVV3P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 0., 0., w_fp[103] ); + VVVV4P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 0., 0., w_fp[63] ); // Amplitude(s) for diagram number 904 - VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19515,7 +19515,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[103], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[103], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19535,7 +19535,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; jamp_sv[100] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[63], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[63], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19559,12 +19559,12 @@ namespace 
mg5amcCpu // *** DIAGRAM 905 OF 1240 *** // Wavefunction(s) for diagram number 905 - VVVV1P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] ); - VVVV3P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] ); + VVVV1P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 0., 0., w_fp[107] ); + VVVV3P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 0., 0., w_fp[105] ); // Amplitude(s) for diagram number 905 - VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19584,7 +19584,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVV1_0( w_fp[66], w_fp[6], w_fp[95], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[95], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19604,7 +19604,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[66], w_fp[6], w_fp[105], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[105], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19631,7 +19631,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 906 - VVV1_0( w_fp[66], w_fp[4], w_fp[118], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[118], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -19651,7 +19651,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[66], w_fp[4], w_fp[119], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[119], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19671,7 +19671,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[99] += amp_sv[0]; jamp_sv[100] -= amp_sv[0]; - VVV1_0( w_fp[66], w_fp[4], w_fp[120], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[120], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19698,7 +19698,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 907 - VVVV1_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19718,7 +19718,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19738,7 +19738,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -19765,7 +19765,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 908 - VVV1_0( w_fp[8], w_fp[27], w_fp[65], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[27], w_fp[65], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19792,7 +19792,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 909 - VVV1_0( w_fp[66], w_fp[27], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[27], w_fp[56], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19819,7 +19819,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 910 - VVV1_0( w_fp[66], w_fp[8], w_fp[101], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[66], w_fp[8], w_fp[101], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19846,7 +19846,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 911 - VVV1_0( w_fp[65], w_fp[37], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[65], w_fp[37], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19865,7 +19865,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 912 - FFV1_0( w_fp[3], w_fp[36], w_fp[65], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[36], w_fp[65], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19880,7 +19880,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 913 - FFV1_0( 
w_fp[71], w_fp[114], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[114], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19893,7 +19893,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 914 - FFV1_0( w_fp[3], w_fp[114], w_fp[70], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[70], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19908,7 +19908,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 915 - FFV1_0( w_fp[71], w_fp[36], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[36], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19921,7 +19921,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 916 - VVV1_0( w_fp[0], w_fp[70], w_fp[37], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[70], w_fp[37], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19940,7 +19940,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 917 - FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19952,7 +19952,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[103], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( 
w_fp[3], w_fp[33], w_fp[103], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19964,7 +19964,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[63], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[63], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19983,7 +19983,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 918 - FFV1_0( w_fp[41], w_fp[33], w_fp[65], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[65], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19998,7 +19998,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 919 - FFV1_0( w_fp[41], w_fp[114], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[114], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20011,7 +20011,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 920 - FFV1_0( w_fp[62], w_fp[33], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20024,7 +20024,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 921 - VVV1_0( w_fp[65], w_fp[51], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[65], w_fp[51], w_fp[4], COUPs[0], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20043,7 +20043,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 922 - FFV1_0( w_fp[3], w_fp[49], w_fp[65], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[49], w_fp[65], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20058,7 +20058,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 923 - FFV1_0( w_fp[71], w_fp[113], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[113], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20071,7 +20071,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 924 - FFV1_0( w_fp[3], w_fp[113], w_fp[69], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[69], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20086,7 +20086,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 925 - FFV1_0( w_fp[71], w_fp[49], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[49], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20099,7 +20099,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 926 - VVV1_0( w_fp[0], w_fp[69], w_fp[51], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[69], w_fp[51], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-20118,7 +20118,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 927 - FFV1_0( w_fp[3], w_fp[47], w_fp[93], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[93], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20130,7 +20130,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20142,7 +20142,7 @@ namespace mg5amcCpu jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20161,7 +20161,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 928 - FFV1_0( w_fp[46], w_fp[47], w_fp[65], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[65], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20176,7 +20176,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 929 - FFV1_0( w_fp[46], w_fp[113], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[113], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -20189,7 +20189,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 930 - FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20202,7 +20202,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 931 - VVV1_0( w_fp[65], w_fp[54], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[65], w_fp[54], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20221,7 +20221,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 932 - FFV1_0( w_fp[53], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20236,7 +20236,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 933 - FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20249,7 +20249,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 934 - FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20264,7 +20264,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 935 - FFV1_0( w_fp[53], w_fp[94], w_fp[0], 
COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[94], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20277,7 +20277,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 936 - VVV1_0( w_fp[0], w_fp[70], w_fp[54], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[70], w_fp[54], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20296,7 +20296,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 937 - FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20308,7 +20308,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[103], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[103], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20320,7 +20320,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20339,7 +20339,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 938 - VVV1_0( w_fp[65], 
w_fp[20], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[65], w_fp[20], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20358,7 +20358,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 939 - FFV1_0( w_fp[28], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20373,7 +20373,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 940 - FFV1_0( w_fp[62], w_fp[94], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[94], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20386,7 +20386,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 941 - FFV1_0( w_fp[62], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20401,7 +20401,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 942 - FFV1_0( w_fp[28], w_fp[94], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[94], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20414,7 +20414,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 943 - VVV1_0( w_fp[0], w_fp[69], w_fp[20], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[69], w_fp[20], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the 
code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20433,7 +20433,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 944 - FFV1_0( w_fp[41], w_fp[2], w_fp[93], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[93], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20445,7 +20445,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20457,7 +20457,7 @@ namespace mg5amcCpu jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20476,7 +20476,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 945 - FFV1_0( w_fp[3], w_fp[15], w_fp[65], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[15], w_fp[65], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20495,7 +20495,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 946 - FFV1_0( w_fp[14], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // 
Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20514,7 +20514,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 947 - FFV1_0( w_fp[3], w_fp[94], w_fp[101], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[94], w_fp[101], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20533,7 +20533,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 948 - FFV1_0( w_fp[71], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20552,7 +20552,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 949 - FFV1_0( w_fp[14], w_fp[94], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[94], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20567,7 +20567,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 950 - FFV1_0( w_fp[71], w_fp[15], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[15], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20579,10 +20579,10 @@ namespace mg5amcCpu // *** DIAGRAM 951 OF 1240 *** // Wavefunction(s) for diagram number 951 - VVV1P0_1( w_fp[0], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[0], w_fp[72], COUPs[0], 0., 0., w_fp[71] ); // Amplitude(s) for diagram number 951 - VVV1_0( w_fp[71], w_fp[13], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[71], w_fp[13], w_fp[5], COUPs[0], &_fp[0] 
); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20609,7 +20609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 952 - VVV1_0( w_fp[71], w_fp[10], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[71], w_fp[10], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20636,7 +20636,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 953 - VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20656,7 +20656,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20676,7 +20676,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20703,7 +20703,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 954 - VVV1_0( w_fp[56], w_fp[74], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[74], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
// Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20730,7 +20730,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 955 - VVV1_0( w_fp[56], w_fp[75], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[75], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20757,7 +20757,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 956 - VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20777,7 +20777,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; - VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20797,7 +20797,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20824,7 +20824,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 957 - VVV1_0( w_fp[0], w_fp[74], w_fp[10], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[74], w_fp[10], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20851,7 +20851,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 958 - VVV1_0( w_fp[0], w_fp[75], w_fp[13], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[75], w_fp[13], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20875,12 +20875,12 @@ namespace mg5amcCpu // *** DIAGRAM 959 OF 1240 *** // Wavefunction(s) for diagram number 959 - VVVV1P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[94] ); - VVVV3P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[65] ); - VVVV4P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 0., 0., w_fp[94] ); + VVVV3P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 0., 0., w_fp[65] ); + VVVV4P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 959 - VVV1_0( w_fp[8], w_fp[5], w_fp[94], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[94], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20900,7 +20900,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[65], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[65], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20920,7 +20920,7 @@ namespace mg5amcCpu jamp_sv[93] -= amp_sv[0]; jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20944,12 +20944,12 @@ namespace mg5amcCpu // *** DIAGRAM 960 OF 1240 *** // Wavefunction(s) for diagram number 960 - VVVV1P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[90] ); - VVVV3P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[93] ); - VVVV4P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[69] ); + VVVV1P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 0., 0., w_fp[90] ); + VVVV3P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 0., 0., w_fp[93] ); + VVVV4P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 0., 0., w_fp[69] ); // Amplitude(s) for diagram number 960 - VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20969,7 +20969,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[93], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[93], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20989,7 +20989,7 @@ namespace mg5amcCpu jamp_sv[76] -= amp_sv[0]; jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[69], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[69], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21016,7 +21016,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 961 - VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], 
w_fp[107], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21036,7 +21036,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[72], w_fp[5], w_fp[95], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[95], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21056,7 +21056,7 @@ namespace mg5amcCpu jamp_sv[93] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - VVV1_0( w_fp[72], w_fp[5], w_fp[105], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[105], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21083,7 +21083,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 962 - VVV1_0( w_fp[72], w_fp[4], w_fp[115], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[115], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21103,7 +21103,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVV1_0( w_fp[72], w_fp[4], w_fp[116], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[116], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21123,7 +21123,7 @@ namespace mg5amcCpu jamp_sv[76] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; - VVV1_0( w_fp[72], w_fp[4], w_fp[117], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[117], COUPs[0], 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21150,7 +21150,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 963 - VVVV1_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21170,7 +21170,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21190,7 +21190,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21217,7 +21217,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 964 - VVV1_0( w_fp[8], w_fp[24], w_fp[71], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[24], w_fp[71], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21244,7 +21244,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 965 - VVV1_0( w_fp[72], w_fp[24], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[24], w_fp[56], COUPs[0], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21271,7 +21271,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 966 - VVV1_0( w_fp[72], w_fp[8], w_fp[98], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[72], w_fp[8], w_fp[98], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21298,7 +21298,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 967 - VVV1_0( w_fp[71], w_fp[37], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[71], w_fp[37], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21317,7 +21317,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 968 - FFV1_0( w_fp[3], w_fp[35], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[35], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21332,7 +21332,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 969 - FFV1_0( w_fp[76], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21345,7 +21345,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 970 - FFV1_0( w_fp[3], w_fp[114], w_fp[75], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[75], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-21360,7 +21360,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 971 - FFV1_0( w_fp[76], w_fp[35], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[35], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21373,7 +21373,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 972 - VVV1_0( w_fp[0], w_fp[75], w_fp[37], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[75], w_fp[37], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21392,7 +21392,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 973 - FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21404,7 +21404,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[93], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[93], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21416,7 +21416,7 @@ namespace mg5amcCpu jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[69], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[69], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -21435,7 +21435,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 974 - FFV1_0( w_fp[38], w_fp[33], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21450,7 +21450,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 975 - FFV1_0( w_fp[38], w_fp[114], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[114], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21463,7 +21463,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 976 - FFV1_0( w_fp[104], w_fp[33], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[33], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21476,7 +21476,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 977 - VVV1_0( w_fp[71], w_fp[45], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[71], w_fp[45], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21495,7 +21495,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 978 - FFV1_0( w_fp[3], w_fp[43], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[43], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21510,7 +21510,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 979 - FFV1_0( w_fp[76], w_fp[102], w_fp[4], 
COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21523,7 +21523,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 980 - FFV1_0( w_fp[3], w_fp[102], w_fp[74], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[74], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21538,7 +21538,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 981 - FFV1_0( w_fp[76], w_fp[43], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[43], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21551,7 +21551,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 982 - VVV1_0( w_fp[0], w_fp[74], w_fp[45], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[74], w_fp[45], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21570,7 +21570,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 983 - FFV1_0( w_fp[3], w_fp[39], w_fp[94], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[94], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21582,7 +21582,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[65], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[65], 
COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21594,7 +21594,7 @@ namespace mg5amcCpu jamp_sv[85] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21613,7 +21613,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 984 - FFV1_0( w_fp[46], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21628,7 +21628,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 985 - FFV1_0( w_fp[46], w_fp[102], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[102], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21641,7 +21641,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 986 - FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21654,7 +21654,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 987 - VVV1_0( w_fp[71], w_fp[54], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[71], w_fp[54], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here 
the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21673,7 +21673,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 988 - FFV1_0( w_fp[7], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21688,7 +21688,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 989 - FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21701,7 +21701,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 990 - FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21716,7 +21716,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 991 - FFV1_0( w_fp[7], w_fp[97], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[97], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21729,7 +21729,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 992 - VVV1_0( w_fp[0], w_fp[75], w_fp[54], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[75], w_fp[54], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21748,7 +21748,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 993 - FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21760,7 +21760,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[93], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[93], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21772,7 +21772,7 @@ namespace mg5amcCpu jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21791,7 +21791,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 994 - VVV1_0( w_fp[71], w_fp[23], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[71], w_fp[23], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21810,7 +21810,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 995 - FFV1_0( w_fp[25], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21825,7 +21825,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 996 - FFV1_0( w_fp[104], w_fp[97], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[97], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21838,7 +21838,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 997 - FFV1_0( w_fp[104], w_fp[2], w_fp[74], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[74], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21853,7 +21853,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 998 - FFV1_0( w_fp[25], w_fp[97], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[97], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21866,7 +21866,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 999 - VVV1_0( w_fp[0], w_fp[74], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[74], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21885,7 +21885,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1000 - FFV1_0( w_fp[38], w_fp[2], w_fp[94], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[94], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21897,7 +21897,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( 
w_fp[38], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21909,7 +21909,7 @@ namespace mg5amcCpu jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21928,7 +21928,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1001 - FFV1_0( w_fp[3], w_fp[17], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21947,7 +21947,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1002 - FFV1_0( w_fp[26], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21966,7 +21966,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1003 - FFV1_0( w_fp[3], w_fp[97], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[97], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21985,7 +21985,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1004 - FFV1_0( w_fp[76], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( 
w_fp[76], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22004,7 +22004,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1005 - FFV1_0( w_fp[26], w_fp[97], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[97], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22019,7 +22019,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1006 - FFV1_0( w_fp[76], w_fp[17], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[76], w_fp[17], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22034,7 +22034,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1007 - VVV1_0( w_fp[56], w_fp[59], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[59], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22061,7 +22061,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1008 - VVV1_0( w_fp[56], w_fp[1], w_fp[42], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[1], w_fp[42], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22088,7 +22088,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1009 - VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22108,7 +22108,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22128,7 +22128,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22155,7 +22155,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1010 - VVV1_0( w_fp[98], w_fp[108], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[98], w_fp[108], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22182,7 +22182,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1011 - VVV1_0( w_fp[98], w_fp[1], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[98], w_fp[1], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22209,7 +22209,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1012 - VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -22229,7 +22229,7 @@ namespace mg5amcCpu jamp_sv[101] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22249,7 +22249,7 @@ namespace mg5amcCpu jamp_sv[103] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22276,7 +22276,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1013 - VVV1_0( w_fp[0], w_fp[108], w_fp[42], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[108], w_fp[42], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22303,7 +22303,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1014 - VVV1_0( w_fp[0], w_fp[59], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[59], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22327,12 +22327,12 @@ namespace mg5amcCpu // *** DIAGRAM 1015 OF 1240 *** // Wavefunction(s) for diagram number 1015 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[11] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[42] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[76] ); + VVVV1P0_1( w_fp[0], 
w_fp[1], w_fp[8], COUPs[2], 0., 0., w_fp[11] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 0., 0., w_fp[42] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 0., 0., w_fp[76] ); // Amplitude(s) for diagram number 1015 - VVV1_0( w_fp[24], w_fp[6], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22352,7 +22352,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVV1_0( w_fp[24], w_fp[6], w_fp[42], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[42], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22372,7 +22372,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[24], w_fp[6], w_fp[76], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[76], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22396,12 +22396,12 @@ namespace mg5amcCpu // *** DIAGRAM 1016 OF 1240 *** // Wavefunction(s) for diagram number 1016 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[97] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[71] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 0., 0., w_fp[97] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 0., 0., w_fp[71] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 1016 - VVV1_0( w_fp[8], w_fp[6], w_fp[97], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], 
w_fp[97], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22421,7 +22421,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22441,7 +22441,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22468,7 +22468,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1017 - VVV1_0( w_fp[1], w_fp[24], w_fp[118], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[118], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22488,7 +22488,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[24], w_fp[119], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[119], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22508,7 +22508,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[24], w_fp[120], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[120], COUPs[0], 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22535,7 +22535,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1018 - VVV1_0( w_fp[1], w_fp[8], w_fp[85], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[85], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22555,7 +22555,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[112], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[112], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22575,7 +22575,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22602,7 +22602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1019 - VVV1_0( w_fp[56], w_fp[68], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[68], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22629,7 +22629,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1020 - VVV1_0( w_fp[56], w_fp[1], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[1], w_fp[16], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22656,7 +22656,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1021 - VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22676,7 +22676,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22696,7 +22696,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22723,7 +22723,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1022 - VVV1_0( w_fp[101], w_fp[108], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[101], w_fp[108], w_fp[5], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22750,7 +22750,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1023 - VVV1_0( w_fp[101], w_fp[1], w_fp[10], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[101], w_fp[1], w_fp[10], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support 
updates numerators_sv and denominators_sv (#473) #endif @@ -22777,7 +22777,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1024 - VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22797,7 +22797,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22817,7 +22817,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22844,7 +22844,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1025 - VVV1_0( w_fp[0], w_fp[108], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[108], w_fp[16], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22871,7 +22871,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1026 - VVV1_0( w_fp[0], w_fp[68], w_fp[10], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[68], w_fp[10], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -22898,7 +22898,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1027 - VVV1_0( w_fp[27], w_fp[5], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22918,7 +22918,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[27], w_fp[5], w_fp[42], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[42], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22938,7 +22938,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[27], w_fp[5], w_fp[76], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[76], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22962,12 +22962,12 @@ namespace mg5amcCpu // *** DIAGRAM 1028 OF 1240 *** // Wavefunction(s) for diagram number 1028 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[10] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[16] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 0., 0., w_fp[10] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 0., 0., w_fp[16] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 1028 - VVV1_0( w_fp[8], w_fp[5], w_fp[10], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[10], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22987,7 +22987,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[16], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23007,7 +23007,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23034,7 +23034,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1029 - VVV1_0( w_fp[1], w_fp[27], w_fp[115], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[115], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23054,7 +23054,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[27], w_fp[116], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[116], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23074,7 +23074,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[27], w_fp[117], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[117], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -23101,7 +23101,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1030 - VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23121,7 +23121,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23141,7 +23141,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23168,7 +23168,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1031 - VVV1_0( w_fp[56], w_fp[67], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[67], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23195,7 +23195,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1032 - VVV1_0( w_fp[56], w_fp[1], w_fp[19], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[56], w_fp[1], w_fp[19], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23222,7 +23222,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 1033 - VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23242,7 +23242,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23262,7 +23262,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23289,7 +23289,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1034 - VVV1_0( w_fp[96], w_fp[108], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[96], w_fp[108], w_fp[4], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23316,7 +23316,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1035 - VVV1_0( w_fp[96], w_fp[1], w_fp[13], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[96], w_fp[1], w_fp[13], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23343,7 +23343,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 1036 - VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23363,7 +23363,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23383,7 +23383,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23410,7 +23410,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1037 - VVV1_0( w_fp[0], w_fp[108], w_fp[19], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[108], w_fp[19], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23437,7 +23437,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1038 - VVV1_0( w_fp[0], w_fp[67], w_fp[13], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[67], w_fp[13], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23464,7 +23464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1039 - VVV1_0( 
w_fp[4], w_fp[29], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23484,7 +23484,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVV1_0( w_fp[4], w_fp[29], w_fp[42], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[42], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23504,7 +23504,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[4], w_fp[29], w_fp[76], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[76], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23528,12 +23528,12 @@ namespace mg5amcCpu // *** DIAGRAM 1040 OF 1240 *** // Wavefunction(s) for diagram number 1040 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[76] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[42] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[11] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 0., 0., w_fp[76] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 0., 0., w_fp[42] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 1040 - VVV1_0( w_fp[8], w_fp[4], w_fp[76], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[76], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23553,7 +23553,7 @@ namespace mg5amcCpu jamp_sv[71] += 
amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[42], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[42], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23573,7 +23573,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[90] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23600,7 +23600,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1041 - VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23620,7 +23620,7 @@ namespace mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[29], w_fp[95], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[95], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23640,7 +23640,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[29], w_fp[105], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[105], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23667,7 +23667,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 
1042 - VVV1_0( w_fp[1], w_fp[8], w_fp[87], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[87], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23687,7 +23687,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[34], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[34], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23707,7 +23707,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[86], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[86], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23734,7 +23734,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1043 - VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23754,7 +23754,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23774,7 +23774,7 @@ namespace mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[119] += 
amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23794,7 +23794,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23814,7 +23814,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23834,7 +23834,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23854,7 +23854,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23874,7 +23874,7 @@ 
namespace mg5amcCpu jamp_sv[113] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23894,7 +23894,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23921,7 +23921,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1044 - VVV1_0( w_fp[1], w_fp[30], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[30], w_fp[56], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23941,7 +23941,7 @@ namespace mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[31], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[31], w_fp[56], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23961,7 +23961,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[32], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[32], w_fp[56], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23988,7 
+23988,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1045 - VVV1_0( w_fp[1], w_fp[8], w_fp[92], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[92], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24008,7 +24008,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24028,7 +24028,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[106], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[106], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24055,7 +24055,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1046 - FFV1_0( w_fp[58], w_fp[114], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[114], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24067,7 +24067,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1047 - FFV1_0( w_fp[48], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24079,7 +24079,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1048 - FFV1_0( 
w_fp[104], w_fp[100], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[100], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24091,7 +24091,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1049 - FFV1_0( w_fp[104], w_fp[36], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[36], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24103,7 +24103,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1050 - FFV1_0( w_fp[48], w_fp[100], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[48], w_fp[100], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24115,7 +24115,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1051 - FFV1_0( w_fp[58], w_fp[36], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[36], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24127,7 +24127,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1052 - FFV1_0( w_fp[60], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24139,7 +24139,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1053 - FFV1_0( w_fp[40], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24151,7 +24151,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1054 - FFV1_0( w_fp[62], w_fp[100], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[100], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24163,7 +24163,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1055 - FFV1_0( w_fp[62], w_fp[35], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[35], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24175,7 +24175,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1056 - FFV1_0( w_fp[40], w_fp[100], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[40], w_fp[100], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24187,7 +24187,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1057 - FFV1_0( w_fp[60], w_fp[35], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[35], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24199,7 +24199,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1058 - FFV1_0( w_fp[3], w_fp[114], w_fp[67], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[67], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif 
@@ -24214,7 +24214,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1059 - FFV1_0( w_fp[12], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24227,7 +24227,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1060 - FFV1_0( w_fp[3], w_fp[100], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[100], w_fp[96], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24242,7 +24242,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1061 - VVV1_0( w_fp[96], w_fp[1], w_fp[37], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[96], w_fp[1], w_fp[37], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24261,7 +24261,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1062 - FFV1_0( w_fp[12], w_fp[100], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[100], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24274,7 +24274,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1063 - VVV1_0( w_fp[0], w_fp[67], w_fp[37], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[67], w_fp[37], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24293,7 +24293,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1064 - FFV1_0( w_fp[3], w_fp[33], w_fp[76], COUPs[1], 
1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[76], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24305,7 +24305,7 @@ namespace mg5amcCpu jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[42], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[42], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24317,7 +24317,7 @@ namespace mg5amcCpu jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24336,7 +24336,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1065 - FFV1_0( w_fp[78], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24348,7 +24348,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1066 - FFV1_0( w_fp[53], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24360,7 +24360,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1067 - FFV1_0( w_fp[99], w_fp[89], 
w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24372,7 +24372,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1068 - FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24384,7 +24384,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1069 - FFV1_0( w_fp[53], w_fp[89], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[53], w_fp[89], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24396,7 +24396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1070 - FFV1_0( w_fp[78], w_fp[44], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[44], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24408,7 +24408,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1071 - FFV1_0( w_fp[60], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24420,7 +24420,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1072 - FFV1_0( w_fp[28], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the 
code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24432,7 +24432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1073 - FFV1_0( w_fp[62], w_fp[89], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[89], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24444,7 +24444,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1074 - FFV1_0( w_fp[62], w_fp[43], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[43], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24456,7 +24456,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1075 - FFV1_0( w_fp[28], w_fp[89], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[28], w_fp[89], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24468,7 +24468,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1076 - FFV1_0( w_fp[60], w_fp[43], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[43], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24480,7 +24480,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1077 - FFV1_0( w_fp[3], w_fp[102], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24495,7 +24495,7 @@ namespace mg5amcCpu 
// (none) // Amplitude(s) for diagram number 1078 - FFV1_0( w_fp[14], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24508,7 +24508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1079 - FFV1_0( w_fp[3], w_fp[89], w_fp[101], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[89], w_fp[101], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24523,7 +24523,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1080 - VVV1_0( w_fp[101], w_fp[1], w_fp[45], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[101], w_fp[1], w_fp[45], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24542,7 +24542,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1081 - FFV1_0( w_fp[14], w_fp[89], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[89], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24555,7 +24555,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1082 - VVV1_0( w_fp[0], w_fp[68], w_fp[45], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[68], w_fp[45], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24574,7 +24574,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1083 - FFV1_0( w_fp[3], w_fp[39], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], 
w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24586,7 +24586,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24598,7 +24598,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24617,7 +24617,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1084 - FFV1_0( w_fp[78], w_fp[113], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[113], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24629,7 +24629,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1085 - FFV1_0( w_fp[7], w_fp[113], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[113], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24641,7 +24641,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1086 - FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( 
w_fp[99], w_fp[91], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24653,7 +24653,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1087 - FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24665,7 +24665,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1088 - FFV1_0( w_fp[7], w_fp[91], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[7], w_fp[91], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24677,7 +24677,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1089 - FFV1_0( w_fp[78], w_fp[50], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[50], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24689,7 +24689,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1090 - FFV1_0( w_fp[58], w_fp[113], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[113], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24701,7 +24701,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1091 - FFV1_0( w_fp[25], w_fp[113], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[113], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support 
updates numerators_sv and denominators_sv (#473) #endif @@ -24713,7 +24713,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1092 - FFV1_0( w_fp[104], w_fp[91], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[91], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24725,7 +24725,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1093 - FFV1_0( w_fp[104], w_fp[49], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[49], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24737,7 +24737,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1094 - FFV1_0( w_fp[25], w_fp[91], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[25], w_fp[91], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24749,7 +24749,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1095 - FFV1_0( w_fp[58], w_fp[49], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[49], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24761,7 +24761,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1096 - FFV1_0( w_fp[3], w_fp[113], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[59], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24776,7 +24776,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 1097 - FFV1_0( w_fp[26], w_fp[113], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[113], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24789,7 +24789,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1098 - FFV1_0( w_fp[3], w_fp[91], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[91], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24804,7 +24804,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1099 - VVV1_0( w_fp[98], w_fp[1], w_fp[51], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[98], w_fp[1], w_fp[51], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24823,7 +24823,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1100 - FFV1_0( w_fp[26], w_fp[91], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[26], w_fp[91], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24836,7 +24836,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1101 - VVV1_0( w_fp[0], w_fp[59], w_fp[51], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[59], w_fp[51], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24855,7 +24855,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1102 - FFV1_0( w_fp[3], w_fp[47], w_fp[97], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[97], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24867,7 +24867,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24879,7 +24879,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24898,7 +24898,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1103 - FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24913,7 +24913,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1104 - FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24926,7 +24926,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1105 - FFV1_0( w_fp[78], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[2], w_fp[96], COUPs[1], 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24941,7 +24941,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1106 - VVV1_0( w_fp[96], w_fp[1], w_fp[54], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[96], w_fp[1], w_fp[54], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24960,7 +24960,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1107 - FFV1_0( w_fp[78], w_fp[18], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[78], w_fp[18], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24973,7 +24973,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1108 - VVV1_0( w_fp[0], w_fp[67], w_fp[54], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[67], w_fp[54], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24992,7 +24992,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1109 - FFV1_0( w_fp[46], w_fp[2], w_fp[76], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[76], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25004,7 +25004,7 @@ namespace mg5amcCpu jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25016,7 +25016,7 @@ namespace mg5amcCpu jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25035,7 +25035,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1110 - FFV1_0( w_fp[104], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25050,7 +25050,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1111 - FFV1_0( w_fp[104], w_fp[15], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[15], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25063,7 +25063,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1112 - FFV1_0( w_fp[58], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25078,7 +25078,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1113 - VVV1_0( w_fp[101], w_fp[1], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[101], w_fp[1], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -25097,7 +25097,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1114 - FFV1_0( w_fp[58], w_fp[15], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[58], w_fp[15], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25110,7 +25110,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1115 - VVV1_0( w_fp[0], w_fp[68], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[68], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25129,7 +25129,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1116 - FFV1_0( w_fp[38], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25141,7 +25141,7 @@ namespace mg5amcCpu jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25153,7 +25153,7 @@ namespace mg5amcCpu jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -25172,7 +25172,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1117 - FFV1_0( w_fp[62], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25187,7 +25187,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1118 - FFV1_0( w_fp[62], w_fp[17], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[17], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25200,7 +25200,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1119 - FFV1_0( w_fp[60], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25215,7 +25215,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1120 - VVV1_0( w_fp[98], w_fp[1], w_fp[20], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[98], w_fp[1], w_fp[20], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25234,7 +25234,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1121 - FFV1_0( w_fp[60], w_fp[17], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[17], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25247,7 +25247,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1122 - 
VVV1_0( w_fp[0], w_fp[59], w_fp[20], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[59], w_fp[20], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25266,7 +25266,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1123 - FFV1_0( w_fp[41], w_fp[2], w_fp[97], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[97], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25278,7 +25278,7 @@ namespace mg5amcCpu jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25290,7 +25290,7 @@ namespace mg5amcCpu jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25306,12 +25306,12 @@ namespace mg5amcCpu // *** DIAGRAM 1124 OF 1240 *** // Wavefunction(s) for diagram number 1124 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[71] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[97] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], 
COUPs[2], 0., 0., w_fp[71] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[97] ); // Amplitude(s) for diagram number 1124 - VVVV1_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25331,7 +25331,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25351,7 +25351,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25371,7 +25371,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVVV1_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25391,7 +25391,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV3_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // 
Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25411,7 +25411,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV4_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25431,7 +25431,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVVV1_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25451,7 +25451,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV3_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25471,7 +25471,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25495,12 +25495,12 @@ namespace mg5amcCpu // *** DIAGRAM 1125 OF 1240 *** // Wavefunction(s) for diagram number 1125 - VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[59] ); - VVV1P0_1( w_fp[71], 
w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] ); - VVV1P0_1( w_fp[97], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 0., 0., w_fp[59] ); + VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 0., 0., w_fp[20] ); + VVV1P0_1( w_fp[97], w_fp[5], COUPs[0], 0., 0., w_fp[60] ); // Amplitude(s) for diagram number 1125 - VVV1_0( w_fp[8], w_fp[6], w_fp[59], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[59], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25520,7 +25520,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[20], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[20], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25540,7 +25540,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25564,12 +25564,12 @@ namespace mg5amcCpu // *** DIAGRAM 1126 OF 1240 *** // Wavefunction(s) for diagram number 1126 - VVV1P0_1( w_fp[21], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[17] ); - VVV1P0_1( w_fp[71], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[98] ); - VVV1P0_1( w_fp[97], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[111] ); + VVV1P0_1( w_fp[21], w_fp[6], COUPs[0], 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[71], w_fp[6], COUPs[0], 0., 0., w_fp[98] ); + VVV1P0_1( w_fp[97], w_fp[6], COUPs[0], 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 1126 - VVV1_0( w_fp[8], w_fp[5], w_fp[17], COUPs[0], 
1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[17], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25589,7 +25589,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25609,7 +25609,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25636,7 +25636,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1127 - VVV1_0( w_fp[21], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[21], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25656,7 +25656,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[71], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[71], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25676,7 +25676,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[97], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( 
w_fp[97], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25700,12 +25700,12 @@ namespace mg5amcCpu // *** DIAGRAM 1128 OF 1240 *** // Wavefunction(s) for diagram number 1128 - FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); - FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); - FFV1_2( w_fp[3], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] ); + FFV1_2( w_fp[3], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); + FFV1_2( w_fp[3], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); + FFV1_2( w_fp[3], w_fp[97], COUPs[1], cIPD[0], cIPD[1], w_fp[68] ); // Amplitude(s) for diagram number 1128 - FFV1_0( w_fp[16], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25713,7 +25713,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; - FFV1_0( w_fp[10], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25721,7 +25721,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; - FFV1_0( w_fp[68], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[68], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25736,7 +25736,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1129 - FFV1_0( w_fp[3], w_fp[39], w_fp[17], 
COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[17], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25748,7 +25748,7 @@ namespace mg5amcCpu jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25760,7 +25760,7 @@ namespace mg5amcCpu jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25779,7 +25779,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1130 - FFV1_0( w_fp[41], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25787,7 +25787,7 @@ namespace mg5amcCpu jamp_sv[74] -= amp_sv[0]; jamp_sv[80] -= amp_sv[0]; jamp_sv[86] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25795,7 +25795,7 @@ namespace mg5amcCpu jamp_sv[78] += amp_sv[0]; jamp_sv[80] -= 
amp_sv[0]; jamp_sv[84] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[39], w_fp[97], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[97], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25810,7 +25810,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1131 - FFV1_0( w_fp[16], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25818,7 +25818,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - FFV1_0( w_fp[10], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25826,7 +25826,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - FFV1_0( w_fp[68], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[68], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25841,7 +25841,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1132 - FFV1_0( w_fp[3], w_fp[47], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[59], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25853,7 +25853,7 @@ namespace mg5amcCpu jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[20], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25865,7 +25865,7 @@ namespace mg5amcCpu jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25884,7 +25884,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1133 - FFV1_0( w_fp[38], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25892,7 +25892,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[47], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25900,7 +25900,7 @@ namespace mg5amcCpu jamp_sv[102] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[47], w_fp[97], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[97], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25912,12 +25912,12 @@ namespace mg5amcCpu // 
*** DIAGRAM 1134 OF 1240 *** // Wavefunction(s) for diagram number 1134 - FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); - FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); - FFV1_1( w_fp[2], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); + FFV1_1( w_fp[2], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_1( w_fp[2], w_fp[97], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); // Amplitude(s) for diagram number 1134 - FFV1_0( w_fp[38], w_fp[23], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[23], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25925,7 +25925,7 @@ namespace mg5amcCpu jamp_sv[7] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; jamp_sv[55] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[21], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[21], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25933,7 +25933,7 @@ namespace mg5amcCpu jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; jamp_sv[49] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[71], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[71], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25948,7 +25948,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1135 - FFV1_0( w_fp[38], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25960,7 
+25960,7 @@ namespace mg5amcCpu jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25972,7 +25972,7 @@ namespace mg5amcCpu jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25991,7 +25991,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1136 - FFV1_0( w_fp[41], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25999,7 +25999,7 @@ namespace mg5amcCpu jamp_sv[6] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; jamp_sv[54] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[21], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[21], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26007,7 +26007,7 @@ namespace mg5amcCpu jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; jamp_sv[48] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[71], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[71], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -26022,7 +26022,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1137 - FFV1_0( w_fp[41], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26034,7 +26034,7 @@ namespace mg5amcCpu jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[20], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26046,7 +26046,7 @@ namespace mg5amcCpu jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26065,7 +26065,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1138 - FFV1_0( w_fp[3], w_fp[23], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26077,7 +26077,7 @@ namespace mg5amcCpu jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[21], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[21], w_fp[29], COUPs[1], &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26089,7 +26089,7 @@ namespace mg5amcCpu jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[71], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[71], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26108,7 +26108,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1139 - FFV1_0( w_fp[16], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26120,7 +26120,7 @@ namespace mg5amcCpu jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[10], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26132,7 +26132,7 @@ namespace mg5amcCpu jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[68], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[68], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26148,12 +26148,12 @@ namespace mg5amcCpu // *** DIAGRAM 1140 OF 1240 *** // Wavefunction(s) for diagram number 1140 - 
VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[68] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[29] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[10] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[68] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[29] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 1140 - VVVV1_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26173,7 +26173,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV3_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26193,7 +26193,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVVV4_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26213,7 +26213,7 @@ namespace mg5amcCpu jamp_sv[100] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVVV1_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -26233,7 +26233,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVVV3_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26253,7 +26253,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV4_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26273,7 +26273,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV1_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26293,7 +26293,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; - VVVV3_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26313,7 +26313,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV4_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26337,12 +26337,12 @@ namespace mg5amcCpu // *** DIAGRAM 1141 OF 1240 *** // Wavefunction(s) for diagram number 1141 - VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[16] ); - VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[71] ); - VVV1P0_1( w_fp[10], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[10], w_fp[4], COUPs[0], 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 1141 - VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26362,7 +26362,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26382,7 +26382,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26406,12 +26406,12 @@ namespace mg5amcCpu // *** DIAGRAM 1142 OF 1240 *** // Wavefunction(s) for diagram number 1142 - VVV1P0_1( w_fp[68], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] ); - VVV1P0_1( w_fp[29], w_fp[6], COUPs[0], 1.0, 0., 0., 
w_fp[60] ); - VVV1P0_1( w_fp[10], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[20] ); + VVV1P0_1( w_fp[68], w_fp[6], COUPs[0], 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[29], w_fp[6], COUPs[0], 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[10], w_fp[6], COUPs[0], 0., 0., w_fp[20] ); // Amplitude(s) for diagram number 1142 - VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26431,7 +26431,7 @@ namespace mg5amcCpu jamp_sv[100] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[60], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[60], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26451,7 +26451,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[20], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[20], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26478,7 +26478,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1143 - VVV1_0( w_fp[68], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[68], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26498,7 +26498,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[29], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[29], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26518,7 +26518,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[10], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[10], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26542,12 +26542,12 @@ namespace mg5amcCpu // *** DIAGRAM 1144 OF 1240 *** // Wavefunction(s) for diagram number 1144 - FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] ); - FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[111] ); - FFV1_2( w_fp[3], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); + FFV1_2( w_fp[3], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[59] ); + FFV1_2( w_fp[3], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[111] ); + FFV1_2( w_fp[3], w_fp[10], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 1144 - FFV1_0( w_fp[59], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[59], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26555,7 +26555,7 @@ namespace mg5amcCpu jamp_sv[67] -= amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[71] += amp_sv[0]; - FFV1_0( w_fp[111], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[111], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26563,7 +26563,7 @@ namespace mg5amcCpu jamp_sv[68] += amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[70] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); 
+ FFV1_0( w_fp[98], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26578,7 +26578,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1145 - FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26590,7 +26590,7 @@ namespace mg5amcCpu jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[60], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[60], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26602,7 +26602,7 @@ namespace mg5amcCpu jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[20], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26621,7 +26621,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1146 - FFV1_0( w_fp[41], w_fp[33], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26629,7 +26629,7 @@ namespace mg5amcCpu jamp_sv[50] -= amp_sv[0]; jamp_sv[56] -= amp_sv[0]; jamp_sv[62] += amp_sv[0]; - FFV1_0( w_fp[41], 
w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26637,7 +26637,7 @@ namespace mg5amcCpu jamp_sv[54] += amp_sv[0]; jamp_sv[56] -= amp_sv[0]; jamp_sv[60] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[33], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26652,7 +26652,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1147 - FFV1_0( w_fp[59], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[59], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26660,7 +26660,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - FFV1_0( w_fp[111], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[111], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26668,7 +26668,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[98], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26683,7 +26683,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1148 - FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + 
FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26695,7 +26695,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26707,7 +26707,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26726,7 +26726,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1149 - FFV1_0( w_fp[46], w_fp[47], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26734,7 +26734,7 @@ namespace mg5amcCpu jamp_sv[100] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[47], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26742,7 +26742,7 @@ namespace mg5amcCpu jamp_sv[103] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[114] 
+= amp_sv[0]; - FFV1_0( w_fp[46], w_fp[47], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26754,12 +26754,12 @@ namespace mg5amcCpu // *** DIAGRAM 1150 OF 1240 *** // Wavefunction(s) for diagram number 1150 - FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] ); - FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] ); - FFV1_1( w_fp[2], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] ); + FFV1_1( w_fp[2], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[17] ); + FFV1_1( w_fp[2], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[68] ); + FFV1_1( w_fp[2], w_fp[10], COUPs[1], cIPD[0], cIPD[1], w_fp[29] ); // Amplitude(s) for diagram number 1150 - FFV1_0( w_fp[46], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26767,7 +26767,7 @@ namespace mg5amcCpu jamp_sv[13] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; jamp_sv[79] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[68], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[68], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26775,7 +26775,7 @@ namespace mg5amcCpu jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; jamp_sv[73] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[29], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[29], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26790,7 +26790,7 @@ namespace mg5amcCpu 
// (none) // Amplitude(s) for diagram number 1151 - FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26802,7 +26802,7 @@ namespace mg5amcCpu jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26814,7 +26814,7 @@ namespace mg5amcCpu jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[20], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26833,7 +26833,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1152 - FFV1_0( w_fp[41], w_fp[17], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[17], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26841,7 +26841,7 @@ namespace mg5amcCpu jamp_sv[12] -= amp_sv[0]; jamp_sv[36] -= amp_sv[0]; jamp_sv[78] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[68], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[68], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif 
@@ -26849,7 +26849,7 @@ namespace mg5amcCpu jamp_sv[26] += amp_sv[0]; jamp_sv[36] -= amp_sv[0]; jamp_sv[72] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[29], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[29], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26864,7 +26864,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1153 - FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26876,7 +26876,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26888,7 +26888,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26907,7 +26907,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1154 - FFV1_0( w_fp[3], w_fp[17], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -26919,7 +26919,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[68], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[68], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26931,7 +26931,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[29], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[29], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26950,7 +26950,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1155 - FFV1_0( w_fp[59], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[59], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26962,7 +26962,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[111], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[111], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26974,7 +26974,7 @@ namespace mg5amcCpu jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[98], w_fp[2], w_fp[27], COUPs[1], 1.0, 
&_fp[0] ); + FFV1_0( w_fp[98], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26990,12 +26990,12 @@ namespace mg5amcCpu // *** DIAGRAM 1156 OF 1240 *** // Wavefunction(s) for diagram number 1156 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[98] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[27] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[98] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[27] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 1156 - VVVV1_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27015,7 +27015,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27035,7 +27035,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; - VVVV4_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27055,7 +27055,7 @@ namespace mg5amcCpu 
jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - VVVV1_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27075,7 +27075,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[97] -= amp_sv[0]; - VVVV3_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27095,7 +27095,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - VVVV4_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27115,7 +27115,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVVV1_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27135,7 +27135,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[102] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - VVVV3_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -27155,7 +27155,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVVV4_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27179,12 +27179,12 @@ namespace mg5amcCpu // *** DIAGRAM 1157 OF 1240 *** // Wavefunction(s) for diagram number 1157 - VVV1P0_1( w_fp[98], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[59] ); - VVV1P0_1( w_fp[27], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[29] ); - VVV1P0_1( w_fp[111], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[98], w_fp[4], COUPs[0], 0., 0., w_fp[59] ); + VVV1P0_1( w_fp[27], w_fp[4], COUPs[0], 0., 0., w_fp[29] ); + VVV1P0_1( w_fp[111], w_fp[4], COUPs[0], 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 1157 - VVV1_0( w_fp[8], w_fp[5], w_fp[59], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[59], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27204,7 +27204,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[29], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[29], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27224,7 +27224,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[68], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[68], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27248,12 +27248,12 @@ namespace mg5amcCpu // *** DIAGRAM 1158 OF 1240 *** // Wavefunction(s) for diagram number 1158 - VVV1P0_1( w_fp[98], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[17] ); - VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[21] ); - VVV1P0_1( w_fp[111], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[98], w_fp[5], COUPs[0], 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[111], w_fp[5], COUPs[0], 0., 0., w_fp[71] ); // Amplitude(s) for diagram number 1158 - VVV1_0( w_fp[8], w_fp[4], w_fp[17], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[17], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27273,7 +27273,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27293,7 +27293,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[71], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[71], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27320,7 +27320,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1159 - VVV1_0( w_fp[98], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[98], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -27340,7 +27340,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVV1_0( w_fp[27], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[27], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27360,7 +27360,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[97] -= amp_sv[0]; - VVV1_0( w_fp[111], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[111], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27384,12 +27384,12 @@ namespace mg5amcCpu // *** DIAGRAM 1160 OF 1240 *** // Wavefunction(s) for diagram number 1160 - FFV1_2( w_fp[3], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); - FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); - FFV1_2( w_fp[3], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); + FFV1_2( w_fp[3], w_fp[98], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); + FFV1_2( w_fp[3], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); + FFV1_2( w_fp[3], w_fp[111], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); // Amplitude(s) for diagram number 1160 - FFV1_0( w_fp[16], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27397,7 +27397,7 @@ namespace mg5amcCpu jamp_sv[61] -= amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[65] += amp_sv[0]; - FFV1_0( w_fp[20], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[20], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27405,7 +27405,7 @@ namespace mg5amcCpu jamp_sv[62] += amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[64] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27420,7 +27420,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1161 - FFV1_0( w_fp[3], w_fp[33], w_fp[17], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[17], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27432,7 +27432,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27444,7 +27444,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27463,7 +27463,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1162 - FFV1_0( w_fp[38], w_fp[33], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], 
w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27471,7 +27471,7 @@ namespace mg5amcCpu jamp_sv[52] -= amp_sv[0]; jamp_sv[58] -= amp_sv[0]; jamp_sv[68] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[33], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27479,7 +27479,7 @@ namespace mg5amcCpu jamp_sv[55] += amp_sv[0]; jamp_sv[58] -= amp_sv[0]; jamp_sv[66] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[33], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[111], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27494,7 +27494,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1163 - FFV1_0( w_fp[16], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27502,7 +27502,7 @@ namespace mg5amcCpu jamp_sv[85] -= amp_sv[0]; jamp_sv[87] -= amp_sv[0]; jamp_sv[89] += amp_sv[0]; - FFV1_0( w_fp[20], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[20], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27510,7 +27510,7 @@ namespace mg5amcCpu jamp_sv[86] += amp_sv[0]; jamp_sv[87] -= amp_sv[0]; jamp_sv[88] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27525,7 +27525,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1164 - FFV1_0( w_fp[3], w_fp[39], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[59], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27537,7 +27537,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27549,7 +27549,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27568,7 +27568,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1165 - FFV1_0( w_fp[46], w_fp[39], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27576,7 +27576,7 @@ namespace mg5amcCpu jamp_sv[76] -= amp_sv[0]; jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], 
w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27584,7 +27584,7 @@ namespace mg5amcCpu jamp_sv[79] += amp_sv[0]; jamp_sv[82] -= amp_sv[0]; jamp_sv[90] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[39], w_fp[111], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[111], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27596,12 +27596,12 @@ namespace mg5amcCpu // *** DIAGRAM 1166 OF 1240 *** // Wavefunction(s) for diagram number 1166 - FFV1_1( w_fp[2], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); - FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); - FFV1_1( w_fp[2], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] ); + FFV1_1( w_fp[2], w_fp[98], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); + FFV1_1( w_fp[2], w_fp[111], COUPs[1], cIPD[0], cIPD[1], w_fp[27] ); // Amplitude(s) for diagram number 1166 - FFV1_0( w_fp[46], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27609,7 +27609,7 @@ namespace mg5amcCpu jamp_sv[19] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[98], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[98], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27617,7 +27617,7 @@ namespace mg5amcCpu jamp_sv[29] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - FFV1_0( 
w_fp[46], w_fp[27], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[27], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27632,7 +27632,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1167 - FFV1_0( w_fp[46], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27644,7 +27644,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27656,7 +27656,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27675,7 +27675,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1168 - FFV1_0( w_fp[38], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27683,7 +27683,7 @@ namespace mg5amcCpu jamp_sv[18] -= amp_sv[0]; jamp_sv[42] -= 
amp_sv[0]; jamp_sv[102] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[98], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[98], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27691,7 +27691,7 @@ namespace mg5amcCpu jamp_sv[28] += amp_sv[0]; jamp_sv[42] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[27], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[27], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27706,7 +27706,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1169 - FFV1_0( w_fp[38], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27718,7 +27718,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27730,7 +27730,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27749,7 
+27749,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1170 - FFV1_0( w_fp[3], w_fp[23], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27761,7 +27761,7 @@ namespace mg5amcCpu jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[98], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[98], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27773,7 +27773,7 @@ namespace mg5amcCpu jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[27], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[27], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27792,7 +27792,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1171 - FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27804,7 +27804,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[20], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[20], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27816,7 +27816,7 @@ namespace mg5amcCpu jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[60], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27832,15 +27832,15 @@ namespace mg5amcCpu // *** DIAGRAM 1172 OF 1240 *** // Wavefunction(s) for diagram number 1172 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[60] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[20] ); - FFV1_2( w_fp[3], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); - FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] ); - FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[60] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[20] ); + FFV1_2( w_fp[3], w_fp[60], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); + FFV1_2( w_fp[3], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[27] ); + FFV1_2( w_fp[3], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 1172 - FFV1_0( w_fp[16], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27848,7 +27848,7 @@ namespace mg5amcCpu jamp_sv[43] -= amp_sv[0]; jamp_sv[45] -= amp_sv[0]; jamp_sv[47] += amp_sv[0]; - FFV1_0( w_fp[27], w_fp[77], w_fp[6], COUPs[1], 
1.0, &_fp[0] ); + FFV1_0( w_fp[27], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27856,7 +27856,7 @@ namespace mg5amcCpu jamp_sv[44] += amp_sv[0]; jamp_sv[45] -= amp_sv[0]; jamp_sv[46] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27868,12 +27868,12 @@ namespace mg5amcCpu // *** DIAGRAM 1173 OF 1240 *** // Wavefunction(s) for diagram number 1173 - VVV1P0_1( w_fp[60], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] ); - VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[68] ); - VVV1P0_1( w_fp[20], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] ); + VVV1P0_1( w_fp[60], w_fp[6], COUPs[0], 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[20], w_fp[6], COUPs[0], 0., 0., w_fp[29] ); // Amplitude(s) for diagram number 1173 - FFV1_0( w_fp[3], w_fp[77], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27885,7 +27885,7 @@ namespace mg5amcCpu jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27897,7 +27897,7 @@ namespace mg5amcCpu jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27916,7 +27916,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1174 - FFV1_0( w_fp[41], w_fp[77], w_fp[60], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[60], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27924,7 +27924,7 @@ namespace mg5amcCpu jamp_sv[26] -= amp_sv[0]; jamp_sv[32] -= amp_sv[0]; jamp_sv[38] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27932,7 +27932,7 @@ namespace mg5amcCpu jamp_sv[30] += amp_sv[0]; jamp_sv[32] -= amp_sv[0]; jamp_sv[36] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[77], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[20], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27944,12 +27944,12 @@ namespace mg5amcCpu // *** DIAGRAM 1175 OF 1240 *** // Wavefunction(s) for diagram number 1175 - FFV1_1( w_fp[2], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] ); - FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); - FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + FFV1_1( w_fp[2], w_fp[60], COUPs[1], cIPD[0], cIPD[1], w_fp[59] ); + FFV1_1( w_fp[2], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); 
+ FFV1_1( w_fp[2], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 1175 - FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27957,7 +27957,7 @@ namespace mg5amcCpu jamp_sv[15] -= amp_sv[0]; jamp_sv[61] -= amp_sv[0]; jamp_sv[85] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[71], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[71], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27965,7 +27965,7 @@ namespace mg5amcCpu jamp_sv[51] += amp_sv[0]; jamp_sv[61] -= amp_sv[0]; jamp_sv[75] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[21], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[21], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27980,7 +27980,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1176 - FFV1_0( w_fp[52], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27992,7 +27992,7 @@ namespace mg5amcCpu jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -28004,7 +28004,7 @@ namespace mg5amcCpu jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28023,7 +28023,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1177 - FFV1_0( w_fp[52], w_fp[47], w_fp[60], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[60], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28031,7 +28031,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28039,7 +28039,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[47], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[20], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28054,7 +28054,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1178 - FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -28066,7 +28066,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[85] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[71], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[71], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28078,7 +28078,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[21], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[21], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28097,7 +28097,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1179 - FFV1_0( w_fp[16], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28109,7 +28109,7 @@ namespace mg5amcCpu jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[27], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[27], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28121,7 +28121,7 @@ namespace mg5amcCpu jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[98], w_fp[2], 
w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28140,7 +28140,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1180 - VVV1_0( w_fp[60], w_fp[72], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[60], w_fp[72], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28160,7 +28160,7 @@ namespace mg5amcCpu jamp_sv[103] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVV1_0( w_fp[24], w_fp[72], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[72], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28180,7 +28180,7 @@ namespace mg5amcCpu jamp_sv[104] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[20], w_fp[72], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[20], w_fp[72], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28207,7 +28207,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1181 - VVVV1_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28227,7 +28227,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28247,7 +28247,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - VVVV4_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28267,7 +28267,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV1_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28287,7 +28287,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV3_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28307,7 +28307,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVVV4_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28327,7 +28327,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV1_0( w_fp[20], w_fp[1], 
w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28347,7 +28347,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[115] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV3_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28367,7 +28367,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28391,12 +28391,12 @@ namespace mg5amcCpu // *** DIAGRAM 1182 OF 1240 *** // Wavefunction(s) for diagram number 1182 - VVV1P0_1( w_fp[60], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[72] ); - VVV1P0_1( w_fp[24], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[60] ); - VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[24] ); + VVV1P0_1( w_fp[60], w_fp[1], COUPs[0], 0., 0., w_fp[72] ); + VVV1P0_1( w_fp[24], w_fp[1], COUPs[0], 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 1182 - VVV1_0( w_fp[8], w_fp[6], w_fp[72], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[72], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28416,7 +28416,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; 
jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28436,7 +28436,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[24], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[24], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28463,7 +28463,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1183 - VVV1_0( w_fp[1], w_fp[8], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28483,7 +28483,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[68], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[68], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28503,7 +28503,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28530,7 +28530,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1184 - FFV1_0( 
w_fp[3], w_fp[47], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28542,7 +28542,7 @@ namespace mg5amcCpu jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28554,7 +28554,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28573,7 +28573,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1185 - FFV1_0( w_fp[16], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28581,7 +28581,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - FFV1_0( w_fp[27], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[27], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28589,7 +28589,7 @@ namespace mg5amcCpu jamp_sv[104] 
+= amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28604,7 +28604,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1186 - FFV1_0( w_fp[41], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28616,7 +28616,7 @@ namespace mg5amcCpu jamp_sv[38] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28628,7 +28628,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28647,7 +28647,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1187 - FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28655,7 
+28655,7 @@ namespace mg5amcCpu jamp_sv[14] -= amp_sv[0]; jamp_sv[60] -= amp_sv[0]; jamp_sv[84] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[71], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[71], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28663,7 +28663,7 @@ namespace mg5amcCpu jamp_sv[50] += amp_sv[0]; jamp_sv[60] -= amp_sv[0]; jamp_sv[74] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[21], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[21], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28675,15 +28675,15 @@ namespace mg5amcCpu // *** DIAGRAM 1188 OF 1240 *** // Wavefunction(s) for diagram number 1188 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[71] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[59] ); - FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] ); - FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); - FFV1_2( w_fp[3], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[71] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[59] ); + FFV1_2( w_fp[3], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[24] ); + FFV1_2( w_fp[3], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); + FFV1_2( w_fp[3], w_fp[59], COUPs[1], cIPD[0], cIPD[1], w_fp[72] ); // Amplitude(s) for diagram number 1188 - FFV1_0( w_fp[24], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[24], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28691,7 +28691,7 @@ namespace mg5amcCpu jamp_sv[37] -= amp_sv[0]; jamp_sv[39] -= amp_sv[0]; jamp_sv[41] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28699,7 +28699,7 @@ namespace mg5amcCpu jamp_sv[38] += amp_sv[0]; jamp_sv[39] -= amp_sv[0]; jamp_sv[40] += amp_sv[0]; - FFV1_0( w_fp[72], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[72], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28711,12 +28711,12 @@ namespace mg5amcCpu // *** DIAGRAM 1189 OF 1240 *** // Wavefunction(s) for diagram number 1189 - VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[98] ); - VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[27] ); - VVV1P0_1( w_fp[59], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 0., 0., w_fp[98] ); + VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 0., 0., w_fp[27] ); + VVV1P0_1( w_fp[59], w_fp[5], COUPs[0], 0., 0., w_fp[16] ); // Amplitude(s) for diagram number 1189 - FFV1_0( w_fp[3], w_fp[77], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28728,7 +28728,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[27], COUPs[1], 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28740,7 +28740,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28759,7 +28759,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1190 - FFV1_0( w_fp[38], w_fp[77], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28767,7 +28767,7 @@ namespace mg5amcCpu jamp_sv[28] -= amp_sv[0]; jamp_sv[34] -= amp_sv[0]; jamp_sv[44] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[77], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28775,7 +28775,7 @@ namespace mg5amcCpu jamp_sv[31] += amp_sv[0]; jamp_sv[34] -= amp_sv[0]; jamp_sv[42] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[77], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[59], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28787,12 +28787,12 @@ namespace mg5amcCpu // *** DIAGRAM 1191 OF 1240 *** // Wavefunction(s) for diagram number 1191 - FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] ); - FFV1_1( w_fp[2], 
w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] ); - FFV1_1( w_fp[2], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[29] ); + FFV1_1( w_fp[2], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[68] ); + FFV1_1( w_fp[2], w_fp[59], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 1191 - FFV1_0( w_fp[52], w_fp[29], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[29], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28800,7 +28800,7 @@ namespace mg5amcCpu jamp_sv[21] -= amp_sv[0]; jamp_sv[67] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[68], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[68], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28808,7 +28808,7 @@ namespace mg5amcCpu jamp_sv[53] += amp_sv[0]; jamp_sv[67] -= amp_sv[0]; jamp_sv[99] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28823,7 +28823,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1192 - FFV1_0( w_fp[52], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28835,7 +28835,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; - 
FFV1_0( w_fp[52], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28847,7 +28847,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28866,7 +28866,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1193 - FFV1_0( w_fp[52], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28874,7 +28874,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28882,7 +28882,7 @@ namespace mg5amcCpu jamp_sv[85] += amp_sv[0]; jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[39], w_fp[59], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[59], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28897,7 +28897,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1194 - FFV1_0( 
w_fp[3], w_fp[29], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[29], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28909,7 +28909,7 @@ namespace mg5amcCpu jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[68], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[68], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28921,7 +28921,7 @@ namespace mg5amcCpu jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[23], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28940,7 +28940,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1195 - FFV1_0( w_fp[24], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[24], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28952,7 +28952,7 @@ namespace mg5amcCpu jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[60], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28964,7 
+28964,7 @@ namespace mg5amcCpu jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[72], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[72], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28983,7 +28983,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1196 - VVV1_0( w_fp[21], w_fp[66], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[21], w_fp[66], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29003,7 +29003,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVV1_0( w_fp[71], w_fp[66], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[71], w_fp[66], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29023,7 +29023,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[98] -= amp_sv[0]; jamp_sv[99] += amp_sv[0]; - VVV1_0( w_fp[59], w_fp[66], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[59], w_fp[66], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29050,7 +29050,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1197 - VVVV1_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-29070,7 +29070,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - VVVV3_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29090,7 +29090,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; - VVVV4_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29110,7 +29110,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; - VVVV1_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29130,7 +29130,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVVV3_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29150,7 +29150,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV4_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29170,7 +29170,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV1_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29190,7 +29190,7 @@ namespace mg5amcCpu jamp_sv[94] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVVV3_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29210,7 +29210,7 @@ namespace mg5amcCpu jamp_sv[99] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV4_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29234,12 +29234,12 @@ namespace mg5amcCpu // *** DIAGRAM 1198 OF 1240 *** // Wavefunction(s) for diagram number 1198 - VVV1P0_1( w_fp[21], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[66] ); - VVV1P0_1( w_fp[71], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] ); - VVV1P0_1( w_fp[59], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[21], w_fp[1], COUPs[0], 0., 0., w_fp[66] ); + VVV1P0_1( w_fp[71], w_fp[1], COUPs[0], 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[59], w_fp[1], COUPs[0], 0., 0., w_fp[71] ); // Amplitude(s) for diagram number 1198 - VVV1_0( w_fp[8], w_fp[5], w_fp[66], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[66], COUPs[0], &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29259,7 +29259,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29279,7 +29279,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[71], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[71], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29306,7 +29306,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1199 - VVV1_0( w_fp[1], w_fp[8], w_fp[98], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[98], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29326,7 +29326,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29346,7 +29346,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[16], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here 
the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29373,7 +29373,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1200 - FFV1_0( w_fp[3], w_fp[39], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29385,7 +29385,7 @@ namespace mg5amcCpu jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29397,7 +29397,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29416,7 +29416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1201 - FFV1_0( w_fp[24], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[24], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29424,7 +29424,7 @@ namespace mg5amcCpu jamp_sv[79] -= amp_sv[0]; jamp_sv[81] -= amp_sv[0]; jamp_sv[83] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[60], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29432,7 +29432,7 @@ namespace mg5amcCpu jamp_sv[80] += amp_sv[0]; jamp_sv[81] -= amp_sv[0]; jamp_sv[82] += amp_sv[0]; - FFV1_0( w_fp[72], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[72], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29447,7 +29447,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1202 - FFV1_0( w_fp[38], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29459,7 +29459,7 @@ namespace mg5amcCpu jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29471,7 +29471,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29490,7 +29490,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1203 - FFV1_0( w_fp[38], w_fp[29], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[29], 
w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29498,7 +29498,7 @@ namespace mg5amcCpu jamp_sv[20] -= amp_sv[0]; jamp_sv[66] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[68], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[68], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29506,7 +29506,7 @@ namespace mg5amcCpu jamp_sv[52] += amp_sv[0]; jamp_sv[66] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[23], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[23], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29518,15 +29518,15 @@ namespace mg5amcCpu // *** DIAGRAM 1204 OF 1240 *** // Wavefunction(s) for diagram number 1204 - VVVV1P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] ); - VVVV3P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[68] ); - VVVV4P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[29] ); - FFV1_2( w_fp[3], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); - FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); - FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] ); + VVVV1P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[23] ); + VVVV3P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[68] ); + VVVV4P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[29] ); + FFV1_2( w_fp[3], w_fp[23], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); + FFV1_2( w_fp[3], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[3], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[66] ); // Amplitude(s) for diagram number 
1204 - FFV1_0( w_fp[71], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29534,7 +29534,7 @@ namespace mg5amcCpu jamp_sv[31] -= amp_sv[0]; jamp_sv[33] -= amp_sv[0]; jamp_sv[35] += amp_sv[0]; - FFV1_0( w_fp[21], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29542,7 +29542,7 @@ namespace mg5amcCpu jamp_sv[32] += amp_sv[0]; jamp_sv[33] -= amp_sv[0]; jamp_sv[34] += amp_sv[0]; - FFV1_0( w_fp[66], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[66], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29554,12 +29554,12 @@ namespace mg5amcCpu // *** DIAGRAM 1205 OF 1240 *** // Wavefunction(s) for diagram number 1205 - VVV1P0_1( w_fp[23], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[72] ); - VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[60] ); - VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[24] ); + VVV1P0_1( w_fp[23], w_fp[4], COUPs[0], 0., 0., w_fp[72] ); + VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 1205 - FFV1_0( w_fp[3], w_fp[77], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29571,7 +29571,7 @@ namespace mg5amcCpu jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[60], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[60], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29583,7 +29583,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29602,7 +29602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1206 - FFV1_0( w_fp[46], w_fp[77], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29610,7 +29610,7 @@ namespace mg5amcCpu jamp_sv[29] -= amp_sv[0]; jamp_sv[40] -= amp_sv[0]; jamp_sv[46] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[77], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29618,7 +29618,7 @@ namespace mg5amcCpu jamp_sv[37] += amp_sv[0]; jamp_sv[40] -= amp_sv[0]; jamp_sv[43] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29630,12 +29630,12 @@ 
namespace mg5amcCpu // *** DIAGRAM 1207 OF 1240 *** // Wavefunction(s) for diagram number 1207 - FFV1_1( w_fp[2], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); - FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); - FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] ); + FFV1_1( w_fp[2], w_fp[23], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); + FFV1_1( w_fp[2], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[2], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[27] ); // Amplitude(s) for diagram number 1207 - FFV1_0( w_fp[52], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29643,7 +29643,7 @@ namespace mg5amcCpu jamp_sv[23] -= amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[16], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[16], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29651,7 +29651,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[27], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[27], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29666,7 +29666,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1208 - FFV1_0( w_fp[52], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -29678,7 +29678,7 @@ namespace mg5amcCpu jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29690,7 +29690,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29709,7 +29709,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1209 - FFV1_0( w_fp[52], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29717,7 +29717,7 @@ namespace mg5amcCpu jamp_sv[53] -= amp_sv[0]; jamp_sv[64] -= amp_sv[0]; jamp_sv[70] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[33], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29725,7 +29725,7 @@ namespace mg5amcCpu jamp_sv[61] += amp_sv[0]; jamp_sv[64] -= amp_sv[0]; jamp_sv[67] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29740,7 +29740,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1210 - FFV1_0( w_fp[3], w_fp[77], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29752,7 +29752,7 @@ namespace mg5amcCpu jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[16], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[16], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29764,7 +29764,7 @@ namespace mg5amcCpu jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[27], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[27], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29783,7 +29783,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1211 - FFV1_0( w_fp[71], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29795,7 +29795,7 @@ namespace mg5amcCpu jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[21], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], 
w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29807,7 +29807,7 @@ namespace mg5amcCpu jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[66], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[66], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29826,7 +29826,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1212 - VVV1_0( w_fp[23], w_fp[61], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[23], w_fp[61], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29846,7 +29846,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[68], w_fp[61], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[68], w_fp[61], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29866,7 +29866,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[100] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[29], w_fp[61], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[29], w_fp[61], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29893,7 +29893,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1213 - VVVV1_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[23], 
w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29913,7 +29913,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - VVVV3_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29933,7 +29933,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV4_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29953,7 +29953,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV1_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29973,7 +29973,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV3_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29993,7 +29993,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - 
VVVV4_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30013,7 +30013,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV1_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30033,7 +30033,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV3_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30053,7 +30053,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV4_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30077,12 +30077,12 @@ namespace mg5amcCpu // *** DIAGRAM 1214 OF 1240 *** // Wavefunction(s) for diagram number 1214 - VVV1P0_1( w_fp[23], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[61] ); - VVV1P0_1( w_fp[68], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[23] ); - VVV1P0_1( w_fp[29], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[23], w_fp[1], COUPs[0], 0., 0., w_fp[61] ); + VVV1P0_1( w_fp[68], w_fp[1], COUPs[0], 0., 0., w_fp[23] ); + VVV1P0_1( 
w_fp[29], w_fp[1], COUPs[0], 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 1214 - VVV1_0( w_fp[8], w_fp[4], w_fp[61], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[61], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30102,7 +30102,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30122,7 +30122,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[68], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[68], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30149,7 +30149,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1215 - VVV1_0( w_fp[1], w_fp[8], w_fp[72], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[72], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30169,7 +30169,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[60], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[60], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30189,7 +30189,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; 
jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30216,7 +30216,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1216 - FFV1_0( w_fp[3], w_fp[33], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30228,7 +30228,7 @@ namespace mg5amcCpu jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30240,7 +30240,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30259,7 +30259,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1217 - FFV1_0( w_fp[71], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[71], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30267,7 +30267,7 @@ namespace 
mg5amcCpu jamp_sv[55] -= amp_sv[0]; jamp_sv[57] -= amp_sv[0]; jamp_sv[59] += amp_sv[0]; - FFV1_0( w_fp[21], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30275,7 +30275,7 @@ namespace mg5amcCpu jamp_sv[56] += amp_sv[0]; jamp_sv[57] -= amp_sv[0]; jamp_sv[58] += amp_sv[0]; - FFV1_0( w_fp[66], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[66], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30290,7 +30290,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1218 - FFV1_0( w_fp[46], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30302,7 +30302,7 @@ namespace mg5amcCpu jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30314,7 +30314,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv 
and denominators_sv (#473) #endif @@ -30333,7 +30333,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1219 - FFV1_0( w_fp[46], w_fp[77], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30341,7 +30341,7 @@ namespace mg5amcCpu jamp_sv[22] -= amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[16], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[16], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30349,7 +30349,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[27], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[27], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30364,7 +30364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1220 - VVVV1_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30384,7 +30384,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -30404,7 +30404,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30424,7 +30424,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30444,7 +30444,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30464,7 +30464,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30484,7 +30484,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here 
the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30504,7 +30504,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30524,7 +30524,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30548,12 +30548,12 @@ namespace mg5amcCpu // *** DIAGRAM 1221 OF 1240 *** // Wavefunction(s) for diagram number 1221 - VVV1P0_1( w_fp[0], w_fp[73], COUPs[0], 1.0, 0., 0., w_fp[27] ); - VVV1P0_1( w_fp[0], w_fp[79], COUPs[0], 1.0, 0., 0., w_fp[1] ); - VVV1P0_1( w_fp[0], w_fp[80], COUPs[0], 1.0, 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[0], w_fp[73], COUPs[0], 0., 0., w_fp[27] ); + VVV1P0_1( w_fp[0], w_fp[79], COUPs[0], 0., 0., w_fp[1] ); + VVV1P0_1( w_fp[0], w_fp[80], COUPs[0], 0., 0., w_fp[16] ); // Amplitude(s) for diagram number 1221 - VVV1_0( w_fp[8], w_fp[6], w_fp[27], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[27], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30573,7 +30573,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[1], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[1], COUPs[0], 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30593,7 +30593,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30620,7 +30620,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1222 - VVV1_0( w_fp[73], w_fp[6], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[73], w_fp[6], w_fp[56], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30640,7 +30640,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[79], w_fp[6], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[79], w_fp[6], w_fp[56], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30660,7 +30660,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[80], w_fp[6], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[80], w_fp[6], w_fp[56], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30687,7 +30687,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1223 - FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here 
the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30699,7 +30699,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30711,7 +30711,7 @@ namespace mg5amcCpu jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30730,7 +30730,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1224 - FFV1_0( w_fp[3], w_fp[113], w_fp[73], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[73], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30738,7 +30738,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[113], w_fp[79], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[79], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30746,7 +30746,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[113], w_fp[80], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], 
w_fp[80], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30761,7 +30761,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1225 - FFV1_0( w_fp[41], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30773,7 +30773,7 @@ namespace mg5amcCpu jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30785,7 +30785,7 @@ namespace mg5amcCpu jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30804,7 +30804,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1226 - FFV1_0( w_fp[62], w_fp[2], w_fp[73], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[73], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30812,7 +30812,7 @@ namespace mg5amcCpu jamp_sv[38] -= amp_sv[0]; jamp_sv[62] -= amp_sv[0]; jamp_sv[86] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[2], w_fp[79], COUPs[1], 1.0, 
&_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30820,7 +30820,7 @@ namespace mg5amcCpu jamp_sv[56] += amp_sv[0]; jamp_sv[62] -= amp_sv[0]; jamp_sv[80] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30835,7 +30835,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1227 - VVVV1_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30855,7 +30855,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30875,7 +30875,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30895,7 +30895,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[81], 
w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30915,7 +30915,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30935,7 +30935,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30955,7 +30955,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30975,7 +30975,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30995,7 +30995,7 @@ namespace mg5amcCpu jamp_sv[95] -= 
amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31019,12 +31019,12 @@ namespace mg5amcCpu // *** DIAGRAM 1228 OF 1240 *** // Wavefunction(s) for diagram number 1228 - VVV1P0_1( w_fp[0], w_fp[57], COUPs[0], 1.0, 0., 0., w_fp[62] ); - VVV1P0_1( w_fp[0], w_fp[81], COUPs[0], 1.0, 0., 0., w_fp[80] ); - VVV1P0_1( w_fp[0], w_fp[82], COUPs[0], 1.0, 0., 0., w_fp[79] ); + VVV1P0_1( w_fp[0], w_fp[57], COUPs[0], 0., 0., w_fp[62] ); + VVV1P0_1( w_fp[0], w_fp[81], COUPs[0], 0., 0., w_fp[80] ); + VVV1P0_1( w_fp[0], w_fp[82], COUPs[0], 0., 0., w_fp[79] ); // Amplitude(s) for diagram number 1228 - VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31044,7 +31044,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[80], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[80], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31064,7 +31064,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[79], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[79], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31091,7 +31091,7 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1229 - VVV1_0( w_fp[57], w_fp[5], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[57], w_fp[5], w_fp[56], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31111,7 +31111,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[81], w_fp[5], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[81], w_fp[5], w_fp[56], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31131,7 +31131,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVV1_0( w_fp[82], w_fp[5], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[82], w_fp[5], w_fp[56], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31158,7 +31158,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1230 - FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31170,7 +31170,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[80], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[80], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31182,7 +31182,7 @@ namespace 
mg5amcCpu jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[79], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[79], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31201,7 +31201,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1231 - FFV1_0( w_fp[3], w_fp[102], w_fp[57], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[57], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31209,7 +31209,7 @@ namespace mg5amcCpu jamp_sv[73] -= amp_sv[0]; jamp_sv[75] -= amp_sv[0]; jamp_sv[77] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[81], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[81], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31217,7 +31217,7 @@ namespace mg5amcCpu jamp_sv[74] += amp_sv[0]; jamp_sv[75] -= amp_sv[0]; jamp_sv[76] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[82], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[82], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31232,7 +31232,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1232 - FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31244,7 +31244,7 @@ namespace 
mg5amcCpu jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31256,7 +31256,7 @@ namespace mg5amcCpu jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31275,7 +31275,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1233 - FFV1_0( w_fp[104], w_fp[2], w_fp[57], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[57], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31283,7 +31283,7 @@ namespace mg5amcCpu jamp_sv[44] -= amp_sv[0]; jamp_sv[68] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - FFV1_0( w_fp[104], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31291,7 +31291,7 @@ namespace mg5amcCpu jamp_sv[58] += amp_sv[0]; jamp_sv[68] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; - FFV1_0( w_fp[104], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -31306,7 +31306,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1234 - VVVV1_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31326,7 +31326,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31346,7 +31346,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31366,7 +31366,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31386,7 +31386,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here 
the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31406,7 +31406,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31426,7 +31426,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31446,7 +31446,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31466,7 +31466,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[107] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31490,12 +31490,12 @@ namespace mg5amcCpu // *** DIAGRAM 1235 OF 1240 *** // Wavefunction(s) for diagram number 1235 - VVV1P0_1( w_fp[0], w_fp[55], COUPs[0], 1.0, 0., 0., w_fp[104] ); - VVV1P0_1( w_fp[0], w_fp[83], 
COUPs[0], 1.0, 0., 0., w_fp[82] ); - VVV1P0_1( w_fp[0], w_fp[84], COUPs[0], 1.0, 0., 0., w_fp[81] ); + VVV1P0_1( w_fp[0], w_fp[55], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[0], w_fp[83], COUPs[0], 0., 0., w_fp[82] ); + VVV1P0_1( w_fp[0], w_fp[84], COUPs[0], 0., 0., w_fp[81] ); // Amplitude(s) for diagram number 1235 - VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31515,7 +31515,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[82], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[82], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31535,7 +31535,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[81], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[81], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31562,7 +31562,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1236 - VVV1_0( w_fp[55], w_fp[4], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[55], w_fp[4], w_fp[56], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31582,7 +31582,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[83], w_fp[4], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[83], w_fp[4], w_fp[56], 
COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31602,7 +31602,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVV1_0( w_fp[84], w_fp[4], w_fp[56], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[84], w_fp[4], w_fp[56], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31629,7 +31629,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1237 - FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31641,7 +31641,7 @@ namespace mg5amcCpu jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[82], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[82], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31653,7 +31653,7 @@ namespace mg5amcCpu jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[81], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[81], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31672,7 +31672,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1238 - FFV1_0( w_fp[3], w_fp[114], w_fp[55], COUPs[1], 1.0, &_fp[0] 
); + FFV1_0( w_fp[3], w_fp[114], w_fp[55], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31680,7 +31680,7 @@ namespace mg5amcCpu jamp_sv[49] -= amp_sv[0]; jamp_sv[51] -= amp_sv[0]; jamp_sv[53] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[114], w_fp[83], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[83], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31688,7 +31688,7 @@ namespace mg5amcCpu jamp_sv[50] += amp_sv[0]; jamp_sv[51] -= amp_sv[0]; jamp_sv[52] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[114], w_fp[84], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[84], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31703,7 +31703,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1239 - FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31715,7 +31715,7 @@ namespace mg5amcCpu jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31727,7 +31727,7 @@ namespace mg5amcCpu jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * 
amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31746,7 +31746,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1240 - FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31754,7 +31754,7 @@ namespace mg5amcCpu jamp_sv[46] -= amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31762,7 +31762,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -32518,12 +32518,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for 
"mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk index c6c1826de7..43cee0977e 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk @@ -27,8 +27,6 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) - #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -222,8 +220,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +555,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) 
$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttggg.sa/mg5.in b/epochX/cudacpp/gg_ttggg.sa/mg5.in index 2a135334ff..644e3be9b4 100644 --- a/epochX/cudacpp/gg_ttggg.sa/mg5.in +++ b/epochX/cudacpp/gg_ttggg.sa/mg5.in @@ -1,4 +1,3 @@ -set stdout_level DEBUG -set zerowidth_tchannel F generate g g > t t~ g g g -output standalone_cudacpp gg_ttggg.sa +output standalone_cudacpp gg_ttggg.sa --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp + diff --git a/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h index 9b946c21e1..9cea8bcbe7 100644 --- a/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h @@ -863,7 +863,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -874,7 +873,6 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -888,7 +886,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -899,7 +896,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -912,7 +908,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], 
const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -925,7 +920,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -940,7 +934,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -952,7 +945,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -967,7 +959,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -979,7 +970,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -994,7 +984,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -1006,7 +995,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -1020,7 +1008,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1055,7 +1042,6 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, 
const fptype W1, fptype allV1[] ) @@ -1094,7 +1080,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1118,7 +1103,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1150,7 +1134,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1182,7 +1165,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1215,7 +1197,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1244,7 +1225,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1280,7 +1260,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1309,7 +1288,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1345,7 +1323,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1374,7 +1351,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt 
b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 63bb0f3c9e..c3d29c5b80 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq.mg +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005384206771850586  +DEBUG: model prefixing takes 0.004678249359130859  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,21 +169,21 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.077 s +8 processes with 40 diagrams generated in 0.070 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  INFO: initialize a new directory: CODEGEN_mad_gq_ttq INFO: remove old information in CODEGEN_mad_gq_ttq -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g u > t t~ u WEIGHTED<=3 @1 INFO: Processing color information for process: g u > t t~ u @1 @@ 
-196,71 +196,118 @@ INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1710]  
+DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams 
for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  INFO: 
Created files CPPProcess.h and CPPProcess.cc in directory ./. +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s -Wrote files for 32 helas calls in 0.216 s +Generated helas calls for 
2 subprocesses (10 diagrams) in 0.028 s +Wrote files for 32 helas calls in 0.210 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.143 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  +ALOHA: aloha creates 2 routines in 0.125 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.129 s +ALOHA: aloha creates 4 routines in 0.113 s FFV1 FFV1 FFV1 FFV1 VVV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.cc +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. 
The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  ************************************************************ * * * W E L C O M E to * @@ -281,15 +328,14 @@ DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt Using default text editor "vi". 
Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP -run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -313,28 +359,27 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py -Hunk #1 succeeded at 4188 (offset 1 line). patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 527 (offset 58 lines). +Hunk #1 succeeded at 513 (offset 44 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -342,9 +387,11 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 247 (offset 26 lines). Hunk #4 succeeded at 281 (offset 32 lines). Hunk #5 succeeded at 326 (offset 32 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #6 succeeded at 441 (offset 45 lines). +Hunk #7 succeeded at 531 (offset 61 lines). 
+DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 527 (offset 58 lines). +Hunk #1 succeeded at 517 (offset 48 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -352,12 +399,14 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 247 (offset 26 lines). Hunk #4 succeeded at 281 (offset 32 lines). Hunk #5 succeeded at 326 (offset 32 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done. +Hunk #6 succeeded at 441 (offset 45 lines). +Hunk #7 succeeded at 531 (offset 61 lines). +Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/README +/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/README Run "open index.html" to see more information about this process. 
quit -real 0m2.606s -user 0m2.256s -sys 0m0.312s +real 0m2.498s +user 0m2.119s +sys 0m0.353s diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt index cdeedc7863..5ca005676e 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_file.inc index 4457933199..0c895f2b2c 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1_1.o FFV1_0.o VVV1_0.o FFV1_2.o FFV1P0_3.o +ALOHARoutine = FFV1_1.o FFV1_2.o VVV1_0.o FFV1_0.o FFV1P0_3.o diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/MatrixElementKernels.cc index 74b5239ebf..30257195b6 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/MatrixElementKernels.cc @@ -112,17 +112,10 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#elif defined( __x86_64__ ) || defined( __i386__ ) +#else bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; -#else // AV FIXME! 
Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted - bool known = false; // __builtin_cpu_supports is not supported - // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html - // See https://stackoverflow.com/q/62783908 - // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu - bool ok = true; // this is just an assumption! - const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index c526dd6b31..47666e308a 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -243,19 +243,26 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); +#if not( defined __CUDACC__ and defined MGONGPU_TEST_DIVERGENCE ) + imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz +#else + if( ( blockDim.x * blockIdx.x + threadIdx.x ) % 2 == 0 ) + imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz + else + ixxxxx( momenta, 0, cHel[ihel][1], +1, w_fp[1], 1 ); +#endif oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); + oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); - FFV1_2( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1_2( w_fp[1], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[4], 
w_fp[6], COUPs[0], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -266,11 +273,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -281,10 +288,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -295,10 +302,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); + FFV1_1( w_fp[4], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -312,7 +319,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -851,12 +858,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index 81ab70f6d1..ca1b7c1dc5 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -42,7 +42,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION D2,U2,S2,C2 DOUBLE PRECISION 
XPQ(-7:7),PD(0:MAXPROC) @@ -130,27 +129,14 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)), QSCALE) - U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)), QSCALE) - S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)), QSCALE) - C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)), QSCALE) + D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f index e6d01dad0b..a0750b5419 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f @@ -441,6 +441,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -530,6 +531,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index 8d92e4e769..04a5cc423c 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -243,19 +243,19 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); + omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - ixxxxx( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 ); + ixzxxx( momenta, cHel[ihel][4], -1, w_fp[4], 4 ); - FFV1_2( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1_2( w_fp[4], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -266,11 +266,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[4], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[4], w_fp[1], COUPs[0], 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 2 
- FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -281,10 +281,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -295,10 +295,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); + FFV1_1( w_fp[1], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -312,7 +312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -851,12 +851,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 
and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index b58c5d70bd..33e638e237 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -42,7 +42,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -130,27 +129,18 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - CX2=PDG2PDF(LPP(IB(2)),-4, 
IB(2),XBK(IB(2)), QSCALE) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f index 7a2e329e64..dfb8f9c040 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f @@ -441,6 +441,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -530,6 +531,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk index c6c1826de7..43cee0977e 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk @@ -27,8 +27,6 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) - #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -222,8 +220,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +555,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o 
diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile b/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile index 74b19033a8..74db44d848 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
+LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/gq_ttq.mad/bin/generate_events b/epochX/cudacpp/gq_ttq.mad/bin/generate_events index 5577cc66a0..107313b25d 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/generate_events +++ b/epochX/cudacpp/gq_ttq.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME -import misc as misc + import logging import logging.config @@ -160,31 +160,17 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv - - # check for plugin customization of the launch command - launch_interface = ME.MadEventCmdShell - if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): - with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - launch_interface = launch_plugin.MEINTERFACE - - - + argument = sys.argv try: if '-h' in argument or '--help' in argument: - launch = launch_interface(me_dir=root_path, force_run=True) + launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = launch_interface(me_dir=root_path, force_run=True) + launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py 
b/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py index e9f421ae5f..7624b9f557 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py @@ -1002,14 +1002,13 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() - self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - + self.plugin_input(finput) def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/gq_ttq.mad/bin/internal/ufomodel/py3_model.pkl index 27a1caae3c115073669b90622e9351ab04166d39..dc38da0bfa76ea4206a3c5b2d34b98c606f7d044 100644 GIT binary patch delta 23 fcmX?qj_Kk#rVZZ97)vMnEK_6l^>>+EyzCADfsqPs delta 21 dcmcb3j_Le4rVZZ97>g(SEK_5Qm|VK-4ghc73FH6( diff --git a/epochX/cudacpp/gq_ttq.mad/bin/madevent b/epochX/cudacpp/gq_ttq.mad/bin/madevent index 10b6a71fa2..c944aa1faf 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/madevent +++ b/epochX/cudacpp/gq_ttq.mad/bin/madevent @@ -32,7 +32,6 @@ except ImportError: import os -pjoin = os.path.join import optparse # Get the directory of the script real path (bin) @@ -161,23 +160,10 @@ except: pass import internal.madevent_interface as cmd_interface -# check for plugin customization of the launch command -launch_interface = cmd_interface.MadEventCmdShell -if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): - with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - launch_interface = launch_plugin.MEINTERFACE - - - # Call the cmd interface main loop try: if '-h' in args or '--help' in args: - launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) + launch = ME.MadEventCmdShell(me_dir=os.path.dirname(root_path), force_run=True) 
launch.exec_cmd('help generate_events') sys.exit(0) with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): @@ -192,7 +178,7 @@ try: cmd_line.run_cmd('import command ' + input_file) cmd_line.run_cmd('quit') else: - cmd_line = launch_interface(force_run=True) + cmd_line = cmd_interface.MadEventCmdShell(force_run=True) cmd_line.use_rawinput = False cmd_line.haspiping = False cmd_line.run_cmd('import command ' + input_file) @@ -202,7 +188,7 @@ try: if options.web: cmd_line = cmd_interface.MadEventCmd(force_run=True) else: - cmd_line = launch_interface(force_run=True) + cmd_line = cmd_interface.MadEventCmdShell(force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print( parser_error) diff --git a/epochX/cudacpp/gq_ttq.mad/mg5.in b/epochX/cudacpp/gq_ttq.mad/mg5.in index e93843b8cd..904e173bf4 100644 --- a/epochX/cudacpp/gq_ttq.mad/mg5.in +++ b/epochX/cudacpp/gq_ttq.mad/mg5.in @@ -1,5 +1,3 @@ -set stdout_level DEBUG -set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ generate g q > t t~ q output madevent gq_ttq.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h b/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h index 0dd5f20f71..901400d447 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h @@ -863,7 +863,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -874,7 +873,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -887,7 +885,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, 
const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -900,7 +897,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -914,7 +910,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //========================================================================== @@ -926,7 +921,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -950,7 +944,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -982,7 +975,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1014,7 +1006,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1046,7 +1037,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); diff --git a/epochX/cudacpp/gq_ttq.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt b/epochX/cudacpp/gq_ttq.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt index d596b33ae7..dd90c94acf 100644 --- a/epochX/cudacpp/gq_ttq.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt +++ b/epochX/cudacpp/gq_ttq.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt @@ -4,7 +4,7 @@ Event 0 Batch 0 2 2.647483690509011e+02 7.527657265342380e+01 -2.528976247704283e+02 -2.163164141117315e+01 3 6.252973211776936e+02 -5.721080498766041e+02 -1.578766990348905e+01 
2.518727230515587e+02 4 6.099543097714056e+02 4.968314772231802e+02 2.686852946739174e+02 -2.302410816403857e+02 - ME 6.254927412618323e-05 + ME 3.498510462248670e-04 Event 1 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -12,7 +12,7 @@ Event 1 Batch 0 2 2.542827954151951e+02 1.482213322085297e+02 -1.988618298139058e+02 -5.607271498295615e+01 3 6.883656117507998e+02 1.265478873489434e+02 5.602777828023585e+02 3.793700749224233e+02 4 5.573515928340058e+02 -2.747692195574731e+02 -3.614159529884527e+02 -3.232973599394667e+02 - ME 8.120933129385430e-05 + ME 7.257243108248426e-04 Event 2 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -20,7 +20,7 @@ Event 2 Batch 0 2 4.301460683791099e+02 -3.656995432079240e+02 -2.257802895903974e+02 -1.768459985405173e+01 3 5.058528987551350e+02 2.755467101243707e+02 -2.034821274188550e+02 3.722313656043856e+02 4 5.640010328657550e+02 9.015283308355326e+01 4.292624170092524e+02 -3.545467657503340e+02 - ME 1.104115154253218e-04 + ME 8.130044127338102e-04 Event 3 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -28,7 +28,7 @@ Event 3 Batch 0 2 6.758793342627306e+02 1.455349847705337e+02 4.360940220328824e+02 -4.954335945799966e+02 3 3.008019460079605e+02 -1.607139834787174e+02 2.732727402256846e+01 2.527964523704278e+02 4 5.233187197293092e+02 1.517899870818368e+01 -4.634212960554508e+02 2.426371422095687e+02 - ME 4.288074098478053e-05 + ME 7.753277710143621e-05 Event 4 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -36,7 +36,7 @@ Event 4 Batch 0 2 3.540811678028369e+02 5.414642718170588e+01 -3.497885023717100e+02 -9.467915537920108e+00 3 7.415000547748695e+02 1.453779348794601e+00 7.277337852109665e+02 1.422102514562805e+02 4 4.044187774222938e+02 -5.560020653050046e+01 -3.779452828392566e+02 -1.327423359183605e+02 - ME 
1.304731284254719e-05 + ME 2.015528729476554e-04 Event 5 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -44,7 +44,7 @@ Event 5 Batch 0 2 4.747467875786874e+02 2.462969907607520e+02 3.713870243947702e+02 1.636886763636381e+02 3 3.438196236093862e+02 -2.056491112573935e+02 2.636029701703988e+02 8.021128807897365e+01 4 6.814335888119255e+02 -4.064787950335840e+01 -6.349899945651691e+02 -2.438999644426124e+02 - ME 1.932390649640220e-04 + ME 6.140777519977192e-04 Event 6 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -52,7 +52,7 @@ Event 6 Batch 0 2 5.623951200922340e+02 4.644673798421034e+02 3.089047820108764e+02 -7.166700647426805e+01 3 2.268243199894467e+02 1.761899852590787e+02 -7.114332369064562e+01 -1.238748914321566e+02 4 7.107805599183188e+02 -6.406573651011822e+02 -2.377614583202307e+02 1.955418979064247e+02 - ME 1.929702539767979e-04 + ME 8.375373201653861e-04 Event 7 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -60,7 +60,7 @@ Event 7 Batch 0 2 4.922243378496302e+02 2.878585072835456e+02 -1.441537488072182e+02 -3.723465794939189e+02 3 2.873990637609374e+02 -5.400981623596619e+01 -8.913204919452846e+01 -2.678369642286231e+02 4 7.203765983894325e+02 -2.338486910475794e+02 2.332857980017467e+02 6.401835437225419e+02 - ME 6.280412585349807e-04 + ME 2.045598717079573e-03 Event 8 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -68,7 +68,7 @@ Event 8 Batch 0 2 3.353309706037128e+02 -7.529439061162444e+01 -4.917829145606096e+01 -3.230466069128648e+02 3 7.169322705461503e+02 -1.597426278178964e+02 -1.460012137440150e+01 6.987567601563110e+02 4 4.477367588501368e+02 2.350370184295208e+02 6.377841283046249e+01 -3.757101532434461e+02 - ME 1.424871539111113e-03 + ME 5.176104304710922e-03 Event 9 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -76,7 +76,7 @@ Event 9 Batch 0 2 2.557626120875720e+02 2.000882245504951e+02 -5.276260741790070e+01 -1.503174088272977e+02 3 7.044202058180884e+02 -6.969679478438196e+02 -1.019614549623775e+02 6.882422911146106e+00 4 5.398171820943397e+02 4.968797232933244e+02 1.547240623802783e+02 1.434349859161515e+02 - ME 1.126010180174107e-05 + ME 6.498215193902510e-05 Event 10 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -84,7 +84,7 @@ Event 10 Batch 0 2 3.466796552973448e+02 1.172124288883391e+02 -1.804077050554743e+02 2.718475489457261e+02 3 5.174471655316495e+02 -1.610456139025784e+02 -4.497410659869822e+02 -1.988689340353916e+02 4 6.358731791710053e+02 4.383318501423926e+01 6.301487710424565e+02 -7.297861491033444e+01 - ME 8.292383053707579e-05 + ME 2.111165581639245e-04 Event 11 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -92,7 +92,7 @@ Event 11 Batch 0 2 5.730783827248506e+02 -3.059484875398849e+01 3.466457017175528e+02 -4.553235612803233e+02 3 4.410994673708892e+02 -3.026218886155176e+02 -1.990641070399019e+01 3.203005892260318e+02 4 4.858221499042607e+02 3.332167373695061e+02 -3.267392910135624e+02 1.350229720542913e+02 - ME 2.195851954305949e-05 + ME 5.129802099928076e-05 Event 12 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -100,7 +100,7 @@ Event 12 Batch 0 2 2.275003875859171e+02 -1.247450244086003e+02 1.654605359856639e+02 9.390376067217456e+01 3 6.138170466352969e+02 3.363961838598331e+02 -2.139358085817026e+01 5.129827374509639e+02 4 6.586825657787861e+02 -2.116511594512328e+02 -1.440669551274935e+02 -6.068864981231385e+02 - ME 3.843244876666358e-03 + ME 5.249882090061186e-02 Event 13 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -108,7 +108,7 @@ Event 13 Batch 0 2 2.867684047377951e+02 
7.055192702127012e+01 -2.028354730671929e+02 1.900429278217245e+02 3 6.990707050557395e+02 -5.605742285334717e+02 2.413419117565430e+02 -3.408965629057132e+02 4 5.141608902064654e+02 4.900223015122016e+02 -3.850643868935023e+01 1.508536350839886e+02 - ME 1.780264803426774e-05 + ME 6.422048006176975e-05 Event 14 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -116,7 +116,7 @@ Event 14 Batch 0 2 3.551549262960330e+02 1.090410064132905e+02 3.205839746298526e+02 1.071027348074892e+02 3 5.276349775014137e+02 3.895763694332612e+02 -2.529209653865598e+02 2.503196099590423e+02 4 6.172100962025531e+02 -4.986173758465519e+02 -6.766300924329285e+01 -3.574223447665315e+02 - ME 1.172793340377339e-04 + ME 7.422587439250419e-04 Event 15 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -124,7 +124,7 @@ Event 15 Batch 0 2 5.846731991828425e+02 7.106081559720657e+01 3.900476102503054e+02 4.297161529048979e+02 3 2.829885923647302e+02 -2.767806781033229e+02 5.223342094943639e+01 -2.732525156618249e+01 4 6.323382084524278e+02 2.057198625061163e+02 -4.422810311997417e+02 -4.023909013387152e+02 - ME 2.768931482482754e-04 + ME 1.255922738422332e-03 Event 16 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -132,7 +132,7 @@ Event 16 Batch 0 2 7.471577506095512e+02 1.666056475215676e+02 -5.784682380714994e+02 -4.425627187781379e+02 3 6.589296733908160e+02 -1.235441202519038e+02 5.251239647671507e+02 3.783780998595698e+02 4 9.391257599963087e+01 -4.306152726966400e+01 5.334427330434855e+01 6.418461891856485e+01 - ME 3.619360847906487e-05 + ME 5.526726502577864e-05 Event 17 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -140,7 +140,7 @@ Event 17 Batch 0 2 3.567490993131759e+02 3.856364495163717e+01 -1.708845728849435e+02 -3.107752047682324e+02 3 6.453207560475681e+02 
4.468356462873772e+02 2.282834847349605e+02 4.057874246326636e+02 4 4.979301446392561e+02 -4.853992912390142e+02 -5.739891185001719e+01 -9.501221986443127e+01 - ME 3.400819398697452e-05 + ME 1.327369996555111e-04 Event 18 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -148,7 +148,7 @@ Event 18 Batch 0 2 4.856701782481425e+02 2.509110753153842e+02 -3.498523763974107e+02 -2.247720379690150e+02 3 3.014847498930008e+02 -1.059425909901355e+02 -2.435847754696140e+02 -1.426032222348426e+02 4 7.128450718588564e+02 -1.449684843252488e+02 5.934371518670247e+02 3.673752602038576e+02 - ME 1.704840743724005e-04 + ME 1.018512933050835e-03 Event 19 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -156,7 +156,7 @@ Event 19 Batch 0 2 5.848213503304410e+02 -3.141116763848333e+02 -1.950442390378232e+02 4.531088295091878e+02 3 5.769300027107226e+02 5.020221748138873e+02 2.252239828724832e+02 -1.734823378963534e+02 4 3.382486469588368e+02 -1.879104984290540e+02 -3.017974383465995e+01 -2.796264916128346e+02 - ME 1.566312636528492e-04 + ME 4.267017342507976e-03 Event 20 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -164,7 +164,7 @@ Event 20 Batch 0 2 5.550938429889906e+02 -4.478597170519693e+02 -1.958065402362923e+02 -2.630791652090858e+02 3 5.585686897587655e+02 3.351111310173187e+02 -1.360174455686903e+02 4.256744830831253e+02 4 3.863374672522434e+02 1.127485860346507e+02 3.318239858049826e+02 -1.625953178740396e+02 - ME 4.443882992804106e-05 + ME 2.768271682113988e-04 Event 21 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -172,7 +172,7 @@ Event 21 Batch 0 2 6.296556563991993e+02 -3.477135312394776e+02 -1.376147989324512e+02 -5.065804111325866e+02 3 3.137568007204202e+02 1.080474571851863e+02 -2.382188236683311e+02 1.732653140250679e+02 4 5.565875428803801e+02 
2.396660740542913e+02 3.758336226007823e+02 3.333150971075189e+02 - ME 2.195742323347977e-05 + ME 5.519034669639832e-05 Event 22 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -180,7 +180,7 @@ Event 22 Batch 0 2 5.583338925767162e+02 2.471586228668332e+02 -1.597599499756147e+02 -4.744745610949311e+02 3 5.378723432497920e+02 9.149532098241385e+00 4.314513680009925e+02 3.210493120152684e+02 4 4.037937641734921e+02 -2.563081549650745e+02 -2.716914180253778e+02 1.534252490796627e+02 - ME 1.393143104564022e-05 + ME 3.705224437539572e-05 Event 23 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -188,7 +188,7 @@ Event 23 Batch 0 2 6.057340011976822e+02 6.848115528115159e+01 -5.207204912425279e+02 -3.017849923015605e+02 3 6.884459352783615e+02 -2.949639632364767e+01 6.680977958792448e+02 1.635026102131439e+02 4 2.058200635239559e+02 -3.898475895750391e+01 -1.473773046367171e+02 1.382823820884168e+02 - ME 1.074117284514867e-05 + ME 2.946248744974782e-05 Event 24 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -196,7 +196,7 @@ Event 24 Batch 0 2 4.702316790647315e+02 -1.210575128627593e+02 4.313728504035306e+02 -1.427598490831810e+02 3 7.180482366151732e+02 1.040047389253588e+02 -7.104588047260974e+02 4.956931953573291e+00 4 3.117200843200960e+02 1.705277393740069e+01 2.790859543225674e+02 1.378029171296075e+02 - ME 5.213387311993420e-06 + ME 3.146557994448562e-05 Event 25 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -204,7 +204,7 @@ Event 25 Batch 0 2 6.261365010744016e+02 -5.354018140499276e+02 -2.095559720530078e+02 2.479477970595020e+02 3 5.483958991041942e+02 5.199465180092641e+02 -9.843995208133505e+01 -1.438862620216537e+02 4 3.254675998214045e+02 1.545529604066345e+01 3.079959241343431e+02 -1.040615350378483e+02 - ME 1.695323153210731e-05 + ME 
1.657640191611339e-04 Event 26 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -212,7 +212,7 @@ Event 26 Batch 0 2 4.635816356180677e+02 1.904702824079147e+02 -2.351549941335565e+02 -3.511853259118595e+02 3 3.686385821486527e+02 -2.712527815845713e+02 -6.015354190959191e+01 -2.422764621809819e+02 4 6.677797822332798e+02 8.078249917665664e+01 2.953085360431485e+02 5.934617880928415e+02 - ME 1.052251904460155e-04 + ME 3.250975879010065e-04 Event 27 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -220,7 +220,7 @@ Event 27 Batch 0 2 2.851713673150520e+02 1.387976072955998e+02 1.520424011317634e+02 -1.973348453858079e+02 3 6.747356481771329e+02 2.426633222154767e+02 -4.300238522839811e+02 4.598501858640580e+02 4 5.400929845078149e+02 -3.814609295110765e+02 2.779814511522176e+02 -2.625153404782502e+02 - ME 7.957109124083736e-05 + ME 4.155279516527712e-04 Event 28 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -228,7 +228,7 @@ Event 28 Batch 0 2 1.977804200471008e+02 -1.803202618401224e+02 -8.082809162516925e+01 -8.277519444290659e+00 3 7.197523834069627e+02 3.152541965091956e+02 6.467033971658861e+02 -2.080867841663842e+01 4 5.824671965459364e+02 -1.349339346690732e+02 -5.658753055407169e+02 2.908619786092899e+01 - ME 1.748013159755222e-05 + ME 1.172809031809504e-04 Event 29 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -236,7 +236,7 @@ Event 29 Batch 0 2 6.123364628491765e+02 -3.746492624245139e+02 3.785128791537567e+02 -3.021950929683376e+02 3 4.056577755659300e+02 1.796205570313495e+00 -8.781658530568643e+01 3.960344074293251e+02 4 4.820057615848937e+02 3.728530568542006e+02 -2.906962938480702e+02 -9.383931446098750e+01 - ME 3.085570985177973e-04 + ME 5.496242925842306e-04 Event 30 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 
7.500000000000000e+02 @@ -244,7 +244,7 @@ Event 30 Batch 0 2 7.349194950356053e+02 7.241679607953656e+02 1.425637322816703e+01 1.244354634469208e+02 3 7.321421454671275e+02 -7.253765693071590e+02 -2.895970851972107e+01 -9.498573130653318e+01 4 3.293835949726734e+01 1.208608511793152e+00 1.470333529155409e+01 -2.944973214038765e+01 - ME 3.267107835672361e-04 + ME 5.147061682527938e-02 Event 31 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -252,7 +252,7 @@ Event 31 Batch 0 2 1.718338270585457e+02 -1.344914872264095e+02 -1.021614404532311e+02 3.165350011824393e+01 3 6.313115253715935e+02 -2.849940593920691e+02 -7.916450257599642e+01 -5.577325610990745e+02 4 6.968546475698608e+02 4.194855466184786e+02 1.813259430292275e+02 5.260790609808306e+02 - ME 1.685680846028125e-04 + ME 4.645345268703414e-04 Event 32 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -260,7 +260,7 @@ Event 32 Batch 0 2 7.235176898898732e+02 -4.762113006241282e+02 -2.880822916693121e+01 5.439400065022983e+02 3 6.603902828461299e+02 4.672103814637360e+02 1.031050210016798e+02 -4.551913221650266e+02 4 1.160920272639969e+02 9.000919160392018e+00 -7.429679183474862e+01 -8.874868433727177e+01 - ME 2.173072900368875e-04 + ME 4.476006843186700e-03 Event 33 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -268,7 +268,7 @@ Event 33 Batch 0 2 4.786737271642286e+02 2.009638309376703e+02 4.090184839380260e+02 1.464443769121513e+02 3 3.795793219608408e+02 -6.057523839522271e+00 -8.244277697544294e+01 3.704685635647950e+02 4 6.417469508749314e+02 -1.949063070981495e+02 -3.265757069625828e+02 -5.169129404769461e+02 - ME 3.322437827682699e-03 + ME 1.351709676586880e-02 Event 34 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -276,7 +276,7 @@ Event 34 Batch 0 2 6.621583515140109e+02 -5.051303032557109e+02 
-1.429543729176959e+02 4.035605363216953e+02 3 3.008522892707525e+02 8.677543723835062e+01 2.726747894692539e+02 -9.290092916351111e+01 4 5.369893592152367e+02 4.183548660173603e+02 -1.297204165515579e+02 -3.106596071581844e+02 - ME 9.294666462955388e-05 + ME 6.460854093057828e-04 Event 35 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -284,7 +284,7 @@ Event 35 Batch 0 2 6.158114977149372e+02 2.502256147979830e+02 4.233348779616202e+00 5.626659943296695e+02 3 1.476397433483021e+02 -1.670550278282843e+01 -6.055370982200890e+01 1.336101351676488e+02 4 7.365487589367605e+02 -2.335201120151546e+02 5.632036104239269e+01 -6.962761294973184e+02 - ME 5.450893768264864e-01 + ME 2.101231899117793e+00 Event 36 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -292,7 +292,7 @@ Event 36 Batch 0 2 7.182456511154913e+02 -7.463771462544163e+01 -6.667773110518942e+02 2.563475070450518e+02 3 4.860008755751825e+02 -7.840660561780868e+01 4.141081959217036e+02 -2.419992919944378e+02 4 2.957534733093268e+02 1.530443202432501e+02 2.526691151301903e+02 -1.434821505061448e+01 - ME 1.793136635525090e-05 + ME 9.644531209480271e-05 Event 37 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -300,7 +300,7 @@ Event 37 Batch 0 2 5.672182018814327e+02 -2.031706828392718e+00 -5.267408190306547e+02 2.104197478372323e+02 3 4.664069288608281e+02 3.712365792892206e+02 2.604523782658950e+02 -1.090109358856581e+02 4 4.663748692577387e+02 -3.692048724608279e+02 2.662884407647597e+02 -1.014088119515743e+02 - ME 1.885829354904198e-05 + ME 1.216876552012178e-04 Event 38 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -308,7 +308,7 @@ Event 38 Batch 0 2 5.068057345787187e+02 4.883513201966852e+02 -7.570036138649985e+01 -1.124032737511800e+02 3 3.871140338254017e+02 -1.153787089711745e+02 
-3.599073977747533e+02 -8.373585688177315e+01 4 6.060802315958797e+02 -3.729726112255107e+02 4.356077591612532e+02 1.961391306329531e+02 - ME 2.004468492837133e-05 + ME 1.006736553113524e-04 Event 39 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -316,7 +316,7 @@ Event 39 Batch 0 2 4.960337392567769e+02 -3.669089247616476e+02 2.651961920161227e+02 -2.027271347192069e+02 3 2.837821967046824e+02 -2.822567153069604e+02 -2.935613327724534e+01 -1.303560381865560e+00 4 7.201840640385411e+02 6.491656400686079e+02 -2.358400587388775e+02 2.040306951010725e+02 - ME 2.738639406673165e-04 + ME 1.372807525012575e-03 Event 40 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -324,7 +324,7 @@ Event 40 Batch 0 2 3.080730228651936e+02 -3.065830270999447e+02 -2.484308296331460e+01 1.728167064871203e+01 3 6.842346640746094e+02 4.630487823766367e+02 8.554554725666550e+01 -4.964321303112498e+02 4 5.076923130601962e+02 -1.564657552766919e+02 -6.070246429335075e+01 4.791504596625378e+02 - ME 4.316353181637933e-05 + ME 4.192363154074847e-05 Event 41 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -332,7 +332,7 @@ Event 41 Batch 0 2 1.602650851118221e+02 -1.258781096038287e+02 -9.817642232798531e+01 1.417706342452912e+01 3 7.146392966623014e+02 6.799675591776853e+02 -1.019163870176435e+02 1.948499239342933e+02 4 6.250956182258764e+02 -5.540894495738563e+02 2.000928093456288e+02 -2.090269873588226e+02 - ME 6.118266190948034e-05 + ME 4.523507186168379e-04 Event 42 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -340,7 +340,7 @@ Event 42 Batch 0 2 1.687893235969910e+02 1.289401357197518e+02 4.788693514682045e+01 9.783209393213438e+01 3 7.042017295435162e+02 -1.022058447296739e+02 -6.640064324330017e+02 -2.110675220936915e+02 4 6.270089468594927e+02 -2.673429099007782e+01 
6.161194972861812e+02 1.132354281615572e+02 - ME 4.091574289077424e-05 + ME 1.686356189272381e-04 Event 43 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -348,7 +348,7 @@ Event 43 Batch 0 2 4.729783670130408e+02 -7.983817933050123e+01 9.052957805204315e+01 4.573169538528310e+02 3 5.638402597824536e+02 4.785250044669658e+02 7.435095949863268e+01 -2.887933404236804e+02 4 4.631813732045056e+02 -3.986868251364646e+02 -1.648805375506758e+02 -1.685236134291506e+02 - ME 2.654067897204875e-04 + ME 5.938757690519573e-04 Event 44 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -356,7 +356,7 @@ Event 44 Batch 0 2 1.774791104122977e+02 -1.952605982635784e+01 6.371003613266313e+01 1.644949814321787e+02 3 7.194816205691247e+02 -3.678871192485065e+02 2.644831693887214e+01 -6.177486190667772e+02 4 6.030392690185777e+02 3.874131790748646e+02 -9.015835307153536e+01 4.532536376345985e+02 - ME 1.390282437939369e-04 + ME 2.092333697371024e-04 Event 45 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -364,7 +364,7 @@ Event 45 Batch 0 2 7.477488480180839e+02 -3.787655987618923e+02 1.634662296474455e+02 6.236535517992064e+02 3 7.458113398274099e+02 3.819163358711198e+02 -1.661042992235261e+02 -6.186952632673017e+02 4 6.439812154506046e+00 -3.150737109227506e+00 2.638069576080606e+00 -4.958288531904773e+00 - ME 4.591622113024210e-03 + ME 9.377954359926730e-02 Event 46 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -372,7 +372,7 @@ Event 46 Batch 0 2 3.243146757688279e+02 -4.392587631431587e+00 -2.496903827548322e+02 -2.069188895501946e+02 3 5.341608950426614e+02 -2.704482657861201e+02 2.711825143656835e+02 -3.723515022507137e+02 4 6.415244291885106e+02 2.748408534175518e+02 -2.149213161085120e+01 5.792703918009084e+02 - ME 7.845213441237594e-05 + ME 1.879047912263320e-04 Event 47 
Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -380,7 +380,7 @@ Event 47 Batch 0 2 6.742198761450968e+02 -3.282965096491567e+02 5.301803926793563e+02 -2.563251730900704e+02 3 6.484148720042493e+02 3.527030795571956e+02 -3.975273148506379e+02 3.715029176935211e+02 4 1.773652518506536e+02 -2.440656990803885e+01 -1.326530778287185e+02 -1.151777446034508e+02 - ME 5.254395938575492e-05 + ME 1.136665455996279e-03 Event 48 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -388,7 +388,7 @@ Event 48 Batch 0 2 7.321401810535270e+02 -1.843482647928687e+02 4.412348098999295e+02 5.543976952635381e+02 3 7.293058265076229e+02 2.182722651304250e+02 -4.435200216702997e+02 -5.362221528717154e+02 4 3.855399243885009e+01 -3.392400033755636e+01 2.285211770370227e+00 -1.817554239182278e+01 - ME 2.330290263553363e-04 + ME 2.278442596973106e-03 Event 49 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -396,7 +396,7 @@ Event 49 Batch 0 2 3.511117284856090e+02 -3.272266866652174e+02 5.199533974843238e+01 1.161835877338140e+02 3 7.326526490901410e+02 6.615045961628415e+02 -2.993354007364775e+02 -9.792799058578566e+01 4 4.162356224242500e+02 -3.342779094976241e+02 2.473400609880451e+02 -1.825559714802838e+01 - ME 7.863589115869630e-06 + ME 8.806759903737244e-05 Event 50 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -404,7 +404,7 @@ Event 50 Batch 0 2 7.322170903075255e+02 2.740692406080844e+02 1.952596610981929e+01 -6.787095515302592e+02 3 3.078559130669522e+02 -1.663333363406682e+02 8.625456119089935e+01 2.442716420418760e+02 4 4.599269966255216e+02 -1.077359042674159e+02 -1.057805273007185e+02 4.344379094883832e+02 - ME 6.765758192049922e-05 + ME 7.579426018596712e-05 Event 51 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -412,7 
+412,7 @@ Event 51 Batch 0 2 3.473696038265160e+02 -2.922314643158454e+02 -6.759614889845234e+01 -1.752060888796554e+02 3 5.389399151999496e+02 -2.449040872454050e+02 9.346474502284556e+01 4.708954891311219e+02 4 6.136904809735339e+02 5.371355515612503e+02 -2.586859612439322e+01 -2.956894002514666e+02 - ME 2.035652280642710e-04 + ME 4.687828430739845e-04 Event 52 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -420,7 +420,7 @@ Event 52 Batch 0 2 6.818614816439094e+02 5.970116833066725e+02 3.013730734325877e+02 1.329902280423528e+02 3 2.108623144448950e+02 -4.198344769951654e+00 -1.698802183673395e+02 -1.248439063859965e+02 4 6.072762039111957e+02 -5.928133385367207e+02 -1.314928550652483e+02 -8.146321656356344e+00 - ME 4.047005152694340e-05 + ME 1.636869658416981e-04 Event 53 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -428,7 +428,7 @@ Event 53 Batch 0 2 5.157714002491656e+02 -5.140718537651751e+02 -4.182413977701254e+01 1.003899065692042e+00 3 5.148181840855221e+02 2.868792199999327e+02 1.974924151010656e+02 3.791237552236646e+02 4 4.694104156653124e+02 2.271926337652422e+02 -1.556682753240530e+02 -3.801276542893567e+02 - ME 1.547751010871262e-04 + ME 3.182294022992135e-03 Event 54 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -436,7 +436,7 @@ Event 54 Batch 0 2 6.433410767101752e+02 2.586883950027282e+02 -5.809813083922761e+02 9.710187728524583e+01 3 6.928799734080563e+02 -1.579832568796111e+02 6.405510983559769e+02 -2.117031848853746e+02 4 1.637789498817686e+02 -1.007051381231171e+02 -5.956978996370073e+01 1.146013076001288e+02 - ME 1.302720215079095e-05 + ME 3.280140142776471e-05 Event 55 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -444,7 +444,7 @@ Event 55 Batch 0 2 7.193759752058201e+02 -3.536444481659258e+02 -7.212523476050659e+01 
-6.222823703878202e+02 3 5.307053661742267e+02 2.409461639849982e+02 1.900944302490854e+02 4.329633233142391e+02 4 2.499186586199529e+02 1.126982841809279e+02 -1.179691954885788e+02 1.893190470735813e+02 - ME 3.087450123310173e-05 + ME 3.939174164528502e-05 Event 56 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -452,7 +452,7 @@ Event 56 Batch 0 2 3.858864959547013e+02 1.815174721437793e+02 3.218581876578407e+02 -1.112074732396182e+02 3 4.484505297447187e+02 -3.244105157450006e+02 2.934585578803474e+02 -9.873079412811623e+01 4 6.656629743005793e+02 1.428930436012212e+02 -6.153167455381879e+02 2.099382673677345e+02 - ME 4.275995533811995e-05 + ME 2.326138625268126e-04 Event 57 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -460,7 +460,7 @@ Event 57 Batch 0 2 5.284589752749192e+02 3.868194647882293e+02 -1.709996888155517e+02 3.168575336559793e+02 3 6.299868555278971e+02 -1.587414880613579e+02 2.327134172236622e+02 -5.634971548731005e+02 4 3.415541691971835e+02 -2.280779767268714e+02 -6.171372840811043e+01 2.466396212171210e+02 - ME 2.211478424702745e-05 + ME 3.474853710074164e-05 Event 58 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -468,7 +468,7 @@ Event 58 Batch 0 2 6.172037319760957e+02 -2.246119436411400e+02 -2.286037628748728e+01 5.744278237820342e+02 3 5.117934503257735e+02 1.262762853074207e+02 3.215736628881853e+02 -3.775939815489577e+02 4 3.710028176981306e+02 9.833565833371921e+01 -2.987132866006979e+02 -1.968338422330765e+02 - ME 1.857727050583390e-04 + ME 6.183305374210038e-04 Event 59 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -476,7 +476,7 @@ Event 59 Batch 0 2 7.388935626701858e+02 -3.912134623809441e+02 -5.457789630286015e+02 3.082872805076099e+02 3 1.936051438730608e+02 1.561492575196544e+02 8.304673385628061e+01 
-7.876294246644987e+01 4 5.675012934567535e+02 2.350642048612896e+02 4.627322291723209e+02 -2.295243380411600e+02 - ME 6.745345781245190e-05 + ME 4.116991424436793e-04 Event 60 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -484,7 +484,7 @@ Event 60 Batch 0 2 7.258141426633659e+02 -5.584991156701968e+02 1.635894950857984e+02 4.337319270970709e+02 3 2.789580074371136e+02 2.331554478032953e+02 6.512410160032128e+01 -1.386180308029247e+02 4 4.952278498995201e+02 3.253436678669015e+02 -2.287135966861195e+02 -2.951138962941461e+02 - ME 9.170244877267536e-05 + ME 7.295672680059989e-04 Event 61 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -492,15 +492,15 @@ Event 61 Batch 0 2 5.906141202026897e+02 4.485275282318680e+02 -2.043613424290570e+02 3.253990429020988e+02 3 4.163572165237975e+02 -4.021600557528675e+02 -4.112755461437413e+01 9.964509802161204e+01 4 4.930286632735124e+02 -4.636747247900051e+01 2.454888970434311e+02 -4.250441409237108e+02 - ME 1.836685601489136e-04 + ME 5.845307122272604e-03 Event 62 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 1 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 -7.500000000000000e+02 2 7.346180891175762e+02 3.693463141798367e+02 7.549194961263061e+01 -6.305140780380819e+02 3 4.420621433230785e+02 -2.806743363126464e+02 3.467380983154045e+01 3.397625382625571e+02 - 4 3.233197675593452e+02 -8.867197786719018e+01 -1.101657594441711e+02 2.907515397755248e+02 - ME 3.490896135533686e-05 + 4 3.233197675593453e+02 -8.867197786719018e+01 -1.101657594441711e+02 2.907515397755249e+02 + ME 3.963631774242112e-05 Event 63 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -508,7 +508,7 @@ Event 63 Batch 0 2 6.451039732729313e+02 -2.415045377667665e+02 1.990362537024482e+02 -5.641092662620230e+02 3 3.260870385294104e+02 
2.061141051805976e+02 -2.496695602716584e+02 3.892098426606745e+01 4 5.288089881976584e+02 3.539043258616898e+01 5.063330656921013e+01 5.251882819959555e+02 - ME 4.428689394331114e-04 + ME 4.832224458906289e-04 Event 64 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -516,7 +516,7 @@ Event 64 Batch 0 2 5.275973380665291e+02 -6.064553482667328e+01 4.309976929667101e+02 -2.981980196075213e+02 3 5.799838776791826e+02 3.279821268626862e+02 -1.824214634122377e+02 4.421893627315650e+02 4 3.924187842542880e+02 -2.673365920360130e+02 -2.485762295544724e+02 -1.439913431240437e+02 - ME 4.205989960223865e-05 + ME 2.175617604507715e-04 Event 65 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -524,7 +524,7 @@ Event 65 Batch 0 2 6.480172869826541e+02 2.720879118036237e+02 -5.153900904044360e+02 -2.833154199679406e+02 3 7.075023253568394e+02 -3.440299289242928e+02 4.709796137500282e+02 4.004761563708322e+02 4 1.444803876605064e+02 7.194201712066916e+01 4.441047665440794e+01 -1.171607364028916e+02 - ME 1.103463366798231e-04 + ME 4.989956280474397e-03 Event 66 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -532,7 +532,7 @@ Event 66 Batch 0 2 5.472978185025795e+02 4.857452785131266e+02 -2.223654169683454e+02 -1.189119332799752e+02 3 3.203062148499983e+02 1.169702135976477e+02 2.922172461416276e+02 -5.935588816501102e+01 4 6.323959666474225e+02 -6.027154921107744e+02 -6.985182917328234e+01 1.782678214449862e+02 - ME 2.913920636000223e-05 + ME 1.346850069104626e-04 Event 67 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -540,7 +540,7 @@ Event 67 Batch 0 2 4.264671493042950e+02 1.195959046886511e+02 -2.647539231733031e+02 3.122121220929446e+02 3 5.059969655247565e+02 3.777175441887567e+02 -7.608313561896731e+00 -3.366073372596325e+02 4 5.675358851709483e+02 
-4.973134488774080e+02 2.723622367352000e+02 2.439521516668857e+01 - ME 4.009347519102052e-05 + ME 9.763221977220593e-05 Event 68 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -548,7 +548,7 @@ Event 68 Batch 0 2 5.996105691520872e+02 -3.814725562071957e+02 -3.417794545715573e+02 3.117664637712124e+02 3 2.164196744806214e+02 1.292759463548889e+02 -1.184749651041615e+02 1.268419798013013e+02 4 6.839697563672917e+02 2.521966098523068e+02 4.602544196757188e+02 -4.386084435725137e+02 - ME 6.175473672610461e-04 + ME 2.936083529685707e-03 Event 69 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -556,7 +556,7 @@ Event 69 Batch 0 2 4.950546755511076e+02 -1.873718558932053e+02 -4.578972175289678e+02 -1.735101101888631e+01 3 4.768584394819691e+02 -1.830244097668608e+02 2.985566003539791e+02 -3.236664843936508e+02 4 5.280868849669230e+02 3.703962656600661e+02 1.593406171749887e+02 3.410174954125370e+02 - ME 1.367292435278724e-05 + ME 5.234212626720279e-05 Event 70 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -564,7 +564,7 @@ Event 70 Batch 0 2 6.918343395272258e+02 6.895733556028865e+02 -5.391072441382606e+01 -1.473005040127906e+01 3 2.169590284692678e+02 -1.127375202028747e+02 1.807969800614662e+02 4.091361110301506e+01 4 5.912066320035063e+02 -5.768358354000119e+02 -1.268862556476402e+02 -2.618356070173603e+01 - ME 3.526540789264872e-05 + ME 1.591740981760110e-04 Event 71 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -572,7 +572,7 @@ Event 71 Batch 0 2 5.156371334918733e+02 1.547202099034306e+02 -4.807172487652236e+02 1.041836686949964e+02 3 3.718518305526428e+02 -8.969821893462726e+01 -7.521366892975188e+01 -3.529460545344468e+02 4 6.125110359554843e+02 -6.502199096880338e+01 5.559309176949756e+02 2.487623858394504e+02 - ME 2.860782472746935e-05 + ME 
1.125100552069616e-04 Event 72 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -580,7 +580,7 @@ Event 72 Batch 0 2 2.110577464974889e+02 5.009520239746097e+01 -1.453533690489527e+02 -1.445968227848547e+02 3 7.317124633441161e+02 -4.429659627226336e+02 5.264774879404380e+02 2.490095170354977e+02 4 5.572297901583943e+02 3.928707603251725e+02 -3.811241188914850e+02 -1.044126942506430e+02 - ME 2.666441446531882e-05 + ME 1.823320413479066e-04 Event 73 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -588,7 +588,7 @@ Event 73 Batch 0 2 3.932257450488246e+02 3.105005764664288e+01 -2.932679039283983e+02 2.601082794045340e+02 3 5.658879124646472e+02 3.645905401293642e+02 4.244364556305355e+02 8.459646951004230e+01 4 5.408863424865281e+02 -3.956405977760074e+02 -1.311685517021372e+02 -3.447047489145762e+02 - ME 7.825486685913998e-05 + ME 8.953763196089171e-04 Event 74 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -596,7 +596,7 @@ Event 74 Batch 0 2 1.374854102925440e+02 7.785209805930555e+01 4.289805712042688e+01 1.048858692406466e+02 3 6.381281910764947e+02 -1.004137270491618e+02 -1.591026937267357e+02 6.097630724433484e+02 4 7.243863986309617e+02 2.256162898985645e+01 1.162046366063089e+02 -7.146489416839951e+02 - ME 1.919068868336380e+00 + ME 1.395531292378326e+01 Event 75 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -604,7 +604,7 @@ Event 75 Batch 0 2 5.936883054156938e+02 -3.438525101293572e+00 -2.706855443967301e+02 5.283780053968293e+02 3 5.912298912592892e+02 1.109657062166288e+02 4.832067437414102e+02 -3.221034603433170e+02 4 3.150818033250173e+02 -1.075271811153352e+02 -2.125211993446803e+02 -2.062745450535123e+02 - ME 1.642862842910461e-04 + ME 1.379908325625592e-03 Event 76 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 
7.500000000000000e+02 @@ -612,7 +612,7 @@ Event 76 Batch 0 2 6.619486867997672e+02 2.801967015359571e+01 2.136411519593737e+02 6.258980909300584e+02 3 1.201252731414031e+02 2.274423842261747e+01 -8.754996679960182e+01 7.904292618103446e+01 4 7.179260400588295e+02 -5.076390857621322e+01 -1.260911851597719e+02 -7.049410171110928e+02 - ME 7.362202483972824e-01 + ME 5.870483941147637e+00 Event 77 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -620,7 +620,7 @@ Event 77 Batch 0 2 7.456676259451606e+02 -7.346624001550109e+02 6.511229493320701e+01 -1.097804865615983e+02 3 1.284204120828029e+02 1.251494694834492e+02 2.867183268690428e+01 2.708973588335753e+00 4 6.259119619720373e+02 6.095129306715618e+02 -9.378412762011118e+01 1.070715129732624e+02 - ME 4.400761364703354e-05 + ME 1.662775178233579e-04 Event 78 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -628,7 +628,7 @@ Event 78 Batch 0 2 7.040158920877628e+02 6.911264613612161e+02 -6.659640240533211e+01 -1.163937709034254e+02 3 5.185438503615327e+02 -4.976050220224222e+02 -1.270913363611937e+02 7.158742227342900e+01 4 2.774402575507044e+02 -1.935214393387939e+02 1.936877387665258e+02 4.480634862999637e+01 - ME 9.352750539306009e-06 + ME 5.328004946641866e-05 Event 79 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -636,7 +636,7 @@ Event 79 Batch 0 2 6.777589592768838e+02 1.742725197144059e+02 -4.776543849198212e+01 6.532264221831092e+02 3 5.725002211294488e+02 -1.786302554544233e+02 -1.627852110918317e+02 -5.189881598643107e+02 4 2.497408195936665e+02 4.357735740017474e+00 2.105506495838138e+02 -1.342382623187985e+02 - ME 3.598558866345749e-04 + ME 9.179311580246363e-04 Event 80 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -644,7 +644,7 @@ Event 80 Batch 0 2 6.240819586861880e+02 4.679310297228965e+02 
-4.118464023828053e+02 -3.002304821964348e+01 3 6.688675489057649e+02 -5.494372353172420e+02 3.251429131208653e+02 1.994607943266771e+02 4 2.070504924080468e+02 8.150620559434545e+01 8.670348926194001e+01 -1.694377461070337e+02 - ME 5.382869847396148e-05 + ME 3.575286400583300e-03 Event 81 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -652,7 +652,7 @@ Event 81 Batch 0 2 5.198056748722776e+02 1.034797897616987e+02 -2.885605608993972e+02 4.197888462474007e+02 3 5.672098642055398e+02 -4.160331805498524e+02 2.087659545613757e+01 -3.849773895903518e+02 4 4.129844609221831e+02 3.125533907881537e+02 2.676839654432596e+02 -3.481145665704891e+01 - ME 3.612255741613163e-05 + ME 1.018936778946332e-04 Event 82 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -660,7 +660,7 @@ Event 82 Batch 0 2 2.057598609140514e+02 6.385349666266659e+01 -2.765433460911293e+01 1.936364870179372e+02 3 6.235840147705873e+02 4.654039114453895e+02 -3.828889383639962e+02 -1.601633028106901e+02 4 6.706561243153629e+02 -5.292574081080552e+02 4.105432729731107e+02 -3.347318420724690e+01 - ME 3.172622561805068e-04 + ME 6.930850923220120e-04 Event 83 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -668,7 +668,7 @@ Event 83 Batch 0 2 6.583322583736492e+02 1.865539504254553e+02 -1.926584839569474e+02 6.012334775737429e+02 3 3.620902826842561e+02 -3.107067244571256e+02 -1.177956631152976e+01 -1.855584705935048e+02 4 4.795774589420946e+02 1.241527740316703e+02 2.044380502684771e+02 -4.156750069802382e+02 - ME 6.756528802944365e-04 + ME 8.385116111585099e-03 Event 84 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -676,7 +676,7 @@ Event 84 Batch 0 2 4.849329564663161e+02 -2.622178945286150e+02 4.068620488841210e+02 -2.941124332559817e+01 3 4.737588937677760e+02 6.014532316188546e+01 
-1.333934272225749e+02 4.505954095412368e+02 4 5.413081497659077e+02 2.020725713667296e+02 -2.734686216615461e+02 -4.211841662156386e+02 - ME 1.017468409980153e-03 + ME 5.162990427398554e-03 Event 85 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -684,7 +684,7 @@ Event 85 Batch 0 2 7.085742632080854e+02 -2.174614026040270e+02 -5.283468657604088e+02 -4.190914152061853e+02 3 5.315764222715953e+02 8.528530557199829e+00 3.820092234108129e+02 3.695533927738615e+02 4 2.598493145203187e+02 2.089328720468272e+02 1.463376423495959e+02 4.953802243232388e+01 - ME 1.894143727100354e-05 + ME 6.335517668355978e-05 Event 86 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -692,7 +692,7 @@ Event 86 Batch 0 2 1.724500140939190e+02 1.231518677708316e+02 -1.121928207497684e+01 1.201946443701656e+02 3 7.028475062724231e+02 -6.467096040851287e+01 -4.553168759141600e+02 -5.315061866629339e+02 4 6.247024796336580e+02 -5.848090736231883e+01 4.665361579891369e+02 4.113115422927684e+02 - ME 5.311384036847167e-05 + ME 1.165531323127631e-04 Event 87 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -700,7 +700,7 @@ Event 87 Batch 0 2 1.942099203196796e+02 -7.751148196958454e+01 -1.356691819650310e+02 -1.153400900745028e+02 3 7.314670447251594e+02 1.724617634710876e+02 7.020747158546045e+02 1.113196793791551e+02 4 5.743230349551606e+02 -9.495028150150301e+01 -5.664055338895735e+02 4.020410695347637e+00 - ME 1.874087134673149e-05 + ME 1.237609879052555e-04 Event 88 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -708,7 +708,7 @@ Event 88 Batch 0 2 6.382497024023744e+02 2.632142028760094e+02 -5.613974181649784e+02 1.513733956108635e+02 3 3.997044228265544e+02 -5.264940326118349e+01 3.435187961344461e+02 1.974500004195773e+02 4 4.620458747710724e+02 -2.105647996148253e+02 
2.178786220305324e+02 -3.488233960304407e+02 - ME 9.699609186666195e-05 + ME 1.863821317258467e-03 Event 89 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -716,7 +716,7 @@ Event 89 Batch 0 2 1.419006640093282e+02 -8.677155154367878e+01 6.457545216231642e+01 -9.185046144153740e+01 3 7.131224514048055e+02 5.460003286026870e+02 -4.154556538506974e+02 -1.944836022569670e+02 4 6.449768845858670e+02 -4.592287770590082e+02 3.508802016883808e+02 2.863340636985044e+02 - ME 2.974199953519439e-05 + ME 1.136115495374629e-04 Event 90 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -724,7 +724,7 @@ Event 90 Batch 0 2 5.730615760623938e+02 -6.017783679015001e+01 -5.202921970507185e+02 -2.325386583054727e+02 3 5.389913703864468e+02 -6.302812531165206e+01 2.446311215742109e+02 4.761247390423042e+02 4 3.879470535511588e+02 1.232059621018019e+02 2.756610754765076e+02 -2.435860807368315e+02 - ME 1.667772733247344e-04 + ME 1.094721025518881e-03 Event 91 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -732,7 +732,7 @@ Event 91 Batch 0 2 4.546745139784350e+02 -1.470341619195494e+02 -1.726383255301703e+02 -3.940886669878754e+02 3 5.110976540119647e+02 -2.482119727393537e+02 -1.865817698532448e+02 4.059542728975803e+02 4 5.342278320096005e+02 3.952461346589030e+02 3.592200953834151e+02 -1.186560590970480e+01 - ME 4.420313882846059e-05 + ME 8.789722587847313e-05 Event 92 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -740,7 +740,7 @@ Event 92 Batch 0 2 6.683728375977241e+02 -1.148152650923627e+02 3.458291789782991e+02 5.603051703379153e+02 3 2.872567998557088e+02 1.635098024620329e+02 7.847331657016402e+01 -2.227620976482501e+02 4 5.443703625465666e+02 -4.869453736967034e+01 -4.243024955484631e+02 -3.375430726896653e+02 - ME 2.265252332392545e-04 + ME 8.270083568815311e-04 Event 
93 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -748,7 +748,7 @@ Event 93 Batch 0 2 5.666948073002088e+02 5.408074886689032e+01 5.639942928586390e+02 -1.134525653745258e+01 3 6.168025492529713e+02 2.439040545997395e+02 -5.541969602989467e+02 1.175666879272316e+02 4 3.165026434468199e+02 -2.979848034666298e+02 -9.797332559692304e+00 -1.062214313897791e+02 - ME 1.251778043268437e-05 + ME 1.664960428447917e-04 Event 94 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -756,7 +756,7 @@ Event 94 Batch 0 2 4.964349376711385e+02 8.445930034540567e+01 -2.409007074648561e+02 -4.257712097695705e+02 3 5.660980232871289e+02 1.373833465612049e+02 5.210669225216058e+02 1.734417778711397e+02 4 4.374670390417324e+02 -2.218426469066104e+02 -2.801662150567495e+02 2.523294318984307e+02 - ME 1.007141026120618e-05 + ME 3.431641292834382e-05 Event 95 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -764,7 +764,7 @@ Event 95 Batch 0 2 7.117074025057361e+02 -3.227984571262278e+02 4.276971164854593e+02 -4.684055501468919e+02 3 1.264078228725325e+02 8.675876182178401e+01 5.074873328843479e+01 7.665781760618943e+01 4 6.618847746217315e+02 2.360396953044439e+02 -4.784458497738940e+02 3.917477325407025e+02 - ME 8.653822330208906e-05 + ME 2.121249861094822e-04 Event 96 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -772,7 +772,7 @@ Event 96 Batch 0 2 7.329769441659936e+02 -9.642859092211874e+01 6.903981466332597e+02 -2.265107649915406e+02 3 3.937873938465678e+02 -4.837693103302091e+01 -3.847118583018795e+02 6.873841850241256e+01 4 3.732356619874385e+02 1.448055219551397e+02 -3.056862883313802e+02 1.577723464891279e+02 - ME 9.822975749896163e-06 + ME 3.473186069800973e-05 Event 97 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -780,7 
+780,7 @@ Event 97 Batch 0 2 3.394989963266853e+01 6.003767577498499e+00 -2.078495220615399e+01 2.616364312804199e+01 3 7.377311980366451e+02 -5.308290258162607e+02 4.681853362634530e+02 2.080152802450354e+02 4 7.283189023306861e+02 5.248252582387622e+02 -4.474003840572991e+02 -2.341789233730774e+02 - ME 2.729355315721549e-03 + ME 2.063600678642283e-02 Event 98 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -788,7 +788,7 @@ Event 98 Batch 0 2 2.496912687496082e+02 -2.485814905959506e+02 -5.435228288348340e-01 -2.350907922099247e+01 3 7.458289852530976e+02 7.373315781279124e+02 9.801365830907572e+01 -5.473885205171283e+01 4 5.044797459972945e+02 -4.887500875319618e+02 -9.747013548024091e+01 7.824793127270530e+01 - ME 8.091578731489026e-06 + ME 6.800308216903296e-05 Event 99 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -796,7 +796,7 @@ Event 99 Batch 0 2 1.698125854886770e+02 8.336002034290719e+01 8.774494220182726e+01 -1.191144253093525e+02 3 6.496622934125946e+02 5.714329899004554e+02 -6.230613627727958e+01 3.027265745152471e+02 4 6.805251210987285e+02 -6.547930102433627e+02 -2.543880592454771e+01 -1.836121492058947e+02 - ME 1.856310681395454e-04 + ME 6.115029137493471e-04 Event 100 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -804,7 +804,7 @@ Event 100 Batch 0 2 6.141460480129781e+02 -5.842473718080511e+02 -5.092222124447417e+01 1.823110095657221e+02 3 3.909476383151783e+02 2.539115798088024e+02 -2.930333502072385e+02 -5.000421191795168e+01 4 4.949063136718440e+02 3.303357919992488e+02 3.439555714517127e+02 -1.323067976477707e+02 - ME 2.380755205932631e-05 + ME 1.550407956048336e-04 Event 101 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -812,7 +812,7 @@ Event 101 Batch 0 2 7.469346538870473e+02 3.524232024688497e+02 -1.488240016505349e+02 
-6.415299525912136e+02 3 6.502268999047169e+02 -2.777200960400715e+02 1.351761574712158e+02 5.721835160737410e+02 4 1.028384462082358e+02 -7.470310642877820e+01 1.364784417931910e+01 6.934643651747267e+01 - ME 7.777208667430486e-05 + ME 1.080054053054822e-04 Event 102 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -820,7 +820,7 @@ Event 102 Batch 0 2 7.426790432885583e+02 -3.141071077544728e+02 6.615000409077074e+02 1.238005738162371e+02 3 6.735764515788642e+01 -4.139700837311957e+00 -5.533298776898177e+01 -3.818606686673834e+01 4 6.899633115535552e+02 3.182468085917849e+02 -6.061670531387255e+02 -8.561450694949879e+01 - ME 1.796768498680773e-04 + ME 6.292262541994918e-04 Event 103 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -828,7 +828,7 @@ Event 103 Batch 0 2 4.837874798175253e+02 -2.731724972668680e+02 1.247027290420595e+02 -3.793103501549069e+02 3 4.466406321977809e+02 -2.904538080082218e+02 -1.536665846758871e+02 3.025078850172422e+02 4 5.695718879846930e+02 5.636263052750895e+02 2.896385563382777e+01 7.680246513766473e+01 - ME 2.998858312831636e-05 + ME 8.140894767450013e-05 Event 104 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -836,7 +836,7 @@ Event 104 Batch 0 2 5.788466572679498e+02 3.572346730226224e+02 -3.682137844992378e+02 2.680773207965347e+02 3 2.925711988065158e+02 2.155069407513812e+02 1.697995838195863e+02 -1.016010147279926e+02 4 6.285821439255348e+02 -5.727416137740034e+02 1.984142006796517e+02 -1.664763060685422e+02 - ME 7.634200862908681e-05 + ME 2.849770726480251e-04 Event 105 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -844,7 +844,7 @@ Event 105 Batch 0 2 3.361125455083114e+02 2.619004058447622e+02 4.338373361330959e+01 -2.061496357605196e+02 3 5.299016201311088e+02 2.892532450564946e+02 2.091058919093095e+02 
3.916669672191841e+02 4 6.339858343605800e+02 -5.511536509012568e+02 -2.524896255226191e+02 -1.855173314586645e+02 - ME 1.089382545947932e-04 + ME 2.866662317167052e-04 Event 106 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -852,7 +852,7 @@ Event 106 Batch 0 2 3.578050478863485e+02 -2.265838270225943e+02 2.740910124726658e+02 -3.947579646386072e+01 3 5.202885196186892e+02 1.412729374205232e+02 1.631578432376887e+02 4.734148487210871e+02 4 6.219064324949621e+02 8.531088960207101e+01 -4.372488557103545e+02 -4.339390522572265e+02 - ME 4.548955126640399e-04 + ME 1.912263829178338e-03 Event 107 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -860,7 +860,7 @@ Event 107 Batch 0 2 5.409822745993889e+02 9.278463733038997e+01 5.102180459532771e+02 -1.540466750365499e+02 3 2.501852297905710e+02 1.682301834486207e+02 1.474652503315489e+02 1.120056004263085e+02 4 7.088324956100398e+02 -2.610148207790107e+02 -6.576832962848259e+02 4.204107461024153e+01 - ME 2.159102073406285e-04 + ME 7.096163321035572e-04 Event 108 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -868,7 +868,7 @@ Event 108 Batch 0 2 6.835202199428555e+02 6.670011709444186e+02 6.653656309718588e+01 1.337243986739828e+02 3 2.377887385005082e+02 -1.098327419601477e+02 7.667443498831059e+01 -1.964720946353502e+02 4 5.786910415566365e+02 -5.571684289842709e+02 -1.432109980854965e+02 6.274769596136723e+01 - ME 2.960130886583330e-05 + ME 1.143500637563713e-04 Event 109 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -876,7 +876,7 @@ Event 109 Batch 0 2 5.978180281189351e+02 4.291222314737005e+02 2.249703559956599e+02 3.501840146583366e+02 3 3.585061336071061e+02 -3.227227650115256e+02 1.541688059097761e+02 2.467071262824850e+01 4 5.436758382739589e+02 -1.063994664621746e+02 -3.791391619054360e+02 
-3.748547272865851e+02 - ME 1.100286424576873e-04 + ME 1.159187207430584e-03 Event 110 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -884,7 +884,7 @@ Event 110 Batch 0 2 7.073952645543156e+01 -4.753982451958468e+01 4.872856968801237e+01 -1.922426029646691e+01 3 7.438039776014969e+02 1.707202332282495e+02 -7.225114374584515e+02 4.556513803361385e+01 4 6.854564959430718e+02 -1.231804087086648e+02 6.737828677704391e+02 -2.634087773714689e+01 - ME 1.052942530962122e-04 + ME 5.177444310012934e-04 Event 111 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -892,7 +892,7 @@ Event 111 Batch 0 2 5.206822291802364e+02 -3.873336848644893e+02 2.415505427333673e+02 -2.504714268307115e+02 3 5.478000561519707e+02 4.687653961676166e+02 -2.245690260344170e+02 -1.729527606656598e+02 4 4.315177146677929e+02 -8.143171130312743e+01 -1.698151669895031e+01 4.234241874963712e+02 - ME 8.545692640795734e-05 + ME 1.041517236520828e-04 Event 112 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -900,7 +900,7 @@ Event 112 Batch 0 2 3.610471238372959e+02 2.563298943277285e+02 9.635756626046441e+01 -2.352981732387216e+02 3 6.139063356201009e+02 1.031778254919422e+02 -4.257030126280926e+02 4.301305270271111e+02 4 5.250465405426031e+02 -3.595077198196707e+02 3.293454463676283e+02 -1.948323537883896e+02 - ME 5.572029836371622e-05 + ME 2.333567140730066e-04 Event 113 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -908,7 +908,7 @@ Event 113 Batch 0 2 5.886653054136124e+02 3.035646198144377e+02 3.278619896967805e+02 -3.832517176826292e+02 3 5.420023902452333e+02 -3.658357535838290e+02 -3.990519958595696e+02 2.623541560166928e+01 4 3.693323043411537e+02 6.227113376939163e+01 7.119000616278893e+01 3.570163020809600e+02 - ME 4.986188449478774e-05 + ME 6.906402420910258e-05 Event 114 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -916,7 +916,7 @@ Event 114 Batch 0 2 5.165204340356855e+02 2.346362244736889e+01 6.298471388966840e+00 5.159487827839334e+02 3 5.932916594323345e+02 3.608814360715946e+02 -5.336137507463695e+01 -4.678804824963537e+02 4 3.901879065319798e+02 -3.843450585189634e+02 4.706290368567026e+01 -4.806830028757967e+01 - ME 4.029549711869195e-04 + ME 5.363382776736297e-04 Event 115 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -924,7 +924,7 @@ Event 115 Batch 0 2 5.432307281524777e+02 2.250327918244370e+02 4.870559856477670e+02 -8.506664127290338e+01 3 4.265243530840496e+02 2.057819224248363e+02 -2.472237669715339e+02 2.801021835354204e+02 4 5.302449187634726e+02 -4.308147142492733e+02 -2.398322186762331e+02 -1.950355422625171e+02 - ME 4.159321993514108e-05 + ME 2.364149932043149e-04 Event 116 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -932,7 +932,7 @@ Event 116 Batch 0 2 4.402635748890415e+02 -4.240500842615081e+02 -5.733358735035193e+01 -1.035683405941509e+02 3 4.399967684638562e+02 1.183617589007452e+02 -1.041572505293867e+02 -4.107784286579766e+02 4 6.197396566471035e+02 3.056883253607625e+02 1.614908378797388e+02 5.143467692521278e+02 - ME 4.172733678506819e-05 + ME 1.343295643586522e-04 Event 117 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -940,7 +940,7 @@ Event 117 Batch 0 2 3.074085311587982e+02 -4.270248480828711e+01 -3.034838508096459e+02 2.395944736750828e+01 3 5.360984061023379e+02 3.510554986169303e+02 -1.596589010508530e+02 -3.723849798683070e+02 4 6.564930627388640e+02 -3.083530138086433e+02 4.631427518604987e+02 3.484255325007987e+02 - ME 4.142391000026985e-05 + ME 1.795895763168496e-04 Event 118 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -948,7 
+948,7 @@ Event 118 Batch 0 2 5.403602961735903e+02 4.471526113902045e+02 -1.804334130868151e+02 -2.439007487679592e+02 3 5.654623567965698e+02 -5.534570111367966e+02 -1.157195831079003e+02 6.480112868522320e+00 4 3.941773470298406e+02 1.063043997465919e+02 2.961529961947150e+02 2.374206358994370e+02 - ME 7.288650603673961e-06 + ME 3.055618730902428e-05 Event 119 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -956,7 +956,7 @@ Event 119 Batch 0 2 8.009099446659010e+01 5.775399043490319e+01 -2.629604726664823e+01 4.886268393818209e+01 3 7.131140611332349e+02 2.472685400460709e+02 -2.870014097539109e+02 -6.041689532644716e+02 4 7.067949444001758e+02 -3.050225304809738e+02 3.132974570205592e+02 5.553062693262896e+02 - ME 2.815424392761942e-04 + ME 6.861262467765907e-04 Event 120 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -964,7 +964,7 @@ Event 120 Batch 0 2 5.007248873753321e+02 2.708997263130530e+02 -3.880896283797751e+02 1.634784128397387e+02 3 7.413897277398672e+02 -4.257033276374029e+02 5.921425482134987e+02 -1.334264135464211e+02 4 2.578853848848011e+02 1.548036013243502e+02 -2.040529198337238e+02 -3.005199929331748e+01 - ME 6.003662532288496e-06 + ME 1.034513276694145e-04 Event 121 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -972,7 +972,7 @@ Event 121 Batch 0 2 5.732265116821120e+02 -1.149395375629033e+02 4.260916136383032e+02 3.658189076403451e+02 3 4.323948798659248e+02 -2.148488009071912e+01 -4.178027098651986e+02 1.092914804138530e+02 4 4.943786084519640e+02 1.364244176536226e+02 -8.288903773105691e+00 -4.751103880541979e+02 - ME 7.661241871407340e-04 + ME 8.074833733477824e-02 Event 122 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -980,7 +980,7 @@ Event 122 Batch 0 2 3.423360304412701e+02 2.648046119434483e+02 2.369247279710451e+01 
-2.156644197927059e+02 3 6.059487982275789e+02 2.457729689670163e+01 -4.569077875801422e+02 3.972469964635579e+02 4 5.517151713311508e+02 -2.893819088401499e+02 4.332153147830377e+02 -1.815825766708520e+02 - ME 5.274300345459390e-05 + ME 2.180123533398812e-04 Event 123 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -988,7 +988,7 @@ Event 123 Batch 0 2 1.430133297276668e+02 -4.205671322284506e+01 3.498095937953869e+01 1.321377229770999e+02 3 7.140350670908600e+02 -2.955397919833849e+01 -6.570980288365154e+02 -2.778395577453968e+02 4 6.429516031814733e+02 7.161069242118367e+01 6.221170694569771e+02 1.457018347682969e+02 - ME 2.698780233597045e-04 + ME 5.626335206455025e-04 Event 124 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -996,7 +996,7 @@ Event 124 Batch 0 2 6.053457283343441e+02 5.458657819531910e+02 -1.853964251366731e+01 -2.610177782464909e+02 3 7.499633671623128e+02 -6.784114238502394e+02 2.145325921506613e+01 3.189713933003628e+02 4 1.446909045033435e+02 1.325456418970486e+02 -2.913616701398675e+00 -5.795361505387172e+01 - ME 2.629538535113942e-05 + ME 4.169465060943616e-04 Event 125 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1004,7 +1004,7 @@ Event 125 Batch 0 2 6.695439244882118e+02 9.058534244088493e+01 6.586171675820721e+02 7.941529525294386e+01 3 9.341516463500346e+01 3.490868167113007e+01 5.232133368429144e+01 6.906703243419068e+01 4 7.370409108767834e+02 -1.254940241120154e+02 -7.109385012663632e+02 -1.484823276871337e+02 - ME 4.436636984625360e-03 + ME 1.111472366347957e-02 Event 126 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1012,7 +1012,7 @@ Event 126 Batch 0 2 6.465564354211967e+02 -2.094351601488127e+02 -1.930091683601272e+02 -5.804477571728034e+02 3 1.356182567235447e+02 -2.832094442380729e+01 9.735247446175231e+01 
-9.007070211700794e+01 4 7.178253078552584e+02 2.377561045726200e+02 9.565669389837488e+01 6.705184592898115e+02 - ME 1.230970446288030e-03 + ME 1.775660879411100e-03 Event 127 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1020,7 +1020,7 @@ Event 127 Batch 0 2 4.508388003927651e+02 -3.846405138087858e+02 7.756355374444065e+01 2.220162025777267e+02 3 6.162879941073576e+02 2.174727303224461e+02 1.334711143222092e+02 -5.609830344035003e+02 4 4.328732054998774e+02 1.671677834863399e+02 -2.110346680666500e+02 3.389668318257735e+02 - ME 2.127227557837123e-05 + ME 3.922171581774212e-05 Event 128 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1028,7 +1028,7 @@ Event 128 Batch 0 2 7.468963146802857e+02 5.701805835528932e+02 -3.440982003215339e+02 -3.381488363986430e+02 3 1.196664332518719e+02 -9.337643239636876e+01 2.398139841985228e+01 7.089280393650260e+01 4 6.334372520678420e+02 -4.768041511565244e+02 3.201168019016817e+02 2.672560324621404e+02 - ME 7.842790653965437e-05 + ME 2.053620454072734e-04 Event 129 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1036,7 +1036,7 @@ Event 129 Batch 0 2 4.378966182438207e+02 -4.256397208622688e+02 4.624364030548149e+01 9.190104474357973e+01 3 7.127537996732577e+02 5.790589826349546e+02 -1.369827771626340e+02 -3.923574802896586e+02 4 3.493495820829217e+02 -1.534192617726859e+02 9.073913685715252e+01 3.004564355460789e+02 - ME 1.046217618618756e-05 + ME 1.668072874757384e-05 Event 130 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1044,7 +1044,7 @@ Event 130 Batch 0 2 6.322026526626455e+02 5.905875735566585e+02 -2.387291116192753e+01 -2.243136110600485e+02 3 5.268087771404591e+02 -3.287250458747471e+02 1.913681034684307e+02 3.644798771698754e+02 4 3.409885701968954e+02 -2.618625276819114e+02 -1.674951923065032e+02 
-1.401662661098267e+02 - ME 3.412796728096272e-05 + ME 2.766647151388132e-04 Event 131 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1052,7 +1052,7 @@ Event 131 Batch 0 2 2.691964685177017e+02 -2.641651354044939e+02 4.065264362900757e+01 -3.210735842607325e+01 3 5.382709487855662e+02 -3.022535437819008e+02 -4.307865739991411e+02 1.131429946566680e+02 4 6.925325826967319e+02 5.664186791863947e+02 3.901339303701337e+02 -8.103563623059465e+01 - ME 1.516502654737588e-04 + ME 5.354423766199649e-04 Event 132 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1060,7 +1060,7 @@ Event 132 Batch 0 2 1.376388194981169e+02 -2.491804956023667e+01 3.114513197621116e+01 1.317327453336230e+02 3 7.332494677489981e+02 -3.054807357444667e+02 -6.882601889638243e+00 -6.665500220046781e+02 4 6.291117127528858e+02 3.303987853047034e+02 -2.426253008657308e+01 5.348172766710551e+02 - ME 2.459616839911958e-04 + ME 3.625143788027957e-04 Event 133 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1068,7 +1068,7 @@ Event 133 Batch 0 2 5.818916885738672e+02 -3.437736592641007e+02 -2.113522447259726e+02 -4.192228966514222e+02 3 7.075583625851592e+02 3.695171106849944e+02 9.875952986414086e+01 5.952667441040354e+02 4 2.105499488409736e+02 -2.574345142089370e+01 1.125927148618317e+02 -1.760438474526132e+02 - ME 3.278402967978973e-04 + ME 6.644965721204062e-03 Event 134 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1076,7 +1076,7 @@ Event 134 Batch 0 2 7.039051474789593e+02 -1.767404282002263e+02 5.832845063404937e+02 3.521710697233707e+02 3 6.740856043500099e+02 9.540039380435479e+01 -5.203258634262522e+02 -4.177932056695244e+02 4 1.220092481710302e+02 8.134003439587134e+01 -6.295864291424151e+01 6.562213594615410e+01 - ME 3.621089826286842e-05 + ME 6.394436352069354e-05 Event 135 Batch 
0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1084,7 +1084,7 @@ Event 135 Batch 0 2 7.491379873081086e+02 -6.603965492909807e+02 -9.243924572685610e+01 -3.413782470545817e+02 3 4.360367703469753e+02 3.763875731093294e+02 3.833030381995060e+01 2.167746473012021e+02 4 3.148252423449159e+02 2.840089761816513e+02 5.410894190690560e+01 1.246035997533796e+02 - ME 1.170602675185252e-05 + ME 3.729096801849378e-05 Event 136 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1092,7 +1092,7 @@ Event 136 Batch 0 2 6.907976432034611e+02 -8.965778913807024e+01 -5.375684903631193e+02 -4.244796613161184e+02 3 4.317447428217263e+02 2.541758793770707e+02 2.501815833403360e+02 2.433255445990286e+02 4 3.774576139748129e+02 -1.645180902390004e+02 2.873869070227833e+02 1.811541167170898e+02 - ME 1.221598515374744e-05 + ME 3.295715598818487e-05 Event 137 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1100,7 +1100,7 @@ Event 137 Batch 0 2 5.927917878715718e+02 -5.453882061843875e+02 -2.239274061847312e+02 6.172783069514800e+01 3 3.718333194205911e+02 2.859809174201715e+02 -2.363544177495510e+02 2.472896101988843e+01 4 5.353748927078371e+02 2.594072887642160e+02 4.602818239342820e+02 -8.645679171503701e+01 - ME 2.222722395048600e-05 + ME 1.267334233155001e-04 Event 138 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1108,7 +1108,7 @@ Event 138 Batch 0 2 1.164849493482387e+02 2.012854405109472e+01 -2.573298799707043e+01 -1.118096528381494e+02 3 7.481698498358139e+02 -1.044692284663333e+02 -4.003634472873074e+00 7.408294509656059e+02 4 6.353452008159477e+02 8.434068441523856e+01 2.973662246994375e+01 -6.290197981274564e+02 - ME 1.183014588836486e-01 + ME 3.545594402685597e+00 Event 139 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-1116,7 +1116,7 @@ Event 139 Batch 0 2 3.415587822283577e+02 -2.468214832259765e+02 1.926082427237748e+02 1.365416492148350e+02 3 5.828887331044928e+02 -1.023403009989268e+02 -5.561813319045077e+02 1.412376154306548e+02 4 5.755524846671491e+02 3.491617842249035e+02 3.635730891807333e+02 -2.777792646454897e+02 - ME 5.213154494000113e-05 + ME 4.142320485322521e-04 Event 140 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1124,7 +1124,7 @@ Event 140 Batch 0 2 4.395392082109443e+02 -3.037880820376849e+02 -2.455930383243060e+02 -2.014735126343029e+02 3 4.709796125547878e+02 -2.826270024952004e+02 2.984919122515593e+02 2.298833426397907e+02 4 5.894811792342680e+02 5.864150845328855e+02 -5.289887392725340e+01 -2.840983000548780e+01 - ME 2.990357782498624e-05 + ME 1.220048440917972e-04 Event 141 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1132,7 +1132,7 @@ Event 141 Batch 0 2 3.025838986653694e+02 -2.680006525137058e+02 -6.218827689980458e+01 -1.259574698062632e+02 3 5.104624598690772e+02 -2.829910827131053e+02 4.173533268753467e+02 -7.939880721102661e+01 4 6.869536414655528e+02 5.509917352268112e+02 -3.551650499755422e+02 2.053562770172896e+02 - ME 7.151804808113674e-05 + ME 3.735313583347012e-04 Event 142 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1140,7 +1140,7 @@ Event 142 Batch 0 2 4.390011511178412e+02 -3.153925512561953e+02 3.992377088505197e+01 -3.027468279160259e+02 3 4.597282536099518e+02 2.984856708041211e+02 -2.221794712617382e+02 -2.699863960308454e+02 4 6.012705952722066e+02 1.690688045207421e+01 1.822557003766862e+02 5.727332239468712e+02 - ME 8.945447985744934e-05 + ME 1.630913878361870e-04 Event 143 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1148,7 +1148,7 @@ Event 143 Batch 0 2 7.103308443495001e+02 -3.626595603160224e+02 
2.462759922459802e+02 5.589240443825270e+02 3 3.424564807343295e+02 4.507572778536915e+01 -2.357842367637252e+02 -2.442343416788665e+02 4 4.472126749161695e+02 3.175838325306533e+02 -1.049175548225529e+01 -3.146897027036604e+02 - ME 1.789392510542836e-04 + ME 1.304325296055160e-03 Event 144 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1156,7 +1156,7 @@ Event 144 Batch 0 2 6.893886390440568e+02 -2.470805413393656e+02 1.331686162420120e+02 6.296618309717105e+02 3 7.132719020730987e+02 2.482972988978650e+02 -2.304803220538649e+02 -6.276815106349294e+02 4 9.733945888284487e+01 -1.216757558499225e+00 9.731170581185302e+01 -1.980320336781234e+00 - ME 1.486904409371019e-04 + ME 3.769348793094523e-04 Event 145 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1164,7 +1164,7 @@ Event 145 Batch 0 2 3.784954309743686e+02 2.391836032855264e+02 1.115572896135236e+01 -2.931305935912622e+02 3 7.389406222827198e+02 -4.231861417520660e+02 1.513250860114713e+02 5.865555822189353e+02 4 3.825639467429113e+02 1.840025384665394e+02 -1.624808149728234e+02 -2.934249886276727e+02 - ME 2.016505354100400e-04 + ME 2.193982780219728e-03 Event 146 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1172,7 +1172,7 @@ Event 146 Batch 0 2 4.681255842987410e+02 -3.253195724522379e+01 1.754808059398437e+02 -4.327698247100133e+02 3 2.875849079819393e+02 2.091841587061404e+01 1.879781824316579e+02 -2.166372592748876e+02 4 7.442895077193195e+02 1.161354137460973e+01 -3.634589883715017e+02 6.494070839849006e+02 - ME 1.210467216316050e-02 + ME 5.347932692815789e-02 Event 147 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1180,7 +1180,7 @@ Event 147 Batch 0 2 2.442136391928777e+02 -1.784444843977844e+02 -1.666832492802189e+02 -3.816014311599316e+00 3 5.551361515401285e+02 1.378338123621512e+02 
-5.199472642306259e+02 1.372327560591401e+02 4 7.006502092669938e+02 4.061067203563306e+01 6.866305135108448e+02 -1.334167417475408e+02 - ME 2.360352365747709e-04 + ME 7.450632204513606e-04 Event 148 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1188,7 +1188,7 @@ Event 148 Batch 0 2 4.547263863263726e+02 3.928375677411887e+02 5.145105706241225e+01 2.231759855356057e+02 3 7.397285466814292e+02 -5.611511356388266e+02 -1.533645573573770e+02 -4.569322031694095e+02 4 3.055450669921979e+02 1.683135678976379e+02 1.019135002949646e+02 2.337562176338038e+02 - ME 6.307552439231181e-06 + ME 1.440225905683450e-05 Event 149 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1196,7 +1196,7 @@ Event 149 Batch 0 2 2.343018799311635e+02 9.853424545130945e+01 1.924850318874441e+02 -9.021023174733594e+01 3 7.291173748950658e+02 3.429747374294529e+01 -5.990516617369192e+02 4.142136359886766e+02 4 5.365807451737705e+02 -1.328317191942547e+02 4.065666298494750e+02 -3.240034042413406e+02 - ME 8.298171355094406e-05 + ME 8.405553848068603e-04 Event 150 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1204,7 +1204,7 @@ Event 150 Batch 0 2 4.707648023587808e+02 -8.969278865174961e+01 -3.008719699078221e+02 3.507859183712497e+02 3 6.876639918976698e+02 3.906111988928598e+02 4.609284537794546e+02 -3.284046551871671e+02 4 3.415712057435500e+02 -3.009184102411105e+02 -1.600564838716325e+02 -2.238126318408256e+01 - ME 1.887585788236135e-05 + ME 1.070125715137075e-04 Event 151 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1212,7 +1212,7 @@ Event 151 Batch 0 2 6.503034458278056e+02 -1.575298496674962e+02 -3.658248853789647e+01 -6.298735108350154e+02 3 6.998690336552314e+02 1.302751858829802e+02 -1.019415103826456e+02 6.800389464387812e+02 4 1.498275205169629e+02 2.725466378451580e+01 
1.385239989205421e+02 -5.016543560376590e+01 - ME 4.060174493404880e-04 + ME 6.663776898009472e-04 Event 152 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1220,7 +1220,7 @@ Event 152 Batch 0 2 7.401192382353395e+02 1.493701961830190e+02 6.288419447382046e+02 3.605867993093739e+02 3 7.332111095478891e+02 -1.230079111936445e+02 -6.287602831147091e+02 -3.565502647954901e+02 4 2.666965221677112e+01 -2.636228498937447e+01 -8.166162349550861e-02 -4.036534513883709e+00 - ME 1.210964379505254e-04 + ME 8.446403371723604e-04 Event 153 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1228,7 +1228,7 @@ Event 153 Batch 0 2 5.645797071775899e+02 7.941901905692946e+01 3.691428696980725e+02 -4.197337333594241e+02 3 6.079979027943974e+02 1.021455738177839e+02 -5.566920170809548e+02 2.220849604771994e+02 4 3.274223900280123e+02 -1.815645928747133e+02 1.875491473828823e+02 1.976487728822249e+02 - ME 9.895323747190810e-06 + ME 2.846663840296023e-05 Event 154 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1236,7 +1236,7 @@ Event 154 Batch 0 2 6.022174885419887e+02 -5.152457849782368e+02 -1.493252664732707e+02 -2.736597328082223e+02 3 3.617627670199851e+02 1.925398333816265e+02 -2.626238171638091e+02 1.575736108034646e+02 4 5.360197444380261e+02 3.227059515966102e+02 4.119490836370796e+02 1.160861220047577e+02 - ME 1.660411512586943e-05 + ME 6.437319974597944e-05 Event 155 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1244,7 +1244,7 @@ Event 155 Batch 0 2 6.202229507100907e+02 -2.107861924791831e+02 -3.212541876154504e+02 4.868690137883067e+02 3 2.943040328093193e+02 2.940980302320592e+02 1.073731199058907e+01 2.433613089266508e+00 4 5.854730164805898e+02 -8.331183775287627e+01 3.105168756248616e+02 -4.893026268775732e+02 - ME 4.918845171174253e-04 + ME 
5.904510654775639e-03 Event 156 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1252,7 +1252,7 @@ Event 156 Batch 0 2 4.945486805149833e+02 4.540818864859257e+02 -1.431706201593249e+02 -1.337542944644701e+02 3 5.997303202813281e+02 -3.624214233270367e+02 -5.726286247273350e+01 4.743923835389624e+02 4 4.057209992036886e+02 -9.166046315888883e+01 2.004334826320584e+02 -3.406380890744924e+02 - ME 1.986837824231628e-04 + ME 4.701306652347430e-03 Event 157 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1260,7 +1260,7 @@ Event 157 Batch 0 2 4.617003083190191e+02 3.118400043328062e+02 3.404502064148864e+02 -4.079626411035589e+00 3 5.720097526413113e+02 -4.999240316044806e+01 -4.329264075474301e+02 -3.705005295422582e+02 4 4.662899390396696e+02 -2.618476011723578e+02 9.247620113254365e+01 3.745801559532937e+02 - ME 1.403598809900552e-05 + ME 3.907978340087068e-05 Event 158 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1268,7 +1268,7 @@ Event 158 Batch 0 2 6.784877363061535e+02 -5.707102180762959e+02 -3.102223423027389e+02 -1.959529373021938e+02 3 5.650909444059712e+02 5.525284805868615e+02 7.765167789879932e+01 8.950011457818250e+01 4 2.564213192878751e+02 1.818173748943443e+01 2.325706644039396e+02 1.064528227240114e+02 - ME 8.470133063482862e-06 + ME 3.503179830087694e-05 Event 159 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1276,7 +1276,7 @@ Event 159 Batch 0 2 5.369491563274252e+02 2.154713482252002e+02 -2.912667909729743e+02 3.962955349875316e+02 3 6.066564496499102e+02 -4.020061311781470e+01 5.572389608252350e+02 -2.364332868806716e+02 4 3.563943940226648e+02 -1.752707351073854e+02 -2.659721698522608e+02 -1.598622481068599e+02 - ME 3.562393617300492e-05 + ME 3.198473025834927e-04 Event 160 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -1284,7 +1284,7 @@ Event 160 Batch 0 2 6.492474755438517e+02 3.490068395973682e+02 1.460348644657111e+02 -5.276270735801970e+02 3 2.857818814470013e+02 -2.550253586192556e+02 1.227259509083862e+02 3.964456076362119e+01 4 5.649706430091471e+02 -9.398148097811273e+01 -2.687608153740973e+02 4.879825128165764e+02 - ME 3.516238941302227e-05 + ME 6.719464076924620e-05 Event 161 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1292,7 +1292,7 @@ Event 161 Batch 0 2 6.770282049439580e+02 -2.863253153105184e+02 -4.911270786072976e+02 -3.676672364525180e+02 3 1.598243093356544e+02 -7.505362471426160e+01 1.299195075310522e+02 -5.506073768810752e+01 4 6.631474857203874e+02 3.613789400247800e+02 3.612075710762453e+02 4.227279741406256e+02 - ME 5.970757951131334e-05 + ME 1.577168105051119e-04 Event 162 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1300,7 +1300,7 @@ Event 162 Batch 0 2 5.178592782584632e+02 -3.271131571456631e+02 3.943743741889439e+02 -7.512700901574514e+01 3 3.730686930366258e+02 -2.885924195736573e+01 -1.360208443078026e+02 -3.461874113706257e+02 4 6.090720287049110e+02 3.559723991030290e+02 -2.583535298811414e+02 4.213144203863710e+02 - ME 2.768303103320498e-05 + ME 1.031749267713353e-04 Event 163 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1308,7 +1308,7 @@ Event 163 Batch 0 2 5.388642316037673e+02 3.152159924116781e+02 3.539969933522669e+01 -4.356149670486711e+02 3 5.364171791816749e+02 -5.299694218906361e+02 3.369785517714305e+01 7.576448071880543e+01 4 4.247185892145582e+02 2.147534294789580e+02 -6.909755451236977e+01 3.598504863298658e+02 - ME 1.485600561394433e-05 + ME 3.508094027565679e-05 Event 164 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1316,7 +1316,7 @@ Event 164 Batch 0 2 
6.862697092177667e+02 4.132218376422068e+02 1.310202162324327e+02 -5.320221138485150e+02 3 4.476895523579005e+02 -2.769046850483522e+02 1.374187337517142e+02 3.238299280529301e+02 4 3.660407384243329e+02 -1.363171525938544e+02 -2.684389499841469e+02 2.081921857955847e+02 - ME 1.755563256840939e-05 + ME 3.375894779915149e-05 Event 165 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1324,7 +1324,7 @@ Event 165 Batch 0 2 2.382444910715278e+02 -2.158277263671036e+02 -9.471372817531817e+00 -1.004446273032522e+02 3 7.304591383576048e+02 4.619003715882296e+02 -1.223345688256177e+02 5.524969256086772e+02 4 5.312963705708673e+02 -2.460726452211260e+02 1.318059416431495e+02 -4.520522983054250e+02 - ME 4.549138184301779e-04 + ME 6.966498968932957e-03 Event 166 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1332,7 +1332,7 @@ Event 166 Batch 0 2 2.131352071380649e+02 -7.633553084455029e+01 -1.899581415396244e+02 5.929087379418958e+01 3 7.305557876753161e+02 8.980971292745940e+01 7.136333043711877e+02 1.279589045828712e+02 4 5.563090051866194e+02 -1.347418208290915e+01 -5.236751628315633e+02 -1.872497783770607e+02 - ME 3.352199959657985e-05 + ME 3.314006956523505e-04 Event 167 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1340,7 +1340,7 @@ Event 167 Batch 0 2 4.122964103002419e+02 -3.405127102276982e+02 6.366431608201744e+01 2.235761145061386e+02 3 4.697083356610920e+02 -2.521100678451879e+02 -2.856113063438232e+01 -3.952855880214881e+02 4 6.179952540386658e+02 5.926227780728861e+02 -3.510318544763516e+01 1.717094735153495e+02 - ME 3.829535931496594e-05 + ME 1.146777177775239e-04 Event 168 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1348,7 +1348,7 @@ Event 168 Batch 0 2 7.156643283953484e+02 -3.999734570317170e+02 4.816586825103861e+02 3.467009924560655e+02 3 
6.192344221355605e+02 2.722545660880235e+02 -4.999454120042317e+02 -2.436869012025525e+02 4 1.651012494690919e+02 1.277188909436936e+02 1.828672949384504e+01 -1.030140912535133e+02 - ME 5.027887292283473e-05 + ME 1.017624049822302e-03 Event 169 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1356,7 +1356,7 @@ Event 169 Batch 0 2 3.626022684949455e+02 7.511110909567982e+01 -2.030941161665286e+02 -2.908461902563517e+02 3 5.580565590514408e+02 -2.529981754432838e+02 -3.439969378312538e+02 3.592842232626199e+02 4 5.793411724536141e+02 1.778870663476037e+02 5.470910539977822e+02 -6.843803300626824e+01 - ME 4.350242525242475e-05 + ME 1.371698416063432e-04 Event 170 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1364,7 +1364,7 @@ Event 170 Batch 0 2 6.602909342483501e+02 4.699653539595539e+02 -3.020118498241596e+02 3.520021683086903e+02 3 1.039297502933440e+02 3.247420585022842e+01 -9.851348423194945e+01 6.473976746580508e+00 4 7.357793154583061e+02 -5.024395598097824e+02 4.005253340561092e+02 -3.584761450552709e+02 - ME 9.967260301798612e-03 + ME 1.673719496447659e-02 Event 171 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1372,7 +1372,7 @@ Event 171 Batch 0 2 1.506693011949600e+02 -3.657300520509282e+01 -1.244227366169959e+02 -7.669834565089053e+01 3 6.344013325830570e+02 -2.026333084464634e+02 -4.956100871165362e+02 3.402578943089165e+02 4 7.149293662219835e+02 2.392063136515561e+02 6.200328237335323e+02 -2.635595486580261e+02 - ME 9.157902172934166e-04 + ME 2.133207113512388e-03 Event 172 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1380,7 +1380,7 @@ Event 172 Batch 0 2 5.352445157558213e+02 -2.018352690102651e+02 3.892440882325296e+02 -3.069825004886504e+02 3 6.716112180685394e+02 2.825227203806547e+02 -5.978593235713698e+02 1.175022124175027e+02 4 
2.931442661756383e+02 -8.068745137038898e+01 2.086152353388391e+02 1.894802880711483e+02 - ME 8.067092159940342e-06 + ME 2.630379932615259e-05 Event 173 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1388,7 +1388,7 @@ Event 173 Batch 0 2 6.571348515648592e+02 -2.769863586381786e+02 5.805753619381593e+02 1.343019708712704e+02 3 5.332990408103321e+02 1.871824832342877e+02 -4.782426732337677e+02 1.437168410371092e+02 4 3.095661076248081e+02 8.980387540389081e+01 -1.023326887043915e+02 -2.780188119083794e+02 - ME 1.269359653092767e-04 + ME 9.985413945498126e-03 Event 174 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1396,7 +1396,7 @@ Event 174 Batch 0 2 6.091496911716730e+02 -4.752584064243671e+02 3.135726231883978e+01 -3.797492797588730e+02 3 6.417481529658018e+02 3.309293137608124e+02 9.015643604119191e+01 5.424004960996682e+02 4 2.491021558625255e+02 1.443290926635548e+02 -1.215136983600317e+02 -1.626512163407953e+02 - ME 1.362612102685676e-04 + ME 1.319192968737130e-03 Event 175 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1404,7 +1404,7 @@ Event 175 Batch 0 2 5.399801778396885e+02 1.966672297646830e+02 2.343185748302537e+02 -4.449667388535759e+02 3 6.987953575798327e+02 -1.857207036318898e+02 -9.664246188148675e+01 6.666955876403318e+02 4 2.612244645804785e+02 -1.094652613279307e+01 -1.376761129487668e+02 -2.217288487867561e+02 - ME 9.613528518728674e-04 + ME 9.528877211334405e-03 Event 176 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1412,7 +1412,7 @@ Event 176 Batch 0 2 6.615757321243968e+02 -4.129469954321281e+02 4.686878756164518e+02 -2.179194886871010e+02 3 1.607981401590110e+02 -6.355407199259605e+01 7.929314438200207e+00 1.474925346731048e+02 4 6.776261277165921e+02 4.765010674247242e+02 -4.766171900546519e+02 7.042695401399614e+01 - ME 
3.097907077728356e-04 + ME 6.965204353376922e-04 Event 177 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1420,7 +1420,7 @@ Event 177 Batch 0 2 4.314334067424883e+02 -3.493619040652741e+02 -2.026482683689240e+01 -2.523299055494341e+02 3 4.840006500668400e+02 -1.846595828310067e+02 -1.450727057198388e+02 4.232155216776995e+02 4 5.845659431906716e+02 5.340214868962809e+02 1.653375325567312e+02 -1.708856161282654e+02 - ME 1.084300812640113e-04 + ME 2.160100049311594e-04 Event 178 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1428,7 +1428,7 @@ Event 178 Batch 0 2 4.528135981327372e+02 -2.544528544607913e+02 1.436928116455424e+02 3.458992272209776e+02 3 3.053350882587867e+02 -1.380299578048218e+02 2.072032295570572e+02 1.767599177741536e+02 4 7.418513136084770e+02 3.924828122656132e+02 -3.508960412025996e+02 -5.226591449951313e+02 - ME 5.382438151181503e-02 + ME 7.384409254828141e-02 Event 179 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1436,7 +1436,7 @@ Event 179 Batch 0 2 7.433145319259943e+02 -2.538538580850882e+02 -6.778753511348521e+02 -1.689962142519080e+02 3 1.647945947160298e+02 1.009041857568576e+02 1.171651165877689e+02 5.699069397138987e+01 4 5.918908733579761e+02 1.529496723282306e+02 5.607102345470832e+02 1.120055202805181e+02 - ME 3.739915465576335e-05 + ME 1.335347052581446e-04 Event 180 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1444,7 +1444,7 @@ Event 180 Batch 0 2 2.396120216689867e+02 1.204528233788652e+02 -1.081248155319049e+02 1.766750195544080e+02 3 5.541470271917004e+02 2.767127195685322e+02 2.999096875483201e+02 3.749175614572557e+02 4 7.062409511393131e+02 -3.971655429473975e+02 -1.917848720164151e+02 -5.515925810116636e+02 - ME 2.792447184071457e-03 + ME 1.316593054412419e-02 Event 181 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1452,7 +1452,7 @@ Event 181 Batch 0 2 2.165494222755782e+02 1.336973493521793e+02 -1.495065670853883e+02 -8.164837697364385e+01 3 6.960869932595207e+02 -2.848973600545249e+02 2.209041937252092e+01 6.347303441548928e+02 4 5.873635844649011e+02 1.512000107023455e+02 1.274161477128675e+02 -5.530819671812490e+02 - ME 3.488874737600980e-03 + ME 6.164296623062663e-02 Event 182 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1460,7 +1460,7 @@ Event 182 Batch 0 2 6.472681881349898e+02 4.279258056181361e+02 3.994050733201775e+02 -2.762448183472868e+02 3 5.337197582091030e+02 -3.479343829022644e+02 -4.034091782989213e+02 -3.254965992745409e+01 4 3.190120536559070e+02 -7.999142271587166e+01 4.004104978744005e+00 3.087944782747408e+02 - ME 5.523679400573375e-05 + ME 6.393158381765308e-05 Event 183 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1468,7 +1468,7 @@ Event 183 Batch 0 2 6.165307808531154e+02 -3.276949594572818e+02 8.808524820164887e+01 -5.147496540405800e+02 3 2.975460412740734e+02 -1.030095950018341e+02 -2.375020297789284e+02 1.466814775843215e+02 4 5.859231778728107e+02 4.307045544591158e+02 1.494167815772794e+02 3.680681764562588e+02 - ME 2.562496117427957e-05 + ME 6.887775529805495e-05 Event 184 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1476,7 +1476,7 @@ Event 184 Batch 0 2 5.645337360463252e+02 -3.940276919793660e+02 3.776398996283964e+02 1.443212503288767e+02 3 5.368100353438223e+02 2.392766596964613e+02 -1.719264331693737e+02 -4.487237410122139e+02 4 3.986562286098531e+02 1.547510322829050e+02 -2.057134664590229e+02 3.044024906833372e+02 - ME 1.712138666139329e-05 + ME 3.553984578535888e-05 Event 185 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1484,7 +1484,7 @@ Event 
185 Batch 0 2 6.347397779710931e+02 2.522092504724420e+02 -1.599825720327363e+02 5.600809373302327e+02 3 4.566768168089404e+02 -3.359958684022406e+02 -1.272903681003782e+02 -2.818823400219340e+02 4 4.085834052199659e+02 8.378661792979838e+01 2.872729401331145e+02 -2.781985973082986e+02 - ME 1.836859309200860e-04 + ME 1.184197550833168e-03 Event 186 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1492,7 +1492,7 @@ Event 186 Batch 0 2 7.089823220133230e+02 -5.197119220861886e+02 4.248734840868308e+02 -2.281183322067745e+02 3 5.364076825758043e+02 3.588264146200084e+02 -3.973752875032956e+02 3.270606945152315e+01 4 2.546099954108725e+02 1.608855074661802e+02 -2.749819658353518e+01 1.954122627552515e+02 - ME 1.318469173008218e-05 + ME 2.583895514537347e-05 Event 187 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1500,7 +1500,7 @@ Event 187 Batch 0 2 4.835105223217566e+02 -2.128653471696258e+02 1.375287019182911e+02 -4.117725407538514e+02 3 7.240136612790383e+02 4.407273454759851e+02 -4.896543389042274e+01 5.723264583716990e+02 4 2.924758163992057e+02 -2.278619983063593e+02 -8.856326802786833e+01 -1.605539176178473e+02 - ME 9.185777086042985e-05 + ME 5.307563978210835e-04 Event 188 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1508,7 +1508,7 @@ Event 188 Batch 0 2 6.611118500396009e+02 3.502021063704277e+02 -2.011693879247277e+02 -5.234102027267809e+02 3 3.072944371702247e+02 -6.894916504330918e+01 -1.599953986835475e+02 2.531350551695447e+02 4 5.315937127901742e+02 -2.812529413271184e+02 3.611647866082752e+02 2.702751475572362e+02 - ME 3.862980709292737e-05 + ME 6.863567490702385e-05 Event 189 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1516,7 +1516,7 @@ Event 189 Batch 0 2 7.498478362545707e+02 6.780504955298834e+02 -3.199144947524264e+02 
-1.319162971889924e+01 3 3.253008430749361e+02 -2.985087551774363e+02 1.291384938207140e+02 6.034152914782593e+00 4 4.248513206704935e+02 -3.795417403524470e+02 1.907760009317124e+02 7.157476804116639e+00 - ME 1.504471760657040e-05 + ME 8.583750584152986e-05 Event 190 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1524,7 +1524,7 @@ Event 190 Batch 0 2 4.938867893347995e+02 3.689671478502748e+02 -1.218724623869293e+02 3.048516153777389e+02 3 5.264063001598521e+02 6.631942569346465e+01 1.276367949726208e+02 -5.063735530147588e+02 4 4.797069105053494e+02 -4.352865735437401e+02 -5.764332585691415e+00 2.015219376370201e+02 - ME 2.269926034328256e-05 + ME 4.759343488474735e-05 Event 191 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1532,7 +1532,7 @@ Event 191 Batch 0 2 3.681793141805986e+02 -3.225132888415706e+02 1.579589482507471e+02 -8.117977937027918e+01 3 5.431126642386394e+02 4.058413736814005e+01 9.147123993851424e+01 5.338139246166097e+02 4 5.887080215807621e+02 2.819291514734305e+02 -2.494301881892614e+02 -4.526341452463304e+02 - ME 1.427494731558637e-03 + ME 4.908990110546420e-03 Event 192 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1540,7 +1540,7 @@ Event 192 Batch 0 2 6.054165399887861e+02 1.497087111729466e+02 8.905021611535379e+01 5.798159601983524e+02 3 2.106656439489222e+02 1.451894976721945e+02 -1.487249448604451e+02 3.436443048222171e+01 4 6.839178160622922e+02 -2.948982088451411e+02 5.967472874509133e+01 -6.141803906805740e+02 - ME 6.984876913518998e-03 + ME 4.294450320853435e-02 Event 193 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1548,7 +1548,7 @@ Event 193 Batch 0 2 2.753169163933055e+02 -1.695475157411122e+02 -2.139406274107579e+02 3.581134319495643e+01 3 5.760219428901971e+02 -3.264616044953138e+02 1.527507522369444e+02 
-4.493231656306969e+02 4 6.486611407164972e+02 4.960091202364260e+02 6.118987517381347e+01 4.135118224357404e+02 - ME 4.273063058931925e-05 + ME 1.537583375796735e-04 Event 194 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1556,7 +1556,7 @@ Event 194 Batch 0 2 3.445934948105150e+02 -2.970257025567896e+02 -8.183019525038441e+01 1.543509890854414e+02 3 7.485441862377920e+02 6.623797851941252e+02 1.083400559332054e+02 -3.314119056355291e+02 4 4.068623189516925e+02 -3.653540826373358e+02 -2.650986068282081e+01 1.770609165500877e+02 - ME 4.921158833271929e-06 + ME 3.024610065690235e-05 Event 195 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1564,7 +1564,7 @@ Event 195 Batch 0 2 2.012122274303647e+02 -5.190018365965096e+01 1.322177369426910e+02 -1.425173724194237e+02 3 7.122630330184543e+02 -3.054768058087834e+02 -2.528097616133813e+02 5.916838461125119e+02 4 5.865247395511832e+02 3.573769894684365e+02 1.205920246706904e+02 -4.491664736930883e+02 - ME 4.696445912229638e-04 + ME 3.011639483286710e-03 Event 196 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1572,7 +1572,7 @@ Event 196 Batch 0 2 4.490485793345989e+02 3.485190427929747e+02 -2.661098616642627e+01 -2.819059396826192e+02 3 5.531554978829222e+02 -3.330165694254377e+02 4.416170126965178e+02 7.442003978758296e+00 4 4.977959227824785e+02 -1.550247336753688e+01 -4.150060265300915e+02 2.744639357038610e+02 - ME 9.363355109875406e-06 + ME 4.340266456570635e-05 Event 197 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1580,7 +1580,7 @@ Event 197 Batch 0 2 3.951249254444253e+02 -2.278358800090239e+02 3.101157211704546e+02 -8.968142489336992e+01 3 3.607080640108546e+02 -2.889948719219027e+02 2.155030307719242e+02 -1.227661082778765e+01 4 7.441670105447209e+02 5.168307519309257e+02 -5.256187519423792e+02 
1.019580357211576e+02 - ME 6.597373610109231e-03 + ME 3.377741088449004e-02 Event 198 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1588,7 +1588,7 @@ Event 198 Batch 0 2 3.750236904637998e+02 1.183014344420310e+02 -1.005952209347265e+02 -3.413621838211424e+02 3 4.381296266085964e+02 -2.726825461625328e+02 1.003845461170281e+02 -3.279096546785175e+02 4 6.868466829276033e+02 1.543811117205018e+02 2.106748176980602e-01 6.692718384996598e+02 - ME 6.145502577419889e-04 + ME 9.606390506705955e-04 Event 199 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1596,7 +1596,7 @@ Event 199 Batch 0 2 2.454478562244572e+02 -2.058455361543722e+02 -1.131056012155068e+02 -7.126982772660261e+01 3 5.321797086694488e+02 -9.806778012582416e+01 -4.820333037417012e+02 -2.030808875905193e+02 4 7.223724351060940e+02 3.039133162801963e+02 5.951389049572081e+02 2.743507153171219e+02 - ME 3.088173795554332e-04 + ME 1.577081887352965e-03 Event 200 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1604,7 +1604,7 @@ Event 200 Batch 0 2 3.952431318363244e+02 3.031309873729303e+02 9.337877017948550e+01 2.358159092128122e+02 3 6.094031244332663e+02 -7.796753338981905e+01 -5.315426896439308e+02 -2.876727322709444e+02 4 4.953537437304092e+02 -2.251634539831113e+02 4.381639194644453e+02 5.185682305813224e+01 - ME 1.668296552597111e-05 + ME 6.703240553489506e-05 Event 201 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1612,7 +1612,7 @@ Event 201 Batch 0 2 6.497938633639732e+02 3.771120671245744e+02 3.553445817627057e+02 -3.921081252746440e+02 3 3.369790646193914e+02 -2.140351778515325e+02 1.061239955238163e+02 2.376584318047305e+02 4 5.132270720166357e+02 -1.630768892730420e+02 -4.614685772865220e+02 1.544496934699135e+02 - ME 2.404518058628388e-05 + ME 6.283412004793947e-05 Event 202 Batch 0 
0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1620,7 +1620,7 @@ Event 202 Batch 0 2 7.267802742470179e+02 6.523432021666289e+02 -1.481957728499301e+02 2.840702844913056e+02 3 3.546086620137576e+02 -3.102429173963679e+02 -5.939291787501398e+01 -1.611493614224694e+02 4 4.186110637392242e+02 -3.421002847702610e+02 2.075886907249440e+02 -1.229209230688360e+02 - ME 2.830403199974809e-05 + ME 1.894138330341389e-04 Event 203 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1628,7 +1628,7 @@ Event 203 Batch 0 2 4.830190702985662e+02 2.789429895135886e+02 -3.943102945050296e+02 -4.197918611657844e+00 3 5.247163710833165e+02 -4.266462829986153e+02 3.263988520595893e+01 3.037019215942698e+02 4 4.922645586181170e+02 1.477032934850268e+02 3.616704092990706e+02 -2.995040029826120e+02 - ME 5.153190919865371e-05 + ME 5.831910678002871e-04 Event 204 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1636,7 +1636,7 @@ Event 204 Batch 0 2 6.952375769935185e+02 3.823764713153302e+01 6.531840992713522e+02 -2.350397908115460e+02 3 6.250862947179036e+02 1.031861473443961e+02 -5.506835576815644e+02 2.771878679515999e+02 4 1.796761282885781e+02 -1.414237944759291e+02 -1.025005415897879e+02 -4.214807714005369e+01 - ME 1.903000177287069e-05 + ME 1.802858800889920e-04 Event 205 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1644,7 +1644,7 @@ Event 205 Batch 0 2 5.625197268936781e+02 2.955060596751036e+02 4.395356105446072e+02 -1.895074112086703e+02 3 3.144813194259642e+02 -1.941101430078122e+02 -7.073026664887073e+00 -2.473251401357733e+02 4 6.229989536803572e+02 -1.013959166672914e+02 -4.324625838797200e+02 4.368325513444433e+02 - ME 3.163472493443465e-05 + ME 1.140145509231641e-04 Event 206 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 
@@ -1652,7 +1652,7 @@ Event 206 Batch 0 2 5.487698581700869e+02 -4.771827558939671e+02 -2.639484985605369e+02 6.145050708573941e+01 3 4.357856725513919e+02 1.877155863290790e+02 1.701172104948722e+02 3.545872893148349e+02 4 5.154444692785200e+02 2.894671695648880e+02 9.383128806566407e+01 -4.160377964005746e+02 - ME 3.341888001113221e-04 + ME 4.167786087259531e-03 Event 207 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1660,7 +1660,7 @@ Event 207 Batch 0 2 5.289473514933904e+02 -3.230637718239221e+02 -3.258094337294262e+02 2.631792409740627e+02 3 3.730441408755686e+02 -1.145152671243400e+02 -7.298530142052728e+01 -3.474497523579300e+02 4 5.980085076310412e+02 4.375790389482623e+02 3.987947351499535e+02 8.427051138386733e+01 - ME 3.789028948405571e-05 + ME 1.161501350367753e-04 Event 208 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1668,7 +1668,7 @@ Event 208 Batch 0 2 3.144460531270953e+02 3.105028133645123e+02 -3.495125011961062e+01 3.525242310830974e+01 3 7.230517599976935e+02 -6.554206809343713e+02 2.220922910679198e+02 2.095294558946058e+02 4 4.625021868752117e+02 3.449178675698588e+02 -1.871410409483092e+02 -2.447818790029155e+02 - ME 2.941989209837521e-05 + ME 4.858457850437588e-04 Event 209 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1676,7 +1676,7 @@ Event 209 Batch 0 2 2.827014058170527e+02 -6.682954863774688e+01 -1.958656753088385e+02 -1.925890275057887e+02 3 5.969812148172332e+02 5.625717004655273e+02 1.060136244597389e+02 -1.692949027847388e+02 4 6.203173793657136e+02 -4.957421518277804e+02 8.985205084909943e+01 3.618839302905275e+02 - ME 2.261939336541961e-05 + ME 1.004351001266980e-04 Event 210 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1684,7 +1684,7 @@ Event 210 Batch 0 2 3.369223392964550e+02 -2.366581006943837e+02 
8.850719545688517e+01 -2.228813191927023e+02 3 6.926279093100447e+02 9.835546321295956e+01 -1.581805884470998e+02 6.671120783270956e+02 4 4.704497513935005e+02 1.383026374814242e+02 6.967339299021461e+01 -4.442307591343933e+02 - ME 3.044010300440331e-03 + ME 5.974710408786874e-02 Event 211 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1692,7 +1692,7 @@ Event 211 Batch 0 2 5.754314663824422e+02 -1.965408456680789e+02 -5.399725108422632e+02 3.037689947684008e+01 3 6.656941886103589e+02 4.112771407945243e+02 5.114655840792436e+02 1.113679599883347e+02 4 2.588743450071987e+02 -2.147362951264454e+02 2.850692676301957e+01 -1.417448594651748e+02 - ME 1.754510489093768e-05 + ME 4.382347812376007e-04 Event 212 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1700,7 +1700,7 @@ Event 212 Batch 0 2 5.922157374848572e+02 8.073316194509509e+00 4.947261155542873e+02 -3.254233732830556e+02 3 3.635572903001510e+02 8.951663862813328e+01 4.011175755255380e+01 3.500738802669425e+02 4 5.442269722149914e+02 -9.758995482264278e+01 -5.348378731068407e+02 -2.465050698388706e+01 - ME 1.919214373141161e-04 + ME 3.041427876287276e-04 Event 213 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1708,7 +1708,7 @@ Event 213 Batch 0 2 7.434820262506830e+02 2.991548764052629e+02 2.111623598614188e+02 -6.470566753063675e+02 3 5.607612173038236e+02 -2.664197873565705e+02 -1.905271140771768e+02 4.551626726109781e+02 4 1.957567564454930e+02 -3.273508904869271e+01 -2.063524578424195e+01 1.918940026953895e+02 - ME 1.896082550340891e-04 + ME 1.827786070323022e-04 Event 214 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1716,7 +1716,7 @@ Event 214 Batch 0 2 5.400874280734793e+02 3.457358963402696e+02 2.445843697627679e+02 -3.351710101016577e+02 3 3.400793067879315e+02 1.482066942304564e+02 
1.256466447865830e+02 2.791086371729012e+02 4 6.198332651385892e+02 -4.939425905707261e+02 -3.702310145493508e+02 5.606237292875651e+01 - ME 6.515553919952984e-05 + ME 1.356968066378560e-04 Event 215 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1724,7 +1724,7 @@ Event 215 Batch 0 2 3.916345321859864e+02 3.271767110560381e+02 -1.945589530122144e+02 9.208594000107233e+01 3 6.136750729169615e+02 -1.269585669220027e+02 2.644680756040779e+02 -5.390132228350478e+02 4 4.946903948970534e+02 -2.002181441340350e+02 -6.990912259186331e+01 4.469272828339764e+02 - ME 3.427926940877871e-05 + ME 6.207321332343461e-05 Event 216 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1732,7 +1732,7 @@ Event 216 Batch 0 2 3.767411090262154e+02 1.602503356822860e+02 2.758455349572533e+02 -2.004069210086422e+02 3 4.061922956351256e+02 3.340053729931861e+02 2.237650079776778e+02 5.798114391563544e+01 4 7.170665953386593e+02 -4.942557086754721e+02 -4.996105429349309e+02 1.424257770930068e+02 - ME 2.360785017217177e-04 + ME 1.232271832865728e-03 Event 217 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1740,7 +1740,7 @@ Event 217 Batch 0 2 6.474118977458852e+02 -5.378641111590873e+02 -3.279650037002520e+02 1.492759847325320e+02 3 5.088298200539713e+02 3.261878344469131e+02 1.555821256186315e+02 -3.581947579501665e+02 4 3.437582822001433e+02 2.116762767121744e+02 1.723828780816206e+02 2.089187732176345e+02 - ME 1.388331578224744e-05 + ME 3.357118960820415e-05 Event 218 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1748,7 +1748,7 @@ Event 218 Batch 0 2 6.658501161076259e+02 -6.577627036244854e+02 -3.020200479570956e+01 9.895676706252418e+01 3 2.516345839620714e+02 1.565221509782131e+02 -1.156477271957936e+02 1.595192254662914e+02 4 5.825152999303023e+02 5.012405526462722e+02 
1.458497319915031e+02 -2.584759925288157e+02 - ME 1.036808356896783e-04 + ME 5.956187308313417e-04 Event 219 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1756,7 +1756,7 @@ Event 219 Batch 0 2 4.328556070633435e+02 6.122246558068494e+01 -1.687441385117925e+02 3.938796795879554e+02 3 6.500677455605621e+02 -3.703058656885360e+02 4.356876543064814e+02 -3.092537914719426e+02 4 4.170766473760945e+02 3.090834001078509e+02 -2.669435157946888e+02 -8.462588811601287e+01 - ME 9.046106878448173e-05 + ME 2.797067114354785e-04 Event 220 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1764,7 +1764,7 @@ Event 220 Batch 0 2 3.686297280598666e+02 -3.497113779929074e+02 -8.765282776369953e+01 7.685577594963354e+01 3 4.155522773953191e+02 -1.777404948015450e+02 -1.525848366500187e+02 3.432344379292750e+02 4 7.158179945448145e+02 5.274518727944524e+02 2.402376644137182e+02 -4.200902138789084e+02 - ME 1.676729229638681e-03 + ME 3.485410710153060e-03 Event 221 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1772,7 +1772,7 @@ Event 221 Batch 0 2 5.295220830718469e+02 3.654688468413813e+01 4.204675060608333e+02 3.197890523886257e+02 3 7.127556392876786e+02 -1.727486268095863e+02 -4.342549693537605e+02 -5.381460163035255e+02 4 2.577222776404743e+02 1.362017421254481e+02 1.378746329292729e+01 2.183569639148998e+02 - ME 2.031931825964470e-05 + ME 2.819264207321091e-05 Event 222 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1780,7 +1780,7 @@ Event 222 Batch 0 2 2.464305981122427e+02 -2.054199106396077e+02 6.127423271580306e+01 1.215572638876956e+02 3 6.926647117218595e+02 4.702892479611936e+02 3.872350261814336e+02 -3.296383785530530e+02 4 5.609046901658980e+02 -2.648693373215859e+02 -4.485092588972366e+02 2.080811146653574e+02 - ME 1.678695785515194e-05 + ME 
6.319142394583372e-05 Event 223 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1788,7 +1788,7 @@ Event 223 Batch 0 2 2.463384302181125e+02 -1.209251938955738e+02 -2.140981972257043e+02 -1.488897673935926e+01 3 6.819620845265065e+02 -2.400891875757811e+02 5.819023806457059e+02 2.623339210620683e+02 4 5.716994852553812e+02 3.610143814713547e+02 -3.678041834200016e+02 -2.474449443227091e+02 - ME 4.810915220985587e-05 + ME 3.931927185620913e-04 Event 224 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1796,7 +1796,7 @@ Event 224 Batch 0 2 2.236851263016067e+02 -8.671871524968952e+01 1.717231909970332e+02 1.141317038679677e+02 3 5.308972974363861e+02 -3.715833295102001e+01 4.680039348616383e+02 2.478780257941054e+02 4 7.454175762620068e+02 1.238770482007099e+02 -6.397271258586715e+02 -3.620097296620728e+02 - ME 6.017706528853119e-02 + ME 8.708656265179471e-02 Event 225 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1804,7 +1804,7 @@ Event 225 Batch 0 2 5.094176014319268e+02 1.569347096242780e+02 -1.561291130928888e+00 -4.846394040251013e+02 3 7.252311334449815e+02 -3.845161955462210e+02 -4.374219820797174e+01 6.133466494377277e+02 4 2.653512651230916e+02 2.275814859219426e+02 4.530348933890067e+01 -1.287072454126262e+02 - ME 1.151501859389029e-04 + ME 3.974215742688118e-04 Event 226 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1812,7 +1812,7 @@ Event 226 Batch 0 2 6.863217264048350e+02 -2.391756120967483e+02 -6.171186323675804e+02 1.816511279850093e+02 3 5.332348374442744e+02 1.096335504493486e+02 4.112484130583279e+02 -3.212391931833643e+02 4 2.804434361508906e+02 1.295420616473995e+02 2.058702193092524e+02 1.395880651983551e+02 - ME 1.438206074993319e-05 + ME 3.797053871351767e-05 Event 227 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -1820,7 +1820,7 @@ Event 227 Batch 0 2 7.243206345463230e+02 -5.280189925476210e+02 -1.406011303275692e+02 4.754657162080069e+02 3 5.487499634657129e+02 3.840442912861271e+02 -1.353123555187442e+01 -3.917312987222202e+02 4 2.269294019879644e+02 1.439747012614939e+02 1.541323658794436e+02 -8.373441748578679e+01 - ME 5.165623507180856e-05 + ME 2.903986554770466e-04 Event 228 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1828,7 +1828,7 @@ Event 228 Batch 0 2 2.119578664379945e+02 1.625437651479949e+01 -1.806612394559917e+02 1.096514885776142e+02 3 6.254097456672617e+02 -3.200704000326812e+01 3.158243706171928e+02 5.388579277416935e+02 4 6.626323878947439e+02 1.575266348846865e+01 -1.351631311612011e+02 -6.485094163193077e+02 - ME 3.800526374221887e-02 + ME 8.951233069377997e-01 Event 229 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1836,7 +1836,7 @@ Event 229 Batch 0 2 5.921227120343664e+02 -3.877491982207575e+02 4.449193714386763e+02 -4.802726626309342e+01 3 4.688278331283221e+02 3.470549659129084e+02 -1.517581364471262e+02 -2.762641051115459e+02 4 4.390494548373113e+02 4.069423230784909e+01 -2.931612349915501e+02 3.242913713746393e+02 - ME 1.250052930035257e-05 + ME 3.492131538818778e-05 Event 230 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1844,7 +1844,7 @@ Event 230 Batch 0 2 4.261952284727868e+02 2.153699775439378e+02 -1.171086083390750e+02 3.486312082969335e+02 3 3.540619701921573e+02 3.070144260847319e+01 1.307424531367546e+02 3.276029778648147e+02 4 7.197428013350559e+02 -2.460714201524109e+02 -1.363384479767965e+01 -6.762341861617483e+02 - ME 4.711214236813061e-02 + ME 3.186738302883428e-01 Event 231 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1852,7 +1852,7 @@ Event 231 Batch 0 2 
4.205236024420392e+02 7.533931576750228e+01 -3.260217181731272e+02 -2.547036061581322e+02 3 5.397543491930860e+02 8.423195081267914e+01 -1.158376015978276e+02 5.204050211049134e+02 4 5.397220483648740e+02 -1.595712665801811e+02 4.418593197709548e+02 -2.657014149467809e+02 - ME 3.265984123744224e-04 + ME 5.532186388062512e-04 Event 232 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1860,7 +1860,7 @@ Event 232 Batch 0 2 4.295782852421121e+02 3.239064445356881e+02 9.240815775655221e-01 2.821724019337124e+02 3 7.183371274312143e+02 -6.155391061575082e+02 -1.955291718271078e+02 -3.144649112405858e+02 4 3.520845873266736e+02 2.916326616218201e+02 1.946050902495422e+02 3.229250930687335e+01 - ME 1.049779024540051e-05 + ME 6.730603828970119e-05 Event 233 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1868,7 +1868,7 @@ Event 233 Batch 0 2 3.640046126075324e+02 -2.220120664068515e+02 -1.165482463207536e+02 2.638683509799470e+02 3 4.682121509308883e+02 -1.009786196736112e+02 3.762431872847591e+02 2.597441061312976e+02 4 6.677832364615790e+02 3.229906860804628e+02 -2.596949409640055e+02 -5.236124571112447e+02 - ME 7.598357868514145e-04 + ME 5.385640989777132e-03 Event 234 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1876,7 +1876,7 @@ Event 234 Batch 0 2 8.690043548936441e+01 -2.607433849884744e+01 -7.258333015587984e+01 4.004341073848801e+01 3 6.785651905172676e+02 -3.574930335951373e+02 -4.725723606052789e+01 5.748184081539155e+02 4 7.345343739933678e+02 3.835673720939847e+02 1.198405662164078e+02 -6.148618188924036e+02 - ME 8.152211059226219e-02 + ME 1.962113644780599e-01 Event 235 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1884,7 +1884,7 @@ Event 235 Batch 0 2 3.000566282865331e+02 1.219146462304108e+01 -2.126850238006026e+02 2.113064812540423e+02 3 
7.160981218147422e+02 2.575873756248088e+02 2.779062108697769e+02 -6.076293293985470e+02 4 4.838452498987246e+02 -2.697788402478500e+02 -6.522118706917435e+01 3.963228481445046e+02 - ME 2.498899672933017e-05 + ME 3.940402333844027e-05 Event 236 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1892,7 +1892,7 @@ Event 236 Batch 0 2 1.510518772182422e+02 -9.497518588910037e+01 1.467158067736534e+01 1.165380984781943e+02 3 6.955499852411461e+02 5.933480346078575e+02 3.495450158124774e+02 9.770452249822526e+01 4 6.533981375406115e+02 -4.983728487187572e+02 -3.642165964898426e+02 -2.142426209764196e+02 - ME 2.623118294900277e-04 + ME 1.121647028585911e-03 Event 237 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1900,7 +1900,7 @@ Event 237 Batch 0 2 2.173874152942701e+02 2.069918593916189e+02 -3.850229167793934e+01 -5.412237993169356e+01 3 7.305677895866185e+02 -6.701932224704495e+02 -2.421540700080861e+02 1.610333695687662e+02 4 5.520447951191120e+02 4.632013630788306e+02 2.806563616860255e+02 -1.069109896370727e+02 - ME 2.170005261464319e-05 + ME 1.822378225061386e-04 Event 238 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1908,7 +1908,7 @@ Event 238 Batch 0 2 6.349573912113930e+02 -3.336495545457479e+02 -4.785400196851591e+02 2.506956580500139e+02 3 5.768887318987100e+02 4.812119270965607e+02 2.334547330568691e+02 -2.161818165921041e+02 4 2.881538768898968e+02 -1.475623725508129e+02 2.450852866282900e+02 -3.451384145790988e+01 - ME 1.383744831772315e-05 + ME 9.810731053503000e-05 Event 239 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1916,7 +1916,7 @@ Event 239 Batch 0 2 5.349076725903783e+02 -5.331874414268931e+02 1.887721601290929e+01 -3.848403846142781e+01 3 3.658437465440003e+02 8.335465236419728e+01 1.670818061666301e+01 -3.558292926602242e+02 4 
5.992485808656214e+02 4.498327890626960e+02 -3.558539662957234e+01 3.943133311216517e+02 - ME 2.560110521983184e-05 + ME 9.226736931333760e-05 Event 240 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1924,7 +1924,7 @@ Event 240 Batch 0 2 2.870582387324442e+02 1.830793600232297e+02 -1.562409872742485e+02 1.564389154054251e+02 3 6.007192677438852e+02 3.433229388031108e+02 4.688113613010560e+02 -1.523446941819630e+02 4 6.122224935236703e+02 -5.264022988263405e+02 -3.125703740268075e+02 -4.094221223461989e+00 - ME 3.548113744927254e-05 + ME 1.424405912705748e-04 Event 241 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1932,7 +1932,7 @@ Event 241 Batch 0 2 7.424696267657401e+02 4.823783107714221e+02 2.498315161211407e+02 5.061190823507636e+02 3 2.455726236162737e+02 -1.827879695947952e+02 -1.199757723946156e+02 -1.118046764652876e+02 4 5.119577496179861e+02 -2.995903411766270e+02 -1.298557437265251e+02 -3.943144058854759e+02 - ME 2.366266620918590e-04 + ME 2.705973755259623e-03 Event 242 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1940,7 +1940,7 @@ Event 242 Batch 0 2 7.249130370348905e+02 1.676828147928013e+02 6.059046362201677e+02 -3.609168279440810e+02 3 6.240672718074169e+02 -4.529413961306761e+01 -5.490982345027019e+02 2.930862151720549e+02 4 1.510196911576933e+02 -1.223886751797337e+02 -5.680640171746593e+01 6.783061277202641e+01 - ME 1.668420503127583e-05 + ME 4.587322306592483e-05 Event 243 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1948,7 +1948,7 @@ Event 243 Batch 0 2 4.655090712555229e+02 2.096323612054770e+02 2.113490506800235e+02 3.578890153850057e+02 3 5.764797256412519e+02 6.697224883641857e+01 -5.382210340689440e+02 -1.953502251008744e+02 4 4.580112031032257e+02 -2.766046100418949e+02 3.268719833889206e+02 -1.625387902841314e+02 - 
ME 3.999521919602606e-05 + ME 2.309042201876567e-04 Event 244 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1956,7 +1956,7 @@ Event 244 Batch 0 2 5.237109195354749e+02 1.305098338947756e+02 -4.868141165486322e+02 -1.423106687020528e+02 3 5.804450110242352e+02 -4.045654344879671e+02 2.643676733537771e+02 3.214855413949400e+02 4 3.958440694402901e+02 2.740556005931916e+02 2.224464431948551e+02 -1.791748726928872e+02 - ME 2.634847163425152e-05 + ME 2.644202232750943e-04 Event 245 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1964,7 +1964,7 @@ Event 245 Batch 0 2 2.629169357520612e+02 2.457511487795889e+02 -4.402365929491729e+01 -8.242333044139184e+01 3 6.931386101565748e+02 -5.195573187661655e+02 4.004017488088275e+02 -2.240084037645317e+02 4 5.439444540913644e+02 2.738061699865766e+02 -3.563780895139104e+02 3.064317342059234e+02 - ME 1.052590061693975e-05 + ME 4.288053786412853e-05 Event 246 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1972,7 +1972,7 @@ Event 246 Batch 0 2 6.300937687157445e+02 -5.459948028041557e+02 3.085954426748102e+02 6.063567799240802e+01 3 1.673910408536145e+02 -3.546130270298926e+01 7.662824936562275e+01 -1.445350060290698e+02 4 7.025151904306430e+02 5.814561055071442e+02 -3.852236920404341e+02 8.389932803666261e+01 - ME 1.915763997923398e-04 + ME 6.282756509154168e-04 Event 247 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1980,7 +1980,7 @@ Event 247 Batch 0 2 2.577847506495701e+02 2.418237207037818e+02 -8.449121421856779e+01 2.890502538162603e+01 3 5.130193185035739e+02 4.381905811488919e+02 1.366496386102691e+02 2.291390669832418e+02 4 7.291959308468561e+02 -6.800143018526737e+02 -5.215842439170134e+01 -2.580440923648679e+02 - ME 1.831864018495938e-03 + ME 4.005872724472581e-03 Event 248 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1988,7 +1988,7 @@ Event 248 Batch 0 2 7.033207479153643e+02 -5.040306065309413e+02 -2.020637997366072e+02 4.469714117975369e+02 3 1.758360012551320e+02 -1.471306652922549e+01 -4.035460943683606e+00 -1.751728862172264e+02 4 6.208432508295037e+02 5.187436730601667e+02 2.060992606802909e+02 -2.717985255803103e+02 - ME 1.512538512828554e-04 + ME 5.592865021063005e-04 Event 249 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1996,7 +1996,7 @@ Event 249 Batch 0 2 3.018816177222694e+02 5.523075638651412e+01 1.752331212074551e+02 2.395316845419020e+02 3 6.597415560701297e+02 6.315352823685419e+01 -6.561001191322722e+02 -2.834054254405022e+01 4 5.383768262076012e+02 -1.183842846233684e+02 4.808669979248172e+02 -2.111911419978518e+02 - ME 9.225490912808109e-05 + ME 4.868100986861644e-04 Event 250 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2004,7 +2004,7 @@ Event 250 Batch 0 2 2.166381935101301e+02 -1.289072913913530e+02 -1.189615590004073e+02 -1.271344351215279e+02 3 6.815426093761062e+02 -2.511966318704653e+02 5.323234433390903e+02 3.435583388650892e+02 4 6.018191971137635e+02 3.801039232618182e+02 -4.133618843386827e+02 -2.164239037435611e+02 - ME 6.586594805989363e-05 + ME 3.468666532553966e-04 Event 251 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2012,7 +2012,7 @@ Event 251 Batch 0 2 6.676961532387151e+02 -3.991265595084280e+01 -4.419965947723094e+02 4.988628500443886e+02 3 7.150412702460949e+02 3.921851524844908e+01 5.505653759000154e+02 -4.545587894617490e+02 4 1.172625765151894e+02 6.941407023942340e-01 -1.085687811277060e+02 -4.430406058263954e+01 - ME 4.930952510857648e-05 + ME 5.615833562023813e-04 Event 252 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2020,7 +2020,7 @@ 
Event 252 Batch 0 2 2.112668789066533e+02 -1.147554660376938e+02 3.364589711187055e+01 -1.741632301749357e+02 3 7.393007599584276e+02 2.529046383258835e+02 -3.593132473314827e+02 5.945576909606565e+02 4 5.494323611349191e+02 -1.381491722881897e+02 3.256673502196121e+02 -4.203944607857206e+02 - ME 3.541023077707110e-04 + ME 2.709805393201018e-03 Event 253 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2028,7 +2028,7 @@ Event 253 Batch 0 2 7.299659304470913e+01 -4.405884533650594e+01 -5.451291667290519e+01 2.038780663930336e+01 3 7.253475305576840e+02 3.245698054519170e+02 -1.402290280555607e+02 -6.333397991328418e+02 4 7.016558763976062e+02 -2.805109601154107e+02 1.947419447284657e+02 6.129519924935382e+02 - ME 3.511004874943257e-04 + ME 6.484723438037138e-04 Event 254 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2036,7 +2036,7 @@ Event 254 Batch 0 2 1.982520535096858e+02 -6.164633378269741e+01 1.773450413210087e+02 -6.365801262063783e+01 3 7.183815394471145e+02 -1.984891252513599e+02 -6.893152145826987e+02 -3.896971029099802e+01 4 5.833664070431995e+02 2.601354590340572e+02 5.119701732616900e+02 1.026277229116358e+02 - ME 1.539519794804785e-05 + ME 9.210498573936143e-05 Event 255 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2044,7 +2044,7 @@ Event 255 Batch 0 2 5.347080663542586e+02 -5.063606624096446e+02 1.592577719822621e+02 6.440929941880935e+01 3 2.475406015289465e+02 -1.856063881081879e+02 3.468010668896048e+00 -1.637516137347836e+02 4 7.177513321167953e+02 6.919670505178326e+02 -1.627257826511582e+02 9.934231431597431e+01 - ME 3.137689362725149e-04 + ME 1.305481727349711e-03 Event 0 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2052,7 +2052,7 @@ Event 0 Batch 1 2 5.775677821222389e+02 4.314431287975208e+02 -2.652567205762379e+02 
-2.776332864556192e+02 3 6.023469575940325e+02 -3.228069847179709e+02 5.005558924007591e+02 8.978477890465942e+01 4 3.200852602837275e+02 -1.086361440795499e+02 -2.352991718245218e+02 1.878485075509607e+02 - ME 7.533072458757011e-06 + ME 2.846168667868940e-05 Event 1 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2060,7 +2060,7 @@ Event 1 Batch 1 2 7.241206267812560e+02 3.541578305635416e+02 -4.894807402105655e+02 3.991635230623179e+02 3 7.375567605136832e+02 -3.903081173548693e+02 4.920451519627784e+02 -3.867054653560791e+02 4 3.832261270506111e+01 3.615028679132773e+01 -2.564411752212873e+00 -1.245805770623896e+01 - ME 7.043932941624384e-05 + ME 1.002871021831580e-03 Event 2 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2068,7 +2068,7 @@ Event 2 Batch 1 2 4.849204091734790e+02 2.108660079931152e+02 4.054727376659824e+02 1.620962335024329e+02 3 2.728468517759738e+02 4.961449545460115e+01 2.005017763154939e+02 1.782774356422519e+02 4 7.422327390505470e+02 -2.604805034477164e+02 -6.059745139814763e+02 -3.403736691446848e+02 - ME 1.721146206228212e-02 + ME 2.729395913593408e-02 Event 3 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2076,7 +2076,7 @@ Event 3 Batch 1 2 4.264155576764489e+02 -4.170952165204416e+02 -7.054834331799705e+01 5.370977042744418e+01 3 7.108631972082329e+02 6.832597695609467e+02 -1.727180704166534e+02 -9.301097030017993e+01 4 3.627212451153183e+02 -2.661645530405051e+02 2.432664137346505e+02 3.930119987273574e+01 - ME 5.739226791327231e-06 + ME 5.466137525204964e-05 Event 4 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2084,7 +2084,7 @@ Event 4 Batch 1 2 7.183269968238449e+02 -3.584978055671311e+02 -5.048824553914336e+02 -3.640971079361008e+02 3 7.387431276480253e+02 4.013538934928407e+02 5.036810263913359e+02 
3.618865629982628e+02 4 4.292987552812846e+01 -4.285608792570924e+01 1.201429000097643e+00 2.210544937839338e+00 - ME 5.884725836744927e-05 + ME 3.145606575501715e-04 Event 5 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2092,7 +2092,7 @@ Event 5 Batch 1 2 4.529780005473896e+02 -8.443182436392424e+01 4.445408460134587e+02 -2.106590230986445e+01 3 4.683757780543924e+02 -6.076819021151039e+01 -1.335482427838441e+02 -4.448010379662153e+02 4 5.786462213982179e+02 1.452000145754347e+02 -3.109926032296145e+02 4.658669402760799e+02 - ME 2.851579396246287e-05 + ME 8.481958952475706e-05 Event 6 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2100,7 +2100,7 @@ Event 6 Batch 1 2 6.238848262005389e+02 -1.065131260140052e+02 -4.741487807795934e+02 -3.912418229627633e+02 3 1.729069432107234e+02 -1.460869767542721e+02 -8.199113358821990e+01 4.281191710484079e+01 4 7.032082305887380e+02 2.526001027682771e+02 5.561399143678132e+02 3.484299058579224e+02 - ME 1.468701510222534e-04 + ME 4.868510537699180e-04 Event 7 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2108,7 +2108,7 @@ Event 7 Batch 1 2 6.977203086376783e+02 -6.126072843634399e+02 -1.744636661244187e+02 2.847602033865263e+02 3 1.614193396272251e+02 -4.571584237043670e+00 8.497734613495712e+01 -1.371646983269120e+02 4 6.408603517350967e+02 6.171788686004836e+02 8.948631998946138e+01 -1.475955050596143e+02 - ME 9.523334397108766e-05 + ME 3.540796080305845e-04 Event 8 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2116,7 +2116,7 @@ Event 8 Batch 1 2 6.871091945484288e+02 4.059708628308462e+02 2.886614153103366e+02 4.732666173272762e+02 3 5.653302025665631e+02 -2.838835484844413e+02 -7.353399035097291e+01 -4.833229987253825e+02 4 2.475606028850081e+02 -1.220873143464048e+02 -2.151274249593637e+02 
1.005638139810634e+01 - ME 3.726341895116938e-05 + ME 8.785466054587446e-05 Event 9 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2124,7 +2124,7 @@ Event 9 Batch 1 2 1.618579955503452e+02 1.385215220188489e+01 1.601201234527701e+02 -1.917484467788566e+01 3 7.196660585644588e+02 -4.527189715496824e+02 -4.214090439733052e+02 3.679391067910628e+02 4 6.184759458851959e+02 4.388668193477974e+02 2.612889205205349e+02 -3.487642621131772e+02 - ME 1.276556148007894e-04 + ME 1.054640649369016e-03 Event 10 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2132,7 +2132,7 @@ Event 10 Batch 1 2 7.832785200561162e+01 1.027681340851886e+01 -7.242726264265977e+01 -2.799877018853974e+01 3 7.448007230566494e+02 2.520540107528716e+02 6.813719334665398e+02 1.641011304445167e+02 4 6.768714249377393e+02 -2.623308241613905e+02 -6.089446708238800e+02 -1.361023602559769e+02 - ME 1.087112534498832e-04 + ME 5.876642887714617e-04 Event 11 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2140,7 +2140,7 @@ Event 11 Batch 1 2 5.478627446486676e+02 2.070882322301630e+02 -4.708081692757452e+02 1.887000762823861e+02 3 6.997827604382593e+02 -4.209013422316021e+02 4.569873120768409e+02 -3.220257264800591e+02 4 2.523544949130733e+02 2.138131100014392e+02 1.382085719890436e+01 1.333256501976729e+02 - ME 7.092902148917371e-06 + ME 2.703695959900953e-05 Event 12 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2148,7 +2148,7 @@ Event 12 Batch 1 2 5.802868936311938e+02 -4.467002255894120e+01 5.211262762381961e+02 -2.513262266832405e+02 3 5.208038834706859e+02 2.151797013176283e+01 -4.993650129388666e+02 -1.463155694111945e+02 4 3.989092228981199e+02 2.315205242717860e+01 -2.176126329932955e+01 3.976417960944350e+02 - ME 4.980323856672599e-04 + ME 5.046437564325244e-04 Event 13 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2156,7 +2156,7 @@ Event 13 Batch 1 2 5.774880087360024e+02 1.576445054854711e+02 5.481077151088400e+02 -9.065617884226717e+01 3 5.915098138161557e+02 -3.018001633277128e+02 -3.808656371901898e+02 3.372564123391869e+02 4 3.310021774478421e+02 1.441556578422419e+02 -1.672420779186502e+02 -2.466002334969197e+02 - ME 5.587942683639647e-05 + ME 1.505341700965184e-03 Event 14 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2164,7 +2164,7 @@ Event 14 Batch 1 2 2.531797527967491e+02 -8.400833666640553e+01 -2.384535242035555e+02 -1.350938161690895e+01 3 5.261064571264828e+02 -1.751971590790252e+02 -3.334570051994592e+02 3.672878780523887e+02 4 7.207137900767681e+02 2.592054957454308e+02 5.719105294030147e+02 -3.537784964354798e+02 - ME 1.659114310450813e-03 + ME 3.373121845959189e-03 Event 15 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2172,7 +2172,7 @@ Event 15 Batch 1 2 4.605848765362425e+02 3.563504404614684e+02 1.735853700506503e+02 2.345653669687875e+02 3 4.216445088607453e+02 1.370719005416187e+02 -3.933730877164850e+02 6.521502736890037e+01 4 6.177706146030118e+02 -4.934223410030871e+02 2.197877176658347e+02 -2.997803943376878e+02 - ME 9.110622752737525e-05 + ME 4.613631402771334e-04 Event 16 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2180,7 +2180,7 @@ Event 16 Batch 1 2 4.972484926572777e+02 -1.474122335888775e+02 -4.748950276275915e+02 -6.399787981958280e-01 3 5.072511849723048e+02 4.846784046822065e+02 1.224000792205880e+02 -8.607455661990267e+01 4 4.955003223704169e+02 -3.372661710933285e+02 3.524949484070036e+02 8.671453541809866e+01 - ME 1.035537635543116e-05 + ME 5.856804747367533e-05 Event 17 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2188,7 
+2188,7 @@ Event 17 Batch 1 2 3.182636773520259e+02 -9.176062613973060e+01 -1.890905041641619e+02 2.389906630959087e+02 3 6.376303990615819e+02 -4.240378519397394e+02 2.706855745366566e+02 -3.917827786765570e+02 4 5.441059235863918e+02 5.157984780794702e+02 -8.159507037249479e+01 1.527921155806483e+02 - ME 2.964570775197734e-05 + ME 7.445984612273079e-05 Event 18 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2196,7 +2196,7 @@ Event 18 Batch 1 2 5.532560008158404e+02 -4.148613005881325e+02 1.689647846464811e+02 -3.247047971041214e+02 3 3.650144721835348e+02 -1.597348634907620e+02 -2.160675866909894e+02 2.470529017650751e+02 4 5.817295270006244e+02 5.745961640788944e+02 4.710280204450838e+01 7.765189533904635e+01 - ME 3.148325734685632e-05 + ME 9.119298978738387e-05 Event 19 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2204,7 +2204,7 @@ Event 19 Batch 1 2 3.263687475619531e+02 -1.904667433734991e+02 2.390747946355329e+02 -1.143775398573919e+02 3 7.331345945903582e+02 2.597391859223821e+02 -6.739404183465077e+02 1.258022320965774e+02 4 4.404966578476884e+02 -6.927244254888298e+01 4.348656237109747e+02 -1.142469223918529e+01 - ME 9.665339952809457e-06 + ME 8.793129888044293e-05 Event 20 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2212,7 +2212,7 @@ Event 20 Batch 1 2 9.588718605412237e+01 4.259536217794532e+01 8.056474827260676e+01 -2.982128277051557e+01 3 7.250265356668370e+02 3.120913743414047e+02 -4.446787057645155e+02 4.801284204484703e+02 4 6.790862782790414e+02 -3.546867365193502e+02 3.641139574919093e+02 -4.503071376779550e+02 - ME 6.402422614019696e-04 + ME 3.686389281265799e-03 Event 21 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2220,7 +2220,7 @@ Event 21 Batch 1 2 1.825278201605081e+02 -1.533737674675502e+02 8.574830442242751e+01 
4.939757963742074e+01 3 7.183016103669913e+02 1.713205736990392e+02 -6.275703015775031e+02 -3.045685162014731e+02 4 5.991705694725008e+02 -1.794680623148897e+01 5.418219971550755e+02 2.551709365640523e+02 - ME 1.806434468406198e-05 + ME 7.470861105912214e-05 Event 22 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2228,7 +2228,7 @@ Event 22 Batch 1 2 2.349542451120770e+02 9.235159917618290e+01 -2.156570331301489e+02 -1.291214495308476e+01 3 7.360601907662837e+02 -2.182033070539752e+02 6.568866822530020e+02 -2.503433799808774e+02 4 5.289855641216395e+02 1.258517078777923e+02 -4.412296491228531e+02 2.632555249339621e+02 - ME 8.007442232312076e-06 + ME 3.893602972207037e-05 Event 23 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2236,7 +2236,7 @@ Event 23 Batch 1 2 2.350908908124364e+02 -7.377772511691019e+00 -2.298431804723787e+02 -4.884063683135331e+01 3 6.797114625392685e+02 -5.485955088721076e+02 3.603976926464840e+02 1.765336882516069e+02 4 5.851976466482949e+02 5.559732813837987e+02 -1.305545121741055e+02 -1.276930514202538e+02 - ME 3.185713653214173e-05 + ME 2.057468423101862e-04 Event 24 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2244,7 +2244,7 @@ Event 24 Batch 1 2 4.355364173804401e+02 2.538053291625626e+02 -2.665393838801487e+02 -2.328767540869265e+02 3 4.093863144993796e+02 -1.953012891316528e+02 -3.573484670764558e+02 4.191221827828568e+01 4 6.550772681201798e+02 -5.850404003090968e+01 6.238878509566048e+02 1.909645358086408e+02 - ME 3.721637657688893e-05 + ME 1.895168702655672e-04 Event 25 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2252,7 +2252,7 @@ Event 25 Batch 1 2 7.365386968907909e+02 3.875876454009267e+02 3.151568854896985e+02 5.412404333367775e+02 3 5.208510884285567e+02 -2.430585576296288e+02 -1.518636440371932e+02 
-4.349089876054084e+02 4 2.426102146806534e+02 -1.445290877712977e+02 -1.632932414525050e+02 -1.063314457313693e+02 - ME 7.982561935336398e-05 + ME 3.717867207603688e-04 Event 26 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2260,7 +2260,7 @@ Event 26 Batch 1 2 7.198867014174701e+02 5.189601929589824e+02 4.797253921416957e+02 -1.370428003807496e+02 3 3.889101953712928e+02 -1.847394503243419e+02 -2.837815501141775e+02 1.912864537085460e+02 4 3.912031032112371e+02 -3.342207426346404e+02 -1.959438420275183e+02 -5.424365332779646e+01 - ME 1.928349098758061e-05 + ME 1.222836766708484e-04 Event 27 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2268,7 +2268,7 @@ Event 27 Batch 1 2 6.732032222628646e+02 5.870808395006010e+02 -9.126179303429218e+01 3.165595544104447e+02 3 1.177373967283342e+02 7.847176641415683e+01 5.304379211899001e+00 -8.761358356661104e+01 4 7.090593810088013e+02 -6.655526059147578e+02 8.595741382239324e+01 -2.289459708438336e+02 - ME 6.795383824785976e-04 + ME 1.603290018002586e-03 Event 28 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2276,7 +2276,7 @@ Event 28 Batch 1 2 6.475300414228806e+02 3.136396845517189e+02 3.816259196370642e+02 -4.186728559156669e+02 3 7.290923529036073e+02 -2.791764769994177e+02 -4.112865540505715e+02 5.333662195995520e+02 4 1.233776056735125e+02 -3.446320755230100e+01 2.966063441350738e+01 -1.146933636838856e+02 - ME 6.311296815400830e-04 + ME 5.037107889244314e-02 Event 29 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2284,7 +2284,7 @@ Event 29 Batch 1 2 3.156754590345620e+02 -2.870540678871016e+02 4.159516713841874e+01 -1.245825012466667e+02 3 4.770060274033896e+02 -2.355061130652810e+02 -3.231858413754910e+02 -2.600433287405434e+02 4 7.073185135620483e+02 5.225601809523826e+02 2.815906742370723e+02 
3.846258299872100e+02 - ME 1.321807869823317e-04 + ME 7.956699356695784e-04 Event 30 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2292,7 +2292,7 @@ Event 30 Batch 1 2 6.091290614220995e+02 1.543004089904798e+02 4.216196287493766e+00 -5.892468251447810e+02 3 2.079357839022729e+02 2.034647466922837e+02 4.185675980476618e+01 9.348729279626889e+00 4 6.829351546756266e+02 -3.577651556827627e+02 -4.607295609226003e+01 5.798980958651539e+02 - ME 1.448382779935031e-04 + ME 3.902231064020147e-04 Event 31 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2300,7 +2300,7 @@ Event 31 Batch 1 2 6.901710072855793e+02 1.433309098684656e+01 6.447948515477649e+02 -2.457034416076623e+02 3 5.898919363861644e+02 1.120085307876391e+02 -4.815950471622465e+02 3.217029626736535e+02 4 2.199370563282564e+02 -1.263416217744856e+02 -1.631998043855182e+02 -7.599952106599136e+01 - ME 2.376400497996635e-05 + ME 2.415465849322543e-04 Event 32 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2308,7 +2308,7 @@ Event 32 Batch 1 2 6.144498311923271e+02 5.832947925341469e+02 -1.925283703230110e+02 1.576726595169125e+01 3 2.478450424037004e+02 5.004284035329792e+01 2.389954177960992e+02 4.247433867565734e+01 4 6.377051264039724e+02 -6.333376328874447e+02 -4.646704747308818e+01 -5.824160462734862e+01 - ME 5.390650629646604e-05 + ME 2.160220890176678e-04 Event 33 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2316,7 +2316,7 @@ Event 33 Batch 1 2 6.134536717469736e+02 -1.625429495269566e+02 -1.853973484494194e+02 5.617232593785355e+02 3 5.361644687950269e+02 -3.755831293394986e+01 -9.992652347025609e+01 -5.254297294928764e+02 4 3.503818594579993e+02 2.001012624609065e+02 2.853238719196754e+02 -3.629352988565911e+01 - ME 1.005452860076771e-04 + ME 1.224582992507153e-04 Event 34 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2324,7 +2324,7 @@ Event 34 Batch 1 2 3.840838099420727e+02 -2.442269925519278e+02 -3.827314394217582e+01 -2.939535943332559e+02 3 6.022630974514659e+02 3.956891925431131e+01 5.086724982658299e+02 3.200116071158652e+02 4 5.136530926064613e+02 2.046580732976165e+02 -4.703993543236541e+02 -2.605801278260916e+01 - ME 2.313941306740064e-05 + ME 9.608243105510499e-05 Event 35 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2332,7 +2332,7 @@ Event 35 Batch 1 2 3.454350783663418e+02 -3.439607925797615e+02 2.363778141880094e+01 -2.139209721976717e+01 3 6.705698302143294e+02 5.215327591153251e+02 4.060443141865528e+02 -1.131171661597076e+02 4 4.839950914193290e+02 -1.775719665355635e+02 -4.296820956053536e+02 1.345092633794747e+02 - ME 7.982017052260048e-06 + ME 4.862206803317224e-05 Event 36 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2340,7 +2340,7 @@ Event 36 Batch 1 2 7.098652154429357e+02 2.489290984574327e+02 -1.674080692141068e+02 -6.433641786725617e+02 3 6.178479130357197e+02 -1.435715807033598e+02 2.588953561477193e+02 5.423065917191846e+02 4 1.722868715213448e+02 -1.053575177540730e+02 -9.148728693361247e+01 1.010575869533772e+02 - ME 5.562249548714765e-05 + ME 6.680529568232270e-05 Event 37 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2348,7 +2348,7 @@ Event 37 Batch 1 2 6.906872786346031e+02 1.495946561071237e+02 1.712833879510068e+02 6.521750966909805e+02 3 3.682276595245592e+02 -1.358558710218083e+02 1.194309698061993e+02 -3.207351477449753e+02 4 4.410850618408380e+02 -1.373878508531530e+01 -2.907143577572061e+02 -3.314399489460051e+02 - ME 5.542438863722841e-04 + ME 2.014943348935539e-03 Event 38 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2356,7 
+2356,7 @@ Event 38 Batch 1 2 6.131720166645955e+02 -5.222102655174087e+02 6.340623138461877e+00 3.213038392347352e+02 3 4.540063357567760e+02 2.932429176443922e+02 -3.207297067242505e+02 -1.313879727496968e+02 4 4.328216475786277e+02 2.289673478730168e+02 3.143890835857886e+02 -1.899158664850380e+02 - ME 3.150821423911933e-05 + ME 2.589645049118943e-04 Event 39 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2364,7 +2364,7 @@ Event 39 Batch 1 2 2.929747896182304e+02 2.510117592312210e+02 -1.378648144805472e+02 6.181113983529403e+01 3 6.287164314722783e+02 3.864928360025993e+01 6.254120614625328e+02 5.148142827864510e+01 4 5.783087789094894e+02 -2.896610428314818e+02 -4.875472469819856e+02 -1.132925681139394e+02 - ME 2.723120294663496e-05 + ME 1.708238325115053e-04 Event 40 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2372,7 +2372,7 @@ Event 40 Batch 1 2 1.143487538112954e+02 -3.203572478439017e+01 1.022340126870988e+02 3.996944439980560e+01 3 7.361483923235807e+02 5.924235295921244e+02 -3.838567751530157e+02 -2.088128187524163e+02 4 6.495028538651248e+02 -5.603878048077345e+02 2.816227624659169e+02 1.688433743526105e+02 - ME 4.279185076498264e-05 + ME 2.026369815874481e-04 Event 41 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2380,7 +2380,7 @@ Event 41 Batch 1 2 6.384898508133350e+02 5.540399192408263e+02 -3.014826159773289e+02 -9.908223727147148e+01 3 3.510407251698805e+02 -1.719168197014114e+02 2.065966849440144e+02 -2.258140996521069e+02 4 5.104694240167846e+02 -3.821230995394149e+02 9.488593103331458e+01 3.248963369235784e+02 - ME 1.488395965626735e-05 + ME 4.455092331482675e-05 Event 42 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2388,7 +2388,7 @@ Event 42 Batch 1 2 3.291654598309212e+02 -1.090829060981258e+02 2.972891943885482e+02 
-8.983292515941632e+01 3 6.884965239796815e+02 4.933628807557017e+02 -2.919492821202986e+02 3.812953554581829e+02 4 4.823380161893969e+02 -3.842799746575757e+02 -5.339912268249619e+00 -2.914624302987665e+02 - ME 5.767145017550451e-05 + ME 6.690811667999076e-04 Event 43 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2396,7 +2396,7 @@ Event 43 Batch 1 2 3.674173006007981e+02 2.791827424102563e+02 1.079644067383057e+02 2.130637369397045e+02 3 7.392205647816575e+02 -6.110484627794917e+02 -4.247874240022372e+01 -4.138385868609020e+02 4 3.933621346175442e+02 3.318657203692355e+02 -6.548566433808202e+01 2.007748499211975e+02 - ME 6.513986915725277e-06 + ME 2.734436884563990e-05 Event 44 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2404,7 +2404,7 @@ Event 44 Batch 1 2 2.081359682230012e+02 -1.082501549908087e+02 1.771964605001424e+02 1.427934167997762e+01 3 7.449563315308093e+02 5.092828751965591e+02 -5.388739609944279e+02 7.215083562608928e+01 4 5.469077002461893e+02 -4.010327202057504e+02 3.616775004942854e+02 -8.643017730606689e+01 - ME 1.838899544278803e-05 + ME 1.760644262839344e-04 Event 45 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2412,7 +2412,7 @@ Event 45 Batch 1 2 5.180982465404422e+02 4.470261481799612e+02 -3.368837017252423e+01 -2.597277606009553e+02 3 3.377595659674062e+02 -7.316527185649456e+01 2.454727770679006e+02 -2.201624016839132e+02 4 6.441421874921515e+02 -3.738608763234666e+02 -2.117844068953763e+02 4.798901622848684e+02 - ME 4.091340785269233e-05 + ME 1.645403798734011e-04 Event 46 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2420,7 +2420,7 @@ Event 46 Batch 1 2 6.296560291524888e+02 2.172411497655985e+02 5.821614514430422e+02 -1.017892054705761e+02 3 6.224001894826197e+02 1.405102091633609e+01 -6.218608257778048e+02 
2.176414579432105e+01 4 2.479437813648912e+02 -2.312921706819346e+02 3.969937433476264e+01 8.002505967625511e+01 - ME 7.434320230190137e-06 + ME 4.041878897626609e-05 Event 47 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2428,7 +2428,7 @@ Event 47 Batch 1 2 5.458843469271557e+02 -1.019033861791133e+02 -1.559739004096151e+02 5.131058004898495e+02 3 2.573134207008558e+02 6.791700498899543e+01 -2.412204887508016e+02 5.839651284901167e+01 4 6.968022323719882e+02 3.398638119011781e+01 3.971943891604168e+02 -5.715023133388611e+02 - ME 4.005478861198618e-03 + ME 1.408798022766008e-02 Event 48 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2436,7 +2436,7 @@ Event 48 Batch 1 2 6.623920218006384e+02 -6.284562032939594e+02 -1.837527125398962e+02 -1.002044496053409e+02 3 1.251779629744606e+02 -7.502448682133647e+01 9.550779386908961e+01 3.031682869117444e+01 4 7.124300152249010e+02 7.034806901152959e+02 8.824491867080658e+01 6.988762091416655e+01 - ME 3.004757451335502e-04 + ME 8.682321044518227e-04 Event 49 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2444,7 +2444,7 @@ Event 49 Batch 1 2 2.397494808364364e+02 2.393958238941666e+02 -4.144666783354266e+00 -1.233996761053010e+01 3 6.782491241100328e+02 -3.516321535544010e+02 -2.705899831712919e+02 5.129890485673947e+02 4 5.820013950535307e+02 1.122363296602344e+02 2.747346499546462e+02 -5.006490809568646e+02 - ME 6.040872325723622e-04 + ME 9.041285542966720e-03 Event 50 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2452,7 +2452,7 @@ Event 50 Batch 1 2 4.764898792162554e+02 4.667163214316568e+02 5.900817880915086e+01 -7.573978570375913e+01 3 5.114228101321805e+02 -2.035689445851523e+02 -4.549677995197112e+02 -1.145306811477843e+02 4 5.120873106515638e+02 -2.631473768465044e+02 3.959596207105603e+02 
1.902704668515434e+02 - ME 9.692662313613028e-06 + ME 5.157319121365441e-05 Event 51 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2460,7 +2460,7 @@ Event 51 Batch 1 2 4.678795643859630e+02 4.629737719234085e+02 5.365495313512251e+01 4.108186077915564e+01 3 6.311645871918951e+02 -4.500610707732837e+02 -4.345770688214700e+02 8.340587481742408e+01 4 4.009558484221416e+02 -1.291270115012470e+01 3.809221156863474e+02 -1.244877355965797e+02 - ME 1.293558494013996e-05 + ME 1.517985021504320e-04 Event 52 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2468,7 +2468,7 @@ Event 52 Batch 1 2 3.696230029266819e+02 2.516704934433110e+02 2.514038675722595e+02 1.003953305301004e+02 3 6.696174214325739e+02 -2.754912388418390e+01 -6.493999246431116e+02 -1.609604756850079e+02 4 4.607595756407442e+02 -2.241213695591271e+02 3.979960570708519e+02 6.056514515490756e+01 - ME 8.655753222194317e-06 + ME 5.727699238559496e-05 Event 53 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2476,7 +2476,7 @@ Event 53 Batch 1 2 7.284624742442375e+01 -4.271742504396477e+01 -2.683807109937144e+01 -5.255012179908527e+01 3 7.493542950735829e+02 3.356513586119740e+02 2.501807367708783e+02 6.215139772812374e+02 4 6.777994575019936e+02 -2.929339335680093e+02 -2.233426656715069e+02 -5.689638554821522e+02 - ME 2.372423861687152e-03 + ME 1.612275481129464e-02 Event 54 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2484,7 +2484,7 @@ Event 54 Batch 1 2 7.460259847230064e+02 2.055186857047568e+01 6.233229443227743e+02 4.093908861479223e+02 3 5.756222844616437e+02 2.606063779094539e+01 -4.696411468594731e+02 -3.318117699890848e+02 4 1.783517308153497e+02 -4.661250636142109e+01 -1.536817974633012e+02 -7.757911615883735e+01 - ME 5.046268590690708e-05 + ME 4.374243668355642e-04 Event 55 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2492,7 +2492,7 @@ Event 55 Batch 1 2 5.967428482894213e+02 -8.165820254184375e+01 5.098287527914877e+02 -2.991798919868828e+02 3 5.942526243827265e+02 5.606061544962815e+01 -2.905196430116550e+02 5.153559216750568e+02 4 3.090045273278509e+02 2.559758709221549e+01 -2.193091097798325e+02 -2.161760296881746e+02 - ME 1.849048785615045e-04 + ME 1.779007466146034e-03 Event 56 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2500,7 +2500,7 @@ Event 56 Batch 1 2 5.610874267302015e+02 -4.199055433713192e+02 3.580252469767042e+02 1.015694718309908e+02 3 6.303091265298390e+02 2.130872195586830e+02 -5.453843477211296e+02 -2.333224059286980e+02 4 3.086034467399593e+02 2.068183238126362e+02 1.873591007444254e+02 1.317529340977073e+02 - ME 7.213009143835112e-06 + ME 3.258989367177766e-05 Event 57 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2508,7 +2508,7 @@ Event 57 Batch 1 2 6.552053965855981e+02 4.516249927537604e+02 7.110694105335197e+00 4.746350341729917e+02 3 6.035190443408458e+02 -3.717228873476765e+02 2.148772607224587e+02 -4.241286299324850e+02 4 2.412755590735562e+02 -7.990210540608396e+01 -2.219879548277939e+02 -5.050640424050685e+01 - ME 3.752873989265266e-05 + ME 1.623545585873121e-04 Event 58 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2516,7 +2516,7 @@ Event 58 Batch 1 2 2.959982971085279e+02 1.850007048157144e+02 -2.304987961744356e+02 1.612563397119956e+01 3 7.018897389129390e+02 -3.764226030262936e+02 4.376344751014918e+02 3.992884868423144e+02 4 5.021119639785326e+02 1.914218982105791e+02 -2.071356789270567e+02 -4.154141208135139e+02 - ME 1.901193343270815e-04 + ME 4.558573859477246e-03 Event 59 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2524,7 
+2524,7 @@ Event 59 Batch 1 2 5.521089721327345e+02 1.223876815062619e+02 -3.629066091228882e+01 -5.371485459866160e+02 3 4.098988410471214e+02 -5.841964900319319e+01 -3.626461945087767e+02 1.819119075553315e+02 4 5.379921868201441e+02 -6.396803250306872e+01 3.989368554210655e+02 3.552366384312845e+02 - ME 1.780280399801712e-05 + ME 5.148841296796537e-05 Event 60 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2532,7 +2532,7 @@ Event 60 Batch 1 2 7.143828168925960e+02 -4.584044193456332e+02 -2.419772079280938e+02 -4.915844060170314e+02 3 1.284110307517517e+02 8.324300347118127e+01 -7.889851197070540e+01 5.774963203893758e+01 4 6.572061523556514e+02 3.751614158744520e+02 3.208757198987992e+02 4.338347739780938e+02 - ME 7.144001898958308e-05 + ME 1.673517837789511e-04 Event 61 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2540,7 +2540,7 @@ Event 61 Batch 1 2 4.394390210968651e+02 -2.137451655543886e+02 -3.779414621253704e+02 -6.767502250635177e+01 3 4.431311911324728e+02 3.845666395406355e+02 -2.150363068358313e+02 4.725610065709574e+01 4 6.174297877706618e+02 -1.708214739862469e+02 5.929777689612018e+02 2.041892184925626e+01 - ME 2.870354731125455e-05 + ME 1.368591177943825e-04 Event 62 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2548,7 +2548,7 @@ Event 62 Batch 1 2 7.301725729481176e+02 4.281927891852710e+02 5.652737593150771e+02 -1.739784429324868e+02 3 7.567373964415995e+01 2.589885732647599e+01 -5.696550981957816e+01 4.255225906941358e+01 4 6.941536874077224e+02 -4.540916465117469e+02 -5.083082494954988e+02 1.314261838630732e+02 - ME 2.379197431250548e-04 + ME 8.513592598060080e-04 Event 63 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2556,7 +2556,7 @@ Event 63 Batch 1 2 4.361152320236988e+02 -3.738769057978321e+02 1.427754799584550e+02 
-1.732850750548248e+02 3 5.817148313055657e+02 5.081993893256957e+02 2.829214478037172e+02 -8.998890070513914e+00 4 4.821699366707353e+02 -1.343224835278637e+02 -4.256969277621721e+02 1.822839651253387e+02 - ME 8.350404272725701e-06 + ME 4.544766189571194e-05 Event 64 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2564,7 +2564,7 @@ Event 64 Batch 1 2 6.097675704107204e+02 3.288514690970509e+02 4.971291587853200e+02 -1.285916042465611e+02 3 5.709532610348123e+02 -6.501292612520263e+01 -4.768258747557200e+02 3.072426254385416e+02 4 3.192791685544673e+02 -2.638385429718484e+02 -2.030328402960006e+01 -1.786510211919805e+02 - ME 3.000969253297957e-05 + ME 4.598138986874043e-04 Event 65 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2572,7 +2572,7 @@ Event 65 Batch 1 2 6.258641293880484e+02 3.743515439843765e+02 -1.622018320411498e+02 -4.746128903155367e+02 3 7.438702198751357e+02 -4.029113627030089e+02 2.325939036896868e+02 5.804355380128616e+02 4 1.302656507368158e+02 2.855981871863233e+01 -7.039207164853700e+01 -1.058226476973252e+02 - ME 3.162776051460646e-04 + ME 6.427333508548903e-03 Event 66 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2580,7 +2580,7 @@ Event 66 Batch 1 2 3.731957242404369e+02 1.596860493342637e+01 -3.714568973276624e+02 3.224632809376674e+01 3 6.079923612940432e+02 4.451199598539357e+02 3.189341902600864e+02 -2.642043054431177e+02 4 5.188119144655197e+02 -4.610885647873621e+02 5.252270706757586e+01 2.319579773493509e+02 - ME 1.034065067393998e-05 + ME 4.681392980523237e-05 Event 67 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2588,7 +2588,7 @@ Event 67 Batch 1 2 7.084256499213539e+02 6.318790977834966e+02 -2.229764540025608e+02 2.299504472951746e+02 3 5.168612394424738e+01 1.130069959366449e+01 -1.428140623590627e+01 
4.837138651102398e+01 4 7.398882261343989e+02 -6.431797973771612e+02 2.372578602384670e+02 -2.783218338061985e+02 - ME 1.479715191731530e-02 + ME 5.878400132197954e-02 Event 68 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2596,7 +2596,7 @@ Event 68 Batch 1 2 5.644037677826096e+02 -7.446914007305443e+01 3.170710956176409e+02 4.609467220707991e+02 3 4.303832728799333e+02 -1.588265612792408e+02 -3.994808673830752e+02 -2.046757440246668e+01 4 5.052129593374568e+02 2.332957013522950e+02 8.240977176543441e+01 -4.404791476683325e+02 - ME 3.274273226082449e-04 + ME 8.108482137897523e-03 Event 69 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2604,7 +2604,7 @@ Event 69 Batch 1 2 2.379282923937934e+02 -4.413455715133102e+01 1.058497776082811e+02 -2.084654354245804e+02 3 5.822935131976616e+02 -5.806422676829345e+02 4.095409019445288e+01 -1.559022092337181e+01 4 6.797781944085444e+02 6.247768248342655e+02 -1.468038678027338e+02 2.240556563479522e+02 - ME 6.379305675073031e-05 + ME 3.039802585689931e-04 Event 70 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2612,7 +2612,7 @@ Event 70 Batch 1 2 5.861861307468000e+02 1.831219916849830e+02 2.904683423406074e+02 -4.750880530376756e+02 3 4.633200606614189e+02 -4.245314712871158e+02 -1.339518705596282e+02 1.284344380284135e+02 4 4.504938085917810e+02 2.414094796021329e+02 -1.565164717809791e+02 3.466536150092620e+02 - ME 1.325653453486623e-05 + ME 3.530491740557932e-05 Event 71 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2620,7 +2620,7 @@ Event 71 Batch 1 2 7.383412459951699e+02 5.748049255568963e+02 -1.639684737984460e+02 -4.334298474879633e+02 3 3.973981306646684e+02 -3.228684354469153e+02 -4.837114091238284e+00 2.316416412804533e+02 4 3.642606233401616e+02 -2.519364901099809e+02 1.688055878896842e+02 
2.017882062075102e+02 - ME 1.333441808219846e-05 + ME 3.103530482016079e-05 Event 72 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2628,7 +2628,7 @@ Event 72 Batch 1 2 3.538199915090663e+02 3.512029503136998e+02 -6.467835580753929e+00 -4.246458742680748e+01 3 5.344234504985296e+02 1.310173344785605e+01 3.836805260246265e+01 5.328833470497182e+02 4 6.117565579924039e+02 -3.643046837615559e+02 -3.190021702170876e+01 -4.904187596229107e+02 - ME 2.994704399169685e-03 + ME 9.376669006106200e-03 Event 73 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2636,7 +2636,7 @@ Event 73 Batch 1 2 4.694927197571710e+02 1.451947293992222e+02 -1.807863847612341e+02 4.082379055705570e+02 3 5.537325951281179e+02 -5.796379956652479e+01 5.401382741253894e+02 -1.072876026015002e+02 4 4.767746851147115e+02 -8.723092983269744e+01 -3.593518893641554e+02 -3.009503029690568e+02 - ME 1.535829386616431e-04 + ME 1.077472469645428e-03 Event 74 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2644,7 +2644,7 @@ Event 74 Batch 1 2 6.258444305735198e+02 -3.349227552763227e+02 4.941036656040852e+02 1.880679848209580e+02 3 5.555040664889822e+02 3.765538795180102e+01 -5.474422011270130e+02 -8.645158222500005e+01 4 3.186515029374982e+02 2.972673673245214e+02 5.333853552292791e+01 -1.016164025959578e+02 - ME 1.487896902219418e-05 + ME 1.623439923565115e-04 Event 75 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2652,7 +2652,7 @@ Event 75 Batch 1 2 3.943316317993887e+02 5.588489849751632e+01 -2.552251009651266e+02 -2.953548066221912e+02 3 5.467466262348042e+02 -3.021648543602057e+02 -2.377479281839000e+02 3.887212326756534e+02 4 5.589217419658066e+02 2.462799558626894e+02 4.929730291490265e+02 -9.336642605346221e+01 - ME 4.632408498797698e-05 + ME 1.348649436679123e-04 Event 76 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2660,7 +2660,7 @@ Event 76 Batch 1 2 5.517772830004059e+02 2.282681125856672e+02 -4.885490190451381e+02 -1.169260227747471e+02 3 4.245403880864563e+02 -2.793100283061228e+02 1.521744876196477e+02 -2.811821020654221e+02 4 5.236823289131380e+02 5.104191572045557e+01 3.363745314254903e+02 3.981081248401691e+02 - ME 1.645260485784409e-05 + ME 5.074216551061466e-05 Event 77 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2668,7 +2668,7 @@ Event 77 Batch 1 2 3.781543446472003e+02 -5.926925448310480e+01 -1.775497893613220e+02 3.285786605157444e+02 3 6.702964816234122e+02 -6.066564226432872e+01 -1.057468051743550e+02 -6.591165802199176e+02 4 4.515491737293867e+02 1.199348967474336e+02 2.832965945356770e+02 3.305379197041734e+02 - ME 5.041095643414513e-05 + ME 6.321080405055773e-05 Event 78 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2676,7 +2676,7 @@ Event 78 Batch 1 2 4.564262045363139e+02 1.882572856930395e+02 1.751822011208171e+02 -3.770878823051468e+02 3 3.809544602625751e+02 -2.816334489555117e+02 1.992812047321844e+02 -1.615422627793184e+02 4 6.626193352011103e+02 9.337616326247226e+01 -3.744634058530013e+02 5.386301450844651e+02 - ME 6.222463480998997e-05 + ME 2.572921643188974e-04 Event 79 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2684,7 +2684,7 @@ Event 79 Batch 1 2 6.126536521478922e+02 6.075062399138452e+02 -4.178945028651393e+01 6.733726903166659e+01 3 2.872846052831658e+02 -1.084163947926161e+02 2.139961846825774e+01 2.651799127051085e+02 4 6.000617425689430e+02 -4.990898451212283e+02 2.038983181825616e+01 -3.325171817367756e+02 - ME 6.289823950094716e-04 + ME 1.996659951821530e-03 Event 80 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2692,7 
+2692,7 @@ Event 80 Batch 1 2 4.171281258707700e+02 -2.756641813219371e+02 1.445082905894664e+01 3.127240094205691e+02 3 3.805235327384960e+02 -2.955852199231463e+02 2.395269588958384e+02 7.373784162959287e+00 4 7.023483413907342e+02 5.712494012450838e+02 -2.539777879547846e+02 -3.200977935835284e+02 - ME 5.629434448779270e-04 + ME 1.297520069620947e-03 Event 81 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2700,7 +2700,7 @@ Event 81 Batch 1 2 7.471091333863935e+02 -9.753029041192970e+01 7.407154559164039e+02 -7.162458282065091e-01 3 6.775352561453885e+02 9.550863422814814e+01 -6.702673865908516e+02 -2.595678293896889e+01 4 7.535561046821789e+01 2.021656183781575e+00 -7.044806932555213e+01 2.667302876717550e+01 - ME 2.904529061551848e-05 + ME 1.022399816924924e-04 Event 82 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2708,7 +2708,7 @@ Event 82 Batch 1 2 4.309094465924175e+02 3.042233433179616e+02 2.799835808203350e+02 -1.214096495919827e+02 3 5.540384887187945e+02 -4.824447657759213e+02 1.988969596446625e+02 1.861335391629672e+02 4 5.150520646887885e+02 1.782214224579596e+02 -4.788805404649973e+02 -6.472388957098450e+01 - ME 1.778678120024833e-05 + ME 1.053635072607165e-04 Event 83 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2716,7 +2716,7 @@ Event 83 Batch 1 2 4.869534474909295e+02 -4.727010820510885e+02 1.062322962656182e+02 4.890855018466118e+01 3 3.520990385354405e+02 -1.437544586613779e+02 -3.142298368411062e+02 6.758696761482639e+01 4 6.609475139736298e+02 6.164555407124665e+02 2.079975405754878e+02 -1.164955177994876e+02 - ME 7.948516811691567e-05 + ME 2.998516055200512e-04 Event 84 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2724,7 +2724,7 @@ Event 84 Batch 1 2 1.391975815431583e+01 -3.682657486111166e-01 -1.138840508663312e+01 
-7.995516055627093e+00 3 7.493632094786751e+02 -3.452281541586202e+01 3.833012084573049e+02 6.429880080772211e+02 4 7.367170323670085e+02 3.489108116447313e+01 -3.719128033706718e+02 -6.349924920215940e+02 - ME 8.671177508029917e-02 + ME 3.806217512266510e-01 Event 85 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2732,7 +2732,7 @@ Event 85 Batch 1 2 7.362448947738020e+02 6.409220704967113e+02 3.243429451315054e+02 1.614840505254833e+02 3 1.517836214454495e+02 -1.266859291808411e+02 -6.780846852200752e+01 4.889738933094901e+01 4 6.119714837807480e+02 -5.142361413158706e+02 -2.565344766094980e+02 -2.103814398564324e+02 - ME 1.062305495679385e-04 + ME 5.694785892689211e-04 Event 86 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2740,7 +2740,7 @@ Event 86 Batch 1 2 5.451728369778392e+02 -6.605005893803180e+01 1.066920544886257e+02 -5.305352178712969e+02 3 3.158718592284829e+02 -1.755596039144849e+02 2.550395858012225e+02 6.251932981237656e+01 4 6.389553037936773e+02 2.416096628525165e+02 -3.617316402898481e+02 4.680158880589203e+02 - ME 4.057626974930324e-05 + ME 1.469986179099727e-04 Event 87 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2748,7 +2748,7 @@ Event 87 Batch 1 2 3.414211232216659e+02 1.437256906952883e+02 1.534640422371205e+02 -2.689983214749668e+02 3 5.081668091119999e+02 4.794742948200324e+02 -1.464748766741243e+02 8.296394996143997e+01 4 6.504120676663341e+02 -6.231999855153207e+02 -6.989165562996117e+00 1.860343715135268e+02 - ME 3.656584417835253e-05 + ME 1.823135893899652e-04 Event 88 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2756,7 +2756,7 @@ Event 88 Batch 1 2 2.925516585730864e+02 1.655911293372511e+01 2.598275245766865e+02 -1.334238591297045e+02 3 7.159840369510271e+02 -1.056844973272874e+02 -3.694097043713192e+02 
6.041526284885822e+02 4 4.914643044758866e+02 8.912538439356234e+01 1.095821797946327e+02 -4.707287693588777e+02 - ME 2.327745727475104e-03 + ME 8.728488941697977e-02 Event 89 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2764,7 +2764,7 @@ Event 89 Batch 1 2 6.333634651097186e+02 1.209853522660007e+02 5.372166546881791e+02 -3.129058794565919e+02 3 6.221307427802806e+02 5.757192259699385e+01 -4.327483989541182e+02 4.432391657372765e+02 4 2.445057921100010e+02 -1.785572748629945e+02 -1.044682557340609e+02 -1.303332862806847e+02 - ME 5.047204144927262e-05 + ME 5.497507832908574e-04 Event 90 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2772,7 +2772,7 @@ Event 90 Batch 1 2 3.111538587406461e+02 2.628215106651484e+02 -6.985334981761831e+01 -1.512021390726355e+02 3 5.216486323898988e+02 1.252715366480781e+02 4.457714554600226e+02 -2.402335265468457e+02 4 6.671975088694549e+02 -3.880930473132266e+02 -3.759181056424042e+02 3.914356656194811e+02 - ME 4.503542584588689e-05 + ME 2.329075524537458e-04 Event 91 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2780,7 +2780,7 @@ Event 91 Batch 1 2 3.007803348469016e+02 8.390513937949677e+01 2.884042062049404e+02 -1.586667134655829e+01 3 6.256884422056424e+02 2.364580673743878e+02 -3.590826126759745e+02 -4.545693416378727e+02 4 5.735312229474563e+02 -3.203632067538847e+02 7.067840647103421e+01 4.704360129844310e+02 - ME 2.635583378174906e-05 + ME 6.478111274774788e-05 Event 92 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2788,7 +2788,7 @@ Event 92 Batch 1 2 6.843865618656529e+02 -2.264962467301474e+02 -5.909185329480341e+02 2.605757158639088e+02 3 6.645516272550811e+02 3.453347116263074e+02 4.983670680340538e+02 -2.720350487207341e+02 4 1.510618108792659e+02 -1.188384648961601e+02 9.255146491398015e+01 
1.145933285682523e+01 - ME 1.711437740567050e-05 + ME 9.365402433981294e-05 Event 93 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2796,7 +2796,7 @@ Event 93 Batch 1 2 5.579763469381434e+02 2.180908585044468e+02 5.135246110359701e+02 8.151996049100932e+00 3 3.333821836060117e+02 1.681122988324202e+02 -1.261705574188212e+02 2.587719570738210e+02 4 6.086414694558448e+02 -3.862031573368670e+02 -3.873540536171486e+02 -2.669239531229223e+02 - ME 1.157787815150910e-04 + ME 5.183695239236329e-04 Event 94 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2804,7 +2804,7 @@ Event 94 Batch 1 2 4.534979734151987e+02 1.139662723650677e+02 2.686183171543304e+01 4.381216071501101e+02 3 3.856184698299744e+02 1.545134372854228e+02 -3.452526490806396e+02 7.501873282757614e+01 4 6.608835567548277e+02 -2.684797096504910e+02 3.183908173652065e+02 -5.131403399776862e+02 - ME 1.545010233607317e-03 + ME 6.944325623628402e-03 Event 95 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2812,7 +2812,7 @@ Event 95 Batch 1 2 2.828073115974175e+02 -5.711637476392460e+01 5.915078172645698e+01 -2.705898746219725e+02 3 6.809618671276158e+02 3.772100991821226e+02 3.247893528880094e+02 4.646864338535512e+02 4 5.362308212749670e+02 -3.200937244181981e+02 -3.839401346144663e+02 -1.940965592315787e+02 - ME 6.408796328924562e-05 + ME 2.560512106670314e-04 Event 96 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2820,7 +2820,7 @@ Event 96 Batch 1 2 4.639832102051440e+02 -4.275497908582962e+02 -1.317248975374901e+02 -1.230046627491649e+02 3 7.474114851375481e+02 6.594176555428718e+02 2.654537688070380e+02 2.309254864669502e+02 4 2.886053046573076e+02 -2.318678646845757e+02 -1.337288712695479e+02 -1.079208237177853e+02 - ME 1.445191791082226e-05 + ME 2.440162169445852e-04 Event 97 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2828,7 +2828,7 @@ Event 97 Batch 1 2 5.095921959312568e+02 3.190102848863560e+02 3.100341192456060e+02 2.485869851668986e+02 3 4.555541331018014e+02 -2.788120391899956e+02 2.221549471930723e+02 -2.836205112936887e+02 4 5.348536709669415e+02 -4.019824569636059e+01 -5.321890664386783e+02 3.503352612679014e+01 - ME 2.250661525403011e-05 + ME 8.198891770965733e-05 Event 98 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2836,7 +2836,7 @@ Event 98 Batch 1 2 5.299941952467790e+02 -2.570048161992350e+02 -4.630296380940593e+02 -2.111695271961878e+01 3 7.352146396921255e+02 2.361229278157243e+02 6.962552486063584e+02 3.893348873424185e+00 4 2.347911650610957e+02 2.088188838351074e+01 -2.332256105122990e+02 1.722360384619465e+01 - ME 5.654417419793765e-06 + ME 6.760444392591968e-05 Event 99 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2844,7 +2844,7 @@ Event 99 Batch 1 2 4.290897291078425e+02 3.747236205606835e+02 2.040795775432686e+02 -4.529602465443949e+01 3 6.438744429739487e+02 -5.215755139094103e+02 2.133414139578182e+01 3.769325350988583e+02 4 4.270358279182090e+02 1.468518933487271e+02 -2.254137189390505e+02 -3.316365104444187e+02 - ME 8.457850707842401e-05 + ME 2.024851967866169e-03 Event 100 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2852,7 +2852,7 @@ Event 100 Batch 1 2 5.119062275524872e+02 -4.721600394809319e+02 -1.845880136125884e+02 7.099400083769524e+01 3 4.523854579707449e+02 2.836789572262426e+02 -3.060214184981774e+02 -1.747276258374610e+02 4 5.357083144767672e+02 1.884810822546894e+02 4.906094321107658e+02 1.037336249997658e+02 - ME 1.420495101373495e-05 + ME 6.898305006855298e-05 Event 101 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2860,7 
+2860,7 @@ Event 101 Batch 1 2 6.024072815192737e+02 -3.080418730730875e+02 -4.692284526425155e+02 2.186993289696520e+02 3 3.347434020484399e+02 8.940653726951260e+01 -3.939923552329941e+01 -3.201676381969582e+02 4 5.628493164322859e+02 2.186353358035749e+02 5.086276881658150e+02 1.014683092273061e+02 - ME 2.743452031293993e-05 + ME 9.290725627447436e-05 Event 102 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2868,7 +2868,7 @@ Event 102 Batch 1 2 5.910857738801296e+02 3.707548039128416e+02 -7.516477307090547e+01 -4.541734518311494e+02 3 2.311218706704979e+02 4.536804143672514e+01 -2.262982016400413e+02 1.217307902336991e+01 4 6.777923554493723e+02 -4.161228453495667e+02 3.014629747109467e+02 4.420003728077793e+02 - ME 7.158169676479796e-05 + ME 2.633339755449651e-04 Event 103 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2876,7 +2876,7 @@ Event 103 Batch 1 2 6.627949406417042e+02 7.189602123685950e+01 -6.391860825813610e+02 -1.599038689489492e+02 3 5.519979886399102e+02 1.442810582977179e+02 4.734454174874869e+02 2.444057944057306e+02 4 2.852070707183856e+02 -2.161770795345774e+02 1.657406650938741e+02 -8.450192545678139e+01 - ME 1.658567428345252e-05 + ME 1.652798222861839e-04 Event 104 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2884,7 +2884,7 @@ Event 104 Batch 1 2 4.368180791462563e+02 -3.483499330357901e+02 -2.596280064690262e+02 4.533935023690698e+01 3 4.635715977792429e+02 1.873023362819025e+02 -2.251347602994603e+02 -3.593477435519053e+02 4 5.996103230745010e+02 1.610475967538876e+02 4.847627667684865e+02 3.140083933149983e+02 - ME 2.162124469235967e-05 + ME 9.158171748371188e-05 Event 105 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2892,7 +2892,7 @@ Event 105 Batch 1 2 5.701708357490469e+02 2.288495716262106e+02 
-4.521314661478370e+02 -2.613422905391967e+02 3 3.711008490497917e+02 -3.362590561223710e+02 -8.126001400906793e+01 1.343223639771668e+02 4 5.587283152011612e+02 1.074094844961603e+02 5.333914801569049e+02 1.270199265620299e+02 - ME 1.720246557093887e-05 + ME 7.043372303967046e-05 Event 106 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2900,7 +2900,7 @@ Event 106 Batch 1 2 6.775588183099673e+02 5.149765831731705e+02 3.445381345095063e+02 -2.741870619150275e+02 3 7.044100837534635e+02 -4.546975847980706e+02 -4.392260662935809e+02 3.106833358270535e+02 4 1.180310979365712e+02 -6.027899837509908e+01 9.468793178407486e+01 -3.649627391202603e+01 - ME 2.786544600802367e-05 + ME 3.259673897057837e-04 Event 107 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2908,7 +2908,7 @@ Event 107 Batch 1 2 6.046880513041550e+02 2.289413119004024e+02 -5.349774474143721e+02 -1.644160754103499e+02 3 3.366746442316215e+02 -7.166101576320902e+01 2.452245434825371e+01 3.280444544890399e+02 4 5.586373044642238e+02 -1.572802961371935e+02 5.104549930661184e+02 -1.636283790786902e+02 - ME 4.667002706670146e-04 + ME 8.859556065170558e-04 Event 108 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2916,7 +2916,7 @@ Event 108 Batch 1 2 6.239206451413978e+02 -2.218030564243363e+02 5.011455197099735e+02 -2.982172759400455e+02 3 2.841199272340513e+02 1.209406641294798e+02 7.967327320293104e+01 2.444374323800143e+02 4 5.919594276245514e+02 1.008623922948564e+02 -5.808187929129044e+02 5.377984356003120e+01 - ME 7.961277501126149e-05 + ME 1.727643234936365e-04 Event 109 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2924,7 +2924,7 @@ Event 109 Batch 1 2 3.093404598873124e+02 1.546999830656544e+02 1.629193992247174e+02 2.126421988200774e+02 3 5.287372542258961e+02 -2.136116696975048e+02 
-1.865832176193536e+02 4.462284633214169e+02 4 6.619222858867909e+02 5.891168663185049e+01 2.366381839463621e+01 -6.588706621414941e+02 - ME 2.902408960420708e-01 + ME 1.686695657867669e+01 Event 110 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2932,7 +2932,7 @@ Event 110 Batch 1 2 4.920948406187608e+02 -8.595212543403569e+01 -4.824913009925944e+02 -4.440392734262522e+01 3 4.634042325716594e+02 -2.085760624772916e+00 1.255608851371819e+02 4.460645653843308e+02 4 5.445009268095798e+02 8.803788605880843e+01 3.569304158554124e+02 -4.016606380417056e+02 - ME 1.043536440561108e-03 + ME 4.151412887207382e-03 Event 111 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2940,7 +2940,7 @@ Event 111 Batch 1 2 4.637454700443120e+02 1.543048221589588e+02 -4.372769385391800e+02 6.225902899506631e+00 3 3.246747011850293e+02 -5.128652792678845e+01 -2.274142471268230e+02 2.259781269206006e+02 4 7.115798287706589e+02 -1.030182942321705e+02 6.646911856660031e+02 -2.322040298201072e+02 - ME 5.219332617201280e-04 + ME 1.240833065187375e-03 Event 112 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2948,7 +2948,7 @@ Event 112 Batch 1 2 6.923761777814550e+02 3.939190124845535e+02 4.398224952082178e+01 -5.676954684419625e+02 3 5.277418353503033e+02 -4.270527740856185e+02 4.970714905179168e+01 3.060499505927539e+02 4 2.798819868682421e+02 3.313376160106501e+01 -9.368939857261346e+01 2.616455178492087e+02 - ME 4.381536575941429e-05 + ME 5.385735959435035e-05 Event 113 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2956,7 +2956,7 @@ Event 113 Batch 1 2 7.174898838850694e+02 -6.130145063482008e+02 3.726797356942233e+02 1.071275347265524e+01 3 1.705115822510491e+02 3.993583199494100e+01 -1.624320619120163e+02 3.309311510932528e+01 4 6.119985338638814e+02 5.730786743532599e+02 
-2.102476737822071e+02 -4.380586858198049e+01 - ME 4.914674319256647e-05 + ME 2.197559713387976e-04 Event 114 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2964,7 +2964,7 @@ Event 114 Batch 1 2 6.772826088252357e+02 -1.430288042596954e+02 -3.410390118171982e+02 5.674036356844296e+02 3 6.725037798358682e+02 3.626161999767239e+01 2.510744134018114e+02 -6.228226615527174e+02 4 1.502136113388951e+02 1.067671842620232e+02 8.996459841538707e+01 5.541902586828807e+01 - ME 7.986648389935193e-05 + ME 8.926156406775035e-05 Event 115 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2972,7 +2972,7 @@ Event 115 Batch 1 2 9.320551230331124e+01 1.288474310894606e+01 -2.581623869377880e+01 8.862715576190526e+01 3 6.672654287607164e+02 1.525114284892182e+02 2.829200767588875e+02 5.847560574856374e+02 4 7.395290589359720e+02 -1.653961715981643e+02 -2.571038380651088e+02 -6.733832132475428e+02 - ME 4.304938165075599e-01 + ME 1.800237703627863e+00 Event 116 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2980,7 +2980,7 @@ Event 116 Batch 1 2 4.951202926530015e+02 -4.575339943514647e+02 4.220102313368785e+01 1.844608951947751e+02 3 3.101750696753587e+02 -4.711582585559527e+01 2.172188132736168e+02 2.163438466008694e+02 4 6.947046376716394e+02 5.046498202070600e+02 -2.594198364073050e+02 -4.008047417956444e+02 - ME 5.988625984136040e-04 + ME 1.933367100533606e-03 Event 117 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2988,7 +2988,7 @@ Event 117 Batch 1 2 6.543248494478489e+02 1.390926466871539e+02 9.107024539473488e+01 6.328510524967589e+02 3 5.040443237953712e+02 6.874740772121054e+01 1.336336536624387e+02 -4.811200690999848e+02 4 3.416308267567792e+02 -2.078400544083643e+02 -2.247038990571737e+02 -1.517309833967742e+02 - ME 3.026560085299302e-04 + ME 
4.207453923038474e-04 Event 118 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2996,7 +2996,7 @@ Event 118 Batch 1 2 5.829230400014206e+02 5.307803371482089e+02 -3.192285892796672e+01 2.388565162167381e+02 3 3.965113090906140e+02 -5.470249758902820e+01 2.256187790844517e+02 -3.214420966810604e+02 4 5.205656509079653e+02 -4.760778395591807e+02 -1.936959201564850e+02 8.258558046432242e+01 - ME 2.168340782914014e-05 + ME 7.464562943747175e-05 Event 119 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3004,7 +3004,7 @@ Event 119 Batch 1 2 3.549567073991255e+02 2.281637891139605e+02 1.474502150787006e+02 2.284600261271838e+02 3 4.727085372220640e+02 7.463684946128350e+01 -3.092948822053327e+02 3.495988811576870e+02 4 6.723347553788102e+02 -3.028006385752440e+02 1.618446671266322e+02 -5.780589072848707e+02 - ME 1.664672733965846e-03 + ME 1.455012849105755e-02 Event 120 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3012,7 +3012,7 @@ Event 120 Batch 1 2 7.192117275853698e+02 4.094232477570927e+02 -5.552624156333899e+02 -2.032775518283800e+02 3 3.685061529232585e+02 -2.522084621786424e+02 1.741347663658646e+02 2.046087962197375e+02 4 4.122821194913712e+02 -1.572147855784500e+02 3.811276492675253e+02 -1.331244391357209e+00 - ME 1.900262756274459e-05 + ME 9.281995463485567e-05 Event 121 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3020,7 +3020,7 @@ Event 121 Batch 1 2 1.923953846467517e+02 -5.182078839520096e+01 -1.486351786617837e+02 -1.106262789198433e+02 3 6.582127150877787e+02 -3.509182841037630e+02 -1.191939510078701e+02 5.439606035624541e+02 4 6.493919002654695e+02 4.027390724989639e+02 2.678291296696539e+02 -4.333343246426108e+02 - ME 5.360055113881300e-04 + ME 1.925188892577692e-03 Event 122 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -3028,7 +3028,7 @@ Event 122 Batch 1 2 6.905732817636248e+02 3.462508192534570e+02 -5.375670569609784e+02 -2.608131264380775e+02 3 7.097575386120018e+02 -2.677396278645660e+02 5.849221766424142e+02 2.998954860604125e+02 4 9.966917962437387e+01 -7.851119138889094e+01 -4.735511968143584e+01 -3.908235962233509e+01 - ME 3.451011759976180e-05 + ME 5.007312135859238e-04 Event 123 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3036,7 +3036,7 @@ Event 123 Batch 1 2 4.035126033432560e+02 2.481103298242076e+01 -3.878573016343356e+02 -1.085059780294573e+02 3 3.541388771651666e+02 1.572344474048876e+02 -3.105653677404273e+02 -6.512161875550808e+01 4 7.423485194915780e+02 -1.820454803873083e+02 6.984226693747627e+02 1.736275967849660e+02 - ME 3.471230489499830e-03 + ME 2.043564129780385e-02 Event 124 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3044,7 +3044,7 @@ Event 124 Batch 1 2 5.353042728143347e+02 -4.785252055946481e+02 -2.279396245170433e+02 7.488537693644093e+01 3 7.454081943698113e+02 6.785307544150930e+02 3.069354144183444e+02 -3.193811081429426e+01 4 2.192875328158541e+02 -2.000055488204448e+02 -7.899578990130104e+01 -4.294726612214667e+01 - ME 6.765427234678898e-06 + ME 1.399009675490331e-04 Event 125 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3052,7 +3052,7 @@ Event 125 Batch 1 2 7.351681880566981e+02 -1.932492970253984e+01 -4.393064933429818e+02 -5.891592456452273e+02 3 6.537497908129355e+02 -2.883189353576726e+01 3.454898907503182e+02 5.542510679217788e+02 4 1.110820211303664e+02 4.815682323830688e+01 9.381660259266363e+01 3.490817772344844e+01 - ME 6.639428548470109e-05 + ME 1.431077255619906e-04 Event 126 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3060,7 +3060,7 @@ Event 126 Batch 1 2 
5.568747108147126e+02 1.149185667256990e+02 4.264979152236775e+02 -3.391204725116689e+02 3 6.934211462641822e+02 -1.939160042589616e+02 -6.294239612595663e+02 2.169215212257340e+02 4 2.497041429211053e+02 7.899743753326281e+01 2.029260460358889e+02 1.221989512859350e+02 - ME 9.143592130512915e-06 + ME 3.344185566612618e-05 Event 127 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3068,7 +3068,7 @@ Event 127 Batch 1 2 7.108931196972316e+02 4.270547743949553e+02 5.664613189451065e+02 -4.598718776252147e+01 3 4.445675167124290e+02 -1.247884466860518e+02 -4.129475031266345e+02 1.074359351009545e+02 4 3.445393635903407e+02 -3.022663277089035e+02 -1.535138158184720e+02 -6.144874733843321e+01 - ME 1.427738327825488e-05 + ME 1.180920695556687e-04 Event 128 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3076,7 +3076,7 @@ Event 128 Batch 1 2 5.312407894292422e+02 -7.192118124205533e+01 -4.398126160332176e+02 -2.891521793453568e+02 3 5.717192413787027e+02 3.434745903572437e+02 1.811915566412192e+02 4.195923218357252e+02 4 3.970399691920551e+02 -2.715534091151883e+02 2.586210593919984e+02 -1.304401424903685e+02 - ME 3.532660248239223e-05 + ME 1.848006274423395e-04 Event 129 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3084,7 +3084,7 @@ Event 129 Batch 1 2 6.644129951428383e+02 -3.595672586482287e+02 4.645590915434784e+02 3.103882489514914e+02 3 1.967652372382455e+02 -5.204943416929049e+01 8.794498000645085e+00 -1.895522930301724e+02 4 6.388217676189169e+02 4.116166928175192e+02 -4.733535895441232e+02 -1.208359559213191e+02 - ME 9.192558188476414e-05 + ME 3.082956717278722e-04 Event 130 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3092,7 +3092,7 @@ Event 130 Batch 1 2 7.302263990443511e+02 -1.919590472356484e+02 3.836584700935805e+02 -5.909217345563752e+02 3 
4.156541164903923e+02 2.203243106780774e+02 -1.767969453775071e+02 3.049071707664833e+02 4 3.541194844652567e+02 -2.836526344242890e+01 -2.068615247160734e+02 2.860145637898919e+02 - ME 2.258971422042701e-05 + ME 3.110012368642411e-05 Event 131 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3100,7 +3100,7 @@ Event 131 Batch 1 2 2.308323688168238e+02 -1.780469473698228e+02 1.469011263880862e+02 1.710582294195638e+00 3 7.308075033948297e+02 5.219262643529272e+02 -3.840435213624620e+02 3.379099810545737e+02 4 5.383601277883465e+02 -3.438793169831044e+02 2.371423949743758e+02 -3.396205633487694e+02 - ME 7.770640764079256e-05 + ME 1.061667055612532e-03 Event 132 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3108,7 +3108,7 @@ Event 132 Batch 1 2 5.909630762789660e+02 -4.293852116769707e+02 -3.988922148105424e+02 7.583335995300355e+01 3 5.415993952096327e+02 2.260703809971038e+02 3.221145619770360e+02 -3.721079100067703e+02 4 3.674375285114020e+02 2.033148306798666e+02 7.677765283350686e+01 2.962745500537670e+02 - ME 1.628447412544396e-05 + ME 3.321676569401813e-05 Event 133 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3116,7 +3116,7 @@ Event 133 Batch 1 2 4.506052863582997e+02 2.189991325227701e+02 -3.914006430783634e+02 -4.347459771134355e+01 3 4.043998006859111e+02 3.160348074769272e+02 8.738893432792010e+01 2.366946839598570e+02 4 6.449949129557901e+02 -5.350339399996973e+02 3.040117087504433e+02 -1.932200862485142e+02 - ME 8.705579101282482e-05 + ME 3.121497332919934e-04 Event 134 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3124,7 +3124,7 @@ Event 134 Batch 1 2 7.151470882937614e+02 -1.041377497037516e+01 -4.186394096729767e+01 7.138447461686595e+02 3 3.416424731356660e+02 1.638631808685801e+02 3.081581136487586e+01 -2.981925940995343e+02 4 
4.432104385705719e+02 -1.534494058982047e+02 1.104812960242199e+01 -4.156521520691248e+02 - ME 6.342792451335309e-03 + ME 5.534325530265236e-02 Event 135 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3132,7 +3132,7 @@ Event 135 Batch 1 2 7.115730144432832e+02 -3.219296530898238e+02 2.184242454110169e+02 -5.958089478700319e+02 3 1.627059459894212e+02 -6.880794311551747e+01 -3.259803939022061e+01 1.437917231708342e+02 4 6.257210395672955e+02 3.907375962053413e+02 -1.858262060207963e+02 4.520172246991979e+02 - ME 1.277979532321233e-04 + ME 2.112989182930814e-04 Event 136 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3140,7 +3140,7 @@ Event 136 Batch 1 2 7.195404287114588e+02 -4.369992732083461e+02 -4.270318019286997e+02 3.800182941743402e+02 3 6.668605996318223e+02 3.634158794560479e+02 4.690430049045651e+02 -3.043527845290675e+02 4 1.135989716567186e+02 7.358339375229815e+01 -4.201120297586535e+01 -7.566550964527264e+01 - ME 7.515399240093053e-05 + ME 1.804344388349211e-03 Event 137 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3148,7 +3148,7 @@ Event 137 Batch 1 2 6.722782806744999e+02 -6.045581260407005e+02 -2.538460778300668e+02 1.484241478840623e+02 3 6.869263774705689e+02 6.661257235671316e+02 1.481819739565761e+02 -7.865412297735662e+01 4 1.407953418549304e+02 -6.156759752643097e+01 1.056641038734908e+02 -6.977002490670534e+01 - ME 2.119149330726453e-05 + ME 5.192812231664224e-04 Event 138 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3156,7 +3156,7 @@ Event 138 Batch 1 2 6.463287544295633e+02 8.684709774942756e+01 2.409249839962013e+02 -5.934253049048401e+02 3 3.917330799270068e+02 1.767690441671677e+02 4.696120064017492e+01 3.464132742372293e+02 4 4.619381656434300e+02 -2.636161419165952e+02 -2.878861846363762e+02 2.470120306676108e+02 - 
ME 4.203806696206548e-05 + ME 5.804753959762886e-05 Event 139 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3164,7 +3164,7 @@ Event 139 Batch 1 2 2.994802063237944e+02 -1.272876183039153e+02 6.552211336810879e+00 2.710042891410713e+02 3 7.257546970836092e+02 -8.848613612326799e+00 5.127896146768584e+00 -7.256826352181574e+02 4 4.747650965925943e+02 1.361362319162416e+02 -1.168010748357900e+01 4.546783460770868e+02 - ME 1.500396153249019e-04 + ME 1.724196014694060e-04 Event 140 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3172,7 +3172,7 @@ Event 140 Batch 1 2 7.326756101999780e+02 5.655005379385240e+02 4.343799907428446e+02 1.683351270988810e+02 3 7.428339005597779e+02 -5.680473426214219e+02 -4.534832054058505e+02 -1.532233754243464e+02 4 2.449048924024402e+01 2.546804682897962e+00 1.910321466300584e+01 -1.511175167453447e+01 - ME 1.024603362434272e-04 + ME 4.669436438173466e-03 Event 141 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3180,7 +3180,7 @@ Event 141 Batch 1 2 7.363238871411332e+02 -6.772722174663238e+02 -2.824373475598683e+02 -6.086341204880675e+01 3 5.504260535970963e+02 4.650298533191528e+02 2.914345410616540e+02 4.221355560271704e+01 4 2.132500592617708e+02 2.122423641471711e+02 -8.997193501785816e+00 1.864985644608987e+01 - ME 1.166401869382226e-05 + ME 7.300791864660033e-05 Event 142 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3188,7 +3188,7 @@ Event 142 Batch 1 2 5.862280565156834e+02 4.248793793115829e+01 -2.479279504752411e+02 -5.295184989682986e+02 3 4.287264749982929e+02 -3.025296967755320e+02 2.785471849307642e+02 1.212173201341831e+02 4 4.850454684860405e+02 2.600417588443628e+02 -3.061923445551928e+01 4.083011788341197e+02 - ME 1.949810022878841e-05 + ME 4.569028399965169e-05 Event 143 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3196,7 +3196,7 @@ Event 143 Batch 1 2 2.464531733710510e+02 4.046044690030688e+01 -2.103865804466287e+02 1.218179201483223e+02 3 5.378449948854583e+02 4.607829603950880e+02 -2.747641700963839e+02 3.822241180409925e+01 4 7.157018317434903e+02 -5.012434072953949e+02 4.851507505430126e+02 -1.600403319524219e+02 - ME 4.863434295951330e-04 + ME 1.284493741497843e-03 Event 144 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3204,7 +3204,7 @@ Event 144 Batch 1 2 5.367418008803521e+02 -1.343004856786532e+02 -4.048537736989352e+02 -3.258044847458254e+02 3 6.294877130859599e+02 3.313530054622211e+02 5.282137272543231e+02 8.631468610520756e+01 4 3.337704860336884e+02 -1.970525197835678e+02 -1.233599535553879e+02 2.394897986406179e+02 - ME 8.754930746282009e-06 + ME 2.612855607885159e-05 Event 145 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3212,7 +3212,7 @@ Event 145 Batch 1 2 6.805380148481771e+01 -3.411514819754512e+01 -4.339750646760406e+01 -3.980116822894492e+01 3 6.831461500979880e+02 -3.834019790669201e+02 -2.756424954453614e+02 -4.936727656514237e+02 4 7.488000484171945e+02 4.175171272644653e+02 3.190400019129655e+02 5.334739338803686e+02 - ME 4.117012994651258e-01 + ME 4.832444287218038e-01 Event 146 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3220,7 +3220,7 @@ Event 146 Batch 1 2 5.031746658797123e+02 4.202301876294930e+02 2.767377273314875e+02 2.750283520766640e+00 3 4.317115817339341e+02 -1.098088257924671e+02 -5.455162180567243e+01 4.139336083717602e+02 4 5.651137523863538e+02 -3.104213618370259e+02 -2.221861055258150e+02 -4.166838918925268e+02 - ME 1.122040831263755e-03 + ME 4.446377084117306e-03 Event 147 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3228,7 +3228,7 @@ 
Event 147 Batch 1 2 4.251223043705630e+02 -4.223502783198938e+02 -4.694338569631599e+01 1.206377286808446e+01 3 5.457819748703678e+02 2.791608945230574e+02 -4.384138579515959e+02 -1.665546403390879e+02 4 5.290957207590696e+02 1.431893837968364e+02 4.853572436479118e+02 1.544908674710035e+02 - ME 1.117959404473985e-05 + ME 5.820013407126093e-05 Event 148 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3236,7 +3236,7 @@ Event 148 Batch 1 2 6.905785821272525e+02 6.249608768654489e+02 -6.243387159972350e+01 -2.870970082698929e+02 3 1.361638260920089e+02 2.862044352088506e+01 1.704210379179796e+01 1.320266050727362e+02 4 6.732575917807402e+02 -6.535813203863343e+02 4.539176780792534e+01 1.550704031971573e+02 - ME 5.047601105033982e-04 + ME 9.573948308169230e-04 Event 149 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3244,7 +3244,7 @@ Event 149 Batch 1 2 6.694705528096943e+02 -5.216497821741067e+02 -3.785079074709545e+02 1.811189935345937e+02 3 2.821401257551277e+02 1.148500354702071e-01 2.786662494166578e+02 -4.413795199872407e+01 4 5.483893214351779e+02 5.215349321386365e+02 9.984165805429673e+01 -1.369810415358697e+02 - ME 3.486097449584098e-05 + ME 1.943324414096923e-04 Event 150 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3252,7 +3252,7 @@ Event 150 Batch 1 2 4.637486188995366e+02 -4.033412855298819e+02 -2.279949807412008e+02 -1.992178895453991e+01 3 3.756800751656199e+02 6.230662615514293e+01 -2.632310737913946e+02 -2.606967683041707e+02 4 6.605713059348438e+02 3.410346593747391e+02 4.912260545325952e+02 2.806185572587107e+02 - ME 4.211370643652993e-05 + ME 2.156945366470290e-04 Event 151 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3260,7 +3260,7 @@ Event 151 Batch 1 2 3.821954355913596e+02 -2.528320044280690e+02 2.861764538722267e+02 
1.588602445142563e+01 3 6.796189325418250e+02 2.911670128135291e+02 -4.900375979142738e+02 3.700902818893582e+02 4 4.381856318668152e+02 -3.833500838546018e+01 2.038611440420471e+02 -3.859763063407838e+02 - ME 1.923941526207248e-04 + ME 8.197229841786387e-03 Event 152 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3268,7 +3268,7 @@ Event 152 Batch 1 2 6.751133298339792e+02 -2.999578895043981e+02 -2.855974213275218e+02 -5.331391803034741e+02 3 4.976977783498468e+02 -3.003988119418482e+00 1.843802943840355e+02 4.622747685874795e+02 4 3.271888918161745e+02 3.029618776238166e+02 1.012171269434863e+02 7.086441171599445e+01 - ME 6.977738125195056e-05 + ME 1.204579535049519e-04 Event 153 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3276,7 +3276,7 @@ Event 153 Batch 1 2 1.729293620257127e+02 1.558357805102956e+02 -7.193392860849491e+01 2.110174585940510e+01 3 6.524550819255464e+02 2.410158908712478e+02 5.786677971610501e+02 1.809766692333240e+02 4 6.746155560487412e+02 -3.968516713815435e+02 -5.067338685525552e+02 -2.020784150927291e+02 - ME 1.391654510317005e-04 + ME 5.985591428637023e-04 Event 154 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3284,7 +3284,7 @@ Event 154 Batch 1 2 6.585658455851002e+02 -2.410305357139302e+02 -2.116446673272157e+02 -5.751693564652295e+02 3 5.764400833248005e+02 3.388133979948972e+02 3.092747322371399e+02 3.490527051926400e+02 4 2.649940710900988e+02 -9.778286228096688e+01 -9.763006490992416e+01 2.261166512725894e+02 - ME 2.686434432328395e-05 + ME 3.655181799213059e-05 Event 155 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3292,7 +3292,7 @@ Event 155 Batch 1 2 5.686586231936359e+02 -1.693366246265498e+02 -1.542203680657918e+02 5.204938187588979e+02 3 1.882190564276536e+02 -1.089234770645493e+02 -9.145416397064866e+01 
1.232810822434430e+02 4 7.431223203787102e+02 2.782601016910992e+02 2.456745320364404e+02 -6.437749010023409e+02 - ME 4.701119881405690e-01 + ME 6.696396361607482e-01 Event 156 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3300,7 +3300,7 @@ Event 156 Batch 1 2 6.143652095725128e+02 2.879464601546110e+02 5.379391909976823e+02 -7.178351904348040e+01 3 6.287751645293085e+02 -4.584164185734781e+02 -4.225140875260598e+02 -8.181956094447702e+01 4 2.568596258981782e+02 1.704699584188668e+02 -1.154251034716223e+02 1.536030799879581e+02 - ME 7.769660148731367e-06 + ME 2.899571701789112e-05 Event 157 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3308,7 +3308,7 @@ Event 157 Batch 1 2 5.050842109798973e+02 4.185498850973046e+02 -1.305174306570672e+02 -2.507812875014723e+02 3 5.170424494038050e+02 -3.084595065654854e+02 3.930456446728388e+02 -1.330441599566699e+02 4 4.778733396162975e+02 -1.100903785318191e+02 -2.625282140157716e+02 3.838254474581424e+02 - ME 1.243977993100618e-05 + ME 4.033251359625283e-05 Event 158 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3316,7 +3316,7 @@ Event 158 Batch 1 2 4.312542366204098e+02 -3.114503370626313e+02 2.737030704635235e+02 1.185982013584742e+02 3 6.944315393047829e+02 2.166643175309468e+02 -6.173965008138002e+02 -2.326226495269423e+02 4 3.743142240748070e+02 9.478601953168439e+01 3.436934303502764e+02 1.140244481684682e+02 - ME 5.864250821924803e-06 + ME 3.680357310121394e-05 Event 159 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3324,7 +3324,7 @@ Event 159 Batch 1 2 5.860112473308646e+02 -1.581297551692178e+02 4.935632758462007e+02 2.734948907463652e+02 3 3.772013313646349e+02 -2.371132827856262e+02 -1.305099443644436e+02 -2.627266448837395e+02 4 5.367874213045002e+02 3.952430379548442e+02 -3.630533314817573e+02 
-1.076824586262577e+01 - ME 2.805189658646002e-05 + ME 1.030382455754272e-04 Event 160 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3332,7 +3332,7 @@ Event 160 Batch 1 2 5.883409724804535e+02 -3.739819298758817e+02 -2.887651121595530e+02 3.505671490956299e+02 3 4.300332553173178e+02 1.788055146224819e+02 3.829208006453583e+02 7.955406370837679e+01 4 4.816257722022287e+02 1.951764152533999e+02 -9.415568848580530e+01 -4.301212128040066e+02 - ME 2.307516153071828e-04 + ME 9.797271586219467e-03 Event 161 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3340,7 +3340,7 @@ Event 161 Batch 1 2 6.868305165969147e+02 4.119610488151656e+00 5.515184990814985e+02 4.093244831537709e+02 3 3.260821955312833e+02 -1.956999890649130e+02 -2.483451099187458e+02 -7.972338993006402e+01 4 4.870872878718022e+02 1.915803785767614e+02 -3.031733891627526e+02 -3.296010932237070e+02 - ME 9.860610555787331e-05 + ME 1.075603053132144e-03 Event 162 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3348,7 +3348,7 @@ Event 162 Batch 1 2 2.159818802305119e+02 -2.018126805027919e+02 4.096951387107715e+01 -6.512536763314942e+01 3 6.870078865581224e+02 4.896730732821633e+02 -2.356527215298929e+02 -4.203188222421333e+02 4 5.970102332113654e+02 -2.878603927793715e+02 1.946832076588156e+02 4.854441898752826e+02 - ME 2.809071549115161e-05 + ME 5.344822454174306e-05 Event 163 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3356,7 +3356,7 @@ Event 163 Batch 1 2 4.889699854403287e+02 -4.067839821807834e+01 -2.740835242435768e+02 4.028835269878222e+02 3 4.282392920294498e+02 4.007468150560176e+02 -8.832740907173851e+01 -1.224301852772270e+02 4 5.827907225302220e+02 -3.600684168379390e+02 3.624109333153153e+02 -2.804533417105952e+02 - ME 1.173701793303044e-04 + ME 4.336231422638298e-04 Event 164 
Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3364,7 +3364,7 @@ Event 164 Batch 1 2 6.224346677404150e+02 -1.282049393554146e+02 5.480608628970117e+02 -2.657399098565701e+02 3 7.444531740822750e+02 1.794330131141779e+02 -6.708967511266460e+02 2.681638893170603e+02 4 1.331121581773107e+02 -5.122807375876333e+01 1.228358882296343e+02 -2.423979460490191e+00 - ME 1.571413941583783e-05 + ME 1.368953177788070e-04 Event 165 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3372,7 +3372,7 @@ Event 165 Batch 1 2 6.980339706506675e+02 -5.154669325341684e+01 -4.947847840614098e+02 4.896757907618869e+02 3 1.362964882116331e+02 4.252532371924361e+01 -5.641238783031591e+01 -1.165588780002596e+02 4 6.656695411377010e+02 9.021369534174053e+00 5.511971718917263e+02 -3.731169127616273e+02 - ME 4.238311927693088e-04 + ME 1.450267418906797e-03 Event 166 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3380,7 +3380,7 @@ Event 166 Batch 1 2 3.060640747281171e+02 -1.981167412190918e+02 -9.095380261170779e+01 -2.148310510107333e+02 3 5.580104478575086e+02 -3.585720992432471e+02 -1.558095186186280e+02 3.981521109704927e+02 4 6.359254774143739e+02 5.566888404623389e+02 2.467633212303362e+02 -1.833210599597597e+02 - ME 1.099447007687216e-04 + ME 3.000804338470548e-04 Event 167 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3388,7 +3388,7 @@ Event 167 Batch 1 2 2.833153623322893e+02 2.526850217013923e+02 8.687924899084067e+01 9.417998957332070e+01 3 6.595685044563415e+02 -8.780626893611850e+01 -2.875856231737449e+02 -5.870393347553995e+02 4 5.571161332113688e+02 -1.648787527652738e+02 2.007063741829043e+02 4.928593451820789e+02 - ME 4.244421486768831e-05 + ME 7.367447958524992e-05 Event 168 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 
7.500000000000000e+02 @@ -3396,7 +3396,7 @@ Event 168 Batch 1 2 6.026267479353969e+02 -5.987968578530475e+02 5.775180228477150e+00 6.758674164241529e+01 3 4.991211680715713e+02 3.812575567959843e+02 3.220701575873951e+02 -5.952259631185711e+00 4 3.982520839930309e+02 2.175393010570631e+02 -3.278453378158730e+02 -6.163448201122968e+01 - ME 1.203107058680061e-05 + ME 9.606399998327532e-05 Event 169 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3404,7 +3404,7 @@ Event 169 Batch 1 2 5.510662376679772e+02 -9.251111075413947e+01 -5.291920243323356e+02 -1.227660134875281e+02 3 5.034535790022877e+02 -2.816014265681677e+02 3.283802195198170e+02 2.575511098657944e+02 4 4.454801833297348e+02 3.741125373223072e+02 2.008118048125185e+02 -1.347850963782663e+02 - ME 2.085195230877358e-05 + ME 1.532484123791625e-04 Event 170 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3412,7 +3412,7 @@ Event 170 Batch 1 2 2.814808559369750e+02 3.658097943502287e+01 -1.412301634042880e+02 -2.407225480659935e+02 3 6.646522150540470e+02 2.753499086551696e+02 -1.631412967142655e+02 5.825203104495404e+02 4 5.538669290089779e+02 -3.119308880901926e+02 3.043714601185535e+02 -3.417977623835468e+02 - ME 2.587160315460459e-04 + ME 7.823510217753851e-04 Event 171 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3420,7 +3420,7 @@ Event 171 Batch 1 2 1.777965289077954e+02 -6.143496808852239e+01 -1.603735842336773e+00 1.668375809551635e+02 3 7.439290290569696e+02 2.163074211412066e+01 -1.907051550939623e+01 -7.433699124308462e+02 4 5.782744420352348e+02 3.980422597440174e+01 2.067425135173305e+01 5.765323314756826e+02 - ME 1.981167274383509e-03 + ME 2.063755640794395e-03 Event 172 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3428,7 +3428,7 @@ Event 172 Batch 1 2 1.369499454750680e+02 
-1.250080331667568e+01 -3.518152151649629e+01 -1.317622025690455e+02 3 6.692885586315896e+02 -2.346283187163472e+02 -6.130705295376303e+02 1.305421486874673e+02 4 6.937614958933425e+02 2.471291220330227e+02 6.482520510541266e+02 1.220053881578238e+00 - ME 1.548169060571347e-04 + ME 5.039586079692636e-04 Event 173 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3436,7 +3436,7 @@ Event 173 Batch 1 2 7.088772083623137e+02 4.973951266878932e+01 3.171232495758680e+01 -7.064185769505260e+02 3 5.785136264307895e+02 8.584813303397833e+01 5.766505028397120e+01 5.691949191590089e+02 4 2.126091652068944e+02 -1.355876457027672e+02 -8.937737524155732e+01 1.372236577915166e+02 - ME 1.732961413682620e-04 + ME 1.743760900867476e-04 Event 174 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3444,7 +3444,7 @@ Event 174 Batch 1 2 4.367208701713482e+02 -3.923163287174704e+01 4.325755195957351e+02 -4.543585887727652e+01 3 3.528978856725088e+02 9.622572295106905e+01 1.987077746703234e+02 -2.753048278549415e+02 4 7.103812441561454e+02 -5.699409007932221e+01 -6.312832942660567e+02 3.207406867322186e+02 - ME 1.541208918572365e-04 + ME 9.353677491192390e-04 Event 175 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3452,7 +3452,7 @@ Event 175 Batch 1 2 6.418562164876806e+02 1.962785648722137e+02 -6.110736372974047e+02 -6.567908015856712e+00 3 4.843421844702149e+02 -1.886631806266161e+02 3.569879071908527e+02 -2.674942804112337e+02 4 3.738015990421035e+02 -7.615384245597569e+00 2.540857301065516e+02 2.740621884270906e+02 - ME 1.279055979705581e-05 + ME 3.029111560812189e-05 Event 176 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3460,7 +3460,7 @@ Event 176 Batch 1 2 6.288652703123263e+02 4.005522031116294e+02 3.691482793515075e+02 3.142594606996526e+02 3 7.209127580467475e+02 
-4.124575135572966e+02 -5.165298058232565e+02 -2.877341896975221e+02 4 1.502219716409257e+02 1.190531044566666e+01 1.473815264717492e+02 -2.652527100213051e+01 - ME 1.300720357566141e-05 + ME 1.719274466020296e-04 Event 177 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3468,7 +3468,7 @@ Event 177 Batch 1 2 4.716578040000077e+02 -4.521622645932388e+02 -1.012739918234145e+01 1.338200520767543e+02 3 3.021382980750606e+02 -2.714821202364266e+02 6.773215888881064e+01 -1.140059832109250e+02 4 7.262038979249317e+02 7.236443848296653e+02 -5.760475970646905e+01 -1.981406886582933e+01 - ME 6.442260552556652e-04 + ME 2.354271252348000e-03 Event 178 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3476,7 +3476,7 @@ Event 178 Batch 1 2 7.350088877399502e+02 -3.684484945749095e+02 -2.561732769425163e+02 -5.821159885132296e+02 3 1.415495174310248e+02 7.181268644032879e+01 1.095010133995263e+02 5.374692563910759e+01 4 6.234415948290248e+02 2.966358081345808e+02 1.466722635429900e+02 5.283690628741219e+02 - ME 6.828487731379645e-05 + ME 1.035408980291912e-04 Event 179 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3484,7 +3484,7 @@ Event 179 Batch 1 2 7.426064621425413e+02 6.748632301344054e+01 7.201624948975951e+02 -1.681544967131679e+02 3 5.821031882499326e+02 8.394276920418550e-01 -5.588194474899291e+02 1.629854049874919e+02 4 1.752903496075256e+02 -6.832575070548241e+01 -1.613430474076661e+02 5.169091725675888e+00 - ME 1.412410550503903e-05 + ME 9.197132478706931e-05 Event 180 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3492,7 +3492,7 @@ Event 180 Batch 1 2 6.099515195485484e+02 2.272495331206023e+02 1.762692760011278e+02 -5.378918555193875e+02 3 5.718889655176699e+02 4.324570510796980e+01 -3.278409766521432e+02 4.665909256493895e+02 4 3.181595149337819e+02 
-2.704952382285720e+02 1.515717006510154e+02 7.130092986999803e+01 - ME 3.043963963928669e-05 + ME 5.401477812349802e-05 Event 181 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3500,7 +3500,7 @@ Event 181 Batch 1 2 1.206370886915177e+02 -8.151225636567759e+01 1.767749325039422e+01 8.715827822142556e+01 3 6.451493408002739e+02 -6.748216257939080e+01 4.373428479320614e+02 4.694625256943417e+02 4 7.342135705082084e+02 1.489944189450684e+02 -4.550203411824557e+02 -5.566208039157672e+02 - ME 2.625479922313071e-02 + ME 7.131653341377736e-02 Event 182 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3508,7 +3508,7 @@ Event 182 Batch 1 2 4.626866082364760e+02 -3.084610429505738e+02 3.306629079434072e+02 9.794245113140897e+01 3 4.974966719253473e+02 3.582955998671217e+02 1.664640547097976e+02 -3.023523113558579e+02 4 5.398167198381765e+02 -4.983455691654795e+01 -4.971269626532048e+02 2.044098602244489e+02 - ME 1.414799589613471e-05 + ME 5.959042767905828e-05 Event 183 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3516,7 +3516,7 @@ Event 183 Batch 1 2 3.304723045950491e+02 3.244647182058462e+00 3.209425641774955e+02 7.872284845075714e+01 3 4.379804819457451e+02 2.312428523500660e+02 3.131807483468383e+02 2.006775141049615e+02 4 7.315472134592065e+02 -2.344874995321247e+02 -6.341233125243344e+02 -2.794003625557186e+02 - ME 2.330806393221907e-03 + ME 4.899988668912175e-03 Event 184 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3524,7 +3524,7 @@ Event 184 Batch 1 2 7.470051035005908e+02 -4.953964753944513e+02 -4.028924750569613e+02 3.876552725878485e+02 3 2.183325716323390e+02 1.119040172022777e+02 1.451703047217021e+02 -1.186262424448778e+02 4 5.346623248670695e+02 3.834924581921736e+02 2.577221703352594e+02 -2.690290301429710e+02 - ME 7.987999480474686e-05 + 
ME 5.441344453720516e-04 Event 185 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3532,7 +3532,7 @@ Event 185 Batch 1 2 4.448583927494090e+02 2.810173563272025e+02 -3.384637477435971e+02 6.610995769032235e+01 3 6.236443795626774e+02 -1.690803760724666e+02 5.125139620028374e+02 3.125277225134823e+02 4 4.314972276879136e+02 -1.119369802547359e+02 -1.740502142592404e+02 -3.786376802038046e+02 - ME 1.405605442011058e-04 + ME 6.949230823829164e-03 Event 186 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3540,7 +3540,7 @@ Event 186 Batch 1 2 6.802792190696962e+02 -1.681815241656754e+02 5.427923640013703e+02 3.739936368565512e+02 3 6.331554869749547e+02 3.172201723440435e+02 -4.588808692389625e+02 -2.994755095011972e+02 4 1.865652939553488e+02 -1.490386481783679e+02 -8.391149476240778e+01 -7.451812735535422e+01 - ME 3.045129627255903e-05 + ME 3.276943053321406e-04 Event 187 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3548,7 +3548,7 @@ Event 187 Batch 1 2 7.472897115267965e+02 -6.988402471604775e+02 -2.391684329048669e+02 1.134137672609268e+02 3 6.826908170748527e+02 6.328852277257668e+02 2.212839847556716e+02 -1.286718241709738e+02 4 7.001947139835140e+01 6.595501943471052e+01 1.788444814919547e+01 1.525805691004725e+01 - ME 3.485925693242860e-05 + ME 1.461490870437387e-04 Event 188 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3556,7 +3556,7 @@ Event 188 Batch 1 2 6.496068877140275e+02 -5.024316730938291e+02 -3.980061777252906e+02 -1.055585379310702e+02 3 4.885976180718368e+02 4.424928723138696e+02 1.459942636040002e+02 -1.470148473169288e+02 4 3.617954942141354e+02 5.993880077995960e+01 2.520119141212904e+02 2.525733852479991e+02 - ME 1.006519408431335e-05 + ME 2.843805826594158e-05 Event 189 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -3564,7 +3564,7 @@ Event 189 Batch 1 2 4.082379946778654e+02 2.679237131173331e+02 -7.718184435750955e+01 2.981913934867987e+02 3 5.864211573889181e+02 -5.780822197382728e+02 -6.394893886953379e+01 7.497502433004084e+01 4 5.053408479332167e+02 3.101585066209396e+02 1.411307832270433e+02 -3.731664178168398e+02 - ME 1.322787627040098e-04 + ME 1.937644878671120e-03 Event 190 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3572,7 +3572,7 @@ Event 190 Batch 1 2 6.472516823166364e+02 6.463779961822676e+02 -3.289365889632791e+01 6.945035458816692e+00 3 4.318767277050750e+02 -3.286790725415815e+02 -7.183748821760624e+00 -2.800642229191639e+02 4 4.208715899782885e+02 -3.176989236406859e+02 4.007740771808847e+01 2.731191874603472e+02 - ME 1.272332211942340e-05 + ME 3.409584379294133e-05 Event 191 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3580,7 +3580,7 @@ Event 191 Batch 1 2 6.757500036387052e+02 6.222744522021635e+02 -2.261571472854044e+02 1.351499844096745e+02 3 3.644673602666567e+02 -2.020102809038697e+02 1.114149692296405e+02 -2.821613151026251e+02 4 4.597826360946380e+02 -4.202641712982938e+02 1.147421780557637e+02 1.470113306929507e+02 - ME 1.560703181590231e-05 + ME 5.389305783035389e-05 Event 192 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3588,7 +3588,7 @@ Event 192 Batch 1 2 7.394562478491531e+02 -7.307873850878615e+02 3.988568028534699e+01 1.056147375500683e+02 3 8.098058518630978e+01 5.419286926826393e+01 4.244928426361276e+00 -6.002473390399248e+01 4 6.795631669645365e+02 6.765945158195976e+02 -4.413060871170821e+01 -4.559000364607596e+01 - ME 1.231033846344155e-04 + ME 4.204295748489254e-04 Event 193 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3596,7 +3596,7 @@ Event 193 Batch 1 2 
5.607395612273153e+02 -3.164229781907934e+02 -3.517992386171808e+02 -3.009030576558548e+02 3 3.741643617741927e+02 -2.156271676189966e+02 1.666697084176705e+02 2.563690747778811e+02 4 5.650960769984922e+02 5.320501458097899e+02 1.851295301995104e+02 4.453398287797368e+01 - ME 3.026844143728605e-05 + ME 9.141090879934244e-05 Event 194 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3604,7 +3604,7 @@ Event 194 Batch 1 2 5.729373416862012e+02 -2.155045544874616e+02 -1.679805246197324e+02 5.035846779262559e+02 3 2.831035485618876e+02 -2.543279085173982e+02 1.042261812492671e+02 -6.783684323208054e+01 4 6.439591097519118e+02 4.698324630048598e+02 6.375434337046515e+01 -4.357478346941756e+02 - ME 5.497724763810379e-04 + ME 1.781231321893996e-03 Event 195 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3612,7 +3612,7 @@ Event 195 Batch 1 2 5.572874060171201e+02 -5.433144409127298e+02 3.646295232533866e+01 1.185290019729285e+02 3 6.765845568040619e+02 5.574999049241243e+02 -1.212989803269169e+01 -3.831623469093195e+02 4 2.661280371788181e+02 -1.418546401139455e+01 -2.433305429264712e+01 2.646333449363910e+02 - ME 3.378534889977447e-04 + ME 3.395618115588225e-04 Event 196 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3620,7 +3620,7 @@ Event 196 Batch 1 2 5.405888343305829e+02 3.940239871950471e+02 -8.826690628749978e+01 -3.594305754554688e+02 3 6.983754392688073e+02 -3.888370902622853e+02 -5.513072771506098e+01 5.774898910559966e+02 4 2.610357264006097e+02 -5.186896932761887e+00 1.433976340025607e+02 -2.180593156005277e+02 - ME 2.676929502290073e-04 + ME 5.539073969003598e-03 Event 197 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3628,7 +3628,7 @@ Event 197 Batch 1 2 2.783346334111661e+02 2.282410890438732e+02 -1.474467226896361e+02 6.029624695020830e+01 3 
6.434654504578666e+02 1.172104173128919e+01 6.205939438823057e+02 1.696277097949658e+02 4 5.781999161309674e+02 -2.399621307751624e+02 -4.731472211926695e+02 -2.299239567451741e+02 - ME 4.280180350752636e-05 + ME 3.321087064690878e-04 Event 198 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3636,7 +3636,7 @@ Event 198 Batch 1 2 4.349536439683943e+02 1.774777254208009e+02 -9.709992209949135e+01 3.850427697141142e+02 3 4.134500153047116e+02 7.095914770071803e+01 -4.041194890923881e+02 -5.092301099466194e+01 4 6.515963407268921e+02 -2.484368731215197e+02 5.012194111918782e+02 -3.341197587194521e+02 - ME 2.926862112764983e-04 + ME 7.849443582399766e-04 Event 199 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3644,7 +3644,7 @@ Event 199 Batch 1 2 6.682109290882580e+02 2.136897997740939e+02 -5.035763266519416e+02 3.837361052354048e+02 3 1.424120473397155e+02 8.952788458880865e+01 -4.686863299276860e+01 -1.003458038481504e+02 4 6.893770235720265e+02 -3.032176843629025e+02 5.504449596447103e+02 -2.833903013872543e+02 - ME 4.183851150998592e-04 + ME 1.167594898598604e-03 Event 200 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3652,7 +3652,7 @@ Event 200 Batch 1 2 5.959952693237885e+02 -4.878566955018547e+02 -2.510837703973929e+01 -3.414319479966339e+02 3 4.479637599869168e+02 4.499951041477978e+01 7.146287716862105e+01 4.399313940955211e+02 4 4.560409706892941e+02 4.428571850870749e+02 -4.635450012888173e+01 -9.849944609888662e+01 - ME 3.228844805909175e-04 + ME 5.545496796633981e-04 Event 201 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3660,7 +3660,7 @@ Event 201 Batch 1 2 5.203096708642927e+02 -1.112696379946441e+02 1.367824427202020e+02 4.895219960522141e+02 3 2.871951825199399e+02 -2.582762312778227e+02 1.200876310962787e+02 3.678888524092984e+01 4 
6.924951466157675e+02 3.695458692724667e+02 -2.568700738164807e+02 -5.263108812931440e+02 - ME 2.285182473348715e-03 + ME 6.577575910850049e-03 Event 202 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3668,7 +3668,7 @@ Event 202 Batch 1 2 2.158792376054218e+02 2.112389782008981e+01 -7.195062193526132e+01 -2.024369881546198e+02 3 5.463652944256570e+02 2.787950008966254e+02 -3.108926376755554e+02 -3.523267663221479e+02 4 7.377554679689213e+02 -2.999188987167153e+02 3.828432596108168e+02 5.547637544767679e+02 - ME 1.952686275320307e-03 + ME 8.695282964050810e-03 Event 203 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3676,7 +3676,7 @@ Event 203 Batch 1 2 7.124273471334275e+02 4.879265047129839e+02 -1.059167473143779e+02 -5.081949365946950e+02 3 6.746108110440506e+02 -5.248642991835990e+02 4.352799102536777e+01 4.215714978711400e+02 4 1.129618418225217e+02 3.693779447061509e+01 6.238875628901040e+01 8.662343872355494e+01 - ME 4.211918129012132e-05 + ME 5.361938367485652e-05 Event 204 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3684,7 +3684,7 @@ Event 204 Batch 1 2 7.084787759842808e+02 4.992472551829619e+02 -4.528122431715626e+02 -2.183012291454193e+02 3 1.034373169902747e+02 -8.959882065299325e+01 -3.938861547415055e+01 -3.346441176487074e+01 4 6.880839070254444e+02 -4.096484345299685e+02 4.922008586457131e+02 2.517656409102901e+02 - ME 1.033102023766027e-04 + ME 2.988048706021647e-04 Event 205 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3692,7 +3692,7 @@ Event 205 Batch 1 2 6.496569846879349e+02 -5.869603795046561e+02 -2.345911576090251e+02 1.499956646614410e+02 3 2.543878192344406e+02 -1.851019090219859e+00 2.474675926596849e+02 -5.890268997594536e+01 4 5.959551960776247e+02 5.888113985948760e+02 -1.287643505065981e+01 -9.109297468549572e+01 - 
ME 4.134215827558992e-05 + ME 1.871447246980874e-04 Event 206 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3700,7 +3700,7 @@ Event 206 Batch 1 2 6.172060642836410e+02 2.978040691523503e+02 4.166709400833434e+02 3.444435946201744e+02 3 7.205754982426181e+02 -2.468045809177361e+02 -5.690387091428452e+02 -3.667580878490107e+02 4 1.622184374737409e+02 -5.099948823461420e+01 1.523677690595017e+02 2.231449322883641e+01 - ME 1.138691716042452e-05 + ME 7.356489425273393e-05 Event 207 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3708,7 +3708,7 @@ Event 207 Batch 1 2 5.250113096394139e+02 -1.091977068802181e+02 -4.322753509449321e+02 2.772196909074646e+02 3 5.240251005653129e+02 3.541948269240045e+02 3.738549241960732e+02 9.685466564450643e+01 4 4.509635897952731e+02 -2.449971200437864e+02 5.842042674885889e+01 -3.740743565519710e+02 - ME 9.518274156960593e-05 + ME 3.378615964480245e-03 Event 208 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3716,7 +3716,7 @@ Event 208 Batch 1 2 4.449444343820048e+02 1.928662436733418e+02 -3.595193210859464e+02 1.775500478872298e+02 3 4.894053462810564e+02 -2.195789585225567e+02 2.295326432211599e+02 3.723136307450180e+02 4 5.656502193369389e+02 2.671271484921488e+01 1.299866778647865e+02 -5.498636786322478e+02 - ME 2.179806976662403e-03 + ME 2.068943926258950e-01 Event 209 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3724,7 +3724,7 @@ Event 209 Batch 1 2 4.949423498078044e+02 -2.830370809537592e+02 -1.684680620467476e+02 -3.694271951395289e+02 3 6.326444171345161e+02 3.898538983719823e+02 -1.748162179498052e+02 4.665749526039372e+02 4 3.724132330576786e+02 -1.068168174182231e+02 3.432842799965525e+02 -9.714775746440780e+01 - ME 3.638076645868775e-05 + ME 1.473942246791387e-04 Event 210 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3732,7 +3732,7 @@ Event 210 Batch 1 2 5.469464199121014e+02 -4.947084169679945e+02 2.319240083666633e+02 -2.500445517953792e+01 3 2.929141603572806e+02 -5.602902696925145e+01 2.099470855189298e+01 2.867379913571110e+02 4 6.601394197306178e+02 5.507374439372461e+02 -2.529187169185561e+02 -2.617335361775729e+02 - ME 7.792286450853471e-04 + ME 1.577330101330874e-03 Event 211 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3740,7 +3740,7 @@ Event 211 Batch 1 2 5.484404249965427e+02 1.659778109685243e+01 3.514591842057613e+02 -4.206992456262192e+02 3 4.635537606517395e+02 -3.607884938122542e+02 -3.140996451540818e+01 2.893564685231623e+02 4 4.880058143517181e+02 3.441907127154018e+02 -3.200492196903532e+02 1.313427771030569e+02 - ME 1.717788621912363e-05 + ME 4.999214184618137e-05 Event 212 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3748,7 +3748,7 @@ Event 212 Batch 1 2 6.930853388432640e+02 -3.424793196872474e+02 -8.152110066892747e+01 5.970171795281683e+02 3 9.131624224772825e+01 6.738328155058525e+01 1.365968298972706e+01 6.009627714210347e+01 4 7.155984189090078e+02 2.750960381366621e+02 6.786141767920034e+01 -6.571134566702718e+02 - ME 4.440767413899675e-02 + ME 3.224436999651524e-01 Event 213 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3756,7 +3756,7 @@ Event 213 Batch 1 2 7.316448870278512e+02 4.203233031264803e+02 4.913598772661251e+02 -3.423419819067778e+02 3 4.750162603483208e+02 -1.726357548525294e+02 -3.708603862154638e+02 2.414537588813190e+02 4 2.933388526238279e+02 -2.476875482739507e+02 -1.204994910506614e+02 1.008882230254589e+02 - ME 1.166473784051930e-05 + ME 4.008080891216109e-05 Event 214 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3764,7 +3764,7 @@ Event 
214 Batch 1 2 4.805779599533694e+02 3.904513572450257e+02 -1.742898429406511e+02 2.193763065287195e+02 3 6.164938851206517e+02 -5.563771061772993e+02 2.227142270499353e+02 1.445946028815716e+02 4 4.029281549259790e+02 1.659257489322735e+02 -4.842438410928419e+01 -3.639709094102910e+02 - ME 1.644694060635318e-04 + ME 1.130096726278085e-02 Event 215 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3772,7 +3772,7 @@ Event 215 Batch 1 2 4.610896439725640e+02 -3.106576460930037e+02 -3.050258363865880e+02 -1.518378274323046e+02 3 7.153470686812809e+02 2.726436938726979e+02 6.046054769368644e+02 2.680280994976061e+02 4 3.235632873461531e+02 3.801395222030658e+01 -2.995796405502758e+02 -1.161902720653026e+02 - ME 1.638803663744001e-05 + ME 2.130646114222361e-04 Event 216 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3780,7 +3780,7 @@ Event 216 Batch 1 2 5.309452696424389e+02 -4.912950836090372e+02 -3.608909251460832e+01 -1.980646298023531e+02 3 6.627369363365399e+02 4.479096066616000e+02 2.308759280187052e+02 4.304573578259469e+02 4 3.063177940210212e+02 4.338547694743724e+01 -1.947868355040969e+02 -2.323927280235938e+02 - ME 7.684209531203918e-05 + ME 1.881406502208647e-03 Event 217 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3788,7 +3788,7 @@ Event 217 Batch 1 2 4.608032244164870e+02 2.215832851737383e+02 3.318832460795877e+02 -2.304212888079594e+02 3 3.107022283044695e+02 -4.724697178681157e+01 2.830528592337836e+02 -1.190994425256424e+02 4 7.284945472790432e+02 -1.743363133869267e+02 -6.149361053133712e+02 3.495207313336019e+02 - ME 4.426756984161849e-04 + ME 2.894775763457067e-03 Event 218 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3796,7 +3796,7 @@ Event 218 Batch 1 2 6.336891602166270e+02 5.249943224110900e+02 1.648031440577737e+02 
-3.142973702098814e+02 3 5.195346944320743e+02 -3.655895580768890e+02 -3.610279413409480e+02 7.693763263116504e+01 4 3.467761453512956e+02 -1.594047643342018e+02 1.962247972831736e+02 2.373597375787177e+02 - ME 8.957256945094420e-06 + ME 2.703962034458943e-05 Event 219 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3804,7 +3804,7 @@ Event 219 Batch 1 2 2.579228498517417e+02 -4.166553381892272e+01 1.191899344508913e+02 2.249042891828000e+02 3 7.453266221408651e+02 -3.354388163550532e+01 -3.947818065141064e+02 -6.312954196904914e+02 4 4.967505280073930e+02 7.520941545442813e+01 2.755918720632151e+02 4.063911305076915e+02 - ME 4.019449398167179e-05 + ME 6.103184694489295e-05 Event 220 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3812,7 +3812,7 @@ Event 220 Batch 1 2 4.940336288355577e+02 -2.383755021420815e+02 -2.918661661143953e+02 3.194690712363630e+02 3 7.129224521449780e+02 2.727447507998269e+02 2.535039959962389e+02 -6.079510240944473e+02 4 2.930439190194635e+02 -3.436924865774512e+01 3.836217011815621e+01 2.884819528580837e+02 - ME 1.677977866215262e-04 + ME 1.761519882509421e-04 Event 221 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3820,7 +3820,7 @@ Event 221 Batch 1 2 3.305414381337777e+02 -2.712796684963201e+02 -1.199910663213094e+02 -1.458325333632650e+02 3 7.388441803280767e+02 5.510455284380058e+02 4.375213740715825e+02 2.254209298704556e+02 4 4.306143815381457e+02 -2.797658599416856e+02 -3.175303077502730e+02 -7.958839650719051e+01 - ME 1.392897982206581e-05 + ME 1.338118621913618e-04 Event 222 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3828,7 +3828,7 @@ Event 222 Batch 1 2 4.657562074797755e+02 2.823280548971349e+02 2.956503281023745e+02 2.231828795335844e+02 3 4.791948192186352e+02 -3.228825926298714e+02 2.575611801233854e+02 
-2.429747818931873e+02 4 5.550489733015891e+02 4.055453773273638e+01 -5.532115082257600e+02 1.979190235960287e+01 - ME 2.328731171682892e-05 + ME 9.040551632672907e-05 Event 223 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3836,7 +3836,7 @@ Event 223 Batch 1 2 1.612164685986321e+02 -4.527922182271191e+01 -1.095260585492910e+01 1.543391792239740e+02 3 6.984218503485876e+02 -4.629950983513680e+02 2.605715575888556e+02 -4.533553609726805e+02 4 6.403616810527805e+02 5.082743201740799e+02 -2.496189517339264e+02 2.990161817487066e+02 - ME 2.446487784841432e-04 + ME 4.148580235863498e-04 Event 224 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3844,7 +3844,7 @@ Event 224 Batch 1 2 1.663853414671972e+02 -1.350882138037309e+02 9.706071747767010e+01 3.804401292344658e+00 3 6.436745581417563e+02 -4.469273298203079e+02 -4.412749113764766e+02 -1.408877256838118e+02 4 6.899401003910457e+02 5.820155436240389e+02 3.442141938988058e+02 1.370833243914657e+02 - ME 9.431632941984795e-05 + ME 3.449215697364171e-04 Event 225 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3852,7 +3852,7 @@ Event 225 Batch 1 2 6.702356777533546e+02 6.117158080352369e+02 -2.649249521350114e+02 -6.952987609335720e+01 3 6.901224376513153e+02 -6.564819557015361e+02 1.560869289536550e+02 1.446972404640001e+02 4 1.396418845953297e+02 4.476614766629927e+01 1.088380231813564e+02 -7.516736437064299e+01 - ME 2.456039108263569e-05 + ME 6.407468428023662e-04 Event 226 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3860,7 +3860,7 @@ Event 226 Batch 1 2 7.307777643673112e+02 -4.569648094661606e+02 4.416236342013199e+02 -3.608155616351098e+02 3 1.446420186345137e+02 4.133161435221925e+01 -3.411742569426914e+01 1.343466131828505e+02 4 6.245802169981752e+02 4.156331951139413e+02 -4.075062085070508e+02 
2.264689484522593e+02 - ME 2.774761612267077e-04 + ME 4.858390443010437e-04 Event 227 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3868,7 +3868,7 @@ Event 227 Batch 1 2 7.408615397889290e+02 -4.398089081634772e+02 -5.325812259979131e+02 2.679574278743413e+02 3 4.035753807128123e+02 3.000971513323747e+02 2.468113220276344e+02 -1.090823496201683e+02 4 3.555630794982585e+02 1.397117568311025e+02 2.857699039702786e+02 -1.588750782541728e+02 - ME 3.077346064218035e-05 + ME 3.215647103618368e-04 Event 228 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3876,7 +3876,7 @@ Event 228 Batch 1 2 5.775455372723294e+02 -3.656199842755111e+02 -6.289501053880601e+01 4.426342647953073e+02 3 3.247306314578497e+02 8.776645762339835e+01 3.116872137482897e+02 2.445634292125525e+01 4 5.977238312698206e+02 2.778535266521127e+02 -2.487922032094836e+02 -4.670906077165625e+02 - ME 3.399241079583280e-04 + ME 3.156934429573604e-03 Event 229 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3884,7 +3884,7 @@ Event 229 Batch 1 2 3.665477125629453e+02 -2.081014917770363e+02 2.317985113364040e+02 -1.931850016112187e+02 3 6.187040836990479e+02 -2.134593092471877e+02 -3.484367286517815e+02 4.645661552545953e+02 4 5.147482037380067e+02 4.215608010242241e+02 1.166382173153775e+02 -2.713811536433765e+02 - ME 8.330968691049859e-05 + ME 4.392210547845218e-04 Event 230 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3892,7 +3892,7 @@ Event 230 Batch 1 2 5.913978529013565e+02 -4.986092821675885e+02 -3.028328044703767e+02 9.712104143419764e+01 3 3.439186614041002e+02 -6.573524045766426e+01 3.216488491089061e+02 -1.024741025375549e+02 4 5.646834856945436e+02 5.643445226252528e+02 -1.881604463852933e+01 5.353061103357447e+00 - ME 2.296146042402505e-05 + ME 1.067159092411647e-04 Event 231 Batch 1 
0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3900,7 +3900,7 @@ Event 231 Batch 1 2 5.760768557894827e+02 -7.075794524290799e+01 5.609870884449791e+02 1.102331327656218e+02 3 6.038619762337338e+02 -2.467027894308989e+02 -5.464177649873398e+02 -7.221250677108812e+01 4 3.200611679767834e+02 3.174607346738069e+02 -1.456932345763944e+01 -3.802062599453370e+01 - ME 9.438631267217403e-06 + ME 8.750887998909065e-05 Event 232 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3908,7 +3908,7 @@ Event 232 Batch 1 2 7.230187249684843e+02 -2.426041066061352e+02 1.884455685697195e+02 -6.545132479937492e+02 3 4.821326920133732e+02 2.438648429837413e+02 -1.563760752388986e+01 4.156168142598493e+02 4 2.948485830181424e+02 -1.260736377606032e+00 -1.728079610458298e+02 2.388964337338999e+02 - ME 3.745272037455064e-05 + ME 4.549716999825542e-05 Event 233 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3916,7 +3916,7 @@ Event 233 Batch 1 2 3.540260977608100e+02 -1.904526694678991e+02 -1.042089619355360e+02 -2.796475475319170e+02 3 4.925592302096041e+02 1.195034224421750e+02 3.554637678715695e+02 -3.193415679485398e+02 4 6.534146720295859e+02 7.094924702572415e+01 -2.512548059360335e+02 5.989891154804569e+02 - ME 1.035644942794080e-04 + ME 2.494643034161164e-04 Event 234 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3924,7 +3924,7 @@ Event 234 Batch 1 2 1.866526101194276e+02 7.776953530733704e+01 -1.047503781897390e+01 1.693557493124073e+02 3 6.012752698516817e+02 5.974840035795012e+02 -4.570329760029643e+01 4.955829083294186e+01 4 7.120721200288899e+02 -6.752535388868379e+02 5.617833541927040e+01 -2.189140401453492e+02 - ME 6.655948749153013e-04 + ME 2.154454342135980e-03 Event 235 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 
@@ -3932,7 +3932,7 @@ Event 235 Batch 1 2 5.032945404607945e+02 1.612889276925247e+02 2.561838854094329e+02 -4.020710050699558e+02 3 7.153634726767370e+02 -3.739069589148947e+02 -1.979140468542061e+02 5.768609140624169e+02 4 2.813419868624690e+02 2.126180312223700e+02 -5.826983855522722e+01 -1.747899089924609e+02 - ME 1.137471703441233e-04 + ME 8.184939555880423e-04 Event 236 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3940,7 +3940,7 @@ Event 236 Batch 1 2 6.980797829886610e+02 -9.803971882836288e+00 4.740144261428889e+02 5.123764137440797e+02 3 5.519387921056282e+02 -1.638876688381594e+02 -3.209728652821290e+02 -4.180355032606608e+02 4 2.499814249057108e+02 1.736916407209956e+02 -1.530415608607599e+02 -9.434091048341891e+01 - ME 5.842524801707843e-05 + ME 2.813360227943072e-04 Event 237 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3948,7 +3948,7 @@ Event 237 Batch 1 2 1.604490925133743e+02 6.212857081252698e+01 9.075394990141041e+01 1.168232534834160e+02 3 6.578242662283152e+02 5.348507070161563e+02 -3.810396531957998e+02 3.842224792439630e+01 4 6.817266412583107e+02 -5.969792778286832e+02 2.902857032943894e+02 -1.552455014078122e+02 - ME 1.834055676127939e-04 + ME 8.205069948818567e-04 Event 238 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3956,7 +3956,7 @@ Event 238 Batch 1 2 2.789018340499539e+02 1.069933592962543e+02 -2.572713415352736e+02 1.225197647611563e+01 3 4.761759619803052e+02 7.755191627191856e+01 -4.591043622469822e+02 -9.976187456245104e+01 4 7.449222039697408e+02 -1.845452755681728e+02 7.163757037822556e+02 8.750989808633538e+01 - ME 9.445005309896021e-03 + ME 4.130258343824905e-02 Event 239 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3964,7 +3964,7 @@ Event 239 Batch 1 2 4.581461811054764e+02 -3.899520773556200e+02 
2.006122777919944e+02 1.326273524830990e+02 3 3.013476461129690e+02 -2.996604136348060e+02 3.145663680794619e+01 4.951799549362093e+00 4 7.405061727815548e+02 6.896124909904260e+02 -2.320689145999406e+02 -1.375791520324611e+02 - ME 4.970363634614722e-03 + ME 1.351152256907066e-02 Event 240 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3972,7 +3972,7 @@ Event 240 Batch 1 2 5.932490652975304e+02 -4.094504138983958e+01 -3.300190662632461e+02 4.912793227530680e+02 3 3.147487537014150e+02 3.081803657249563e+02 4.097350029662016e+01 -4.912038692507519e+01 4 5.920021810010543e+02 -2.672353243351168e+02 2.890455659666260e+02 -4.421589358279927e+02 - ME 3.420638167820422e-04 + ME 2.300291351402201e-03 Event 241 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3980,7 +3980,7 @@ Event 241 Batch 1 2 4.438703186026563e+01 1.425431959717181e+01 -4.430288595443099e+00 -4.180186016371768e+01 3 7.139617398095604e+02 -8.415544716076485e+01 -5.657765076565163e+02 -4.272659242311072e+02 4 7.416512283301737e+02 6.990112756359306e+01 5.702067962519594e+02 4.690677843948249e+02 - ME 9.983667466725972e-03 + ME 9.657825758456334e-03 Event 242 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3988,7 +3988,7 @@ Event 242 Batch 1 2 3.798759956195423e+02 -1.259218082844715e+02 -3.429343473884153e+02 1.041417477651927e+02 3 6.208895880511435e+02 5.354328139337265e+02 1.248673426784089e+02 -2.884852319370315e+02 4 4.992344163293142e+02 -4.095110056492549e+02 2.180670047100064e+02 1.843434841718389e+02 - ME 1.030886114253601e-05 + ME 4.523810239016752e-05 Event 243 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3996,7 +3996,7 @@ Event 243 Batch 1 2 2.320641800899440e+02 1.658639294991472e+02 7.783463994856535e+01 1.424243988788334e+02 3 6.251485586341132e+02 -2.328139095298017e+02 
-4.262931976140131e+02 3.935511574875350e+02 4 6.427872612759426e+02 6.694998003065477e+01 3.484585576654476e+02 -5.359755563663684e+02 - ME 8.493072129055412e-04 + ME 1.068434238404496e-02 Event 244 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4004,7 +4004,7 @@ Event 244 Batch 1 2 6.609991843787810e+02 -2.293678857540617e+02 -4.971623496474938e+02 -3.703240376037023e+02 3 1.091403980947070e+02 1.154537470975927e+01 -9.115666825632124e+00 -1.081445118228680e+02 4 7.298604175265119e+02 2.178225110443025e+02 5.062780164731259e+02 4.784685494265703e+02 - ME 9.635755455313371e-04 + ME 2.129811247265830e-03 Event 245 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4012,7 +4012,7 @@ Event 245 Batch 1 2 4.893629130846664e+02 -3.546974954177181e+02 3.112856868655738e+02 -1.294873298810978e+02 3 7.129026631852477e+02 5.703735458058533e+02 -4.257115617679147e+02 -4.091322034012423e+01 4 2.977344237300874e+02 -2.156760503881352e+02 1.144258749023406e+02 1.704005502212233e+02 - ME 5.312368446054512e-06 + ME 2.548352504440589e-05 Event 246 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4020,7 +4020,7 @@ Event 246 Batch 1 2 3.999457395350199e+02 9.605025124341067e+01 9.072234098128430e+01 3.774922524438975e+02 3 3.675469088581873e+02 -1.615841482674670e+01 2.570183669846762e+02 2.622426259669196e+02 4 7.325073516067924e+02 -7.989183641666393e+01 -3.477407079659604e+02 -6.397348784108170e+02 - ME 5.023802198964801e-02 + ME 1.294421983622042e-01 Event 247 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4028,7 +4028,7 @@ Event 247 Batch 1 2 6.711864521923226e+02 3.763073240556692e+02 5.338170415278108e+02 1.546719678644905e+02 3 5.231557804938882e+02 -1.057595517177888e+02 -5.121603131388773e+02 -1.409615302513522e+01 4 3.056577673137891e+02 -2.705477723378804e+02 
-2.165672838893370e+01 -1.405758148393554e+02 - ME 1.980507958825256e-05 + ME 2.873345328272106e-04 Event 248 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4036,7 +4036,7 @@ Event 248 Batch 1 2 6.307803946875938e+02 -6.240065811552291e+01 -3.654556314590158e+02 5.103256270499047e+02 3 3.935347424219227e+02 -2.188782290807617e+02 2.916853933646314e+01 -3.257470040392325e+02 4 4.756848628904837e+02 2.812788871962847e+02 3.362870921225527e+02 -1.845786230106721e+02 - ME 8.712398839363553e-05 + ME 2.418190194667681e-04 Event 249 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4044,7 +4044,7 @@ Event 249 Batch 1 2 4.326970760901858e+02 -4.070406664121577e+02 -1.467447404863359e+02 3.261392852829594e+00 3 4.839435229991528e+02 2.335311811831339e+01 2.018595963184923e+02 -4.392136936630267e+02 4 5.833594009106607e+02 3.836875482938447e+02 -5.511485583215654e+01 4.359523008101972e+02 - ME 2.487145538635957e-05 + ME 8.354140201035124e-05 Event 250 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4052,7 +4052,7 @@ Event 250 Batch 1 2 7.010671671345858e+02 -6.122994886156980e+02 -2.473946684860857e+02 2.353303785738851e+02 3 5.574643785654457e+02 3.902114201641945e+02 2.260985614407801e+02 -3.276904354069721e+02 4 2.414684542999681e+02 2.220880684515034e+02 2.129610704530562e+01 9.236005683308701e+01 - ME 1.645582299148298e-05 + ME 4.704118057291807e-05 Event 251 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4060,7 +4060,7 @@ Event 251 Batch 1 2 7.364006127103795e+02 5.379960890463808e+02 4.302640987755426e+02 2.602285070392761e+02 3 3.051282143252570e+01 -2.901685968644106e+00 1.337962970917706e+01 -2.726899336532026e+01 4 7.330865658570956e+02 -5.350944030777371e+02 -4.436437284847198e+02 -2.329595136739561e+02 - ME 6.389613086136084e-03 + ME 
8.340546584740779e-03 Event 252 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4068,7 +4068,7 @@ Event 252 Batch 1 2 5.965625584838610e+02 -7.369842915522101e+01 -5.671364104158780e+02 -1.697401534860145e+02 3 6.549338760881149e+02 -1.514014639568436e+02 6.313240788068730e+02 8.628954906696529e+01 4 2.485035654280235e+02 2.250998931120648e+02 -6.418766839099484e+01 8.345060441904938e+01 - ME 7.225550854378042e-06 + ME 3.985162011735342e-05 Event 253 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4076,7 +4076,7 @@ Event 253 Batch 1 2 5.728678540484714e+02 3.212236187283236e+01 -4.622666283104808e+02 -3.368312580807653e+02 3 7.160302400837320e+02 1.132435775281999e+02 5.206369974620781e+02 4.783433011307397e+02 4 2.111019058677967e+02 -1.453659394010323e+02 -5.837036915159722e+01 -1.415120430499744e+02 - ME 7.499676590470843e-05 + ME 1.248429186447426e-03 Event 254 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4084,7 +4084,7 @@ Event 254 Batch 1 2 5.579357369440610e+02 1.333150067790222e+02 -6.785864805882139e+01 5.375077668373273e+02 3 6.202682598689536e+02 -4.039338689731095e+02 2.012068793592834e+02 -4.255419314189536e+02 4 3.217960031869852e+02 2.706188621940872e+02 -1.333482313004621e+02 -1.119658354183736e+02 - ME 2.226893396847405e-04 + ME 6.088720978226072e-04 Event 255 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4092,5 +4092,5 @@ Event 255 Batch 1 2 7.263612771087843e+02 3.396063850675520e+02 -6.401091575508393e+02 5.028393902637355e+01 3 1.540578578981475e+02 -3.080387127739228e+01 1.060177193258910e+02 -1.074485378375538e+02 4 6.195808649930684e+02 -3.088025137901597e+02 5.340914382249483e+02 5.716459881118030e+01 - ME 4.003666322732326e-05 + ME 1.547064591142216e-04 diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt 
b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 71c2006493..386e592a4e 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq.mg +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005433082580566406  +DEBUG: model prefixing takes 0.004611015319824219  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,15 +169,15 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.077 s +8 processes with 40 diagrams generated in 0.071 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g u > t t~ u WEIGHTED<=3 @1 INFO: Processing color information for process: g u > t t~ u @1 @@ -189,44 +189,96 @@ INFO: Processing color information for process: g u~ > t t~ u~ @1 INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  -DEBUG: type(subproc_group)= [output.py at line 190]  -DEBUG: type(fortran_model)= [output.py at line 191]  -DEBUG: type(me)= me=0 [output.py at line 192]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu -FileWriter for 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  -DEBUG: type(subproc_group)= [output.py at line 190]  -DEBUG: type(fortran_model)= [output.py at line 191]  -DEBUG: type(me)= me=1 [output.py at line 192]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. 
-Generated helas calls for 2 subprocesses (10 diagrams) in 0.029 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  +DEBUG: type(subproc_group)= [output.py at line 188]  +DEBUG: type(fortran_model)= [output.py at line 189]  +DEBUG: type(me)= me=0 [output.py at line 190]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: proc_id =  0 [model_handling.py at line 1046]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  +DEBUG: 
call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1339]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  +DEBUG: type(subproc_group)= [output.py at line 188]  +DEBUG: type(fortran_model)= [output.py at line 189]  +DEBUG: type(me)= me=1 [output.py at line 190]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: proc_id =  0 [model_handling.py at line 1046]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at 
line 1298]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. 
+DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1336]  +Generated helas calls for 2 subprocesses (10 diagrams) in 0.027 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.123 s FFV1 FFV1 FFV1 FFV1 VVV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.cc +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
-DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.651s -user 0m0.593s -sys 0m0.051s +real 0m0.730s +user 0m0.605s +sys 0m0.063s diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/MatrixElementKernels.cc index 74b5239ebf..30257195b6 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/MatrixElementKernels.cc @@ -112,17 +112,10 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#elif defined( __x86_64__ ) || defined( __i386__ ) +#else bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; -#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted - bool known = false; // __builtin_cpu_supports is not supported - // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html - // See https://stackoverflow.com/q/62783908 - // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu - bool ok = true; // this is just an assumption! 
- const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc index 037662f7db..0b2899d317 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc @@ -243,19 +243,26 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); +#if not( defined __CUDACC__ and defined MGONGPU_TEST_DIVERGENCE ) + imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz +#else + if( ( blockDim.x * blockIdx.x + threadIdx.x ) % 2 == 0 ) + imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz + else + ixxxxx( momenta, 0, cHel[ihel][1], +1, w_fp[1], 1 ); +#endif oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); + oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); - FFV1_2( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1_2( w_fp[1], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -265,11 +272,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[1], w_fp[4], COUPs[0], 1.0, 
0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -279,10 +286,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -292,10 +299,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); + FFV1_1( w_fp[4], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -308,7 +315,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -846,12 +853,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for 
cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc index 12179b9801..e37fd43d6a 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc @@ -243,19 +243,19 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); + omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - ixxxxx( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 ); + ixzxxx( momenta, cHel[ihel][4], -1, w_fp[4], 4 ); - FFV1_2( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1_2( w_fp[4], w_fp[0], COUPs[0], 0., 
0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -265,11 +265,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[4], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[4], w_fp[1], COUPs[0], 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -279,10 +279,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -292,10 +292,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); + FFV1_1( w_fp[1], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[0], &_fp[0] 
); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -308,7 +308,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -846,12 +846,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk index c6c1826de7..43cee0977e 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk @@ -27,8 +27,6 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added 
by OM) - #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -222,8 +220,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +555,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gq_ttq.sa/mg5.in b/epochX/cudacpp/gq_ttq.sa/mg5.in index c0952db410..ae4d2d2c15 100644 --- a/epochX/cudacpp/gq_ttq.sa/mg5.in +++ b/epochX/cudacpp/gq_ttq.sa/mg5.in @@ -1,5 +1,4 @@ -set stdout_level DEBUG -set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ generate g q > t t~ q -output standalone_cudacpp gq_ttq.sa +output standalone_cudacpp gq_ttq.sa --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp + diff --git a/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h b/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h index 0dd5f20f71..901400d447 100644 --- 
a/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h @@ -863,7 +863,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -874,7 +873,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -887,7 +885,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -900,7 +897,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -914,7 +910,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //========================================================================== @@ -926,7 +921,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -950,7 +944,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -982,7 +975,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1014,7 +1006,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1046,7 +1037,6 @@ namespace 
mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); diff --git a/epochX/cudacpp/gq_ttq.sa/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt b/epochX/cudacpp/gq_ttq.sa/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt index d596b33ae7..dd90c94acf 100644 --- a/epochX/cudacpp/gq_ttq.sa/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt +++ b/epochX/cudacpp/gq_ttq.sa/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt @@ -4,7 +4,7 @@ Event 0 Batch 0 2 2.647483690509011e+02 7.527657265342380e+01 -2.528976247704283e+02 -2.163164141117315e+01 3 6.252973211776936e+02 -5.721080498766041e+02 -1.578766990348905e+01 2.518727230515587e+02 4 6.099543097714056e+02 4.968314772231802e+02 2.686852946739174e+02 -2.302410816403857e+02 - ME 6.254927412618323e-05 + ME 3.498510462248670e-04 Event 1 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -12,7 +12,7 @@ Event 1 Batch 0 2 2.542827954151951e+02 1.482213322085297e+02 -1.988618298139058e+02 -5.607271498295615e+01 3 6.883656117507998e+02 1.265478873489434e+02 5.602777828023585e+02 3.793700749224233e+02 4 5.573515928340058e+02 -2.747692195574731e+02 -3.614159529884527e+02 -3.232973599394667e+02 - ME 8.120933129385430e-05 + ME 7.257243108248426e-04 Event 2 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -20,7 +20,7 @@ Event 2 Batch 0 2 4.301460683791099e+02 -3.656995432079240e+02 -2.257802895903974e+02 -1.768459985405173e+01 3 5.058528987551350e+02 2.755467101243707e+02 -2.034821274188550e+02 3.722313656043856e+02 4 5.640010328657550e+02 9.015283308355326e+01 4.292624170092524e+02 -3.545467657503340e+02 - ME 1.104115154253218e-04 + ME 8.130044127338102e-04 Event 3 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -28,7 +28,7 @@ Event 3 Batch 0 2 6.758793342627306e+02 1.455349847705337e+02 
4.360940220328824e+02 -4.954335945799966e+02 3 3.008019460079605e+02 -1.607139834787174e+02 2.732727402256846e+01 2.527964523704278e+02 4 5.233187197293092e+02 1.517899870818368e+01 -4.634212960554508e+02 2.426371422095687e+02 - ME 4.288074098478053e-05 + ME 7.753277710143621e-05 Event 4 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -36,7 +36,7 @@ Event 4 Batch 0 2 3.540811678028369e+02 5.414642718170588e+01 -3.497885023717100e+02 -9.467915537920108e+00 3 7.415000547748695e+02 1.453779348794601e+00 7.277337852109665e+02 1.422102514562805e+02 4 4.044187774222938e+02 -5.560020653050046e+01 -3.779452828392566e+02 -1.327423359183605e+02 - ME 1.304731284254719e-05 + ME 2.015528729476554e-04 Event 5 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -44,7 +44,7 @@ Event 5 Batch 0 2 4.747467875786874e+02 2.462969907607520e+02 3.713870243947702e+02 1.636886763636381e+02 3 3.438196236093862e+02 -2.056491112573935e+02 2.636029701703988e+02 8.021128807897365e+01 4 6.814335888119255e+02 -4.064787950335840e+01 -6.349899945651691e+02 -2.438999644426124e+02 - ME 1.932390649640220e-04 + ME 6.140777519977192e-04 Event 6 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -52,7 +52,7 @@ Event 6 Batch 0 2 5.623951200922340e+02 4.644673798421034e+02 3.089047820108764e+02 -7.166700647426805e+01 3 2.268243199894467e+02 1.761899852590787e+02 -7.114332369064562e+01 -1.238748914321566e+02 4 7.107805599183188e+02 -6.406573651011822e+02 -2.377614583202307e+02 1.955418979064247e+02 - ME 1.929702539767979e-04 + ME 8.375373201653861e-04 Event 7 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -60,7 +60,7 @@ Event 7 Batch 0 2 4.922243378496302e+02 2.878585072835456e+02 -1.441537488072182e+02 -3.723465794939189e+02 3 2.873990637609374e+02 -5.400981623596619e+01 -8.913204919452846e+01 
-2.678369642286231e+02 4 7.203765983894325e+02 -2.338486910475794e+02 2.332857980017467e+02 6.401835437225419e+02 - ME 6.280412585349807e-04 + ME 2.045598717079573e-03 Event 8 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -68,7 +68,7 @@ Event 8 Batch 0 2 3.353309706037128e+02 -7.529439061162444e+01 -4.917829145606096e+01 -3.230466069128648e+02 3 7.169322705461503e+02 -1.597426278178964e+02 -1.460012137440150e+01 6.987567601563110e+02 4 4.477367588501368e+02 2.350370184295208e+02 6.377841283046249e+01 -3.757101532434461e+02 - ME 1.424871539111113e-03 + ME 5.176104304710922e-03 Event 9 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -76,7 +76,7 @@ Event 9 Batch 0 2 2.557626120875720e+02 2.000882245504951e+02 -5.276260741790070e+01 -1.503174088272977e+02 3 7.044202058180884e+02 -6.969679478438196e+02 -1.019614549623775e+02 6.882422911146106e+00 4 5.398171820943397e+02 4.968797232933244e+02 1.547240623802783e+02 1.434349859161515e+02 - ME 1.126010180174107e-05 + ME 6.498215193902510e-05 Event 10 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -84,7 +84,7 @@ Event 10 Batch 0 2 3.466796552973448e+02 1.172124288883391e+02 -1.804077050554743e+02 2.718475489457261e+02 3 5.174471655316495e+02 -1.610456139025784e+02 -4.497410659869822e+02 -1.988689340353916e+02 4 6.358731791710053e+02 4.383318501423926e+01 6.301487710424565e+02 -7.297861491033444e+01 - ME 8.292383053707579e-05 + ME 2.111165581639245e-04 Event 11 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -92,7 +92,7 @@ Event 11 Batch 0 2 5.730783827248506e+02 -3.059484875398849e+01 3.466457017175528e+02 -4.553235612803233e+02 3 4.410994673708892e+02 -3.026218886155176e+02 -1.990641070399019e+01 3.203005892260318e+02 4 4.858221499042607e+02 3.332167373695061e+02 -3.267392910135624e+02 1.350229720542913e+02 - ME 
2.195851954305949e-05 + ME 5.129802099928076e-05 Event 12 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -100,7 +100,7 @@ Event 12 Batch 0 2 2.275003875859171e+02 -1.247450244086003e+02 1.654605359856639e+02 9.390376067217456e+01 3 6.138170466352969e+02 3.363961838598331e+02 -2.139358085817026e+01 5.129827374509639e+02 4 6.586825657787861e+02 -2.116511594512328e+02 -1.440669551274935e+02 -6.068864981231385e+02 - ME 3.843244876666358e-03 + ME 5.249882090061186e-02 Event 13 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -108,7 +108,7 @@ Event 13 Batch 0 2 2.867684047377951e+02 7.055192702127012e+01 -2.028354730671929e+02 1.900429278217245e+02 3 6.990707050557395e+02 -5.605742285334717e+02 2.413419117565430e+02 -3.408965629057132e+02 4 5.141608902064654e+02 4.900223015122016e+02 -3.850643868935023e+01 1.508536350839886e+02 - ME 1.780264803426774e-05 + ME 6.422048006176975e-05 Event 14 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -116,7 +116,7 @@ Event 14 Batch 0 2 3.551549262960330e+02 1.090410064132905e+02 3.205839746298526e+02 1.071027348074892e+02 3 5.276349775014137e+02 3.895763694332612e+02 -2.529209653865598e+02 2.503196099590423e+02 4 6.172100962025531e+02 -4.986173758465519e+02 -6.766300924329285e+01 -3.574223447665315e+02 - ME 1.172793340377339e-04 + ME 7.422587439250419e-04 Event 15 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -124,7 +124,7 @@ Event 15 Batch 0 2 5.846731991828425e+02 7.106081559720657e+01 3.900476102503054e+02 4.297161529048979e+02 3 2.829885923647302e+02 -2.767806781033229e+02 5.223342094943639e+01 -2.732525156618249e+01 4 6.323382084524278e+02 2.057198625061163e+02 -4.422810311997417e+02 -4.023909013387152e+02 - ME 2.768931482482754e-04 + ME 1.255922738422332e-03 Event 16 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -132,7 +132,7 @@ Event 16 Batch 0 2 7.471577506095512e+02 1.666056475215676e+02 -5.784682380714994e+02 -4.425627187781379e+02 3 6.589296733908160e+02 -1.235441202519038e+02 5.251239647671507e+02 3.783780998595698e+02 4 9.391257599963087e+01 -4.306152726966400e+01 5.334427330434855e+01 6.418461891856485e+01 - ME 3.619360847906487e-05 + ME 5.526726502577864e-05 Event 17 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -140,7 +140,7 @@ Event 17 Batch 0 2 3.567490993131759e+02 3.856364495163717e+01 -1.708845728849435e+02 -3.107752047682324e+02 3 6.453207560475681e+02 4.468356462873772e+02 2.282834847349605e+02 4.057874246326636e+02 4 4.979301446392561e+02 -4.853992912390142e+02 -5.739891185001719e+01 -9.501221986443127e+01 - ME 3.400819398697452e-05 + ME 1.327369996555111e-04 Event 18 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -148,7 +148,7 @@ Event 18 Batch 0 2 4.856701782481425e+02 2.509110753153842e+02 -3.498523763974107e+02 -2.247720379690150e+02 3 3.014847498930008e+02 -1.059425909901355e+02 -2.435847754696140e+02 -1.426032222348426e+02 4 7.128450718588564e+02 -1.449684843252488e+02 5.934371518670247e+02 3.673752602038576e+02 - ME 1.704840743724005e-04 + ME 1.018512933050835e-03 Event 19 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -156,7 +156,7 @@ Event 19 Batch 0 2 5.848213503304410e+02 -3.141116763848333e+02 -1.950442390378232e+02 4.531088295091878e+02 3 5.769300027107226e+02 5.020221748138873e+02 2.252239828724832e+02 -1.734823378963534e+02 4 3.382486469588368e+02 -1.879104984290540e+02 -3.017974383465995e+01 -2.796264916128346e+02 - ME 1.566312636528492e-04 + ME 4.267017342507976e-03 Event 20 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -164,7 +164,7 @@ Event 20 Batch 0 2 
5.550938429889906e+02 -4.478597170519693e+02 -1.958065402362923e+02 -2.630791652090858e+02 3 5.585686897587655e+02 3.351111310173187e+02 -1.360174455686903e+02 4.256744830831253e+02 4 3.863374672522434e+02 1.127485860346507e+02 3.318239858049826e+02 -1.625953178740396e+02 - ME 4.443882992804106e-05 + ME 2.768271682113988e-04 Event 21 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -172,7 +172,7 @@ Event 21 Batch 0 2 6.296556563991993e+02 -3.477135312394776e+02 -1.376147989324512e+02 -5.065804111325866e+02 3 3.137568007204202e+02 1.080474571851863e+02 -2.382188236683311e+02 1.732653140250679e+02 4 5.565875428803801e+02 2.396660740542913e+02 3.758336226007823e+02 3.333150971075189e+02 - ME 2.195742323347977e-05 + ME 5.519034669639832e-05 Event 22 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -180,7 +180,7 @@ Event 22 Batch 0 2 5.583338925767162e+02 2.471586228668332e+02 -1.597599499756147e+02 -4.744745610949311e+02 3 5.378723432497920e+02 9.149532098241385e+00 4.314513680009925e+02 3.210493120152684e+02 4 4.037937641734921e+02 -2.563081549650745e+02 -2.716914180253778e+02 1.534252490796627e+02 - ME 1.393143104564022e-05 + ME 3.705224437539572e-05 Event 23 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -188,7 +188,7 @@ Event 23 Batch 0 2 6.057340011976822e+02 6.848115528115159e+01 -5.207204912425279e+02 -3.017849923015605e+02 3 6.884459352783615e+02 -2.949639632364767e+01 6.680977958792448e+02 1.635026102131439e+02 4 2.058200635239559e+02 -3.898475895750391e+01 -1.473773046367171e+02 1.382823820884168e+02 - ME 1.074117284514867e-05 + ME 2.946248744974782e-05 Event 24 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -196,7 +196,7 @@ Event 24 Batch 0 2 4.702316790647315e+02 -1.210575128627593e+02 4.313728504035306e+02 -1.427598490831810e+02 3 
7.180482366151732e+02 1.040047389253588e+02 -7.104588047260974e+02 4.956931953573291e+00 4 3.117200843200960e+02 1.705277393740069e+01 2.790859543225674e+02 1.378029171296075e+02 - ME 5.213387311993420e-06 + ME 3.146557994448562e-05 Event 25 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -204,7 +204,7 @@ Event 25 Batch 0 2 6.261365010744016e+02 -5.354018140499276e+02 -2.095559720530078e+02 2.479477970595020e+02 3 5.483958991041942e+02 5.199465180092641e+02 -9.843995208133505e+01 -1.438862620216537e+02 4 3.254675998214045e+02 1.545529604066345e+01 3.079959241343431e+02 -1.040615350378483e+02 - ME 1.695323153210731e-05 + ME 1.657640191611339e-04 Event 26 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -212,7 +212,7 @@ Event 26 Batch 0 2 4.635816356180677e+02 1.904702824079147e+02 -2.351549941335565e+02 -3.511853259118595e+02 3 3.686385821486527e+02 -2.712527815845713e+02 -6.015354190959191e+01 -2.422764621809819e+02 4 6.677797822332798e+02 8.078249917665664e+01 2.953085360431485e+02 5.934617880928415e+02 - ME 1.052251904460155e-04 + ME 3.250975879010065e-04 Event 27 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -220,7 +220,7 @@ Event 27 Batch 0 2 2.851713673150520e+02 1.387976072955998e+02 1.520424011317634e+02 -1.973348453858079e+02 3 6.747356481771329e+02 2.426633222154767e+02 -4.300238522839811e+02 4.598501858640580e+02 4 5.400929845078149e+02 -3.814609295110765e+02 2.779814511522176e+02 -2.625153404782502e+02 - ME 7.957109124083736e-05 + ME 4.155279516527712e-04 Event 28 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -228,7 +228,7 @@ Event 28 Batch 0 2 1.977804200471008e+02 -1.803202618401224e+02 -8.082809162516925e+01 -8.277519444290659e+00 3 7.197523834069627e+02 3.152541965091956e+02 6.467033971658861e+02 -2.080867841663842e+01 4 
5.824671965459364e+02 -1.349339346690732e+02 -5.658753055407169e+02 2.908619786092899e+01 - ME 1.748013159755222e-05 + ME 1.172809031809504e-04 Event 29 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -236,7 +236,7 @@ Event 29 Batch 0 2 6.123364628491765e+02 -3.746492624245139e+02 3.785128791537567e+02 -3.021950929683376e+02 3 4.056577755659300e+02 1.796205570313495e+00 -8.781658530568643e+01 3.960344074293251e+02 4 4.820057615848937e+02 3.728530568542006e+02 -2.906962938480702e+02 -9.383931446098750e+01 - ME 3.085570985177973e-04 + ME 5.496242925842306e-04 Event 30 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -244,7 +244,7 @@ Event 30 Batch 0 2 7.349194950356053e+02 7.241679607953656e+02 1.425637322816703e+01 1.244354634469208e+02 3 7.321421454671275e+02 -7.253765693071590e+02 -2.895970851972107e+01 -9.498573130653318e+01 4 3.293835949726734e+01 1.208608511793152e+00 1.470333529155409e+01 -2.944973214038765e+01 - ME 3.267107835672361e-04 + ME 5.147061682527938e-02 Event 31 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -252,7 +252,7 @@ Event 31 Batch 0 2 1.718338270585457e+02 -1.344914872264095e+02 -1.021614404532311e+02 3.165350011824393e+01 3 6.313115253715935e+02 -2.849940593920691e+02 -7.916450257599642e+01 -5.577325610990745e+02 4 6.968546475698608e+02 4.194855466184786e+02 1.813259430292275e+02 5.260790609808306e+02 - ME 1.685680846028125e-04 + ME 4.645345268703414e-04 Event 32 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -260,7 +260,7 @@ Event 32 Batch 0 2 7.235176898898732e+02 -4.762113006241282e+02 -2.880822916693121e+01 5.439400065022983e+02 3 6.603902828461299e+02 4.672103814637360e+02 1.031050210016798e+02 -4.551913221650266e+02 4 1.160920272639969e+02 9.000919160392018e+00 -7.429679183474862e+01 -8.874868433727177e+01 - ME 
2.173072900368875e-04 + ME 4.476006843186700e-03 Event 33 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -268,7 +268,7 @@ Event 33 Batch 0 2 4.786737271642286e+02 2.009638309376703e+02 4.090184839380260e+02 1.464443769121513e+02 3 3.795793219608408e+02 -6.057523839522271e+00 -8.244277697544294e+01 3.704685635647950e+02 4 6.417469508749314e+02 -1.949063070981495e+02 -3.265757069625828e+02 -5.169129404769461e+02 - ME 3.322437827682699e-03 + ME 1.351709676586880e-02 Event 34 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -276,7 +276,7 @@ Event 34 Batch 0 2 6.621583515140109e+02 -5.051303032557109e+02 -1.429543729176959e+02 4.035605363216953e+02 3 3.008522892707525e+02 8.677543723835062e+01 2.726747894692539e+02 -9.290092916351111e+01 4 5.369893592152367e+02 4.183548660173603e+02 -1.297204165515579e+02 -3.106596071581844e+02 - ME 9.294666462955388e-05 + ME 6.460854093057828e-04 Event 35 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -284,7 +284,7 @@ Event 35 Batch 0 2 6.158114977149372e+02 2.502256147979830e+02 4.233348779616202e+00 5.626659943296695e+02 3 1.476397433483021e+02 -1.670550278282843e+01 -6.055370982200890e+01 1.336101351676488e+02 4 7.365487589367605e+02 -2.335201120151546e+02 5.632036104239269e+01 -6.962761294973184e+02 - ME 5.450893768264864e-01 + ME 2.101231899117793e+00 Event 36 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -292,7 +292,7 @@ Event 36 Batch 0 2 7.182456511154913e+02 -7.463771462544163e+01 -6.667773110518942e+02 2.563475070450518e+02 3 4.860008755751825e+02 -7.840660561780868e+01 4.141081959217036e+02 -2.419992919944378e+02 4 2.957534733093268e+02 1.530443202432501e+02 2.526691151301903e+02 -1.434821505061448e+01 - ME 1.793136635525090e-05 + ME 9.644531209480271e-05 Event 37 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -300,7 +300,7 @@ Event 37 Batch 0 2 5.672182018814327e+02 -2.031706828392718e+00 -5.267408190306547e+02 2.104197478372323e+02 3 4.664069288608281e+02 3.712365792892206e+02 2.604523782658950e+02 -1.090109358856581e+02 4 4.663748692577387e+02 -3.692048724608279e+02 2.662884407647597e+02 -1.014088119515743e+02 - ME 1.885829354904198e-05 + ME 1.216876552012178e-04 Event 38 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -308,7 +308,7 @@ Event 38 Batch 0 2 5.068057345787187e+02 4.883513201966852e+02 -7.570036138649985e+01 -1.124032737511800e+02 3 3.871140338254017e+02 -1.153787089711745e+02 -3.599073977747533e+02 -8.373585688177315e+01 4 6.060802315958797e+02 -3.729726112255107e+02 4.356077591612532e+02 1.961391306329531e+02 - ME 2.004468492837133e-05 + ME 1.006736553113524e-04 Event 39 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -316,7 +316,7 @@ Event 39 Batch 0 2 4.960337392567769e+02 -3.669089247616476e+02 2.651961920161227e+02 -2.027271347192069e+02 3 2.837821967046824e+02 -2.822567153069604e+02 -2.935613327724534e+01 -1.303560381865560e+00 4 7.201840640385411e+02 6.491656400686079e+02 -2.358400587388775e+02 2.040306951010725e+02 - ME 2.738639406673165e-04 + ME 1.372807525012575e-03 Event 40 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -324,7 +324,7 @@ Event 40 Batch 0 2 3.080730228651936e+02 -3.065830270999447e+02 -2.484308296331460e+01 1.728167064871203e+01 3 6.842346640746094e+02 4.630487823766367e+02 8.554554725666550e+01 -4.964321303112498e+02 4 5.076923130601962e+02 -1.564657552766919e+02 -6.070246429335075e+01 4.791504596625378e+02 - ME 4.316353181637933e-05 + ME 4.192363154074847e-05 Event 41 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -332,7 +332,7 @@ Event 41 Batch 0 2 
1.602650851118221e+02 -1.258781096038287e+02 -9.817642232798531e+01 1.417706342452912e+01 3 7.146392966623014e+02 6.799675591776853e+02 -1.019163870176435e+02 1.948499239342933e+02 4 6.250956182258764e+02 -5.540894495738563e+02 2.000928093456288e+02 -2.090269873588226e+02 - ME 6.118266190948034e-05 + ME 4.523507186168379e-04 Event 42 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -340,7 +340,7 @@ Event 42 Batch 0 2 1.687893235969910e+02 1.289401357197518e+02 4.788693514682045e+01 9.783209393213438e+01 3 7.042017295435162e+02 -1.022058447296739e+02 -6.640064324330017e+02 -2.110675220936915e+02 4 6.270089468594927e+02 -2.673429099007782e+01 6.161194972861812e+02 1.132354281615572e+02 - ME 4.091574289077424e-05 + ME 1.686356189272381e-04 Event 43 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -348,7 +348,7 @@ Event 43 Batch 0 2 4.729783670130408e+02 -7.983817933050123e+01 9.052957805204315e+01 4.573169538528310e+02 3 5.638402597824536e+02 4.785250044669658e+02 7.435095949863268e+01 -2.887933404236804e+02 4 4.631813732045056e+02 -3.986868251364646e+02 -1.648805375506758e+02 -1.685236134291506e+02 - ME 2.654067897204875e-04 + ME 5.938757690519573e-04 Event 44 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -356,7 +356,7 @@ Event 44 Batch 0 2 1.774791104122977e+02 -1.952605982635784e+01 6.371003613266313e+01 1.644949814321787e+02 3 7.194816205691247e+02 -3.678871192485065e+02 2.644831693887214e+01 -6.177486190667772e+02 4 6.030392690185777e+02 3.874131790748646e+02 -9.015835307153536e+01 4.532536376345985e+02 - ME 1.390282437939369e-04 + ME 2.092333697371024e-04 Event 45 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -364,7 +364,7 @@ Event 45 Batch 0 2 7.477488480180839e+02 -3.787655987618923e+02 1.634662296474455e+02 6.236535517992064e+02 3 
7.458113398274099e+02 3.819163358711198e+02 -1.661042992235261e+02 -6.186952632673017e+02 4 6.439812154506046e+00 -3.150737109227506e+00 2.638069576080606e+00 -4.958288531904773e+00 - ME 4.591622113024210e-03 + ME 9.377954359926730e-02 Event 46 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -372,7 +372,7 @@ Event 46 Batch 0 2 3.243146757688279e+02 -4.392587631431587e+00 -2.496903827548322e+02 -2.069188895501946e+02 3 5.341608950426614e+02 -2.704482657861201e+02 2.711825143656835e+02 -3.723515022507137e+02 4 6.415244291885106e+02 2.748408534175518e+02 -2.149213161085120e+01 5.792703918009084e+02 - ME 7.845213441237594e-05 + ME 1.879047912263320e-04 Event 47 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -380,7 +380,7 @@ Event 47 Batch 0 2 6.742198761450968e+02 -3.282965096491567e+02 5.301803926793563e+02 -2.563251730900704e+02 3 6.484148720042493e+02 3.527030795571956e+02 -3.975273148506379e+02 3.715029176935211e+02 4 1.773652518506536e+02 -2.440656990803885e+01 -1.326530778287185e+02 -1.151777446034508e+02 - ME 5.254395938575492e-05 + ME 1.136665455996279e-03 Event 48 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -388,7 +388,7 @@ Event 48 Batch 0 2 7.321401810535270e+02 -1.843482647928687e+02 4.412348098999295e+02 5.543976952635381e+02 3 7.293058265076229e+02 2.182722651304250e+02 -4.435200216702997e+02 -5.362221528717154e+02 4 3.855399243885009e+01 -3.392400033755636e+01 2.285211770370227e+00 -1.817554239182278e+01 - ME 2.330290263553363e-04 + ME 2.278442596973106e-03 Event 49 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -396,7 +396,7 @@ Event 49 Batch 0 2 3.511117284856090e+02 -3.272266866652174e+02 5.199533974843238e+01 1.161835877338140e+02 3 7.326526490901410e+02 6.615045961628415e+02 -2.993354007364775e+02 -9.792799058578566e+01 4 
4.162356224242500e+02 -3.342779094976241e+02 2.473400609880451e+02 -1.825559714802838e+01 - ME 7.863589115869630e-06 + ME 8.806759903737244e-05 Event 50 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -404,7 +404,7 @@ Event 50 Batch 0 2 7.322170903075255e+02 2.740692406080844e+02 1.952596610981929e+01 -6.787095515302592e+02 3 3.078559130669522e+02 -1.663333363406682e+02 8.625456119089935e+01 2.442716420418760e+02 4 4.599269966255216e+02 -1.077359042674159e+02 -1.057805273007185e+02 4.344379094883832e+02 - ME 6.765758192049922e-05 + ME 7.579426018596712e-05 Event 51 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -412,7 +412,7 @@ Event 51 Batch 0 2 3.473696038265160e+02 -2.922314643158454e+02 -6.759614889845234e+01 -1.752060888796554e+02 3 5.389399151999496e+02 -2.449040872454050e+02 9.346474502284556e+01 4.708954891311219e+02 4 6.136904809735339e+02 5.371355515612503e+02 -2.586859612439322e+01 -2.956894002514666e+02 - ME 2.035652280642710e-04 + ME 4.687828430739845e-04 Event 52 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -420,7 +420,7 @@ Event 52 Batch 0 2 6.818614816439094e+02 5.970116833066725e+02 3.013730734325877e+02 1.329902280423528e+02 3 2.108623144448950e+02 -4.198344769951654e+00 -1.698802183673395e+02 -1.248439063859965e+02 4 6.072762039111957e+02 -5.928133385367207e+02 -1.314928550652483e+02 -8.146321656356344e+00 - ME 4.047005152694340e-05 + ME 1.636869658416981e-04 Event 53 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -428,7 +428,7 @@ Event 53 Batch 0 2 5.157714002491656e+02 -5.140718537651751e+02 -4.182413977701254e+01 1.003899065692042e+00 3 5.148181840855221e+02 2.868792199999327e+02 1.974924151010656e+02 3.791237552236646e+02 4 4.694104156653124e+02 2.271926337652422e+02 -1.556682753240530e+02 -3.801276542893567e+02 - ME 
1.547751010871262e-04 + ME 3.182294022992135e-03 Event 54 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -436,7 +436,7 @@ Event 54 Batch 0 2 6.433410767101752e+02 2.586883950027282e+02 -5.809813083922761e+02 9.710187728524583e+01 3 6.928799734080563e+02 -1.579832568796111e+02 6.405510983559769e+02 -2.117031848853746e+02 4 1.637789498817686e+02 -1.007051381231171e+02 -5.956978996370073e+01 1.146013076001288e+02 - ME 1.302720215079095e-05 + ME 3.280140142776471e-05 Event 55 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -444,7 +444,7 @@ Event 55 Batch 0 2 7.193759752058201e+02 -3.536444481659258e+02 -7.212523476050659e+01 -6.222823703878202e+02 3 5.307053661742267e+02 2.409461639849982e+02 1.900944302490854e+02 4.329633233142391e+02 4 2.499186586199529e+02 1.126982841809279e+02 -1.179691954885788e+02 1.893190470735813e+02 - ME 3.087450123310173e-05 + ME 3.939174164528502e-05 Event 56 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -452,7 +452,7 @@ Event 56 Batch 0 2 3.858864959547013e+02 1.815174721437793e+02 3.218581876578407e+02 -1.112074732396182e+02 3 4.484505297447187e+02 -3.244105157450006e+02 2.934585578803474e+02 -9.873079412811623e+01 4 6.656629743005793e+02 1.428930436012212e+02 -6.153167455381879e+02 2.099382673677345e+02 - ME 4.275995533811995e-05 + ME 2.326138625268126e-04 Event 57 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -460,7 +460,7 @@ Event 57 Batch 0 2 5.284589752749192e+02 3.868194647882293e+02 -1.709996888155517e+02 3.168575336559793e+02 3 6.299868555278971e+02 -1.587414880613579e+02 2.327134172236622e+02 -5.634971548731005e+02 4 3.415541691971835e+02 -2.280779767268714e+02 -6.171372840811043e+01 2.466396212171210e+02 - ME 2.211478424702745e-05 + ME 3.474853710074164e-05 Event 58 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -468,7 +468,7 @@ Event 58 Batch 0 2 6.172037319760957e+02 -2.246119436411400e+02 -2.286037628748728e+01 5.744278237820342e+02 3 5.117934503257735e+02 1.262762853074207e+02 3.215736628881853e+02 -3.775939815489577e+02 4 3.710028176981306e+02 9.833565833371921e+01 -2.987132866006979e+02 -1.968338422330765e+02 - ME 1.857727050583390e-04 + ME 6.183305374210038e-04 Event 59 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -476,7 +476,7 @@ Event 59 Batch 0 2 7.388935626701858e+02 -3.912134623809441e+02 -5.457789630286015e+02 3.082872805076099e+02 3 1.936051438730608e+02 1.561492575196544e+02 8.304673385628061e+01 -7.876294246644987e+01 4 5.675012934567535e+02 2.350642048612896e+02 4.627322291723209e+02 -2.295243380411600e+02 - ME 6.745345781245190e-05 + ME 4.116991424436793e-04 Event 60 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -484,7 +484,7 @@ Event 60 Batch 0 2 7.258141426633659e+02 -5.584991156701968e+02 1.635894950857984e+02 4.337319270970709e+02 3 2.789580074371136e+02 2.331554478032953e+02 6.512410160032128e+01 -1.386180308029247e+02 4 4.952278498995201e+02 3.253436678669015e+02 -2.287135966861195e+02 -2.951138962941461e+02 - ME 9.170244877267536e-05 + ME 7.295672680059989e-04 Event 61 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -492,15 +492,15 @@ Event 61 Batch 0 2 5.906141202026897e+02 4.485275282318680e+02 -2.043613424290570e+02 3.253990429020988e+02 3 4.163572165237975e+02 -4.021600557528675e+02 -4.112755461437413e+01 9.964509802161204e+01 4 4.930286632735124e+02 -4.636747247900051e+01 2.454888970434311e+02 -4.250441409237108e+02 - ME 1.836685601489136e-04 + ME 5.845307122272604e-03 Event 62 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 1 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 -7.500000000000000e+02 2 7.346180891175762e+02 3.693463141798367e+02 7.549194961263061e+01 -6.305140780380819e+02 3 4.420621433230785e+02 -2.806743363126464e+02 3.467380983154045e+01 3.397625382625571e+02 - 4 3.233197675593452e+02 -8.867197786719018e+01 -1.101657594441711e+02 2.907515397755248e+02 - ME 3.490896135533686e-05 + 4 3.233197675593453e+02 -8.867197786719018e+01 -1.101657594441711e+02 2.907515397755249e+02 + ME 3.963631774242112e-05 Event 63 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -508,7 +508,7 @@ Event 63 Batch 0 2 6.451039732729313e+02 -2.415045377667665e+02 1.990362537024482e+02 -5.641092662620230e+02 3 3.260870385294104e+02 2.061141051805976e+02 -2.496695602716584e+02 3.892098426606745e+01 4 5.288089881976584e+02 3.539043258616898e+01 5.063330656921013e+01 5.251882819959555e+02 - ME 4.428689394331114e-04 + ME 4.832224458906289e-04 Event 64 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -516,7 +516,7 @@ Event 64 Batch 0 2 5.275973380665291e+02 -6.064553482667328e+01 4.309976929667101e+02 -2.981980196075213e+02 3 5.799838776791826e+02 3.279821268626862e+02 -1.824214634122377e+02 4.421893627315650e+02 4 3.924187842542880e+02 -2.673365920360130e+02 -2.485762295544724e+02 -1.439913431240437e+02 - ME 4.205989960223865e-05 + ME 2.175617604507715e-04 Event 65 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -524,7 +524,7 @@ Event 65 Batch 0 2 6.480172869826541e+02 2.720879118036237e+02 -5.153900904044360e+02 -2.833154199679406e+02 3 7.075023253568394e+02 -3.440299289242928e+02 4.709796137500282e+02 4.004761563708322e+02 4 1.444803876605064e+02 7.194201712066916e+01 4.441047665440794e+01 -1.171607364028916e+02 - ME 1.103463366798231e-04 + ME 4.989956280474397e-03 Event 66 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 
7.500000000000000e+02 @@ -532,7 +532,7 @@ Event 66 Batch 0 2 5.472978185025795e+02 4.857452785131266e+02 -2.223654169683454e+02 -1.189119332799752e+02 3 3.203062148499983e+02 1.169702135976477e+02 2.922172461416276e+02 -5.935588816501102e+01 4 6.323959666474225e+02 -6.027154921107744e+02 -6.985182917328234e+01 1.782678214449862e+02 - ME 2.913920636000223e-05 + ME 1.346850069104626e-04 Event 67 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -540,7 +540,7 @@ Event 67 Batch 0 2 4.264671493042950e+02 1.195959046886511e+02 -2.647539231733031e+02 3.122121220929446e+02 3 5.059969655247565e+02 3.777175441887567e+02 -7.608313561896731e+00 -3.366073372596325e+02 4 5.675358851709483e+02 -4.973134488774080e+02 2.723622367352000e+02 2.439521516668857e+01 - ME 4.009347519102052e-05 + ME 9.763221977220593e-05 Event 68 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -548,7 +548,7 @@ Event 68 Batch 0 2 5.996105691520872e+02 -3.814725562071957e+02 -3.417794545715573e+02 3.117664637712124e+02 3 2.164196744806214e+02 1.292759463548889e+02 -1.184749651041615e+02 1.268419798013013e+02 4 6.839697563672917e+02 2.521966098523068e+02 4.602544196757188e+02 -4.386084435725137e+02 - ME 6.175473672610461e-04 + ME 2.936083529685707e-03 Event 69 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -556,7 +556,7 @@ Event 69 Batch 0 2 4.950546755511076e+02 -1.873718558932053e+02 -4.578972175289678e+02 -1.735101101888631e+01 3 4.768584394819691e+02 -1.830244097668608e+02 2.985566003539791e+02 -3.236664843936508e+02 4 5.280868849669230e+02 3.703962656600661e+02 1.593406171749887e+02 3.410174954125370e+02 - ME 1.367292435278724e-05 + ME 5.234212626720279e-05 Event 70 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -564,7 +564,7 @@ Event 70 Batch 0 2 6.918343395272258e+02 6.895733556028865e+02 
-5.391072441382606e+01 -1.473005040127906e+01 3 2.169590284692678e+02 -1.127375202028747e+02 1.807969800614662e+02 4.091361110301506e+01 4 5.912066320035063e+02 -5.768358354000119e+02 -1.268862556476402e+02 -2.618356070173603e+01 - ME 3.526540789264872e-05 + ME 1.591740981760110e-04 Event 71 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -572,7 +572,7 @@ Event 71 Batch 0 2 5.156371334918733e+02 1.547202099034306e+02 -4.807172487652236e+02 1.041836686949964e+02 3 3.718518305526428e+02 -8.969821893462726e+01 -7.521366892975188e+01 -3.529460545344468e+02 4 6.125110359554843e+02 -6.502199096880338e+01 5.559309176949756e+02 2.487623858394504e+02 - ME 2.860782472746935e-05 + ME 1.125100552069616e-04 Event 72 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -580,7 +580,7 @@ Event 72 Batch 0 2 2.110577464974889e+02 5.009520239746097e+01 -1.453533690489527e+02 -1.445968227848547e+02 3 7.317124633441161e+02 -4.429659627226336e+02 5.264774879404380e+02 2.490095170354977e+02 4 5.572297901583943e+02 3.928707603251725e+02 -3.811241188914850e+02 -1.044126942506430e+02 - ME 2.666441446531882e-05 + ME 1.823320413479066e-04 Event 73 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -588,7 +588,7 @@ Event 73 Batch 0 2 3.932257450488246e+02 3.105005764664288e+01 -2.932679039283983e+02 2.601082794045340e+02 3 5.658879124646472e+02 3.645905401293642e+02 4.244364556305355e+02 8.459646951004230e+01 4 5.408863424865281e+02 -3.956405977760074e+02 -1.311685517021372e+02 -3.447047489145762e+02 - ME 7.825486685913998e-05 + ME 8.953763196089171e-04 Event 74 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -596,7 +596,7 @@ Event 74 Batch 0 2 1.374854102925440e+02 7.785209805930555e+01 4.289805712042688e+01 1.048858692406466e+02 3 6.381281910764947e+02 -1.004137270491618e+02 
-1.591026937267357e+02 6.097630724433484e+02 4 7.243863986309617e+02 2.256162898985645e+01 1.162046366063089e+02 -7.146489416839951e+02 - ME 1.919068868336380e+00 + ME 1.395531292378326e+01 Event 75 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -604,7 +604,7 @@ Event 75 Batch 0 2 5.936883054156938e+02 -3.438525101293572e+00 -2.706855443967301e+02 5.283780053968293e+02 3 5.912298912592892e+02 1.109657062166288e+02 4.832067437414102e+02 -3.221034603433170e+02 4 3.150818033250173e+02 -1.075271811153352e+02 -2.125211993446803e+02 -2.062745450535123e+02 - ME 1.642862842910461e-04 + ME 1.379908325625592e-03 Event 76 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -612,7 +612,7 @@ Event 76 Batch 0 2 6.619486867997672e+02 2.801967015359571e+01 2.136411519593737e+02 6.258980909300584e+02 3 1.201252731414031e+02 2.274423842261747e+01 -8.754996679960182e+01 7.904292618103446e+01 4 7.179260400588295e+02 -5.076390857621322e+01 -1.260911851597719e+02 -7.049410171110928e+02 - ME 7.362202483972824e-01 + ME 5.870483941147637e+00 Event 77 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -620,7 +620,7 @@ Event 77 Batch 0 2 7.456676259451606e+02 -7.346624001550109e+02 6.511229493320701e+01 -1.097804865615983e+02 3 1.284204120828029e+02 1.251494694834492e+02 2.867183268690428e+01 2.708973588335753e+00 4 6.259119619720373e+02 6.095129306715618e+02 -9.378412762011118e+01 1.070715129732624e+02 - ME 4.400761364703354e-05 + ME 1.662775178233579e-04 Event 78 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -628,7 +628,7 @@ Event 78 Batch 0 2 7.040158920877628e+02 6.911264613612161e+02 -6.659640240533211e+01 -1.163937709034254e+02 3 5.185438503615327e+02 -4.976050220224222e+02 -1.270913363611937e+02 7.158742227342900e+01 4 2.774402575507044e+02 -1.935214393387939e+02 
1.936877387665258e+02 4.480634862999637e+01 - ME 9.352750539306009e-06 + ME 5.328004946641866e-05 Event 79 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -636,7 +636,7 @@ Event 79 Batch 0 2 6.777589592768838e+02 1.742725197144059e+02 -4.776543849198212e+01 6.532264221831092e+02 3 5.725002211294488e+02 -1.786302554544233e+02 -1.627852110918317e+02 -5.189881598643107e+02 4 2.497408195936665e+02 4.357735740017474e+00 2.105506495838138e+02 -1.342382623187985e+02 - ME 3.598558866345749e-04 + ME 9.179311580246363e-04 Event 80 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -644,7 +644,7 @@ Event 80 Batch 0 2 6.240819586861880e+02 4.679310297228965e+02 -4.118464023828053e+02 -3.002304821964348e+01 3 6.688675489057649e+02 -5.494372353172420e+02 3.251429131208653e+02 1.994607943266771e+02 4 2.070504924080468e+02 8.150620559434545e+01 8.670348926194001e+01 -1.694377461070337e+02 - ME 5.382869847396148e-05 + ME 3.575286400583300e-03 Event 81 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -652,7 +652,7 @@ Event 81 Batch 0 2 5.198056748722776e+02 1.034797897616987e+02 -2.885605608993972e+02 4.197888462474007e+02 3 5.672098642055398e+02 -4.160331805498524e+02 2.087659545613757e+01 -3.849773895903518e+02 4 4.129844609221831e+02 3.125533907881537e+02 2.676839654432596e+02 -3.481145665704891e+01 - ME 3.612255741613163e-05 + ME 1.018936778946332e-04 Event 82 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -660,7 +660,7 @@ Event 82 Batch 0 2 2.057598609140514e+02 6.385349666266659e+01 -2.765433460911293e+01 1.936364870179372e+02 3 6.235840147705873e+02 4.654039114453895e+02 -3.828889383639962e+02 -1.601633028106901e+02 4 6.706561243153629e+02 -5.292574081080552e+02 4.105432729731107e+02 -3.347318420724690e+01 - ME 3.172622561805068e-04 + ME 6.930850923220120e-04 Event 83 
Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -668,7 +668,7 @@ Event 83 Batch 0 2 6.583322583736492e+02 1.865539504254553e+02 -1.926584839569474e+02 6.012334775737429e+02 3 3.620902826842561e+02 -3.107067244571256e+02 -1.177956631152976e+01 -1.855584705935048e+02 4 4.795774589420946e+02 1.241527740316703e+02 2.044380502684771e+02 -4.156750069802382e+02 - ME 6.756528802944365e-04 + ME 8.385116111585099e-03 Event 84 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -676,7 +676,7 @@ Event 84 Batch 0 2 4.849329564663161e+02 -2.622178945286150e+02 4.068620488841210e+02 -2.941124332559817e+01 3 4.737588937677760e+02 6.014532316188546e+01 -1.333934272225749e+02 4.505954095412368e+02 4 5.413081497659077e+02 2.020725713667296e+02 -2.734686216615461e+02 -4.211841662156386e+02 - ME 1.017468409980153e-03 + ME 5.162990427398554e-03 Event 85 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -684,7 +684,7 @@ Event 85 Batch 0 2 7.085742632080854e+02 -2.174614026040270e+02 -5.283468657604088e+02 -4.190914152061853e+02 3 5.315764222715953e+02 8.528530557199829e+00 3.820092234108129e+02 3.695533927738615e+02 4 2.598493145203187e+02 2.089328720468272e+02 1.463376423495959e+02 4.953802243232388e+01 - ME 1.894143727100354e-05 + ME 6.335517668355978e-05 Event 86 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -692,7 +692,7 @@ Event 86 Batch 0 2 1.724500140939190e+02 1.231518677708316e+02 -1.121928207497684e+01 1.201946443701656e+02 3 7.028475062724231e+02 -6.467096040851287e+01 -4.553168759141600e+02 -5.315061866629339e+02 4 6.247024796336580e+02 -5.848090736231883e+01 4.665361579891369e+02 4.113115422927684e+02 - ME 5.311384036847167e-05 + ME 1.165531323127631e-04 Event 87 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -700,7 
+700,7 @@ Event 87 Batch 0 2 1.942099203196796e+02 -7.751148196958454e+01 -1.356691819650310e+02 -1.153400900745028e+02 3 7.314670447251594e+02 1.724617634710876e+02 7.020747158546045e+02 1.113196793791551e+02 4 5.743230349551606e+02 -9.495028150150301e+01 -5.664055338895735e+02 4.020410695347637e+00 - ME 1.874087134673149e-05 + ME 1.237609879052555e-04 Event 88 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -708,7 +708,7 @@ Event 88 Batch 0 2 6.382497024023744e+02 2.632142028760094e+02 -5.613974181649784e+02 1.513733956108635e+02 3 3.997044228265544e+02 -5.264940326118349e+01 3.435187961344461e+02 1.974500004195773e+02 4 4.620458747710724e+02 -2.105647996148253e+02 2.178786220305324e+02 -3.488233960304407e+02 - ME 9.699609186666195e-05 + ME 1.863821317258467e-03 Event 89 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -716,7 +716,7 @@ Event 89 Batch 0 2 1.419006640093282e+02 -8.677155154367878e+01 6.457545216231642e+01 -9.185046144153740e+01 3 7.131224514048055e+02 5.460003286026870e+02 -4.154556538506974e+02 -1.944836022569670e+02 4 6.449768845858670e+02 -4.592287770590082e+02 3.508802016883808e+02 2.863340636985044e+02 - ME 2.974199953519439e-05 + ME 1.136115495374629e-04 Event 90 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -724,7 +724,7 @@ Event 90 Batch 0 2 5.730615760623938e+02 -6.017783679015001e+01 -5.202921970507185e+02 -2.325386583054727e+02 3 5.389913703864468e+02 -6.302812531165206e+01 2.446311215742109e+02 4.761247390423042e+02 4 3.879470535511588e+02 1.232059621018019e+02 2.756610754765076e+02 -2.435860807368315e+02 - ME 1.667772733247344e-04 + ME 1.094721025518881e-03 Event 91 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -732,7 +732,7 @@ Event 91 Batch 0 2 4.546745139784350e+02 -1.470341619195494e+02 -1.726383255301703e+02 
-3.940886669878754e+02 3 5.110976540119647e+02 -2.482119727393537e+02 -1.865817698532448e+02 4.059542728975803e+02 4 5.342278320096005e+02 3.952461346589030e+02 3.592200953834151e+02 -1.186560590970480e+01 - ME 4.420313882846059e-05 + ME 8.789722587847313e-05 Event 92 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -740,7 +740,7 @@ Event 92 Batch 0 2 6.683728375977241e+02 -1.148152650923627e+02 3.458291789782991e+02 5.603051703379153e+02 3 2.872567998557088e+02 1.635098024620329e+02 7.847331657016402e+01 -2.227620976482501e+02 4 5.443703625465666e+02 -4.869453736967034e+01 -4.243024955484631e+02 -3.375430726896653e+02 - ME 2.265252332392545e-04 + ME 8.270083568815311e-04 Event 93 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -748,7 +748,7 @@ Event 93 Batch 0 2 5.666948073002088e+02 5.408074886689032e+01 5.639942928586390e+02 -1.134525653745258e+01 3 6.168025492529713e+02 2.439040545997395e+02 -5.541969602989467e+02 1.175666879272316e+02 4 3.165026434468199e+02 -2.979848034666298e+02 -9.797332559692304e+00 -1.062214313897791e+02 - ME 1.251778043268437e-05 + ME 1.664960428447917e-04 Event 94 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -756,7 +756,7 @@ Event 94 Batch 0 2 4.964349376711385e+02 8.445930034540567e+01 -2.409007074648561e+02 -4.257712097695705e+02 3 5.660980232871289e+02 1.373833465612049e+02 5.210669225216058e+02 1.734417778711397e+02 4 4.374670390417324e+02 -2.218426469066104e+02 -2.801662150567495e+02 2.523294318984307e+02 - ME 1.007141026120618e-05 + ME 3.431641292834382e-05 Event 95 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -764,7 +764,7 @@ Event 95 Batch 0 2 7.117074025057361e+02 -3.227984571262278e+02 4.276971164854593e+02 -4.684055501468919e+02 3 1.264078228725325e+02 8.675876182178401e+01 5.074873328843479e+01 
7.665781760618943e+01 4 6.618847746217315e+02 2.360396953044439e+02 -4.784458497738940e+02 3.917477325407025e+02 - ME 8.653822330208906e-05 + ME 2.121249861094822e-04 Event 96 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -772,7 +772,7 @@ Event 96 Batch 0 2 7.329769441659936e+02 -9.642859092211874e+01 6.903981466332597e+02 -2.265107649915406e+02 3 3.937873938465678e+02 -4.837693103302091e+01 -3.847118583018795e+02 6.873841850241256e+01 4 3.732356619874385e+02 1.448055219551397e+02 -3.056862883313802e+02 1.577723464891279e+02 - ME 9.822975749896163e-06 + ME 3.473186069800973e-05 Event 97 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -780,7 +780,7 @@ Event 97 Batch 0 2 3.394989963266853e+01 6.003767577498499e+00 -2.078495220615399e+01 2.616364312804199e+01 3 7.377311980366451e+02 -5.308290258162607e+02 4.681853362634530e+02 2.080152802450354e+02 4 7.283189023306861e+02 5.248252582387622e+02 -4.474003840572991e+02 -2.341789233730774e+02 - ME 2.729355315721549e-03 + ME 2.063600678642283e-02 Event 98 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -788,7 +788,7 @@ Event 98 Batch 0 2 2.496912687496082e+02 -2.485814905959506e+02 -5.435228288348340e-01 -2.350907922099247e+01 3 7.458289852530976e+02 7.373315781279124e+02 9.801365830907572e+01 -5.473885205171283e+01 4 5.044797459972945e+02 -4.887500875319618e+02 -9.747013548024091e+01 7.824793127270530e+01 - ME 8.091578731489026e-06 + ME 6.800308216903296e-05 Event 99 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -796,7 +796,7 @@ Event 99 Batch 0 2 1.698125854886770e+02 8.336002034290719e+01 8.774494220182726e+01 -1.191144253093525e+02 3 6.496622934125946e+02 5.714329899004554e+02 -6.230613627727958e+01 3.027265745152471e+02 4 6.805251210987285e+02 -6.547930102433627e+02 -2.543880592454771e+01 
-1.836121492058947e+02 - ME 1.856310681395454e-04 + ME 6.115029137493471e-04 Event 100 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -804,7 +804,7 @@ Event 100 Batch 0 2 6.141460480129781e+02 -5.842473718080511e+02 -5.092222124447417e+01 1.823110095657221e+02 3 3.909476383151783e+02 2.539115798088024e+02 -2.930333502072385e+02 -5.000421191795168e+01 4 4.949063136718440e+02 3.303357919992488e+02 3.439555714517127e+02 -1.323067976477707e+02 - ME 2.380755205932631e-05 + ME 1.550407956048336e-04 Event 101 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -812,7 +812,7 @@ Event 101 Batch 0 2 7.469346538870473e+02 3.524232024688497e+02 -1.488240016505349e+02 -6.415299525912136e+02 3 6.502268999047169e+02 -2.777200960400715e+02 1.351761574712158e+02 5.721835160737410e+02 4 1.028384462082358e+02 -7.470310642877820e+01 1.364784417931910e+01 6.934643651747267e+01 - ME 7.777208667430486e-05 + ME 1.080054053054822e-04 Event 102 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -820,7 +820,7 @@ Event 102 Batch 0 2 7.426790432885583e+02 -3.141071077544728e+02 6.615000409077074e+02 1.238005738162371e+02 3 6.735764515788642e+01 -4.139700837311957e+00 -5.533298776898177e+01 -3.818606686673834e+01 4 6.899633115535552e+02 3.182468085917849e+02 -6.061670531387255e+02 -8.561450694949879e+01 - ME 1.796768498680773e-04 + ME 6.292262541994918e-04 Event 103 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -828,7 +828,7 @@ Event 103 Batch 0 2 4.837874798175253e+02 -2.731724972668680e+02 1.247027290420595e+02 -3.793103501549069e+02 3 4.466406321977809e+02 -2.904538080082218e+02 -1.536665846758871e+02 3.025078850172422e+02 4 5.695718879846930e+02 5.636263052750895e+02 2.896385563382777e+01 7.680246513766473e+01 - ME 2.998858312831636e-05 + ME 8.140894767450013e-05 Event 104 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -836,7 +836,7 @@ Event 104 Batch 0 2 5.788466572679498e+02 3.572346730226224e+02 -3.682137844992378e+02 2.680773207965347e+02 3 2.925711988065158e+02 2.155069407513812e+02 1.697995838195863e+02 -1.016010147279926e+02 4 6.285821439255348e+02 -5.727416137740034e+02 1.984142006796517e+02 -1.664763060685422e+02 - ME 7.634200862908681e-05 + ME 2.849770726480251e-04 Event 105 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -844,7 +844,7 @@ Event 105 Batch 0 2 3.361125455083114e+02 2.619004058447622e+02 4.338373361330959e+01 -2.061496357605196e+02 3 5.299016201311088e+02 2.892532450564946e+02 2.091058919093095e+02 3.916669672191841e+02 4 6.339858343605800e+02 -5.511536509012568e+02 -2.524896255226191e+02 -1.855173314586645e+02 - ME 1.089382545947932e-04 + ME 2.866662317167052e-04 Event 106 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -852,7 +852,7 @@ Event 106 Batch 0 2 3.578050478863485e+02 -2.265838270225943e+02 2.740910124726658e+02 -3.947579646386072e+01 3 5.202885196186892e+02 1.412729374205232e+02 1.631578432376887e+02 4.734148487210871e+02 4 6.219064324949621e+02 8.531088960207101e+01 -4.372488557103545e+02 -4.339390522572265e+02 - ME 4.548955126640399e-04 + ME 1.912263829178338e-03 Event 107 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -860,7 +860,7 @@ Event 107 Batch 0 2 5.409822745993889e+02 9.278463733038997e+01 5.102180459532771e+02 -1.540466750365499e+02 3 2.501852297905710e+02 1.682301834486207e+02 1.474652503315489e+02 1.120056004263085e+02 4 7.088324956100398e+02 -2.610148207790107e+02 -6.576832962848259e+02 4.204107461024153e+01 - ME 2.159102073406285e-04 + ME 7.096163321035572e-04 Event 108 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -868,7 +868,7 
@@ Event 108 Batch 0 2 6.835202199428555e+02 6.670011709444186e+02 6.653656309718588e+01 1.337243986739828e+02 3 2.377887385005082e+02 -1.098327419601477e+02 7.667443498831059e+01 -1.964720946353502e+02 4 5.786910415566365e+02 -5.571684289842709e+02 -1.432109980854965e+02 6.274769596136723e+01 - ME 2.960130886583330e-05 + ME 1.143500637563713e-04 Event 109 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -876,7 +876,7 @@ Event 109 Batch 0 2 5.978180281189351e+02 4.291222314737005e+02 2.249703559956599e+02 3.501840146583366e+02 3 3.585061336071061e+02 -3.227227650115256e+02 1.541688059097761e+02 2.467071262824850e+01 4 5.436758382739589e+02 -1.063994664621746e+02 -3.791391619054360e+02 -3.748547272865851e+02 - ME 1.100286424576873e-04 + ME 1.159187207430584e-03 Event 110 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -884,7 +884,7 @@ Event 110 Batch 0 2 7.073952645543156e+01 -4.753982451958468e+01 4.872856968801237e+01 -1.922426029646691e+01 3 7.438039776014969e+02 1.707202332282495e+02 -7.225114374584515e+02 4.556513803361385e+01 4 6.854564959430718e+02 -1.231804087086648e+02 6.737828677704391e+02 -2.634087773714689e+01 - ME 1.052942530962122e-04 + ME 5.177444310012934e-04 Event 111 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -892,7 +892,7 @@ Event 111 Batch 0 2 5.206822291802364e+02 -3.873336848644893e+02 2.415505427333673e+02 -2.504714268307115e+02 3 5.478000561519707e+02 4.687653961676166e+02 -2.245690260344170e+02 -1.729527606656598e+02 4 4.315177146677929e+02 -8.143171130312743e+01 -1.698151669895031e+01 4.234241874963712e+02 - ME 8.545692640795734e-05 + ME 1.041517236520828e-04 Event 112 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -900,7 +900,7 @@ Event 112 Batch 0 2 3.610471238372959e+02 2.563298943277285e+02 9.635756626046441e+01 
-2.352981732387216e+02 3 6.139063356201009e+02 1.031778254919422e+02 -4.257030126280926e+02 4.301305270271111e+02 4 5.250465405426031e+02 -3.595077198196707e+02 3.293454463676283e+02 -1.948323537883896e+02 - ME 5.572029836371622e-05 + ME 2.333567140730066e-04 Event 113 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -908,7 +908,7 @@ Event 113 Batch 0 2 5.886653054136124e+02 3.035646198144377e+02 3.278619896967805e+02 -3.832517176826292e+02 3 5.420023902452333e+02 -3.658357535838290e+02 -3.990519958595696e+02 2.623541560166928e+01 4 3.693323043411537e+02 6.227113376939163e+01 7.119000616278893e+01 3.570163020809600e+02 - ME 4.986188449478774e-05 + ME 6.906402420910258e-05 Event 114 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -916,7 +916,7 @@ Event 114 Batch 0 2 5.165204340356855e+02 2.346362244736889e+01 6.298471388966840e+00 5.159487827839334e+02 3 5.932916594323345e+02 3.608814360715946e+02 -5.336137507463695e+01 -4.678804824963537e+02 4 3.901879065319798e+02 -3.843450585189634e+02 4.706290368567026e+01 -4.806830028757967e+01 - ME 4.029549711869195e-04 + ME 5.363382776736297e-04 Event 115 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -924,7 +924,7 @@ Event 115 Batch 0 2 5.432307281524777e+02 2.250327918244370e+02 4.870559856477670e+02 -8.506664127290338e+01 3 4.265243530840496e+02 2.057819224248363e+02 -2.472237669715339e+02 2.801021835354204e+02 4 5.302449187634726e+02 -4.308147142492733e+02 -2.398322186762331e+02 -1.950355422625171e+02 - ME 4.159321993514108e-05 + ME 2.364149932043149e-04 Event 116 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -932,7 +932,7 @@ Event 116 Batch 0 2 4.402635748890415e+02 -4.240500842615081e+02 -5.733358735035193e+01 -1.035683405941509e+02 3 4.399967684638562e+02 1.183617589007452e+02 -1.041572505293867e+02 
-4.107784286579766e+02 4 6.197396566471035e+02 3.056883253607625e+02 1.614908378797388e+02 5.143467692521278e+02 - ME 4.172733678506819e-05 + ME 1.343295643586522e-04 Event 117 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -940,7 +940,7 @@ Event 117 Batch 0 2 3.074085311587982e+02 -4.270248480828711e+01 -3.034838508096459e+02 2.395944736750828e+01 3 5.360984061023379e+02 3.510554986169303e+02 -1.596589010508530e+02 -3.723849798683070e+02 4 6.564930627388640e+02 -3.083530138086433e+02 4.631427518604987e+02 3.484255325007987e+02 - ME 4.142391000026985e-05 + ME 1.795895763168496e-04 Event 118 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -948,7 +948,7 @@ Event 118 Batch 0 2 5.403602961735903e+02 4.471526113902045e+02 -1.804334130868151e+02 -2.439007487679592e+02 3 5.654623567965698e+02 -5.534570111367966e+02 -1.157195831079003e+02 6.480112868522320e+00 4 3.941773470298406e+02 1.063043997465919e+02 2.961529961947150e+02 2.374206358994370e+02 - ME 7.288650603673961e-06 + ME 3.055618730902428e-05 Event 119 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -956,7 +956,7 @@ Event 119 Batch 0 2 8.009099446659010e+01 5.775399043490319e+01 -2.629604726664823e+01 4.886268393818209e+01 3 7.131140611332349e+02 2.472685400460709e+02 -2.870014097539109e+02 -6.041689532644716e+02 4 7.067949444001758e+02 -3.050225304809738e+02 3.132974570205592e+02 5.553062693262896e+02 - ME 2.815424392761942e-04 + ME 6.861262467765907e-04 Event 120 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -964,7 +964,7 @@ Event 120 Batch 0 2 5.007248873753321e+02 2.708997263130530e+02 -3.880896283797751e+02 1.634784128397387e+02 3 7.413897277398672e+02 -4.257033276374029e+02 5.921425482134987e+02 -1.334264135464211e+02 4 2.578853848848011e+02 1.548036013243502e+02 -2.040529198337238e+02 
-3.005199929331748e+01 - ME 6.003662532288496e-06 + ME 1.034513276694145e-04 Event 121 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -972,7 +972,7 @@ Event 121 Batch 0 2 5.732265116821120e+02 -1.149395375629033e+02 4.260916136383032e+02 3.658189076403451e+02 3 4.323948798659248e+02 -2.148488009071912e+01 -4.178027098651986e+02 1.092914804138530e+02 4 4.943786084519640e+02 1.364244176536226e+02 -8.288903773105691e+00 -4.751103880541979e+02 - ME 7.661241871407340e-04 + ME 8.074833733477824e-02 Event 122 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -980,7 +980,7 @@ Event 122 Batch 0 2 3.423360304412701e+02 2.648046119434483e+02 2.369247279710451e+01 -2.156644197927059e+02 3 6.059487982275789e+02 2.457729689670163e+01 -4.569077875801422e+02 3.972469964635579e+02 4 5.517151713311508e+02 -2.893819088401499e+02 4.332153147830377e+02 -1.815825766708520e+02 - ME 5.274300345459390e-05 + ME 2.180123533398812e-04 Event 123 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -988,7 +988,7 @@ Event 123 Batch 0 2 1.430133297276668e+02 -4.205671322284506e+01 3.498095937953869e+01 1.321377229770999e+02 3 7.140350670908600e+02 -2.955397919833849e+01 -6.570980288365154e+02 -2.778395577453968e+02 4 6.429516031814733e+02 7.161069242118367e+01 6.221170694569771e+02 1.457018347682969e+02 - ME 2.698780233597045e-04 + ME 5.626335206455025e-04 Event 124 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -996,7 +996,7 @@ Event 124 Batch 0 2 6.053457283343441e+02 5.458657819531910e+02 -1.853964251366731e+01 -2.610177782464909e+02 3 7.499633671623128e+02 -6.784114238502394e+02 2.145325921506613e+01 3.189713933003628e+02 4 1.446909045033435e+02 1.325456418970486e+02 -2.913616701398675e+00 -5.795361505387172e+01 - ME 2.629538535113942e-05 + ME 4.169465060943616e-04 Event 125 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1004,7 +1004,7 @@ Event 125 Batch 0 2 6.695439244882118e+02 9.058534244088493e+01 6.586171675820721e+02 7.941529525294386e+01 3 9.341516463500346e+01 3.490868167113007e+01 5.232133368429144e+01 6.906703243419068e+01 4 7.370409108767834e+02 -1.254940241120154e+02 -7.109385012663632e+02 -1.484823276871337e+02 - ME 4.436636984625360e-03 + ME 1.111472366347957e-02 Event 126 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1012,7 +1012,7 @@ Event 126 Batch 0 2 6.465564354211967e+02 -2.094351601488127e+02 -1.930091683601272e+02 -5.804477571728034e+02 3 1.356182567235447e+02 -2.832094442380729e+01 9.735247446175231e+01 -9.007070211700794e+01 4 7.178253078552584e+02 2.377561045726200e+02 9.565669389837488e+01 6.705184592898115e+02 - ME 1.230970446288030e-03 + ME 1.775660879411100e-03 Event 127 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1020,7 +1020,7 @@ Event 127 Batch 0 2 4.508388003927651e+02 -3.846405138087858e+02 7.756355374444065e+01 2.220162025777267e+02 3 6.162879941073576e+02 2.174727303224461e+02 1.334711143222092e+02 -5.609830344035003e+02 4 4.328732054998774e+02 1.671677834863399e+02 -2.110346680666500e+02 3.389668318257735e+02 - ME 2.127227557837123e-05 + ME 3.922171581774212e-05 Event 128 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1028,7 +1028,7 @@ Event 128 Batch 0 2 7.468963146802857e+02 5.701805835528932e+02 -3.440982003215339e+02 -3.381488363986430e+02 3 1.196664332518719e+02 -9.337643239636876e+01 2.398139841985228e+01 7.089280393650260e+01 4 6.334372520678420e+02 -4.768041511565244e+02 3.201168019016817e+02 2.672560324621404e+02 - ME 7.842790653965437e-05 + ME 2.053620454072734e-04 Event 129 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-1036,7 +1036,7 @@ Event 129 Batch 0 2 4.378966182438207e+02 -4.256397208622688e+02 4.624364030548149e+01 9.190104474357973e+01 3 7.127537996732577e+02 5.790589826349546e+02 -1.369827771626340e+02 -3.923574802896586e+02 4 3.493495820829217e+02 -1.534192617726859e+02 9.073913685715252e+01 3.004564355460789e+02 - ME 1.046217618618756e-05 + ME 1.668072874757384e-05 Event 130 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1044,7 +1044,7 @@ Event 130 Batch 0 2 6.322026526626455e+02 5.905875735566585e+02 -2.387291116192753e+01 -2.243136110600485e+02 3 5.268087771404591e+02 -3.287250458747471e+02 1.913681034684307e+02 3.644798771698754e+02 4 3.409885701968954e+02 -2.618625276819114e+02 -1.674951923065032e+02 -1.401662661098267e+02 - ME 3.412796728096272e-05 + ME 2.766647151388132e-04 Event 131 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1052,7 +1052,7 @@ Event 131 Batch 0 2 2.691964685177017e+02 -2.641651354044939e+02 4.065264362900757e+01 -3.210735842607325e+01 3 5.382709487855662e+02 -3.022535437819008e+02 -4.307865739991411e+02 1.131429946566680e+02 4 6.925325826967319e+02 5.664186791863947e+02 3.901339303701337e+02 -8.103563623059465e+01 - ME 1.516502654737588e-04 + ME 5.354423766199649e-04 Event 132 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1060,7 +1060,7 @@ Event 132 Batch 0 2 1.376388194981169e+02 -2.491804956023667e+01 3.114513197621116e+01 1.317327453336230e+02 3 7.332494677489981e+02 -3.054807357444667e+02 -6.882601889638243e+00 -6.665500220046781e+02 4 6.291117127528858e+02 3.303987853047034e+02 -2.426253008657308e+01 5.348172766710551e+02 - ME 2.459616839911958e-04 + ME 3.625143788027957e-04 Event 133 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1068,7 +1068,7 @@ Event 133 Batch 0 2 5.818916885738672e+02 -3.437736592641007e+02 
-2.113522447259726e+02 -4.192228966514222e+02 3 7.075583625851592e+02 3.695171106849944e+02 9.875952986414086e+01 5.952667441040354e+02 4 2.105499488409736e+02 -2.574345142089370e+01 1.125927148618317e+02 -1.760438474526132e+02 - ME 3.278402967978973e-04 + ME 6.644965721204062e-03 Event 134 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1076,7 +1076,7 @@ Event 134 Batch 0 2 7.039051474789593e+02 -1.767404282002263e+02 5.832845063404937e+02 3.521710697233707e+02 3 6.740856043500099e+02 9.540039380435479e+01 -5.203258634262522e+02 -4.177932056695244e+02 4 1.220092481710302e+02 8.134003439587134e+01 -6.295864291424151e+01 6.562213594615410e+01 - ME 3.621089826286842e-05 + ME 6.394436352069354e-05 Event 135 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1084,7 +1084,7 @@ Event 135 Batch 0 2 7.491379873081086e+02 -6.603965492909807e+02 -9.243924572685610e+01 -3.413782470545817e+02 3 4.360367703469753e+02 3.763875731093294e+02 3.833030381995060e+01 2.167746473012021e+02 4 3.148252423449159e+02 2.840089761816513e+02 5.410894190690560e+01 1.246035997533796e+02 - ME 1.170602675185252e-05 + ME 3.729096801849378e-05 Event 136 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1092,7 +1092,7 @@ Event 136 Batch 0 2 6.907976432034611e+02 -8.965778913807024e+01 -5.375684903631193e+02 -4.244796613161184e+02 3 4.317447428217263e+02 2.541758793770707e+02 2.501815833403360e+02 2.433255445990286e+02 4 3.774576139748129e+02 -1.645180902390004e+02 2.873869070227833e+02 1.811541167170898e+02 - ME 1.221598515374744e-05 + ME 3.295715598818487e-05 Event 137 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1100,7 +1100,7 @@ Event 137 Batch 0 2 5.927917878715718e+02 -5.453882061843875e+02 -2.239274061847312e+02 6.172783069514800e+01 3 3.718333194205911e+02 2.859809174201715e+02 
-2.363544177495510e+02 2.472896101988843e+01 4 5.353748927078371e+02 2.594072887642160e+02 4.602818239342820e+02 -8.645679171503701e+01 - ME 2.222722395048600e-05 + ME 1.267334233155001e-04 Event 138 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1108,7 +1108,7 @@ Event 138 Batch 0 2 1.164849493482387e+02 2.012854405109472e+01 -2.573298799707043e+01 -1.118096528381494e+02 3 7.481698498358139e+02 -1.044692284663333e+02 -4.003634472873074e+00 7.408294509656059e+02 4 6.353452008159477e+02 8.434068441523856e+01 2.973662246994375e+01 -6.290197981274564e+02 - ME 1.183014588836486e-01 + ME 3.545594402685597e+00 Event 139 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1116,7 +1116,7 @@ Event 139 Batch 0 2 3.415587822283577e+02 -2.468214832259765e+02 1.926082427237748e+02 1.365416492148350e+02 3 5.828887331044928e+02 -1.023403009989268e+02 -5.561813319045077e+02 1.412376154306548e+02 4 5.755524846671491e+02 3.491617842249035e+02 3.635730891807333e+02 -2.777792646454897e+02 - ME 5.213154494000113e-05 + ME 4.142320485322521e-04 Event 140 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1124,7 +1124,7 @@ Event 140 Batch 0 2 4.395392082109443e+02 -3.037880820376849e+02 -2.455930383243060e+02 -2.014735126343029e+02 3 4.709796125547878e+02 -2.826270024952004e+02 2.984919122515593e+02 2.298833426397907e+02 4 5.894811792342680e+02 5.864150845328855e+02 -5.289887392725340e+01 -2.840983000548780e+01 - ME 2.990357782498624e-05 + ME 1.220048440917972e-04 Event 141 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1132,7 +1132,7 @@ Event 141 Batch 0 2 3.025838986653694e+02 -2.680006525137058e+02 -6.218827689980458e+01 -1.259574698062632e+02 3 5.104624598690772e+02 -2.829910827131053e+02 4.173533268753467e+02 -7.939880721102661e+01 4 6.869536414655528e+02 5.509917352268112e+02 
-3.551650499755422e+02 2.053562770172896e+02 - ME 7.151804808113674e-05 + ME 3.735313583347012e-04 Event 142 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1140,7 +1140,7 @@ Event 142 Batch 0 2 4.390011511178412e+02 -3.153925512561953e+02 3.992377088505197e+01 -3.027468279160259e+02 3 4.597282536099518e+02 2.984856708041211e+02 -2.221794712617382e+02 -2.699863960308454e+02 4 6.012705952722066e+02 1.690688045207421e+01 1.822557003766862e+02 5.727332239468712e+02 - ME 8.945447985744934e-05 + ME 1.630913878361870e-04 Event 143 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1148,7 +1148,7 @@ Event 143 Batch 0 2 7.103308443495001e+02 -3.626595603160224e+02 2.462759922459802e+02 5.589240443825270e+02 3 3.424564807343295e+02 4.507572778536915e+01 -2.357842367637252e+02 -2.442343416788665e+02 4 4.472126749161695e+02 3.175838325306533e+02 -1.049175548225529e+01 -3.146897027036604e+02 - ME 1.789392510542836e-04 + ME 1.304325296055160e-03 Event 144 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1156,7 +1156,7 @@ Event 144 Batch 0 2 6.893886390440568e+02 -2.470805413393656e+02 1.331686162420120e+02 6.296618309717105e+02 3 7.132719020730987e+02 2.482972988978650e+02 -2.304803220538649e+02 -6.276815106349294e+02 4 9.733945888284487e+01 -1.216757558499225e+00 9.731170581185302e+01 -1.980320336781234e+00 - ME 1.486904409371019e-04 + ME 3.769348793094523e-04 Event 145 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1164,7 +1164,7 @@ Event 145 Batch 0 2 3.784954309743686e+02 2.391836032855264e+02 1.115572896135236e+01 -2.931305935912622e+02 3 7.389406222827198e+02 -4.231861417520660e+02 1.513250860114713e+02 5.865555822189353e+02 4 3.825639467429113e+02 1.840025384665394e+02 -1.624808149728234e+02 -2.934249886276727e+02 - ME 2.016505354100400e-04 + ME 
2.193982780219728e-03 Event 146 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1172,7 +1172,7 @@ Event 146 Batch 0 2 4.681255842987410e+02 -3.253195724522379e+01 1.754808059398437e+02 -4.327698247100133e+02 3 2.875849079819393e+02 2.091841587061404e+01 1.879781824316579e+02 -2.166372592748876e+02 4 7.442895077193195e+02 1.161354137460973e+01 -3.634589883715017e+02 6.494070839849006e+02 - ME 1.210467216316050e-02 + ME 5.347932692815789e-02 Event 147 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1180,7 +1180,7 @@ Event 147 Batch 0 2 2.442136391928777e+02 -1.784444843977844e+02 -1.666832492802189e+02 -3.816014311599316e+00 3 5.551361515401285e+02 1.378338123621512e+02 -5.199472642306259e+02 1.372327560591401e+02 4 7.006502092669938e+02 4.061067203563306e+01 6.866305135108448e+02 -1.334167417475408e+02 - ME 2.360352365747709e-04 + ME 7.450632204513606e-04 Event 148 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1188,7 +1188,7 @@ Event 148 Batch 0 2 4.547263863263726e+02 3.928375677411887e+02 5.145105706241225e+01 2.231759855356057e+02 3 7.397285466814292e+02 -5.611511356388266e+02 -1.533645573573770e+02 -4.569322031694095e+02 4 3.055450669921979e+02 1.683135678976379e+02 1.019135002949646e+02 2.337562176338038e+02 - ME 6.307552439231181e-06 + ME 1.440225905683450e-05 Event 149 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1196,7 +1196,7 @@ Event 149 Batch 0 2 2.343018799311635e+02 9.853424545130945e+01 1.924850318874441e+02 -9.021023174733594e+01 3 7.291173748950658e+02 3.429747374294529e+01 -5.990516617369192e+02 4.142136359886766e+02 4 5.365807451737705e+02 -1.328317191942547e+02 4.065666298494750e+02 -3.240034042413406e+02 - ME 8.298171355094406e-05 + ME 8.405553848068603e-04 Event 150 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -1204,7 +1204,7 @@ Event 150 Batch 0 2 4.707648023587808e+02 -8.969278865174961e+01 -3.008719699078221e+02 3.507859183712497e+02 3 6.876639918976698e+02 3.906111988928598e+02 4.609284537794546e+02 -3.284046551871671e+02 4 3.415712057435500e+02 -3.009184102411105e+02 -1.600564838716325e+02 -2.238126318408256e+01 - ME 1.887585788236135e-05 + ME 1.070125715137075e-04 Event 151 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1212,7 +1212,7 @@ Event 151 Batch 0 2 6.503034458278056e+02 -1.575298496674962e+02 -3.658248853789647e+01 -6.298735108350154e+02 3 6.998690336552314e+02 1.302751858829802e+02 -1.019415103826456e+02 6.800389464387812e+02 4 1.498275205169629e+02 2.725466378451580e+01 1.385239989205421e+02 -5.016543560376590e+01 - ME 4.060174493404880e-04 + ME 6.663776898009472e-04 Event 152 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1220,7 +1220,7 @@ Event 152 Batch 0 2 7.401192382353395e+02 1.493701961830190e+02 6.288419447382046e+02 3.605867993093739e+02 3 7.332111095478891e+02 -1.230079111936445e+02 -6.287602831147091e+02 -3.565502647954901e+02 4 2.666965221677112e+01 -2.636228498937447e+01 -8.166162349550861e-02 -4.036534513883709e+00 - ME 1.210964379505254e-04 + ME 8.446403371723604e-04 Event 153 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1228,7 +1228,7 @@ Event 153 Batch 0 2 5.645797071775899e+02 7.941901905692946e+01 3.691428696980725e+02 -4.197337333594241e+02 3 6.079979027943974e+02 1.021455738177839e+02 -5.566920170809548e+02 2.220849604771994e+02 4 3.274223900280123e+02 -1.815645928747133e+02 1.875491473828823e+02 1.976487728822249e+02 - ME 9.895323747190810e-06 + ME 2.846663840296023e-05 Event 154 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1236,7 +1236,7 @@ Event 154 Batch 0 2 
6.022174885419887e+02 -5.152457849782368e+02 -1.493252664732707e+02 -2.736597328082223e+02 3 3.617627670199851e+02 1.925398333816265e+02 -2.626238171638091e+02 1.575736108034646e+02 4 5.360197444380261e+02 3.227059515966102e+02 4.119490836370796e+02 1.160861220047577e+02 - ME 1.660411512586943e-05 + ME 6.437319974597944e-05 Event 155 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1244,7 +1244,7 @@ Event 155 Batch 0 2 6.202229507100907e+02 -2.107861924791831e+02 -3.212541876154504e+02 4.868690137883067e+02 3 2.943040328093193e+02 2.940980302320592e+02 1.073731199058907e+01 2.433613089266508e+00 4 5.854730164805898e+02 -8.331183775287627e+01 3.105168756248616e+02 -4.893026268775732e+02 - ME 4.918845171174253e-04 + ME 5.904510654775639e-03 Event 156 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1252,7 +1252,7 @@ Event 156 Batch 0 2 4.945486805149833e+02 4.540818864859257e+02 -1.431706201593249e+02 -1.337542944644701e+02 3 5.997303202813281e+02 -3.624214233270367e+02 -5.726286247273350e+01 4.743923835389624e+02 4 4.057209992036886e+02 -9.166046315888883e+01 2.004334826320584e+02 -3.406380890744924e+02 - ME 1.986837824231628e-04 + ME 4.701306652347430e-03 Event 157 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1260,7 +1260,7 @@ Event 157 Batch 0 2 4.617003083190191e+02 3.118400043328062e+02 3.404502064148864e+02 -4.079626411035589e+00 3 5.720097526413113e+02 -4.999240316044806e+01 -4.329264075474301e+02 -3.705005295422582e+02 4 4.662899390396696e+02 -2.618476011723578e+02 9.247620113254365e+01 3.745801559532937e+02 - ME 1.403598809900552e-05 + ME 3.907978340087068e-05 Event 158 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1268,7 +1268,7 @@ Event 158 Batch 0 2 6.784877363061535e+02 -5.707102180762959e+02 -3.102223423027389e+02 -1.959529373021938e+02 3 
5.650909444059712e+02 5.525284805868615e+02 7.765167789879932e+01 8.950011457818250e+01 4 2.564213192878751e+02 1.818173748943443e+01 2.325706644039396e+02 1.064528227240114e+02 - ME 8.470133063482862e-06 + ME 3.503179830087694e-05 Event 159 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1276,7 +1276,7 @@ Event 159 Batch 0 2 5.369491563274252e+02 2.154713482252002e+02 -2.912667909729743e+02 3.962955349875316e+02 3 6.066564496499102e+02 -4.020061311781470e+01 5.572389608252350e+02 -2.364332868806716e+02 4 3.563943940226648e+02 -1.752707351073854e+02 -2.659721698522608e+02 -1.598622481068599e+02 - ME 3.562393617300492e-05 + ME 3.198473025834927e-04 Event 160 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1284,7 +1284,7 @@ Event 160 Batch 0 2 6.492474755438517e+02 3.490068395973682e+02 1.460348644657111e+02 -5.276270735801970e+02 3 2.857818814470013e+02 -2.550253586192556e+02 1.227259509083862e+02 3.964456076362119e+01 4 5.649706430091471e+02 -9.398148097811273e+01 -2.687608153740973e+02 4.879825128165764e+02 - ME 3.516238941302227e-05 + ME 6.719464076924620e-05 Event 161 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1292,7 +1292,7 @@ Event 161 Batch 0 2 6.770282049439580e+02 -2.863253153105184e+02 -4.911270786072976e+02 -3.676672364525180e+02 3 1.598243093356544e+02 -7.505362471426160e+01 1.299195075310522e+02 -5.506073768810752e+01 4 6.631474857203874e+02 3.613789400247800e+02 3.612075710762453e+02 4.227279741406256e+02 - ME 5.970757951131334e-05 + ME 1.577168105051119e-04 Event 162 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1300,7 +1300,7 @@ Event 162 Batch 0 2 5.178592782584632e+02 -3.271131571456631e+02 3.943743741889439e+02 -7.512700901574514e+01 3 3.730686930366258e+02 -2.885924195736573e+01 -1.360208443078026e+02 -3.461874113706257e+02 4 
6.090720287049110e+02 3.559723991030290e+02 -2.583535298811414e+02 4.213144203863710e+02 - ME 2.768303103320498e-05 + ME 1.031749267713353e-04 Event 163 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1308,7 +1308,7 @@ Event 163 Batch 0 2 5.388642316037673e+02 3.152159924116781e+02 3.539969933522669e+01 -4.356149670486711e+02 3 5.364171791816749e+02 -5.299694218906361e+02 3.369785517714305e+01 7.576448071880543e+01 4 4.247185892145582e+02 2.147534294789580e+02 -6.909755451236977e+01 3.598504863298658e+02 - ME 1.485600561394433e-05 + ME 3.508094027565679e-05 Event 164 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1316,7 +1316,7 @@ Event 164 Batch 0 2 6.862697092177667e+02 4.132218376422068e+02 1.310202162324327e+02 -5.320221138485150e+02 3 4.476895523579005e+02 -2.769046850483522e+02 1.374187337517142e+02 3.238299280529301e+02 4 3.660407384243329e+02 -1.363171525938544e+02 -2.684389499841469e+02 2.081921857955847e+02 - ME 1.755563256840939e-05 + ME 3.375894779915149e-05 Event 165 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1324,7 +1324,7 @@ Event 165 Batch 0 2 2.382444910715278e+02 -2.158277263671036e+02 -9.471372817531817e+00 -1.004446273032522e+02 3 7.304591383576048e+02 4.619003715882296e+02 -1.223345688256177e+02 5.524969256086772e+02 4 5.312963705708673e+02 -2.460726452211260e+02 1.318059416431495e+02 -4.520522983054250e+02 - ME 4.549138184301779e-04 + ME 6.966498968932957e-03 Event 166 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1332,7 +1332,7 @@ Event 166 Batch 0 2 2.131352071380649e+02 -7.633553084455029e+01 -1.899581415396244e+02 5.929087379418958e+01 3 7.305557876753161e+02 8.980971292745940e+01 7.136333043711877e+02 1.279589045828712e+02 4 5.563090051866194e+02 -1.347418208290915e+01 -5.236751628315633e+02 -1.872497783770607e+02 - ME 
3.352199959657985e-05 + ME 3.314006956523505e-04 Event 167 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1340,7 +1340,7 @@ Event 167 Batch 0 2 4.122964103002419e+02 -3.405127102276982e+02 6.366431608201744e+01 2.235761145061386e+02 3 4.697083356610920e+02 -2.521100678451879e+02 -2.856113063438232e+01 -3.952855880214881e+02 4 6.179952540386658e+02 5.926227780728861e+02 -3.510318544763516e+01 1.717094735153495e+02 - ME 3.829535931496594e-05 + ME 1.146777177775239e-04 Event 168 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1348,7 +1348,7 @@ Event 168 Batch 0 2 7.156643283953484e+02 -3.999734570317170e+02 4.816586825103861e+02 3.467009924560655e+02 3 6.192344221355605e+02 2.722545660880235e+02 -4.999454120042317e+02 -2.436869012025525e+02 4 1.651012494690919e+02 1.277188909436936e+02 1.828672949384504e+01 -1.030140912535133e+02 - ME 5.027887292283473e-05 + ME 1.017624049822302e-03 Event 169 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1356,7 +1356,7 @@ Event 169 Batch 0 2 3.626022684949455e+02 7.511110909567982e+01 -2.030941161665286e+02 -2.908461902563517e+02 3 5.580565590514408e+02 -2.529981754432838e+02 -3.439969378312538e+02 3.592842232626199e+02 4 5.793411724536141e+02 1.778870663476037e+02 5.470910539977822e+02 -6.843803300626824e+01 - ME 4.350242525242475e-05 + ME 1.371698416063432e-04 Event 170 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1364,7 +1364,7 @@ Event 170 Batch 0 2 6.602909342483501e+02 4.699653539595539e+02 -3.020118498241596e+02 3.520021683086903e+02 3 1.039297502933440e+02 3.247420585022842e+01 -9.851348423194945e+01 6.473976746580508e+00 4 7.357793154583061e+02 -5.024395598097824e+02 4.005253340561092e+02 -3.584761450552709e+02 - ME 9.967260301798612e-03 + ME 1.673719496447659e-02 Event 171 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1372,7 +1372,7 @@ Event 171 Batch 0 2 1.506693011949600e+02 -3.657300520509282e+01 -1.244227366169959e+02 -7.669834565089053e+01 3 6.344013325830570e+02 -2.026333084464634e+02 -4.956100871165362e+02 3.402578943089165e+02 4 7.149293662219835e+02 2.392063136515561e+02 6.200328237335323e+02 -2.635595486580261e+02 - ME 9.157902172934166e-04 + ME 2.133207113512388e-03 Event 172 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1380,7 +1380,7 @@ Event 172 Batch 0 2 5.352445157558213e+02 -2.018352690102651e+02 3.892440882325296e+02 -3.069825004886504e+02 3 6.716112180685394e+02 2.825227203806547e+02 -5.978593235713698e+02 1.175022124175027e+02 4 2.931442661756383e+02 -8.068745137038898e+01 2.086152353388391e+02 1.894802880711483e+02 - ME 8.067092159940342e-06 + ME 2.630379932615259e-05 Event 173 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1388,7 +1388,7 @@ Event 173 Batch 0 2 6.571348515648592e+02 -2.769863586381786e+02 5.805753619381593e+02 1.343019708712704e+02 3 5.332990408103321e+02 1.871824832342877e+02 -4.782426732337677e+02 1.437168410371092e+02 4 3.095661076248081e+02 8.980387540389081e+01 -1.023326887043915e+02 -2.780188119083794e+02 - ME 1.269359653092767e-04 + ME 9.985413945498126e-03 Event 174 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1396,7 +1396,7 @@ Event 174 Batch 0 2 6.091496911716730e+02 -4.752584064243671e+02 3.135726231883978e+01 -3.797492797588730e+02 3 6.417481529658018e+02 3.309293137608124e+02 9.015643604119191e+01 5.424004960996682e+02 4 2.491021558625255e+02 1.443290926635548e+02 -1.215136983600317e+02 -1.626512163407953e+02 - ME 1.362612102685676e-04 + ME 1.319192968737130e-03 Event 175 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1404,7 +1404,7 @@ Event 
175 Batch 0 2 5.399801778396885e+02 1.966672297646830e+02 2.343185748302537e+02 -4.449667388535759e+02 3 6.987953575798327e+02 -1.857207036318898e+02 -9.664246188148675e+01 6.666955876403318e+02 4 2.612244645804785e+02 -1.094652613279307e+01 -1.376761129487668e+02 -2.217288487867561e+02 - ME 9.613528518728674e-04 + ME 9.528877211334405e-03 Event 176 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1412,7 +1412,7 @@ Event 176 Batch 0 2 6.615757321243968e+02 -4.129469954321281e+02 4.686878756164518e+02 -2.179194886871010e+02 3 1.607981401590110e+02 -6.355407199259605e+01 7.929314438200207e+00 1.474925346731048e+02 4 6.776261277165921e+02 4.765010674247242e+02 -4.766171900546519e+02 7.042695401399614e+01 - ME 3.097907077728356e-04 + ME 6.965204353376922e-04 Event 177 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1420,7 +1420,7 @@ Event 177 Batch 0 2 4.314334067424883e+02 -3.493619040652741e+02 -2.026482683689240e+01 -2.523299055494341e+02 3 4.840006500668400e+02 -1.846595828310067e+02 -1.450727057198388e+02 4.232155216776995e+02 4 5.845659431906716e+02 5.340214868962809e+02 1.653375325567312e+02 -1.708856161282654e+02 - ME 1.084300812640113e-04 + ME 2.160100049311594e-04 Event 178 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1428,7 +1428,7 @@ Event 178 Batch 0 2 4.528135981327372e+02 -2.544528544607913e+02 1.436928116455424e+02 3.458992272209776e+02 3 3.053350882587867e+02 -1.380299578048218e+02 2.072032295570572e+02 1.767599177741536e+02 4 7.418513136084770e+02 3.924828122656132e+02 -3.508960412025996e+02 -5.226591449951313e+02 - ME 5.382438151181503e-02 + ME 7.384409254828141e-02 Event 179 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1436,7 +1436,7 @@ Event 179 Batch 0 2 7.433145319259943e+02 -2.538538580850882e+02 -6.778753511348521e+02 
-1.689962142519080e+02 3 1.647945947160298e+02 1.009041857568576e+02 1.171651165877689e+02 5.699069397138987e+01 4 5.918908733579761e+02 1.529496723282306e+02 5.607102345470832e+02 1.120055202805181e+02 - ME 3.739915465576335e-05 + ME 1.335347052581446e-04 Event 180 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1444,7 +1444,7 @@ Event 180 Batch 0 2 2.396120216689867e+02 1.204528233788652e+02 -1.081248155319049e+02 1.766750195544080e+02 3 5.541470271917004e+02 2.767127195685322e+02 2.999096875483201e+02 3.749175614572557e+02 4 7.062409511393131e+02 -3.971655429473975e+02 -1.917848720164151e+02 -5.515925810116636e+02 - ME 2.792447184071457e-03 + ME 1.316593054412419e-02 Event 181 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1452,7 +1452,7 @@ Event 181 Batch 0 2 2.165494222755782e+02 1.336973493521793e+02 -1.495065670853883e+02 -8.164837697364385e+01 3 6.960869932595207e+02 -2.848973600545249e+02 2.209041937252092e+01 6.347303441548928e+02 4 5.873635844649011e+02 1.512000107023455e+02 1.274161477128675e+02 -5.530819671812490e+02 - ME 3.488874737600980e-03 + ME 6.164296623062663e-02 Event 182 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1460,7 +1460,7 @@ Event 182 Batch 0 2 6.472681881349898e+02 4.279258056181361e+02 3.994050733201775e+02 -2.762448183472868e+02 3 5.337197582091030e+02 -3.479343829022644e+02 -4.034091782989213e+02 -3.254965992745409e+01 4 3.190120536559070e+02 -7.999142271587166e+01 4.004104978744005e+00 3.087944782747408e+02 - ME 5.523679400573375e-05 + ME 6.393158381765308e-05 Event 183 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1468,7 +1468,7 @@ Event 183 Batch 0 2 6.165307808531154e+02 -3.276949594572818e+02 8.808524820164887e+01 -5.147496540405800e+02 3 2.975460412740734e+02 -1.030095950018341e+02 -2.375020297789284e+02 
1.466814775843215e+02 4 5.859231778728107e+02 4.307045544591158e+02 1.494167815772794e+02 3.680681764562588e+02 - ME 2.562496117427957e-05 + ME 6.887775529805495e-05 Event 184 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1476,7 +1476,7 @@ Event 184 Batch 0 2 5.645337360463252e+02 -3.940276919793660e+02 3.776398996283964e+02 1.443212503288767e+02 3 5.368100353438223e+02 2.392766596964613e+02 -1.719264331693737e+02 -4.487237410122139e+02 4 3.986562286098531e+02 1.547510322829050e+02 -2.057134664590229e+02 3.044024906833372e+02 - ME 1.712138666139329e-05 + ME 3.553984578535888e-05 Event 185 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1484,7 +1484,7 @@ Event 185 Batch 0 2 6.347397779710931e+02 2.522092504724420e+02 -1.599825720327363e+02 5.600809373302327e+02 3 4.566768168089404e+02 -3.359958684022406e+02 -1.272903681003782e+02 -2.818823400219340e+02 4 4.085834052199659e+02 8.378661792979838e+01 2.872729401331145e+02 -2.781985973082986e+02 - ME 1.836859309200860e-04 + ME 1.184197550833168e-03 Event 186 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1492,7 +1492,7 @@ Event 186 Batch 0 2 7.089823220133230e+02 -5.197119220861886e+02 4.248734840868308e+02 -2.281183322067745e+02 3 5.364076825758043e+02 3.588264146200084e+02 -3.973752875032956e+02 3.270606945152315e+01 4 2.546099954108725e+02 1.608855074661802e+02 -2.749819658353518e+01 1.954122627552515e+02 - ME 1.318469173008218e-05 + ME 2.583895514537347e-05 Event 187 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1500,7 +1500,7 @@ Event 187 Batch 0 2 4.835105223217566e+02 -2.128653471696258e+02 1.375287019182911e+02 -4.117725407538514e+02 3 7.240136612790383e+02 4.407273454759851e+02 -4.896543389042274e+01 5.723264583716990e+02 4 2.924758163992057e+02 -2.278619983063593e+02 -8.856326802786833e+01 
-1.605539176178473e+02 - ME 9.185777086042985e-05 + ME 5.307563978210835e-04 Event 188 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1508,7 +1508,7 @@ Event 188 Batch 0 2 6.611118500396009e+02 3.502021063704277e+02 -2.011693879247277e+02 -5.234102027267809e+02 3 3.072944371702247e+02 -6.894916504330918e+01 -1.599953986835475e+02 2.531350551695447e+02 4 5.315937127901742e+02 -2.812529413271184e+02 3.611647866082752e+02 2.702751475572362e+02 - ME 3.862980709292737e-05 + ME 6.863567490702385e-05 Event 189 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1516,7 +1516,7 @@ Event 189 Batch 0 2 7.498478362545707e+02 6.780504955298834e+02 -3.199144947524264e+02 -1.319162971889924e+01 3 3.253008430749361e+02 -2.985087551774363e+02 1.291384938207140e+02 6.034152914782593e+00 4 4.248513206704935e+02 -3.795417403524470e+02 1.907760009317124e+02 7.157476804116639e+00 - ME 1.504471760657040e-05 + ME 8.583750584152986e-05 Event 190 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1524,7 +1524,7 @@ Event 190 Batch 0 2 4.938867893347995e+02 3.689671478502748e+02 -1.218724623869293e+02 3.048516153777389e+02 3 5.264063001598521e+02 6.631942569346465e+01 1.276367949726208e+02 -5.063735530147588e+02 4 4.797069105053494e+02 -4.352865735437401e+02 -5.764332585691415e+00 2.015219376370201e+02 - ME 2.269926034328256e-05 + ME 4.759343488474735e-05 Event 191 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1532,7 +1532,7 @@ Event 191 Batch 0 2 3.681793141805986e+02 -3.225132888415706e+02 1.579589482507471e+02 -8.117977937027918e+01 3 5.431126642386394e+02 4.058413736814005e+01 9.147123993851424e+01 5.338139246166097e+02 4 5.887080215807621e+02 2.819291514734305e+02 -2.494301881892614e+02 -4.526341452463304e+02 - ME 1.427494731558637e-03 + ME 4.908990110546420e-03 Event 192 Batch 0 
0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1540,7 +1540,7 @@ Event 192 Batch 0 2 6.054165399887861e+02 1.497087111729466e+02 8.905021611535379e+01 5.798159601983524e+02 3 2.106656439489222e+02 1.451894976721945e+02 -1.487249448604451e+02 3.436443048222171e+01 4 6.839178160622922e+02 -2.948982088451411e+02 5.967472874509133e+01 -6.141803906805740e+02 - ME 6.984876913518998e-03 + ME 4.294450320853435e-02 Event 193 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1548,7 +1548,7 @@ Event 193 Batch 0 2 2.753169163933055e+02 -1.695475157411122e+02 -2.139406274107579e+02 3.581134319495643e+01 3 5.760219428901971e+02 -3.264616044953138e+02 1.527507522369444e+02 -4.493231656306969e+02 4 6.486611407164972e+02 4.960091202364260e+02 6.118987517381347e+01 4.135118224357404e+02 - ME 4.273063058931925e-05 + ME 1.537583375796735e-04 Event 194 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1556,7 +1556,7 @@ Event 194 Batch 0 2 3.445934948105150e+02 -2.970257025567896e+02 -8.183019525038441e+01 1.543509890854414e+02 3 7.485441862377920e+02 6.623797851941252e+02 1.083400559332054e+02 -3.314119056355291e+02 4 4.068623189516925e+02 -3.653540826373358e+02 -2.650986068282081e+01 1.770609165500877e+02 - ME 4.921158833271929e-06 + ME 3.024610065690235e-05 Event 195 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1564,7 +1564,7 @@ Event 195 Batch 0 2 2.012122274303647e+02 -5.190018365965096e+01 1.322177369426910e+02 -1.425173724194237e+02 3 7.122630330184543e+02 -3.054768058087834e+02 -2.528097616133813e+02 5.916838461125119e+02 4 5.865247395511832e+02 3.573769894684365e+02 1.205920246706904e+02 -4.491664736930883e+02 - ME 4.696445912229638e-04 + ME 3.011639483286710e-03 Event 196 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-1572,7 +1572,7 @@ Event 196 Batch 0 2 4.490485793345989e+02 3.485190427929747e+02 -2.661098616642627e+01 -2.819059396826192e+02 3 5.531554978829222e+02 -3.330165694254377e+02 4.416170126965178e+02 7.442003978758296e+00 4 4.977959227824785e+02 -1.550247336753688e+01 -4.150060265300915e+02 2.744639357038610e+02 - ME 9.363355109875406e-06 + ME 4.340266456570635e-05 Event 197 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1580,7 +1580,7 @@ Event 197 Batch 0 2 3.951249254444253e+02 -2.278358800090239e+02 3.101157211704546e+02 -8.968142489336992e+01 3 3.607080640108546e+02 -2.889948719219027e+02 2.155030307719242e+02 -1.227661082778765e+01 4 7.441670105447209e+02 5.168307519309257e+02 -5.256187519423792e+02 1.019580357211576e+02 - ME 6.597373610109231e-03 + ME 3.377741088449004e-02 Event 198 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1588,7 +1588,7 @@ Event 198 Batch 0 2 3.750236904637998e+02 1.183014344420310e+02 -1.005952209347265e+02 -3.413621838211424e+02 3 4.381296266085964e+02 -2.726825461625328e+02 1.003845461170281e+02 -3.279096546785175e+02 4 6.868466829276033e+02 1.543811117205018e+02 2.106748176980602e-01 6.692718384996598e+02 - ME 6.145502577419889e-04 + ME 9.606390506705955e-04 Event 199 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1596,7 +1596,7 @@ Event 199 Batch 0 2 2.454478562244572e+02 -2.058455361543722e+02 -1.131056012155068e+02 -7.126982772660261e+01 3 5.321797086694488e+02 -9.806778012582416e+01 -4.820333037417012e+02 -2.030808875905193e+02 4 7.223724351060940e+02 3.039133162801963e+02 5.951389049572081e+02 2.743507153171219e+02 - ME 3.088173795554332e-04 + ME 1.577081887352965e-03 Event 200 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1604,7 +1604,7 @@ Event 200 Batch 0 2 3.952431318363244e+02 3.031309873729303e+02 
9.337877017948550e+01 2.358159092128122e+02 3 6.094031244332663e+02 -7.796753338981905e+01 -5.315426896439308e+02 -2.876727322709444e+02 4 4.953537437304092e+02 -2.251634539831113e+02 4.381639194644453e+02 5.185682305813224e+01 - ME 1.668296552597111e-05 + ME 6.703240553489506e-05 Event 201 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1612,7 +1612,7 @@ Event 201 Batch 0 2 6.497938633639732e+02 3.771120671245744e+02 3.553445817627057e+02 -3.921081252746440e+02 3 3.369790646193914e+02 -2.140351778515325e+02 1.061239955238163e+02 2.376584318047305e+02 4 5.132270720166357e+02 -1.630768892730420e+02 -4.614685772865220e+02 1.544496934699135e+02 - ME 2.404518058628388e-05 + ME 6.283412004793947e-05 Event 202 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1620,7 +1620,7 @@ Event 202 Batch 0 2 7.267802742470179e+02 6.523432021666289e+02 -1.481957728499301e+02 2.840702844913056e+02 3 3.546086620137576e+02 -3.102429173963679e+02 -5.939291787501398e+01 -1.611493614224694e+02 4 4.186110637392242e+02 -3.421002847702610e+02 2.075886907249440e+02 -1.229209230688360e+02 - ME 2.830403199974809e-05 + ME 1.894138330341389e-04 Event 203 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1628,7 +1628,7 @@ Event 203 Batch 0 2 4.830190702985662e+02 2.789429895135886e+02 -3.943102945050296e+02 -4.197918611657844e+00 3 5.247163710833165e+02 -4.266462829986153e+02 3.263988520595893e+01 3.037019215942698e+02 4 4.922645586181170e+02 1.477032934850268e+02 3.616704092990706e+02 -2.995040029826120e+02 - ME 5.153190919865371e-05 + ME 5.831910678002871e-04 Event 204 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1636,7 +1636,7 @@ Event 204 Batch 0 2 6.952375769935185e+02 3.823764713153302e+01 6.531840992713522e+02 -2.350397908115460e+02 3 6.250862947179036e+02 1.031861473443961e+02 
-5.506835576815644e+02 2.771878679515999e+02 4 1.796761282885781e+02 -1.414237944759291e+02 -1.025005415897879e+02 -4.214807714005369e+01 - ME 1.903000177287069e-05 + ME 1.802858800889920e-04 Event 205 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1644,7 +1644,7 @@ Event 205 Batch 0 2 5.625197268936781e+02 2.955060596751036e+02 4.395356105446072e+02 -1.895074112086703e+02 3 3.144813194259642e+02 -1.941101430078122e+02 -7.073026664887073e+00 -2.473251401357733e+02 4 6.229989536803572e+02 -1.013959166672914e+02 -4.324625838797200e+02 4.368325513444433e+02 - ME 3.163472493443465e-05 + ME 1.140145509231641e-04 Event 206 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1652,7 +1652,7 @@ Event 206 Batch 0 2 5.487698581700869e+02 -4.771827558939671e+02 -2.639484985605369e+02 6.145050708573941e+01 3 4.357856725513919e+02 1.877155863290790e+02 1.701172104948722e+02 3.545872893148349e+02 4 5.154444692785200e+02 2.894671695648880e+02 9.383128806566407e+01 -4.160377964005746e+02 - ME 3.341888001113221e-04 + ME 4.167786087259531e-03 Event 207 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1660,7 +1660,7 @@ Event 207 Batch 0 2 5.289473514933904e+02 -3.230637718239221e+02 -3.258094337294262e+02 2.631792409740627e+02 3 3.730441408755686e+02 -1.145152671243400e+02 -7.298530142052728e+01 -3.474497523579300e+02 4 5.980085076310412e+02 4.375790389482623e+02 3.987947351499535e+02 8.427051138386733e+01 - ME 3.789028948405571e-05 + ME 1.161501350367753e-04 Event 208 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1668,7 +1668,7 @@ Event 208 Batch 0 2 3.144460531270953e+02 3.105028133645123e+02 -3.495125011961062e+01 3.525242310830974e+01 3 7.230517599976935e+02 -6.554206809343713e+02 2.220922910679198e+02 2.095294558946058e+02 4 4.625021868752117e+02 3.449178675698588e+02 
-1.871410409483092e+02 -2.447818790029155e+02 - ME 2.941989209837521e-05 + ME 4.858457850437588e-04 Event 209 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1676,7 +1676,7 @@ Event 209 Batch 0 2 2.827014058170527e+02 -6.682954863774688e+01 -1.958656753088385e+02 -1.925890275057887e+02 3 5.969812148172332e+02 5.625717004655273e+02 1.060136244597389e+02 -1.692949027847388e+02 4 6.203173793657136e+02 -4.957421518277804e+02 8.985205084909943e+01 3.618839302905275e+02 - ME 2.261939336541961e-05 + ME 1.004351001266980e-04 Event 210 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1684,7 +1684,7 @@ Event 210 Batch 0 2 3.369223392964550e+02 -2.366581006943837e+02 8.850719545688517e+01 -2.228813191927023e+02 3 6.926279093100447e+02 9.835546321295956e+01 -1.581805884470998e+02 6.671120783270956e+02 4 4.704497513935005e+02 1.383026374814242e+02 6.967339299021461e+01 -4.442307591343933e+02 - ME 3.044010300440331e-03 + ME 5.974710408786874e-02 Event 211 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1692,7 +1692,7 @@ Event 211 Batch 0 2 5.754314663824422e+02 -1.965408456680789e+02 -5.399725108422632e+02 3.037689947684008e+01 3 6.656941886103589e+02 4.112771407945243e+02 5.114655840792436e+02 1.113679599883347e+02 4 2.588743450071987e+02 -2.147362951264454e+02 2.850692676301957e+01 -1.417448594651748e+02 - ME 1.754510489093768e-05 + ME 4.382347812376007e-04 Event 212 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1700,7 +1700,7 @@ Event 212 Batch 0 2 5.922157374848572e+02 8.073316194509509e+00 4.947261155542873e+02 -3.254233732830556e+02 3 3.635572903001510e+02 8.951663862813328e+01 4.011175755255380e+01 3.500738802669425e+02 4 5.442269722149914e+02 -9.758995482264278e+01 -5.348378731068407e+02 -2.465050698388706e+01 - ME 1.919214373141161e-04 + ME 
3.041427876287276e-04 Event 213 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1708,7 +1708,7 @@ Event 213 Batch 0 2 7.434820262506830e+02 2.991548764052629e+02 2.111623598614188e+02 -6.470566753063675e+02 3 5.607612173038236e+02 -2.664197873565705e+02 -1.905271140771768e+02 4.551626726109781e+02 4 1.957567564454930e+02 -3.273508904869271e+01 -2.063524578424195e+01 1.918940026953895e+02 - ME 1.896082550340891e-04 + ME 1.827786070323022e-04 Event 214 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1716,7 +1716,7 @@ Event 214 Batch 0 2 5.400874280734793e+02 3.457358963402696e+02 2.445843697627679e+02 -3.351710101016577e+02 3 3.400793067879315e+02 1.482066942304564e+02 1.256466447865830e+02 2.791086371729012e+02 4 6.198332651385892e+02 -4.939425905707261e+02 -3.702310145493508e+02 5.606237292875651e+01 - ME 6.515553919952984e-05 + ME 1.356968066378560e-04 Event 215 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1724,7 +1724,7 @@ Event 215 Batch 0 2 3.916345321859864e+02 3.271767110560381e+02 -1.945589530122144e+02 9.208594000107233e+01 3 6.136750729169615e+02 -1.269585669220027e+02 2.644680756040779e+02 -5.390132228350478e+02 4 4.946903948970534e+02 -2.002181441340350e+02 -6.990912259186331e+01 4.469272828339764e+02 - ME 3.427926940877871e-05 + ME 6.207321332343461e-05 Event 216 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1732,7 +1732,7 @@ Event 216 Batch 0 2 3.767411090262154e+02 1.602503356822860e+02 2.758455349572533e+02 -2.004069210086422e+02 3 4.061922956351256e+02 3.340053729931861e+02 2.237650079776778e+02 5.798114391563544e+01 4 7.170665953386593e+02 -4.942557086754721e+02 -4.996105429349309e+02 1.424257770930068e+02 - ME 2.360785017217177e-04 + ME 1.232271832865728e-03 Event 217 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -1740,7 +1740,7 @@ Event 217 Batch 0 2 6.474118977458852e+02 -5.378641111590873e+02 -3.279650037002520e+02 1.492759847325320e+02 3 5.088298200539713e+02 3.261878344469131e+02 1.555821256186315e+02 -3.581947579501665e+02 4 3.437582822001433e+02 2.116762767121744e+02 1.723828780816206e+02 2.089187732176345e+02 - ME 1.388331578224744e-05 + ME 3.357118960820415e-05 Event 218 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1748,7 +1748,7 @@ Event 218 Batch 0 2 6.658501161076259e+02 -6.577627036244854e+02 -3.020200479570956e+01 9.895676706252418e+01 3 2.516345839620714e+02 1.565221509782131e+02 -1.156477271957936e+02 1.595192254662914e+02 4 5.825152999303023e+02 5.012405526462722e+02 1.458497319915031e+02 -2.584759925288157e+02 - ME 1.036808356896783e-04 + ME 5.956187308313417e-04 Event 219 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1756,7 +1756,7 @@ Event 219 Batch 0 2 4.328556070633435e+02 6.122246558068494e+01 -1.687441385117925e+02 3.938796795879554e+02 3 6.500677455605621e+02 -3.703058656885360e+02 4.356876543064814e+02 -3.092537914719426e+02 4 4.170766473760945e+02 3.090834001078509e+02 -2.669435157946888e+02 -8.462588811601287e+01 - ME 9.046106878448173e-05 + ME 2.797067114354785e-04 Event 220 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1764,7 +1764,7 @@ Event 220 Batch 0 2 3.686297280598666e+02 -3.497113779929074e+02 -8.765282776369953e+01 7.685577594963354e+01 3 4.155522773953191e+02 -1.777404948015450e+02 -1.525848366500187e+02 3.432344379292750e+02 4 7.158179945448145e+02 5.274518727944524e+02 2.402376644137182e+02 -4.200902138789084e+02 - ME 1.676729229638681e-03 + ME 3.485410710153060e-03 Event 221 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1772,7 +1772,7 @@ Event 221 Batch 0 2 
5.295220830718469e+02 3.654688468413813e+01 4.204675060608333e+02 3.197890523886257e+02 3 7.127556392876786e+02 -1.727486268095863e+02 -4.342549693537605e+02 -5.381460163035255e+02 4 2.577222776404743e+02 1.362017421254481e+02 1.378746329292729e+01 2.183569639148998e+02 - ME 2.031931825964470e-05 + ME 2.819264207321091e-05 Event 222 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1780,7 +1780,7 @@ Event 222 Batch 0 2 2.464305981122427e+02 -2.054199106396077e+02 6.127423271580306e+01 1.215572638876956e+02 3 6.926647117218595e+02 4.702892479611936e+02 3.872350261814336e+02 -3.296383785530530e+02 4 5.609046901658980e+02 -2.648693373215859e+02 -4.485092588972366e+02 2.080811146653574e+02 - ME 1.678695785515194e-05 + ME 6.319142394583372e-05 Event 223 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1788,7 +1788,7 @@ Event 223 Batch 0 2 2.463384302181125e+02 -1.209251938955738e+02 -2.140981972257043e+02 -1.488897673935926e+01 3 6.819620845265065e+02 -2.400891875757811e+02 5.819023806457059e+02 2.623339210620683e+02 4 5.716994852553812e+02 3.610143814713547e+02 -3.678041834200016e+02 -2.474449443227091e+02 - ME 4.810915220985587e-05 + ME 3.931927185620913e-04 Event 224 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1796,7 +1796,7 @@ Event 224 Batch 0 2 2.236851263016067e+02 -8.671871524968952e+01 1.717231909970332e+02 1.141317038679677e+02 3 5.308972974363861e+02 -3.715833295102001e+01 4.680039348616383e+02 2.478780257941054e+02 4 7.454175762620068e+02 1.238770482007099e+02 -6.397271258586715e+02 -3.620097296620728e+02 - ME 6.017706528853119e-02 + ME 8.708656265179471e-02 Event 225 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1804,7 +1804,7 @@ Event 225 Batch 0 2 5.094176014319268e+02 1.569347096242780e+02 -1.561291130928888e+00 -4.846394040251013e+02 3 
7.252311334449815e+02 -3.845161955462210e+02 -4.374219820797174e+01 6.133466494377277e+02 4 2.653512651230916e+02 2.275814859219426e+02 4.530348933890067e+01 -1.287072454126262e+02 - ME 1.151501859389029e-04 + ME 3.974215742688118e-04 Event 226 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1812,7 +1812,7 @@ Event 226 Batch 0 2 6.863217264048350e+02 -2.391756120967483e+02 -6.171186323675804e+02 1.816511279850093e+02 3 5.332348374442744e+02 1.096335504493486e+02 4.112484130583279e+02 -3.212391931833643e+02 4 2.804434361508906e+02 1.295420616473995e+02 2.058702193092524e+02 1.395880651983551e+02 - ME 1.438206074993319e-05 + ME 3.797053871351767e-05 Event 227 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1820,7 +1820,7 @@ Event 227 Batch 0 2 7.243206345463230e+02 -5.280189925476210e+02 -1.406011303275692e+02 4.754657162080069e+02 3 5.487499634657129e+02 3.840442912861271e+02 -1.353123555187442e+01 -3.917312987222202e+02 4 2.269294019879644e+02 1.439747012614939e+02 1.541323658794436e+02 -8.373441748578679e+01 - ME 5.165623507180856e-05 + ME 2.903986554770466e-04 Event 228 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1828,7 +1828,7 @@ Event 228 Batch 0 2 2.119578664379945e+02 1.625437651479949e+01 -1.806612394559917e+02 1.096514885776142e+02 3 6.254097456672617e+02 -3.200704000326812e+01 3.158243706171928e+02 5.388579277416935e+02 4 6.626323878947439e+02 1.575266348846865e+01 -1.351631311612011e+02 -6.485094163193077e+02 - ME 3.800526374221887e-02 + ME 8.951233069377997e-01 Event 229 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1836,7 +1836,7 @@ Event 229 Batch 0 2 5.921227120343664e+02 -3.877491982207575e+02 4.449193714386763e+02 -4.802726626309342e+01 3 4.688278331283221e+02 3.470549659129084e+02 -1.517581364471262e+02 -2.762641051115459e+02 4 
4.390494548373113e+02 4.069423230784909e+01 -2.931612349915501e+02 3.242913713746393e+02 - ME 1.250052930035257e-05 + ME 3.492131538818778e-05 Event 230 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1844,7 +1844,7 @@ Event 230 Batch 0 2 4.261952284727868e+02 2.153699775439378e+02 -1.171086083390750e+02 3.486312082969335e+02 3 3.540619701921573e+02 3.070144260847319e+01 1.307424531367546e+02 3.276029778648147e+02 4 7.197428013350559e+02 -2.460714201524109e+02 -1.363384479767965e+01 -6.762341861617483e+02 - ME 4.711214236813061e-02 + ME 3.186738302883428e-01 Event 231 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1852,7 +1852,7 @@ Event 231 Batch 0 2 4.205236024420392e+02 7.533931576750228e+01 -3.260217181731272e+02 -2.547036061581322e+02 3 5.397543491930860e+02 8.423195081267914e+01 -1.158376015978276e+02 5.204050211049134e+02 4 5.397220483648740e+02 -1.595712665801811e+02 4.418593197709548e+02 -2.657014149467809e+02 - ME 3.265984123744224e-04 + ME 5.532186388062512e-04 Event 232 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1860,7 +1860,7 @@ Event 232 Batch 0 2 4.295782852421121e+02 3.239064445356881e+02 9.240815775655221e-01 2.821724019337124e+02 3 7.183371274312143e+02 -6.155391061575082e+02 -1.955291718271078e+02 -3.144649112405858e+02 4 3.520845873266736e+02 2.916326616218201e+02 1.946050902495422e+02 3.229250930687335e+01 - ME 1.049779024540051e-05 + ME 6.730603828970119e-05 Event 233 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1868,7 +1868,7 @@ Event 233 Batch 0 2 3.640046126075324e+02 -2.220120664068515e+02 -1.165482463207536e+02 2.638683509799470e+02 3 4.682121509308883e+02 -1.009786196736112e+02 3.762431872847591e+02 2.597441061312976e+02 4 6.677832364615790e+02 3.229906860804628e+02 -2.596949409640055e+02 -5.236124571112447e+02 - ME 
7.598357868514145e-04 + ME 5.385640989777132e-03 Event 234 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1876,7 +1876,7 @@ Event 234 Batch 0 2 8.690043548936441e+01 -2.607433849884744e+01 -7.258333015587984e+01 4.004341073848801e+01 3 6.785651905172676e+02 -3.574930335951373e+02 -4.725723606052789e+01 5.748184081539155e+02 4 7.345343739933678e+02 3.835673720939847e+02 1.198405662164078e+02 -6.148618188924036e+02 - ME 8.152211059226219e-02 + ME 1.962113644780599e-01 Event 235 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1884,7 +1884,7 @@ Event 235 Batch 0 2 3.000566282865331e+02 1.219146462304108e+01 -2.126850238006026e+02 2.113064812540423e+02 3 7.160981218147422e+02 2.575873756248088e+02 2.779062108697769e+02 -6.076293293985470e+02 4 4.838452498987246e+02 -2.697788402478500e+02 -6.522118706917435e+01 3.963228481445046e+02 - ME 2.498899672933017e-05 + ME 3.940402333844027e-05 Event 236 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1892,7 +1892,7 @@ Event 236 Batch 0 2 1.510518772182422e+02 -9.497518588910037e+01 1.467158067736534e+01 1.165380984781943e+02 3 6.955499852411461e+02 5.933480346078575e+02 3.495450158124774e+02 9.770452249822526e+01 4 6.533981375406115e+02 -4.983728487187572e+02 -3.642165964898426e+02 -2.142426209764196e+02 - ME 2.623118294900277e-04 + ME 1.121647028585911e-03 Event 237 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1900,7 +1900,7 @@ Event 237 Batch 0 2 2.173874152942701e+02 2.069918593916189e+02 -3.850229167793934e+01 -5.412237993169356e+01 3 7.305677895866185e+02 -6.701932224704495e+02 -2.421540700080861e+02 1.610333695687662e+02 4 5.520447951191120e+02 4.632013630788306e+02 2.806563616860255e+02 -1.069109896370727e+02 - ME 2.170005261464319e-05 + ME 1.822378225061386e-04 Event 238 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1908,7 +1908,7 @@ Event 238 Batch 0 2 6.349573912113930e+02 -3.336495545457479e+02 -4.785400196851591e+02 2.506956580500139e+02 3 5.768887318987100e+02 4.812119270965607e+02 2.334547330568691e+02 -2.161818165921041e+02 4 2.881538768898968e+02 -1.475623725508129e+02 2.450852866282900e+02 -3.451384145790988e+01 - ME 1.383744831772315e-05 + ME 9.810731053503000e-05 Event 239 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1916,7 +1916,7 @@ Event 239 Batch 0 2 5.349076725903783e+02 -5.331874414268931e+02 1.887721601290929e+01 -3.848403846142781e+01 3 3.658437465440003e+02 8.335465236419728e+01 1.670818061666301e+01 -3.558292926602242e+02 4 5.992485808656214e+02 4.498327890626960e+02 -3.558539662957234e+01 3.943133311216517e+02 - ME 2.560110521983184e-05 + ME 9.226736931333760e-05 Event 240 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1924,7 +1924,7 @@ Event 240 Batch 0 2 2.870582387324442e+02 1.830793600232297e+02 -1.562409872742485e+02 1.564389154054251e+02 3 6.007192677438852e+02 3.433229388031108e+02 4.688113613010560e+02 -1.523446941819630e+02 4 6.122224935236703e+02 -5.264022988263405e+02 -3.125703740268075e+02 -4.094221223461989e+00 - ME 3.548113744927254e-05 + ME 1.424405912705748e-04 Event 241 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1932,7 +1932,7 @@ Event 241 Batch 0 2 7.424696267657401e+02 4.823783107714221e+02 2.498315161211407e+02 5.061190823507636e+02 3 2.455726236162737e+02 -1.827879695947952e+02 -1.199757723946156e+02 -1.118046764652876e+02 4 5.119577496179861e+02 -2.995903411766270e+02 -1.298557437265251e+02 -3.943144058854759e+02 - ME 2.366266620918590e-04 + ME 2.705973755259623e-03 Event 242 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1940,7 +1940,7 @@ 
Event 242 Batch 0 2 7.249130370348905e+02 1.676828147928013e+02 6.059046362201677e+02 -3.609168279440810e+02 3 6.240672718074169e+02 -4.529413961306761e+01 -5.490982345027019e+02 2.930862151720549e+02 4 1.510196911576933e+02 -1.223886751797337e+02 -5.680640171746593e+01 6.783061277202641e+01 - ME 1.668420503127583e-05 + ME 4.587322306592483e-05 Event 243 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1948,7 +1948,7 @@ Event 243 Batch 0 2 4.655090712555229e+02 2.096323612054770e+02 2.113490506800235e+02 3.578890153850057e+02 3 5.764797256412519e+02 6.697224883641857e+01 -5.382210340689440e+02 -1.953502251008744e+02 4 4.580112031032257e+02 -2.766046100418949e+02 3.268719833889206e+02 -1.625387902841314e+02 - ME 3.999521919602606e-05 + ME 2.309042201876567e-04 Event 244 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1956,7 +1956,7 @@ Event 244 Batch 0 2 5.237109195354749e+02 1.305098338947756e+02 -4.868141165486322e+02 -1.423106687020528e+02 3 5.804450110242352e+02 -4.045654344879671e+02 2.643676733537771e+02 3.214855413949400e+02 4 3.958440694402901e+02 2.740556005931916e+02 2.224464431948551e+02 -1.791748726928872e+02 - ME 2.634847163425152e-05 + ME 2.644202232750943e-04 Event 245 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1964,7 +1964,7 @@ Event 245 Batch 0 2 2.629169357520612e+02 2.457511487795889e+02 -4.402365929491729e+01 -8.242333044139184e+01 3 6.931386101565748e+02 -5.195573187661655e+02 4.004017488088275e+02 -2.240084037645317e+02 4 5.439444540913644e+02 2.738061699865766e+02 -3.563780895139104e+02 3.064317342059234e+02 - ME 1.052590061693975e-05 + ME 4.288053786412853e-05 Event 246 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1972,7 +1972,7 @@ Event 246 Batch 0 2 6.300937687157445e+02 -5.459948028041557e+02 3.085954426748102e+02 
6.063567799240802e+01 3 1.673910408536145e+02 -3.546130270298926e+01 7.662824936562275e+01 -1.445350060290698e+02 4 7.025151904306430e+02 5.814561055071442e+02 -3.852236920404341e+02 8.389932803666261e+01 - ME 1.915763997923398e-04 + ME 6.282756509154168e-04 Event 247 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1980,7 +1980,7 @@ Event 247 Batch 0 2 2.577847506495701e+02 2.418237207037818e+02 -8.449121421856779e+01 2.890502538162603e+01 3 5.130193185035739e+02 4.381905811488919e+02 1.366496386102691e+02 2.291390669832418e+02 4 7.291959308468561e+02 -6.800143018526737e+02 -5.215842439170134e+01 -2.580440923648679e+02 - ME 1.831864018495938e-03 + ME 4.005872724472581e-03 Event 248 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1988,7 +1988,7 @@ Event 248 Batch 0 2 7.033207479153643e+02 -5.040306065309413e+02 -2.020637997366072e+02 4.469714117975369e+02 3 1.758360012551320e+02 -1.471306652922549e+01 -4.035460943683606e+00 -1.751728862172264e+02 4 6.208432508295037e+02 5.187436730601667e+02 2.060992606802909e+02 -2.717985255803103e+02 - ME 1.512538512828554e-04 + ME 5.592865021063005e-04 Event 249 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1996,7 +1996,7 @@ Event 249 Batch 0 2 3.018816177222694e+02 5.523075638651412e+01 1.752331212074551e+02 2.395316845419020e+02 3 6.597415560701297e+02 6.315352823685419e+01 -6.561001191322722e+02 -2.834054254405022e+01 4 5.383768262076012e+02 -1.183842846233684e+02 4.808669979248172e+02 -2.111911419978518e+02 - ME 9.225490912808109e-05 + ME 4.868100986861644e-04 Event 250 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2004,7 +2004,7 @@ Event 250 Batch 0 2 2.166381935101301e+02 -1.289072913913530e+02 -1.189615590004073e+02 -1.271344351215279e+02 3 6.815426093761062e+02 -2.511966318704653e+02 5.323234433390903e+02 
3.435583388650892e+02 4 6.018191971137635e+02 3.801039232618182e+02 -4.133618843386827e+02 -2.164239037435611e+02 - ME 6.586594805989363e-05 + ME 3.468666532553966e-04 Event 251 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2012,7 +2012,7 @@ Event 251 Batch 0 2 6.676961532387151e+02 -3.991265595084280e+01 -4.419965947723094e+02 4.988628500443886e+02 3 7.150412702460949e+02 3.921851524844908e+01 5.505653759000154e+02 -4.545587894617490e+02 4 1.172625765151894e+02 6.941407023942340e-01 -1.085687811277060e+02 -4.430406058263954e+01 - ME 4.930952510857648e-05 + ME 5.615833562023813e-04 Event 252 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2020,7 +2020,7 @@ Event 252 Batch 0 2 2.112668789066533e+02 -1.147554660376938e+02 3.364589711187055e+01 -1.741632301749357e+02 3 7.393007599584276e+02 2.529046383258835e+02 -3.593132473314827e+02 5.945576909606565e+02 4 5.494323611349191e+02 -1.381491722881897e+02 3.256673502196121e+02 -4.203944607857206e+02 - ME 3.541023077707110e-04 + ME 2.709805393201018e-03 Event 253 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2028,7 +2028,7 @@ Event 253 Batch 0 2 7.299659304470913e+01 -4.405884533650594e+01 -5.451291667290519e+01 2.038780663930336e+01 3 7.253475305576840e+02 3.245698054519170e+02 -1.402290280555607e+02 -6.333397991328418e+02 4 7.016558763976062e+02 -2.805109601154107e+02 1.947419447284657e+02 6.129519924935382e+02 - ME 3.511004874943257e-04 + ME 6.484723438037138e-04 Event 254 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2036,7 +2036,7 @@ Event 254 Batch 0 2 1.982520535096858e+02 -6.164633378269741e+01 1.773450413210087e+02 -6.365801262063783e+01 3 7.183815394471145e+02 -1.984891252513599e+02 -6.893152145826987e+02 -3.896971029099802e+01 4 5.833664070431995e+02 2.601354590340572e+02 5.119701732616900e+02 
1.026277229116358e+02 - ME 1.539519794804785e-05 + ME 9.210498573936143e-05 Event 255 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2044,7 +2044,7 @@ Event 255 Batch 0 2 5.347080663542586e+02 -5.063606624096446e+02 1.592577719822621e+02 6.440929941880935e+01 3 2.475406015289465e+02 -1.856063881081879e+02 3.468010668896048e+00 -1.637516137347836e+02 4 7.177513321167953e+02 6.919670505178326e+02 -1.627257826511582e+02 9.934231431597431e+01 - ME 3.137689362725149e-04 + ME 1.305481727349711e-03 Event 0 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2052,7 +2052,7 @@ Event 0 Batch 1 2 5.775677821222389e+02 4.314431287975208e+02 -2.652567205762379e+02 -2.776332864556192e+02 3 6.023469575940325e+02 -3.228069847179709e+02 5.005558924007591e+02 8.978477890465942e+01 4 3.200852602837275e+02 -1.086361440795499e+02 -2.352991718245218e+02 1.878485075509607e+02 - ME 7.533072458757011e-06 + ME 2.846168667868940e-05 Event 1 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2060,7 +2060,7 @@ Event 1 Batch 1 2 7.241206267812560e+02 3.541578305635416e+02 -4.894807402105655e+02 3.991635230623179e+02 3 7.375567605136832e+02 -3.903081173548693e+02 4.920451519627784e+02 -3.867054653560791e+02 4 3.832261270506111e+01 3.615028679132773e+01 -2.564411752212873e+00 -1.245805770623896e+01 - ME 7.043932941624384e-05 + ME 1.002871021831580e-03 Event 2 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2068,7 +2068,7 @@ Event 2 Batch 1 2 4.849204091734790e+02 2.108660079931152e+02 4.054727376659824e+02 1.620962335024329e+02 3 2.728468517759738e+02 4.961449545460115e+01 2.005017763154939e+02 1.782774356422519e+02 4 7.422327390505470e+02 -2.604805034477164e+02 -6.059745139814763e+02 -3.403736691446848e+02 - ME 1.721146206228212e-02 + ME 2.729395913593408e-02 Event 3 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2076,7 +2076,7 @@ Event 3 Batch 1 2 4.264155576764489e+02 -4.170952165204416e+02 -7.054834331799705e+01 5.370977042744418e+01 3 7.108631972082329e+02 6.832597695609467e+02 -1.727180704166534e+02 -9.301097030017993e+01 4 3.627212451153183e+02 -2.661645530405051e+02 2.432664137346505e+02 3.930119987273574e+01 - ME 5.739226791327231e-06 + ME 5.466137525204964e-05 Event 4 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2084,7 +2084,7 @@ Event 4 Batch 1 2 7.183269968238449e+02 -3.584978055671311e+02 -5.048824553914336e+02 -3.640971079361008e+02 3 7.387431276480253e+02 4.013538934928407e+02 5.036810263913359e+02 3.618865629982628e+02 4 4.292987552812846e+01 -4.285608792570924e+01 1.201429000097643e+00 2.210544937839338e+00 - ME 5.884725836744927e-05 + ME 3.145606575501715e-04 Event 5 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2092,7 +2092,7 @@ Event 5 Batch 1 2 4.529780005473896e+02 -8.443182436392424e+01 4.445408460134587e+02 -2.106590230986445e+01 3 4.683757780543924e+02 -6.076819021151039e+01 -1.335482427838441e+02 -4.448010379662153e+02 4 5.786462213982179e+02 1.452000145754347e+02 -3.109926032296145e+02 4.658669402760799e+02 - ME 2.851579396246287e-05 + ME 8.481958952475706e-05 Event 6 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2100,7 +2100,7 @@ Event 6 Batch 1 2 6.238848262005389e+02 -1.065131260140052e+02 -4.741487807795934e+02 -3.912418229627633e+02 3 1.729069432107234e+02 -1.460869767542721e+02 -8.199113358821990e+01 4.281191710484079e+01 4 7.032082305887380e+02 2.526001027682771e+02 5.561399143678132e+02 3.484299058579224e+02 - ME 1.468701510222534e-04 + ME 4.868510537699180e-04 Event 7 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2108,7 +2108,7 
@@ Event 7 Batch 1 2 6.977203086376783e+02 -6.126072843634399e+02 -1.744636661244187e+02 2.847602033865263e+02 3 1.614193396272251e+02 -4.571584237043670e+00 8.497734613495712e+01 -1.371646983269120e+02 4 6.408603517350967e+02 6.171788686004836e+02 8.948631998946138e+01 -1.475955050596143e+02 - ME 9.523334397108766e-05 + ME 3.540796080305845e-04 Event 8 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2116,7 +2116,7 @@ Event 8 Batch 1 2 6.871091945484288e+02 4.059708628308462e+02 2.886614153103366e+02 4.732666173272762e+02 3 5.653302025665631e+02 -2.838835484844413e+02 -7.353399035097291e+01 -4.833229987253825e+02 4 2.475606028850081e+02 -1.220873143464048e+02 -2.151274249593637e+02 1.005638139810634e+01 - ME 3.726341895116938e-05 + ME 8.785466054587446e-05 Event 9 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2124,7 +2124,7 @@ Event 9 Batch 1 2 1.618579955503452e+02 1.385215220188489e+01 1.601201234527701e+02 -1.917484467788566e+01 3 7.196660585644588e+02 -4.527189715496824e+02 -4.214090439733052e+02 3.679391067910628e+02 4 6.184759458851959e+02 4.388668193477974e+02 2.612889205205349e+02 -3.487642621131772e+02 - ME 1.276556148007894e-04 + ME 1.054640649369016e-03 Event 10 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2132,7 +2132,7 @@ Event 10 Batch 1 2 7.832785200561162e+01 1.027681340851886e+01 -7.242726264265977e+01 -2.799877018853974e+01 3 7.448007230566494e+02 2.520540107528716e+02 6.813719334665398e+02 1.641011304445167e+02 4 6.768714249377393e+02 -2.623308241613905e+02 -6.089446708238800e+02 -1.361023602559769e+02 - ME 1.087112534498832e-04 + ME 5.876642887714617e-04 Event 11 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2140,7 +2140,7 @@ Event 11 Batch 1 2 5.478627446486676e+02 2.070882322301630e+02 -4.708081692757452e+02 
1.887000762823861e+02 3 6.997827604382593e+02 -4.209013422316021e+02 4.569873120768409e+02 -3.220257264800591e+02 4 2.523544949130733e+02 2.138131100014392e+02 1.382085719890436e+01 1.333256501976729e+02 - ME 7.092902148917371e-06 + ME 2.703695959900953e-05 Event 12 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2148,7 +2148,7 @@ Event 12 Batch 1 2 5.802868936311938e+02 -4.467002255894120e+01 5.211262762381961e+02 -2.513262266832405e+02 3 5.208038834706859e+02 2.151797013176283e+01 -4.993650129388666e+02 -1.463155694111945e+02 4 3.989092228981199e+02 2.315205242717860e+01 -2.176126329932955e+01 3.976417960944350e+02 - ME 4.980323856672599e-04 + ME 5.046437564325244e-04 Event 13 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2156,7 +2156,7 @@ Event 13 Batch 1 2 5.774880087360024e+02 1.576445054854711e+02 5.481077151088400e+02 -9.065617884226717e+01 3 5.915098138161557e+02 -3.018001633277128e+02 -3.808656371901898e+02 3.372564123391869e+02 4 3.310021774478421e+02 1.441556578422419e+02 -1.672420779186502e+02 -2.466002334969197e+02 - ME 5.587942683639647e-05 + ME 1.505341700965184e-03 Event 14 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2164,7 +2164,7 @@ Event 14 Batch 1 2 2.531797527967491e+02 -8.400833666640553e+01 -2.384535242035555e+02 -1.350938161690895e+01 3 5.261064571264828e+02 -1.751971590790252e+02 -3.334570051994592e+02 3.672878780523887e+02 4 7.207137900767681e+02 2.592054957454308e+02 5.719105294030147e+02 -3.537784964354798e+02 - ME 1.659114310450813e-03 + ME 3.373121845959189e-03 Event 15 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2172,7 +2172,7 @@ Event 15 Batch 1 2 4.605848765362425e+02 3.563504404614684e+02 1.735853700506503e+02 2.345653669687875e+02 3 4.216445088607453e+02 1.370719005416187e+02 -3.933730877164850e+02 
6.521502736890037e+01 4 6.177706146030118e+02 -4.934223410030871e+02 2.197877176658347e+02 -2.997803943376878e+02 - ME 9.110622752737525e-05 + ME 4.613631402771334e-04 Event 16 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2180,7 +2180,7 @@ Event 16 Batch 1 2 4.972484926572777e+02 -1.474122335888775e+02 -4.748950276275915e+02 -6.399787981958280e-01 3 5.072511849723048e+02 4.846784046822065e+02 1.224000792205880e+02 -8.607455661990267e+01 4 4.955003223704169e+02 -3.372661710933285e+02 3.524949484070036e+02 8.671453541809866e+01 - ME 1.035537635543116e-05 + ME 5.856804747367533e-05 Event 17 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2188,7 +2188,7 @@ Event 17 Batch 1 2 3.182636773520259e+02 -9.176062613973060e+01 -1.890905041641619e+02 2.389906630959087e+02 3 6.376303990615819e+02 -4.240378519397394e+02 2.706855745366566e+02 -3.917827786765570e+02 4 5.441059235863918e+02 5.157984780794702e+02 -8.159507037249479e+01 1.527921155806483e+02 - ME 2.964570775197734e-05 + ME 7.445984612273079e-05 Event 18 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2196,7 +2196,7 @@ Event 18 Batch 1 2 5.532560008158404e+02 -4.148613005881325e+02 1.689647846464811e+02 -3.247047971041214e+02 3 3.650144721835348e+02 -1.597348634907620e+02 -2.160675866909894e+02 2.470529017650751e+02 4 5.817295270006244e+02 5.745961640788944e+02 4.710280204450838e+01 7.765189533904635e+01 - ME 3.148325734685632e-05 + ME 9.119298978738387e-05 Event 19 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2204,7 +2204,7 @@ Event 19 Batch 1 2 3.263687475619531e+02 -1.904667433734991e+02 2.390747946355329e+02 -1.143775398573919e+02 3 7.331345945903582e+02 2.597391859223821e+02 -6.739404183465077e+02 1.258022320965774e+02 4 4.404966578476884e+02 -6.927244254888298e+01 4.348656237109747e+02 
-1.142469223918529e+01 - ME 9.665339952809457e-06 + ME 8.793129888044293e-05 Event 20 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2212,7 +2212,7 @@ Event 20 Batch 1 2 9.588718605412237e+01 4.259536217794532e+01 8.056474827260676e+01 -2.982128277051557e+01 3 7.250265356668370e+02 3.120913743414047e+02 -4.446787057645155e+02 4.801284204484703e+02 4 6.790862782790414e+02 -3.546867365193502e+02 3.641139574919093e+02 -4.503071376779550e+02 - ME 6.402422614019696e-04 + ME 3.686389281265799e-03 Event 21 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2220,7 +2220,7 @@ Event 21 Batch 1 2 1.825278201605081e+02 -1.533737674675502e+02 8.574830442242751e+01 4.939757963742074e+01 3 7.183016103669913e+02 1.713205736990392e+02 -6.275703015775031e+02 -3.045685162014731e+02 4 5.991705694725008e+02 -1.794680623148897e+01 5.418219971550755e+02 2.551709365640523e+02 - ME 1.806434468406198e-05 + ME 7.470861105912214e-05 Event 22 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2228,7 +2228,7 @@ Event 22 Batch 1 2 2.349542451120770e+02 9.235159917618290e+01 -2.156570331301489e+02 -1.291214495308476e+01 3 7.360601907662837e+02 -2.182033070539752e+02 6.568866822530020e+02 -2.503433799808774e+02 4 5.289855641216395e+02 1.258517078777923e+02 -4.412296491228531e+02 2.632555249339621e+02 - ME 8.007442232312076e-06 + ME 3.893602972207037e-05 Event 23 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2236,7 +2236,7 @@ Event 23 Batch 1 2 2.350908908124364e+02 -7.377772511691019e+00 -2.298431804723787e+02 -4.884063683135331e+01 3 6.797114625392685e+02 -5.485955088721076e+02 3.603976926464840e+02 1.765336882516069e+02 4 5.851976466482949e+02 5.559732813837987e+02 -1.305545121741055e+02 -1.276930514202538e+02 - ME 3.185713653214173e-05 + ME 2.057468423101862e-04 Event 24 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2244,7 +2244,7 @@ Event 24 Batch 1 2 4.355364173804401e+02 2.538053291625626e+02 -2.665393838801487e+02 -2.328767540869265e+02 3 4.093863144993796e+02 -1.953012891316528e+02 -3.573484670764558e+02 4.191221827828568e+01 4 6.550772681201798e+02 -5.850404003090968e+01 6.238878509566048e+02 1.909645358086408e+02 - ME 3.721637657688893e-05 + ME 1.895168702655672e-04 Event 25 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2252,7 +2252,7 @@ Event 25 Batch 1 2 7.365386968907909e+02 3.875876454009267e+02 3.151568854896985e+02 5.412404333367775e+02 3 5.208510884285567e+02 -2.430585576296288e+02 -1.518636440371932e+02 -4.349089876054084e+02 4 2.426102146806534e+02 -1.445290877712977e+02 -1.632932414525050e+02 -1.063314457313693e+02 - ME 7.982561935336398e-05 + ME 3.717867207603688e-04 Event 26 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2260,7 +2260,7 @@ Event 26 Batch 1 2 7.198867014174701e+02 5.189601929589824e+02 4.797253921416957e+02 -1.370428003807496e+02 3 3.889101953712928e+02 -1.847394503243419e+02 -2.837815501141775e+02 1.912864537085460e+02 4 3.912031032112371e+02 -3.342207426346404e+02 -1.959438420275183e+02 -5.424365332779646e+01 - ME 1.928349098758061e-05 + ME 1.222836766708484e-04 Event 27 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2268,7 +2268,7 @@ Event 27 Batch 1 2 6.732032222628646e+02 5.870808395006010e+02 -9.126179303429218e+01 3.165595544104447e+02 3 1.177373967283342e+02 7.847176641415683e+01 5.304379211899001e+00 -8.761358356661104e+01 4 7.090593810088013e+02 -6.655526059147578e+02 8.595741382239324e+01 -2.289459708438336e+02 - ME 6.795383824785976e-04 + ME 1.603290018002586e-03 Event 28 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2276,7 
+2276,7 @@ Event 28 Batch 1 2 6.475300414228806e+02 3.136396845517189e+02 3.816259196370642e+02 -4.186728559156669e+02 3 7.290923529036073e+02 -2.791764769994177e+02 -4.112865540505715e+02 5.333662195995520e+02 4 1.233776056735125e+02 -3.446320755230100e+01 2.966063441350738e+01 -1.146933636838856e+02 - ME 6.311296815400830e-04 + ME 5.037107889244314e-02 Event 29 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2284,7 +2284,7 @@ Event 29 Batch 1 2 3.156754590345620e+02 -2.870540678871016e+02 4.159516713841874e+01 -1.245825012466667e+02 3 4.770060274033896e+02 -2.355061130652810e+02 -3.231858413754910e+02 -2.600433287405434e+02 4 7.073185135620483e+02 5.225601809523826e+02 2.815906742370723e+02 3.846258299872100e+02 - ME 1.321807869823317e-04 + ME 7.956699356695784e-04 Event 30 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2292,7 +2292,7 @@ Event 30 Batch 1 2 6.091290614220995e+02 1.543004089904798e+02 4.216196287493766e+00 -5.892468251447810e+02 3 2.079357839022729e+02 2.034647466922837e+02 4.185675980476618e+01 9.348729279626889e+00 4 6.829351546756266e+02 -3.577651556827627e+02 -4.607295609226003e+01 5.798980958651539e+02 - ME 1.448382779935031e-04 + ME 3.902231064020147e-04 Event 31 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2300,7 +2300,7 @@ Event 31 Batch 1 2 6.901710072855793e+02 1.433309098684656e+01 6.447948515477649e+02 -2.457034416076623e+02 3 5.898919363861644e+02 1.120085307876391e+02 -4.815950471622465e+02 3.217029626736535e+02 4 2.199370563282564e+02 -1.263416217744856e+02 -1.631998043855182e+02 -7.599952106599136e+01 - ME 2.376400497996635e-05 + ME 2.415465849322543e-04 Event 32 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2308,7 +2308,7 @@ Event 32 Batch 1 2 6.144498311923271e+02 5.832947925341469e+02 -1.925283703230110e+02 
1.576726595169125e+01 3 2.478450424037004e+02 5.004284035329792e+01 2.389954177960992e+02 4.247433867565734e+01 4 6.377051264039724e+02 -6.333376328874447e+02 -4.646704747308818e+01 -5.824160462734862e+01 - ME 5.390650629646604e-05 + ME 2.160220890176678e-04 Event 33 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2316,7 +2316,7 @@ Event 33 Batch 1 2 6.134536717469736e+02 -1.625429495269566e+02 -1.853973484494194e+02 5.617232593785355e+02 3 5.361644687950269e+02 -3.755831293394986e+01 -9.992652347025609e+01 -5.254297294928764e+02 4 3.503818594579993e+02 2.001012624609065e+02 2.853238719196754e+02 -3.629352988565911e+01 - ME 1.005452860076771e-04 + ME 1.224582992507153e-04 Event 34 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2324,7 +2324,7 @@ Event 34 Batch 1 2 3.840838099420727e+02 -2.442269925519278e+02 -3.827314394217582e+01 -2.939535943332559e+02 3 6.022630974514659e+02 3.956891925431131e+01 5.086724982658299e+02 3.200116071158652e+02 4 5.136530926064613e+02 2.046580732976165e+02 -4.703993543236541e+02 -2.605801278260916e+01 - ME 2.313941306740064e-05 + ME 9.608243105510499e-05 Event 35 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2332,7 +2332,7 @@ Event 35 Batch 1 2 3.454350783663418e+02 -3.439607925797615e+02 2.363778141880094e+01 -2.139209721976717e+01 3 6.705698302143294e+02 5.215327591153251e+02 4.060443141865528e+02 -1.131171661597076e+02 4 4.839950914193290e+02 -1.775719665355635e+02 -4.296820956053536e+02 1.345092633794747e+02 - ME 7.982017052260048e-06 + ME 4.862206803317224e-05 Event 36 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2340,7 +2340,7 @@ Event 36 Batch 1 2 7.098652154429357e+02 2.489290984574327e+02 -1.674080692141068e+02 -6.433641786725617e+02 3 6.178479130357197e+02 -1.435715807033598e+02 2.588953561477193e+02 
5.423065917191846e+02 4 1.722868715213448e+02 -1.053575177540730e+02 -9.148728693361247e+01 1.010575869533772e+02 - ME 5.562249548714765e-05 + ME 6.680529568232270e-05 Event 37 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2348,7 +2348,7 @@ Event 37 Batch 1 2 6.906872786346031e+02 1.495946561071237e+02 1.712833879510068e+02 6.521750966909805e+02 3 3.682276595245592e+02 -1.358558710218083e+02 1.194309698061993e+02 -3.207351477449753e+02 4 4.410850618408380e+02 -1.373878508531530e+01 -2.907143577572061e+02 -3.314399489460051e+02 - ME 5.542438863722841e-04 + ME 2.014943348935539e-03 Event 38 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2356,7 +2356,7 @@ Event 38 Batch 1 2 6.131720166645955e+02 -5.222102655174087e+02 6.340623138461877e+00 3.213038392347352e+02 3 4.540063357567760e+02 2.932429176443922e+02 -3.207297067242505e+02 -1.313879727496968e+02 4 4.328216475786277e+02 2.289673478730168e+02 3.143890835857886e+02 -1.899158664850380e+02 - ME 3.150821423911933e-05 + ME 2.589645049118943e-04 Event 39 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2364,7 +2364,7 @@ Event 39 Batch 1 2 2.929747896182304e+02 2.510117592312210e+02 -1.378648144805472e+02 6.181113983529403e+01 3 6.287164314722783e+02 3.864928360025993e+01 6.254120614625328e+02 5.148142827864510e+01 4 5.783087789094894e+02 -2.896610428314818e+02 -4.875472469819856e+02 -1.132925681139394e+02 - ME 2.723120294663496e-05 + ME 1.708238325115053e-04 Event 40 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2372,7 +2372,7 @@ Event 40 Batch 1 2 1.143487538112954e+02 -3.203572478439017e+01 1.022340126870988e+02 3.996944439980560e+01 3 7.361483923235807e+02 5.924235295921244e+02 -3.838567751530157e+02 -2.088128187524163e+02 4 6.495028538651248e+02 -5.603878048077345e+02 2.816227624659169e+02 
1.688433743526105e+02 - ME 4.279185076498264e-05 + ME 2.026369815874481e-04 Event 41 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2380,7 +2380,7 @@ Event 41 Batch 1 2 6.384898508133350e+02 5.540399192408263e+02 -3.014826159773289e+02 -9.908223727147148e+01 3 3.510407251698805e+02 -1.719168197014114e+02 2.065966849440144e+02 -2.258140996521069e+02 4 5.104694240167846e+02 -3.821230995394149e+02 9.488593103331458e+01 3.248963369235784e+02 - ME 1.488395965626735e-05 + ME 4.455092331482675e-05 Event 42 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2388,7 +2388,7 @@ Event 42 Batch 1 2 3.291654598309212e+02 -1.090829060981258e+02 2.972891943885482e+02 -8.983292515941632e+01 3 6.884965239796815e+02 4.933628807557017e+02 -2.919492821202986e+02 3.812953554581829e+02 4 4.823380161893969e+02 -3.842799746575757e+02 -5.339912268249619e+00 -2.914624302987665e+02 - ME 5.767145017550451e-05 + ME 6.690811667999076e-04 Event 43 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2396,7 +2396,7 @@ Event 43 Batch 1 2 3.674173006007981e+02 2.791827424102563e+02 1.079644067383057e+02 2.130637369397045e+02 3 7.392205647816575e+02 -6.110484627794917e+02 -4.247874240022372e+01 -4.138385868609020e+02 4 3.933621346175442e+02 3.318657203692355e+02 -6.548566433808202e+01 2.007748499211975e+02 - ME 6.513986915725277e-06 + ME 2.734436884563990e-05 Event 44 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2404,7 +2404,7 @@ Event 44 Batch 1 2 2.081359682230012e+02 -1.082501549908087e+02 1.771964605001424e+02 1.427934167997762e+01 3 7.449563315308093e+02 5.092828751965591e+02 -5.388739609944279e+02 7.215083562608928e+01 4 5.469077002461893e+02 -4.010327202057504e+02 3.616775004942854e+02 -8.643017730606689e+01 - ME 1.838899544278803e-05 + ME 1.760644262839344e-04 Event 45 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2412,7 +2412,7 @@ Event 45 Batch 1 2 5.180982465404422e+02 4.470261481799612e+02 -3.368837017252423e+01 -2.597277606009553e+02 3 3.377595659674062e+02 -7.316527185649456e+01 2.454727770679006e+02 -2.201624016839132e+02 4 6.441421874921515e+02 -3.738608763234666e+02 -2.117844068953763e+02 4.798901622848684e+02 - ME 4.091340785269233e-05 + ME 1.645403798734011e-04 Event 46 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2420,7 +2420,7 @@ Event 46 Batch 1 2 6.296560291524888e+02 2.172411497655985e+02 5.821614514430422e+02 -1.017892054705761e+02 3 6.224001894826197e+02 1.405102091633609e+01 -6.218608257778048e+02 2.176414579432105e+01 4 2.479437813648912e+02 -2.312921706819346e+02 3.969937433476264e+01 8.002505967625511e+01 - ME 7.434320230190137e-06 + ME 4.041878897626609e-05 Event 47 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2428,7 +2428,7 @@ Event 47 Batch 1 2 5.458843469271557e+02 -1.019033861791133e+02 -1.559739004096151e+02 5.131058004898495e+02 3 2.573134207008558e+02 6.791700498899543e+01 -2.412204887508016e+02 5.839651284901167e+01 4 6.968022323719882e+02 3.398638119011781e+01 3.971943891604168e+02 -5.715023133388611e+02 - ME 4.005478861198618e-03 + ME 1.408798022766008e-02 Event 48 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2436,7 +2436,7 @@ Event 48 Batch 1 2 6.623920218006384e+02 -6.284562032939594e+02 -1.837527125398962e+02 -1.002044496053409e+02 3 1.251779629744606e+02 -7.502448682133647e+01 9.550779386908961e+01 3.031682869117444e+01 4 7.124300152249010e+02 7.034806901152959e+02 8.824491867080658e+01 6.988762091416655e+01 - ME 3.004757451335502e-04 + ME 8.682321044518227e-04 Event 49 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2444,7 
+2444,7 @@ Event 49 Batch 1 2 2.397494808364364e+02 2.393958238941666e+02 -4.144666783354266e+00 -1.233996761053010e+01 3 6.782491241100328e+02 -3.516321535544010e+02 -2.705899831712919e+02 5.129890485673947e+02 4 5.820013950535307e+02 1.122363296602344e+02 2.747346499546462e+02 -5.006490809568646e+02 - ME 6.040872325723622e-04 + ME 9.041285542966720e-03 Event 50 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2452,7 +2452,7 @@ Event 50 Batch 1 2 4.764898792162554e+02 4.667163214316568e+02 5.900817880915086e+01 -7.573978570375913e+01 3 5.114228101321805e+02 -2.035689445851523e+02 -4.549677995197112e+02 -1.145306811477843e+02 4 5.120873106515638e+02 -2.631473768465044e+02 3.959596207105603e+02 1.902704668515434e+02 - ME 9.692662313613028e-06 + ME 5.157319121365441e-05 Event 51 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2460,7 +2460,7 @@ Event 51 Batch 1 2 4.678795643859630e+02 4.629737719234085e+02 5.365495313512251e+01 4.108186077915564e+01 3 6.311645871918951e+02 -4.500610707732837e+02 -4.345770688214700e+02 8.340587481742408e+01 4 4.009558484221416e+02 -1.291270115012470e+01 3.809221156863474e+02 -1.244877355965797e+02 - ME 1.293558494013996e-05 + ME 1.517985021504320e-04 Event 52 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2468,7 +2468,7 @@ Event 52 Batch 1 2 3.696230029266819e+02 2.516704934433110e+02 2.514038675722595e+02 1.003953305301004e+02 3 6.696174214325739e+02 -2.754912388418390e+01 -6.493999246431116e+02 -1.609604756850079e+02 4 4.607595756407442e+02 -2.241213695591271e+02 3.979960570708519e+02 6.056514515490756e+01 - ME 8.655753222194317e-06 + ME 5.727699238559496e-05 Event 53 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2476,7 +2476,7 @@ Event 53 Batch 1 2 7.284624742442375e+01 -4.271742504396477e+01 -2.683807109937144e+01 
-5.255012179908527e+01 3 7.493542950735829e+02 3.356513586119740e+02 2.501807367708783e+02 6.215139772812374e+02 4 6.777994575019936e+02 -2.929339335680093e+02 -2.233426656715069e+02 -5.689638554821522e+02 - ME 2.372423861687152e-03 + ME 1.612275481129464e-02 Event 54 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2484,7 +2484,7 @@ Event 54 Batch 1 2 7.460259847230064e+02 2.055186857047568e+01 6.233229443227743e+02 4.093908861479223e+02 3 5.756222844616437e+02 2.606063779094539e+01 -4.696411468594731e+02 -3.318117699890848e+02 4 1.783517308153497e+02 -4.661250636142109e+01 -1.536817974633012e+02 -7.757911615883735e+01 - ME 5.046268590690708e-05 + ME 4.374243668355642e-04 Event 55 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2492,7 +2492,7 @@ Event 55 Batch 1 2 5.967428482894213e+02 -8.165820254184375e+01 5.098287527914877e+02 -2.991798919868828e+02 3 5.942526243827265e+02 5.606061544962815e+01 -2.905196430116550e+02 5.153559216750568e+02 4 3.090045273278509e+02 2.559758709221549e+01 -2.193091097798325e+02 -2.161760296881746e+02 - ME 1.849048785615045e-04 + ME 1.779007466146034e-03 Event 56 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2500,7 +2500,7 @@ Event 56 Batch 1 2 5.610874267302015e+02 -4.199055433713192e+02 3.580252469767042e+02 1.015694718309908e+02 3 6.303091265298390e+02 2.130872195586830e+02 -5.453843477211296e+02 -2.333224059286980e+02 4 3.086034467399593e+02 2.068183238126362e+02 1.873591007444254e+02 1.317529340977073e+02 - ME 7.213009143835112e-06 + ME 3.258989367177766e-05 Event 57 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2508,7 +2508,7 @@ Event 57 Batch 1 2 6.552053965855981e+02 4.516249927537604e+02 7.110694105335197e+00 4.746350341729917e+02 3 6.035190443408458e+02 -3.717228873476765e+02 2.148772607224587e+02 
-4.241286299324850e+02 4 2.412755590735562e+02 -7.990210540608396e+01 -2.219879548277939e+02 -5.050640424050685e+01 - ME 3.752873989265266e-05 + ME 1.623545585873121e-04 Event 58 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2516,7 +2516,7 @@ Event 58 Batch 1 2 2.959982971085279e+02 1.850007048157144e+02 -2.304987961744356e+02 1.612563397119956e+01 3 7.018897389129390e+02 -3.764226030262936e+02 4.376344751014918e+02 3.992884868423144e+02 4 5.021119639785326e+02 1.914218982105791e+02 -2.071356789270567e+02 -4.154141208135139e+02 - ME 1.901193343270815e-04 + ME 4.558573859477246e-03 Event 59 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2524,7 +2524,7 @@ Event 59 Batch 1 2 5.521089721327345e+02 1.223876815062619e+02 -3.629066091228882e+01 -5.371485459866160e+02 3 4.098988410471214e+02 -5.841964900319319e+01 -3.626461945087767e+02 1.819119075553315e+02 4 5.379921868201441e+02 -6.396803250306872e+01 3.989368554210655e+02 3.552366384312845e+02 - ME 1.780280399801712e-05 + ME 5.148841296796537e-05 Event 60 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2532,7 +2532,7 @@ Event 60 Batch 1 2 7.143828168925960e+02 -4.584044193456332e+02 -2.419772079280938e+02 -4.915844060170314e+02 3 1.284110307517517e+02 8.324300347118127e+01 -7.889851197070540e+01 5.774963203893758e+01 4 6.572061523556514e+02 3.751614158744520e+02 3.208757198987992e+02 4.338347739780938e+02 - ME 7.144001898958308e-05 + ME 1.673517837789511e-04 Event 61 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2540,7 +2540,7 @@ Event 61 Batch 1 2 4.394390210968651e+02 -2.137451655543886e+02 -3.779414621253704e+02 -6.767502250635177e+01 3 4.431311911324728e+02 3.845666395406355e+02 -2.150363068358313e+02 4.725610065709574e+01 4 6.174297877706618e+02 -1.708214739862469e+02 5.929777689612018e+02 
2.041892184925626e+01 - ME 2.870354731125455e-05 + ME 1.368591177943825e-04 Event 62 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2548,7 +2548,7 @@ Event 62 Batch 1 2 7.301725729481176e+02 4.281927891852710e+02 5.652737593150771e+02 -1.739784429324868e+02 3 7.567373964415995e+01 2.589885732647599e+01 -5.696550981957816e+01 4.255225906941358e+01 4 6.941536874077224e+02 -4.540916465117469e+02 -5.083082494954988e+02 1.314261838630732e+02 - ME 2.379197431250548e-04 + ME 8.513592598060080e-04 Event 63 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2556,7 +2556,7 @@ Event 63 Batch 1 2 4.361152320236988e+02 -3.738769057978321e+02 1.427754799584550e+02 -1.732850750548248e+02 3 5.817148313055657e+02 5.081993893256957e+02 2.829214478037172e+02 -8.998890070513914e+00 4 4.821699366707353e+02 -1.343224835278637e+02 -4.256969277621721e+02 1.822839651253387e+02 - ME 8.350404272725701e-06 + ME 4.544766189571194e-05 Event 64 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2564,7 +2564,7 @@ Event 64 Batch 1 2 6.097675704107204e+02 3.288514690970509e+02 4.971291587853200e+02 -1.285916042465611e+02 3 5.709532610348123e+02 -6.501292612520263e+01 -4.768258747557200e+02 3.072426254385416e+02 4 3.192791685544673e+02 -2.638385429718484e+02 -2.030328402960006e+01 -1.786510211919805e+02 - ME 3.000969253297957e-05 + ME 4.598138986874043e-04 Event 65 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2572,7 +2572,7 @@ Event 65 Batch 1 2 6.258641293880484e+02 3.743515439843765e+02 -1.622018320411498e+02 -4.746128903155367e+02 3 7.438702198751357e+02 -4.029113627030089e+02 2.325939036896868e+02 5.804355380128616e+02 4 1.302656507368158e+02 2.855981871863233e+01 -7.039207164853700e+01 -1.058226476973252e+02 - ME 3.162776051460646e-04 + ME 6.427333508548903e-03 Event 66 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2580,7 +2580,7 @@ Event 66 Batch 1 2 3.731957242404369e+02 1.596860493342637e+01 -3.714568973276624e+02 3.224632809376674e+01 3 6.079923612940432e+02 4.451199598539357e+02 3.189341902600864e+02 -2.642043054431177e+02 4 5.188119144655197e+02 -4.610885647873621e+02 5.252270706757586e+01 2.319579773493509e+02 - ME 1.034065067393998e-05 + ME 4.681392980523237e-05 Event 67 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2588,7 +2588,7 @@ Event 67 Batch 1 2 7.084256499213539e+02 6.318790977834966e+02 -2.229764540025608e+02 2.299504472951746e+02 3 5.168612394424738e+01 1.130069959366449e+01 -1.428140623590627e+01 4.837138651102398e+01 4 7.398882261343989e+02 -6.431797973771612e+02 2.372578602384670e+02 -2.783218338061985e+02 - ME 1.479715191731530e-02 + ME 5.878400132197954e-02 Event 68 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2596,7 +2596,7 @@ Event 68 Batch 1 2 5.644037677826096e+02 -7.446914007305443e+01 3.170710956176409e+02 4.609467220707991e+02 3 4.303832728799333e+02 -1.588265612792408e+02 -3.994808673830752e+02 -2.046757440246668e+01 4 5.052129593374568e+02 2.332957013522950e+02 8.240977176543441e+01 -4.404791476683325e+02 - ME 3.274273226082449e-04 + ME 8.108482137897523e-03 Event 69 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2604,7 +2604,7 @@ Event 69 Batch 1 2 2.379282923937934e+02 -4.413455715133102e+01 1.058497776082811e+02 -2.084654354245804e+02 3 5.822935131976616e+02 -5.806422676829345e+02 4.095409019445288e+01 -1.559022092337181e+01 4 6.797781944085444e+02 6.247768248342655e+02 -1.468038678027338e+02 2.240556563479522e+02 - ME 6.379305675073031e-05 + ME 3.039802585689931e-04 Event 70 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2612,7 
+2612,7 @@ Event 70 Batch 1 2 5.861861307468000e+02 1.831219916849830e+02 2.904683423406074e+02 -4.750880530376756e+02 3 4.633200606614189e+02 -4.245314712871158e+02 -1.339518705596282e+02 1.284344380284135e+02 4 4.504938085917810e+02 2.414094796021329e+02 -1.565164717809791e+02 3.466536150092620e+02 - ME 1.325653453486623e-05 + ME 3.530491740557932e-05 Event 71 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2620,7 +2620,7 @@ Event 71 Batch 1 2 7.383412459951699e+02 5.748049255568963e+02 -1.639684737984460e+02 -4.334298474879633e+02 3 3.973981306646684e+02 -3.228684354469153e+02 -4.837114091238284e+00 2.316416412804533e+02 4 3.642606233401616e+02 -2.519364901099809e+02 1.688055878896842e+02 2.017882062075102e+02 - ME 1.333441808219846e-05 + ME 3.103530482016079e-05 Event 72 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2628,7 +2628,7 @@ Event 72 Batch 1 2 3.538199915090663e+02 3.512029503136998e+02 -6.467835580753929e+00 -4.246458742680748e+01 3 5.344234504985296e+02 1.310173344785605e+01 3.836805260246265e+01 5.328833470497182e+02 4 6.117565579924039e+02 -3.643046837615559e+02 -3.190021702170876e+01 -4.904187596229107e+02 - ME 2.994704399169685e-03 + ME 9.376669006106200e-03 Event 73 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2636,7 +2636,7 @@ Event 73 Batch 1 2 4.694927197571710e+02 1.451947293992222e+02 -1.807863847612341e+02 4.082379055705570e+02 3 5.537325951281179e+02 -5.796379956652479e+01 5.401382741253894e+02 -1.072876026015002e+02 4 4.767746851147115e+02 -8.723092983269744e+01 -3.593518893641554e+02 -3.009503029690568e+02 - ME 1.535829386616431e-04 + ME 1.077472469645428e-03 Event 74 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2644,7 +2644,7 @@ Event 74 Batch 1 2 6.258444305735198e+02 -3.349227552763227e+02 4.941036656040852e+02 
1.880679848209580e+02 3 5.555040664889822e+02 3.765538795180102e+01 -5.474422011270130e+02 -8.645158222500005e+01 4 3.186515029374982e+02 2.972673673245214e+02 5.333853552292791e+01 -1.016164025959578e+02 - ME 1.487896902219418e-05 + ME 1.623439923565115e-04 Event 75 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2652,7 +2652,7 @@ Event 75 Batch 1 2 3.943316317993887e+02 5.588489849751632e+01 -2.552251009651266e+02 -2.953548066221912e+02 3 5.467466262348042e+02 -3.021648543602057e+02 -2.377479281839000e+02 3.887212326756534e+02 4 5.589217419658066e+02 2.462799558626894e+02 4.929730291490265e+02 -9.336642605346221e+01 - ME 4.632408498797698e-05 + ME 1.348649436679123e-04 Event 76 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2660,7 +2660,7 @@ Event 76 Batch 1 2 5.517772830004059e+02 2.282681125856672e+02 -4.885490190451381e+02 -1.169260227747471e+02 3 4.245403880864563e+02 -2.793100283061228e+02 1.521744876196477e+02 -2.811821020654221e+02 4 5.236823289131380e+02 5.104191572045557e+01 3.363745314254903e+02 3.981081248401691e+02 - ME 1.645260485784409e-05 + ME 5.074216551061466e-05 Event 77 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2668,7 +2668,7 @@ Event 77 Batch 1 2 3.781543446472003e+02 -5.926925448310480e+01 -1.775497893613220e+02 3.285786605157444e+02 3 6.702964816234122e+02 -6.066564226432872e+01 -1.057468051743550e+02 -6.591165802199176e+02 4 4.515491737293867e+02 1.199348967474336e+02 2.832965945356770e+02 3.305379197041734e+02 - ME 5.041095643414513e-05 + ME 6.321080405055773e-05 Event 78 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2676,7 +2676,7 @@ Event 78 Batch 1 2 4.564262045363139e+02 1.882572856930395e+02 1.751822011208171e+02 -3.770878823051468e+02 3 3.809544602625751e+02 -2.816334489555117e+02 1.992812047321844e+02 
-1.615422627793184e+02 4 6.626193352011103e+02 9.337616326247226e+01 -3.744634058530013e+02 5.386301450844651e+02 - ME 6.222463480998997e-05 + ME 2.572921643188974e-04 Event 79 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2684,7 +2684,7 @@ Event 79 Batch 1 2 6.126536521478922e+02 6.075062399138452e+02 -4.178945028651393e+01 6.733726903166659e+01 3 2.872846052831658e+02 -1.084163947926161e+02 2.139961846825774e+01 2.651799127051085e+02 4 6.000617425689430e+02 -4.990898451212283e+02 2.038983181825616e+01 -3.325171817367756e+02 - ME 6.289823950094716e-04 + ME 1.996659951821530e-03 Event 80 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2692,7 +2692,7 @@ Event 80 Batch 1 2 4.171281258707700e+02 -2.756641813219371e+02 1.445082905894664e+01 3.127240094205691e+02 3 3.805235327384960e+02 -2.955852199231463e+02 2.395269588958384e+02 7.373784162959287e+00 4 7.023483413907342e+02 5.712494012450838e+02 -2.539777879547846e+02 -3.200977935835284e+02 - ME 5.629434448779270e-04 + ME 1.297520069620947e-03 Event 81 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2700,7 +2700,7 @@ Event 81 Batch 1 2 7.471091333863935e+02 -9.753029041192970e+01 7.407154559164039e+02 -7.162458282065091e-01 3 6.775352561453885e+02 9.550863422814814e+01 -6.702673865908516e+02 -2.595678293896889e+01 4 7.535561046821789e+01 2.021656183781575e+00 -7.044806932555213e+01 2.667302876717550e+01 - ME 2.904529061551848e-05 + ME 1.022399816924924e-04 Event 82 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2708,7 +2708,7 @@ Event 82 Batch 1 2 4.309094465924175e+02 3.042233433179616e+02 2.799835808203350e+02 -1.214096495919827e+02 3 5.540384887187945e+02 -4.824447657759213e+02 1.988969596446625e+02 1.861335391629672e+02 4 5.150520646887885e+02 1.782214224579596e+02 -4.788805404649973e+02 
-6.472388957098450e+01 - ME 1.778678120024833e-05 + ME 1.053635072607165e-04 Event 83 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2716,7 +2716,7 @@ Event 83 Batch 1 2 4.869534474909295e+02 -4.727010820510885e+02 1.062322962656182e+02 4.890855018466118e+01 3 3.520990385354405e+02 -1.437544586613779e+02 -3.142298368411062e+02 6.758696761482639e+01 4 6.609475139736298e+02 6.164555407124665e+02 2.079975405754878e+02 -1.164955177994876e+02 - ME 7.948516811691567e-05 + ME 2.998516055200512e-04 Event 84 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2724,7 +2724,7 @@ Event 84 Batch 1 2 1.391975815431583e+01 -3.682657486111166e-01 -1.138840508663312e+01 -7.995516055627093e+00 3 7.493632094786751e+02 -3.452281541586202e+01 3.833012084573049e+02 6.429880080772211e+02 4 7.367170323670085e+02 3.489108116447313e+01 -3.719128033706718e+02 -6.349924920215940e+02 - ME 8.671177508029917e-02 + ME 3.806217512266510e-01 Event 85 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2732,7 +2732,7 @@ Event 85 Batch 1 2 7.362448947738020e+02 6.409220704967113e+02 3.243429451315054e+02 1.614840505254833e+02 3 1.517836214454495e+02 -1.266859291808411e+02 -6.780846852200752e+01 4.889738933094901e+01 4 6.119714837807480e+02 -5.142361413158706e+02 -2.565344766094980e+02 -2.103814398564324e+02 - ME 1.062305495679385e-04 + ME 5.694785892689211e-04 Event 86 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2740,7 +2740,7 @@ Event 86 Batch 1 2 5.451728369778392e+02 -6.605005893803180e+01 1.066920544886257e+02 -5.305352178712969e+02 3 3.158718592284829e+02 -1.755596039144849e+02 2.550395858012225e+02 6.251932981237656e+01 4 6.389553037936773e+02 2.416096628525165e+02 -3.617316402898481e+02 4.680158880589203e+02 - ME 4.057626974930324e-05 + ME 1.469986179099727e-04 Event 87 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2748,7 +2748,7 @@ Event 87 Batch 1 2 3.414211232216659e+02 1.437256906952883e+02 1.534640422371205e+02 -2.689983214749668e+02 3 5.081668091119999e+02 4.794742948200324e+02 -1.464748766741243e+02 8.296394996143997e+01 4 6.504120676663341e+02 -6.231999855153207e+02 -6.989165562996117e+00 1.860343715135268e+02 - ME 3.656584417835253e-05 + ME 1.823135893899652e-04 Event 88 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2756,7 +2756,7 @@ Event 88 Batch 1 2 2.925516585730864e+02 1.655911293372511e+01 2.598275245766865e+02 -1.334238591297045e+02 3 7.159840369510271e+02 -1.056844973272874e+02 -3.694097043713192e+02 6.041526284885822e+02 4 4.914643044758866e+02 8.912538439356234e+01 1.095821797946327e+02 -4.707287693588777e+02 - ME 2.327745727475104e-03 + ME 8.728488941697977e-02 Event 89 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2764,7 +2764,7 @@ Event 89 Batch 1 2 6.333634651097186e+02 1.209853522660007e+02 5.372166546881791e+02 -3.129058794565919e+02 3 6.221307427802806e+02 5.757192259699385e+01 -4.327483989541182e+02 4.432391657372765e+02 4 2.445057921100010e+02 -1.785572748629945e+02 -1.044682557340609e+02 -1.303332862806847e+02 - ME 5.047204144927262e-05 + ME 5.497507832908574e-04 Event 90 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2772,7 +2772,7 @@ Event 90 Batch 1 2 3.111538587406461e+02 2.628215106651484e+02 -6.985334981761831e+01 -1.512021390726355e+02 3 5.216486323898988e+02 1.252715366480781e+02 4.457714554600226e+02 -2.402335265468457e+02 4 6.671975088694549e+02 -3.880930473132266e+02 -3.759181056424042e+02 3.914356656194811e+02 - ME 4.503542584588689e-05 + ME 2.329075524537458e-04 Event 91 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2780,7 
+2780,7 @@ Event 91 Batch 1 2 3.007803348469016e+02 8.390513937949677e+01 2.884042062049404e+02 -1.586667134655829e+01 3 6.256884422056424e+02 2.364580673743878e+02 -3.590826126759745e+02 -4.545693416378727e+02 4 5.735312229474563e+02 -3.203632067538847e+02 7.067840647103421e+01 4.704360129844310e+02 - ME 2.635583378174906e-05 + ME 6.478111274774788e-05 Event 92 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2788,7 +2788,7 @@ Event 92 Batch 1 2 6.843865618656529e+02 -2.264962467301474e+02 -5.909185329480341e+02 2.605757158639088e+02 3 6.645516272550811e+02 3.453347116263074e+02 4.983670680340538e+02 -2.720350487207341e+02 4 1.510618108792659e+02 -1.188384648961601e+02 9.255146491398015e+01 1.145933285682523e+01 - ME 1.711437740567050e-05 + ME 9.365402433981294e-05 Event 93 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2796,7 +2796,7 @@ Event 93 Batch 1 2 5.579763469381434e+02 2.180908585044468e+02 5.135246110359701e+02 8.151996049100932e+00 3 3.333821836060117e+02 1.681122988324202e+02 -1.261705574188212e+02 2.587719570738210e+02 4 6.086414694558448e+02 -3.862031573368670e+02 -3.873540536171486e+02 -2.669239531229223e+02 - ME 1.157787815150910e-04 + ME 5.183695239236329e-04 Event 94 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2804,7 +2804,7 @@ Event 94 Batch 1 2 4.534979734151987e+02 1.139662723650677e+02 2.686183171543304e+01 4.381216071501101e+02 3 3.856184698299744e+02 1.545134372854228e+02 -3.452526490806396e+02 7.501873282757614e+01 4 6.608835567548277e+02 -2.684797096504910e+02 3.183908173652065e+02 -5.131403399776862e+02 - ME 1.545010233607317e-03 + ME 6.944325623628402e-03 Event 95 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2812,7 +2812,7 @@ Event 95 Batch 1 2 2.828073115974175e+02 -5.711637476392460e+01 5.915078172645698e+01 
-2.705898746219725e+02 3 6.809618671276158e+02 3.772100991821226e+02 3.247893528880094e+02 4.646864338535512e+02 4 5.362308212749670e+02 -3.200937244181981e+02 -3.839401346144663e+02 -1.940965592315787e+02 - ME 6.408796328924562e-05 + ME 2.560512106670314e-04 Event 96 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2820,7 +2820,7 @@ Event 96 Batch 1 2 4.639832102051440e+02 -4.275497908582962e+02 -1.317248975374901e+02 -1.230046627491649e+02 3 7.474114851375481e+02 6.594176555428718e+02 2.654537688070380e+02 2.309254864669502e+02 4 2.886053046573076e+02 -2.318678646845757e+02 -1.337288712695479e+02 -1.079208237177853e+02 - ME 1.445191791082226e-05 + ME 2.440162169445852e-04 Event 97 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2828,7 +2828,7 @@ Event 97 Batch 1 2 5.095921959312568e+02 3.190102848863560e+02 3.100341192456060e+02 2.485869851668986e+02 3 4.555541331018014e+02 -2.788120391899956e+02 2.221549471930723e+02 -2.836205112936887e+02 4 5.348536709669415e+02 -4.019824569636059e+01 -5.321890664386783e+02 3.503352612679014e+01 - ME 2.250661525403011e-05 + ME 8.198891770965733e-05 Event 98 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2836,7 +2836,7 @@ Event 98 Batch 1 2 5.299941952467790e+02 -2.570048161992350e+02 -4.630296380940593e+02 -2.111695271961878e+01 3 7.352146396921255e+02 2.361229278157243e+02 6.962552486063584e+02 3.893348873424185e+00 4 2.347911650610957e+02 2.088188838351074e+01 -2.332256105122990e+02 1.722360384619465e+01 - ME 5.654417419793765e-06 + ME 6.760444392591968e-05 Event 99 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2844,7 +2844,7 @@ Event 99 Batch 1 2 4.290897291078425e+02 3.747236205606835e+02 2.040795775432686e+02 -4.529602465443949e+01 3 6.438744429739487e+02 -5.215755139094103e+02 2.133414139578182e+01 
3.769325350988583e+02 4 4.270358279182090e+02 1.468518933487271e+02 -2.254137189390505e+02 -3.316365104444187e+02 - ME 8.457850707842401e-05 + ME 2.024851967866169e-03 Event 100 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2852,7 +2852,7 @@ Event 100 Batch 1 2 5.119062275524872e+02 -4.721600394809319e+02 -1.845880136125884e+02 7.099400083769524e+01 3 4.523854579707449e+02 2.836789572262426e+02 -3.060214184981774e+02 -1.747276258374610e+02 4 5.357083144767672e+02 1.884810822546894e+02 4.906094321107658e+02 1.037336249997658e+02 - ME 1.420495101373495e-05 + ME 6.898305006855298e-05 Event 101 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2860,7 +2860,7 @@ Event 101 Batch 1 2 6.024072815192737e+02 -3.080418730730875e+02 -4.692284526425155e+02 2.186993289696520e+02 3 3.347434020484399e+02 8.940653726951260e+01 -3.939923552329941e+01 -3.201676381969582e+02 4 5.628493164322859e+02 2.186353358035749e+02 5.086276881658150e+02 1.014683092273061e+02 - ME 2.743452031293993e-05 + ME 9.290725627447436e-05 Event 102 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2868,7 +2868,7 @@ Event 102 Batch 1 2 5.910857738801296e+02 3.707548039128416e+02 -7.516477307090547e+01 -4.541734518311494e+02 3 2.311218706704979e+02 4.536804143672514e+01 -2.262982016400413e+02 1.217307902336991e+01 4 6.777923554493723e+02 -4.161228453495667e+02 3.014629747109467e+02 4.420003728077793e+02 - ME 7.158169676479796e-05 + ME 2.633339755449651e-04 Event 103 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2876,7 +2876,7 @@ Event 103 Batch 1 2 6.627949406417042e+02 7.189602123685950e+01 -6.391860825813610e+02 -1.599038689489492e+02 3 5.519979886399102e+02 1.442810582977179e+02 4.734454174874869e+02 2.444057944057306e+02 4 2.852070707183856e+02 -2.161770795345774e+02 1.657406650938741e+02 
-8.450192545678139e+01 - ME 1.658567428345252e-05 + ME 1.652798222861839e-04 Event 104 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2884,7 +2884,7 @@ Event 104 Batch 1 2 4.368180791462563e+02 -3.483499330357901e+02 -2.596280064690262e+02 4.533935023690698e+01 3 4.635715977792429e+02 1.873023362819025e+02 -2.251347602994603e+02 -3.593477435519053e+02 4 5.996103230745010e+02 1.610475967538876e+02 4.847627667684865e+02 3.140083933149983e+02 - ME 2.162124469235967e-05 + ME 9.158171748371188e-05 Event 105 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2892,7 +2892,7 @@ Event 105 Batch 1 2 5.701708357490469e+02 2.288495716262106e+02 -4.521314661478370e+02 -2.613422905391967e+02 3 3.711008490497917e+02 -3.362590561223710e+02 -8.126001400906793e+01 1.343223639771668e+02 4 5.587283152011612e+02 1.074094844961603e+02 5.333914801569049e+02 1.270199265620299e+02 - ME 1.720246557093887e-05 + ME 7.043372303967046e-05 Event 106 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2900,7 +2900,7 @@ Event 106 Batch 1 2 6.775588183099673e+02 5.149765831731705e+02 3.445381345095063e+02 -2.741870619150275e+02 3 7.044100837534635e+02 -4.546975847980706e+02 -4.392260662935809e+02 3.106833358270535e+02 4 1.180310979365712e+02 -6.027899837509908e+01 9.468793178407486e+01 -3.649627391202603e+01 - ME 2.786544600802367e-05 + ME 3.259673897057837e-04 Event 107 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2908,7 +2908,7 @@ Event 107 Batch 1 2 6.046880513041550e+02 2.289413119004024e+02 -5.349774474143721e+02 -1.644160754103499e+02 3 3.366746442316215e+02 -7.166101576320902e+01 2.452245434825371e+01 3.280444544890399e+02 4 5.586373044642238e+02 -1.572802961371935e+02 5.104549930661184e+02 -1.636283790786902e+02 - ME 4.667002706670146e-04 + ME 8.859556065170558e-04 Event 108 Batch 
1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2916,7 +2916,7 @@ Event 108 Batch 1 2 6.239206451413978e+02 -2.218030564243363e+02 5.011455197099735e+02 -2.982172759400455e+02 3 2.841199272340513e+02 1.209406641294798e+02 7.967327320293104e+01 2.444374323800143e+02 4 5.919594276245514e+02 1.008623922948564e+02 -5.808187929129044e+02 5.377984356003120e+01 - ME 7.961277501126149e-05 + ME 1.727643234936365e-04 Event 109 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2924,7 +2924,7 @@ Event 109 Batch 1 2 3.093404598873124e+02 1.546999830656544e+02 1.629193992247174e+02 2.126421988200774e+02 3 5.287372542258961e+02 -2.136116696975048e+02 -1.865832176193536e+02 4.462284633214169e+02 4 6.619222858867909e+02 5.891168663185049e+01 2.366381839463621e+01 -6.588706621414941e+02 - ME 2.902408960420708e-01 + ME 1.686695657867669e+01 Event 110 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2932,7 +2932,7 @@ Event 110 Batch 1 2 4.920948406187608e+02 -8.595212543403569e+01 -4.824913009925944e+02 -4.440392734262522e+01 3 4.634042325716594e+02 -2.085760624772916e+00 1.255608851371819e+02 4.460645653843308e+02 4 5.445009268095798e+02 8.803788605880843e+01 3.569304158554124e+02 -4.016606380417056e+02 - ME 1.043536440561108e-03 + ME 4.151412887207382e-03 Event 111 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2940,7 +2940,7 @@ Event 111 Batch 1 2 4.637454700443120e+02 1.543048221589588e+02 -4.372769385391800e+02 6.225902899506631e+00 3 3.246747011850293e+02 -5.128652792678845e+01 -2.274142471268230e+02 2.259781269206006e+02 4 7.115798287706589e+02 -1.030182942321705e+02 6.646911856660031e+02 -2.322040298201072e+02 - ME 5.219332617201280e-04 + ME 1.240833065187375e-03 Event 112 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-2948,7 +2948,7 @@ Event 112 Batch 1 2 6.923761777814550e+02 3.939190124845535e+02 4.398224952082178e+01 -5.676954684419625e+02 3 5.277418353503033e+02 -4.270527740856185e+02 4.970714905179168e+01 3.060499505927539e+02 4 2.798819868682421e+02 3.313376160106501e+01 -9.368939857261346e+01 2.616455178492087e+02 - ME 4.381536575941429e-05 + ME 5.385735959435035e-05 Event 113 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2956,7 +2956,7 @@ Event 113 Batch 1 2 7.174898838850694e+02 -6.130145063482008e+02 3.726797356942233e+02 1.071275347265524e+01 3 1.705115822510491e+02 3.993583199494100e+01 -1.624320619120163e+02 3.309311510932528e+01 4 6.119985338638814e+02 5.730786743532599e+02 -2.102476737822071e+02 -4.380586858198049e+01 - ME 4.914674319256647e-05 + ME 2.197559713387976e-04 Event 114 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2964,7 +2964,7 @@ Event 114 Batch 1 2 6.772826088252357e+02 -1.430288042596954e+02 -3.410390118171982e+02 5.674036356844296e+02 3 6.725037798358682e+02 3.626161999767239e+01 2.510744134018114e+02 -6.228226615527174e+02 4 1.502136113388951e+02 1.067671842620232e+02 8.996459841538707e+01 5.541902586828807e+01 - ME 7.986648389935193e-05 + ME 8.926156406775035e-05 Event 115 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2972,7 +2972,7 @@ Event 115 Batch 1 2 9.320551230331124e+01 1.288474310894606e+01 -2.581623869377880e+01 8.862715576190526e+01 3 6.672654287607164e+02 1.525114284892182e+02 2.829200767588875e+02 5.847560574856374e+02 4 7.395290589359720e+02 -1.653961715981643e+02 -2.571038380651088e+02 -6.733832132475428e+02 - ME 4.304938165075599e-01 + ME 1.800237703627863e+00 Event 116 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2980,7 +2980,7 @@ Event 116 Batch 1 2 4.951202926530015e+02 -4.575339943514647e+02 
4.220102313368785e+01 1.844608951947751e+02 3 3.101750696753587e+02 -4.711582585559527e+01 2.172188132736168e+02 2.163438466008694e+02 4 6.947046376716394e+02 5.046498202070600e+02 -2.594198364073050e+02 -4.008047417956444e+02 - ME 5.988625984136040e-04 + ME 1.933367100533606e-03 Event 117 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2988,7 +2988,7 @@ Event 117 Batch 1 2 6.543248494478489e+02 1.390926466871539e+02 9.107024539473488e+01 6.328510524967589e+02 3 5.040443237953712e+02 6.874740772121054e+01 1.336336536624387e+02 -4.811200690999848e+02 4 3.416308267567792e+02 -2.078400544083643e+02 -2.247038990571737e+02 -1.517309833967742e+02 - ME 3.026560085299302e-04 + ME 4.207453923038474e-04 Event 118 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2996,7 +2996,7 @@ Event 118 Batch 1 2 5.829230400014206e+02 5.307803371482089e+02 -3.192285892796672e+01 2.388565162167381e+02 3 3.965113090906140e+02 -5.470249758902820e+01 2.256187790844517e+02 -3.214420966810604e+02 4 5.205656509079653e+02 -4.760778395591807e+02 -1.936959201564850e+02 8.258558046432242e+01 - ME 2.168340782914014e-05 + ME 7.464562943747175e-05 Event 119 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3004,7 +3004,7 @@ Event 119 Batch 1 2 3.549567073991255e+02 2.281637891139605e+02 1.474502150787006e+02 2.284600261271838e+02 3 4.727085372220640e+02 7.463684946128350e+01 -3.092948822053327e+02 3.495988811576870e+02 4 6.723347553788102e+02 -3.028006385752440e+02 1.618446671266322e+02 -5.780589072848707e+02 - ME 1.664672733965846e-03 + ME 1.455012849105755e-02 Event 120 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3012,7 +3012,7 @@ Event 120 Batch 1 2 7.192117275853698e+02 4.094232477570927e+02 -5.552624156333899e+02 -2.032775518283800e+02 3 3.685061529232585e+02 -2.522084621786424e+02 
1.741347663658646e+02 2.046087962197375e+02 4 4.122821194913712e+02 -1.572147855784500e+02 3.811276492675253e+02 -1.331244391357209e+00 - ME 1.900262756274459e-05 + ME 9.281995463485567e-05 Event 121 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3020,7 +3020,7 @@ Event 121 Batch 1 2 1.923953846467517e+02 -5.182078839520096e+01 -1.486351786617837e+02 -1.106262789198433e+02 3 6.582127150877787e+02 -3.509182841037630e+02 -1.191939510078701e+02 5.439606035624541e+02 4 6.493919002654695e+02 4.027390724989639e+02 2.678291296696539e+02 -4.333343246426108e+02 - ME 5.360055113881300e-04 + ME 1.925188892577692e-03 Event 122 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3028,7 +3028,7 @@ Event 122 Batch 1 2 6.905732817636248e+02 3.462508192534570e+02 -5.375670569609784e+02 -2.608131264380775e+02 3 7.097575386120018e+02 -2.677396278645660e+02 5.849221766424142e+02 2.998954860604125e+02 4 9.966917962437387e+01 -7.851119138889094e+01 -4.735511968143584e+01 -3.908235962233509e+01 - ME 3.451011759976180e-05 + ME 5.007312135859238e-04 Event 123 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3036,7 +3036,7 @@ Event 123 Batch 1 2 4.035126033432560e+02 2.481103298242076e+01 -3.878573016343356e+02 -1.085059780294573e+02 3 3.541388771651666e+02 1.572344474048876e+02 -3.105653677404273e+02 -6.512161875550808e+01 4 7.423485194915780e+02 -1.820454803873083e+02 6.984226693747627e+02 1.736275967849660e+02 - ME 3.471230489499830e-03 + ME 2.043564129780385e-02 Event 124 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3044,7 +3044,7 @@ Event 124 Batch 1 2 5.353042728143347e+02 -4.785252055946481e+02 -2.279396245170433e+02 7.488537693644093e+01 3 7.454081943698113e+02 6.785307544150930e+02 3.069354144183444e+02 -3.193811081429426e+01 4 2.192875328158541e+02 
-2.000055488204448e+02 -7.899578990130104e+01 -4.294726612214667e+01 - ME 6.765427234678898e-06 + ME 1.399009675490331e-04 Event 125 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3052,7 +3052,7 @@ Event 125 Batch 1 2 7.351681880566981e+02 -1.932492970253984e+01 -4.393064933429818e+02 -5.891592456452273e+02 3 6.537497908129355e+02 -2.883189353576726e+01 3.454898907503182e+02 5.542510679217788e+02 4 1.110820211303664e+02 4.815682323830688e+01 9.381660259266363e+01 3.490817772344844e+01 - ME 6.639428548470109e-05 + ME 1.431077255619906e-04 Event 126 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3060,7 +3060,7 @@ Event 126 Batch 1 2 5.568747108147126e+02 1.149185667256990e+02 4.264979152236775e+02 -3.391204725116689e+02 3 6.934211462641822e+02 -1.939160042589616e+02 -6.294239612595663e+02 2.169215212257340e+02 4 2.497041429211053e+02 7.899743753326281e+01 2.029260460358889e+02 1.221989512859350e+02 - ME 9.143592130512915e-06 + ME 3.344185566612618e-05 Event 127 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3068,7 +3068,7 @@ Event 127 Batch 1 2 7.108931196972316e+02 4.270547743949553e+02 5.664613189451065e+02 -4.598718776252147e+01 3 4.445675167124290e+02 -1.247884466860518e+02 -4.129475031266345e+02 1.074359351009545e+02 4 3.445393635903407e+02 -3.022663277089035e+02 -1.535138158184720e+02 -6.144874733843321e+01 - ME 1.427738327825488e-05 + ME 1.180920695556687e-04 Event 128 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3076,7 +3076,7 @@ Event 128 Batch 1 2 5.312407894292422e+02 -7.192118124205533e+01 -4.398126160332176e+02 -2.891521793453568e+02 3 5.717192413787027e+02 3.434745903572437e+02 1.811915566412192e+02 4.195923218357252e+02 4 3.970399691920551e+02 -2.715534091151883e+02 2.586210593919984e+02 -1.304401424903685e+02 - ME 
3.532660248239223e-05 + ME 1.848006274423395e-04 Event 129 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3084,7 +3084,7 @@ Event 129 Batch 1 2 6.644129951428383e+02 -3.595672586482287e+02 4.645590915434784e+02 3.103882489514914e+02 3 1.967652372382455e+02 -5.204943416929049e+01 8.794498000645085e+00 -1.895522930301724e+02 4 6.388217676189169e+02 4.116166928175192e+02 -4.733535895441232e+02 -1.208359559213191e+02 - ME 9.192558188476414e-05 + ME 3.082956717278722e-04 Event 130 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3092,7 +3092,7 @@ Event 130 Batch 1 2 7.302263990443511e+02 -1.919590472356484e+02 3.836584700935805e+02 -5.909217345563752e+02 3 4.156541164903923e+02 2.203243106780774e+02 -1.767969453775071e+02 3.049071707664833e+02 4 3.541194844652567e+02 -2.836526344242890e+01 -2.068615247160734e+02 2.860145637898919e+02 - ME 2.258971422042701e-05 + ME 3.110012368642411e-05 Event 131 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3100,7 +3100,7 @@ Event 131 Batch 1 2 2.308323688168238e+02 -1.780469473698228e+02 1.469011263880862e+02 1.710582294195638e+00 3 7.308075033948297e+02 5.219262643529272e+02 -3.840435213624620e+02 3.379099810545737e+02 4 5.383601277883465e+02 -3.438793169831044e+02 2.371423949743758e+02 -3.396205633487694e+02 - ME 7.770640764079256e-05 + ME 1.061667055612532e-03 Event 132 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3108,7 +3108,7 @@ Event 132 Batch 1 2 5.909630762789660e+02 -4.293852116769707e+02 -3.988922148105424e+02 7.583335995300355e+01 3 5.415993952096327e+02 2.260703809971038e+02 3.221145619770360e+02 -3.721079100067703e+02 4 3.674375285114020e+02 2.033148306798666e+02 7.677765283350686e+01 2.962745500537670e+02 - ME 1.628447412544396e-05 + ME 3.321676569401813e-05 Event 133 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3116,7 +3116,7 @@ Event 133 Batch 1 2 4.506052863582997e+02 2.189991325227701e+02 -3.914006430783634e+02 -4.347459771134355e+01 3 4.043998006859111e+02 3.160348074769272e+02 8.738893432792010e+01 2.366946839598570e+02 4 6.449949129557901e+02 -5.350339399996973e+02 3.040117087504433e+02 -1.932200862485142e+02 - ME 8.705579101282482e-05 + ME 3.121497332919934e-04 Event 134 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3124,7 +3124,7 @@ Event 134 Batch 1 2 7.151470882937614e+02 -1.041377497037516e+01 -4.186394096729767e+01 7.138447461686595e+02 3 3.416424731356660e+02 1.638631808685801e+02 3.081581136487586e+01 -2.981925940995343e+02 4 4.432104385705719e+02 -1.534494058982047e+02 1.104812960242199e+01 -4.156521520691248e+02 - ME 6.342792451335309e-03 + ME 5.534325530265236e-02 Event 135 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3132,7 +3132,7 @@ Event 135 Batch 1 2 7.115730144432832e+02 -3.219296530898238e+02 2.184242454110169e+02 -5.958089478700319e+02 3 1.627059459894212e+02 -6.880794311551747e+01 -3.259803939022061e+01 1.437917231708342e+02 4 6.257210395672955e+02 3.907375962053413e+02 -1.858262060207963e+02 4.520172246991979e+02 - ME 1.277979532321233e-04 + ME 2.112989182930814e-04 Event 136 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3140,7 +3140,7 @@ Event 136 Batch 1 2 7.195404287114588e+02 -4.369992732083461e+02 -4.270318019286997e+02 3.800182941743402e+02 3 6.668605996318223e+02 3.634158794560479e+02 4.690430049045651e+02 -3.043527845290675e+02 4 1.135989716567186e+02 7.358339375229815e+01 -4.201120297586535e+01 -7.566550964527264e+01 - ME 7.515399240093053e-05 + ME 1.804344388349211e-03 Event 137 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3148,7 +3148,7 @@ 
Event 137 Batch 1 2 6.722782806744999e+02 -6.045581260407005e+02 -2.538460778300668e+02 1.484241478840623e+02 3 6.869263774705689e+02 6.661257235671316e+02 1.481819739565761e+02 -7.865412297735662e+01 4 1.407953418549304e+02 -6.156759752643097e+01 1.056641038734908e+02 -6.977002490670534e+01 - ME 2.119149330726453e-05 + ME 5.192812231664224e-04 Event 138 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3156,7 +3156,7 @@ Event 138 Batch 1 2 6.463287544295633e+02 8.684709774942756e+01 2.409249839962013e+02 -5.934253049048401e+02 3 3.917330799270068e+02 1.767690441671677e+02 4.696120064017492e+01 3.464132742372293e+02 4 4.619381656434300e+02 -2.636161419165952e+02 -2.878861846363762e+02 2.470120306676108e+02 - ME 4.203806696206548e-05 + ME 5.804753959762886e-05 Event 139 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3164,7 +3164,7 @@ Event 139 Batch 1 2 2.994802063237944e+02 -1.272876183039153e+02 6.552211336810879e+00 2.710042891410713e+02 3 7.257546970836092e+02 -8.848613612326799e+00 5.127896146768584e+00 -7.256826352181574e+02 4 4.747650965925943e+02 1.361362319162416e+02 -1.168010748357900e+01 4.546783460770868e+02 - ME 1.500396153249019e-04 + ME 1.724196014694060e-04 Event 140 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3172,7 +3172,7 @@ Event 140 Batch 1 2 7.326756101999780e+02 5.655005379385240e+02 4.343799907428446e+02 1.683351270988810e+02 3 7.428339005597779e+02 -5.680473426214219e+02 -4.534832054058505e+02 -1.532233754243464e+02 4 2.449048924024402e+01 2.546804682897962e+00 1.910321466300584e+01 -1.511175167453447e+01 - ME 1.024603362434272e-04 + ME 4.669436438173466e-03 Event 141 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3180,7 +3180,7 @@ Event 141 Batch 1 2 7.363238871411332e+02 -6.772722174663238e+02 -2.824373475598683e+02 
-6.086341204880675e+01 3 5.504260535970963e+02 4.650298533191528e+02 2.914345410616540e+02 4.221355560271704e+01 4 2.132500592617708e+02 2.122423641471711e+02 -8.997193501785816e+00 1.864985644608987e+01 - ME 1.166401869382226e-05 + ME 7.300791864660033e-05 Event 142 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3188,7 +3188,7 @@ Event 142 Batch 1 2 5.862280565156834e+02 4.248793793115829e+01 -2.479279504752411e+02 -5.295184989682986e+02 3 4.287264749982929e+02 -3.025296967755320e+02 2.785471849307642e+02 1.212173201341831e+02 4 4.850454684860405e+02 2.600417588443628e+02 -3.061923445551928e+01 4.083011788341197e+02 - ME 1.949810022878841e-05 + ME 4.569028399965169e-05 Event 143 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3196,7 +3196,7 @@ Event 143 Batch 1 2 2.464531733710510e+02 4.046044690030688e+01 -2.103865804466287e+02 1.218179201483223e+02 3 5.378449948854583e+02 4.607829603950880e+02 -2.747641700963839e+02 3.822241180409925e+01 4 7.157018317434903e+02 -5.012434072953949e+02 4.851507505430126e+02 -1.600403319524219e+02 - ME 4.863434295951330e-04 + ME 1.284493741497843e-03 Event 144 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3204,7 +3204,7 @@ Event 144 Batch 1 2 5.367418008803521e+02 -1.343004856786532e+02 -4.048537736989352e+02 -3.258044847458254e+02 3 6.294877130859599e+02 3.313530054622211e+02 5.282137272543231e+02 8.631468610520756e+01 4 3.337704860336884e+02 -1.970525197835678e+02 -1.233599535553879e+02 2.394897986406179e+02 - ME 8.754930746282009e-06 + ME 2.612855607885159e-05 Event 145 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3212,7 +3212,7 @@ Event 145 Batch 1 2 6.805380148481771e+01 -3.411514819754512e+01 -4.339750646760406e+01 -3.980116822894492e+01 3 6.831461500979880e+02 -3.834019790669201e+02 -2.756424954453614e+02 
-4.936727656514237e+02 4 7.488000484171945e+02 4.175171272644653e+02 3.190400019129655e+02 5.334739338803686e+02 - ME 4.117012994651258e-01 + ME 4.832444287218038e-01 Event 146 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3220,7 +3220,7 @@ Event 146 Batch 1 2 5.031746658797123e+02 4.202301876294930e+02 2.767377273314875e+02 2.750283520766640e+00 3 4.317115817339341e+02 -1.098088257924671e+02 -5.455162180567243e+01 4.139336083717602e+02 4 5.651137523863538e+02 -3.104213618370259e+02 -2.221861055258150e+02 -4.166838918925268e+02 - ME 1.122040831263755e-03 + ME 4.446377084117306e-03 Event 147 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3228,7 +3228,7 @@ Event 147 Batch 1 2 4.251223043705630e+02 -4.223502783198938e+02 -4.694338569631599e+01 1.206377286808446e+01 3 5.457819748703678e+02 2.791608945230574e+02 -4.384138579515959e+02 -1.665546403390879e+02 4 5.290957207590696e+02 1.431893837968364e+02 4.853572436479118e+02 1.544908674710035e+02 - ME 1.117959404473985e-05 + ME 5.820013407126093e-05 Event 148 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3236,7 +3236,7 @@ Event 148 Batch 1 2 6.905785821272525e+02 6.249608768654489e+02 -6.243387159972350e+01 -2.870970082698929e+02 3 1.361638260920089e+02 2.862044352088506e+01 1.704210379179796e+01 1.320266050727362e+02 4 6.732575917807402e+02 -6.535813203863343e+02 4.539176780792534e+01 1.550704031971573e+02 - ME 5.047601105033982e-04 + ME 9.573948308169230e-04 Event 149 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3244,7 +3244,7 @@ Event 149 Batch 1 2 6.694705528096943e+02 -5.216497821741067e+02 -3.785079074709545e+02 1.811189935345937e+02 3 2.821401257551277e+02 1.148500354702071e-01 2.786662494166578e+02 -4.413795199872407e+01 4 5.483893214351779e+02 5.215349321386365e+02 9.984165805429673e+01 
-1.369810415358697e+02 - ME 3.486097449584098e-05 + ME 1.943324414096923e-04 Event 150 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3252,7 +3252,7 @@ Event 150 Batch 1 2 4.637486188995366e+02 -4.033412855298819e+02 -2.279949807412008e+02 -1.992178895453991e+01 3 3.756800751656199e+02 6.230662615514293e+01 -2.632310737913946e+02 -2.606967683041707e+02 4 6.605713059348438e+02 3.410346593747391e+02 4.912260545325952e+02 2.806185572587107e+02 - ME 4.211370643652993e-05 + ME 2.156945366470290e-04 Event 151 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3260,7 +3260,7 @@ Event 151 Batch 1 2 3.821954355913596e+02 -2.528320044280690e+02 2.861764538722267e+02 1.588602445142563e+01 3 6.796189325418250e+02 2.911670128135291e+02 -4.900375979142738e+02 3.700902818893582e+02 4 4.381856318668152e+02 -3.833500838546018e+01 2.038611440420471e+02 -3.859763063407838e+02 - ME 1.923941526207248e-04 + ME 8.197229841786387e-03 Event 152 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3268,7 +3268,7 @@ Event 152 Batch 1 2 6.751133298339792e+02 -2.999578895043981e+02 -2.855974213275218e+02 -5.331391803034741e+02 3 4.976977783498468e+02 -3.003988119418482e+00 1.843802943840355e+02 4.622747685874795e+02 4 3.271888918161745e+02 3.029618776238166e+02 1.012171269434863e+02 7.086441171599445e+01 - ME 6.977738125195056e-05 + ME 1.204579535049519e-04 Event 153 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3276,7 +3276,7 @@ Event 153 Batch 1 2 1.729293620257127e+02 1.558357805102956e+02 -7.193392860849491e+01 2.110174585940510e+01 3 6.524550819255464e+02 2.410158908712478e+02 5.786677971610501e+02 1.809766692333240e+02 4 6.746155560487412e+02 -3.968516713815435e+02 -5.067338685525552e+02 -2.020784150927291e+02 - ME 1.391654510317005e-04 + ME 5.985591428637023e-04 Event 154 Batch 1 
0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3284,7 +3284,7 @@ Event 154 Batch 1 2 6.585658455851002e+02 -2.410305357139302e+02 -2.116446673272157e+02 -5.751693564652295e+02 3 5.764400833248005e+02 3.388133979948972e+02 3.092747322371399e+02 3.490527051926400e+02 4 2.649940710900988e+02 -9.778286228096688e+01 -9.763006490992416e+01 2.261166512725894e+02 - ME 2.686434432328395e-05 + ME 3.655181799213059e-05 Event 155 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3292,7 +3292,7 @@ Event 155 Batch 1 2 5.686586231936359e+02 -1.693366246265498e+02 -1.542203680657918e+02 5.204938187588979e+02 3 1.882190564276536e+02 -1.089234770645493e+02 -9.145416397064866e+01 1.232810822434430e+02 4 7.431223203787102e+02 2.782601016910992e+02 2.456745320364404e+02 -6.437749010023409e+02 - ME 4.701119881405690e-01 + ME 6.696396361607482e-01 Event 156 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3300,7 +3300,7 @@ Event 156 Batch 1 2 6.143652095725128e+02 2.879464601546110e+02 5.379391909976823e+02 -7.178351904348040e+01 3 6.287751645293085e+02 -4.584164185734781e+02 -4.225140875260598e+02 -8.181956094447702e+01 4 2.568596258981782e+02 1.704699584188668e+02 -1.154251034716223e+02 1.536030799879581e+02 - ME 7.769660148731367e-06 + ME 2.899571701789112e-05 Event 157 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3308,7 +3308,7 @@ Event 157 Batch 1 2 5.050842109798973e+02 4.185498850973046e+02 -1.305174306570672e+02 -2.507812875014723e+02 3 5.170424494038050e+02 -3.084595065654854e+02 3.930456446728388e+02 -1.330441599566699e+02 4 4.778733396162975e+02 -1.100903785318191e+02 -2.625282140157716e+02 3.838254474581424e+02 - ME 1.243977993100618e-05 + ME 4.033251359625283e-05 Event 158 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 
@@ -3316,7 +3316,7 @@ Event 158 Batch 1 2 4.312542366204098e+02 -3.114503370626313e+02 2.737030704635235e+02 1.185982013584742e+02 3 6.944315393047829e+02 2.166643175309468e+02 -6.173965008138002e+02 -2.326226495269423e+02 4 3.743142240748070e+02 9.478601953168439e+01 3.436934303502764e+02 1.140244481684682e+02 - ME 5.864250821924803e-06 + ME 3.680357310121394e-05 Event 159 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3324,7 +3324,7 @@ Event 159 Batch 1 2 5.860112473308646e+02 -1.581297551692178e+02 4.935632758462007e+02 2.734948907463652e+02 3 3.772013313646349e+02 -2.371132827856262e+02 -1.305099443644436e+02 -2.627266448837395e+02 4 5.367874213045002e+02 3.952430379548442e+02 -3.630533314817573e+02 -1.076824586262577e+01 - ME 2.805189658646002e-05 + ME 1.030382455754272e-04 Event 160 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3332,7 +3332,7 @@ Event 160 Batch 1 2 5.883409724804535e+02 -3.739819298758817e+02 -2.887651121595530e+02 3.505671490956299e+02 3 4.300332553173178e+02 1.788055146224819e+02 3.829208006453583e+02 7.955406370837679e+01 4 4.816257722022287e+02 1.951764152533999e+02 -9.415568848580530e+01 -4.301212128040066e+02 - ME 2.307516153071828e-04 + ME 9.797271586219467e-03 Event 161 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3340,7 +3340,7 @@ Event 161 Batch 1 2 6.868305165969147e+02 4.119610488151656e+00 5.515184990814985e+02 4.093244831537709e+02 3 3.260821955312833e+02 -1.956999890649130e+02 -2.483451099187458e+02 -7.972338993006402e+01 4 4.870872878718022e+02 1.915803785767614e+02 -3.031733891627526e+02 -3.296010932237070e+02 - ME 9.860610555787331e-05 + ME 1.075603053132144e-03 Event 162 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3348,7 +3348,7 @@ Event 162 Batch 1 2 2.159818802305119e+02 -2.018126805027919e+02 
4.096951387107715e+01 -6.512536763314942e+01 3 6.870078865581224e+02 4.896730732821633e+02 -2.356527215298929e+02 -4.203188222421333e+02 4 5.970102332113654e+02 -2.878603927793715e+02 1.946832076588156e+02 4.854441898752826e+02 - ME 2.809071549115161e-05 + ME 5.344822454174306e-05 Event 163 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3356,7 +3356,7 @@ Event 163 Batch 1 2 4.889699854403287e+02 -4.067839821807834e+01 -2.740835242435768e+02 4.028835269878222e+02 3 4.282392920294498e+02 4.007468150560176e+02 -8.832740907173851e+01 -1.224301852772270e+02 4 5.827907225302220e+02 -3.600684168379390e+02 3.624109333153153e+02 -2.804533417105952e+02 - ME 1.173701793303044e-04 + ME 4.336231422638298e-04 Event 164 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3364,7 +3364,7 @@ Event 164 Batch 1 2 6.224346677404150e+02 -1.282049393554146e+02 5.480608628970117e+02 -2.657399098565701e+02 3 7.444531740822750e+02 1.794330131141779e+02 -6.708967511266460e+02 2.681638893170603e+02 4 1.331121581773107e+02 -5.122807375876333e+01 1.228358882296343e+02 -2.423979460490191e+00 - ME 1.571413941583783e-05 + ME 1.368953177788070e-04 Event 165 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3372,7 +3372,7 @@ Event 165 Batch 1 2 6.980339706506675e+02 -5.154669325341684e+01 -4.947847840614098e+02 4.896757907618869e+02 3 1.362964882116331e+02 4.252532371924361e+01 -5.641238783031591e+01 -1.165588780002596e+02 4 6.656695411377010e+02 9.021369534174053e+00 5.511971718917263e+02 -3.731169127616273e+02 - ME 4.238311927693088e-04 + ME 1.450267418906797e-03 Event 166 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3380,7 +3380,7 @@ Event 166 Batch 1 2 3.060640747281171e+02 -1.981167412190918e+02 -9.095380261170779e+01 -2.148310510107333e+02 3 5.580104478575086e+02 
-3.585720992432471e+02 -1.558095186186280e+02 3.981521109704927e+02 4 6.359254774143739e+02 5.566888404623389e+02 2.467633212303362e+02 -1.833210599597597e+02 - ME 1.099447007687216e-04 + ME 3.000804338470548e-04 Event 167 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3388,7 +3388,7 @@ Event 167 Batch 1 2 2.833153623322893e+02 2.526850217013923e+02 8.687924899084067e+01 9.417998957332070e+01 3 6.595685044563415e+02 -8.780626893611850e+01 -2.875856231737449e+02 -5.870393347553995e+02 4 5.571161332113688e+02 -1.648787527652738e+02 2.007063741829043e+02 4.928593451820789e+02 - ME 4.244421486768831e-05 + ME 7.367447958524992e-05 Event 168 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3396,7 +3396,7 @@ Event 168 Batch 1 2 6.026267479353969e+02 -5.987968578530475e+02 5.775180228477150e+00 6.758674164241529e+01 3 4.991211680715713e+02 3.812575567959843e+02 3.220701575873951e+02 -5.952259631185711e+00 4 3.982520839930309e+02 2.175393010570631e+02 -3.278453378158730e+02 -6.163448201122968e+01 - ME 1.203107058680061e-05 + ME 9.606399998327532e-05 Event 169 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3404,7 +3404,7 @@ Event 169 Batch 1 2 5.510662376679772e+02 -9.251111075413947e+01 -5.291920243323356e+02 -1.227660134875281e+02 3 5.034535790022877e+02 -2.816014265681677e+02 3.283802195198170e+02 2.575511098657944e+02 4 4.454801833297348e+02 3.741125373223072e+02 2.008118048125185e+02 -1.347850963782663e+02 - ME 2.085195230877358e-05 + ME 1.532484123791625e-04 Event 170 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3412,7 +3412,7 @@ Event 170 Batch 1 2 2.814808559369750e+02 3.658097943502287e+01 -1.412301634042880e+02 -2.407225480659935e+02 3 6.646522150540470e+02 2.753499086551696e+02 -1.631412967142655e+02 5.825203104495404e+02 4 5.538669290089779e+02 
-3.119308880901926e+02 3.043714601185535e+02 -3.417977623835468e+02 - ME 2.587160315460459e-04 + ME 7.823510217753851e-04 Event 171 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3420,7 +3420,7 @@ Event 171 Batch 1 2 1.777965289077954e+02 -6.143496808852239e+01 -1.603735842336773e+00 1.668375809551635e+02 3 7.439290290569696e+02 2.163074211412066e+01 -1.907051550939623e+01 -7.433699124308462e+02 4 5.782744420352348e+02 3.980422597440174e+01 2.067425135173305e+01 5.765323314756826e+02 - ME 1.981167274383509e-03 + ME 2.063755640794395e-03 Event 172 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3428,7 +3428,7 @@ Event 172 Batch 1 2 1.369499454750680e+02 -1.250080331667568e+01 -3.518152151649629e+01 -1.317622025690455e+02 3 6.692885586315896e+02 -2.346283187163472e+02 -6.130705295376303e+02 1.305421486874673e+02 4 6.937614958933425e+02 2.471291220330227e+02 6.482520510541266e+02 1.220053881578238e+00 - ME 1.548169060571347e-04 + ME 5.039586079692636e-04 Event 173 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3436,7 +3436,7 @@ Event 173 Batch 1 2 7.088772083623137e+02 4.973951266878932e+01 3.171232495758680e+01 -7.064185769505260e+02 3 5.785136264307895e+02 8.584813303397833e+01 5.766505028397120e+01 5.691949191590089e+02 4 2.126091652068944e+02 -1.355876457027672e+02 -8.937737524155732e+01 1.372236577915166e+02 - ME 1.732961413682620e-04 + ME 1.743760900867476e-04 Event 174 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3444,7 +3444,7 @@ Event 174 Batch 1 2 4.367208701713482e+02 -3.923163287174704e+01 4.325755195957351e+02 -4.543585887727652e+01 3 3.528978856725088e+02 9.622572295106905e+01 1.987077746703234e+02 -2.753048278549415e+02 4 7.103812441561454e+02 -5.699409007932221e+01 -6.312832942660567e+02 3.207406867322186e+02 - ME 1.541208918572365e-04 
+ ME 9.353677491192390e-04 Event 175 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3452,7 +3452,7 @@ Event 175 Batch 1 2 6.418562164876806e+02 1.962785648722137e+02 -6.110736372974047e+02 -6.567908015856712e+00 3 4.843421844702149e+02 -1.886631806266161e+02 3.569879071908527e+02 -2.674942804112337e+02 4 3.738015990421035e+02 -7.615384245597569e+00 2.540857301065516e+02 2.740621884270906e+02 - ME 1.279055979705581e-05 + ME 3.029111560812189e-05 Event 176 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3460,7 +3460,7 @@ Event 176 Batch 1 2 6.288652703123263e+02 4.005522031116294e+02 3.691482793515075e+02 3.142594606996526e+02 3 7.209127580467475e+02 -4.124575135572966e+02 -5.165298058232565e+02 -2.877341896975221e+02 4 1.502219716409257e+02 1.190531044566666e+01 1.473815264717492e+02 -2.652527100213051e+01 - ME 1.300720357566141e-05 + ME 1.719274466020296e-04 Event 177 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3468,7 +3468,7 @@ Event 177 Batch 1 2 4.716578040000077e+02 -4.521622645932388e+02 -1.012739918234145e+01 1.338200520767543e+02 3 3.021382980750606e+02 -2.714821202364266e+02 6.773215888881064e+01 -1.140059832109250e+02 4 7.262038979249317e+02 7.236443848296653e+02 -5.760475970646905e+01 -1.981406886582933e+01 - ME 6.442260552556652e-04 + ME 2.354271252348000e-03 Event 178 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3476,7 +3476,7 @@ Event 178 Batch 1 2 7.350088877399502e+02 -3.684484945749095e+02 -2.561732769425163e+02 -5.821159885132296e+02 3 1.415495174310248e+02 7.181268644032879e+01 1.095010133995263e+02 5.374692563910759e+01 4 6.234415948290248e+02 2.966358081345808e+02 1.466722635429900e+02 5.283690628741219e+02 - ME 6.828487731379645e-05 + ME 1.035408980291912e-04 Event 179 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -3484,7 +3484,7 @@ Event 179 Batch 1 2 7.426064621425413e+02 6.748632301344054e+01 7.201624948975951e+02 -1.681544967131679e+02 3 5.821031882499326e+02 8.394276920418550e-01 -5.588194474899291e+02 1.629854049874919e+02 4 1.752903496075256e+02 -6.832575070548241e+01 -1.613430474076661e+02 5.169091725675888e+00 - ME 1.412410550503903e-05 + ME 9.197132478706931e-05 Event 180 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3492,7 +3492,7 @@ Event 180 Batch 1 2 6.099515195485484e+02 2.272495331206023e+02 1.762692760011278e+02 -5.378918555193875e+02 3 5.718889655176699e+02 4.324570510796980e+01 -3.278409766521432e+02 4.665909256493895e+02 4 3.181595149337819e+02 -2.704952382285720e+02 1.515717006510154e+02 7.130092986999803e+01 - ME 3.043963963928669e-05 + ME 5.401477812349802e-05 Event 181 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3500,7 +3500,7 @@ Event 181 Batch 1 2 1.206370886915177e+02 -8.151225636567759e+01 1.767749325039422e+01 8.715827822142556e+01 3 6.451493408002739e+02 -6.748216257939080e+01 4.373428479320614e+02 4.694625256943417e+02 4 7.342135705082084e+02 1.489944189450684e+02 -4.550203411824557e+02 -5.566208039157672e+02 - ME 2.625479922313071e-02 + ME 7.131653341377736e-02 Event 182 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3508,7 +3508,7 @@ Event 182 Batch 1 2 4.626866082364760e+02 -3.084610429505738e+02 3.306629079434072e+02 9.794245113140897e+01 3 4.974966719253473e+02 3.582955998671217e+02 1.664640547097976e+02 -3.023523113558579e+02 4 5.398167198381765e+02 -4.983455691654795e+01 -4.971269626532048e+02 2.044098602244489e+02 - ME 1.414799589613471e-05 + ME 5.959042767905828e-05 Event 183 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3516,7 +3516,7 @@ Event 183 Batch 1 2 
3.304723045950491e+02 3.244647182058462e+00 3.209425641774955e+02 7.872284845075714e+01 3 4.379804819457451e+02 2.312428523500660e+02 3.131807483468383e+02 2.006775141049615e+02 4 7.315472134592065e+02 -2.344874995321247e+02 -6.341233125243344e+02 -2.794003625557186e+02 - ME 2.330806393221907e-03 + ME 4.899988668912175e-03 Event 184 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3524,7 +3524,7 @@ Event 184 Batch 1 2 7.470051035005908e+02 -4.953964753944513e+02 -4.028924750569613e+02 3.876552725878485e+02 3 2.183325716323390e+02 1.119040172022777e+02 1.451703047217021e+02 -1.186262424448778e+02 4 5.346623248670695e+02 3.834924581921736e+02 2.577221703352594e+02 -2.690290301429710e+02 - ME 7.987999480474686e-05 + ME 5.441344453720516e-04 Event 185 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3532,7 +3532,7 @@ Event 185 Batch 1 2 4.448583927494090e+02 2.810173563272025e+02 -3.384637477435971e+02 6.610995769032235e+01 3 6.236443795626774e+02 -1.690803760724666e+02 5.125139620028374e+02 3.125277225134823e+02 4 4.314972276879136e+02 -1.119369802547359e+02 -1.740502142592404e+02 -3.786376802038046e+02 - ME 1.405605442011058e-04 + ME 6.949230823829164e-03 Event 186 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3540,7 +3540,7 @@ Event 186 Batch 1 2 6.802792190696962e+02 -1.681815241656754e+02 5.427923640013703e+02 3.739936368565512e+02 3 6.331554869749547e+02 3.172201723440435e+02 -4.588808692389625e+02 -2.994755095011972e+02 4 1.865652939553488e+02 -1.490386481783679e+02 -8.391149476240778e+01 -7.451812735535422e+01 - ME 3.045129627255903e-05 + ME 3.276943053321406e-04 Event 187 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3548,7 +3548,7 @@ Event 187 Batch 1 2 7.472897115267965e+02 -6.988402471604775e+02 -2.391684329048669e+02 1.134137672609268e+02 3 
6.826908170748527e+02 6.328852277257668e+02 2.212839847556716e+02 -1.286718241709738e+02 4 7.001947139835140e+01 6.595501943471052e+01 1.788444814919547e+01 1.525805691004725e+01 - ME 3.485925693242860e-05 + ME 1.461490870437387e-04 Event 188 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3556,7 +3556,7 @@ Event 188 Batch 1 2 6.496068877140275e+02 -5.024316730938291e+02 -3.980061777252906e+02 -1.055585379310702e+02 3 4.885976180718368e+02 4.424928723138696e+02 1.459942636040002e+02 -1.470148473169288e+02 4 3.617954942141354e+02 5.993880077995960e+01 2.520119141212904e+02 2.525733852479991e+02 - ME 1.006519408431335e-05 + ME 2.843805826594158e-05 Event 189 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3564,7 +3564,7 @@ Event 189 Batch 1 2 4.082379946778654e+02 2.679237131173331e+02 -7.718184435750955e+01 2.981913934867987e+02 3 5.864211573889181e+02 -5.780822197382728e+02 -6.394893886953379e+01 7.497502433004084e+01 4 5.053408479332167e+02 3.101585066209396e+02 1.411307832270433e+02 -3.731664178168398e+02 - ME 1.322787627040098e-04 + ME 1.937644878671120e-03 Event 190 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3572,7 +3572,7 @@ Event 190 Batch 1 2 6.472516823166364e+02 6.463779961822676e+02 -3.289365889632791e+01 6.945035458816692e+00 3 4.318767277050750e+02 -3.286790725415815e+02 -7.183748821760624e+00 -2.800642229191639e+02 4 4.208715899782885e+02 -3.176989236406859e+02 4.007740771808847e+01 2.731191874603472e+02 - ME 1.272332211942340e-05 + ME 3.409584379294133e-05 Event 191 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3580,7 +3580,7 @@ Event 191 Batch 1 2 6.757500036387052e+02 6.222744522021635e+02 -2.261571472854044e+02 1.351499844096745e+02 3 3.644673602666567e+02 -2.020102809038697e+02 1.114149692296405e+02 -2.821613151026251e+02 4 
4.597826360946380e+02 -4.202641712982938e+02 1.147421780557637e+02 1.470113306929507e+02 - ME 1.560703181590231e-05 + ME 5.389305783035389e-05 Event 192 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3588,7 +3588,7 @@ Event 192 Batch 1 2 7.394562478491531e+02 -7.307873850878615e+02 3.988568028534699e+01 1.056147375500683e+02 3 8.098058518630978e+01 5.419286926826393e+01 4.244928426361276e+00 -6.002473390399248e+01 4 6.795631669645365e+02 6.765945158195976e+02 -4.413060871170821e+01 -4.559000364607596e+01 - ME 1.231033846344155e-04 + ME 4.204295748489254e-04 Event 193 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3596,7 +3596,7 @@ Event 193 Batch 1 2 5.607395612273153e+02 -3.164229781907934e+02 -3.517992386171808e+02 -3.009030576558548e+02 3 3.741643617741927e+02 -2.156271676189966e+02 1.666697084176705e+02 2.563690747778811e+02 4 5.650960769984922e+02 5.320501458097899e+02 1.851295301995104e+02 4.453398287797368e+01 - ME 3.026844143728605e-05 + ME 9.141090879934244e-05 Event 194 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3604,7 +3604,7 @@ Event 194 Batch 1 2 5.729373416862012e+02 -2.155045544874616e+02 -1.679805246197324e+02 5.035846779262559e+02 3 2.831035485618876e+02 -2.543279085173982e+02 1.042261812492671e+02 -6.783684323208054e+01 4 6.439591097519118e+02 4.698324630048598e+02 6.375434337046515e+01 -4.357478346941756e+02 - ME 5.497724763810379e-04 + ME 1.781231321893996e-03 Event 195 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3612,7 +3612,7 @@ Event 195 Batch 1 2 5.572874060171201e+02 -5.433144409127298e+02 3.646295232533866e+01 1.185290019729285e+02 3 6.765845568040619e+02 5.574999049241243e+02 -1.212989803269169e+01 -3.831623469093195e+02 4 2.661280371788181e+02 -1.418546401139455e+01 -2.433305429264712e+01 2.646333449363910e+02 - ME 
3.378534889977447e-04 + ME 3.395618115588225e-04 Event 196 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3620,7 +3620,7 @@ Event 196 Batch 1 2 5.405888343305829e+02 3.940239871950471e+02 -8.826690628749978e+01 -3.594305754554688e+02 3 6.983754392688073e+02 -3.888370902622853e+02 -5.513072771506098e+01 5.774898910559966e+02 4 2.610357264006097e+02 -5.186896932761887e+00 1.433976340025607e+02 -2.180593156005277e+02 - ME 2.676929502290073e-04 + ME 5.539073969003598e-03 Event 197 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3628,7 +3628,7 @@ Event 197 Batch 1 2 2.783346334111661e+02 2.282410890438732e+02 -1.474467226896361e+02 6.029624695020830e+01 3 6.434654504578666e+02 1.172104173128919e+01 6.205939438823057e+02 1.696277097949658e+02 4 5.781999161309674e+02 -2.399621307751624e+02 -4.731472211926695e+02 -2.299239567451741e+02 - ME 4.280180350752636e-05 + ME 3.321087064690878e-04 Event 198 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3636,7 +3636,7 @@ Event 198 Batch 1 2 4.349536439683943e+02 1.774777254208009e+02 -9.709992209949135e+01 3.850427697141142e+02 3 4.134500153047116e+02 7.095914770071803e+01 -4.041194890923881e+02 -5.092301099466194e+01 4 6.515963407268921e+02 -2.484368731215197e+02 5.012194111918782e+02 -3.341197587194521e+02 - ME 2.926862112764983e-04 + ME 7.849443582399766e-04 Event 199 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3644,7 +3644,7 @@ Event 199 Batch 1 2 6.682109290882580e+02 2.136897997740939e+02 -5.035763266519416e+02 3.837361052354048e+02 3 1.424120473397155e+02 8.952788458880865e+01 -4.686863299276860e+01 -1.003458038481504e+02 4 6.893770235720265e+02 -3.032176843629025e+02 5.504449596447103e+02 -2.833903013872543e+02 - ME 4.183851150998592e-04 + ME 1.167594898598604e-03 Event 200 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3652,7 +3652,7 @@ Event 200 Batch 1 2 5.959952693237885e+02 -4.878566955018547e+02 -2.510837703973929e+01 -3.414319479966339e+02 3 4.479637599869168e+02 4.499951041477978e+01 7.146287716862105e+01 4.399313940955211e+02 4 4.560409706892941e+02 4.428571850870749e+02 -4.635450012888173e+01 -9.849944609888662e+01 - ME 3.228844805909175e-04 + ME 5.545496796633981e-04 Event 201 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3660,7 +3660,7 @@ Event 201 Batch 1 2 5.203096708642927e+02 -1.112696379946441e+02 1.367824427202020e+02 4.895219960522141e+02 3 2.871951825199399e+02 -2.582762312778227e+02 1.200876310962787e+02 3.678888524092984e+01 4 6.924951466157675e+02 3.695458692724667e+02 -2.568700738164807e+02 -5.263108812931440e+02 - ME 2.285182473348715e-03 + ME 6.577575910850049e-03 Event 202 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3668,7 +3668,7 @@ Event 202 Batch 1 2 2.158792376054218e+02 2.112389782008981e+01 -7.195062193526132e+01 -2.024369881546198e+02 3 5.463652944256570e+02 2.787950008966254e+02 -3.108926376755554e+02 -3.523267663221479e+02 4 7.377554679689213e+02 -2.999188987167153e+02 3.828432596108168e+02 5.547637544767679e+02 - ME 1.952686275320307e-03 + ME 8.695282964050810e-03 Event 203 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3676,7 +3676,7 @@ Event 203 Batch 1 2 7.124273471334275e+02 4.879265047129839e+02 -1.059167473143779e+02 -5.081949365946950e+02 3 6.746108110440506e+02 -5.248642991835990e+02 4.352799102536777e+01 4.215714978711400e+02 4 1.129618418225217e+02 3.693779447061509e+01 6.238875628901040e+01 8.662343872355494e+01 - ME 4.211918129012132e-05 + ME 5.361938367485652e-05 Event 204 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3684,7 +3684,7 @@ Event 
204 Batch 1 2 7.084787759842808e+02 4.992472551829619e+02 -4.528122431715626e+02 -2.183012291454193e+02 3 1.034373169902747e+02 -8.959882065299325e+01 -3.938861547415055e+01 -3.346441176487074e+01 4 6.880839070254444e+02 -4.096484345299685e+02 4.922008586457131e+02 2.517656409102901e+02 - ME 1.033102023766027e-04 + ME 2.988048706021647e-04 Event 205 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3692,7 +3692,7 @@ Event 205 Batch 1 2 6.496569846879349e+02 -5.869603795046561e+02 -2.345911576090251e+02 1.499956646614410e+02 3 2.543878192344406e+02 -1.851019090219859e+00 2.474675926596849e+02 -5.890268997594536e+01 4 5.959551960776247e+02 5.888113985948760e+02 -1.287643505065981e+01 -9.109297468549572e+01 - ME 4.134215827558992e-05 + ME 1.871447246980874e-04 Event 206 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3700,7 +3700,7 @@ Event 206 Batch 1 2 6.172060642836410e+02 2.978040691523503e+02 4.166709400833434e+02 3.444435946201744e+02 3 7.205754982426181e+02 -2.468045809177361e+02 -5.690387091428452e+02 -3.667580878490107e+02 4 1.622184374737409e+02 -5.099948823461420e+01 1.523677690595017e+02 2.231449322883641e+01 - ME 1.138691716042452e-05 + ME 7.356489425273393e-05 Event 207 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3708,7 +3708,7 @@ Event 207 Batch 1 2 5.250113096394139e+02 -1.091977068802181e+02 -4.322753509449321e+02 2.772196909074646e+02 3 5.240251005653129e+02 3.541948269240045e+02 3.738549241960732e+02 9.685466564450643e+01 4 4.509635897952731e+02 -2.449971200437864e+02 5.842042674885889e+01 -3.740743565519710e+02 - ME 9.518274156960593e-05 + ME 3.378615964480245e-03 Event 208 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3716,7 +3716,7 @@ Event 208 Batch 1 2 4.449444343820048e+02 1.928662436733418e+02 -3.595193210859464e+02 
1.775500478872298e+02 3 4.894053462810564e+02 -2.195789585225567e+02 2.295326432211599e+02 3.723136307450180e+02 4 5.656502193369389e+02 2.671271484921488e+01 1.299866778647865e+02 -5.498636786322478e+02 - ME 2.179806976662403e-03 + ME 2.068943926258950e-01 Event 209 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3724,7 +3724,7 @@ Event 209 Batch 1 2 4.949423498078044e+02 -2.830370809537592e+02 -1.684680620467476e+02 -3.694271951395289e+02 3 6.326444171345161e+02 3.898538983719823e+02 -1.748162179498052e+02 4.665749526039372e+02 4 3.724132330576786e+02 -1.068168174182231e+02 3.432842799965525e+02 -9.714775746440780e+01 - ME 3.638076645868775e-05 + ME 1.473942246791387e-04 Event 210 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3732,7 +3732,7 @@ Event 210 Batch 1 2 5.469464199121014e+02 -4.947084169679945e+02 2.319240083666633e+02 -2.500445517953792e+01 3 2.929141603572806e+02 -5.602902696925145e+01 2.099470855189298e+01 2.867379913571110e+02 4 6.601394197306178e+02 5.507374439372461e+02 -2.529187169185561e+02 -2.617335361775729e+02 - ME 7.792286450853471e-04 + ME 1.577330101330874e-03 Event 211 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3740,7 +3740,7 @@ Event 211 Batch 1 2 5.484404249965427e+02 1.659778109685243e+01 3.514591842057613e+02 -4.206992456262192e+02 3 4.635537606517395e+02 -3.607884938122542e+02 -3.140996451540818e+01 2.893564685231623e+02 4 4.880058143517181e+02 3.441907127154018e+02 -3.200492196903532e+02 1.313427771030569e+02 - ME 1.717788621912363e-05 + ME 4.999214184618137e-05 Event 212 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3748,7 +3748,7 @@ Event 212 Batch 1 2 6.930853388432640e+02 -3.424793196872474e+02 -8.152110066892747e+01 5.970171795281683e+02 3 9.131624224772825e+01 6.738328155058525e+01 1.365968298972706e+01 
6.009627714210347e+01 4 7.155984189090078e+02 2.750960381366621e+02 6.786141767920034e+01 -6.571134566702718e+02 - ME 4.440767413899675e-02 + ME 3.224436999651524e-01 Event 213 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3756,7 +3756,7 @@ Event 213 Batch 1 2 7.316448870278512e+02 4.203233031264803e+02 4.913598772661251e+02 -3.423419819067778e+02 3 4.750162603483208e+02 -1.726357548525294e+02 -3.708603862154638e+02 2.414537588813190e+02 4 2.933388526238279e+02 -2.476875482739507e+02 -1.204994910506614e+02 1.008882230254589e+02 - ME 1.166473784051930e-05 + ME 4.008080891216109e-05 Event 214 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3764,7 +3764,7 @@ Event 214 Batch 1 2 4.805779599533694e+02 3.904513572450257e+02 -1.742898429406511e+02 2.193763065287195e+02 3 6.164938851206517e+02 -5.563771061772993e+02 2.227142270499353e+02 1.445946028815716e+02 4 4.029281549259790e+02 1.659257489322735e+02 -4.842438410928419e+01 -3.639709094102910e+02 - ME 1.644694060635318e-04 + ME 1.130096726278085e-02 Event 215 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3772,7 +3772,7 @@ Event 215 Batch 1 2 4.610896439725640e+02 -3.106576460930037e+02 -3.050258363865880e+02 -1.518378274323046e+02 3 7.153470686812809e+02 2.726436938726979e+02 6.046054769368644e+02 2.680280994976061e+02 4 3.235632873461531e+02 3.801395222030658e+01 -2.995796405502758e+02 -1.161902720653026e+02 - ME 1.638803663744001e-05 + ME 2.130646114222361e-04 Event 216 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3780,7 +3780,7 @@ Event 216 Batch 1 2 5.309452696424389e+02 -4.912950836090372e+02 -3.608909251460832e+01 -1.980646298023531e+02 3 6.627369363365399e+02 4.479096066616000e+02 2.308759280187052e+02 4.304573578259469e+02 4 3.063177940210212e+02 4.338547694743724e+01 -1.947868355040969e+02 
-2.323927280235938e+02 - ME 7.684209531203918e-05 + ME 1.881406502208647e-03 Event 217 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3788,7 +3788,7 @@ Event 217 Batch 1 2 4.608032244164870e+02 2.215832851737383e+02 3.318832460795877e+02 -2.304212888079594e+02 3 3.107022283044695e+02 -4.724697178681157e+01 2.830528592337836e+02 -1.190994425256424e+02 4 7.284945472790432e+02 -1.743363133869267e+02 -6.149361053133712e+02 3.495207313336019e+02 - ME 4.426756984161849e-04 + ME 2.894775763457067e-03 Event 218 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3796,7 +3796,7 @@ Event 218 Batch 1 2 6.336891602166270e+02 5.249943224110900e+02 1.648031440577737e+02 -3.142973702098814e+02 3 5.195346944320743e+02 -3.655895580768890e+02 -3.610279413409480e+02 7.693763263116504e+01 4 3.467761453512956e+02 -1.594047643342018e+02 1.962247972831736e+02 2.373597375787177e+02 - ME 8.957256945094420e-06 + ME 2.703962034458943e-05 Event 219 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3804,7 +3804,7 @@ Event 219 Batch 1 2 2.579228498517417e+02 -4.166553381892272e+01 1.191899344508913e+02 2.249042891828000e+02 3 7.453266221408651e+02 -3.354388163550532e+01 -3.947818065141064e+02 -6.312954196904914e+02 4 4.967505280073930e+02 7.520941545442813e+01 2.755918720632151e+02 4.063911305076915e+02 - ME 4.019449398167179e-05 + ME 6.103184694489295e-05 Event 220 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3812,7 +3812,7 @@ Event 220 Batch 1 2 4.940336288355577e+02 -2.383755021420815e+02 -2.918661661143953e+02 3.194690712363630e+02 3 7.129224521449780e+02 2.727447507998269e+02 2.535039959962389e+02 -6.079510240944473e+02 4 2.930439190194635e+02 -3.436924865774512e+01 3.836217011815621e+01 2.884819528580837e+02 - ME 1.677977866215262e-04 + ME 1.761519882509421e-04 Event 221 Batch 1 
0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3820,7 +3820,7 @@ Event 221 Batch 1 2 3.305414381337777e+02 -2.712796684963201e+02 -1.199910663213094e+02 -1.458325333632650e+02 3 7.388441803280767e+02 5.510455284380058e+02 4.375213740715825e+02 2.254209298704556e+02 4 4.306143815381457e+02 -2.797658599416856e+02 -3.175303077502730e+02 -7.958839650719051e+01 - ME 1.392897982206581e-05 + ME 1.338118621913618e-04 Event 222 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3828,7 +3828,7 @@ Event 222 Batch 1 2 4.657562074797755e+02 2.823280548971349e+02 2.956503281023745e+02 2.231828795335844e+02 3 4.791948192186352e+02 -3.228825926298714e+02 2.575611801233854e+02 -2.429747818931873e+02 4 5.550489733015891e+02 4.055453773273638e+01 -5.532115082257600e+02 1.979190235960287e+01 - ME 2.328731171682892e-05 + ME 9.040551632672907e-05 Event 223 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3836,7 +3836,7 @@ Event 223 Batch 1 2 1.612164685986321e+02 -4.527922182271191e+01 -1.095260585492910e+01 1.543391792239740e+02 3 6.984218503485876e+02 -4.629950983513680e+02 2.605715575888556e+02 -4.533553609726805e+02 4 6.403616810527805e+02 5.082743201740799e+02 -2.496189517339264e+02 2.990161817487066e+02 - ME 2.446487784841432e-04 + ME 4.148580235863498e-04 Event 224 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3844,7 +3844,7 @@ Event 224 Batch 1 2 1.663853414671972e+02 -1.350882138037309e+02 9.706071747767010e+01 3.804401292344658e+00 3 6.436745581417563e+02 -4.469273298203079e+02 -4.412749113764766e+02 -1.408877256838118e+02 4 6.899401003910457e+02 5.820155436240389e+02 3.442141938988058e+02 1.370833243914657e+02 - ME 9.431632941984795e-05 + ME 3.449215697364171e-04 Event 225 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-3852,7 +3852,7 @@ Event 225 Batch 1 2 6.702356777533546e+02 6.117158080352369e+02 -2.649249521350114e+02 -6.952987609335720e+01 3 6.901224376513153e+02 -6.564819557015361e+02 1.560869289536550e+02 1.446972404640001e+02 4 1.396418845953297e+02 4.476614766629927e+01 1.088380231813564e+02 -7.516736437064299e+01 - ME 2.456039108263569e-05 + ME 6.407468428023662e-04 Event 226 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3860,7 +3860,7 @@ Event 226 Batch 1 2 7.307777643673112e+02 -4.569648094661606e+02 4.416236342013199e+02 -3.608155616351098e+02 3 1.446420186345137e+02 4.133161435221925e+01 -3.411742569426914e+01 1.343466131828505e+02 4 6.245802169981752e+02 4.156331951139413e+02 -4.075062085070508e+02 2.264689484522593e+02 - ME 2.774761612267077e-04 + ME 4.858390443010437e-04 Event 227 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3868,7 +3868,7 @@ Event 227 Batch 1 2 7.408615397889290e+02 -4.398089081634772e+02 -5.325812259979131e+02 2.679574278743413e+02 3 4.035753807128123e+02 3.000971513323747e+02 2.468113220276344e+02 -1.090823496201683e+02 4 3.555630794982585e+02 1.397117568311025e+02 2.857699039702786e+02 -1.588750782541728e+02 - ME 3.077346064218035e-05 + ME 3.215647103618368e-04 Event 228 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3876,7 +3876,7 @@ Event 228 Batch 1 2 5.775455372723294e+02 -3.656199842755111e+02 -6.289501053880601e+01 4.426342647953073e+02 3 3.247306314578497e+02 8.776645762339835e+01 3.116872137482897e+02 2.445634292125525e+01 4 5.977238312698206e+02 2.778535266521127e+02 -2.487922032094836e+02 -4.670906077165625e+02 - ME 3.399241079583280e-04 + ME 3.156934429573604e-03 Event 229 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3884,7 +3884,7 @@ Event 229 Batch 1 2 3.665477125629453e+02 -2.081014917770363e+02 
2.317985113364040e+02 -1.931850016112187e+02 3 6.187040836990479e+02 -2.134593092471877e+02 -3.484367286517815e+02 4.645661552545953e+02 4 5.147482037380067e+02 4.215608010242241e+02 1.166382173153775e+02 -2.713811536433765e+02 - ME 8.330968691049859e-05 + ME 4.392210547845218e-04 Event 230 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3892,7 +3892,7 @@ Event 230 Batch 1 2 5.913978529013565e+02 -4.986092821675885e+02 -3.028328044703767e+02 9.712104143419764e+01 3 3.439186614041002e+02 -6.573524045766426e+01 3.216488491089061e+02 -1.024741025375549e+02 4 5.646834856945436e+02 5.643445226252528e+02 -1.881604463852933e+01 5.353061103357447e+00 - ME 2.296146042402505e-05 + ME 1.067159092411647e-04 Event 231 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3900,7 +3900,7 @@ Event 231 Batch 1 2 5.760768557894827e+02 -7.075794524290799e+01 5.609870884449791e+02 1.102331327656218e+02 3 6.038619762337338e+02 -2.467027894308989e+02 -5.464177649873398e+02 -7.221250677108812e+01 4 3.200611679767834e+02 3.174607346738069e+02 -1.456932345763944e+01 -3.802062599453370e+01 - ME 9.438631267217403e-06 + ME 8.750887998909065e-05 Event 232 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3908,7 +3908,7 @@ Event 232 Batch 1 2 7.230187249684843e+02 -2.426041066061352e+02 1.884455685697195e+02 -6.545132479937492e+02 3 4.821326920133732e+02 2.438648429837413e+02 -1.563760752388986e+01 4.156168142598493e+02 4 2.948485830181424e+02 -1.260736377606032e+00 -1.728079610458298e+02 2.388964337338999e+02 - ME 3.745272037455064e-05 + ME 4.549716999825542e-05 Event 233 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3916,7 +3916,7 @@ Event 233 Batch 1 2 3.540260977608100e+02 -1.904526694678991e+02 -1.042089619355360e+02 -2.796475475319170e+02 3 4.925592302096041e+02 
1.195034224421750e+02 3.554637678715695e+02 -3.193415679485398e+02 4 6.534146720295859e+02 7.094924702572415e+01 -2.512548059360335e+02 5.989891154804569e+02 - ME 1.035644942794080e-04 + ME 2.494643034161164e-04 Event 234 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3924,7 +3924,7 @@ Event 234 Batch 1 2 1.866526101194276e+02 7.776953530733704e+01 -1.047503781897390e+01 1.693557493124073e+02 3 6.012752698516817e+02 5.974840035795012e+02 -4.570329760029643e+01 4.955829083294186e+01 4 7.120721200288899e+02 -6.752535388868379e+02 5.617833541927040e+01 -2.189140401453492e+02 - ME 6.655948749153013e-04 + ME 2.154454342135980e-03 Event 235 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3932,7 +3932,7 @@ Event 235 Batch 1 2 5.032945404607945e+02 1.612889276925247e+02 2.561838854094329e+02 -4.020710050699558e+02 3 7.153634726767370e+02 -3.739069589148947e+02 -1.979140468542061e+02 5.768609140624169e+02 4 2.813419868624690e+02 2.126180312223700e+02 -5.826983855522722e+01 -1.747899089924609e+02 - ME 1.137471703441233e-04 + ME 8.184939555880423e-04 Event 236 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3940,7 +3940,7 @@ Event 236 Batch 1 2 6.980797829886610e+02 -9.803971882836288e+00 4.740144261428889e+02 5.123764137440797e+02 3 5.519387921056282e+02 -1.638876688381594e+02 -3.209728652821290e+02 -4.180355032606608e+02 4 2.499814249057108e+02 1.736916407209956e+02 -1.530415608607599e+02 -9.434091048341891e+01 - ME 5.842524801707843e-05 + ME 2.813360227943072e-04 Event 237 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3948,7 +3948,7 @@ Event 237 Batch 1 2 1.604490925133743e+02 6.212857081252698e+01 9.075394990141041e+01 1.168232534834160e+02 3 6.578242662283152e+02 5.348507070161563e+02 -3.810396531957998e+02 3.842224792439630e+01 4 6.817266412583107e+02 
-5.969792778286832e+02 2.902857032943894e+02 -1.552455014078122e+02 - ME 1.834055676127939e-04 + ME 8.205069948818567e-04 Event 238 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3956,7 +3956,7 @@ Event 238 Batch 1 2 2.789018340499539e+02 1.069933592962543e+02 -2.572713415352736e+02 1.225197647611563e+01 3 4.761759619803052e+02 7.755191627191856e+01 -4.591043622469822e+02 -9.976187456245104e+01 4 7.449222039697408e+02 -1.845452755681728e+02 7.163757037822556e+02 8.750989808633538e+01 - ME 9.445005309896021e-03 + ME 4.130258343824905e-02 Event 239 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3964,7 +3964,7 @@ Event 239 Batch 1 2 4.581461811054764e+02 -3.899520773556200e+02 2.006122777919944e+02 1.326273524830990e+02 3 3.013476461129690e+02 -2.996604136348060e+02 3.145663680794619e+01 4.951799549362093e+00 4 7.405061727815548e+02 6.896124909904260e+02 -2.320689145999406e+02 -1.375791520324611e+02 - ME 4.970363634614722e-03 + ME 1.351152256907066e-02 Event 240 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3972,7 +3972,7 @@ Event 240 Batch 1 2 5.932490652975304e+02 -4.094504138983958e+01 -3.300190662632461e+02 4.912793227530680e+02 3 3.147487537014150e+02 3.081803657249563e+02 4.097350029662016e+01 -4.912038692507519e+01 4 5.920021810010543e+02 -2.672353243351168e+02 2.890455659666260e+02 -4.421589358279927e+02 - ME 3.420638167820422e-04 + ME 2.300291351402201e-03 Event 241 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3980,7 +3980,7 @@ Event 241 Batch 1 2 4.438703186026563e+01 1.425431959717181e+01 -4.430288595443099e+00 -4.180186016371768e+01 3 7.139617398095604e+02 -8.415544716076485e+01 -5.657765076565163e+02 -4.272659242311072e+02 4 7.416512283301737e+02 6.990112756359306e+01 5.702067962519594e+02 4.690677843948249e+02 - ME 
9.983667466725972e-03 + ME 9.657825758456334e-03 Event 242 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3988,7 +3988,7 @@ Event 242 Batch 1 2 3.798759956195423e+02 -1.259218082844715e+02 -3.429343473884153e+02 1.041417477651927e+02 3 6.208895880511435e+02 5.354328139337265e+02 1.248673426784089e+02 -2.884852319370315e+02 4 4.992344163293142e+02 -4.095110056492549e+02 2.180670047100064e+02 1.843434841718389e+02 - ME 1.030886114253601e-05 + ME 4.523810239016752e-05 Event 243 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3996,7 +3996,7 @@ Event 243 Batch 1 2 2.320641800899440e+02 1.658639294991472e+02 7.783463994856535e+01 1.424243988788334e+02 3 6.251485586341132e+02 -2.328139095298017e+02 -4.262931976140131e+02 3.935511574875350e+02 4 6.427872612759426e+02 6.694998003065477e+01 3.484585576654476e+02 -5.359755563663684e+02 - ME 8.493072129055412e-04 + ME 1.068434238404496e-02 Event 244 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4004,7 +4004,7 @@ Event 244 Batch 1 2 6.609991843787810e+02 -2.293678857540617e+02 -4.971623496474938e+02 -3.703240376037023e+02 3 1.091403980947070e+02 1.154537470975927e+01 -9.115666825632124e+00 -1.081445118228680e+02 4 7.298604175265119e+02 2.178225110443025e+02 5.062780164731259e+02 4.784685494265703e+02 - ME 9.635755455313371e-04 + ME 2.129811247265830e-03 Event 245 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4012,7 +4012,7 @@ Event 245 Batch 1 2 4.893629130846664e+02 -3.546974954177181e+02 3.112856868655738e+02 -1.294873298810978e+02 3 7.129026631852477e+02 5.703735458058533e+02 -4.257115617679147e+02 -4.091322034012423e+01 4 2.977344237300874e+02 -2.156760503881352e+02 1.144258749023406e+02 1.704005502212233e+02 - ME 5.312368446054512e-06 + ME 2.548352504440589e-05 Event 246 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4020,7 +4020,7 @@ Event 246 Batch 1 2 3.999457395350199e+02 9.605025124341067e+01 9.072234098128430e+01 3.774922524438975e+02 3 3.675469088581873e+02 -1.615841482674670e+01 2.570183669846762e+02 2.622426259669196e+02 4 7.325073516067924e+02 -7.989183641666393e+01 -3.477407079659604e+02 -6.397348784108170e+02 - ME 5.023802198964801e-02 + ME 1.294421983622042e-01 Event 247 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4028,7 +4028,7 @@ Event 247 Batch 1 2 6.711864521923226e+02 3.763073240556692e+02 5.338170415278108e+02 1.546719678644905e+02 3 5.231557804938882e+02 -1.057595517177888e+02 -5.121603131388773e+02 -1.409615302513522e+01 4 3.056577673137891e+02 -2.705477723378804e+02 -2.165672838893370e+01 -1.405758148393554e+02 - ME 1.980507958825256e-05 + ME 2.873345328272106e-04 Event 248 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4036,7 +4036,7 @@ Event 248 Batch 1 2 6.307803946875938e+02 -6.240065811552291e+01 -3.654556314590158e+02 5.103256270499047e+02 3 3.935347424219227e+02 -2.188782290807617e+02 2.916853933646314e+01 -3.257470040392325e+02 4 4.756848628904837e+02 2.812788871962847e+02 3.362870921225527e+02 -1.845786230106721e+02 - ME 8.712398839363553e-05 + ME 2.418190194667681e-04 Event 249 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4044,7 +4044,7 @@ Event 249 Batch 1 2 4.326970760901858e+02 -4.070406664121577e+02 -1.467447404863359e+02 3.261392852829594e+00 3 4.839435229991528e+02 2.335311811831339e+01 2.018595963184923e+02 -4.392136936630267e+02 4 5.833594009106607e+02 3.836875482938447e+02 -5.511485583215654e+01 4.359523008101972e+02 - ME 2.487145538635957e-05 + ME 8.354140201035124e-05 Event 250 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4052,7 +4052,7 @@ 
Event 250 Batch 1 2 7.010671671345858e+02 -6.122994886156980e+02 -2.473946684860857e+02 2.353303785738851e+02 3 5.574643785654457e+02 3.902114201641945e+02 2.260985614407801e+02 -3.276904354069721e+02 4 2.414684542999681e+02 2.220880684515034e+02 2.129610704530562e+01 9.236005683308701e+01 - ME 1.645582299148298e-05 + ME 4.704118057291807e-05 Event 251 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4060,7 +4060,7 @@ Event 251 Batch 1 2 7.364006127103795e+02 5.379960890463808e+02 4.302640987755426e+02 2.602285070392761e+02 3 3.051282143252570e+01 -2.901685968644106e+00 1.337962970917706e+01 -2.726899336532026e+01 4 7.330865658570956e+02 -5.350944030777371e+02 -4.436437284847198e+02 -2.329595136739561e+02 - ME 6.389613086136084e-03 + ME 8.340546584740779e-03 Event 252 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4068,7 +4068,7 @@ Event 252 Batch 1 2 5.965625584838610e+02 -7.369842915522101e+01 -5.671364104158780e+02 -1.697401534860145e+02 3 6.549338760881149e+02 -1.514014639568436e+02 6.313240788068730e+02 8.628954906696529e+01 4 2.485035654280235e+02 2.250998931120648e+02 -6.418766839099484e+01 8.345060441904938e+01 - ME 7.225550854378042e-06 + ME 3.985162011735342e-05 Event 253 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4076,7 +4076,7 @@ Event 253 Batch 1 2 5.728678540484714e+02 3.212236187283236e+01 -4.622666283104808e+02 -3.368312580807653e+02 3 7.160302400837320e+02 1.132435775281999e+02 5.206369974620781e+02 4.783433011307397e+02 4 2.111019058677967e+02 -1.453659394010323e+02 -5.837036915159722e+01 -1.415120430499744e+02 - ME 7.499676590470843e-05 + ME 1.248429186447426e-03 Event 254 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4084,7 +4084,7 @@ Event 254 Batch 1 2 5.579357369440610e+02 1.333150067790222e+02 -6.785864805882139e+01 
5.375077668373273e+02 3 6.202682598689536e+02 -4.039338689731095e+02 2.012068793592834e+02 -4.255419314189536e+02 4 3.217960031869852e+02 2.706188621940872e+02 -1.333482313004621e+02 -1.119658354183736e+02 - ME 2.226893396847405e-04 + ME 6.088720978226072e-04 Event 255 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4092,5 +4092,5 @@ Event 255 Batch 1 2 7.263612771087843e+02 3.396063850675520e+02 -6.401091575508393e+02 5.028393902637355e+01 3 1.540578578981475e+02 -3.080387127739228e+01 1.060177193258910e+02 -1.074485378375538e+02 4 6.195808649930684e+02 -3.088025137901597e+02 5.340914382249483e+02 5.716459881118030e+01 - ME 4.003666322732326e-05 + ME 1.547064591142216e-04 diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index 64ce042fd4..d13feee76f 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -51,16 +51,16 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h.mg +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F set auto_convert_model T; import model heft; generate g g > h save options auto_convert_model -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: Restrict model heft with file models/heft/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: s u w+ at order: QED=1  @@ -127,43 +127,74 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. INFO: Trying process: g g > h HIG<=1 HIW<=1 WEIGHTED<=2 @1 INFO: Process has 1 diagrams -1 processes with 1 diagrams generated in 0.004 s +1 processes with 1 diagrams generated in 0.003 s Total: 1 processes with 1 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_heft_gg_h Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h INFO: Organizing processes 
into subprocess groups INFO: Generating Helas calls for process: g g > h HIG<=1 HIW<=1 WEIGHTED<=2 @1 INFO: Processing color information for process: g g > h HIG<=1 HIW<=1 @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  -DEBUG: type(subproc_group)= [output.py at line 190]  -DEBUG: type(fortran_model)= [output.py at line 191]  -DEBUG: type(me)= me=0 [output.py at line 192]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/. 
+DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  +DEBUG: type(subproc_group)= [output.py at line 188]  +DEBUG: type(fortran_model)= [output.py at line 189]  +DEBUG: type(me)= me=0 [output.py at line 190]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: proc_id =  0 [model_handling.py at line 1046]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  +DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 
1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/. +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_heft_gg_h.txt [model_handling.py at line 1336]  Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines -ALOHA: aloha creates 1 routines in 0.060 s +ALOHA: aloha creates 1 routines in 0.054 s VVS3 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h -INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. 
+FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h +INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 59 , keys size = 59 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./Parameters_heft.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./Parameters_heft.cc +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 5 , keys size = 5 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 1 , keys size = 1 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 5 , keys size = 5 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 1 , keys size = 1 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 1 , keys size = 1 [model_handling.py at line 729]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./Parameters_heft.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./Parameters_heft.cc INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory -INFO: 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  quit -real 0m0.428s -user 0m0.366s -sys 0m0.055s +real 0m0.497s +user 0m0.397s +sys 0m0.051s diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/MatrixElementKernels.cc index 74b5239ebf..30257195b6 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/MatrixElementKernels.cc @@ -112,17 +112,10 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#elif defined( __x86_64__ ) || defined( __i386__ ) +#else bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; -#else // AV FIXME! 
Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted - bool known = false; // __builtin_cpu_supports is not supported - // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html - // See https://stackoverflow.com/q/62783908 - // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu - bool ok = true; // this is just an assumption! - const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.cc b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.cc index 6cc0be1461..e120b9206f 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.cc +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.cc @@ -245,7 +245,7 @@ namespace mg5amcCpu sxxxxx( momenta, +1, w_fp[2], 2 ); // Amplitude(s) for diagram number 1 - VVS3_0( w_fp[0], w_fp[1], w_fp[2], COUPs[0], 1.0, &_fp[0] ); + VVS3_0( w_fp[0], w_fp[1], w_fp[2], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -745,12 +745,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - 
static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk index c6c1826de7..43cee0977e 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk @@ -27,8 +27,6 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) - #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -222,8 +220,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +555,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared 
-o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/heft_gg_h.sa/src/HelAmps_heft.h b/epochX/cudacpp/heft_gg_h.sa/src/HelAmps_heft.h index a2e9b6a70c..d35dba2369 100644 --- a/epochX/cudacpp/heft_gg_h.sa/src/HelAmps_heft.h +++ b/epochX/cudacpp/heft_gg_h.sa/src/HelAmps_heft.h @@ -863,7 +863,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allS3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //========================================================================== @@ -875,7 +874,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allS3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index e2ec882498..1134659ef0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j.mg +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005310535430908203  +DEBUG: model prefixing takes 0.0048046112060546875  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.029 s +5 processes with 7 diagrams generated in 0.027 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.135 s +13 processes with 76 diagrams generated in 0.125 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,21 +378,21 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.797 s +65 processes with 1119 diagrams generated in 1.848 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  INFO: initialize a new directory: CODEGEN_mad_pp_tt012j INFO: remove old information in CODEGEN_mad_pp_tt012j -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Processing color 
information for process: g g > t t~ g g @2 @@ -496,328 +496,710 @@ INFO: Combined process c c~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: 
type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: 
[24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 10, 14: 11, 15: 12, 16: 13, 17: 14, 18: 15, 19: 16, 20: 17, 21: 18, 22: 19, 23: 20, 24: 21, 25: 22, 26: 23, 27: 24, 28: 25, 29: 26, 30: 27, 31: 28, 32: 29, 33: 30, 37: 31, 38: 32, 39: 33, 40: 34, 41: 35, 42: 36, 43: 37, 44: 38, 45: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 55: 46, 56: 47, 57: 48, 58: 49, 59: 50, 60: 51, 61: 52, 62: 53, 63: 54, 67: 55, 68: 56, 69: 57, 70: 58, 71: 59, 72: 60, 73: 61, 74: 62, 75: 63, 76: 64, 77: 65, 78: 66, 79: 67, 80: 68, 81: 69, 85: 70, 86: 71, 87: 72, 88: 73, 89: 74, 90: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 100: 85, 101: 86, 102: 87, 106: 88, 107: 89, 108: 90, 109: 91, 110: 92, 111: 93, 115: 94, 116: 95, 117: 96, 118: 97, 119: 98, 120: 99, 124: 100, 125: 101, 126: 102, 127: 103, 128: 104, 129: 105} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( 
momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 
79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
+DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 
2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxuux.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 
33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
+DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 
2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxgu.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 
33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  2 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
+DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 
2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxgux.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 
33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  3 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
+DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 
2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxgg.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 
33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  4 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . 
+DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], 
%d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  5 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group 
gg_ttxg INFO: Creating files in directory P2_uu_ttxuu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 
1710]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uu_ttxuu.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  6 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory 
P2_uux_ttxuux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1710]  INFO: Created files CPPProcess.h and 
CPPProcess.cc in directory ./. +DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxuux.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  7 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux -DEBUG: kwargs[prefix] 
= 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1710]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
+DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxux_ttxuxux.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  8 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc -DEBUG: kwargs[prefix] = 0 [model_handling.py at 
line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1710]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
+DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uc_ttxuc.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  9 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: 
process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1710]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
+DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxccx.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  10 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: 
process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1710]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
+DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_ucx_ttxucx.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  11 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: 
process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1710]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
+DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxcx_ttxuxcx.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  12 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: 
process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
+DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  13 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  
[export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
+DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  14 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
+DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxg.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  15 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  
[export_v4.py at line 6199]  +DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1710]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  +DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  +DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
+DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  16 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: Entering 
PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  +DEBUG: proc_id =  1 [model_handling.py at line 1046]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  INFO: Creating files in directory . +DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  +DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc +DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  +DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  +DEBUG: self.include_multi_channel =  [1] [model_handling.py at line 1145]  +DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  +DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  +DEBUG: self.support_multichannel, self.include_multi_channel =  True [1] [model_handling.py at line 1164]  +DEBUG: multi_channel =  {1: [0]} [model_handling.py at line 1170]  +DEBUG: multi_channel_map =  {1: [0]} [model_handling.py at line 1655]  +DEBUG: diag_to_config =  {1: 1} [model_handling.py at line 1710]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
+DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  +DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  +DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttx.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1] [export_cpp.py at line 711]  DEBUG: subproc_number =  17 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.287 s -Wrote files for 810 helas calls in 3.227 s +Generated helas calls for 18 subprocesses (372 
diagrams) in 1.145 s +Wrote files for 810 helas calls in 3.136 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.329 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  +ALOHA: aloha creates 5 routines in 0.287 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.308 s +ALOHA: aloha creates 10 routines in 0.671 s VVV1 VVV1 FFV1 @@ -830,22 +1212,27 @@ ALOHA: aloha creates 10 routines in 0.308 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.cc +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. 
The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  ************************************************************ * * * W E L C O M E to * @@ -866,15 +1253,14 @@ DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt Using default text editor "vi". 
Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP -run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -898,33 +1284,31 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt +No valid eps viewer found. Please set in ./input/mg5_configuration.txt +Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py -Hunk #1 succeeded at 4188 (offset 1 line). patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 538 (offset 69 lines). +Hunk #1 succeeded at 528 (offset 59 lines). 
patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -932,18 +1316,21 @@ Hunk #2 succeeded at 146 (offset 3 lines). Hunk #3 succeeded at 224 (offset 3 lines). Hunk #4 succeeded at 252 (offset 3 lines). Hunk #5 succeeded at 297 (offset 3 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #6 succeeded at 402 (offset 6 lines). +Hunk #7 succeeded at 466 (offset -4 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 159 (offset 16 lines). Hunk #3 succeeded at 237 (offset 16 lines). Hunk #4 succeeded at 265 (offset 16 lines). Hunk #5 succeeded at 310 (offset 16 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #6 succeeded at 434 (offset 38 lines). +Hunk #7 succeeded at 588 (offset 118 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 527 (offset 58 lines). +Hunk #1 succeeded at 513 (offset 44 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -951,9 +1338,11 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 240 (offset 19 lines). 
Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #6 succeeded at 428 (offset 32 lines). +Hunk #7 succeeded at 518 (offset 48 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 527 (offset 58 lines). +Hunk #1 succeeded at 517 (offset 48 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -961,9 +1350,11 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 240 (offset 19 lines). Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #6 succeeded at 428 (offset 32 lines). +Hunk #7 succeeded at 518 (offset 48 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 538 (offset 69 lines). +Hunk #1 succeeded at 528 (offset 59 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -971,18 +1362,21 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 240 (offset 19 lines). Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #6 succeeded at 428 (offset 32 lines). +Hunk #7 succeeded at 518 (offset 48 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 191 (offset 48 lines). Hunk #3 succeeded at 269 (offset 48 lines). Hunk #4 succeeded at 297 (offset 48 lines). Hunk #5 succeeded at 342 (offset 48 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #6 succeeded at 830 (offset 434 lines). +Hunk #7 succeeded at 1717 (offset 1247 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 516 (offset 47 lines). +Hunk #1 succeeded at 502 (offset 33 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -990,9 +1384,11 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #6 succeeded at 540 (offset 144 lines). +Hunk #7 succeeded at 813 (offset 343 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 527 (offset 58 lines). +Hunk #1 succeeded at 513 (offset 44 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1000,9 +1396,11 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #6 succeeded at 540 (offset 144 lines). +Hunk #7 succeeded at 815 (offset 345 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 527 (offset 58 lines). +Hunk #1 succeeded at 517 (offset 48 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1010,9 +1408,11 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #6 succeeded at 538 (offset 142 lines). +Hunk #7 succeeded at 812 (offset 342 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 554 (offset 85 lines). +Hunk #1 succeeded at 540 (offset 71 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 77 (offset 5 lines). @@ -1020,9 +1420,11 @@ Hunk #2 succeeded at 196 (offset 53 lines). Hunk #3 succeeded at 274 (offset 53 lines). Hunk #4 succeeded at 302 (offset 53 lines). Hunk #5 succeeded at 347 (offset 53 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #6 succeeded at 472 (offset 76 lines). +Hunk #7 succeeded at 581 (offset 111 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 626 (offset 157 lines). +Hunk #1 succeeded at 616 (offset 147 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 83 (offset 11 lines). @@ -1030,9 +1432,11 @@ Hunk #2 succeeded at 202 (offset 59 lines). Hunk #3 succeeded at 280 (offset 59 lines). Hunk #4 succeeded at 308 (offset 59 lines). Hunk #5 succeeded at 353 (offset 59 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #6 succeeded at 484 (offset 88 lines). +Hunk #7 succeeded at 593 (offset 123 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 538 (offset 69 lines). +Hunk #1 succeeded at 524 (offset 55 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1040,9 +1444,11 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #6 succeeded at 468 (offset 72 lines). +Hunk #7 succeeded at 620 (offset 150 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 626 (offset 157 lines). +Hunk #1 succeeded at 616 (offset 147 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 83 (offset 11 lines). @@ -1050,9 +1456,11 @@ Hunk #2 succeeded at 202 (offset 59 lines). Hunk #3 succeeded at 280 (offset 59 lines). Hunk #4 succeeded at 308 (offset 59 lines). Hunk #5 succeeded at 353 (offset 59 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #6 succeeded at 484 (offset 88 lines). +Hunk #7 succeeded at 593 (offset 123 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 538 (offset 69 lines). +Hunk #1 succeeded at 528 (offset 59 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1060,9 +1468,11 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #6 succeeded at 540 (offset 144 lines). +Hunk #7 succeeded at 821 (offset 351 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 538 (offset 69 lines). +Hunk #1 succeeded at 528 (offset 59 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1070,9 +1480,11 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #6 succeeded at 468 (offset 72 lines). +Hunk #7 succeeded at 620 (offset 150 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 554 (offset 85 lines). +Hunk #1 succeeded at 546 (offset 77 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 77 (offset 5 lines). @@ -1080,9 +1492,11 @@ Hunk #2 succeeded at 196 (offset 53 lines). Hunk #3 succeeded at 274 (offset 53 lines). Hunk #4 succeeded at 302 (offset 53 lines). Hunk #5 succeeded at 347 (offset 53 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #6 succeeded at 472 (offset 76 lines). +Hunk #7 succeeded at 581 (offset 111 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 538 (offset 69 lines). +Hunk #1 succeeded at 532 (offset 63 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1090,12 +1504,14 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). 
-Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j done. +Hunk #6 succeeded at 468 (offset 72 lines). +Hunk #7 succeeded at 620 (offset 150 lines). +Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/README +/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/README Run "open index.html" to see more information about this process. quit -real 0m9.420s -user 0m8.812s -sys 0m0.563s +real 0m9.562s +user 0m8.185s +sys 0m0.659s diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt b/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt index cdeedc7863..5ca005676e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_file.inc index ec923afd6d..cf4ec946f8 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1_1.o VVVV4_0.o VVVV4P0_1.o FFV1_0.o VVV1_0.o FFV1_2.o VVVV3_0.o VVVV1_0.o VVVV3P0_1.o VVVV1P0_1.o VVV1P0_1.o FFV1P0_3.o +ALOHARoutine = VVVV3_0.o VVVV4P0_1.o VVVV3P0_1.o VVVV1P0_1.o FFV1_1.o FFV1_2.o VVV1P0_1.o VVV1_0.o FFV1_0.o FFV1P0_3.o VVVV1_0.o VVVV4_0.o diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MatrixElementKernels.cc 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MatrixElementKernels.cc index 74b5239ebf..30257195b6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MatrixElementKernels.cc @@ -112,17 +112,10 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#elif defined( __x86_64__ ) || defined( __i386__ ) +#else bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; -#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted - bool known = false; // __builtin_cpu_supports is not supported - // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html - // See https://stackoverflow.com/q/62783908 - // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu - bool ok = true; // this is just an assumption! 
- const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc index 0317bbc95a..44f313bf0a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc @@ -246,10 +246,10 @@ namespace mg5amcCpu ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -260,10 +260,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 3 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -273,10 +273,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 3 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += 
cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -794,12 +794,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f index ee723193db..b68450743c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f @@ -39,7 +39,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -127,24 +126,11 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - 
QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f index d803e4f19f..d81c9e86cb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f @@ -396,6 +396,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -469,6 +470,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc index 75110e8fec..7567442343 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc @@ -241,18 +241,18 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 1 *** // Wavefunction(s) for diagram number 1 - ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); + ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz - oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); + omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] ); + FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -771,12 +771,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) - constexpr int 
nprocesses = 2; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f index f205954b28..8310241f21 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f @@ -42,7 +42,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -130,30 +129,21 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) - S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF 
(ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f index 4c21758744..0ec17d77eb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f @@ -402,6 +402,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -465,6 +466,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/mirrorprocs.inc index 6f9280a1be..57f5243762 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ + DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/processes.dat index 4e3f859a9f..aebe5534f0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/processes.dat @@ -1,2 +1,2 @@ 1 u u~ > t t~,c c~ > t t~,d d~ > t t~,s s~ > t t~ -mirror u~ u > t t~,c~ c > t t~,d~ d > t t~,s~ s > t t~ \ No newline at end of file +mirror none \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc index f7f5899260..8cc007dff8 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc @@ -248,11 +248,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -265,10 +265,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 16 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -279,10 +279,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 16 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -293,11 +293,11 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 16 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -307,10 +307,10 @@ 
namespace mg5amcCpu // *** DIAGRAM 5 OF 16 *** // Wavefunction(s) for diagram number 5 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -324,7 +324,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -334,11 +334,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 16 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,7 +351,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -365,7 +365,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -375,10 +375,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 16 *** // Wavefunction(s) for diagram number 10 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -392,7 +392,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -406,7 +406,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 12 - VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -422,7 +422,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ 
-435,7 +435,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -448,7 +448,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -461,22 +461,22 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 16 *** // Wavefunction(s) for diagram number 16 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[10] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[6] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -1015,12 +1015,13 @@ namespace mg5amcCpu { 
mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index b8615bc68f..668cc26192 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -39,7 +39,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -127,24 +126,11 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) + G1=PDG2PDF(LPP(IB(1)),0, 
IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f index fc924825c2..520966d7b7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f @@ -434,6 +434,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -587,6 +588,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index 90a457ac40..e9456e497d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -243,19 +243,19 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); + imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); + oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); - FFV1_2( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1_2( w_fp[1], w_fp[0], COUPs[1], 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -266,11 +266,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[1], w_fp[4], COUPs[1], 0., 0., w_fp[7] ); // Amplitude(s) for diagram 
number 2 - FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -281,10 +281,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -295,10 +295,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] ); + FFV1_1( w_fp[4], w_fp[0], COUPs[1], 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -312,7 +312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -851,12 +851,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 
and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) - constexpr int nprocesses = 2; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index 81ab70f6d1..ca1b7c1dc5 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -42,7 +42,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION D2,U2,S2,C2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -130,27 +129,14 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - 
QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)), QSCALE) - U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)), QSCALE) - S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)), QSCALE) - C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)), QSCALE) + D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f index d61f0e1a21..5847ea0f3f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f @@ -428,6 +428,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -517,6 +518,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/mirrorprocs.inc index 6f9280a1be..57f5243762 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ + DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/processes.dat index cecfd6fccc..c3d6ba5983 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/processes.dat @@ -1,2 +1,2 @@ 1 g u > t t~ u,g c > t t~ c,g d > t t~ d,g s > t t~ s -mirror u g > t t~ u,c g > t t~ c,d g > t t~ d,s g > t t~ s \ No newline at end of file +mirror none \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index 9a73b3ed94..daa670b4b7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -243,19 +243,19 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); + omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - ixxxxx( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 ); + ixzxxx( momenta, cHel[ihel][4], -1, w_fp[4], 
4 ); - FFV1_2( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1_2( w_fp[4], w_fp[0], COUPs[1], 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -266,11 +266,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[4], w_fp[1], COUPs[1], 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -281,10 +281,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -295,10 +295,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] ); + FFV1_1( w_fp[1], 
w_fp[0], COUPs[1], 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -312,7 +312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -851,12 +851,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) - constexpr int nprocesses = 2; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index 
b58c5d70bd..33e638e237 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -42,7 +42,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -130,27 +129,18 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. 
// Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f index b082becd2a..65d83f3206 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f @@ -428,6 +428,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -517,6 +518,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/mirrorprocs.inc index 6f9280a1be..57f5243762 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ + DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/processes.dat index a4dc13c625..999d31033a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/processes.dat @@ -1,2 +1,2 @@ 1 g u~ > t t~ u~,g c~ > t t~ c~,g d~ > t t~ d~,g s~ > t t~ s~ -mirror u~ g > t t~ u~,c~ g > t t~ c~,d~ g > t t~ d~,s~ g > t t~ s~ \ No newline at end of file +mirror none \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc index dc1a3e9d26..81166eb007 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc @@ -241,9 +241,9 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 5 *** // Wavefunction(s) for diagram number 1 - ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); + ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz - oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); + omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); @@ -251,11 +251,11 @@ 
namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - FFV1_2( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1_2( w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -266,11 +266,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -281,10 +281,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -295,10 +295,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( 
w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[5] ); + FFV1_1( w_fp[1], w_fp[4], COUPs[1], 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[0], w_fp[5], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[0], w_fp[5], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -312,7 +312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[4], w_fp[7], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[7], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -851,12 +851,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) - constexpr int nprocesses = 2; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f index d85b1143a0..f2902c7183 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f @@ -42,7 +42,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -130,30 +129,21 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) - S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) ENDIF PD(0) = 0D0 IPROC = 0 diff --git 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f index 265f6006db..beacc34205 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f @@ -428,6 +428,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call 
counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -517,6 +518,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/mirrorprocs.inc index 6f9280a1be..57f5243762 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ + DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/processes.dat index 70fc4fbfd9..3e7ec80666 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/processes.dat @@ -1,2 +1,2 @@ 1 u u~ > t t~ g,c c~ > t t~ g,d d~ > t t~ g,s s~ > t t~ g -mirror u~ u > t t~ g,c~ c > t t~ g,d~ d > t t~ g,s~ s > t t~ g \ No newline at end of file +mirror none \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc index cbc45ff652..f1e1f21142 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc @@ -250,11 +250,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 1 - VVVV1_0( w_fp[6], w_fp[7], w_fp[4], 
w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -263,7 +263,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -272,7 +272,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -285,10 +285,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 123 *** // Wavefunction(s) for diagram number 2 - VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 2 - VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -305,10 +305,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 123 *** // Wavefunction(s) for diagram number 3 - VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] ); + VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( 
w_fp[7], w_fp[4], w_fp[9], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -325,10 +325,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 123 *** // Wavefunction(s) for diagram number 4 - VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 4 - VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -345,11 +345,11 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 123 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); - FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -363,7 +363,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -376,10 +376,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 123 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], 
cIPD[1], w_fp[13] ); + FFV1_2( w_fp[3], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -390,10 +390,10 @@ namespace mg5amcCpu // *** DIAGRAM 8 OF 123 *** // Wavefunction(s) for diagram number 8 - FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); + FFV1_1( w_fp[2], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -407,7 +407,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -420,10 +420,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 123 *** // Wavefunction(s) for diagram number 10 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[15] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -434,10 +434,10 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 123 *** // 
Wavefunction(s) for diagram number 11 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -451,7 +451,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -467,7 +467,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -481,7 +481,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -497,7 +497,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -513,7 +513,7 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -526,12 +526,12 @@ namespace mg5amcCpu // *** DIAGRAM 17 OF 123 *** // Wavefunction(s) for diagram number 17 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); - FFV1_1( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[12], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -541,10 +541,10 @@ namespace mg5amcCpu // *** DIAGRAM 18 OF 123 *** // Wavefunction(s) for diagram number 18 - FFV1_1( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[12], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -557,7 +557,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -568,11 +568,11 @@ namespace mg5amcCpu // *** DIAGRAM 20 OF 123 *** // Wavefunction(s) for diagram number 20 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 0., 0., w_fp[17] ); // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -588,7 +588,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -602,7 +602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -613,10 +613,10 @@ namespace mg5amcCpu // *** DIAGRAM 23 OF 123 *** // Wavefunction(s) for diagram number 23 - VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] ); + VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 0., 0., w_fp[18] ); // Amplitude(s) for diagram number 23 - VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -632,7 +632,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -646,7 +646,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 25 - FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -657,10 +657,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 123 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] ); + FFV1_1( w_fp[12], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[19] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -673,7 +673,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -686,7 +686,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 28 - FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + 
FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -699,7 +699,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -712,7 +712,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 30 - FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -726,7 +726,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 31 - VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -739,22 +739,22 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 123 *** // Wavefunction(s) for diagram number 32 - VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[17] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[19] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], 
w_fp[12], w_fp[17], COUPs[1], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -763,12 +763,12 @@ namespace mg5amcCpu // *** DIAGRAM 33 OF 123 *** // Wavefunction(s) for diagram number 33 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[12], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -778,10 +778,10 @@ namespace mg5amcCpu // *** DIAGRAM 34 OF 123 *** // Wavefunction(s) for diagram number 34 - FFV1_2( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[12], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 34 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 34 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ 
-794,7 +794,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -805,10 +805,10 @@ namespace mg5amcCpu // *** DIAGRAM 36 OF 123 *** // Wavefunction(s) for diagram number 36 - FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[22] ); + FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -824,7 +824,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 37 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 37 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -838,7 +838,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 38 - FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 38 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -852,7 +852,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 39 - VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 39 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -868,7 +868,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 40 - FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 40 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -882,7 +882,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 41 - FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 41 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -893,10 +893,10 @@ namespace mg5amcCpu // *** DIAGRAM 42 OF 123 *** // Wavefunction(s) for diagram number 42 - FFV1_2( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); + FFV1_2( w_fp[12], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 42 - FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 42 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -909,7 +909,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 43 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 43 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -922,7 +922,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 44 - FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 
44 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -935,7 +935,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 45 - FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 45 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -948,7 +948,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 46 - FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 46 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -962,7 +962,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 47 - VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 47 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -978,17 +978,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 48 - FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[11] -= amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; - FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -997,11 +997,11 @@ namespace 
mg5amcCpu // *** DIAGRAM 49 OF 123 *** // Wavefunction(s) for diagram number 49 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] ); - FFV1_2( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[12] ); + FFV1_2( w_fp[3], w_fp[12], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 49 - FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 49 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1012,10 +1012,10 @@ namespace mg5amcCpu // *** DIAGRAM 50 OF 123 *** // Wavefunction(s) for diagram number 50 - VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 50 - FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 50 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1031,7 +1031,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 51 - FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 51 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1042,10 +1042,10 @@ namespace mg5amcCpu // *** DIAGRAM 52 OF 123 *** // Wavefunction(s) for diagram number 52 - FFV1_1( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); + FFV1_1( w_fp[2], w_fp[12], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 52 - FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[20], w_fp[5], 
COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 52 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1059,7 +1059,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 53 - FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 53 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1075,7 +1075,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 54 - FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 54 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1089,7 +1089,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 55 - FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 55 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1105,7 +1105,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 56 - FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 56 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1121,7 +1121,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 57 - VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 57 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId 
!= 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1141,7 +1141,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 58 - VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1150,7 +1150,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1159,7 +1159,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1172,10 +1172,10 @@ namespace mg5amcCpu // *** DIAGRAM 59 OF 123 *** // Wavefunction(s) for diagram number 59 - VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 59 - VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 59 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1195,7 +1195,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 60 - VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &_fp[0] 
); + VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 60 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1215,7 +1215,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 61 - FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 61 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1231,7 +1231,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 62 - FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 62 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1245,7 +1245,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 63 - FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 63 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1261,7 +1261,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 64 - FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 64 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1272,11 +1272,11 @@ namespace mg5amcCpu // *** DIAGRAM 65 OF 123 *** // Wavefunction(s) for diagram number 65 - VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] ); - FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + VVV1P0_1( w_fp[0], w_fp[5], 
COUPs[0], 0., 0., w_fp[20] ); + FFV1_2( w_fp[3], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 65 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 65 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1287,10 +1287,10 @@ namespace mg5amcCpu // *** DIAGRAM 66 OF 123 *** // Wavefunction(s) for diagram number 66 - VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] ); + VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 66 - FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 66 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1306,7 +1306,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 67 - FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 67 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1317,10 +1317,10 @@ namespace mg5amcCpu // *** DIAGRAM 68 OF 123 *** // Wavefunction(s) for diagram number 68 - FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 68 - FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 68 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1334,7 +1334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) 
for diagram number 69 - FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 69 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1350,7 +1350,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 70 - FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 70 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1364,7 +1364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 71 - FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 71 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1380,7 +1380,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 72 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 72 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1396,7 +1396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 73 - VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 73 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1416,7 +1416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 74 - VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[20], w_fp[1], w_fp[7], 
w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1425,7 +1425,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1434,7 +1434,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1447,10 +1447,10 @@ namespace mg5amcCpu // *** DIAGRAM 75 OF 123 *** // Wavefunction(s) for diagram number 75 - VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] ); + VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 75 - VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 75 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1470,7 +1470,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 76 - VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 76 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1490,7 +1490,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 77 - FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 77 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1506,7 +1506,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 78 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 78 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1520,7 +1520,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 79 - FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 79 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1536,7 +1536,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 80 - FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 80 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1547,10 +1547,10 @@ namespace mg5amcCpu // *** DIAGRAM 81 OF 123 *** // Wavefunction(s) for diagram number 81 - FFV1_1( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[9], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 81 - FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 81 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -1560,10 +1560,10 @@ namespace mg5amcCpu // *** DIAGRAM 82 OF 123 *** // Wavefunction(s) for diagram number 82 - FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[15], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 82 - FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 82 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1576,7 +1576,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 83 - FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 83 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1586,10 +1586,10 @@ namespace mg5amcCpu // *** DIAGRAM 84 OF 123 *** // Wavefunction(s) for diagram number 84 - FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[13], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 84 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 84 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1602,7 +1602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 85 - FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 85 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1613,10 +1613,10 @@ namespace mg5amcCpu // *** DIAGRAM 86 
OF 123 *** // Wavefunction(s) for diagram number 86 - VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 86 - FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 86 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1629,10 +1629,10 @@ namespace mg5amcCpu // *** DIAGRAM 87 OF 123 *** // Wavefunction(s) for diagram number 87 - FFV1_2( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); + FFV1_2( w_fp[16], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 87 - FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 87 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1642,10 +1642,10 @@ namespace mg5amcCpu // *** DIAGRAM 88 OF 123 *** // Wavefunction(s) for diagram number 88 - FFV1_1( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); + FFV1_1( w_fp[11], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 88 - FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 88 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1658,7 +1658,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 89 - FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 89 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -1668,10 +1668,10 @@ namespace mg5amcCpu // *** DIAGRAM 90 OF 123 *** // Wavefunction(s) for diagram number 90 - FFV1_1( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] ); + FFV1_1( w_fp[14], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[24] ); // Amplitude(s) for diagram number 90 - FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 90 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1684,7 +1684,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 91 - FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 91 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1698,7 +1698,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 92 - FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 92 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1714,7 +1714,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 93 - VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1723,7 +1723,7 @@ namespace mg5amcCpu jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[6], 
w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1732,7 +1732,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1745,10 +1745,10 @@ namespace mg5amcCpu // *** DIAGRAM 94 OF 123 *** // Wavefunction(s) for diagram number 94 - VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] ); + VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 94 - VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 94 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1765,10 +1765,10 @@ namespace mg5amcCpu // *** DIAGRAM 95 OF 123 *** // Wavefunction(s) for diagram number 95 - VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] ); + VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 95 - VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 95 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1788,7 +1788,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 96 - FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 96 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1804,7 +1804,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 97 - FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 97 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1818,7 +1818,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 98 - FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 98 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1834,7 +1834,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 99 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 99 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1848,7 +1848,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 100 - VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1857,7 +1857,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 
) * amp_sv[0]; @@ -1866,7 +1866,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1879,10 +1879,10 @@ namespace mg5amcCpu // *** DIAGRAM 101 OF 123 *** // Wavefunction(s) for diagram number 101 - VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 101 - VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 101 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1902,7 +1902,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 102 - VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 102 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1922,7 +1922,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 103 - FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 103 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1938,7 +1938,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 104 - FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 104 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1952,7 +1952,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 105 - FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 105 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1968,7 +1968,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 106 - FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 106 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1982,7 +1982,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 107 - VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1991,7 +1991,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2000,7 +2000,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], 
COUPs[2], &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2016,7 +2016,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 108 - VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 108 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2036,7 +2036,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 109 - VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 109 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2056,7 +2056,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 110 - FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 110 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2069,7 +2069,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 111 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 111 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2082,7 +2082,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 112 - FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 112 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += 
cxabs2( amp_sv[0] ); @@ -2095,7 +2095,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 113 - FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 113 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2105,12 +2105,12 @@ namespace mg5amcCpu // *** DIAGRAM 114 OF 123 *** // Wavefunction(s) for diagram number 114 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[12] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[24] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 114 - VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2119,7 +2119,7 @@ namespace mg5amcCpu jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2128,7 +2128,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2144,17 +2144,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 115 - FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -2166,17 +2166,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 116 - FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; - FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[12] += amp_sv[0]; - FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -2185,12 +2185,12 @@ namespace mg5amcCpu // *** DIAGRAM 117 OF 123 *** // Wavefunction(s) for diagram number 117 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[21] ); + 
VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[13] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 117 - VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2199,7 +2199,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2208,7 +2208,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2224,17 +2224,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 118 - FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; 
jamp_sv[16] += amp_sv[0]; @@ -2246,17 +2246,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 119 - FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; - FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; - FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[18] += amp_sv[0]; @@ -2265,22 +2265,22 @@ namespace mg5amcCpu // *** DIAGRAM 120 OF 123 *** // Wavefunction(s) for diagram number 120 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[15] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[13] ); // Amplitude(s) for diagram number 120 - FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[10] += 
amp_sv[0]; @@ -2292,17 +2292,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 121 - FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; - FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[19] += amp_sv[0]; @@ -2314,7 +2314,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 122 - VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2323,7 +2323,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2332,7 +2332,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2348,7 +2348,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 123 - VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2357,7 +2357,7 @@ namespace mg5amcCpu jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2366,7 +2366,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2961,12 +2961,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions 
(must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f index 4d2e1b4f8c..3f0b6e29c5 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f @@ -39,7 +39,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -127,24 +126,11 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. 
Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f index 2e8e377de8..663b15574e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f @@ -830,6 +830,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -1716,6 +1717,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc index 5723ed5665..e5e62a0af2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc @@ -249,16 +249,23 @@ namespace mg5amcCpu ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); +#if not( defined __CUDACC__ and defined MGONGPU_TEST_DIVERGENCE ) + oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); +#else + if( ( blockDim.x * blockIdx.x + threadIdx.x ) % 2 == 0 ) + oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); + else + oxxxxx( momenta, 0, cHel[ihel][4], +1, w_fp[4], 4 ) +#endif - ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); + ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); - FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); + FFV1_1( w_fp[4], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -271,10 +278,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 36 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); + FFV1_2( w_fp[5], 
w_fp[6], COUPs[1], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[4], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[4], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -287,10 +294,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 36 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -303,10 +310,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 36 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -319,10 +326,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 36 *** // Wavefunction(s) for diagram number 5 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -335,11 +342,11 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 36 *** // Wavefunction(s) for diagram number 6 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[6] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -350,11 +357,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 36 *** // Wavefunction(s) for diagram number 7 - FFV1_1( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[10] ); - FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] ); + FFV1_1( w_fp[4], w_fp[1], COUPs[1], 0., 0., w_fp[10] ); + FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[5], w_fp[10], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -365,10 +372,10 @@ namespace mg5amcCpu // *** DIAGRAM 8 OF 36 *** // Wavefunction(s) for diagram number 8 - FFV1_2( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[12] ); + FFV1_2( w_fp[5], w_fp[1], COUPs[1], 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[12], w_fp[4], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[4], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -379,10 +386,10 @@ namespace 
mg5amcCpu // *** DIAGRAM 9 OF 36 *** // Wavefunction(s) for diagram number 9 - FFV1_1( w_fp[9], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); + FFV1_1( w_fp[9], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -396,7 +403,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 10 - VVV1_0( w_fp[1], w_fp[8], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -407,11 +414,11 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 36 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -422,10 +429,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 36 *** // Wavefunction(s) for diagram number 12 - FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[5], w_fp[10], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[9], 
COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -439,7 +446,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[12], w_fp[4], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[4], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -450,10 +457,10 @@ namespace mg5amcCpu // *** DIAGRAM 14 OF 36 *** // Wavefunction(s) for diagram number 14 - FFV1_2( w_fp[11], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); + FFV1_2( w_fp[11], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -467,7 +474,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -478,11 +485,11 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 36 *** // Wavefunction(s) for diagram number 16 - FFV1_1( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] ); + FFV1_1( w_fp[4], w_fp[0], COUPs[1], 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[5], w_fp[9], COUPs[1], 0., 0., w_fp[14] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -496,7 +503,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -510,7 +517,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -521,10 +528,10 @@ namespace mg5amcCpu // *** DIAGRAM 19 OF 36 *** // Wavefunction(s) for diagram number 19 - FFV1_1( w_fp[9], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] ); + FFV1_1( w_fp[9], w_fp[1], COUPs[1], 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[5], w_fp[11], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[11], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -538,7 +545,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[1], w_fp[7], w_fp[14], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[14], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -549,11 +556,11 @@ namespace mg5amcCpu // *** DIAGRAM 21 OF 36 *** // Wavefunction(s) for diagram number 21 - FFV1_2( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] 
); - FFV1P0_3( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[11] ); + FFV1_2( w_fp[5], w_fp[0], COUPs[1], 0., 0., w_fp[14] ); + FFV1P0_3( w_fp[14], w_fp[4], COUPs[1], 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -567,7 +574,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -581,7 +588,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 23 - FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -592,10 +599,10 @@ namespace mg5amcCpu // *** DIAGRAM 24 OF 36 *** // Wavefunction(s) for diagram number 24 - FFV1_2( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1_2( w_fp[14], w_fp[1], COUPs[1], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[9], w_fp[4], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[4], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -609,7 +616,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 25 - VVV1_0( w_fp[1], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[11], COUPs[0], 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -620,10 +627,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 36 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[13], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -634,10 +641,10 @@ namespace mg5amcCpu // *** DIAGRAM 27 OF 36 *** // Wavefunction(s) for diagram number 27 - VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] ); + VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -648,10 +655,10 @@ namespace mg5amcCpu // *** DIAGRAM 28 OF 36 *** // Wavefunction(s) for diagram number 28 - FFV1_2( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[6], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 28 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -665,7 +672,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], 
COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -676,10 +683,10 @@ namespace mg5amcCpu // *** DIAGRAM 30 OF 36 *** // Wavefunction(s) for diagram number 30 - FFV1_1( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1_1( w_fp[10], w_fp[0], COUPs[1], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 30 - FFV1_0( w_fp[5], w_fp[6], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[6], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -690,10 +697,10 @@ namespace mg5amcCpu // *** DIAGRAM 31 OF 36 *** // Wavefunction(s) for diagram number 31 - VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 31 - FFV1_0( w_fp[5], w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -704,10 +711,10 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 36 *** // Wavefunction(s) for diagram number 32 - FFV1_2( w_fp[12], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1_2( w_fp[12], w_fp[0], COUPs[1], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[10], w_fp[4], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[4], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -721,7 +728,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 33 - FFV1_0( 
w_fp[12], w_fp[4], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[4], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -735,17 +742,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 34 - VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); jamp_sv[1] += 1. / 2. * amp_sv[0]; jamp_sv[2] -= 1. / 2. * amp_sv[0]; jamp_sv[9] -= 1. / 2. * amp_sv[0]; jamp_sv[10] += 1. / 2. * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); jamp_sv[1] += 1. / 2. * amp_sv[0]; jamp_sv[5] -= 1. / 2. * amp_sv[0]; jamp_sv[6] -= 1. / 2. * amp_sv[0]; jamp_sv[10] += 1. / 2. * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); jamp_sv[2] += 1. / 2. * amp_sv[0]; jamp_sv[5] -= 1. / 2. * amp_sv[0]; jamp_sv[6] -= 1. / 2. 
* amp_sv[0]; @@ -757,7 +764,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - VVV1_0( w_fp[1], w_fp[8], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -773,7 +780,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[1], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1356,12 +1363,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f index 
67adf83921..67decfd0d7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f @@ -42,7 +42,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -130,24 +129,11 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f index 41e5e36e39..f401e1eb21 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f @@ -540,6 +540,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -812,6 +813,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc index b8f74ecafe..f018e86fc7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc @@ -243,7 +243,7 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); + imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); @@ -251,14 +251,14 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - oxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); + oxzxxx( momenta, cHel[ihel][5], +1, w_fp[5], 5 ); - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); - FFV1_1( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); + FFV1_1( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[1], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[1], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 36 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); + FFV1_2( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); // 
Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[5], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[5], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -287,10 +287,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 36 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -303,10 +303,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 36 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -319,10 +319,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 36 *** // Wavefunction(s) for diagram number 5 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -335,11 
+335,11 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 36 *** // Wavefunction(s) for diagram number 6 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[6] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -350,11 +350,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 36 *** // Wavefunction(s) for diagram number 7 - FFV1_1( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] ); - FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] ); + FFV1_1( w_fp[5], w_fp[4], COUPs[1], 0., 0., w_fp[10] ); + FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[1], w_fp[10], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[1], w_fp[10], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -365,10 +365,10 @@ namespace mg5amcCpu // *** DIAGRAM 8 OF 36 *** // Wavefunction(s) for diagram number 8 - FFV1_2( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[12] ); + FFV1_2( w_fp[1], w_fp[4], COUPs[1], 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[12], w_fp[5], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[5], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -379,10 +379,10 @@ namespace mg5amcCpu // *** DIAGRAM 9 OF 36 *** // 
Wavefunction(s) for diagram number 9 - FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); + FFV1_1( w_fp[9], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -396,7 +396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 10 - VVV1_0( w_fp[4], w_fp[8], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[8], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -407,11 +407,11 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 36 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); - FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -422,10 +422,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 36 *** // Wavefunction(s) for diagram number 12 - FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[1], w_fp[10], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[1], w_fp[10], w_fp[9], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -439,7 +439,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[12], w_fp[5], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[5], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -450,10 +450,10 @@ namespace mg5amcCpu // *** DIAGRAM 14 OF 36 *** // Wavefunction(s) for diagram number 14 - FFV1_2( w_fp[11], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); + FFV1_2( w_fp[11], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -467,7 +467,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[4], w_fp[8], w_fp[9], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[8], w_fp[9], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -478,11 +478,11 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 36 *** // Wavefunction(s) for diagram number 16 - FFV1_1( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] ); + FFV1_1( w_fp[5], w_fp[0], COUPs[1], 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[1], w_fp[9], COUPs[1], 0., 0., w_fp[14] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -496,7 +496,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -510,7 +510,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -521,10 +521,10 @@ namespace mg5amcCpu // *** DIAGRAM 19 OF 36 *** // Wavefunction(s) for diagram number 19 - FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[11] ); + FFV1_1( w_fp[9], w_fp[4], COUPs[1], 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[1], w_fp[11], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[1], w_fp[11], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -538,7 +538,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[4], w_fp[7], w_fp[14], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[7], w_fp[14], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -549,11 +549,11 @@ namespace mg5amcCpu // *** DIAGRAM 21 OF 36 *** // Wavefunction(s) for diagram number 21 - FFV1_2( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] ); - FFV1P0_3( 
w_fp[14], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[11] ); + FFV1_2( w_fp[1], w_fp[0], COUPs[1], 0., 0., w_fp[14] ); + FFV1P0_3( w_fp[14], w_fp[5], COUPs[1], 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -567,7 +567,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -581,7 +581,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 23 - FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -592,10 +592,10 @@ namespace mg5amcCpu // *** DIAGRAM 24 OF 36 *** // Wavefunction(s) for diagram number 24 - FFV1_2( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1_2( w_fp[14], w_fp[4], COUPs[1], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[9], w_fp[5], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[5], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -609,7 +609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 25 - VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -620,10 +620,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 36 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[13], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -634,10 +634,10 @@ namespace mg5amcCpu // *** DIAGRAM 27 OF 36 *** // Wavefunction(s) for diagram number 27 - VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] ); + VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -648,10 +648,10 @@ namespace mg5amcCpu // *** DIAGRAM 28 OF 36 *** // Wavefunction(s) for diagram number 28 - FFV1_2( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[6], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 28 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -665,7 +665,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, 
&_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -676,10 +676,10 @@ namespace mg5amcCpu // *** DIAGRAM 30 OF 36 *** // Wavefunction(s) for diagram number 30 - FFV1_1( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1_1( w_fp[10], w_fp[0], COUPs[1], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 30 - FFV1_0( w_fp[1], w_fp[6], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[1], w_fp[6], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -690,10 +690,10 @@ namespace mg5amcCpu // *** DIAGRAM 31 OF 36 *** // Wavefunction(s) for diagram number 31 - VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 31 - FFV1_0( w_fp[1], w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[1], w_fp[10], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -704,10 +704,10 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 36 *** // Wavefunction(s) for diagram number 32 - FFV1_2( w_fp[12], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1_2( w_fp[12], w_fp[0], COUPs[1], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[10], w_fp[5], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[5], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -721,7 +721,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[12], w_fp[5], 
w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[5], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -735,17 +735,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 34 - VVVV1_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); jamp_sv[2] -= 1. / 2. * amp_sv[0]; jamp_sv[5] += 1. / 2. * amp_sv[0]; jamp_sv[8] -= 1. / 2. * amp_sv[0]; jamp_sv[10] += 1. / 2. * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); jamp_sv[0] += 1. / 2. * amp_sv[0]; jamp_sv[2] -= 1. / 2. * amp_sv[0]; jamp_sv[7] += 1. / 2. * amp_sv[0]; jamp_sv[8] -= 1. / 2. * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); jamp_sv[0] += 1. / 2. * amp_sv[0]; jamp_sv[5] -= 1. / 2. * amp_sv[0]; jamp_sv[7] += 1. / 2. 
* amp_sv[0]; @@ -757,7 +757,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - VVV1_0( w_fp[4], w_fp[8], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[8], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -773,7 +773,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1356,12 +1356,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) - constexpr int nprocesses = 2; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f index 83a2a24681..58e83991fd 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f @@ -42,7 +42,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION D2,U2,S2,C2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -130,27 +129,14 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)), QSCALE) - U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)), QSCALE) - S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)), QSCALE) - C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)), QSCALE) + D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. 
// Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f index a2b48f860a..ad08208e22 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f @@ -540,6 +540,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -814,6 +815,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/mirrorprocs.inc index 6f9280a1be..57f5243762 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ + DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/processes.dat index 7fe113513b..223af0d709 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/processes.dat @@ -1,2 +1,2 @@ 1 g u > t t~ g u,g c > t t~ g c,g d > t t~ g d,g s > t t~ g s -mirror u g > t t~ g u,c g > t t~ g c,d g > t t~ g d,s g > t t~ g s \ No newline at end of file +mirror none \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc index 2495941a73..cf1ab85565 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc @@ -243,7 +243,7 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); + omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); @@ -251,14 +251,14 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - ixxxxx( momenta, 0., cHel[ihel][5], 
-1, w_fp[5], 5 ); + ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); - FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); + FFV1_1( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 36 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); + FFV1_2( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -287,10 +287,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 36 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -303,10 +303,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 36 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, 
cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -319,10 +319,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 36 *** // Wavefunction(s) for diagram number 5 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -335,11 +335,11 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 36 *** // Wavefunction(s) for diagram number 6 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[6] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -350,11 +350,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 36 *** // Wavefunction(s) for diagram number 7 - FFV1_1( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] ); - FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] ); + FFV1_1( w_fp[1], w_fp[4], COUPs[1], 0., 0., w_fp[10] ); + FFV1P0_3( w_fp[3], 
w_fp[9], COUPs[1], 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[5], w_fp[10], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -365,10 +365,10 @@ namespace mg5amcCpu // *** DIAGRAM 8 OF 36 *** // Wavefunction(s) for diagram number 8 - FFV1_2( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[12] ); + FFV1_2( w_fp[5], w_fp[4], COUPs[1], 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[12], w_fp[1], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[1], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -379,10 +379,10 @@ namespace mg5amcCpu // *** DIAGRAM 9 OF 36 *** // Wavefunction(s) for diagram number 9 - FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); + FFV1_1( w_fp[9], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -396,7 +396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 10 - VVV1_0( w_fp[4], w_fp[8], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[8], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -407,11 +407,11 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 36 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, 
cIPD[0], cIPD[1], w_fp[11] ); - FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -422,10 +422,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 36 *** // Wavefunction(s) for diagram number 12 - FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[5], w_fp[10], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -439,7 +439,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[12], w_fp[1], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[1], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -450,10 +450,10 @@ namespace mg5amcCpu // *** DIAGRAM 14 OF 36 *** // Wavefunction(s) for diagram number 14 - FFV1_2( w_fp[11], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); + FFV1_2( w_fp[11], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -467,7 +467,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[4], w_fp[8], w_fp[9], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[8], w_fp[9], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -478,11 +478,11 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 36 *** // Wavefunction(s) for diagram number 16 - FFV1_1( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] ); + FFV1_1( w_fp[1], w_fp[0], COUPs[1], 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[5], w_fp[9], COUPs[1], 0., 0., w_fp[14] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -496,7 +496,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -510,7 +510,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -521,10 +521,10 @@ namespace mg5amcCpu // *** DIAGRAM 19 OF 36 *** // Wavefunction(s) for diagram number 19 - FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, 0., 0., 
w_fp[11] ); + FFV1_1( w_fp[9], w_fp[4], COUPs[1], 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[5], w_fp[11], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[11], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -538,7 +538,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[4], w_fp[7], w_fp[14], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[7], w_fp[14], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -549,11 +549,11 @@ namespace mg5amcCpu // *** DIAGRAM 21 OF 36 *** // Wavefunction(s) for diagram number 21 - FFV1_2( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] ); - FFV1P0_3( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] ); + FFV1_2( w_fp[5], w_fp[0], COUPs[1], 0., 0., w_fp[14] ); + FFV1P0_3( w_fp[14], w_fp[1], COUPs[1], 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -567,7 +567,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -581,7 +581,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 23 - FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[10], w_fp[7], 
COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -592,10 +592,10 @@ namespace mg5amcCpu // *** DIAGRAM 24 OF 36 *** // Wavefunction(s) for diagram number 24 - FFV1_2( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1_2( w_fp[14], w_fp[4], COUPs[1], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[9], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -609,7 +609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 25 - VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -620,10 +620,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 36 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[13], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -634,10 +634,10 @@ namespace mg5amcCpu // *** DIAGRAM 27 OF 36 *** // Wavefunction(s) for diagram number 27 - VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] ); + VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, 
&_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -648,10 +648,10 @@ namespace mg5amcCpu // *** DIAGRAM 28 OF 36 *** // Wavefunction(s) for diagram number 28 - FFV1_2( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[6], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 28 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -665,7 +665,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -676,10 +676,10 @@ namespace mg5amcCpu // *** DIAGRAM 30 OF 36 *** // Wavefunction(s) for diagram number 30 - FFV1_1( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1_1( w_fp[10], w_fp[0], COUPs[1], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 30 - FFV1_0( w_fp[5], w_fp[6], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[6], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -690,10 +690,10 @@ namespace mg5amcCpu // *** DIAGRAM 31 OF 36 *** // Wavefunction(s) for diagram number 31 - VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 31 - FFV1_0( 
w_fp[5], w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -704,10 +704,10 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 36 *** // Wavefunction(s) for diagram number 32 - FFV1_2( w_fp[12], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1_2( w_fp[12], w_fp[0], COUPs[1], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -721,7 +721,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[12], w_fp[1], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[1], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -735,17 +735,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 34 - VVVV1_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); jamp_sv[4] -= 1. / 2. * amp_sv[0]; jamp_sv[7] += 1. / 2. * amp_sv[0]; jamp_sv[8] -= 1. / 2. * amp_sv[0]; jamp_sv[11] += 1. / 2. * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); jamp_sv[0] -= 1. / 2. * amp_sv[0]; jamp_sv[3] -= 1. / 2. * amp_sv[0]; jamp_sv[7] += 1. / 2. * amp_sv[0]; jamp_sv[11] += 1. / 2. 
* amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); jamp_sv[0] -= 1. / 2. * amp_sv[0]; jamp_sv[3] -= 1. / 2. * amp_sv[0]; jamp_sv[4] += 1. / 2. * amp_sv[0]; @@ -757,7 +757,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - VVV1_0( w_fp[4], w_fp[8], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[8], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -773,7 +773,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1356,12 +1356,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) - constexpr int nprocesses = 2; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code 
generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f index 8cb3f9af60..56e24ed83e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f @@ -42,7 +42,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -130,27 +129,18 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc index 3bbdec9387..71fa817036 
100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f index 7ce63300ba..9517cf043b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f @@ -538,6 +538,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -811,6 +812,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/mirrorprocs.inc index 6f9280a1be..57f5243762 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ + DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/processes.dat index 163cc3efce..aa01e6faf9 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/processes.dat @@ -1,2 +1,2 @@ 1 g u~ > t t~ g u~,g c~ > t t~ g c~,g d~ > t t~ g d~,g s~ > t t~ g s~ -mirror u~ g > t t~ g u~,c~ g > t t~ g c~,d~ g > t t~ g d~,s~ g > t t~ g s~ \ No newline at end of file +mirror none \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc index 529477ff3e..deddedbe37 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc @@ -243,24 +243,24 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 7 *** // Wavefunction(s) for diagram number 1 - ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); + ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz - ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); + imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); 
ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); + oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); - oxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); + oxzxxx( momenta, cHel[ihel][5], +1, w_fp[5], 5 ); - FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[7] ); - FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -273,10 +273,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 7 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -289,10 +289,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 7 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv 
+= cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -303,10 +303,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 7 *** // Wavefunction(s) for diagram number 4 - FFV1_2( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); + FFV1_2( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[5], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -319,10 +319,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 7 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); + FFV1_1( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[1], w_fp[3], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[1], w_fp[3], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -335,10 +335,10 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 7 *** // Wavefunction(s) for diagram number 6 - FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] ); + FFV1_2( w_fp[0], w_fp[7], COUPs[1], 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,10 +351,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 7 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] ); + FFV1_2( w_fp[0], w_fp[8], COUPs[1], 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[3], 
w_fp[4], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -931,12 +931,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) - constexpr int nprocesses = 2; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f index 3488dfd2e6..c68a9f5a67 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f @@ -44,7 +44,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,C1 DOUBLE PRECISION D2,S2,C2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -132,28 +131,15 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF 
(ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)), QSCALE) - S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)), QSCALE) - C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)), QSCALE) + D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f index efcaed5bd1..6d94cf8fc6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f @@ -472,6 +472,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -580,6 +581,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/mirrorprocs.inc index 6f9280a1be..57f5243762 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ + DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/processes.dat index ae198732c6..d745151b92 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/processes.dat @@ -1,2 +1,2 @@ 1 u c > t t~ u c,u d > t t~ u d,u s > t t~ u s,c d > t t~ c d,c s > t t~ c s,d s > t t~ d s -mirror c u > t t~ u c,d u > t t~ u d,s u > t t~ u s,d c > t t~ c d,s c > t t~ c s,s d > t t~ d s \ No newline at end of file +mirror none \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc index e54a24ea57..035f37f9f3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc @@ -249,24 +249,24 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 7 *** // Wavefunction(s) for diagram number 1 - ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); + ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz - oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); + omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz oxxxxx( momenta, cIPD[0], 
cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); + oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); - ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); + ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); - FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] ); - FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -279,10 +279,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 7 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -295,10 +295,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 7 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( 
channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -309,10 +309,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 7 *** // Wavefunction(s) for diagram number 4 - FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); + FFV1_2( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -325,10 +325,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 7 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); + FFV1_1( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -341,10 +341,10 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 7 *** // Wavefunction(s) for diagram number 6 - FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] ); + FFV1_2( w_fp[0], w_fp[7], COUPs[1], 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -357,10 +357,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 7 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] ); + FFV1_2( w_fp[0], w_fp[8], COUPs[1], 0., 0., w_fp[3] ); // Amplitude(s) for diagram 
number 7 - FFV1_0( w_fp[3], w_fp[4], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -937,12 +937,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) - constexpr int nprocesses = 2; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f index 0b6e873ee4..1044310fc4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f @@ -50,7 +50,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -138,30 +137,21 @@ DOUBLE 
PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) - S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. 
Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f index 3172975ef4..69472aa185 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f @@ -484,6 +484,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -592,6 +593,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/mirrorprocs.inc index 6f9280a1be..57f5243762 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ + DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/processes.dat index 42f56ba6f0..6dae119052 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/processes.dat @@ -1,2 +1,2 @@ 1 u c~ > t t~ u c~,u d~ > t t~ u d~,u s~ > t t~ u s~,c u~ > t t~ c u~,c d~ > t t~ c d~,c s~ > t t~ c s~,d u~ > t t~ d u~,d c~ > t t~ d c~,d s~ > t t~ d s~,s u~ > t t~ s u~,s c~ > t t~ s c~,s d~ > t t~ s d~ -mirror c~ u > t t~ u c~,d~ u > t t~ u d~,s~ u > t t~ u s~,u~ c > t t~ c u~,d~ c > t t~ c d~,s~ c > t t~ c s~,u~ d > t t~ d u~,c~ d > t t~ d c~,s~ d > t t~ d s~,u~ s > t t~ s u~,c~ s > t t~ s c~,d~ s > t t~ s d~ \ No newline at end of file +mirror none \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc index 8638bbefa2..d5d7e9e858 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc @@ -241,24 +241,24 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 14 *** // Wavefunction(s) for diagram number 1 - ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); + 
ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz - ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); + imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); + oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); - oxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); + oxzxxx( momenta, cHel[ihel][5], +1, w_fp[5], 5 ); - FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[7] ); - FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 14 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -287,10 +287,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 14 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); + FFV1P0_3( 
w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -301,10 +301,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 14 *** // Wavefunction(s) for diagram number 4 - FFV1_2( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1_2( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[9], w_fp[5], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[5], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -317,10 +317,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 14 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1_1( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[1], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[1], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -333,12 +333,12 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 14 *** // Wavefunction(s) for diagram number 6 - FFV1P0_3( w_fp[0], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] ); - FFV1_1( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); + FFV1P0_3( w_fp[0], w_fp[5], COUPs[1], 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[1], w_fp[4], COUPs[1], 0., 0., w_fp[6] ); + FFV1_1( w_fp[2], w_fp[9], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], 
w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,10 +351,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 14 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); + FFV1_2( w_fp[3], w_fp[9], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -370,7 +370,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -381,10 +381,10 @@ namespace mg5amcCpu // *** DIAGRAM 9 OF 14 *** // Wavefunction(s) for diagram number 9 - FFV1_2( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1_2( w_fp[1], w_fp[9], COUPs[1], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[10], w_fp[4], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[4], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -397,10 +397,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 14 *** // Wavefunction(s) for diagram number 10 - FFV1_1( w_fp[4], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1_1( w_fp[4], w_fp[9], COUPs[1], 0., 0., w_fp[10] ); // Amplitude(s) for diagram 
number 10 - FFV1_0( w_fp[1], w_fp[10], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[1], w_fp[10], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -413,10 +413,10 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 14 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1_2( w_fp[0], w_fp[6], COUPs[1], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[10], w_fp[5], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[5], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -429,10 +429,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 14 *** // Wavefunction(s) for diagram number 12 - FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1_2( w_fp[0], w_fp[8], COUPs[1], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[10], w_fp[5], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[5], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -445,10 +445,10 @@ namespace mg5amcCpu // *** DIAGRAM 13 OF 14 *** // Wavefunction(s) for diagram number 13 - FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1_2( w_fp[0], w_fp[7], COUPs[1], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[6], w_fp[4], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[6], w_fp[4], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -464,7 +464,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 14 - FFV1_0( w_fp[10], w_fp[4], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[4], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1041,12 +1041,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f index 5ed7bc881f..25de63622f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f @@ -42,7 +42,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION D2,U2,S2,C2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -130,30 +129,17 @@ DOUBLE PRECISION 
FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) - S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)), QSCALE) - U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)), QSCALE) - S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)), QSCALE) - C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)), QSCALE) + D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. 
Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f index 77fe909abc..44d755483f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f @@ -468,6 +468,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -619,6 +620,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc index c071cc6900..ed484f79bf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc @@ -249,24 +249,24 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 7 *** // Wavefunction(s) for diagram number 1 - ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); + ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz - oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); + omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); + oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); - ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); + ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); - FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] ); - FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -279,10 +279,10 @@ namespace 
mg5amcCpu // *** DIAGRAM 2 OF 7 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -295,10 +295,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 7 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -309,10 +309,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 7 *** // Wavefunction(s) for diagram number 4 - FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); + FFV1_2( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -325,10 +325,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 7 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); + FFV1_1( w_fp[4], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], &_fp[0] 
); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -341,10 +341,10 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 7 *** // Wavefunction(s) for diagram number 6 - FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] ); + FFV1_2( w_fp[0], w_fp[7], COUPs[1], 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -357,10 +357,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 7 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] ); + FFV1_2( w_fp[0], w_fp[8], COUPs[1], 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[3], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -937,12 +937,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) - constexpr int nprocesses = 2; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + constexpr 
int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f index a32595dce6..300733b34c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f @@ -50,7 +50,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -138,30 +137,21 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) - S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) - 
UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f index c5a7b6787c..4a4f39ba52 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f @@ -484,6 +484,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -592,6 +593,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/mirrorprocs.inc index 6f9280a1be..57f5243762 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ + DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/processes.dat index b073954d9b..e3981c4625 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/processes.dat @@ -1,2 +1,2 @@ 1 u u~ > t t~ c c~,u u~ > t t~ d d~,u u~ > t t~ s s~,c c~ > t t~ u u~,c c~ > t t~ d d~,c c~ > t t~ s s~,d d~ > t t~ u u~,d d~ > t t~ c c~,d d~ > t t~ s s~,s s~ > t t~ u u~,s s~ > t t~ c c~,s s~ > t t~ d d~ -mirror u~ u > t t~ c c~,u~ u > t t~ d d~,u~ u > t t~ s s~,c~ c > t t~ u u~,c~ c > t t~ d d~,c~ c > t t~ s s~,d~ d > t t~ u u~,d~ d > t t~ c c~,d~ d > t t~ s s~,s~ s > t t~ u u~,s~ s > t t~ c c~,s~ s > t t~ d d~ \ No newline at end of file +mirror none \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc index 2eb6b491fa..57e20e66f1 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc @@ -241,9 +241,9 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 36 *** // Wavefunction(s) for diagram number 1 - ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); 
+ ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz - oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); + omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); @@ -253,12 +253,12 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); - VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); - FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); + FFV1_1( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 36 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); + FFV1_2( w_fp[0], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -287,10 +287,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 36 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -303,10 +303,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 36 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -319,10 +319,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 36 *** // Wavefunction(s) for diagram number 5 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -335,11 +335,11 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 36 *** // Wavefunction(s) for diagram number 6 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[6] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -350,11 +350,11 @@ 
namespace mg5amcCpu // *** DIAGRAM 7 OF 36 *** // Wavefunction(s) for diagram number 7 - FFV1_1( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[10] ); - FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] ); + FFV1_1( w_fp[1], w_fp[5], COUPs[1], 0., 0., w_fp[10] ); + FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[0], w_fp[10], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[0], w_fp[10], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -365,10 +365,10 @@ namespace mg5amcCpu // *** DIAGRAM 8 OF 36 *** // Wavefunction(s) for diagram number 8 - FFV1_2( w_fp[0], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[12] ); + FFV1_2( w_fp[0], w_fp[5], COUPs[1], 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[12], w_fp[1], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[1], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -379,10 +379,10 @@ namespace mg5amcCpu // *** DIAGRAM 9 OF 36 *** // Wavefunction(s) for diagram number 9 - FFV1_1( w_fp[9], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); + FFV1_1( w_fp[9], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -396,7 +396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 10 - VVV1_0( w_fp[5], w_fp[8], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[5], w_fp[8], w_fp[11], COUPs[0], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -407,11 +407,11 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 36 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); - FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[2], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -422,10 +422,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 36 *** // Wavefunction(s) for diagram number 12 - FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[0], w_fp[10], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[0], w_fp[10], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -439,7 +439,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[12], w_fp[1], w_fp[9], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[1], w_fp[9], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -450,10 +450,10 @@ namespace mg5amcCpu // *** DIAGRAM 14 OF 36 *** // Wavefunction(s) for diagram number 14 - FFV1_2( w_fp[11], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); + FFV1_2( w_fp[11], 
w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -467,7 +467,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[5], w_fp[8], w_fp[9], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[5], w_fp[8], w_fp[9], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -478,11 +478,11 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 36 *** // Wavefunction(s) for diagram number 16 - FFV1_1( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[0], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] ); + FFV1_1( w_fp[1], w_fp[4], COUPs[1], 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[0], w_fp[9], COUPs[1], 0., 0., w_fp[14] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -496,7 +496,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -510,7 +510,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -521,10 +521,10 @@ namespace mg5amcCpu // *** DIAGRAM 19 OF 36 *** // Wavefunction(s) for diagram number 19 - FFV1_1( w_fp[9], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[11] ); + FFV1_1( w_fp[9], w_fp[5], COUPs[1], 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[0], w_fp[11], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[0], w_fp[11], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -538,7 +538,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[5], w_fp[7], w_fp[14], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[5], w_fp[7], w_fp[14], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -549,11 +549,11 @@ namespace mg5amcCpu // *** DIAGRAM 21 OF 36 *** // Wavefunction(s) for diagram number 21 - FFV1_2( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[14] ); - FFV1P0_3( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] ); + FFV1_2( w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[14] ); + FFV1P0_3( w_fp[14], w_fp[1], COUPs[1], 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -567,7 +567,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -581,7 +581,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 23 - FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -592,10 +592,10 @@ namespace mg5amcCpu // *** DIAGRAM 24 OF 36 *** // Wavefunction(s) for diagram number 24 - FFV1_2( w_fp[14], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1_2( w_fp[14], w_fp[5], COUPs[1], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[9], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -609,7 +609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 25 - VVV1_0( w_fp[5], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[5], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -620,10 +620,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 36 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[13], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[13], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -634,10 
+634,10 @@ namespace mg5amcCpu // *** DIAGRAM 27 OF 36 *** // Wavefunction(s) for diagram number 27 - VVV1P0_1( w_fp[4], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] ); + VVV1P0_1( w_fp[4], w_fp[8], COUPs[0], 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -648,10 +648,10 @@ namespace mg5amcCpu // *** DIAGRAM 28 OF 36 *** // Wavefunction(s) for diagram number 28 - FFV1_2( w_fp[6], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[6], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 28 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -665,7 +665,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -676,10 +676,10 @@ namespace mg5amcCpu // *** DIAGRAM 30 OF 36 *** // Wavefunction(s) for diagram number 30 - FFV1_1( w_fp[10], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1_1( w_fp[10], w_fp[4], COUPs[1], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 30 - FFV1_0( w_fp[0], w_fp[6], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[0], w_fp[6], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -690,10 +690,10 @@ namespace mg5amcCpu // *** DIAGRAM 31 OF 36 *** // Wavefunction(s) for diagram number 31 - VVV1P0_1( w_fp[4], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[4], w_fp[7], COUPs[0], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 31 - FFV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -704,10 +704,10 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 36 *** // Wavefunction(s) for diagram number 32 - FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1_2( w_fp[12], w_fp[4], COUPs[1], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -721,7 +721,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[12], w_fp[1], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[12], w_fp[1], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -735,17 +735,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 34 - VVVV1_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); + VVVV1_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); jamp_sv[2] -= 1. / 2. * amp_sv[0]; jamp_sv[3] += 1. / 2. * amp_sv[0]; jamp_sv[4] += 1. / 2. * amp_sv[0]; jamp_sv[5] -= 1. / 2. 
* amp_sv[0]; - VVVV3_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); + VVVV3_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); jamp_sv[2] -= 1. / 2. * amp_sv[0]; jamp_sv[5] -= 1. / 2. * amp_sv[0]; jamp_sv[9] += 1. / 2. * amp_sv[0]; jamp_sv[10] += 1. / 2. * amp_sv[0]; - VVVV4_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); + VVVV4_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); jamp_sv[3] -= 1. / 2. * amp_sv[0]; jamp_sv[4] -= 1. / 2. * amp_sv[0]; jamp_sv[9] += 1. / 2. * amp_sv[0]; @@ -757,7 +757,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - VVV1_0( w_fp[5], w_fp[8], w_fp[6], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[5], w_fp[8], w_fp[6], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -773,7 +773,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[5], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[5], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1356,12 +1356,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) - constexpr int nprocesses = 2; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 
and PRs #360 and #396) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f index baaee299a2..c9d97c2911 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f @@ -42,7 +42,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -130,30 +129,21 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) - S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) - SX2=PDG2PDF(LPP(IB(2)),-3, 
IB(2),XBK(IB(2)), QSCALE) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f index 80fb12abe5..6fdd945c1e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f @@ -540,6 +540,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -820,6 +821,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/mirrorprocs.inc index 6f9280a1be..57f5243762 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ + DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/processes.dat index 253ae4161c..6154f86412 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/processes.dat @@ -1,2 +1,2 @@ 1 u u~ > t t~ g g,c c~ > t t~ g g,d d~ > t t~ g g,s s~ > t t~ g g -mirror u~ u > t t~ g g,c~ c > t t~ g g,d~ d > t t~ g g,s~ s > t t~ g g \ No newline at end of file +mirror none \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc index 8682128442..a42b3d8af9 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc @@ -241,24 +241,24 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 14 *** // Wavefunction(s) for diagram number 1 - ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); + ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz - oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); + omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( 
momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); + oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); - ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); + ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); - FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] ); - FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 14 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -287,10 +287,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 14 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += 
cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -301,10 +301,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 14 *** // Wavefunction(s) for diagram number 4 - FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1_2( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[9], w_fp[4], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[4], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -317,10 +317,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 14 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1_1( w_fp[4], w_fp[6], COUPs[1], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -333,12 +333,12 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 14 *** // Wavefunction(s) for diagram number 6 - FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] ); - FFV1_1( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); + FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 0., 0., w_fp[6] ); + FFV1_1( w_fp[2], w_fp[9], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,10 +351,10 
@@ namespace mg5amcCpu // *** DIAGRAM 7 OF 14 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); + FFV1_2( w_fp[3], w_fp[9], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -370,7 +370,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -381,10 +381,10 @@ namespace mg5amcCpu // *** DIAGRAM 9 OF 14 *** // Wavefunction(s) for diagram number 9 - FFV1_2( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1_2( w_fp[5], w_fp[9], COUPs[1], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[10], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -397,10 +397,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 14 *** // Wavefunction(s) for diagram number 10 - FFV1_1( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1_1( w_fp[1], w_fp[9], COUPs[1], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[5], w_fp[10], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( 
amp_sv[0] ); @@ -413,10 +413,10 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 14 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1_2( w_fp[0], w_fp[6], COUPs[1], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[10], w_fp[4], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[4], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -429,10 +429,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 14 *** // Wavefunction(s) for diagram number 12 - FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1_2( w_fp[0], w_fp[8], COUPs[1], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[10], w_fp[4], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[4], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -445,10 +445,10 @@ namespace mg5amcCpu // *** DIAGRAM 13 OF 14 *** // Wavefunction(s) for diagram number 13 - FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1_2( w_fp[0], w_fp[7], COUPs[1], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[6], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[6], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -464,7 +464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -1041,12 +1041,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) - constexpr int nprocesses = 2; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f index c2206e8d5e..c3b97a2a87 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f @@ -42,7 +42,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -130,30 +129,21 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - 
ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) - S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f index 3544d80d72..03d576c6d5 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f @@ -468,6 +468,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -619,6 +620,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/mirrorprocs.inc index 6f9280a1be..57f5243762 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ + DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/processes.dat index cd7b317223..b85fa2760c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/processes.dat @@ -1,2 +1,2 @@ 1 u u~ > t t~ u u~,c c~ > t t~ c c~,d d~ > t t~ d d~,s s~ > t t~ s s~ -mirror u~ u > t t~ u u~,c~ c > t t~ c c~,d~ d > t t~ d d~,s~ s > t t~ s s~ \ No newline at end of file +mirror none \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc index 7d3141cfc4..d1d96581db 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc @@ -243,24 +243,24 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 7 *** // Wavefunction(s) for diagram number 1 - oxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); + opzxxx( momenta, cHel[ihel][0], -1, w_fp[0], 0 ); // NB: opzxxx only uses pz - oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); + omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, 
w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - ixxxxx( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 ); + ixzxxx( momenta, cHel[ihel][4], -1, w_fp[4], 4 ); - ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); + ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); - FFV1P0_3( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] ); - FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[4], w_fp[0], COUPs[1], 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -273,10 +273,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 7 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -289,10 +289,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 7 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -303,10 +303,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 7 *** // Wavefunction(s) for diagram number 4 - FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); + FFV1_2( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -319,10 +319,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 7 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); + FFV1_1( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -335,10 +335,10 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 7 *** // Wavefunction(s) for diagram number 6 - FFV1_2( w_fp[4], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] ); + FFV1_2( w_fp[4], w_fp[7], COUPs[1], 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[0], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[0], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,10 +351,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 7 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[4], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] ); + FFV1_2( w_fp[4], w_fp[8], COUPs[1], 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 7 - FFV1_0( 
w_fp[3], w_fp[0], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[0], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -931,12 +931,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) - constexpr int nprocesses = 2; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + constexpr int nprocesses = 1; + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f index e92ee65fd7..adb807b78c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f @@ -44,7 +44,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION CX1,UX1,DX1 DOUBLE PRECISION CX2,SX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -132,28 +131,21 @@ DOUBLE PRECISION 
FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - CX1=PDG2PDF(LPP(IB(1)),-4, IB(1),XBK(IB(1)), QSCALE) - UX1=PDG2PDF(LPP(IB(1)),-2, IB(1),XBK(IB(1)), QSCALE) - DX1=PDG2PDF(LPP(IB(1)),-1, IB(1),XBK(IB(1)), QSCALE) + CX1=PDG2PDF(LPP(IB(1)),-4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) + $ )) + UX1=PDG2PDF(LPP(IB(1)),-2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) + $ )) + DX1=PDG2PDF(LPP(IB(1)),-1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) + $ )) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
#include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f index 61d4e59741..19a22be6d6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f @@ -472,6 +472,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -580,6 +581,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/mirrorprocs.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/mirrorprocs.inc index 6f9280a1be..57f5243762 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/mirrorprocs.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/mirrorprocs.inc @@ -1 +1 @@ - DATA (MIRRORPROCS(I),I=1,1)/.TRUE./ + DATA (MIRRORPROCS(I),I=1,1)/.FALSE./ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/processes.dat b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/processes.dat index ada123d362..9c0368f622 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/processes.dat +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/processes.dat @@ -1,2 +1,2 @@ 1 u~ c~ > t t~ u~ c~,u~ d~ > t t~ u~ d~,u~ s~ > t t~ u~ s~,c~ d~ > t t~ c~ d~,c~ s~ > t t~ c~ s~,d~ s~ > t t~ d~ s~ -mirror c~ u~ > t t~ u~ c~,d~ u~ > t t~ u~ d~,s~ u~ > t t~ u~ s~,d~ c~ > t t~ c~ d~,s~ c~ > t t~ c~ s~,s~ d~ > t t~ d~ s~ \ No newline at end of file +mirror none \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc index 6ec302f68b..129dd8551a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc @@ -241,24 +241,24 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 14 *** // Wavefunction(s) for diagram number 1 - oxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); + opzxxx( momenta, cHel[ihel][0], -1, w_fp[0], 0 ); // NB: opzxxx only uses pz - oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); + omzxxx( 
momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - ixxxxx( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 ); + ixzxxx( momenta, cHel[ihel][4], -1, w_fp[4], 4 ); - ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); + ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); - FFV1P0_3( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] ); - FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[4], w_fp[0], COUPs[1], 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 14 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -287,10 +287,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 14 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + 
VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -301,10 +301,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 14 *** // Wavefunction(s) for diagram number 4 - FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1_2( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[9], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[9], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -317,10 +317,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 14 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1_1( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -333,12 +333,12 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 14 *** // Wavefunction(s) for diagram number 6 - FFV1P0_3( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] ); - FFV1_1( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); + FFV1P0_3( w_fp[4], w_fp[1], COUPs[1], 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[5], w_fp[0], COUPs[1], 0., 0., w_fp[6] ); + FFV1_1( w_fp[2], w_fp[9], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( 
channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,10 +351,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 14 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); + FFV1_2( w_fp[3], w_fp[9], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -370,7 +370,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &_fp[0] ); + VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -381,10 +381,10 @@ namespace mg5amcCpu // *** DIAGRAM 9 OF 14 *** // Wavefunction(s) for diagram number 9 - FFV1_2( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1_2( w_fp[5], w_fp[9], COUPs[1], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[10], w_fp[0], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[0], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -397,10 +397,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 14 *** // Wavefunction(s) for diagram number 10 - FFV1_1( w_fp[0], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1_1( w_fp[0], w_fp[9], COUPs[1], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[5], w_fp[10], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[8], COUPs[1], &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -413,10 +413,10 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 14 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1_2( w_fp[4], w_fp[6], COUPs[1], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[10], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -429,10 +429,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 14 *** // Wavefunction(s) for diagram number 12 - FFV1_2( w_fp[4], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1_2( w_fp[4], w_fp[8], COUPs[1], 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[10], w_fp[1], w_fp[6], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[1], w_fp[6], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -445,10 +445,10 @@ namespace mg5amcCpu // *** DIAGRAM 13 OF 14 *** // Wavefunction(s) for diagram number 13 - FFV1_2( w_fp[4], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1_2( w_fp[4], w_fp[7], COUPs[1], 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[6], w_fp[0], w_fp[8], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[6], w_fp[0], w_fp[8], COUPs[1], &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -464,7 +464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[10], w_fp[0], w_fp[7], COUPs[1], 1.0, &_fp[0] ); + FFV1_0( w_fp[10], w_fp[0], w_fp[7], COUPs[1], 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1041,12 +1041,13 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) + // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) constexpr int nprocesses = 1; - static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); + static_assert( nprocesses == 1, "Assume nprocesses == 1" ); + // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f index cad7f4197d..68d329862c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f @@ -42,7 +42,6 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC - DOUBLE PRECISION QSCALE DOUBLE PRECISION CX1,SX1,UX1,DX1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -130,30 +129,25 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN - 
QSCALE=0D0 - DO I=3,NEXTERNAL - QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) - $ -PP(3,I)))) - ENDDO - QSCALE=QSCALE/2D0 - ELSE - QSCALE=DSQRT(Q2FACT(IB(1))) - ENDIF - CX1=PDG2PDF(LPP(IB(1)),-4, IB(1),XBK(IB(1)), QSCALE) - SX1=PDG2PDF(LPP(IB(1)),-3, IB(1),XBK(IB(1)), QSCALE) - UX1=PDG2PDF(LPP(IB(1)),-2, IB(1),XBK(IB(1)), QSCALE) - DX1=PDG2PDF(LPP(IB(1)),-1, IB(1),XBK(IB(1)), QSCALE) + CX1=PDG2PDF(LPP(IB(1)),-4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) + $ )) + SX1=PDG2PDF(LPP(IB(1)),-3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) + $ )) + UX1=PDG2PDF(LPP(IB(1)),-2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) + $ )) + DX1=PDG2PDF(LPP(IB(1)),-1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) + $ )) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN - QSCALE=DSQRT(Q2FACT(IB(2))) - ENDIF - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) + $ )) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc index 3bbdec9387..71fa817036 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. 
Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,10 +36,13 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; + static mgOnGpu::Timer matrix1_timer; + static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -49,6 +52,19 @@ extern "C" return; } + void counters_matrix1_start_() + { + matrix1_counter++; + matrix1_timer.Start(); + return; + } + + void counters_matrix1_stop_() + { + matrix1_totaltime += matrix1_timer.GetDuration(); + return; + } + void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f index 1b50f51264..65a58589dc 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f @@ -468,6 +468,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- + call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -619,6 +620,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO + call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk index c6c1826de7..43cee0977e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk @@ -27,8 +27,6 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) - #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -222,8 +220,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +555,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: 
$(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile index 74b19033a8..74db44d848 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
+LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/generate_events b/epochX/cudacpp/pp_tt012j.mad/bin/generate_events index 5577cc66a0..107313b25d 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/generate_events +++ b/epochX/cudacpp/pp_tt012j.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME -import misc as misc + import logging import logging.config @@ -160,31 +160,17 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv - - # check for plugin customization of the launch command - launch_interface = ME.MadEventCmdShell - if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): - with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - launch_interface = launch_plugin.MEINTERFACE - - - + argument = sys.argv try: if '-h' in argument or '--help' in argument: - launch = launch_interface(me_dir=root_path, force_run=True) + launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = launch_interface(me_dir=root_path, force_run=True) + launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py 
b/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py index e9f421ae5f..7624b9f557 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py @@ -1002,14 +1002,13 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() - self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - + self.plugin_input(finput) def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/pp_tt012j.mad/bin/internal/ufomodel/py3_model.pkl index 27a1caae3c115073669b90622e9351ab04166d39..dc38da0bfa76ea4206a3c5b2d34b98c606f7d044 100644 GIT binary patch delta 23 fcmX?qj_Kk#rVZZ97)vMnEK_6l^>>+EyzCADfsqPs delta 21 dcmcb3j_Le4rVZZ97>g(SEK_5Qm|VK-4ghc73FH6( diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/madevent b/epochX/cudacpp/pp_tt012j.mad/bin/madevent index 10b6a71fa2..c944aa1faf 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/madevent +++ b/epochX/cudacpp/pp_tt012j.mad/bin/madevent @@ -32,7 +32,6 @@ except ImportError: import os -pjoin = os.path.join import optparse # Get the directory of the script real path (bin) @@ -161,23 +160,10 @@ except: pass import internal.madevent_interface as cmd_interface -# check for plugin customization of the launch command -launch_interface = cmd_interface.MadEventCmdShell -if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): - with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - launch_interface = launch_plugin.MEINTERFACE - - - # Call the cmd interface main loop try: if '-h' in args or '--help' in args: - launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) + launch = 
ME.MadEventCmdShell(me_dir=os.path.dirname(root_path), force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): @@ -192,7 +178,7 @@ try: cmd_line.run_cmd('import command ' + input_file) cmd_line.run_cmd('quit') else: - cmd_line = launch_interface(force_run=True) + cmd_line = cmd_interface.MadEventCmdShell(force_run=True) cmd_line.use_rawinput = False cmd_line.haspiping = False cmd_line.run_cmd('import command ' + input_file) @@ -202,7 +188,7 @@ try: if options.web: cmd_line = cmd_interface.MadEventCmd(force_run=True) else: - cmd_line = launch_interface(force_run=True) + cmd_line = cmd_interface.MadEventCmdShell(force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print( parser_error) diff --git a/epochX/cudacpp/pp_tt012j.mad/mg5.in b/epochX/cudacpp/pp_tt012j.mad/mg5.in index 91e22f5295..66c20a304b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/mg5.in +++ b/epochX/cudacpp/pp_tt012j.mad/mg5.in @@ -1,5 +1,3 @@ -set stdout_level DEBUG -set zerowidth_tchannel F define j = p generate p p > t t~ @0 add process p p > t t~ j @1 diff --git a/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h b/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h index 9b946c21e1..9cea8bcbe7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h @@ -863,7 +863,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -874,7 +873,6 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -888,7 +886,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] 
) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -899,7 +896,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -912,7 +908,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -925,7 +920,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -940,7 +934,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -952,7 +945,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -967,7 +959,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -979,7 +970,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -994,7 +984,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -1006,7 +995,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const 
fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -1020,7 +1008,6 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1055,7 +1042,6 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1094,7 +1080,6 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1118,7 +1103,6 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1150,7 +1134,6 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], - const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1182,7 +1165,6 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], - const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1215,7 +1197,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1244,7 +1225,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1280,7 +1260,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1309,7 +1288,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1345,7 +1323,6 @@ namespace mg5amcCpu const fptype allV3[], const 
fptype allV4[], const fptype allCOUP[], - const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1374,7 +1351,6 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], - const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/pp_tt012j.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt b/epochX/cudacpp/pp_tt012j.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt index d596b33ae7..dd90c94acf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt +++ b/epochX/cudacpp/pp_tt012j.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt @@ -4,7 +4,7 @@ Event 0 Batch 0 2 2.647483690509011e+02 7.527657265342380e+01 -2.528976247704283e+02 -2.163164141117315e+01 3 6.252973211776936e+02 -5.721080498766041e+02 -1.578766990348905e+01 2.518727230515587e+02 4 6.099543097714056e+02 4.968314772231802e+02 2.686852946739174e+02 -2.302410816403857e+02 - ME 6.254927412618323e-05 + ME 3.498510462248670e-04 Event 1 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -12,7 +12,7 @@ Event 1 Batch 0 2 2.542827954151951e+02 1.482213322085297e+02 -1.988618298139058e+02 -5.607271498295615e+01 3 6.883656117507998e+02 1.265478873489434e+02 5.602777828023585e+02 3.793700749224233e+02 4 5.573515928340058e+02 -2.747692195574731e+02 -3.614159529884527e+02 -3.232973599394667e+02 - ME 8.120933129385430e-05 + ME 7.257243108248426e-04 Event 2 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -20,7 +20,7 @@ Event 2 Batch 0 2 4.301460683791099e+02 -3.656995432079240e+02 -2.257802895903974e+02 -1.768459985405173e+01 3 5.058528987551350e+02 2.755467101243707e+02 -2.034821274188550e+02 3.722313656043856e+02 4 5.640010328657550e+02 9.015283308355326e+01 4.292624170092524e+02 -3.545467657503340e+02 - ME 1.104115154253218e-04 + ME 8.130044127338102e-04 Event 3 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -28,7 +28,7 @@ Event 3 Batch 0 2 6.758793342627306e+02 1.455349847705337e+02 4.360940220328824e+02 -4.954335945799966e+02 3 3.008019460079605e+02 -1.607139834787174e+02 2.732727402256846e+01 2.527964523704278e+02 4 5.233187197293092e+02 1.517899870818368e+01 -4.634212960554508e+02 2.426371422095687e+02 - ME 4.288074098478053e-05 + ME 7.753277710143621e-05 Event 4 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -36,7 +36,7 @@ Event 4 Batch 0 2 3.540811678028369e+02 5.414642718170588e+01 -3.497885023717100e+02 -9.467915537920108e+00 3 7.415000547748695e+02 1.453779348794601e+00 7.277337852109665e+02 1.422102514562805e+02 4 4.044187774222938e+02 -5.560020653050046e+01 -3.779452828392566e+02 -1.327423359183605e+02 - ME 1.304731284254719e-05 + ME 2.015528729476554e-04 Event 5 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -44,7 +44,7 @@ Event 5 Batch 0 2 4.747467875786874e+02 2.462969907607520e+02 3.713870243947702e+02 1.636886763636381e+02 3 3.438196236093862e+02 -2.056491112573935e+02 2.636029701703988e+02 8.021128807897365e+01 4 6.814335888119255e+02 -4.064787950335840e+01 -6.349899945651691e+02 -2.438999644426124e+02 - ME 1.932390649640220e-04 + ME 6.140777519977192e-04 Event 6 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -52,7 +52,7 @@ Event 6 Batch 0 2 5.623951200922340e+02 4.644673798421034e+02 3.089047820108764e+02 -7.166700647426805e+01 3 2.268243199894467e+02 1.761899852590787e+02 -7.114332369064562e+01 -1.238748914321566e+02 4 7.107805599183188e+02 -6.406573651011822e+02 -2.377614583202307e+02 1.955418979064247e+02 - ME 1.929702539767979e-04 + ME 8.375373201653861e-04 Event 7 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -60,7 +60,7 @@ Event 7 Batch 0 2 4.922243378496302e+02 
2.878585072835456e+02 -1.441537488072182e+02 -3.723465794939189e+02 3 2.873990637609374e+02 -5.400981623596619e+01 -8.913204919452846e+01 -2.678369642286231e+02 4 7.203765983894325e+02 -2.338486910475794e+02 2.332857980017467e+02 6.401835437225419e+02 - ME 6.280412585349807e-04 + ME 2.045598717079573e-03 Event 8 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -68,7 +68,7 @@ Event 8 Batch 0 2 3.353309706037128e+02 -7.529439061162444e+01 -4.917829145606096e+01 -3.230466069128648e+02 3 7.169322705461503e+02 -1.597426278178964e+02 -1.460012137440150e+01 6.987567601563110e+02 4 4.477367588501368e+02 2.350370184295208e+02 6.377841283046249e+01 -3.757101532434461e+02 - ME 1.424871539111113e-03 + ME 5.176104304710922e-03 Event 9 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -76,7 +76,7 @@ Event 9 Batch 0 2 2.557626120875720e+02 2.000882245504951e+02 -5.276260741790070e+01 -1.503174088272977e+02 3 7.044202058180884e+02 -6.969679478438196e+02 -1.019614549623775e+02 6.882422911146106e+00 4 5.398171820943397e+02 4.968797232933244e+02 1.547240623802783e+02 1.434349859161515e+02 - ME 1.126010180174107e-05 + ME 6.498215193902510e-05 Event 10 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -84,7 +84,7 @@ Event 10 Batch 0 2 3.466796552973448e+02 1.172124288883391e+02 -1.804077050554743e+02 2.718475489457261e+02 3 5.174471655316495e+02 -1.610456139025784e+02 -4.497410659869822e+02 -1.988689340353916e+02 4 6.358731791710053e+02 4.383318501423926e+01 6.301487710424565e+02 -7.297861491033444e+01 - ME 8.292383053707579e-05 + ME 2.111165581639245e-04 Event 11 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -92,7 +92,7 @@ Event 11 Batch 0 2 5.730783827248506e+02 -3.059484875398849e+01 3.466457017175528e+02 -4.553235612803233e+02 3 4.410994673708892e+02 -3.026218886155176e+02 
-1.990641070399019e+01 3.203005892260318e+02 4 4.858221499042607e+02 3.332167373695061e+02 -3.267392910135624e+02 1.350229720542913e+02 - ME 2.195851954305949e-05 + ME 5.129802099928076e-05 Event 12 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -100,7 +100,7 @@ Event 12 Batch 0 2 2.275003875859171e+02 -1.247450244086003e+02 1.654605359856639e+02 9.390376067217456e+01 3 6.138170466352969e+02 3.363961838598331e+02 -2.139358085817026e+01 5.129827374509639e+02 4 6.586825657787861e+02 -2.116511594512328e+02 -1.440669551274935e+02 -6.068864981231385e+02 - ME 3.843244876666358e-03 + ME 5.249882090061186e-02 Event 13 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -108,7 +108,7 @@ Event 13 Batch 0 2 2.867684047377951e+02 7.055192702127012e+01 -2.028354730671929e+02 1.900429278217245e+02 3 6.990707050557395e+02 -5.605742285334717e+02 2.413419117565430e+02 -3.408965629057132e+02 4 5.141608902064654e+02 4.900223015122016e+02 -3.850643868935023e+01 1.508536350839886e+02 - ME 1.780264803426774e-05 + ME 6.422048006176975e-05 Event 14 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -116,7 +116,7 @@ Event 14 Batch 0 2 3.551549262960330e+02 1.090410064132905e+02 3.205839746298526e+02 1.071027348074892e+02 3 5.276349775014137e+02 3.895763694332612e+02 -2.529209653865598e+02 2.503196099590423e+02 4 6.172100962025531e+02 -4.986173758465519e+02 -6.766300924329285e+01 -3.574223447665315e+02 - ME 1.172793340377339e-04 + ME 7.422587439250419e-04 Event 15 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -124,7 +124,7 @@ Event 15 Batch 0 2 5.846731991828425e+02 7.106081559720657e+01 3.900476102503054e+02 4.297161529048979e+02 3 2.829885923647302e+02 -2.767806781033229e+02 5.223342094943639e+01 -2.732525156618249e+01 4 6.323382084524278e+02 2.057198625061163e+02 
-4.422810311997417e+02 -4.023909013387152e+02 - ME 2.768931482482754e-04 + ME 1.255922738422332e-03 Event 16 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -132,7 +132,7 @@ Event 16 Batch 0 2 7.471577506095512e+02 1.666056475215676e+02 -5.784682380714994e+02 -4.425627187781379e+02 3 6.589296733908160e+02 -1.235441202519038e+02 5.251239647671507e+02 3.783780998595698e+02 4 9.391257599963087e+01 -4.306152726966400e+01 5.334427330434855e+01 6.418461891856485e+01 - ME 3.619360847906487e-05 + ME 5.526726502577864e-05 Event 17 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -140,7 +140,7 @@ Event 17 Batch 0 2 3.567490993131759e+02 3.856364495163717e+01 -1.708845728849435e+02 -3.107752047682324e+02 3 6.453207560475681e+02 4.468356462873772e+02 2.282834847349605e+02 4.057874246326636e+02 4 4.979301446392561e+02 -4.853992912390142e+02 -5.739891185001719e+01 -9.501221986443127e+01 - ME 3.400819398697452e-05 + ME 1.327369996555111e-04 Event 18 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -148,7 +148,7 @@ Event 18 Batch 0 2 4.856701782481425e+02 2.509110753153842e+02 -3.498523763974107e+02 -2.247720379690150e+02 3 3.014847498930008e+02 -1.059425909901355e+02 -2.435847754696140e+02 -1.426032222348426e+02 4 7.128450718588564e+02 -1.449684843252488e+02 5.934371518670247e+02 3.673752602038576e+02 - ME 1.704840743724005e-04 + ME 1.018512933050835e-03 Event 19 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -156,7 +156,7 @@ Event 19 Batch 0 2 5.848213503304410e+02 -3.141116763848333e+02 -1.950442390378232e+02 4.531088295091878e+02 3 5.769300027107226e+02 5.020221748138873e+02 2.252239828724832e+02 -1.734823378963534e+02 4 3.382486469588368e+02 -1.879104984290540e+02 -3.017974383465995e+01 -2.796264916128346e+02 - ME 1.566312636528492e-04 + ME 4.267017342507976e-03 Event 
20 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -164,7 +164,7 @@ Event 20 Batch 0 2 5.550938429889906e+02 -4.478597170519693e+02 -1.958065402362923e+02 -2.630791652090858e+02 3 5.585686897587655e+02 3.351111310173187e+02 -1.360174455686903e+02 4.256744830831253e+02 4 3.863374672522434e+02 1.127485860346507e+02 3.318239858049826e+02 -1.625953178740396e+02 - ME 4.443882992804106e-05 + ME 2.768271682113988e-04 Event 21 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -172,7 +172,7 @@ Event 21 Batch 0 2 6.296556563991993e+02 -3.477135312394776e+02 -1.376147989324512e+02 -5.065804111325866e+02 3 3.137568007204202e+02 1.080474571851863e+02 -2.382188236683311e+02 1.732653140250679e+02 4 5.565875428803801e+02 2.396660740542913e+02 3.758336226007823e+02 3.333150971075189e+02 - ME 2.195742323347977e-05 + ME 5.519034669639832e-05 Event 22 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -180,7 +180,7 @@ Event 22 Batch 0 2 5.583338925767162e+02 2.471586228668332e+02 -1.597599499756147e+02 -4.744745610949311e+02 3 5.378723432497920e+02 9.149532098241385e+00 4.314513680009925e+02 3.210493120152684e+02 4 4.037937641734921e+02 -2.563081549650745e+02 -2.716914180253778e+02 1.534252490796627e+02 - ME 1.393143104564022e-05 + ME 3.705224437539572e-05 Event 23 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -188,7 +188,7 @@ Event 23 Batch 0 2 6.057340011976822e+02 6.848115528115159e+01 -5.207204912425279e+02 -3.017849923015605e+02 3 6.884459352783615e+02 -2.949639632364767e+01 6.680977958792448e+02 1.635026102131439e+02 4 2.058200635239559e+02 -3.898475895750391e+01 -1.473773046367171e+02 1.382823820884168e+02 - ME 1.074117284514867e-05 + ME 2.946248744974782e-05 Event 24 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-196,7 +196,7 @@ Event 24 Batch 0 2 4.702316790647315e+02 -1.210575128627593e+02 4.313728504035306e+02 -1.427598490831810e+02 3 7.180482366151732e+02 1.040047389253588e+02 -7.104588047260974e+02 4.956931953573291e+00 4 3.117200843200960e+02 1.705277393740069e+01 2.790859543225674e+02 1.378029171296075e+02 - ME 5.213387311993420e-06 + ME 3.146557994448562e-05 Event 25 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -204,7 +204,7 @@ Event 25 Batch 0 2 6.261365010744016e+02 -5.354018140499276e+02 -2.095559720530078e+02 2.479477970595020e+02 3 5.483958991041942e+02 5.199465180092641e+02 -9.843995208133505e+01 -1.438862620216537e+02 4 3.254675998214045e+02 1.545529604066345e+01 3.079959241343431e+02 -1.040615350378483e+02 - ME 1.695323153210731e-05 + ME 1.657640191611339e-04 Event 26 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -212,7 +212,7 @@ Event 26 Batch 0 2 4.635816356180677e+02 1.904702824079147e+02 -2.351549941335565e+02 -3.511853259118595e+02 3 3.686385821486527e+02 -2.712527815845713e+02 -6.015354190959191e+01 -2.422764621809819e+02 4 6.677797822332798e+02 8.078249917665664e+01 2.953085360431485e+02 5.934617880928415e+02 - ME 1.052251904460155e-04 + ME 3.250975879010065e-04 Event 27 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -220,7 +220,7 @@ Event 27 Batch 0 2 2.851713673150520e+02 1.387976072955998e+02 1.520424011317634e+02 -1.973348453858079e+02 3 6.747356481771329e+02 2.426633222154767e+02 -4.300238522839811e+02 4.598501858640580e+02 4 5.400929845078149e+02 -3.814609295110765e+02 2.779814511522176e+02 -2.625153404782502e+02 - ME 7.957109124083736e-05 + ME 4.155279516527712e-04 Event 28 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -228,7 +228,7 @@ Event 28 Batch 0 2 1.977804200471008e+02 -1.803202618401224e+02 -8.082809162516925e+01 
-8.277519444290659e+00 3 7.197523834069627e+02 3.152541965091956e+02 6.467033971658861e+02 -2.080867841663842e+01 4 5.824671965459364e+02 -1.349339346690732e+02 -5.658753055407169e+02 2.908619786092899e+01 - ME 1.748013159755222e-05 + ME 1.172809031809504e-04 Event 29 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -236,7 +236,7 @@ Event 29 Batch 0 2 6.123364628491765e+02 -3.746492624245139e+02 3.785128791537567e+02 -3.021950929683376e+02 3 4.056577755659300e+02 1.796205570313495e+00 -8.781658530568643e+01 3.960344074293251e+02 4 4.820057615848937e+02 3.728530568542006e+02 -2.906962938480702e+02 -9.383931446098750e+01 - ME 3.085570985177973e-04 + ME 5.496242925842306e-04 Event 30 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -244,7 +244,7 @@ Event 30 Batch 0 2 7.349194950356053e+02 7.241679607953656e+02 1.425637322816703e+01 1.244354634469208e+02 3 7.321421454671275e+02 -7.253765693071590e+02 -2.895970851972107e+01 -9.498573130653318e+01 4 3.293835949726734e+01 1.208608511793152e+00 1.470333529155409e+01 -2.944973214038765e+01 - ME 3.267107835672361e-04 + ME 5.147061682527938e-02 Event 31 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -252,7 +252,7 @@ Event 31 Batch 0 2 1.718338270585457e+02 -1.344914872264095e+02 -1.021614404532311e+02 3.165350011824393e+01 3 6.313115253715935e+02 -2.849940593920691e+02 -7.916450257599642e+01 -5.577325610990745e+02 4 6.968546475698608e+02 4.194855466184786e+02 1.813259430292275e+02 5.260790609808306e+02 - ME 1.685680846028125e-04 + ME 4.645345268703414e-04 Event 32 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -260,7 +260,7 @@ Event 32 Batch 0 2 7.235176898898732e+02 -4.762113006241282e+02 -2.880822916693121e+01 5.439400065022983e+02 3 6.603902828461299e+02 4.672103814637360e+02 1.031050210016798e+02 
-4.551913221650266e+02 4 1.160920272639969e+02 9.000919160392018e+00 -7.429679183474862e+01 -8.874868433727177e+01 - ME 2.173072900368875e-04 + ME 4.476006843186700e-03 Event 33 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -268,7 +268,7 @@ Event 33 Batch 0 2 4.786737271642286e+02 2.009638309376703e+02 4.090184839380260e+02 1.464443769121513e+02 3 3.795793219608408e+02 -6.057523839522271e+00 -8.244277697544294e+01 3.704685635647950e+02 4 6.417469508749314e+02 -1.949063070981495e+02 -3.265757069625828e+02 -5.169129404769461e+02 - ME 3.322437827682699e-03 + ME 1.351709676586880e-02 Event 34 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -276,7 +276,7 @@ Event 34 Batch 0 2 6.621583515140109e+02 -5.051303032557109e+02 -1.429543729176959e+02 4.035605363216953e+02 3 3.008522892707525e+02 8.677543723835062e+01 2.726747894692539e+02 -9.290092916351111e+01 4 5.369893592152367e+02 4.183548660173603e+02 -1.297204165515579e+02 -3.106596071581844e+02 - ME 9.294666462955388e-05 + ME 6.460854093057828e-04 Event 35 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -284,7 +284,7 @@ Event 35 Batch 0 2 6.158114977149372e+02 2.502256147979830e+02 4.233348779616202e+00 5.626659943296695e+02 3 1.476397433483021e+02 -1.670550278282843e+01 -6.055370982200890e+01 1.336101351676488e+02 4 7.365487589367605e+02 -2.335201120151546e+02 5.632036104239269e+01 -6.962761294973184e+02 - ME 5.450893768264864e-01 + ME 2.101231899117793e+00 Event 36 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -292,7 +292,7 @@ Event 36 Batch 0 2 7.182456511154913e+02 -7.463771462544163e+01 -6.667773110518942e+02 2.563475070450518e+02 3 4.860008755751825e+02 -7.840660561780868e+01 4.141081959217036e+02 -2.419992919944378e+02 4 2.957534733093268e+02 1.530443202432501e+02 2.526691151301903e+02 
-1.434821505061448e+01 - ME 1.793136635525090e-05 + ME 9.644531209480271e-05 Event 37 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -300,7 +300,7 @@ Event 37 Batch 0 2 5.672182018814327e+02 -2.031706828392718e+00 -5.267408190306547e+02 2.104197478372323e+02 3 4.664069288608281e+02 3.712365792892206e+02 2.604523782658950e+02 -1.090109358856581e+02 4 4.663748692577387e+02 -3.692048724608279e+02 2.662884407647597e+02 -1.014088119515743e+02 - ME 1.885829354904198e-05 + ME 1.216876552012178e-04 Event 38 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -308,7 +308,7 @@ Event 38 Batch 0 2 5.068057345787187e+02 4.883513201966852e+02 -7.570036138649985e+01 -1.124032737511800e+02 3 3.871140338254017e+02 -1.153787089711745e+02 -3.599073977747533e+02 -8.373585688177315e+01 4 6.060802315958797e+02 -3.729726112255107e+02 4.356077591612532e+02 1.961391306329531e+02 - ME 2.004468492837133e-05 + ME 1.006736553113524e-04 Event 39 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -316,7 +316,7 @@ Event 39 Batch 0 2 4.960337392567769e+02 -3.669089247616476e+02 2.651961920161227e+02 -2.027271347192069e+02 3 2.837821967046824e+02 -2.822567153069604e+02 -2.935613327724534e+01 -1.303560381865560e+00 4 7.201840640385411e+02 6.491656400686079e+02 -2.358400587388775e+02 2.040306951010725e+02 - ME 2.738639406673165e-04 + ME 1.372807525012575e-03 Event 40 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -324,7 +324,7 @@ Event 40 Batch 0 2 3.080730228651936e+02 -3.065830270999447e+02 -2.484308296331460e+01 1.728167064871203e+01 3 6.842346640746094e+02 4.630487823766367e+02 8.554554725666550e+01 -4.964321303112498e+02 4 5.076923130601962e+02 -1.564657552766919e+02 -6.070246429335075e+01 4.791504596625378e+02 - ME 4.316353181637933e-05 + ME 4.192363154074847e-05 Event 41 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -332,7 +332,7 @@ Event 41 Batch 0 2 1.602650851118221e+02 -1.258781096038287e+02 -9.817642232798531e+01 1.417706342452912e+01 3 7.146392966623014e+02 6.799675591776853e+02 -1.019163870176435e+02 1.948499239342933e+02 4 6.250956182258764e+02 -5.540894495738563e+02 2.000928093456288e+02 -2.090269873588226e+02 - ME 6.118266190948034e-05 + ME 4.523507186168379e-04 Event 42 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -340,7 +340,7 @@ Event 42 Batch 0 2 1.687893235969910e+02 1.289401357197518e+02 4.788693514682045e+01 9.783209393213438e+01 3 7.042017295435162e+02 -1.022058447296739e+02 -6.640064324330017e+02 -2.110675220936915e+02 4 6.270089468594927e+02 -2.673429099007782e+01 6.161194972861812e+02 1.132354281615572e+02 - ME 4.091574289077424e-05 + ME 1.686356189272381e-04 Event 43 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -348,7 +348,7 @@ Event 43 Batch 0 2 4.729783670130408e+02 -7.983817933050123e+01 9.052957805204315e+01 4.573169538528310e+02 3 5.638402597824536e+02 4.785250044669658e+02 7.435095949863268e+01 -2.887933404236804e+02 4 4.631813732045056e+02 -3.986868251364646e+02 -1.648805375506758e+02 -1.685236134291506e+02 - ME 2.654067897204875e-04 + ME 5.938757690519573e-04 Event 44 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -356,7 +356,7 @@ Event 44 Batch 0 2 1.774791104122977e+02 -1.952605982635784e+01 6.371003613266313e+01 1.644949814321787e+02 3 7.194816205691247e+02 -3.678871192485065e+02 2.644831693887214e+01 -6.177486190667772e+02 4 6.030392690185777e+02 3.874131790748646e+02 -9.015835307153536e+01 4.532536376345985e+02 - ME 1.390282437939369e-04 + ME 2.092333697371024e-04 Event 45 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -364,7 +364,7 @@ 
Event 45 Batch 0 2 7.477488480180839e+02 -3.787655987618923e+02 1.634662296474455e+02 6.236535517992064e+02 3 7.458113398274099e+02 3.819163358711198e+02 -1.661042992235261e+02 -6.186952632673017e+02 4 6.439812154506046e+00 -3.150737109227506e+00 2.638069576080606e+00 -4.958288531904773e+00 - ME 4.591622113024210e-03 + ME 9.377954359926730e-02 Event 46 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -372,7 +372,7 @@ Event 46 Batch 0 2 3.243146757688279e+02 -4.392587631431587e+00 -2.496903827548322e+02 -2.069188895501946e+02 3 5.341608950426614e+02 -2.704482657861201e+02 2.711825143656835e+02 -3.723515022507137e+02 4 6.415244291885106e+02 2.748408534175518e+02 -2.149213161085120e+01 5.792703918009084e+02 - ME 7.845213441237594e-05 + ME 1.879047912263320e-04 Event 47 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -380,7 +380,7 @@ Event 47 Batch 0 2 6.742198761450968e+02 -3.282965096491567e+02 5.301803926793563e+02 -2.563251730900704e+02 3 6.484148720042493e+02 3.527030795571956e+02 -3.975273148506379e+02 3.715029176935211e+02 4 1.773652518506536e+02 -2.440656990803885e+01 -1.326530778287185e+02 -1.151777446034508e+02 - ME 5.254395938575492e-05 + ME 1.136665455996279e-03 Event 48 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -388,7 +388,7 @@ Event 48 Batch 0 2 7.321401810535270e+02 -1.843482647928687e+02 4.412348098999295e+02 5.543976952635381e+02 3 7.293058265076229e+02 2.182722651304250e+02 -4.435200216702997e+02 -5.362221528717154e+02 4 3.855399243885009e+01 -3.392400033755636e+01 2.285211770370227e+00 -1.817554239182278e+01 - ME 2.330290263553363e-04 + ME 2.278442596973106e-03 Event 49 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -396,7 +396,7 @@ Event 49 Batch 0 2 3.511117284856090e+02 -3.272266866652174e+02 5.199533974843238e+01 
1.161835877338140e+02 3 7.326526490901410e+02 6.615045961628415e+02 -2.993354007364775e+02 -9.792799058578566e+01 4 4.162356224242500e+02 -3.342779094976241e+02 2.473400609880451e+02 -1.825559714802838e+01 - ME 7.863589115869630e-06 + ME 8.806759903737244e-05 Event 50 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -404,7 +404,7 @@ Event 50 Batch 0 2 7.322170903075255e+02 2.740692406080844e+02 1.952596610981929e+01 -6.787095515302592e+02 3 3.078559130669522e+02 -1.663333363406682e+02 8.625456119089935e+01 2.442716420418760e+02 4 4.599269966255216e+02 -1.077359042674159e+02 -1.057805273007185e+02 4.344379094883832e+02 - ME 6.765758192049922e-05 + ME 7.579426018596712e-05 Event 51 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -412,7 +412,7 @@ Event 51 Batch 0 2 3.473696038265160e+02 -2.922314643158454e+02 -6.759614889845234e+01 -1.752060888796554e+02 3 5.389399151999496e+02 -2.449040872454050e+02 9.346474502284556e+01 4.708954891311219e+02 4 6.136904809735339e+02 5.371355515612503e+02 -2.586859612439322e+01 -2.956894002514666e+02 - ME 2.035652280642710e-04 + ME 4.687828430739845e-04 Event 52 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -420,7 +420,7 @@ Event 52 Batch 0 2 6.818614816439094e+02 5.970116833066725e+02 3.013730734325877e+02 1.329902280423528e+02 3 2.108623144448950e+02 -4.198344769951654e+00 -1.698802183673395e+02 -1.248439063859965e+02 4 6.072762039111957e+02 -5.928133385367207e+02 -1.314928550652483e+02 -8.146321656356344e+00 - ME 4.047005152694340e-05 + ME 1.636869658416981e-04 Event 53 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -428,7 +428,7 @@ Event 53 Batch 0 2 5.157714002491656e+02 -5.140718537651751e+02 -4.182413977701254e+01 1.003899065692042e+00 3 5.148181840855221e+02 2.868792199999327e+02 1.974924151010656e+02 
3.791237552236646e+02 4 4.694104156653124e+02 2.271926337652422e+02 -1.556682753240530e+02 -3.801276542893567e+02 - ME 1.547751010871262e-04 + ME 3.182294022992135e-03 Event 54 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -436,7 +436,7 @@ Event 54 Batch 0 2 6.433410767101752e+02 2.586883950027282e+02 -5.809813083922761e+02 9.710187728524583e+01 3 6.928799734080563e+02 -1.579832568796111e+02 6.405510983559769e+02 -2.117031848853746e+02 4 1.637789498817686e+02 -1.007051381231171e+02 -5.956978996370073e+01 1.146013076001288e+02 - ME 1.302720215079095e-05 + ME 3.280140142776471e-05 Event 55 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -444,7 +444,7 @@ Event 55 Batch 0 2 7.193759752058201e+02 -3.536444481659258e+02 -7.212523476050659e+01 -6.222823703878202e+02 3 5.307053661742267e+02 2.409461639849982e+02 1.900944302490854e+02 4.329633233142391e+02 4 2.499186586199529e+02 1.126982841809279e+02 -1.179691954885788e+02 1.893190470735813e+02 - ME 3.087450123310173e-05 + ME 3.939174164528502e-05 Event 56 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -452,7 +452,7 @@ Event 56 Batch 0 2 3.858864959547013e+02 1.815174721437793e+02 3.218581876578407e+02 -1.112074732396182e+02 3 4.484505297447187e+02 -3.244105157450006e+02 2.934585578803474e+02 -9.873079412811623e+01 4 6.656629743005793e+02 1.428930436012212e+02 -6.153167455381879e+02 2.099382673677345e+02 - ME 4.275995533811995e-05 + ME 2.326138625268126e-04 Event 57 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -460,7 +460,7 @@ Event 57 Batch 0 2 5.284589752749192e+02 3.868194647882293e+02 -1.709996888155517e+02 3.168575336559793e+02 3 6.299868555278971e+02 -1.587414880613579e+02 2.327134172236622e+02 -5.634971548731005e+02 4 3.415541691971835e+02 -2.280779767268714e+02 -6.171372840811043e+01 
2.466396212171210e+02 - ME 2.211478424702745e-05 + ME 3.474853710074164e-05 Event 58 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -468,7 +468,7 @@ Event 58 Batch 0 2 6.172037319760957e+02 -2.246119436411400e+02 -2.286037628748728e+01 5.744278237820342e+02 3 5.117934503257735e+02 1.262762853074207e+02 3.215736628881853e+02 -3.775939815489577e+02 4 3.710028176981306e+02 9.833565833371921e+01 -2.987132866006979e+02 -1.968338422330765e+02 - ME 1.857727050583390e-04 + ME 6.183305374210038e-04 Event 59 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -476,7 +476,7 @@ Event 59 Batch 0 2 7.388935626701858e+02 -3.912134623809441e+02 -5.457789630286015e+02 3.082872805076099e+02 3 1.936051438730608e+02 1.561492575196544e+02 8.304673385628061e+01 -7.876294246644987e+01 4 5.675012934567535e+02 2.350642048612896e+02 4.627322291723209e+02 -2.295243380411600e+02 - ME 6.745345781245190e-05 + ME 4.116991424436793e-04 Event 60 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -484,7 +484,7 @@ Event 60 Batch 0 2 7.258141426633659e+02 -5.584991156701968e+02 1.635894950857984e+02 4.337319270970709e+02 3 2.789580074371136e+02 2.331554478032953e+02 6.512410160032128e+01 -1.386180308029247e+02 4 4.952278498995201e+02 3.253436678669015e+02 -2.287135966861195e+02 -2.951138962941461e+02 - ME 9.170244877267536e-05 + ME 7.295672680059989e-04 Event 61 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -492,15 +492,15 @@ Event 61 Batch 0 2 5.906141202026897e+02 4.485275282318680e+02 -2.043613424290570e+02 3.253990429020988e+02 3 4.163572165237975e+02 -4.021600557528675e+02 -4.112755461437413e+01 9.964509802161204e+01 4 4.930286632735124e+02 -4.636747247900051e+01 2.454888970434311e+02 -4.250441409237108e+02 - ME 1.836685601489136e-04 + ME 5.845307122272604e-03 Event 62 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 1 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 -7.500000000000000e+02 2 7.346180891175762e+02 3.693463141798367e+02 7.549194961263061e+01 -6.305140780380819e+02 3 4.420621433230785e+02 -2.806743363126464e+02 3.467380983154045e+01 3.397625382625571e+02 - 4 3.233197675593452e+02 -8.867197786719018e+01 -1.101657594441711e+02 2.907515397755248e+02 - ME 3.490896135533686e-05 + 4 3.233197675593453e+02 -8.867197786719018e+01 -1.101657594441711e+02 2.907515397755249e+02 + ME 3.963631774242112e-05 Event 63 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -508,7 +508,7 @@ Event 63 Batch 0 2 6.451039732729313e+02 -2.415045377667665e+02 1.990362537024482e+02 -5.641092662620230e+02 3 3.260870385294104e+02 2.061141051805976e+02 -2.496695602716584e+02 3.892098426606745e+01 4 5.288089881976584e+02 3.539043258616898e+01 5.063330656921013e+01 5.251882819959555e+02 - ME 4.428689394331114e-04 + ME 4.832224458906289e-04 Event 64 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -516,7 +516,7 @@ Event 64 Batch 0 2 5.275973380665291e+02 -6.064553482667328e+01 4.309976929667101e+02 -2.981980196075213e+02 3 5.799838776791826e+02 3.279821268626862e+02 -1.824214634122377e+02 4.421893627315650e+02 4 3.924187842542880e+02 -2.673365920360130e+02 -2.485762295544724e+02 -1.439913431240437e+02 - ME 4.205989960223865e-05 + ME 2.175617604507715e-04 Event 65 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -524,7 +524,7 @@ Event 65 Batch 0 2 6.480172869826541e+02 2.720879118036237e+02 -5.153900904044360e+02 -2.833154199679406e+02 3 7.075023253568394e+02 -3.440299289242928e+02 4.709796137500282e+02 4.004761563708322e+02 4 1.444803876605064e+02 7.194201712066916e+01 4.441047665440794e+01 -1.171607364028916e+02 - ME 1.103463366798231e-04 + ME 
4.989956280474397e-03 Event 66 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -532,7 +532,7 @@ Event 66 Batch 0 2 5.472978185025795e+02 4.857452785131266e+02 -2.223654169683454e+02 -1.189119332799752e+02 3 3.203062148499983e+02 1.169702135976477e+02 2.922172461416276e+02 -5.935588816501102e+01 4 6.323959666474225e+02 -6.027154921107744e+02 -6.985182917328234e+01 1.782678214449862e+02 - ME 2.913920636000223e-05 + ME 1.346850069104626e-04 Event 67 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -540,7 +540,7 @@ Event 67 Batch 0 2 4.264671493042950e+02 1.195959046886511e+02 -2.647539231733031e+02 3.122121220929446e+02 3 5.059969655247565e+02 3.777175441887567e+02 -7.608313561896731e+00 -3.366073372596325e+02 4 5.675358851709483e+02 -4.973134488774080e+02 2.723622367352000e+02 2.439521516668857e+01 - ME 4.009347519102052e-05 + ME 9.763221977220593e-05 Event 68 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -548,7 +548,7 @@ Event 68 Batch 0 2 5.996105691520872e+02 -3.814725562071957e+02 -3.417794545715573e+02 3.117664637712124e+02 3 2.164196744806214e+02 1.292759463548889e+02 -1.184749651041615e+02 1.268419798013013e+02 4 6.839697563672917e+02 2.521966098523068e+02 4.602544196757188e+02 -4.386084435725137e+02 - ME 6.175473672610461e-04 + ME 2.936083529685707e-03 Event 69 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -556,7 +556,7 @@ Event 69 Batch 0 2 4.950546755511076e+02 -1.873718558932053e+02 -4.578972175289678e+02 -1.735101101888631e+01 3 4.768584394819691e+02 -1.830244097668608e+02 2.985566003539791e+02 -3.236664843936508e+02 4 5.280868849669230e+02 3.703962656600661e+02 1.593406171749887e+02 3.410174954125370e+02 - ME 1.367292435278724e-05 + ME 5.234212626720279e-05 Event 70 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 
7.500000000000000e+02 @@ -564,7 +564,7 @@ Event 70 Batch 0 2 6.918343395272258e+02 6.895733556028865e+02 -5.391072441382606e+01 -1.473005040127906e+01 3 2.169590284692678e+02 -1.127375202028747e+02 1.807969800614662e+02 4.091361110301506e+01 4 5.912066320035063e+02 -5.768358354000119e+02 -1.268862556476402e+02 -2.618356070173603e+01 - ME 3.526540789264872e-05 + ME 1.591740981760110e-04 Event 71 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -572,7 +572,7 @@ Event 71 Batch 0 2 5.156371334918733e+02 1.547202099034306e+02 -4.807172487652236e+02 1.041836686949964e+02 3 3.718518305526428e+02 -8.969821893462726e+01 -7.521366892975188e+01 -3.529460545344468e+02 4 6.125110359554843e+02 -6.502199096880338e+01 5.559309176949756e+02 2.487623858394504e+02 - ME 2.860782472746935e-05 + ME 1.125100552069616e-04 Event 72 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -580,7 +580,7 @@ Event 72 Batch 0 2 2.110577464974889e+02 5.009520239746097e+01 -1.453533690489527e+02 -1.445968227848547e+02 3 7.317124633441161e+02 -4.429659627226336e+02 5.264774879404380e+02 2.490095170354977e+02 4 5.572297901583943e+02 3.928707603251725e+02 -3.811241188914850e+02 -1.044126942506430e+02 - ME 2.666441446531882e-05 + ME 1.823320413479066e-04 Event 73 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -588,7 +588,7 @@ Event 73 Batch 0 2 3.932257450488246e+02 3.105005764664288e+01 -2.932679039283983e+02 2.601082794045340e+02 3 5.658879124646472e+02 3.645905401293642e+02 4.244364556305355e+02 8.459646951004230e+01 4 5.408863424865281e+02 -3.956405977760074e+02 -1.311685517021372e+02 -3.447047489145762e+02 - ME 7.825486685913998e-05 + ME 8.953763196089171e-04 Event 74 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -596,7 +596,7 @@ Event 74 Batch 0 2 1.374854102925440e+02 7.785209805930555e+01 
4.289805712042688e+01 1.048858692406466e+02 3 6.381281910764947e+02 -1.004137270491618e+02 -1.591026937267357e+02 6.097630724433484e+02 4 7.243863986309617e+02 2.256162898985645e+01 1.162046366063089e+02 -7.146489416839951e+02 - ME 1.919068868336380e+00 + ME 1.395531292378326e+01 Event 75 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -604,7 +604,7 @@ Event 75 Batch 0 2 5.936883054156938e+02 -3.438525101293572e+00 -2.706855443967301e+02 5.283780053968293e+02 3 5.912298912592892e+02 1.109657062166288e+02 4.832067437414102e+02 -3.221034603433170e+02 4 3.150818033250173e+02 -1.075271811153352e+02 -2.125211993446803e+02 -2.062745450535123e+02 - ME 1.642862842910461e-04 + ME 1.379908325625592e-03 Event 76 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -612,7 +612,7 @@ Event 76 Batch 0 2 6.619486867997672e+02 2.801967015359571e+01 2.136411519593737e+02 6.258980909300584e+02 3 1.201252731414031e+02 2.274423842261747e+01 -8.754996679960182e+01 7.904292618103446e+01 4 7.179260400588295e+02 -5.076390857621322e+01 -1.260911851597719e+02 -7.049410171110928e+02 - ME 7.362202483972824e-01 + ME 5.870483941147637e+00 Event 77 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -620,7 +620,7 @@ Event 77 Batch 0 2 7.456676259451606e+02 -7.346624001550109e+02 6.511229493320701e+01 -1.097804865615983e+02 3 1.284204120828029e+02 1.251494694834492e+02 2.867183268690428e+01 2.708973588335753e+00 4 6.259119619720373e+02 6.095129306715618e+02 -9.378412762011118e+01 1.070715129732624e+02 - ME 4.400761364703354e-05 + ME 1.662775178233579e-04 Event 78 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -628,7 +628,7 @@ Event 78 Batch 0 2 7.040158920877628e+02 6.911264613612161e+02 -6.659640240533211e+01 -1.163937709034254e+02 3 5.185438503615327e+02 -4.976050220224222e+02 
-1.270913363611937e+02 7.158742227342900e+01 4 2.774402575507044e+02 -1.935214393387939e+02 1.936877387665258e+02 4.480634862999637e+01 - ME 9.352750539306009e-06 + ME 5.328004946641866e-05 Event 79 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -636,7 +636,7 @@ Event 79 Batch 0 2 6.777589592768838e+02 1.742725197144059e+02 -4.776543849198212e+01 6.532264221831092e+02 3 5.725002211294488e+02 -1.786302554544233e+02 -1.627852110918317e+02 -5.189881598643107e+02 4 2.497408195936665e+02 4.357735740017474e+00 2.105506495838138e+02 -1.342382623187985e+02 - ME 3.598558866345749e-04 + ME 9.179311580246363e-04 Event 80 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -644,7 +644,7 @@ Event 80 Batch 0 2 6.240819586861880e+02 4.679310297228965e+02 -4.118464023828053e+02 -3.002304821964348e+01 3 6.688675489057649e+02 -5.494372353172420e+02 3.251429131208653e+02 1.994607943266771e+02 4 2.070504924080468e+02 8.150620559434545e+01 8.670348926194001e+01 -1.694377461070337e+02 - ME 5.382869847396148e-05 + ME 3.575286400583300e-03 Event 81 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -652,7 +652,7 @@ Event 81 Batch 0 2 5.198056748722776e+02 1.034797897616987e+02 -2.885605608993972e+02 4.197888462474007e+02 3 5.672098642055398e+02 -4.160331805498524e+02 2.087659545613757e+01 -3.849773895903518e+02 4 4.129844609221831e+02 3.125533907881537e+02 2.676839654432596e+02 -3.481145665704891e+01 - ME 3.612255741613163e-05 + ME 1.018936778946332e-04 Event 82 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -660,7 +660,7 @@ Event 82 Batch 0 2 2.057598609140514e+02 6.385349666266659e+01 -2.765433460911293e+01 1.936364870179372e+02 3 6.235840147705873e+02 4.654039114453895e+02 -3.828889383639962e+02 -1.601633028106901e+02 4 6.706561243153629e+02 -5.292574081080552e+02 
4.105432729731107e+02 -3.347318420724690e+01 - ME 3.172622561805068e-04 + ME 6.930850923220120e-04 Event 83 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -668,7 +668,7 @@ Event 83 Batch 0 2 6.583322583736492e+02 1.865539504254553e+02 -1.926584839569474e+02 6.012334775737429e+02 3 3.620902826842561e+02 -3.107067244571256e+02 -1.177956631152976e+01 -1.855584705935048e+02 4 4.795774589420946e+02 1.241527740316703e+02 2.044380502684771e+02 -4.156750069802382e+02 - ME 6.756528802944365e-04 + ME 8.385116111585099e-03 Event 84 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -676,7 +676,7 @@ Event 84 Batch 0 2 4.849329564663161e+02 -2.622178945286150e+02 4.068620488841210e+02 -2.941124332559817e+01 3 4.737588937677760e+02 6.014532316188546e+01 -1.333934272225749e+02 4.505954095412368e+02 4 5.413081497659077e+02 2.020725713667296e+02 -2.734686216615461e+02 -4.211841662156386e+02 - ME 1.017468409980153e-03 + ME 5.162990427398554e-03 Event 85 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -684,7 +684,7 @@ Event 85 Batch 0 2 7.085742632080854e+02 -2.174614026040270e+02 -5.283468657604088e+02 -4.190914152061853e+02 3 5.315764222715953e+02 8.528530557199829e+00 3.820092234108129e+02 3.695533927738615e+02 4 2.598493145203187e+02 2.089328720468272e+02 1.463376423495959e+02 4.953802243232388e+01 - ME 1.894143727100354e-05 + ME 6.335517668355978e-05 Event 86 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -692,7 +692,7 @@ Event 86 Batch 0 2 1.724500140939190e+02 1.231518677708316e+02 -1.121928207497684e+01 1.201946443701656e+02 3 7.028475062724231e+02 -6.467096040851287e+01 -4.553168759141600e+02 -5.315061866629339e+02 4 6.247024796336580e+02 -5.848090736231883e+01 4.665361579891369e+02 4.113115422927684e+02 - ME 5.311384036847167e-05 + ME 1.165531323127631e-04 Event 87 
Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -700,7 +700,7 @@ Event 87 Batch 0 2 1.942099203196796e+02 -7.751148196958454e+01 -1.356691819650310e+02 -1.153400900745028e+02 3 7.314670447251594e+02 1.724617634710876e+02 7.020747158546045e+02 1.113196793791551e+02 4 5.743230349551606e+02 -9.495028150150301e+01 -5.664055338895735e+02 4.020410695347637e+00 - ME 1.874087134673149e-05 + ME 1.237609879052555e-04 Event 88 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -708,7 +708,7 @@ Event 88 Batch 0 2 6.382497024023744e+02 2.632142028760094e+02 -5.613974181649784e+02 1.513733956108635e+02 3 3.997044228265544e+02 -5.264940326118349e+01 3.435187961344461e+02 1.974500004195773e+02 4 4.620458747710724e+02 -2.105647996148253e+02 2.178786220305324e+02 -3.488233960304407e+02 - ME 9.699609186666195e-05 + ME 1.863821317258467e-03 Event 89 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -716,7 +716,7 @@ Event 89 Batch 0 2 1.419006640093282e+02 -8.677155154367878e+01 6.457545216231642e+01 -9.185046144153740e+01 3 7.131224514048055e+02 5.460003286026870e+02 -4.154556538506974e+02 -1.944836022569670e+02 4 6.449768845858670e+02 -4.592287770590082e+02 3.508802016883808e+02 2.863340636985044e+02 - ME 2.974199953519439e-05 + ME 1.136115495374629e-04 Event 90 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -724,7 +724,7 @@ Event 90 Batch 0 2 5.730615760623938e+02 -6.017783679015001e+01 -5.202921970507185e+02 -2.325386583054727e+02 3 5.389913703864468e+02 -6.302812531165206e+01 2.446311215742109e+02 4.761247390423042e+02 4 3.879470535511588e+02 1.232059621018019e+02 2.756610754765076e+02 -2.435860807368315e+02 - ME 1.667772733247344e-04 + ME 1.094721025518881e-03 Event 91 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -732,7 
+732,7 @@ Event 91 Batch 0 2 4.546745139784350e+02 -1.470341619195494e+02 -1.726383255301703e+02 -3.940886669878754e+02 3 5.110976540119647e+02 -2.482119727393537e+02 -1.865817698532448e+02 4.059542728975803e+02 4 5.342278320096005e+02 3.952461346589030e+02 3.592200953834151e+02 -1.186560590970480e+01 - ME 4.420313882846059e-05 + ME 8.789722587847313e-05 Event 92 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -740,7 +740,7 @@ Event 92 Batch 0 2 6.683728375977241e+02 -1.148152650923627e+02 3.458291789782991e+02 5.603051703379153e+02 3 2.872567998557088e+02 1.635098024620329e+02 7.847331657016402e+01 -2.227620976482501e+02 4 5.443703625465666e+02 -4.869453736967034e+01 -4.243024955484631e+02 -3.375430726896653e+02 - ME 2.265252332392545e-04 + ME 8.270083568815311e-04 Event 93 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -748,7 +748,7 @@ Event 93 Batch 0 2 5.666948073002088e+02 5.408074886689032e+01 5.639942928586390e+02 -1.134525653745258e+01 3 6.168025492529713e+02 2.439040545997395e+02 -5.541969602989467e+02 1.175666879272316e+02 4 3.165026434468199e+02 -2.979848034666298e+02 -9.797332559692304e+00 -1.062214313897791e+02 - ME 1.251778043268437e-05 + ME 1.664960428447917e-04 Event 94 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -756,7 +756,7 @@ Event 94 Batch 0 2 4.964349376711385e+02 8.445930034540567e+01 -2.409007074648561e+02 -4.257712097695705e+02 3 5.660980232871289e+02 1.373833465612049e+02 5.210669225216058e+02 1.734417778711397e+02 4 4.374670390417324e+02 -2.218426469066104e+02 -2.801662150567495e+02 2.523294318984307e+02 - ME 1.007141026120618e-05 + ME 3.431641292834382e-05 Event 95 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -764,7 +764,7 @@ Event 95 Batch 0 2 7.117074025057361e+02 -3.227984571262278e+02 4.276971164854593e+02 
-4.684055501468919e+02 3 1.264078228725325e+02 8.675876182178401e+01 5.074873328843479e+01 7.665781760618943e+01 4 6.618847746217315e+02 2.360396953044439e+02 -4.784458497738940e+02 3.917477325407025e+02 - ME 8.653822330208906e-05 + ME 2.121249861094822e-04 Event 96 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -772,7 +772,7 @@ Event 96 Batch 0 2 7.329769441659936e+02 -9.642859092211874e+01 6.903981466332597e+02 -2.265107649915406e+02 3 3.937873938465678e+02 -4.837693103302091e+01 -3.847118583018795e+02 6.873841850241256e+01 4 3.732356619874385e+02 1.448055219551397e+02 -3.056862883313802e+02 1.577723464891279e+02 - ME 9.822975749896163e-06 + ME 3.473186069800973e-05 Event 97 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -780,7 +780,7 @@ Event 97 Batch 0 2 3.394989963266853e+01 6.003767577498499e+00 -2.078495220615399e+01 2.616364312804199e+01 3 7.377311980366451e+02 -5.308290258162607e+02 4.681853362634530e+02 2.080152802450354e+02 4 7.283189023306861e+02 5.248252582387622e+02 -4.474003840572991e+02 -2.341789233730774e+02 - ME 2.729355315721549e-03 + ME 2.063600678642283e-02 Event 98 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -788,7 +788,7 @@ Event 98 Batch 0 2 2.496912687496082e+02 -2.485814905959506e+02 -5.435228288348340e-01 -2.350907922099247e+01 3 7.458289852530976e+02 7.373315781279124e+02 9.801365830907572e+01 -5.473885205171283e+01 4 5.044797459972945e+02 -4.887500875319618e+02 -9.747013548024091e+01 7.824793127270530e+01 - ME 8.091578731489026e-06 + ME 6.800308216903296e-05 Event 99 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -796,7 +796,7 @@ Event 99 Batch 0 2 1.698125854886770e+02 8.336002034290719e+01 8.774494220182726e+01 -1.191144253093525e+02 3 6.496622934125946e+02 5.714329899004554e+02 -6.230613627727958e+01 
3.027265745152471e+02 4 6.805251210987285e+02 -6.547930102433627e+02 -2.543880592454771e+01 -1.836121492058947e+02 - ME 1.856310681395454e-04 + ME 6.115029137493471e-04 Event 100 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -804,7 +804,7 @@ Event 100 Batch 0 2 6.141460480129781e+02 -5.842473718080511e+02 -5.092222124447417e+01 1.823110095657221e+02 3 3.909476383151783e+02 2.539115798088024e+02 -2.930333502072385e+02 -5.000421191795168e+01 4 4.949063136718440e+02 3.303357919992488e+02 3.439555714517127e+02 -1.323067976477707e+02 - ME 2.380755205932631e-05 + ME 1.550407956048336e-04 Event 101 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -812,7 +812,7 @@ Event 101 Batch 0 2 7.469346538870473e+02 3.524232024688497e+02 -1.488240016505349e+02 -6.415299525912136e+02 3 6.502268999047169e+02 -2.777200960400715e+02 1.351761574712158e+02 5.721835160737410e+02 4 1.028384462082358e+02 -7.470310642877820e+01 1.364784417931910e+01 6.934643651747267e+01 - ME 7.777208667430486e-05 + ME 1.080054053054822e-04 Event 102 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -820,7 +820,7 @@ Event 102 Batch 0 2 7.426790432885583e+02 -3.141071077544728e+02 6.615000409077074e+02 1.238005738162371e+02 3 6.735764515788642e+01 -4.139700837311957e+00 -5.533298776898177e+01 -3.818606686673834e+01 4 6.899633115535552e+02 3.182468085917849e+02 -6.061670531387255e+02 -8.561450694949879e+01 - ME 1.796768498680773e-04 + ME 6.292262541994918e-04 Event 103 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -828,7 +828,7 @@ Event 103 Batch 0 2 4.837874798175253e+02 -2.731724972668680e+02 1.247027290420595e+02 -3.793103501549069e+02 3 4.466406321977809e+02 -2.904538080082218e+02 -1.536665846758871e+02 3.025078850172422e+02 4 5.695718879846930e+02 5.636263052750895e+02 2.896385563382777e+01 
7.680246513766473e+01 - ME 2.998858312831636e-05 + ME 8.140894767450013e-05 Event 104 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -836,7 +836,7 @@ Event 104 Batch 0 2 5.788466572679498e+02 3.572346730226224e+02 -3.682137844992378e+02 2.680773207965347e+02 3 2.925711988065158e+02 2.155069407513812e+02 1.697995838195863e+02 -1.016010147279926e+02 4 6.285821439255348e+02 -5.727416137740034e+02 1.984142006796517e+02 -1.664763060685422e+02 - ME 7.634200862908681e-05 + ME 2.849770726480251e-04 Event 105 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -844,7 +844,7 @@ Event 105 Batch 0 2 3.361125455083114e+02 2.619004058447622e+02 4.338373361330959e+01 -2.061496357605196e+02 3 5.299016201311088e+02 2.892532450564946e+02 2.091058919093095e+02 3.916669672191841e+02 4 6.339858343605800e+02 -5.511536509012568e+02 -2.524896255226191e+02 -1.855173314586645e+02 - ME 1.089382545947932e-04 + ME 2.866662317167052e-04 Event 106 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -852,7 +852,7 @@ Event 106 Batch 0 2 3.578050478863485e+02 -2.265838270225943e+02 2.740910124726658e+02 -3.947579646386072e+01 3 5.202885196186892e+02 1.412729374205232e+02 1.631578432376887e+02 4.734148487210871e+02 4 6.219064324949621e+02 8.531088960207101e+01 -4.372488557103545e+02 -4.339390522572265e+02 - ME 4.548955126640399e-04 + ME 1.912263829178338e-03 Event 107 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -860,7 +860,7 @@ Event 107 Batch 0 2 5.409822745993889e+02 9.278463733038997e+01 5.102180459532771e+02 -1.540466750365499e+02 3 2.501852297905710e+02 1.682301834486207e+02 1.474652503315489e+02 1.120056004263085e+02 4 7.088324956100398e+02 -2.610148207790107e+02 -6.576832962848259e+02 4.204107461024153e+01 - ME 2.159102073406285e-04 + ME 7.096163321035572e-04 Event 108 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -868,7 +868,7 @@ Event 108 Batch 0 2 6.835202199428555e+02 6.670011709444186e+02 6.653656309718588e+01 1.337243986739828e+02 3 2.377887385005082e+02 -1.098327419601477e+02 7.667443498831059e+01 -1.964720946353502e+02 4 5.786910415566365e+02 -5.571684289842709e+02 -1.432109980854965e+02 6.274769596136723e+01 - ME 2.960130886583330e-05 + ME 1.143500637563713e-04 Event 109 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -876,7 +876,7 @@ Event 109 Batch 0 2 5.978180281189351e+02 4.291222314737005e+02 2.249703559956599e+02 3.501840146583366e+02 3 3.585061336071061e+02 -3.227227650115256e+02 1.541688059097761e+02 2.467071262824850e+01 4 5.436758382739589e+02 -1.063994664621746e+02 -3.791391619054360e+02 -3.748547272865851e+02 - ME 1.100286424576873e-04 + ME 1.159187207430584e-03 Event 110 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -884,7 +884,7 @@ Event 110 Batch 0 2 7.073952645543156e+01 -4.753982451958468e+01 4.872856968801237e+01 -1.922426029646691e+01 3 7.438039776014969e+02 1.707202332282495e+02 -7.225114374584515e+02 4.556513803361385e+01 4 6.854564959430718e+02 -1.231804087086648e+02 6.737828677704391e+02 -2.634087773714689e+01 - ME 1.052942530962122e-04 + ME 5.177444310012934e-04 Event 111 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -892,7 +892,7 @@ Event 111 Batch 0 2 5.206822291802364e+02 -3.873336848644893e+02 2.415505427333673e+02 -2.504714268307115e+02 3 5.478000561519707e+02 4.687653961676166e+02 -2.245690260344170e+02 -1.729527606656598e+02 4 4.315177146677929e+02 -8.143171130312743e+01 -1.698151669895031e+01 4.234241874963712e+02 - ME 8.545692640795734e-05 + ME 1.041517236520828e-04 Event 112 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -900,7 
+900,7 @@ Event 112 Batch 0 2 3.610471238372959e+02 2.563298943277285e+02 9.635756626046441e+01 -2.352981732387216e+02 3 6.139063356201009e+02 1.031778254919422e+02 -4.257030126280926e+02 4.301305270271111e+02 4 5.250465405426031e+02 -3.595077198196707e+02 3.293454463676283e+02 -1.948323537883896e+02 - ME 5.572029836371622e-05 + ME 2.333567140730066e-04 Event 113 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -908,7 +908,7 @@ Event 113 Batch 0 2 5.886653054136124e+02 3.035646198144377e+02 3.278619896967805e+02 -3.832517176826292e+02 3 5.420023902452333e+02 -3.658357535838290e+02 -3.990519958595696e+02 2.623541560166928e+01 4 3.693323043411537e+02 6.227113376939163e+01 7.119000616278893e+01 3.570163020809600e+02 - ME 4.986188449478774e-05 + ME 6.906402420910258e-05 Event 114 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -916,7 +916,7 @@ Event 114 Batch 0 2 5.165204340356855e+02 2.346362244736889e+01 6.298471388966840e+00 5.159487827839334e+02 3 5.932916594323345e+02 3.608814360715946e+02 -5.336137507463695e+01 -4.678804824963537e+02 4 3.901879065319798e+02 -3.843450585189634e+02 4.706290368567026e+01 -4.806830028757967e+01 - ME 4.029549711869195e-04 + ME 5.363382776736297e-04 Event 115 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -924,7 +924,7 @@ Event 115 Batch 0 2 5.432307281524777e+02 2.250327918244370e+02 4.870559856477670e+02 -8.506664127290338e+01 3 4.265243530840496e+02 2.057819224248363e+02 -2.472237669715339e+02 2.801021835354204e+02 4 5.302449187634726e+02 -4.308147142492733e+02 -2.398322186762331e+02 -1.950355422625171e+02 - ME 4.159321993514108e-05 + ME 2.364149932043149e-04 Event 116 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -932,7 +932,7 @@ Event 116 Batch 0 2 4.402635748890415e+02 -4.240500842615081e+02 -5.733358735035193e+01 
-1.035683405941509e+02 3 4.399967684638562e+02 1.183617589007452e+02 -1.041572505293867e+02 -4.107784286579766e+02 4 6.197396566471035e+02 3.056883253607625e+02 1.614908378797388e+02 5.143467692521278e+02 - ME 4.172733678506819e-05 + ME 1.343295643586522e-04 Event 117 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -940,7 +940,7 @@ Event 117 Batch 0 2 3.074085311587982e+02 -4.270248480828711e+01 -3.034838508096459e+02 2.395944736750828e+01 3 5.360984061023379e+02 3.510554986169303e+02 -1.596589010508530e+02 -3.723849798683070e+02 4 6.564930627388640e+02 -3.083530138086433e+02 4.631427518604987e+02 3.484255325007987e+02 - ME 4.142391000026985e-05 + ME 1.795895763168496e-04 Event 118 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -948,7 +948,7 @@ Event 118 Batch 0 2 5.403602961735903e+02 4.471526113902045e+02 -1.804334130868151e+02 -2.439007487679592e+02 3 5.654623567965698e+02 -5.534570111367966e+02 -1.157195831079003e+02 6.480112868522320e+00 4 3.941773470298406e+02 1.063043997465919e+02 2.961529961947150e+02 2.374206358994370e+02 - ME 7.288650603673961e-06 + ME 3.055618730902428e-05 Event 119 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -956,7 +956,7 @@ Event 119 Batch 0 2 8.009099446659010e+01 5.775399043490319e+01 -2.629604726664823e+01 4.886268393818209e+01 3 7.131140611332349e+02 2.472685400460709e+02 -2.870014097539109e+02 -6.041689532644716e+02 4 7.067949444001758e+02 -3.050225304809738e+02 3.132974570205592e+02 5.553062693262896e+02 - ME 2.815424392761942e-04 + ME 6.861262467765907e-04 Event 120 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -964,7 +964,7 @@ Event 120 Batch 0 2 5.007248873753321e+02 2.708997263130530e+02 -3.880896283797751e+02 1.634784128397387e+02 3 7.413897277398672e+02 -4.257033276374029e+02 5.921425482134987e+02 
-1.334264135464211e+02 4 2.578853848848011e+02 1.548036013243502e+02 -2.040529198337238e+02 -3.005199929331748e+01 - ME 6.003662532288496e-06 + ME 1.034513276694145e-04 Event 121 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -972,7 +972,7 @@ Event 121 Batch 0 2 5.732265116821120e+02 -1.149395375629033e+02 4.260916136383032e+02 3.658189076403451e+02 3 4.323948798659248e+02 -2.148488009071912e+01 -4.178027098651986e+02 1.092914804138530e+02 4 4.943786084519640e+02 1.364244176536226e+02 -8.288903773105691e+00 -4.751103880541979e+02 - ME 7.661241871407340e-04 + ME 8.074833733477824e-02 Event 122 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -980,7 +980,7 @@ Event 122 Batch 0 2 3.423360304412701e+02 2.648046119434483e+02 2.369247279710451e+01 -2.156644197927059e+02 3 6.059487982275789e+02 2.457729689670163e+01 -4.569077875801422e+02 3.972469964635579e+02 4 5.517151713311508e+02 -2.893819088401499e+02 4.332153147830377e+02 -1.815825766708520e+02 - ME 5.274300345459390e-05 + ME 2.180123533398812e-04 Event 123 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -988,7 +988,7 @@ Event 123 Batch 0 2 1.430133297276668e+02 -4.205671322284506e+01 3.498095937953869e+01 1.321377229770999e+02 3 7.140350670908600e+02 -2.955397919833849e+01 -6.570980288365154e+02 -2.778395577453968e+02 4 6.429516031814733e+02 7.161069242118367e+01 6.221170694569771e+02 1.457018347682969e+02 - ME 2.698780233597045e-04 + ME 5.626335206455025e-04 Event 124 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -996,7 +996,7 @@ Event 124 Batch 0 2 6.053457283343441e+02 5.458657819531910e+02 -1.853964251366731e+01 -2.610177782464909e+02 3 7.499633671623128e+02 -6.784114238502394e+02 2.145325921506613e+01 3.189713933003628e+02 4 1.446909045033435e+02 1.325456418970486e+02 -2.913616701398675e+00 
-5.795361505387172e+01 - ME 2.629538535113942e-05 + ME 4.169465060943616e-04 Event 125 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1004,7 +1004,7 @@ Event 125 Batch 0 2 6.695439244882118e+02 9.058534244088493e+01 6.586171675820721e+02 7.941529525294386e+01 3 9.341516463500346e+01 3.490868167113007e+01 5.232133368429144e+01 6.906703243419068e+01 4 7.370409108767834e+02 -1.254940241120154e+02 -7.109385012663632e+02 -1.484823276871337e+02 - ME 4.436636984625360e-03 + ME 1.111472366347957e-02 Event 126 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1012,7 +1012,7 @@ Event 126 Batch 0 2 6.465564354211967e+02 -2.094351601488127e+02 -1.930091683601272e+02 -5.804477571728034e+02 3 1.356182567235447e+02 -2.832094442380729e+01 9.735247446175231e+01 -9.007070211700794e+01 4 7.178253078552584e+02 2.377561045726200e+02 9.565669389837488e+01 6.705184592898115e+02 - ME 1.230970446288030e-03 + ME 1.775660879411100e-03 Event 127 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1020,7 +1020,7 @@ Event 127 Batch 0 2 4.508388003927651e+02 -3.846405138087858e+02 7.756355374444065e+01 2.220162025777267e+02 3 6.162879941073576e+02 2.174727303224461e+02 1.334711143222092e+02 -5.609830344035003e+02 4 4.328732054998774e+02 1.671677834863399e+02 -2.110346680666500e+02 3.389668318257735e+02 - ME 2.127227557837123e-05 + ME 3.922171581774212e-05 Event 128 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1028,7 +1028,7 @@ Event 128 Batch 0 2 7.468963146802857e+02 5.701805835528932e+02 -3.440982003215339e+02 -3.381488363986430e+02 3 1.196664332518719e+02 -9.337643239636876e+01 2.398139841985228e+01 7.089280393650260e+01 4 6.334372520678420e+02 -4.768041511565244e+02 3.201168019016817e+02 2.672560324621404e+02 - ME 7.842790653965437e-05 + ME 2.053620454072734e-04 Event 129 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1036,7 +1036,7 @@ Event 129 Batch 0 2 4.378966182438207e+02 -4.256397208622688e+02 4.624364030548149e+01 9.190104474357973e+01 3 7.127537996732577e+02 5.790589826349546e+02 -1.369827771626340e+02 -3.923574802896586e+02 4 3.493495820829217e+02 -1.534192617726859e+02 9.073913685715252e+01 3.004564355460789e+02 - ME 1.046217618618756e-05 + ME 1.668072874757384e-05 Event 130 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1044,7 +1044,7 @@ Event 130 Batch 0 2 6.322026526626455e+02 5.905875735566585e+02 -2.387291116192753e+01 -2.243136110600485e+02 3 5.268087771404591e+02 -3.287250458747471e+02 1.913681034684307e+02 3.644798771698754e+02 4 3.409885701968954e+02 -2.618625276819114e+02 -1.674951923065032e+02 -1.401662661098267e+02 - ME 3.412796728096272e-05 + ME 2.766647151388132e-04 Event 131 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1052,7 +1052,7 @@ Event 131 Batch 0 2 2.691964685177017e+02 -2.641651354044939e+02 4.065264362900757e+01 -3.210735842607325e+01 3 5.382709487855662e+02 -3.022535437819008e+02 -4.307865739991411e+02 1.131429946566680e+02 4 6.925325826967319e+02 5.664186791863947e+02 3.901339303701337e+02 -8.103563623059465e+01 - ME 1.516502654737588e-04 + ME 5.354423766199649e-04 Event 132 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1060,7 +1060,7 @@ Event 132 Batch 0 2 1.376388194981169e+02 -2.491804956023667e+01 3.114513197621116e+01 1.317327453336230e+02 3 7.332494677489981e+02 -3.054807357444667e+02 -6.882601889638243e+00 -6.665500220046781e+02 4 6.291117127528858e+02 3.303987853047034e+02 -2.426253008657308e+01 5.348172766710551e+02 - ME 2.459616839911958e-04 + ME 3.625143788027957e-04 Event 133 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-1068,7 +1068,7 @@ Event 133 Batch 0 2 5.818916885738672e+02 -3.437736592641007e+02 -2.113522447259726e+02 -4.192228966514222e+02 3 7.075583625851592e+02 3.695171106849944e+02 9.875952986414086e+01 5.952667441040354e+02 4 2.105499488409736e+02 -2.574345142089370e+01 1.125927148618317e+02 -1.760438474526132e+02 - ME 3.278402967978973e-04 + ME 6.644965721204062e-03 Event 134 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1076,7 +1076,7 @@ Event 134 Batch 0 2 7.039051474789593e+02 -1.767404282002263e+02 5.832845063404937e+02 3.521710697233707e+02 3 6.740856043500099e+02 9.540039380435479e+01 -5.203258634262522e+02 -4.177932056695244e+02 4 1.220092481710302e+02 8.134003439587134e+01 -6.295864291424151e+01 6.562213594615410e+01 - ME 3.621089826286842e-05 + ME 6.394436352069354e-05 Event 135 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1084,7 +1084,7 @@ Event 135 Batch 0 2 7.491379873081086e+02 -6.603965492909807e+02 -9.243924572685610e+01 -3.413782470545817e+02 3 4.360367703469753e+02 3.763875731093294e+02 3.833030381995060e+01 2.167746473012021e+02 4 3.148252423449159e+02 2.840089761816513e+02 5.410894190690560e+01 1.246035997533796e+02 - ME 1.170602675185252e-05 + ME 3.729096801849378e-05 Event 136 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1092,7 +1092,7 @@ Event 136 Batch 0 2 6.907976432034611e+02 -8.965778913807024e+01 -5.375684903631193e+02 -4.244796613161184e+02 3 4.317447428217263e+02 2.541758793770707e+02 2.501815833403360e+02 2.433255445990286e+02 4 3.774576139748129e+02 -1.645180902390004e+02 2.873869070227833e+02 1.811541167170898e+02 - ME 1.221598515374744e-05 + ME 3.295715598818487e-05 Event 137 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1100,7 +1100,7 @@ Event 137 Batch 0 2 5.927917878715718e+02 -5.453882061843875e+02 
-2.239274061847312e+02 6.172783069514800e+01 3 3.718333194205911e+02 2.859809174201715e+02 -2.363544177495510e+02 2.472896101988843e+01 4 5.353748927078371e+02 2.594072887642160e+02 4.602818239342820e+02 -8.645679171503701e+01 - ME 2.222722395048600e-05 + ME 1.267334233155001e-04 Event 138 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1108,7 +1108,7 @@ Event 138 Batch 0 2 1.164849493482387e+02 2.012854405109472e+01 -2.573298799707043e+01 -1.118096528381494e+02 3 7.481698498358139e+02 -1.044692284663333e+02 -4.003634472873074e+00 7.408294509656059e+02 4 6.353452008159477e+02 8.434068441523856e+01 2.973662246994375e+01 -6.290197981274564e+02 - ME 1.183014588836486e-01 + ME 3.545594402685597e+00 Event 139 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1116,7 +1116,7 @@ Event 139 Batch 0 2 3.415587822283577e+02 -2.468214832259765e+02 1.926082427237748e+02 1.365416492148350e+02 3 5.828887331044928e+02 -1.023403009989268e+02 -5.561813319045077e+02 1.412376154306548e+02 4 5.755524846671491e+02 3.491617842249035e+02 3.635730891807333e+02 -2.777792646454897e+02 - ME 5.213154494000113e-05 + ME 4.142320485322521e-04 Event 140 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1124,7 +1124,7 @@ Event 140 Batch 0 2 4.395392082109443e+02 -3.037880820376849e+02 -2.455930383243060e+02 -2.014735126343029e+02 3 4.709796125547878e+02 -2.826270024952004e+02 2.984919122515593e+02 2.298833426397907e+02 4 5.894811792342680e+02 5.864150845328855e+02 -5.289887392725340e+01 -2.840983000548780e+01 - ME 2.990357782498624e-05 + ME 1.220048440917972e-04 Event 141 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1132,7 +1132,7 @@ Event 141 Batch 0 2 3.025838986653694e+02 -2.680006525137058e+02 -6.218827689980458e+01 -1.259574698062632e+02 3 5.104624598690772e+02 -2.829910827131053e+02 
4.173533268753467e+02 -7.939880721102661e+01 4 6.869536414655528e+02 5.509917352268112e+02 -3.551650499755422e+02 2.053562770172896e+02 - ME 7.151804808113674e-05 + ME 3.735313583347012e-04 Event 142 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1140,7 +1140,7 @@ Event 142 Batch 0 2 4.390011511178412e+02 -3.153925512561953e+02 3.992377088505197e+01 -3.027468279160259e+02 3 4.597282536099518e+02 2.984856708041211e+02 -2.221794712617382e+02 -2.699863960308454e+02 4 6.012705952722066e+02 1.690688045207421e+01 1.822557003766862e+02 5.727332239468712e+02 - ME 8.945447985744934e-05 + ME 1.630913878361870e-04 Event 143 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1148,7 +1148,7 @@ Event 143 Batch 0 2 7.103308443495001e+02 -3.626595603160224e+02 2.462759922459802e+02 5.589240443825270e+02 3 3.424564807343295e+02 4.507572778536915e+01 -2.357842367637252e+02 -2.442343416788665e+02 4 4.472126749161695e+02 3.175838325306533e+02 -1.049175548225529e+01 -3.146897027036604e+02 - ME 1.789392510542836e-04 + ME 1.304325296055160e-03 Event 144 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1156,7 +1156,7 @@ Event 144 Batch 0 2 6.893886390440568e+02 -2.470805413393656e+02 1.331686162420120e+02 6.296618309717105e+02 3 7.132719020730987e+02 2.482972988978650e+02 -2.304803220538649e+02 -6.276815106349294e+02 4 9.733945888284487e+01 -1.216757558499225e+00 9.731170581185302e+01 -1.980320336781234e+00 - ME 1.486904409371019e-04 + ME 3.769348793094523e-04 Event 145 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1164,7 +1164,7 @@ Event 145 Batch 0 2 3.784954309743686e+02 2.391836032855264e+02 1.115572896135236e+01 -2.931305935912622e+02 3 7.389406222827198e+02 -4.231861417520660e+02 1.513250860114713e+02 5.865555822189353e+02 4 3.825639467429113e+02 1.840025384665394e+02 
-1.624808149728234e+02 -2.934249886276727e+02 - ME 2.016505354100400e-04 + ME 2.193982780219728e-03 Event 146 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1172,7 +1172,7 @@ Event 146 Batch 0 2 4.681255842987410e+02 -3.253195724522379e+01 1.754808059398437e+02 -4.327698247100133e+02 3 2.875849079819393e+02 2.091841587061404e+01 1.879781824316579e+02 -2.166372592748876e+02 4 7.442895077193195e+02 1.161354137460973e+01 -3.634589883715017e+02 6.494070839849006e+02 - ME 1.210467216316050e-02 + ME 5.347932692815789e-02 Event 147 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1180,7 +1180,7 @@ Event 147 Batch 0 2 2.442136391928777e+02 -1.784444843977844e+02 -1.666832492802189e+02 -3.816014311599316e+00 3 5.551361515401285e+02 1.378338123621512e+02 -5.199472642306259e+02 1.372327560591401e+02 4 7.006502092669938e+02 4.061067203563306e+01 6.866305135108448e+02 -1.334167417475408e+02 - ME 2.360352365747709e-04 + ME 7.450632204513606e-04 Event 148 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1188,7 +1188,7 @@ Event 148 Batch 0 2 4.547263863263726e+02 3.928375677411887e+02 5.145105706241225e+01 2.231759855356057e+02 3 7.397285466814292e+02 -5.611511356388266e+02 -1.533645573573770e+02 -4.569322031694095e+02 4 3.055450669921979e+02 1.683135678976379e+02 1.019135002949646e+02 2.337562176338038e+02 - ME 6.307552439231181e-06 + ME 1.440225905683450e-05 Event 149 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1196,7 +1196,7 @@ Event 149 Batch 0 2 2.343018799311635e+02 9.853424545130945e+01 1.924850318874441e+02 -9.021023174733594e+01 3 7.291173748950658e+02 3.429747374294529e+01 -5.990516617369192e+02 4.142136359886766e+02 4 5.365807451737705e+02 -1.328317191942547e+02 4.065666298494750e+02 -3.240034042413406e+02 - ME 8.298171355094406e-05 + ME 
8.405553848068603e-04 Event 150 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1204,7 +1204,7 @@ Event 150 Batch 0 2 4.707648023587808e+02 -8.969278865174961e+01 -3.008719699078221e+02 3.507859183712497e+02 3 6.876639918976698e+02 3.906111988928598e+02 4.609284537794546e+02 -3.284046551871671e+02 4 3.415712057435500e+02 -3.009184102411105e+02 -1.600564838716325e+02 -2.238126318408256e+01 - ME 1.887585788236135e-05 + ME 1.070125715137075e-04 Event 151 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1212,7 +1212,7 @@ Event 151 Batch 0 2 6.503034458278056e+02 -1.575298496674962e+02 -3.658248853789647e+01 -6.298735108350154e+02 3 6.998690336552314e+02 1.302751858829802e+02 -1.019415103826456e+02 6.800389464387812e+02 4 1.498275205169629e+02 2.725466378451580e+01 1.385239989205421e+02 -5.016543560376590e+01 - ME 4.060174493404880e-04 + ME 6.663776898009472e-04 Event 152 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1220,7 +1220,7 @@ Event 152 Batch 0 2 7.401192382353395e+02 1.493701961830190e+02 6.288419447382046e+02 3.605867993093739e+02 3 7.332111095478891e+02 -1.230079111936445e+02 -6.287602831147091e+02 -3.565502647954901e+02 4 2.666965221677112e+01 -2.636228498937447e+01 -8.166162349550861e-02 -4.036534513883709e+00 - ME 1.210964379505254e-04 + ME 8.446403371723604e-04 Event 153 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1228,7 +1228,7 @@ Event 153 Batch 0 2 5.645797071775899e+02 7.941901905692946e+01 3.691428696980725e+02 -4.197337333594241e+02 3 6.079979027943974e+02 1.021455738177839e+02 -5.566920170809548e+02 2.220849604771994e+02 4 3.274223900280123e+02 -1.815645928747133e+02 1.875491473828823e+02 1.976487728822249e+02 - ME 9.895323747190810e-06 + ME 2.846663840296023e-05 Event 154 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -1236,7 +1236,7 @@ Event 154 Batch 0 2 6.022174885419887e+02 -5.152457849782368e+02 -1.493252664732707e+02 -2.736597328082223e+02 3 3.617627670199851e+02 1.925398333816265e+02 -2.626238171638091e+02 1.575736108034646e+02 4 5.360197444380261e+02 3.227059515966102e+02 4.119490836370796e+02 1.160861220047577e+02 - ME 1.660411512586943e-05 + ME 6.437319974597944e-05 Event 155 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1244,7 +1244,7 @@ Event 155 Batch 0 2 6.202229507100907e+02 -2.107861924791831e+02 -3.212541876154504e+02 4.868690137883067e+02 3 2.943040328093193e+02 2.940980302320592e+02 1.073731199058907e+01 2.433613089266508e+00 4 5.854730164805898e+02 -8.331183775287627e+01 3.105168756248616e+02 -4.893026268775732e+02 - ME 4.918845171174253e-04 + ME 5.904510654775639e-03 Event 156 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1252,7 +1252,7 @@ Event 156 Batch 0 2 4.945486805149833e+02 4.540818864859257e+02 -1.431706201593249e+02 -1.337542944644701e+02 3 5.997303202813281e+02 -3.624214233270367e+02 -5.726286247273350e+01 4.743923835389624e+02 4 4.057209992036886e+02 -9.166046315888883e+01 2.004334826320584e+02 -3.406380890744924e+02 - ME 1.986837824231628e-04 + ME 4.701306652347430e-03 Event 157 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1260,7 +1260,7 @@ Event 157 Batch 0 2 4.617003083190191e+02 3.118400043328062e+02 3.404502064148864e+02 -4.079626411035589e+00 3 5.720097526413113e+02 -4.999240316044806e+01 -4.329264075474301e+02 -3.705005295422582e+02 4 4.662899390396696e+02 -2.618476011723578e+02 9.247620113254365e+01 3.745801559532937e+02 - ME 1.403598809900552e-05 + ME 3.907978340087068e-05 Event 158 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1268,7 +1268,7 @@ Event 158 Batch 0 2 
6.784877363061535e+02 -5.707102180762959e+02 -3.102223423027389e+02 -1.959529373021938e+02 3 5.650909444059712e+02 5.525284805868615e+02 7.765167789879932e+01 8.950011457818250e+01 4 2.564213192878751e+02 1.818173748943443e+01 2.325706644039396e+02 1.064528227240114e+02 - ME 8.470133063482862e-06 + ME 3.503179830087694e-05 Event 159 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1276,7 +1276,7 @@ Event 159 Batch 0 2 5.369491563274252e+02 2.154713482252002e+02 -2.912667909729743e+02 3.962955349875316e+02 3 6.066564496499102e+02 -4.020061311781470e+01 5.572389608252350e+02 -2.364332868806716e+02 4 3.563943940226648e+02 -1.752707351073854e+02 -2.659721698522608e+02 -1.598622481068599e+02 - ME 3.562393617300492e-05 + ME 3.198473025834927e-04 Event 160 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1284,7 +1284,7 @@ Event 160 Batch 0 2 6.492474755438517e+02 3.490068395973682e+02 1.460348644657111e+02 -5.276270735801970e+02 3 2.857818814470013e+02 -2.550253586192556e+02 1.227259509083862e+02 3.964456076362119e+01 4 5.649706430091471e+02 -9.398148097811273e+01 -2.687608153740973e+02 4.879825128165764e+02 - ME 3.516238941302227e-05 + ME 6.719464076924620e-05 Event 161 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1292,7 +1292,7 @@ Event 161 Batch 0 2 6.770282049439580e+02 -2.863253153105184e+02 -4.911270786072976e+02 -3.676672364525180e+02 3 1.598243093356544e+02 -7.505362471426160e+01 1.299195075310522e+02 -5.506073768810752e+01 4 6.631474857203874e+02 3.613789400247800e+02 3.612075710762453e+02 4.227279741406256e+02 - ME 5.970757951131334e-05 + ME 1.577168105051119e-04 Event 162 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1300,7 +1300,7 @@ Event 162 Batch 0 2 5.178592782584632e+02 -3.271131571456631e+02 3.943743741889439e+02 -7.512700901574514e+01 3 
3.730686930366258e+02 -2.885924195736573e+01 -1.360208443078026e+02 -3.461874113706257e+02 4 6.090720287049110e+02 3.559723991030290e+02 -2.583535298811414e+02 4.213144203863710e+02 - ME 2.768303103320498e-05 + ME 1.031749267713353e-04 Event 163 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1308,7 +1308,7 @@ Event 163 Batch 0 2 5.388642316037673e+02 3.152159924116781e+02 3.539969933522669e+01 -4.356149670486711e+02 3 5.364171791816749e+02 -5.299694218906361e+02 3.369785517714305e+01 7.576448071880543e+01 4 4.247185892145582e+02 2.147534294789580e+02 -6.909755451236977e+01 3.598504863298658e+02 - ME 1.485600561394433e-05 + ME 3.508094027565679e-05 Event 164 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1316,7 +1316,7 @@ Event 164 Batch 0 2 6.862697092177667e+02 4.132218376422068e+02 1.310202162324327e+02 -5.320221138485150e+02 3 4.476895523579005e+02 -2.769046850483522e+02 1.374187337517142e+02 3.238299280529301e+02 4 3.660407384243329e+02 -1.363171525938544e+02 -2.684389499841469e+02 2.081921857955847e+02 - ME 1.755563256840939e-05 + ME 3.375894779915149e-05 Event 165 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1324,7 +1324,7 @@ Event 165 Batch 0 2 2.382444910715278e+02 -2.158277263671036e+02 -9.471372817531817e+00 -1.004446273032522e+02 3 7.304591383576048e+02 4.619003715882296e+02 -1.223345688256177e+02 5.524969256086772e+02 4 5.312963705708673e+02 -2.460726452211260e+02 1.318059416431495e+02 -4.520522983054250e+02 - ME 4.549138184301779e-04 + ME 6.966498968932957e-03 Event 166 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1332,7 +1332,7 @@ Event 166 Batch 0 2 2.131352071380649e+02 -7.633553084455029e+01 -1.899581415396244e+02 5.929087379418958e+01 3 7.305557876753161e+02 8.980971292745940e+01 7.136333043711877e+02 1.279589045828712e+02 4 
5.563090051866194e+02 -1.347418208290915e+01 -5.236751628315633e+02 -1.872497783770607e+02 - ME 3.352199959657985e-05 + ME 3.314006956523505e-04 Event 167 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1340,7 +1340,7 @@ Event 167 Batch 0 2 4.122964103002419e+02 -3.405127102276982e+02 6.366431608201744e+01 2.235761145061386e+02 3 4.697083356610920e+02 -2.521100678451879e+02 -2.856113063438232e+01 -3.952855880214881e+02 4 6.179952540386658e+02 5.926227780728861e+02 -3.510318544763516e+01 1.717094735153495e+02 - ME 3.829535931496594e-05 + ME 1.146777177775239e-04 Event 168 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1348,7 +1348,7 @@ Event 168 Batch 0 2 7.156643283953484e+02 -3.999734570317170e+02 4.816586825103861e+02 3.467009924560655e+02 3 6.192344221355605e+02 2.722545660880235e+02 -4.999454120042317e+02 -2.436869012025525e+02 4 1.651012494690919e+02 1.277188909436936e+02 1.828672949384504e+01 -1.030140912535133e+02 - ME 5.027887292283473e-05 + ME 1.017624049822302e-03 Event 169 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1356,7 +1356,7 @@ Event 169 Batch 0 2 3.626022684949455e+02 7.511110909567982e+01 -2.030941161665286e+02 -2.908461902563517e+02 3 5.580565590514408e+02 -2.529981754432838e+02 -3.439969378312538e+02 3.592842232626199e+02 4 5.793411724536141e+02 1.778870663476037e+02 5.470910539977822e+02 -6.843803300626824e+01 - ME 4.350242525242475e-05 + ME 1.371698416063432e-04 Event 170 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1364,7 +1364,7 @@ Event 170 Batch 0 2 6.602909342483501e+02 4.699653539595539e+02 -3.020118498241596e+02 3.520021683086903e+02 3 1.039297502933440e+02 3.247420585022842e+01 -9.851348423194945e+01 6.473976746580508e+00 4 7.357793154583061e+02 -5.024395598097824e+02 4.005253340561092e+02 -3.584761450552709e+02 - 
ME 9.967260301798612e-03 + ME 1.673719496447659e-02 Event 171 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1372,7 +1372,7 @@ Event 171 Batch 0 2 1.506693011949600e+02 -3.657300520509282e+01 -1.244227366169959e+02 -7.669834565089053e+01 3 6.344013325830570e+02 -2.026333084464634e+02 -4.956100871165362e+02 3.402578943089165e+02 4 7.149293662219835e+02 2.392063136515561e+02 6.200328237335323e+02 -2.635595486580261e+02 - ME 9.157902172934166e-04 + ME 2.133207113512388e-03 Event 172 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1380,7 +1380,7 @@ Event 172 Batch 0 2 5.352445157558213e+02 -2.018352690102651e+02 3.892440882325296e+02 -3.069825004886504e+02 3 6.716112180685394e+02 2.825227203806547e+02 -5.978593235713698e+02 1.175022124175027e+02 4 2.931442661756383e+02 -8.068745137038898e+01 2.086152353388391e+02 1.894802880711483e+02 - ME 8.067092159940342e-06 + ME 2.630379932615259e-05 Event 173 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1388,7 +1388,7 @@ Event 173 Batch 0 2 6.571348515648592e+02 -2.769863586381786e+02 5.805753619381593e+02 1.343019708712704e+02 3 5.332990408103321e+02 1.871824832342877e+02 -4.782426732337677e+02 1.437168410371092e+02 4 3.095661076248081e+02 8.980387540389081e+01 -1.023326887043915e+02 -2.780188119083794e+02 - ME 1.269359653092767e-04 + ME 9.985413945498126e-03 Event 174 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1396,7 +1396,7 @@ Event 174 Batch 0 2 6.091496911716730e+02 -4.752584064243671e+02 3.135726231883978e+01 -3.797492797588730e+02 3 6.417481529658018e+02 3.309293137608124e+02 9.015643604119191e+01 5.424004960996682e+02 4 2.491021558625255e+02 1.443290926635548e+02 -1.215136983600317e+02 -1.626512163407953e+02 - ME 1.362612102685676e-04 + ME 1.319192968737130e-03 Event 175 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1404,7 +1404,7 @@ Event 175 Batch 0 2 5.399801778396885e+02 1.966672297646830e+02 2.343185748302537e+02 -4.449667388535759e+02 3 6.987953575798327e+02 -1.857207036318898e+02 -9.664246188148675e+01 6.666955876403318e+02 4 2.612244645804785e+02 -1.094652613279307e+01 -1.376761129487668e+02 -2.217288487867561e+02 - ME 9.613528518728674e-04 + ME 9.528877211334405e-03 Event 176 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1412,7 +1412,7 @@ Event 176 Batch 0 2 6.615757321243968e+02 -4.129469954321281e+02 4.686878756164518e+02 -2.179194886871010e+02 3 1.607981401590110e+02 -6.355407199259605e+01 7.929314438200207e+00 1.474925346731048e+02 4 6.776261277165921e+02 4.765010674247242e+02 -4.766171900546519e+02 7.042695401399614e+01 - ME 3.097907077728356e-04 + ME 6.965204353376922e-04 Event 177 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1420,7 +1420,7 @@ Event 177 Batch 0 2 4.314334067424883e+02 -3.493619040652741e+02 -2.026482683689240e+01 -2.523299055494341e+02 3 4.840006500668400e+02 -1.846595828310067e+02 -1.450727057198388e+02 4.232155216776995e+02 4 5.845659431906716e+02 5.340214868962809e+02 1.653375325567312e+02 -1.708856161282654e+02 - ME 1.084300812640113e-04 + ME 2.160100049311594e-04 Event 178 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1428,7 +1428,7 @@ Event 178 Batch 0 2 4.528135981327372e+02 -2.544528544607913e+02 1.436928116455424e+02 3.458992272209776e+02 3 3.053350882587867e+02 -1.380299578048218e+02 2.072032295570572e+02 1.767599177741536e+02 4 7.418513136084770e+02 3.924828122656132e+02 -3.508960412025996e+02 -5.226591449951313e+02 - ME 5.382438151181503e-02 + ME 7.384409254828141e-02 Event 179 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1436,7 +1436,7 @@ 
Event 179 Batch 0 2 7.433145319259943e+02 -2.538538580850882e+02 -6.778753511348521e+02 -1.689962142519080e+02 3 1.647945947160298e+02 1.009041857568576e+02 1.171651165877689e+02 5.699069397138987e+01 4 5.918908733579761e+02 1.529496723282306e+02 5.607102345470832e+02 1.120055202805181e+02 - ME 3.739915465576335e-05 + ME 1.335347052581446e-04 Event 180 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1444,7 +1444,7 @@ Event 180 Batch 0 2 2.396120216689867e+02 1.204528233788652e+02 -1.081248155319049e+02 1.766750195544080e+02 3 5.541470271917004e+02 2.767127195685322e+02 2.999096875483201e+02 3.749175614572557e+02 4 7.062409511393131e+02 -3.971655429473975e+02 -1.917848720164151e+02 -5.515925810116636e+02 - ME 2.792447184071457e-03 + ME 1.316593054412419e-02 Event 181 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1452,7 +1452,7 @@ Event 181 Batch 0 2 2.165494222755782e+02 1.336973493521793e+02 -1.495065670853883e+02 -8.164837697364385e+01 3 6.960869932595207e+02 -2.848973600545249e+02 2.209041937252092e+01 6.347303441548928e+02 4 5.873635844649011e+02 1.512000107023455e+02 1.274161477128675e+02 -5.530819671812490e+02 - ME 3.488874737600980e-03 + ME 6.164296623062663e-02 Event 182 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1460,7 +1460,7 @@ Event 182 Batch 0 2 6.472681881349898e+02 4.279258056181361e+02 3.994050733201775e+02 -2.762448183472868e+02 3 5.337197582091030e+02 -3.479343829022644e+02 -4.034091782989213e+02 -3.254965992745409e+01 4 3.190120536559070e+02 -7.999142271587166e+01 4.004104978744005e+00 3.087944782747408e+02 - ME 5.523679400573375e-05 + ME 6.393158381765308e-05 Event 183 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1468,7 +1468,7 @@ Event 183 Batch 0 2 6.165307808531154e+02 -3.276949594572818e+02 8.808524820164887e+01 
-5.147496540405800e+02 3 2.975460412740734e+02 -1.030095950018341e+02 -2.375020297789284e+02 1.466814775843215e+02 4 5.859231778728107e+02 4.307045544591158e+02 1.494167815772794e+02 3.680681764562588e+02 - ME 2.562496117427957e-05 + ME 6.887775529805495e-05 Event 184 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1476,7 +1476,7 @@ Event 184 Batch 0 2 5.645337360463252e+02 -3.940276919793660e+02 3.776398996283964e+02 1.443212503288767e+02 3 5.368100353438223e+02 2.392766596964613e+02 -1.719264331693737e+02 -4.487237410122139e+02 4 3.986562286098531e+02 1.547510322829050e+02 -2.057134664590229e+02 3.044024906833372e+02 - ME 1.712138666139329e-05 + ME 3.553984578535888e-05 Event 185 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1484,7 +1484,7 @@ Event 185 Batch 0 2 6.347397779710931e+02 2.522092504724420e+02 -1.599825720327363e+02 5.600809373302327e+02 3 4.566768168089404e+02 -3.359958684022406e+02 -1.272903681003782e+02 -2.818823400219340e+02 4 4.085834052199659e+02 8.378661792979838e+01 2.872729401331145e+02 -2.781985973082986e+02 - ME 1.836859309200860e-04 + ME 1.184197550833168e-03 Event 186 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1492,7 +1492,7 @@ Event 186 Batch 0 2 7.089823220133230e+02 -5.197119220861886e+02 4.248734840868308e+02 -2.281183322067745e+02 3 5.364076825758043e+02 3.588264146200084e+02 -3.973752875032956e+02 3.270606945152315e+01 4 2.546099954108725e+02 1.608855074661802e+02 -2.749819658353518e+01 1.954122627552515e+02 - ME 1.318469173008218e-05 + ME 2.583895514537347e-05 Event 187 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1500,7 +1500,7 @@ Event 187 Batch 0 2 4.835105223217566e+02 -2.128653471696258e+02 1.375287019182911e+02 -4.117725407538514e+02 3 7.240136612790383e+02 4.407273454759851e+02 -4.896543389042274e+01 
5.723264583716990e+02 4 2.924758163992057e+02 -2.278619983063593e+02 -8.856326802786833e+01 -1.605539176178473e+02 - ME 9.185777086042985e-05 + ME 5.307563978210835e-04 Event 188 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1508,7 +1508,7 @@ Event 188 Batch 0 2 6.611118500396009e+02 3.502021063704277e+02 -2.011693879247277e+02 -5.234102027267809e+02 3 3.072944371702247e+02 -6.894916504330918e+01 -1.599953986835475e+02 2.531350551695447e+02 4 5.315937127901742e+02 -2.812529413271184e+02 3.611647866082752e+02 2.702751475572362e+02 - ME 3.862980709292737e-05 + ME 6.863567490702385e-05 Event 189 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1516,7 +1516,7 @@ Event 189 Batch 0 2 7.498478362545707e+02 6.780504955298834e+02 -3.199144947524264e+02 -1.319162971889924e+01 3 3.253008430749361e+02 -2.985087551774363e+02 1.291384938207140e+02 6.034152914782593e+00 4 4.248513206704935e+02 -3.795417403524470e+02 1.907760009317124e+02 7.157476804116639e+00 - ME 1.504471760657040e-05 + ME 8.583750584152986e-05 Event 190 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1524,7 +1524,7 @@ Event 190 Batch 0 2 4.938867893347995e+02 3.689671478502748e+02 -1.218724623869293e+02 3.048516153777389e+02 3 5.264063001598521e+02 6.631942569346465e+01 1.276367949726208e+02 -5.063735530147588e+02 4 4.797069105053494e+02 -4.352865735437401e+02 -5.764332585691415e+00 2.015219376370201e+02 - ME 2.269926034328256e-05 + ME 4.759343488474735e-05 Event 191 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1532,7 +1532,7 @@ Event 191 Batch 0 2 3.681793141805986e+02 -3.225132888415706e+02 1.579589482507471e+02 -8.117977937027918e+01 3 5.431126642386394e+02 4.058413736814005e+01 9.147123993851424e+01 5.338139246166097e+02 4 5.887080215807621e+02 2.819291514734305e+02 -2.494301881892614e+02 
-4.526341452463304e+02 - ME 1.427494731558637e-03 + ME 4.908990110546420e-03 Event 192 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1540,7 +1540,7 @@ Event 192 Batch 0 2 6.054165399887861e+02 1.497087111729466e+02 8.905021611535379e+01 5.798159601983524e+02 3 2.106656439489222e+02 1.451894976721945e+02 -1.487249448604451e+02 3.436443048222171e+01 4 6.839178160622922e+02 -2.948982088451411e+02 5.967472874509133e+01 -6.141803906805740e+02 - ME 6.984876913518998e-03 + ME 4.294450320853435e-02 Event 193 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1548,7 +1548,7 @@ Event 193 Batch 0 2 2.753169163933055e+02 -1.695475157411122e+02 -2.139406274107579e+02 3.581134319495643e+01 3 5.760219428901971e+02 -3.264616044953138e+02 1.527507522369444e+02 -4.493231656306969e+02 4 6.486611407164972e+02 4.960091202364260e+02 6.118987517381347e+01 4.135118224357404e+02 - ME 4.273063058931925e-05 + ME 1.537583375796735e-04 Event 194 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1556,7 +1556,7 @@ Event 194 Batch 0 2 3.445934948105150e+02 -2.970257025567896e+02 -8.183019525038441e+01 1.543509890854414e+02 3 7.485441862377920e+02 6.623797851941252e+02 1.083400559332054e+02 -3.314119056355291e+02 4 4.068623189516925e+02 -3.653540826373358e+02 -2.650986068282081e+01 1.770609165500877e+02 - ME 4.921158833271929e-06 + ME 3.024610065690235e-05 Event 195 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1564,7 +1564,7 @@ Event 195 Batch 0 2 2.012122274303647e+02 -5.190018365965096e+01 1.322177369426910e+02 -1.425173724194237e+02 3 7.122630330184543e+02 -3.054768058087834e+02 -2.528097616133813e+02 5.916838461125119e+02 4 5.865247395511832e+02 3.573769894684365e+02 1.205920246706904e+02 -4.491664736930883e+02 - ME 4.696445912229638e-04 + ME 3.011639483286710e-03 Event 196 Batch 0 
0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1572,7 +1572,7 @@ Event 196 Batch 0 2 4.490485793345989e+02 3.485190427929747e+02 -2.661098616642627e+01 -2.819059396826192e+02 3 5.531554978829222e+02 -3.330165694254377e+02 4.416170126965178e+02 7.442003978758296e+00 4 4.977959227824785e+02 -1.550247336753688e+01 -4.150060265300915e+02 2.744639357038610e+02 - ME 9.363355109875406e-06 + ME 4.340266456570635e-05 Event 197 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1580,7 +1580,7 @@ Event 197 Batch 0 2 3.951249254444253e+02 -2.278358800090239e+02 3.101157211704546e+02 -8.968142489336992e+01 3 3.607080640108546e+02 -2.889948719219027e+02 2.155030307719242e+02 -1.227661082778765e+01 4 7.441670105447209e+02 5.168307519309257e+02 -5.256187519423792e+02 1.019580357211576e+02 - ME 6.597373610109231e-03 + ME 3.377741088449004e-02 Event 198 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1588,7 +1588,7 @@ Event 198 Batch 0 2 3.750236904637998e+02 1.183014344420310e+02 -1.005952209347265e+02 -3.413621838211424e+02 3 4.381296266085964e+02 -2.726825461625328e+02 1.003845461170281e+02 -3.279096546785175e+02 4 6.868466829276033e+02 1.543811117205018e+02 2.106748176980602e-01 6.692718384996598e+02 - ME 6.145502577419889e-04 + ME 9.606390506705955e-04 Event 199 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1596,7 +1596,7 @@ Event 199 Batch 0 2 2.454478562244572e+02 -2.058455361543722e+02 -1.131056012155068e+02 -7.126982772660261e+01 3 5.321797086694488e+02 -9.806778012582416e+01 -4.820333037417012e+02 -2.030808875905193e+02 4 7.223724351060940e+02 3.039133162801963e+02 5.951389049572081e+02 2.743507153171219e+02 - ME 3.088173795554332e-04 + ME 1.577081887352965e-03 Event 200 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 
@@ -1604,7 +1604,7 @@ Event 200 Batch 0 2 3.952431318363244e+02 3.031309873729303e+02 9.337877017948550e+01 2.358159092128122e+02 3 6.094031244332663e+02 -7.796753338981905e+01 -5.315426896439308e+02 -2.876727322709444e+02 4 4.953537437304092e+02 -2.251634539831113e+02 4.381639194644453e+02 5.185682305813224e+01 - ME 1.668296552597111e-05 + ME 6.703240553489506e-05 Event 201 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1612,7 +1612,7 @@ Event 201 Batch 0 2 6.497938633639732e+02 3.771120671245744e+02 3.553445817627057e+02 -3.921081252746440e+02 3 3.369790646193914e+02 -2.140351778515325e+02 1.061239955238163e+02 2.376584318047305e+02 4 5.132270720166357e+02 -1.630768892730420e+02 -4.614685772865220e+02 1.544496934699135e+02 - ME 2.404518058628388e-05 + ME 6.283412004793947e-05 Event 202 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1620,7 +1620,7 @@ Event 202 Batch 0 2 7.267802742470179e+02 6.523432021666289e+02 -1.481957728499301e+02 2.840702844913056e+02 3 3.546086620137576e+02 -3.102429173963679e+02 -5.939291787501398e+01 -1.611493614224694e+02 4 4.186110637392242e+02 -3.421002847702610e+02 2.075886907249440e+02 -1.229209230688360e+02 - ME 2.830403199974809e-05 + ME 1.894138330341389e-04 Event 203 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1628,7 +1628,7 @@ Event 203 Batch 0 2 4.830190702985662e+02 2.789429895135886e+02 -3.943102945050296e+02 -4.197918611657844e+00 3 5.247163710833165e+02 -4.266462829986153e+02 3.263988520595893e+01 3.037019215942698e+02 4 4.922645586181170e+02 1.477032934850268e+02 3.616704092990706e+02 -2.995040029826120e+02 - ME 5.153190919865371e-05 + ME 5.831910678002871e-04 Event 204 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1636,7 +1636,7 @@ Event 204 Batch 0 2 6.952375769935185e+02 3.823764713153302e+01 
6.531840992713522e+02 -2.350397908115460e+02 3 6.250862947179036e+02 1.031861473443961e+02 -5.506835576815644e+02 2.771878679515999e+02 4 1.796761282885781e+02 -1.414237944759291e+02 -1.025005415897879e+02 -4.214807714005369e+01 - ME 1.903000177287069e-05 + ME 1.802858800889920e-04 Event 205 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1644,7 +1644,7 @@ Event 205 Batch 0 2 5.625197268936781e+02 2.955060596751036e+02 4.395356105446072e+02 -1.895074112086703e+02 3 3.144813194259642e+02 -1.941101430078122e+02 -7.073026664887073e+00 -2.473251401357733e+02 4 6.229989536803572e+02 -1.013959166672914e+02 -4.324625838797200e+02 4.368325513444433e+02 - ME 3.163472493443465e-05 + ME 1.140145509231641e-04 Event 206 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1652,7 +1652,7 @@ Event 206 Batch 0 2 5.487698581700869e+02 -4.771827558939671e+02 -2.639484985605369e+02 6.145050708573941e+01 3 4.357856725513919e+02 1.877155863290790e+02 1.701172104948722e+02 3.545872893148349e+02 4 5.154444692785200e+02 2.894671695648880e+02 9.383128806566407e+01 -4.160377964005746e+02 - ME 3.341888001113221e-04 + ME 4.167786087259531e-03 Event 207 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1660,7 +1660,7 @@ Event 207 Batch 0 2 5.289473514933904e+02 -3.230637718239221e+02 -3.258094337294262e+02 2.631792409740627e+02 3 3.730441408755686e+02 -1.145152671243400e+02 -7.298530142052728e+01 -3.474497523579300e+02 4 5.980085076310412e+02 4.375790389482623e+02 3.987947351499535e+02 8.427051138386733e+01 - ME 3.789028948405571e-05 + ME 1.161501350367753e-04 Event 208 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1668,7 +1668,7 @@ Event 208 Batch 0 2 3.144460531270953e+02 3.105028133645123e+02 -3.495125011961062e+01 3.525242310830974e+01 3 7.230517599976935e+02 -6.554206809343713e+02 
2.220922910679198e+02 2.095294558946058e+02 4 4.625021868752117e+02 3.449178675698588e+02 -1.871410409483092e+02 -2.447818790029155e+02 - ME 2.941989209837521e-05 + ME 4.858457850437588e-04 Event 209 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1676,7 +1676,7 @@ Event 209 Batch 0 2 2.827014058170527e+02 -6.682954863774688e+01 -1.958656753088385e+02 -1.925890275057887e+02 3 5.969812148172332e+02 5.625717004655273e+02 1.060136244597389e+02 -1.692949027847388e+02 4 6.203173793657136e+02 -4.957421518277804e+02 8.985205084909943e+01 3.618839302905275e+02 - ME 2.261939336541961e-05 + ME 1.004351001266980e-04 Event 210 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1684,7 +1684,7 @@ Event 210 Batch 0 2 3.369223392964550e+02 -2.366581006943837e+02 8.850719545688517e+01 -2.228813191927023e+02 3 6.926279093100447e+02 9.835546321295956e+01 -1.581805884470998e+02 6.671120783270956e+02 4 4.704497513935005e+02 1.383026374814242e+02 6.967339299021461e+01 -4.442307591343933e+02 - ME 3.044010300440331e-03 + ME 5.974710408786874e-02 Event 211 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1692,7 +1692,7 @@ Event 211 Batch 0 2 5.754314663824422e+02 -1.965408456680789e+02 -5.399725108422632e+02 3.037689947684008e+01 3 6.656941886103589e+02 4.112771407945243e+02 5.114655840792436e+02 1.113679599883347e+02 4 2.588743450071987e+02 -2.147362951264454e+02 2.850692676301957e+01 -1.417448594651748e+02 - ME 1.754510489093768e-05 + ME 4.382347812376007e-04 Event 212 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1700,7 +1700,7 @@ Event 212 Batch 0 2 5.922157374848572e+02 8.073316194509509e+00 4.947261155542873e+02 -3.254233732830556e+02 3 3.635572903001510e+02 8.951663862813328e+01 4.011175755255380e+01 3.500738802669425e+02 4 5.442269722149914e+02 -9.758995482264278e+01 
-5.348378731068407e+02 -2.465050698388706e+01 - ME 1.919214373141161e-04 + ME 3.041427876287276e-04 Event 213 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1708,7 +1708,7 @@ Event 213 Batch 0 2 7.434820262506830e+02 2.991548764052629e+02 2.111623598614188e+02 -6.470566753063675e+02 3 5.607612173038236e+02 -2.664197873565705e+02 -1.905271140771768e+02 4.551626726109781e+02 4 1.957567564454930e+02 -3.273508904869271e+01 -2.063524578424195e+01 1.918940026953895e+02 - ME 1.896082550340891e-04 + ME 1.827786070323022e-04 Event 214 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1716,7 +1716,7 @@ Event 214 Batch 0 2 5.400874280734793e+02 3.457358963402696e+02 2.445843697627679e+02 -3.351710101016577e+02 3 3.400793067879315e+02 1.482066942304564e+02 1.256466447865830e+02 2.791086371729012e+02 4 6.198332651385892e+02 -4.939425905707261e+02 -3.702310145493508e+02 5.606237292875651e+01 - ME 6.515553919952984e-05 + ME 1.356968066378560e-04 Event 215 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1724,7 +1724,7 @@ Event 215 Batch 0 2 3.916345321859864e+02 3.271767110560381e+02 -1.945589530122144e+02 9.208594000107233e+01 3 6.136750729169615e+02 -1.269585669220027e+02 2.644680756040779e+02 -5.390132228350478e+02 4 4.946903948970534e+02 -2.002181441340350e+02 -6.990912259186331e+01 4.469272828339764e+02 - ME 3.427926940877871e-05 + ME 6.207321332343461e-05 Event 216 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1732,7 +1732,7 @@ Event 216 Batch 0 2 3.767411090262154e+02 1.602503356822860e+02 2.758455349572533e+02 -2.004069210086422e+02 3 4.061922956351256e+02 3.340053729931861e+02 2.237650079776778e+02 5.798114391563544e+01 4 7.170665953386593e+02 -4.942557086754721e+02 -4.996105429349309e+02 1.424257770930068e+02 - ME 2.360785017217177e-04 + ME 
1.232271832865728e-03 Event 217 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1740,7 +1740,7 @@ Event 217 Batch 0 2 6.474118977458852e+02 -5.378641111590873e+02 -3.279650037002520e+02 1.492759847325320e+02 3 5.088298200539713e+02 3.261878344469131e+02 1.555821256186315e+02 -3.581947579501665e+02 4 3.437582822001433e+02 2.116762767121744e+02 1.723828780816206e+02 2.089187732176345e+02 - ME 1.388331578224744e-05 + ME 3.357118960820415e-05 Event 218 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1748,7 +1748,7 @@ Event 218 Batch 0 2 6.658501161076259e+02 -6.577627036244854e+02 -3.020200479570956e+01 9.895676706252418e+01 3 2.516345839620714e+02 1.565221509782131e+02 -1.156477271957936e+02 1.595192254662914e+02 4 5.825152999303023e+02 5.012405526462722e+02 1.458497319915031e+02 -2.584759925288157e+02 - ME 1.036808356896783e-04 + ME 5.956187308313417e-04 Event 219 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1756,7 +1756,7 @@ Event 219 Batch 0 2 4.328556070633435e+02 6.122246558068494e+01 -1.687441385117925e+02 3.938796795879554e+02 3 6.500677455605621e+02 -3.703058656885360e+02 4.356876543064814e+02 -3.092537914719426e+02 4 4.170766473760945e+02 3.090834001078509e+02 -2.669435157946888e+02 -8.462588811601287e+01 - ME 9.046106878448173e-05 + ME 2.797067114354785e-04 Event 220 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1764,7 +1764,7 @@ Event 220 Batch 0 2 3.686297280598666e+02 -3.497113779929074e+02 -8.765282776369953e+01 7.685577594963354e+01 3 4.155522773953191e+02 -1.777404948015450e+02 -1.525848366500187e+02 3.432344379292750e+02 4 7.158179945448145e+02 5.274518727944524e+02 2.402376644137182e+02 -4.200902138789084e+02 - ME 1.676729229638681e-03 + ME 3.485410710153060e-03 Event 221 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -1772,7 +1772,7 @@ Event 221 Batch 0 2 5.295220830718469e+02 3.654688468413813e+01 4.204675060608333e+02 3.197890523886257e+02 3 7.127556392876786e+02 -1.727486268095863e+02 -4.342549693537605e+02 -5.381460163035255e+02 4 2.577222776404743e+02 1.362017421254481e+02 1.378746329292729e+01 2.183569639148998e+02 - ME 2.031931825964470e-05 + ME 2.819264207321091e-05 Event 222 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1780,7 +1780,7 @@ Event 222 Batch 0 2 2.464305981122427e+02 -2.054199106396077e+02 6.127423271580306e+01 1.215572638876956e+02 3 6.926647117218595e+02 4.702892479611936e+02 3.872350261814336e+02 -3.296383785530530e+02 4 5.609046901658980e+02 -2.648693373215859e+02 -4.485092588972366e+02 2.080811146653574e+02 - ME 1.678695785515194e-05 + ME 6.319142394583372e-05 Event 223 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1788,7 +1788,7 @@ Event 223 Batch 0 2 2.463384302181125e+02 -1.209251938955738e+02 -2.140981972257043e+02 -1.488897673935926e+01 3 6.819620845265065e+02 -2.400891875757811e+02 5.819023806457059e+02 2.623339210620683e+02 4 5.716994852553812e+02 3.610143814713547e+02 -3.678041834200016e+02 -2.474449443227091e+02 - ME 4.810915220985587e-05 + ME 3.931927185620913e-04 Event 224 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1796,7 +1796,7 @@ Event 224 Batch 0 2 2.236851263016067e+02 -8.671871524968952e+01 1.717231909970332e+02 1.141317038679677e+02 3 5.308972974363861e+02 -3.715833295102001e+01 4.680039348616383e+02 2.478780257941054e+02 4 7.454175762620068e+02 1.238770482007099e+02 -6.397271258586715e+02 -3.620097296620728e+02 - ME 6.017706528853119e-02 + ME 8.708656265179471e-02 Event 225 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1804,7 +1804,7 @@ Event 225 Batch 0 2 
5.094176014319268e+02 1.569347096242780e+02 -1.561291130928888e+00 -4.846394040251013e+02 3 7.252311334449815e+02 -3.845161955462210e+02 -4.374219820797174e+01 6.133466494377277e+02 4 2.653512651230916e+02 2.275814859219426e+02 4.530348933890067e+01 -1.287072454126262e+02 - ME 1.151501859389029e-04 + ME 3.974215742688118e-04 Event 226 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1812,7 +1812,7 @@ Event 226 Batch 0 2 6.863217264048350e+02 -2.391756120967483e+02 -6.171186323675804e+02 1.816511279850093e+02 3 5.332348374442744e+02 1.096335504493486e+02 4.112484130583279e+02 -3.212391931833643e+02 4 2.804434361508906e+02 1.295420616473995e+02 2.058702193092524e+02 1.395880651983551e+02 - ME 1.438206074993319e-05 + ME 3.797053871351767e-05 Event 227 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1820,7 +1820,7 @@ Event 227 Batch 0 2 7.243206345463230e+02 -5.280189925476210e+02 -1.406011303275692e+02 4.754657162080069e+02 3 5.487499634657129e+02 3.840442912861271e+02 -1.353123555187442e+01 -3.917312987222202e+02 4 2.269294019879644e+02 1.439747012614939e+02 1.541323658794436e+02 -8.373441748578679e+01 - ME 5.165623507180856e-05 + ME 2.903986554770466e-04 Event 228 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1828,7 +1828,7 @@ Event 228 Batch 0 2 2.119578664379945e+02 1.625437651479949e+01 -1.806612394559917e+02 1.096514885776142e+02 3 6.254097456672617e+02 -3.200704000326812e+01 3.158243706171928e+02 5.388579277416935e+02 4 6.626323878947439e+02 1.575266348846865e+01 -1.351631311612011e+02 -6.485094163193077e+02 - ME 3.800526374221887e-02 + ME 8.951233069377997e-01 Event 229 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1836,7 +1836,7 @@ Event 229 Batch 0 2 5.921227120343664e+02 -3.877491982207575e+02 4.449193714386763e+02 -4.802726626309342e+01 3 
4.688278331283221e+02 3.470549659129084e+02 -1.517581364471262e+02 -2.762641051115459e+02 4 4.390494548373113e+02 4.069423230784909e+01 -2.931612349915501e+02 3.242913713746393e+02 - ME 1.250052930035257e-05 + ME 3.492131538818778e-05 Event 230 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1844,7 +1844,7 @@ Event 230 Batch 0 2 4.261952284727868e+02 2.153699775439378e+02 -1.171086083390750e+02 3.486312082969335e+02 3 3.540619701921573e+02 3.070144260847319e+01 1.307424531367546e+02 3.276029778648147e+02 4 7.197428013350559e+02 -2.460714201524109e+02 -1.363384479767965e+01 -6.762341861617483e+02 - ME 4.711214236813061e-02 + ME 3.186738302883428e-01 Event 231 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1852,7 +1852,7 @@ Event 231 Batch 0 2 4.205236024420392e+02 7.533931576750228e+01 -3.260217181731272e+02 -2.547036061581322e+02 3 5.397543491930860e+02 8.423195081267914e+01 -1.158376015978276e+02 5.204050211049134e+02 4 5.397220483648740e+02 -1.595712665801811e+02 4.418593197709548e+02 -2.657014149467809e+02 - ME 3.265984123744224e-04 + ME 5.532186388062512e-04 Event 232 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1860,7 +1860,7 @@ Event 232 Batch 0 2 4.295782852421121e+02 3.239064445356881e+02 9.240815775655221e-01 2.821724019337124e+02 3 7.183371274312143e+02 -6.155391061575082e+02 -1.955291718271078e+02 -3.144649112405858e+02 4 3.520845873266736e+02 2.916326616218201e+02 1.946050902495422e+02 3.229250930687335e+01 - ME 1.049779024540051e-05 + ME 6.730603828970119e-05 Event 233 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1868,7 +1868,7 @@ Event 233 Batch 0 2 3.640046126075324e+02 -2.220120664068515e+02 -1.165482463207536e+02 2.638683509799470e+02 3 4.682121509308883e+02 -1.009786196736112e+02 3.762431872847591e+02 2.597441061312976e+02 4 
6.677832364615790e+02 3.229906860804628e+02 -2.596949409640055e+02 -5.236124571112447e+02 - ME 7.598357868514145e-04 + ME 5.385640989777132e-03 Event 234 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1876,7 +1876,7 @@ Event 234 Batch 0 2 8.690043548936441e+01 -2.607433849884744e+01 -7.258333015587984e+01 4.004341073848801e+01 3 6.785651905172676e+02 -3.574930335951373e+02 -4.725723606052789e+01 5.748184081539155e+02 4 7.345343739933678e+02 3.835673720939847e+02 1.198405662164078e+02 -6.148618188924036e+02 - ME 8.152211059226219e-02 + ME 1.962113644780599e-01 Event 235 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1884,7 +1884,7 @@ Event 235 Batch 0 2 3.000566282865331e+02 1.219146462304108e+01 -2.126850238006026e+02 2.113064812540423e+02 3 7.160981218147422e+02 2.575873756248088e+02 2.779062108697769e+02 -6.076293293985470e+02 4 4.838452498987246e+02 -2.697788402478500e+02 -6.522118706917435e+01 3.963228481445046e+02 - ME 2.498899672933017e-05 + ME 3.940402333844027e-05 Event 236 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1892,7 +1892,7 @@ Event 236 Batch 0 2 1.510518772182422e+02 -9.497518588910037e+01 1.467158067736534e+01 1.165380984781943e+02 3 6.955499852411461e+02 5.933480346078575e+02 3.495450158124774e+02 9.770452249822526e+01 4 6.533981375406115e+02 -4.983728487187572e+02 -3.642165964898426e+02 -2.142426209764196e+02 - ME 2.623118294900277e-04 + ME 1.121647028585911e-03 Event 237 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1900,7 +1900,7 @@ Event 237 Batch 0 2 2.173874152942701e+02 2.069918593916189e+02 -3.850229167793934e+01 -5.412237993169356e+01 3 7.305677895866185e+02 -6.701932224704495e+02 -2.421540700080861e+02 1.610333695687662e+02 4 5.520447951191120e+02 4.632013630788306e+02 2.806563616860255e+02 -1.069109896370727e+02 - 
ME 2.170005261464319e-05 + ME 1.822378225061386e-04 Event 238 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1908,7 +1908,7 @@ Event 238 Batch 0 2 6.349573912113930e+02 -3.336495545457479e+02 -4.785400196851591e+02 2.506956580500139e+02 3 5.768887318987100e+02 4.812119270965607e+02 2.334547330568691e+02 -2.161818165921041e+02 4 2.881538768898968e+02 -1.475623725508129e+02 2.450852866282900e+02 -3.451384145790988e+01 - ME 1.383744831772315e-05 + ME 9.810731053503000e-05 Event 239 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1916,7 +1916,7 @@ Event 239 Batch 0 2 5.349076725903783e+02 -5.331874414268931e+02 1.887721601290929e+01 -3.848403846142781e+01 3 3.658437465440003e+02 8.335465236419728e+01 1.670818061666301e+01 -3.558292926602242e+02 4 5.992485808656214e+02 4.498327890626960e+02 -3.558539662957234e+01 3.943133311216517e+02 - ME 2.560110521983184e-05 + ME 9.226736931333760e-05 Event 240 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1924,7 +1924,7 @@ Event 240 Batch 0 2 2.870582387324442e+02 1.830793600232297e+02 -1.562409872742485e+02 1.564389154054251e+02 3 6.007192677438852e+02 3.433229388031108e+02 4.688113613010560e+02 -1.523446941819630e+02 4 6.122224935236703e+02 -5.264022988263405e+02 -3.125703740268075e+02 -4.094221223461989e+00 - ME 3.548113744927254e-05 + ME 1.424405912705748e-04 Event 241 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1932,7 +1932,7 @@ Event 241 Batch 0 2 7.424696267657401e+02 4.823783107714221e+02 2.498315161211407e+02 5.061190823507636e+02 3 2.455726236162737e+02 -1.827879695947952e+02 -1.199757723946156e+02 -1.118046764652876e+02 4 5.119577496179861e+02 -2.995903411766270e+02 -1.298557437265251e+02 -3.943144058854759e+02 - ME 2.366266620918590e-04 + ME 2.705973755259623e-03 Event 242 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1940,7 +1940,7 @@ Event 242 Batch 0 2 7.249130370348905e+02 1.676828147928013e+02 6.059046362201677e+02 -3.609168279440810e+02 3 6.240672718074169e+02 -4.529413961306761e+01 -5.490982345027019e+02 2.930862151720549e+02 4 1.510196911576933e+02 -1.223886751797337e+02 -5.680640171746593e+01 6.783061277202641e+01 - ME 1.668420503127583e-05 + ME 4.587322306592483e-05 Event 243 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1948,7 +1948,7 @@ Event 243 Batch 0 2 4.655090712555229e+02 2.096323612054770e+02 2.113490506800235e+02 3.578890153850057e+02 3 5.764797256412519e+02 6.697224883641857e+01 -5.382210340689440e+02 -1.953502251008744e+02 4 4.580112031032257e+02 -2.766046100418949e+02 3.268719833889206e+02 -1.625387902841314e+02 - ME 3.999521919602606e-05 + ME 2.309042201876567e-04 Event 244 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1956,7 +1956,7 @@ Event 244 Batch 0 2 5.237109195354749e+02 1.305098338947756e+02 -4.868141165486322e+02 -1.423106687020528e+02 3 5.804450110242352e+02 -4.045654344879671e+02 2.643676733537771e+02 3.214855413949400e+02 4 3.958440694402901e+02 2.740556005931916e+02 2.224464431948551e+02 -1.791748726928872e+02 - ME 2.634847163425152e-05 + ME 2.644202232750943e-04 Event 245 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1964,7 +1964,7 @@ Event 245 Batch 0 2 2.629169357520612e+02 2.457511487795889e+02 -4.402365929491729e+01 -8.242333044139184e+01 3 6.931386101565748e+02 -5.195573187661655e+02 4.004017488088275e+02 -2.240084037645317e+02 4 5.439444540913644e+02 2.738061699865766e+02 -3.563780895139104e+02 3.064317342059234e+02 - ME 1.052590061693975e-05 + ME 4.288053786412853e-05 Event 246 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-1972,7 +1972,7 @@ Event 246 Batch 0 2 6.300937687157445e+02 -5.459948028041557e+02 3.085954426748102e+02 6.063567799240802e+01 3 1.673910408536145e+02 -3.546130270298926e+01 7.662824936562275e+01 -1.445350060290698e+02 4 7.025151904306430e+02 5.814561055071442e+02 -3.852236920404341e+02 8.389932803666261e+01 - ME 1.915763997923398e-04 + ME 6.282756509154168e-04 Event 247 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1980,7 +1980,7 @@ Event 247 Batch 0 2 2.577847506495701e+02 2.418237207037818e+02 -8.449121421856779e+01 2.890502538162603e+01 3 5.130193185035739e+02 4.381905811488919e+02 1.366496386102691e+02 2.291390669832418e+02 4 7.291959308468561e+02 -6.800143018526737e+02 -5.215842439170134e+01 -2.580440923648679e+02 - ME 1.831864018495938e-03 + ME 4.005872724472581e-03 Event 248 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1988,7 +1988,7 @@ Event 248 Batch 0 2 7.033207479153643e+02 -5.040306065309413e+02 -2.020637997366072e+02 4.469714117975369e+02 3 1.758360012551320e+02 -1.471306652922549e+01 -4.035460943683606e+00 -1.751728862172264e+02 4 6.208432508295037e+02 5.187436730601667e+02 2.060992606802909e+02 -2.717985255803103e+02 - ME 1.512538512828554e-04 + ME 5.592865021063005e-04 Event 249 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1996,7 +1996,7 @@ Event 249 Batch 0 2 3.018816177222694e+02 5.523075638651412e+01 1.752331212074551e+02 2.395316845419020e+02 3 6.597415560701297e+02 6.315352823685419e+01 -6.561001191322722e+02 -2.834054254405022e+01 4 5.383768262076012e+02 -1.183842846233684e+02 4.808669979248172e+02 -2.111911419978518e+02 - ME 9.225490912808109e-05 + ME 4.868100986861644e-04 Event 250 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2004,7 +2004,7 @@ Event 250 Batch 0 2 2.166381935101301e+02 -1.289072913913530e+02 
-1.189615590004073e+02 -1.271344351215279e+02 3 6.815426093761062e+02 -2.511966318704653e+02 5.323234433390903e+02 3.435583388650892e+02 4 6.018191971137635e+02 3.801039232618182e+02 -4.133618843386827e+02 -2.164239037435611e+02 - ME 6.586594805989363e-05 + ME 3.468666532553966e-04 Event 251 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2012,7 +2012,7 @@ Event 251 Batch 0 2 6.676961532387151e+02 -3.991265595084280e+01 -4.419965947723094e+02 4.988628500443886e+02 3 7.150412702460949e+02 3.921851524844908e+01 5.505653759000154e+02 -4.545587894617490e+02 4 1.172625765151894e+02 6.941407023942340e-01 -1.085687811277060e+02 -4.430406058263954e+01 - ME 4.930952510857648e-05 + ME 5.615833562023813e-04 Event 252 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2020,7 +2020,7 @@ Event 252 Batch 0 2 2.112668789066533e+02 -1.147554660376938e+02 3.364589711187055e+01 -1.741632301749357e+02 3 7.393007599584276e+02 2.529046383258835e+02 -3.593132473314827e+02 5.945576909606565e+02 4 5.494323611349191e+02 -1.381491722881897e+02 3.256673502196121e+02 -4.203944607857206e+02 - ME 3.541023077707110e-04 + ME 2.709805393201018e-03 Event 253 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2028,7 +2028,7 @@ Event 253 Batch 0 2 7.299659304470913e+01 -4.405884533650594e+01 -5.451291667290519e+01 2.038780663930336e+01 3 7.253475305576840e+02 3.245698054519170e+02 -1.402290280555607e+02 -6.333397991328418e+02 4 7.016558763976062e+02 -2.805109601154107e+02 1.947419447284657e+02 6.129519924935382e+02 - ME 3.511004874943257e-04 + ME 6.484723438037138e-04 Event 254 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2036,7 +2036,7 @@ Event 254 Batch 0 2 1.982520535096858e+02 -6.164633378269741e+01 1.773450413210087e+02 -6.365801262063783e+01 3 7.183815394471145e+02 
-1.984891252513599e+02 -6.893152145826987e+02 -3.896971029099802e+01 4 5.833664070431995e+02 2.601354590340572e+02 5.119701732616900e+02 1.026277229116358e+02 - ME 1.539519794804785e-05 + ME 9.210498573936143e-05 Event 255 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2044,7 +2044,7 @@ Event 255 Batch 0 2 5.347080663542586e+02 -5.063606624096446e+02 1.592577719822621e+02 6.440929941880935e+01 3 2.475406015289465e+02 -1.856063881081879e+02 3.468010668896048e+00 -1.637516137347836e+02 4 7.177513321167953e+02 6.919670505178326e+02 -1.627257826511582e+02 9.934231431597431e+01 - ME 3.137689362725149e-04 + ME 1.305481727349711e-03 Event 0 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2052,7 +2052,7 @@ Event 0 Batch 1 2 5.775677821222389e+02 4.314431287975208e+02 -2.652567205762379e+02 -2.776332864556192e+02 3 6.023469575940325e+02 -3.228069847179709e+02 5.005558924007591e+02 8.978477890465942e+01 4 3.200852602837275e+02 -1.086361440795499e+02 -2.352991718245218e+02 1.878485075509607e+02 - ME 7.533072458757011e-06 + ME 2.846168667868940e-05 Event 1 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2060,7 +2060,7 @@ Event 1 Batch 1 2 7.241206267812560e+02 3.541578305635416e+02 -4.894807402105655e+02 3.991635230623179e+02 3 7.375567605136832e+02 -3.903081173548693e+02 4.920451519627784e+02 -3.867054653560791e+02 4 3.832261270506111e+01 3.615028679132773e+01 -2.564411752212873e+00 -1.245805770623896e+01 - ME 7.043932941624384e-05 + ME 1.002871021831580e-03 Event 2 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2068,7 +2068,7 @@ Event 2 Batch 1 2 4.849204091734790e+02 2.108660079931152e+02 4.054727376659824e+02 1.620962335024329e+02 3 2.728468517759738e+02 4.961449545460115e+01 2.005017763154939e+02 1.782774356422519e+02 4 7.422327390505470e+02 
-2.604805034477164e+02 -6.059745139814763e+02 -3.403736691446848e+02 - ME 1.721146206228212e-02 + ME 2.729395913593408e-02 Event 3 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2076,7 +2076,7 @@ Event 3 Batch 1 2 4.264155576764489e+02 -4.170952165204416e+02 -7.054834331799705e+01 5.370977042744418e+01 3 7.108631972082329e+02 6.832597695609467e+02 -1.727180704166534e+02 -9.301097030017993e+01 4 3.627212451153183e+02 -2.661645530405051e+02 2.432664137346505e+02 3.930119987273574e+01 - ME 5.739226791327231e-06 + ME 5.466137525204964e-05 Event 4 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2084,7 +2084,7 @@ Event 4 Batch 1 2 7.183269968238449e+02 -3.584978055671311e+02 -5.048824553914336e+02 -3.640971079361008e+02 3 7.387431276480253e+02 4.013538934928407e+02 5.036810263913359e+02 3.618865629982628e+02 4 4.292987552812846e+01 -4.285608792570924e+01 1.201429000097643e+00 2.210544937839338e+00 - ME 5.884725836744927e-05 + ME 3.145606575501715e-04 Event 5 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2092,7 +2092,7 @@ Event 5 Batch 1 2 4.529780005473896e+02 -8.443182436392424e+01 4.445408460134587e+02 -2.106590230986445e+01 3 4.683757780543924e+02 -6.076819021151039e+01 -1.335482427838441e+02 -4.448010379662153e+02 4 5.786462213982179e+02 1.452000145754347e+02 -3.109926032296145e+02 4.658669402760799e+02 - ME 2.851579396246287e-05 + ME 8.481958952475706e-05 Event 6 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2100,7 +2100,7 @@ Event 6 Batch 1 2 6.238848262005389e+02 -1.065131260140052e+02 -4.741487807795934e+02 -3.912418229627633e+02 3 1.729069432107234e+02 -1.460869767542721e+02 -8.199113358821990e+01 4.281191710484079e+01 4 7.032082305887380e+02 2.526001027682771e+02 5.561399143678132e+02 3.484299058579224e+02 - ME 1.468701510222534e-04 + ME 
4.868510537699180e-04 Event 7 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2108,7 +2108,7 @@ Event 7 Batch 1 2 6.977203086376783e+02 -6.126072843634399e+02 -1.744636661244187e+02 2.847602033865263e+02 3 1.614193396272251e+02 -4.571584237043670e+00 8.497734613495712e+01 -1.371646983269120e+02 4 6.408603517350967e+02 6.171788686004836e+02 8.948631998946138e+01 -1.475955050596143e+02 - ME 9.523334397108766e-05 + ME 3.540796080305845e-04 Event 8 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2116,7 +2116,7 @@ Event 8 Batch 1 2 6.871091945484288e+02 4.059708628308462e+02 2.886614153103366e+02 4.732666173272762e+02 3 5.653302025665631e+02 -2.838835484844413e+02 -7.353399035097291e+01 -4.833229987253825e+02 4 2.475606028850081e+02 -1.220873143464048e+02 -2.151274249593637e+02 1.005638139810634e+01 - ME 3.726341895116938e-05 + ME 8.785466054587446e-05 Event 9 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2124,7 +2124,7 @@ Event 9 Batch 1 2 1.618579955503452e+02 1.385215220188489e+01 1.601201234527701e+02 -1.917484467788566e+01 3 7.196660585644588e+02 -4.527189715496824e+02 -4.214090439733052e+02 3.679391067910628e+02 4 6.184759458851959e+02 4.388668193477974e+02 2.612889205205349e+02 -3.487642621131772e+02 - ME 1.276556148007894e-04 + ME 1.054640649369016e-03 Event 10 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2132,7 +2132,7 @@ Event 10 Batch 1 2 7.832785200561162e+01 1.027681340851886e+01 -7.242726264265977e+01 -2.799877018853974e+01 3 7.448007230566494e+02 2.520540107528716e+02 6.813719334665398e+02 1.641011304445167e+02 4 6.768714249377393e+02 -2.623308241613905e+02 -6.089446708238800e+02 -1.361023602559769e+02 - ME 1.087112534498832e-04 + ME 5.876642887714617e-04 Event 11 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 
7.500000000000000e+02 @@ -2140,7 +2140,7 @@ Event 11 Batch 1 2 5.478627446486676e+02 2.070882322301630e+02 -4.708081692757452e+02 1.887000762823861e+02 3 6.997827604382593e+02 -4.209013422316021e+02 4.569873120768409e+02 -3.220257264800591e+02 4 2.523544949130733e+02 2.138131100014392e+02 1.382085719890436e+01 1.333256501976729e+02 - ME 7.092902148917371e-06 + ME 2.703695959900953e-05 Event 12 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2148,7 +2148,7 @@ Event 12 Batch 1 2 5.802868936311938e+02 -4.467002255894120e+01 5.211262762381961e+02 -2.513262266832405e+02 3 5.208038834706859e+02 2.151797013176283e+01 -4.993650129388666e+02 -1.463155694111945e+02 4 3.989092228981199e+02 2.315205242717860e+01 -2.176126329932955e+01 3.976417960944350e+02 - ME 4.980323856672599e-04 + ME 5.046437564325244e-04 Event 13 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2156,7 +2156,7 @@ Event 13 Batch 1 2 5.774880087360024e+02 1.576445054854711e+02 5.481077151088400e+02 -9.065617884226717e+01 3 5.915098138161557e+02 -3.018001633277128e+02 -3.808656371901898e+02 3.372564123391869e+02 4 3.310021774478421e+02 1.441556578422419e+02 -1.672420779186502e+02 -2.466002334969197e+02 - ME 5.587942683639647e-05 + ME 1.505341700965184e-03 Event 14 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2164,7 +2164,7 @@ Event 14 Batch 1 2 2.531797527967491e+02 -8.400833666640553e+01 -2.384535242035555e+02 -1.350938161690895e+01 3 5.261064571264828e+02 -1.751971590790252e+02 -3.334570051994592e+02 3.672878780523887e+02 4 7.207137900767681e+02 2.592054957454308e+02 5.719105294030147e+02 -3.537784964354798e+02 - ME 1.659114310450813e-03 + ME 3.373121845959189e-03 Event 15 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2172,7 +2172,7 @@ Event 15 Batch 1 2 4.605848765362425e+02 
3.563504404614684e+02 1.735853700506503e+02 2.345653669687875e+02 3 4.216445088607453e+02 1.370719005416187e+02 -3.933730877164850e+02 6.521502736890037e+01 4 6.177706146030118e+02 -4.934223410030871e+02 2.197877176658347e+02 -2.997803943376878e+02 - ME 9.110622752737525e-05 + ME 4.613631402771334e-04 Event 16 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2180,7 +2180,7 @@ Event 16 Batch 1 2 4.972484926572777e+02 -1.474122335888775e+02 -4.748950276275915e+02 -6.399787981958280e-01 3 5.072511849723048e+02 4.846784046822065e+02 1.224000792205880e+02 -8.607455661990267e+01 4 4.955003223704169e+02 -3.372661710933285e+02 3.524949484070036e+02 8.671453541809866e+01 - ME 1.035537635543116e-05 + ME 5.856804747367533e-05 Event 17 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2188,7 +2188,7 @@ Event 17 Batch 1 2 3.182636773520259e+02 -9.176062613973060e+01 -1.890905041641619e+02 2.389906630959087e+02 3 6.376303990615819e+02 -4.240378519397394e+02 2.706855745366566e+02 -3.917827786765570e+02 4 5.441059235863918e+02 5.157984780794702e+02 -8.159507037249479e+01 1.527921155806483e+02 - ME 2.964570775197734e-05 + ME 7.445984612273079e-05 Event 18 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2196,7 +2196,7 @@ Event 18 Batch 1 2 5.532560008158404e+02 -4.148613005881325e+02 1.689647846464811e+02 -3.247047971041214e+02 3 3.650144721835348e+02 -1.597348634907620e+02 -2.160675866909894e+02 2.470529017650751e+02 4 5.817295270006244e+02 5.745961640788944e+02 4.710280204450838e+01 7.765189533904635e+01 - ME 3.148325734685632e-05 + ME 9.119298978738387e-05 Event 19 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2204,7 +2204,7 @@ Event 19 Batch 1 2 3.263687475619531e+02 -1.904667433734991e+02 2.390747946355329e+02 -1.143775398573919e+02 3 7.331345945903582e+02 
2.597391859223821e+02 -6.739404183465077e+02 1.258022320965774e+02 4 4.404966578476884e+02 -6.927244254888298e+01 4.348656237109747e+02 -1.142469223918529e+01 - ME 9.665339952809457e-06 + ME 8.793129888044293e-05 Event 20 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2212,7 +2212,7 @@ Event 20 Batch 1 2 9.588718605412237e+01 4.259536217794532e+01 8.056474827260676e+01 -2.982128277051557e+01 3 7.250265356668370e+02 3.120913743414047e+02 -4.446787057645155e+02 4.801284204484703e+02 4 6.790862782790414e+02 -3.546867365193502e+02 3.641139574919093e+02 -4.503071376779550e+02 - ME 6.402422614019696e-04 + ME 3.686389281265799e-03 Event 21 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2220,7 +2220,7 @@ Event 21 Batch 1 2 1.825278201605081e+02 -1.533737674675502e+02 8.574830442242751e+01 4.939757963742074e+01 3 7.183016103669913e+02 1.713205736990392e+02 -6.275703015775031e+02 -3.045685162014731e+02 4 5.991705694725008e+02 -1.794680623148897e+01 5.418219971550755e+02 2.551709365640523e+02 - ME 1.806434468406198e-05 + ME 7.470861105912214e-05 Event 22 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2228,7 +2228,7 @@ Event 22 Batch 1 2 2.349542451120770e+02 9.235159917618290e+01 -2.156570331301489e+02 -1.291214495308476e+01 3 7.360601907662837e+02 -2.182033070539752e+02 6.568866822530020e+02 -2.503433799808774e+02 4 5.289855641216395e+02 1.258517078777923e+02 -4.412296491228531e+02 2.632555249339621e+02 - ME 8.007442232312076e-06 + ME 3.893602972207037e-05 Event 23 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2236,7 +2236,7 @@ Event 23 Batch 1 2 2.350908908124364e+02 -7.377772511691019e+00 -2.298431804723787e+02 -4.884063683135331e+01 3 6.797114625392685e+02 -5.485955088721076e+02 3.603976926464840e+02 1.765336882516069e+02 4 5.851976466482949e+02 
5.559732813837987e+02 -1.305545121741055e+02 -1.276930514202538e+02 - ME 3.185713653214173e-05 + ME 2.057468423101862e-04 Event 24 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2244,7 +2244,7 @@ Event 24 Batch 1 2 4.355364173804401e+02 2.538053291625626e+02 -2.665393838801487e+02 -2.328767540869265e+02 3 4.093863144993796e+02 -1.953012891316528e+02 -3.573484670764558e+02 4.191221827828568e+01 4 6.550772681201798e+02 -5.850404003090968e+01 6.238878509566048e+02 1.909645358086408e+02 - ME 3.721637657688893e-05 + ME 1.895168702655672e-04 Event 25 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2252,7 +2252,7 @@ Event 25 Batch 1 2 7.365386968907909e+02 3.875876454009267e+02 3.151568854896985e+02 5.412404333367775e+02 3 5.208510884285567e+02 -2.430585576296288e+02 -1.518636440371932e+02 -4.349089876054084e+02 4 2.426102146806534e+02 -1.445290877712977e+02 -1.632932414525050e+02 -1.063314457313693e+02 - ME 7.982561935336398e-05 + ME 3.717867207603688e-04 Event 26 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2260,7 +2260,7 @@ Event 26 Batch 1 2 7.198867014174701e+02 5.189601929589824e+02 4.797253921416957e+02 -1.370428003807496e+02 3 3.889101953712928e+02 -1.847394503243419e+02 -2.837815501141775e+02 1.912864537085460e+02 4 3.912031032112371e+02 -3.342207426346404e+02 -1.959438420275183e+02 -5.424365332779646e+01 - ME 1.928349098758061e-05 + ME 1.222836766708484e-04 Event 27 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2268,7 +2268,7 @@ Event 27 Batch 1 2 6.732032222628646e+02 5.870808395006010e+02 -9.126179303429218e+01 3.165595544104447e+02 3 1.177373967283342e+02 7.847176641415683e+01 5.304379211899001e+00 -8.761358356661104e+01 4 7.090593810088013e+02 -6.655526059147578e+02 8.595741382239324e+01 -2.289459708438336e+02 - ME 6.795383824785976e-04 + 
ME 1.603290018002586e-03 Event 28 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2276,7 +2276,7 @@ Event 28 Batch 1 2 6.475300414228806e+02 3.136396845517189e+02 3.816259196370642e+02 -4.186728559156669e+02 3 7.290923529036073e+02 -2.791764769994177e+02 -4.112865540505715e+02 5.333662195995520e+02 4 1.233776056735125e+02 -3.446320755230100e+01 2.966063441350738e+01 -1.146933636838856e+02 - ME 6.311296815400830e-04 + ME 5.037107889244314e-02 Event 29 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2284,7 +2284,7 @@ Event 29 Batch 1 2 3.156754590345620e+02 -2.870540678871016e+02 4.159516713841874e+01 -1.245825012466667e+02 3 4.770060274033896e+02 -2.355061130652810e+02 -3.231858413754910e+02 -2.600433287405434e+02 4 7.073185135620483e+02 5.225601809523826e+02 2.815906742370723e+02 3.846258299872100e+02 - ME 1.321807869823317e-04 + ME 7.956699356695784e-04 Event 30 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2292,7 +2292,7 @@ Event 30 Batch 1 2 6.091290614220995e+02 1.543004089904798e+02 4.216196287493766e+00 -5.892468251447810e+02 3 2.079357839022729e+02 2.034647466922837e+02 4.185675980476618e+01 9.348729279626889e+00 4 6.829351546756266e+02 -3.577651556827627e+02 -4.607295609226003e+01 5.798980958651539e+02 - ME 1.448382779935031e-04 + ME 3.902231064020147e-04 Event 31 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2300,7 +2300,7 @@ Event 31 Batch 1 2 6.901710072855793e+02 1.433309098684656e+01 6.447948515477649e+02 -2.457034416076623e+02 3 5.898919363861644e+02 1.120085307876391e+02 -4.815950471622465e+02 3.217029626736535e+02 4 2.199370563282564e+02 -1.263416217744856e+02 -1.631998043855182e+02 -7.599952106599136e+01 - ME 2.376400497996635e-05 + ME 2.415465849322543e-04 Event 32 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -2308,7 +2308,7 @@ Event 32 Batch 1 2 6.144498311923271e+02 5.832947925341469e+02 -1.925283703230110e+02 1.576726595169125e+01 3 2.478450424037004e+02 5.004284035329792e+01 2.389954177960992e+02 4.247433867565734e+01 4 6.377051264039724e+02 -6.333376328874447e+02 -4.646704747308818e+01 -5.824160462734862e+01 - ME 5.390650629646604e-05 + ME 2.160220890176678e-04 Event 33 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2316,7 +2316,7 @@ Event 33 Batch 1 2 6.134536717469736e+02 -1.625429495269566e+02 -1.853973484494194e+02 5.617232593785355e+02 3 5.361644687950269e+02 -3.755831293394986e+01 -9.992652347025609e+01 -5.254297294928764e+02 4 3.503818594579993e+02 2.001012624609065e+02 2.853238719196754e+02 -3.629352988565911e+01 - ME 1.005452860076771e-04 + ME 1.224582992507153e-04 Event 34 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2324,7 +2324,7 @@ Event 34 Batch 1 2 3.840838099420727e+02 -2.442269925519278e+02 -3.827314394217582e+01 -2.939535943332559e+02 3 6.022630974514659e+02 3.956891925431131e+01 5.086724982658299e+02 3.200116071158652e+02 4 5.136530926064613e+02 2.046580732976165e+02 -4.703993543236541e+02 -2.605801278260916e+01 - ME 2.313941306740064e-05 + ME 9.608243105510499e-05 Event 35 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2332,7 +2332,7 @@ Event 35 Batch 1 2 3.454350783663418e+02 -3.439607925797615e+02 2.363778141880094e+01 -2.139209721976717e+01 3 6.705698302143294e+02 5.215327591153251e+02 4.060443141865528e+02 -1.131171661597076e+02 4 4.839950914193290e+02 -1.775719665355635e+02 -4.296820956053536e+02 1.345092633794747e+02 - ME 7.982017052260048e-06 + ME 4.862206803317224e-05 Event 36 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2340,7 +2340,7 @@ Event 36 Batch 1 2 
7.098652154429357e+02 2.489290984574327e+02 -1.674080692141068e+02 -6.433641786725617e+02 3 6.178479130357197e+02 -1.435715807033598e+02 2.588953561477193e+02 5.423065917191846e+02 4 1.722868715213448e+02 -1.053575177540730e+02 -9.148728693361247e+01 1.010575869533772e+02 - ME 5.562249548714765e-05 + ME 6.680529568232270e-05 Event 37 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2348,7 +2348,7 @@ Event 37 Batch 1 2 6.906872786346031e+02 1.495946561071237e+02 1.712833879510068e+02 6.521750966909805e+02 3 3.682276595245592e+02 -1.358558710218083e+02 1.194309698061993e+02 -3.207351477449753e+02 4 4.410850618408380e+02 -1.373878508531530e+01 -2.907143577572061e+02 -3.314399489460051e+02 - ME 5.542438863722841e-04 + ME 2.014943348935539e-03 Event 38 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2356,7 +2356,7 @@ Event 38 Batch 1 2 6.131720166645955e+02 -5.222102655174087e+02 6.340623138461877e+00 3.213038392347352e+02 3 4.540063357567760e+02 2.932429176443922e+02 -3.207297067242505e+02 -1.313879727496968e+02 4 4.328216475786277e+02 2.289673478730168e+02 3.143890835857886e+02 -1.899158664850380e+02 - ME 3.150821423911933e-05 + ME 2.589645049118943e-04 Event 39 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2364,7 +2364,7 @@ Event 39 Batch 1 2 2.929747896182304e+02 2.510117592312210e+02 -1.378648144805472e+02 6.181113983529403e+01 3 6.287164314722783e+02 3.864928360025993e+01 6.254120614625328e+02 5.148142827864510e+01 4 5.783087789094894e+02 -2.896610428314818e+02 -4.875472469819856e+02 -1.132925681139394e+02 - ME 2.723120294663496e-05 + ME 1.708238325115053e-04 Event 40 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2372,7 +2372,7 @@ Event 40 Batch 1 2 1.143487538112954e+02 -3.203572478439017e+01 1.022340126870988e+02 3.996944439980560e+01 3 
7.361483923235807e+02 5.924235295921244e+02 -3.838567751530157e+02 -2.088128187524163e+02 4 6.495028538651248e+02 -5.603878048077345e+02 2.816227624659169e+02 1.688433743526105e+02 - ME 4.279185076498264e-05 + ME 2.026369815874481e-04 Event 41 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2380,7 +2380,7 @@ Event 41 Batch 1 2 6.384898508133350e+02 5.540399192408263e+02 -3.014826159773289e+02 -9.908223727147148e+01 3 3.510407251698805e+02 -1.719168197014114e+02 2.065966849440144e+02 -2.258140996521069e+02 4 5.104694240167846e+02 -3.821230995394149e+02 9.488593103331458e+01 3.248963369235784e+02 - ME 1.488395965626735e-05 + ME 4.455092331482675e-05 Event 42 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2388,7 +2388,7 @@ Event 42 Batch 1 2 3.291654598309212e+02 -1.090829060981258e+02 2.972891943885482e+02 -8.983292515941632e+01 3 6.884965239796815e+02 4.933628807557017e+02 -2.919492821202986e+02 3.812953554581829e+02 4 4.823380161893969e+02 -3.842799746575757e+02 -5.339912268249619e+00 -2.914624302987665e+02 - ME 5.767145017550451e-05 + ME 6.690811667999076e-04 Event 43 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2396,7 +2396,7 @@ Event 43 Batch 1 2 3.674173006007981e+02 2.791827424102563e+02 1.079644067383057e+02 2.130637369397045e+02 3 7.392205647816575e+02 -6.110484627794917e+02 -4.247874240022372e+01 -4.138385868609020e+02 4 3.933621346175442e+02 3.318657203692355e+02 -6.548566433808202e+01 2.007748499211975e+02 - ME 6.513986915725277e-06 + ME 2.734436884563990e-05 Event 44 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2404,7 +2404,7 @@ Event 44 Batch 1 2 2.081359682230012e+02 -1.082501549908087e+02 1.771964605001424e+02 1.427934167997762e+01 3 7.449563315308093e+02 5.092828751965591e+02 -5.388739609944279e+02 7.215083562608928e+01 4 
5.469077002461893e+02 -4.010327202057504e+02 3.616775004942854e+02 -8.643017730606689e+01 - ME 1.838899544278803e-05 + ME 1.760644262839344e-04 Event 45 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2412,7 +2412,7 @@ Event 45 Batch 1 2 5.180982465404422e+02 4.470261481799612e+02 -3.368837017252423e+01 -2.597277606009553e+02 3 3.377595659674062e+02 -7.316527185649456e+01 2.454727770679006e+02 -2.201624016839132e+02 4 6.441421874921515e+02 -3.738608763234666e+02 -2.117844068953763e+02 4.798901622848684e+02 - ME 4.091340785269233e-05 + ME 1.645403798734011e-04 Event 46 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2420,7 +2420,7 @@ Event 46 Batch 1 2 6.296560291524888e+02 2.172411497655985e+02 5.821614514430422e+02 -1.017892054705761e+02 3 6.224001894826197e+02 1.405102091633609e+01 -6.218608257778048e+02 2.176414579432105e+01 4 2.479437813648912e+02 -2.312921706819346e+02 3.969937433476264e+01 8.002505967625511e+01 - ME 7.434320230190137e-06 + ME 4.041878897626609e-05 Event 47 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2428,7 +2428,7 @@ Event 47 Batch 1 2 5.458843469271557e+02 -1.019033861791133e+02 -1.559739004096151e+02 5.131058004898495e+02 3 2.573134207008558e+02 6.791700498899543e+01 -2.412204887508016e+02 5.839651284901167e+01 4 6.968022323719882e+02 3.398638119011781e+01 3.971943891604168e+02 -5.715023133388611e+02 - ME 4.005478861198618e-03 + ME 1.408798022766008e-02 Event 48 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2436,7 +2436,7 @@ Event 48 Batch 1 2 6.623920218006384e+02 -6.284562032939594e+02 -1.837527125398962e+02 -1.002044496053409e+02 3 1.251779629744606e+02 -7.502448682133647e+01 9.550779386908961e+01 3.031682869117444e+01 4 7.124300152249010e+02 7.034806901152959e+02 8.824491867080658e+01 6.988762091416655e+01 - ME 
3.004757451335502e-04 + ME 8.682321044518227e-04 Event 49 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2444,7 +2444,7 @@ Event 49 Batch 1 2 2.397494808364364e+02 2.393958238941666e+02 -4.144666783354266e+00 -1.233996761053010e+01 3 6.782491241100328e+02 -3.516321535544010e+02 -2.705899831712919e+02 5.129890485673947e+02 4 5.820013950535307e+02 1.122363296602344e+02 2.747346499546462e+02 -5.006490809568646e+02 - ME 6.040872325723622e-04 + ME 9.041285542966720e-03 Event 50 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2452,7 +2452,7 @@ Event 50 Batch 1 2 4.764898792162554e+02 4.667163214316568e+02 5.900817880915086e+01 -7.573978570375913e+01 3 5.114228101321805e+02 -2.035689445851523e+02 -4.549677995197112e+02 -1.145306811477843e+02 4 5.120873106515638e+02 -2.631473768465044e+02 3.959596207105603e+02 1.902704668515434e+02 - ME 9.692662313613028e-06 + ME 5.157319121365441e-05 Event 51 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2460,7 +2460,7 @@ Event 51 Batch 1 2 4.678795643859630e+02 4.629737719234085e+02 5.365495313512251e+01 4.108186077915564e+01 3 6.311645871918951e+02 -4.500610707732837e+02 -4.345770688214700e+02 8.340587481742408e+01 4 4.009558484221416e+02 -1.291270115012470e+01 3.809221156863474e+02 -1.244877355965797e+02 - ME 1.293558494013996e-05 + ME 1.517985021504320e-04 Event 52 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2468,7 +2468,7 @@ Event 52 Batch 1 2 3.696230029266819e+02 2.516704934433110e+02 2.514038675722595e+02 1.003953305301004e+02 3 6.696174214325739e+02 -2.754912388418390e+01 -6.493999246431116e+02 -1.609604756850079e+02 4 4.607595756407442e+02 -2.241213695591271e+02 3.979960570708519e+02 6.056514515490756e+01 - ME 8.655753222194317e-06 + ME 5.727699238559496e-05 Event 53 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2476,7 +2476,7 @@ Event 53 Batch 1 2 7.284624742442375e+01 -4.271742504396477e+01 -2.683807109937144e+01 -5.255012179908527e+01 3 7.493542950735829e+02 3.356513586119740e+02 2.501807367708783e+02 6.215139772812374e+02 4 6.777994575019936e+02 -2.929339335680093e+02 -2.233426656715069e+02 -5.689638554821522e+02 - ME 2.372423861687152e-03 + ME 1.612275481129464e-02 Event 54 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2484,7 +2484,7 @@ Event 54 Batch 1 2 7.460259847230064e+02 2.055186857047568e+01 6.233229443227743e+02 4.093908861479223e+02 3 5.756222844616437e+02 2.606063779094539e+01 -4.696411468594731e+02 -3.318117699890848e+02 4 1.783517308153497e+02 -4.661250636142109e+01 -1.536817974633012e+02 -7.757911615883735e+01 - ME 5.046268590690708e-05 + ME 4.374243668355642e-04 Event 55 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2492,7 +2492,7 @@ Event 55 Batch 1 2 5.967428482894213e+02 -8.165820254184375e+01 5.098287527914877e+02 -2.991798919868828e+02 3 5.942526243827265e+02 5.606061544962815e+01 -2.905196430116550e+02 5.153559216750568e+02 4 3.090045273278509e+02 2.559758709221549e+01 -2.193091097798325e+02 -2.161760296881746e+02 - ME 1.849048785615045e-04 + ME 1.779007466146034e-03 Event 56 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2500,7 +2500,7 @@ Event 56 Batch 1 2 5.610874267302015e+02 -4.199055433713192e+02 3.580252469767042e+02 1.015694718309908e+02 3 6.303091265298390e+02 2.130872195586830e+02 -5.453843477211296e+02 -2.333224059286980e+02 4 3.086034467399593e+02 2.068183238126362e+02 1.873591007444254e+02 1.317529340977073e+02 - ME 7.213009143835112e-06 + ME 3.258989367177766e-05 Event 57 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2508,7 +2508,7 @@ Event 57 
Batch 1 2 6.552053965855981e+02 4.516249927537604e+02 7.110694105335197e+00 4.746350341729917e+02 3 6.035190443408458e+02 -3.717228873476765e+02 2.148772607224587e+02 -4.241286299324850e+02 4 2.412755590735562e+02 -7.990210540608396e+01 -2.219879548277939e+02 -5.050640424050685e+01 - ME 3.752873989265266e-05 + ME 1.623545585873121e-04 Event 58 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2516,7 +2516,7 @@ Event 58 Batch 1 2 2.959982971085279e+02 1.850007048157144e+02 -2.304987961744356e+02 1.612563397119956e+01 3 7.018897389129390e+02 -3.764226030262936e+02 4.376344751014918e+02 3.992884868423144e+02 4 5.021119639785326e+02 1.914218982105791e+02 -2.071356789270567e+02 -4.154141208135139e+02 - ME 1.901193343270815e-04 + ME 4.558573859477246e-03 Event 59 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2524,7 +2524,7 @@ Event 59 Batch 1 2 5.521089721327345e+02 1.223876815062619e+02 -3.629066091228882e+01 -5.371485459866160e+02 3 4.098988410471214e+02 -5.841964900319319e+01 -3.626461945087767e+02 1.819119075553315e+02 4 5.379921868201441e+02 -6.396803250306872e+01 3.989368554210655e+02 3.552366384312845e+02 - ME 1.780280399801712e-05 + ME 5.148841296796537e-05 Event 60 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2532,7 +2532,7 @@ Event 60 Batch 1 2 7.143828168925960e+02 -4.584044193456332e+02 -2.419772079280938e+02 -4.915844060170314e+02 3 1.284110307517517e+02 8.324300347118127e+01 -7.889851197070540e+01 5.774963203893758e+01 4 6.572061523556514e+02 3.751614158744520e+02 3.208757198987992e+02 4.338347739780938e+02 - ME 7.144001898958308e-05 + ME 1.673517837789511e-04 Event 61 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2540,7 +2540,7 @@ Event 61 Batch 1 2 4.394390210968651e+02 -2.137451655543886e+02 -3.779414621253704e+02 -6.767502250635177e+01 
3 4.431311911324728e+02 3.845666395406355e+02 -2.150363068358313e+02 4.725610065709574e+01 4 6.174297877706618e+02 -1.708214739862469e+02 5.929777689612018e+02 2.041892184925626e+01 - ME 2.870354731125455e-05 + ME 1.368591177943825e-04 Event 62 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2548,7 +2548,7 @@ Event 62 Batch 1 2 7.301725729481176e+02 4.281927891852710e+02 5.652737593150771e+02 -1.739784429324868e+02 3 7.567373964415995e+01 2.589885732647599e+01 -5.696550981957816e+01 4.255225906941358e+01 4 6.941536874077224e+02 -4.540916465117469e+02 -5.083082494954988e+02 1.314261838630732e+02 - ME 2.379197431250548e-04 + ME 8.513592598060080e-04 Event 63 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2556,7 +2556,7 @@ Event 63 Batch 1 2 4.361152320236988e+02 -3.738769057978321e+02 1.427754799584550e+02 -1.732850750548248e+02 3 5.817148313055657e+02 5.081993893256957e+02 2.829214478037172e+02 -8.998890070513914e+00 4 4.821699366707353e+02 -1.343224835278637e+02 -4.256969277621721e+02 1.822839651253387e+02 - ME 8.350404272725701e-06 + ME 4.544766189571194e-05 Event 64 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2564,7 +2564,7 @@ Event 64 Batch 1 2 6.097675704107204e+02 3.288514690970509e+02 4.971291587853200e+02 -1.285916042465611e+02 3 5.709532610348123e+02 -6.501292612520263e+01 -4.768258747557200e+02 3.072426254385416e+02 4 3.192791685544673e+02 -2.638385429718484e+02 -2.030328402960006e+01 -1.786510211919805e+02 - ME 3.000969253297957e-05 + ME 4.598138986874043e-04 Event 65 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2572,7 +2572,7 @@ Event 65 Batch 1 2 6.258641293880484e+02 3.743515439843765e+02 -1.622018320411498e+02 -4.746128903155367e+02 3 7.438702198751357e+02 -4.029113627030089e+02 2.325939036896868e+02 5.804355380128616e+02 4 
1.302656507368158e+02 2.855981871863233e+01 -7.039207164853700e+01 -1.058226476973252e+02 - ME 3.162776051460646e-04 + ME 6.427333508548903e-03 Event 66 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2580,7 +2580,7 @@ Event 66 Batch 1 2 3.731957242404369e+02 1.596860493342637e+01 -3.714568973276624e+02 3.224632809376674e+01 3 6.079923612940432e+02 4.451199598539357e+02 3.189341902600864e+02 -2.642043054431177e+02 4 5.188119144655197e+02 -4.610885647873621e+02 5.252270706757586e+01 2.319579773493509e+02 - ME 1.034065067393998e-05 + ME 4.681392980523237e-05 Event 67 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2588,7 +2588,7 @@ Event 67 Batch 1 2 7.084256499213539e+02 6.318790977834966e+02 -2.229764540025608e+02 2.299504472951746e+02 3 5.168612394424738e+01 1.130069959366449e+01 -1.428140623590627e+01 4.837138651102398e+01 4 7.398882261343989e+02 -6.431797973771612e+02 2.372578602384670e+02 -2.783218338061985e+02 - ME 1.479715191731530e-02 + ME 5.878400132197954e-02 Event 68 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2596,7 +2596,7 @@ Event 68 Batch 1 2 5.644037677826096e+02 -7.446914007305443e+01 3.170710956176409e+02 4.609467220707991e+02 3 4.303832728799333e+02 -1.588265612792408e+02 -3.994808673830752e+02 -2.046757440246668e+01 4 5.052129593374568e+02 2.332957013522950e+02 8.240977176543441e+01 -4.404791476683325e+02 - ME 3.274273226082449e-04 + ME 8.108482137897523e-03 Event 69 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2604,7 +2604,7 @@ Event 69 Batch 1 2 2.379282923937934e+02 -4.413455715133102e+01 1.058497776082811e+02 -2.084654354245804e+02 3 5.822935131976616e+02 -5.806422676829345e+02 4.095409019445288e+01 -1.559022092337181e+01 4 6.797781944085444e+02 6.247768248342655e+02 -1.468038678027338e+02 2.240556563479522e+02 - ME 
6.379305675073031e-05 + ME 3.039802585689931e-04 Event 70 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2612,7 +2612,7 @@ Event 70 Batch 1 2 5.861861307468000e+02 1.831219916849830e+02 2.904683423406074e+02 -4.750880530376756e+02 3 4.633200606614189e+02 -4.245314712871158e+02 -1.339518705596282e+02 1.284344380284135e+02 4 4.504938085917810e+02 2.414094796021329e+02 -1.565164717809791e+02 3.466536150092620e+02 - ME 1.325653453486623e-05 + ME 3.530491740557932e-05 Event 71 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2620,7 +2620,7 @@ Event 71 Batch 1 2 7.383412459951699e+02 5.748049255568963e+02 -1.639684737984460e+02 -4.334298474879633e+02 3 3.973981306646684e+02 -3.228684354469153e+02 -4.837114091238284e+00 2.316416412804533e+02 4 3.642606233401616e+02 -2.519364901099809e+02 1.688055878896842e+02 2.017882062075102e+02 - ME 1.333441808219846e-05 + ME 3.103530482016079e-05 Event 72 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2628,7 +2628,7 @@ Event 72 Batch 1 2 3.538199915090663e+02 3.512029503136998e+02 -6.467835580753929e+00 -4.246458742680748e+01 3 5.344234504985296e+02 1.310173344785605e+01 3.836805260246265e+01 5.328833470497182e+02 4 6.117565579924039e+02 -3.643046837615559e+02 -3.190021702170876e+01 -4.904187596229107e+02 - ME 2.994704399169685e-03 + ME 9.376669006106200e-03 Event 73 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2636,7 +2636,7 @@ Event 73 Batch 1 2 4.694927197571710e+02 1.451947293992222e+02 -1.807863847612341e+02 4.082379055705570e+02 3 5.537325951281179e+02 -5.796379956652479e+01 5.401382741253894e+02 -1.072876026015002e+02 4 4.767746851147115e+02 -8.723092983269744e+01 -3.593518893641554e+02 -3.009503029690568e+02 - ME 1.535829386616431e-04 + ME 1.077472469645428e-03 Event 74 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2644,7 +2644,7 @@ Event 74 Batch 1 2 6.258444305735198e+02 -3.349227552763227e+02 4.941036656040852e+02 1.880679848209580e+02 3 5.555040664889822e+02 3.765538795180102e+01 -5.474422011270130e+02 -8.645158222500005e+01 4 3.186515029374982e+02 2.972673673245214e+02 5.333853552292791e+01 -1.016164025959578e+02 - ME 1.487896902219418e-05 + ME 1.623439923565115e-04 Event 75 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2652,7 +2652,7 @@ Event 75 Batch 1 2 3.943316317993887e+02 5.588489849751632e+01 -2.552251009651266e+02 -2.953548066221912e+02 3 5.467466262348042e+02 -3.021648543602057e+02 -2.377479281839000e+02 3.887212326756534e+02 4 5.589217419658066e+02 2.462799558626894e+02 4.929730291490265e+02 -9.336642605346221e+01 - ME 4.632408498797698e-05 + ME 1.348649436679123e-04 Event 76 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2660,7 +2660,7 @@ Event 76 Batch 1 2 5.517772830004059e+02 2.282681125856672e+02 -4.885490190451381e+02 -1.169260227747471e+02 3 4.245403880864563e+02 -2.793100283061228e+02 1.521744876196477e+02 -2.811821020654221e+02 4 5.236823289131380e+02 5.104191572045557e+01 3.363745314254903e+02 3.981081248401691e+02 - ME 1.645260485784409e-05 + ME 5.074216551061466e-05 Event 77 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2668,7 +2668,7 @@ Event 77 Batch 1 2 3.781543446472003e+02 -5.926925448310480e+01 -1.775497893613220e+02 3.285786605157444e+02 3 6.702964816234122e+02 -6.066564226432872e+01 -1.057468051743550e+02 -6.591165802199176e+02 4 4.515491737293867e+02 1.199348967474336e+02 2.832965945356770e+02 3.305379197041734e+02 - ME 5.041095643414513e-05 + ME 6.321080405055773e-05 Event 78 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2676,7 +2676,7 @@ Event 78 
Batch 1 2 4.564262045363139e+02 1.882572856930395e+02 1.751822011208171e+02 -3.770878823051468e+02 3 3.809544602625751e+02 -2.816334489555117e+02 1.992812047321844e+02 -1.615422627793184e+02 4 6.626193352011103e+02 9.337616326247226e+01 -3.744634058530013e+02 5.386301450844651e+02 - ME 6.222463480998997e-05 + ME 2.572921643188974e-04 Event 79 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2684,7 +2684,7 @@ Event 79 Batch 1 2 6.126536521478922e+02 6.075062399138452e+02 -4.178945028651393e+01 6.733726903166659e+01 3 2.872846052831658e+02 -1.084163947926161e+02 2.139961846825774e+01 2.651799127051085e+02 4 6.000617425689430e+02 -4.990898451212283e+02 2.038983181825616e+01 -3.325171817367756e+02 - ME 6.289823950094716e-04 + ME 1.996659951821530e-03 Event 80 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2692,7 +2692,7 @@ Event 80 Batch 1 2 4.171281258707700e+02 -2.756641813219371e+02 1.445082905894664e+01 3.127240094205691e+02 3 3.805235327384960e+02 -2.955852199231463e+02 2.395269588958384e+02 7.373784162959287e+00 4 7.023483413907342e+02 5.712494012450838e+02 -2.539777879547846e+02 -3.200977935835284e+02 - ME 5.629434448779270e-04 + ME 1.297520069620947e-03 Event 81 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2700,7 +2700,7 @@ Event 81 Batch 1 2 7.471091333863935e+02 -9.753029041192970e+01 7.407154559164039e+02 -7.162458282065091e-01 3 6.775352561453885e+02 9.550863422814814e+01 -6.702673865908516e+02 -2.595678293896889e+01 4 7.535561046821789e+01 2.021656183781575e+00 -7.044806932555213e+01 2.667302876717550e+01 - ME 2.904529061551848e-05 + ME 1.022399816924924e-04 Event 82 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2708,7 +2708,7 @@ Event 82 Batch 1 2 4.309094465924175e+02 3.042233433179616e+02 2.799835808203350e+02 -1.214096495919827e+02 3 
5.540384887187945e+02 -4.824447657759213e+02 1.988969596446625e+02 1.861335391629672e+02 4 5.150520646887885e+02 1.782214224579596e+02 -4.788805404649973e+02 -6.472388957098450e+01 - ME 1.778678120024833e-05 + ME 1.053635072607165e-04 Event 83 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2716,7 +2716,7 @@ Event 83 Batch 1 2 4.869534474909295e+02 -4.727010820510885e+02 1.062322962656182e+02 4.890855018466118e+01 3 3.520990385354405e+02 -1.437544586613779e+02 -3.142298368411062e+02 6.758696761482639e+01 4 6.609475139736298e+02 6.164555407124665e+02 2.079975405754878e+02 -1.164955177994876e+02 - ME 7.948516811691567e-05 + ME 2.998516055200512e-04 Event 84 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2724,7 +2724,7 @@ Event 84 Batch 1 2 1.391975815431583e+01 -3.682657486111166e-01 -1.138840508663312e+01 -7.995516055627093e+00 3 7.493632094786751e+02 -3.452281541586202e+01 3.833012084573049e+02 6.429880080772211e+02 4 7.367170323670085e+02 3.489108116447313e+01 -3.719128033706718e+02 -6.349924920215940e+02 - ME 8.671177508029917e-02 + ME 3.806217512266510e-01 Event 85 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2732,7 +2732,7 @@ Event 85 Batch 1 2 7.362448947738020e+02 6.409220704967113e+02 3.243429451315054e+02 1.614840505254833e+02 3 1.517836214454495e+02 -1.266859291808411e+02 -6.780846852200752e+01 4.889738933094901e+01 4 6.119714837807480e+02 -5.142361413158706e+02 -2.565344766094980e+02 -2.103814398564324e+02 - ME 1.062305495679385e-04 + ME 5.694785892689211e-04 Event 86 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2740,7 +2740,7 @@ Event 86 Batch 1 2 5.451728369778392e+02 -6.605005893803180e+01 1.066920544886257e+02 -5.305352178712969e+02 3 3.158718592284829e+02 -1.755596039144849e+02 2.550395858012225e+02 6.251932981237656e+01 4 
6.389553037936773e+02 2.416096628525165e+02 -3.617316402898481e+02 4.680158880589203e+02 - ME 4.057626974930324e-05 + ME 1.469986179099727e-04 Event 87 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2748,7 +2748,7 @@ Event 87 Batch 1 2 3.414211232216659e+02 1.437256906952883e+02 1.534640422371205e+02 -2.689983214749668e+02 3 5.081668091119999e+02 4.794742948200324e+02 -1.464748766741243e+02 8.296394996143997e+01 4 6.504120676663341e+02 -6.231999855153207e+02 -6.989165562996117e+00 1.860343715135268e+02 - ME 3.656584417835253e-05 + ME 1.823135893899652e-04 Event 88 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2756,7 +2756,7 @@ Event 88 Batch 1 2 2.925516585730864e+02 1.655911293372511e+01 2.598275245766865e+02 -1.334238591297045e+02 3 7.159840369510271e+02 -1.056844973272874e+02 -3.694097043713192e+02 6.041526284885822e+02 4 4.914643044758866e+02 8.912538439356234e+01 1.095821797946327e+02 -4.707287693588777e+02 - ME 2.327745727475104e-03 + ME 8.728488941697977e-02 Event 89 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2764,7 +2764,7 @@ Event 89 Batch 1 2 6.333634651097186e+02 1.209853522660007e+02 5.372166546881791e+02 -3.129058794565919e+02 3 6.221307427802806e+02 5.757192259699385e+01 -4.327483989541182e+02 4.432391657372765e+02 4 2.445057921100010e+02 -1.785572748629945e+02 -1.044682557340609e+02 -1.303332862806847e+02 - ME 5.047204144927262e-05 + ME 5.497507832908574e-04 Event 90 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2772,7 +2772,7 @@ Event 90 Batch 1 2 3.111538587406461e+02 2.628215106651484e+02 -6.985334981761831e+01 -1.512021390726355e+02 3 5.216486323898988e+02 1.252715366480781e+02 4.457714554600226e+02 -2.402335265468457e+02 4 6.671975088694549e+02 -3.880930473132266e+02 -3.759181056424042e+02 3.914356656194811e+02 - ME 
4.503542584588689e-05 + ME 2.329075524537458e-04 Event 91 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2780,7 +2780,7 @@ Event 91 Batch 1 2 3.007803348469016e+02 8.390513937949677e+01 2.884042062049404e+02 -1.586667134655829e+01 3 6.256884422056424e+02 2.364580673743878e+02 -3.590826126759745e+02 -4.545693416378727e+02 4 5.735312229474563e+02 -3.203632067538847e+02 7.067840647103421e+01 4.704360129844310e+02 - ME 2.635583378174906e-05 + ME 6.478111274774788e-05 Event 92 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2788,7 +2788,7 @@ Event 92 Batch 1 2 6.843865618656529e+02 -2.264962467301474e+02 -5.909185329480341e+02 2.605757158639088e+02 3 6.645516272550811e+02 3.453347116263074e+02 4.983670680340538e+02 -2.720350487207341e+02 4 1.510618108792659e+02 -1.188384648961601e+02 9.255146491398015e+01 1.145933285682523e+01 - ME 1.711437740567050e-05 + ME 9.365402433981294e-05 Event 93 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2796,7 +2796,7 @@ Event 93 Batch 1 2 5.579763469381434e+02 2.180908585044468e+02 5.135246110359701e+02 8.151996049100932e+00 3 3.333821836060117e+02 1.681122988324202e+02 -1.261705574188212e+02 2.587719570738210e+02 4 6.086414694558448e+02 -3.862031573368670e+02 -3.873540536171486e+02 -2.669239531229223e+02 - ME 1.157787815150910e-04 + ME 5.183695239236329e-04 Event 94 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2804,7 +2804,7 @@ Event 94 Batch 1 2 4.534979734151987e+02 1.139662723650677e+02 2.686183171543304e+01 4.381216071501101e+02 3 3.856184698299744e+02 1.545134372854228e+02 -3.452526490806396e+02 7.501873282757614e+01 4 6.608835567548277e+02 -2.684797096504910e+02 3.183908173652065e+02 -5.131403399776862e+02 - ME 1.545010233607317e-03 + ME 6.944325623628402e-03 Event 95 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2812,7 +2812,7 @@ Event 95 Batch 1 2 2.828073115974175e+02 -5.711637476392460e+01 5.915078172645698e+01 -2.705898746219725e+02 3 6.809618671276158e+02 3.772100991821226e+02 3.247893528880094e+02 4.646864338535512e+02 4 5.362308212749670e+02 -3.200937244181981e+02 -3.839401346144663e+02 -1.940965592315787e+02 - ME 6.408796328924562e-05 + ME 2.560512106670314e-04 Event 96 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2820,7 +2820,7 @@ Event 96 Batch 1 2 4.639832102051440e+02 -4.275497908582962e+02 -1.317248975374901e+02 -1.230046627491649e+02 3 7.474114851375481e+02 6.594176555428718e+02 2.654537688070380e+02 2.309254864669502e+02 4 2.886053046573076e+02 -2.318678646845757e+02 -1.337288712695479e+02 -1.079208237177853e+02 - ME 1.445191791082226e-05 + ME 2.440162169445852e-04 Event 97 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2828,7 +2828,7 @@ Event 97 Batch 1 2 5.095921959312568e+02 3.190102848863560e+02 3.100341192456060e+02 2.485869851668986e+02 3 4.555541331018014e+02 -2.788120391899956e+02 2.221549471930723e+02 -2.836205112936887e+02 4 5.348536709669415e+02 -4.019824569636059e+01 -5.321890664386783e+02 3.503352612679014e+01 - ME 2.250661525403011e-05 + ME 8.198891770965733e-05 Event 98 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2836,7 +2836,7 @@ Event 98 Batch 1 2 5.299941952467790e+02 -2.570048161992350e+02 -4.630296380940593e+02 -2.111695271961878e+01 3 7.352146396921255e+02 2.361229278157243e+02 6.962552486063584e+02 3.893348873424185e+00 4 2.347911650610957e+02 2.088188838351074e+01 -2.332256105122990e+02 1.722360384619465e+01 - ME 5.654417419793765e-06 + ME 6.760444392591968e-05 Event 99 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2844,7 +2844,7 @@ Event 99 
Batch 1 2 4.290897291078425e+02 3.747236205606835e+02 2.040795775432686e+02 -4.529602465443949e+01 3 6.438744429739487e+02 -5.215755139094103e+02 2.133414139578182e+01 3.769325350988583e+02 4 4.270358279182090e+02 1.468518933487271e+02 -2.254137189390505e+02 -3.316365104444187e+02 - ME 8.457850707842401e-05 + ME 2.024851967866169e-03 Event 100 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2852,7 +2852,7 @@ Event 100 Batch 1 2 5.119062275524872e+02 -4.721600394809319e+02 -1.845880136125884e+02 7.099400083769524e+01 3 4.523854579707449e+02 2.836789572262426e+02 -3.060214184981774e+02 -1.747276258374610e+02 4 5.357083144767672e+02 1.884810822546894e+02 4.906094321107658e+02 1.037336249997658e+02 - ME 1.420495101373495e-05 + ME 6.898305006855298e-05 Event 101 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2860,7 +2860,7 @@ Event 101 Batch 1 2 6.024072815192737e+02 -3.080418730730875e+02 -4.692284526425155e+02 2.186993289696520e+02 3 3.347434020484399e+02 8.940653726951260e+01 -3.939923552329941e+01 -3.201676381969582e+02 4 5.628493164322859e+02 2.186353358035749e+02 5.086276881658150e+02 1.014683092273061e+02 - ME 2.743452031293993e-05 + ME 9.290725627447436e-05 Event 102 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2868,7 +2868,7 @@ Event 102 Batch 1 2 5.910857738801296e+02 3.707548039128416e+02 -7.516477307090547e+01 -4.541734518311494e+02 3 2.311218706704979e+02 4.536804143672514e+01 -2.262982016400413e+02 1.217307902336991e+01 4 6.777923554493723e+02 -4.161228453495667e+02 3.014629747109467e+02 4.420003728077793e+02 - ME 7.158169676479796e-05 + ME 2.633339755449651e-04 Event 103 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2876,7 +2876,7 @@ Event 103 Batch 1 2 6.627949406417042e+02 7.189602123685950e+01 -6.391860825813610e+02 
-1.599038689489492e+02 3 5.519979886399102e+02 1.442810582977179e+02 4.734454174874869e+02 2.444057944057306e+02 4 2.852070707183856e+02 -2.161770795345774e+02 1.657406650938741e+02 -8.450192545678139e+01 - ME 1.658567428345252e-05 + ME 1.652798222861839e-04 Event 104 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2884,7 +2884,7 @@ Event 104 Batch 1 2 4.368180791462563e+02 -3.483499330357901e+02 -2.596280064690262e+02 4.533935023690698e+01 3 4.635715977792429e+02 1.873023362819025e+02 -2.251347602994603e+02 -3.593477435519053e+02 4 5.996103230745010e+02 1.610475967538876e+02 4.847627667684865e+02 3.140083933149983e+02 - ME 2.162124469235967e-05 + ME 9.158171748371188e-05 Event 105 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2892,7 +2892,7 @@ Event 105 Batch 1 2 5.701708357490469e+02 2.288495716262106e+02 -4.521314661478370e+02 -2.613422905391967e+02 3 3.711008490497917e+02 -3.362590561223710e+02 -8.126001400906793e+01 1.343223639771668e+02 4 5.587283152011612e+02 1.074094844961603e+02 5.333914801569049e+02 1.270199265620299e+02 - ME 1.720246557093887e-05 + ME 7.043372303967046e-05 Event 106 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2900,7 +2900,7 @@ Event 106 Batch 1 2 6.775588183099673e+02 5.149765831731705e+02 3.445381345095063e+02 -2.741870619150275e+02 3 7.044100837534635e+02 -4.546975847980706e+02 -4.392260662935809e+02 3.106833358270535e+02 4 1.180310979365712e+02 -6.027899837509908e+01 9.468793178407486e+01 -3.649627391202603e+01 - ME 2.786544600802367e-05 + ME 3.259673897057837e-04 Event 107 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2908,7 +2908,7 @@ Event 107 Batch 1 2 6.046880513041550e+02 2.289413119004024e+02 -5.349774474143721e+02 -1.644160754103499e+02 3 3.366746442316215e+02 -7.166101576320902e+01 2.452245434825371e+01 
3.280444544890399e+02 4 5.586373044642238e+02 -1.572802961371935e+02 5.104549930661184e+02 -1.636283790786902e+02 - ME 4.667002706670146e-04 + ME 8.859556065170558e-04 Event 108 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2916,7 +2916,7 @@ Event 108 Batch 1 2 6.239206451413978e+02 -2.218030564243363e+02 5.011455197099735e+02 -2.982172759400455e+02 3 2.841199272340513e+02 1.209406641294798e+02 7.967327320293104e+01 2.444374323800143e+02 4 5.919594276245514e+02 1.008623922948564e+02 -5.808187929129044e+02 5.377984356003120e+01 - ME 7.961277501126149e-05 + ME 1.727643234936365e-04 Event 109 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2924,7 +2924,7 @@ Event 109 Batch 1 2 3.093404598873124e+02 1.546999830656544e+02 1.629193992247174e+02 2.126421988200774e+02 3 5.287372542258961e+02 -2.136116696975048e+02 -1.865832176193536e+02 4.462284633214169e+02 4 6.619222858867909e+02 5.891168663185049e+01 2.366381839463621e+01 -6.588706621414941e+02 - ME 2.902408960420708e-01 + ME 1.686695657867669e+01 Event 110 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2932,7 +2932,7 @@ Event 110 Batch 1 2 4.920948406187608e+02 -8.595212543403569e+01 -4.824913009925944e+02 -4.440392734262522e+01 3 4.634042325716594e+02 -2.085760624772916e+00 1.255608851371819e+02 4.460645653843308e+02 4 5.445009268095798e+02 8.803788605880843e+01 3.569304158554124e+02 -4.016606380417056e+02 - ME 1.043536440561108e-03 + ME 4.151412887207382e-03 Event 111 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2940,7 +2940,7 @@ Event 111 Batch 1 2 4.637454700443120e+02 1.543048221589588e+02 -4.372769385391800e+02 6.225902899506631e+00 3 3.246747011850293e+02 -5.128652792678845e+01 -2.274142471268230e+02 2.259781269206006e+02 4 7.115798287706589e+02 -1.030182942321705e+02 6.646911856660031e+02 
-2.322040298201072e+02 - ME 5.219332617201280e-04 + ME 1.240833065187375e-03 Event 112 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2948,7 +2948,7 @@ Event 112 Batch 1 2 6.923761777814550e+02 3.939190124845535e+02 4.398224952082178e+01 -5.676954684419625e+02 3 5.277418353503033e+02 -4.270527740856185e+02 4.970714905179168e+01 3.060499505927539e+02 4 2.798819868682421e+02 3.313376160106501e+01 -9.368939857261346e+01 2.616455178492087e+02 - ME 4.381536575941429e-05 + ME 5.385735959435035e-05 Event 113 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2956,7 +2956,7 @@ Event 113 Batch 1 2 7.174898838850694e+02 -6.130145063482008e+02 3.726797356942233e+02 1.071275347265524e+01 3 1.705115822510491e+02 3.993583199494100e+01 -1.624320619120163e+02 3.309311510932528e+01 4 6.119985338638814e+02 5.730786743532599e+02 -2.102476737822071e+02 -4.380586858198049e+01 - ME 4.914674319256647e-05 + ME 2.197559713387976e-04 Event 114 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2964,7 +2964,7 @@ Event 114 Batch 1 2 6.772826088252357e+02 -1.430288042596954e+02 -3.410390118171982e+02 5.674036356844296e+02 3 6.725037798358682e+02 3.626161999767239e+01 2.510744134018114e+02 -6.228226615527174e+02 4 1.502136113388951e+02 1.067671842620232e+02 8.996459841538707e+01 5.541902586828807e+01 - ME 7.986648389935193e-05 + ME 8.926156406775035e-05 Event 115 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2972,7 +2972,7 @@ Event 115 Batch 1 2 9.320551230331124e+01 1.288474310894606e+01 -2.581623869377880e+01 8.862715576190526e+01 3 6.672654287607164e+02 1.525114284892182e+02 2.829200767588875e+02 5.847560574856374e+02 4 7.395290589359720e+02 -1.653961715981643e+02 -2.571038380651088e+02 -6.733832132475428e+02 - ME 4.304938165075599e-01 + ME 1.800237703627863e+00 Event 116 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2980,7 +2980,7 @@ Event 116 Batch 1 2 4.951202926530015e+02 -4.575339943514647e+02 4.220102313368785e+01 1.844608951947751e+02 3 3.101750696753587e+02 -4.711582585559527e+01 2.172188132736168e+02 2.163438466008694e+02 4 6.947046376716394e+02 5.046498202070600e+02 -2.594198364073050e+02 -4.008047417956444e+02 - ME 5.988625984136040e-04 + ME 1.933367100533606e-03 Event 117 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2988,7 +2988,7 @@ Event 117 Batch 1 2 6.543248494478489e+02 1.390926466871539e+02 9.107024539473488e+01 6.328510524967589e+02 3 5.040443237953712e+02 6.874740772121054e+01 1.336336536624387e+02 -4.811200690999848e+02 4 3.416308267567792e+02 -2.078400544083643e+02 -2.247038990571737e+02 -1.517309833967742e+02 - ME 3.026560085299302e-04 + ME 4.207453923038474e-04 Event 118 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2996,7 +2996,7 @@ Event 118 Batch 1 2 5.829230400014206e+02 5.307803371482089e+02 -3.192285892796672e+01 2.388565162167381e+02 3 3.965113090906140e+02 -5.470249758902820e+01 2.256187790844517e+02 -3.214420966810604e+02 4 5.205656509079653e+02 -4.760778395591807e+02 -1.936959201564850e+02 8.258558046432242e+01 - ME 2.168340782914014e-05 + ME 7.464562943747175e-05 Event 119 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3004,7 +3004,7 @@ Event 119 Batch 1 2 3.549567073991255e+02 2.281637891139605e+02 1.474502150787006e+02 2.284600261271838e+02 3 4.727085372220640e+02 7.463684946128350e+01 -3.092948822053327e+02 3.495988811576870e+02 4 6.723347553788102e+02 -3.028006385752440e+02 1.618446671266322e+02 -5.780589072848707e+02 - ME 1.664672733965846e-03 + ME 1.455012849105755e-02 Event 120 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-3012,7 +3012,7 @@ Event 120 Batch 1 2 7.192117275853698e+02 4.094232477570927e+02 -5.552624156333899e+02 -2.032775518283800e+02 3 3.685061529232585e+02 -2.522084621786424e+02 1.741347663658646e+02 2.046087962197375e+02 4 4.122821194913712e+02 -1.572147855784500e+02 3.811276492675253e+02 -1.331244391357209e+00 - ME 1.900262756274459e-05 + ME 9.281995463485567e-05 Event 121 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3020,7 +3020,7 @@ Event 121 Batch 1 2 1.923953846467517e+02 -5.182078839520096e+01 -1.486351786617837e+02 -1.106262789198433e+02 3 6.582127150877787e+02 -3.509182841037630e+02 -1.191939510078701e+02 5.439606035624541e+02 4 6.493919002654695e+02 4.027390724989639e+02 2.678291296696539e+02 -4.333343246426108e+02 - ME 5.360055113881300e-04 + ME 1.925188892577692e-03 Event 122 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3028,7 +3028,7 @@ Event 122 Batch 1 2 6.905732817636248e+02 3.462508192534570e+02 -5.375670569609784e+02 -2.608131264380775e+02 3 7.097575386120018e+02 -2.677396278645660e+02 5.849221766424142e+02 2.998954860604125e+02 4 9.966917962437387e+01 -7.851119138889094e+01 -4.735511968143584e+01 -3.908235962233509e+01 - ME 3.451011759976180e-05 + ME 5.007312135859238e-04 Event 123 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3036,7 +3036,7 @@ Event 123 Batch 1 2 4.035126033432560e+02 2.481103298242076e+01 -3.878573016343356e+02 -1.085059780294573e+02 3 3.541388771651666e+02 1.572344474048876e+02 -3.105653677404273e+02 -6.512161875550808e+01 4 7.423485194915780e+02 -1.820454803873083e+02 6.984226693747627e+02 1.736275967849660e+02 - ME 3.471230489499830e-03 + ME 2.043564129780385e-02 Event 124 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3044,7 +3044,7 @@ Event 124 Batch 1 2 5.353042728143347e+02 -4.785252055946481e+02 
-2.279396245170433e+02 7.488537693644093e+01 3 7.454081943698113e+02 6.785307544150930e+02 3.069354144183444e+02 -3.193811081429426e+01 4 2.192875328158541e+02 -2.000055488204448e+02 -7.899578990130104e+01 -4.294726612214667e+01 - ME 6.765427234678898e-06 + ME 1.399009675490331e-04 Event 125 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3052,7 +3052,7 @@ Event 125 Batch 1 2 7.351681880566981e+02 -1.932492970253984e+01 -4.393064933429818e+02 -5.891592456452273e+02 3 6.537497908129355e+02 -2.883189353576726e+01 3.454898907503182e+02 5.542510679217788e+02 4 1.110820211303664e+02 4.815682323830688e+01 9.381660259266363e+01 3.490817772344844e+01 - ME 6.639428548470109e-05 + ME 1.431077255619906e-04 Event 126 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3060,7 +3060,7 @@ Event 126 Batch 1 2 5.568747108147126e+02 1.149185667256990e+02 4.264979152236775e+02 -3.391204725116689e+02 3 6.934211462641822e+02 -1.939160042589616e+02 -6.294239612595663e+02 2.169215212257340e+02 4 2.497041429211053e+02 7.899743753326281e+01 2.029260460358889e+02 1.221989512859350e+02 - ME 9.143592130512915e-06 + ME 3.344185566612618e-05 Event 127 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3068,7 +3068,7 @@ Event 127 Batch 1 2 7.108931196972316e+02 4.270547743949553e+02 5.664613189451065e+02 -4.598718776252147e+01 3 4.445675167124290e+02 -1.247884466860518e+02 -4.129475031266345e+02 1.074359351009545e+02 4 3.445393635903407e+02 -3.022663277089035e+02 -1.535138158184720e+02 -6.144874733843321e+01 - ME 1.427738327825488e-05 + ME 1.180920695556687e-04 Event 128 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3076,7 +3076,7 @@ Event 128 Batch 1 2 5.312407894292422e+02 -7.192118124205533e+01 -4.398126160332176e+02 -2.891521793453568e+02 3 5.717192413787027e+02 3.434745903572437e+02 
1.811915566412192e+02 4.195923218357252e+02 4 3.970399691920551e+02 -2.715534091151883e+02 2.586210593919984e+02 -1.304401424903685e+02 - ME 3.532660248239223e-05 + ME 1.848006274423395e-04 Event 129 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3084,7 +3084,7 @@ Event 129 Batch 1 2 6.644129951428383e+02 -3.595672586482287e+02 4.645590915434784e+02 3.103882489514914e+02 3 1.967652372382455e+02 -5.204943416929049e+01 8.794498000645085e+00 -1.895522930301724e+02 4 6.388217676189169e+02 4.116166928175192e+02 -4.733535895441232e+02 -1.208359559213191e+02 - ME 9.192558188476414e-05 + ME 3.082956717278722e-04 Event 130 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3092,7 +3092,7 @@ Event 130 Batch 1 2 7.302263990443511e+02 -1.919590472356484e+02 3.836584700935805e+02 -5.909217345563752e+02 3 4.156541164903923e+02 2.203243106780774e+02 -1.767969453775071e+02 3.049071707664833e+02 4 3.541194844652567e+02 -2.836526344242890e+01 -2.068615247160734e+02 2.860145637898919e+02 - ME 2.258971422042701e-05 + ME 3.110012368642411e-05 Event 131 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3100,7 +3100,7 @@ Event 131 Batch 1 2 2.308323688168238e+02 -1.780469473698228e+02 1.469011263880862e+02 1.710582294195638e+00 3 7.308075033948297e+02 5.219262643529272e+02 -3.840435213624620e+02 3.379099810545737e+02 4 5.383601277883465e+02 -3.438793169831044e+02 2.371423949743758e+02 -3.396205633487694e+02 - ME 7.770640764079256e-05 + ME 1.061667055612532e-03 Event 132 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3108,7 +3108,7 @@ Event 132 Batch 1 2 5.909630762789660e+02 -4.293852116769707e+02 -3.988922148105424e+02 7.583335995300355e+01 3 5.415993952096327e+02 2.260703809971038e+02 3.221145619770360e+02 -3.721079100067703e+02 4 3.674375285114020e+02 2.033148306798666e+02 
7.677765283350686e+01 2.962745500537670e+02 - ME 1.628447412544396e-05 + ME 3.321676569401813e-05 Event 133 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3116,7 +3116,7 @@ Event 133 Batch 1 2 4.506052863582997e+02 2.189991325227701e+02 -3.914006430783634e+02 -4.347459771134355e+01 3 4.043998006859111e+02 3.160348074769272e+02 8.738893432792010e+01 2.366946839598570e+02 4 6.449949129557901e+02 -5.350339399996973e+02 3.040117087504433e+02 -1.932200862485142e+02 - ME 8.705579101282482e-05 + ME 3.121497332919934e-04 Event 134 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3124,7 +3124,7 @@ Event 134 Batch 1 2 7.151470882937614e+02 -1.041377497037516e+01 -4.186394096729767e+01 7.138447461686595e+02 3 3.416424731356660e+02 1.638631808685801e+02 3.081581136487586e+01 -2.981925940995343e+02 4 4.432104385705719e+02 -1.534494058982047e+02 1.104812960242199e+01 -4.156521520691248e+02 - ME 6.342792451335309e-03 + ME 5.534325530265236e-02 Event 135 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3132,7 +3132,7 @@ Event 135 Batch 1 2 7.115730144432832e+02 -3.219296530898238e+02 2.184242454110169e+02 -5.958089478700319e+02 3 1.627059459894212e+02 -6.880794311551747e+01 -3.259803939022061e+01 1.437917231708342e+02 4 6.257210395672955e+02 3.907375962053413e+02 -1.858262060207963e+02 4.520172246991979e+02 - ME 1.277979532321233e-04 + ME 2.112989182930814e-04 Event 136 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3140,7 +3140,7 @@ Event 136 Batch 1 2 7.195404287114588e+02 -4.369992732083461e+02 -4.270318019286997e+02 3.800182941743402e+02 3 6.668605996318223e+02 3.634158794560479e+02 4.690430049045651e+02 -3.043527845290675e+02 4 1.135989716567186e+02 7.358339375229815e+01 -4.201120297586535e+01 -7.566550964527264e+01 - ME 7.515399240093053e-05 + ME 
1.804344388349211e-03 Event 137 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3148,7 +3148,7 @@ Event 137 Batch 1 2 6.722782806744999e+02 -6.045581260407005e+02 -2.538460778300668e+02 1.484241478840623e+02 3 6.869263774705689e+02 6.661257235671316e+02 1.481819739565761e+02 -7.865412297735662e+01 4 1.407953418549304e+02 -6.156759752643097e+01 1.056641038734908e+02 -6.977002490670534e+01 - ME 2.119149330726453e-05 + ME 5.192812231664224e-04 Event 138 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3156,7 +3156,7 @@ Event 138 Batch 1 2 6.463287544295633e+02 8.684709774942756e+01 2.409249839962013e+02 -5.934253049048401e+02 3 3.917330799270068e+02 1.767690441671677e+02 4.696120064017492e+01 3.464132742372293e+02 4 4.619381656434300e+02 -2.636161419165952e+02 -2.878861846363762e+02 2.470120306676108e+02 - ME 4.203806696206548e-05 + ME 5.804753959762886e-05 Event 139 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3164,7 +3164,7 @@ Event 139 Batch 1 2 2.994802063237944e+02 -1.272876183039153e+02 6.552211336810879e+00 2.710042891410713e+02 3 7.257546970836092e+02 -8.848613612326799e+00 5.127896146768584e+00 -7.256826352181574e+02 4 4.747650965925943e+02 1.361362319162416e+02 -1.168010748357900e+01 4.546783460770868e+02 - ME 1.500396153249019e-04 + ME 1.724196014694060e-04 Event 140 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3172,7 +3172,7 @@ Event 140 Batch 1 2 7.326756101999780e+02 5.655005379385240e+02 4.343799907428446e+02 1.683351270988810e+02 3 7.428339005597779e+02 -5.680473426214219e+02 -4.534832054058505e+02 -1.532233754243464e+02 4 2.449048924024402e+01 2.546804682897962e+00 1.910321466300584e+01 -1.511175167453447e+01 - ME 1.024603362434272e-04 + ME 4.669436438173466e-03 Event 141 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -3180,7 +3180,7 @@ Event 141 Batch 1 2 7.363238871411332e+02 -6.772722174663238e+02 -2.824373475598683e+02 -6.086341204880675e+01 3 5.504260535970963e+02 4.650298533191528e+02 2.914345410616540e+02 4.221355560271704e+01 4 2.132500592617708e+02 2.122423641471711e+02 -8.997193501785816e+00 1.864985644608987e+01 - ME 1.166401869382226e-05 + ME 7.300791864660033e-05 Event 142 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3188,7 +3188,7 @@ Event 142 Batch 1 2 5.862280565156834e+02 4.248793793115829e+01 -2.479279504752411e+02 -5.295184989682986e+02 3 4.287264749982929e+02 -3.025296967755320e+02 2.785471849307642e+02 1.212173201341831e+02 4 4.850454684860405e+02 2.600417588443628e+02 -3.061923445551928e+01 4.083011788341197e+02 - ME 1.949810022878841e-05 + ME 4.569028399965169e-05 Event 143 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3196,7 +3196,7 @@ Event 143 Batch 1 2 2.464531733710510e+02 4.046044690030688e+01 -2.103865804466287e+02 1.218179201483223e+02 3 5.378449948854583e+02 4.607829603950880e+02 -2.747641700963839e+02 3.822241180409925e+01 4 7.157018317434903e+02 -5.012434072953949e+02 4.851507505430126e+02 -1.600403319524219e+02 - ME 4.863434295951330e-04 + ME 1.284493741497843e-03 Event 144 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3204,7 +3204,7 @@ Event 144 Batch 1 2 5.367418008803521e+02 -1.343004856786532e+02 -4.048537736989352e+02 -3.258044847458254e+02 3 6.294877130859599e+02 3.313530054622211e+02 5.282137272543231e+02 8.631468610520756e+01 4 3.337704860336884e+02 -1.970525197835678e+02 -1.233599535553879e+02 2.394897986406179e+02 - ME 8.754930746282009e-06 + ME 2.612855607885159e-05 Event 145 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3212,7 +3212,7 @@ Event 145 Batch 1 2 
6.805380148481771e+01 -3.411514819754512e+01 -4.339750646760406e+01 -3.980116822894492e+01 3 6.831461500979880e+02 -3.834019790669201e+02 -2.756424954453614e+02 -4.936727656514237e+02 4 7.488000484171945e+02 4.175171272644653e+02 3.190400019129655e+02 5.334739338803686e+02 - ME 4.117012994651258e-01 + ME 4.832444287218038e-01 Event 146 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3220,7 +3220,7 @@ Event 146 Batch 1 2 5.031746658797123e+02 4.202301876294930e+02 2.767377273314875e+02 2.750283520766640e+00 3 4.317115817339341e+02 -1.098088257924671e+02 -5.455162180567243e+01 4.139336083717602e+02 4 5.651137523863538e+02 -3.104213618370259e+02 -2.221861055258150e+02 -4.166838918925268e+02 - ME 1.122040831263755e-03 + ME 4.446377084117306e-03 Event 147 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3228,7 +3228,7 @@ Event 147 Batch 1 2 4.251223043705630e+02 -4.223502783198938e+02 -4.694338569631599e+01 1.206377286808446e+01 3 5.457819748703678e+02 2.791608945230574e+02 -4.384138579515959e+02 -1.665546403390879e+02 4 5.290957207590696e+02 1.431893837968364e+02 4.853572436479118e+02 1.544908674710035e+02 - ME 1.117959404473985e-05 + ME 5.820013407126093e-05 Event 148 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3236,7 +3236,7 @@ Event 148 Batch 1 2 6.905785821272525e+02 6.249608768654489e+02 -6.243387159972350e+01 -2.870970082698929e+02 3 1.361638260920089e+02 2.862044352088506e+01 1.704210379179796e+01 1.320266050727362e+02 4 6.732575917807402e+02 -6.535813203863343e+02 4.539176780792534e+01 1.550704031971573e+02 - ME 5.047601105033982e-04 + ME 9.573948308169230e-04 Event 149 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3244,7 +3244,7 @@ Event 149 Batch 1 2 6.694705528096943e+02 -5.216497821741067e+02 -3.785079074709545e+02 1.811189935345937e+02 3 
2.821401257551277e+02 1.148500354702071e-01 2.786662494166578e+02 -4.413795199872407e+01 4 5.483893214351779e+02 5.215349321386365e+02 9.984165805429673e+01 -1.369810415358697e+02 - ME 3.486097449584098e-05 + ME 1.943324414096923e-04 Event 150 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3252,7 +3252,7 @@ Event 150 Batch 1 2 4.637486188995366e+02 -4.033412855298819e+02 -2.279949807412008e+02 -1.992178895453991e+01 3 3.756800751656199e+02 6.230662615514293e+01 -2.632310737913946e+02 -2.606967683041707e+02 4 6.605713059348438e+02 3.410346593747391e+02 4.912260545325952e+02 2.806185572587107e+02 - ME 4.211370643652993e-05 + ME 2.156945366470290e-04 Event 151 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3260,7 +3260,7 @@ Event 151 Batch 1 2 3.821954355913596e+02 -2.528320044280690e+02 2.861764538722267e+02 1.588602445142563e+01 3 6.796189325418250e+02 2.911670128135291e+02 -4.900375979142738e+02 3.700902818893582e+02 4 4.381856318668152e+02 -3.833500838546018e+01 2.038611440420471e+02 -3.859763063407838e+02 - ME 1.923941526207248e-04 + ME 8.197229841786387e-03 Event 152 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3268,7 +3268,7 @@ Event 152 Batch 1 2 6.751133298339792e+02 -2.999578895043981e+02 -2.855974213275218e+02 -5.331391803034741e+02 3 4.976977783498468e+02 -3.003988119418482e+00 1.843802943840355e+02 4.622747685874795e+02 4 3.271888918161745e+02 3.029618776238166e+02 1.012171269434863e+02 7.086441171599445e+01 - ME 6.977738125195056e-05 + ME 1.204579535049519e-04 Event 153 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3276,7 +3276,7 @@ Event 153 Batch 1 2 1.729293620257127e+02 1.558357805102956e+02 -7.193392860849491e+01 2.110174585940510e+01 3 6.524550819255464e+02 2.410158908712478e+02 5.786677971610501e+02 1.809766692333240e+02 4 
6.746155560487412e+02 -3.968516713815435e+02 -5.067338685525552e+02 -2.020784150927291e+02 - ME 1.391654510317005e-04 + ME 5.985591428637023e-04 Event 154 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3284,7 +3284,7 @@ Event 154 Batch 1 2 6.585658455851002e+02 -2.410305357139302e+02 -2.116446673272157e+02 -5.751693564652295e+02 3 5.764400833248005e+02 3.388133979948972e+02 3.092747322371399e+02 3.490527051926400e+02 4 2.649940710900988e+02 -9.778286228096688e+01 -9.763006490992416e+01 2.261166512725894e+02 - ME 2.686434432328395e-05 + ME 3.655181799213059e-05 Event 155 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3292,7 +3292,7 @@ Event 155 Batch 1 2 5.686586231936359e+02 -1.693366246265498e+02 -1.542203680657918e+02 5.204938187588979e+02 3 1.882190564276536e+02 -1.089234770645493e+02 -9.145416397064866e+01 1.232810822434430e+02 4 7.431223203787102e+02 2.782601016910992e+02 2.456745320364404e+02 -6.437749010023409e+02 - ME 4.701119881405690e-01 + ME 6.696396361607482e-01 Event 156 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3300,7 +3300,7 @@ Event 156 Batch 1 2 6.143652095725128e+02 2.879464601546110e+02 5.379391909976823e+02 -7.178351904348040e+01 3 6.287751645293085e+02 -4.584164185734781e+02 -4.225140875260598e+02 -8.181956094447702e+01 4 2.568596258981782e+02 1.704699584188668e+02 -1.154251034716223e+02 1.536030799879581e+02 - ME 7.769660148731367e-06 + ME 2.899571701789112e-05 Event 157 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3308,7 +3308,7 @@ Event 157 Batch 1 2 5.050842109798973e+02 4.185498850973046e+02 -1.305174306570672e+02 -2.507812875014723e+02 3 5.170424494038050e+02 -3.084595065654854e+02 3.930456446728388e+02 -1.330441599566699e+02 4 4.778733396162975e+02 -1.100903785318191e+02 -2.625282140157716e+02 3.838254474581424e+02 
- ME 1.243977993100618e-05 + ME 4.033251359625283e-05 Event 158 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3316,7 +3316,7 @@ Event 158 Batch 1 2 4.312542366204098e+02 -3.114503370626313e+02 2.737030704635235e+02 1.185982013584742e+02 3 6.944315393047829e+02 2.166643175309468e+02 -6.173965008138002e+02 -2.326226495269423e+02 4 3.743142240748070e+02 9.478601953168439e+01 3.436934303502764e+02 1.140244481684682e+02 - ME 5.864250821924803e-06 + ME 3.680357310121394e-05 Event 159 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3324,7 +3324,7 @@ Event 159 Batch 1 2 5.860112473308646e+02 -1.581297551692178e+02 4.935632758462007e+02 2.734948907463652e+02 3 3.772013313646349e+02 -2.371132827856262e+02 -1.305099443644436e+02 -2.627266448837395e+02 4 5.367874213045002e+02 3.952430379548442e+02 -3.630533314817573e+02 -1.076824586262577e+01 - ME 2.805189658646002e-05 + ME 1.030382455754272e-04 Event 160 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3332,7 +3332,7 @@ Event 160 Batch 1 2 5.883409724804535e+02 -3.739819298758817e+02 -2.887651121595530e+02 3.505671490956299e+02 3 4.300332553173178e+02 1.788055146224819e+02 3.829208006453583e+02 7.955406370837679e+01 4 4.816257722022287e+02 1.951764152533999e+02 -9.415568848580530e+01 -4.301212128040066e+02 - ME 2.307516153071828e-04 + ME 9.797271586219467e-03 Event 161 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3340,7 +3340,7 @@ Event 161 Batch 1 2 6.868305165969147e+02 4.119610488151656e+00 5.515184990814985e+02 4.093244831537709e+02 3 3.260821955312833e+02 -1.956999890649130e+02 -2.483451099187458e+02 -7.972338993006402e+01 4 4.870872878718022e+02 1.915803785767614e+02 -3.031733891627526e+02 -3.296010932237070e+02 - ME 9.860610555787331e-05 + ME 1.075603053132144e-03 Event 162 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3348,7 +3348,7 @@ Event 162 Batch 1 2 2.159818802305119e+02 -2.018126805027919e+02 4.096951387107715e+01 -6.512536763314942e+01 3 6.870078865581224e+02 4.896730732821633e+02 -2.356527215298929e+02 -4.203188222421333e+02 4 5.970102332113654e+02 -2.878603927793715e+02 1.946832076588156e+02 4.854441898752826e+02 - ME 2.809071549115161e-05 + ME 5.344822454174306e-05 Event 163 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3356,7 +3356,7 @@ Event 163 Batch 1 2 4.889699854403287e+02 -4.067839821807834e+01 -2.740835242435768e+02 4.028835269878222e+02 3 4.282392920294498e+02 4.007468150560176e+02 -8.832740907173851e+01 -1.224301852772270e+02 4 5.827907225302220e+02 -3.600684168379390e+02 3.624109333153153e+02 -2.804533417105952e+02 - ME 1.173701793303044e-04 + ME 4.336231422638298e-04 Event 164 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3364,7 +3364,7 @@ Event 164 Batch 1 2 6.224346677404150e+02 -1.282049393554146e+02 5.480608628970117e+02 -2.657399098565701e+02 3 7.444531740822750e+02 1.794330131141779e+02 -6.708967511266460e+02 2.681638893170603e+02 4 1.331121581773107e+02 -5.122807375876333e+01 1.228358882296343e+02 -2.423979460490191e+00 - ME 1.571413941583783e-05 + ME 1.368953177788070e-04 Event 165 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3372,7 +3372,7 @@ Event 165 Batch 1 2 6.980339706506675e+02 -5.154669325341684e+01 -4.947847840614098e+02 4.896757907618869e+02 3 1.362964882116331e+02 4.252532371924361e+01 -5.641238783031591e+01 -1.165588780002596e+02 4 6.656695411377010e+02 9.021369534174053e+00 5.511971718917263e+02 -3.731169127616273e+02 - ME 4.238311927693088e-04 + ME 1.450267418906797e-03 Event 166 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-3380,7 +3380,7 @@ Event 166 Batch 1 2 3.060640747281171e+02 -1.981167412190918e+02 -9.095380261170779e+01 -2.148310510107333e+02 3 5.580104478575086e+02 -3.585720992432471e+02 -1.558095186186280e+02 3.981521109704927e+02 4 6.359254774143739e+02 5.566888404623389e+02 2.467633212303362e+02 -1.833210599597597e+02 - ME 1.099447007687216e-04 + ME 3.000804338470548e-04 Event 167 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3388,7 +3388,7 @@ Event 167 Batch 1 2 2.833153623322893e+02 2.526850217013923e+02 8.687924899084067e+01 9.417998957332070e+01 3 6.595685044563415e+02 -8.780626893611850e+01 -2.875856231737449e+02 -5.870393347553995e+02 4 5.571161332113688e+02 -1.648787527652738e+02 2.007063741829043e+02 4.928593451820789e+02 - ME 4.244421486768831e-05 + ME 7.367447958524992e-05 Event 168 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3396,7 +3396,7 @@ Event 168 Batch 1 2 6.026267479353969e+02 -5.987968578530475e+02 5.775180228477150e+00 6.758674164241529e+01 3 4.991211680715713e+02 3.812575567959843e+02 3.220701575873951e+02 -5.952259631185711e+00 4 3.982520839930309e+02 2.175393010570631e+02 -3.278453378158730e+02 -6.163448201122968e+01 - ME 1.203107058680061e-05 + ME 9.606399998327532e-05 Event 169 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3404,7 +3404,7 @@ Event 169 Batch 1 2 5.510662376679772e+02 -9.251111075413947e+01 -5.291920243323356e+02 -1.227660134875281e+02 3 5.034535790022877e+02 -2.816014265681677e+02 3.283802195198170e+02 2.575511098657944e+02 4 4.454801833297348e+02 3.741125373223072e+02 2.008118048125185e+02 -1.347850963782663e+02 - ME 2.085195230877358e-05 + ME 1.532484123791625e-04 Event 170 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3412,7 +3412,7 @@ Event 170 Batch 1 2 2.814808559369750e+02 3.658097943502287e+01 
-1.412301634042880e+02 -2.407225480659935e+02 3 6.646522150540470e+02 2.753499086551696e+02 -1.631412967142655e+02 5.825203104495404e+02 4 5.538669290089779e+02 -3.119308880901926e+02 3.043714601185535e+02 -3.417977623835468e+02 - ME 2.587160315460459e-04 + ME 7.823510217753851e-04 Event 171 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3420,7 +3420,7 @@ Event 171 Batch 1 2 1.777965289077954e+02 -6.143496808852239e+01 -1.603735842336773e+00 1.668375809551635e+02 3 7.439290290569696e+02 2.163074211412066e+01 -1.907051550939623e+01 -7.433699124308462e+02 4 5.782744420352348e+02 3.980422597440174e+01 2.067425135173305e+01 5.765323314756826e+02 - ME 1.981167274383509e-03 + ME 2.063755640794395e-03 Event 172 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3428,7 +3428,7 @@ Event 172 Batch 1 2 1.369499454750680e+02 -1.250080331667568e+01 -3.518152151649629e+01 -1.317622025690455e+02 3 6.692885586315896e+02 -2.346283187163472e+02 -6.130705295376303e+02 1.305421486874673e+02 4 6.937614958933425e+02 2.471291220330227e+02 6.482520510541266e+02 1.220053881578238e+00 - ME 1.548169060571347e-04 + ME 5.039586079692636e-04 Event 173 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3436,7 +3436,7 @@ Event 173 Batch 1 2 7.088772083623137e+02 4.973951266878932e+01 3.171232495758680e+01 -7.064185769505260e+02 3 5.785136264307895e+02 8.584813303397833e+01 5.766505028397120e+01 5.691949191590089e+02 4 2.126091652068944e+02 -1.355876457027672e+02 -8.937737524155732e+01 1.372236577915166e+02 - ME 1.732961413682620e-04 + ME 1.743760900867476e-04 Event 174 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3444,7 +3444,7 @@ Event 174 Batch 1 2 4.367208701713482e+02 -3.923163287174704e+01 4.325755195957351e+02 -4.543585887727652e+01 3 3.528978856725088e+02 9.622572295106905e+01 
1.987077746703234e+02 -2.753048278549415e+02 4 7.103812441561454e+02 -5.699409007932221e+01 -6.312832942660567e+02 3.207406867322186e+02 - ME 1.541208918572365e-04 + ME 9.353677491192390e-04 Event 175 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3452,7 +3452,7 @@ Event 175 Batch 1 2 6.418562164876806e+02 1.962785648722137e+02 -6.110736372974047e+02 -6.567908015856712e+00 3 4.843421844702149e+02 -1.886631806266161e+02 3.569879071908527e+02 -2.674942804112337e+02 4 3.738015990421035e+02 -7.615384245597569e+00 2.540857301065516e+02 2.740621884270906e+02 - ME 1.279055979705581e-05 + ME 3.029111560812189e-05 Event 176 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3460,7 +3460,7 @@ Event 176 Batch 1 2 6.288652703123263e+02 4.005522031116294e+02 3.691482793515075e+02 3.142594606996526e+02 3 7.209127580467475e+02 -4.124575135572966e+02 -5.165298058232565e+02 -2.877341896975221e+02 4 1.502219716409257e+02 1.190531044566666e+01 1.473815264717492e+02 -2.652527100213051e+01 - ME 1.300720357566141e-05 + ME 1.719274466020296e-04 Event 177 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3468,7 +3468,7 @@ Event 177 Batch 1 2 4.716578040000077e+02 -4.521622645932388e+02 -1.012739918234145e+01 1.338200520767543e+02 3 3.021382980750606e+02 -2.714821202364266e+02 6.773215888881064e+01 -1.140059832109250e+02 4 7.262038979249317e+02 7.236443848296653e+02 -5.760475970646905e+01 -1.981406886582933e+01 - ME 6.442260552556652e-04 + ME 2.354271252348000e-03 Event 178 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3476,7 +3476,7 @@ Event 178 Batch 1 2 7.350088877399502e+02 -3.684484945749095e+02 -2.561732769425163e+02 -5.821159885132296e+02 3 1.415495174310248e+02 7.181268644032879e+01 1.095010133995263e+02 5.374692563910759e+01 4 6.234415948290248e+02 2.966358081345808e+02 
1.466722635429900e+02 5.283690628741219e+02 - ME 6.828487731379645e-05 + ME 1.035408980291912e-04 Event 179 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3484,7 +3484,7 @@ Event 179 Batch 1 2 7.426064621425413e+02 6.748632301344054e+01 7.201624948975951e+02 -1.681544967131679e+02 3 5.821031882499326e+02 8.394276920418550e-01 -5.588194474899291e+02 1.629854049874919e+02 4 1.752903496075256e+02 -6.832575070548241e+01 -1.613430474076661e+02 5.169091725675888e+00 - ME 1.412410550503903e-05 + ME 9.197132478706931e-05 Event 180 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3492,7 +3492,7 @@ Event 180 Batch 1 2 6.099515195485484e+02 2.272495331206023e+02 1.762692760011278e+02 -5.378918555193875e+02 3 5.718889655176699e+02 4.324570510796980e+01 -3.278409766521432e+02 4.665909256493895e+02 4 3.181595149337819e+02 -2.704952382285720e+02 1.515717006510154e+02 7.130092986999803e+01 - ME 3.043963963928669e-05 + ME 5.401477812349802e-05 Event 181 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3500,7 +3500,7 @@ Event 181 Batch 1 2 1.206370886915177e+02 -8.151225636567759e+01 1.767749325039422e+01 8.715827822142556e+01 3 6.451493408002739e+02 -6.748216257939080e+01 4.373428479320614e+02 4.694625256943417e+02 4 7.342135705082084e+02 1.489944189450684e+02 -4.550203411824557e+02 -5.566208039157672e+02 - ME 2.625479922313071e-02 + ME 7.131653341377736e-02 Event 182 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3508,7 +3508,7 @@ Event 182 Batch 1 2 4.626866082364760e+02 -3.084610429505738e+02 3.306629079434072e+02 9.794245113140897e+01 3 4.974966719253473e+02 3.582955998671217e+02 1.664640547097976e+02 -3.023523113558579e+02 4 5.398167198381765e+02 -4.983455691654795e+01 -4.971269626532048e+02 2.044098602244489e+02 - ME 1.414799589613471e-05 + ME 
5.959042767905828e-05 Event 183 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3516,7 +3516,7 @@ Event 183 Batch 1 2 3.304723045950491e+02 3.244647182058462e+00 3.209425641774955e+02 7.872284845075714e+01 3 4.379804819457451e+02 2.312428523500660e+02 3.131807483468383e+02 2.006775141049615e+02 4 7.315472134592065e+02 -2.344874995321247e+02 -6.341233125243344e+02 -2.794003625557186e+02 - ME 2.330806393221907e-03 + ME 4.899988668912175e-03 Event 184 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3524,7 +3524,7 @@ Event 184 Batch 1 2 7.470051035005908e+02 -4.953964753944513e+02 -4.028924750569613e+02 3.876552725878485e+02 3 2.183325716323390e+02 1.119040172022777e+02 1.451703047217021e+02 -1.186262424448778e+02 4 5.346623248670695e+02 3.834924581921736e+02 2.577221703352594e+02 -2.690290301429710e+02 - ME 7.987999480474686e-05 + ME 5.441344453720516e-04 Event 185 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3532,7 +3532,7 @@ Event 185 Batch 1 2 4.448583927494090e+02 2.810173563272025e+02 -3.384637477435971e+02 6.610995769032235e+01 3 6.236443795626774e+02 -1.690803760724666e+02 5.125139620028374e+02 3.125277225134823e+02 4 4.314972276879136e+02 -1.119369802547359e+02 -1.740502142592404e+02 -3.786376802038046e+02 - ME 1.405605442011058e-04 + ME 6.949230823829164e-03 Event 186 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3540,7 +3540,7 @@ Event 186 Batch 1 2 6.802792190696962e+02 -1.681815241656754e+02 5.427923640013703e+02 3.739936368565512e+02 3 6.331554869749547e+02 3.172201723440435e+02 -4.588808692389625e+02 -2.994755095011972e+02 4 1.865652939553488e+02 -1.490386481783679e+02 -8.391149476240778e+01 -7.451812735535422e+01 - ME 3.045129627255903e-05 + ME 3.276943053321406e-04 Event 187 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -3548,7 +3548,7 @@ Event 187 Batch 1 2 7.472897115267965e+02 -6.988402471604775e+02 -2.391684329048669e+02 1.134137672609268e+02 3 6.826908170748527e+02 6.328852277257668e+02 2.212839847556716e+02 -1.286718241709738e+02 4 7.001947139835140e+01 6.595501943471052e+01 1.788444814919547e+01 1.525805691004725e+01 - ME 3.485925693242860e-05 + ME 1.461490870437387e-04 Event 188 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3556,7 +3556,7 @@ Event 188 Batch 1 2 6.496068877140275e+02 -5.024316730938291e+02 -3.980061777252906e+02 -1.055585379310702e+02 3 4.885976180718368e+02 4.424928723138696e+02 1.459942636040002e+02 -1.470148473169288e+02 4 3.617954942141354e+02 5.993880077995960e+01 2.520119141212904e+02 2.525733852479991e+02 - ME 1.006519408431335e-05 + ME 2.843805826594158e-05 Event 189 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3564,7 +3564,7 @@ Event 189 Batch 1 2 4.082379946778654e+02 2.679237131173331e+02 -7.718184435750955e+01 2.981913934867987e+02 3 5.864211573889181e+02 -5.780822197382728e+02 -6.394893886953379e+01 7.497502433004084e+01 4 5.053408479332167e+02 3.101585066209396e+02 1.411307832270433e+02 -3.731664178168398e+02 - ME 1.322787627040098e-04 + ME 1.937644878671120e-03 Event 190 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3572,7 +3572,7 @@ Event 190 Batch 1 2 6.472516823166364e+02 6.463779961822676e+02 -3.289365889632791e+01 6.945035458816692e+00 3 4.318767277050750e+02 -3.286790725415815e+02 -7.183748821760624e+00 -2.800642229191639e+02 4 4.208715899782885e+02 -3.176989236406859e+02 4.007740771808847e+01 2.731191874603472e+02 - ME 1.272332211942340e-05 + ME 3.409584379294133e-05 Event 191 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3580,7 +3580,7 @@ Event 191 Batch 1 2 
6.757500036387052e+02 6.222744522021635e+02 -2.261571472854044e+02 1.351499844096745e+02 3 3.644673602666567e+02 -2.020102809038697e+02 1.114149692296405e+02 -2.821613151026251e+02 4 4.597826360946380e+02 -4.202641712982938e+02 1.147421780557637e+02 1.470113306929507e+02 - ME 1.560703181590231e-05 + ME 5.389305783035389e-05 Event 192 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3588,7 +3588,7 @@ Event 192 Batch 1 2 7.394562478491531e+02 -7.307873850878615e+02 3.988568028534699e+01 1.056147375500683e+02 3 8.098058518630978e+01 5.419286926826393e+01 4.244928426361276e+00 -6.002473390399248e+01 4 6.795631669645365e+02 6.765945158195976e+02 -4.413060871170821e+01 -4.559000364607596e+01 - ME 1.231033846344155e-04 + ME 4.204295748489254e-04 Event 193 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3596,7 +3596,7 @@ Event 193 Batch 1 2 5.607395612273153e+02 -3.164229781907934e+02 -3.517992386171808e+02 -3.009030576558548e+02 3 3.741643617741927e+02 -2.156271676189966e+02 1.666697084176705e+02 2.563690747778811e+02 4 5.650960769984922e+02 5.320501458097899e+02 1.851295301995104e+02 4.453398287797368e+01 - ME 3.026844143728605e-05 + ME 9.141090879934244e-05 Event 194 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3604,7 +3604,7 @@ Event 194 Batch 1 2 5.729373416862012e+02 -2.155045544874616e+02 -1.679805246197324e+02 5.035846779262559e+02 3 2.831035485618876e+02 -2.543279085173982e+02 1.042261812492671e+02 -6.783684323208054e+01 4 6.439591097519118e+02 4.698324630048598e+02 6.375434337046515e+01 -4.357478346941756e+02 - ME 5.497724763810379e-04 + ME 1.781231321893996e-03 Event 195 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3612,7 +3612,7 @@ Event 195 Batch 1 2 5.572874060171201e+02 -5.433144409127298e+02 3.646295232533866e+01 1.185290019729285e+02 3 
6.765845568040619e+02 5.574999049241243e+02 -1.212989803269169e+01 -3.831623469093195e+02 4 2.661280371788181e+02 -1.418546401139455e+01 -2.433305429264712e+01 2.646333449363910e+02 - ME 3.378534889977447e-04 + ME 3.395618115588225e-04 Event 196 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3620,7 +3620,7 @@ Event 196 Batch 1 2 5.405888343305829e+02 3.940239871950471e+02 -8.826690628749978e+01 -3.594305754554688e+02 3 6.983754392688073e+02 -3.888370902622853e+02 -5.513072771506098e+01 5.774898910559966e+02 4 2.610357264006097e+02 -5.186896932761887e+00 1.433976340025607e+02 -2.180593156005277e+02 - ME 2.676929502290073e-04 + ME 5.539073969003598e-03 Event 197 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3628,7 +3628,7 @@ Event 197 Batch 1 2 2.783346334111661e+02 2.282410890438732e+02 -1.474467226896361e+02 6.029624695020830e+01 3 6.434654504578666e+02 1.172104173128919e+01 6.205939438823057e+02 1.696277097949658e+02 4 5.781999161309674e+02 -2.399621307751624e+02 -4.731472211926695e+02 -2.299239567451741e+02 - ME 4.280180350752636e-05 + ME 3.321087064690878e-04 Event 198 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3636,7 +3636,7 @@ Event 198 Batch 1 2 4.349536439683943e+02 1.774777254208009e+02 -9.709992209949135e+01 3.850427697141142e+02 3 4.134500153047116e+02 7.095914770071803e+01 -4.041194890923881e+02 -5.092301099466194e+01 4 6.515963407268921e+02 -2.484368731215197e+02 5.012194111918782e+02 -3.341197587194521e+02 - ME 2.926862112764983e-04 + ME 7.849443582399766e-04 Event 199 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3644,7 +3644,7 @@ Event 199 Batch 1 2 6.682109290882580e+02 2.136897997740939e+02 -5.035763266519416e+02 3.837361052354048e+02 3 1.424120473397155e+02 8.952788458880865e+01 -4.686863299276860e+01 -1.003458038481504e+02 4 
6.893770235720265e+02 -3.032176843629025e+02 5.504449596447103e+02 -2.833903013872543e+02 - ME 4.183851150998592e-04 + ME 1.167594898598604e-03 Event 200 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3652,7 +3652,7 @@ Event 200 Batch 1 2 5.959952693237885e+02 -4.878566955018547e+02 -2.510837703973929e+01 -3.414319479966339e+02 3 4.479637599869168e+02 4.499951041477978e+01 7.146287716862105e+01 4.399313940955211e+02 4 4.560409706892941e+02 4.428571850870749e+02 -4.635450012888173e+01 -9.849944609888662e+01 - ME 3.228844805909175e-04 + ME 5.545496796633981e-04 Event 201 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3660,7 +3660,7 @@ Event 201 Batch 1 2 5.203096708642927e+02 -1.112696379946441e+02 1.367824427202020e+02 4.895219960522141e+02 3 2.871951825199399e+02 -2.582762312778227e+02 1.200876310962787e+02 3.678888524092984e+01 4 6.924951466157675e+02 3.695458692724667e+02 -2.568700738164807e+02 -5.263108812931440e+02 - ME 2.285182473348715e-03 + ME 6.577575910850049e-03 Event 202 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3668,7 +3668,7 @@ Event 202 Batch 1 2 2.158792376054218e+02 2.112389782008981e+01 -7.195062193526132e+01 -2.024369881546198e+02 3 5.463652944256570e+02 2.787950008966254e+02 -3.108926376755554e+02 -3.523267663221479e+02 4 7.377554679689213e+02 -2.999188987167153e+02 3.828432596108168e+02 5.547637544767679e+02 - ME 1.952686275320307e-03 + ME 8.695282964050810e-03 Event 203 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3676,7 +3676,7 @@ Event 203 Batch 1 2 7.124273471334275e+02 4.879265047129839e+02 -1.059167473143779e+02 -5.081949365946950e+02 3 6.746108110440506e+02 -5.248642991835990e+02 4.352799102536777e+01 4.215714978711400e+02 4 1.129618418225217e+02 3.693779447061509e+01 6.238875628901040e+01 8.662343872355494e+01 - ME 
4.211918129012132e-05 + ME 5.361938367485652e-05 Event 204 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3684,7 +3684,7 @@ Event 204 Batch 1 2 7.084787759842808e+02 4.992472551829619e+02 -4.528122431715626e+02 -2.183012291454193e+02 3 1.034373169902747e+02 -8.959882065299325e+01 -3.938861547415055e+01 -3.346441176487074e+01 4 6.880839070254444e+02 -4.096484345299685e+02 4.922008586457131e+02 2.517656409102901e+02 - ME 1.033102023766027e-04 + ME 2.988048706021647e-04 Event 205 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3692,7 +3692,7 @@ Event 205 Batch 1 2 6.496569846879349e+02 -5.869603795046561e+02 -2.345911576090251e+02 1.499956646614410e+02 3 2.543878192344406e+02 -1.851019090219859e+00 2.474675926596849e+02 -5.890268997594536e+01 4 5.959551960776247e+02 5.888113985948760e+02 -1.287643505065981e+01 -9.109297468549572e+01 - ME 4.134215827558992e-05 + ME 1.871447246980874e-04 Event 206 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3700,7 +3700,7 @@ Event 206 Batch 1 2 6.172060642836410e+02 2.978040691523503e+02 4.166709400833434e+02 3.444435946201744e+02 3 7.205754982426181e+02 -2.468045809177361e+02 -5.690387091428452e+02 -3.667580878490107e+02 4 1.622184374737409e+02 -5.099948823461420e+01 1.523677690595017e+02 2.231449322883641e+01 - ME 1.138691716042452e-05 + ME 7.356489425273393e-05 Event 207 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3708,7 +3708,7 @@ Event 207 Batch 1 2 5.250113096394139e+02 -1.091977068802181e+02 -4.322753509449321e+02 2.772196909074646e+02 3 5.240251005653129e+02 3.541948269240045e+02 3.738549241960732e+02 9.685466564450643e+01 4 4.509635897952731e+02 -2.449971200437864e+02 5.842042674885889e+01 -3.740743565519710e+02 - ME 9.518274156960593e-05 + ME 3.378615964480245e-03 Event 208 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3716,7 +3716,7 @@ Event 208 Batch 1 2 4.449444343820048e+02 1.928662436733418e+02 -3.595193210859464e+02 1.775500478872298e+02 3 4.894053462810564e+02 -2.195789585225567e+02 2.295326432211599e+02 3.723136307450180e+02 4 5.656502193369389e+02 2.671271484921488e+01 1.299866778647865e+02 -5.498636786322478e+02 - ME 2.179806976662403e-03 + ME 2.068943926258950e-01 Event 209 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3724,7 +3724,7 @@ Event 209 Batch 1 2 4.949423498078044e+02 -2.830370809537592e+02 -1.684680620467476e+02 -3.694271951395289e+02 3 6.326444171345161e+02 3.898538983719823e+02 -1.748162179498052e+02 4.665749526039372e+02 4 3.724132330576786e+02 -1.068168174182231e+02 3.432842799965525e+02 -9.714775746440780e+01 - ME 3.638076645868775e-05 + ME 1.473942246791387e-04 Event 210 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3732,7 +3732,7 @@ Event 210 Batch 1 2 5.469464199121014e+02 -4.947084169679945e+02 2.319240083666633e+02 -2.500445517953792e+01 3 2.929141603572806e+02 -5.602902696925145e+01 2.099470855189298e+01 2.867379913571110e+02 4 6.601394197306178e+02 5.507374439372461e+02 -2.529187169185561e+02 -2.617335361775729e+02 - ME 7.792286450853471e-04 + ME 1.577330101330874e-03 Event 211 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3740,7 +3740,7 @@ Event 211 Batch 1 2 5.484404249965427e+02 1.659778109685243e+01 3.514591842057613e+02 -4.206992456262192e+02 3 4.635537606517395e+02 -3.607884938122542e+02 -3.140996451540818e+01 2.893564685231623e+02 4 4.880058143517181e+02 3.441907127154018e+02 -3.200492196903532e+02 1.313427771030569e+02 - ME 1.717788621912363e-05 + ME 4.999214184618137e-05 Event 212 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3748,7 +3748,7 @@ Event 
212 Batch 1 2 6.930853388432640e+02 -3.424793196872474e+02 -8.152110066892747e+01 5.970171795281683e+02 3 9.131624224772825e+01 6.738328155058525e+01 1.365968298972706e+01 6.009627714210347e+01 4 7.155984189090078e+02 2.750960381366621e+02 6.786141767920034e+01 -6.571134566702718e+02 - ME 4.440767413899675e-02 + ME 3.224436999651524e-01 Event 213 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3756,7 +3756,7 @@ Event 213 Batch 1 2 7.316448870278512e+02 4.203233031264803e+02 4.913598772661251e+02 -3.423419819067778e+02 3 4.750162603483208e+02 -1.726357548525294e+02 -3.708603862154638e+02 2.414537588813190e+02 4 2.933388526238279e+02 -2.476875482739507e+02 -1.204994910506614e+02 1.008882230254589e+02 - ME 1.166473784051930e-05 + ME 4.008080891216109e-05 Event 214 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3764,7 +3764,7 @@ Event 214 Batch 1 2 4.805779599533694e+02 3.904513572450257e+02 -1.742898429406511e+02 2.193763065287195e+02 3 6.164938851206517e+02 -5.563771061772993e+02 2.227142270499353e+02 1.445946028815716e+02 4 4.029281549259790e+02 1.659257489322735e+02 -4.842438410928419e+01 -3.639709094102910e+02 - ME 1.644694060635318e-04 + ME 1.130096726278085e-02 Event 215 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3772,7 +3772,7 @@ Event 215 Batch 1 2 4.610896439725640e+02 -3.106576460930037e+02 -3.050258363865880e+02 -1.518378274323046e+02 3 7.153470686812809e+02 2.726436938726979e+02 6.046054769368644e+02 2.680280994976061e+02 4 3.235632873461531e+02 3.801395222030658e+01 -2.995796405502758e+02 -1.161902720653026e+02 - ME 1.638803663744001e-05 + ME 2.130646114222361e-04 Event 216 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3780,7 +3780,7 @@ Event 216 Batch 1 2 5.309452696424389e+02 -4.912950836090372e+02 -3.608909251460832e+01 
-1.980646298023531e+02 3 6.627369363365399e+02 4.479096066616000e+02 2.308759280187052e+02 4.304573578259469e+02 4 3.063177940210212e+02 4.338547694743724e+01 -1.947868355040969e+02 -2.323927280235938e+02 - ME 7.684209531203918e-05 + ME 1.881406502208647e-03 Event 217 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3788,7 +3788,7 @@ Event 217 Batch 1 2 4.608032244164870e+02 2.215832851737383e+02 3.318832460795877e+02 -2.304212888079594e+02 3 3.107022283044695e+02 -4.724697178681157e+01 2.830528592337836e+02 -1.190994425256424e+02 4 7.284945472790432e+02 -1.743363133869267e+02 -6.149361053133712e+02 3.495207313336019e+02 - ME 4.426756984161849e-04 + ME 2.894775763457067e-03 Event 218 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3796,7 +3796,7 @@ Event 218 Batch 1 2 6.336891602166270e+02 5.249943224110900e+02 1.648031440577737e+02 -3.142973702098814e+02 3 5.195346944320743e+02 -3.655895580768890e+02 -3.610279413409480e+02 7.693763263116504e+01 4 3.467761453512956e+02 -1.594047643342018e+02 1.962247972831736e+02 2.373597375787177e+02 - ME 8.957256945094420e-06 + ME 2.703962034458943e-05 Event 219 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3804,7 +3804,7 @@ Event 219 Batch 1 2 2.579228498517417e+02 -4.166553381892272e+01 1.191899344508913e+02 2.249042891828000e+02 3 7.453266221408651e+02 -3.354388163550532e+01 -3.947818065141064e+02 -6.312954196904914e+02 4 4.967505280073930e+02 7.520941545442813e+01 2.755918720632151e+02 4.063911305076915e+02 - ME 4.019449398167179e-05 + ME 6.103184694489295e-05 Event 220 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3812,7 +3812,7 @@ Event 220 Batch 1 2 4.940336288355577e+02 -2.383755021420815e+02 -2.918661661143953e+02 3.194690712363630e+02 3 7.129224521449780e+02 2.727447507998269e+02 2.535039959962389e+02 
-6.079510240944473e+02 4 2.930439190194635e+02 -3.436924865774512e+01 3.836217011815621e+01 2.884819528580837e+02 - ME 1.677977866215262e-04 + ME 1.761519882509421e-04 Event 221 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3820,7 +3820,7 @@ Event 221 Batch 1 2 3.305414381337777e+02 -2.712796684963201e+02 -1.199910663213094e+02 -1.458325333632650e+02 3 7.388441803280767e+02 5.510455284380058e+02 4.375213740715825e+02 2.254209298704556e+02 4 4.306143815381457e+02 -2.797658599416856e+02 -3.175303077502730e+02 -7.958839650719051e+01 - ME 1.392897982206581e-05 + ME 1.338118621913618e-04 Event 222 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3828,7 +3828,7 @@ Event 222 Batch 1 2 4.657562074797755e+02 2.823280548971349e+02 2.956503281023745e+02 2.231828795335844e+02 3 4.791948192186352e+02 -3.228825926298714e+02 2.575611801233854e+02 -2.429747818931873e+02 4 5.550489733015891e+02 4.055453773273638e+01 -5.532115082257600e+02 1.979190235960287e+01 - ME 2.328731171682892e-05 + ME 9.040551632672907e-05 Event 223 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3836,7 +3836,7 @@ Event 223 Batch 1 2 1.612164685986321e+02 -4.527922182271191e+01 -1.095260585492910e+01 1.543391792239740e+02 3 6.984218503485876e+02 -4.629950983513680e+02 2.605715575888556e+02 -4.533553609726805e+02 4 6.403616810527805e+02 5.082743201740799e+02 -2.496189517339264e+02 2.990161817487066e+02 - ME 2.446487784841432e-04 + ME 4.148580235863498e-04 Event 224 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3844,7 +3844,7 @@ Event 224 Batch 1 2 1.663853414671972e+02 -1.350882138037309e+02 9.706071747767010e+01 3.804401292344658e+00 3 6.436745581417563e+02 -4.469273298203079e+02 -4.412749113764766e+02 -1.408877256838118e+02 4 6.899401003910457e+02 5.820155436240389e+02 3.442141938988058e+02 
1.370833243914657e+02 - ME 9.431632941984795e-05 + ME 3.449215697364171e-04 Event 225 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3852,7 +3852,7 @@ Event 225 Batch 1 2 6.702356777533546e+02 6.117158080352369e+02 -2.649249521350114e+02 -6.952987609335720e+01 3 6.901224376513153e+02 -6.564819557015361e+02 1.560869289536550e+02 1.446972404640001e+02 4 1.396418845953297e+02 4.476614766629927e+01 1.088380231813564e+02 -7.516736437064299e+01 - ME 2.456039108263569e-05 + ME 6.407468428023662e-04 Event 226 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3860,7 +3860,7 @@ Event 226 Batch 1 2 7.307777643673112e+02 -4.569648094661606e+02 4.416236342013199e+02 -3.608155616351098e+02 3 1.446420186345137e+02 4.133161435221925e+01 -3.411742569426914e+01 1.343466131828505e+02 4 6.245802169981752e+02 4.156331951139413e+02 -4.075062085070508e+02 2.264689484522593e+02 - ME 2.774761612267077e-04 + ME 4.858390443010437e-04 Event 227 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3868,7 +3868,7 @@ Event 227 Batch 1 2 7.408615397889290e+02 -4.398089081634772e+02 -5.325812259979131e+02 2.679574278743413e+02 3 4.035753807128123e+02 3.000971513323747e+02 2.468113220276344e+02 -1.090823496201683e+02 4 3.555630794982585e+02 1.397117568311025e+02 2.857699039702786e+02 -1.588750782541728e+02 - ME 3.077346064218035e-05 + ME 3.215647103618368e-04 Event 228 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3876,7 +3876,7 @@ Event 228 Batch 1 2 5.775455372723294e+02 -3.656199842755111e+02 -6.289501053880601e+01 4.426342647953073e+02 3 3.247306314578497e+02 8.776645762339835e+01 3.116872137482897e+02 2.445634292125525e+01 4 5.977238312698206e+02 2.778535266521127e+02 -2.487922032094836e+02 -4.670906077165625e+02 - ME 3.399241079583280e-04 + ME 3.156934429573604e-03 Event 229 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3884,7 +3884,7 @@ Event 229 Batch 1 2 3.665477125629453e+02 -2.081014917770363e+02 2.317985113364040e+02 -1.931850016112187e+02 3 6.187040836990479e+02 -2.134593092471877e+02 -3.484367286517815e+02 4.645661552545953e+02 4 5.147482037380067e+02 4.215608010242241e+02 1.166382173153775e+02 -2.713811536433765e+02 - ME 8.330968691049859e-05 + ME 4.392210547845218e-04 Event 230 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3892,7 +3892,7 @@ Event 230 Batch 1 2 5.913978529013565e+02 -4.986092821675885e+02 -3.028328044703767e+02 9.712104143419764e+01 3 3.439186614041002e+02 -6.573524045766426e+01 3.216488491089061e+02 -1.024741025375549e+02 4 5.646834856945436e+02 5.643445226252528e+02 -1.881604463852933e+01 5.353061103357447e+00 - ME 2.296146042402505e-05 + ME 1.067159092411647e-04 Event 231 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3900,7 +3900,7 @@ Event 231 Batch 1 2 5.760768557894827e+02 -7.075794524290799e+01 5.609870884449791e+02 1.102331327656218e+02 3 6.038619762337338e+02 -2.467027894308989e+02 -5.464177649873398e+02 -7.221250677108812e+01 4 3.200611679767834e+02 3.174607346738069e+02 -1.456932345763944e+01 -3.802062599453370e+01 - ME 9.438631267217403e-06 + ME 8.750887998909065e-05 Event 232 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3908,7 +3908,7 @@ Event 232 Batch 1 2 7.230187249684843e+02 -2.426041066061352e+02 1.884455685697195e+02 -6.545132479937492e+02 3 4.821326920133732e+02 2.438648429837413e+02 -1.563760752388986e+01 4.156168142598493e+02 4 2.948485830181424e+02 -1.260736377606032e+00 -1.728079610458298e+02 2.388964337338999e+02 - ME 3.745272037455064e-05 + ME 4.549716999825542e-05 Event 233 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-3916,7 +3916,7 @@ Event 233 Batch 1 2 3.540260977608100e+02 -1.904526694678991e+02 -1.042089619355360e+02 -2.796475475319170e+02 3 4.925592302096041e+02 1.195034224421750e+02 3.554637678715695e+02 -3.193415679485398e+02 4 6.534146720295859e+02 7.094924702572415e+01 -2.512548059360335e+02 5.989891154804569e+02 - ME 1.035644942794080e-04 + ME 2.494643034161164e-04 Event 234 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3924,7 +3924,7 @@ Event 234 Batch 1 2 1.866526101194276e+02 7.776953530733704e+01 -1.047503781897390e+01 1.693557493124073e+02 3 6.012752698516817e+02 5.974840035795012e+02 -4.570329760029643e+01 4.955829083294186e+01 4 7.120721200288899e+02 -6.752535388868379e+02 5.617833541927040e+01 -2.189140401453492e+02 - ME 6.655948749153013e-04 + ME 2.154454342135980e-03 Event 235 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3932,7 +3932,7 @@ Event 235 Batch 1 2 5.032945404607945e+02 1.612889276925247e+02 2.561838854094329e+02 -4.020710050699558e+02 3 7.153634726767370e+02 -3.739069589148947e+02 -1.979140468542061e+02 5.768609140624169e+02 4 2.813419868624690e+02 2.126180312223700e+02 -5.826983855522722e+01 -1.747899089924609e+02 - ME 1.137471703441233e-04 + ME 8.184939555880423e-04 Event 236 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3940,7 +3940,7 @@ Event 236 Batch 1 2 6.980797829886610e+02 -9.803971882836288e+00 4.740144261428889e+02 5.123764137440797e+02 3 5.519387921056282e+02 -1.638876688381594e+02 -3.209728652821290e+02 -4.180355032606608e+02 4 2.499814249057108e+02 1.736916407209956e+02 -1.530415608607599e+02 -9.434091048341891e+01 - ME 5.842524801707843e-05 + ME 2.813360227943072e-04 Event 237 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3948,7 +3948,7 @@ Event 237 Batch 1 2 1.604490925133743e+02 6.212857081252698e+01 
9.075394990141041e+01 1.168232534834160e+02 3 6.578242662283152e+02 5.348507070161563e+02 -3.810396531957998e+02 3.842224792439630e+01 4 6.817266412583107e+02 -5.969792778286832e+02 2.902857032943894e+02 -1.552455014078122e+02 - ME 1.834055676127939e-04 + ME 8.205069948818567e-04 Event 238 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3956,7 +3956,7 @@ Event 238 Batch 1 2 2.789018340499539e+02 1.069933592962543e+02 -2.572713415352736e+02 1.225197647611563e+01 3 4.761759619803052e+02 7.755191627191856e+01 -4.591043622469822e+02 -9.976187456245104e+01 4 7.449222039697408e+02 -1.845452755681728e+02 7.163757037822556e+02 8.750989808633538e+01 - ME 9.445005309896021e-03 + ME 4.130258343824905e-02 Event 239 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3964,7 +3964,7 @@ Event 239 Batch 1 2 4.581461811054764e+02 -3.899520773556200e+02 2.006122777919944e+02 1.326273524830990e+02 3 3.013476461129690e+02 -2.996604136348060e+02 3.145663680794619e+01 4.951799549362093e+00 4 7.405061727815548e+02 6.896124909904260e+02 -2.320689145999406e+02 -1.375791520324611e+02 - ME 4.970363634614722e-03 + ME 1.351152256907066e-02 Event 240 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3972,7 +3972,7 @@ Event 240 Batch 1 2 5.932490652975304e+02 -4.094504138983958e+01 -3.300190662632461e+02 4.912793227530680e+02 3 3.147487537014150e+02 3.081803657249563e+02 4.097350029662016e+01 -4.912038692507519e+01 4 5.920021810010543e+02 -2.672353243351168e+02 2.890455659666260e+02 -4.421589358279927e+02 - ME 3.420638167820422e-04 + ME 2.300291351402201e-03 Event 241 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3980,7 +3980,7 @@ Event 241 Batch 1 2 4.438703186026563e+01 1.425431959717181e+01 -4.430288595443099e+00 -4.180186016371768e+01 3 7.139617398095604e+02 -8.415544716076485e+01 
-5.657765076565163e+02 -4.272659242311072e+02 4 7.416512283301737e+02 6.990112756359306e+01 5.702067962519594e+02 4.690677843948249e+02 - ME 9.983667466725972e-03 + ME 9.657825758456334e-03 Event 242 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3988,7 +3988,7 @@ Event 242 Batch 1 2 3.798759956195423e+02 -1.259218082844715e+02 -3.429343473884153e+02 1.041417477651927e+02 3 6.208895880511435e+02 5.354328139337265e+02 1.248673426784089e+02 -2.884852319370315e+02 4 4.992344163293142e+02 -4.095110056492549e+02 2.180670047100064e+02 1.843434841718389e+02 - ME 1.030886114253601e-05 + ME 4.523810239016752e-05 Event 243 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3996,7 +3996,7 @@ Event 243 Batch 1 2 2.320641800899440e+02 1.658639294991472e+02 7.783463994856535e+01 1.424243988788334e+02 3 6.251485586341132e+02 -2.328139095298017e+02 -4.262931976140131e+02 3.935511574875350e+02 4 6.427872612759426e+02 6.694998003065477e+01 3.484585576654476e+02 -5.359755563663684e+02 - ME 8.493072129055412e-04 + ME 1.068434238404496e-02 Event 244 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4004,7 +4004,7 @@ Event 244 Batch 1 2 6.609991843787810e+02 -2.293678857540617e+02 -4.971623496474938e+02 -3.703240376037023e+02 3 1.091403980947070e+02 1.154537470975927e+01 -9.115666825632124e+00 -1.081445118228680e+02 4 7.298604175265119e+02 2.178225110443025e+02 5.062780164731259e+02 4.784685494265703e+02 - ME 9.635755455313371e-04 + ME 2.129811247265830e-03 Event 245 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4012,7 +4012,7 @@ Event 245 Batch 1 2 4.893629130846664e+02 -3.546974954177181e+02 3.112856868655738e+02 -1.294873298810978e+02 3 7.129026631852477e+02 5.703735458058533e+02 -4.257115617679147e+02 -4.091322034012423e+01 4 2.977344237300874e+02 -2.156760503881352e+02 
1.144258749023406e+02 1.704005502212233e+02 - ME 5.312368446054512e-06 + ME 2.548352504440589e-05 Event 246 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4020,7 +4020,7 @@ Event 246 Batch 1 2 3.999457395350199e+02 9.605025124341067e+01 9.072234098128430e+01 3.774922524438975e+02 3 3.675469088581873e+02 -1.615841482674670e+01 2.570183669846762e+02 2.622426259669196e+02 4 7.325073516067924e+02 -7.989183641666393e+01 -3.477407079659604e+02 -6.397348784108170e+02 - ME 5.023802198964801e-02 + ME 1.294421983622042e-01 Event 247 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4028,7 +4028,7 @@ Event 247 Batch 1 2 6.711864521923226e+02 3.763073240556692e+02 5.338170415278108e+02 1.546719678644905e+02 3 5.231557804938882e+02 -1.057595517177888e+02 -5.121603131388773e+02 -1.409615302513522e+01 4 3.056577673137891e+02 -2.705477723378804e+02 -2.165672838893370e+01 -1.405758148393554e+02 - ME 1.980507958825256e-05 + ME 2.873345328272106e-04 Event 248 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4036,7 +4036,7 @@ Event 248 Batch 1 2 6.307803946875938e+02 -6.240065811552291e+01 -3.654556314590158e+02 5.103256270499047e+02 3 3.935347424219227e+02 -2.188782290807617e+02 2.916853933646314e+01 -3.257470040392325e+02 4 4.756848628904837e+02 2.812788871962847e+02 3.362870921225527e+02 -1.845786230106721e+02 - ME 8.712398839363553e-05 + ME 2.418190194667681e-04 Event 249 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4044,7 +4044,7 @@ Event 249 Batch 1 2 4.326970760901858e+02 -4.070406664121577e+02 -1.467447404863359e+02 3.261392852829594e+00 3 4.839435229991528e+02 2.335311811831339e+01 2.018595963184923e+02 -4.392136936630267e+02 4 5.833594009106607e+02 3.836875482938447e+02 -5.511485583215654e+01 4.359523008101972e+02 - ME 2.487145538635957e-05 + ME 
8.354140201035124e-05 Event 250 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4052,7 +4052,7 @@ Event 250 Batch 1 2 7.010671671345858e+02 -6.122994886156980e+02 -2.473946684860857e+02 2.353303785738851e+02 3 5.574643785654457e+02 3.902114201641945e+02 2.260985614407801e+02 -3.276904354069721e+02 4 2.414684542999681e+02 2.220880684515034e+02 2.129610704530562e+01 9.236005683308701e+01 - ME 1.645582299148298e-05 + ME 4.704118057291807e-05 Event 251 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4060,7 +4060,7 @@ Event 251 Batch 1 2 7.364006127103795e+02 5.379960890463808e+02 4.302640987755426e+02 2.602285070392761e+02 3 3.051282143252570e+01 -2.901685968644106e+00 1.337962970917706e+01 -2.726899336532026e+01 4 7.330865658570956e+02 -5.350944030777371e+02 -4.436437284847198e+02 -2.329595136739561e+02 - ME 6.389613086136084e-03 + ME 8.340546584740779e-03 Event 252 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4068,7 +4068,7 @@ Event 252 Batch 1 2 5.965625584838610e+02 -7.369842915522101e+01 -5.671364104158780e+02 -1.697401534860145e+02 3 6.549338760881149e+02 -1.514014639568436e+02 6.313240788068730e+02 8.628954906696529e+01 4 2.485035654280235e+02 2.250998931120648e+02 -6.418766839099484e+01 8.345060441904938e+01 - ME 7.225550854378042e-06 + ME 3.985162011735342e-05 Event 253 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4076,7 +4076,7 @@ Event 253 Batch 1 2 5.728678540484714e+02 3.212236187283236e+01 -4.622666283104808e+02 -3.368312580807653e+02 3 7.160302400837320e+02 1.132435775281999e+02 5.206369974620781e+02 4.783433011307397e+02 4 2.111019058677967e+02 -1.453659394010323e+02 -5.837036915159722e+01 -1.415120430499744e+02 - ME 7.499676590470843e-05 + ME 1.248429186447426e-03 Event 254 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -4084,7 +4084,7 @@ Event 254 Batch 1 2 5.579357369440610e+02 1.333150067790222e+02 -6.785864805882139e+01 5.375077668373273e+02 3 6.202682598689536e+02 -4.039338689731095e+02 2.012068793592834e+02 -4.255419314189536e+02 4 3.217960031869852e+02 2.706188621940872e+02 -1.333482313004621e+02 -1.119658354183736e+02 - ME 2.226893396847405e-04 + ME 6.088720978226072e-04 Event 255 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4092,5 +4092,5 @@ Event 255 Batch 1 2 7.263612771087843e+02 3.396063850675520e+02 -6.401091575508393e+02 5.028393902637355e+01 3 1.540578578981475e+02 -3.080387127739228e+01 1.060177193258910e+02 -1.074485378375538e+02 4 6.195808649930684e+02 -3.088025137901597e+02 5.340914382249483e+02 5.716459881118030e+01 - ME 4.003666322732326e-05 + ME 1.547064591142216e-04 From 39e7519223499a517ed734ceaf346a478264ebb9 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 17:11:00 +0200 Subject: [PATCH 041/119] [oct23av] go back to the latest 8 mad and 7 sa generated processes (undo temporary changes before merging PR #762) git checkout 8a0c4d748 $(git ls-tree --name-only HEAD *.mad) git checkout 8a0c4d748 $(git ls-tree --name-only HEAD *.sa) --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 118 +- .../ee_mumu.mad/Cards/me5_configuration.txt | 4 +- .../ee_mumu.mad/Source/DHELAS/aloha_file.inc | 2 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../SubProcesses/P1_epem_mupmum/CPPProcess.cc | 30 +- .../SubProcesses/P1_epem_mupmum/auto_dsig1.f | 20 +- .../SubProcesses/P1_epem_mupmum/counters.cc | 18 +- .../SubProcesses/P1_epem_mupmum/matrix1.f | 2 - .../ee_mumu.mad/SubProcesses/cudacpp.mk | 6 +- .../cudacpp/ee_mumu.mad/SubProcesses/makefile | 4 +- .../cudacpp/ee_mumu.mad/bin/generate_events | 22 +- .../ee_mumu.mad/bin/internal/banner.py | 3 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42833 -> 42831 bytes epochX/cudacpp/ee_mumu.mad/bin/madevent 
| 20 +- epochX/cudacpp/ee_mumu.mad/mg5.in | 2 + epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h | 30 +- .../CODEGEN_cudacpp_ee_mumu_log.txt | 78 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../P1_Sigma_sm_epem_mupmum/CPPProcess.cc | 30 +- .../ee_mumu.sa/SubProcesses/cudacpp.mk | 6 +- epochX/cudacpp/ee_mumu.sa/mg5.in | 5 +- epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h | 30 +- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 123 +- epochX/cudacpp/gg_tt.mad/Cards/ident_card.dat | 28 +- .../gg_tt.mad/Cards/me5_configuration.txt | 4 +- .../cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat | 10 +- .../gg_tt.mad/Source/DHELAS/aloha_file.inc | 2 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../SubProcesses/P1_gg_ttx/CPPProcess.cc | 19 +- .../SubProcesses/P1_gg_ttx/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_ttx/counters.cc | 99 +- .../SubProcesses/P1_gg_ttx/matrix1.f | 4 +- .../SubProcesses/P1_gg_ttx/ompnumthreads.cc | 26 +- .../cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 6 +- .../cudacpp/gg_tt.mad/SubProcesses/makefile | 4 +- .../SubProcesses/proc_characteristics | 2 +- epochX/cudacpp/gg_tt.mad/bin/generate_events | 22 +- .../cudacpp/gg_tt.mad/bin/internal/banner.py | 3 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42821 -> 42831 bytes epochX/cudacpp/gg_tt.mad/bin/madevent | 20 +- epochX/cudacpp/gg_tt.mad/mg5.in | 2 + epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h | 8 + .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 87 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../P1_Sigma_sm_gg_ttx/CPPProcess.cc | 19 +- .../cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk | 6 +- epochX/cudacpp/gg_tt.sa/mg5.in | 5 +- epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h | 8 + .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 169 +- .../cudacpp/gg_tt01g.mad/Cards/ident_card.dat | 28 +- .../gg_tt01g.mad/Cards/me5_configuration.txt | 4 +- .../gg_tt01g.mad/Cards/proc_card_mg5.dat | 10 +- .../gg_tt01g.mad/Source/DHELAS/aloha_file.inc | 2 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- 
.../SubProcesses/P1_gg_ttx/CPPProcess.cc | 19 +- .../SubProcesses/P1_gg_ttx/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_ttx/counters.cc | 99 +- .../SubProcesses/P1_gg_ttx/matrix1.f | 4 +- .../SubProcesses/P1_gg_ttx/ompnumthreads.cc | 26 +- .../SubProcesses/P2_gg_ttxg/CPPProcess.cc | 69 +- .../SubProcesses/P2_gg_ttxg/auto_dsig1.f | 18 +- .../SubProcesses/P2_gg_ttxg/counters.cc | 99 +- .../SubProcesses/P2_gg_ttxg/matrix1.f | 8 +- .../SubProcesses/P2_gg_ttxg/ompnumthreads.cc | 26 +- .../gg_tt01g.mad/SubProcesses/cudacpp.mk | 6 +- .../gg_tt01g.mad/SubProcesses/makefile | 4 +- .../SubProcesses/proc_characteristics | 2 +- .../cudacpp/gg_tt01g.mad/bin/generate_events | 22 +- .../gg_tt01g.mad/bin/internal/banner.py | 3 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42821 -> 42831 bytes epochX/cudacpp/gg_tt01g.mad/bin/madevent | 20 +- epochX/cudacpp/gg_tt01g.mad/mg5.in | 3 +- epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h | 18 + .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 129 +- .../cudacpp/gg_ttg.mad/Cards/ident_card.dat | 28 +- .../gg_ttg.mad/Cards/me5_configuration.txt | 4 +- .../gg_ttg.mad/Cards/proc_card_mg5.dat | 10 +- .../gg_ttg.mad/Source/DHELAS/aloha_file.inc | 2 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../SubProcesses/P1_gg_ttxg/CPPProcess.cc | 69 +- .../SubProcesses/P1_gg_ttxg/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_ttxg/counters.cc | 99 +- .../SubProcesses/P1_gg_ttxg/matrix1.f | 8 +- .../SubProcesses/P1_gg_ttxg/ompnumthreads.cc | 26 +- .../gg_ttg.mad/SubProcesses/cudacpp.mk | 6 +- .../cudacpp/gg_ttg.mad/SubProcesses/makefile | 4 +- .../SubProcesses/proc_characteristics | 2 +- epochX/cudacpp/gg_ttg.mad/bin/generate_events | 22 +- .../cudacpp/gg_ttg.mad/bin/internal/banner.py | 3 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42821 -> 42831 bytes epochX/cudacpp/gg_ttg.mad/bin/madevent | 20 +- epochX/cudacpp/gg_ttg.mad/mg5.in | 2 + epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h | 18 + .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 89 +- 
.../SubProcesses/MatrixElementKernels.cc | 9 +- .../P1_Sigma_sm_gg_ttxg/CPPProcess.cc | 69 +- .../cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk | 6 +- epochX/cudacpp/gg_ttg.sa/mg5.in | 5 +- epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h | 18 + .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 131 +- .../cudacpp/gg_ttgg.mad/Cards/ident_card.dat | 28 +- .../gg_ttgg.mad/Cards/me5_configuration.txt | 4 +- .../gg_ttgg.mad/Cards/proc_card_mg5.dat | 10 +- .../gg_ttgg.mad/Source/DHELAS/aloha_file.inc | 2 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../SubProcesses/P1_gg_ttxgg/CPPProcess.cc | 439 +- .../SubProcesses/P1_gg_ttxgg/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_ttxgg/counters.cc | 99 +- .../SubProcesses/P1_gg_ttxgg/matrix1.f | 32 +- .../SubProcesses/P1_gg_ttxgg/ompnumthreads.cc | 26 +- .../gg_ttgg.mad/SubProcesses/cudacpp.mk | 6 +- .../cudacpp/gg_ttgg.mad/SubProcesses/makefile | 4 +- .../SubProcesses/proc_characteristics | 2 +- .../cudacpp/gg_ttgg.mad/bin/generate_events | 22 +- .../gg_ttgg.mad/bin/internal/banner.py | 3 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42821 -> 42831 bytes epochX/cudacpp/gg_ttgg.mad/bin/madevent | 20 +- epochX/cudacpp/gg_ttgg.mad/mg5.in | 2 + epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h | 24 + .../CODEGEN_cudacpp_gg_ttgg_log.txt | 91 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../P1_Sigma_sm_gg_ttxgg/CPPProcess.cc | 439 +- .../gg_ttgg.sa/SubProcesses/cudacpp.mk | 6 +- epochX/cudacpp/gg_ttgg.sa/mg5.in | 5 +- epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h | 24 + .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 135 +- .../cudacpp/gg_ttggg.mad/Cards/ident_card.dat | 28 +- .../gg_ttggg.mad/Cards/me5_configuration.txt | 4 +- .../gg_ttggg.mad/Cards/proc_card_mg5.dat | 10 +- .../gg_ttggg.mad/Source/DHELAS/aloha_file.inc | 2 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../SubProcesses/P1_gg_ttxggg/CPPProcess.cc | 4555 ++++++++--------- .../SubProcesses/P1_gg_ttxggg/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_ttxggg/counters.cc | 99 +- 
.../SubProcesses/P1_gg_ttxggg/matrix1.f | 181 +- .../P1_gg_ttxggg/ompnumthreads.cc | 26 +- .../gg_ttggg.mad/SubProcesses/cudacpp.mk | 6 +- .../gg_ttggg.mad/SubProcesses/makefile | 4 +- .../SubProcesses/proc_characteristics | 2 +- .../cudacpp/gg_ttggg.mad/bin/generate_events | 22 +- .../gg_ttggg.mad/bin/internal/banner.py | 3 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42821 -> 42831 bytes epochX/cudacpp/gg_ttggg.mad/bin/madevent | 20 +- epochX/cudacpp/gg_ttggg.mad/mg5.in | 3 +- epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h | 24 + .../CODEGEN_cudacpp_gg_ttggg_log.txt | 93 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../P1_Sigma_sm_gg_ttxggg/CPPProcess.cc | 4555 ++++++++--------- .../gg_ttggg.sa/SubProcesses/cudacpp.mk | 6 +- epochX/cudacpp/gg_ttggg.sa/mg5.in | 5 +- epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h | 24 + .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 165 +- .../cudacpp/gq_ttq.mad/Cards/ident_card.dat | 28 +- .../gq_ttq.mad/Cards/me5_configuration.txt | 4 +- .../gq_ttq.mad/Cards/proc_card_mg5.dat | 9 +- .../gq_ttq.mad/Source/DHELAS/aloha_file.inc | 2 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../SubProcesses/P1_gu_ttxu/CPPProcess.cc | 40 +- .../SubProcesses/P1_gu_ttxu/auto_dsig1.f | 24 +- .../SubProcesses/P1_gu_ttxu/counters.cc | 99 +- .../SubProcesses/P1_gu_ttxu/matrix1.f | 4 +- .../SubProcesses/P1_gu_ttxu/ompnumthreads.cc | 26 +- .../SubProcesses/P1_gux_ttxux/CPPProcess.cc | 33 +- .../SubProcesses/P1_gux_ttxux/auto_dsig1.f | 28 +- .../SubProcesses/P1_gux_ttxux/counters.cc | 99 +- .../SubProcesses/P1_gux_ttxux/matrix1.f | 4 +- .../P1_gux_ttxux/ompnumthreads.cc | 26 +- .../gq_ttq.mad/SubProcesses/cudacpp.mk | 6 +- .../cudacpp/gq_ttq.mad/SubProcesses/makefile | 4 +- .../SubProcesses/proc_characteristics | 2 +- epochX/cudacpp/gq_ttq.mad/bin/generate_events | 22 +- .../cudacpp/gq_ttq.mad/bin/internal/banner.py | 3 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42821 -> 42831 bytes epochX/cudacpp/gq_ttq.mad/bin/madevent | 20 +- 
epochX/cudacpp/gq_ttq.mad/mg5.in | 2 + epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h | 10 + .../cudacpp/gq_ttq.mad/src/Parameters_sm.cc | 4 +- epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h | 18 +- .../ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt | 1026 ++-- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 124 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../P1_Sigma_sm_gu_ttxu/CPPProcess.cc | 40 +- .../P1_Sigma_sm_gux_ttxux/CPPProcess.cc | 33 +- .../cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk | 6 +- epochX/cudacpp/gq_ttq.sa/mg5.in | 5 +- epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h | 10 + epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc | 4 +- epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h | 18 +- .../ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt | 1026 ++-- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 85 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../P1_Sigma_heft_gg_h/CPPProcess.cc | 9 +- .../heft_gg_h.sa/SubProcesses/cudacpp.mk | 6 +- .../cudacpp/heft_gg_h.sa/src/HelAmps_heft.h | 2 + .../CODEGEN_mad_pp_tt012j_log.txt | 792 +-- .../pp_tt012j.mad/Cards/ident_card.dat | 28 +- .../pp_tt012j.mad/Cards/me5_configuration.txt | 4 +- .../pp_tt012j.mad/Cards/proc_card_mg5.dat | 9 +- .../Source/DHELAS/aloha_file.inc | 2 +- .../SubProcesses/MatrixElementKernels.cc | 9 +- .../SubProcesses/P0_gg_ttx/CPPProcess.cc | 19 +- .../SubProcesses/P0_gg_ttx/auto_dsig1.f | 18 +- .../SubProcesses/P0_gg_ttx/counters.cc | 99 +- .../SubProcesses/P0_gg_ttx/matrix1.f | 4 +- .../SubProcesses/P0_gg_ttx/ompnumthreads.cc | 26 +- .../SubProcesses/P0_uux_ttx/CPPProcess.cc | 17 +- .../SubProcesses/P0_uux_ttx/auto_dsig1.f | 34 +- .../SubProcesses/P0_uux_ttx/counters.cc | 99 +- .../SubProcesses/P0_uux_ttx/ompnumthreads.cc | 26 +- .../SubProcesses/P1_gg_ttxg/CPPProcess.cc | 69 +- .../SubProcesses/P1_gg_ttxg/auto_dsig1.f | 18 +- .../SubProcesses/P1_gg_ttxg/counters.cc | 99 +- .../SubProcesses/P1_gg_ttxg/matrix1.f | 8 +- .../SubProcesses/P1_gg_ttxg/ompnumthreads.cc | 26 +- .../SubProcesses/P1_gu_ttxu/CPPProcess.cc | 35 
+- .../SubProcesses/P1_gu_ttxu/auto_dsig1.f | 24 +- .../SubProcesses/P1_gu_ttxu/counters.cc | 99 +- .../SubProcesses/P1_gu_ttxu/matrix1.f | 4 +- .../SubProcesses/P1_gu_ttxu/ompnumthreads.cc | 26 +- .../SubProcesses/P1_gux_ttxux/CPPProcess.cc | 35 +- .../SubProcesses/P1_gux_ttxux/auto_dsig1.f | 28 +- .../SubProcesses/P1_gux_ttxux/counters.cc | 99 +- .../SubProcesses/P1_gux_ttxux/matrix1.f | 4 +- .../P1_gux_ttxux/ompnumthreads.cc | 26 +- .../SubProcesses/P1_uux_ttxg/CPPProcess.cc | 35 +- .../SubProcesses/P1_uux_ttxg/auto_dsig1.f | 34 +- .../SubProcesses/P1_uux_ttxg/counters.cc | 99 +- .../SubProcesses/P1_uux_ttxg/ompnumthreads.cc | 26 +- .../SubProcesses/P2_gg_ttxgg/CPPProcess.cc | 439 +- .../SubProcesses/P2_gg_ttxgg/auto_dsig1.f | 18 +- .../SubProcesses/P2_gg_ttxgg/counters.cc | 99 +- .../SubProcesses/P2_gg_ttxgg/matrix1.f | 32 +- .../SubProcesses/P2_gg_ttxgg/ompnumthreads.cc | 26 +- .../SubProcesses/P2_gg_ttxuux/CPPProcess.cc | 152 +- .../SubProcesses/P2_gg_ttxuux/auto_dsig1.f | 18 +- .../SubProcesses/P2_gg_ttxuux/counters.cc | 99 +- .../SubProcesses/P2_gg_ttxuux/matrix1.f | 8 +- .../P2_gg_ttxuux/ompnumthreads.cc | 26 +- .../SubProcesses/P2_gu_ttxgu/CPPProcess.cc | 147 +- .../SubProcesses/P2_gu_ttxgu/auto_dsig1.f | 24 +- .../SubProcesses/P2_gu_ttxgu/counters.cc | 99 +- .../SubProcesses/P2_gu_ttxgu/matrix1.f | 16 +- .../SubProcesses/P2_gu_ttxgu/ompnumthreads.cc | 26 +- .../SubProcesses/P2_gux_ttxgux/CPPProcess.cc | 147 +- .../SubProcesses/P2_gux_ttxgux/auto_dsig1.f | 28 +- .../SubProcesses/P2_gux_ttxgux/counters.cc | 99 +- .../SubProcesses/P2_gux_ttxgux/matrix1.f | 16 +- .../P2_gux_ttxgux/ompnumthreads.cc | 26 +- .../SubProcesses/P2_uc_ttxuc/CPPProcess.cc | 49 +- .../SubProcesses/P2_uc_ttxuc/auto_dsig1.f | 26 +- .../SubProcesses/P2_uc_ttxuc/counters.cc | 99 +- .../SubProcesses/P2_uc_ttxuc/matrix1.f | 4 +- .../SubProcesses/P2_uc_ttxuc/ompnumthreads.cc | 26 +- .../SubProcesses/P2_ucx_ttxucx/CPPProcess.cc | 49 +- .../SubProcesses/P2_ucx_ttxucx/auto_dsig1.f | 34 +- 
.../SubProcesses/P2_ucx_ttxucx/counters.cc | 99 +- .../SubProcesses/P2_ucx_ttxucx/matrix1.f | 4 +- .../P2_ucx_ttxucx/ompnumthreads.cc | 26 +- .../SubProcesses/P2_uu_ttxuu/CPPProcess.cc | 75 +- .../SubProcesses/P2_uu_ttxuu/auto_dsig1.f | 30 +- .../SubProcesses/P2_uu_ttxuu/counters.cc | 99 +- .../SubProcesses/P2_uu_ttxuu/matrix1.f | 8 +- .../SubProcesses/P2_uu_ttxuu/ompnumthreads.cc | 26 +- .../SubProcesses/P2_uux_ttxccx/CPPProcess.cc | 49 +- .../SubProcesses/P2_uux_ttxccx/auto_dsig1.f | 34 +- .../SubProcesses/P2_uux_ttxccx/counters.cc | 99 +- .../P2_uux_ttxccx/ompnumthreads.cc | 26 +- .../SubProcesses/P2_uux_ttxgg/CPPProcess.cc | 147 +- .../SubProcesses/P2_uux_ttxgg/auto_dsig1.f | 34 +- .../SubProcesses/P2_uux_ttxgg/counters.cc | 99 +- .../P2_uux_ttxgg/ompnumthreads.cc | 26 +- .../SubProcesses/P2_uux_ttxuux/CPPProcess.cc | 77 +- .../SubProcesses/P2_uux_ttxuux/auto_dsig1.f | 34 +- .../SubProcesses/P2_uux_ttxuux/counters.cc | 99 +- .../SubProcesses/P2_uux_ttxuux/matrix1.f | 4 +- .../P2_uux_ttxuux/ompnumthreads.cc | 26 +- .../P2_uxcx_ttxuxcx/CPPProcess.cc | 49 +- .../SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f | 32 +- .../SubProcesses/P2_uxcx_ttxuxcx/counters.cc | 99 +- .../SubProcesses/P2_uxcx_ttxuxcx/matrix1.f | 4 +- .../P2_uxcx_ttxuxcx/ompnumthreads.cc | 26 +- .../P2_uxux_ttxuxux/CPPProcess.cc | 75 +- .../SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f | 38 +- .../SubProcesses/P2_uxux_ttxuxux/counters.cc | 99 +- .../SubProcesses/P2_uxux_ttxuxux/matrix1.f | 8 +- .../P2_uxux_ttxuxux/ompnumthreads.cc | 26 +- .../pp_tt012j.mad/SubProcesses/cudacpp.mk | 6 +- .../pp_tt012j.mad/SubProcesses/makefile | 4 +- .../SubProcesses/proc_characteristics | 2 +- .../cudacpp/pp_tt012j.mad/bin/generate_events | 22 +- .../pp_tt012j.mad/bin/internal/banner.py | 3 +- .../bin/internal/ufomodel/py3_model.pkl | Bin 42821 -> 42831 bytes epochX/cudacpp/pp_tt012j.mad/bin/madevent | 20 +- epochX/cudacpp/pp_tt012j.mad/mg5.in | 2 + epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h | 24 + 
.../ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt | 1026 ++-- 296 files changed, 13219 insertions(+), 10032 deletions(-) mode change 120000 => 100644 epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc mode change 120000 => 100644 epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc mode change 120000 => 100644 epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc mode change 120000 => 100644 epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc mode change 120000 => 100644 epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc mode change 120000 => 100644 epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc mode change 120000 => 100644 epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc mode change 120000 => 100644 epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc mode change 120000 => 100644 epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc mode change 120000 => 100644 
epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc mode change 120000 => 100644 
epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/ompnumthreads.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc mode change 120000 => 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/ompnumthreads.cc diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 2d9aaf2a44..be5cee0fb8 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004520893096923828  +DEBUG: model prefixing takes 0.005257368087768555  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -161,68 +161,49 @@ Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  INFO: initialize a new directory: CODEGEN_mad_ee_mumu INFO: remove old information in CODEGEN_mad_ee_mumu -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses  +DEBUG: Entering 
PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2} [model_handling.py at line 1710]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_epem_mupmum.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  WARNING: vector code for lepton pdf not implemented. 
We removed the option to run dressed lepton  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.094 s +Wrote files for 8 helas calls in 0.097 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.174 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +ALOHA: aloha creates 3 routines in 0.197 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.217 s +ALOHA: aloha creates 7 routines in 0.251 s FFV1 FFV1 FFV2 @@ -231,24 +212,22 @@ ALOHA: aloha creates 7 routines in 0.217 s FFV4 FFV2_4 FFV2_4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * * W E L C O M E to * @@ -269,14 +248,16 @@ INFO: Generate web pages * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -300,40 +281,39 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py +Hunk #1 succeeded at 4188 (offset 1 line). patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). +Hunk #1 succeeded at 495 (offset 26 lines). patching file driver.f patching file matrix1.f Hunk #3 succeeded at 230 (offset 9 lines). Hunk #4 succeeded at 267 (offset 18 lines). Hunk #5 succeeded at 312 (offset 18 lines). -Hunk #6 succeeded at 410 (offset 14 lines). -Hunk #7 succeeded at 478 (offset 8 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done. 
Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README Run "open index.html" to see more information about this process. quit -real 0m2.433s -user 0m2.060s -sys 0m0.335s +real 0m2.494s +user 0m2.190s +sys 0m0.291s diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt b/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt index 5ca005676e..cdeedc7863 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_file.inc index 4f385d6435..738db319fd 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV2_3.o FFV2_0.o FFV4_0.o FFV4_3.o FFV1_0.o FFV1P0_3.o +ALOHARoutine = FFV1_0.o FFV4_3.o FFV1P0_3.o FFV2_0.o FFV4_0.o FFV2_3.o diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! 
const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! + const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc index a6d90a2d1a..8d370a6b34 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc @@ -238,25 +238,18 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 2 *** // Wavefunction(s) for diagram number 1 -#if not( defined __CUDACC__ and defined MGONGPU_TEST_DIVERGENCE ) - opzxxx( momenta, cHel[ihel][0], -1, w_fp[0], 0 ); // NB: opzxxx only uses pz -#else - if( ( blockDim.x * blockIdx.x + threadIdx.x ) % 2 == 0 ) - opzxxx( momenta, cHel[ihel][0], -1, w_fp[0], 0 ); // NB: opzxxx only uses pz - else - oxxxxx( momenta, 0, cHel[ihel][0], -1, w_fp[0], 0 ); -#endif + oxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); - ixzxxx( momenta, cHel[ihel][2], -1, w_fp[2], 2 ); + ixxxxx( momenta, 0., cHel[ihel][2], -1, w_fp[2], 2 ); - oxzxxx( momenta, cHel[ihel][3], +1, w_fp[3], 3 ); + 
oxxxxx( momenta, 0., cHel[ihel][3], +1, w_fp[3], 3 ); - FFV1P0_3( w_fp[1], w_fp[0], COUPs[0], 0., 0., w_fp[4] ); + FFV1P0_3( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -266,10 +259,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 2 *** // Wavefunction(s) for diagram number 2 - FFV2_4_3( w_fp[1], w_fp[0], COUPs[1], COUPs[2], cIPD[0], cIPD[1], w_fp[4] ); + FFV2_4_3( w_fp[1], w_fp[0], COUPs[1], 1.0, COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 2 - FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[1], COUPs[2], &_fp[0] ); + FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[1], 1.0, COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -786,13 +779,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses 
== 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f index b836e34865..31e7790d2d 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f @@ -39,6 +39,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION EP1 DOUBLE PRECISION EM2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,15 +130,26 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - EP1=PDG2PDF(LPP(IB(1)),-11, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1) - $ ))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + EP1=PDG2PDF(LPP(IB(1)),-11, IB(1),XBK(IB(1)), QSCALE) IF (PDLABEL.EQ.'dressed') EP1_COMPONENTS(1:4) = $ EE_COMPONENTS(1:4) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - EM2=PDG2PDF(LPP(IB(2)),11, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + EM2=PDG2PDF(LPP(IB(2)),11, IB(2),XBK(IB(2)), QSCALE) IF (PDLABEL.EQ.'dressed') EM2_COMPONENTS(1:4) = $ EE_COMPONENTS(1:4) ENDIF diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/counters.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/counters.cc index 71fa817036..3bbdec9387 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/counters.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/counters.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2023 CERN 
and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. #include "timer.h" #define TIMERTYPE std::chrono::high_resolution_clock @@ -36,13 +36,10 @@ extern "C" static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer matrix1_timer; - static float matrix1_totaltime = 0; static mgOnGpu::Timer smatrix1_timer; static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; @@ -52,19 +49,6 @@ extern "C" return; } - void counters_matrix1_start_() - { - matrix1_counter++; - matrix1_timer.Start(); - return; - } - - void counters_matrix1_stop_() - { - matrix1_totaltime += matrix1_timer.GetDuration(); - return; - } - void counters_smatrix1_start_() { smatrix1_counter++; diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f index e00f0e1b64..21e300b33e 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f @@ -410,7 +410,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C ---------- C BEGIN CODE C ---------- - call counters_matrix1_start() IF (FIRST) THEN FIRST=.FALSE. 
IF(ZERO.NE.0D0) FK_ZERO = SIGN(MAX(ABS(ZERO), ABS(ZERO @@ -478,7 +477,6 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) ENDDO ENDDO - call counters_matrix1_stop() END SUBROUTINE PRINT_ZERO_AMP_1() diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: 
$(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile b/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile index 74db44d848..74b19033a8 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 +LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/ee_mumu.mad/bin/generate_events b/epochX/cudacpp/ee_mumu.mad/bin/generate_events index 107313b25d..5577cc66a0 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/generate_events +++ b/epochX/cudacpp/ee_mumu.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME - +import misc as misc import logging import logging.config @@ -160,17 +160,31 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv + argument = sys.argv + + # check for plugin customization of the launch command + launch_interface = ME.MadEventCmdShell + if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + try: if '-h' in argument or '--help' in argument: - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py index 7624b9f557..e9f421ae5f 100755 --- 
a/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py @@ -1002,13 +1002,14 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() + self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - self.plugin_input(finput) + def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/ee_mumu.mad/bin/internal/ufomodel/py3_model.pkl index dc38da0bfa76ea4206a3c5b2d34b98c606f7d044..27a1caae3c115073669b90622e9351ab04166d39 100644 GIT binary patch delta 21 dcmcb3j_Le4rVZZ97>g(SEK_5Qm|VK-4ghc73FH6( delta 23 fcmX?qj_Kk#rVZZ97)vMnEK_6l^>>+EyzCADfsqPs diff --git a/epochX/cudacpp/ee_mumu.mad/bin/madevent b/epochX/cudacpp/ee_mumu.mad/bin/madevent index c944aa1faf..10b6a71fa2 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/madevent +++ b/epochX/cudacpp/ee_mumu.mad/bin/madevent @@ -32,6 +32,7 @@ except ImportError: import os +pjoin = os.path.join import optparse # Get the directory of the script real path (bin) @@ -160,10 +161,23 @@ except: pass import internal.madevent_interface as cmd_interface +# check for plugin customization of the launch command +launch_interface = cmd_interface.MadEventCmdShell +if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + # Call the cmd interface main loop try: if '-h' in args or '--help' in args: - launch = ME.MadEventCmdShell(me_dir=os.path.dirname(root_path), force_run=True) + launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) with 
cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): @@ -178,7 +192,7 @@ try: cmd_line.run_cmd('import command ' + input_file) cmd_line.run_cmd('quit') else: - cmd_line = cmd_interface.MadEventCmdShell(force_run=True) + cmd_line = launch_interface(force_run=True) cmd_line.use_rawinput = False cmd_line.haspiping = False cmd_line.run_cmd('import command ' + input_file) @@ -188,7 +202,7 @@ try: if options.web: cmd_line = cmd_interface.MadEventCmd(force_run=True) else: - cmd_line = cmd_interface.MadEventCmdShell(force_run=True) + cmd_line = launch_interface(force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print( parser_error) diff --git a/epochX/cudacpp/ee_mumu.mad/mg5.in b/epochX/cudacpp/ee_mumu.mad/mg5.in index d868684019..12a2c58512 100644 --- a/epochX/cudacpp/ee_mumu.mad/mg5.in +++ b/epochX/cudacpp/ee_mumu.mad/mg5.in @@ -1,2 +1,4 @@ +set stdout_level DEBUG +set zerowidth_tchannel F generate e+ e- > mu+ mu- output madevent ee_mumu.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h index 6a8781b113..19819e2451 100644 --- a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -886,6 +888,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; 
//-------------------------------------------------------------------------- @@ -896,6 +899,7 @@ namespace mg5amcCpu FFV2_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -909,6 +913,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -919,6 +924,7 @@ namespace mg5amcCpu FFV4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -932,7 +938,9 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP1[], + const double Ccoeff1, const fptype allCOUP2[], + const double Ccoeff2, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -943,7 +951,9 @@ namespace mg5amcCpu FFV2_4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP1[], + const double Ccoeff1, const fptype allCOUP2[], + const double Ccoeff2, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -957,6 +967,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -980,6 +991,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1011,6 +1023,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1034,6 +1047,7 @@ namespace mg5amcCpu FFV2_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const 
fptype M3, const fptype W3, fptype allV3[] ) @@ -1067,6 +1081,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1093,6 +1108,7 @@ namespace mg5amcCpu FFV4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1129,7 +1145,9 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP1[], + const double Ccoeff1, const fptype allCOUP2[], + const double Ccoeff2, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1144,7 +1162,7 @@ namespace mg5amcCpu constexpr fptype two( 2. ); const cxtype_sv TMP1 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) ); const cxtype_sv TMP3 = ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ); - ( *vertex ) = ( -one ) * ( COUP2 * ( +cI * TMP1 + ( two * cI ) * TMP3 ) + cI * ( TMP1 * COUP1 ) ); + ( *vertex ) = ( -one ) * ( Ccoeff2 * COUP2 * ( +cI * TMP1 + ( two * cI ) * TMP3 ) + cI * ( TMP1 * Ccoeff1 * COUP1 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1157,7 +1175,9 @@ namespace mg5amcCpu FFV2_4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP1[], + const double Ccoeff1, const fptype allCOUP2[], + const double Ccoeff2, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1179,10 +1199,10 @@ namespace mg5amcCpu const cxtype_sv TMP2 = ( F1[2] * ( F2[4] * ( P3[0] + P3[3] ) + F2[5] * ( P3[1] + cI * P3[2] ) ) + F1[3] * ( F2[4] * ( P3[1] - cI * P3[2] ) + F2[5] * ( P3[0] - P3[3] ) ) ); const cxtype_sv TMP4 = ( F1[4] * ( F2[2] * ( P3[0] - P3[3] ) - F2[3] * ( P3[1] + cI * P3[2] ) ) + F1[5] * ( F2[2] * ( -P3[1] + cI * P3[2] ) + F2[3] * ( P3[0] + P3[3] ) ) ); const cxtype_sv denom = one / 
( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); - V3[2] = denom * ( -two * cI ) * ( COUP2 * ( OM3 * -half * P3[0] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] + F1[3] * F2[5] ) + F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( COUP1 * ( F1[2] * F2[4] + F1[3] * F2[5] - P3[0] * OM3 * TMP2 ) ) ); - V3[3] = denom * ( -two * cI ) * ( COUP2 * ( OM3 * -half * P3[1] * ( TMP2 + two * TMP4 ) + ( -half * ( F1[2] * F2[5] + F1[3] * F2[4] ) + F1[4] * F2[3] + F1[5] * F2[2] ) ) - half * ( COUP1 * ( F1[2] * F2[5] + F1[3] * F2[4] + P3[1] * OM3 * TMP2 ) ) ); - V3[4] = denom * cI * ( COUP2 * ( OM3 * P3[2] * ( TMP2 + two * TMP4 ) + ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + ( -two * cI ) * ( F1[4] * F2[3] ) + ( two * cI ) * ( F1[5] * F2[2] ) ) ) + COUP1 * ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + P3[2] * OM3 * TMP2 ) ); - V3[5] = denom * ( two * cI ) * ( COUP2 * ( OM3 * half * P3[3] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] ) - half * ( F1[3] * F2[5] ) - F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( COUP1 * ( F1[2] * F2[4] + P3[3] * OM3 * TMP2 - F1[3] * F2[5] ) ) ); + V3[2] = denom * ( -two * cI ) * ( Ccoeff2 * COUP2 * ( OM3 * -half * P3[0] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] + F1[3] * F2[5] ) + F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( Ccoeff1 * COUP1 * ( F1[2] * F2[4] + F1[3] * F2[5] - P3[0] * OM3 * TMP2 ) ) ); + V3[3] = denom * ( -two * cI ) * ( Ccoeff2 * COUP2 * ( OM3 * -half * P3[1] * ( TMP2 + two * TMP4 ) + ( -half * ( F1[2] * F2[5] + F1[3] * F2[4] ) + F1[4] * F2[3] + F1[5] * F2[2] ) ) - half * ( Ccoeff1 * COUP1 * ( F1[2] * F2[5] + F1[3] * F2[4] + P3[1] * OM3 * TMP2 ) ) ); + V3[4] = denom * cI * ( Ccoeff2 * COUP2 * ( OM3 * P3[2] * ( TMP2 + two * TMP4 ) + ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + ( -two * cI ) * ( F1[4] * F2[3] ) + ( two * cI ) * ( F1[5] * F2[2] ) ) ) + Ccoeff1 * COUP1 * ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + P3[2] * OM3 * TMP2 ) 
); + V3[5] = denom * ( two * cI ) * ( Ccoeff2 * COUP2 * ( OM3 * half * P3[3] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] ) - half * ( F1[3] * F2[5] ) - F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( Ccoeff1 * COUP1 * ( F1[2] * F2[4] + P3[3] * OM3 * TMP2 - F1[3] * F2[5] ) ) ); mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index e8795ee643..b13f728dee 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004621267318725586  +DEBUG: model prefixing takes 0.00538325309753418  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -160,49 +160,28 @@ output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  -DEBUG: type(subproc_group)= [output.py at line 188]  -DEBUG: type(fortran_model)= [output.py at line 189]  -DEBUG: type(me)= me=0 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: proc_id =  0 [model_handling.py at line 1046]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  -FileWriter for 
/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_epem_mupmum.txt [model_handling.py at line 1336]  -Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  +DEBUG: type(subproc_group)= [output.py at line 190]  +DEBUG: type(fortran_model)= [output.py at line 191]  +DEBUG: type(me)= me=0 [output.py at line 192]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. 
+Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.227 s +ALOHA: aloha creates 4 routines in 0.262 s FFV1 FFV1 FFV2 @@ -211,20 +190,17 @@ ALOHA: aloha creates 4 routines in 0.227 s FFV4 FFV2_4 FFV2_4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
+DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.627s -user 0m0.560s -sys 0m0.060s +real 0m0.651s +user 0m0.601s +sys 0m0.044s diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! 
+ const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc index 12a28d3f7a..11472d834e 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc @@ -238,25 +238,18 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 2 *** // Wavefunction(s) for diagram number 1 -#if not( defined __CUDACC__ and defined MGONGPU_TEST_DIVERGENCE ) - opzxxx( momenta, cHel[ihel][0], -1, w_fp[0], 0 ); // NB: opzxxx only uses pz -#else - if( ( blockDim.x * blockIdx.x + threadIdx.x ) % 2 == 0 ) - opzxxx( momenta, cHel[ihel][0], -1, w_fp[0], 0 ); // NB: opzxxx only uses pz - else - oxxxxx( momenta, 0, cHel[ihel][0], -1, w_fp[0], 0 ); -#endif + oxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); - ixzxxx( momenta, cHel[ihel][2], -1, w_fp[2], 2 ); + ixxxxx( momenta, 0., cHel[ihel][2], -1, w_fp[2], 2 ); - oxzxxx( momenta, cHel[ihel][3], +1, w_fp[3], 3 ); + oxxxxx( momenta, 0., cHel[ihel][3], +1, w_fp[3], 3 ); - FFV1P0_3( w_fp[1], w_fp[0], COUPs[0], 0., 0., w_fp[4] ); + FFV1P0_3( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -265,10 +258,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 2 *** // Wavefunction(s) for diagram number 2 - FFV2_4_3( w_fp[1], w_fp[0], COUPs[1], COUPs[2], cIPD[0], cIPD[1], w_fp[4] ); + FFV2_4_3( w_fp[1], w_fp[0], COUPs[1], 
1.0, COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 2 - FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[1], COUPs[2], &_fp[0] ); + FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[1], 1.0, COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -784,13 +777,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler 
flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/ee_mumu.sa/mg5.in b/epochX/cudacpp/ee_mumu.sa/mg5.in index 5c48dc6ef9..dcdf875dea 100644 --- a/epochX/cudacpp/ee_mumu.sa/mg5.in +++ b/epochX/cudacpp/ee_mumu.sa/mg5.in @@ -1,3 +1,4 @@ +set stdout_level DEBUG +set zerowidth_tchannel F generate e+ e- > mu+ mu- -output standalone_cudacpp ee_mumu.sa --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp - +output standalone_cudacpp ee_mumu.sa diff --git a/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h b/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h index 6a8781b113..19819e2451 100644 --- a/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], 
const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -886,6 +888,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -896,6 +899,7 @@ namespace mg5amcCpu FFV2_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -909,6 +913,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -919,6 +924,7 @@ namespace mg5amcCpu FFV4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -932,7 +938,9 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP1[], + const double Ccoeff1, const fptype allCOUP2[], + const double Ccoeff2, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -943,7 +951,9 @@ namespace mg5amcCpu FFV2_4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP1[], + const double Ccoeff1, const fptype allCOUP2[], + const double Ccoeff2, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -957,6 +967,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, 
__FUNCTION__ ); @@ -980,6 +991,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1011,6 +1023,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1034,6 +1047,7 @@ namespace mg5amcCpu FFV2_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1067,6 +1081,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1093,6 +1108,7 @@ namespace mg5amcCpu FFV4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1129,7 +1145,9 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP1[], + const double Ccoeff1, const fptype allCOUP2[], + const double Ccoeff2, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1144,7 +1162,7 @@ namespace mg5amcCpu constexpr fptype two( 2. 
); const cxtype_sv TMP1 = ( F1[2] * ( F2[4] * ( V3[2] + V3[5] ) + F2[5] * ( V3[3] + cI * V3[4] ) ) + F1[3] * ( F2[4] * ( V3[3] - cI * V3[4] ) + F2[5] * ( V3[2] - V3[5] ) ) ); const cxtype_sv TMP3 = ( F1[4] * ( F2[2] * ( V3[2] - V3[5] ) - F2[3] * ( V3[3] + cI * V3[4] ) ) + F1[5] * ( F2[2] * ( -V3[3] + cI * V3[4] ) + F2[3] * ( V3[2] + V3[5] ) ) ); - ( *vertex ) = ( -one ) * ( COUP2 * ( +cI * TMP1 + ( two * cI ) * TMP3 ) + cI * ( TMP1 * COUP1 ) ); + ( *vertex ) = ( -one ) * ( Ccoeff2 * COUP2 * ( +cI * TMP1 + ( two * cI ) * TMP3 ) + cI * ( TMP1 * Ccoeff1 * COUP1 ) ); mgDebug( 1, __FUNCTION__ ); return; } @@ -1157,7 +1175,9 @@ namespace mg5amcCpu FFV2_4_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP1[], + const double Ccoeff1, const fptype allCOUP2[], + const double Ccoeff2, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1179,10 +1199,10 @@ namespace mg5amcCpu const cxtype_sv TMP2 = ( F1[2] * ( F2[4] * ( P3[0] + P3[3] ) + F2[5] * ( P3[1] + cI * P3[2] ) ) + F1[3] * ( F2[4] * ( P3[1] - cI * P3[2] ) + F2[5] * ( P3[0] - P3[3] ) ) ); const cxtype_sv TMP4 = ( F1[4] * ( F2[2] * ( P3[0] - P3[3] ) - F2[3] * ( P3[1] + cI * P3[2] ) ) + F1[5] * ( F2[2] * ( -P3[1] + cI * P3[2] ) + F2[3] * ( P3[0] + P3[3] ) ) ); const cxtype_sv denom = one / ( ( P3[0] * P3[0] ) - ( P3[1] * P3[1] ) - ( P3[2] * P3[2] ) - ( P3[3] * P3[3] ) - M3 * ( M3 - cI * W3 ) ); - V3[2] = denom * ( -two * cI ) * ( COUP2 * ( OM3 * -half * P3[0] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] + F1[3] * F2[5] ) + F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( COUP1 * ( F1[2] * F2[4] + F1[3] * F2[5] - P3[0] * OM3 * TMP2 ) ) ); - V3[3] = denom * ( -two * cI ) * ( COUP2 * ( OM3 * -half * P3[1] * ( TMP2 + two * TMP4 ) + ( -half * ( F1[2] * F2[5] + F1[3] * F2[4] ) + F1[4] * F2[3] + F1[5] * F2[2] ) ) - half * ( COUP1 * ( F1[2] * F2[5] + F1[3] * F2[4] + P3[1] * OM3 * TMP2 ) ) ); - V3[4] = denom * cI * ( COUP2 * ( OM3 * P3[2] * ( TMP2 + two * TMP4 ) + ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * 
F2[4] ) + ( -two * cI ) * ( F1[4] * F2[3] ) + ( two * cI ) * ( F1[5] * F2[2] ) ) ) + COUP1 * ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + P3[2] * OM3 * TMP2 ) ); - V3[5] = denom * ( two * cI ) * ( COUP2 * ( OM3 * half * P3[3] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] ) - half * ( F1[3] * F2[5] ) - F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( COUP1 * ( F1[2] * F2[4] + P3[3] * OM3 * TMP2 - F1[3] * F2[5] ) ) ); + V3[2] = denom * ( -two * cI ) * ( Ccoeff2 * COUP2 * ( OM3 * -half * P3[0] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] + F1[3] * F2[5] ) + F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( Ccoeff1 * COUP1 * ( F1[2] * F2[4] + F1[3] * F2[5] - P3[0] * OM3 * TMP2 ) ) ); + V3[3] = denom * ( -two * cI ) * ( Ccoeff2 * COUP2 * ( OM3 * -half * P3[1] * ( TMP2 + two * TMP4 ) + ( -half * ( F1[2] * F2[5] + F1[3] * F2[4] ) + F1[4] * F2[3] + F1[5] * F2[2] ) ) - half * ( Ccoeff1 * COUP1 * ( F1[2] * F2[5] + F1[3] * F2[4] + P3[1] * OM3 * TMP2 ) ) ); + V3[4] = denom * cI * ( Ccoeff2 * COUP2 * ( OM3 * P3[2] * ( TMP2 + two * TMP4 ) + ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + ( -two * cI ) * ( F1[4] * F2[3] ) + ( two * cI ) * ( F1[5] * F2[2] ) ) ) + Ccoeff1 * COUP1 * ( +cI * ( F1[2] * F2[5] ) - cI * ( F1[3] * F2[4] ) + P3[2] * OM3 * TMP2 ) ); + V3[5] = denom * ( two * cI ) * ( Ccoeff2 * COUP2 * ( OM3 * half * P3[3] * ( TMP2 + two * TMP4 ) + ( +half * ( F1[2] * F2[4] ) - half * ( F1[3] * F2[5] ) - F1[4] * F2[2] + F1[5] * F2[3] ) ) + half * ( Ccoeff1 * COUP1 * ( F1[2] * F2[4] + P3[3] * OM3 * TMP2 - F1[3] * F2[5] ) ) ); mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index c777d7154a..b5c53c1161 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config 
Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004714488983154297  +DEBUG: model prefixing takes 0.005456686019897461  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,100 +155,72 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.007 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  INFO: initialize a new directory: CODEGEN_mad_gg_tt INFO: remove old information in CODEGEN_mad_gg_tt -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses  INFO: Organizing processes into subprocess groups INFO: 
Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, 
cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated 
helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.104 s +Wrote files for 10 helas calls in 0.103 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.126 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +ALOHA: aloha creates 2 routines in 0.143 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.112 s +ALOHA: aloha creates 4 routines in 0.140 s VVV1 FFV1 FFV1 FFV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. 
The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * * W E L C O M E to * @@ -269,14 +241,15 @@ INFO: Generate web pages * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. 
Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -300,34 +273,36 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py +Hunk #1 succeeded at 4188 (offset 1 line). patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f -Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README Run "open index.html" to see more information about this process. 
quit -real 0m2.341s -user 0m1.909s -sys 0m0.352s +real 0m2.913s +user 0m2.038s +sys 0m0.292s diff --git a/epochX/cudacpp/gg_tt.mad/Cards/ident_card.dat b/epochX/cudacpp/gg_tt.mad/Cards/ident_card.dat index b37758a42a..0ba87b008f 100644 --- a/epochX/cudacpp/gg_tt.mad/Cards/ident_card.dat +++ b/epochX/cudacpp/gg_tt.mad/Cards/ident_card.dat @@ -2,32 +2,32 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc -mass 5 mdl_MB -mass 6 mdl_MT -mass 15 mdl_MTA -mass 23 mdl_MZ -mass 25 mdl_MH -sminputs 1 aEWM1 -sminputs 2 mdl_Gf -sminputs 3 aS -yukawa 5 mdl_ymb -yukawa 6 mdl_ymt -yukawa 15 mdl_ymtau -decay 6 mdl_WT -decay 23 mdl_WZ -decay 24 mdl_WW +decay 23 mdl_WZ +decay 24 mdl_WW decay 25 mdl_WH +decay 6 mdl_WT +mass 15 mdl_MTA +mass 23 mdl_MZ +mass 25 mdl_MH +mass 5 mdl_MB +mass 6 mdl_MT +sminputs 1 aEWM1 +sminputs 2 mdl_Gf +sminputs 3 aS +yukawa 15 mdl_ymtau +yukawa 5 mdl_ymb +yukawa 6 mdl_ymt diff --git a/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt index 27acb12a1e..cdeedc7863 100644 --- a/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -mg5_path = /data/stephan/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -mg5_path = /data/stephan/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat index fa1b56a168..2a2fd25453 100644 --- a/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat @@ -35,15 +35,15 @@ set loop_color_flows False set max_npoint_for_channel 0 set default_unset_couplings 99 set 
max_t_for_channel 99 -set zerowidth_tchannel True set nlo_mixed_expansion True -import model sm +set stdout_level DEBUG +set zerowidth_tchannel F +generate g g > t t~ define p = g u c d s u~ c~ d~ s~ define j = g u c d s u~ c~ d~ s~ define l+ = e+ mu+ define l- = e- mu- define vl = ve vm vt define vl~ = ve~ vm~ vt~ -generate g g > t t~ -output madevent gg_tt.mad_gen --hel_recycling=False --vector_size=1638\ -4 --me_exporter=standalone_cudacpp +output madevent ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --ve\ +ctor_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/gg_tt.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gg_tt.mad/Source/DHELAS/aloha_file.inc index 3a21194b00..5597c614b0 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gg_tt.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1_0.o FFV1_1.o FFV1_2.o VVV1P0_1.o +ALOHARoutine = FFV1_1.o FFV1_0.o FFV1_2.o VVV1P0_1.o diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! 
Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! + const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index 0afa202e07..02f655f48c 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -246,10 +246,10 @@ namespace mg5amcCpu ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[4] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -260,10 +260,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 3 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] 
); @@ -273,10 +273,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 3 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -794,13 +794,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index 3b24a9924c..0b493ae244 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -39,6 +39,7 @@ DOUBLE PRECISION FUNCTION 
DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -126,11 +127,24 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+ +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += 
smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f index 85baf477c9..daea73a6df 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -416,10 +416,10 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL VVV1P0_1(W(1,1),W(1,2),GC_10(IVEC),ZERO, FK_ZERO,W(1,5)) C Amplitude(s) for diagram number 1 CALL FFV1_0(W(1,4),W(1,3),W(1,5),GC_11(IVEC),AMP(1)) - CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,5)) + CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,5)) C Amplitude(s) for diagram number 2 CALL FFV1_0(W(1,4),W(1,5),W(1,2),GC_11(IVEC),AMP(2)) - CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,5)) + CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,5)) C Amplitude(s) for diagram number 3 CALL FFV1_0(W(1,5),W(1,3),W(1,2),GC_11(IVEC),AMP(3)) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc deleted file mode 
120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ 
and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile index 74db44d848..74b19033a8 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f 
$(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 +LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/proc_characteristics b/epochX/cudacpp/gg_tt.mad/SubProcesses/proc_characteristics index 1e1cd93bfa..51cb12fb25 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/proc_characteristics +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/proc_characteristics @@ -8,7 +8,7 @@ ninitial = 2 grouped_matrix = True has_loops = False - bias_module = None + bias_module = dummy max_n_matched_jets = 0 colored_pdgs = [1, 2, 3, 4, 5, 6, 21] complex_mass_scheme = False diff --git a/epochX/cudacpp/gg_tt.mad/bin/generate_events b/epochX/cudacpp/gg_tt.mad/bin/generate_events index 107313b25d..5577cc66a0 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/generate_events +++ b/epochX/cudacpp/gg_tt.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME - +import misc as misc import logging import logging.config @@ -160,17 +160,31 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv + argument = sys.argv + + # check for plugin customization of the launch command + launch_interface = ME.MadEventCmdShell + if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + 
reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + try: if '-h' in argument or '--help' in argument: - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py b/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py index 7624b9f557..e9f421ae5f 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py @@ -1002,13 +1002,14 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() + self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - self.plugin_input(finput) + def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/gg_tt.mad/bin/internal/ufomodel/py3_model.pkl index afc2ca4e273b368050537e3f722b85c825bbf510..27a1caae3c115073669b90622e9351ab04166d39 100644 GIT binary patch delta 54 zcmX?lj_Le4rVZZ9G>RD*81z#TOA_@H%Mx=Ei;FY$-2+0642+ERa}!h2ixLYmOwtQV KBPM4qy8{3ztQ2Sf delta 44 zcmX?qj_K$*rVZZ9 t t~ output madevent gg_tt.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h index 94bf8aca52..07d0bfa887 100644 --- 
a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h @@ -862,6 +862,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -875,6 +876,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -885,6 +887,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -897,6 +900,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -909,6 +913,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -947,6 +952,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -970,6 +976,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1001,6 +1008,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index cb7b25ef28..23c04c9100 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ 
-51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004621744155883789  +DEBUG: model prefixing takes 0.005602359771728516  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,78 +155,47 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.007 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  -DEBUG: type(subproc_group)= [output.py at line 188]  -DEBUG: type(fortran_model)= [output.py at line 189]  -DEBUG: type(me)= me=0 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: proc_id =  0 [model_handling.py at line 1046]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is not yet 
defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1336]  -Generated helas calls for 1 subprocesses (3 diagrams) in 0.005 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  +DEBUG: type(subproc_group)= [output.py at line 190]  +DEBUG: type(fortran_model)= [output.py at line 191]  +DEBUG: type(me)= me=0 [output.py at line 192]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. 
+Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.126 s +ALOHA: aloha creates 2 routines in 0.143 s VVV1 FFV1 FFV1 FFV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 
729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.572s -user 0m0.500s -sys 0m0.057s +real 0m0.539s +user 0m0.490s +sys 0m0.045s diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! 
const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! + const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc index ea42fb3e96..141d1f24ac 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc @@ -246,10 +246,10 @@ namespace mg5amcCpu ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[4] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -259,10 +259,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 3 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 2 - 
FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 3 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -791,13 +791,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk 
b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_tt.sa/mg5.in b/epochX/cudacpp/gg_tt.sa/mg5.in index a2f1230ada..8298656d17 100644 --- a/epochX/cudacpp/gg_tt.sa/mg5.in +++ b/epochX/cudacpp/gg_tt.sa/mg5.in @@ -1,3 +1,4 @@ +set stdout_level DEBUG +set zerowidth_tchannel F generate g g > t t~ -output 
standalone_cudacpp gg_tt.sa --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp - +output standalone_cudacpp gg_tt.sa diff --git a/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h index 94bf8aca52..07d0bfa887 100644 --- a/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h @@ -862,6 +862,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -875,6 +876,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -885,6 +887,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -897,6 +900,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -909,6 +913,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -947,6 +952,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -970,6 +976,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1001,6 +1008,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) diff --git 
a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 666f2f1d0b..b27d021202 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~; add process g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0046291351318359375  +DEBUG: model prefixing takes 0.005415201187133789  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,132 +155,84 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.007 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.018 s +1 processes with 16 diagrams generated in 0.019 s Total: 2 processes with 19 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  INFO: initialize a new directory: CODEGEN_mad_gg_tt01g INFO: remove old information in CODEGEN_mad_gg_tt01g -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses  INFO: Organizing 
processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @2 INFO: Processing color information for process: g g > t t~ g @2 INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1170]  -DEBUG: 
multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1336]  DEBUG: proc_id 
=  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 2 subprocesses (19 diagrams) in 0.039 s -Wrote files for 46 helas calls in 0.247 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.041 s +Wrote files for 46 helas calls in 
0.238 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.276 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +ALOHA: aloha creates 5 routines in 0.321 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.263 s +ALOHA: aloha creates 10 routines in 0.305 s VVV1 VVV1 FFV1 @@ -290,27 +242,22 @@ ALOHA: aloha creates 10 routines in 0.263 s VVVV1 VVVV3 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. 
The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * * W E L C O M E to * @@ -331,14 +278,15 @@ INFO: Generate web pages * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. 
Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -362,44 +310,45 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py +Hunk #1 succeeded at 4188 (offset 1 line). patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). 
patching file driver.f patching file matrix1.f Hunk #2 succeeded at 159 (offset 16 lines). Hunk #3 succeeded at 237 (offset 16 lines). Hunk #4 succeeded at 265 (offset 16 lines). Hunk #5 succeeded at 310 (offset 16 lines). -Hunk #6 succeeded at 434 (offset 38 lines). -Hunk #7 succeeded at 588 (offset 118 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/README Run "open index.html" to see more information about this process. quit -real 0m2.847s -user 0m2.424s -sys 0m0.365s +real 0m2.906s +user 0m2.575s +sys 0m0.318s diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/ident_card.dat b/epochX/cudacpp/gg_tt01g.mad/Cards/ident_card.dat index b37758a42a..0ba87b008f 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/ident_card.dat +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/ident_card.dat @@ -2,32 +2,32 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc -mass 5 mdl_MB -mass 6 mdl_MT -mass 15 mdl_MTA -mass 23 mdl_MZ -mass 25 mdl_MH -sminputs 1 aEWM1 -sminputs 2 mdl_Gf -sminputs 3 aS -yukawa 5 mdl_ymb -yukawa 6 mdl_ymt -yukawa 15 mdl_ymtau -decay 6 mdl_WT -decay 23 mdl_WZ -decay 24 mdl_WW +decay 23 mdl_WZ +decay 24 mdl_WW decay 25 mdl_WH +decay 6 mdl_WT +mass 15 mdl_MTA +mass 23 mdl_MZ +mass 25 mdl_MH +mass 5 mdl_MB +mass 6 mdl_MT +sminputs 1 aEWM1 +sminputs 2 mdl_Gf +sminputs 3 aS +yukawa 15 mdl_ymtau +yukawa 5 mdl_ymb +yukawa 6 mdl_ymt diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt index 
27acb12a1e..cdeedc7863 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -mg5_path = /data/stephan/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -mg5_path = /data/stephan/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat index 766af51c8a..cdb64729b1 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat @@ -35,16 +35,16 @@ set loop_color_flows False set max_npoint_for_channel 0 set default_unset_couplings 99 set max_t_for_channel 99 -set zerowidth_tchannel True set nlo_mixed_expansion True -import model sm +set stdout_level DEBUG +set zerowidth_tchannel F +generate g g > t t~ define p = g u c d s u~ c~ d~ s~ define j = g u c d s u~ c~ d~ s~ define l+ = e+ mu+ define l- = e- mu- define vl = ve vm vt define vl~ = ve~ vm~ vt~ -generate g g > t t~ add process g g > t t~ g -output madevent gg_tt01g.mad_gen --hel_recycling=False --vector_size=1\ -6384 --me_exporter=standalone_cudacpp +output madevent ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False -\ +-vector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_file.inc index 7639734c1c..50c12b0804 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1P0_3.o FFV1_0.o FFV1_1.o FFV1_2.o VVV1P0_1.o VVV1_0.o VVVV1P0_1.o VVVV3P0_1.o VVVV4P0_1.o +ALOHARoutine = FFV1_1.o VVVV4P0_1.o FFV1_0.o VVV1_0.o FFV1_2.o VVVV3P0_1.o VVVV1P0_1.o VVV1P0_1.o FFV1P0_3.o diff --git 
a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! 
+ const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index 0afa202e07..02f655f48c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -246,10 +246,10 @@ namespace mg5amcCpu ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[4] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -260,10 +260,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 3 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -273,10 +273,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 3 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += 
cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -794,13 +794,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index 3b24a9924c..0b493ae244 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -39,6 +39,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -126,11 +127,24 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + 
QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+ +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += 
smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f index 85baf477c9..daea73a6df 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -416,10 +416,10 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL VVV1P0_1(W(1,1),W(1,2),GC_10(IVEC),ZERO, FK_ZERO,W(1,5)) C Amplitude(s) for diagram number 1 CALL FFV1_0(W(1,4),W(1,3),W(1,5),GC_11(IVEC),AMP(1)) - CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,5)) + CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,5)) C Amplitude(s) for diagram number 2 CALL FFV1_0(W(1,4),W(1,5),W(1,2),GC_11(IVEC),AMP(2)) - CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,5)) + CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,5)) C Amplitude(s) for diagram number 3 CALL FFV1_0(W(1,5),W(1,3),W(1,2),GC_11(IVEC),AMP(3)) diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc 
deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc index 9dfd471c50..ce1badffca 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc @@ -248,11 +248,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 
0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -265,10 +265,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 16 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[7] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -279,10 +279,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 16 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -293,11 +293,11 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 16 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram 
number 4 - FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -307,10 +307,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 16 *** // Wavefunction(s) for diagram number 5 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -324,7 +324,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -334,11 +334,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 16 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,7 +351,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - 
FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -365,7 +365,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -375,10 +375,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 16 *** // Wavefunction(s) for diagram number 10 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[5] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -392,7 +392,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -406,7 +406,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 12 - VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -422,7 +422,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 13 - FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -435,7 +435,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -448,7 +448,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -461,22 +461,22 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 16 *** // Wavefunction(s) for diagram number 16 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[10] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[6] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[9] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += 
amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -1015,13 +1015,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f index 071034763a..68e664f70c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f @@ -39,6 +39,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -126,11 +127,24 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - 
G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+ +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += 
smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f index 4a9d848b70..eb85b7ebb0 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f @@ -462,8 +462,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_2(W(1,4),W(1,5),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,9)) C Amplitude(s) for diagram number 3 CALL FFV1_0(W(1,9),W(1,3),W(1,6),GC_11(IVEC),AMP(3)) - CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,6)) - CALL FFV1_2(W(1,4),W(1,2),GC_11(IVEC),MDL_MT, ZERO,W(1,10)) + CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,6)) + CALL FFV1_2(W(1,4),W(1,2),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,10)) C Amplitude(s) for diagram number 4 CALL FFV1_0(W(1,10),W(1,6),W(1,5),GC_11(IVEC),AMP(4)) CALL VVV1P0_1(W(1,2),W(1,5),GC_10(IVEC),ZERO, FK_ZERO,W(1,11)) @@ -471,8 +471,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,4),W(1,6),W(1,11),GC_11(IVEC),AMP(5)) C Amplitude(s) for diagram number 6 CALL 
FFV1_0(W(1,9),W(1,6),W(1,2),GC_11(IVEC),AMP(6)) - CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,6)) - CALL FFV1_1(W(1,3),W(1,2),GC_11(IVEC),MDL_MT, ZERO,W(1,12)) + CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,6)) + CALL FFV1_1(W(1,3),W(1,2),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,12)) C Amplitude(s) for diagram number 7 CALL FFV1_0(W(1,6),W(1,12),W(1,5),GC_11(IVEC),AMP(7)) C Amplitude(s) for diagram number 8 diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/ompnumthreads.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/ompnumthreads.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+ +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o 
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile index 74db44d848..74b19033a8 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 +LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/proc_characteristics b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/proc_characteristics index 7b4b4a0dab..6711fb7544 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/proc_characteristics +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/proc_characteristics @@ -8,7 +8,7 @@ ninitial = 2 grouped_matrix = True has_loops = False - bias_module = None + bias_module = dummy max_n_matched_jets = 1 colored_pdgs = [1, 2, 3, 4, 5, 6, 21] complex_mass_scheme = False diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/generate_events b/epochX/cudacpp/gg_tt01g.mad/bin/generate_events index 107313b25d..5577cc66a0 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/generate_events +++ b/epochX/cudacpp/gg_tt01g.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME - +import misc as misc import logging import logging.config @@ -160,17 +160,31 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv + argument = sys.argv + + # check for plugin customization of the launch command + launch_interface = ME.MadEventCmdShell + if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + try: if '-h' in argument or '--help' in argument: - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: 
argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py index 7624b9f557..e9f421ae5f 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py @@ -1002,13 +1002,14 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() + self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - self.plugin_input(finput) + def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/gg_tt01g.mad/bin/internal/ufomodel/py3_model.pkl index afc2ca4e273b368050537e3f722b85c825bbf510..27a1caae3c115073669b90622e9351ab04166d39 100644 GIT binary patch delta 54 zcmX?lj_Le4rVZZ9G>RD*81z#TOA_@H%Mx=Ei;FY$-2+0642+ERa}!h2ixLYmOwtQV KBPM4qy8{3ztQ2Sf delta 44 zcmX?qj_K$*rVZZ9 t t~ add process g g > t t~ g output madevent gg_tt01g.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp - diff --git a/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h index 4a326fae62..8995b15c82 100644 --- a/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu VVV1P0_1( const 
fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -886,6 +888,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -896,6 +899,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -908,6 +912,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -920,6 +925,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -933,6 +939,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -946,6 +953,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -959,6 +967,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -972,6 +981,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1006,6 +1016,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1044,6 
+1055,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1067,6 +1079,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1098,6 +1111,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1129,6 +1143,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1160,6 +1175,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1194,6 +1210,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1228,6 +1245,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index eea422eba1..d367fef872 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004817008972167969  +DEBUG: model prefixing takes 0.005761861801147461  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,83 +155,58 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.021 s +1 processes with 16 diagrams generated in 0.022 s Total: 1 processes with 16 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  INFO: initialize a new directory: CODEGEN_mad_gg_ttg INFO: remove old information in CODEGEN_mad_gg_ttg -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  
-WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], 
%d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group 
gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.035 s -Wrote files for 36 helas calls in 0.153 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s +Wrote files for 36 helas calls in 0.146 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.277 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +ALOHA: aloha creates 5 routines in 0.323 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.263 s +ALOHA: aloha creates 10 routines in 0.310 s VVV1 VVV1 FFV1 @@ -241,27 +216,22 @@ ALOHA: aloha creates 10 routines in 0.263 s VVVV1 VVVV3 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. 
The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * * W E L C O M E to * @@ -282,14 +252,15 @@ INFO: Generate web pages * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. 
Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -313,40 +284,40 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py +Hunk #1 succeeded at 4188 (offset 1 line). patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 159 (offset 16 lines). Hunk #3 succeeded at 237 (offset 16 lines). Hunk #4 succeeded at 265 (offset 16 lines). Hunk #5 succeeded at 310 (offset 16 lines). -Hunk #6 succeeded at 434 (offset 38 lines). -Hunk #7 succeeded at 588 (offset 118 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done. 
Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/README Run "open index.html" to see more information about this process. quit -real 0m2.802s -user 0m2.323s -sys 0m0.356s +real 0m2.947s +user 0m2.470s +sys 0m0.321s diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/ident_card.dat b/epochX/cudacpp/gg_ttg.mad/Cards/ident_card.dat index b37758a42a..0ba87b008f 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/ident_card.dat +++ b/epochX/cudacpp/gg_ttg.mad/Cards/ident_card.dat @@ -2,32 +2,32 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc -mass 5 mdl_MB -mass 6 mdl_MT -mass 15 mdl_MTA -mass 23 mdl_MZ -mass 25 mdl_MH -sminputs 1 aEWM1 -sminputs 2 mdl_Gf -sminputs 3 aS -yukawa 5 mdl_ymb -yukawa 6 mdl_ymt -yukawa 15 mdl_ymtau -decay 6 mdl_WT -decay 23 mdl_WZ -decay 24 mdl_WW +decay 23 mdl_WZ +decay 24 mdl_WW decay 25 mdl_WH +decay 6 mdl_WT +mass 15 mdl_MTA +mass 23 mdl_MZ +mass 25 mdl_MH +mass 5 mdl_MB +mass 6 mdl_MT +sminputs 1 aEWM1 +sminputs 2 mdl_Gf +sminputs 3 aS +yukawa 15 mdl_ymtau +yukawa 5 mdl_ymb +yukawa 6 mdl_ymt diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt index 27acb12a1e..cdeedc7863 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -mg5_path = /data/stephan/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -mg5_path = /data/stephan/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat 
b/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat index e47a896364..3af4991f01 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat @@ -35,15 +35,15 @@ set loop_color_flows False set max_npoint_for_channel 0 set default_unset_couplings 99 set max_t_for_channel 99 -set zerowidth_tchannel True set nlo_mixed_expansion True -import model sm +set stdout_level DEBUG +set zerowidth_tchannel F +generate g g > t t~ g define p = g u c d s u~ c~ d~ s~ define j = g u c d s u~ c~ d~ s~ define l+ = e+ mu+ define l- = e- mu- define vl = ve vm vt define vl~ = ve~ vm~ vt~ -generate g g > t t~ g -output madevent gg_ttg.mad_gen --hel_recycling=False --vector_size=163\ -84 --me_exporter=standalone_cudacpp +output madevent ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --v\ +ector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_file.inc index 7639734c1c..50c12b0804 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1P0_3.o FFV1_0.o FFV1_1.o FFV1_2.o VVV1P0_1.o VVV1_0.o VVVV1P0_1.o VVVV3P0_1.o VVVV4P0_1.o +ALOHARoutine = FFV1_1.o VVVV4P0_1.o FFV1_0.o VVV1_0.o FFV1_2.o VVVV3P0_1.o VVVV1P0_1.o VVV1P0_1.o FFV1P0_3.o diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! 
const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! + const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc index 8cc007dff8..f7f5899260 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc @@ -248,11 +248,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -265,10 +265,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 16 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[7] ); + FFV1_1( 
w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -279,10 +279,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 16 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -293,11 +293,11 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 16 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -307,10 +307,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 16 *** // Wavefunction(s) for diagram number 5 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], 
w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -324,7 +324,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -334,11 +334,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 16 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,7 +351,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -365,7 +365,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ 
-375,10 +375,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 16 *** // Wavefunction(s) for diagram number 10 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[5] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -392,7 +392,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -406,7 +406,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 12 - VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -422,7 +422,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -435,7 +435,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += 
cxabs2( amp_sv[0] ); @@ -448,7 +448,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -461,22 +461,22 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 16 *** // Wavefunction(s) for diagram number 16 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[10] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[6] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[9] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -1015,13 +1015,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity 
check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index 668cc26192..b8615bc68f 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -39,6 +39,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -126,11 +127,24 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git 
a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
+ constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput 
is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f index 7188daef76..fc924825c2 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f @@ -462,8 +462,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_2(W(1,4),W(1,5),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,9)) C Amplitude(s) for diagram number 3 CALL FFV1_0(W(1,9),W(1,3),W(1,6),GC_11(IVEC),AMP(3)) - CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,6)) - CALL FFV1_2(W(1,4),W(1,2),GC_11(IVEC),MDL_MT, ZERO,W(1,10)) + CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,6)) + CALL FFV1_2(W(1,4),W(1,2),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,10)) C Amplitude(s) for diagram number 4 CALL FFV1_0(W(1,10),W(1,6),W(1,5),GC_11(IVEC),AMP(4)) CALL VVV1P0_1(W(1,2),W(1,5),GC_10(IVEC),ZERO, FK_ZERO,W(1,11)) @@ -471,8 +471,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,4),W(1,6),W(1,11),GC_11(IVEC),AMP(5)) C Amplitude(s) for diagram number 6 CALL FFV1_0(W(1,9),W(1,6),W(1,2),GC_11(IVEC),AMP(6)) - CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,6)) - CALL FFV1_1(W(1,3),W(1,2),GC_11(IVEC),MDL_MT, ZERO,W(1,12)) + CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,6)) + CALL FFV1_1(W(1,3),W(1,2),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,12)) C Amplitude(s) for diagram number 7 CALL FFV1_0(W(1,6),W(1,12),W(1,5),GC_11(IVEC),AMP(7)) C Amplitude(s) for diagram number 8 diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- 
a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ 
else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile b/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile index 74db44d848..74b19033a8 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk 
'/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 +LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/proc_characteristics b/epochX/cudacpp/gg_ttg.mad/SubProcesses/proc_characteristics index 94bf45fc5a..119c7424dc 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/proc_characteristics +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/proc_characteristics @@ -8,7 +8,7 @@ ninitial = 2 grouped_matrix = True has_loops = False - bias_module = None + bias_module = dummy max_n_matched_jets = 1 colored_pdgs = [1, 2, 3, 4, 5, 6, 21] complex_mass_scheme = False diff --git a/epochX/cudacpp/gg_ttg.mad/bin/generate_events b/epochX/cudacpp/gg_ttg.mad/bin/generate_events index 107313b25d..5577cc66a0 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/generate_events +++ b/epochX/cudacpp/gg_ttg.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME - +import misc as misc import logging import logging.config @@ -160,17 +160,31 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv + argument = sys.argv + + # check for plugin customization of the launch command + launch_interface = ME.MadEventCmdShell + if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + 
except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + try: if '-h' in argument or '--help' in argument: - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py index 7624b9f557..e9f421ae5f 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py @@ -1002,13 +1002,14 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() + self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - self.plugin_input(finput) + def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/gg_ttg.mad/bin/internal/ufomodel/py3_model.pkl index afc2ca4e273b368050537e3f722b85c825bbf510..27a1caae3c115073669b90622e9351ab04166d39 100644 GIT binary patch delta 54 zcmX?lj_Le4rVZZ9G>RD*81z#TOA_@H%Mx=Ei;FY$-2+0642+ERa}!h2ixLYmOwtQV KBPM4qy8{3ztQ2Sf delta 44 zcmX?qj_K$*rVZZ9 t t~ g output madevent gg_ttg.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h index 4a326fae62..8995b15c82 100644 --- a/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h +++ 
b/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -886,6 +888,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -896,6 +899,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -908,6 +912,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -920,6 +925,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -933,6 +939,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -946,6 +953,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -959,6 +967,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -972,6 +981,7 @@ namespace mg5amcCpu const 
fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1006,6 +1016,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1044,6 +1055,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1067,6 +1079,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1098,6 +1111,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1129,6 +1143,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1160,6 +1175,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1194,6 +1210,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1228,6 +1245,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index c23adaa32c..20d22ac1c4 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run 
aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005018949508666992  +DEBUG: model prefixing takes 0.005301237106323242  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,62 +155,35 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.020 s +1 processes with 16 diagrams generated in 0.022 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  -DEBUG: type(subproc_group)= [output.py at line 188]  -DEBUG: type(fortran_model)= [output.py at line 189]  -DEBUG: type(me)= me=0 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: proc_id =  0 [model_handling.py at line 1046]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel 
is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1336]  -Generated helas calls for 1 subprocesses (16 diagrams) in 0.034 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  +DEBUG: type(subproc_group)= [output.py at line 190]  +DEBUG: type(fortran_model)= [output.py at line 191]  +DEBUG: type(me)= me=0 [output.py at line 192]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. 
+Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.278 s +ALOHA: aloha creates 5 routines in 0.321 s VVV1 VVV1 FFV1 @@ -220,23 +193,17 @@ ALOHA: aloha creates 5 routines in 0.278 s VVVV1 VVVV3 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
-DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.847s -user 0m0.711s -sys 0m0.052s +real 0m0.792s +user 0m0.717s +sys 0m0.059s diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! 
+ const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc index 6f71af24b1..9393033e26 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc @@ -248,11 +248,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -264,10 +264,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 16 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[7] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -277,10 +277,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 16 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); + 
FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -290,11 +290,11 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 16 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -303,10 +303,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 16 *** // Wavefunction(s) for diagram number 5 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -319,7 +319,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -328,11 +328,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 16 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1_1( 
w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -344,7 +344,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -357,7 +357,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -366,10 +366,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 16 *** // Wavefunction(s) for diagram number 10 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[5] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -382,7 +382,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -395,7 +395,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 12 - VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -410,7 +410,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -422,7 +422,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -434,7 +434,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -446,12 +446,12 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 16 *** // Wavefunction(s) for diagram number 16 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[10] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[6] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[9] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] ); + VVVV3P0_1( 
w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -459,7 +459,7 @@ namespace mg5amcCpu jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -467,7 +467,7 @@ namespace mg5amcCpu jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1009,13 +1009,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran 
functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) 
ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttg.sa/mg5.in b/epochX/cudacpp/gg_ttg.sa/mg5.in index 9fd2517e11..fbf08862ec 100644 --- a/epochX/cudacpp/gg_ttg.sa/mg5.in +++ b/epochX/cudacpp/gg_ttg.sa/mg5.in @@ -1,3 +1,4 @@ +set stdout_level DEBUG +set zerowidth_tchannel F generate g g > t t~ g -output standalone_cudacpp gg_ttg.sa --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp - +output standalone_cudacpp gg_ttg.sa diff --git a/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h index 4a326fae62..8995b15c82 100644 --- a/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -886,6 +888,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -896,6 +899,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -908,6 +912,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -920,6 +925,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype 
M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -933,6 +939,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -946,6 +953,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -959,6 +967,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -972,6 +981,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1006,6 +1016,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1044,6 +1055,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1067,6 +1079,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1098,6 +1111,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1129,6 +1143,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1160,6 +1175,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1194,6 +1210,7 @@ 
namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1228,6 +1245,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 0dfbe85bbc..745da9d88c 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004771232604980469  +DEBUG: model prefixing takes 0.005432605743408203  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,85 +155,58 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.145 s +1 processes with 123 diagrams generated in 0.157 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  INFO: initialize a new directory: CODEGEN_mad_gg_ttgg INFO: remove old information in CODEGEN_mad_gg_ttgg -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses  INFO: Organizing processes 
into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [0, 1, 2, 3, 4, 5, 6, 
7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: 
[41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 10, 14: 11, 15: 12, 16: 13, 17: 14, 18: 15, 19: 16, 20: 17, 21: 18, 22: 19, 23: 20, 24: 21, 25: 22, 26: 23, 27: 24, 28: 25, 29: 26, 30: 27, 31: 28, 32: 29, 33: 30, 37: 31, 38: 32, 39: 33, 40: 34, 41: 35, 42: 36, 43: 37, 44: 38, 45: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 55: 46, 56: 47, 57: 48, 58: 49, 59: 50, 60: 51, 61: 52, 62: 53, 63: 54, 67: 55, 68: 56, 69: 57, 70: 58, 71: 59, 72: 60, 73: 61, 74: 62, 75: 63, 76: 64, 77: 65, 78: 66, 79: 67, 80: 68, 81: 69, 85: 70, 86: 71, 87: 72, 88: 73, 89: 74, 90: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 100: 85, 101: 86, 102: 87, 106: 88, 107: 89, 108: 90, 109: 91, 110: 92, 111: 93, 115: 94, 116: 95, 117: 96, 118: 97, 119: 98, 120: 99, 124: 100, 125: 101, 126: 102, 127: 103, 128: 104, 129: 105} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -DEBUG: call =  
vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at 
line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.385 s -Wrote files for 222 helas calls in 0.655 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.423 s +Wrote files for 222 helas calls in 0.710 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.285 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +ALOHA: aloha creates 5 routines in 0.325 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.268 s +ALOHA: aloha creates 10 routines in 0.308 s VVV1 VVV1 FFV1 @@ -246,27 +219,22 @@ ALOHA: aloha creates 10 routines in 0.268 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory 
/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: 
/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * * W E L C O M E to * @@ -287,14 +255,15 @@ INFO: Generate web pages * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -318,40 +287,40 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py +Hunk #1 succeeded at 4188 (offset 1 line). 
patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 191 (offset 48 lines). Hunk #3 succeeded at 269 (offset 48 lines). Hunk #4 succeeded at 297 (offset 48 lines). Hunk #5 succeeded at 342 (offset 48 lines). -Hunk #6 succeeded at 830 (offset 434 lines). -Hunk #7 succeeded at 1717 (offset 1247 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/README Run "open index.html" to see more information about this process. 
quit -real 0m3.946s -user 0m3.315s -sys 0m0.356s +real 0m3.903s +user 0m3.552s +sys 0m0.335s diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/ident_card.dat b/epochX/cudacpp/gg_ttgg.mad/Cards/ident_card.dat index b37758a42a..0ba87b008f 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/ident_card.dat +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/ident_card.dat @@ -2,32 +2,32 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc -mass 5 mdl_MB -mass 6 mdl_MT -mass 15 mdl_MTA -mass 23 mdl_MZ -mass 25 mdl_MH -sminputs 1 aEWM1 -sminputs 2 mdl_Gf -sminputs 3 aS -yukawa 5 mdl_ymb -yukawa 6 mdl_ymt -yukawa 15 mdl_ymtau -decay 6 mdl_WT -decay 23 mdl_WZ -decay 24 mdl_WW +decay 23 mdl_WZ +decay 24 mdl_WW decay 25 mdl_WH +decay 6 mdl_WT +mass 15 mdl_MTA +mass 23 mdl_MZ +mass 25 mdl_MH +mass 5 mdl_MB +mass 6 mdl_MT +sminputs 1 aEWM1 +sminputs 2 mdl_Gf +sminputs 3 aS +yukawa 15 mdl_ymtau +yukawa 5 mdl_ymb +yukawa 6 mdl_ymt diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt index 27acb12a1e..cdeedc7863 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -mg5_path = /data/stephan/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -mg5_path = /data/stephan/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat index ba87549835..e4d3fe550f 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat @@ -35,15 +35,15 @@ set loop_color_flows False set max_npoint_for_channel 0 set 
default_unset_couplings 99 set max_t_for_channel 99 -set zerowidth_tchannel True set nlo_mixed_expansion True -import model sm +set stdout_level DEBUG +set zerowidth_tchannel F +generate g g > t t~ g g define p = g u c d s u~ c~ d~ s~ define j = g u c d s u~ c~ d~ s~ define l+ = e+ mu+ define l- = e- mu- define vl = ve vm vt define vl~ = ve~ vm~ vt~ -generate g g > t t~ g g -output madevent gg_ttgg.mad_gen --hel_recycling=False --vector_size=16\ -384 --me_exporter=standalone_cudacpp +output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --\ +vector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_file.inc index fa0f3d86f5..ec923afd6d 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1P0_3.o FFV1_0.o FFV1_1.o FFV1_2.o VVV1P0_1.o VVV1_0.o VVVV1P0_1.o VVVV1_0.o VVVV3P0_1.o VVVV3_0.o VVVV4P0_1.o VVVV4_0.o +ALOHARoutine = FFV1_1.o VVVV4_0.o VVVV4P0_1.o FFV1_0.o VVV1_0.o FFV1_2.o VVVV3_0.o VVVV1_0.o VVVV3P0_1.o VVVV1P0_1.o VVV1P0_1.o FFV1P0_3.o diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! 
Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! + const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc index 442d769ae3..896d64343e 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc @@ -250,11 +250,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 1 - VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -263,7 +263,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; 
@@ -272,7 +272,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -285,10 +285,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 123 *** // Wavefunction(s) for diagram number 2 - VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 2 - VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -305,10 +305,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 123 *** // Wavefunction(s) for diagram number 3 - VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 0., 0., w_fp[9] ); + VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -325,10 +325,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 123 *** // Wavefunction(s) for diagram number 4 - VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 4 - VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv 
+= cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -345,11 +345,11 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 123 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -363,7 +363,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -376,10 +376,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 123 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -390,10 +390,10 @@ namespace mg5amcCpu // *** DIAGRAM 8 OF 123 *** // Wavefunction(s) for diagram number 8 - FFV1_1( w_fp[2], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); + FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) 
for diagram number 8 - FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -407,7 +407,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -420,10 +420,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 123 *** // Wavefunction(s) for diagram number 10 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[15] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -434,10 +434,10 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 123 *** // Wavefunction(s) for diagram number 11 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -451,7 +451,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], 
w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -467,7 +467,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -481,7 +481,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -497,7 +497,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -513,7 +513,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -526,12 +526,12 @@ namespace mg5amcCpu // *** DIAGRAM 17 OF 123 *** // Wavefunction(s) for diagram number 17 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - FFV1_1( w_fp[12], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + 
FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -541,10 +541,10 @@ namespace mg5amcCpu // *** DIAGRAM 18 OF 123 *** // Wavefunction(s) for diagram number 18 - FFV1_1( w_fp[12], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -557,7 +557,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -568,11 +568,11 @@ namespace mg5amcCpu // *** DIAGRAM 20 OF 123 *** // Wavefunction(s) for diagram number 20 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] ); // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -588,7 +588,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -602,7 +602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -613,10 +613,10 @@ namespace mg5amcCpu // *** DIAGRAM 23 OF 123 *** // Wavefunction(s) for diagram number 23 - VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 0., 0., w_fp[18] ); + VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] ); // Amplitude(s) for diagram number 23 - VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -632,7 +632,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -646,7 +646,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 25 - FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -657,10 +657,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 123 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[12], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[19] ); + FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -673,7 +673,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -686,7 +686,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 28 - FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -699,7 +699,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -712,7 +712,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 30 - FFV1_0( 
w_fp[3], w_fp[19], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -726,7 +726,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 31 - VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -739,22 +739,22 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 123 *** // Wavefunction(s) for diagram number 32 - VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[17] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[19] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[8] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -763,12 +763,12 @@ namespace mg5amcCpu // *** DIAGRAM 33 OF 123 *** // Wavefunction(s) for diagram number 33 - FFV1_2( 
w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[12], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -778,10 +778,10 @@ namespace mg5amcCpu // *** DIAGRAM 34 OF 123 *** // Wavefunction(s) for diagram number 34 - FFV1_2( w_fp[12], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 34 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 34 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -794,7 +794,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -805,10 +805,10 @@ namespace mg5amcCpu // *** DIAGRAM 36 OF 123 *** // Wavefunction(s) for diagram number 36 - FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 0., 0., w_fp[22] ); + FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[6], w_fp[5], w_fp[22], 
COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -824,7 +824,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 37 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 37 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -838,7 +838,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 38 - FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 38 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -852,7 +852,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 39 - VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 39 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -868,7 +868,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 40 - FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 40 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -882,7 +882,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 41 - FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 41 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -893,10 +893,10 @@ namespace mg5amcCpu // *** DIAGRAM 42 OF 123 *** // Wavefunction(s) for diagram number 42 - FFV1_2( w_fp[12], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_2( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 42 - FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 42 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -909,7 +909,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 43 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 43 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -922,7 +922,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 44 - FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 44 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -935,7 +935,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 45 - FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 45 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -948,7 +948,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 46 - FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] 
); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 46 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -962,7 +962,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 47 - VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 47 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -978,17 +978,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 48 - FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[11] -= amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; - FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -997,11 +997,11 @@ namespace mg5amcCpu // *** DIAGRAM 49 OF 123 *** // Wavefunction(s) for diagram number 49 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[12] ); - FFV1_2( w_fp[3], w_fp[12], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] ); + FFV1_2( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 49 - FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 49 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1012,10 +1012,10 
@@ namespace mg5amcCpu // *** DIAGRAM 50 OF 123 *** // Wavefunction(s) for diagram number 50 - VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 50 - FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 50 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1031,7 +1031,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 51 - FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 51 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1042,10 +1042,10 @@ namespace mg5amcCpu // *** DIAGRAM 52 OF 123 *** // Wavefunction(s) for diagram number 52 - FFV1_1( w_fp[2], w_fp[12], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); + FFV1_1( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 52 - FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 52 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1059,7 +1059,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 53 - FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 53 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1075,7 +1075,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 54 - FFV1_0( w_fp[16], w_fp[14], w_fp[12], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 54 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1089,7 +1089,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 55 - FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 55 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1105,7 +1105,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 56 - FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 56 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1121,7 +1121,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 57 - VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 57 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1141,7 +1141,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 58 - VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1150,7 +1150,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[12], w_fp[1], 
w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1159,7 +1159,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1172,10 +1172,10 @@ namespace mg5amcCpu // *** DIAGRAM 59 OF 123 *** // Wavefunction(s) for diagram number 59 - VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 59 - VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 59 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1195,7 +1195,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 60 - VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 60 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1215,7 +1215,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 61 - FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 61 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1231,7 +1231,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 62 - FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 62 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1245,7 +1245,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 63 - FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 63 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1261,7 +1261,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 64 - FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 64 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1272,11 +1272,11 @@ namespace mg5amcCpu // *** DIAGRAM 65 OF 123 *** // Wavefunction(s) for diagram number 65 - VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 0., 0., w_fp[20] ); - FFV1_2( w_fp[3], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] ); + FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 65 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 65 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1287,10 +1287,10 @@ namespace mg5amcCpu // *** DIAGRAM 66 OF 123 *** // Wavefunction(s) for diagram number 66 - VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 0., 0., w_fp[22] ); + VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] ); // Amplitude(s) 
for diagram number 66 - FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 66 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1306,7 +1306,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 67 - FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 67 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1317,10 +1317,10 @@ namespace mg5amcCpu // *** DIAGRAM 68 OF 123 *** // Wavefunction(s) for diagram number 68 - FFV1_1( w_fp[2], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 68 - FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 68 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1334,7 +1334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 69 - FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 69 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1350,7 +1350,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 70 - FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 70 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] 
); @@ -1364,7 +1364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 71 - FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 71 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1380,7 +1380,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 72 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 72 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1396,7 +1396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 73 - VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 73 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1416,7 +1416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 74 - VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1425,7 +1425,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1434,7 +1434,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * 
amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1447,10 +1447,10 @@ namespace mg5amcCpu // *** DIAGRAM 75 OF 123 *** // Wavefunction(s) for diagram number 75 - VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 0., 0., w_fp[12] ); + VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 75 - VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 75 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1470,7 +1470,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 76 - VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 76 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1490,7 +1490,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 77 - FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 77 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1506,7 +1506,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 78 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 78 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 
0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1520,7 +1520,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 79 - FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 79 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1536,7 +1536,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 80 - FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 80 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1547,10 +1547,10 @@ namespace mg5amcCpu // *** DIAGRAM 81 OF 123 *** // Wavefunction(s) for diagram number 81 - FFV1_1( w_fp[9], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 81 - FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 81 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1560,10 +1560,10 @@ namespace mg5amcCpu // *** DIAGRAM 82 OF 123 *** // Wavefunction(s) for diagram number 82 - FFV1_2( w_fp[15], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 82 - FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 82 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1576,7 +1576,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 83 - FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 83 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1586,10 +1586,10 @@ namespace mg5amcCpu // *** DIAGRAM 84 OF 123 *** // Wavefunction(s) for diagram number 84 - FFV1_2( w_fp[13], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 84 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 84 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1602,7 +1602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 85 - FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 85 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1613,10 +1613,10 @@ namespace mg5amcCpu // *** DIAGRAM 86 OF 123 *** // Wavefunction(s) for diagram number 86 - VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 86 - FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 86 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1629,10 +1629,10 @@ namespace mg5amcCpu // *** DIAGRAM 87 OF 123 *** // Wavefunction(s) for diagram number 87 - FFV1_2( w_fp[16], w_fp[0], COUPs[1], cIPD[0], cIPD[1], 
w_fp[22] ); + FFV1_2( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 87 - FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 87 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1642,10 +1642,10 @@ namespace mg5amcCpu // *** DIAGRAM 88 OF 123 *** // Wavefunction(s) for diagram number 88 - FFV1_1( w_fp[11], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); + FFV1_1( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 88 - FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 88 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1658,7 +1658,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 89 - FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 89 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1668,10 +1668,10 @@ namespace mg5amcCpu // *** DIAGRAM 90 OF 123 *** // Wavefunction(s) for diagram number 90 - FFV1_1( w_fp[14], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[24] ); + FFV1_1( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] ); // Amplitude(s) for diagram number 90 - FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 90 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1684,7 +1684,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 91 - FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 91 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1698,7 +1698,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 92 - FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 92 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1714,7 +1714,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 93 - VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1723,7 +1723,7 @@ namespace mg5amcCpu jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1732,7 +1732,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1745,10 +1745,10 @@ namespace mg5amcCpu // *** DIAGRAM 94 OF 123 *** // 
Wavefunction(s) for diagram number 94 - VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 0., 0., w_fp[22] ); + VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 94 - VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 94 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1765,10 +1765,10 @@ namespace mg5amcCpu // *** DIAGRAM 95 OF 123 *** // Wavefunction(s) for diagram number 95 - VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 0., 0., w_fp[25] ); + VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 95 - VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 95 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1788,7 +1788,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 96 - FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 96 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1804,7 +1804,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 97 - FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 97 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1818,7 +1818,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 98 - FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 98 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1834,7 +1834,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 99 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 99 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1848,7 +1848,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 100 - VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1857,7 +1857,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1866,7 +1866,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1879,10 +1879,10 @@ namespace mg5amcCpu // *** DIAGRAM 101 OF 123 *** // Wavefunction(s) for diagram number 101 - VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 
0., w_fp[6] ); // Amplitude(s) for diagram number 101 - VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 101 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1902,7 +1902,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 102 - VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 102 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1922,7 +1922,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 103 - FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 103 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1938,7 +1938,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 104 - FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 104 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1952,7 +1952,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 105 - FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 105 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1968,7 +1968,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 106 - FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 106 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1982,7 +1982,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 107 - VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1991,7 +1991,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2000,7 +2000,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2016,7 +2016,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 108 - VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 108 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2036,7 +2036,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 109 - VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], 
w_fp[7], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 109 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2056,7 +2056,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 110 - FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 110 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2069,7 +2069,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 111 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 111 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2082,7 +2082,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 112 - FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 112 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2095,7 +2095,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 113 - FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 113 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2105,12 +2105,12 @@ namespace mg5amcCpu // *** DIAGRAM 114 OF 123 *** // Wavefunction(s) for diagram number 114 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[12] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[24] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], 
COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 114 - VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2119,7 +2119,7 @@ namespace mg5amcCpu jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2128,7 +2128,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2144,17 +2144,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 115 - FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], &_fp[0] ); + 
FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -2166,17 +2166,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 116 - FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; - FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[12] += amp_sv[0]; - FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -2185,12 +2185,12 @@ namespace mg5amcCpu // *** DIAGRAM 117 OF 123 *** // Wavefunction(s) for diagram number 117 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[13] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 117 - VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2199,7 +2199,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[13], w_fp[7], w_fp[4], 
COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2208,7 +2208,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2224,17 +2224,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 118 - FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[16] += amp_sv[0]; @@ -2246,17 +2246,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 119 - FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; - FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; - FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); 
jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[18] += amp_sv[0]; @@ -2265,22 +2265,22 @@ namespace mg5amcCpu // *** DIAGRAM 120 OF 123 *** // Wavefunction(s) for diagram number 120 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[15] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[13] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] ); // Amplitude(s) for diagram number 120 - FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -2292,17 +2292,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 121 - FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; - FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] -= 
amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[19] += amp_sv[0]; @@ -2314,7 +2314,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 122 - VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2323,7 +2323,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2332,7 +2332,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2348,7 +2348,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 123 - VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2357,7 +2357,7 @@ namespace mg5amcCpu jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * 
amp_sv[0]; @@ -2366,7 +2366,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2961,13 +2961,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f index d12d34daf6..0fa6436690 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f @@ -39,6 +39,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION 
G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -126,11 +127,24 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+ +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += 
smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f index cdf77037f6..77f5152327 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f @@ -894,12 +894,12 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,4),W(1,17),W(1,11),GC_11(IVEC),AMP(17)) C Amplitude(s) for diagram number 16 CALL FFV1_0(W(1,13),W(1,3),W(1,11),GC_11(IVEC),AMP(18)) - CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,13)) - CALL FFV1_2(W(1,4),W(1,2),GC_11(IVEC),MDL_MT, ZERO,W(1,17)) - CALL FFV1_1(W(1,13),W(1,5),GC_11(IVEC),MDL_MT, ZERO,W(1,9)) + CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,13)) + CALL FFV1_2(W(1,4),W(1,2),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,17)) + CALL FFV1_1(W(1,13),W(1,5),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,9)) C Amplitude(s) for diagram number 17 CALL FFV1_0(W(1,17),W(1,9),W(1,6),GC_11(IVEC),AMP(19)) - CALL FFV1_1(W(1,13),W(1,6),GC_11(IVEC),MDL_MT, ZERO,W(1,10)) + CALL 
FFV1_1(W(1,13),W(1,6),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,10)) C Amplitude(s) for diagram number 18 CALL FFV1_0(W(1,17),W(1,10),W(1,5),GC_11(IVEC),AMP(20)) C Amplitude(s) for diagram number 19 @@ -942,12 +942,12 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,4),W(1,13),W(1,18),GC_11(IVEC),AMP(34)) CALL FFV1_0(W(1,4),W(1,13),W(1,20),GC_11(IVEC),AMP(35)) CALL FFV1_0(W(1,4),W(1,13),W(1,9),GC_11(IVEC),AMP(36)) - CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,13)) - CALL FFV1_1(W(1,3),W(1,2),GC_11(IVEC),MDL_MT, ZERO,W(1,10)) - CALL FFV1_2(W(1,13),W(1,5),GC_11(IVEC),MDL_MT, ZERO,W(1,21)) + CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,13)) + CALL FFV1_1(W(1,3),W(1,2),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,10)) + CALL FFV1_2(W(1,13),W(1,5),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,21)) C Amplitude(s) for diagram number 33 CALL FFV1_0(W(1,21),W(1,10),W(1,6),GC_11(IVEC),AMP(37)) - CALL FFV1_2(W(1,13),W(1,6),GC_11(IVEC),MDL_MT, ZERO,W(1,22)) + CALL FFV1_2(W(1,13),W(1,6),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,22)) C Amplitude(s) for diagram number 34 CALL FFV1_0(W(1,22),W(1,10),W(1,5),GC_11(IVEC),AMP(38)) C Amplitude(s) for diagram number 35 @@ -983,7 +983,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,13),W(1,3),W(1,20),GC_11(IVEC),AMP(53)) CALL FFV1_0(W(1,13),W(1,3),W(1,9),GC_11(IVEC),AMP(54)) CALL VVV1P0_1(W(1,1),W(1,5),GC_10(IVEC),ZERO, FK_ZERO,W(1,13)) - CALL FFV1_2(W(1,4),W(1,13),GC_11(IVEC),MDL_MT, ZERO,W(1,23)) + CALL FFV1_2(W(1,4),W(1,13),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,23)) C Amplitude(s) for diagram number 49 CALL FFV1_0(W(1,23),W(1,10),W(1,6),GC_11(IVEC),AMP(55)) CALL VVV1P0_1(W(1,13),W(1,6),GC_10(IVEC),ZERO, FK_ZERO,W(1,24)) @@ -991,7 +991,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,4),W(1,10),W(1,24),GC_11(IVEC),AMP(56)) C Amplitude(s) for diagram number 51 CALL FFV1_0(W(1,14),W(1,10),W(1,13),GC_11(IVEC),AMP(57)) - CALL 
FFV1_1(W(1,3),W(1,13),GC_11(IVEC),MDL_MT, ZERO,W(1,21)) + CALL FFV1_1(W(1,3),W(1,13),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,21)) C Amplitude(s) for diagram number 52 CALL FFV1_0(W(1,17),W(1,21),W(1,6),GC_11(IVEC),AMP(58)) C Amplitude(s) for diagram number 53 @@ -1022,7 +1022,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C Amplitude(s) for diagram number 64 CALL FFV1_0(W(1,14),W(1,21),W(1,2),GC_11(IVEC),AMP(72)) CALL VVV1P0_1(W(1,1),W(1,6),GC_10(IVEC),ZERO, FK_ZERO,W(1,21)) - CALL FFV1_2(W(1,4),W(1,21),GC_11(IVEC),MDL_MT, ZERO,W(1,22)) + CALL FFV1_2(W(1,4),W(1,21),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,22)) C Amplitude(s) for diagram number 65 CALL FFV1_0(W(1,22),W(1,10),W(1,5),GC_11(IVEC),AMP(73)) CALL VVV1P0_1(W(1,21),W(1,5),GC_10(IVEC),ZERO, FK_ZERO,W(1,23)) @@ -1030,7 +1030,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,4),W(1,10),W(1,23),GC_11(IVEC),AMP(74)) C Amplitude(s) for diagram number 67 CALL FFV1_0(W(1,16),W(1,10),W(1,21),GC_11(IVEC),AMP(75)) - CALL FFV1_1(W(1,3),W(1,21),GC_11(IVEC),MDL_MT, ZERO,W(1,24)) + CALL FFV1_1(W(1,3),W(1,21),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,24)) C Amplitude(s) for diagram number 68 CALL FFV1_0(W(1,17),W(1,24),W(1,5),GC_11(IVEC),AMP(76)) C Amplitude(s) for diagram number 69 @@ -1063,12 +1063,12 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_1(W(1,10),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,24)) C Amplitude(s) for diagram number 81 CALL FFV1_0(W(1,16),W(1,24),W(1,6),GC_11(IVEC),AMP(91)) - CALL FFV1_2(W(1,16),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,13)) + CALL FFV1_2(W(1,16),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,13)) C Amplitude(s) for diagram number 82 CALL FFV1_0(W(1,13),W(1,10),W(1,6),GC_11(IVEC),AMP(92)) C Amplitude(s) for diagram number 83 CALL FFV1_0(W(1,14),W(1,24),W(1,5),GC_11(IVEC),AMP(93)) - CALL FFV1_2(W(1,14),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,22)) + CALL FFV1_2(W(1,14),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,22)) C Amplitude(s) for diagram number 84 
CALL FFV1_0(W(1,22),W(1,10),W(1,5),GC_11(IVEC),AMP(94)) C Amplitude(s) for diagram number 85 @@ -1079,12 +1079,12 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_2(W(1,17),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,23)) C Amplitude(s) for diagram number 87 CALL FFV1_0(W(1,23),W(1,12),W(1,6),GC_11(IVEC),AMP(97)) - CALL FFV1_1(W(1,12),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,21)) + CALL FFV1_1(W(1,12),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,21)) C Amplitude(s) for diagram number 88 CALL FFV1_0(W(1,17),W(1,21),W(1,6),GC_11(IVEC),AMP(98)) C Amplitude(s) for diagram number 89 CALL FFV1_0(W(1,23),W(1,15),W(1,5),GC_11(IVEC),AMP(99)) - CALL FFV1_1(W(1,15),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,25)) + CALL FFV1_1(W(1,15),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,25)) C Amplitude(s) for diagram number 90 CALL FFV1_0(W(1,17),W(1,25),W(1,5),GC_11(IVEC),AMP(100)) C Amplitude(s) for diagram number 91 diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/ompnumthreads.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/ompnumthreads.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+ +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o 
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile index 74db44d848..74b19033a8 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 +LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/proc_characteristics b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/proc_characteristics index 9e078c54a2..61b8ce0c6c 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/proc_characteristics +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/proc_characteristics @@ -8,7 +8,7 @@ ninitial = 2 grouped_matrix = True has_loops = False - bias_module = None + bias_module = dummy max_n_matched_jets = 2 colored_pdgs = [1, 2, 3, 4, 5, 6, 21] complex_mass_scheme = False diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/generate_events b/epochX/cudacpp/gg_ttgg.mad/bin/generate_events index 107313b25d..5577cc66a0 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/generate_events +++ b/epochX/cudacpp/gg_ttgg.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME - +import misc as misc import logging import logging.config @@ -160,17 +160,31 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv + argument = sys.argv + + # check for plugin customization of the launch command + launch_interface = ME.MadEventCmdShell + if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + try: if '-h' in argument or '--help' in argument: - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = 
treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py index 7624b9f557..e9f421ae5f 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py @@ -1002,13 +1002,14 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() + self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - self.plugin_input(finput) + def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/gg_ttgg.mad/bin/internal/ufomodel/py3_model.pkl index afc2ca4e273b368050537e3f722b85c825bbf510..27a1caae3c115073669b90622e9351ab04166d39 100644 GIT binary patch delta 54 zcmX?lj_Le4rVZZ9G>RD*81z#TOA_@H%Mx=Ei;FY$-2+0642+ERa}!h2ixLYmOwtQV KBPM4qy8{3ztQ2Sf delta 44 zcmX?qj_K$*rVZZ9 t t~ g g output madevent gg_ttgg.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h index 9cea8bcbe7..9b946c21e1 100644 --- a/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const 
fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -886,6 +888,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -896,6 +899,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -908,6 +912,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -920,6 +925,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -934,6 +940,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -945,6 +952,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -959,6 +967,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -970,6 +979,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -984,6 +994,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype 
allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -995,6 +1006,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -1008,6 +1020,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1042,6 +1055,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1080,6 +1094,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1103,6 +1118,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1134,6 +1150,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1165,6 +1182,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1197,6 +1215,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1225,6 +1244,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1260,6 +1280,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, 
__FUNCTION__ ); @@ -1288,6 +1309,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1323,6 +1345,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1351,6 +1374,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index fb29a354ab..373a89a800 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0045855045318603516  +DEBUG: model prefixing takes 0.005511283874511719  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,64 +155,35 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.142 s +1 processes with 123 diagrams generated in 0.155 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: 
Processing color information for process: g g > t t~ g g @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  -DEBUG: type(subproc_group)= [output.py at line 188]  -DEBUG: type(fortran_model)= [output.py at line 189]  -DEBUG: type(me)= me=0 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: proc_id =  0 [model_handling.py at line 1046]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 
1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1823]  -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1336]  -Generated helas calls for 1 subprocesses (123 diagrams) in 0.382 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at 
line 189]  +DEBUG: type(subproc_group)= [output.py at line 190]  +DEBUG: type(fortran_model)= [output.py at line 191]  +DEBUG: type(me)= me=0 [output.py at line 192]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. +Generated helas calls for 1 subprocesses (123 diagrams) in 0.420 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.601 s +ALOHA: aloha creates 5 routines in 0.315 s VVV1 VVV1 FFV1 @@ -225,23 +196,17 @@ ALOHA: aloha creates 5 routines in 0.601 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
-DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m1.922s -user 0m1.324s -sys 0m0.063s +real 0m1.438s +user 0m1.366s +sys 0m0.060s diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! 
+ const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc index 25f123c774..927a19a802 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc @@ -250,11 +250,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 1 - VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -266,7 +266,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -278,7 +278,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -294,10 +294,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 123 *** // Wavefunction(s) for diagram number 2 - VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 2 - VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -313,10 +313,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 123 *** // Wavefunction(s) for diagram number 3 - VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 0., 0., w_fp[9] ); + VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -332,10 +332,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 123 *** // Wavefunction(s) for diagram number 4 - VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 4 - VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -351,11 +351,11 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 123 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + 
FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -368,7 +368,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -380,10 +380,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 123 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -393,10 +393,10 @@ namespace mg5amcCpu // *** DIAGRAM 8 OF 123 *** // Wavefunction(s) for diagram number 8 - FFV1_1( w_fp[2], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); + FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -409,7 +409,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[14], w_fp[8], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -421,10 +421,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 123 *** // Wavefunction(s) for diagram number 10 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[15] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -434,10 +434,10 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 123 *** // Wavefunction(s) for diagram number 11 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -450,7 +450,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -465,7 +465,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code 
base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -478,7 +478,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -493,7 +493,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -508,7 +508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -520,12 +520,12 @@ namespace mg5amcCpu // *** DIAGRAM 17 OF 123 *** // Wavefunction(s) for diagram number 17 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - FFV1_1( w_fp[12], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv 
and denominators_sv (#473) #endif @@ -534,10 +534,10 @@ namespace mg5amcCpu // *** DIAGRAM 18 OF 123 *** // Wavefunction(s) for diagram number 18 - FFV1_1( w_fp[12], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -549,7 +549,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -559,11 +559,11 @@ namespace mg5amcCpu // *** DIAGRAM 20 OF 123 *** // Wavefunction(s) for diagram number 20 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] ); // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -578,7 +578,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif 
@@ -591,7 +591,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -601,10 +601,10 @@ namespace mg5amcCpu // *** DIAGRAM 23 OF 123 *** // Wavefunction(s) for diagram number 23 - VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 0., 0., w_fp[18] ); + VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] ); // Amplitude(s) for diagram number 23 - VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -619,7 +619,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -632,7 +632,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 25 - FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -642,10 +642,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 123 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[12], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[19] ); + FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], &_fp[0] ); + 
FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -657,7 +657,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -669,7 +669,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 28 - FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -681,7 +681,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -693,7 +693,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 30 - FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -706,7 +706,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 31 - VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -718,12 +718,12 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 123 *** // Wavefunction(s) for diagram number 32 - VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[17] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[19] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[8] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -731,7 +731,7 @@ namespace mg5amcCpu jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -739,7 +739,7 @@ namespace mg5amcCpu jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -751,12 +751,12 @@ namespace mg5amcCpu // *** DIAGRAM 33 OF 123 *** // Wavefunction(s) for diagram number 33 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[12], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); + FFV1_2( 
w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -765,10 +765,10 @@ namespace mg5amcCpu // *** DIAGRAM 34 OF 123 *** // Wavefunction(s) for diagram number 34 - FFV1_2( w_fp[12], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 34 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -780,7 +780,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -790,10 +790,10 @@ namespace mg5amcCpu // *** DIAGRAM 36 OF 123 *** // Wavefunction(s) for diagram number 36 - FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 0., 0., w_fp[22] ); + FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -808,7 +808,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 37 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -821,7 +821,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 38 - FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -834,7 +834,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 39 - VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -849,7 +849,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 40 - FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -862,7 +862,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 41 - FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -872,10 +872,10 @@ namespace mg5amcCpu // *** DIAGRAM 42 OF 123 *** // Wavefunction(s) for diagram number 42 - FFV1_2( w_fp[12], w_fp[1], COUPs[1], cIPD[0], cIPD[1], 
w_fp[23] ); + FFV1_2( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 42 - FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -887,7 +887,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 43 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -899,7 +899,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 44 - FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -911,7 +911,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 45 - FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -923,7 +923,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 46 - FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -936,7 +936,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 47 - VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 
&_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -951,7 +951,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 48 - FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -959,7 +959,7 @@ namespace mg5amcCpu jamp_sv[11] -= amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -967,7 +967,7 @@ namespace mg5amcCpu jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; - FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -979,11 +979,11 @@ namespace mg5amcCpu // *** DIAGRAM 49 OF 123 *** // Wavefunction(s) for diagram number 49 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[12] ); - FFV1_2( w_fp[3], w_fp[12], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] ); + FFV1_2( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 49 - FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -993,10 +993,10 @@ namespace mg5amcCpu // *** DIAGRAM 50 OF 123 *** // Wavefunction(s) for diagram number 50 - VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 50 - FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1011,7 +1011,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 51 - FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1021,10 +1021,10 @@ namespace mg5amcCpu // *** DIAGRAM 52 OF 123 *** // Wavefunction(s) for diagram number 52 - FFV1_1( w_fp[2], w_fp[12], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); + FFV1_1( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 52 - FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1037,7 +1037,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 53 - FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1052,7 +1052,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) 
for diagram number 54 - FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1065,7 +1065,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 55 - FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1080,7 +1080,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 56 - FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1095,7 +1095,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 57 - VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1114,7 +1114,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 58 - VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1126,7 +1126,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[12], w_fp[1], w_fp[7], 
w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1138,7 +1138,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1154,10 +1154,10 @@ namespace mg5amcCpu // *** DIAGRAM 59 OF 123 *** // Wavefunction(s) for diagram number 59 - VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 59 - VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1176,7 +1176,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 60 - VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1195,7 +1195,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 61 - FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -1210,7 +1210,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 62 - FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1223,7 +1223,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 63 - FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1238,7 +1238,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 64 - FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1248,11 +1248,11 @@ namespace mg5amcCpu // *** DIAGRAM 65 OF 123 *** // Wavefunction(s) for diagram number 65 - VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 0., 0., w_fp[20] ); - FFV1_2( w_fp[3], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] ); + FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 65 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1262,10 +1262,10 @@ namespace mg5amcCpu // *** DIAGRAM 66 OF 123 *** // Wavefunction(s) for diagram number 66 - VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 0., 0., w_fp[22] ); + VVV1P0_1( w_fp[20], 
w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 66 - FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1280,7 +1280,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 67 - FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1290,10 +1290,10 @@ namespace mg5amcCpu // *** DIAGRAM 68 OF 123 *** // Wavefunction(s) for diagram number 68 - FFV1_1( w_fp[2], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 68 - FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1306,7 +1306,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 69 - FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1321,7 +1321,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 70 - FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -1334,7 +1334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 71 - FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1349,7 +1349,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 72 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1364,7 +1364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 73 - VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1383,7 +1383,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 74 - VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1395,7 +1395,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1407,7 +1407,7 @@ 
namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1423,10 +1423,10 @@ namespace mg5amcCpu // *** DIAGRAM 75 OF 123 *** // Wavefunction(s) for diagram number 75 - VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 0., 0., w_fp[12] ); + VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 75 - VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1445,7 +1445,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 76 - VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1464,7 +1464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 77 - FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1479,7 +1479,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 78 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1492,7 +1492,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 79 - FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1507,7 +1507,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 80 - FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1517,10 +1517,10 @@ namespace mg5amcCpu // *** DIAGRAM 81 OF 123 *** // Wavefunction(s) for diagram number 81 - FFV1_1( w_fp[9], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 81 - FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1529,10 +1529,10 @@ namespace mg5amcCpu // *** DIAGRAM 82 OF 123 *** // Wavefunction(s) for diagram number 82 - FFV1_2( w_fp[15], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 82 - FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1544,7 +1544,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 83 - FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1553,10 +1553,10 @@ namespace mg5amcCpu // *** DIAGRAM 84 OF 123 *** // Wavefunction(s) for diagram number 84 - FFV1_2( w_fp[13], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 84 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1568,7 +1568,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 85 - FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1578,10 +1578,10 @@ namespace mg5amcCpu // *** DIAGRAM 86 OF 123 *** // Wavefunction(s) for diagram number 86 - VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 86 - FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1593,10 +1593,10 @@ namespace mg5amcCpu // *** DIAGRAM 87 OF 123 *** // Wavefunction(s) for diagram number 87 - FFV1_2( w_fp[16], w_fp[0], COUPs[1], cIPD[0], cIPD[1], 
w_fp[22] ); + FFV1_2( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 87 - FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1605,10 +1605,10 @@ namespace mg5amcCpu // *** DIAGRAM 88 OF 123 *** // Wavefunction(s) for diagram number 88 - FFV1_1( w_fp[11], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); + FFV1_1( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 88 - FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1620,7 +1620,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 89 - FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1629,10 +1629,10 @@ namespace mg5amcCpu // *** DIAGRAM 90 OF 123 *** // Wavefunction(s) for diagram number 90 - FFV1_1( w_fp[14], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[24] ); + FFV1_1( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] ); // Amplitude(s) for diagram number 90 - FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1644,7 +1644,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 91 - 
FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1657,7 +1657,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 92 - FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1672,7 +1672,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 93 - VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1684,7 +1684,7 @@ namespace mg5amcCpu jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1696,7 +1696,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1712,10 +1712,10 @@ namespace mg5amcCpu // *** 
DIAGRAM 94 OF 123 *** // Wavefunction(s) for diagram number 94 - VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 0., 0., w_fp[22] ); + VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 94 - VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1731,10 +1731,10 @@ namespace mg5amcCpu // *** DIAGRAM 95 OF 123 *** // Wavefunction(s) for diagram number 95 - VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 0., 0., w_fp[25] ); + VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 95 - VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1753,7 +1753,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 96 - FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1768,7 +1768,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 97 - FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1781,7 +1781,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 98 - FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1796,7 +1796,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 99 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1809,7 +1809,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 100 - VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1821,7 +1821,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1833,7 +1833,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1849,10 +1849,10 @@ namespace mg5amcCpu // *** DIAGRAM 101 OF 123 *** // Wavefunction(s) for diagram number 101 - VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 0., 0., 
w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 101 - VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1871,7 +1871,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 102 - VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1890,7 +1890,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 103 - FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1905,7 +1905,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 104 - FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1918,7 +1918,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 105 - FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1933,7 +1933,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 106 - FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], &_fp[0] 
); + FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1946,7 +1946,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 107 - VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1958,7 +1958,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1970,7 +1970,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1989,7 +1989,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 108 - VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2008,7 +2008,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 109 - 
VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2027,7 +2027,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 110 - FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2039,7 +2039,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 111 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2051,7 +2051,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 112 - FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2063,7 +2063,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 113 - FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2072,12 +2072,12 @@ namespace mg5amcCpu // *** DIAGRAM 114 OF 123 *** // Wavefunction(s) for diagram number 114 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[12] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[24] ); - 
VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 114 - VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2089,7 +2089,7 @@ namespace mg5amcCpu jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2101,7 +2101,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2120,7 +2120,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 115 - FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2128,7 +2128,7 @@ namespace mg5amcCpu jamp_sv[19] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( 
w_fp[3], w_fp[14], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2136,7 +2136,7 @@ namespace mg5amcCpu jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2151,7 +2151,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 116 - FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2159,7 +2159,7 @@ namespace mg5amcCpu jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; - FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2167,7 +2167,7 @@ namespace mg5amcCpu jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[12] += amp_sv[0]; - FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2179,12 +2179,12 @@ namespace mg5amcCpu // *** DIAGRAM 117 OF 123 *** // Wavefunction(s) for diagram number 117 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[21] 
); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[13] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 117 - VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2196,7 +2196,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2208,7 +2208,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2227,7 +2227,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 118 - FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2235,7 +2235,7 @@ namespace mg5amcCpu jamp_sv[13] -= 
amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2243,7 +2243,7 @@ namespace mg5amcCpu jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2258,7 +2258,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 119 - FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2266,7 +2266,7 @@ namespace mg5amcCpu jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; - FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2274,7 +2274,7 @@ namespace mg5amcCpu jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; - FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2286,12 +2286,12 @@ namespace mg5amcCpu // *** DIAGRAM 120 OF 123 *** // Wavefunction(s) for diagram 
number 120 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[15] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[13] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] ); // Amplitude(s) for diagram number 120 - FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2299,7 +2299,7 @@ namespace mg5amcCpu jamp_sv[7] -= amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2307,7 +2307,7 @@ namespace mg5amcCpu jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2322,7 +2322,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 121 - FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2330,7 +2330,7 @@ namespace mg5amcCpu jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= 
amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2338,7 +2338,7 @@ namespace mg5amcCpu jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; - FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2353,7 +2353,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 122 - VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2365,7 +2365,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2377,7 +2377,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2396,7 +2396,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 123 - VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2408,7 +2408,7 @@ namespace mg5amcCpu jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2420,7 +2420,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3018,13 +3018,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran 
functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) 
ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttgg.sa/mg5.in b/epochX/cudacpp/gg_ttgg.sa/mg5.in index 0ec559d9b8..5b27867642 100644 --- a/epochX/cudacpp/gg_ttgg.sa/mg5.in +++ b/epochX/cudacpp/gg_ttgg.sa/mg5.in @@ -1,3 +1,4 @@ +set stdout_level DEBUG +set zerowidth_tchannel F generate g g > t t~ g g -output standalone_cudacpp gg_ttgg.sa --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp - +output standalone_cudacpp gg_ttgg.sa diff --git a/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h index 9cea8bcbe7..9b946c21e1 100644 --- a/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -886,6 +888,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -896,6 +899,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -908,6 +912,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -920,6 +925,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, 
const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -934,6 +940,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -945,6 +952,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -959,6 +967,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -970,6 +979,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -984,6 +994,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -995,6 +1006,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -1008,6 +1020,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1042,6 +1055,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1080,6 +1094,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1103,6 +1118,7 @@ namespace 
mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1134,6 +1150,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1165,6 +1182,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1197,6 +1215,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1225,6 +1244,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1260,6 +1280,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1288,6 +1309,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1323,6 +1345,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1351,6 +1374,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index b0f5bcbfef..7e024b5fd3 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -51,9 +51,9 @@ Note that you can still compile 
and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0044476985931396484  +DEBUG: model prefixing takes 0.005335092544555664  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,89 +155,60 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.726 s +1 processes with 1240 diagrams generated in 1.895 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  INFO: initialize a new directory: CODEGEN_mad_gg_ttggg INFO: remove old information in CODEGEN_mad_gg_ttggg -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses  INFO: 
Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] -INFO: Color-Flow passed to 1592 term in 30s. Introduce 2768 contraction -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +INFO: Color-Flow passed to 1592 term in 36s. Introduce 2768 contraction +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 
116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 
481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 
837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 
161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 
522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 
0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [3], 4: [4], 5: [6], 6: [7], 7: [13], 8: [14], 9: [15], 10: [17], 11: [18], 12: [19], 13: [21], 14: [22], 15: [23], 16: [25], 17: [26], 18: [27], 19: [28], 20: [29], 21: [30], 22: [32], 23: [33], 24: [34], 25: [35], 26: [36], 27: [37], 28: [38], 29: [39], 30: [40], 31: [41], 32: [42], 33: [43], 34: [44], 35: [45], 36: [46], 37: [48], 38: [49], 39: [50], 40: [51], 41: [52], 42: [53], 43: [54], 44: [55], 45: [56], 46: [57], 47: [58], 48: [59], 49: [60], 50: [61], 51: [62], 52: [64], 53: [65], 54: [66], 55: [67], 56: [68], 57: [69], 58: [70], 59: [71], 60: [72], 61: [73], 62: [74], 63: [75], 64: [76], 65: [77], 66: [78], 67: [80], 68: [81], 69: [82], 70: [83], 71: [84], 72: [85], 73: [86], 74: [87], 75: [88], 76: [90], 77: [91], 78: [92], 79: [93], 80: [94], 81: [95], 82: [96], 83: [97], 84: [98], 85: [100], 86: [101], 87: [102], 88: [103], 89: [104], 90: [105], 91: [106], 92: [107], 93: [108], 94: [109], 95: [110], 96: [111], 97: [112], 98: [113], 99: [114], 100: [115], 101: [116], 102: [117], 103: [118], 104: [119], 105: [120], 106: [123], 107: [124], 108: [125], 109: [126], 110: [127], 111: [128], 112: [129], 113: [130], 114: [131], 115: [132], 116: [133], 
117: [134], 118: [135], 119: [136], 120: [137], 121: [139], 122: [140], 123: [142], 124: [143], 125: [144], 126: [145], 127: [146], 128: [147], 129: [148], 130: [149], 131: [150], 132: [151], 133: [152], 134: [153], 135: [154], 136: [155], 137: [156], 138: [158], 139: [159], 140: [160], 141: [161], 142: [162], 143: [163], 144: [164], 145: [165], 146: [166], 147: [167], 148: [168], 149: [169], 150: [170], 151: [171], 152: [172], 153: [174], 154: [175], 155: [176], 156: [177], 157: [178], 158: [179], 159: [180], 160: [181], 161: [182], 162: [183], 163: [184], 164: [185], 165: [186], 166: [187], 167: [188], 168: [189], 169: [190], 170: [191], 171: [192], 172: [193], 173: [194], 174: [195], 175: [196], 176: [197], 177: [198], 178: [199], 179: [200], 180: [201], 181: [202], 182: [203], 183: [204], 184: [205], 185: [206], 186: [207], 187: [208], 188: [209], 189: [210], 190: [211], 191: [212], 192: [213], 193: [214], 194: [215], 195: [216], 196: [217], 197: [219], 198: [220], 199: [221], 200: [222], 201: [223], 202: [224], 203: [226], 204: [227], 205: [228], 206: [229], 207: [230], 208: [231], 209: [233], 210: [234], 211: [246], 212: [247], 213: [248], 214: [249], 215: [250], 216: [251], 217: [252], 218: [253], 219: [254], 220: [255], 221: [256], 222: [257], 223: [258], 224: [259], 225: [260], 226: [262], 227: [263], 228: [265], 229: [266], 230: [267], 231: [268], 232: [269], 233: [270], 234: [271], 235: [272], 236: [273], 237: [274], 238: [275], 239: [276], 240: [277], 241: [278], 242: [279], 243: [281], 244: [282], 245: [283], 246: [284], 247: [285], 248: [286], 249: [287], 250: [288], 251: [289], 252: [290], 253: [291], 254: [292], 255: [293], 256: [294], 257: [295], 258: [297], 259: [298], 260: [299], 261: [300], 262: [301], 263: [302], 264: [303], 265: [304], 266: [305], 267: [306], 268: [307], 269: [308], 270: [309], 271: [310], 272: [311], 273: [312], 274: [313], 275: [314], 276: [315], 277: [316], 278: [317], 279: [318], 280: [319], 281: [320], 282: [321], 283: 
[322], 284: [323], 285: [324], 286: [325], 287: [326], 288: [327], 289: [328], 290: [329], 291: [330], 292: [331], 293: [332], 294: [333], 295: [334], 296: [335], 297: [336], 298: [337], 299: [338], 300: [339], 301: [340], 302: [342], 303: [343], 304: [344], 305: [345], 306: [346], 307: [347], 308: [349], 309: [350], 310: [351], 311: [352], 312: [353], 313: [354], 314: [356], 315: [357], 316: [369], 317: [370], 318: [371], 319: [372], 320: [373], 321: [374], 322: [376], 323: [377], 324: [378], 325: [379], 326: [380], 327: [381], 328: [382], 329: [383], 330: [384], 331: [385], 332: [386], 333: [387], 334: [388], 335: [389], 336: [390], 337: [392], 338: [393], 339: [394], 340: [395], 341: [396], 342: [397], 343: [398], 344: [399], 345: [400], 346: [401], 347: [402], 348: [403], 349: [404], 350: [405], 351: [406], 352: [408], 353: [409], 354: [410], 355: [411], 356: [412], 357: [413], 358: [414], 359: [415], 360: [416], 361: [417], 362: [418], 363: [419], 364: [420], 365: [421], 366: [422], 367: [424], 368: [425], 369: [426], 370: [427], 371: [428], 372: [429], 373: [430], 374: [431], 375: [432], 376: [433], 377: [434], 378: [436], 379: [437], 380: [439], 381: [440], 382: [446], 383: [447], 384: [448], 385: [449], 386: [450], 387: [451], 388: [452], 389: [453], 390: [454], 391: [456], 392: [457], 393: [458], 394: [459], 395: [460], 396: [461], 397: [462], 398: [463], 399: [464], 400: [466], 401: [467], 402: [468], 403: [469], 404: [470], 405: [471], 406: [472], 407: [473], 408: [474], 409: [476], 410: [477], 411: [478], 412: [479], 413: [480], 414: [481], 415: [483], 416: [484], 417: [485], 418: [486], 419: [487], 420: [488], 421: [492], 422: [493], 423: [494], 424: [495], 425: [496], 426: [497], 427: [499], 428: [500], 429: [501], 430: [502], 431: [503], 432: [504], 433: [505], 434: [506], 435: [507], 436: [508], 437: [509], 438: [510], 439: [511], 440: [512], 441: [513], 442: [515], 443: [516], 444: [517], 445: [518], 446: [519], 447: [520], 448: [521], 449: [522], 
450: [523], 451: [524], 452: [525], 453: [526], 454: [527], 455: [528], 456: [529], 457: [531], 458: [532], 459: [533], 460: [534], 461: [535], 462: [536], 463: [537], 464: [538], 465: [539], 466: [540], 467: [541], 468: [542], 469: [543], 470: [544], 471: [545], 472: [547], 473: [548], 474: [549], 475: [550], 476: [551], 477: [552], 478: [553], 479: [554], 480: [555], 481: [556], 482: [557], 483: [559], 484: [560], 485: [562], 486: [563], 487: [569], 488: [570], 489: [571], 490: [572], 491: [573], 492: [574], 493: [575], 494: [576], 495: [577], 496: [579], 497: [580], 498: [581], 499: [582], 500: [583], 501: [584], 502: [585], 503: [586], 504: [587], 505: [589], 506: [590], 507: [591], 508: [592], 509: [593], 510: [594], 511: [595], 512: [596], 513: [597], 514: [599], 515: [600], 516: [601], 517: [602], 518: [603], 519: [604], 520: [606], 521: [607], 522: [608], 523: [609], 524: [610], 525: [611], 526: [615], 527: [616], 528: [617], 529: [618], 530: [619], 531: [620], 532: [622], 533: [623], 534: [624], 535: [625], 536: [626], 537: [627], 538: [628], 539: [629], 540: [630], 541: [631], 542: [632], 543: [633], 544: [634], 545: [635], 546: [636], 547: [638], 548: [639], 549: [640], 550: [641], 551: [642], 552: [643], 553: [644], 554: [645], 555: [646], 556: [647], 557: [648], 558: [649], 559: [650], 560: [651], 561: [652], 562: [654], 563: [655], 564: [656], 565: [657], 566: [658], 567: [659], 568: [660], 569: [661], 570: [662], 571: [663], 572: [664], 573: [665], 574: [666], 575: [667], 576: [668], 577: [670], 578: [671], 579: [672], 580: [673], 581: [674], 582: [675], 583: [676], 584: [677], 585: [678], 586: [679], 587: [680], 588: [682], 589: [683], 590: [685], 591: [686], 592: [692], 593: [693], 594: [694], 595: [695], 596: [696], 597: [697], 598: [698], 599: [699], 600: [700], 601: [702], 602: [703], 603: [704], 604: [705], 605: [706], 606: [707], 607: [708], 608: [709], 609: [710], 610: [712], 611: [713], 612: [714], 613: [715], 614: [716], 615: [717], 616: 
[718], 617: [719], 618: [720], 619: [722], 620: [723], 621: [724], 622: [725], 623: [726], 624: [727], 625: [729], 626: [730], 627: [731], 628: [732], 629: [733], 630: [734], 631: [738], 632: [739], 633: [740], 634: [741], 635: [742], 636: [743], 637: [744], 638: [745], 639: [746], 640: [747], 641: [748], 642: [749], 643: [750], 644: [751], 645: [752], 646: [753], 647: [754], 648: [755], 649: [756], 650: [757], 651: [758], 652: [759], 653: [760], 654: [761], 655: [762], 656: [763], 657: [764], 658: [765], 659: [766], 660: [767], 661: [768], 662: [769], 663: [770], 664: [772], 665: [773], 666: [774], 667: [775], 668: [776], 669: [777], 670: [779], 671: [780], 672: [781], 673: [782], 674: [783], 675: [784], 676: [788], 677: [789], 678: [790], 679: [791], 680: [792], 681: [793], 682: [794], 683: [795], 684: [796], 685: [797], 686: [798], 687: [799], 688: [800], 689: [801], 690: [802], 691: [803], 692: [804], 693: [805], 694: [806], 695: [807], 696: [808], 697: [809], 698: [810], 699: [811], 700: [812], 701: [813], 702: [814], 703: [815], 704: [816], 705: [817], 706: [818], 707: [819], 708: [820], 709: [822], 710: [823], 711: [824], 712: [825], 713: [826], 714: [827], 715: [829], 716: [830], 717: [831], 718: [832], 719: [833], 720: [834], 721: [838], 722: [839], 723: [841], 724: [842], 725: [844], 726: [845], 727: [851], 728: [852], 729: [853], 730: [854], 731: [855], 732: [856], 733: [857], 734: [858], 735: [859], 736: [861], 737: [862], 738: [863], 739: [864], 740: [865], 741: [866], 742: [867], 743: [868], 744: [869], 745: [871], 746: [872], 747: [873], 748: [874], 749: [875], 750: [876], 751: [877], 752: [878], 753: [879], 754: [881], 755: [882], 756: [883], 757: [884], 758: [885], 759: [886], 760: [888], 761: [889], 762: [890], 763: [891], 764: [892], 765: [893], 766: [894], 767: [895], 768: [897], 769: [898], 770: [900], 771: [901], 772: [907], 773: [908], 774: [909], 775: [910], 776: [911], 777: [912], 778: [913], 779: [914], 780: [915], 781: [917], 782: [918], 
783: [919], 784: [920], 785: [921], 786: [922], 787: [923], 788: [924], 789: [925], 790: [927], 791: [928], 792: [929], 793: [930], 794: [931], 795: [932], 796: [933], 797: [934], 798: [935], 799: [937], 800: [938], 801: [939], 802: [940], 803: [941], 804: [942], 805: [944], 806: [945], 807: [946], 808: [947], 809: [948], 810: [949], 811: [950], 812: [951], 813: [953], 814: [954], 815: [956], 816: [957], 817: [963], 818: [964], 819: [965], 820: [966], 821: [967], 822: [968], 823: [969], 824: [970], 825: [971], 826: [973], 827: [974], 828: [975], 829: [976], 830: [977], 831: [978], 832: [979], 833: [980], 834: [981], 835: [983], 836: [984], 837: [985], 838: [986], 839: [987], 840: [988], 841: [989], 842: [990], 843: [991], 844: [993], 845: [994], 846: [995], 847: [996], 848: [997], 849: [998], 850: [1000], 851: [1001], 852: [1002], 853: [1003], 854: [1004], 855: [1005], 856: [1006], 857: [1007], 858: [1009], 859: [1010], 860: [1012], 861: [1013], 862: [1018], 863: [1019], 864: [1021], 865: [1022], 866: [1024], 867: [1025], 868: [1030], 869: [1031], 870: [1033], 871: [1034], 872: [1036], 873: [1037], 874: [1045], 875: [1046], 876: [1047], 877: [1048], 878: [1049], 879: [1050], 880: [1051], 881: [1052], 882: [1053], 883: [1054], 884: [1055], 885: [1056], 886: [1057], 887: [1058], 888: [1059], 889: [1060], 890: [1061], 891: [1062], 892: [1064], 893: [1065], 894: [1066], 895: [1067], 896: [1068], 897: [1069], 898: [1070], 899: [1071], 900: [1072], 901: [1073], 902: [1074], 903: [1075], 904: [1076], 905: [1077], 906: [1078], 907: [1079], 908: [1080], 909: [1081], 910: [1083], 911: [1084], 912: [1085], 913: [1086], 914: [1087], 915: [1088], 916: [1089], 917: [1090], 918: [1091], 919: [1092], 920: [1093], 921: [1094], 922: [1095], 923: [1096], 924: [1097], 925: [1098], 926: [1099], 927: [1100], 928: [1102], 929: [1103], 930: [1104], 931: [1105], 932: [1106], 933: [1107], 934: [1109], 935: [1110], 936: [1111], 937: [1112], 938: [1113], 939: [1114], 940: [1116], 941: [1117], 
942: [1118], 943: [1119], 944: [1120], 945: [1121]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [3], 4: [4], 5: [6], 6: [7], 7: [13], 8: [14], 9: [15], 10: [17], 11: [18], 12: [19], 13: [21], 14: [22], 15: [23], 16: [25], 17: [26], 18: [27], 19: [28], 20: [29], 21: [30], 22: [32], 23: [33], 24: [34], 25: [35], 26: [36], 27: [37], 28: [38], 29: [39], 30: [40], 31: [41], 32: [42], 33: [43], 34: [44], 35: [45], 36: [46], 37: [48], 38: [49], 39: [50], 40: [51], 41: [52], 42: [53], 43: [54], 44: [55], 45: [56], 46: [57], 47: [58], 48: [59], 49: [60], 50: [61], 51: [62], 52: [64], 53: [65], 54: [66], 55: [67], 56: [68], 57: [69], 58: [70], 59: [71], 60: [72], 61: [73], 62: [74], 63: [75], 64: [76], 65: [77], 66: [78], 67: [80], 68: [81], 69: [82], 70: [83], 71: [84], 72: [85], 73: [86], 74: [87], 75: [88], 76: [90], 77: [91], 78: [92], 79: [93], 80: [94], 81: [95], 82: [96], 83: [97], 84: [98], 85: [100], 86: [101], 87: [102], 88: [103], 89: [104], 90: [105], 91: [106], 92: [107], 93: [108], 94: [109], 95: [110], 96: [111], 97: [112], 98: [113], 99: [114], 100: [115], 101: [116], 102: [117], 103: [118], 104: [119], 105: [120], 106: [123], 107: [124], 108: [125], 109: [126], 110: [127], 111: [128], 112: [129], 113: [130], 114: [131], 115: [132], 116: [133], 117: [134], 118: [135], 119: [136], 120: [137], 121: [139], 122: [140], 123: [142], 124: [143], 125: [144], 126: [145], 127: [146], 128: [147], 129: [148], 130: [149], 131: [150], 132: [151], 133: [152], 134: [153], 135: [154], 136: [155], 137: [156], 138: [158], 139: [159], 140: [160], 141: [161], 142: [162], 143: [163], 144: [164], 145: [165], 146: [166], 147: [167], 148: [168], 149: [169], 150: [170], 151: [171], 152: [172], 153: [174], 154: [175], 155: [176], 156: [177], 157: [178], 158: [179], 159: [180], 160: [181], 161: [182], 162: [183], 163: [184], 164: [185], 165: [186], 166: [187], 167: [188], 168: [189], 169: [190], 170: [191], 171: [192], 172: [193], 173: [194], 174: 
[195], 175: [196], 176: [197], 177: [198], 178: [199], 179: [200], 180: [201], 181: [202], 182: [203], 183: [204], 184: [205], 185: [206], 186: [207], 187: [208], 188: [209], 189: [210], 190: [211], 191: [212], 192: [213], 193: [214], 194: [215], 195: [216], 196: [217], 197: [219], 198: [220], 199: [221], 200: [222], 201: [223], 202: [224], 203: [226], 204: [227], 205: [228], 206: [229], 207: [230], 208: [231], 209: [233], 210: [234], 211: [246], 212: [247], 213: [248], 214: [249], 215: [250], 216: [251], 217: [252], 218: [253], 219: [254], 220: [255], 221: [256], 222: [257], 223: [258], 224: [259], 225: [260], 226: [262], 227: [263], 228: [265], 229: [266], 230: [267], 231: [268], 232: [269], 233: [270], 234: [271], 235: [272], 236: [273], 237: [274], 238: [275], 239: [276], 240: [277], 241: [278], 242: [279], 243: [281], 244: [282], 245: [283], 246: [284], 247: [285], 248: [286], 249: [287], 250: [288], 251: [289], 252: [290], 253: [291], 254: [292], 255: [293], 256: [294], 257: [295], 258: [297], 259: [298], 260: [299], 261: [300], 262: [301], 263: [302], 264: [303], 265: [304], 266: [305], 267: [306], 268: [307], 269: [308], 270: [309], 271: [310], 272: [311], 273: [312], 274: [313], 275: [314], 276: [315], 277: [316], 278: [317], 279: [318], 280: [319], 281: [320], 282: [321], 283: [322], 284: [323], 285: [324], 286: [325], 287: [326], 288: [327], 289: [328], 290: [329], 291: [330], 292: [331], 293: [332], 294: [333], 295: [334], 296: [335], 297: [336], 298: [337], 299: [338], 300: [339], 301: [340], 302: [342], 303: [343], 304: [344], 305: [345], 306: [346], 307: [347], 308: [349], 309: [350], 310: [351], 311: [352], 312: [353], 313: [354], 314: [356], 315: [357], 316: [369], 317: [370], 318: [371], 319: [372], 320: [373], 321: [374], 322: [376], 323: [377], 324: [378], 325: [379], 326: [380], 327: [381], 328: [382], 329: [383], 330: [384], 331: [385], 332: [386], 333: [387], 334: [388], 335: [389], 336: [390], 337: [392], 338: [393], 339: [394], 340: [395], 
341: [396], 342: [397], 343: [398], 344: [399], 345: [400], 346: [401], 347: [402], 348: [403], 349: [404], 350: [405], 351: [406], 352: [408], 353: [409], 354: [410], 355: [411], 356: [412], 357: [413], 358: [414], 359: [415], 360: [416], 361: [417], 362: [418], 363: [419], 364: [420], 365: [421], 366: [422], 367: [424], 368: [425], 369: [426], 370: [427], 371: [428], 372: [429], 373: [430], 374: [431], 375: [432], 376: [433], 377: [434], 378: [436], 379: [437], 380: [439], 381: [440], 382: [446], 383: [447], 384: [448], 385: [449], 386: [450], 387: [451], 388: [452], 389: [453], 390: [454], 391: [456], 392: [457], 393: [458], 394: [459], 395: [460], 396: [461], 397: [462], 398: [463], 399: [464], 400: [466], 401: [467], 402: [468], 403: [469], 404: [470], 405: [471], 406: [472], 407: [473], 408: [474], 409: [476], 410: [477], 411: [478], 412: [479], 413: [480], 414: [481], 415: [483], 416: [484], 417: [485], 418: [486], 419: [487], 420: [488], 421: [492], 422: [493], 423: [494], 424: [495], 425: [496], 426: [497], 427: [499], 428: [500], 429: [501], 430: [502], 431: [503], 432: [504], 433: [505], 434: [506], 435: [507], 436: [508], 437: [509], 438: [510], 439: [511], 440: [512], 441: [513], 442: [515], 443: [516], 444: [517], 445: [518], 446: [519], 447: [520], 448: [521], 449: [522], 450: [523], 451: [524], 452: [525], 453: [526], 454: [527], 455: [528], 456: [529], 457: [531], 458: [532], 459: [533], 460: [534], 461: [535], 462: [536], 463: [537], 464: [538], 465: [539], 466: [540], 467: [541], 468: [542], 469: [543], 470: [544], 471: [545], 472: [547], 473: [548], 474: [549], 475: [550], 476: [551], 477: [552], 478: [553], 479: [554], 480: [555], 481: [556], 482: [557], 483: [559], 484: [560], 485: [562], 486: [563], 487: [569], 488: [570], 489: [571], 490: [572], 491: [573], 492: [574], 493: [575], 494: [576], 495: [577], 496: [579], 497: [580], 498: [581], 499: [582], 500: [583], 501: [584], 502: [585], 503: [586], 504: [587], 505: [589], 506: [590], 507: 
[591], 508: [592], 509: [593], 510: [594], 511: [595], 512: [596], 513: [597], 514: [599], 515: [600], 516: [601], 517: [602], 518: [603], 519: [604], 520: [606], 521: [607], 522: [608], 523: [609], 524: [610], 525: [611], 526: [615], 527: [616], 528: [617], 529: [618], 530: [619], 531: [620], 532: [622], 533: [623], 534: [624], 535: [625], 536: [626], 537: [627], 538: [628], 539: [629], 540: [630], 541: [631], 542: [632], 543: [633], 544: [634], 545: [635], 546: [636], 547: [638], 548: [639], 549: [640], 550: [641], 551: [642], 552: [643], 553: [644], 554: [645], 555: [646], 556: [647], 557: [648], 558: [649], 559: [650], 560: [651], 561: [652], 562: [654], 563: [655], 564: [656], 565: [657], 566: [658], 567: [659], 568: [660], 569: [661], 570: [662], 571: [663], 572: [664], 573: [665], 574: [666], 575: [667], 576: [668], 577: [670], 578: [671], 579: [672], 580: [673], 581: [674], 582: [675], 583: [676], 584: [677], 585: [678], 586: [679], 587: [680], 588: [682], 589: [683], 590: [685], 591: [686], 592: [692], 593: [693], 594: [694], 595: [695], 596: [696], 597: [697], 598: [698], 599: [699], 600: [700], 601: [702], 602: [703], 603: [704], 604: [705], 605: [706], 606: [707], 607: [708], 608: [709], 609: [710], 610: [712], 611: [713], 612: [714], 613: [715], 614: [716], 615: [717], 616: [718], 617: [719], 618: [720], 619: [722], 620: [723], 621: [724], 622: [725], 623: [726], 624: [727], 625: [729], 626: [730], 627: [731], 628: [732], 629: [733], 630: [734], 631: [738], 632: [739], 633: [740], 634: [741], 635: [742], 636: [743], 637: [744], 638: [745], 639: [746], 640: [747], 641: [748], 642: [749], 643: [750], 644: [751], 645: [752], 646: [753], 647: [754], 648: [755], 649: [756], 650: [757], 651: [758], 652: [759], 653: [760], 654: [761], 655: [762], 656: [763], 657: [764], 658: [765], 659: [766], 660: [767], 661: [768], 662: [769], 663: [770], 664: [772], 665: [773], 666: [774], 667: [775], 668: [776], 669: [777], 670: [779], 671: [780], 672: [781], 673: [782], 
674: [783], 675: [784], 676: [788], 677: [789], 678: [790], 679: [791], 680: [792], 681: [793], 682: [794], 683: [795], 684: [796], 685: [797], 686: [798], 687: [799], 688: [800], 689: [801], 690: [802], 691: [803], 692: [804], 693: [805], 694: [806], 695: [807], 696: [808], 697: [809], 698: [810], 699: [811], 700: [812], 701: [813], 702: [814], 703: [815], 704: [816], 705: [817], 706: [818], 707: [819], 708: [820], 709: [822], 710: [823], 711: [824], 712: [825], 713: [826], 714: [827], 715: [829], 716: [830], 717: [831], 718: [832], 719: [833], 720: [834], 721: [838], 722: [839], 723: [841], 724: [842], 725: [844], 726: [845], 727: [851], 728: [852], 729: [853], 730: [854], 731: [855], 732: [856], 733: [857], 734: [858], 735: [859], 736: [861], 737: [862], 738: [863], 739: [864], 740: [865], 741: [866], 742: [867], 743: [868], 744: [869], 745: [871], 746: [872], 747: [873], 748: [874], 749: [875], 750: [876], 751: [877], 752: [878], 753: [879], 754: [881], 755: [882], 756: [883], 757: [884], 758: [885], 759: [886], 760: [888], 761: [889], 762: [890], 763: [891], 764: [892], 765: [893], 766: [894], 767: [895], 768: [897], 769: [898], 770: [900], 771: [901], 772: [907], 773: [908], 774: [909], 775: [910], 776: [911], 777: [912], 778: [913], 779: [914], 780: [915], 781: [917], 782: [918], 783: [919], 784: [920], 785: [921], 786: [922], 787: [923], 788: [924], 789: [925], 790: [927], 791: [928], 792: [929], 793: [930], 794: [931], 795: [932], 796: [933], 797: [934], 798: [935], 799: [937], 800: [938], 801: [939], 802: [940], 803: [941], 804: [942], 805: [944], 806: [945], 807: [946], 808: [947], 809: [948], 810: [949], 811: [950], 812: [951], 813: [953], 814: [954], 815: [956], 816: [957], 817: [963], 818: [964], 819: [965], 820: [966], 821: [967], 822: [968], 823: [969], 824: [970], 825: [971], 826: [973], 827: [974], 828: [975], 829: [976], 830: [977], 831: [978], 832: [979], 833: [980], 834: [981], 835: [983], 836: [984], 837: [985], 838: [986], 839: [987], 840: 
[988], 841: [989], 842: [990], 843: [991], 844: [993], 845: [994], 846: [995], 847: [996], 848: [997], 849: [998], 850: [1000], 851: [1001], 852: [1002], 853: [1003], 854: [1004], 855: [1005], 856: [1006], 857: [1007], 858: [1009], 859: [1010], 860: [1012], 861: [1013], 862: [1018], 863: [1019], 864: [1021], 865: [1022], 866: [1024], 867: [1025], 868: [1030], 869: [1031], 870: [1033], 871: [1034], 872: [1036], 873: [1037], 874: [1045], 875: [1046], 876: [1047], 877: [1048], 878: [1049], 879: [1050], 880: [1051], 881: [1052], 882: [1053], 883: [1054], 884: [1055], 885: [1056], 886: [1057], 887: [1058], 888: [1059], 889: [1060], 890: [1061], 891: [1062], 892: [1064], 893: [1065], 894: [1066], 895: [1067], 896: [1068], 897: [1069], 898: [1070], 899: [1071], 900: [1072], 901: [1073], 902: [1074], 903: [1075], 904: [1076], 905: [1077], 906: [1078], 907: [1079], 908: [1080], 909: [1081], 910: [1083], 911: [1084], 912: [1085], 913: [1086], 914: [1087], 915: [1088], 916: [1089], 917: [1090], 918: [1091], 919: [1092], 920: [1093], 921: [1094], 922: [1095], 923: [1096], 924: [1097], 925: [1098], 926: [1099], 927: [1100], 928: [1102], 929: [1103], 930: [1104], 931: [1105], 932: [1106], 933: [1107], 934: [1109], 935: [1110], 936: [1111], 937: [1112], 938: [1113], 939: [1114], 940: [1116], 941: [1117], 942: [1118], 943: [1119], 944: [1120], 945: [1121]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 6: 3, 7: 4, 11: 5, 12: 6, 28: 7, 29: 8, 30: 9, 34: 10, 35: 11, 36: 12, 40: 13, 41: 14, 42: 15, 46: 16, 47: 17, 48: 18, 49: 19, 50: 20, 51: 21, 55: 22, 56: 23, 57: 24, 58: 25, 59: 26, 60: 27, 61: 28, 62: 29, 63: 30, 64: 31, 65: 32, 66: 33, 67: 34, 68: 35, 69: 36, 73: 37, 74: 38, 75: 39, 76: 40, 77: 41, 78: 42, 79: 43, 80: 44, 81: 45, 82: 46, 83: 47, 84: 48, 85: 49, 86: 50, 87: 51, 91: 52, 92: 53, 93: 54, 94: 55, 95: 56, 96: 57, 97: 58, 98: 59, 99: 60, 100: 61, 101: 62, 102: 63, 103: 64, 104: 65, 105: 66, 109: 67, 110: 68, 111: 69, 112: 70, 113: 71, 114: 72, 
115: 73, 116: 74, 117: 75, 121: 76, 122: 77, 123: 78, 124: 79, 125: 80, 126: 81, 127: 82, 128: 83, 129: 84, 133: 85, 134: 86, 135: 87, 136: 88, 137: 89, 138: 90, 139: 91, 140: 92, 141: 93, 142: 94, 143: 95, 144: 96, 145: 97, 146: 98, 147: 99, 148: 100, 149: 101, 150: 102, 151: 103, 152: 104, 153: 105, 160: 106, 161: 107, 162: 108, 163: 109, 164: 110, 165: 111, 166: 112, 167: 113, 168: 114, 169: 115, 170: 116, 171: 117, 172: 118, 173: 119, 174: 120, 178: 121, 179: 122, 183: 123, 184: 124, 185: 125, 186: 126, 187: 127, 188: 128, 189: 129, 190: 130, 191: 131, 192: 132, 193: 133, 194: 134, 195: 135, 196: 136, 197: 137, 201: 138, 202: 139, 203: 140, 204: 141, 205: 142, 206: 143, 207: 144, 208: 145, 209: 146, 210: 147, 211: 148, 212: 149, 213: 150, 214: 151, 215: 152, 219: 153, 220: 154, 221: 155, 222: 156, 223: 157, 224: 158, 225: 159, 226: 160, 227: 161, 228: 162, 229: 163, 230: 164, 231: 165, 232: 166, 233: 167, 234: 168, 235: 169, 236: 170, 237: 171, 238: 172, 239: 173, 240: 174, 241: 175, 242: 176, 243: 177, 244: 178, 245: 179, 246: 180, 247: 181, 248: 182, 249: 183, 250: 184, 251: 185, 252: 186, 253: 187, 254: 188, 255: 189, 256: 190, 257: 191, 258: 192, 259: 193, 260: 194, 261: 195, 262: 196, 266: 197, 267: 198, 268: 199, 269: 200, 270: 201, 271: 202, 275: 203, 276: 204, 277: 205, 278: 206, 279: 207, 280: 208, 284: 209, 285: 210, 319: 211, 320: 212, 321: 213, 322: 214, 323: 215, 324: 216, 325: 217, 326: 218, 327: 219, 328: 220, 329: 221, 330: 222, 331: 223, 332: 224, 333: 225, 337: 226, 338: 227, 342: 228, 343: 229, 344: 230, 345: 231, 346: 232, 347: 233, 348: 234, 349: 235, 350: 236, 351: 237, 352: 238, 353: 239, 354: 240, 355: 241, 356: 242, 360: 243, 361: 244, 362: 245, 363: 246, 364: 247, 365: 248, 366: 249, 367: 250, 368: 251, 369: 252, 370: 253, 371: 254, 372: 255, 373: 256, 374: 257, 378: 258, 379: 259, 380: 260, 381: 261, 382: 262, 383: 263, 384: 264, 385: 265, 386: 266, 387: 267, 388: 268, 389: 269, 390: 270, 391: 271, 392: 272, 393: 273, 394: 274, 395: 
275, 396: 276, 397: 277, 398: 278, 399: 279, 400: 280, 401: 281, 402: 282, 403: 283, 404: 284, 405: 285, 406: 286, 407: 287, 408: 288, 409: 289, 410: 290, 411: 291, 412: 292, 413: 293, 414: 294, 415: 295, 416: 296, 417: 297, 418: 298, 419: 299, 420: 300, 421: 301, 425: 302, 426: 303, 427: 304, 428: 305, 429: 306, 430: 307, 434: 308, 435: 309, 436: 310, 437: 311, 438: 312, 439: 313, 443: 314, 444: 315, 478: 316, 479: 317, 480: 318, 481: 319, 482: 320, 483: 321, 487: 322, 488: 323, 489: 324, 490: 325, 491: 326, 492: 327, 493: 328, 494: 329, 495: 330, 496: 331, 497: 332, 498: 333, 499: 334, 500: 335, 501: 336, 505: 337, 506: 338, 507: 339, 508: 340, 509: 341, 510: 342, 511: 343, 512: 344, 513: 345, 514: 346, 515: 347, 516: 348, 517: 349, 518: 350, 519: 351, 523: 352, 524: 353, 525: 354, 526: 355, 527: 356, 528: 357, 529: 358, 530: 359, 531: 360, 532: 361, 533: 362, 534: 363, 535: 364, 536: 365, 537: 366, 541: 367, 542: 368, 543: 369, 544: 370, 545: 371, 546: 372, 547: 373, 548: 374, 549: 375, 550: 376, 551: 377, 555: 378, 556: 379, 560: 380, 561: 381, 577: 382, 578: 383, 579: 384, 580: 385, 581: 386, 582: 387, 583: 388, 584: 389, 585: 390, 589: 391, 590: 392, 591: 393, 592: 394, 593: 395, 594: 396, 595: 397, 596: 398, 597: 399, 601: 400, 602: 401, 603: 402, 604: 403, 605: 404, 606: 405, 607: 406, 608: 407, 609: 408, 613: 409, 614: 410, 615: 411, 616: 412, 617: 413, 618: 414, 622: 415, 623: 416, 624: 417, 625: 418, 626: 419, 627: 420, 637: 421, 638: 422, 639: 423, 640: 424, 641: 425, 642: 426, 646: 427, 647: 428, 648: 429, 649: 430, 650: 431, 651: 432, 652: 433, 653: 434, 654: 435, 655: 436, 656: 437, 657: 438, 658: 439, 659: 440, 660: 441, 664: 442, 665: 443, 666: 444, 667: 445, 668: 446, 669: 447, 670: 448, 671: 449, 672: 450, 673: 451, 674: 452, 675: 453, 676: 454, 677: 455, 678: 456, 682: 457, 683: 458, 684: 459, 685: 460, 686: 461, 687: 462, 688: 463, 689: 464, 690: 465, 691: 466, 692: 467, 693: 468, 694: 469, 695: 470, 696: 471, 700: 472, 701: 473, 702: 474, 703: 
475, 704: 476, 705: 477, 706: 478, 707: 479, 708: 480, 709: 481, 710: 482, 714: 483, 715: 484, 719: 485, 720: 486, 736: 487, 737: 488, 738: 489, 739: 490, 740: 491, 741: 492, 742: 493, 743: 494, 744: 495, 748: 496, 749: 497, 750: 498, 751: 499, 752: 500, 753: 501, 754: 502, 755: 503, 756: 504, 760: 505, 761: 506, 762: 507, 763: 508, 764: 509, 765: 510, 766: 511, 767: 512, 768: 513, 772: 514, 773: 515, 774: 516, 775: 517, 776: 518, 777: 519, 781: 520, 782: 521, 783: 522, 784: 523, 785: 524, 786: 525, 796: 526, 797: 527, 798: 528, 799: 529, 800: 530, 801: 531, 805: 532, 806: 533, 807: 534, 808: 535, 809: 536, 810: 537, 811: 538, 812: 539, 813: 540, 814: 541, 815: 542, 816: 543, 817: 544, 818: 545, 819: 546, 823: 547, 824: 548, 825: 549, 826: 550, 827: 551, 828: 552, 829: 553, 830: 554, 831: 555, 832: 556, 833: 557, 834: 558, 835: 559, 836: 560, 837: 561, 841: 562, 842: 563, 843: 564, 844: 565, 845: 566, 846: 567, 847: 568, 848: 569, 849: 570, 850: 571, 851: 572, 852: 573, 853: 574, 854: 575, 855: 576, 859: 577, 860: 578, 861: 579, 862: 580, 863: 581, 864: 582, 865: 583, 866: 584, 867: 585, 868: 586, 869: 587, 873: 588, 874: 589, 878: 590, 879: 591, 895: 592, 896: 593, 897: 594, 898: 595, 899: 596, 900: 597, 901: 598, 902: 599, 903: 600, 907: 601, 908: 602, 909: 603, 910: 604, 911: 605, 912: 606, 913: 607, 914: 608, 915: 609, 919: 610, 920: 611, 921: 612, 922: 613, 923: 614, 924: 615, 925: 616, 926: 617, 927: 618, 931: 619, 932: 620, 933: 621, 934: 622, 935: 623, 936: 624, 940: 625, 941: 626, 942: 627, 943: 628, 944: 629, 945: 630, 955: 631, 956: 632, 957: 633, 958: 634, 959: 635, 960: 636, 961: 637, 962: 638, 963: 639, 964: 640, 965: 641, 966: 642, 967: 643, 968: 644, 969: 645, 970: 646, 971: 647, 972: 648, 973: 649, 974: 650, 975: 651, 976: 652, 977: 653, 978: 654, 979: 655, 980: 656, 981: 657, 982: 658, 983: 659, 984: 660, 985: 661, 986: 662, 987: 663, 991: 664, 992: 665, 993: 666, 994: 667, 995: 668, 996: 669, 1000: 670, 1001: 671, 1002: 672, 1003: 673, 1004: 674, 
1005: 675, 1015: 676, 1016: 677, 1017: 678, 1018: 679, 1019: 680, 1020: 681, 1021: 682, 1022: 683, 1023: 684, 1024: 685, 1025: 686, 1026: 687, 1027: 688, 1028: 689, 1029: 690, 1030: 691, 1031: 692, 1032: 693, 1033: 694, 1034: 695, 1035: 696, 1036: 697, 1037: 698, 1038: 699, 1039: 700, 1040: 701, 1041: 702, 1042: 703, 1043: 704, 1044: 705, 1045: 706, 1046: 707, 1047: 708, 1051: 709, 1052: 710, 1053: 711, 1054: 712, 1055: 713, 1056: 714, 1060: 715, 1061: 716, 1062: 717, 1063: 718, 1064: 719, 1065: 720, 1075: 721, 1076: 722, 1080: 723, 1081: 724, 1085: 725, 1086: 726, 1102: 727, 1103: 728, 1104: 729, 1105: 730, 1106: 731, 1107: 732, 1108: 733, 1109: 734, 1110: 735, 1114: 736, 1115: 737, 1116: 738, 1117: 739, 1118: 740, 1119: 741, 1120: 742, 1121: 743, 1122: 744, 1126: 745, 1127: 746, 1128: 747, 1129: 748, 1130: 749, 1131: 750, 1132: 751, 1133: 752, 1134: 753, 1138: 754, 1139: 755, 1140: 756, 1141: 757, 1142: 758, 1143: 759, 1147: 760, 1148: 761, 1149: 762, 1150: 763, 1151: 764, 1152: 765, 1153: 766, 1154: 767, 1158: 768, 1159: 769, 1163: 770, 1164: 771, 1180: 772, 1181: 773, 1182: 774, 1183: 775, 1184: 776, 1185: 777, 1186: 778, 1187: 779, 1188: 780, 1192: 781, 1193: 782, 1194: 783, 1195: 784, 1196: 785, 1197: 786, 1198: 787, 1199: 788, 1200: 789, 1204: 790, 1205: 791, 1206: 792, 1207: 793, 1208: 794, 1209: 795, 1210: 796, 1211: 797, 1212: 798, 1216: 799, 1217: 800, 1218: 801, 1219: 802, 1220: 803, 1221: 804, 1225: 805, 1226: 806, 1227: 807, 1228: 808, 1229: 809, 1230: 810, 1231: 811, 1232: 812, 1236: 813, 1237: 814, 1241: 815, 1242: 816, 1258: 817, 1259: 818, 1260: 819, 1261: 820, 1262: 821, 1263: 822, 1264: 823, 1265: 824, 1266: 825, 1270: 826, 1271: 827, 1272: 828, 1273: 829, 1274: 830, 1275: 831, 1276: 832, 1277: 833, 1278: 834, 1282: 835, 1283: 836, 1284: 837, 1285: 838, 1286: 839, 1287: 840, 1288: 841, 1289: 842, 1290: 843, 1294: 844, 1295: 845, 1296: 846, 1297: 847, 1298: 848, 1299: 849, 1303: 850, 1304: 851, 1305: 852, 1306: 853, 1307: 854, 1308: 855, 1309: 
856, 1310: 857, 1314: 858, 1315: 859, 1319: 860, 1320: 861, 1333: 862, 1334: 863, 1338: 864, 1339: 865, 1343: 866, 1344: 867, 1357: 868, 1358: 869, 1362: 870, 1363: 871, 1367: 872, 1368: 873, 1396: 874, 1397: 875, 1398: 876, 1399: 877, 1400: 878, 1401: 879, 1402: 880, 1403: 881, 1404: 882, 1405: 883, 1406: 884, 1407: 885, 1408: 886, 1409: 887, 1410: 888, 1411: 889, 1412: 890, 1413: 891, 1417: 892, 1418: 893, 1419: 894, 1420: 895, 1421: 896, 1422: 897, 1423: 898, 1424: 899, 1425: 900, 1426: 901, 1427: 902, 1428: 903, 1429: 904, 1430: 905, 1431: 906, 1432: 907, 1433: 908, 1434: 909, 1438: 910, 1439: 911, 1440: 912, 1441: 913, 1442: 914, 1443: 915, 1444: 916, 1445: 917, 1446: 918, 1447: 919, 1448: 920, 1449: 921, 1450: 922, 1451: 923, 1452: 924, 1453: 925, 1454: 926, 1455: 927, 1459: 928, 1460: 929, 1461: 930, 1462: 931, 1463: 932, 1464: 933, 1468: 934, 1469: 935, 1470: 936, 1471: 937, 1472: 938, 1473: 939, 1477: 940, 1478: 941, 1479: 942, 1480: 943, 1481: 944, 1482: 945} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 
188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 
546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 
896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 5.721 s -Wrote files for 2281 helas calls in 39.057 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.515 s +Wrote files for 2281 helas calls in 46.436 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.276 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +ALOHA: aloha creates 5 routines in 0.315 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model 
(create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.271 s +ALOHA: aloha creates 10 routines in 0.310 s VVV1 VVV1 FFV1 @@ -250,27 +221,22 @@ ALOHA: aloha creates 10 routines in 0.271 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , 
keys size = 3 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * * W E L C O M E to * @@ -291,14 +257,15 @@ INFO: Generate web pages * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -322,40 +289,40 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py +Hunk #1 succeeded at 4188 (offset 1 line). 
patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 255 (offset 112 lines). Hunk #3 succeeded at 333 (offset 112 lines). Hunk #4 succeeded at 361 (offset 112 lines). Hunk #5 succeeded at 406 (offset 112 lines). -Hunk #6 succeeded at 9862 (offset 9466 lines). -Hunk #7 succeeded at 19616 (offset 19146 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/README Run "open index.html" to see more information about this process. 
quit -real 0m49.632s -user 0m48.102s -sys 0m0.988s +real 0m57.704s +user 0m56.670s +sys 0m0.842s diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/ident_card.dat b/epochX/cudacpp/gg_ttggg.mad/Cards/ident_card.dat index b37758a42a..0ba87b008f 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/ident_card.dat +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/ident_card.dat @@ -2,32 +2,32 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc -mass 5 mdl_MB -mass 6 mdl_MT -mass 15 mdl_MTA -mass 23 mdl_MZ -mass 25 mdl_MH -sminputs 1 aEWM1 -sminputs 2 mdl_Gf -sminputs 3 aS -yukawa 5 mdl_ymb -yukawa 6 mdl_ymt -yukawa 15 mdl_ymtau -decay 6 mdl_WT -decay 23 mdl_WZ -decay 24 mdl_WW +decay 23 mdl_WZ +decay 24 mdl_WW decay 25 mdl_WH +decay 6 mdl_WT +mass 15 mdl_MTA +mass 23 mdl_MZ +mass 25 mdl_MH +mass 5 mdl_MB +mass 6 mdl_MT +sminputs 1 aEWM1 +sminputs 2 mdl_Gf +sminputs 3 aS +yukawa 15 mdl_ymtau +yukawa 5 mdl_ymb +yukawa 6 mdl_ymt diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt index 27acb12a1e..cdeedc7863 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -mg5_path = /data/stephan/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -mg5_path = /data/stephan/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat index 4030ee3e96..05d11d495d 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat @@ -35,15 +35,15 @@ set loop_color_flows False set max_npoint_for_channel 0 set 
default_unset_couplings 99 set max_t_for_channel 99 -set zerowidth_tchannel True set nlo_mixed_expansion True -import model sm +set stdout_level DEBUG +set zerowidth_tchannel F +generate g g > t t~ g g g define p = g u c d s u~ c~ d~ s~ define j = g u c d s u~ c~ d~ s~ define l+ = e+ mu+ define l- = e- mu- define vl = ve vm vt define vl~ = ve~ vm~ vt~ -generate g g > t t~ g g g -output madevent gg_ttggg.mad_gen --hel_recycling=False --vector_size=1\ -6384 --me_exporter=standalone_cudacpp +output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False -\ +-vector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_file.inc index fa0f3d86f5..ec923afd6d 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1P0_3.o FFV1_0.o FFV1_1.o FFV1_2.o VVV1P0_1.o VVV1_0.o VVVV1P0_1.o VVVV1_0.o VVVV3P0_1.o VVVV3_0.o VVVV4P0_1.o VVVV4_0.o +ALOHARoutine = FFV1_1.o VVVV4_0.o VVVV4P0_1.o FFV1_0.o VVV1_0.o FFV1_2.o VVVV3_0.o VVVV1_0.o VVVV3P0_1.o VVVV1P0_1.o VVV1P0_1.o FFV1P0_3.o diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! 
Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! + const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc index 18c4db8539..a525c4ba3f 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc @@ -252,13 +252,13 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][6], +1, w_fp[6], 6 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[7] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); - VVV1P0_1( w_fp[7], w_fp[4], COUPs[0], 0., 0., w_fp[9] ); - VVV1P0_1( w_fp[8], w_fp[5], COUPs[0], 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[7], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[9] ); + VVV1P0_1( w_fp[8], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 1 - VVV1_0( w_fp[9], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -283,10 +283,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 1240 *** // Wavefunction(s) for diagram number 2 - VVV1P0_1( w_fp[8], w_fp[6], COUPs[0], 0., 0., w_fp[11] 
); + VVV1P0_1( w_fp[8], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 2 - VVV1_0( w_fp[9], w_fp[11], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[11], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -314,7 +314,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 3 - VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -331,7 +331,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -348,7 +348,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -369,11 +369,11 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 1240 *** // Wavefunction(s) for diagram number 4 - VVV1P0_1( w_fp[7], w_fp[5], COUPs[0], 0., 0., w_fp[12] ); - VVV1P0_1( w_fp[8], w_fp[4], COUPs[0], 0., 0., w_fp[13] ); + VVV1P0_1( w_fp[7], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[12] ); + VVV1P0_1( w_fp[8], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[13] ); // Amplitude(s) for diagram number 4 - VVV1_0( w_fp[12], w_fp[13], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[13], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId 
!= 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -401,7 +401,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[12], w_fp[11], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[11], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -429,7 +429,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -446,7 +446,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -463,7 +463,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[3] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -484,10 +484,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 1240 *** // Wavefunction(s) for diagram number 7 - VVV1P0_1( w_fp[7], w_fp[6], COUPs[0], 0., 0., w_fp[14] ); + VVV1P0_1( w_fp[7], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[14] ); // Amplitude(s) for diagram number 7 - VVV1_0( w_fp[14], w_fp[13], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[13], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -515,7 +515,7 @@ namespace mg5amcCpu 
// (none) // Amplitude(s) for diagram number 8 - VVV1_0( w_fp[14], w_fp[10], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[10], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -543,7 +543,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -560,7 +560,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -577,7 +577,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -598,12 +598,12 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 1240 *** // Wavefunction(s) for diagram number 10 - VVVV1P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[15] ); - VVVV3P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[16] ); - VVVV4P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[17] ); + VVVV1P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] ); + VVVV3P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[16] ); + VVVV4P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] ); // Amplitude(s) for diagram number 10 - VVV1_0( w_fp[8], w_fp[6], w_fp[15], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[15], 
COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -620,7 +620,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -637,7 +637,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[17], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -658,12 +658,12 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 1240 *** // Wavefunction(s) for diagram number 11 - VVVV1P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[18] ); - VVVV3P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[19] ); - VVVV4P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[20] ); + VVVV1P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[18] ); + VVVV3P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[19] ); + VVVV4P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[20] ); // Amplitude(s) for diagram number 11 - VVV1_0( w_fp[8], w_fp[5], w_fp[18], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[18], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[25] -= amp_sv[0]; @@ -680,7 +680,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[19], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -697,7 +697,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], 
w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[20], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -718,12 +718,12 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 1240 *** // Wavefunction(s) for diagram number 12 - VVVV1P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[22] ); - VVVV4P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[23] ); + VVVV1P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] ); + VVVV4P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 12 - VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; @@ -740,7 +740,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -757,7 +757,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -778,10 +778,10 @@ namespace mg5amcCpu // *** DIAGRAM 13 OF 1240 *** // Wavefunction(s) for diagram number 13 - VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 0., 0., w_fp[24] ); + VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 13 - VVVV1_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 
1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[4] -= amp_sv[0]; @@ -798,7 +798,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -815,7 +815,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[28] -= amp_sv[0]; @@ -836,10 +836,10 @@ namespace mg5amcCpu // *** DIAGRAM 14 OF 1240 *** // Wavefunction(s) for diagram number 14 - VVV1P0_1( w_fp[7], w_fp[8], COUPs[0], 0., 0., w_fp[25] ); + VVV1P0_1( w_fp[7], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 14 - VVV1_0( w_fp[24], w_fp[6], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -864,10 +864,10 @@ namespace mg5amcCpu // *** DIAGRAM 15 OF 1240 *** // Wavefunction(s) for diagram number 15 - VVV1P0_1( w_fp[7], w_fp[24], COUPs[0], 0., 0., w_fp[26] ); + VVV1P0_1( w_fp[7], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[26] ); // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[8], w_fp[6], w_fp[26], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[26], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -895,7 +895,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 16 - VVV1_0( w_fp[8], 
w_fp[24], w_fp[14], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[24], w_fp[14], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -920,10 +920,10 @@ namespace mg5amcCpu // *** DIAGRAM 17 OF 1240 *** // Wavefunction(s) for diagram number 17 - VVV1P0_1( w_fp[4], w_fp[6], COUPs[0], 0., 0., w_fp[27] ); + VVV1P0_1( w_fp[4], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[27] ); // Amplitude(s) for diagram number 17 - VVVV1_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[3] += amp_sv[0]; @@ -940,7 +940,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; - VVVV3_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[25] -= amp_sv[0]; @@ -957,7 +957,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVVV4_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[26] -= amp_sv[0]; @@ -981,7 +981,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 18 - VVV1_0( w_fp[27], w_fp[5], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1006,10 +1006,10 @@ namespace mg5amcCpu // *** DIAGRAM 19 OF 1240 *** // Wavefunction(s) for diagram number 19 - VVV1P0_1( w_fp[7], w_fp[27], COUPs[0], 0., 0., w_fp[28] ); + VVV1P0_1( 
w_fp[7], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[28] ); // Amplitude(s) for diagram number 19 - VVV1_0( w_fp[8], w_fp[5], w_fp[28], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[28], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1037,7 +1037,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[8], w_fp[27], w_fp[12], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[27], w_fp[12], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1062,10 +1062,10 @@ namespace mg5amcCpu // *** DIAGRAM 21 OF 1240 *** // Wavefunction(s) for diagram number 21 - VVV1P0_1( w_fp[5], w_fp[6], COUPs[0], 0., 0., w_fp[29] ); + VVV1P0_1( w_fp[5], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] ); // Amplitude(s) for diagram number 21 - VVVV1_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -1082,7 +1082,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -1099,7 +1099,7 @@ namespace mg5amcCpu jamp_sv[116] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; @@ -1123,7 +1123,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 22 - VVV1_0( w_fp[4], w_fp[29], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1151,7 +1151,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 23 - VVV1_0( w_fp[8], w_fp[29], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[29], w_fp[9], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1176,10 +1176,10 @@ namespace mg5amcCpu // *** DIAGRAM 24 OF 1240 *** // Wavefunction(s) for diagram number 24 - VVV1P0_1( w_fp[7], w_fp[29], COUPs[0], 0., 0., w_fp[25] ); + VVV1P0_1( w_fp[7], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 24 - VVV1_0( w_fp[8], w_fp[4], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1204,12 +1204,12 @@ namespace mg5amcCpu // *** DIAGRAM 25 OF 1240 *** // Wavefunction(s) for diagram number 25 - VVVV1P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[30] ); - VVVV3P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[31] ); - VVVV4P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[32] ); + VVVV1P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[30] ); + VVVV3P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[31] ); + VVVV4P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[32] ); // Amplitude(s) for diagram number 25 - VVV1_0( w_fp[7], w_fp[8], w_fp[30], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[8], w_fp[30], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; 
jamp_sv[3] -= amp_sv[0]; @@ -1226,7 +1226,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[7], w_fp[8], w_fp[31], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[8], w_fp[31], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -1243,7 +1243,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[7], w_fp[8], w_fp[32], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[8], w_fp[32], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -1264,12 +1264,12 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 1240 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[33] ); - FFV1_2( w_fp[3], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[34] ); - FFV1_1( w_fp[33], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[35] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[33] ); + FFV1_2( w_fp[3], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] ); + FFV1_1( w_fp[33], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[35] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[34], w_fp[35], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[35], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1280,10 +1280,10 @@ namespace mg5amcCpu // *** DIAGRAM 27 OF 1240 *** // Wavefunction(s) for diagram number 27 - FFV1_1( w_fp[33], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[36] ); + FFV1_1( w_fp[33], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[36] ); // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[34], w_fp[36], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[36], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId 
== 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1294,10 +1294,10 @@ namespace mg5amcCpu // *** DIAGRAM 28 OF 1240 *** // Wavefunction(s) for diagram number 28 - FFV1P0_3( w_fp[3], w_fp[33], COUPs[1], 0., 0., w_fp[37] ); + FFV1P0_3( w_fp[3], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[37] ); // Amplitude(s) for diagram number 28 - VVV1_0( w_fp[12], w_fp[37], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[37], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1317,7 +1317,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[3], w_fp[36], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[36], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1333,7 +1333,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 30 - VVV1_0( w_fp[14], w_fp[37], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[37], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1353,7 +1353,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 31 - FFV1_0( w_fp[3], w_fp[35], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[35], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1369,7 +1369,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, 
&_fp[0] ); jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1378,7 +1378,7 @@ namespace mg5amcCpu jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &_fp[0] ); jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1387,7 +1387,7 @@ namespace mg5amcCpu jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1400,11 +1400,11 @@ namespace mg5amcCpu // *** DIAGRAM 33 OF 1240 *** // Wavefunction(s) for diagram number 33 - FFV1_2( w_fp[3], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[38] ); - FFV1_1( w_fp[33], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[39] ); + FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[38] ); + FFV1_1( w_fp[33], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] ); // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[38], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1415,10 +1415,10 @@ namespace mg5amcCpu // *** DIAGRAM 34 OF 1240 *** // Wavefunction(s) for diagram number 34 - FFV1_2( w_fp[38], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[40] ); + FFV1_2( w_fp[38], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] ); // Amplitude(s) for diagram 
number 34 - FFV1_0( w_fp[40], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 34 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1432,7 +1432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - FFV1_0( w_fp[38], w_fp[33], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1445,10 +1445,10 @@ namespace mg5amcCpu // *** DIAGRAM 36 OF 1240 *** // Wavefunction(s) for diagram number 36 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[41] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[41] ); // Amplitude(s) for diagram number 36 - FFV1_0( w_fp[41], w_fp[39], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1459,10 +1459,10 @@ namespace mg5amcCpu // *** DIAGRAM 37 OF 1240 *** // Wavefunction(s) for diagram number 37 - FFV1_2( w_fp[41], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[42] ); + FFV1_2( w_fp[41], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[42] ); // Amplitude(s) for diagram number 37 - FFV1_0( w_fp[42], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[42], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 37 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1476,7 +1476,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 38 - FFV1_0( w_fp[41], w_fp[33], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], 
w_fp[33], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 38 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1492,7 +1492,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 39 - FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 39 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1508,7 +1508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 40 - FFV1_0( w_fp[34], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 40 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1524,7 +1524,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 41 - FFV1_0( w_fp[3], w_fp[33], w_fp[25], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[25], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 41 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1541,11 +1541,11 @@ namespace mg5amcCpu // *** DIAGRAM 42 OF 1240 *** // Wavefunction(s) for diagram number 42 - FFV1_1( w_fp[2], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[39] ); - FFV1_1( w_fp[39], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[43] ); + FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] ); + FFV1_1( w_fp[39], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[43] ); // Amplitude(s) for diagram number 42 - FFV1_0( w_fp[34], w_fp[43], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[43], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 42 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId 
!= 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1556,10 +1556,10 @@ namespace mg5amcCpu // *** DIAGRAM 43 OF 1240 *** // Wavefunction(s) for diagram number 43 - FFV1_1( w_fp[39], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[44] ); + FFV1_1( w_fp[39], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[44] ); // Amplitude(s) for diagram number 43 - FFV1_0( w_fp[34], w_fp[44], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[44], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 43 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1570,10 +1570,10 @@ namespace mg5amcCpu // *** DIAGRAM 44 OF 1240 *** // Wavefunction(s) for diagram number 44 - FFV1P0_3( w_fp[3], w_fp[39], COUPs[1], 0., 0., w_fp[45] ); + FFV1P0_3( w_fp[3], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[45] ); // Amplitude(s) for diagram number 44 - VVV1_0( w_fp[9], w_fp[45], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[45], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 44 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1593,7 +1593,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 45 - FFV1_0( w_fp[3], w_fp[44], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[44], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 45 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1609,7 +1609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 46 - VVV1_0( w_fp[14], w_fp[45], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[45], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 46 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1629,7 +1629,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) 
for diagram number 47 - FFV1_0( w_fp[3], w_fp[43], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[43], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 47 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1645,7 +1645,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 48 - FFV1_0( w_fp[3], w_fp[39], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[18], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1654,7 +1654,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[19], COUPs[1], 1.0, &_fp[0] ); jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1663,7 +1663,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[20], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1676,11 +1676,11 @@ namespace mg5amcCpu // *** DIAGRAM 49 OF 1240 *** // Wavefunction(s) for diagram number 49 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[46] ); - FFV1_1( w_fp[39], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[47] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[46] ); + FFV1_1( w_fp[39], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] ); // Amplitude(s) for diagram number 49 - FFV1_0( w_fp[46], w_fp[47], w_fp[6], COUPs[1], &_fp[0] ); + 
FFV1_0( w_fp[46], w_fp[47], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 49 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1691,10 +1691,10 @@ namespace mg5amcCpu // *** DIAGRAM 50 OF 1240 *** // Wavefunction(s) for diagram number 50 - FFV1_2( w_fp[46], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[48] ); + FFV1_2( w_fp[46], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] ); // Amplitude(s) for diagram number 50 - FFV1_0( w_fp[48], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 50 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1708,7 +1708,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 51 - FFV1_0( w_fp[46], w_fp[39], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 51 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1724,7 +1724,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 52 - FFV1_0( w_fp[41], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 52 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1738,7 +1738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 53 - FFV1_0( w_fp[42], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[42], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 53 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1752,7 +1752,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 54 - FFV1_0( w_fp[41], w_fp[39], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 54 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1768,7 +1768,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 55 - FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 55 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1784,7 +1784,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 56 - FFV1_0( w_fp[34], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 56 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1800,7 +1800,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 57 - FFV1_0( w_fp[3], w_fp[39], w_fp[28], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[28], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 57 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1817,11 +1817,11 @@ namespace mg5amcCpu // *** DIAGRAM 58 OF 1240 *** // Wavefunction(s) for diagram number 58 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[47] ); - FFV1_1( w_fp[47], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[49] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] ); + FFV1_1( w_fp[47], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[49] ); // Amplitude(s) for diagram number 58 - FFV1_0( w_fp[34], w_fp[49], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[49], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 58 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1832,10 +1832,10 @@ namespace mg5amcCpu // *** DIAGRAM 59 OF 1240 *** // Wavefunction(s) for diagram number 59 - FFV1_1( w_fp[47], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[50] ); + FFV1_1( w_fp[47], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[50] ); // Amplitude(s) for diagram number 59 - FFV1_0( w_fp[34], w_fp[50], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[50], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 59 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1846,10 +1846,10 @@ namespace mg5amcCpu // *** DIAGRAM 60 OF 1240 *** // Wavefunction(s) for diagram number 60 - FFV1P0_3( w_fp[3], w_fp[47], COUPs[1], 0., 0., w_fp[51] ); + FFV1P0_3( w_fp[3], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[51] ); // Amplitude(s) for diagram number 60 - VVV1_0( w_fp[9], w_fp[51], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[51], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 60 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1869,7 +1869,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 61 - FFV1_0( w_fp[3], w_fp[50], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[50], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 61 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1885,7 +1885,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 62 - VVV1_0( w_fp[12], w_fp[51], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[51], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 62 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -1905,7 +1905,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 63 - FFV1_0( w_fp[3], w_fp[49], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[49], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 63 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1921,7 +1921,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 64 - FFV1_0( w_fp[3], w_fp[47], w_fp[15], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[15], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1930,7 +1930,7 @@ namespace mg5amcCpu jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &_fp[0] ); jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1939,7 +1939,7 @@ namespace mg5amcCpu jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[17], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1952,10 +1952,10 @@ namespace mg5amcCpu // *** DIAGRAM 65 OF 1240 *** // Wavefunction(s) for diagram number 65 - FFV1_1( w_fp[47], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[52] ); + FFV1_1( w_fp[47], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] ); // Amplitude(s) for diagram number 65 - FFV1_0( w_fp[46], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], 
w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 65 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1969,7 +1969,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 66 - FFV1_0( w_fp[48], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 66 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1983,7 +1983,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 67 - FFV1_0( w_fp[46], w_fp[47], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 67 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1999,7 +1999,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 68 - FFV1_0( w_fp[38], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 68 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2013,7 +2013,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 69 - FFV1_0( w_fp[40], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 69 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2027,7 +2027,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 70 - FFV1_0( w_fp[38], w_fp[47], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 70 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2043,7 +2043,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 71 - FFV1_0( w_fp[3], w_fp[52], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 71 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2059,7 +2059,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 72 - FFV1_0( w_fp[34], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 72 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2075,7 +2075,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 73 - FFV1_0( w_fp[3], w_fp[47], w_fp[26], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[26], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 73 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2092,11 +2092,11 @@ namespace mg5amcCpu // *** DIAGRAM 74 OF 1240 *** // Wavefunction(s) for diagram number 74 - FFV1_1( w_fp[2], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[52] ); - FFV1_2( w_fp[46], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[7] ); + FFV1_1( w_fp[2], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] ); + FFV1_2( w_fp[46], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] ); // Amplitude(s) for diagram number 74 - FFV1_0( w_fp[7], w_fp[52], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[52], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 74 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2107,10 +2107,10 @@ namespace mg5amcCpu // *** DIAGRAM 75 OF 1240 *** // 
Wavefunction(s) for diagram number 75 - FFV1_2( w_fp[46], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[53] ); + FFV1_2( w_fp[46], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[53] ); // Amplitude(s) for diagram number 75 - FFV1_0( w_fp[53], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 75 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2121,10 +2121,10 @@ namespace mg5amcCpu // *** DIAGRAM 76 OF 1240 *** // Wavefunction(s) for diagram number 76 - FFV1P0_3( w_fp[46], w_fp[2], COUPs[1], 0., 0., w_fp[54] ); + FFV1P0_3( w_fp[46], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[54] ); // Amplitude(s) for diagram number 76 - VVV1_0( w_fp[12], w_fp[54], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[54], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 76 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2144,7 +2144,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 77 - FFV1_0( w_fp[53], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 77 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2160,7 +2160,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 78 - VVV1_0( w_fp[14], w_fp[54], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[54], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 78 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2180,7 +2180,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 79 - FFV1_0( w_fp[7], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[2], w_fp[14], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 79 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2196,7 +2196,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 80 - FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2205,7 +2205,7 @@ namespace mg5amcCpu jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2214,7 +2214,7 @@ namespace mg5amcCpu jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2230,7 +2230,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 81 - FFV1_0( w_fp[46], w_fp[52], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[52], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 81 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2246,7 +2246,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 82 - FFV1_0( w_fp[48], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 82 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2262,7 +2262,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 83 - FFV1_0( w_fp[46], w_fp[2], w_fp[25], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[25], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 83 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2279,10 +2279,10 @@ namespace mg5amcCpu // *** DIAGRAM 84 OF 1240 *** // Wavefunction(s) for diagram number 84 - FFV1_2( w_fp[38], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[25] ); + FFV1_2( w_fp[38], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[25] ); // Amplitude(s) for diagram number 84 - FFV1_0( w_fp[25], w_fp[52], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[52], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 84 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2293,10 +2293,10 @@ namespace mg5amcCpu // *** DIAGRAM 85 OF 1240 *** // Wavefunction(s) for diagram number 85 - FFV1_2( w_fp[38], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[48] ); + FFV1_2( w_fp[38], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] ); // Amplitude(s) for diagram number 85 - FFV1_0( w_fp[48], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 85 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2307,10 +2307,10 @@ namespace mg5amcCpu // *** DIAGRAM 86 OF 1240 *** // Wavefunction(s) for diagram number 86 - FFV1P0_3( w_fp[38], w_fp[2], COUPs[1], 0., 0., w_fp[23] ); + FFV1P0_3( w_fp[38], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 86 - VVV1_0( w_fp[9], w_fp[23], w_fp[6], 
COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[23], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 86 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2330,7 +2330,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 87 - FFV1_0( w_fp[48], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 87 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2346,7 +2346,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 88 - VVV1_0( w_fp[14], w_fp[23], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[23], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 88 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2366,7 +2366,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 89 - FFV1_0( w_fp[25], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 89 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2382,7 +2382,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 90 - FFV1_0( w_fp[38], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2391,7 +2391,7 @@ namespace mg5amcCpu jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); 
jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2400,7 +2400,7 @@ namespace mg5amcCpu jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[20], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2416,7 +2416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 91 - FFV1_0( w_fp[38], w_fp[52], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[52], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 91 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2432,7 +2432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 92 - FFV1_0( w_fp[40], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 92 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2448,7 +2448,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 93 - FFV1_0( w_fp[38], w_fp[2], w_fp[28], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[28], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 93 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2465,10 +2465,10 @@ namespace mg5amcCpu // *** DIAGRAM 94 OF 1240 *** // Wavefunction(s) for diagram number 94 - FFV1_2( w_fp[41], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[28] ); + FFV1_2( w_fp[41], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[28] ); // Amplitude(s) for diagram number 94 - FFV1_0( w_fp[28], w_fp[52], 
w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 94 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2479,10 +2479,10 @@ namespace mg5amcCpu // *** DIAGRAM 95 OF 1240 *** // Wavefunction(s) for diagram number 95 - FFV1_2( w_fp[41], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[40] ); + FFV1_2( w_fp[41], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] ); // Amplitude(s) for diagram number 95 - FFV1_0( w_fp[40], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 95 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2493,10 +2493,10 @@ namespace mg5amcCpu // *** DIAGRAM 96 OF 1240 *** // Wavefunction(s) for diagram number 96 - FFV1P0_3( w_fp[41], w_fp[2], COUPs[1], 0., 0., w_fp[20] ); + FFV1P0_3( w_fp[41], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[20] ); // Amplitude(s) for diagram number 96 - VVV1_0( w_fp[9], w_fp[20], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[20], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 96 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2516,7 +2516,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 97 - FFV1_0( w_fp[40], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 97 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2532,7 +2532,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 98 - VVV1_0( w_fp[12], w_fp[20], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[20], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 98 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2552,7 +2552,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 99 - FFV1_0( w_fp[28], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 99 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2568,7 +2568,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 100 - FFV1_0( w_fp[41], w_fp[2], w_fp[15], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[15], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2577,7 +2577,7 @@ namespace mg5amcCpu jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2586,7 +2586,7 @@ namespace mg5amcCpu jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2602,7 +2602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 101 - FFV1_0( w_fp[41], w_fp[52], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[52], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 101 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2618,7 +2618,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 102 - FFV1_0( w_fp[42], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[42], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 102 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2634,7 +2634,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 103 - FFV1_0( w_fp[41], w_fp[2], w_fp[26], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[26], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 103 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2651,10 +2651,10 @@ namespace mg5amcCpu // *** DIAGRAM 104 OF 1240 *** // Wavefunction(s) for diagram number 104 - FFV1_2( w_fp[3], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[26] ); + FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[26] ); // Amplitude(s) for diagram number 104 - FFV1_0( w_fp[26], w_fp[52], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[52], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 104 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2667,10 +2667,10 @@ namespace mg5amcCpu // *** DIAGRAM 105 OF 1240 *** // Wavefunction(s) for diagram number 105 - VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 0., 0., w_fp[42] ); + VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[42] ); // Amplitude(s) for diagram number 105 - FFV1_0( w_fp[3], w_fp[52], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 105 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); 
@@ -2687,10 +2687,10 @@ namespace mg5amcCpu // *** DIAGRAM 106 OF 1240 *** // Wavefunction(s) for diagram number 106 - FFV1_1( w_fp[2], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[17] ); + FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] ); // Amplitude(s) for diagram number 106 - FFV1_0( w_fp[34], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 106 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2706,7 +2706,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 107 - FFV1_0( w_fp[34], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 107 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2726,7 +2726,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 108 - FFV1_0( w_fp[3], w_fp[17], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 108 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2746,7 +2746,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 109 - FFV1_0( w_fp[26], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 109 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2763,10 +2763,10 @@ namespace mg5amcCpu // *** DIAGRAM 110 OF 1240 *** // Wavefunction(s) for diagram number 110 - FFV1_2( w_fp[3], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); + FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram 
number 110 - FFV1_0( w_fp[14], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 110 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2779,10 +2779,10 @@ namespace mg5amcCpu // *** DIAGRAM 111 OF 1240 *** // Wavefunction(s) for diagram number 111 - VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] ); // Amplitude(s) for diagram number 111 - FFV1_0( w_fp[3], w_fp[52], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 111 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2799,10 +2799,10 @@ namespace mg5amcCpu // *** DIAGRAM 112 OF 1240 *** // Wavefunction(s) for diagram number 112 - FFV1_1( w_fp[2], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[15] ); + FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] ); // Amplitude(s) for diagram number 112 - FFV1_0( w_fp[34], w_fp[15], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[15], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 112 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2818,7 +2818,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 113 - FFV1_0( w_fp[34], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 113 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2838,7 +2838,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 114 - FFV1_0( w_fp[3], w_fp[15], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[15], 
w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 114 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2858,7 +2858,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 115 - FFV1_0( w_fp[14], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 115 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2875,10 +2875,10 @@ namespace mg5amcCpu // *** DIAGRAM 116 OF 1240 *** // Wavefunction(s) for diagram number 116 - FFV1_2( w_fp[3], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 116 - FFV1_0( w_fp[12], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 116 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2891,10 +2891,10 @@ namespace mg5amcCpu // *** DIAGRAM 117 OF 1240 *** // Wavefunction(s) for diagram number 117 - VVV1P0_1( w_fp[4], w_fp[29], COUPs[0], 0., 0., w_fp[19] ); + VVV1P0_1( w_fp[4], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[19] ); // Amplitude(s) for diagram number 117 - FFV1_0( w_fp[3], w_fp[52], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 117 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2911,10 +2911,10 @@ namespace mg5amcCpu // *** DIAGRAM 118 OF 1240 *** // Wavefunction(s) for diagram number 118 - FFV1_1( w_fp[2], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[18] ); + FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[18] ); // Amplitude(s) for 
diagram number 118 - FFV1_0( w_fp[34], w_fp[18], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[18], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 118 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2930,7 +2930,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 119 - FFV1_0( w_fp[34], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 119 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2950,7 +2950,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 120 - FFV1_0( w_fp[3], w_fp[18], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[18], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 120 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2970,7 +2970,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 121 - FFV1_0( w_fp[12], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 121 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2990,7 +2990,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 122 - FFV1_0( w_fp[3], w_fp[52], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[30], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2999,7 +2999,7 @@ namespace mg5amcCpu jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[52], w_fp[31], COUPs[1], &_fp[0] ); + 
FFV1_0( w_fp[3], w_fp[52], w_fp[31], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -3008,7 +3008,7 @@ namespace mg5amcCpu jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[52], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[32], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3024,7 +3024,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 123 - FFV1_0( w_fp[34], w_fp[2], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[30], COUPs[1], 1.0, &_fp[0] ); jamp_sv[64] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; @@ -3033,7 +3033,7 @@ namespace mg5amcCpu jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[31], COUPs[1], 1.0, &_fp[0] ); jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3042,7 +3042,7 @@ namespace mg5amcCpu jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[32], COUPs[1], 1.0, &_fp[0] ); jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3055,13 +3055,13 @@ namespace mg5amcCpu // *** DIAGRAM 124 OF 1240 *** // Wavefunction(s) for diagram number 124 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[34] ); - 
FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[52] ); - FFV1_1( w_fp[34], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[52], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] ); + FFV1_1( w_fp[34], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[52], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 124 - FFV1_0( w_fp[22], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 124 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3071,10 +3071,10 @@ namespace mg5amcCpu // *** DIAGRAM 125 OF 1240 *** // Wavefunction(s) for diagram number 125 - FFV1_2( w_fp[52], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[52], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 125 - FFV1_0( w_fp[21], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 125 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3084,11 +3084,11 @@ namespace mg5amcCpu // *** DIAGRAM 126 OF 1240 *** // Wavefunction(s) for diagram number 126 - FFV1_1( w_fp[34], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[55] ); - FFV1_2( w_fp[52], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[56] ); + FFV1_1( w_fp[34], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[55] ); + FFV1_2( w_fp[52], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[56] ); // Amplitude(s) for diagram number 126 - FFV1_0( w_fp[56], w_fp[55], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[55], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 126 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3101,7 +3101,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 127 - FFV1_0( w_fp[21], w_fp[55], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[55], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 127 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3111,10 +3111,10 @@ namespace mg5amcCpu // *** DIAGRAM 128 OF 1240 *** // Wavefunction(s) for diagram number 128 - FFV1_1( w_fp[34], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[57] ); + FFV1_1( w_fp[34], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[57] ); // Amplitude(s) for diagram number 128 - FFV1_0( w_fp[56], w_fp[57], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[57], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 128 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3127,7 +3127,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 129 - FFV1_0( w_fp[22], w_fp[57], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[57], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 129 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3137,10 +3137,10 @@ namespace mg5amcCpu // *** DIAGRAM 130 OF 1240 *** // Wavefunction(s) for diagram number 130 - FFV1P0_3( w_fp[52], w_fp[34], COUPs[1], 0., 0., w_fp[58] ); + FFV1P0_3( w_fp[52], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[58] ); // Amplitude(s) for diagram number 130 - VVV1_0( w_fp[24], w_fp[6], w_fp[58], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[58], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 130 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3153,10 +3153,10 @@ namespace mg5amcCpu // *** DIAGRAM 131 OF 1240 *** // Wavefunction(s) for diagram number 131 - FFV1_1( w_fp[34], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[59] ); + FFV1_1( w_fp[34], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] ); // Amplitude(s) for diagram number 131 - FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 131 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3170,7 +3170,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 132 - FFV1_0( w_fp[52], w_fp[57], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[57], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 132 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3184,7 +3184,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 133 - VVV1_0( w_fp[27], w_fp[5], w_fp[58], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[58], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 133 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3197,10 +3197,10 @@ namespace mg5amcCpu // *** DIAGRAM 134 OF 1240 *** // Wavefunction(s) for diagram number 134 - FFV1_1( w_fp[34], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); + FFV1_1( w_fp[34], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); // Amplitude(s) for diagram number 134 - FFV1_0( w_fp[52], w_fp[60], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[60], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 134 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3214,7 +3214,7 
@@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 135 - FFV1_0( w_fp[52], w_fp[55], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[55], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 135 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3228,7 +3228,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 136 - VVV1_0( w_fp[4], w_fp[29], w_fp[58], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[58], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 136 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3244,7 +3244,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 137 - FFV1_0( w_fp[52], w_fp[9], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[9], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 137 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3255,10 +3255,10 @@ namespace mg5amcCpu // *** DIAGRAM 138 OF 1240 *** // Wavefunction(s) for diagram number 138 - FFV1_1( w_fp[34], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[58] ); + FFV1_1( w_fp[34], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] ); // Amplitude(s) for diagram number 138 - FFV1_0( w_fp[52], w_fp[58], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[58], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 138 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3272,17 +3272,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 139 - FFV1_0( w_fp[52], w_fp[34], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[34], w_fp[30], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[11] -= amp_sv[0]; jamp_sv[17] -= amp_sv[0]; 
jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[34], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[34], w_fp[31], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[34], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[34], w_fp[32], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -3291,12 +3291,12 @@ namespace mg5amcCpu // *** DIAGRAM 140 OF 1240 *** // Wavefunction(s) for diagram number 140 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[61] ); - FFV1P0_3( w_fp[3], w_fp[34], COUPs[1], 0., 0., w_fp[62] ); - VVV1P0_1( w_fp[61], w_fp[5], COUPs[0], 0., 0., w_fp[63] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[61] ); + FFV1P0_3( w_fp[3], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[62] ); + VVV1P0_1( w_fp[61], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[63] ); // Amplitude(s) for diagram number 140 - VVV1_0( w_fp[62], w_fp[63], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[63], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 140 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3313,10 +3313,10 @@ namespace mg5amcCpu // *** DIAGRAM 141 OF 1240 *** // Wavefunction(s) for diagram number 141 - VVV1P0_1( w_fp[61], w_fp[6], COUPs[0], 0., 0., w_fp[64] ); + VVV1P0_1( w_fp[61], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[64] ); // Amplitude(s) for diagram number 141 - VVV1_0( w_fp[62], w_fp[64], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[64], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 141 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3336,7 +3336,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 142 - VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 
&_fp[0] ); + VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -3345,7 +3345,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -3354,7 +3354,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3367,10 +3367,10 @@ namespace mg5amcCpu // *** DIAGRAM 143 OF 1240 *** // Wavefunction(s) for diagram number 143 - FFV1_2( w_fp[3], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[65] ); + FFV1_2( w_fp[3], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[65] ); // Amplitude(s) for diagram number 143 - FFV1_0( w_fp[65], w_fp[55], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[55], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 143 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3384,7 +3384,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 144 - FFV1_0( w_fp[3], w_fp[55], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 144 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3400,7 +3400,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 145 - FFV1_0( w_fp[65], w_fp[57], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[57], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 145 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3414,7 +3414,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 146 - FFV1_0( w_fp[3], w_fp[57], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 146 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3427,10 +3427,10 @@ namespace mg5amcCpu // *** DIAGRAM 147 OF 1240 *** // Wavefunction(s) for diagram number 147 - FFV1_1( w_fp[34], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[66] ); + FFV1_1( w_fp[34], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] ); // Amplitude(s) for diagram number 147 - FFV1_0( w_fp[38], w_fp[66], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[66], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 147 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3441,10 +3441,10 @@ namespace mg5amcCpu // *** DIAGRAM 148 OF 1240 *** // Wavefunction(s) for diagram number 148 - FFV1P0_3( w_fp[38], w_fp[34], COUPs[1], 0., 0., w_fp[67] ); + FFV1P0_3( w_fp[38], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[67] ); // Amplitude(s) for diagram number 148 - VVV1_0( w_fp[61], w_fp[6], w_fp[67], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[67], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 148 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3460,7 +3460,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 149 - FFV1_0( w_fp[38], w_fp[57], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[57], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 149 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3474,7 +3474,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 150 - FFV1_0( w_fp[41], w_fp[66], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[66], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 150 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3485,10 +3485,10 @@ namespace mg5amcCpu // *** DIAGRAM 151 OF 1240 *** // Wavefunction(s) for diagram number 151 - FFV1P0_3( w_fp[41], w_fp[34], COUPs[1], 0., 0., w_fp[68] ); + FFV1P0_3( w_fp[41], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 151 - VVV1_0( w_fp[61], w_fp[5], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[68], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 151 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3504,7 +3504,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 152 - FFV1_0( w_fp[41], w_fp[55], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[55], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 152 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3518,7 +3518,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 153 - FFV1_0( w_fp[3], w_fp[66], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[66], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 153 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 
) denominators_sv += cxabs2( amp_sv[0] ); @@ -3534,7 +3534,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 154 - VVV1_0( w_fp[61], w_fp[29], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[29], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 154 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3554,7 +3554,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 155 - FFV1_0( w_fp[3], w_fp[58], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[58], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 155 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3567,11 +3567,11 @@ namespace mg5amcCpu // *** DIAGRAM 156 OF 1240 *** // Wavefunction(s) for diagram number 156 - VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 0., 0., w_fp[66] ); - VVV1P0_1( w_fp[66], w_fp[4], COUPs[0], 0., 0., w_fp[69] ); + VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[66] ); + VVV1P0_1( w_fp[66], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[69] ); // Amplitude(s) for diagram number 156 - VVV1_0( w_fp[62], w_fp[69], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[69], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 156 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3588,10 +3588,10 @@ namespace mg5amcCpu // *** DIAGRAM 157 OF 1240 *** // Wavefunction(s) for diagram number 157 - VVV1P0_1( w_fp[66], w_fp[6], COUPs[0], 0., 0., w_fp[70] ); + VVV1P0_1( w_fp[66], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[70] ); // Amplitude(s) for diagram number 157 - VVV1_0( w_fp[62], w_fp[70], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[70], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 157 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3611,7 +3611,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 158 - VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3620,7 +3620,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3629,7 +3629,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3642,10 +3642,10 @@ namespace mg5amcCpu // *** DIAGRAM 159 OF 1240 *** // Wavefunction(s) for diagram number 159 - FFV1_2( w_fp[3], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); + FFV1_2( w_fp[3], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); // Amplitude(s) for diagram number 159 - FFV1_0( w_fp[71], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 159 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3659,7 +3659,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 160 - FFV1_0( w_fp[3], 
w_fp[9], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 160 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3675,7 +3675,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 161 - FFV1_0( w_fp[71], w_fp[57], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[57], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 161 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3689,7 +3689,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 162 - FFV1_0( w_fp[3], w_fp[57], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 162 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3702,10 +3702,10 @@ namespace mg5amcCpu // *** DIAGRAM 163 OF 1240 *** // Wavefunction(s) for diagram number 163 - FFV1_1( w_fp[34], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[72] ); + FFV1_1( w_fp[34], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] ); // Amplitude(s) for diagram number 163 - FFV1_0( w_fp[46], w_fp[72], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[72], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 163 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3716,10 +3716,10 @@ namespace mg5amcCpu // *** DIAGRAM 164 OF 1240 *** // Wavefunction(s) for diagram number 164 - FFV1P0_3( w_fp[46], w_fp[34], COUPs[1], 0., 0., w_fp[73] ); + FFV1P0_3( w_fp[46], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[73] ); // Amplitude(s) for diagram number 164 - VVV1_0( w_fp[66], w_fp[6], w_fp[73], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[73], COUPs[0], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 164 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3735,7 +3735,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 165 - FFV1_0( w_fp[46], w_fp[57], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[57], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 165 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3749,7 +3749,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 166 - FFV1_0( w_fp[41], w_fp[72], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[72], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 166 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3763,7 +3763,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 167 - VVV1_0( w_fp[66], w_fp[4], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[68], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 167 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3779,7 +3779,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 168 - FFV1_0( w_fp[41], w_fp[9], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[9], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 168 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3793,7 +3793,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 169 - FFV1_0( w_fp[3], w_fp[72], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[72], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 169 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId 
!= 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3809,7 +3809,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 170 - VVV1_0( w_fp[66], w_fp[27], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[27], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 170 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3829,7 +3829,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 171 - FFV1_0( w_fp[3], w_fp[60], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[60], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 171 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3842,11 +3842,11 @@ namespace mg5amcCpu // *** DIAGRAM 172 OF 1240 *** // Wavefunction(s) for diagram number 172 - VVV1P0_1( w_fp[1], w_fp[6], COUPs[0], 0., 0., w_fp[72] ); - VVV1P0_1( w_fp[72], w_fp[4], COUPs[0], 0., 0., w_fp[74] ); + VVV1P0_1( w_fp[1], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[72] ); + VVV1P0_1( w_fp[72], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[74] ); // Amplitude(s) for diagram number 172 - VVV1_0( w_fp[62], w_fp[74], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[74], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 172 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3863,10 +3863,10 @@ namespace mg5amcCpu // *** DIAGRAM 173 OF 1240 *** // Wavefunction(s) for diagram number 173 - VVV1P0_1( w_fp[72], w_fp[5], COUPs[0], 0., 0., w_fp[75] ); + VVV1P0_1( w_fp[72], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[75] ); // Amplitude(s) for diagram number 173 - VVV1_0( w_fp[62], w_fp[75], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[75], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 173 ) numerators_sv += cxabs2( amp_sv[0] ); 
if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3886,7 +3886,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 174 - VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3895,7 +3895,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3904,7 +3904,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -3917,10 +3917,10 @@ namespace mg5amcCpu // *** DIAGRAM 175 OF 1240 *** // Wavefunction(s) for diagram number 175 - FFV1_2( w_fp[3], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[76] ); + FFV1_2( w_fp[3], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[76] ); // Amplitude(s) for diagram number 175 - FFV1_0( w_fp[76], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 175 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3934,7 +3934,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 176 - FFV1_0( w_fp[3], 
w_fp[9], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 176 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3950,7 +3950,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 177 - FFV1_0( w_fp[76], w_fp[55], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[55], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 177 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3964,7 +3964,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 178 - FFV1_0( w_fp[3], w_fp[55], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 178 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3977,10 +3977,10 @@ namespace mg5amcCpu // *** DIAGRAM 179 OF 1240 *** // Wavefunction(s) for diagram number 179 - FFV1_1( w_fp[34], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); + FFV1_1( w_fp[34], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); // Amplitude(s) for diagram number 179 - FFV1_0( w_fp[46], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 179 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -3994,7 +3994,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 180 - VVV1_0( w_fp[72], w_fp[5], w_fp[73], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[73], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 180 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4010,7 +4010,7 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 181 - FFV1_0( w_fp[46], w_fp[55], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[55], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 181 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4024,7 +4024,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 182 - FFV1_0( w_fp[38], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 182 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4038,7 +4038,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 183 - VVV1_0( w_fp[72], w_fp[4], w_fp[67], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[67], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 183 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4054,7 +4054,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 184 - FFV1_0( w_fp[38], w_fp[9], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[9], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 184 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4068,7 +4068,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 185 - FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 185 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4084,7 +4084,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 186 - VVV1_0( w_fp[72], w_fp[24], w_fp[62], COUPs[0], 
&_fp[0] ); + VVV1_0( w_fp[72], w_fp[24], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 186 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4104,7 +4104,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 187 - FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 187 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4117,10 +4117,10 @@ namespace mg5amcCpu // *** DIAGRAM 188 OF 1240 *** // Wavefunction(s) for diagram number 188 - FFV1_1( w_fp[34], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); + FFV1_1( w_fp[34], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); // Amplitude(s) for diagram number 188 - FFV1_0( w_fp[7], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 188 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4133,7 +4133,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 189 - FFV1_0( w_fp[53], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 189 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4143,10 +4143,10 @@ namespace mg5amcCpu // *** DIAGRAM 190 OF 1240 *** // Wavefunction(s) for diagram number 190 - FFV1_2( w_fp[46], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[78] ); + FFV1_2( w_fp[46], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[78] ); // Amplitude(s) for diagram number 190 - FFV1_0( w_fp[78], w_fp[55], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[55], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 190 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4159,7 +4159,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 191 - FFV1_0( w_fp[53], w_fp[55], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[55], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 191 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4172,7 +4172,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 192 - FFV1_0( w_fp[78], w_fp[57], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[57], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 192 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4185,7 +4185,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 193 - FFV1_0( w_fp[7], w_fp[57], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[57], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 193 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4198,7 +4198,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 194 - FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 194 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4212,7 +4212,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 195 - VVV1_0( w_fp[1], w_fp[29], w_fp[73], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[73], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 195 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -4228,7 +4228,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 196 - FFV1_0( w_fp[46], w_fp[58], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[58], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 196 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4242,7 +4242,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 197 - FFV1_0( w_fp[25], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 197 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4255,7 +4255,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 198 - FFV1_0( w_fp[48], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 198 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4265,10 +4265,10 @@ namespace mg5amcCpu // *** DIAGRAM 199 OF 1240 *** // Wavefunction(s) for diagram number 199 - FFV1_2( w_fp[38], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[58] ); + FFV1_2( w_fp[38], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] ); // Amplitude(s) for diagram number 199 - FFV1_0( w_fp[58], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 199 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4281,7 +4281,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 200 - FFV1_0( w_fp[48], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
if( channelId == 200 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4294,7 +4294,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 201 - FFV1_0( w_fp[58], w_fp[57], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[57], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 201 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4307,7 +4307,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 202 - FFV1_0( w_fp[25], w_fp[57], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[57], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 202 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4320,7 +4320,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 203 - FFV1_0( w_fp[38], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 203 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4334,7 +4334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 204 - VVV1_0( w_fp[1], w_fp[27], w_fp[67], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[67], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 204 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4350,7 +4350,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 205 - FFV1_0( w_fp[38], w_fp[60], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[60], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 205 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); 
@@ -4364,7 +4364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 206 - FFV1_0( w_fp[28], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 206 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4377,7 +4377,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 207 - FFV1_0( w_fp[40], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 207 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4387,10 +4387,10 @@ namespace mg5amcCpu // *** DIAGRAM 208 OF 1240 *** // Wavefunction(s) for diagram number 208 - FFV1_2( w_fp[41], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); + FFV1_2( w_fp[41], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); // Amplitude(s) for diagram number 208 - FFV1_0( w_fp[60], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 208 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4403,7 +4403,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 209 - FFV1_0( w_fp[40], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 209 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4416,7 +4416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 210 - FFV1_0( w_fp[60], w_fp[55], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[55], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 210 ) numerators_sv += 
cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4429,7 +4429,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 211 - FFV1_0( w_fp[28], w_fp[55], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[55], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 211 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4442,7 +4442,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 212 - FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 212 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4456,7 +4456,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 213 - VVV1_0( w_fp[1], w_fp[24], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[68], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 213 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4472,7 +4472,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 214 - FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 214 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4486,7 +4486,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 215 - FFV1_0( w_fp[26], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 215 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4500,7 +4500,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 216 - FFV1_0( w_fp[3], w_fp[77], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 216 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4513,10 +4513,10 @@ namespace mg5amcCpu // *** DIAGRAM 217 OF 1240 *** // Wavefunction(s) for diagram number 217 - VVV1P0_1( w_fp[1], w_fp[24], COUPs[0], 0., 0., w_fp[59] ); + VVV1P0_1( w_fp[1], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[59] ); // Amplitude(s) for diagram number 217 - VVV1_0( w_fp[62], w_fp[59], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[59], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 217 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4536,7 +4536,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 218 - VVV1_0( w_fp[62], w_fp[1], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 218 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4556,7 +4556,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 219 - VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4565,7 +4565,7 @@ namespace mg5amcCpu jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * 
amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4574,7 +4574,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -4590,7 +4590,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 220 - FFV1_0( w_fp[3], w_fp[57], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 220 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4606,7 +4606,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 221 - FFV1_0( w_fp[26], w_fp[57], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[57], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 221 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4620,7 +4620,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 222 - FFV1_0( w_fp[14], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 222 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4634,7 +4634,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 223 - FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 223 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -4647,10 +4647,10 @@ namespace mg5amcCpu // *** DIAGRAM 224 OF 1240 *** // Wavefunction(s) for diagram number 224 - VVV1P0_1( w_fp[1], w_fp[27], COUPs[0], 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[1], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 224 - VVV1_0( w_fp[62], w_fp[68], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[68], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 224 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4670,7 +4670,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 225 - VVV1_0( w_fp[62], w_fp[1], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 225 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4690,7 +4690,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 226 - VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4699,7 +4699,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4708,7 +4708,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[27], w_fp[5], 
w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4724,7 +4724,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 227 - FFV1_0( w_fp[3], w_fp[55], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 227 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4740,7 +4740,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 228 - FFV1_0( w_fp[14], w_fp[55], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[55], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 228 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4754,7 +4754,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 229 - FFV1_0( w_fp[12], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 229 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4768,7 +4768,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 230 - FFV1_0( w_fp[3], w_fp[77], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 230 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4781,10 +4781,10 @@ namespace mg5amcCpu // *** DIAGRAM 231 OF 1240 *** // Wavefunction(s) for diagram number 231 - VVV1P0_1( w_fp[1], w_fp[29], COUPs[0], 0., 0., w_fp[67] ); + VVV1P0_1( w_fp[1], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[67] ); // Amplitude(s) for 
diagram number 231 - VVV1_0( w_fp[62], w_fp[67], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[67], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 231 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4804,7 +4804,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 232 - VVV1_0( w_fp[62], w_fp[1], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 232 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4824,7 +4824,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 233 - VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4833,7 +4833,7 @@ namespace mg5amcCpu jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4842,7 +4842,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -4858,7 +4858,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 234 - FFV1_0( w_fp[3], w_fp[9], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 234 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4874,7 +4874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 235 - FFV1_0( w_fp[12], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 235 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -4885,12 +4885,12 @@ namespace mg5amcCpu // *** DIAGRAM 236 OF 1240 *** // Wavefunction(s) for diagram number 236 - VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[73] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[79] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[80] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[73] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[79] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[80] ); // Amplitude(s) for diagram number 236 - VVV1_0( w_fp[73], w_fp[6], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[73], w_fp[6], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4899,7 +4899,7 @@ namespace mg5amcCpu jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[79], w_fp[6], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[79], w_fp[6], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4908,7 +4908,7 @@ namespace mg5amcCpu jamp_sv[20] += 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[80], w_fp[6], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[80], w_fp[6], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -4924,17 +4924,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 237 - FFV1_0( w_fp[3], w_fp[57], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[73], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[57], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[79], COUPs[1], 1.0, &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[57], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[80], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -4946,17 +4946,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 238 - FFV1_0( w_fp[41], w_fp[34], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[34], w_fp[73], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[34], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[34], w_fp[79], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[12] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[34], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[34], w_fp[80], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -4965,12 +4965,12 @@ namespace mg5amcCpu // *** DIAGRAM 239 OF 1240 *** // Wavefunction(s) for diagram number 239 - 
VVVV1P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[57] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[81] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[82] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[57] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[81] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[82] ); // Amplitude(s) for diagram number 239 - VVV1_0( w_fp[57], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[57], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4979,7 +4979,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[81], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[81], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -4988,7 +4988,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[82], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[82], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -5004,17 +5004,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 240 - FFV1_0( w_fp[3], w_fp[55], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[57], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[55], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[81], 
COUPs[1], 1.0, &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[55], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[82], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[16] += amp_sv[0]; @@ -5026,17 +5026,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 241 - FFV1_0( w_fp[38], w_fp[34], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[34], w_fp[57], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[34], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[34], w_fp[81], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[34], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[34], w_fp[82], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[18] += amp_sv[0]; @@ -5045,12 +5045,12 @@ namespace mg5amcCpu // *** DIAGRAM 242 OF 1240 *** // Wavefunction(s) for diagram number 242 - VVVV1P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[55] ); - VVVV3P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[83] ); - VVVV4P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[84] ); + VVVV1P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[55] ); + VVVV3P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[83] ); + VVVV4P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[84] ); // Amplitude(s) for diagram number 242 - VVV1_0( w_fp[55], w_fp[4], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[55], w_fp[4], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -5059,7 +5059,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 
) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[83], w_fp[4], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[83], w_fp[4], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -5068,7 +5068,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[84], w_fp[4], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[84], w_fp[4], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -5084,17 +5084,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 243 - FFV1_0( w_fp[3], w_fp[9], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[55], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[83], COUPs[1], 1.0, &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[84], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -5106,17 +5106,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 244 - FFV1_0( w_fp[46], w_fp[34], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[34], w_fp[55], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[34], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[34], w_fp[83], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[13] 
+= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[34], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[34], w_fp[84], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[19] += amp_sv[0]; @@ -5128,17 +5128,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 245 - FFV1_0( w_fp[3], w_fp[77], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[30], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[31], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[32], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -5150,7 +5150,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 246 - VVV1_0( w_fp[1], w_fp[30], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[30], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -5159,7 +5159,7 @@ namespace mg5amcCpu jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[31], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[31], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -5168,7 +5168,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[32], w_fp[62], COUPs[0], 
&_fp[0] ); + VVV1_0( w_fp[1], w_fp[32], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -5181,13 +5181,13 @@ namespace mg5amcCpu // *** DIAGRAM 247 OF 1240 *** // Wavefunction(s) for diagram number 247 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[62] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); - FFV1_2( w_fp[62], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[34] ); - FFV1_1( w_fp[77], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); + FFV1_2( w_fp[62], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] ); + FFV1_1( w_fp[77], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 247 - FFV1_0( w_fp[34], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 247 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5197,10 +5197,10 @@ namespace mg5amcCpu // *** DIAGRAM 248 OF 1240 *** // Wavefunction(s) for diagram number 248 - FFV1_1( w_fp[77], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[85] ); + FFV1_1( w_fp[77], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[85] ); // Amplitude(s) for diagram number 248 - FFV1_0( w_fp[34], w_fp[85], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[85], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 248 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5210,11 +5210,11 @@ namespace mg5amcCpu // *** DIAGRAM 249 OF 1240 *** // Wavefunction(s) for diagram number 249 - FFV1_2( w_fp[62], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[86] ); - FFV1_1( w_fp[77], 
w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[87] ); + FFV1_2( w_fp[62], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] ); + FFV1_1( w_fp[77], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[87] ); // Amplitude(s) for diagram number 249 - FFV1_0( w_fp[86], w_fp[87], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[87], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 249 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5227,7 +5227,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 250 - FFV1_0( w_fp[86], w_fp[85], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[85], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 250 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5237,10 +5237,10 @@ namespace mg5amcCpu // *** DIAGRAM 251 OF 1240 *** // Wavefunction(s) for diagram number 251 - FFV1_2( w_fp[62], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[88] ); + FFV1_2( w_fp[62], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] ); // Amplitude(s) for diagram number 251 - FFV1_0( w_fp[88], w_fp[87], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[87], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 251 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5253,7 +5253,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 252 - FFV1_0( w_fp[88], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 252 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5263,10 +5263,10 @@ namespace mg5amcCpu // *** DIAGRAM 253 OF 1240 *** // Wavefunction(s) for diagram number 253 - FFV1P0_3( w_fp[62], w_fp[77], 
COUPs[1], 0., 0., w_fp[89] ); + FFV1P0_3( w_fp[62], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[89] ); // Amplitude(s) for diagram number 253 - VVV1_0( w_fp[24], w_fp[6], w_fp[89], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[89], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 253 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5279,10 +5279,10 @@ namespace mg5amcCpu // *** DIAGRAM 254 OF 1240 *** // Wavefunction(s) for diagram number 254 - FFV1_2( w_fp[62], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[90] ); + FFV1_2( w_fp[62], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] ); // Amplitude(s) for diagram number 254 - FFV1_0( w_fp[90], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 254 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5296,7 +5296,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 255 - FFV1_0( w_fp[88], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 255 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5310,7 +5310,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 256 - VVV1_0( w_fp[27], w_fp[5], w_fp[89], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[89], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 256 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5323,10 +5323,10 @@ namespace mg5amcCpu // *** DIAGRAM 257 OF 1240 *** // Wavefunction(s) for diagram number 257 - FFV1_2( w_fp[62], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[91] ); + FFV1_2( w_fp[62], w_fp[27], COUPs[1], 1.0, cIPD[0], 
cIPD[1], w_fp[91] ); // Amplitude(s) for diagram number 257 - FFV1_0( w_fp[91], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[91], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 257 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5340,7 +5340,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 258 - FFV1_0( w_fp[86], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 258 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5354,7 +5354,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 259 - VVV1_0( w_fp[4], w_fp[29], w_fp[89], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[89], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 259 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5370,7 +5370,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 260 - FFV1_0( w_fp[34], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 260 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5381,10 +5381,10 @@ namespace mg5amcCpu // *** DIAGRAM 261 OF 1240 *** // Wavefunction(s) for diagram number 261 - FFV1_2( w_fp[62], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[89] ); + FFV1_2( w_fp[62], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] ); // Amplitude(s) for diagram number 261 - FFV1_0( w_fp[89], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[89], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 261 ) numerators_sv += cxabs2( amp_sv[0] ); 
if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5398,17 +5398,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 262 - FFV1_0( w_fp[62], w_fp[77], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[30], COUPs[1], 1.0, &_fp[0] ); jamp_sv[33] += amp_sv[0]; jamp_sv[35] -= amp_sv[0]; jamp_sv[41] -= amp_sv[0]; jamp_sv[47] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[77], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[31], COUPs[1], 1.0, &_fp[0] ); jamp_sv[35] -= amp_sv[0]; jamp_sv[39] += amp_sv[0]; jamp_sv[41] -= amp_sv[0]; jamp_sv[45] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[77], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[32], COUPs[1], 1.0, &_fp[0] ); jamp_sv[33] -= amp_sv[0]; jamp_sv[39] += amp_sv[0]; jamp_sv[45] += amp_sv[0]; @@ -5417,10 +5417,10 @@ namespace mg5amcCpu // *** DIAGRAM 263 OF 1240 *** // Wavefunction(s) for diagram number 263 - FFV1P0_3( w_fp[62], w_fp[2], COUPs[1], 0., 0., w_fp[92] ); + FFV1P0_3( w_fp[62], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[92] ); // Amplitude(s) for diagram number 263 - VVV1_0( w_fp[92], w_fp[63], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[63], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 263 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5440,7 +5440,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 264 - VVV1_0( w_fp[92], w_fp[64], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[64], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 264 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5460,7 +5460,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 265 - VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); 
jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] += cxtype( 0, 1 ) * amp_sv[0]; @@ -5469,7 +5469,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] += cxtype( 0, 1 ) * amp_sv[0]; @@ -5478,7 +5478,7 @@ namespace mg5amcCpu jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -5491,10 +5491,10 @@ namespace mg5amcCpu // *** DIAGRAM 266 OF 1240 *** // Wavefunction(s) for diagram number 266 - FFV1_1( w_fp[2], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[93] ); + FFV1_1( w_fp[2], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[93] ); // Amplitude(s) for diagram number 266 - FFV1_0( w_fp[86], w_fp[93], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[93], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 266 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5508,7 +5508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 267 - FFV1_0( w_fp[86], w_fp[2], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 267 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5524,7 
+5524,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 268 - FFV1_0( w_fp[88], w_fp[93], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[93], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 268 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5538,7 +5538,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 269 - FFV1_0( w_fp[88], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 269 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5551,10 +5551,10 @@ namespace mg5amcCpu // *** DIAGRAM 270 OF 1240 *** // Wavefunction(s) for diagram number 270 - FFV1_2( w_fp[62], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[94] ); + FFV1_2( w_fp[62], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] ); // Amplitude(s) for diagram number 270 - FFV1_0( w_fp[94], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[94], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 270 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5565,10 +5565,10 @@ namespace mg5amcCpu // *** DIAGRAM 271 OF 1240 *** // Wavefunction(s) for diagram number 271 - FFV1P0_3( w_fp[62], w_fp[39], COUPs[1], 0., 0., w_fp[95] ); + FFV1P0_3( w_fp[62], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[95] ); // Amplitude(s) for diagram number 271 - VVV1_0( w_fp[61], w_fp[6], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 271 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5584,7 +5584,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 272 - FFV1_0( 
w_fp[88], w_fp[39], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[39], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 272 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5598,7 +5598,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 273 - FFV1_0( w_fp[94], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[94], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 273 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5609,10 +5609,10 @@ namespace mg5amcCpu // *** DIAGRAM 274 OF 1240 *** // Wavefunction(s) for diagram number 274 - FFV1P0_3( w_fp[62], w_fp[47], COUPs[1], 0., 0., w_fp[96] ); + FFV1P0_3( w_fp[62], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[96] ); // Amplitude(s) for diagram number 274 - VVV1_0( w_fp[61], w_fp[5], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[96], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 274 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5628,7 +5628,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 275 - FFV1_0( w_fp[86], w_fp[47], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[47], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 275 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5642,7 +5642,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 276 - FFV1_0( w_fp[94], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[94], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 276 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5658,7 +5658,7 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 277 - VVV1_0( w_fp[61], w_fp[29], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[29], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 277 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5678,7 +5678,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 278 - FFV1_0( w_fp[89], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[89], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 278 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5694,7 +5694,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 279 - VVV1_0( w_fp[92], w_fp[69], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[69], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 279 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5714,7 +5714,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 280 - VVV1_0( w_fp[92], w_fp[70], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[70], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 280 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5734,7 +5734,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 281 - VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -5743,7 +5743,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 
0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -5752,7 +5752,7 @@ namespace mg5amcCpu jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -5765,10 +5765,10 @@ namespace mg5amcCpu // *** DIAGRAM 282 OF 1240 *** // Wavefunction(s) for diagram number 282 - FFV1_1( w_fp[2], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[94] ); + FFV1_1( w_fp[2], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] ); // Amplitude(s) for diagram number 282 - FFV1_0( w_fp[34], w_fp[94], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[94], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 282 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5782,7 +5782,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 283 - FFV1_0( w_fp[34], w_fp[2], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 283 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5798,7 +5798,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 284 - FFV1_0( w_fp[88], w_fp[94], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[94], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 284 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5812,7 +5812,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 285 - FFV1_0( w_fp[88], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 285 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5825,10 +5825,10 @@ namespace mg5amcCpu // *** DIAGRAM 286 OF 1240 *** // Wavefunction(s) for diagram number 286 - FFV1_2( w_fp[62], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[97] ); + FFV1_2( w_fp[62], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] ); // Amplitude(s) for diagram number 286 - FFV1_0( w_fp[97], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[97], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 286 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5839,10 +5839,10 @@ namespace mg5amcCpu // *** DIAGRAM 287 OF 1240 *** // Wavefunction(s) for diagram number 287 - FFV1P0_3( w_fp[62], w_fp[33], COUPs[1], 0., 0., w_fp[98] ); + FFV1P0_3( w_fp[62], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[98] ); // Amplitude(s) for diagram number 287 - VVV1_0( w_fp[66], w_fp[6], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 287 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5858,7 +5858,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 288 - FFV1_0( w_fp[88], w_fp[33], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[33], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 288 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5872,7 +5872,7 
@@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 289 - FFV1_0( w_fp[97], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[97], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 289 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5886,7 +5886,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 290 - VVV1_0( w_fp[66], w_fp[4], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[96], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 290 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5902,7 +5902,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 291 - FFV1_0( w_fp[34], w_fp[47], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[47], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 291 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5916,7 +5916,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 292 - FFV1_0( w_fp[97], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[97], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 292 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5932,7 +5932,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 293 - VVV1_0( w_fp[66], w_fp[27], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[27], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 293 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5952,7 +5952,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 294 - FFV1_0( w_fp[91], w_fp[2], w_fp[66], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[91], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 294 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5968,7 +5968,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 295 - VVV1_0( w_fp[92], w_fp[74], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[74], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 295 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -5988,7 +5988,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 296 - VVV1_0( w_fp[92], w_fp[75], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[75], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 296 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6008,7 +6008,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 297 - VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -6017,7 +6017,7 @@ namespace mg5amcCpu jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[47] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -6026,7 +6026,7 @@ namespace mg5amcCpu jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[72], w_fp[4], 
w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -6039,10 +6039,10 @@ namespace mg5amcCpu // *** DIAGRAM 298 OF 1240 *** // Wavefunction(s) for diagram number 298 - FFV1_1( w_fp[2], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[97] ); + FFV1_1( w_fp[2], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] ); // Amplitude(s) for diagram number 298 - FFV1_0( w_fp[34], w_fp[97], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[97], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 298 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6056,7 +6056,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 299 - FFV1_0( w_fp[34], w_fp[2], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 299 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6072,7 +6072,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 300 - FFV1_0( w_fp[86], w_fp[97], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[97], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 300 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6086,7 +6086,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 301 - FFV1_0( w_fp[86], w_fp[2], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 301 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6099,10 +6099,10 @@ namespace mg5amcCpu // 
*** DIAGRAM 302 OF 1240 *** // Wavefunction(s) for diagram number 302 - FFV1_2( w_fp[62], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + FFV1_2( w_fp[62], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 302 - FFV1_0( w_fp[99], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 302 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6116,7 +6116,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 303 - VVV1_0( w_fp[72], w_fp[5], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 303 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6132,7 +6132,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 304 - FFV1_0( w_fp[86], w_fp[33], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[33], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 304 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6146,7 +6146,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 305 - FFV1_0( w_fp[99], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 305 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6160,7 +6160,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 306 - VVV1_0( w_fp[72], w_fp[4], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 306 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -6176,7 +6176,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 307 - FFV1_0( w_fp[34], w_fp[39], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[39], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 307 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6190,7 +6190,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 308 - FFV1_0( w_fp[99], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 308 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6206,7 +6206,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 309 - VVV1_0( w_fp[72], w_fp[24], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[24], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 309 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6226,7 +6226,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 310 - FFV1_0( w_fp[90], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 310 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6239,10 +6239,10 @@ namespace mg5amcCpu // *** DIAGRAM 311 OF 1240 *** // Wavefunction(s) for diagram number 311 - FFV1_2( w_fp[62], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + FFV1_2( w_fp[62], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 311 - FFV1_0( w_fp[99], w_fp[35], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[35], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 311 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6255,7 +6255,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 312 - FFV1_0( w_fp[99], w_fp[36], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[36], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 312 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6265,10 +6265,10 @@ namespace mg5amcCpu // *** DIAGRAM 313 OF 1240 *** // Wavefunction(s) for diagram number 313 - FFV1_1( w_fp[33], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[100] ); + FFV1_1( w_fp[33], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[100] ); // Amplitude(s) for diagram number 313 - FFV1_0( w_fp[86], w_fp[100], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[100], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 313 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6281,7 +6281,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 314 - FFV1_0( w_fp[86], w_fp[36], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[36], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 314 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6294,7 +6294,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 315 - FFV1_0( w_fp[88], w_fp[100], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[100], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 315 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6307,7 +6307,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 316 - FFV1_0( w_fp[88], w_fp[35], w_fp[1], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[35], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 316 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6320,7 +6320,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 317 - FFV1_0( w_fp[99], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 317 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6334,7 +6334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 318 - VVV1_0( w_fp[1], w_fp[29], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 318 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6350,7 +6350,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 319 - FFV1_0( w_fp[89], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[89], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 319 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6364,7 +6364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 320 - FFV1_0( w_fp[99], w_fp[43], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[43], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 320 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6377,7 +6377,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 321 - FFV1_0( w_fp[99], w_fp[44], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[44], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
if( channelId == 321 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6387,10 +6387,10 @@ namespace mg5amcCpu // *** DIAGRAM 322 OF 1240 *** // Wavefunction(s) for diagram number 322 - FFV1_1( w_fp[39], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[89] ); + FFV1_1( w_fp[39], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] ); // Amplitude(s) for diagram number 322 - FFV1_0( w_fp[34], w_fp[89], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[89], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 322 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6403,7 +6403,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 323 - FFV1_0( w_fp[34], w_fp[44], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[44], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 323 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6416,7 +6416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 324 - FFV1_0( w_fp[88], w_fp[89], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[89], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 324 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6429,7 +6429,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 325 - FFV1_0( w_fp[88], w_fp[43], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[43], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 325 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6442,7 +6442,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 326 - FFV1_0( w_fp[99], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[99], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 326 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6456,7 +6456,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 327 - VVV1_0( w_fp[1], w_fp[27], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 327 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6472,7 +6472,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 328 - FFV1_0( w_fp[91], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[91], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 328 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6486,7 +6486,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 329 - FFV1_0( w_fp[99], w_fp[49], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[49], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 329 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6499,7 +6499,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 330 - FFV1_0( w_fp[99], w_fp[50], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[50], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 330 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6509,10 +6509,10 @@ namespace mg5amcCpu // *** DIAGRAM 331 OF 1240 *** // Wavefunction(s) for diagram number 331 - FFV1_1( w_fp[47], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[91] ); + FFV1_1( w_fp[47], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] ); // Amplitude(s) for 
diagram number 331 - FFV1_0( w_fp[34], w_fp[91], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[91], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 331 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6525,7 +6525,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 332 - FFV1_0( w_fp[34], w_fp[50], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[50], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 332 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6538,7 +6538,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 333 - FFV1_0( w_fp[86], w_fp[91], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[91], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 333 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6551,7 +6551,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 334 - FFV1_0( w_fp[86], w_fp[49], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[49], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 334 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6564,7 +6564,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 335 - FFV1_0( w_fp[99], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 335 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6578,7 +6578,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 336 - VVV1_0( w_fp[1], w_fp[24], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[96], 
COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 336 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6594,7 +6594,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 337 - FFV1_0( w_fp[90], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 337 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6608,7 +6608,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 338 - FFV1_0( w_fp[99], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 338 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6622,7 +6622,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 339 - FFV1_0( w_fp[99], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 339 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6638,7 +6638,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 340 - VVV1_0( w_fp[92], w_fp[59], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[59], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 340 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6658,7 +6658,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 341 - VVV1_0( w_fp[92], w_fp[1], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[1], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 341 ) numerators_sv += cxabs2( amp_sv[0] ); 
if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6678,7 +6678,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 342 - VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6687,7 +6687,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6696,7 +6696,7 @@ namespace mg5amcCpu jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -6712,7 +6712,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 343 - FFV1_0( w_fp[88], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 343 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6728,7 +6728,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 344 - FFV1_0( w_fp[88], w_fp[17], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[17], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 344 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6742,7 +6742,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 345 - FFV1_0( w_fp[99], w_fp[15], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[15], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 345 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6756,7 +6756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 346 - FFV1_0( w_fp[99], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 346 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6772,7 +6772,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 347 - VVV1_0( w_fp[92], w_fp[68], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[68], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 347 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6792,7 +6792,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 348 - VVV1_0( w_fp[92], w_fp[1], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[1], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 348 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6812,7 +6812,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 349 - VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6821,7 +6821,7 @@ namespace mg5amcCpu jamp_sv[89] -= 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6830,7 +6830,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6846,7 +6846,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 350 - FFV1_0( w_fp[86], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 350 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6862,7 +6862,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 351 - FFV1_0( w_fp[86], w_fp[15], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[15], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 351 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6876,7 +6876,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 352 - FFV1_0( w_fp[99], w_fp[18], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[18], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 352 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6890,7 +6890,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 353 - FFV1_0( w_fp[99], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 353 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6906,7 +6906,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 354 - VVV1_0( w_fp[92], w_fp[67], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[67], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 354 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6926,7 +6926,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 355 - VVV1_0( w_fp[92], w_fp[1], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[1], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 355 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6946,7 +6946,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 356 - VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6955,7 +6955,7 @@ namespace mg5amcCpu jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0]; @@ -6964,7 +6964,7 @@ namespace mg5amcCpu jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &_fp[0] ); jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -6980,7 +6980,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 357 - FFV1_0( w_fp[34], w_fp[2], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 357 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -6996,7 +6996,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 358 - FFV1_0( w_fp[34], w_fp[18], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[18], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 358 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7010,7 +7010,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 359 - VVV1_0( w_fp[73], w_fp[6], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[73], w_fp[6], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7019,7 +7019,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[79], w_fp[6], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[79], w_fp[6], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7028,7 +7028,7 @@ namespace mg5amcCpu jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 
) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[80], w_fp[6], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[80], w_fp[6], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7044,17 +7044,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 360 - FFV1_0( w_fp[88], w_fp[2], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[73], COUPs[1], 1.0, &_fp[0] ); jamp_sv[33] += amp_sv[0]; jamp_sv[39] -= amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[87] += amp_sv[0]; - FFV1_0( w_fp[88], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); jamp_sv[39] -= amp_sv[0]; jamp_sv[57] += amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[81] += amp_sv[0]; - FFV1_0( w_fp[88], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); jamp_sv[33] -= amp_sv[0]; jamp_sv[57] += amp_sv[0]; jamp_sv[81] += amp_sv[0]; @@ -7066,17 +7066,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 361 - FFV1_0( w_fp[62], w_fp[47], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[47], w_fp[73], COUPs[1], 1.0, &_fp[0] ); jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[47], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[47], w_fp[79], COUPs[1], 1.0, &_fp[0] ); jamp_sv[107] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[47], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[47], w_fp[80], COUPs[1], 1.0, &_fp[0] ); jamp_sv[105] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; @@ -7088,7 +7088,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 362 - VVV1_0( w_fp[57], w_fp[5], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( 
w_fp[57], w_fp[5], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7097,7 +7097,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[81], w_fp[5], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[81], w_fp[5], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7106,7 +7106,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[82], w_fp[5], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[82], w_fp[5], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7122,17 +7122,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 363 - FFV1_0( w_fp[86], w_fp[2], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[57], COUPs[1], 1.0, &_fp[0] ); jamp_sv[35] += amp_sv[0]; jamp_sv[45] -= amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - FFV1_0( w_fp[86], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); jamp_sv[45] -= amp_sv[0]; jamp_sv[59] += amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; - FFV1_0( w_fp[86], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); jamp_sv[35] -= amp_sv[0]; jamp_sv[59] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; @@ -7144,17 +7144,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 364 - FFV1_0( w_fp[62], w_fp[39], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[57], COUPs[1], 
1.0, &_fp[0] ); jamp_sv[81] += amp_sv[0]; jamp_sv[83] -= amp_sv[0]; jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[39], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[81], COUPs[1], 1.0, &_fp[0] ); jamp_sv[83] -= amp_sv[0]; jamp_sv[87] += amp_sv[0]; jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[39], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[82], COUPs[1], 1.0, &_fp[0] ); jamp_sv[81] -= amp_sv[0]; jamp_sv[87] += amp_sv[0]; jamp_sv[93] += amp_sv[0]; @@ -7166,7 +7166,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 365 - VVV1_0( w_fp[55], w_fp[4], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[55], w_fp[4], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7175,7 +7175,7 @@ namespace mg5amcCpu jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[83], w_fp[4], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[83], w_fp[4], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7184,7 +7184,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[84], w_fp[4], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[84], w_fp[4], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7200,17 +7200,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 366 - FFV1_0( w_fp[34], w_fp[2], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[55], COUPs[1], 1.0, &_fp[0] ); jamp_sv[41] += amp_sv[0]; 
jamp_sv[47] -= amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[83], COUPs[1], 1.0, &_fp[0] ); jamp_sv[47] -= amp_sv[0]; jamp_sv[83] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[84], COUPs[1], 1.0, &_fp[0] ); jamp_sv[41] -= amp_sv[0]; jamp_sv[83] += amp_sv[0]; jamp_sv[107] += amp_sv[0]; @@ -7222,17 +7222,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 367 - FFV1_0( w_fp[62], w_fp[33], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[55], COUPs[1], 1.0, &_fp[0] ); jamp_sv[57] += amp_sv[0]; jamp_sv[59] -= amp_sv[0]; jamp_sv[65] -= amp_sv[0]; jamp_sv[71] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[33], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[83], COUPs[1], 1.0, &_fp[0] ); jamp_sv[59] -= amp_sv[0]; jamp_sv[63] += amp_sv[0]; jamp_sv[65] -= amp_sv[0]; jamp_sv[69] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[33], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[84], COUPs[1], 1.0, &_fp[0] ); jamp_sv[57] -= amp_sv[0]; jamp_sv[63] += amp_sv[0]; jamp_sv[69] += amp_sv[0]; @@ -7244,17 +7244,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 368 - FFV1_0( w_fp[99], w_fp[2], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[30], COUPs[1], 1.0, &_fp[0] ); jamp_sv[65] += amp_sv[0]; jamp_sv[71] -= amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[31], COUPs[1], 1.0, &_fp[0] ); jamp_sv[71] -= amp_sv[0]; jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[32], COUPs[1], 1.0, &_fp[0] ); jamp_sv[65] -= amp_sv[0]; jamp_sv[89] += 
amp_sv[0]; jamp_sv[113] += amp_sv[0]; @@ -7266,7 +7266,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 369 - VVV1_0( w_fp[1], w_fp[30], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[30], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7275,7 +7275,7 @@ namespace mg5amcCpu jamp_sv[71] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[31], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[31], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7284,7 +7284,7 @@ namespace mg5amcCpu jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[32], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[32], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7297,11 +7297,11 @@ namespace mg5amcCpu // *** DIAGRAM 370 OF 1240 *** // Wavefunction(s) for diagram number 370 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[92] ); - FFV1_2( w_fp[3], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[92] ); + FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 370 - FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 370 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7315,7 +7315,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 371 - FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 371 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7326,11 +7326,11 @@ namespace mg5amcCpu // *** DIAGRAM 372 OF 1240 *** // Wavefunction(s) for diagram number 372 - VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 0., 0., w_fp[62] ); - FFV1P0_3( w_fp[3], w_fp[77], COUPs[1], 0., 0., w_fp[34] ); + VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[62] ); + FFV1P0_3( w_fp[3], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[34] ); // Amplitude(s) for diagram number 372 - VVV1_0( w_fp[62], w_fp[34], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[34], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 372 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7350,7 +7350,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 373 - FFV1_0( w_fp[3], w_fp[85], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[85], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 373 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7363,10 +7363,10 @@ namespace mg5amcCpu // *** DIAGRAM 374 OF 1240 *** // Wavefunction(s) for diagram number 374 - VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 374 - VVV1_0( w_fp[86], w_fp[34], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[34], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 374 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7386,7 +7386,7 @@ namespace mg5amcCpu 
// (none) // Amplitude(s) for diagram number 375 - FFV1_0( w_fp[3], w_fp[9], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 375 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7399,12 +7399,12 @@ namespace mg5amcCpu // *** DIAGRAM 376 OF 1240 *** // Wavefunction(s) for diagram number 376 - VVVV1P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[88] ); - VVVV3P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[90] ); - VVVV4P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[96] ); + VVVV1P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] ); + VVVV3P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] ); + VVVV4P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] ); // Amplitude(s) for diagram number 376 - FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7413,7 +7413,7 @@ namespace mg5amcCpu jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[90], COUPs[1], 1.0, &_fp[0] ); jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7422,7 +7422,7 @@ namespace mg5amcCpu jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[96], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7435,10 +7435,10 @@ namespace mg5amcCpu // *** DIAGRAM 377 OF 1240 *** // Wavefunction(s) for diagram number 377 - FFV1_1( w_fp[77], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[95] ); + FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[95] ); // Amplitude(s) for diagram number 377 - FFV1_0( w_fp[38], w_fp[95], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[95], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 377 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7449,10 +7449,10 @@ namespace mg5amcCpu // *** DIAGRAM 378 OF 1240 *** // Wavefunction(s) for diagram number 378 - FFV1_2( w_fp[38], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); + FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 378 - FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 378 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7466,7 +7466,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 379 - FFV1_0( w_fp[38], w_fp[77], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 379 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7482,7 +7482,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 380 - FFV1_0( w_fp[41], w_fp[95], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[95], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 380 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7493,10 +7493,10 @@ namespace 
mg5amcCpu // *** DIAGRAM 381 OF 1240 *** // Wavefunction(s) for diagram number 381 - FFV1_2( w_fp[41], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[101] ); + FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[101] ); // Amplitude(s) for diagram number 381 - FFV1_0( w_fp[101], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[101], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 381 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7510,7 +7510,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 382 - FFV1_0( w_fp[41], w_fp[77], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 382 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7526,7 +7526,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 383 - FFV1_0( w_fp[3], w_fp[95], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[95], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 383 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7542,7 +7542,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 384 - FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 384 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7555,10 +7555,10 @@ namespace mg5amcCpu // *** DIAGRAM 385 OF 1240 *** // Wavefunction(s) for diagram number 385 - VVV1P0_1( w_fp[92], w_fp[29], COUPs[0], 0., 0., w_fp[95] ); + VVV1P0_1( w_fp[92], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[95] ); // Amplitude(s) for diagram number 385 - FFV1_0( w_fp[3], w_fp[77], 
w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 385 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7575,10 +7575,10 @@ namespace mg5amcCpu // *** DIAGRAM 386 OF 1240 *** // Wavefunction(s) for diagram number 386 - FFV1_1( w_fp[2], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[102] ); + FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] ); // Amplitude(s) for diagram number 386 - FFV1_0( w_fp[22], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 386 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7592,7 +7592,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 387 - FFV1_0( w_fp[21], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 387 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7603,10 +7603,10 @@ namespace mg5amcCpu // *** DIAGRAM 388 OF 1240 *** // Wavefunction(s) for diagram number 388 - FFV1P0_3( w_fp[52], w_fp[2], COUPs[1], 0., 0., w_fp[103] ); + FFV1P0_3( w_fp[52], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[103] ); // Amplitude(s) for diagram number 388 - VVV1_0( w_fp[62], w_fp[103], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[103], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 388 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7626,7 +7626,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 389 - FFV1_0( w_fp[21], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[62], COUPs[1], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 389 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7642,7 +7642,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 390 - VVV1_0( w_fp[86], w_fp[103], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[103], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 390 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7662,7 +7662,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 391 - FFV1_0( w_fp[22], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 391 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7678,7 +7678,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 392 - FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; @@ -7687,7 +7687,7 @@ namespace mg5amcCpu jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7696,7 +7696,7 @@ namespace mg5amcCpu jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); 
jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -7709,10 +7709,10 @@ namespace mg5amcCpu // *** DIAGRAM 393 OF 1240 *** // Wavefunction(s) for diagram number 393 - FFV1_2( w_fp[52], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[104] ); + FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] ); // Amplitude(s) for diagram number 393 - FFV1_0( w_fp[104], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 393 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7723,10 +7723,10 @@ namespace mg5amcCpu // *** DIAGRAM 394 OF 1240 *** // Wavefunction(s) for diagram number 394 - FFV1_1( w_fp[39], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[105] ); + FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[105] ); // Amplitude(s) for diagram number 394 - FFV1_0( w_fp[52], w_fp[105], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[105], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 394 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7740,7 +7740,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 395 - FFV1_0( w_fp[52], w_fp[39], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 395 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7756,7 +7756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 396 - FFV1_0( w_fp[104], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 396 ) numerators_sv += cxabs2( amp_sv[0] ); 
if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7767,10 +7767,10 @@ namespace mg5amcCpu // *** DIAGRAM 397 OF 1240 *** // Wavefunction(s) for diagram number 397 - FFV1_1( w_fp[47], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[106] ); + FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] ); // Amplitude(s) for diagram number 397 - FFV1_0( w_fp[52], w_fp[106], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[106], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 397 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7784,7 +7784,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 398 - FFV1_0( w_fp[52], w_fp[47], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 398 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7800,7 +7800,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 399 - FFV1_0( w_fp[104], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 399 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7816,7 +7816,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 400 - FFV1_0( w_fp[52], w_fp[102], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[102], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 400 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7832,7 +7832,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 401 - FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 401 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7852,7 +7852,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 402 - FFV1_0( w_fp[71], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 402 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7868,7 +7868,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 403 - FFV1_0( w_fp[3], w_fp[102], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 403 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7888,7 +7888,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 404 - FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 404 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7904,7 +7904,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 405 - FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 405 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7924,7 +7924,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 406 - FFV1_0( w_fp[3], w_fp[94], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[94], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 406 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -7944,7 +7944,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 407 - FFV1_0( w_fp[71], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 407 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -7964,7 +7964,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 408 - VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -7981,7 +7981,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -7998,7 +7998,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -8019,10 +8019,10 @@ namespace mg5amcCpu // *** DIAGRAM 409 OF 1240 *** // Wavefunction(s) for diagram number 409 - VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 409 - VVV1_0( w_fp[8], w_fp[6], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 409 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8047,10 
+8047,10 @@ namespace mg5amcCpu // *** DIAGRAM 410 OF 1240 *** // Wavefunction(s) for diagram number 410 - VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 0., 0., w_fp[107] ); + VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[107] ); // Amplitude(s) for diagram number 410 - VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 410 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8078,7 +8078,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 411 - VVV1_0( w_fp[66], w_fp[8], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[8], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 411 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8106,7 +8106,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 412 - FFV1_0( w_fp[3], w_fp[47], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 412 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8126,7 +8126,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 413 - FFV1_0( w_fp[3], w_fp[106], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[106], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 413 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8142,7 +8142,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 414 - FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 414 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8158,7 +8158,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 415 - FFV1_0( w_fp[41], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 415 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8178,7 +8178,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 416 - FFV1_0( w_fp[41], w_fp[102], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[102], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 416 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8194,7 +8194,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 417 - FFV1_0( w_fp[101], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[101], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 417 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8210,7 +8210,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 418 - FFV1_0( w_fp[76], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 418 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8226,7 +8226,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 419 - FFV1_0( w_fp[3], w_fp[102], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 419 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8246,7 +8246,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 420 - FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 420 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8262,7 +8262,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 421 - FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 421 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8282,7 +8282,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 422 - FFV1_0( w_fp[3], w_fp[97], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[97], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 422 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8302,7 +8302,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 423 - FFV1_0( w_fp[76], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 423 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8322,7 +8322,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 424 - VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -8339,7 +8339,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], 
w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[7] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; @@ -8356,7 +8356,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -8377,10 +8377,10 @@ namespace mg5amcCpu // *** DIAGRAM 425 OF 1240 *** // Wavefunction(s) for diagram number 425 - VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 425 - VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 425 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8408,7 +8408,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 426 - VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 426 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8436,7 +8436,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 427 - VVV1_0( w_fp[72], w_fp[8], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[8], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 427 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8464,7 +8464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 428 - FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 428 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8484,7 +8484,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 429 - FFV1_0( w_fp[3], w_fp[105], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[105], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 429 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8500,7 +8500,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 430 - FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 430 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8516,7 +8516,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 431 - FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 431 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8536,7 +8536,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 432 - FFV1_0( w_fp[38], w_fp[102], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[102], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 432 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8552,7 +8552,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 433 - FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 433 ) numerators_sv += cxabs2( amp_sv[0] ); 
if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8565,10 +8565,10 @@ namespace mg5amcCpu // *** DIAGRAM 434 OF 1240 *** // Wavefunction(s) for diagram number 434 - VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 434 - VVV1_0( w_fp[104], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 434 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8596,7 +8596,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 435 - VVV1_0( w_fp[104], w_fp[11], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[11], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 435 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8624,7 +8624,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 436 - VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -8641,7 +8641,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -8658,7 +8658,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[24] += 
amp_sv[0]; @@ -8679,10 +8679,10 @@ namespace mg5amcCpu // *** DIAGRAM 437 OF 1240 *** // Wavefunction(s) for diagram number 437 - VVV1P0_1( w_fp[1], w_fp[8], COUPs[0], 0., 0., w_fp[108] ); + VVV1P0_1( w_fp[1], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[108] ); // Amplitude(s) for diagram number 437 - VVV1_0( w_fp[62], w_fp[108], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[108], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 437 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8710,7 +8710,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 438 - VVV1_0( w_fp[62], w_fp[1], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 438 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8738,7 +8738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 439 - VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[24] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -8755,7 +8755,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[115] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -8772,7 +8772,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -8796,7 +8796,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 440 - VVV1_0( w_fp[86], w_fp[108], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[108], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 440 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8824,7 +8824,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 441 - VVV1_0( w_fp[86], w_fp[1], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 441 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -8852,7 +8852,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 442 - VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[25] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -8869,7 +8869,7 @@ namespace mg5amcCpu jamp_sv[94] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -8886,7 +8886,7 @@ namespace mg5amcCpu jamp_sv[99] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -8907,12 +8907,12 @@ namespace mg5amcCpu // *** DIAGRAM 443 OF 1240 *** // Wavefunction(s) for diagram number 443 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[109] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[92], w_fp[1], 
w_fp[5], COUPs[2], 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 443 - VVV1_0( w_fp[8], w_fp[6], w_fp[109], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[109], COUPs[0], 1.0, &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -8929,7 +8929,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[110], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[110], COUPs[0], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -8946,7 +8946,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[115] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[111], COUPs[0], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -8967,12 +8967,12 @@ namespace mg5amcCpu // *** DIAGRAM 444 OF 1240 *** // Wavefunction(s) for diagram number 444 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[112] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[113] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[114] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[113] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[114] ); // Amplitude(s) for diagram number 444 - VVV1_0( w_fp[8], w_fp[5], w_fp[112], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[112], COUPs[0], 1.0, &_fp[0] ); jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -8989,7 +8989,7 @@ namespace 
mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[113], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[113], COUPs[0], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -9006,7 +9006,7 @@ namespace mg5amcCpu jamp_sv[94] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[114], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[114], COUPs[0], 1.0, &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -9030,7 +9030,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 445 - VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -9047,7 +9047,7 @@ namespace mg5amcCpu jamp_sv[94] -= amp_sv[0]; jamp_sv[115] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[90], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[90], COUPs[0], 1.0, &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[25] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -9064,7 +9064,7 @@ namespace mg5amcCpu jamp_sv[94] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[96], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[24] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -9088,7 +9088,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 446 - VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -9105,7 +9105,7 @@ namespace mg5amcCpu jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( 
w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[9] -= amp_sv[0]; @@ -9122,7 +9122,7 @@ namespace mg5amcCpu jamp_sv[93] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -9146,7 +9146,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 447 - VVV1_0( w_fp[8], w_fp[29], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[29], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 447 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9174,7 +9174,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 448 - VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 448 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9202,7 +9202,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 449 - VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 449 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9230,7 +9230,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 450 - VVV1_0( w_fp[104], w_fp[45], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[45], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 450 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9250,7 +9250,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 451 - FFV1_0( w_fp[3], w_fp[44], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[44], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 451 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9266,7 +9266,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 452 - FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 452 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9280,7 +9280,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 453 - FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 453 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9294,7 +9294,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 454 - FFV1_0( w_fp[3], w_fp[89], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[89], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 454 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9310,7 +9310,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 455 - VVV1_0( w_fp[86], w_fp[1], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 455 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9330,7 +9330,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 456 - FFV1_0( w_fp[3], w_fp[39], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[112], COUPs[1], 1.0, &_fp[0] ); jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9339,7 +9339,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[113], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[113], COUPs[1], 1.0, &_fp[0] ); jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9348,7 +9348,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[114], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[114], COUPs[1], 1.0, &_fp[0] ); jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9364,7 +9364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 457 - FFV1_0( w_fp[41], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 457 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9380,7 +9380,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 458 - FFV1_0( w_fp[41], w_fp[105], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[105], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 458 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9394,7 +9394,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 459 - 
FFV1_0( w_fp[101], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[101], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 459 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9408,7 +9408,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 460 - VVV1_0( w_fp[104], w_fp[51], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[51], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 460 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9428,7 +9428,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 461 - FFV1_0( w_fp[3], w_fp[50], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[50], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 461 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9444,7 +9444,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 462 - FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 462 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9458,7 +9458,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 463 - FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 463 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9472,7 +9472,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 464 - FFV1_0( w_fp[3], w_fp[91], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[91], w_fp[62], COUPs[1], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 464 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9488,7 +9488,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 465 - VVV1_0( w_fp[62], w_fp[1], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 465 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9508,7 +9508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 466 - FFV1_0( w_fp[3], w_fp[47], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[109], COUPs[1], 1.0, &_fp[0] ); jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9517,7 +9517,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[110], COUPs[1], 1.0, &_fp[0] ); jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9526,7 +9526,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9542,7 +9542,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 467 - FFV1_0( w_fp[38], w_fp[47], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 467 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9558,7 +9558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 468 - FFV1_0( w_fp[38], w_fp[106], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[106], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 468 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9572,7 +9572,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 469 - FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 469 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9586,7 +9586,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 470 - VVV1_0( w_fp[104], w_fp[23], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[23], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 470 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9606,7 +9606,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 471 - FFV1_0( w_fp[48], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 471 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9622,7 +9622,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 472 - FFV1_0( w_fp[58], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 472 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -9636,7 +9636,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 473 - FFV1_0( w_fp[48], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 473 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9650,7 +9650,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 474 - FFV1_0( w_fp[58], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 474 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9666,7 +9666,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 475 - VVV1_0( w_fp[86], w_fp[1], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 475 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9686,7 +9686,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 476 - FFV1_0( w_fp[38], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9695,7 +9695,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[113], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[113], COUPs[1], 1.0, &_fp[0] ); jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9704,7 +9704,7 @@ namespace mg5amcCpu jamp_sv[52] -= 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[114], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[114], COUPs[1], 1.0, &_fp[0] ); jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9720,7 +9720,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 477 - VVV1_0( w_fp[104], w_fp[20], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[20], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 477 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9740,7 +9740,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 478 - FFV1_0( w_fp[40], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 478 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9756,7 +9756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 479 - FFV1_0( w_fp[60], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 479 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9770,7 +9770,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 480 - FFV1_0( w_fp[40], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 480 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9784,7 +9784,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 481 - FFV1_0( w_fp[60], w_fp[2], 
w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 481 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9800,7 +9800,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 482 - VVV1_0( w_fp[62], w_fp[1], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 482 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9820,7 +9820,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 483 - FFV1_0( w_fp[41], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9829,7 +9829,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9838,7 +9838,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9854,7 +9854,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 484 - FFV1_0( w_fp[3], w_fp[18], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], 
w_fp[18], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 484 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9874,7 +9874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 485 - FFV1_0( w_fp[12], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 485 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9894,7 +9894,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 486 - FFV1_0( w_fp[3], w_fp[102], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 486 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9914,7 +9914,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 487 - FFV1_0( w_fp[12], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 487 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9930,7 +9930,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 488 - FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 488 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9950,7 +9950,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 489 - FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 489 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -9966,7 +9966,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 490 - FFV1_0( w_fp[3], w_fp[102], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[55], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9975,7 +9975,7 @@ namespace mg5amcCpu jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[83], COUPs[1], 1.0, &_fp[0] ); jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; @@ -9984,7 +9984,7 @@ namespace mg5amcCpu jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[84], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -10000,7 +10000,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 491 - FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &_fp[0] ); jamp_sv[40] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10009,7 +10009,7 @@ namespace mg5amcCpu jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &_fp[0] ); jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] -= 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -10018,7 +10018,7 @@ namespace mg5amcCpu jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &_fp[0] ); jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -10034,7 +10034,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 492 - VVV1_0( w_fp[92], w_fp[55], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[55], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[9] += amp_sv[0]; @@ -10051,7 +10051,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[83], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[83], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[7] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; @@ -10068,7 +10068,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[84], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[84], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -10089,11 +10089,11 @@ namespace mg5amcCpu // *** DIAGRAM 493 OF 1240 *** // Wavefunction(s) for diagram number 493 - VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 0., 0., w_fp[92] ); - FFV1_2( w_fp[3], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[92] ); + FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 493 - FFV1_0( w_fp[99], w_fp[87], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[87], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 493 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10107,7 +10107,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 494 - FFV1_0( w_fp[99], w_fp[85], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[85], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 494 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10118,10 +10118,10 @@ namespace mg5amcCpu // *** DIAGRAM 495 OF 1240 *** // Wavefunction(s) for diagram number 495 - VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 0., 0., w_fp[102] ); + VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[102] ); // Amplitude(s) for diagram number 495 - VVV1_0( w_fp[102], w_fp[34], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[34], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 495 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10141,7 +10141,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 496 - FFV1_0( w_fp[3], w_fp[85], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[85], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 496 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10154,10 +10154,10 @@ namespace mg5amcCpu // *** DIAGRAM 497 OF 1240 *** // Wavefunction(s) for diagram number 497 - VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 497 - VVV1_0( w_fp[104], w_fp[34], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[34], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 497 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10177,7 +10177,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 498 - FFV1_0( w_fp[3], w_fp[87], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[87], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 498 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10190,12 +10190,12 @@ namespace mg5amcCpu // *** DIAGRAM 499 OF 1240 *** // Wavefunction(s) for diagram number 499 - VVVV1P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[111] ); - VVVV3P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[109] ); + VVVV1P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] ); + VVVV3P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[109] ); // Amplitude(s) for diagram number 499 - FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10204,7 +10204,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &_fp[0] ); jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10213,7 +10213,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[109], 
COUPs[1], 1.0, &_fp[0] ); jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10226,10 +10226,10 @@ namespace mg5amcCpu // *** DIAGRAM 500 OF 1240 *** // Wavefunction(s) for diagram number 500 - FFV1_1( w_fp[77], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[62] ); + FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] ); // Amplitude(s) for diagram number 500 - FFV1_0( w_fp[46], w_fp[62], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[62], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 500 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10240,10 +10240,10 @@ namespace mg5amcCpu // *** DIAGRAM 501 OF 1240 *** // Wavefunction(s) for diagram number 501 - FFV1_2( w_fp[46], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[114] ); + FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] ); // Amplitude(s) for diagram number 501 - FFV1_0( w_fp[114], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[114], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 501 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10257,7 +10257,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 502 - FFV1_0( w_fp[46], w_fp[77], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 502 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10273,7 +10273,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 503 - FFV1_0( w_fp[41], w_fp[62], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[62], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 503 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10284,10 +10284,10 @@ namespace mg5amcCpu // *** DIAGRAM 504 OF 1240 *** // Wavefunction(s) for diagram number 504 - FFV1_2( w_fp[41], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[113] ); + FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] ); // Amplitude(s) for diagram number 504 - FFV1_0( w_fp[113], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[113], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 504 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10301,7 +10301,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 505 - FFV1_0( w_fp[41], w_fp[77], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 505 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10317,7 +10317,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 506 - FFV1_0( w_fp[3], w_fp[62], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[62], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 506 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10333,7 +10333,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 507 - FFV1_0( w_fp[99], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 507 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10346,10 +10346,10 @@ namespace mg5amcCpu // *** DIAGRAM 508 OF 1240 *** // Wavefunction(s) for diagram number 508 - VVV1P0_1( w_fp[92], w_fp[27], COUPs[0], 0., 0., 
w_fp[62] ); + VVV1P0_1( w_fp[92], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[62] ); // Amplitude(s) for diagram number 508 - FFV1_0( w_fp[3], w_fp[77], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 508 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10366,10 +10366,10 @@ namespace mg5amcCpu // *** DIAGRAM 509 OF 1240 *** // Wavefunction(s) for diagram number 509 - FFV1_1( w_fp[2], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[112] ); + FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[112] ); // Amplitude(s) for diagram number 509 - FFV1_0( w_fp[56], w_fp[112], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[112], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 509 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10383,7 +10383,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 510 - FFV1_0( w_fp[21], w_fp[112], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[112], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 510 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10397,7 +10397,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 511 - VVV1_0( w_fp[102], w_fp[103], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[103], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 511 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10417,7 +10417,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 512 - FFV1_0( w_fp[21], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 512 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10433,7 +10433,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 513 - VVV1_0( w_fp[104], w_fp[103], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[103], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 513 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10453,7 +10453,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 514 - FFV1_0( w_fp[56], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 514 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10469,7 +10469,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 515 - FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10478,7 +10478,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10487,7 +10487,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); 
jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -10500,10 +10500,10 @@ namespace mg5amcCpu // *** DIAGRAM 516 OF 1240 *** // Wavefunction(s) for diagram number 516 - FFV1_2( w_fp[52], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[86] ); + FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] ); // Amplitude(s) for diagram number 516 - FFV1_0( w_fp[86], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 516 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10514,10 +10514,10 @@ namespace mg5amcCpu // *** DIAGRAM 517 OF 1240 *** // Wavefunction(s) for diagram number 517 - FFV1_1( w_fp[33], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); + FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 517 - FFV1_0( w_fp[52], w_fp[98], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[98], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 517 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10531,7 +10531,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 518 - FFV1_0( w_fp[52], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 518 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10547,7 +10547,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 519 - FFV1_0( w_fp[86], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 519 ) numerators_sv += cxabs2( amp_sv[0] ); 
if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10558,10 +10558,10 @@ namespace mg5amcCpu // *** DIAGRAM 520 OF 1240 *** // Wavefunction(s) for diagram number 520 - FFV1_1( w_fp[47], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[106] ); + FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] ); // Amplitude(s) for diagram number 520 - FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 520 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10575,7 +10575,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 521 - FFV1_0( w_fp[52], w_fp[47], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 521 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10591,7 +10591,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 522 - FFV1_0( w_fp[86], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 522 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10607,7 +10607,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 523 - FFV1_0( w_fp[52], w_fp[112], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[112], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 523 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10623,7 +10623,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 524 - FFV1_0( w_fp[52], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[62], COUPs[1], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 524 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10643,7 +10643,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 525 - FFV1_0( w_fp[65], w_fp[112], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[112], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 525 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10659,7 +10659,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 526 - FFV1_0( w_fp[3], w_fp[112], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 526 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10679,7 +10679,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 527 - FFV1_0( w_fp[99], w_fp[93], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[93], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 527 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10695,7 +10695,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 528 - FFV1_0( w_fp[99], w_fp[2], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 528 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10715,7 +10715,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 529 - FFV1_0( w_fp[3], w_fp[93], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[93], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 529 ) numerators_sv += cxabs2( amp_sv[0] ); 
if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10735,7 +10735,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 530 - FFV1_0( w_fp[65], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 530 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10755,7 +10755,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 531 - VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -10772,7 +10772,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -10789,7 +10789,7 @@ namespace mg5amcCpu jamp_sv[105] -= amp_sv[0]; jamp_sv[110] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -10810,10 +10810,10 @@ namespace mg5amcCpu // *** DIAGRAM 532 OF 1240 *** // Wavefunction(s) for diagram number 532 - VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 532 - VVV1_0( w_fp[8], w_fp[6], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 532 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += 
cxabs2( amp_sv[0] ); @@ -10838,10 +10838,10 @@ namespace mg5amcCpu // *** DIAGRAM 533 OF 1240 *** // Wavefunction(s) for diagram number 533 - VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 0., 0., w_fp[101] ); + VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[101] ); // Amplitude(s) for diagram number 533 - VVV1_0( w_fp[61], w_fp[6], w_fp[101], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[101], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 533 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10869,7 +10869,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 534 - VVV1_0( w_fp[61], w_fp[8], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[8], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 534 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10897,7 +10897,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 535 - FFV1_0( w_fp[3], w_fp[47], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 535 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10917,7 +10917,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 536 - FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 536 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10933,7 +10933,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 537 - FFV1_0( w_fp[99], w_fp[47], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId 
== 537 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10949,7 +10949,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 538 - FFV1_0( w_fp[41], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 538 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10969,7 +10969,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 539 - FFV1_0( w_fp[41], w_fp[112], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[112], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 539 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -10985,7 +10985,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 540 - FFV1_0( w_fp[113], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[113], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 540 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11001,7 +11001,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 541 - FFV1_0( w_fp[76], w_fp[112], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[112], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 541 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11017,7 +11017,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 542 - FFV1_0( w_fp[3], w_fp[112], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 542 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] 
); @@ -11037,7 +11037,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 543 - FFV1_0( w_fp[99], w_fp[97], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[97], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 543 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11053,7 +11053,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 544 - FFV1_0( w_fp[99], w_fp[2], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 544 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11073,7 +11073,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 545 - FFV1_0( w_fp[3], w_fp[97], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[97], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 545 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11093,7 +11093,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 546 - FFV1_0( w_fp[76], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 546 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11113,7 +11113,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 547 - VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -11130,7 +11130,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[103] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[72], w_fp[8], 
w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[13] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; @@ -11147,7 +11147,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[43] += amp_sv[0]; @@ -11168,10 +11168,10 @@ namespace mg5amcCpu // *** DIAGRAM 548 OF 1240 *** // Wavefunction(s) for diagram number 548 - VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 548 - VVV1_0( w_fp[8], w_fp[4], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 548 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11199,7 +11199,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 549 - VVV1_0( w_fp[72], w_fp[4], w_fp[101], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[101], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 549 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11227,7 +11227,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 550 - VVV1_0( w_fp[72], w_fp[8], w_fp[102], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[8], w_fp[102], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 550 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11255,7 +11255,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 551 - FFV1_0( w_fp[3], w_fp[33], w_fp[86], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 551 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11275,7 +11275,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 552 - FFV1_0( w_fp[3], w_fp[98], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[98], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 552 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11291,7 +11291,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 553 - FFV1_0( w_fp[99], w_fp[33], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 553 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11307,7 +11307,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 554 - FFV1_0( w_fp[46], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 554 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11327,7 +11327,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 555 - FFV1_0( w_fp[46], w_fp[112], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[112], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 555 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11343,7 +11343,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 556 - FFV1_0( w_fp[114], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[114], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 556 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11356,10 +11356,10 @@ namespace mg5amcCpu // *** DIAGRAM 557 OF 1240 *** // Wavefunction(s) for diagram number 557 - VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 557 - VVV1_0( w_fp[86], w_fp[13], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[13], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 557 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11387,7 +11387,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 558 - VVV1_0( w_fp[86], w_fp[11], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[11], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 558 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11415,7 +11415,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 559 - VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -11432,7 +11432,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -11449,7 +11449,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], 
COUPs[2], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -11473,7 +11473,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 560 - VVV1_0( w_fp[102], w_fp[108], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[108], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 560 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11501,7 +11501,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 561 - VVV1_0( w_fp[102], w_fp[1], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[1], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 561 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11529,7 +11529,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 562 - VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &_fp[0] ); jamp_sv[15] += amp_sv[0]; jamp_sv[26] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -11546,7 +11546,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[43] += amp_sv[0]; @@ -11563,7 +11563,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -11587,7 +11587,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 563 - VVV1_0( w_fp[104], 
w_fp[108], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[108], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 563 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11615,7 +11615,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 564 - VVV1_0( w_fp[104], w_fp[1], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[1], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 564 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11643,7 +11643,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 565 - VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -11660,7 +11660,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[101] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -11677,7 +11677,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -11698,12 +11698,12 @@ namespace mg5amcCpu // *** DIAGRAM 566 OF 1240 *** // Wavefunction(s) for diagram number 566 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[105] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[107] ); + 
VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] ); // Amplitude(s) for diagram number 566 - VVV1_0( w_fp[8], w_fp[6], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[105], COUPs[0], 1.0, &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -11720,7 +11720,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[95], COUPs[0], 1.0, &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -11737,7 +11737,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[107], COUPs[0], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -11758,12 +11758,12 @@ namespace mg5amcCpu // *** DIAGRAM 567 OF 1240 *** // Wavefunction(s) for diagram number 567 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[96] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[90] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[88] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] ); // Amplitude(s) for diagram number 567 - VVV1_0( w_fp[8], w_fp[4], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[96], COUPs[0], 1.0, &_fp[0] ); jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -11780,7 +11780,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; 
jamp_sv[103] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -11797,7 +11797,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[88], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[88], COUPs[0], 1.0, &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -11821,7 +11821,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 568 - VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &_fp[0] ); jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -11838,7 +11838,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -11855,7 +11855,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[101] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &_fp[0] ); jamp_sv[15] += amp_sv[0]; jamp_sv[26] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -11879,7 +11879,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 569 - VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[26] -= amp_sv[0]; @@ -11896,7 +11896,7 @@ namespace mg5amcCpu jamp_sv[110] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; - VVVV3_0( w_fp[92], 
w_fp[1], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[15] -= amp_sv[0]; @@ -11913,7 +11913,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[110] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -11937,7 +11937,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 570 - VVV1_0( w_fp[8], w_fp[27], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[27], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 570 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11965,7 +11965,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 571 - VVV1_0( w_fp[1], w_fp[27], w_fp[101], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[101], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 571 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -11993,7 +11993,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 572 - VVV1_0( w_fp[1], w_fp[8], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 572 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12021,7 +12021,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 573 - VVV1_0( w_fp[86], w_fp[37], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[37], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 573 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12041,7 +12041,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 574 - FFV1_0( w_fp[3], w_fp[36], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[36], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 574 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12057,7 +12057,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 575 - FFV1_0( w_fp[99], w_fp[100], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[100], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 575 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12071,7 +12071,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 576 - FFV1_0( w_fp[99], w_fp[36], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[36], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 576 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12085,7 +12085,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 577 - FFV1_0( w_fp[3], w_fp[100], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[100], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 577 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12101,7 +12101,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 578 - VVV1_0( w_fp[104], w_fp[1], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[1], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 578 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12121,7 +12121,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 579 - FFV1_0( w_fp[3], w_fp[33], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[96], COUPs[1], 1.0, &_fp[0] ); jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12130,7 +12130,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &_fp[0] ); jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12139,7 +12139,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[88], COUPs[1], 1.0, &_fp[0] ); jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12155,7 +12155,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 580 - FFV1_0( w_fp[41], w_fp[33], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 580 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12171,7 +12171,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 581 - FFV1_0( w_fp[41], w_fp[98], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[98], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 581 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12185,7 +12185,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 
582 - FFV1_0( w_fp[113], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[113], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 582 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12199,7 +12199,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 583 - VVV1_0( w_fp[86], w_fp[51], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[51], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 583 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12219,7 +12219,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 584 - FFV1_0( w_fp[3], w_fp[49], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[49], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 584 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12235,7 +12235,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 585 - FFV1_0( w_fp[99], w_fp[91], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[91], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 585 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12249,7 +12249,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 586 - FFV1_0( w_fp[99], w_fp[49], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[49], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 586 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12263,7 +12263,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 587 - FFV1_0( w_fp[3], w_fp[91], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[91], w_fp[102], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 587 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12279,7 +12279,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 588 - VVV1_0( w_fp[102], w_fp[1], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[1], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 588 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12299,7 +12299,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 589 - FFV1_0( w_fp[3], w_fp[47], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[105], COUPs[1], 1.0, &_fp[0] ); jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12308,7 +12308,7 @@ namespace mg5amcCpu jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[95], COUPs[1], 1.0, &_fp[0] ); jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12317,7 +12317,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[107], COUPs[1], 1.0, &_fp[0] ); jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12333,7 +12333,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 590 - FFV1_0( w_fp[46], w_fp[47], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[86], COUPs[1], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 590 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12349,7 +12349,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 591 - FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 591 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12363,7 +12363,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 592 - FFV1_0( w_fp[114], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[114], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 592 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12377,7 +12377,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 593 - VVV1_0( w_fp[86], w_fp[54], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[54], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 593 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12397,7 +12397,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 594 - FFV1_0( w_fp[53], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 594 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12413,7 +12413,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 595 - FFV1_0( w_fp[78], w_fp[112], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[112], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 595 ) numerators_sv += cxabs2( amp_sv[0] ); 
if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12427,7 +12427,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 596 - FFV1_0( w_fp[53], w_fp[112], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[112], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 596 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12441,7 +12441,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 597 - FFV1_0( w_fp[78], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 597 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12457,7 +12457,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 598 - VVV1_0( w_fp[104], w_fp[1], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[1], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 598 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12477,7 +12477,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 599 - FFV1_0( w_fp[46], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12486,7 +12486,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12495,7 +12495,7 @@ 
namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[88], COUPs[1], 1.0, &_fp[0] ); jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12511,7 +12511,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 600 - VVV1_0( w_fp[86], w_fp[20], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[20], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 600 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12531,7 +12531,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 601 - FFV1_0( w_fp[28], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 601 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12547,7 +12547,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 602 - FFV1_0( w_fp[60], w_fp[112], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[112], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 602 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12561,7 +12561,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 603 - FFV1_0( w_fp[28], w_fp[112], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[112], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 603 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12575,7 +12575,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 604 - FFV1_0( w_fp[60], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 604 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12591,7 +12591,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 605 - VVV1_0( w_fp[102], w_fp[1], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[1], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 605 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12611,7 +12611,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 606 - FFV1_0( w_fp[41], w_fp[2], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[105], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12620,7 +12620,7 @@ namespace mg5amcCpu jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12629,7 +12629,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[107], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12645,7 +12645,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 607 - FFV1_0( w_fp[3], 
w_fp[15], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[15], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 607 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12665,7 +12665,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 608 - FFV1_0( w_fp[14], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 608 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12685,7 +12685,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 609 - FFV1_0( w_fp[3], w_fp[112], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 609 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12705,7 +12705,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 610 - FFV1_0( w_fp[14], w_fp[112], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[112], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 610 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12721,7 +12721,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 611 - FFV1_0( w_fp[99], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 611 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12741,7 +12741,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 612 - FFV1_0( w_fp[99], w_fp[15], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[15], w_fp[1], COUPs[1], 1.0, &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 612 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12757,7 +12757,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 613 - FFV1_0( w_fp[3], w_fp[112], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[57], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12766,7 +12766,7 @@ namespace mg5amcCpu jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[112], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[81], COUPs[1], 1.0, &_fp[0] ); jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12775,7 +12775,7 @@ namespace mg5amcCpu jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[112], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[82], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -12791,7 +12791,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 614 - FFV1_0( w_fp[99], w_fp[2], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[57], COUPs[1], 1.0, &_fp[0] ); jamp_sv[34] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12800,7 +12800,7 @@ namespace mg5amcCpu jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[81], COUPs[1], 
1.0, &_fp[0] ); jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -12809,7 +12809,7 @@ namespace mg5amcCpu jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -12825,7 +12825,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 615 - VVV1_0( w_fp[92], w_fp[57], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[57], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[15] += amp_sv[0]; @@ -12842,7 +12842,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[81], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[81], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[13] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; @@ -12859,7 +12859,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[82], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[82], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -12880,11 +12880,11 @@ namespace mg5amcCpu // *** DIAGRAM 616 OF 1240 *** // Wavefunction(s) for diagram number 616 - VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 0., 0., w_fp[92] ); - FFV1_2( w_fp[3], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[92] ); + FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 616 - FFV1_0( w_fp[99], w_fp[87], w_fp[5], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[99], w_fp[87], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 616 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12898,7 +12898,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 617 - FFV1_0( w_fp[99], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 617 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12909,10 +12909,10 @@ namespace mg5amcCpu // *** DIAGRAM 618 OF 1240 *** // Wavefunction(s) for diagram number 618 - VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 0., 0., w_fp[112] ); + VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[112] ); // Amplitude(s) for diagram number 618 - VVV1_0( w_fp[112], w_fp[34], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[34], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 618 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12932,7 +12932,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 619 - FFV1_0( w_fp[3], w_fp[9], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 619 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12945,10 +12945,10 @@ namespace mg5amcCpu // *** DIAGRAM 620 OF 1240 *** // Wavefunction(s) for diagram number 620 - VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 620 - VVV1_0( w_fp[86], w_fp[34], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[34], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 620 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12968,7 +12968,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 621 - FFV1_0( w_fp[3], w_fp[87], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[87], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 621 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -12981,12 +12981,12 @@ namespace mg5amcCpu // *** DIAGRAM 622 OF 1240 *** // Wavefunction(s) for diagram number 622 - VVVV1P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[107] ); - VVVV3P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[105] ); + VVVV1P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[107] ); + VVVV3P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[105] ); // Amplitude(s) for diagram number 622 - FFV1_0( w_fp[3], w_fp[77], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[107], COUPs[1], 1.0, &_fp[0] ); jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -12995,7 +12995,7 @@ namespace mg5amcCpu jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &_fp[0] ); jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -13004,7 +13004,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( 
w_fp[3], w_fp[77], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[105], COUPs[1], 1.0, &_fp[0] ); jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0]; @@ -13017,10 +13017,10 @@ namespace mg5amcCpu // *** DIAGRAM 623 OF 1240 *** // Wavefunction(s) for diagram number 623 - FFV1_1( w_fp[77], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[102] ); + FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] ); // Amplitude(s) for diagram number 623 - FFV1_0( w_fp[46], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 623 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13031,10 +13031,10 @@ namespace mg5amcCpu // *** DIAGRAM 624 OF 1240 *** // Wavefunction(s) for diagram number 624 - FFV1_2( w_fp[46], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[88] ); + FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] ); // Amplitude(s) for diagram number 624 - FFV1_0( w_fp[88], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 624 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13048,7 +13048,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 625 - FFV1_0( w_fp[46], w_fp[77], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 625 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13064,7 +13064,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 626 - FFV1_0( w_fp[38], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[102], w_fp[4], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 626 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13075,10 +13075,10 @@ namespace mg5amcCpu // *** DIAGRAM 627 OF 1240 *** // Wavefunction(s) for diagram number 627 - FFV1_2( w_fp[38], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[90] ); + FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] ); // Amplitude(s) for diagram number 627 - FFV1_0( w_fp[90], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 627 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13092,7 +13092,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 628 - FFV1_0( w_fp[38], w_fp[77], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 628 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13108,7 +13108,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 629 - FFV1_0( w_fp[3], w_fp[102], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 629 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13124,7 +13124,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 630 - FFV1_0( w_fp[99], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 630 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13137,10 +13137,10 @@ namespace mg5amcCpu // *** DIAGRAM 631 OF 1240 *** // 
Wavefunction(s) for diagram number 631 - VVV1P0_1( w_fp[92], w_fp[24], COUPs[0], 0., 0., w_fp[102] ); + VVV1P0_1( w_fp[92], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[102] ); // Amplitude(s) for diagram number 631 - FFV1_0( w_fp[3], w_fp[77], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 631 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13157,10 +13157,10 @@ namespace mg5amcCpu // *** DIAGRAM 632 OF 1240 *** // Wavefunction(s) for diagram number 632 - FFV1_1( w_fp[2], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[96] ); + FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[96] ); // Amplitude(s) for diagram number 632 - FFV1_0( w_fp[56], w_fp[96], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[96], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 632 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13174,7 +13174,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 633 - FFV1_0( w_fp[22], w_fp[96], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[96], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 633 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13188,7 +13188,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 634 - VVV1_0( w_fp[112], w_fp[103], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[103], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 634 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13208,7 +13208,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 635 - FFV1_0( w_fp[22], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); + 
FFV1_0( w_fp[22], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 635 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13224,7 +13224,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 636 - VVV1_0( w_fp[86], w_fp[103], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[103], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 636 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13244,7 +13244,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 637 - FFV1_0( w_fp[56], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 637 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13260,7 +13260,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 638 - FFV1_0( w_fp[52], w_fp[2], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[107], COUPs[1], 1.0, &_fp[0] ); jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -13269,7 +13269,7 @@ namespace mg5amcCpu jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -13278,7 +13278,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[105], COUPs[1], &_fp[0] ); + 
FFV1_0( w_fp[52], w_fp[2], w_fp[105], COUPs[1], 1.0, &_fp[0] ); jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; @@ -13291,10 +13291,10 @@ namespace mg5amcCpu // *** DIAGRAM 639 OF 1240 *** // Wavefunction(s) for diagram number 639 - FFV1_2( w_fp[52], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[104] ); + FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] ); // Amplitude(s) for diagram number 639 - FFV1_0( w_fp[104], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 639 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13305,10 +13305,10 @@ namespace mg5amcCpu // *** DIAGRAM 640 OF 1240 *** // Wavefunction(s) for diagram number 640 - FFV1_1( w_fp[33], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[114] ); + FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] ); // Amplitude(s) for diagram number 640 - FFV1_0( w_fp[52], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 640 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13322,7 +13322,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 641 - FFV1_0( w_fp[52], w_fp[33], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 641 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13338,7 +13338,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 642 - FFV1_0( w_fp[104], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 642 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13349,10 +13349,10 @@ namespace mg5amcCpu // *** DIAGRAM 643 OF 1240 *** // Wavefunction(s) for diagram number 643 - FFV1_1( w_fp[39], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[106] ); + FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] ); // Amplitude(s) for diagram number 643 - FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 643 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13366,7 +13366,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 644 - FFV1_0( w_fp[52], w_fp[39], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 644 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13382,7 +13382,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 645 - FFV1_0( w_fp[104], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 645 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13398,7 +13398,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 646 - FFV1_0( w_fp[52], w_fp[96], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[96], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 646 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13414,7 +13414,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 647 - FFV1_0( w_fp[52], 
w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 647 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13434,7 +13434,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 648 - FFV1_0( w_fp[65], w_fp[96], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[96], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 648 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13450,7 +13450,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 649 - FFV1_0( w_fp[3], w_fp[96], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 649 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13470,7 +13470,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 650 - FFV1_0( w_fp[99], w_fp[93], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[93], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 650 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13486,7 +13486,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 651 - FFV1_0( w_fp[99], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 651 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13506,7 +13506,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 652 - FFV1_0( w_fp[3], w_fp[93], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[93], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 652 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13526,7 +13526,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 653 - FFV1_0( w_fp[65], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 653 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13546,7 +13546,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 654 - VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -13563,7 +13563,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[96] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[22] -= amp_sv[0]; @@ -13580,7 +13580,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -13601,10 +13601,10 @@ namespace mg5amcCpu // *** DIAGRAM 655 OF 1240 *** // Wavefunction(s) for diagram number 655 - VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 655 - VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( 
channelId == 655 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13629,10 +13629,10 @@ namespace mg5amcCpu // *** DIAGRAM 656 OF 1240 *** // Wavefunction(s) for diagram number 656 - VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 0., 0., w_fp[113] ); + VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[113] ); // Amplitude(s) for diagram number 656 - VVV1_0( w_fp[61], w_fp[5], w_fp[113], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[113], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 656 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13660,7 +13660,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 657 - VVV1_0( w_fp[61], w_fp[8], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[8], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 657 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13688,7 +13688,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 658 - FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 658 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13708,7 +13708,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 659 - FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 659 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13724,7 +13724,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 660 - FFV1_0( w_fp[99], w_fp[39], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], 
w_fp[39], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 660 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13740,7 +13740,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 661 - FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 661 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13760,7 +13760,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 662 - FFV1_0( w_fp[38], w_fp[96], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[96], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 662 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13776,7 +13776,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 663 - FFV1_0( w_fp[90], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 663 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13792,7 +13792,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 664 - FFV1_0( w_fp[71], w_fp[96], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[96], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 664 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13808,7 +13808,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 665 - FFV1_0( w_fp[3], w_fp[96], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 665 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13828,7 +13828,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 666 - FFV1_0( w_fp[99], w_fp[94], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[94], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 666 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13844,7 +13844,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 667 - FFV1_0( w_fp[99], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 667 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13864,7 +13864,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 668 - FFV1_0( w_fp[3], w_fp[94], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[94], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 668 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13884,7 +13884,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 669 - FFV1_0( w_fp[71], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 669 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13904,7 +13904,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 670 - VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[19] += amp_sv[0]; jamp_sv[22] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -13921,7 +13921,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; 
jamp_sv[97] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[19] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -13938,7 +13938,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; jamp_sv[37] += amp_sv[0]; @@ -13959,10 +13959,10 @@ namespace mg5amcCpu // *** DIAGRAM 671 OF 1240 *** // Wavefunction(s) for diagram number 671 - VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 671 - VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 671 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -13990,7 +13990,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 672 - VVV1_0( w_fp[66], w_fp[4], w_fp[113], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[113], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 672 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14018,7 +14018,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 673 - VVV1_0( w_fp[66], w_fp[8], w_fp[112], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[8], w_fp[112], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 673 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14046,7 +14046,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 674 - FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 674 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14066,7 +14066,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 675 - FFV1_0( w_fp[3], w_fp[114], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 675 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14082,7 +14082,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 676 - FFV1_0( w_fp[99], w_fp[33], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 676 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14098,7 +14098,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 677 - FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 677 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14118,7 +14118,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 678 - FFV1_0( w_fp[46], w_fp[96], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[96], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 678 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14134,7 +14134,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 679 - FFV1_0( w_fp[88], w_fp[2], w_fp[66], COUPs[1], &_fp[0] 
); + FFV1_0( w_fp[88], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 679 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14147,10 +14147,10 @@ namespace mg5amcCpu // *** DIAGRAM 680 OF 1240 *** // Wavefunction(s) for diagram number 680 - VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 680 - VVV1_0( w_fp[104], w_fp[13], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[13], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 680 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14178,7 +14178,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 681 - VVV1_0( w_fp[104], w_fp[10], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[10], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 681 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14206,7 +14206,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 682 - VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -14223,7 +14223,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -14240,7 +14240,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[5], 
w_fp[104], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -14264,7 +14264,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 683 - VVV1_0( w_fp[112], w_fp[108], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[108], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 683 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14292,7 +14292,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 684 - VVV1_0( w_fp[112], w_fp[1], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[1], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 684 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14320,7 +14320,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 685 - VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &_fp[0] ); jamp_sv[21] += amp_sv[0]; jamp_sv[28] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -14337,7 +14337,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; jamp_sv[37] += amp_sv[0]; @@ -14354,7 +14354,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -14378,7 +14378,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 686 - VVV1_0( w_fp[86], w_fp[108], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[108], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 686 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14406,7 +14406,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 687 - VVV1_0( w_fp[86], w_fp[1], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 687 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14434,7 +14434,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 688 - VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &_fp[0] ); jamp_sv[23] += amp_sv[0]; jamp_sv[29] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -14451,7 +14451,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -14468,7 +14468,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -14489,12 +14489,12 @@ namespace mg5amcCpu // *** DIAGRAM 689 OF 1240 *** // Wavefunction(s) for diagram number 689 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[98] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[62] ); - VVVV4P0_1( w_fp[92], 
w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[101] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[98] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[62] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[101] ); // Amplitude(s) for diagram number 689 - VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -14511,7 +14511,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[96] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -14528,7 +14528,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[101], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[101], COUPs[0], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -14549,12 +14549,12 @@ namespace mg5amcCpu // *** DIAGRAM 690 OF 1240 *** // Wavefunction(s) for diagram number 690 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[109] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 690 - VVV1_0( w_fp[8], w_fp[4], w_fp[109], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[109], COUPs[0], 1.0, &_fp[0] ); jamp_sv[19] += amp_sv[0]; jamp_sv[22] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -14571,7 +14571,7 @@ 
namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[97] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[110], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[110], COUPs[0], 1.0, &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -14588,7 +14588,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[111], COUPs[0], 1.0, &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -14612,7 +14612,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 691 - VVV1_0( w_fp[1], w_fp[8], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[107], COUPs[0], 1.0, &_fp[0] ); jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -14629,7 +14629,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[99] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &_fp[0] ); jamp_sv[23] += amp_sv[0]; jamp_sv[29] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -14646,7 +14646,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[105], COUPs[0], 1.0, &_fp[0] ); jamp_sv[21] += amp_sv[0]; jamp_sv[28] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -14670,7 +14670,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 692 - VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[28] -= amp_sv[0]; @@ -14687,7 +14687,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[96] -= amp_sv[0]; jamp_sv[97] 
+= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; @@ -14704,7 +14704,7 @@ namespace mg5amcCpu jamp_sv[97] += amp_sv[0]; jamp_sv[99] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -14728,7 +14728,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 693 - VVV1_0( w_fp[8], w_fp[24], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[24], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 693 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14756,7 +14756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 694 - VVV1_0( w_fp[1], w_fp[24], w_fp[113], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[113], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 694 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14784,7 +14784,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 695 - VVV1_0( w_fp[1], w_fp[8], w_fp[102], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[102], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 695 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14812,7 +14812,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 696 - VVV1_0( w_fp[104], w_fp[37], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[37], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 696 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14832,7 +14832,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 697 - FFV1_0( w_fp[3], w_fp[35], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[35], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 697 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14848,7 +14848,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 698 - FFV1_0( w_fp[99], w_fp[100], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[100], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 698 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14862,7 +14862,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 699 - FFV1_0( w_fp[99], w_fp[35], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[35], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 699 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14876,7 +14876,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 700 - FFV1_0( w_fp[3], w_fp[100], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[100], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 700 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14892,7 +14892,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 701 - VVV1_0( w_fp[86], w_fp[1], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 701 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ 
-14912,7 +14912,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 702 - FFV1_0( w_fp[3], w_fp[33], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[109], COUPs[1], 1.0, &_fp[0] ); jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -14921,7 +14921,7 @@ namespace mg5amcCpu jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[110], COUPs[1], 1.0, &_fp[0] ); jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -14930,7 +14930,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; @@ -14946,7 +14946,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 703 - FFV1_0( w_fp[38], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 703 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14962,7 +14962,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 704 - FFV1_0( w_fp[38], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 704 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14976,7 +14976,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 705 - FFV1_0( w_fp[90], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 705 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -14990,7 +14990,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 706 - VVV1_0( w_fp[104], w_fp[45], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[45], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 706 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15010,7 +15010,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 707 - FFV1_0( w_fp[3], w_fp[43], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[43], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 707 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15026,7 +15026,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 708 - FFV1_0( w_fp[99], w_fp[89], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[89], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 708 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15040,7 +15040,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 709 - FFV1_0( w_fp[99], w_fp[43], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[43], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 709 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15054,7 +15054,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 710 - FFV1_0( w_fp[3], w_fp[89], w_fp[112], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[3], w_fp[89], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 710 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15070,7 +15070,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 711 - VVV1_0( w_fp[112], w_fp[1], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[1], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 711 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15090,7 +15090,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 712 - FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &_fp[0] ); jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15099,7 +15099,7 @@ namespace mg5amcCpu jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &_fp[0] ); jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15108,7 +15108,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[101], COUPs[1], 1.0, &_fp[0] ); jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15124,7 +15124,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 713 - FFV1_0( w_fp[46], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], 
w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 713 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15140,7 +15140,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 714 - FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 714 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15154,7 +15154,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 715 - FFV1_0( w_fp[88], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 715 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15168,7 +15168,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 716 - VVV1_0( w_fp[104], w_fp[54], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[54], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 716 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15188,7 +15188,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 717 - FFV1_0( w_fp[7], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 717 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15204,7 +15204,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 718 - FFV1_0( w_fp[78], w_fp[96], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[96], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 718 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15218,7 +15218,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 719 - FFV1_0( w_fp[7], w_fp[96], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[96], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 719 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15232,7 +15232,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 720 - FFV1_0( w_fp[78], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 720 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15248,7 +15248,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 721 - VVV1_0( w_fp[86], w_fp[1], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 721 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15268,7 +15268,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 722 - FFV1_0( w_fp[46], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15277,7 +15277,7 @@ namespace mg5amcCpu jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * 
amp_sv[0]; @@ -15286,7 +15286,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15302,7 +15302,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 723 - VVV1_0( w_fp[104], w_fp[23], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[23], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 723 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15322,7 +15322,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 724 - FFV1_0( w_fp[25], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 724 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15338,7 +15338,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 725 - FFV1_0( w_fp[58], w_fp[96], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[96], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 725 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15352,7 +15352,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 726 - FFV1_0( w_fp[25], w_fp[96], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[96], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 726 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15366,7 +15366,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 727 - FFV1_0( w_fp[58], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 727 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15382,7 +15382,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 728 - VVV1_0( w_fp[112], w_fp[1], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[1], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 728 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15402,7 +15402,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 729 - FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15411,7 +15411,7 @@ namespace mg5amcCpu jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15420,7 +15420,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15436,7 +15436,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 730 - FFV1_0( w_fp[3], w_fp[17], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 730 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15456,7 +15456,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 731 - FFV1_0( w_fp[26], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 731 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15476,7 +15476,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 732 - FFV1_0( w_fp[3], w_fp[96], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 732 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15496,7 +15496,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 733 - FFV1_0( w_fp[26], w_fp[96], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[96], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 733 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15512,7 +15512,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 734 - FFV1_0( w_fp[99], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 734 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15532,7 +15532,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 735 - FFV1_0( w_fp[99], w_fp[17], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[17], w_fp[1], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 735 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15548,7 +15548,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 736 - FFV1_0( w_fp[3], w_fp[96], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[73], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15557,7 +15557,7 @@ namespace mg5amcCpu jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[96], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[79], COUPs[1], 1.0, &_fp[0] ); jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15566,7 +15566,7 @@ namespace mg5amcCpu jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[96], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[80], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -15582,7 +15582,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 737 - FFV1_0( w_fp[99], w_fp[2], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[73], COUPs[1], 1.0, &_fp[0] ); jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[38] += cxtype( 0, 1 ) * amp_sv[0]; @@ -15591,7 +15591,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], 
w_fp[79], COUPs[1], 1.0, &_fp[0] ); jamp_sv[38] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -15600,7 +15600,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[33] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -15616,7 +15616,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 738 - VVV1_0( w_fp[92], w_fp[73], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[73], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -15633,7 +15633,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[92], w_fp[79], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[79], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[19] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -15650,7 +15650,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[92], w_fp[80], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[80], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[22] -= amp_sv[0]; @@ -15671,10 +15671,10 @@ namespace mg5amcCpu // *** DIAGRAM 739 OF 1240 *** // Wavefunction(s) for diagram number 739 - FFV1_1( w_fp[77], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[92] ); + FFV1_1( w_fp[77], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[92] ); // Amplitude(s) for diagram number 739 - FFV1_0( w_fp[7], w_fp[92], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[92], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 739 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15687,7 +15687,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 740 - FFV1_0( w_fp[53], w_fp[92], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[92], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 740 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15697,10 +15697,10 @@ namespace mg5amcCpu // *** DIAGRAM 741 OF 1240 *** // Wavefunction(s) for diagram number 741 - FFV1_2( w_fp[46], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + FFV1_2( w_fp[46], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 741 - FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 741 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15713,7 +15713,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 742 - FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 742 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15726,7 +15726,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 743 - FFV1_0( w_fp[53], w_fp[9], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[9], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 743 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15739,7 +15739,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 744 - FFV1_0( w_fp[7], w_fp[85], w_fp[0], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[85], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 744 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15752,7 +15752,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 745 - FFV1_0( w_fp[46], w_fp[92], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[92], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 745 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15766,7 +15766,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 746 - FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 746 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15777,10 +15777,10 @@ namespace mg5amcCpu // *** DIAGRAM 747 OF 1240 *** // Wavefunction(s) for diagram number 747 - VVV1P0_1( w_fp[0], w_fp[29], COUPs[0], 0., 0., w_fp[96] ); + VVV1P0_1( w_fp[0], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[96] ); // Amplitude(s) for diagram number 747 - FFV1_0( w_fp[46], w_fp[77], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 747 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15796,7 +15796,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 748 - FFV1_0( w_fp[25], w_fp[92], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[92], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 748 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15809,7 +15809,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 749 - FFV1_0( w_fp[48], w_fp[92], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[92], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 749 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15819,10 +15819,10 @@ namespace mg5amcCpu // *** DIAGRAM 750 OF 1240 *** // Wavefunction(s) for diagram number 750 - FFV1_2( w_fp[38], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[104] ); + FFV1_2( w_fp[38], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] ); // Amplitude(s) for diagram number 750 - FFV1_0( w_fp[104], w_fp[87], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[87], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 750 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15835,7 +15835,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 751 - FFV1_0( w_fp[104], w_fp[85], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[85], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 751 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15848,7 +15848,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 752 - FFV1_0( w_fp[48], w_fp[87], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[87], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 752 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15861,7 +15861,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 753 - FFV1_0( w_fp[25], w_fp[85], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[85], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 753 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15874,7 +15874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 754 - FFV1_0( w_fp[38], w_fp[92], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[92], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 754 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15888,7 +15888,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 755 - FFV1_0( w_fp[104], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 755 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15899,10 +15899,10 @@ namespace mg5amcCpu // *** DIAGRAM 756 OF 1240 *** // Wavefunction(s) for diagram number 756 - VVV1P0_1( w_fp[0], w_fp[27], COUPs[0], 0., 0., w_fp[101] ); + VVV1P0_1( w_fp[0], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[101] ); // Amplitude(s) for diagram number 756 - FFV1_0( w_fp[38], w_fp[77], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 756 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15918,7 +15918,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 757 - FFV1_0( w_fp[28], w_fp[92], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[92], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 757 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15931,7 +15931,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 758 - FFV1_0( w_fp[40], w_fp[92], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[92], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 758 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15941,10 +15941,10 @@ namespace mg5amcCpu // *** DIAGRAM 759 OF 1240 *** // Wavefunction(s) for diagram number 759 - FFV1_2( w_fp[41], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[62] ); + FFV1_2( w_fp[41], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] ); // Amplitude(s) for diagram number 759 - FFV1_0( w_fp[62], w_fp[87], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[87], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 759 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15957,7 +15957,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 760 - FFV1_0( w_fp[62], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 760 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15970,7 +15970,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 761 - FFV1_0( w_fp[40], w_fp[87], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[87], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 761 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15983,7 +15983,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 762 - FFV1_0( w_fp[28], w_fp[9], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[9], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 762 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -15996,7 +15996,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 763 - FFV1_0( w_fp[41], w_fp[92], w_fp[24], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[92], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 763 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16010,7 +16010,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 764 - FFV1_0( w_fp[62], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 764 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16021,10 +16021,10 @@ namespace mg5amcCpu // *** DIAGRAM 765 OF 1240 *** // Wavefunction(s) for diagram number 765 - VVV1P0_1( w_fp[0], w_fp[24], COUPs[0], 0., 0., w_fp[98] ); + VVV1P0_1( w_fp[0], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[98] ); // Amplitude(s) for diagram number 765 - FFV1_0( w_fp[41], w_fp[77], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 765 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16040,7 +16040,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 766 - FFV1_0( w_fp[26], w_fp[92], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[92], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 766 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16054,7 +16054,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 767 - FFV1_0( w_fp[3], w_fp[92], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 767 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16070,7 +16070,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 768 - VVV1_0( w_fp[98], w_fp[34], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[34], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 768 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16090,7 +16090,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 769 - FFV1_0( w_fp[3], w_fp[85], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[85], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 769 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16106,7 +16106,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 770 - VVV1_0( w_fp[0], w_fp[34], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[34], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 770 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16126,7 +16126,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 771 - FFV1_0( w_fp[26], w_fp[85], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[85], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 771 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16137,12 +16137,12 @@ namespace mg5amcCpu // *** DIAGRAM 772 OF 1240 *** // Wavefunction(s) for diagram number 772 - VVVV1P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 0., 0., w_fp[85] ); - VVVV3P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 0., 0., w_fp[112] ); - VVVV4P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[85] ); + VVVV3P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] ); + VVVV4P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 
1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 772 - FFV1_0( w_fp[3], w_fp[77], w_fp[85], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[85], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; @@ -16151,7 +16151,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[112], COUPs[1], 1.0, &_fp[0] ); jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16160,7 +16160,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16176,7 +16176,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 773 - FFV1_0( w_fp[14], w_fp[92], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[92], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 773 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16190,7 +16190,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 774 - FFV1_0( w_fp[3], w_fp[92], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 774 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16206,7 +16206,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 775 - VVV1_0( w_fp[101], w_fp[34], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[34], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 775 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16226,7 +16226,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 776 - FFV1_0( w_fp[3], w_fp[9], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 776 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16242,7 +16242,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 777 - VVV1_0( w_fp[0], w_fp[34], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[34], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 777 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16262,7 +16262,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 778 - FFV1_0( w_fp[14], w_fp[9], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[9], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 778 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16273,12 +16273,12 @@ namespace mg5amcCpu // *** DIAGRAM 779 OF 1240 *** // Wavefunction(s) for diagram number 779 - VVVV1P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 0., 0., w_fp[9] ); - VVVV3P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 0., 0., w_fp[109] ); + VVVV1P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[9] ); + VVVV3P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., 
w_fp[109] ); // Amplitude(s) for diagram number 779 - FFV1_0( w_fp[3], w_fp[77], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[9], COUPs[1], 1.0, &_fp[0] ); jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16287,7 +16287,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &_fp[0] ); jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16296,7 +16296,7 @@ namespace mg5amcCpu jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &_fp[0] ); jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16312,7 +16312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 780 - FFV1_0( w_fp[12], w_fp[92], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[92], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 780 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16326,7 +16326,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 781 - FFV1_0( w_fp[3], w_fp[92], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 781 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16342,7 +16342,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 782 - VVV1_0( w_fp[96], w_fp[34], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[34], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 782 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16362,7 +16362,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 783 - FFV1_0( w_fp[3], w_fp[87], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[87], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 783 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16378,7 +16378,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 784 - VVV1_0( w_fp[0], w_fp[34], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[34], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 784 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16398,7 +16398,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 785 - FFV1_0( w_fp[12], w_fp[87], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[87], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 785 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16409,12 +16409,12 @@ namespace mg5amcCpu // *** DIAGRAM 786 OF 1240 *** // Wavefunction(s) for diagram number 786 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 0., 0., w_fp[87] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 0., 0., w_fp[34] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 0., 0., w_fp[86] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[87] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[34] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[86] ); // 
Amplitude(s) for diagram number 786 - FFV1_0( w_fp[3], w_fp[77], w_fp[87], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[87], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; @@ -16423,7 +16423,7 @@ namespace mg5amcCpu jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[34], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[34], COUPs[1], 1.0, &_fp[0] ); jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16432,7 +16432,7 @@ namespace mg5amcCpu jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[86], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16448,17 +16448,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 787 - FFV1_0( w_fp[3], w_fp[92], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[30], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] += amp_sv[0]; jamp_sv[25] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[92], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[31], COUPs[1], 1.0, &_fp[0] ); jamp_sv[25] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[92], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[32], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -16467,12 +16467,12 @@ namespace mg5amcCpu // *** DIAGRAM 788 OF 1240 *** // Wavefunction(s) for 
diagram number 788 - VVV1P0_1( w_fp[0], w_fp[30], COUPs[0], 0., 0., w_fp[92] ); - VVV1P0_1( w_fp[0], w_fp[31], COUPs[0], 0., 0., w_fp[88] ); - VVV1P0_1( w_fp[0], w_fp[32], COUPs[0], 0., 0., w_fp[106] ); + VVV1P0_1( w_fp[0], w_fp[30], COUPs[0], 1.0, 0., 0., w_fp[92] ); + VVV1P0_1( w_fp[0], w_fp[31], COUPs[0], 1.0, 0., 0., w_fp[88] ); + VVV1P0_1( w_fp[0], w_fp[32], COUPs[0], 1.0, 0., 0., w_fp[106] ); // Amplitude(s) for diagram number 788 - FFV1_0( w_fp[3], w_fp[77], w_fp[92], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[92], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; @@ -16481,7 +16481,7 @@ namespace mg5amcCpu jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &_fp[0] ); jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; @@ -16490,7 +16490,7 @@ namespace mg5amcCpu jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[106], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[106], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16503,10 +16503,10 @@ namespace mg5amcCpu // *** DIAGRAM 789 OF 1240 *** // Wavefunction(s) for diagram number 789 - FFV1_2( w_fp[52], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[90] ); + FFV1_2( w_fp[52], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] ); // Amplitude(s) for diagram number 789 - FFV1_0( w_fp[90], w_fp[35], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[35], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 789 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16519,7 +16519,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 790 - FFV1_0( w_fp[90], w_fp[36], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[36], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 790 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16529,10 +16529,10 @@ namespace mg5amcCpu // *** DIAGRAM 791 OF 1240 *** // Wavefunction(s) for diagram number 791 - FFV1_1( w_fp[33], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[114] ); + FFV1_1( w_fp[33], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] ); // Amplitude(s) for diagram number 791 - FFV1_0( w_fp[22], w_fp[114], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[114], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 791 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16545,7 +16545,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 792 - FFV1_0( w_fp[21], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 792 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16558,7 +16558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 793 - FFV1_0( w_fp[22], w_fp[36], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[36], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 793 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16571,7 +16571,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 794 - FFV1_0( w_fp[21], w_fp[35], 
w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[35], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 794 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16584,7 +16584,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 795 - FFV1_0( w_fp[90], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 795 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16598,7 +16598,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 796 - FFV1_0( w_fp[52], w_fp[114], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[114], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 796 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16612,7 +16612,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 797 - FFV1_0( w_fp[52], w_fp[33], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 797 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16628,7 +16628,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 798 - FFV1_0( w_fp[90], w_fp[43], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[43], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 798 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16641,7 +16641,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 799 - FFV1_0( w_fp[90], w_fp[44], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[44], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 799 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16651,10 +16651,10 @@ namespace mg5amcCpu // *** DIAGRAM 800 OF 1240 *** // Wavefunction(s) for diagram number 800 - FFV1_1( w_fp[39], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[102] ); + FFV1_1( w_fp[39], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] ); // Amplitude(s) for diagram number 800 - FFV1_0( w_fp[56], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 800 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16667,7 +16667,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 801 - FFV1_0( w_fp[21], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 801 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16680,7 +16680,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 802 - FFV1_0( w_fp[56], w_fp[44], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[44], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 802 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16693,7 +16693,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 803 - FFV1_0( w_fp[21], w_fp[43], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[43], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 803 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16706,7 +16706,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 804 - FFV1_0( w_fp[90], w_fp[39], 
w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 804 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16720,7 +16720,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 805 - FFV1_0( w_fp[52], w_fp[102], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[102], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 805 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16734,7 +16734,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 806 - FFV1_0( w_fp[52], w_fp[39], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 806 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16750,7 +16750,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 807 - FFV1_0( w_fp[90], w_fp[49], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[49], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 807 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16763,7 +16763,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 808 - FFV1_0( w_fp[90], w_fp[50], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[50], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 808 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16773,10 +16773,10 @@ namespace mg5amcCpu // *** DIAGRAM 809 OF 1240 *** // Wavefunction(s) for diagram number 809 - FFV1_1( w_fp[47], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[113] ); + FFV1_1( w_fp[47], w_fp[0], COUPs[1], 
1.0, cIPD[0], cIPD[1], w_fp[113] ); // Amplitude(s) for diagram number 809 - FFV1_0( w_fp[56], w_fp[113], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[113], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 809 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16789,7 +16789,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 810 - FFV1_0( w_fp[22], w_fp[113], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[113], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 810 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16802,7 +16802,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 811 - FFV1_0( w_fp[56], w_fp[50], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[50], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 811 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16815,7 +16815,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 812 - FFV1_0( w_fp[22], w_fp[49], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[49], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 812 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16828,7 +16828,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 813 - FFV1_0( w_fp[90], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 813 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16842,7 +16842,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 814 - FFV1_0( w_fp[52], w_fp[113], 
w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[113], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 814 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16856,7 +16856,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 815 - FFV1_0( w_fp[52], w_fp[47], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 815 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16872,7 +16872,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 816 - FFV1_0( w_fp[90], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 816 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16886,7 +16886,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 817 - FFV1_0( w_fp[90], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 817 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16902,7 +16902,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 818 - VVV1_0( w_fp[98], w_fp[103], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[103], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 818 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16922,7 +16922,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 819 - FFV1_0( w_fp[21], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 819 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16938,7 +16938,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 820 - VVV1_0( w_fp[0], w_fp[103], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[103], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 820 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16958,7 +16958,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 821 - FFV1_0( w_fp[21], w_fp[17], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[17], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 821 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -16972,7 +16972,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 822 - FFV1_0( w_fp[52], w_fp[2], w_fp[85], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[85], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; @@ -16981,7 +16981,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -16990,7 +16990,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] += 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17006,7 +17006,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 823 - FFV1_0( w_fp[90], w_fp[15], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[15], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 823 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17020,7 +17020,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 824 - FFV1_0( w_fp[90], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 824 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17036,7 +17036,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 825 - VVV1_0( w_fp[101], w_fp[103], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[103], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 825 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17056,7 +17056,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 826 - FFV1_0( w_fp[22], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 826 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17072,7 +17072,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 827 - VVV1_0( w_fp[0], w_fp[103], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[103], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 827 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( 
amp_sv[0] ); @@ -17092,7 +17092,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 828 - FFV1_0( w_fp[22], w_fp[15], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[15], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 828 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17106,7 +17106,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 829 - FFV1_0( w_fp[52], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17115,7 +17115,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17124,7 +17124,7 @@ namespace mg5amcCpu jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17140,7 +17140,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 830 - FFV1_0( w_fp[90], w_fp[18], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[18], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 830 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17154,7 +17154,7 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 831 - FFV1_0( w_fp[90], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 831 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17170,7 +17170,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 832 - VVV1_0( w_fp[96], w_fp[103], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[103], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 832 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17190,7 +17190,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 833 - FFV1_0( w_fp[56], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 833 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17206,7 +17206,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 834 - VVV1_0( w_fp[0], w_fp[103], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[103], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 834 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17226,7 +17226,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 835 - FFV1_0( w_fp[56], w_fp[18], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[18], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 835 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17240,7 +17240,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 836 - FFV1_0( w_fp[52], w_fp[2], w_fp[87], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[87], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; @@ -17249,7 +17249,7 @@ namespace mg5amcCpu jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[34], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[34], COUPs[1], 1.0, &_fp[0] ); jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17258,7 +17258,7 @@ namespace mg5amcCpu jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17274,17 +17274,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 837 - FFV1_0( w_fp[90], w_fp[2], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[30], COUPs[1], 1.0, &_fp[0] ); jamp_sv[64] += amp_sv[0]; jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - FFV1_0( w_fp[90], w_fp[2], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[31], COUPs[1], 1.0, &_fp[0] ); jamp_sv[70] -= amp_sv[0]; jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - FFV1_0( w_fp[90], w_fp[2], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[32], COUPs[1], 1.0, &_fp[0] ); jamp_sv[64] -= amp_sv[0]; jamp_sv[88] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; @@ -17296,7 +17296,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 838 - FFV1_0( w_fp[52], w_fp[2], w_fp[92], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[52], w_fp[2], w_fp[92], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; @@ -17305,7 +17305,7 @@ namespace mg5amcCpu jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; @@ -17314,7 +17314,7 @@ namespace mg5amcCpu jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[106], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[106], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -17327,10 +17327,10 @@ namespace mg5amcCpu // *** DIAGRAM 839 OF 1240 *** // Wavefunction(s) for diagram number 839 - VVV1P0_1( w_fp[0], w_fp[61], COUPs[0], 0., 0., w_fp[90] ); + VVV1P0_1( w_fp[0], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[90] ); // Amplitude(s) for diagram number 839 - VVV1_0( w_fp[90], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 839 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17358,7 +17358,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 840 - VVV1_0( w_fp[90], w_fp[11], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[11], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 840 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17386,7 
+17386,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 841 - VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -17403,7 +17403,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -17420,7 +17420,7 @@ namespace mg5amcCpu jamp_sv[115] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[6] += amp_sv[0]; @@ -17441,10 +17441,10 @@ namespace mg5amcCpu // *** DIAGRAM 842 OF 1240 *** // Wavefunction(s) for diagram number 842 - VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 0., 0., w_fp[56] ); + VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[56] ); // Amplitude(s) for diagram number 842 - VVV1_0( w_fp[56], w_fp[63], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[63], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 842 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17472,7 +17472,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 843 - VVV1_0( w_fp[56], w_fp[64], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[64], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 843 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17500,7 +17500,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) 
for diagram number 844 - VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -17517,7 +17517,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -17534,7 +17534,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[6] += amp_sv[0]; @@ -17558,7 +17558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 845 - VVV1_0( w_fp[0], w_fp[63], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[63], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 845 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17586,7 +17586,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 846 - VVV1_0( w_fp[0], w_fp[64], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[64], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 846 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17611,12 +17611,12 @@ namespace mg5amcCpu // *** DIAGRAM 847 OF 1240 *** // Wavefunction(s) for diagram number 847 - VVVV1P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 0., 0., w_fp[103] ); - VVVV3P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 0., 0., w_fp[22] ); - VVVV4P0_1( w_fp[0], w_fp[61], w_fp[5], 
COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[103] ); + VVVV3P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[22] ); + VVVV4P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 847 - VVV1_0( w_fp[8], w_fp[6], w_fp[103], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[103], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -17633,7 +17633,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[22], COUPs[0], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -17650,7 +17650,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -17671,12 +17671,12 @@ namespace mg5amcCpu // *** DIAGRAM 848 OF 1240 *** // Wavefunction(s) for diagram number 848 - VVVV1P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 0., 0., w_fp[105] ); - VVVV3P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 0., 0., w_fp[107] ); + VVVV1P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[105] ); + VVVV3P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[107] ); // Amplitude(s) for diagram number 848 - VVV1_0( w_fp[8], w_fp[5], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[105], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[18] -= amp_sv[0]; @@ -17693,7 +17693,7 @@ namespace mg5amcCpu 
jamp_sv[95] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[95], COUPs[0], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -17710,7 +17710,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[98] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[107], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -17731,12 +17731,12 @@ namespace mg5amcCpu // *** DIAGRAM 849 OF 1240 *** // Wavefunction(s) for diagram number 849 - VVVV1P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 0., 0., w_fp[115] ); - VVVV3P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 0., 0., w_fp[116] ); - VVVV4P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 0., 0., w_fp[117] ); + VVVV1P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[115] ); + VVVV3P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[116] ); + VVVV4P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[117] ); // Amplitude(s) for diagram number 849 - VVV1_0( w_fp[61], w_fp[6], w_fp[115], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[115], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[18] += amp_sv[0]; @@ -17753,7 +17753,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVV1_0( w_fp[61], w_fp[6], w_fp[116], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[116], COUPs[0], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[16] += amp_sv[0]; @@ -17770,7 +17770,7 @@ namespace mg5amcCpu jamp_sv[105] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[61], w_fp[6], w_fp[117], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[117], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] 
+= amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -17791,12 +17791,12 @@ namespace mg5amcCpu // *** DIAGRAM 850 OF 1240 *** // Wavefunction(s) for diagram number 850 - VVVV1P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 0., 0., w_fp[118] ); - VVVV3P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 0., 0., w_fp[119] ); - VVVV4P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 0., 0., w_fp[120] ); + VVVV1P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[118] ); + VVVV3P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[119] ); + VVVV4P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[120] ); // Amplitude(s) for diagram number 850 - VVV1_0( w_fp[61], w_fp[5], w_fp[118], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[118], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -17813,7 +17813,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[61], w_fp[5], w_fp[119], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[119], COUPs[0], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -17830,7 +17830,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[100] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[61], w_fp[5], w_fp[120], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[120], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -17854,7 +17854,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 851 - VVVV1_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -17871,7 +17871,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[61], 
w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -17888,7 +17888,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -17912,7 +17912,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 852 - VVV1_0( w_fp[8], w_fp[29], w_fp[90], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[29], w_fp[90], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 852 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17940,7 +17940,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 853 - VVV1_0( w_fp[61], w_fp[29], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[29], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 853 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17968,7 +17968,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 854 - VVV1_0( w_fp[61], w_fp[8], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[8], w_fp[96], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 854 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -17996,7 +17996,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 855 - VVV1_0( w_fp[90], w_fp[45], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[45], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 855 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId 
!= 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18016,7 +18016,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 856 - FFV1_0( w_fp[3], w_fp[44], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[44], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 856 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18032,7 +18032,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 857 - FFV1_0( w_fp[65], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 857 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18046,7 +18046,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 858 - FFV1_0( w_fp[3], w_fp[102], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 858 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18062,7 +18062,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 859 - FFV1_0( w_fp[65], w_fp[44], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[44], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 859 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18076,7 +18076,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 860 - VVV1_0( w_fp[0], w_fp[64], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[64], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 860 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18096,7 +18096,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 861 - FFV1_0( w_fp[3], w_fp[39], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[105], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; @@ -18105,7 +18105,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[95], COUPs[1], 1.0, &_fp[0] ); jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18114,7 +18114,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[107], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18130,7 +18130,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 862 - FFV1_0( w_fp[41], w_fp[39], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 862 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18146,7 +18146,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 863 - FFV1_0( w_fp[41], w_fp[102], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[102], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 863 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18160,7 +18160,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 
864 - FFV1_0( w_fp[62], w_fp[39], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 864 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18174,7 +18174,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 865 - VVV1_0( w_fp[90], w_fp[51], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[51], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 865 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18194,7 +18194,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 866 - FFV1_0( w_fp[3], w_fp[50], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[50], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 866 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18210,7 +18210,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 867 - FFV1_0( w_fp[65], w_fp[113], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[113], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 867 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18224,7 +18224,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 868 - FFV1_0( w_fp[3], w_fp[113], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 868 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18240,7 +18240,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 869 - FFV1_0( w_fp[65], w_fp[50], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[50], w_fp[0], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 869 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18254,7 +18254,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 870 - VVV1_0( w_fp[0], w_fp[63], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[63], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 870 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18274,7 +18274,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 871 - FFV1_0( w_fp[3], w_fp[47], w_fp[103], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[103], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; @@ -18283,7 +18283,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[22], COUPs[1], 1.0, &_fp[0] ); jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18292,7 +18292,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18308,7 +18308,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 872 - FFV1_0( w_fp[38], w_fp[47], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[90], COUPs[1], 1.0, &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 872 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18324,7 +18324,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 873 - FFV1_0( w_fp[38], w_fp[113], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[113], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 873 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18338,7 +18338,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 874 - FFV1_0( w_fp[104], w_fp[47], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[47], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 874 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18352,7 +18352,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 875 - VVV1_0( w_fp[90], w_fp[23], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[23], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 875 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18372,7 +18372,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 876 - FFV1_0( w_fp[48], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 876 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18388,7 +18388,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 877 - FFV1_0( w_fp[104], w_fp[93], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[93], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 877 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18402,7 +18402,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 878 - FFV1_0( w_fp[104], w_fp[2], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 878 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18418,7 +18418,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 879 - FFV1_0( w_fp[48], w_fp[93], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[93], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 879 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18432,7 +18432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 880 - VVV1_0( w_fp[0], w_fp[64], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[64], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 880 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18452,7 +18452,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 881 - FFV1_0( w_fp[38], w_fp[2], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[105], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; @@ -18461,7 +18461,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18470,7 +18470,7 @@ namespace 
mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[107], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18486,7 +18486,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 882 - VVV1_0( w_fp[90], w_fp[20], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[20], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 882 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18506,7 +18506,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 883 - FFV1_0( w_fp[40], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 883 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18522,7 +18522,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 884 - FFV1_0( w_fp[62], w_fp[93], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[93], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 884 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18536,7 +18536,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 885 - FFV1_0( w_fp[62], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 885 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18552,7 +18552,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 886 - 
FFV1_0( w_fp[40], w_fp[93], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[93], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 886 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18566,7 +18566,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 887 - VVV1_0( w_fp[0], w_fp[63], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[63], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 887 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18586,7 +18586,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 888 - FFV1_0( w_fp[41], w_fp[2], w_fp[103], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[103], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -18595,7 +18595,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18604,7 +18604,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -18620,7 +18620,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 889 - FFV1_0( w_fp[3], w_fp[18], w_fp[90], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[3], w_fp[18], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 889 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18640,7 +18640,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 890 - FFV1_0( w_fp[12], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 890 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18660,7 +18660,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 891 - FFV1_0( w_fp[3], w_fp[93], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[93], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 891 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18680,7 +18680,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 892 - FFV1_0( w_fp[65], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 892 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18700,7 +18700,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 893 - FFV1_0( w_fp[12], w_fp[93], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[93], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 893 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18716,7 +18716,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 894 - FFV1_0( w_fp[65], w_fp[18], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[18], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
if( channelId == 894 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18729,10 +18729,10 @@ namespace mg5amcCpu // *** DIAGRAM 895 OF 1240 *** // Wavefunction(s) for diagram number 895 - VVV1P0_1( w_fp[0], w_fp[66], COUPs[0], 0., 0., w_fp[65] ); + VVV1P0_1( w_fp[0], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[65] ); // Amplitude(s) for diagram number 895 - VVV1_0( w_fp[65], w_fp[13], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[13], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 895 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18760,7 +18760,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 896 - VVV1_0( w_fp[65], w_fp[11], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[11], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 896 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18788,7 +18788,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 897 - VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[37] += amp_sv[0]; @@ -18805,7 +18805,7 @@ namespace mg5amcCpu jamp_sv[100] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[12] += amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -18822,7 +18822,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &_fp[0] ); 
jamp_sv[2] -= amp_sv[0]; jamp_sv[3] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -18846,7 +18846,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 898 - VVV1_0( w_fp[56], w_fp[69], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[69], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 898 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18874,7 +18874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 899 - VVV1_0( w_fp[56], w_fp[70], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[70], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 899 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18902,7 +18902,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 900 - VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -18919,7 +18919,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -18936,7 +18936,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[107] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[3] += amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -18960,7 +18960,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 901 - VVV1_0( w_fp[0], w_fp[69], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( 
w_fp[0], w_fp[69], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 901 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -18988,7 +18988,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 902 - VVV1_0( w_fp[0], w_fp[70], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[70], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 902 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19013,12 +19013,12 @@ namespace mg5amcCpu // *** DIAGRAM 903 OF 1240 *** // Wavefunction(s) for diagram number 903 - VVVV1P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 0., 0., w_fp[93] ); - VVVV3P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 0., 0., w_fp[90] ); - VVVV4P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[93] ); + VVVV3P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[90] ); + VVVV4P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 903 - VVV1_0( w_fp[8], w_fp[6], w_fp[93], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[93], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -19035,7 +19035,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[90], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[90], COUPs[0], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -19052,7 +19052,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[12] += 
amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -19073,12 +19073,12 @@ namespace mg5amcCpu // *** DIAGRAM 904 OF 1240 *** // Wavefunction(s) for diagram number 904 - VVVV1P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 0., 0., w_fp[22] ); - VVVV3P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 0., 0., w_fp[103] ); - VVVV4P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 0., 0., w_fp[63] ); + VVVV1P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] ); + VVVV3P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[103] ); + VVVV4P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[63] ); // Amplitude(s) for diagram number 904 - VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[19] -= amp_sv[0]; @@ -19095,7 +19095,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[103], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[103], COUPs[0], 1.0, &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; jamp_sv[37] += amp_sv[0]; @@ -19112,7 +19112,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; jamp_sv[100] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[63], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[63], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[37] += amp_sv[0]; @@ -19133,12 +19133,12 @@ namespace mg5amcCpu // *** DIAGRAM 905 OF 1240 *** // Wavefunction(s) for diagram number 905 - VVVV1P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 0., 0., w_fp[107] ); - VVVV3P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 0., 0., w_fp[105] ); + VVVV1P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] ); + VVVV3P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[0], 
w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] ); // Amplitude(s) for diagram number 905 - VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[19] += amp_sv[0]; @@ -19155,7 +19155,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVV1_0( w_fp[66], w_fp[6], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[95], COUPs[0], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -19172,7 +19172,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[66], w_fp[6], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[105], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -19196,7 +19196,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 906 - VVV1_0( w_fp[66], w_fp[4], w_fp[118], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[118], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -19213,7 +19213,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[66], w_fp[4], w_fp[119], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[119], COUPs[0], 1.0, &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; @@ -19230,7 +19230,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[99] += amp_sv[0]; jamp_sv[100] -= amp_sv[0]; - VVV1_0( w_fp[66], w_fp[4], w_fp[120], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[120], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -19254,7 +19254,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 907 - 
VVVV1_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -19271,7 +19271,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -19288,7 +19288,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[20] += amp_sv[0]; @@ -19312,7 +19312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 908 - VVV1_0( w_fp[8], w_fp[27], w_fp[65], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[27], w_fp[65], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 908 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19340,7 +19340,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 909 - VVV1_0( w_fp[66], w_fp[27], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[27], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 909 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19368,7 +19368,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 910 - VVV1_0( w_fp[66], w_fp[8], w_fp[101], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[8], w_fp[101], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 910 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId 
!= 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19396,7 +19396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 911 - VVV1_0( w_fp[65], w_fp[37], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[37], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 911 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19416,7 +19416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 912 - FFV1_0( w_fp[3], w_fp[36], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[36], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 912 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19432,7 +19432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 913 - FFV1_0( w_fp[71], w_fp[114], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[114], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 913 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19446,7 +19446,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 914 - FFV1_0( w_fp[3], w_fp[114], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 914 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19462,7 +19462,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 915 - FFV1_0( w_fp[71], w_fp[36], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[36], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 915 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19476,7 +19476,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 916 - VVV1_0( w_fp[0], w_fp[70], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[70], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 916 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19496,7 +19496,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 917 - FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; @@ -19505,7 +19505,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[103], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[103], COUPs[1], 1.0, &_fp[0] ); jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19514,7 +19514,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[63], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19530,7 +19530,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 918 - FFV1_0( w_fp[41], w_fp[33], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 918 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19546,7 +19546,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 919 - 
FFV1_0( w_fp[41], w_fp[114], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[114], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 919 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19560,7 +19560,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 920 - FFV1_0( w_fp[62], w_fp[33], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 920 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19574,7 +19574,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 921 - VVV1_0( w_fp[65], w_fp[51], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[51], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 921 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19594,7 +19594,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 922 - FFV1_0( w_fp[3], w_fp[49], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[49], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 922 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19610,7 +19610,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 923 - FFV1_0( w_fp[71], w_fp[113], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[113], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 923 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19624,7 +19624,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 924 - FFV1_0( w_fp[3], w_fp[113], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[69], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 924 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19640,7 +19640,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 925 - FFV1_0( w_fp[71], w_fp[49], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[49], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 925 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19654,7 +19654,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 926 - VVV1_0( w_fp[0], w_fp[69], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[69], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 926 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19674,7 +19674,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 927 - FFV1_0( w_fp[3], w_fp[47], w_fp[93], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[93], COUPs[1], 1.0, &_fp[0] ); jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19683,7 +19683,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[90], COUPs[1], 1.0, &_fp[0] ); jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19692,7 +19692,7 @@ namespace mg5amcCpu jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], 
COUPs[1], 1.0, &_fp[0] ); jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19708,7 +19708,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 928 - FFV1_0( w_fp[46], w_fp[47], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 928 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19724,7 +19724,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 929 - FFV1_0( w_fp[46], w_fp[113], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[113], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 929 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19738,7 +19738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 930 - FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 930 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19752,7 +19752,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 931 - VVV1_0( w_fp[65], w_fp[54], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[54], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 931 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19772,7 +19772,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 932 - FFV1_0( w_fp[53], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 932 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19788,7 +19788,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 933 - FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 933 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19802,7 +19802,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 934 - FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 934 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19818,7 +19818,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 935 - FFV1_0( w_fp[53], w_fp[94], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[94], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 935 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19832,7 +19832,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 936 - VVV1_0( w_fp[0], w_fp[70], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[70], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 936 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19852,7 +19852,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 937 - FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; @@ -19861,7 +19861,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[103], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[103], COUPs[1], 1.0, &_fp[0] ); jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19870,7 +19870,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19886,7 +19886,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 938 - VVV1_0( w_fp[65], w_fp[20], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[20], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 938 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19906,7 +19906,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 939 - FFV1_0( w_fp[28], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 939 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19922,7 +19922,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 940 - FFV1_0( w_fp[62], w_fp[94], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[94], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 940 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19936,7 +19936,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 941 - 
FFV1_0( w_fp[62], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 941 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19952,7 +19952,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 942 - FFV1_0( w_fp[28], w_fp[94], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[94], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 942 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19966,7 +19966,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 943 - VVV1_0( w_fp[0], w_fp[69], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[69], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 943 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -19986,7 +19986,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 944 - FFV1_0( w_fp[41], w_fp[2], w_fp[93], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[93], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -19995,7 +19995,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -20004,7 +20004,7 @@ namespace mg5amcCpu jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], 
w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -20020,7 +20020,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 945 - FFV1_0( w_fp[3], w_fp[15], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[15], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 945 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20040,7 +20040,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 946 - FFV1_0( w_fp[14], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 946 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20060,7 +20060,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 947 - FFV1_0( w_fp[3], w_fp[94], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[94], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 947 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20080,7 +20080,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 948 - FFV1_0( w_fp[71], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 948 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20100,7 +20100,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 949 - FFV1_0( w_fp[14], w_fp[94], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[94], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
if( channelId == 949 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20116,7 +20116,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 950 - FFV1_0( w_fp[71], w_fp[15], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[15], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 950 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20129,10 +20129,10 @@ namespace mg5amcCpu // *** DIAGRAM 951 OF 1240 *** // Wavefunction(s) for diagram number 951 - VVV1P0_1( w_fp[0], w_fp[72], COUPs[0], 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[0], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[71] ); // Amplitude(s) for diagram number 951 - VVV1_0( w_fp[71], w_fp[13], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[13], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 951 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20160,7 +20160,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 952 - VVV1_0( w_fp[71], w_fp[10], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[10], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 952 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20188,7 +20188,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 953 - VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[43] += amp_sv[0]; @@ -20205,7 +20205,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], 
COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -20222,7 +20222,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[18] += amp_sv[0]; @@ -20246,7 +20246,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 954 - VVV1_0( w_fp[56], w_fp[74], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[74], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 954 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20274,7 +20274,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 955 - VVV1_0( w_fp[56], w_fp[75], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[75], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 955 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20302,7 +20302,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 956 - VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -20319,7 +20319,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; - VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[5] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -20336,7 +20336,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - VVVV4_0( w_fp[72], 
w_fp[4], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -20360,7 +20360,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 957 - VVV1_0( w_fp[0], w_fp[74], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[74], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 957 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20388,7 +20388,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 958 - VVV1_0( w_fp[0], w_fp[75], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[75], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 958 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20413,12 +20413,12 @@ namespace mg5amcCpu // *** DIAGRAM 959 OF 1240 *** // Wavefunction(s) for diagram number 959 - VVVV1P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 0., 0., w_fp[94] ); - VVVV3P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 0., 0., w_fp[65] ); - VVVV4P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[94] ); + VVVV3P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[65] ); + VVVV4P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 959 - VVV1_0( w_fp[8], w_fp[5], w_fp[94], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[94], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -20435,7 +20435,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[65], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[65], COUPs[0], 1.0, &_fp[0] ); 
jamp_sv[7] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -20452,7 +20452,7 @@ namespace mg5amcCpu jamp_sv[93] -= amp_sv[0]; jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -20473,12 +20473,12 @@ namespace mg5amcCpu // *** DIAGRAM 960 OF 1240 *** // Wavefunction(s) for diagram number 960 - VVVV1P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 0., 0., w_fp[90] ); - VVVV3P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 0., 0., w_fp[93] ); - VVVV4P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 0., 0., w_fp[69] ); + VVVV1P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[90] ); + VVVV3P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[93] ); + VVVV4P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[69] ); // Amplitude(s) for diagram number 960 - VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; @@ -20495,7 +20495,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[93], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[93], COUPs[0], 1.0, &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; jamp_sv[43] += amp_sv[0]; @@ -20512,7 +20512,7 @@ namespace mg5amcCpu jamp_sv[76] -= amp_sv[0]; jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[69], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[69], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[43] += amp_sv[0]; @@ -20536,7 +20536,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 961 - VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], &_fp[0] 
); + VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -20553,7 +20553,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[72], w_fp[5], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[95], COUPs[0], 1.0, &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; @@ -20570,7 +20570,7 @@ namespace mg5amcCpu jamp_sv[93] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - VVV1_0( w_fp[72], w_fp[5], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[105], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -20594,7 +20594,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 962 - VVV1_0( w_fp[72], w_fp[4], w_fp[115], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[115], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -20611,7 +20611,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVV1_0( w_fp[72], w_fp[4], w_fp[116], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[116], COUPs[0], 1.0, &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; @@ -20628,7 +20628,7 @@ namespace mg5amcCpu jamp_sv[76] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; - VVV1_0( w_fp[72], w_fp[4], w_fp[117], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[117], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -20652,7 +20652,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 963 - VVVV1_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += 
amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[18] -= amp_sv[0]; @@ -20669,7 +20669,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -20686,7 +20686,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; jamp_sv[14] += amp_sv[0]; @@ -20710,7 +20710,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 964 - VVV1_0( w_fp[8], w_fp[24], w_fp[71], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[24], w_fp[71], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 964 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20738,7 +20738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 965 - VVV1_0( w_fp[72], w_fp[24], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[24], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 965 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20766,7 +20766,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 966 - VVV1_0( w_fp[72], w_fp[8], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[8], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 966 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20794,7 +20794,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 967 - VVV1_0( w_fp[71], 
w_fp[37], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[37], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 967 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20814,7 +20814,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 968 - FFV1_0( w_fp[3], w_fp[35], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[35], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 968 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20830,7 +20830,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 969 - FFV1_0( w_fp[76], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 969 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20844,7 +20844,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 970 - FFV1_0( w_fp[3], w_fp[114], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 970 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20860,7 +20860,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 971 - FFV1_0( w_fp[76], w_fp[35], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[35], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 971 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20874,7 +20874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 972 - VVV1_0( w_fp[0], w_fp[75], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[75], w_fp[37], COUPs[0], 1.0, &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 972 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20894,7 +20894,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 973 - FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &_fp[0] ); jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -20903,7 +20903,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[93], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[93], COUPs[1], 1.0, &_fp[0] ); jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -20912,7 +20912,7 @@ namespace mg5amcCpu jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[69], COUPs[1], 1.0, &_fp[0] ); jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -20928,7 +20928,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 974 - FFV1_0( w_fp[38], w_fp[33], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 974 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20944,7 +20944,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 975 - FFV1_0( w_fp[38], w_fp[114], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[114], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
if( channelId == 975 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20958,7 +20958,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 976 - FFV1_0( w_fp[104], w_fp[33], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[33], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 976 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20972,7 +20972,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 977 - VVV1_0( w_fp[71], w_fp[45], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[45], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 977 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -20992,7 +20992,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 978 - FFV1_0( w_fp[3], w_fp[43], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[43], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 978 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21008,7 +21008,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 979 - FFV1_0( w_fp[76], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 979 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21022,7 +21022,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 980 - FFV1_0( w_fp[3], w_fp[102], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 980 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += 
cxabs2( amp_sv[0] ); @@ -21038,7 +21038,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 981 - FFV1_0( w_fp[76], w_fp[43], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[43], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 981 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21052,7 +21052,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 982 - VVV1_0( w_fp[0], w_fp[74], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[74], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 982 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21072,7 +21072,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 983 - FFV1_0( w_fp[3], w_fp[39], w_fp[94], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[94], COUPs[1], 1.0, &_fp[0] ); jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21081,7 +21081,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[65], COUPs[1], 1.0, &_fp[0] ); jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21090,7 +21090,7 @@ namespace mg5amcCpu jamp_sv[85] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21106,7 +21106,7 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 984 - FFV1_0( w_fp[46], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 984 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21122,7 +21122,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 985 - FFV1_0( w_fp[46], w_fp[102], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[102], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 985 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21136,7 +21136,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 986 - FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 986 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21150,7 +21150,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 987 - VVV1_0( w_fp[71], w_fp[54], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[54], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 987 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21170,7 +21170,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 988 - FFV1_0( w_fp[7], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 988 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21186,7 +21186,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 989 - FFV1_0( w_fp[99], w_fp[97], w_fp[5], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 989 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21200,7 +21200,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 990 - FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 990 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21216,7 +21216,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 991 - FFV1_0( w_fp[7], w_fp[97], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[97], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 991 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21230,7 +21230,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 992 - VVV1_0( w_fp[0], w_fp[75], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[75], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 992 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21250,7 +21250,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 993 - FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21259,7 +21259,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[93], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[93], 
COUPs[1], 1.0, &_fp[0] ); jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21268,7 +21268,7 @@ namespace mg5amcCpu jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21284,7 +21284,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 994 - VVV1_0( w_fp[71], w_fp[23], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[23], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 994 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21304,7 +21304,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 995 - FFV1_0( w_fp[25], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 995 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21320,7 +21320,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 996 - FFV1_0( w_fp[104], w_fp[97], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[97], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 996 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21334,7 +21334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 997 - FFV1_0( w_fp[104], w_fp[2], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 997 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21350,7 +21350,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 998 - FFV1_0( w_fp[25], w_fp[97], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[97], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 998 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21364,7 +21364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 999 - VVV1_0( w_fp[0], w_fp[74], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[74], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 999 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21384,7 +21384,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1000 - FFV1_0( w_fp[38], w_fp[2], w_fp[94], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[94], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21393,7 +21393,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21402,7 +21402,7 @@ namespace mg5amcCpu jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * 
amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -21418,7 +21418,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1001 - FFV1_0( w_fp[3], w_fp[17], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1001 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21438,7 +21438,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1002 - FFV1_0( w_fp[26], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1002 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21458,7 +21458,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1003 - FFV1_0( w_fp[3], w_fp[97], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[97], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1003 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21478,7 +21478,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1004 - FFV1_0( w_fp[76], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1004 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21498,7 +21498,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1005 - FFV1_0( w_fp[26], w_fp[97], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[97], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1005 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21514,7 +21514,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 1006 - FFV1_0( w_fp[76], w_fp[17], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[17], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1006 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21530,7 +21530,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1007 - VVV1_0( w_fp[56], w_fp[59], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[59], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1007 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21558,7 +21558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1008 - VVV1_0( w_fp[56], w_fp[1], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[1], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1008 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21586,7 +21586,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1009 - VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -21603,7 +21603,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -21620,7 +21620,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], 
COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -21644,7 +21644,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1010 - VVV1_0( w_fp[98], w_fp[108], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[108], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1010 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21672,7 +21672,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1011 - VVV1_0( w_fp[98], w_fp[1], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[1], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1011 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21700,7 +21700,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1012 - VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -21717,7 +21717,7 @@ namespace mg5amcCpu jamp_sv[101] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; jamp_sv[14] += amp_sv[0]; @@ -21734,7 +21734,7 @@ namespace mg5amcCpu jamp_sv[103] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -21758,7 +21758,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1013 - VVV1_0( w_fp[0], w_fp[108], w_fp[42], 
COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[108], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1013 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21786,7 +21786,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1014 - VVV1_0( w_fp[0], w_fp[59], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[59], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1014 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -21811,12 +21811,12 @@ namespace mg5amcCpu // *** DIAGRAM 1015 OF 1240 *** // Wavefunction(s) for diagram number 1015 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 0., 0., w_fp[11] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 0., 0., w_fp[42] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 0., 0., w_fp[76] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[11] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[42] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[76] ); // Amplitude(s) for diagram number 1015 - VVV1_0( w_fp[24], w_fp[6], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[11], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; @@ -21833,7 +21833,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVV1_0( w_fp[24], w_fp[6], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[42], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -21850,7 +21850,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[24], w_fp[6], w_fp[76], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[76], COUPs[0], 1.0, &_fp[0] ); 
jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -21871,12 +21871,12 @@ namespace mg5amcCpu // *** DIAGRAM 1016 OF 1240 *** // Wavefunction(s) for diagram number 1016 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 0., 0., w_fp[97] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 0., 0., w_fp[71] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[97] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[71] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 1016 - VVV1_0( w_fp[8], w_fp[6], w_fp[97], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[97], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -21893,7 +21893,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -21910,7 +21910,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -21934,7 +21934,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1017 - VVV1_0( w_fp[1], w_fp[24], w_fp[118], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[118], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[8] += amp_sv[0]; @@ -21951,7 +21951,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[24], w_fp[119], COUPs[0], 
&_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[119], COUPs[0], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -21968,7 +21968,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[24], w_fp[120], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[120], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -21992,7 +21992,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1018 - VVV1_0( w_fp[1], w_fp[8], w_fp[85], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[85], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -22009,7 +22009,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[112], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[112], COUPs[0], 1.0, &_fp[0] ); jamp_sv[21] += amp_sv[0]; jamp_sv[23] -= amp_sv[0]; jamp_sv[28] -= amp_sv[0]; @@ -22026,7 +22026,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -22050,7 +22050,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1019 - VVV1_0( w_fp[56], w_fp[68], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[68], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1019 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22078,7 +22078,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1020 - VVV1_0( w_fp[56], w_fp[1], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[1], w_fp[16], 
COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1020 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22106,7 +22106,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1021 - VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -22123,7 +22123,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -22140,7 +22140,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -22164,7 +22164,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1022 - VVV1_0( w_fp[101], w_fp[108], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[108], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1022 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22192,7 +22192,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1023 - VVV1_0( w_fp[101], w_fp[1], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[1], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1023 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22220,7 +22220,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 1024 - VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[25] -= amp_sv[0]; @@ -22237,7 +22237,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[20] += amp_sv[0]; @@ -22254,7 +22254,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[25] += amp_sv[0]; @@ -22278,7 +22278,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1025 - VVV1_0( w_fp[0], w_fp[108], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[108], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1025 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22306,7 +22306,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1026 - VVV1_0( w_fp[0], w_fp[68], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[68], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1026 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22334,7 +22334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1027 - VVV1_0( w_fp[27], w_fp[5], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[11], COUPs[0], 1.0, &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; 
jamp_sv[17] += amp_sv[0]; @@ -22351,7 +22351,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[27], w_fp[5], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[42], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -22368,7 +22368,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[27], w_fp[5], w_fp[76], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[76], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -22389,12 +22389,12 @@ namespace mg5amcCpu // *** DIAGRAM 1028 OF 1240 *** // Wavefunction(s) for diagram number 1028 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 0., 0., w_fp[10] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 0., 0., w_fp[16] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[10] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[16] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 1028 - VVV1_0( w_fp[8], w_fp[5], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[10], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -22411,7 +22411,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[16], COUPs[0], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[25] += amp_sv[0]; @@ -22428,7 +22428,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( 
w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; jamp_sv[25] += amp_sv[0]; @@ -22452,7 +22452,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1029 - VVV1_0( w_fp[1], w_fp[27], w_fp[115], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[115], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -22469,7 +22469,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[27], w_fp[116], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[116], COUPs[0], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[15] += amp_sv[0]; @@ -22486,7 +22486,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[27], w_fp[117], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[117], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -22510,7 +22510,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1030 - VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; @@ -22527,7 +22527,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &_fp[0] ); jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[26] -= amp_sv[0]; @@ -22544,7 +22544,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[21] -= 
amp_sv[0]; jamp_sv[25] -= amp_sv[0]; @@ -22568,7 +22568,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1031 - VVV1_0( w_fp[56], w_fp[67], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[67], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1031 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22596,7 +22596,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1032 - VVV1_0( w_fp[56], w_fp[1], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[1], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1032 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22624,7 +22624,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1033 - VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -22641,7 +22641,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -22658,7 +22658,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[3] += amp_sv[0]; @@ -22682,7 +22682,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1034 - VVV1_0( w_fp[96], w_fp[108], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[108], w_fp[4], 
COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1034 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22710,7 +22710,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1035 - VVV1_0( w_fp[96], w_fp[1], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[1], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1035 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22738,7 +22738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1036 - VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[23] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; @@ -22755,7 +22755,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -22772,7 +22772,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; jamp_sv[27] += amp_sv[0]; @@ -22796,7 +22796,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1037 - VVV1_0( w_fp[0], w_fp[108], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[108], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1037 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22824,7 +22824,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 1038 - VVV1_0( w_fp[0], w_fp[67], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[67], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1038 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -22852,7 +22852,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1039 - VVV1_0( w_fp[4], w_fp[29], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[11], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[11] -= amp_sv[0]; jamp_sv[17] -= amp_sv[0]; @@ -22869,7 +22869,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVV1_0( w_fp[4], w_fp[29], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[42], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[3] += amp_sv[0]; @@ -22886,7 +22886,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[4], w_fp[29], w_fp[76], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[76], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[3] += amp_sv[0]; @@ -22907,12 +22907,12 @@ namespace mg5amcCpu // *** DIAGRAM 1040 OF 1240 *** // Wavefunction(s) for diagram number 1040 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 0., 0., w_fp[76] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 0., 0., w_fp[42] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 0., 0., w_fp[11] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[76] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[42] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 1040 - VVV1_0( w_fp[8], w_fp[4], w_fp[76], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[76], COUPs[0], 1.0, &_fp[0] ); 
jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -22929,7 +22929,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[42], COUPs[0], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; jamp_sv[27] += amp_sv[0]; @@ -22946,7 +22946,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[90] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[11], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[27] += amp_sv[0]; @@ -22970,7 +22970,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1041 - VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[16] += amp_sv[0]; @@ -22987,7 +22987,7 @@ namespace mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[29], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[95], COUPs[0], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[9] += amp_sv[0]; @@ -23004,7 +23004,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[29], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[105], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -23028,7 +23028,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1042 - VVV1_0( w_fp[1], w_fp[8], w_fp[87], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[87], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[17] += amp_sv[0]; @@ -23045,7 +23045,7 @@ namespace 
mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[34], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[34], COUPs[0], 1.0, &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[23] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; @@ -23062,7 +23062,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[86], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[11] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -23086,7 +23086,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1043 - VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -23103,7 +23103,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -23120,7 +23120,7 @@ namespace mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[17] += amp_sv[0]; @@ -23137,7 +23137,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -23154,7 +23154,7 @@ namespace mg5amcCpu 
jamp_sv[95] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -23171,7 +23171,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; @@ -23188,7 +23188,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -23205,7 +23205,7 @@ namespace mg5amcCpu jamp_sv[113] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -23222,7 +23222,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; @@ -23246,7 +23246,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1044 - VVV1_0( w_fp[1], w_fp[30], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[30], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -23263,7 +23263,7 @@ namespace 
mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[31], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[31], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; @@ -23280,7 +23280,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[32], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[32], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -23304,7 +23304,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1045 - VVV1_0( w_fp[1], w_fp[8], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[92], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[17] += amp_sv[0]; @@ -23321,7 +23321,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; @@ -23338,7 +23338,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[106], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[106], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; @@ -23362,7 +23362,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1046 - FFV1_0( w_fp[58], w_fp[114], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[114], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1046 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23375,7 +23375,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 1047 - FFV1_0( w_fp[48], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1047 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23388,7 +23388,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1048 - FFV1_0( w_fp[104], w_fp[100], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[100], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1048 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23401,7 +23401,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1049 - FFV1_0( w_fp[104], w_fp[36], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[36], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1049 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23414,7 +23414,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1050 - FFV1_0( w_fp[48], w_fp[100], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[100], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1050 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23427,7 +23427,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1051 - FFV1_0( w_fp[58], w_fp[36], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[36], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1051 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23440,7 +23440,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1052 - FFV1_0( w_fp[60], w_fp[114], w_fp[5], COUPs[1], &_fp[0] 
); + FFV1_0( w_fp[60], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1052 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23453,7 +23453,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1053 - FFV1_0( w_fp[40], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1053 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23466,7 +23466,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1054 - FFV1_0( w_fp[62], w_fp[100], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[100], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1054 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23479,7 +23479,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1055 - FFV1_0( w_fp[62], w_fp[35], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[35], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1055 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23492,7 +23492,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1056 - FFV1_0( w_fp[40], w_fp[100], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[100], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1056 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23505,7 +23505,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1057 - FFV1_0( w_fp[60], w_fp[35], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[35], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1057 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23518,7 +23518,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1058 - FFV1_0( w_fp[3], w_fp[114], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1058 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23534,7 +23534,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1059 - FFV1_0( w_fp[12], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1059 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23548,7 +23548,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1060 - FFV1_0( w_fp[3], w_fp[100], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[100], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1060 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23564,7 +23564,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1061 - VVV1_0( w_fp[96], w_fp[1], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[1], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1061 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23584,7 +23584,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1062 - FFV1_0( w_fp[12], w_fp[100], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[100], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1062 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23598,7 +23598,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1063 - VVV1_0( w_fp[0], w_fp[67], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[67], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1063 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23618,7 +23618,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1064 - FFV1_0( w_fp[3], w_fp[33], w_fp[76], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[76], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; @@ -23627,7 +23627,7 @@ namespace mg5amcCpu jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[42], COUPs[1], 1.0, &_fp[0] ); jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -23636,7 +23636,7 @@ namespace mg5amcCpu jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[11], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -23652,7 +23652,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1065 - FFV1_0( w_fp[78], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1065 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += 
cxabs2( amp_sv[0] ); @@ -23665,7 +23665,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1066 - FFV1_0( w_fp[53], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1066 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23678,7 +23678,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1067 - FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1067 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23691,7 +23691,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1068 - FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1068 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23704,7 +23704,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1069 - FFV1_0( w_fp[53], w_fp[89], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[89], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1069 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23717,7 +23717,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1070 - FFV1_0( w_fp[78], w_fp[44], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[44], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1070 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23730,7 +23730,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 1071 - FFV1_0( w_fp[60], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1071 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23743,7 +23743,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1072 - FFV1_0( w_fp[28], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1072 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23756,7 +23756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1073 - FFV1_0( w_fp[62], w_fp[89], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[89], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1073 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23769,7 +23769,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1074 - FFV1_0( w_fp[62], w_fp[43], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[43], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1074 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23782,7 +23782,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1075 - FFV1_0( w_fp[28], w_fp[89], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[89], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1075 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23795,7 +23795,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1076 - FFV1_0( w_fp[60], w_fp[43], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], 
w_fp[43], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1076 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23808,7 +23808,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1077 - FFV1_0( w_fp[3], w_fp[102], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1077 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23824,7 +23824,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1078 - FFV1_0( w_fp[14], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1078 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23838,7 +23838,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1079 - FFV1_0( w_fp[3], w_fp[89], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[89], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1079 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23854,7 +23854,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1080 - VVV1_0( w_fp[101], w_fp[1], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[1], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1080 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23874,7 +23874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1081 - FFV1_0( w_fp[14], w_fp[89], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[89], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId 
== 1081 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23888,7 +23888,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1082 - VVV1_0( w_fp[0], w_fp[68], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[68], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1082 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23908,7 +23908,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1083 - FFV1_0( w_fp[3], w_fp[39], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[10], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; @@ -23917,7 +23917,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[16], COUPs[1], 1.0, &_fp[0] ); jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -23926,7 +23926,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -23942,7 +23942,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1084 - FFV1_0( w_fp[78], w_fp[113], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[113], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1084 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23955,7 +23955,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1085 - FFV1_0( w_fp[7], w_fp[113], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[113], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1085 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23968,7 +23968,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1086 - FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1086 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23981,7 +23981,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1087 - FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1087 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -23994,7 +23994,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1088 - FFV1_0( w_fp[7], w_fp[91], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[91], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1088 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24007,7 +24007,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1089 - FFV1_0( w_fp[78], w_fp[50], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[50], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1089 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24020,7 +24020,7 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1090 - FFV1_0( w_fp[58], w_fp[113], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[113], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1090 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24033,7 +24033,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1091 - FFV1_0( w_fp[25], w_fp[113], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[113], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1091 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24046,7 +24046,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1092 - FFV1_0( w_fp[104], w_fp[91], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[91], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1092 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24059,7 +24059,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1093 - FFV1_0( w_fp[104], w_fp[49], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[49], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1093 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24072,7 +24072,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1094 - FFV1_0( w_fp[25], w_fp[91], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[91], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1094 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24085,7 +24085,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1095 - FFV1_0( w_fp[58], 
w_fp[49], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[49], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1095 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24098,7 +24098,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1096 - FFV1_0( w_fp[3], w_fp[113], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1096 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24114,7 +24114,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1097 - FFV1_0( w_fp[26], w_fp[113], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[113], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1097 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24128,7 +24128,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1098 - FFV1_0( w_fp[3], w_fp[91], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[91], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1098 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24144,7 +24144,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1099 - VVV1_0( w_fp[98], w_fp[1], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[1], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1099 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24164,7 +24164,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1100 - FFV1_0( w_fp[26], w_fp[91], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[91], w_fp[0], COUPs[1], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1100 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24178,7 +24178,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1101 - VVV1_0( w_fp[0], w_fp[59], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[59], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1101 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24198,7 +24198,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1102 - FFV1_0( w_fp[3], w_fp[47], w_fp[97], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[97], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; @@ -24207,7 +24207,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24216,7 +24216,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24232,7 +24232,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1103 - FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1103 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24248,7 +24248,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1104 - FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1104 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24262,7 +24262,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1105 - FFV1_0( w_fp[78], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1105 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24278,7 +24278,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1106 - VVV1_0( w_fp[96], w_fp[1], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[1], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1106 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24298,7 +24298,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1107 - FFV1_0( w_fp[78], w_fp[18], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[18], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1107 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24312,7 +24312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1108 - VVV1_0( w_fp[0], w_fp[67], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[67], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1108 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24332,7 +24332,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1109 - FFV1_0( w_fp[46], w_fp[2], w_fp[76], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[76], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; @@ -24341,7 +24341,7 @@ namespace mg5amcCpu jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24350,7 +24350,7 @@ namespace mg5amcCpu jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24366,7 +24366,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1110 - FFV1_0( w_fp[104], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1110 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24382,7 +24382,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1111 - FFV1_0( w_fp[104], w_fp[15], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[15], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1111 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += 
cxabs2( amp_sv[0] ); @@ -24396,7 +24396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1112 - FFV1_0( w_fp[58], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1112 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24412,7 +24412,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1113 - VVV1_0( w_fp[101], w_fp[1], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[1], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1113 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24432,7 +24432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1114 - FFV1_0( w_fp[58], w_fp[15], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[15], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1114 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24446,7 +24446,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1115 - VVV1_0( w_fp[0], w_fp[68], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[68], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1115 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24466,7 +24466,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1116 - FFV1_0( w_fp[38], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -24475,7 +24475,7 @@ namespace mg5amcCpu jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 
1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24484,7 +24484,7 @@ namespace mg5amcCpu jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24500,7 +24500,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1117 - FFV1_0( w_fp[62], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1117 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24516,7 +24516,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1118 - FFV1_0( w_fp[62], w_fp[17], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[17], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1118 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24530,7 +24530,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1119 - FFV1_0( w_fp[60], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1119 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24546,7 +24546,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1120 - VVV1_0( w_fp[98], 
w_fp[1], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[1], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1120 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24566,7 +24566,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1121 - FFV1_0( w_fp[60], w_fp[17], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[17], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1121 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24580,7 +24580,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1122 - VVV1_0( w_fp[0], w_fp[59], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[59], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1122 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -24600,7 +24600,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1123 - FFV1_0( w_fp[41], w_fp[2], w_fp[97], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[97], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -24609,7 +24609,7 @@ namespace mg5amcCpu jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24618,7 +24618,7 @@ namespace mg5amcCpu jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], 
w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -24631,12 +24631,12 @@ namespace mg5amcCpu // *** DIAGRAM 1124 OF 1240 *** // Wavefunction(s) for diagram number 1124 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[71] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[97] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[71] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[97] ); // Amplitude(s) for diagram number 1124 - VVVV1_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -24653,7 +24653,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -24670,7 +24670,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -24687,7 +24687,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVVV1_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); 
jamp_sv[6] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -24704,7 +24704,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV3_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -24721,7 +24721,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV4_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -24738,7 +24738,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVVV1_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -24755,7 +24755,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV3_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -24772,7 +24772,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -24793,12 +24793,12 @@ namespace mg5amcCpu // *** DIAGRAM 1125 OF 1240 *** // Wavefunction(s) for diagram number 1125 - VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 0., 0., w_fp[59] ); - VVV1P0_1( 
w_fp[71], w_fp[5], COUPs[0], 0., 0., w_fp[20] ); - VVV1P0_1( w_fp[97], w_fp[5], COUPs[0], 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[59] ); + VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] ); + VVV1P0_1( w_fp[97], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[60] ); // Amplitude(s) for diagram number 1125 - VVV1_0( w_fp[8], w_fp[6], w_fp[59], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[59], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -24815,7 +24815,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[20], COUPs[0], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -24832,7 +24832,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -24853,12 +24853,12 @@ namespace mg5amcCpu // *** DIAGRAM 1126 OF 1240 *** // Wavefunction(s) for diagram number 1126 - VVV1P0_1( w_fp[21], w_fp[6], COUPs[0], 0., 0., w_fp[17] ); - VVV1P0_1( w_fp[71], w_fp[6], COUPs[0], 0., 0., w_fp[98] ); - VVV1P0_1( w_fp[97], w_fp[6], COUPs[0], 0., 0., w_fp[111] ); + VVV1P0_1( w_fp[21], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[71], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[98] ); + VVV1P0_1( w_fp[97], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 1126 - VVV1_0( w_fp[8], w_fp[5], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[17], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -24875,7 +24875,7 @@ namespace mg5amcCpu jamp_sv[98] += 
amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -24892,7 +24892,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -24916,7 +24916,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1127 - VVV1_0( w_fp[21], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[21], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -24933,7 +24933,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[71], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -24950,7 +24950,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[97], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[97], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[1] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -24971,22 +24971,22 @@ namespace mg5amcCpu // *** DIAGRAM 1128 OF 1240 *** // Wavefunction(s) for diagram number 1128 - FFV1_2( w_fp[3], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - FFV1_2( w_fp[3], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); - FFV1_2( w_fp[3], w_fp[97], COUPs[1], cIPD[0], cIPD[1], w_fp[68] ); + FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], 
cIPD[1], w_fp[10] ); + FFV1_2( w_fp[3], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] ); // Amplitude(s) for diagram number 1128 - FFV1_0( w_fp[16], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[90] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; - FFV1_0( w_fp[10], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[91] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; - FFV1_0( w_fp[68], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[68], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[90] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[94] += amp_sv[0]; @@ -24998,7 +24998,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1129 - FFV1_0( w_fp[3], w_fp[39], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[17], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25007,7 +25007,7 @@ namespace mg5amcCpu jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &_fp[0] ); jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25016,7 +25016,7 @@ namespace mg5amcCpu jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; 
@@ -25032,17 +25032,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1130 - FFV1_0( w_fp[41], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] += amp_sv[0]; jamp_sv[74] -= amp_sv[0]; jamp_sv[80] -= amp_sv[0]; jamp_sv[86] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[74] -= amp_sv[0]; jamp_sv[78] += amp_sv[0]; jamp_sv[80] -= amp_sv[0]; jamp_sv[84] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[39], w_fp[97], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[97], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] -= amp_sv[0]; jamp_sv[78] += amp_sv[0]; jamp_sv[84] += amp_sv[0]; @@ -25054,17 +25054,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1131 - FFV1_0( w_fp[16], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[114] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - FFV1_0( w_fp[10], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[115] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - FFV1_0( w_fp[68], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[68], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; @@ -25076,7 +25076,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1132 - FFV1_0( w_fp[3], w_fp[47], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[59], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25085,7 +25085,7 @@ namespace mg5amcCpu jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) 
* amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[20], COUPs[1], 1.0, &_fp[0] ); jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25094,7 +25094,7 @@ namespace mg5amcCpu jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25110,17 +25110,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1133 - FFV1_0( w_fp[38], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] += amp_sv[0]; jamp_sv[98] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[47], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[98] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[47], w_fp[97], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[97], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; @@ -25129,22 +25129,22 @@ namespace mg5amcCpu // *** DIAGRAM 1134 OF 1240 *** // Wavefunction(s) for diagram number 1134 - FFV1_1( w_fp[2], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); - FFV1_1( w_fp[2], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); - FFV1_1( w_fp[2], w_fp[97], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); + FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + 
FFV1_1( w_fp[2], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); // Amplitude(s) for diagram number 1134 - FFV1_0( w_fp[38], w_fp[23], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[23], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; jamp_sv[55] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[21], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[21], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; jamp_sv[49] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[71], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[71], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -25156,7 +25156,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1135 - FFV1_0( w_fp[38], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25165,7 +25165,7 @@ namespace mg5amcCpu jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25174,7 +25174,7 @@ namespace mg5amcCpu jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25190,17 +25190,17 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1136 - FFV1_0( w_fp[41], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[6] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; jamp_sv[54] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[21], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[21], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; jamp_sv[48] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[71], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[71], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -25212,7 +25212,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1137 - FFV1_0( w_fp[41], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25221,7 +25221,7 @@ namespace mg5amcCpu jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[20], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25230,7 +25230,7 @@ namespace mg5amcCpu jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25246,7 +25246,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 1138 - FFV1_0( w_fp[3], w_fp[23], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25255,7 +25255,7 @@ namespace mg5amcCpu jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[21], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[21], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25264,7 +25264,7 @@ namespace mg5amcCpu jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[71], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[71], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25280,7 +25280,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1139 - FFV1_0( w_fp[16], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25289,7 +25289,7 @@ namespace mg5amcCpu jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[10], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25298,7 +25298,7 @@ namespace mg5amcCpu jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] 
-= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[68], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[68], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25311,12 +25311,12 @@ namespace mg5amcCpu // *** DIAGRAM 1140 OF 1240 *** // Wavefunction(s) for diagram number 1140 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[68] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[29] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[10] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[68] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[29] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 1140 - VVVV1_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -25333,7 +25333,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV3_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[12] -= amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -25350,7 +25350,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVVV4_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -25367,7 +25367,7 @@ namespace mg5amcCpu jamp_sv[100] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVVV1_0( w_fp[29], w_fp[8], w_fp[4], 
w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -25384,7 +25384,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVVV3_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -25401,7 +25401,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV4_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -25418,7 +25418,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV1_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[3] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -25435,7 +25435,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; - VVVV3_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -25452,7 +25452,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV4_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -25473,12 +25473,12 @@ namespace mg5amcCpu // *** DIAGRAM 1141 OF 1240 *** // 
Wavefunction(s) for diagram number 1141 - VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 0., 0., w_fp[16] ); - VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 0., 0., w_fp[71] ); - VVV1P0_1( w_fp[10], w_fp[4], COUPs[0], 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[10], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 1141 - VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[12] -= amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -25495,7 +25495,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -25512,7 +25512,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -25533,12 +25533,12 @@ namespace mg5amcCpu // *** DIAGRAM 1142 OF 1240 *** // Wavefunction(s) for diagram number 1142 - VVV1P0_1( w_fp[68], w_fp[6], COUPs[0], 0., 0., w_fp[23] ); - VVV1P0_1( w_fp[29], w_fp[6], COUPs[0], 0., 0., w_fp[60] ); - VVV1P0_1( w_fp[10], w_fp[6], COUPs[0], 0., 0., w_fp[20] ); + VVV1P0_1( w_fp[68], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[29], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[10], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[20] ); // Amplitude(s) for diagram number 1142 - VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += 
amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -25555,7 +25555,7 @@ namespace mg5amcCpu jamp_sv[100] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[60], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[60], COUPs[0], 1.0, &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -25572,7 +25572,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[20], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[48] += amp_sv[0]; @@ -25596,7 +25596,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1143 - VVV1_0( w_fp[68], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[68], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[12] -= amp_sv[0]; @@ -25613,7 +25613,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[29], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[29], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -25630,7 +25630,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[10], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[10], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[3] += amp_sv[0]; jamp_sv[26] += amp_sv[0]; @@ -25651,22 +25651,22 @@ namespace mg5amcCpu // *** DIAGRAM 1144 OF 1240 *** // Wavefunction(s) for diagram number 1144 - FFV1_2( w_fp[3], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[59] ); - FFV1_2( w_fp[3], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[111] ); - FFV1_2( w_fp[3], w_fp[10], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); + 
FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] ); + FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[111] ); + FFV1_2( w_fp[3], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 1144 - FFV1_0( w_fp[59], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[59], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[66] += amp_sv[0]; jamp_sv[67] -= amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[71] += amp_sv[0]; - FFV1_0( w_fp[111], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[111], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[67] -= amp_sv[0]; jamp_sv[68] += amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[70] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[66] -= amp_sv[0]; jamp_sv[68] += amp_sv[0]; jamp_sv[70] += amp_sv[0]; @@ -25678,7 +25678,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1145 - FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25687,7 +25687,7 @@ namespace mg5amcCpu jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[60], COUPs[1], 1.0, &_fp[0] ); jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25696,7 +25696,7 @@ namespace mg5amcCpu jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[20], COUPs[1], 1.0, &_fp[0] ); 
jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25712,17 +25712,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1146 - FFV1_0( w_fp[41], w_fp[33], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[68], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] += amp_sv[0]; jamp_sv[50] -= amp_sv[0]; jamp_sv[56] -= amp_sv[0]; jamp_sv[62] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[50] -= amp_sv[0]; jamp_sv[54] += amp_sv[0]; jamp_sv[56] -= amp_sv[0]; jamp_sv[60] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[33], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[10], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] -= amp_sv[0]; jamp_sv[54] += amp_sv[0]; jamp_sv[60] += amp_sv[0]; @@ -25734,17 +25734,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1147 - FFV1_0( w_fp[59], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[59], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[108] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - FFV1_0( w_fp[111], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[111], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[109] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[108] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; @@ -25756,7 +25756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1148 - FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &_fp[0] ); jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) 
* amp_sv[0]; @@ -25765,7 +25765,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25774,7 +25774,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25790,17 +25790,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1149 - FFV1_0( w_fp[46], w_fp[47], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[68], COUPs[1], 1.0, &_fp[0] ); jamp_sv[97] += amp_sv[0]; jamp_sv[100] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[47], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[100] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[47], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[10], COUPs[1], 1.0, &_fp[0] ); jamp_sv[97] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; @@ -25809,22 +25809,22 @@ namespace mg5amcCpu // *** DIAGRAM 1150 OF 1240 *** // Wavefunction(s) for diagram number 1150 - FFV1_1( w_fp[2], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[17] ); - FFV1_1( w_fp[2], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[68] ); - FFV1_1( w_fp[2], w_fp[10], COUPs[1], cIPD[0], cIPD[1], w_fp[29] ); + FFV1_1( w_fp[2], 
w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] ); + FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] ); + FFV1_1( w_fp[2], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] ); // Amplitude(s) for diagram number 1150 - FFV1_0( w_fp[46], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; jamp_sv[79] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[68], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[68], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; jamp_sv[73] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[29], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[29], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[73] += amp_sv[0]; @@ -25836,7 +25836,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1151 - FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25845,7 +25845,7 @@ namespace mg5amcCpu jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25854,7 +25854,7 @@ namespace mg5amcCpu jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[20], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] += 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25870,17 +25870,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1152 - FFV1_0( w_fp[41], w_fp[17], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[17], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] += amp_sv[0]; jamp_sv[12] -= amp_sv[0]; jamp_sv[36] -= amp_sv[0]; jamp_sv[78] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[68], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[68], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[36] -= amp_sv[0]; jamp_sv[72] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[29], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[29], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[72] += amp_sv[0]; @@ -25892,7 +25892,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1153 - FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25901,7 +25901,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25910,7 +25910,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25926,7 +25926,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1154 - FFV1_0( w_fp[3], w_fp[17], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25935,7 +25935,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[68], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[68], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25944,7 +25944,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[29], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[29], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25960,7 +25960,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1155 - FFV1_0( w_fp[59], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[59], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25969,7 +25969,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[111], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[111], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * 
amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; @@ -25978,7 +25978,7 @@ namespace mg5amcCpu jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[98], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -25991,12 +25991,12 @@ namespace mg5amcCpu // *** DIAGRAM 1156 OF 1240 *** // Wavefunction(s) for diagram number 1156 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[98] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[27] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[98] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[27] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 1156 - VVVV1_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[18] -= amp_sv[0]; @@ -26013,7 +26013,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[18] -= amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -26030,7 +26030,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; - VVVV4_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[5] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -26047,7 
+26047,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - VVVV1_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -26064,7 +26064,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[97] -= amp_sv[0]; - VVVV3_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -26081,7 +26081,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - VVVV4_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -26098,7 +26098,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVVV1_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -26115,7 +26115,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[102] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - VVVV3_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -26132,7 +26132,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVVV4_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; 
jamp_sv[29] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -26153,12 +26153,12 @@ namespace mg5amcCpu // *** DIAGRAM 1157 OF 1240 *** // Wavefunction(s) for diagram number 1157 - VVV1P0_1( w_fp[98], w_fp[4], COUPs[0], 0., 0., w_fp[59] ); - VVV1P0_1( w_fp[27], w_fp[4], COUPs[0], 0., 0., w_fp[29] ); - VVV1P0_1( w_fp[111], w_fp[4], COUPs[0], 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[98], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[59] ); + VVV1P0_1( w_fp[27], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[29] ); + VVV1P0_1( w_fp[111], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 1157 - VVV1_0( w_fp[8], w_fp[5], w_fp[59], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[59], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[18] -= amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -26175,7 +26175,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[29], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[29], COUPs[0], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -26192,7 +26192,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[68], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -26213,12 +26213,12 @@ namespace mg5amcCpu // *** DIAGRAM 1158 OF 1240 *** // Wavefunction(s) for diagram number 1158 - VVV1P0_1( w_fp[98], w_fp[5], COUPs[0], 0., 0., w_fp[17] ); - VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 0., 0., w_fp[21] ); - VVV1P0_1( w_fp[111], w_fp[5], COUPs[0], 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[98], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[111], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[71] ); // Amplitude(s) for diagram number 1158 - VVV1_0( 
w_fp[8], w_fp[4], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[17], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -26235,7 +26235,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -26252,7 +26252,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[71], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[71], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[49] += amp_sv[0]; @@ -26276,7 +26276,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1159 - VVV1_0( w_fp[98], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[18] -= amp_sv[0]; @@ -26293,7 +26293,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVV1_0( w_fp[27], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -26310,7 +26310,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[97] -= amp_sv[0]; - VVV1_0( w_fp[111], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[111], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; jamp_sv[28] += amp_sv[0]; @@ -26331,22 +26331,22 @@ namespace mg5amcCpu // *** DIAGRAM 1160 OF 1240 *** // Wavefunction(s) for diagram number 1160 - FFV1_2( w_fp[3], w_fp[98], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - FFV1_2( w_fp[3], 
w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); - FFV1_2( w_fp[3], w_fp[111], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); + FFV1_2( w_fp[3], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); + FFV1_2( w_fp[3], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); // Amplitude(s) for diagram number 1160 - FFV1_0( w_fp[16], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[60] += amp_sv[0]; jamp_sv[61] -= amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[65] += amp_sv[0]; - FFV1_0( w_fp[20], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[61] -= amp_sv[0]; jamp_sv[62] += amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[64] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[60] -= amp_sv[0]; jamp_sv[62] += amp_sv[0]; jamp_sv[64] += amp_sv[0]; @@ -26358,7 +26358,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1161 - FFV1_0( w_fp[3], w_fp[33], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[17], COUPs[1], 1.0, &_fp[0] ); jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26367,7 +26367,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26376,7 +26376,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; - 
FFV1_0( w_fp[3], w_fp[33], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26392,17 +26392,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1162 - FFV1_0( w_fp[38], w_fp[33], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[98], COUPs[1], 1.0, &_fp[0] ); jamp_sv[49] += amp_sv[0]; jamp_sv[52] -= amp_sv[0]; jamp_sv[58] -= amp_sv[0]; jamp_sv[68] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[33], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[52] -= amp_sv[0]; jamp_sv[55] += amp_sv[0]; jamp_sv[58] -= amp_sv[0]; jamp_sv[66] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[33], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[49] -= amp_sv[0]; jamp_sv[55] += amp_sv[0]; jamp_sv[66] += amp_sv[0]; @@ -26414,17 +26414,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1163 - FFV1_0( w_fp[16], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[84] += amp_sv[0]; jamp_sv[85] -= amp_sv[0]; jamp_sv[87] -= amp_sv[0]; jamp_sv[89] += amp_sv[0]; - FFV1_0( w_fp[20], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[85] -= amp_sv[0]; jamp_sv[86] += amp_sv[0]; jamp_sv[87] -= amp_sv[0]; jamp_sv[88] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[84] -= amp_sv[0]; jamp_sv[86] += amp_sv[0]; jamp_sv[88] += amp_sv[0]; @@ -26436,7 +26436,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1164 - FFV1_0( w_fp[3], w_fp[39], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[59], COUPs[1], 1.0, &_fp[0] ); 
jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26445,7 +26445,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26454,7 +26454,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[68], COUPs[1], 1.0, &_fp[0] ); jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26470,17 +26470,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1165 - FFV1_0( w_fp[46], w_fp[39], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[98], COUPs[1], 1.0, &_fp[0] ); jamp_sv[73] += amp_sv[0]; jamp_sv[76] -= amp_sv[0]; jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[76] -= amp_sv[0]; jamp_sv[79] += amp_sv[0]; jamp_sv[82] -= amp_sv[0]; jamp_sv[90] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[39], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[111], COUPs[1], 1.0, &_fp[0] ); jamp_sv[73] -= amp_sv[0]; jamp_sv[79] += amp_sv[0]; jamp_sv[90] += amp_sv[0]; @@ -26489,22 +26489,22 @@ namespace mg5amcCpu // *** DIAGRAM 1166 OF 1240 *** // Wavefunction(s) for diagram number 1166 - FFV1_1( w_fp[2], w_fp[98], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); - FFV1_1( w_fp[2], w_fp[27], COUPs[1], cIPD[0], cIPD[1], 
w_fp[98] ); - FFV1_1( w_fp[2], w_fp[111], COUPs[1], cIPD[0], cIPD[1], w_fp[27] ); + FFV1_1( w_fp[2], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); + FFV1_1( w_fp[2], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] ); // Amplitude(s) for diagram number 1166 - FFV1_0( w_fp[46], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[98], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[98], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[27], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[27], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; @@ -26516,7 +26516,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1167 - FFV1_0( w_fp[46], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26525,7 +26525,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26534,7 +26534,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[71], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26550,17 +26550,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1168 - FFV1_0( w_fp[38], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] += amp_sv[0]; jamp_sv[18] -= amp_sv[0]; jamp_sv[42] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[98], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[98], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[42] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[27], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[27], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; @@ -26572,7 +26572,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1169 - FFV1_0( w_fp[38], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26581,7 +26581,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26590,7 +26590,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], 
w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26606,7 +26606,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1170 - FFV1_0( w_fp[3], w_fp[23], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26615,7 +26615,7 @@ namespace mg5amcCpu jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[98], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[98], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26624,7 +26624,7 @@ namespace mg5amcCpu jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[27], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[27], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26640,7 +26640,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1171 - FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26649,7 +26649,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[20], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[2], w_fp[24], 
COUPs[1], 1.0, &_fp[0] ); jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26658,7 +26658,7 @@ namespace mg5amcCpu jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[60], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26671,25 +26671,25 @@ namespace mg5amcCpu // *** DIAGRAM 1172 OF 1240 *** // Wavefunction(s) for diagram number 1172 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[60] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[20] ); - FFV1_2( w_fp[3], w_fp[60], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - FFV1_2( w_fp[3], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[27] ); - FFV1_2( w_fp[3], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[60] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[20] ); + FFV1_2( w_fp[3], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] ); + FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 1172 - FFV1_0( w_fp[16], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[42] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; jamp_sv[45] -= amp_sv[0]; jamp_sv[47] += amp_sv[0]; - FFV1_0( w_fp[27], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[27], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[43] -= amp_sv[0]; jamp_sv[44] 
+= amp_sv[0]; jamp_sv[45] -= amp_sv[0]; jamp_sv[46] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[42] -= amp_sv[0]; jamp_sv[44] += amp_sv[0]; jamp_sv[46] += amp_sv[0]; @@ -26698,12 +26698,12 @@ namespace mg5amcCpu // *** DIAGRAM 1173 OF 1240 *** // Wavefunction(s) for diagram number 1173 - VVV1P0_1( w_fp[60], w_fp[6], COUPs[0], 0., 0., w_fp[23] ); - VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 0., 0., w_fp[68] ); - VVV1P0_1( w_fp[20], w_fp[6], COUPs[0], 0., 0., w_fp[29] ); + VVV1P0_1( w_fp[60], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[20], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] ); // Amplitude(s) for diagram number 1173 - FFV1_0( w_fp[3], w_fp[77], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[23], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26712,7 +26712,7 @@ namespace mg5amcCpu jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[68], COUPs[1], 1.0, &_fp[0] ); jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26721,7 +26721,7 @@ namespace mg5amcCpu jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26737,17 +26737,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 1174 - FFV1_0( w_fp[41], w_fp[77], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[60], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] += amp_sv[0]; jamp_sv[26] -= amp_sv[0]; jamp_sv[32] -= amp_sv[0]; jamp_sv[38] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[26] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; jamp_sv[32] -= amp_sv[0]; jamp_sv[36] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[77], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[20], COUPs[1], 1.0, &_fp[0] ); jamp_sv[24] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -26756,22 +26756,22 @@ namespace mg5amcCpu // *** DIAGRAM 1175 OF 1240 *** // Wavefunction(s) for diagram number 1175 - FFV1_1( w_fp[2], w_fp[60], COUPs[1], cIPD[0], cIPD[1], w_fp[59] ); - FFV1_1( w_fp[2], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); - FFV1_1( w_fp[2], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_1( w_fp[2], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] ); + FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); + FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 1175 - FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[61] -= amp_sv[0]; jamp_sv[85] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[71], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[71], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[15] -= amp_sv[0]; jamp_sv[51] += amp_sv[0]; jamp_sv[61] -= amp_sv[0]; jamp_sv[75] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[21], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[21], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[51] += amp_sv[0]; jamp_sv[75] += amp_sv[0]; @@ -26783,7 +26783,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 1176 - FFV1_0( w_fp[52], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26792,7 +26792,7 @@ namespace mg5amcCpu jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26801,7 +26801,7 @@ namespace mg5amcCpu jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26817,17 +26817,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1177 - FFV1_0( w_fp[52], w_fp[47], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[60], COUPs[1], 1.0, &_fp[0] ); jamp_sv[99] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[101] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[47], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[20], COUPs[1], 1.0, &_fp[0] ); jamp_sv[99] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; jamp_sv[115] += amp_sv[0]; @@ -26839,7 +26839,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 1178 - FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &_fp[0] ); jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[9] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26848,7 +26848,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[85] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[71], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[71], w_fp[72], COUPs[1], 1.0, &_fp[0] ); jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26857,7 +26857,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[21], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[21], w_fp[72], COUPs[1], 1.0, &_fp[0] ); jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[9] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26873,7 +26873,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1179 - FFV1_0( w_fp[16], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26882,7 +26882,7 @@ namespace mg5amcCpu jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[27], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[27], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; @@ -26891,7 +26891,7 @@ namespace mg5amcCpu jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -26907,7 +26907,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1180 - VVV1_0( w_fp[60], w_fp[72], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[60], w_fp[72], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; jamp_sv[14] += amp_sv[0]; @@ -26924,7 +26924,7 @@ namespace mg5amcCpu jamp_sv[103] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVV1_0( w_fp[24], w_fp[72], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[72], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; @@ -26941,7 +26941,7 @@ namespace mg5amcCpu jamp_sv[104] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[20], w_fp[72], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[20], w_fp[72], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[42] -= amp_sv[0]; @@ -26965,7 +26965,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1181 - VVVV1_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[8] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -26982,7 +26982,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[14] -= amp_sv[0]; @@ -26999,7 +26999,7 @@ namespace mg5amcCpu 
jamp_sv[103] -= amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - VVVV4_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -27016,7 +27016,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV1_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -27033,7 +27033,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV3_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[43] += amp_sv[0]; @@ -27050,7 +27050,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVVV4_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[15] += amp_sv[0]; jamp_sv[26] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -27067,7 +27067,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV1_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -27084,7 +27084,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[115] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV3_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += 
amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -27101,7 +27101,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[24] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -27122,12 +27122,12 @@ namespace mg5amcCpu // *** DIAGRAM 1182 OF 1240 *** // Wavefunction(s) for diagram number 1182 - VVV1P0_1( w_fp[60], w_fp[1], COUPs[0], 0., 0., w_fp[72] ); - VVV1P0_1( w_fp[24], w_fp[1], COUPs[0], 0., 0., w_fp[60] ); - VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 0., 0., w_fp[24] ); + VVV1P0_1( w_fp[60], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[72] ); + VVV1P0_1( w_fp[24], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 1182 - VVV1_0( w_fp[8], w_fp[6], w_fp[72], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[72], COUPs[0], 1.0, &_fp[0] ); jamp_sv[8] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[24] -= amp_sv[0]; @@ -27144,7 +27144,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[26] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -27161,7 +27161,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[24], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[24], COUPs[0], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -27185,7 +27185,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1183 - VVV1_0( w_fp[1], w_fp[8], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[23], COUPs[0], 1.0, &_fp[0] ); 
jamp_sv[9] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[24] += amp_sv[0]; @@ -27202,7 +27202,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[68], COUPs[0], 1.0, &_fp[0] ); jamp_sv[15] += amp_sv[0]; jamp_sv[26] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -27219,7 +27219,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[24] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -27243,7 +27243,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1184 - FFV1_0( w_fp[3], w_fp[47], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[72], COUPs[1], 1.0, &_fp[0] ); jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27252,7 +27252,7 @@ namespace mg5amcCpu jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &_fp[0] ); jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27261,7 +27261,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27277,17 +27277,17 @@ namespace mg5amcCpu // (none) 
// Amplitude(s) for diagram number 1185 - FFV1_0( w_fp[16], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - FFV1_0( w_fp[27], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[27], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[103] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; @@ -27299,7 +27299,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1186 - FFV1_0( w_fp[41], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27308,7 +27308,7 @@ namespace mg5amcCpu jamp_sv[38] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); jamp_sv[14] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[26] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27317,7 +27317,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[24] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27333,17 +27333,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 1187 - FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[8] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[60] -= amp_sv[0]; jamp_sv[84] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[71], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[71], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[14] -= amp_sv[0]; jamp_sv[50] += amp_sv[0]; jamp_sv[60] -= amp_sv[0]; jamp_sv[74] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[21], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[21], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[8] -= amp_sv[0]; jamp_sv[50] += amp_sv[0]; jamp_sv[74] += amp_sv[0]; @@ -27352,25 +27352,25 @@ namespace mg5amcCpu // *** DIAGRAM 1188 OF 1240 *** // Wavefunction(s) for diagram number 1188 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[71] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[59] ); - FFV1_2( w_fp[3], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[24] ); - FFV1_2( w_fp[3], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); - FFV1_2( w_fp[3], w_fp[59], COUPs[1], cIPD[0], cIPD[1], w_fp[72] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[71] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[59] ); + FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] ); + FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); + FFV1_2( w_fp[3], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] ); // Amplitude(s) for diagram number 1188 - FFV1_0( w_fp[24], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[24], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[36] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; jamp_sv[39] -= amp_sv[0]; jamp_sv[41] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[60], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[37] -= amp_sv[0]; jamp_sv[38] += amp_sv[0]; jamp_sv[39] -= amp_sv[0]; jamp_sv[40] += amp_sv[0]; - FFV1_0( w_fp[72], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[72], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[36] -= amp_sv[0]; jamp_sv[38] += amp_sv[0]; jamp_sv[40] += amp_sv[0]; @@ -27379,12 +27379,12 @@ namespace mg5amcCpu // *** DIAGRAM 1189 OF 1240 *** // Wavefunction(s) for diagram number 1189 - VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 0., 0., w_fp[98] ); - VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 0., 0., w_fp[27] ); - VVV1P0_1( w_fp[59], w_fp[5], COUPs[0], 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[98] ); + VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[27] ); + VVV1P0_1( w_fp[59], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] ); // Amplitude(s) for diagram number 1189 - FFV1_0( w_fp[3], w_fp[77], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[98], COUPs[1], 1.0, &_fp[0] ); jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27393,7 +27393,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27402,7 +27402,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &_fp[0] ); jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[36] -= cxtype( 0, 1 ) 
* amp_sv[0]; @@ -27418,17 +27418,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1190 - FFV1_0( w_fp[38], w_fp[77], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[25] += amp_sv[0]; jamp_sv[28] -= amp_sv[0]; jamp_sv[34] -= amp_sv[0]; jamp_sv[44] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[77], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[28] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; jamp_sv[34] -= amp_sv[0]; jamp_sv[42] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[77], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[59], COUPs[1], 1.0, &_fp[0] ); jamp_sv[25] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; jamp_sv[42] += amp_sv[0]; @@ -27437,22 +27437,22 @@ namespace mg5amcCpu // *** DIAGRAM 1191 OF 1240 *** // Wavefunction(s) for diagram number 1191 - FFV1_1( w_fp[2], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[29] ); - FFV1_1( w_fp[2], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[68] ); - FFV1_1( w_fp[2], w_fp[59], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] ); + FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] ); + FFV1_1( w_fp[2], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 1191 - FFV1_0( w_fp[52], w_fp[29], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[29], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[67] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[68], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[68], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[21] -= amp_sv[0]; jamp_sv[53] += amp_sv[0]; jamp_sv[67] -= amp_sv[0]; jamp_sv[99] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[53] += 
amp_sv[0]; jamp_sv[99] += amp_sv[0]; @@ -27464,7 +27464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1192 - FFV1_0( w_fp[52], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27473,7 +27473,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27482,7 +27482,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -27498,17 +27498,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1193 - FFV1_0( w_fp[52], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[75] += amp_sv[0]; jamp_sv[77] -= amp_sv[0]; jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[77] -= amp_sv[0]; jamp_sv[85] += amp_sv[0]; jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[39], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[59], COUPs[1], 1.0, &_fp[0] ); jamp_sv[75] -= amp_sv[0]; jamp_sv[85] += amp_sv[0]; jamp_sv[91] += 
amp_sv[0]; @@ -27520,7 +27520,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1194 - FFV1_0( w_fp[3], w_fp[29], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[29], w_fp[66], COUPs[1], 1.0, &_fp[0] ); jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27529,7 +27529,7 @@ namespace mg5amcCpu jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[68], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[68], w_fp[66], COUPs[1], 1.0, &_fp[0] ); jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -27538,7 +27538,7 @@ namespace mg5amcCpu jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[23], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[66], COUPs[1], 1.0, &_fp[0] ); jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -27554,7 +27554,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1195 - FFV1_0( w_fp[24], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[24], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); jamp_sv[36] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27563,7 +27563,7 @@ namespace mg5amcCpu jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[60], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; @@ 
-27572,7 +27572,7 @@ namespace mg5amcCpu jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[72], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[72], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); jamp_sv[36] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -27588,7 +27588,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1196 - VVV1_0( w_fp[21], w_fp[66], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[21], w_fp[66], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[20] += amp_sv[0]; @@ -27605,7 +27605,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVV1_0( w_fp[71], w_fp[66], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[66], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -27622,7 +27622,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[98] -= amp_sv[0]; jamp_sv[99] += amp_sv[0]; - VVV1_0( w_fp[59], w_fp[66], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[59], w_fp[66], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[10] += amp_sv[0]; jamp_sv[11] -= amp_sv[0]; jamp_sv[36] -= amp_sv[0]; @@ -27646,7 +27646,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1197 - VVVV1_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[10] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[25] -= amp_sv[0]; @@ -27663,7 +27663,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - VVVV3_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[10] += amp_sv[0]; jamp_sv[11] -= 
amp_sv[0]; jamp_sv[20] -= amp_sv[0]; @@ -27680,7 +27680,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; - VVVV4_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; jamp_sv[25] += amp_sv[0]; @@ -27697,7 +27697,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; - VVVV1_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -27714,7 +27714,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVVV3_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; jamp_sv[37] += amp_sv[0]; @@ -27731,7 +27731,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV4_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[21] += amp_sv[0]; jamp_sv[28] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -27748,7 +27748,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV1_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -27765,7 +27765,7 @@ namespace mg5amcCpu jamp_sv[94] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVVV3_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], 
COUPs[2], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; jamp_sv[36] += amp_sv[0]; @@ -27782,7 +27782,7 @@ namespace mg5amcCpu jamp_sv[99] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV4_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[25] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -27803,12 +27803,12 @@ namespace mg5amcCpu // *** DIAGRAM 1198 OF 1240 *** // Wavefunction(s) for diagram number 1198 - VVV1P0_1( w_fp[21], w_fp[1], COUPs[0], 0., 0., w_fp[66] ); - VVV1P0_1( w_fp[71], w_fp[1], COUPs[0], 0., 0., w_fp[21] ); - VVV1P0_1( w_fp[59], w_fp[1], COUPs[0], 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[21], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[66] ); + VVV1P0_1( w_fp[71], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[59], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[71] ); // Amplitude(s) for diagram number 1198 - VVV1_0( w_fp[8], w_fp[5], w_fp[66], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[66], COUPs[0], 1.0, &_fp[0] ); jamp_sv[10] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[25] -= amp_sv[0]; @@ -27825,7 +27825,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -27842,7 +27842,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[71], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[71], COUPs[0], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -27866,7 +27866,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1199 - VVV1_0( w_fp[1], w_fp[8], w_fp[98], COUPs[0], &_fp[0] ); + 
VVV1_0( w_fp[1], w_fp[8], w_fp[98], COUPs[0], 1.0, &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; jamp_sv[25] += amp_sv[0]; @@ -27883,7 +27883,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); jamp_sv[21] += amp_sv[0]; jamp_sv[28] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -27900,7 +27900,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[16], COUPs[0], 1.0, &_fp[0] ); jamp_sv[11] += amp_sv[0]; jamp_sv[25] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -27924,7 +27924,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1200 - FFV1_0( w_fp[3], w_fp[39], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[66], COUPs[1], 1.0, &_fp[0] ); jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27933,7 +27933,7 @@ namespace mg5amcCpu jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27942,7 +27942,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; @@ 
-27958,17 +27958,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1201 - FFV1_0( w_fp[24], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[24], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[78] += amp_sv[0]; jamp_sv[79] -= amp_sv[0]; jamp_sv[81] -= amp_sv[0]; jamp_sv[83] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[79] -= amp_sv[0]; jamp_sv[80] += amp_sv[0]; jamp_sv[81] -= amp_sv[0]; jamp_sv[82] += amp_sv[0]; - FFV1_0( w_fp[72], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[72], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[78] -= amp_sv[0]; jamp_sv[80] += amp_sv[0]; jamp_sv[82] += amp_sv[0]; @@ -27980,7 +27980,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1202 - FFV1_0( w_fp[38], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27989,7 +27989,7 @@ namespace mg5amcCpu jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -27998,7 +27998,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28014,17 +28014,17 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 1203 - FFV1_0( w_fp[38], w_fp[29], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[29], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[10] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[66] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[68], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[68], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[20] -= amp_sv[0]; jamp_sv[52] += amp_sv[0]; jamp_sv[66] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[23], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[23], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[10] -= amp_sv[0]; jamp_sv[52] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; @@ -28033,25 +28033,25 @@ namespace mg5amcCpu // *** DIAGRAM 1204 OF 1240 *** // Wavefunction(s) for diagram number 1204 - VVVV1P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[23] ); - VVVV3P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[68] ); - VVVV4P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[29] ); - FFV1_2( w_fp[3], w_fp[23], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); - FFV1_2( w_fp[3], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); - FFV1_2( w_fp[3], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[66] ); + VVVV1P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] ); + VVVV3P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[68] ); + VVVV4P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[29] ); + FFV1_2( w_fp[3], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); + FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] ); // Amplitude(s) for diagram number 1204 - FFV1_0( w_fp[71], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[30] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; jamp_sv[33] -= amp_sv[0]; jamp_sv[35] += amp_sv[0]; - FFV1_0( w_fp[21], 
w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[31] -= amp_sv[0]; jamp_sv[32] += amp_sv[0]; jamp_sv[33] -= amp_sv[0]; jamp_sv[34] += amp_sv[0]; - FFV1_0( w_fp[66], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[66], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[30] -= amp_sv[0]; jamp_sv[32] += amp_sv[0]; jamp_sv[34] += amp_sv[0]; @@ -28060,12 +28060,12 @@ namespace mg5amcCpu // *** DIAGRAM 1205 OF 1240 *** // Wavefunction(s) for diagram number 1205 - VVV1P0_1( w_fp[23], w_fp[4], COUPs[0], 0., 0., w_fp[72] ); - VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 0., 0., w_fp[60] ); - VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 0., 0., w_fp[24] ); + VVV1P0_1( w_fp[23], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[72] ); + VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 1205 - FFV1_0( w_fp[3], w_fp[77], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[72], COUPs[1], 1.0, &_fp[0] ); jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28074,7 +28074,7 @@ namespace mg5amcCpu jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[60], COUPs[1], 1.0, &_fp[0] ); jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28083,7 +28083,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[30] -= cxtype( 
0, 1 ) * amp_sv[0]; jamp_sv[32] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28099,17 +28099,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1206 - FFV1_0( w_fp[46], w_fp[77], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[23], COUPs[1], 1.0, &_fp[0] ); jamp_sv[27] += amp_sv[0]; jamp_sv[29] -= amp_sv[0]; jamp_sv[40] -= amp_sv[0]; jamp_sv[46] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[77], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[68], COUPs[1], 1.0, &_fp[0] ); jamp_sv[29] -= amp_sv[0]; jamp_sv[37] += amp_sv[0]; jamp_sv[40] -= amp_sv[0]; jamp_sv[43] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[27] -= amp_sv[0]; jamp_sv[37] += amp_sv[0]; jamp_sv[43] += amp_sv[0]; @@ -28118,22 +28118,22 @@ namespace mg5amcCpu // *** DIAGRAM 1207 OF 1240 *** // Wavefunction(s) for diagram number 1207 - FFV1_1( w_fp[2], w_fp[23], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); - FFV1_1( w_fp[2], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - FFV1_1( w_fp[2], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[27] ); + FFV1_1( w_fp[2], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); + FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] ); // Amplitude(s) for diagram number 1207 - FFV1_0( w_fp[52], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[23] -= amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[16], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[16], w_fp[4], COUPs[1], 1.0, &_fp[0] ); jamp_sv[23] -= amp_sv[0]; jamp_sv[77] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[27], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[27], w_fp[4], COUPs[1], 1.0, &_fp[0] 
); jamp_sv[17] -= amp_sv[0]; jamp_sv[77] += amp_sv[0]; jamp_sv[101] += amp_sv[0]; @@ -28145,7 +28145,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1208 - FFV1_0( w_fp[52], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28154,7 +28154,7 @@ namespace mg5amcCpu jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28163,7 +28163,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28179,17 +28179,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1209 - FFV1_0( w_fp[52], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); jamp_sv[51] += amp_sv[0]; jamp_sv[53] -= amp_sv[0]; jamp_sv[64] -= amp_sv[0]; jamp_sv[70] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[33], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[68], COUPs[1], 1.0, &_fp[0] ); jamp_sv[53] -= amp_sv[0]; jamp_sv[61] += amp_sv[0]; jamp_sv[64] -= amp_sv[0]; jamp_sv[67] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); jamp_sv[51] -= amp_sv[0]; 
jamp_sv[61] += amp_sv[0]; jamp_sv[67] += amp_sv[0]; @@ -28201,7 +28201,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1210 - FFV1_0( w_fp[3], w_fp[77], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[61], COUPs[1], 1.0, &_fp[0] ); jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28210,7 +28210,7 @@ namespace mg5amcCpu jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[16], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[16], w_fp[61], COUPs[1], 1.0, &_fp[0] ); jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -28219,7 +28219,7 @@ namespace mg5amcCpu jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[27], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[27], w_fp[61], COUPs[1], 1.0, &_fp[0] ); jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -28235,7 +28235,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1211 - FFV1_0( w_fp[71], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); jamp_sv[30] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28244,7 +28244,7 @@ namespace mg5amcCpu jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[21], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); jamp_sv[31] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[33] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28253,7 +28253,7 @@ namespace mg5amcCpu jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[66], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[66], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); jamp_sv[30] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[32] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[34] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -28269,7 +28269,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1212 - VVV1_0( w_fp[23], w_fp[61], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[23], w_fp[61], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -28286,7 +28286,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[68], w_fp[61], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[68], w_fp[61], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[22] += amp_sv[0]; jamp_sv[23] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; @@ -28303,7 +28303,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[100] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[29], w_fp[61], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[29], w_fp[61], w_fp[8], COUPs[0], 1.0, &_fp[0] ); jamp_sv[16] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; @@ -28327,7 +28327,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1213 - VVVV1_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[16] += amp_sv[0]; jamp_sv[22] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; @@ -28344,7 +28344,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - VVVV3_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] 
); jamp_sv[16] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[22] -= amp_sv[0]; @@ -28361,7 +28361,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV4_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[17] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[27] += amp_sv[0]; @@ -28378,7 +28378,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV1_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -28395,7 +28395,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV3_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -28412,7 +28412,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[23] += amp_sv[0]; jamp_sv[29] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -28429,7 +28429,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV1_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -28446,7 +28446,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV3_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + 
VVVV3_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -28463,7 +28463,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV4_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -28484,12 +28484,12 @@ namespace mg5amcCpu // *** DIAGRAM 1214 OF 1240 *** // Wavefunction(s) for diagram number 1214 - VVV1P0_1( w_fp[23], w_fp[1], COUPs[0], 0., 0., w_fp[61] ); - VVV1P0_1( w_fp[68], w_fp[1], COUPs[0], 0., 0., w_fp[23] ); - VVV1P0_1( w_fp[29], w_fp[1], COUPs[0], 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[23], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[61] ); + VVV1P0_1( w_fp[68], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[29], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 1214 - VVV1_0( w_fp[8], w_fp[4], w_fp[61], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[61], COUPs[0], 1.0, &_fp[0] ); jamp_sv[16] += amp_sv[0]; jamp_sv[22] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; @@ -28506,7 +28506,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -28523,7 +28523,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[68], COUPs[0], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; @@ -28547,7 +28547,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1215 - VVV1_0( w_fp[1], 
w_fp[8], w_fp[72], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[72], COUPs[0], 1.0, &_fp[0] ); jamp_sv[17] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; jamp_sv[27] += amp_sv[0]; @@ -28564,7 +28564,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[60], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[60], COUPs[0], 1.0, &_fp[0] ); jamp_sv[23] += amp_sv[0]; jamp_sv[29] -= amp_sv[0]; jamp_sv[31] += amp_sv[0]; @@ -28581,7 +28581,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); jamp_sv[17] += amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[30] += amp_sv[0]; @@ -28605,7 +28605,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1216 - FFV1_0( w_fp[3], w_fp[33], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[61], COUPs[1], 1.0, &_fp[0] ); jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28614,7 +28614,7 @@ namespace mg5amcCpu jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28623,7 +28623,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[68], COUPs[1], 1.0, &_fp[0] ); jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28639,17 +28639,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1217 - FFV1_0( w_fp[71], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[54] += amp_sv[0]; jamp_sv[55] -= amp_sv[0]; jamp_sv[57] -= amp_sv[0]; jamp_sv[59] += amp_sv[0]; - FFV1_0( w_fp[21], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[55] -= amp_sv[0]; jamp_sv[56] += amp_sv[0]; jamp_sv[57] -= amp_sv[0]; jamp_sv[58] += amp_sv[0]; - FFV1_0( w_fp[66], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[66], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[54] -= amp_sv[0]; jamp_sv[56] += amp_sv[0]; jamp_sv[58] += amp_sv[0]; @@ -28661,7 +28661,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1218 - FFV1_0( w_fp[46], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28670,7 +28670,7 @@ namespace mg5amcCpu jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; @@ -28679,7 +28679,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[37] += cxtype( 0, 1 ) * 
amp_sv[0]; @@ -28695,17 +28695,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1219 - FFV1_0( w_fp[46], w_fp[77], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[16] += amp_sv[0]; jamp_sv[22] -= amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[16], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[16], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[22] -= amp_sv[0]; jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[27], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[27], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[16] -= amp_sv[0]; jamp_sv[76] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; @@ -28717,7 +28717,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1220 - VVVV1_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28734,7 +28734,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28751,7 +28751,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -28768,7 +28768,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[79], 
w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28785,7 +28785,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28802,7 +28802,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[19] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -28819,7 +28819,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -28836,7 +28836,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -28853,7 +28853,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[20] -= amp_sv[0]; jamp_sv[22] -= amp_sv[0]; @@ -28874,12 +28874,12 @@ namespace mg5amcCpu // *** DIAGRAM 1221 OF 1240 *** // Wavefunction(s) for diagram number 1221 - VVV1P0_1( w_fp[0], w_fp[73], 
COUPs[0], 0., 0., w_fp[27] ); - VVV1P0_1( w_fp[0], w_fp[79], COUPs[0], 0., 0., w_fp[1] ); - VVV1P0_1( w_fp[0], w_fp[80], COUPs[0], 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[0], w_fp[73], COUPs[0], 1.0, 0., 0., w_fp[27] ); + VVV1P0_1( w_fp[0], w_fp[79], COUPs[0], 1.0, 0., 0., w_fp[1] ); + VVV1P0_1( w_fp[0], w_fp[80], COUPs[0], 1.0, 0., 0., w_fp[16] ); // Amplitude(s) for diagram number 1221 - VVV1_0( w_fp[8], w_fp[6], w_fp[27], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[27], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28896,7 +28896,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[1], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[1], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28913,7 +28913,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -28937,7 +28937,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1222 - VVV1_0( w_fp[73], w_fp[6], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[73], w_fp[6], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28954,7 +28954,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[79], w_fp[6], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[79], w_fp[6], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -28971,7 +28971,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[80], 
w_fp[6], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[80], w_fp[6], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -28995,7 +28995,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1223 - FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29004,7 +29004,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29013,7 +29013,7 @@ namespace mg5amcCpu jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -29029,17 +29029,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1224 - FFV1_0( w_fp[3], w_fp[113], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[73], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] += amp_sv[0]; jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[113], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[79], COUPs[1], 1.0, &_fp[0] ); jamp_sv[97] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[113], w_fp[80], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[80], COUPs[1], 1.0, &_fp[0] ); jamp_sv[96] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; @@ -29051,7 +29051,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1225 - FFV1_0( w_fp[41], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29060,7 +29060,7 @@ namespace mg5amcCpu jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[1], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29069,7 +29069,7 @@ namespace mg5amcCpu jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -29085,17 +29085,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1226 - FFV1_0( w_fp[62], w_fp[2], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[73], COUPs[1], 1.0, &_fp[0] ); jamp_sv[32] += amp_sv[0]; jamp_sv[38] -= amp_sv[0]; jamp_sv[62] -= amp_sv[0]; jamp_sv[86] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); jamp_sv[38] -= amp_sv[0]; jamp_sv[56] += amp_sv[0]; jamp_sv[62] -= amp_sv[0]; jamp_sv[80] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], 
w_fp[80], COUPs[1], 1.0, &_fp[0] ); jamp_sv[32] -= amp_sv[0]; jamp_sv[56] += amp_sv[0]; jamp_sv[80] += amp_sv[0]; @@ -29107,7 +29107,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1227 - VVVV1_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29124,7 +29124,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29141,7 +29141,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[15] += amp_sv[0]; @@ -29158,7 +29158,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29175,7 +29175,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29192,7 +29192,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + 
VVVV4_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[13] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; @@ -29209,7 +29209,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[18] += amp_sv[0]; @@ -29226,7 +29226,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -29243,7 +29243,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[14] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -29264,12 +29264,12 @@ namespace mg5amcCpu // *** DIAGRAM 1228 OF 1240 *** // Wavefunction(s) for diagram number 1228 - VVV1P0_1( w_fp[0], w_fp[57], COUPs[0], 0., 0., w_fp[62] ); - VVV1P0_1( w_fp[0], w_fp[81], COUPs[0], 0., 0., w_fp[80] ); - VVV1P0_1( w_fp[0], w_fp[82], COUPs[0], 0., 0., w_fp[79] ); + VVV1P0_1( w_fp[0], w_fp[57], COUPs[0], 1.0, 0., 0., w_fp[62] ); + VVV1P0_1( w_fp[0], w_fp[81], COUPs[0], 1.0, 0., 0., w_fp[80] ); + VVV1P0_1( w_fp[0], w_fp[82], COUPs[0], 1.0, 0., 0., w_fp[79] ); // Amplitude(s) for diagram number 1228 - VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29286,7 +29286,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; 
jamp_sv[95] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[80], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[80], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29303,7 +29303,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[79], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[79], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[18] += amp_sv[0]; @@ -29327,7 +29327,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1229 - VVV1_0( w_fp[57], w_fp[5], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[57], w_fp[5], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29344,7 +29344,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[81], w_fp[5], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[81], w_fp[5], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29361,7 +29361,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVV1_0( w_fp[82], w_fp[5], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[82], w_fp[5], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -29385,7 +29385,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1230 - FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29394,7 +29394,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[80], COUPs[1], 1.0, &_fp[0] ); jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29403,7 +29403,7 @@ namespace mg5amcCpu jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[79], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -29419,17 +29419,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1231 - FFV1_0( w_fp[3], w_fp[102], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[57], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] += amp_sv[0]; jamp_sv[73] -= amp_sv[0]; jamp_sv[75] -= amp_sv[0]; jamp_sv[77] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[81], COUPs[1], 1.0, &_fp[0] ); jamp_sv[73] -= amp_sv[0]; jamp_sv[74] += amp_sv[0]; jamp_sv[75] -= amp_sv[0]; jamp_sv[76] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[82], COUPs[1], 1.0, &_fp[0] ); jamp_sv[72] -= amp_sv[0]; jamp_sv[74] += amp_sv[0]; jamp_sv[76] += amp_sv[0]; @@ -29441,7 +29441,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1232 - FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29450,7 +29450,7 @@ namespace mg5amcCpu jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * 
amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29459,7 +29459,7 @@ namespace mg5amcCpu jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -29475,17 +29475,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1233 - FFV1_0( w_fp[104], w_fp[2], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[57], COUPs[1], 1.0, &_fp[0] ); jamp_sv[34] += amp_sv[0]; jamp_sv[44] -= amp_sv[0]; jamp_sv[68] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - FFV1_0( w_fp[104], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); jamp_sv[44] -= amp_sv[0]; jamp_sv[58] += amp_sv[0]; jamp_sv[68] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; - FFV1_0( w_fp[104], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); jamp_sv[34] -= amp_sv[0]; jamp_sv[58] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; @@ -29497,7 +29497,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1234 - VVVV1_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -29514,7 +29514,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[55], w_fp[8], 
w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -29531,7 +29531,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[9] += amp_sv[0]; @@ -29548,7 +29548,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -29565,7 +29565,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -29582,7 +29582,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[7] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[9] += amp_sv[0]; @@ -29599,7 +29599,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[19] += amp_sv[0]; @@ -29616,7 +29616,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], 
COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -29633,7 +29633,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[107] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; @@ -29654,12 +29654,12 @@ namespace mg5amcCpu // *** DIAGRAM 1235 OF 1240 *** // Wavefunction(s) for diagram number 1235 - VVV1P0_1( w_fp[0], w_fp[55], COUPs[0], 0., 0., w_fp[104] ); - VVV1P0_1( w_fp[0], w_fp[83], COUPs[0], 0., 0., w_fp[82] ); - VVV1P0_1( w_fp[0], w_fp[84], COUPs[0], 0., 0., w_fp[81] ); + VVV1P0_1( w_fp[0], w_fp[55], COUPs[0], 1.0, 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[0], w_fp[83], COUPs[0], 1.0, 0., 0., w_fp[82] ); + VVV1P0_1( w_fp[0], w_fp[84], COUPs[0], 1.0, 0., 0., w_fp[81] ); // Amplitude(s) for diagram number 1235 - VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -29676,7 +29676,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[82], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[82], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; @@ -29693,7 +29693,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[81], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[81], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[19] += amp_sv[0]; @@ -29717,7 +29717,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1236 - 
VVV1_0( w_fp[55], w_fp[4], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[55], w_fp[4], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[6] -= amp_sv[0]; @@ -29734,7 +29734,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[83], w_fp[4], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[83], w_fp[4], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -29751,7 +29751,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVV1_0( w_fp[84], w_fp[4], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[84], w_fp[4], w_fp[56], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; @@ -29775,7 +29775,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1237 - FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29784,7 +29784,7 @@ namespace mg5amcCpu jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[82], COUPs[1], 1.0, &_fp[0] ); jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29793,7 +29793,7 @@ namespace mg5amcCpu jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[81], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[50] -= cxtype( 0, 1 ) * 
amp_sv[0]; jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -29809,17 +29809,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1238 - FFV1_0( w_fp[3], w_fp[114], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[55], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] += amp_sv[0]; jamp_sv[49] -= amp_sv[0]; jamp_sv[51] -= amp_sv[0]; jamp_sv[53] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[114], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[83], COUPs[1], 1.0, &_fp[0] ); jamp_sv[49] -= amp_sv[0]; jamp_sv[50] += amp_sv[0]; jamp_sv[51] -= amp_sv[0]; jamp_sv[52] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[114], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[84], COUPs[1], 1.0, &_fp[0] ); jamp_sv[48] -= amp_sv[0]; jamp_sv[50] += amp_sv[0]; jamp_sv[52] += amp_sv[0]; @@ -29831,7 +29831,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1239 - FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29840,7 +29840,7 @@ namespace mg5amcCpu jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; @@ -29849,7 +29849,7 @@ namespace mg5amcCpu jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 
0, 1 ) * amp_sv[0]; @@ -29865,17 +29865,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1240 - FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &_fp[0] ); jamp_sv[40] += amp_sv[0]; jamp_sv[46] -= amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &_fp[0] ); jamp_sv[46] -= amp_sv[0]; jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &_fp[0] ); jamp_sv[40] -= amp_sv[0]; jamp_sv[82] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; @@ -30628,13 +30628,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f 
b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f index e6d2fc3099..6828f1c252 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f @@ -39,6 +39,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -126,11 +127,24 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). 
+// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include <cassert> +#include <cstdio> + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } // Fortran index (-1, 0) -> C array index (0, 1) + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); return "Unknown"; break; // defined fallback when NDEBUG compiles out the assert + } + } + + static mgOnGpu::Timer<TIMERTYPE> program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer<TIMERTYPE> smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer<TIMERTYPE> smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void
counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f index 51965a0fe6..fc156798a8 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f @@ -10244,22 +10244,22 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,35),W(1,3),W(1,31),GC_11(IVEC),AMP(157)) CALL FFV1_0(W(1,35),W(1,3),W(1,32),GC_11(IVEC),AMP(158)) CALL FFV1_0(W(1,35),W(1,3),W(1,33),GC_11(IVEC),AMP(159)) - CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,35)) - CALL FFV1_2(W(1,4),W(1,2),GC_11(IVEC),MDL_MT, ZERO,W(1,53)) - CALL FFV1_1(W(1,35),W(1,5),GC_11(IVEC),MDL_MT, ZERO,W(1,10)) - CALL FFV1_2(W(1,53),W(1,6),GC_11(IVEC),MDL_MT, ZERO,W(1,23)) + CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,35)) + CALL FFV1_2(W(1,4),W(1,2),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,53)) + CALL
FFV1_1(W(1,35),W(1,5),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,10)) + CALL FFV1_2(W(1,53),W(1,6),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,23)) C Amplitude(s) for diagram number 124 CALL FFV1_0(W(1,23),W(1,10),W(1,7),GC_11(IVEC),AMP(160)) - CALL FFV1_2(W(1,53),W(1,7),GC_11(IVEC),MDL_MT, ZERO,W(1,22)) + CALL FFV1_2(W(1,53),W(1,7),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,22)) C Amplitude(s) for diagram number 125 CALL FFV1_0(W(1,22),W(1,10),W(1,6),GC_11(IVEC),AMP(161)) - CALL FFV1_1(W(1,35),W(1,6),GC_11(IVEC),MDL_MT, ZERO,W(1,56)) - CALL FFV1_2(W(1,53),W(1,5),GC_11(IVEC),MDL_MT, ZERO,W(1,57)) + CALL FFV1_1(W(1,35),W(1,6),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,56)) + CALL FFV1_2(W(1,53),W(1,5),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,57)) C Amplitude(s) for diagram number 126 CALL FFV1_0(W(1,57),W(1,56),W(1,7),GC_11(IVEC),AMP(162)) C Amplitude(s) for diagram number 127 CALL FFV1_0(W(1,22),W(1,56),W(1,5),GC_11(IVEC),AMP(163)) - CALL FFV1_1(W(1,35),W(1,7),GC_11(IVEC),MDL_MT, ZERO,W(1,58)) + CALL FFV1_1(W(1,35),W(1,7),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,58)) C Amplitude(s) for diagram number 128 CALL FFV1_0(W(1,57),W(1,58),W(1,6),GC_11(IVEC),AMP(164)) C Amplitude(s) for diagram number 129 @@ -10267,14 +10267,16 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1P0_3(W(1,53),W(1,35),GC_11(IVEC),ZERO, FK_ZERO,W(1,59)) C Amplitude(s) for diagram number 130 CALL VVV1_0(W(1,25),W(1,7),W(1,59),GC_10(IVEC),AMP(166)) - CALL FFV1_1(W(1,35),W(1,25),GC_11(IVEC),MDL_MT, ZERO,W(1,60)) + CALL FFV1_1(W(1,35),W(1,25),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,60) + $ ) C Amplitude(s) for diagram number 131 CALL FFV1_0(W(1,53),W(1,60),W(1,7),GC_11(IVEC),AMP(167)) C Amplitude(s) for diagram number 132 CALL FFV1_0(W(1,53),W(1,58),W(1,25),GC_11(IVEC),AMP(168)) C Amplitude(s) for diagram number 133 CALL VVV1_0(W(1,28),W(1,6),W(1,59),GC_10(IVEC),AMP(169)) - CALL FFV1_1(W(1,35),W(1,28),GC_11(IVEC),MDL_MT, ZERO,W(1,61)) + CALL FFV1_1(W(1,35),W(1,28),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,61) + $ ) C Amplitude(s) for diagram 
number 134 CALL FFV1_0(W(1,53),W(1,61),W(1,6),GC_11(IVEC),AMP(170)) C Amplitude(s) for diagram number 135 @@ -10283,7 +10285,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL VVV1_0(W(1,5),W(1,30),W(1,59),GC_10(IVEC),AMP(172)) C Amplitude(s) for diagram number 137 CALL FFV1_0(W(1,53),W(1,10),W(1,30),GC_11(IVEC),AMP(173)) - CALL FFV1_1(W(1,35),W(1,30),GC_11(IVEC),MDL_MT, ZERO,W(1,59)) + CALL FFV1_1(W(1,35),W(1,30),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,59) + $ ) C Amplitude(s) for diagram number 138 CALL FFV1_0(W(1,53),W(1,59),W(1,5),GC_11(IVEC),AMP(174)) C Amplitude(s) for diagram number 139 @@ -10302,7 +10305,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL VVVV1_0(W(1,62),W(1,6),W(1,7),W(1,63),GC_12(IVEC),AMP(180)) CALL VVVV3_0(W(1,62),W(1,6),W(1,7),W(1,63),GC_12(IVEC),AMP(181)) CALL VVVV4_0(W(1,62),W(1,6),W(1,7),W(1,63),GC_12(IVEC),AMP(182)) - CALL FFV1_2(W(1,4),W(1,62),GC_11(IVEC),MDL_MT, ZERO,W(1,66)) + CALL FFV1_2(W(1,4),W(1,62),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,66)) C Amplitude(s) for diagram number 143 CALL FFV1_0(W(1,66),W(1,56),W(1,7),GC_11(IVEC),AMP(183)) C Amplitude(s) for diagram number 144 @@ -10344,7 +10347,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL VVVV1_0(W(1,67),W(1,5),W(1,7),W(1,63),GC_12(IVEC),AMP(198)) CALL VVVV3_0(W(1,67),W(1,5),W(1,7),W(1,63),GC_12(IVEC),AMP(199)) CALL VVVV4_0(W(1,67),W(1,5),W(1,7),W(1,63),GC_12(IVEC),AMP(200)) - CALL FFV1_2(W(1,4),W(1,67),GC_11(IVEC),MDL_MT, ZERO,W(1,72)) + CALL FFV1_2(W(1,4),W(1,67),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,72)) C Amplitude(s) for diagram number 159 CALL FFV1_0(W(1,72),W(1,10),W(1,7),GC_11(IVEC),AMP(201)) C Amplitude(s) for diagram number 160 @@ -10385,7 +10388,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL VVVV1_0(W(1,73),W(1,5),W(1,6),W(1,63),GC_12(IVEC),AMP(216)) CALL VVVV3_0(W(1,73),W(1,5),W(1,6),W(1,63),GC_12(IVEC),AMP(217)) CALL VVVV4_0(W(1,73),W(1,5),W(1,6),W(1,63),GC_12(IVEC),AMP(218)) - CALL 
FFV1_2(W(1,4),W(1,73),GC_11(IVEC),MDL_MT, ZERO,W(1,77)) + CALL FFV1_2(W(1,4),W(1,73),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,77)) C Amplitude(s) for diagram number 175 CALL FFV1_0(W(1,77),W(1,10),W(1,6),GC_11(IVEC),AMP(219)) C Amplitude(s) for diagram number 176 @@ -10419,7 +10422,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,8),W(1,78),W(1,7),GC_11(IVEC),AMP(232)) C Amplitude(s) for diagram number 189 CALL FFV1_0(W(1,54),W(1,78),W(1,6),GC_11(IVEC),AMP(233)) - CALL FFV1_2(W(1,47),W(1,2),GC_11(IVEC),MDL_MT, ZERO,W(1,79)) + CALL FFV1_2(W(1,47),W(1,2),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,79)) C Amplitude(s) for diagram number 190 CALL FFV1_0(W(1,79),W(1,56),W(1,7),GC_11(IVEC),AMP(234)) C Amplitude(s) for diagram number 191 @@ -10438,7 +10441,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,26),W(1,78),W(1,7),GC_11(IVEC),AMP(241)) C Amplitude(s) for diagram number 198 CALL FFV1_0(W(1,49),W(1,78),W(1,5),GC_11(IVEC),AMP(242)) - CALL FFV1_2(W(1,39),W(1,2),GC_11(IVEC),MDL_MT, ZERO,W(1,59)) + CALL FFV1_2(W(1,39),W(1,2),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,59)) C Amplitude(s) for diagram number 199 CALL FFV1_0(W(1,59),W(1,10),W(1,7),GC_11(IVEC),AMP(243)) C Amplitude(s) for diagram number 200 @@ -10457,7 +10460,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,29),W(1,78),W(1,6),GC_11(IVEC),AMP(250)) C Amplitude(s) for diagram number 207 CALL FFV1_0(W(1,41),W(1,78),W(1,5),GC_11(IVEC),AMP(251)) - CALL FFV1_2(W(1,42),W(1,2),GC_11(IVEC),MDL_MT, ZERO,W(1,61)) + CALL FFV1_2(W(1,42),W(1,2),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,61)) C Amplitude(s) for diagram number 208 CALL FFV1_0(W(1,61),W(1,10),W(1,6),GC_11(IVEC),AMP(252)) C Amplitude(s) for diagram number 209 @@ -10585,22 +10588,22 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL VVV1_0(W(1,2),W(1,31),W(1,63),GC_10(IVEC),AMP(316)) CALL VVV1_0(W(1,2),W(1,32),W(1,63),GC_10(IVEC),AMP(317)) CALL 
VVV1_0(W(1,2),W(1,33),W(1,63),GC_10(IVEC),AMP(318)) - CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,63)) - CALL FFV1_1(W(1,3),W(1,2),GC_11(IVEC),MDL_MT, ZERO,W(1,78)) - CALL FFV1_2(W(1,63),W(1,5),GC_11(IVEC),MDL_MT, ZERO,W(1,35)) - CALL FFV1_1(W(1,78),W(1,6),GC_11(IVEC),MDL_MT, ZERO,W(1,10)) + CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,63)) + CALL FFV1_1(W(1,3),W(1,2),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,78)) + CALL FFV1_2(W(1,63),W(1,5),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,35)) + CALL FFV1_1(W(1,78),W(1,6),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,10)) C Amplitude(s) for diagram number 247 CALL FFV1_0(W(1,35),W(1,10),W(1,7),GC_11(IVEC),AMP(319)) - CALL FFV1_1(W(1,78),W(1,7),GC_11(IVEC),MDL_MT, ZERO,W(1,86)) + CALL FFV1_1(W(1,78),W(1,7),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,86)) C Amplitude(s) for diagram number 248 CALL FFV1_0(W(1,35),W(1,86),W(1,6),GC_11(IVEC),AMP(320)) - CALL FFV1_2(W(1,63),W(1,6),GC_11(IVEC),MDL_MT, ZERO,W(1,87)) - CALL FFV1_1(W(1,78),W(1,5),GC_11(IVEC),MDL_MT, ZERO,W(1,88)) + CALL FFV1_2(W(1,63),W(1,6),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,87)) + CALL FFV1_1(W(1,78),W(1,5),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,88)) C Amplitude(s) for diagram number 249 CALL FFV1_0(W(1,87),W(1,88),W(1,7),GC_11(IVEC),AMP(321)) C Amplitude(s) for diagram number 250 CALL FFV1_0(W(1,87),W(1,86),W(1,5),GC_11(IVEC),AMP(322)) - CALL FFV1_2(W(1,63),W(1,7),GC_11(IVEC),MDL_MT, ZERO,W(1,89)) + CALL FFV1_2(W(1,63),W(1,7),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,89)) C Amplitude(s) for diagram number 251 CALL FFV1_0(W(1,89),W(1,88),W(1,6),GC_11(IVEC),AMP(323)) C Amplitude(s) for diagram number 252 @@ -10608,14 +10611,16 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1P0_3(W(1,63),W(1,78),GC_11(IVEC),ZERO, FK_ZERO,W(1,90)) C Amplitude(s) for diagram number 253 CALL VVV1_0(W(1,25),W(1,7),W(1,90),GC_10(IVEC),AMP(325)) - CALL FFV1_2(W(1,63),W(1,25),GC_11(IVEC),MDL_MT, ZERO,W(1,91)) + CALL FFV1_2(W(1,63),W(1,25),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,91) + $ ) C 
Amplitude(s) for diagram number 254 CALL FFV1_0(W(1,91),W(1,78),W(1,7),GC_11(IVEC),AMP(326)) C Amplitude(s) for diagram number 255 CALL FFV1_0(W(1,89),W(1,78),W(1,25),GC_11(IVEC),AMP(327)) C Amplitude(s) for diagram number 256 CALL VVV1_0(W(1,28),W(1,6),W(1,90),GC_10(IVEC),AMP(328)) - CALL FFV1_2(W(1,63),W(1,28),GC_11(IVEC),MDL_MT, ZERO,W(1,92)) + CALL FFV1_2(W(1,63),W(1,28),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,92) + $ ) C Amplitude(s) for diagram number 257 CALL FFV1_0(W(1,92),W(1,78),W(1,6),GC_11(IVEC),AMP(329)) C Amplitude(s) for diagram number 258 @@ -10624,7 +10629,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL VVV1_0(W(1,5),W(1,30),W(1,90),GC_10(IVEC),AMP(331)) C Amplitude(s) for diagram number 260 CALL FFV1_0(W(1,35),W(1,78),W(1,30),GC_11(IVEC),AMP(332)) - CALL FFV1_2(W(1,63),W(1,30),GC_11(IVEC),MDL_MT, ZERO,W(1,90)) + CALL FFV1_2(W(1,63),W(1,30),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,90) + $ ) C Amplitude(s) for diagram number 261 CALL FFV1_0(W(1,90),W(1,78),W(1,5),GC_11(IVEC),AMP(333)) C Amplitude(s) for diagram number 262 @@ -10640,7 +10646,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL VVVV1_0(W(1,62),W(1,6),W(1,7),W(1,93),GC_12(IVEC),AMP(339)) CALL VVVV3_0(W(1,62),W(1,6),W(1,7),W(1,93),GC_12(IVEC),AMP(340)) CALL VVVV4_0(W(1,62),W(1,6),W(1,7),W(1,93),GC_12(IVEC),AMP(341)) - CALL FFV1_1(W(1,3),W(1,62),GC_11(IVEC),MDL_MT, ZERO,W(1,94)) + CALL FFV1_1(W(1,3),W(1,62),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,94)) C Amplitude(s) for diagram number 266 CALL FFV1_0(W(1,87),W(1,94),W(1,7),GC_11(IVEC),AMP(342)) C Amplitude(s) for diagram number 267 @@ -10679,7 +10685,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL VVVV1_0(W(1,67),W(1,5),W(1,7),W(1,93),GC_12(IVEC),AMP(357)) CALL VVVV3_0(W(1,67),W(1,5),W(1,7),W(1,93),GC_12(IVEC),AMP(358)) CALL VVVV4_0(W(1,67),W(1,5),W(1,7),W(1,93),GC_12(IVEC),AMP(359)) - CALL FFV1_1(W(1,3),W(1,67),GC_11(IVEC),MDL_MT, ZERO,W(1,95)) + CALL FFV1_1(W(1,3),W(1,67),GC_11(IVEC),MDL_MT, 
FK_MDL_WT,W(1,95)) C Amplitude(s) for diagram number 282 CALL FFV1_0(W(1,35),W(1,95),W(1,7),GC_11(IVEC),AMP(360)) C Amplitude(s) for diagram number 283 @@ -10717,7 +10723,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL VVVV1_0(W(1,73),W(1,5),W(1,6),W(1,93),GC_12(IVEC),AMP(375)) CALL VVVV3_0(W(1,73),W(1,5),W(1,6),W(1,93),GC_12(IVEC),AMP(376)) CALL VVVV4_0(W(1,73),W(1,5),W(1,6),W(1,93),GC_12(IVEC),AMP(377)) - CALL FFV1_1(W(1,3),W(1,73),GC_11(IVEC),MDL_MT, ZERO,W(1,98)) + CALL FFV1_1(W(1,3),W(1,73),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,98)) C Amplitude(s) for diagram number 298 CALL FFV1_0(W(1,35),W(1,98),W(1,6),GC_11(IVEC),AMP(378)) C Amplitude(s) for diagram number 299 @@ -10752,7 +10758,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,100),W(1,36),W(1,7),GC_11(IVEC),AMP(391)) C Amplitude(s) for diagram number 312 CALL FFV1_0(W(1,100),W(1,37),W(1,6),GC_11(IVEC),AMP(392)) - CALL FFV1_1(W(1,34),W(1,2),GC_11(IVEC),MDL_MT, ZERO,W(1,101)) + CALL FFV1_1(W(1,34),W(1,2),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,101) + $ ) C Amplitude(s) for diagram number 313 CALL FFV1_0(W(1,87),W(1,101),W(1,7),GC_11(IVEC),AMP(393)) C Amplitude(s) for diagram number 314 @@ -10771,7 +10778,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,100),W(1,44),W(1,7),GC_11(IVEC),AMP(400)) C Amplitude(s) for diagram number 321 CALL FFV1_0(W(1,100),W(1,45),W(1,5),GC_11(IVEC),AMP(401)) - CALL FFV1_1(W(1,40),W(1,2),GC_11(IVEC),MDL_MT, ZERO,W(1,90)) + CALL FFV1_1(W(1,40),W(1,2),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,90)) C Amplitude(s) for diagram number 322 CALL FFV1_0(W(1,35),W(1,90),W(1,7),GC_11(IVEC),AMP(402)) C Amplitude(s) for diagram number 323 @@ -10790,7 +10797,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,100),W(1,50),W(1,6),GC_11(IVEC),AMP(409)) C Amplitude(s) for diagram number 330 CALL FFV1_0(W(1,100),W(1,51),W(1,5),GC_11(IVEC),AMP(410)) - CALL FFV1_1(W(1,48),W(1,2),GC_11(IVEC),MDL_MT, 
ZERO,W(1,92)) + CALL FFV1_1(W(1,48),W(1,2),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,92)) C Amplitude(s) for diagram number 331 CALL FFV1_0(W(1,35),W(1,92),W(1,6),GC_11(IVEC),AMP(411)) C Amplitude(s) for diagram number 332 @@ -10898,7 +10905,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL VVV1_0(W(1,2),W(1,32),W(1,93),GC_10(IVEC),AMP(476)) CALL VVV1_0(W(1,2),W(1,33),W(1,93),GC_10(IVEC),AMP(477)) CALL VVV1P0_1(W(1,1),W(1,5),GC_10(IVEC),ZERO, FK_ZERO,W(1,93)) - CALL FFV1_2(W(1,4),W(1,93),GC_11(IVEC),MDL_MT, ZERO,W(1,100)) + CALL FFV1_2(W(1,4),W(1,93),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,100) + $ ) C Amplitude(s) for diagram number 370 CALL FFV1_0(W(1,100),W(1,10),W(1,7),GC_11(IVEC),AMP(478)) C Amplitude(s) for diagram number 371 @@ -10928,14 +10936,16 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ ) C Amplitude(s) for diagram number 377 CALL FFV1_0(W(1,39),W(1,96),W(1,7),GC_11(IVEC),AMP(487)) - CALL FFV1_2(W(1,39),W(1,93),GC_11(IVEC),MDL_MT, ZERO,W(1,99)) + CALL FFV1_2(W(1,39),W(1,93),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,99) + $ ) C Amplitude(s) for diagram number 378 CALL FFV1_0(W(1,99),W(1,78),W(1,7),GC_11(IVEC),AMP(488)) C Amplitude(s) for diagram number 379 CALL FFV1_0(W(1,39),W(1,78),W(1,87),GC_11(IVEC),AMP(489)) C Amplitude(s) for diagram number 380 CALL FFV1_0(W(1,42),W(1,96),W(1,6),GC_11(IVEC),AMP(490)) - CALL FFV1_2(W(1,42),W(1,93),GC_11(IVEC),MDL_MT, ZERO,W(1,102)) + CALL FFV1_2(W(1,42),W(1,93),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1 + $ ,102)) C Amplitude(s) for diagram number 381 CALL FFV1_0(W(1,102),W(1,78),W(1,6),GC_11(IVEC),AMP(491)) C Amplitude(s) for diagram number 382 @@ -10947,7 +10957,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL VVV1P0_1(W(1,93),W(1,30),GC_10(IVEC),ZERO, FK_ZERO,W(1,96)) C Amplitude(s) for diagram number 385 CALL FFV1_0(W(1,4),W(1,78),W(1,96),GC_11(IVEC),AMP(495)) - CALL FFV1_1(W(1,3),W(1,93),GC_11(IVEC),MDL_MT, ZERO,W(1,103)) + CALL FFV1_1(W(1,3),W(1,93),GC_11(IVEC),MDL_MT, 
FK_MDL_WT,W(1,103) + $ ) C Amplitude(s) for diagram number 386 CALL FFV1_0(W(1,23),W(1,103),W(1,7),GC_11(IVEC),AMP(496)) C Amplitude(s) for diagram number 387 @@ -10969,14 +10980,16 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ ,105)) C Amplitude(s) for diagram number 393 CALL FFV1_0(W(1,105),W(1,40),W(1,7),GC_11(IVEC),AMP(505)) - CALL FFV1_1(W(1,40),W(1,93),GC_11(IVEC),MDL_MT, ZERO,W(1,106)) + CALL FFV1_1(W(1,40),W(1,93),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1 + $ ,106)) C Amplitude(s) for diagram number 394 CALL FFV1_0(W(1,53),W(1,106),W(1,7),GC_11(IVEC),AMP(506)) C Amplitude(s) for diagram number 395 CALL FFV1_0(W(1,53),W(1,40),W(1,87),GC_11(IVEC),AMP(507)) C Amplitude(s) for diagram number 396 CALL FFV1_0(W(1,105),W(1,48),W(1,6),GC_11(IVEC),AMP(508)) - CALL FFV1_1(W(1,48),W(1,93),GC_11(IVEC),MDL_MT, ZERO,W(1,107)) + CALL FFV1_1(W(1,48),W(1,93),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1 + $ ,107)) C Amplitude(s) for diagram number 397 CALL FFV1_0(W(1,53),W(1,107),W(1,6),GC_11(IVEC),AMP(509)) C Amplitude(s) for diagram number 398 @@ -11219,7 +11232,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL VVV1_0(W(1,93),W(1,84),W(1,9),GC_10(IVEC),AMP(635)) CALL VVV1_0(W(1,93),W(1,85),W(1,9),GC_10(IVEC),AMP(636)) CALL VVV1P0_1(W(1,1),W(1,6),GC_10(IVEC),ZERO, FK_ZERO,W(1,93)) - CALL FFV1_2(W(1,4),W(1,93),GC_11(IVEC),MDL_MT, ZERO,W(1,100)) + CALL FFV1_2(W(1,4),W(1,93),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,100) + $ ) C Amplitude(s) for diagram number 493 CALL FFV1_0(W(1,100),W(1,88),W(1,7),GC_11(IVEC),AMP(637)) C Amplitude(s) for diagram number 494 @@ -11248,14 +11262,16 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ ) C Amplitude(s) for diagram number 500 CALL FFV1_0(W(1,47),W(1,63),W(1,7),GC_11(IVEC),AMP(646)) - CALL FFV1_2(W(1,47),W(1,93),GC_11(IVEC),MDL_MT, ZERO,W(1,115)) + CALL FFV1_2(W(1,47),W(1,93),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1 + $ ,115)) C Amplitude(s) for diagram number 501 CALL 
FFV1_0(W(1,115),W(1,78),W(1,7),GC_11(IVEC),AMP(647)) C Amplitude(s) for diagram number 502 CALL FFV1_0(W(1,47),W(1,78),W(1,105),GC_11(IVEC),AMP(648)) C Amplitude(s) for diagram number 503 CALL FFV1_0(W(1,42),W(1,63),W(1,5),GC_11(IVEC),AMP(649)) - CALL FFV1_2(W(1,42),W(1,93),GC_11(IVEC),MDL_MT, ZERO,W(1,114)) + CALL FFV1_2(W(1,42),W(1,93),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1 + $ ,114)) C Amplitude(s) for diagram number 504 CALL FFV1_0(W(1,114),W(1,78),W(1,5),GC_11(IVEC),AMP(650)) C Amplitude(s) for diagram number 505 @@ -11267,7 +11283,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL VVV1P0_1(W(1,93),W(1,28),GC_10(IVEC),ZERO, FK_ZERO,W(1,63)) C Amplitude(s) for diagram number 508 CALL FFV1_0(W(1,4),W(1,78),W(1,63),GC_11(IVEC),AMP(654)) - CALL FFV1_1(W(1,3),W(1,93),GC_11(IVEC),MDL_MT, ZERO,W(1,113)) + CALL FFV1_1(W(1,3),W(1,93),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,113) + $ ) C Amplitude(s) for diagram number 509 CALL FFV1_0(W(1,57),W(1,113),W(1,7),GC_11(IVEC),AMP(655)) C Amplitude(s) for diagram number 510 @@ -11288,14 +11305,16 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ ) C Amplitude(s) for diagram number 516 CALL FFV1_0(W(1,87),W(1,34),W(1,7),GC_11(IVEC),AMP(664)) - CALL FFV1_1(W(1,34),W(1,93),GC_11(IVEC),MDL_MT, ZERO,W(1,99)) + CALL FFV1_1(W(1,34),W(1,93),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,99) + $ ) C Amplitude(s) for diagram number 517 CALL FFV1_0(W(1,53),W(1,99),W(1,7),GC_11(IVEC),AMP(665)) C Amplitude(s) for diagram number 518 CALL FFV1_0(W(1,53),W(1,34),W(1,105),GC_11(IVEC),AMP(666)) C Amplitude(s) for diagram number 519 CALL FFV1_0(W(1,87),W(1,48),W(1,5),GC_11(IVEC),AMP(667)) - CALL FFV1_1(W(1,48),W(1,93),GC_11(IVEC),MDL_MT, ZERO,W(1,107)) + CALL FFV1_1(W(1,48),W(1,93),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1 + $ ,107)) C Amplitude(s) for diagram number 520 CALL FFV1_0(W(1,53),W(1,107),W(1,5),GC_11(IVEC),AMP(668)) C Amplitude(s) for diagram number 521 @@ -11537,7 +11556,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) 
CALL VVV1_0(W(1,93),W(1,82),W(1,9),GC_10(IVEC),AMP(794)) CALL VVV1_0(W(1,93),W(1,83),W(1,9),GC_10(IVEC),AMP(795)) CALL VVV1P0_1(W(1,1),W(1,7),GC_10(IVEC),ZERO, FK_ZERO,W(1,93)) - CALL FFV1_2(W(1,4),W(1,93),GC_11(IVEC),MDL_MT, ZERO,W(1,100)) + CALL FFV1_2(W(1,4),W(1,93),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,100) + $ ) C Amplitude(s) for diagram number 616 CALL FFV1_0(W(1,100),W(1,88),W(1,6),GC_11(IVEC),AMP(796)) C Amplitude(s) for diagram number 617 @@ -11566,14 +11586,16 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ ,103)) C Amplitude(s) for diagram number 623 CALL FFV1_0(W(1,47),W(1,103),W(1,6),GC_11(IVEC),AMP(805)) - CALL FFV1_2(W(1,47),W(1,93),GC_11(IVEC),MDL_MT, ZERO,W(1,89)) + CALL FFV1_2(W(1,47),W(1,93),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,89) + $ ) C Amplitude(s) for diagram number 624 CALL FFV1_0(W(1,89),W(1,78),W(1,6),GC_11(IVEC),AMP(806)) C Amplitude(s) for diagram number 625 CALL FFV1_0(W(1,47),W(1,78),W(1,87),GC_11(IVEC),AMP(807)) C Amplitude(s) for diagram number 626 CALL FFV1_0(W(1,39),W(1,103),W(1,5),GC_11(IVEC),AMP(808)) - CALL FFV1_2(W(1,39),W(1,93),GC_11(IVEC),MDL_MT, ZERO,W(1,91)) + CALL FFV1_2(W(1,39),W(1,93),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,91) + $ ) C Amplitude(s) for diagram number 627 CALL FFV1_0(W(1,91),W(1,78),W(1,5),GC_11(IVEC),AMP(809)) C Amplitude(s) for diagram number 628 @@ -11585,7 +11607,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL VVV1P0_1(W(1,93),W(1,25),GC_10(IVEC),ZERO, FK_ZERO,W(1,103)) C Amplitude(s) for diagram number 631 CALL FFV1_0(W(1,4),W(1,78),W(1,103),GC_11(IVEC),AMP(813)) - CALL FFV1_1(W(1,3),W(1,93),GC_11(IVEC),MDL_MT, ZERO,W(1,97)) + CALL FFV1_1(W(1,3),W(1,93),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,97)) C Amplitude(s) for diagram number 632 CALL FFV1_0(W(1,57),W(1,97),W(1,6),GC_11(IVEC),AMP(814)) C Amplitude(s) for diagram number 633 @@ -11606,14 +11628,16 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ ,105)) C Amplitude(s) for diagram number 639 CALL 
FFV1_0(W(1,105),W(1,34),W(1,6),GC_11(IVEC),AMP(823)) - CALL FFV1_1(W(1,34),W(1,93),GC_11(IVEC),MDL_MT, ZERO,W(1,115)) + CALL FFV1_1(W(1,34),W(1,93),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1 + $ ,115)) C Amplitude(s) for diagram number 640 CALL FFV1_0(W(1,53),W(1,115),W(1,6),GC_11(IVEC),AMP(824)) C Amplitude(s) for diagram number 641 CALL FFV1_0(W(1,53),W(1,34),W(1,87),GC_11(IVEC),AMP(825)) C Amplitude(s) for diagram number 642 CALL FFV1_0(W(1,105),W(1,40),W(1,5),GC_11(IVEC),AMP(826)) - CALL FFV1_1(W(1,40),W(1,93),GC_11(IVEC),MDL_MT, ZERO,W(1,107)) + CALL FFV1_1(W(1,40),W(1,93),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1 + $ ,107)) C Amplitude(s) for diagram number 643 CALL FFV1_0(W(1,53),W(1,107),W(1,5),GC_11(IVEC),AMP(827)) C Amplitude(s) for diagram number 644 @@ -11859,7 +11883,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,8),W(1,93),W(1,7),GC_11(IVEC),AMP(955)) C Amplitude(s) for diagram number 740 CALL FFV1_0(W(1,54),W(1,93),W(1,6),GC_11(IVEC),AMP(956)) - CALL FFV1_2(W(1,47),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,100)) + CALL FFV1_2(W(1,47),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,100) + $ ) C Amplitude(s) for diagram number 741 CALL FFV1_0(W(1,100),W(1,10),W(1,7),GC_11(IVEC),AMP(957)) C Amplitude(s) for diagram number 742 @@ -11879,7 +11904,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,26),W(1,93),W(1,7),GC_11(IVEC),AMP(964)) C Amplitude(s) for diagram number 749 CALL FFV1_0(W(1,49),W(1,93),W(1,5),GC_11(IVEC),AMP(965)) - CALL FFV1_2(W(1,39),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,105)) + CALL FFV1_2(W(1,39),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,105) + $ ) C Amplitude(s) for diagram number 750 CALL FFV1_0(W(1,105),W(1,88),W(1,7),GC_11(IVEC),AMP(966)) C Amplitude(s) for diagram number 751 @@ -11899,7 +11925,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,29),W(1,93),W(1,6),GC_11(IVEC),AMP(973)) C Amplitude(s) for diagram number 758 CALL 
FFV1_0(W(1,41),W(1,93),W(1,5),GC_11(IVEC),AMP(974)) - CALL FFV1_2(W(1,42),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,63)) + CALL FFV1_2(W(1,42),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,63)) C Amplitude(s) for diagram number 759 CALL FFV1_0(W(1,63),W(1,88),W(1,6),GC_11(IVEC),AMP(975)) C Amplitude(s) for diagram number 760 @@ -11997,7 +12023,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,91),W(1,36),W(1,7),GC_11(IVEC),AMP(1015)) C Amplitude(s) for diagram number 790 CALL FFV1_0(W(1,91),W(1,37),W(1,6),GC_11(IVEC),AMP(1016)) - CALL FFV1_1(W(1,34),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,115)) + CALL FFV1_1(W(1,34),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,115) + $ ) C Amplitude(s) for diagram number 791 CALL FFV1_0(W(1,23),W(1,115),W(1,7),GC_11(IVEC),AMP(1017)) C Amplitude(s) for diagram number 792 @@ -12016,7 +12043,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,91),W(1,44),W(1,7),GC_11(IVEC),AMP(1024)) C Amplitude(s) for diagram number 799 CALL FFV1_0(W(1,91),W(1,45),W(1,5),GC_11(IVEC),AMP(1025)) - CALL FFV1_1(W(1,40),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,103)) + CALL FFV1_1(W(1,40),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,103) + $ ) C Amplitude(s) for diagram number 800 CALL FFV1_0(W(1,57),W(1,103),W(1,7),GC_11(IVEC),AMP(1026)) C Amplitude(s) for diagram number 801 @@ -12035,7 +12063,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,91),W(1,50),W(1,6),GC_11(IVEC),AMP(1033)) C Amplitude(s) for diagram number 808 CALL FFV1_0(W(1,91),W(1,51),W(1,5),GC_11(IVEC),AMP(1034)) - CALL FFV1_1(W(1,48),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,114)) + CALL FFV1_1(W(1,48),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,114) + $ ) C Amplitude(s) for diagram number 809 CALL FFV1_0(W(1,57),W(1,114),W(1,6),GC_11(IVEC),AMP(1035)) C Amplitude(s) for diagram number 810 @@ -13157,9 +13186,9 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ ,W(1,25)) CALL VVVV4P0_1(W(1,1),W(1,5),W(1,6),GC_12(IVEC),ZERO, 
FK_ZERO $ ,W(1,21)) - CALL FFV1_2(W(1,4),W(1,61),GC_11(IVEC),MDL_MT, ZERO,W(1,17)) - CALL FFV1_2(W(1,4),W(1,25),GC_11(IVEC),MDL_MT, ZERO,W(1,28)) - CALL FFV1_2(W(1,4),W(1,21),GC_11(IVEC),MDL_MT, ZERO,W(1,99)) + CALL FFV1_2(W(1,4),W(1,61),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,17)) + CALL FFV1_2(W(1,4),W(1,25),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,28)) + CALL FFV1_2(W(1,4),W(1,21),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,99)) C Amplitude(s) for diagram number 1172 CALL FFV1_0(W(1,17),W(1,78),W(1,7),GC_11(IVEC),AMP(1648)) CALL FFV1_0(W(1,28),W(1,78),W(1,7),GC_11(IVEC),AMP(1649)) @@ -13175,9 +13204,9 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,42),W(1,78),W(1,61),GC_11(IVEC),AMP(1654)) CALL FFV1_0(W(1,42),W(1,78),W(1,25),GC_11(IVEC),AMP(1655)) CALL FFV1_0(W(1,42),W(1,78),W(1,21),GC_11(IVEC),AMP(1656)) - CALL FFV1_1(W(1,3),W(1,61),GC_11(IVEC),MDL_MT, ZERO,W(1,60)) - CALL FFV1_1(W(1,3),W(1,25),GC_11(IVEC),MDL_MT, ZERO,W(1,72)) - CALL FFV1_1(W(1,3),W(1,21),GC_11(IVEC),MDL_MT, ZERO,W(1,22)) + CALL FFV1_1(W(1,3),W(1,61),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,60)) + CALL FFV1_1(W(1,3),W(1,25),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,72)) + CALL FFV1_1(W(1,3),W(1,21),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,22)) C Amplitude(s) for diagram number 1175 CALL FFV1_0(W(1,53),W(1,60),W(1,7),GC_11(IVEC),AMP(1657)) CALL FFV1_0(W(1,53),W(1,72),W(1,7),GC_11(IVEC),AMP(1658)) @@ -13245,9 +13274,9 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ ,W(1,72)) CALL VVVV4P0_1(W(1,1),W(1,5),W(1,7),GC_12(IVEC),ZERO, FK_ZERO $ ,W(1,60)) - CALL FFV1_2(W(1,4),W(1,22),GC_11(IVEC),MDL_MT, ZERO,W(1,25)) - CALL FFV1_2(W(1,4),W(1,72),GC_11(IVEC),MDL_MT, ZERO,W(1,61)) - CALL FFV1_2(W(1,4),W(1,60),GC_11(IVEC),MDL_MT, ZERO,W(1,73)) + CALL FFV1_2(W(1,4),W(1,22),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,25)) + CALL FFV1_2(W(1,4),W(1,72),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,61)) + CALL FFV1_2(W(1,4),W(1,60),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,73)) C Amplitude(s) for diagram number 1188 CALL 
FFV1_0(W(1,25),W(1,78),W(1,6),GC_11(IVEC),AMP(1702)) CALL FFV1_0(W(1,61),W(1,78),W(1,6),GC_11(IVEC),AMP(1703)) @@ -13263,9 +13292,9 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,39),W(1,78),W(1,22),GC_11(IVEC),AMP(1708)) CALL FFV1_0(W(1,39),W(1,78),W(1,72),GC_11(IVEC),AMP(1709)) CALL FFV1_0(W(1,39),W(1,78),W(1,60),GC_11(IVEC),AMP(1710)) - CALL FFV1_1(W(1,3),W(1,22),GC_11(IVEC),MDL_MT, ZERO,W(1,30)) - CALL FFV1_1(W(1,3),W(1,72),GC_11(IVEC),MDL_MT, ZERO,W(1,69)) - CALL FFV1_1(W(1,3),W(1,60),GC_11(IVEC),MDL_MT, ZERO,W(1,24)) + CALL FFV1_1(W(1,3),W(1,22),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,30)) + CALL FFV1_1(W(1,3),W(1,72),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,69)) + CALL FFV1_1(W(1,3),W(1,60),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,24)) C Amplitude(s) for diagram number 1191 CALL FFV1_0(W(1,53),W(1,30),W(1,6),GC_11(IVEC),AMP(1711)) CALL FFV1_0(W(1,53),W(1,69),W(1,6),GC_11(IVEC),AMP(1712)) @@ -13333,9 +13362,9 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ ,W(1,69)) CALL VVVV4P0_1(W(1,1),W(1,6),W(1,7),GC_12(IVEC),ZERO, FK_ZERO $ ,W(1,30)) - CALL FFV1_2(W(1,4),W(1,24),GC_11(IVEC),MDL_MT, ZERO,W(1,72)) - CALL FFV1_2(W(1,4),W(1,69),GC_11(IVEC),MDL_MT, ZERO,W(1,22)) - CALL FFV1_2(W(1,4),W(1,30),GC_11(IVEC),MDL_MT, ZERO,W(1,67)) + CALL FFV1_2(W(1,4),W(1,24),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,72)) + CALL FFV1_2(W(1,4),W(1,69),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,22)) + CALL FFV1_2(W(1,4),W(1,30),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,67)) C Amplitude(s) for diagram number 1204 CALL FFV1_0(W(1,72),W(1,78),W(1,5),GC_11(IVEC),AMP(1756)) CALL FFV1_0(W(1,22),W(1,78),W(1,5),GC_11(IVEC),AMP(1757)) @@ -13351,9 +13380,9 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,47),W(1,78),W(1,24),GC_11(IVEC),AMP(1762)) CALL FFV1_0(W(1,47),W(1,78),W(1,69),GC_11(IVEC),AMP(1763)) CALL FFV1_0(W(1,47),W(1,78),W(1,30),GC_11(IVEC),AMP(1764)) - CALL FFV1_1(W(1,3),W(1,24),GC_11(IVEC),MDL_MT, ZERO,W(1,78)) - CALL 
FFV1_1(W(1,3),W(1,69),GC_11(IVEC),MDL_MT, ZERO,W(1,17)) - CALL FFV1_1(W(1,3),W(1,30),GC_11(IVEC),MDL_MT, ZERO,W(1,28)) + CALL FFV1_1(W(1,3),W(1,24),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,78)) + CALL FFV1_1(W(1,3),W(1,69),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,17)) + CALL FFV1_1(W(1,3),W(1,30),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,28)) C Amplitude(s) for diagram number 1207 CALL FFV1_0(W(1,53),W(1,78),W(1,5),GC_11(IVEC),AMP(1765)) CALL FFV1_0(W(1,53),W(1,17),W(1,5),GC_11(IVEC),AMP(1766)) diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/ompnumthreads.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/ompnumthreads.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+ +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o 
$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile index 74db44d848..74b19033a8 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 +LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/proc_characteristics b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/proc_characteristics index 18d859620b..e7f6392d16 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/proc_characteristics +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/proc_characteristics @@ -8,7 +8,7 @@ ninitial = 2 grouped_matrix = True has_loops = False - bias_module = None + bias_module = dummy max_n_matched_jets = 3 colored_pdgs = [1, 2, 3, 4, 5, 6, 21] complex_mass_scheme = False diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/generate_events b/epochX/cudacpp/gg_ttggg.mad/bin/generate_events index 107313b25d..5577cc66a0 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/generate_events +++ b/epochX/cudacpp/gg_ttggg.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME - +import misc as misc import logging import logging.config @@ -160,17 +160,31 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv + argument = sys.argv + + # check for plugin customization of the launch command + launch_interface = ME.MadEventCmdShell + if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + try: if '-h' in argument or '--help' in argument: - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: 
argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py index 7624b9f557..e9f421ae5f 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py @@ -1002,13 +1002,14 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() + self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - self.plugin_input(finput) + def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/gg_ttggg.mad/bin/internal/ufomodel/py3_model.pkl index afc2ca4e273b368050537e3f722b85c825bbf510..27a1caae3c115073669b90622e9351ab04166d39 100644 GIT binary patch delta 54 zcmX?lj_Le4rVZZ9G>RD*81z#TOA_@H%Mx=Ei;FY$-2+0642+ERa}!h2ixLYmOwtQV KBPM4qy8{3ztQ2Sf delta 44 zcmX?qj_K$*rVZZ9 t t~ g g g output madevent gg_ttggg.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp - diff --git a/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h index 9cea8bcbe7..9b946c21e1 100644 --- a/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const 
fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -886,6 +888,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -896,6 +899,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -908,6 +912,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -920,6 +925,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -934,6 +940,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -945,6 +952,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -959,6 +967,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -970,6 +979,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -984,6 +994,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const 
double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -995,6 +1006,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -1008,6 +1020,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1042,6 +1055,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1080,6 +1094,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1103,6 +1118,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1134,6 +1150,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1165,6 +1182,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1197,6 +1215,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1225,6 +1244,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1260,6 +1280,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { 
mgDebug( 0, __FUNCTION__ ); @@ -1288,6 +1309,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1323,6 +1345,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1351,6 +1374,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 9bee64b205..31573e7e51 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004521608352661133  +DEBUG: model prefixing takes 0.005338430404663086  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,66 +155,35 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.695 s +1 processes with 1240 diagrams generated in 1.850 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 
@1 INFO: Processing color information for process: g g > t t~ g g g @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  -DEBUG: type(subproc_group)= [output.py at line 188]  -DEBUG: type(fortran_model)= [output.py at line 189]  -DEBUG: type(me)= me=0 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: proc_id =  0 [model_handling.py at line 1046]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); 
[model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 6, 1, 6, 6) [model_handling.py at line 1823]  -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxggg.txt [model_handling.py at line 1336]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 5.784 s 
-DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  +DEBUG: type(subproc_group)= [output.py at line 190]  +DEBUG: type(fortran_model)= [output.py at line 191]  +DEBUG: type(me)= me=0 [output.py at line 192]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.482 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.291 s +ALOHA: aloha creates 5 routines in 0.364 s VVV1 VVV1 FFV1 @@ -227,23 +196,17 @@ ALOHA: aloha creates 5 routines in 0.291 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
+FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m11.544s -user 0m11.267s -sys 0m0.097s +real 0m13.206s +user 0m12.699s +sys 0m0.116s diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! 
+ const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc index 36675814b4..a67b74e5b7 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc @@ -252,13 +252,13 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][6], +1, w_fp[6], 6 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[7] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); - VVV1P0_1( w_fp[7], w_fp[4], COUPs[0], 0., 0., w_fp[9] ); - VVV1P0_1( w_fp[8], w_fp[5], COUPs[0], 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[7], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[9] ); + VVV1P0_1( w_fp[8], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 1 - VVV1_0( w_fp[9], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -282,10 +282,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 1240 *** // Wavefunction(s) for diagram number 2 - VVV1P0_1( w_fp[8], w_fp[6], COUPs[0], 0., 0., w_fp[11] ); + VVV1P0_1( w_fp[8], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 2 - VVV1_0( w_fp[9], w_fp[11], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[11], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -312,7 +312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 3 - 
VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -332,7 +332,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -352,7 +352,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[9], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -376,11 +376,11 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 1240 *** // Wavefunction(s) for diagram number 4 - VVV1P0_1( w_fp[7], w_fp[5], COUPs[0], 0., 0., w_fp[12] ); - VVV1P0_1( w_fp[8], w_fp[4], COUPs[0], 0., 0., w_fp[13] ); + VVV1P0_1( w_fp[7], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[12] ); + VVV1P0_1( w_fp[8], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[13] ); // Amplitude(s) for diagram number 4 - VVV1_0( w_fp[12], w_fp[13], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[13], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -407,7 +407,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[12], w_fp[11], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[11], w_fp[4], COUPs[0], 1.0, &_fp[0] 
); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -434,7 +434,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -454,7 +454,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -474,7 +474,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[12], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -498,10 +498,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 1240 *** // Wavefunction(s) for diagram number 7 - VVV1P0_1( w_fp[7], w_fp[6], COUPs[0], 0., 0., w_fp[14] ); + VVV1P0_1( w_fp[7], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[14] ); // Amplitude(s) for diagram number 7 - VVV1_0( w_fp[14], w_fp[13], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[13], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -528,7 +528,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 
8 - VVV1_0( w_fp[14], w_fp[10], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[10], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -555,7 +555,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -575,7 +575,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -595,7 +595,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[14], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -619,12 +619,12 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 1240 *** // Wavefunction(s) for diagram number 10 - VVVV1P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[15] ); - VVVV3P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[16] ); - VVVV4P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[17] ); + VVVV1P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] ); + VVVV3P0_1( w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[16] ); + VVVV4P0_1( w_fp[7], w_fp[4], w_fp[5], 
COUPs[2], 1.0, 0., 0., w_fp[17] ); // Amplitude(s) for diagram number 10 - VVV1_0( w_fp[8], w_fp[6], w_fp[15], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[15], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -644,7 +644,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -664,7 +664,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -688,12 +688,12 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 1240 *** // Wavefunction(s) for diagram number 11 - VVVV1P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[18] ); - VVVV3P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[19] ); - VVVV4P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[20] ); + VVVV1P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[18] ); + VVVV3P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[19] ); + VVVV4P0_1( w_fp[7], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[20] ); // Amplitude(s) for diagram number 11 - VVV1_0( w_fp[8], w_fp[5], w_fp[18], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[18], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-713,7 +713,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -733,7 +733,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -757,12 +757,12 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 1240 *** // Wavefunction(s) for diagram number 12 - VVVV1P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[22] ); - VVVV4P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[23] ); + VVVV1P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] ); + VVVV4P0_1( w_fp[7], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 12 - VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -782,7 +782,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -802,7 +802,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -826,10 +826,10 @@ namespace mg5amcCpu // *** DIAGRAM 13 OF 1240 *** // Wavefunction(s) for diagram number 13 - VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 0., 0., w_fp[24] ); + VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 13 - VVVV1_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -849,7 +849,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -869,7 +869,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[7], w_fp[8], w_fp[24], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -893,10 +893,10 @@ namespace mg5amcCpu // *** DIAGRAM 14 OF 1240 *** // Wavefunction(s) for diagram number 14 - VVV1P0_1( w_fp[7], w_fp[8], COUPs[0], 0., 
0., w_fp[25] ); + VVV1P0_1( w_fp[7], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 14 - VVV1_0( w_fp[24], w_fp[6], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -920,10 +920,10 @@ namespace mg5amcCpu // *** DIAGRAM 15 OF 1240 *** // Wavefunction(s) for diagram number 15 - VVV1P0_1( w_fp[7], w_fp[24], COUPs[0], 0., 0., w_fp[26] ); + VVV1P0_1( w_fp[7], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[26] ); // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[8], w_fp[6], w_fp[26], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[26], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -950,7 +950,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 16 - VVV1_0( w_fp[8], w_fp[24], w_fp[14], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[24], w_fp[14], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -974,10 +974,10 @@ namespace mg5amcCpu // *** DIAGRAM 17 OF 1240 *** // Wavefunction(s) for diagram number 17 - VVV1P0_1( w_fp[4], w_fp[6], COUPs[0], 0., 0., w_fp[27] ); + VVV1P0_1( w_fp[4], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[27] ); // Amplitude(s) for diagram number 17 - VVVV1_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -997,7 +997,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[113] -= 
amp_sv[0]; - VVVV3_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1017,7 +1017,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVVV4_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[7], w_fp[8], w_fp[27], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1044,7 +1044,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 18 - VVV1_0( w_fp[27], w_fp[5], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1068,10 +1068,10 @@ namespace mg5amcCpu // *** DIAGRAM 19 OF 1240 *** // Wavefunction(s) for diagram number 19 - VVV1P0_1( w_fp[7], w_fp[27], COUPs[0], 0., 0., w_fp[28] ); + VVV1P0_1( w_fp[7], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[28] ); // Amplitude(s) for diagram number 19 - VVV1_0( w_fp[8], w_fp[5], w_fp[28], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[28], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1098,7 +1098,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[8], w_fp[27], w_fp[12], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[27], w_fp[12], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -1122,10 +1122,10 @@ namespace mg5amcCpu // *** DIAGRAM 21 OF 1240 *** // Wavefunction(s) for diagram number 21 - VVV1P0_1( w_fp[5], w_fp[6], COUPs[0], 0., 0., w_fp[29] ); + VVV1P0_1( w_fp[5], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] ); // Amplitude(s) for diagram number 21 - VVVV1_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1145,7 +1145,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1165,7 +1165,7 @@ namespace mg5amcCpu jamp_sv[116] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[7], w_fp[8], w_fp[4], w_fp[29], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1192,7 +1192,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - VVV1_0( w_fp[4], w_fp[29], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1219,7 +1219,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 23 - VVV1_0( w_fp[8], w_fp[29], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[29], w_fp[9], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1243,10 +1243,10 @@ namespace mg5amcCpu // *** DIAGRAM 24 OF 1240 *** // Wavefunction(s) for diagram number 24 - VVV1P0_1( w_fp[7], w_fp[29], COUPs[0], 0., 0., w_fp[25] ); + VVV1P0_1( w_fp[7], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 24 - VVV1_0( w_fp[8], w_fp[4], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1270,12 +1270,12 @@ namespace mg5amcCpu // *** DIAGRAM 25 OF 1240 *** // Wavefunction(s) for diagram number 25 - VVVV1P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[30] ); - VVVV3P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[31] ); - VVVV4P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[32] ); + VVVV1P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[30] ); + VVVV3P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[31] ); + VVVV4P0_1( w_fp[4], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[32] ); // Amplitude(s) for diagram number 25 - VVV1_0( w_fp[7], w_fp[8], w_fp[30], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[8], w_fp[30], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1295,7 +1295,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[7], w_fp[8], w_fp[31], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[8], w_fp[31], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1315,7 +1315,7 @@ namespace mg5amcCpu 
jamp_sv[95] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[7], w_fp[8], w_fp[32], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[8], w_fp[32], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1339,12 +1339,12 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 1240 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[33] ); - FFV1_2( w_fp[3], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[34] ); - FFV1_1( w_fp[33], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[35] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[33] ); + FFV1_2( w_fp[3], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] ); + FFV1_1( w_fp[33], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[35] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[34], w_fp[35], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[35], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1354,10 +1354,10 @@ namespace mg5amcCpu // *** DIAGRAM 27 OF 1240 *** // Wavefunction(s) for diagram number 27 - FFV1_1( w_fp[33], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[36] ); + FFV1_1( w_fp[33], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[36] ); // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[34], w_fp[36], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[36], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1367,10 +1367,10 @@ namespace mg5amcCpu // *** DIAGRAM 28 OF 1240 *** // Wavefunction(s) for diagram number 28 - FFV1P0_3( w_fp[3], w_fp[33], COUPs[1], 0., 0., w_fp[37] ); + FFV1P0_3( w_fp[3], w_fp[33], COUPs[1], 
1.0, 0., 0., w_fp[37] ); // Amplitude(s) for diagram number 28 - VVV1_0( w_fp[12], w_fp[37], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[37], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1389,7 +1389,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[3], w_fp[36], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[36], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1404,7 +1404,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 30 - VVV1_0( w_fp[14], w_fp[37], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[37], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1423,7 +1423,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 31 - FFV1_0( w_fp[3], w_fp[35], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[35], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1438,7 +1438,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1450,7 +1450,7 @@ namespace mg5amcCpu jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], 
w_fp[33], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1462,7 +1462,7 @@ namespace mg5amcCpu jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1478,11 +1478,11 @@ namespace mg5amcCpu // *** DIAGRAM 33 OF 1240 *** // Wavefunction(s) for diagram number 33 - FFV1_2( w_fp[3], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[38] ); - FFV1_1( w_fp[33], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[39] ); + FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[38] ); + FFV1_1( w_fp[33], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] ); // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[38], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1492,10 +1492,10 @@ namespace mg5amcCpu // *** DIAGRAM 34 OF 1240 *** // Wavefunction(s) for diagram number 34 - FFV1_2( w_fp[38], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[40] ); + FFV1_2( w_fp[38], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] ); // Amplitude(s) for diagram number 34 - FFV1_0( w_fp[40], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1508,7 
+1508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - FFV1_0( w_fp[38], w_fp[33], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1520,10 +1520,10 @@ namespace mg5amcCpu // *** DIAGRAM 36 OF 1240 *** // Wavefunction(s) for diagram number 36 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[41] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[41] ); // Amplitude(s) for diagram number 36 - FFV1_0( w_fp[41], w_fp[39], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1533,10 +1533,10 @@ namespace mg5amcCpu // *** DIAGRAM 37 OF 1240 *** // Wavefunction(s) for diagram number 37 - FFV1_2( w_fp[41], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[42] ); + FFV1_2( w_fp[41], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[42] ); // Amplitude(s) for diagram number 37 - FFV1_0( w_fp[42], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[42], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1549,7 +1549,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 38 - FFV1_0( w_fp[41], w_fp[33], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1564,7 +1564,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 39 - FFV1_0( w_fp[3], w_fp[39], w_fp[29], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1579,7 +1579,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 40 - FFV1_0( w_fp[34], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1594,7 +1594,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 41 - FFV1_0( w_fp[3], w_fp[33], w_fp[25], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[25], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1610,11 +1610,11 @@ namespace mg5amcCpu // *** DIAGRAM 42 OF 1240 *** // Wavefunction(s) for diagram number 42 - FFV1_1( w_fp[2], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[39] ); - FFV1_1( w_fp[39], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[43] ); + FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[39] ); + FFV1_1( w_fp[39], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[43] ); // Amplitude(s) for diagram number 42 - FFV1_0( w_fp[34], w_fp[43], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[43], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1624,10 +1624,10 @@ namespace mg5amcCpu // *** DIAGRAM 43 OF 1240 *** // Wavefunction(s) for diagram number 43 - FFV1_1( w_fp[39], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[44] ); + FFV1_1( w_fp[39], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[44] ); // Amplitude(s) for diagram number 43 - FFV1_0( w_fp[34], 
w_fp[44], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[44], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1637,10 +1637,10 @@ namespace mg5amcCpu // *** DIAGRAM 44 OF 1240 *** // Wavefunction(s) for diagram number 44 - FFV1P0_3( w_fp[3], w_fp[39], COUPs[1], 0., 0., w_fp[45] ); + FFV1P0_3( w_fp[3], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[45] ); // Amplitude(s) for diagram number 44 - VVV1_0( w_fp[9], w_fp[45], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[45], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1659,7 +1659,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 45 - FFV1_0( w_fp[3], w_fp[44], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[44], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1674,7 +1674,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 46 - VVV1_0( w_fp[14], w_fp[45], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[45], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1693,7 +1693,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 47 - FFV1_0( w_fp[3], w_fp[43], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[43], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1708,7 +1708,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 48 - 
FFV1_0( w_fp[3], w_fp[39], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1720,7 +1720,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1732,7 +1732,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1748,11 +1748,11 @@ namespace mg5amcCpu // *** DIAGRAM 49 OF 1240 *** // Wavefunction(s) for diagram number 49 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[46] ); - FFV1_1( w_fp[39], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[47] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[46] ); + FFV1_1( w_fp[39], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] ); // Amplitude(s) for diagram number 49 - FFV1_0( w_fp[46], w_fp[47], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1762,10 +1762,10 @@ namespace mg5amcCpu // *** DIAGRAM 50 OF 1240 *** // Wavefunction(s) for diagram number 50 - 
FFV1_2( w_fp[46], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[48] ); + FFV1_2( w_fp[46], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] ); // Amplitude(s) for diagram number 50 - FFV1_0( w_fp[48], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1778,7 +1778,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 51 - FFV1_0( w_fp[46], w_fp[39], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1793,7 +1793,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 52 - FFV1_0( w_fp[41], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1806,7 +1806,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 53 - FFV1_0( w_fp[42], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[42], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1819,7 +1819,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 54 - FFV1_0( w_fp[41], w_fp[39], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1834,7 +1834,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 55 - FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1849,7 +1849,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 56 - FFV1_0( w_fp[34], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1864,7 +1864,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 57 - FFV1_0( w_fp[3], w_fp[39], w_fp[28], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[28], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1880,11 +1880,11 @@ namespace mg5amcCpu // *** DIAGRAM 58 OF 1240 *** // Wavefunction(s) for diagram number 58 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[47] ); - FFV1_1( w_fp[47], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[49] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[47] ); + FFV1_1( w_fp[47], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[49] ); // Amplitude(s) for diagram number 58 - FFV1_0( w_fp[34], w_fp[49], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[49], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1894,10 +1894,10 @@ namespace mg5amcCpu // *** DIAGRAM 59 OF 1240 *** // Wavefunction(s) for diagram number 59 - FFV1_1( w_fp[47], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[50] ); + FFV1_1( w_fp[47], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[50] ); 
// Amplitude(s) for diagram number 59 - FFV1_0( w_fp[34], w_fp[50], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[50], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1907,10 +1907,10 @@ namespace mg5amcCpu // *** DIAGRAM 60 OF 1240 *** // Wavefunction(s) for diagram number 60 - FFV1P0_3( w_fp[3], w_fp[47], COUPs[1], 0., 0., w_fp[51] ); + FFV1P0_3( w_fp[3], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[51] ); // Amplitude(s) for diagram number 60 - VVV1_0( w_fp[9], w_fp[51], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[51], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1929,7 +1929,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 61 - FFV1_0( w_fp[3], w_fp[50], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[50], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1944,7 +1944,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 62 - VVV1_0( w_fp[12], w_fp[51], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[51], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1963,7 +1963,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 63 - FFV1_0( w_fp[3], w_fp[49], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[49], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1978,7 +1978,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 64 - FFV1_0( w_fp[3], w_fp[47], w_fp[15], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[15], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -1990,7 +1990,7 @@ namespace mg5amcCpu jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2002,7 +2002,7 @@ namespace mg5amcCpu jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[17], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2018,10 +2018,10 @@ namespace mg5amcCpu // *** DIAGRAM 65 OF 1240 *** // Wavefunction(s) for diagram number 65 - FFV1_1( w_fp[47], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[52] ); + FFV1_1( w_fp[47], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] ); // Amplitude(s) for diagram number 65 - FFV1_0( w_fp[46], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2034,7 +2034,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 66 - FFV1_0( w_fp[48], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[47], w_fp[5], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2047,7 +2047,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 67 - FFV1_0( w_fp[46], w_fp[47], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2062,7 +2062,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 68 - FFV1_0( w_fp[38], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2075,7 +2075,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 69 - FFV1_0( w_fp[40], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2088,7 +2088,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 70 - FFV1_0( w_fp[38], w_fp[47], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2103,7 +2103,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 71 - FFV1_0( w_fp[3], w_fp[52], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -2118,7 +2118,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 72 - FFV1_0( w_fp[34], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2133,7 +2133,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 73 - FFV1_0( w_fp[3], w_fp[47], w_fp[26], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[26], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2149,11 +2149,11 @@ namespace mg5amcCpu // *** DIAGRAM 74 OF 1240 *** // Wavefunction(s) for diagram number 74 - FFV1_1( w_fp[2], w_fp[7], COUPs[1], cIPD[0], cIPD[1], w_fp[52] ); - FFV1_2( w_fp[46], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[7] ); + FFV1_1( w_fp[2], w_fp[7], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] ); + FFV1_2( w_fp[46], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] ); // Amplitude(s) for diagram number 74 - FFV1_0( w_fp[7], w_fp[52], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[52], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2163,10 +2163,10 @@ namespace mg5amcCpu // *** DIAGRAM 75 OF 1240 *** // Wavefunction(s) for diagram number 75 - FFV1_2( w_fp[46], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[53] ); + FFV1_2( w_fp[46], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[53] ); // Amplitude(s) for diagram number 75 - FFV1_0( w_fp[53], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -2176,10 +2176,10 @@ namespace mg5amcCpu // *** DIAGRAM 76 OF 1240 *** // Wavefunction(s) for diagram number 76 - FFV1P0_3( w_fp[46], w_fp[2], COUPs[1], 0., 0., w_fp[54] ); + FFV1P0_3( w_fp[46], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[54] ); // Amplitude(s) for diagram number 76 - VVV1_0( w_fp[12], w_fp[54], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[54], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2198,7 +2198,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 77 - FFV1_0( w_fp[53], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2213,7 +2213,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 78 - VVV1_0( w_fp[14], w_fp[54], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[54], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2232,7 +2232,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 79 - FFV1_0( w_fp[7], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2247,7 +2247,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 80 - FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -2259,7 +2259,7 @@ namespace mg5amcCpu jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2271,7 +2271,7 @@ namespace mg5amcCpu jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2290,7 +2290,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 81 - FFV1_0( w_fp[46], w_fp[52], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[52], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2305,7 +2305,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 82 - FFV1_0( w_fp[48], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2320,7 +2320,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 83 - FFV1_0( w_fp[46], w_fp[2], w_fp[25], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[25], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -2336,10 +2336,10 @@ namespace mg5amcCpu // *** DIAGRAM 84 OF 1240 *** // Wavefunction(s) for diagram number 84 - FFV1_2( w_fp[38], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[25] ); + FFV1_2( w_fp[38], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[25] ); // Amplitude(s) for diagram number 84 - FFV1_0( w_fp[25], w_fp[52], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[52], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2349,10 +2349,10 @@ namespace mg5amcCpu // *** DIAGRAM 85 OF 1240 *** // Wavefunction(s) for diagram number 85 - FFV1_2( w_fp[38], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[48] ); + FFV1_2( w_fp[38], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[48] ); // Amplitude(s) for diagram number 85 - FFV1_0( w_fp[48], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2362,10 +2362,10 @@ namespace mg5amcCpu // *** DIAGRAM 86 OF 1240 *** // Wavefunction(s) for diagram number 86 - FFV1P0_3( w_fp[38], w_fp[2], COUPs[1], 0., 0., w_fp[23] ); + FFV1P0_3( w_fp[38], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 86 - VVV1_0( w_fp[9], w_fp[23], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[23], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2384,7 +2384,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 87 - FFV1_0( w_fp[48], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // 
Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2399,7 +2399,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 88 - VVV1_0( w_fp[14], w_fp[23], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[14], w_fp[23], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2418,7 +2418,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 89 - FFV1_0( w_fp[25], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2433,7 +2433,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 90 - FFV1_0( w_fp[38], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2445,7 +2445,7 @@ namespace mg5amcCpu jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2457,7 +2457,7 @@ namespace mg5amcCpu jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here 
the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2476,7 +2476,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 91 - FFV1_0( w_fp[38], w_fp[52], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[52], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2491,7 +2491,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 92 - FFV1_0( w_fp[40], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2506,7 +2506,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 93 - FFV1_0( w_fp[38], w_fp[2], w_fp[28], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[28], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2522,10 +2522,10 @@ namespace mg5amcCpu // *** DIAGRAM 94 OF 1240 *** // Wavefunction(s) for diagram number 94 - FFV1_2( w_fp[41], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[28] ); + FFV1_2( w_fp[41], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[28] ); // Amplitude(s) for diagram number 94 - FFV1_0( w_fp[28], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2535,10 +2535,10 @@ namespace mg5amcCpu // *** DIAGRAM 95 OF 1240 *** // Wavefunction(s) for diagram number 95 - FFV1_2( w_fp[41], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[40] ); + FFV1_2( w_fp[41], w_fp[5], 
COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[40] ); // Amplitude(s) for diagram number 95 - FFV1_0( w_fp[40], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2548,10 +2548,10 @@ namespace mg5amcCpu // *** DIAGRAM 96 OF 1240 *** // Wavefunction(s) for diagram number 96 - FFV1P0_3( w_fp[41], w_fp[2], COUPs[1], 0., 0., w_fp[20] ); + FFV1P0_3( w_fp[41], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[20] ); // Amplitude(s) for diagram number 96 - VVV1_0( w_fp[9], w_fp[20], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[20], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2570,7 +2570,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 97 - FFV1_0( w_fp[40], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2585,7 +2585,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 98 - VVV1_0( w_fp[12], w_fp[20], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[20], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2604,7 +2604,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 99 - FFV1_0( w_fp[28], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -2619,7 +2619,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 100 - FFV1_0( w_fp[41], w_fp[2], w_fp[15], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[15], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2631,7 +2631,7 @@ namespace mg5amcCpu jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2643,7 +2643,7 @@ namespace mg5amcCpu jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2662,7 +2662,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 101 - FFV1_0( w_fp[41], w_fp[52], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[52], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2677,7 +2677,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 102 - FFV1_0( w_fp[42], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[42], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -2692,7 +2692,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 103 - FFV1_0( w_fp[41], w_fp[2], w_fp[26], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[26], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2708,10 +2708,10 @@ namespace mg5amcCpu // *** DIAGRAM 104 OF 1240 *** // Wavefunction(s) for diagram number 104 - FFV1_2( w_fp[3], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[26] ); + FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[26] ); // Amplitude(s) for diagram number 104 - FFV1_0( w_fp[26], w_fp[52], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[52], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2723,10 +2723,10 @@ namespace mg5amcCpu // *** DIAGRAM 105 OF 1240 *** // Wavefunction(s) for diagram number 105 - VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 0., 0., w_fp[42] ); + VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[42] ); // Amplitude(s) for diagram number 105 - FFV1_0( w_fp[3], w_fp[52], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2742,10 +2742,10 @@ namespace mg5amcCpu // *** DIAGRAM 106 OF 1240 *** // Wavefunction(s) for diagram number 106 - FFV1_1( w_fp[2], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[17] ); + FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] ); // Amplitude(s) for diagram number 106 - FFV1_0( w_fp[34], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2760,7 +2760,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 107 - FFV1_0( w_fp[34], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2779,7 +2779,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 108 - FFV1_0( w_fp[3], w_fp[17], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2798,7 +2798,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 109 - FFV1_0( w_fp[26], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2814,10 +2814,10 @@ namespace mg5amcCpu // *** DIAGRAM 110 OF 1240 *** // Wavefunction(s) for diagram number 110 - FFV1_2( w_fp[3], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); + FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 110 - FFV1_0( w_fp[14], w_fp[52], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[52], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2829,10 +2829,10 @@ namespace mg5amcCpu // *** DIAGRAM 111 OF 1240 *** // Wavefunction(s) for diagram number 111 - VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., 
w_fp[16] ); // Amplitude(s) for diagram number 111 - FFV1_0( w_fp[3], w_fp[52], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2848,10 +2848,10 @@ namespace mg5amcCpu // *** DIAGRAM 112 OF 1240 *** // Wavefunction(s) for diagram number 112 - FFV1_1( w_fp[2], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[15] ); + FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] ); // Amplitude(s) for diagram number 112 - FFV1_0( w_fp[34], w_fp[15], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[15], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2866,7 +2866,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 113 - FFV1_0( w_fp[34], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2885,7 +2885,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 114 - FFV1_0( w_fp[3], w_fp[15], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[15], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2904,7 +2904,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 115 - FFV1_0( w_fp[14], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -2920,10 +2920,10 @@ namespace mg5amcCpu // *** DIAGRAM 116 OF 1240 *** // Wavefunction(s) for diagram number 116 - FFV1_2( w_fp[3], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 116 - FFV1_0( w_fp[12], w_fp[52], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[52], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2935,10 +2935,10 @@ namespace mg5amcCpu // *** DIAGRAM 117 OF 1240 *** // Wavefunction(s) for diagram number 117 - VVV1P0_1( w_fp[4], w_fp[29], COUPs[0], 0., 0., w_fp[19] ); + VVV1P0_1( w_fp[4], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[19] ); // Amplitude(s) for diagram number 117 - FFV1_0( w_fp[3], w_fp[52], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2954,10 +2954,10 @@ namespace mg5amcCpu // *** DIAGRAM 118 OF 1240 *** // Wavefunction(s) for diagram number 118 - FFV1_1( w_fp[2], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[18] ); + FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[18] ); // Amplitude(s) for diagram number 118 - FFV1_0( w_fp[34], w_fp[18], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[18], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2972,7 +2972,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 119 - FFV1_0( w_fp[34], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -2991,7 +2991,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 120 - FFV1_0( w_fp[3], w_fp[18], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[18], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3010,7 +3010,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 121 - FFV1_0( w_fp[12], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3029,7 +3029,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 122 - FFV1_0( w_fp[3], w_fp[52], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[30], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3041,7 +3041,7 @@ namespace mg5amcCpu jamp_sv[25] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[29] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[52], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[31], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3053,7 +3053,7 @@ namespace mg5amcCpu jamp_sv[26] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[27] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[28] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[52], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[52], w_fp[32], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3072,7 +3072,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 123 - FFV1_0( w_fp[34], w_fp[2], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[30], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3084,7 +3084,7 @@ namespace mg5amcCpu jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[31], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3096,7 +3096,7 @@ namespace mg5amcCpu jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[32], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3112,13 +3112,13 @@ namespace mg5amcCpu // *** DIAGRAM 124 OF 1240 *** // Wavefunction(s) for diagram number 124 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[34] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[52] ); - FFV1_1( w_fp[34], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[52], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[52] ); + FFV1_1( w_fp[34], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[52], w_fp[5], 
COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 124 - FFV1_0( w_fp[22], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3127,10 +3127,10 @@ namespace mg5amcCpu // *** DIAGRAM 125 OF 1240 *** // Wavefunction(s) for diagram number 125 - FFV1_2( w_fp[52], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[52], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 125 - FFV1_0( w_fp[21], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3139,11 +3139,11 @@ namespace mg5amcCpu // *** DIAGRAM 126 OF 1240 *** // Wavefunction(s) for diagram number 126 - FFV1_1( w_fp[34], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[55] ); - FFV1_2( w_fp[52], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[56] ); + FFV1_1( w_fp[34], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[55] ); + FFV1_2( w_fp[52], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[56] ); // Amplitude(s) for diagram number 126 - FFV1_0( w_fp[56], w_fp[55], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[55], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3155,7 +3155,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 127 - FFV1_0( w_fp[21], w_fp[55], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[55], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -3164,10 +3164,10 @@ namespace mg5amcCpu // *** DIAGRAM 128 OF 1240 *** // Wavefunction(s) for diagram number 128 - FFV1_1( w_fp[34], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[57] ); + FFV1_1( w_fp[34], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[57] ); // Amplitude(s) for diagram number 128 - FFV1_0( w_fp[56], w_fp[57], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[57], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3179,7 +3179,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 129 - FFV1_0( w_fp[22], w_fp[57], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[57], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3188,10 +3188,10 @@ namespace mg5amcCpu // *** DIAGRAM 130 OF 1240 *** // Wavefunction(s) for diagram number 130 - FFV1P0_3( w_fp[52], w_fp[34], COUPs[1], 0., 0., w_fp[58] ); + FFV1P0_3( w_fp[52], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[58] ); // Amplitude(s) for diagram number 130 - VVV1_0( w_fp[24], w_fp[6], w_fp[58], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[58], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3203,10 +3203,10 @@ namespace mg5amcCpu // *** DIAGRAM 131 OF 1240 *** // Wavefunction(s) for diagram number 131 - FFV1_1( w_fp[34], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[59] ); + FFV1_1( w_fp[34], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] ); // Amplitude(s) for diagram number 131 - FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // 
Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3219,7 +3219,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 132 - FFV1_0( w_fp[52], w_fp[57], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[57], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3232,7 +3232,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 133 - VVV1_0( w_fp[27], w_fp[5], w_fp[58], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[58], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3244,10 +3244,10 @@ namespace mg5amcCpu // *** DIAGRAM 134 OF 1240 *** // Wavefunction(s) for diagram number 134 - FFV1_1( w_fp[34], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); + FFV1_1( w_fp[34], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); // Amplitude(s) for diagram number 134 - FFV1_0( w_fp[52], w_fp[60], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[60], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3260,7 +3260,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 135 - FFV1_0( w_fp[52], w_fp[55], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[55], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3273,7 +3273,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 136 - VVV1_0( w_fp[4], w_fp[29], w_fp[58], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[58], COUPs[0], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3288,7 +3288,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 137 - FFV1_0( w_fp[52], w_fp[9], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[9], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3298,10 +3298,10 @@ namespace mg5amcCpu // *** DIAGRAM 138 OF 1240 *** // Wavefunction(s) for diagram number 138 - FFV1_1( w_fp[34], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[58] ); + FFV1_1( w_fp[34], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] ); // Amplitude(s) for diagram number 138 - FFV1_0( w_fp[52], w_fp[58], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[58], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3314,7 +3314,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 139 - FFV1_0( w_fp[52], w_fp[34], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[34], w_fp[30], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3322,7 +3322,7 @@ namespace mg5amcCpu jamp_sv[11] -= amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[34], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[34], w_fp[31], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3330,7 +3330,7 @@ namespace mg5amcCpu jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; - 
FFV1_0( w_fp[52], w_fp[34], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[34], w_fp[32], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3342,12 +3342,12 @@ namespace mg5amcCpu // *** DIAGRAM 140 OF 1240 *** // Wavefunction(s) for diagram number 140 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[61] ); - FFV1P0_3( w_fp[3], w_fp[34], COUPs[1], 0., 0., w_fp[62] ); - VVV1P0_1( w_fp[61], w_fp[5], COUPs[0], 0., 0., w_fp[63] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[61] ); + FFV1P0_3( w_fp[3], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[62] ); + VVV1P0_1( w_fp[61], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[63] ); // Amplitude(s) for diagram number 140 - VVV1_0( w_fp[62], w_fp[63], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[63], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3363,10 +3363,10 @@ namespace mg5amcCpu // *** DIAGRAM 141 OF 1240 *** // Wavefunction(s) for diagram number 141 - VVV1P0_1( w_fp[61], w_fp[6], COUPs[0], 0., 0., w_fp[64] ); + VVV1P0_1( w_fp[61], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[64] ); // Amplitude(s) for diagram number 141 - VVV1_0( w_fp[62], w_fp[64], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[64], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3385,7 +3385,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 142 - VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv 
and denominators_sv (#473) #endif @@ -3397,7 +3397,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3409,7 +3409,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3425,10 +3425,10 @@ namespace mg5amcCpu // *** DIAGRAM 143 OF 1240 *** // Wavefunction(s) for diagram number 143 - FFV1_2( w_fp[3], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[65] ); + FFV1_2( w_fp[3], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[65] ); // Amplitude(s) for diagram number 143 - FFV1_0( w_fp[65], w_fp[55], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[55], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3441,7 +3441,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 144 - FFV1_0( w_fp[3], w_fp[55], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3456,7 +3456,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 
145 - FFV1_0( w_fp[65], w_fp[57], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[57], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3469,7 +3469,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 146 - FFV1_0( w_fp[3], w_fp[57], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3481,10 +3481,10 @@ namespace mg5amcCpu // *** DIAGRAM 147 OF 1240 *** // Wavefunction(s) for diagram number 147 - FFV1_1( w_fp[34], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[66] ); + FFV1_1( w_fp[34], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] ); // Amplitude(s) for diagram number 147 - FFV1_0( w_fp[38], w_fp[66], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[66], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3494,10 +3494,10 @@ namespace mg5amcCpu // *** DIAGRAM 148 OF 1240 *** // Wavefunction(s) for diagram number 148 - FFV1P0_3( w_fp[38], w_fp[34], COUPs[1], 0., 0., w_fp[67] ); + FFV1P0_3( w_fp[38], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[67] ); // Amplitude(s) for diagram number 148 - VVV1_0( w_fp[61], w_fp[6], w_fp[67], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[67], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3512,7 +3512,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 149 - FFV1_0( w_fp[38], w_fp[57], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[57], w_fp[61], COUPs[1], 1.0, &_fp[0] 
); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3525,7 +3525,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 150 - FFV1_0( w_fp[41], w_fp[66], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[66], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3535,10 +3535,10 @@ namespace mg5amcCpu // *** DIAGRAM 151 OF 1240 *** // Wavefunction(s) for diagram number 151 - FFV1P0_3( w_fp[41], w_fp[34], COUPs[1], 0., 0., w_fp[68] ); + FFV1P0_3( w_fp[41], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 151 - VVV1_0( w_fp[61], w_fp[5], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[68], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3553,7 +3553,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 152 - FFV1_0( w_fp[41], w_fp[55], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[55], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3566,7 +3566,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 153 - FFV1_0( w_fp[3], w_fp[66], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[66], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3581,7 +3581,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 154 - VVV1_0( w_fp[61], w_fp[29], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[29], 
w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3600,7 +3600,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 155 - FFV1_0( w_fp[3], w_fp[58], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[58], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3612,11 +3612,11 @@ namespace mg5amcCpu // *** DIAGRAM 156 OF 1240 *** // Wavefunction(s) for diagram number 156 - VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 0., 0., w_fp[66] ); - VVV1P0_1( w_fp[66], w_fp[4], COUPs[0], 0., 0., w_fp[69] ); + VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[66] ); + VVV1P0_1( w_fp[66], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[69] ); // Amplitude(s) for diagram number 156 - VVV1_0( w_fp[62], w_fp[69], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[69], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3632,10 +3632,10 @@ namespace mg5amcCpu // *** DIAGRAM 157 OF 1240 *** // Wavefunction(s) for diagram number 157 - VVV1P0_1( w_fp[66], w_fp[6], COUPs[0], 0., 0., w_fp[70] ); + VVV1P0_1( w_fp[66], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[70] ); // Amplitude(s) for diagram number 157 - VVV1_0( w_fp[62], w_fp[70], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[70], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3654,7 +3654,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 158 - VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[66], w_fp[4], w_fp[6], 
w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3666,7 +3666,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3678,7 +3678,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3694,10 +3694,10 @@ namespace mg5amcCpu // *** DIAGRAM 159 OF 1240 *** // Wavefunction(s) for diagram number 159 - FFV1_2( w_fp[3], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); + FFV1_2( w_fp[3], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); // Amplitude(s) for diagram number 159 - FFV1_0( w_fp[71], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3710,7 +3710,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 160 - FFV1_0( w_fp[3], w_fp[9], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -3725,7 +3725,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 161 - FFV1_0( w_fp[71], w_fp[57], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[57], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3738,7 +3738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 162 - FFV1_0( w_fp[3], w_fp[57], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3750,10 +3750,10 @@ namespace mg5amcCpu // *** DIAGRAM 163 OF 1240 *** // Wavefunction(s) for diagram number 163 - FFV1_1( w_fp[34], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[72] ); + FFV1_1( w_fp[34], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] ); // Amplitude(s) for diagram number 163 - FFV1_0( w_fp[46], w_fp[72], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[72], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3763,10 +3763,10 @@ namespace mg5amcCpu // *** DIAGRAM 164 OF 1240 *** // Wavefunction(s) for diagram number 164 - FFV1P0_3( w_fp[46], w_fp[34], COUPs[1], 0., 0., w_fp[73] ); + FFV1P0_3( w_fp[46], w_fp[34], COUPs[1], 1.0, 0., 0., w_fp[73] ); // Amplitude(s) for diagram number 164 - VVV1_0( w_fp[66], w_fp[6], w_fp[73], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[73], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3781,7 +3781,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 165 - FFV1_0( w_fp[46], w_fp[57], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[57], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3794,7 +3794,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 166 - FFV1_0( w_fp[41], w_fp[72], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[72], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3807,7 +3807,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 167 - VVV1_0( w_fp[66], w_fp[4], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[68], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3822,7 +3822,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 168 - FFV1_0( w_fp[41], w_fp[9], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[9], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3835,7 +3835,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 169 - FFV1_0( w_fp[3], w_fp[72], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[72], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3850,7 +3850,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 170 - VVV1_0( w_fp[66], w_fp[27], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[27], w_fp[62], COUPs[0], 1.0, &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3869,7 +3869,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 171 - FFV1_0( w_fp[3], w_fp[60], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[60], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3881,11 +3881,11 @@ namespace mg5amcCpu // *** DIAGRAM 172 OF 1240 *** // Wavefunction(s) for diagram number 172 - VVV1P0_1( w_fp[1], w_fp[6], COUPs[0], 0., 0., w_fp[72] ); - VVV1P0_1( w_fp[72], w_fp[4], COUPs[0], 0., 0., w_fp[74] ); + VVV1P0_1( w_fp[1], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[72] ); + VVV1P0_1( w_fp[72], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[74] ); // Amplitude(s) for diagram number 172 - VVV1_0( w_fp[62], w_fp[74], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[74], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3901,10 +3901,10 @@ namespace mg5amcCpu // *** DIAGRAM 173 OF 1240 *** // Wavefunction(s) for diagram number 173 - VVV1P0_1( w_fp[72], w_fp[5], COUPs[0], 0., 0., w_fp[75] ); + VVV1P0_1( w_fp[72], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[75] ); // Amplitude(s) for diagram number 173 - VVV1_0( w_fp[62], w_fp[75], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[75], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3923,7 +3923,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 174 - VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3935,7 +3935,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3947,7 +3947,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3963,10 +3963,10 @@ namespace mg5amcCpu // *** DIAGRAM 175 OF 1240 *** // Wavefunction(s) for diagram number 175 - FFV1_2( w_fp[3], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[76] ); + FFV1_2( w_fp[3], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[76] ); // Amplitude(s) for diagram number 175 - FFV1_0( w_fp[76], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -3979,7 +3979,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 176 - FFV1_0( w_fp[3], w_fp[9], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -3994,7 +3994,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 177 - FFV1_0( w_fp[76], w_fp[55], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[55], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4007,7 +4007,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 178 - FFV1_0( w_fp[3], w_fp[55], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4019,10 +4019,10 @@ namespace mg5amcCpu // *** DIAGRAM 179 OF 1240 *** // Wavefunction(s) for diagram number 179 - FFV1_1( w_fp[34], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); + FFV1_1( w_fp[34], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); // Amplitude(s) for diagram number 179 - FFV1_0( w_fp[46], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4035,7 +4035,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 180 - VVV1_0( w_fp[72], w_fp[5], w_fp[73], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[73], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4050,7 +4050,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 181 - FFV1_0( w_fp[46], w_fp[55], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[55], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4063,7 +4063,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 182 - FFV1_0( w_fp[38], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4076,7 +4076,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 183 - VVV1_0( w_fp[72], w_fp[4], w_fp[67], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[67], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4091,7 +4091,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 184 - FFV1_0( w_fp[38], w_fp[9], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[9], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4104,7 +4104,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 185 - FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4119,7 +4119,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 186 - VVV1_0( w_fp[72], w_fp[24], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[24], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4138,7 +4138,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 187 - FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4150,10 +4150,10 @@ namespace mg5amcCpu // *** DIAGRAM 188 OF 1240 *** // Wavefunction(s) for diagram number 188 - FFV1_1( w_fp[34], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); + FFV1_1( w_fp[34], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); // Amplitude(s) for diagram number 188 - FFV1_0( w_fp[7], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4165,7 +4165,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 189 - FFV1_0( w_fp[53], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4174,10 +4174,10 @@ namespace mg5amcCpu // *** DIAGRAM 190 OF 1240 *** // Wavefunction(s) for diagram number 190 - FFV1_2( w_fp[46], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[78] ); + FFV1_2( w_fp[46], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[78] ); // Amplitude(s) for diagram number 190 - FFV1_0( w_fp[78], w_fp[55], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[55], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4189,7 +4189,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 191 - FFV1_0( w_fp[53], w_fp[55], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[55], w_fp[1], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4201,7 +4201,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 192 - FFV1_0( w_fp[78], w_fp[57], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[57], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4213,7 +4213,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 193 - FFV1_0( w_fp[7], w_fp[57], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[57], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4225,7 +4225,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 194 - FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4238,7 +4238,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 195 - VVV1_0( w_fp[1], w_fp[29], w_fp[73], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[73], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4253,7 +4253,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 196 - FFV1_0( w_fp[46], w_fp[58], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[58], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -4266,7 +4266,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 197 - FFV1_0( w_fp[25], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4278,7 +4278,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 198 - FFV1_0( w_fp[48], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4287,10 +4287,10 @@ namespace mg5amcCpu // *** DIAGRAM 199 OF 1240 *** // Wavefunction(s) for diagram number 199 - FFV1_2( w_fp[38], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[58] ); + FFV1_2( w_fp[38], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[58] ); // Amplitude(s) for diagram number 199 - FFV1_0( w_fp[58], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4302,7 +4302,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 200 - FFV1_0( w_fp[48], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4314,7 +4314,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 201 - FFV1_0( w_fp[58], w_fp[57], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[57], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support 
updates numerators_sv and denominators_sv (#473) #endif @@ -4326,7 +4326,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 202 - FFV1_0( w_fp[25], w_fp[57], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[57], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4338,7 +4338,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 203 - FFV1_0( w_fp[38], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4351,7 +4351,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 204 - VVV1_0( w_fp[1], w_fp[27], w_fp[67], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[67], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4366,7 +4366,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 205 - FFV1_0( w_fp[38], w_fp[60], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[60], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4379,7 +4379,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 206 - FFV1_0( w_fp[28], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4391,7 +4391,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 207 - FFV1_0( 
w_fp[40], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4400,10 +4400,10 @@ namespace mg5amcCpu // *** DIAGRAM 208 OF 1240 *** // Wavefunction(s) for diagram number 208 - FFV1_2( w_fp[41], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); + FFV1_2( w_fp[41], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); // Amplitude(s) for diagram number 208 - FFV1_0( w_fp[60], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4415,7 +4415,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 209 - FFV1_0( w_fp[40], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4427,7 +4427,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 210 - FFV1_0( w_fp[60], w_fp[55], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[55], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4439,7 +4439,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 211 - FFV1_0( w_fp[28], w_fp[55], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[55], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4451,7 +4451,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 212 - FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4464,7 +4464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 213 - VVV1_0( w_fp[1], w_fp[24], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[68], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4479,7 +4479,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 214 - FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4492,7 +4492,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 215 - FFV1_0( w_fp[26], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4505,7 +4505,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 216 - FFV1_0( w_fp[3], w_fp[77], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4517,10 +4517,10 @@ namespace mg5amcCpu // *** DIAGRAM 217 OF 1240 *** // Wavefunction(s) for diagram number 217 - VVV1P0_1( w_fp[1], w_fp[24], COUPs[0], 0., 0., w_fp[59] ); + VVV1P0_1( w_fp[1], w_fp[24], 
COUPs[0], 1.0, 0., 0., w_fp[59] ); // Amplitude(s) for diagram number 217 - VVV1_0( w_fp[62], w_fp[59], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[59], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4539,7 +4539,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 218 - VVV1_0( w_fp[62], w_fp[1], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4558,7 +4558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 219 - VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4570,7 +4570,7 @@ namespace mg5amcCpu jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4582,7 +4582,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -4601,7 +4601,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 220 - FFV1_0( w_fp[3], w_fp[57], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4616,7 +4616,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 221 - FFV1_0( w_fp[26], w_fp[57], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[57], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4629,7 +4629,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 222 - FFV1_0( w_fp[14], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4642,7 +4642,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 223 - FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4654,10 +4654,10 @@ namespace mg5amcCpu // *** DIAGRAM 224 OF 1240 *** // Wavefunction(s) for diagram number 224 - VVV1P0_1( w_fp[1], w_fp[27], COUPs[0], 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[1], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 224 - VVV1_0( w_fp[62], w_fp[68], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[68], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4676,7 +4676,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 225 - VVV1_0( w_fp[62], w_fp[1], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4695,7 +4695,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 226 - VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4707,7 +4707,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4719,7 +4719,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4738,7 +4738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 227 - FFV1_0( w_fp[3], w_fp[55], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4753,7 +4753,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 228 - FFV1_0( w_fp[14], w_fp[55], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[55], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4766,7 +4766,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 229 - FFV1_0( w_fp[12], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4779,7 +4779,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 230 - FFV1_0( w_fp[3], w_fp[77], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4791,10 +4791,10 @@ namespace mg5amcCpu // *** DIAGRAM 231 OF 1240 *** // Wavefunction(s) for diagram number 231 - VVV1P0_1( w_fp[1], w_fp[29], COUPs[0], 0., 0., w_fp[67] ); + VVV1P0_1( w_fp[1], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[67] ); // Amplitude(s) for diagram number 231 - VVV1_0( w_fp[62], w_fp[67], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[67], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4813,7 +4813,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 232 - VVV1_0( w_fp[62], w_fp[1], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[19], 
COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4832,7 +4832,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 233 - VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4844,7 +4844,7 @@ namespace mg5amcCpu jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4856,7 +4856,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4875,7 +4875,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 234 - FFV1_0( w_fp[3], w_fp[9], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4890,7 +4890,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 235 - FFV1_0( w_fp[12], w_fp[9], w_fp[1], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4900,12 +4900,12 @@ namespace mg5amcCpu // *** DIAGRAM 236 OF 1240 *** // Wavefunction(s) for diagram number 236 - VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[73] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[79] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[80] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[73] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[79] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[80] ); // Amplitude(s) for diagram number 236 - VVV1_0( w_fp[73], w_fp[6], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[73], w_fp[6], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4917,7 +4917,7 @@ namespace mg5amcCpu jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[79], w_fp[6], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[79], w_fp[6], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4929,7 +4929,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[80], w_fp[6], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[80], w_fp[6], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-4948,7 +4948,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 237 - FFV1_0( w_fp[3], w_fp[57], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[73], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4956,7 +4956,7 @@ namespace mg5amcCpu jamp_sv[19] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[57], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[79], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4964,7 +4964,7 @@ namespace mg5amcCpu jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[57], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[57], w_fp[80], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4979,7 +4979,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 238 - FFV1_0( w_fp[41], w_fp[34], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[34], w_fp[73], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4987,7 +4987,7 @@ namespace mg5amcCpu jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[34], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[34], w_fp[79], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -4995,7 +4995,7 @@ namespace mg5amcCpu jamp_sv[6] += amp_sv[0]; jamp_sv[8] 
-= amp_sv[0]; jamp_sv[12] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[34], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[34], w_fp[80], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5007,12 +5007,12 @@ namespace mg5amcCpu // *** DIAGRAM 239 OF 1240 *** // Wavefunction(s) for diagram number 239 - VVVV1P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[57] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[81] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[82] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[57] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[81] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[82] ); // Amplitude(s) for diagram number 239 - VVV1_0( w_fp[57], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[57], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5024,7 +5024,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[81], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[81], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5036,7 +5036,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[82], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[82], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5055,7 +5055,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 240 - FFV1_0( w_fp[3], w_fp[55], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[57], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5063,7 +5063,7 @@ namespace mg5amcCpu jamp_sv[13] -= amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[55], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[81], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5071,7 +5071,7 @@ namespace mg5amcCpu jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[55], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[55], w_fp[82], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5086,7 +5086,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 241 - FFV1_0( w_fp[38], w_fp[34], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[34], w_fp[57], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5094,7 +5094,7 @@ namespace mg5amcCpu jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[34], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[34], w_fp[81], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -5102,7 +5102,7 @@ namespace mg5amcCpu jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[34], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[34], w_fp[82], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5114,12 +5114,12 @@ namespace mg5amcCpu // *** DIAGRAM 242 OF 1240 *** // Wavefunction(s) for diagram number 242 - VVVV1P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[55] ); - VVVV3P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[83] ); - VVVV4P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[84] ); + VVVV1P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[55] ); + VVVV3P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[83] ); + VVVV4P0_1( w_fp[1], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[84] ); // Amplitude(s) for diagram number 242 - VVV1_0( w_fp[55], w_fp[4], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[55], w_fp[4], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5131,7 +5131,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[83], w_fp[4], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[83], w_fp[4], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5143,7 +5143,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[84], w_fp[4], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[84], w_fp[4], w_fp[62], COUPs[0], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5162,7 +5162,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 243 - FFV1_0( w_fp[3], w_fp[9], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[55], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5170,7 +5170,7 @@ namespace mg5amcCpu jamp_sv[7] -= amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[83], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5178,7 +5178,7 @@ namespace mg5amcCpu jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[84], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5193,7 +5193,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 244 - FFV1_0( w_fp[46], w_fp[34], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[34], w_fp[55], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5201,7 +5201,7 @@ namespace mg5amcCpu jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[34], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[34], w_fp[83], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5209,7 +5209,7 @@ namespace mg5amcCpu jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[34], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[34], w_fp[84], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5224,7 +5224,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 245 - FFV1_0( w_fp[3], w_fp[77], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[30], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5232,7 +5232,7 @@ namespace mg5amcCpu jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[31], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5240,7 +5240,7 @@ namespace mg5amcCpu jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[32], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5255,7 +5255,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 246 - VVV1_0( w_fp[1], w_fp[30], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[30], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif 
@@ -5267,7 +5267,7 @@ namespace mg5amcCpu jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[31], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[31], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5279,7 +5279,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[32], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[32], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5295,13 +5295,13 @@ namespace mg5amcCpu // *** DIAGRAM 247 OF 1240 *** // Wavefunction(s) for diagram number 247 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[62] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); - FFV1_2( w_fp[62], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[34] ); - FFV1_1( w_fp[77], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); + FFV1_2( w_fp[62], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[34] ); + FFV1_1( w_fp[77], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 247 - FFV1_0( w_fp[34], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5310,10 +5310,10 @@ namespace mg5amcCpu // *** DIAGRAM 248 OF 1240 *** // Wavefunction(s) for diagram 
number 248 - FFV1_1( w_fp[77], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[85] ); + FFV1_1( w_fp[77], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[85] ); // Amplitude(s) for diagram number 248 - FFV1_0( w_fp[34], w_fp[85], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[85], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5322,11 +5322,11 @@ namespace mg5amcCpu // *** DIAGRAM 249 OF 1240 *** // Wavefunction(s) for diagram number 249 - FFV1_2( w_fp[62], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[86] ); - FFV1_1( w_fp[77], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[87] ); + FFV1_2( w_fp[62], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] ); + FFV1_1( w_fp[77], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[87] ); // Amplitude(s) for diagram number 249 - FFV1_0( w_fp[86], w_fp[87], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[87], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5338,7 +5338,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 250 - FFV1_0( w_fp[86], w_fp[85], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[85], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5347,10 +5347,10 @@ namespace mg5amcCpu // *** DIAGRAM 251 OF 1240 *** // Wavefunction(s) for diagram number 251 - FFV1_2( w_fp[62], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[88] ); + FFV1_2( w_fp[62], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] ); // Amplitude(s) for diagram number 251 - FFV1_0( w_fp[88], w_fp[87], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[87], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5362,7 +5362,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 252 - FFV1_0( w_fp[88], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5371,10 +5371,10 @@ namespace mg5amcCpu // *** DIAGRAM 253 OF 1240 *** // Wavefunction(s) for diagram number 253 - FFV1P0_3( w_fp[62], w_fp[77], COUPs[1], 0., 0., w_fp[89] ); + FFV1P0_3( w_fp[62], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[89] ); // Amplitude(s) for diagram number 253 - VVV1_0( w_fp[24], w_fp[6], w_fp[89], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[89], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5386,10 +5386,10 @@ namespace mg5amcCpu // *** DIAGRAM 254 OF 1240 *** // Wavefunction(s) for diagram number 254 - FFV1_2( w_fp[62], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[90] ); + FFV1_2( w_fp[62], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] ); // Amplitude(s) for diagram number 254 - FFV1_0( w_fp[90], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5402,7 +5402,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 255 - FFV1_0( w_fp[88], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -5415,7 +5415,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 256 - VVV1_0( w_fp[27], w_fp[5], w_fp[89], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[89], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5427,10 +5427,10 @@ namespace mg5amcCpu // *** DIAGRAM 257 OF 1240 *** // Wavefunction(s) for diagram number 257 - FFV1_2( w_fp[62], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[91] ); + FFV1_2( w_fp[62], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] ); // Amplitude(s) for diagram number 257 - FFV1_0( w_fp[91], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[91], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5443,7 +5443,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 258 - FFV1_0( w_fp[86], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5456,7 +5456,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 259 - VVV1_0( w_fp[4], w_fp[29], w_fp[89], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[89], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5471,7 +5471,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 260 - FFV1_0( w_fp[34], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5481,10 +5481,10 @@ namespace mg5amcCpu // *** DIAGRAM 261 OF 1240 *** // Wavefunction(s) for diagram number 261 - FFV1_2( w_fp[62], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[89] ); + FFV1_2( w_fp[62], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] ); // Amplitude(s) for diagram number 261 - FFV1_0( w_fp[89], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[89], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5497,7 +5497,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 262 - FFV1_0( w_fp[62], w_fp[77], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[30], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5505,7 +5505,7 @@ namespace mg5amcCpu jamp_sv[35] -= amp_sv[0]; jamp_sv[41] -= amp_sv[0]; jamp_sv[47] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[77], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[31], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5513,7 +5513,7 @@ namespace mg5amcCpu jamp_sv[39] += amp_sv[0]; jamp_sv[41] -= amp_sv[0]; jamp_sv[45] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[77], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[32], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5525,10 +5525,10 @@ namespace mg5amcCpu // *** DIAGRAM 263 OF 1240 *** // Wavefunction(s) for diagram number 263 - FFV1P0_3( w_fp[62], w_fp[2], COUPs[1], 0., 0., w_fp[92] 
); + FFV1P0_3( w_fp[62], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[92] ); // Amplitude(s) for diagram number 263 - VVV1_0( w_fp[92], w_fp[63], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[63], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5547,7 +5547,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 264 - VVV1_0( w_fp[92], w_fp[64], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[64], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5566,7 +5566,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 265 - VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5578,7 +5578,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5590,7 +5590,7 @@ namespace mg5amcCpu jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5606,10 +5606,10 @@ namespace mg5amcCpu // *** DIAGRAM 266 OF 1240 *** // Wavefunction(s) for diagram number 266 - FFV1_1( w_fp[2], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[93] ); + FFV1_1( w_fp[2], w_fp[61], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[93] ); // Amplitude(s) for diagram number 266 - FFV1_0( w_fp[86], w_fp[93], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[93], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5622,7 +5622,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 267 - FFV1_0( w_fp[86], w_fp[2], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5637,7 +5637,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 268 - FFV1_0( w_fp[88], w_fp[93], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[93], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5650,7 +5650,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 269 - FFV1_0( w_fp[88], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5662,10 +5662,10 @@ namespace mg5amcCpu // *** DIAGRAM 270 OF 1240 *** // Wavefunction(s) for diagram number 270 - FFV1_2( w_fp[62], w_fp[61], COUPs[1], cIPD[0], cIPD[1], w_fp[94] ); + FFV1_2( w_fp[62], w_fp[61], COUPs[1], 1.0, cIPD[0], 
cIPD[1], w_fp[94] ); // Amplitude(s) for diagram number 270 - FFV1_0( w_fp[94], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[94], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5675,10 +5675,10 @@ namespace mg5amcCpu // *** DIAGRAM 271 OF 1240 *** // Wavefunction(s) for diagram number 271 - FFV1P0_3( w_fp[62], w_fp[39], COUPs[1], 0., 0., w_fp[95] ); + FFV1P0_3( w_fp[62], w_fp[39], COUPs[1], 1.0, 0., 0., w_fp[95] ); // Amplitude(s) for diagram number 271 - VVV1_0( w_fp[61], w_fp[6], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5693,7 +5693,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 272 - FFV1_0( w_fp[88], w_fp[39], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[39], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5706,7 +5706,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 273 - FFV1_0( w_fp[94], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[94], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5716,10 +5716,10 @@ namespace mg5amcCpu // *** DIAGRAM 274 OF 1240 *** // Wavefunction(s) for diagram number 274 - FFV1P0_3( w_fp[62], w_fp[47], COUPs[1], 0., 0., w_fp[96] ); + FFV1P0_3( w_fp[62], w_fp[47], COUPs[1], 1.0, 0., 0., w_fp[96] ); // Amplitude(s) for diagram number 274 - VVV1_0( w_fp[61], w_fp[5], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], 
w_fp[5], w_fp[96], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5734,7 +5734,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 275 - FFV1_0( w_fp[86], w_fp[47], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[47], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5747,7 +5747,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 276 - FFV1_0( w_fp[94], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[94], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5762,7 +5762,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 277 - VVV1_0( w_fp[61], w_fp[29], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[29], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5781,7 +5781,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 278 - FFV1_0( w_fp[89], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[89], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5796,7 +5796,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 279 - VVV1_0( w_fp[92], w_fp[69], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[69], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -5815,7 +5815,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 280 - VVV1_0( w_fp[92], w_fp[70], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[70], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5834,7 +5834,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 281 - VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5846,7 +5846,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5858,7 +5858,7 @@ namespace mg5amcCpu jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5874,10 +5874,10 @@ namespace mg5amcCpu // *** DIAGRAM 282 OF 1240 *** // Wavefunction(s) for diagram number 282 - FFV1_1( w_fp[2], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[94] ); + FFV1_1( w_fp[2], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[94] 
); // Amplitude(s) for diagram number 282 - FFV1_0( w_fp[34], w_fp[94], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[94], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5890,7 +5890,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 283 - FFV1_0( w_fp[34], w_fp[2], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5905,7 +5905,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 284 - FFV1_0( w_fp[88], w_fp[94], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[94], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5918,7 +5918,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 285 - FFV1_0( w_fp[88], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5930,10 +5930,10 @@ namespace mg5amcCpu // *** DIAGRAM 286 OF 1240 *** // Wavefunction(s) for diagram number 286 - FFV1_2( w_fp[62], w_fp[66], COUPs[1], cIPD[0], cIPD[1], w_fp[97] ); + FFV1_2( w_fp[62], w_fp[66], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] ); // Amplitude(s) for diagram number 286 - FFV1_0( w_fp[97], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[97], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-5943,10 +5943,10 @@ namespace mg5amcCpu // *** DIAGRAM 287 OF 1240 *** // Wavefunction(s) for diagram number 287 - FFV1P0_3( w_fp[62], w_fp[33], COUPs[1], 0., 0., w_fp[98] ); + FFV1P0_3( w_fp[62], w_fp[33], COUPs[1], 1.0, 0., 0., w_fp[98] ); // Amplitude(s) for diagram number 287 - VVV1_0( w_fp[66], w_fp[6], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5961,7 +5961,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 288 - FFV1_0( w_fp[88], w_fp[33], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[33], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5974,7 +5974,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 289 - FFV1_0( w_fp[97], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[97], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -5987,7 +5987,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 290 - VVV1_0( w_fp[66], w_fp[4], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[96], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6002,7 +6002,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 291 - FFV1_0( w_fp[34], w_fp[47], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[47], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -6015,7 +6015,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 292 - FFV1_0( w_fp[97], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[97], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6030,7 +6030,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 293 - VVV1_0( w_fp[66], w_fp[27], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[27], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6049,7 +6049,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 294 - FFV1_0( w_fp[91], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[91], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6064,7 +6064,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 295 - VVV1_0( w_fp[92], w_fp[74], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[74], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6083,7 +6083,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 296 - VVV1_0( w_fp[92], w_fp[75], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[75], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6102,7 +6102,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 297 - VVVV1_0( w_fp[72], w_fp[4], w_fp[5], 
w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6114,7 +6114,7 @@ namespace mg5amcCpu jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6126,7 +6126,7 @@ namespace mg5amcCpu jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6142,10 +6142,10 @@ namespace mg5amcCpu // *** DIAGRAM 298 OF 1240 *** // Wavefunction(s) for diagram number 298 - FFV1_1( w_fp[2], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[97] ); + FFV1_1( w_fp[2], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[97] ); // Amplitude(s) for diagram number 298 - FFV1_0( w_fp[34], w_fp[97], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[97], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6158,7 +6158,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 299 - FFV1_0( w_fp[34], w_fp[2], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6173,7 +6173,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 300 - FFV1_0( w_fp[86], w_fp[97], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[97], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6186,7 +6186,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 301 - FFV1_0( w_fp[86], w_fp[2], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6198,10 +6198,10 @@ namespace mg5amcCpu // *** DIAGRAM 302 OF 1240 *** // Wavefunction(s) for diagram number 302 - FFV1_2( w_fp[62], w_fp[72], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + FFV1_2( w_fp[62], w_fp[72], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 302 - FFV1_0( w_fp[99], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6214,7 +6214,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 303 - VVV1_0( w_fp[72], w_fp[5], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6229,7 +6229,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 304 - FFV1_0( w_fp[86], w_fp[33], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], 
w_fp[33], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6242,7 +6242,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 305 - FFV1_0( w_fp[99], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6255,7 +6255,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 306 - VVV1_0( w_fp[72], w_fp[4], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6270,7 +6270,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 307 - FFV1_0( w_fp[34], w_fp[39], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[39], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6283,7 +6283,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 308 - FFV1_0( w_fp[99], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6298,7 +6298,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 309 - VVV1_0( w_fp[72], w_fp[24], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[24], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -6317,7 +6317,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 310 - FFV1_0( w_fp[90], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6329,10 +6329,10 @@ namespace mg5amcCpu // *** DIAGRAM 311 OF 1240 *** // Wavefunction(s) for diagram number 311 - FFV1_2( w_fp[62], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + FFV1_2( w_fp[62], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 311 - FFV1_0( w_fp[99], w_fp[35], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[35], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6344,7 +6344,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 312 - FFV1_0( w_fp[99], w_fp[36], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[36], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6353,10 +6353,10 @@ namespace mg5amcCpu // *** DIAGRAM 313 OF 1240 *** // Wavefunction(s) for diagram number 313 - FFV1_1( w_fp[33], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[100] ); + FFV1_1( w_fp[33], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[100] ); // Amplitude(s) for diagram number 313 - FFV1_0( w_fp[86], w_fp[100], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[100], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6368,7 +6368,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 314 - FFV1_0( w_fp[86], w_fp[36], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[36], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6380,7 +6380,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 315 - FFV1_0( w_fp[88], w_fp[100], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[100], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6392,7 +6392,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 316 - FFV1_0( w_fp[88], w_fp[35], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[35], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6404,7 +6404,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 317 - FFV1_0( w_fp[99], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6417,7 +6417,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 318 - VVV1_0( w_fp[1], w_fp[29], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6432,7 +6432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 319 - FFV1_0( w_fp[89], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[89], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6445,7 +6445,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 320 - FFV1_0( w_fp[99], w_fp[43], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[43], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6457,7 +6457,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 321 - FFV1_0( w_fp[99], w_fp[44], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[44], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6466,10 +6466,10 @@ namespace mg5amcCpu // *** DIAGRAM 322 OF 1240 *** // Wavefunction(s) for diagram number 322 - FFV1_1( w_fp[39], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[89] ); + FFV1_1( w_fp[39], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[89] ); // Amplitude(s) for diagram number 322 - FFV1_0( w_fp[34], w_fp[89], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[89], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6481,7 +6481,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 323 - FFV1_0( w_fp[34], w_fp[44], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[44], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6493,7 +6493,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 324 - FFV1_0( w_fp[88], w_fp[89], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], 
w_fp[89], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6505,7 +6505,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 325 - FFV1_0( w_fp[88], w_fp[43], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[43], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6517,7 +6517,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 326 - FFV1_0( w_fp[99], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6530,7 +6530,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 327 - VVV1_0( w_fp[1], w_fp[27], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6545,7 +6545,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 328 - FFV1_0( w_fp[91], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[91], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6558,7 +6558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 329 - FFV1_0( w_fp[99], w_fp[49], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[49], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv 
and denominators_sv (#473) #endif @@ -6570,7 +6570,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 330 - FFV1_0( w_fp[99], w_fp[50], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[50], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6579,10 +6579,10 @@ namespace mg5amcCpu // *** DIAGRAM 331 OF 1240 *** // Wavefunction(s) for diagram number 331 - FFV1_1( w_fp[47], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[91] ); + FFV1_1( w_fp[47], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[91] ); // Amplitude(s) for diagram number 331 - FFV1_0( w_fp[34], w_fp[91], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[91], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6594,7 +6594,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 332 - FFV1_0( w_fp[34], w_fp[50], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[50], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6606,7 +6606,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 333 - FFV1_0( w_fp[86], w_fp[91], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[91], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6618,7 +6618,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 334 - FFV1_0( w_fp[86], w_fp[49], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[49], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6630,7 +6630,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 335 - FFV1_0( w_fp[99], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6643,7 +6643,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 336 - VVV1_0( w_fp[1], w_fp[24], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[96], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6658,7 +6658,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 337 - FFV1_0( w_fp[90], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6671,7 +6671,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 338 - FFV1_0( w_fp[99], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6684,7 +6684,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 339 - FFV1_0( w_fp[99], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6699,7 +6699,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 340 - VVV1_0( w_fp[92], w_fp[59], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[59], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6718,7 +6718,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 341 - VVV1_0( w_fp[92], w_fp[1], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[1], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6737,7 +6737,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 342 - VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6749,7 +6749,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6761,7 +6761,7 @@ namespace mg5amcCpu jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6780,7 +6780,7 
@@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 343 - FFV1_0( w_fp[88], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6795,7 +6795,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 344 - FFV1_0( w_fp[88], w_fp[17], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[17], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6808,7 +6808,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 345 - FFV1_0( w_fp[99], w_fp[15], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[15], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6821,7 +6821,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 346 - FFV1_0( w_fp[99], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6836,7 +6836,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 347 - VVV1_0( w_fp[92], w_fp[68], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[68], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6855,7 +6855,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 348 - VVV1_0( w_fp[92], w_fp[1], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[1], 
w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6874,7 +6874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 349 - VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6886,7 +6886,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6898,7 +6898,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6917,7 +6917,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 350 - FFV1_0( w_fp[86], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6932,7 +6932,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 351 - FFV1_0( w_fp[86], w_fp[15], 
w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[15], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6945,7 +6945,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 352 - FFV1_0( w_fp[99], w_fp[18], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[18], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6958,7 +6958,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 353 - FFV1_0( w_fp[99], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6973,7 +6973,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 354 - VVV1_0( w_fp[92], w_fp[67], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[67], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -6992,7 +6992,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 355 - VVV1_0( w_fp[92], w_fp[1], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[1], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7011,7 +7011,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 356 - VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the 
code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7023,7 +7023,7 @@ namespace mg5amcCpu jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7035,7 +7035,7 @@ namespace mg5amcCpu jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[92], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7054,7 +7054,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 357 - FFV1_0( w_fp[34], w_fp[2], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7069,7 +7069,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 358 - FFV1_0( w_fp[34], w_fp[18], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[18], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7082,7 +7082,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 359 - VVV1_0( w_fp[73], w_fp[6], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[73], w_fp[6], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7094,7 +7094,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[79], w_fp[6], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[79], w_fp[6], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7106,7 +7106,7 @@ namespace mg5amcCpu jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[80], w_fp[6], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[80], w_fp[6], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7125,7 +7125,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 360 - FFV1_0( w_fp[88], w_fp[2], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[73], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7133,7 +7133,7 @@ namespace mg5amcCpu jamp_sv[39] -= amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[87] += amp_sv[0]; - FFV1_0( w_fp[88], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7141,7 +7141,7 @@ namespace mg5amcCpu jamp_sv[57] += amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[81] += amp_sv[0]; - FFV1_0( w_fp[88], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[88], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7156,7 +7156,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 361 - FFV1_0( w_fp[62], w_fp[47], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[47], w_fp[73], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7164,7 +7164,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[47], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[47], w_fp[79], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7172,7 +7172,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[47], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[47], w_fp[80], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7187,7 +7187,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 362 - VVV1_0( w_fp[57], w_fp[5], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[57], w_fp[5], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7199,7 +7199,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[81], w_fp[5], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[81], 
w_fp[5], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7211,7 +7211,7 @@ namespace mg5amcCpu jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[82], w_fp[5], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[82], w_fp[5], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7230,7 +7230,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 363 - FFV1_0( w_fp[86], w_fp[2], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[57], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7238,7 +7238,7 @@ namespace mg5amcCpu jamp_sv[45] -= amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - FFV1_0( w_fp[86], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7246,7 +7246,7 @@ namespace mg5amcCpu jamp_sv[59] += amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; - FFV1_0( w_fp[86], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7261,7 +7261,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 364 - FFV1_0( w_fp[62], w_fp[39], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[57], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7269,7 +7269,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[39], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[81], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7277,7 +7277,7 @@ namespace mg5amcCpu jamp_sv[87] += amp_sv[0]; jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[39], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[82], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7292,7 +7292,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 365 - VVV1_0( w_fp[55], w_fp[4], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[55], w_fp[4], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7304,7 +7304,7 @@ namespace mg5amcCpu jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[83], w_fp[4], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[83], w_fp[4], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7316,7 +7316,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[84], w_fp[4], w_fp[92], 
COUPs[0], &_fp[0] ); + VVV1_0( w_fp[84], w_fp[4], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7335,7 +7335,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 366 - FFV1_0( w_fp[34], w_fp[2], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[55], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7343,7 +7343,7 @@ namespace mg5amcCpu jamp_sv[47] -= amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[83], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7351,7 +7351,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - FFV1_0( w_fp[34], w_fp[2], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[34], w_fp[2], w_fp[84], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7366,7 +7366,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 367 - FFV1_0( w_fp[62], w_fp[33], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[55], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7374,7 +7374,7 @@ namespace mg5amcCpu jamp_sv[59] -= amp_sv[0]; jamp_sv[65] -= amp_sv[0]; jamp_sv[71] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[33], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[83], COUPs[1], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7382,7 +7382,7 @@ namespace mg5amcCpu jamp_sv[63] += amp_sv[0]; jamp_sv[65] -= amp_sv[0]; jamp_sv[69] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[33], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[84], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7397,7 +7397,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 368 - FFV1_0( w_fp[99], w_fp[2], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[30], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7405,7 +7405,7 @@ namespace mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[31], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7413,7 +7413,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[32], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7428,7 +7428,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 369 - VVV1_0( w_fp[1], w_fp[30], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[30], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code 
base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7440,7 +7440,7 @@ namespace mg5amcCpu jamp_sv[71] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[31], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[31], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7452,7 +7452,7 @@ namespace mg5amcCpu jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[1], w_fp[32], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[32], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7468,11 +7468,11 @@ namespace mg5amcCpu // *** DIAGRAM 370 OF 1240 *** // Wavefunction(s) for diagram number 370 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[92] ); - FFV1_2( w_fp[3], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[92] ); + FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 370 - FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7485,7 +7485,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 371 - FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -7495,11 +7495,11 @@ namespace mg5amcCpu // *** DIAGRAM 372 OF 1240 *** // Wavefunction(s) for diagram number 372 - VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 0., 0., w_fp[62] ); - FFV1P0_3( w_fp[3], w_fp[77], COUPs[1], 0., 0., w_fp[34] ); + VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[62] ); + FFV1P0_3( w_fp[3], w_fp[77], COUPs[1], 1.0, 0., 0., w_fp[34] ); // Amplitude(s) for diagram number 372 - VVV1_0( w_fp[62], w_fp[34], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[34], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7518,7 +7518,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 373 - FFV1_0( w_fp[3], w_fp[85], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[85], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7530,10 +7530,10 @@ namespace mg5amcCpu // *** DIAGRAM 374 OF 1240 *** // Wavefunction(s) for diagram number 374 - VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 374 - VVV1_0( w_fp[86], w_fp[34], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[34], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7552,7 +7552,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 375 - FFV1_0( w_fp[3], w_fp[9], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -7564,12 +7564,12 @@ namespace mg5amcCpu // *** DIAGRAM 376 OF 1240 *** // Wavefunction(s) for diagram number 376 - VVVV1P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[88] ); - VVVV3P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[90] ); - VVVV4P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[96] ); + VVVV1P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] ); + VVVV3P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] ); + VVVV4P0_1( w_fp[92], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] ); // Amplitude(s) for diagram number 376 - FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7581,7 +7581,7 @@ namespace mg5amcCpu jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7593,7 +7593,7 @@ namespace mg5amcCpu jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7609,10 +7609,10 @@ namespace mg5amcCpu // *** DIAGRAM 377 OF 1240 *** // Wavefunction(s) for diagram number 377 - FFV1_1( w_fp[77], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[95] 
); + FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[95] ); // Amplitude(s) for diagram number 377 - FFV1_0( w_fp[38], w_fp[95], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[95], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7622,10 +7622,10 @@ namespace mg5amcCpu // *** DIAGRAM 378 OF 1240 *** // Wavefunction(s) for diagram number 378 - FFV1_2( w_fp[38], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); + FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 378 - FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7638,7 +7638,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 379 - FFV1_0( w_fp[38], w_fp[77], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7653,7 +7653,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 380 - FFV1_0( w_fp[41], w_fp[95], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[95], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7663,10 +7663,10 @@ namespace mg5amcCpu // *** DIAGRAM 381 OF 1240 *** // Wavefunction(s) for diagram number 381 - FFV1_2( w_fp[41], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[101] ); + FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[101] ); // Amplitude(s) for diagram 
number 381 - FFV1_0( w_fp[101], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[101], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7679,7 +7679,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 382 - FFV1_0( w_fp[41], w_fp[77], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7694,7 +7694,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 383 - FFV1_0( w_fp[3], w_fp[95], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[95], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7709,7 +7709,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 384 - FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7721,10 +7721,10 @@ namespace mg5amcCpu // *** DIAGRAM 385 OF 1240 *** // Wavefunction(s) for diagram number 385 - VVV1P0_1( w_fp[92], w_fp[29], COUPs[0], 0., 0., w_fp[95] ); + VVV1P0_1( w_fp[92], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[95] ); // Amplitude(s) for diagram number 385 - FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7740,10 +7740,10 @@ namespace mg5amcCpu 
// *** DIAGRAM 386 OF 1240 *** // Wavefunction(s) for diagram number 386 - FFV1_1( w_fp[2], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[102] ); + FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] ); // Amplitude(s) for diagram number 386 - FFV1_0( w_fp[22], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7756,7 +7756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 387 - FFV1_0( w_fp[21], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7766,10 +7766,10 @@ namespace mg5amcCpu // *** DIAGRAM 388 OF 1240 *** // Wavefunction(s) for diagram number 388 - FFV1P0_3( w_fp[52], w_fp[2], COUPs[1], 0., 0., w_fp[103] ); + FFV1P0_3( w_fp[52], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[103] ); // Amplitude(s) for diagram number 388 - VVV1_0( w_fp[62], w_fp[103], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[103], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7788,7 +7788,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 389 - FFV1_0( w_fp[21], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7803,7 +7803,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 390 - VVV1_0( w_fp[86], w_fp[103], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( 
w_fp[86], w_fp[103], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7822,7 +7822,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 391 - FFV1_0( w_fp[22], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7837,7 +7837,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 392 - FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7849,7 +7849,7 @@ namespace mg5amcCpu jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7861,7 +7861,7 @@ namespace mg5amcCpu jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7877,10 +7877,10 @@ namespace mg5amcCpu // *** DIAGRAM 393 OF 1240 *** // Wavefunction(s) for diagram number 393 - FFV1_2( w_fp[52], w_fp[92], COUPs[1], 
cIPD[0], cIPD[1], w_fp[104] ); + FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] ); // Amplitude(s) for diagram number 393 - FFV1_0( w_fp[104], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7890,10 +7890,10 @@ namespace mg5amcCpu // *** DIAGRAM 394 OF 1240 *** // Wavefunction(s) for diagram number 394 - FFV1_1( w_fp[39], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[105] ); + FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[105] ); // Amplitude(s) for diagram number 394 - FFV1_0( w_fp[52], w_fp[105], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[105], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7906,7 +7906,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 395 - FFV1_0( w_fp[52], w_fp[39], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7921,7 +7921,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 396 - FFV1_0( w_fp[104], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7931,10 +7931,10 @@ namespace mg5amcCpu // *** DIAGRAM 397 OF 1240 *** // Wavefunction(s) for diagram number 397 - FFV1_1( w_fp[47], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[106] ); + FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], 
w_fp[106] ); // Amplitude(s) for diagram number 397 - FFV1_0( w_fp[52], w_fp[106], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[106], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7947,7 +7947,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 398 - FFV1_0( w_fp[52], w_fp[47], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7962,7 +7962,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 399 - FFV1_0( w_fp[104], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7977,7 +7977,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 400 - FFV1_0( w_fp[52], w_fp[102], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[102], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -7992,7 +7992,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 401 - FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8011,7 +8011,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 402 - FFV1_0( w_fp[71], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[102], w_fp[6], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8026,7 +8026,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 403 - FFV1_0( w_fp[3], w_fp[102], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8045,7 +8045,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 404 - FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8060,7 +8060,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 405 - FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8079,7 +8079,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 406 - FFV1_0( w_fp[3], w_fp[94], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[94], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8098,7 +8098,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 407 - FFV1_0( w_fp[71], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -8117,7 +8117,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 408 - VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8137,7 +8137,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8157,7 +8157,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8181,10 +8181,10 @@ namespace mg5amcCpu // *** DIAGRAM 409 OF 1240 *** // Wavefunction(s) for diagram number 409 - VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 409 - VVV1_0( w_fp[8], w_fp[6], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8208,10 +8208,10 @@ namespace mg5amcCpu // *** DIAGRAM 410 OF 1240 *** // Wavefunction(s) for diagram number 410 - VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 0., 0., w_fp[107] ); + VVV1P0_1( 
w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[107] ); // Amplitude(s) for diagram number 410 - VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8238,7 +8238,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 411 - VVV1_0( w_fp[66], w_fp[8], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[8], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8265,7 +8265,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 412 - FFV1_0( w_fp[3], w_fp[47], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8284,7 +8284,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 413 - FFV1_0( w_fp[3], w_fp[106], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[106], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8299,7 +8299,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 414 - FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8314,7 +8314,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 415 - FFV1_0( w_fp[41], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + 
FFV1_0( w_fp[41], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8333,7 +8333,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 416 - FFV1_0( w_fp[41], w_fp[102], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[102], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8348,7 +8348,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 417 - FFV1_0( w_fp[101], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[101], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8363,7 +8363,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 418 - FFV1_0( w_fp[76], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8378,7 +8378,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 419 - FFV1_0( w_fp[3], w_fp[102], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8397,7 +8397,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 420 - FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -8412,7 +8412,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 421 - FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8431,7 +8431,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 422 - FFV1_0( w_fp[3], w_fp[97], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[97], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8450,7 +8450,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 423 - FFV1_0( w_fp[76], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8469,7 +8469,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 424 - VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8489,7 +8489,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8509,7 +8509,7 @@ 
namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8533,10 +8533,10 @@ namespace mg5amcCpu // *** DIAGRAM 425 OF 1240 *** // Wavefunction(s) for diagram number 425 - VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 425 - VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8563,7 +8563,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 426 - VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8590,7 +8590,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 427 - VVV1_0( w_fp[72], w_fp[8], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[8], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8617,7 +8617,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 428 - FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -8636,7 +8636,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 429 - FFV1_0( w_fp[3], w_fp[105], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[105], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8651,7 +8651,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 430 - FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8666,7 +8666,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 431 - FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8685,7 +8685,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 432 - FFV1_0( w_fp[38], w_fp[102], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[102], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8700,7 +8700,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 433 - FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8712,10 +8712,10 @@ namespace mg5amcCpu // *** DIAGRAM 434 OF 1240 *** // 
Wavefunction(s) for diagram number 434 - VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 434 - VVV1_0( w_fp[104], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8742,7 +8742,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 435 - VVV1_0( w_fp[104], w_fp[11], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[11], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8769,7 +8769,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 436 - VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8789,7 +8789,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8809,7 +8809,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[104], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8833,10 +8833,10 @@ namespace mg5amcCpu // *** DIAGRAM 437 OF 1240 *** // Wavefunction(s) for diagram number 437 - VVV1P0_1( w_fp[1], w_fp[8], COUPs[0], 0., 0., w_fp[108] ); + VVV1P0_1( w_fp[1], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[108] ); // Amplitude(s) for diagram number 437 - VVV1_0( w_fp[62], w_fp[108], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[108], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8863,7 +8863,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 438 - VVV1_0( w_fp[62], w_fp[1], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8890,7 +8890,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 439 - VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8910,7 +8910,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[115] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8930,7 +8930,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 
&_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[62], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8957,7 +8957,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 440 - VVV1_0( w_fp[86], w_fp[108], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[108], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -8984,7 +8984,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 441 - VVV1_0( w_fp[86], w_fp[1], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9011,7 +9011,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 442 - VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9031,7 +9031,7 @@ namespace mg5amcCpu jamp_sv[94] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9051,7 +9051,7 @@ namespace mg5amcCpu jamp_sv[99] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[86], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], 
w_fp[5], w_fp[86], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9075,12 +9075,12 @@ namespace mg5amcCpu // *** DIAGRAM 443 OF 1240 *** // Wavefunction(s) for diagram number 443 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[109] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 443 - VVV1_0( w_fp[8], w_fp[6], w_fp[109], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[109], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9100,7 +9100,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[110], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[110], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9120,7 +9120,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[115] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[111], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9144,12 +9144,12 @@ namespace mg5amcCpu // *** DIAGRAM 444 OF 1240 *** // Wavefunction(s) for diagram number 444 - 
VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[112] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[113] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[114] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[113] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[114] ); // Amplitude(s) for diagram number 444 - VVV1_0( w_fp[8], w_fp[5], w_fp[112], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[112], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9169,7 +9169,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[113], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[113], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9189,7 +9189,7 @@ namespace mg5amcCpu jamp_sv[94] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[114], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[114], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9216,7 +9216,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 445 - VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9236,7 +9236,7 @@ namespace mg5amcCpu jamp_sv[94] -= amp_sv[0]; 
jamp_sv[115] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[90], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[90], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9256,7 +9256,7 @@ namespace mg5amcCpu jamp_sv[94] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[96], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9283,7 +9283,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 446 - VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9303,7 +9303,7 @@ namespace mg5amcCpu jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9323,7 +9323,7 @@ namespace mg5amcCpu jamp_sv[93] -= amp_sv[0]; jamp_sv[116] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9350,7 +9350,7 @@ namespace mg5amcCpu // (none) 
// Amplitude(s) for diagram number 447 - VVV1_0( w_fp[8], w_fp[29], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[29], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9377,7 +9377,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 448 - VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9404,7 +9404,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 449 - VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9431,7 +9431,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 450 - VVV1_0( w_fp[104], w_fp[45], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[45], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9450,7 +9450,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 451 - FFV1_0( w_fp[3], w_fp[44], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[44], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9465,7 +9465,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 452 - FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9478,7 +9478,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 453 - FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9491,7 +9491,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 454 - FFV1_0( w_fp[3], w_fp[89], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[89], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9506,7 +9506,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 455 - VVV1_0( w_fp[86], w_fp[1], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9525,7 +9525,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 456 - FFV1_0( w_fp[3], w_fp[39], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9537,7 +9537,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[113], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[113], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9549,7 +9549,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[114], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[114], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9568,7 +9568,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 457 - FFV1_0( w_fp[41], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9583,7 +9583,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 458 - FFV1_0( w_fp[41], w_fp[105], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[105], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9596,7 +9596,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 459 - FFV1_0( w_fp[101], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[101], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9609,7 +9609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 460 - VVV1_0( w_fp[104], w_fp[51], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[51], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -9628,7 +9628,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 461 - FFV1_0( w_fp[3], w_fp[50], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[50], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9643,7 +9643,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 462 - FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9656,7 +9656,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 463 - FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9669,7 +9669,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 464 - FFV1_0( w_fp[3], w_fp[91], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[91], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9684,7 +9684,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 465 - VVV1_0( w_fp[62], w_fp[1], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9703,7 +9703,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 466 - FFV1_0( w_fp[3], w_fp[47], w_fp[109], COUPs[1], &_fp[0] 
); + FFV1_0( w_fp[3], w_fp[47], w_fp[109], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9715,7 +9715,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[110], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9727,7 +9727,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9746,7 +9746,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 467 - FFV1_0( w_fp[38], w_fp[47], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9761,7 +9761,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 468 - FFV1_0( w_fp[38], w_fp[106], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[106], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9774,7 +9774,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 469 - FFV1_0( w_fp[98], w_fp[47], w_fp[1], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9787,7 +9787,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 470 - VVV1_0( w_fp[104], w_fp[23], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[23], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9806,7 +9806,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 471 - FFV1_0( w_fp[48], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9821,7 +9821,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 472 - FFV1_0( w_fp[58], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9834,7 +9834,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 473 - FFV1_0( w_fp[48], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9847,7 +9847,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 474 - FFV1_0( w_fp[58], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9862,7 +9862,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 475 - VVV1_0( w_fp[86], w_fp[1], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9881,7 +9881,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 476 - FFV1_0( w_fp[38], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9893,7 +9893,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[113], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[113], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9905,7 +9905,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[114], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[114], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9924,7 +9924,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 477 - VVV1_0( w_fp[104], w_fp[20], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[20], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9943,7 +9943,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 478 - FFV1_0( w_fp[40], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9958,7 +9958,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 479 - FFV1_0( w_fp[60], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9971,7 +9971,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 480 - FFV1_0( w_fp[40], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9984,7 +9984,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 481 - FFV1_0( w_fp[60], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -9999,7 +9999,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 482 - VVV1_0( w_fp[62], w_fp[1], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[62], w_fp[1], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10018,7 +10018,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 483 - FFV1_0( w_fp[41], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10030,7 +10030,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10042,7 +10042,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10061,7 +10061,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 484 - FFV1_0( w_fp[3], w_fp[18], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[18], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10080,7 +10080,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 485 - FFV1_0( w_fp[12], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10099,7 +10099,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 486 - FFV1_0( w_fp[3], w_fp[102], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10118,7 +10118,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 487 - FFV1_0( w_fp[12], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10133,7 +10133,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 488 - FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10152,7 +10152,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 489 - FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10167,7 +10167,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 490 - FFV1_0( w_fp[3], w_fp[102], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[55], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10179,7 +10179,7 @@ namespace mg5amcCpu jamp_sv[49] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[53] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( 
w_fp[3], w_fp[102], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[83], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10191,7 +10191,7 @@ namespace mg5amcCpu jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[51] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[84], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10210,7 +10210,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 491 - FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10222,7 +10222,7 @@ namespace mg5amcCpu jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10234,7 +10234,7 @@ namespace mg5amcCpu jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-10253,7 +10253,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 492 - VVV1_0( w_fp[92], w_fp[55], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[55], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10273,7 +10273,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[83], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[83], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10293,7 +10293,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[84], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[84], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10317,11 +10317,11 @@ namespace mg5amcCpu // *** DIAGRAM 493 OF 1240 *** // Wavefunction(s) for diagram number 493 - VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 0., 0., w_fp[92] ); - FFV1_2( w_fp[3], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[92] ); + FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 493 - FFV1_0( w_fp[99], w_fp[87], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[87], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10334,7 +10334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 494 - FFV1_0( w_fp[99], w_fp[85], w_fp[4], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[85], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10344,10 +10344,10 @@ namespace mg5amcCpu // *** DIAGRAM 495 OF 1240 *** // Wavefunction(s) for diagram number 495 - VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 0., 0., w_fp[102] ); + VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[102] ); // Amplitude(s) for diagram number 495 - VVV1_0( w_fp[102], w_fp[34], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[34], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10366,7 +10366,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 496 - FFV1_0( w_fp[3], w_fp[85], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[85], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10378,10 +10378,10 @@ namespace mg5amcCpu // *** DIAGRAM 497 OF 1240 *** // Wavefunction(s) for diagram number 497 - VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 497 - VVV1_0( w_fp[104], w_fp[34], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[34], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10400,7 +10400,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 498 - FFV1_0( w_fp[3], w_fp[87], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[87], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // 
Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10412,12 +10412,12 @@ namespace mg5amcCpu // *** DIAGRAM 499 OF 1240 *** // Wavefunction(s) for diagram number 499 - VVVV1P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[111] ); - VVVV3P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[109] ); + VVVV1P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] ); + VVVV3P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[92], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[109] ); // Amplitude(s) for diagram number 499 - FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10429,7 +10429,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10441,7 +10441,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10457,10 +10457,10 @@ namespace mg5amcCpu // *** DIAGRAM 500 OF 1240 *** // Wavefunction(s) for 
diagram number 500 - FFV1_1( w_fp[77], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[62] ); + FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] ); // Amplitude(s) for diagram number 500 - FFV1_0( w_fp[46], w_fp[62], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[62], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10470,10 +10470,10 @@ namespace mg5amcCpu // *** DIAGRAM 501 OF 1240 *** // Wavefunction(s) for diagram number 501 - FFV1_2( w_fp[46], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[114] ); + FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] ); // Amplitude(s) for diagram number 501 - FFV1_0( w_fp[114], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[114], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10486,7 +10486,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 502 - FFV1_0( w_fp[46], w_fp[77], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10501,7 +10501,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 503 - FFV1_0( w_fp[41], w_fp[62], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[62], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10511,10 +10511,10 @@ namespace mg5amcCpu // *** DIAGRAM 504 OF 1240 *** // Wavefunction(s) for diagram number 504 - FFV1_2( w_fp[41], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[113] ); + 
FFV1_2( w_fp[41], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] ); // Amplitude(s) for diagram number 504 - FFV1_0( w_fp[113], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[113], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10527,7 +10527,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 505 - FFV1_0( w_fp[41], w_fp[77], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10542,7 +10542,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 506 - FFV1_0( w_fp[3], w_fp[62], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[62], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10557,7 +10557,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 507 - FFV1_0( w_fp[99], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10569,10 +10569,10 @@ namespace mg5amcCpu // *** DIAGRAM 508 OF 1240 *** // Wavefunction(s) for diagram number 508 - VVV1P0_1( w_fp[92], w_fp[27], COUPs[0], 0., 0., w_fp[62] ); + VVV1P0_1( w_fp[92], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[62] ); // Amplitude(s) for diagram number 508 - FFV1_0( w_fp[3], w_fp[77], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10588,10 +10588,10 @@ namespace mg5amcCpu // *** DIAGRAM 509 OF 1240 *** // Wavefunction(s) for diagram number 509 - FFV1_1( w_fp[2], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[112] ); + FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[112] ); // Amplitude(s) for diagram number 509 - FFV1_0( w_fp[56], w_fp[112], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[112], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10604,7 +10604,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 510 - FFV1_0( w_fp[21], w_fp[112], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[112], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10617,7 +10617,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 511 - VVV1_0( w_fp[102], w_fp[103], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[103], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10636,7 +10636,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 512 - FFV1_0( w_fp[21], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10651,7 +10651,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 513 - VVV1_0( w_fp[104], w_fp[103], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[103], w_fp[4], COUPs[0], 1.0, &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10670,7 +10670,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 514 - FFV1_0( w_fp[56], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10685,7 +10685,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 515 - FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10697,7 +10697,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10709,7 +10709,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10725,10 +10725,10 @@ namespace mg5amcCpu // *** DIAGRAM 516 OF 1240 *** // Wavefunction(s) for diagram number 516 - FFV1_2( w_fp[52], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[86] ); + FFV1_2( 
w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[86] ); // Amplitude(s) for diagram number 516 - FFV1_0( w_fp[86], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10738,10 +10738,10 @@ namespace mg5amcCpu // *** DIAGRAM 517 OF 1240 *** // Wavefunction(s) for diagram number 517 - FFV1_1( w_fp[33], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); + FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 517 - FFV1_0( w_fp[52], w_fp[98], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[98], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10754,7 +10754,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 518 - FFV1_0( w_fp[52], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10769,7 +10769,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 519 - FFV1_0( w_fp[86], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10779,10 +10779,10 @@ namespace mg5amcCpu // *** DIAGRAM 520 OF 1240 *** // Wavefunction(s) for diagram number 520 - FFV1_1( w_fp[47], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[106] ); + FFV1_1( w_fp[47], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] ); // Amplitude(s) for diagram 
number 520 - FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10795,7 +10795,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 521 - FFV1_0( w_fp[52], w_fp[47], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10810,7 +10810,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 522 - FFV1_0( w_fp[86], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[86], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10825,7 +10825,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 523 - FFV1_0( w_fp[52], w_fp[112], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[112], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10840,7 +10840,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 524 - FFV1_0( w_fp[52], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10859,7 +10859,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 525 - FFV1_0( w_fp[65], w_fp[112], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[112], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10874,7 +10874,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 526 - FFV1_0( w_fp[3], w_fp[112], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10893,7 +10893,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 527 - FFV1_0( w_fp[99], w_fp[93], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[93], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10908,7 +10908,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 528 - FFV1_0( w_fp[99], w_fp[2], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10927,7 +10927,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 529 - FFV1_0( w_fp[3], w_fp[93], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[93], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10946,7 +10946,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 530 - FFV1_0( w_fp[65], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-10965,7 +10965,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 531 - VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -10985,7 +10985,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11005,7 +11005,7 @@ namespace mg5amcCpu jamp_sv[105] -= amp_sv[0]; jamp_sv[110] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11029,10 +11029,10 @@ namespace mg5amcCpu // *** DIAGRAM 532 OF 1240 *** // Wavefunction(s) for diagram number 532 - VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 532 - VVV1_0( w_fp[8], w_fp[6], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11056,10 +11056,10 @@ namespace mg5amcCpu // *** DIAGRAM 533 OF 1240 *** // Wavefunction(s) for diagram number 533 - VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 0., 0., w_fp[101] ); + VVV1P0_1( w_fp[92], 
w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[101] ); // Amplitude(s) for diagram number 533 - VVV1_0( w_fp[61], w_fp[6], w_fp[101], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[101], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11086,7 +11086,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 534 - VVV1_0( w_fp[61], w_fp[8], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[8], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11113,7 +11113,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 535 - FFV1_0( w_fp[3], w_fp[47], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11132,7 +11132,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 536 - FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11147,7 +11147,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 537 - FFV1_0( w_fp[99], w_fp[47], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11162,7 +11162,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 538 - FFV1_0( w_fp[41], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + 
FFV1_0( w_fp[41], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11181,7 +11181,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 539 - FFV1_0( w_fp[41], w_fp[112], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[112], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11196,7 +11196,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 540 - FFV1_0( w_fp[113], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[113], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11211,7 +11211,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 541 - FFV1_0( w_fp[76], w_fp[112], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[112], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11226,7 +11226,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 542 - FFV1_0( w_fp[3], w_fp[112], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11245,7 +11245,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 543 - FFV1_0( w_fp[99], w_fp[97], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[97], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11260,7 +11260,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 544 - FFV1_0( w_fp[99], w_fp[2], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11279,7 +11279,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 545 - FFV1_0( w_fp[3], w_fp[97], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[97], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11298,7 +11298,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 546 - FFV1_0( w_fp[76], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11317,7 +11317,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 547 - VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11337,7 +11337,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[103] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-11357,7 +11357,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[72], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11381,10 +11381,10 @@ namespace mg5amcCpu // *** DIAGRAM 548 OF 1240 *** // Wavefunction(s) for diagram number 548 - VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 548 - VVV1_0( w_fp[8], w_fp[4], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11411,7 +11411,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 549 - VVV1_0( w_fp[72], w_fp[4], w_fp[101], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[101], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11438,7 +11438,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 550 - VVV1_0( w_fp[72], w_fp[8], w_fp[102], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[8], w_fp[102], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11465,7 +11465,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 551 - FFV1_0( w_fp[3], w_fp[33], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11484,7 +11484,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 552 - FFV1_0( w_fp[3], w_fp[98], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[98], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11499,7 +11499,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 553 - FFV1_0( w_fp[99], w_fp[33], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11514,7 +11514,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 554 - FFV1_0( w_fp[46], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11533,7 +11533,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 555 - FFV1_0( w_fp[46], w_fp[112], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[112], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11548,7 +11548,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 556 - FFV1_0( w_fp[114], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[114], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11560,10 +11560,10 @@ namespace mg5amcCpu // *** 
DIAGRAM 557 OF 1240 *** // Wavefunction(s) for diagram number 557 - VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 557 - VVV1_0( w_fp[86], w_fp[13], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[13], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11590,7 +11590,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 558 - VVV1_0( w_fp[86], w_fp[11], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[11], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11617,7 +11617,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 559 - VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11637,7 +11637,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11657,7 +11657,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[86], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the 
code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11684,7 +11684,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 560 - VVV1_0( w_fp[102], w_fp[108], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[108], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11711,7 +11711,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 561 - VVV1_0( w_fp[102], w_fp[1], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[1], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11738,7 +11738,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 562 - VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11758,7 +11758,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11778,7 +11778,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[102], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11805,7 +11805,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 563 - VVV1_0( w_fp[104], w_fp[108], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[108], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11832,7 +11832,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 564 - VVV1_0( w_fp[104], w_fp[1], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[1], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11859,7 +11859,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 565 - VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11879,7 +11879,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[101] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11899,7 +11899,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[104], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support 
updates numerators_sv and denominators_sv (#473) #endif @@ -11923,12 +11923,12 @@ namespace mg5amcCpu // *** DIAGRAM 566 OF 1240 *** // Wavefunction(s) for diagram number 566 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[105] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[107] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] ); // Amplitude(s) for diagram number 566 - VVV1_0( w_fp[8], w_fp[6], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[105], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11948,7 +11948,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11968,7 +11968,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -11992,12 +11992,12 @@ namespace mg5amcCpu // *** DIAGRAM 567 OF 1240 *** // Wavefunction(s) for diagram number 567 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[96] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[90] ); - 
VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[88] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[96] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[90] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[88] ); // Amplitude(s) for diagram number 567 - VVV1_0( w_fp[8], w_fp[4], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[96], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12017,7 +12017,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[103] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12037,7 +12037,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[88], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[88], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12064,7 +12064,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 568 - VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12084,7 +12084,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[110], 
COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12104,7 +12104,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[101] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12131,7 +12131,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 569 - VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12151,7 +12151,7 @@ namespace mg5amcCpu jamp_sv[110] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12171,7 +12171,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[110] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12198,7 +12198,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 570 - VVV1_0( w_fp[8], w_fp[27], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[27], w_fp[86], 
COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12225,7 +12225,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 571 - VVV1_0( w_fp[1], w_fp[27], w_fp[101], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[101], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12252,7 +12252,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 572 - VVV1_0( w_fp[1], w_fp[8], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12279,7 +12279,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 573 - VVV1_0( w_fp[86], w_fp[37], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[37], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12298,7 +12298,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 574 - FFV1_0( w_fp[3], w_fp[36], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[36], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12313,7 +12313,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 575 - FFV1_0( w_fp[99], w_fp[100], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[100], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -12326,7 +12326,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 576 - FFV1_0( w_fp[99], w_fp[36], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[36], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12339,7 +12339,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 577 - FFV1_0( w_fp[3], w_fp[100], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[100], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12354,7 +12354,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 578 - VVV1_0( w_fp[104], w_fp[1], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[1], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12373,7 +12373,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 579 - FFV1_0( w_fp[3], w_fp[33], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12385,7 +12385,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12397,7 +12397,7 @@ namespace 
mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[88], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12416,7 +12416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 580 - FFV1_0( w_fp[41], w_fp[33], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12431,7 +12431,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 581 - FFV1_0( w_fp[41], w_fp[98], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[98], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12444,7 +12444,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 582 - FFV1_0( w_fp[113], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[113], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12457,7 +12457,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 583 - VVV1_0( w_fp[86], w_fp[51], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[51], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12476,7 +12476,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 584 - FFV1_0( 
w_fp[3], w_fp[49], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[49], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12491,7 +12491,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 585 - FFV1_0( w_fp[99], w_fp[91], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[91], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12504,7 +12504,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 586 - FFV1_0( w_fp[99], w_fp[49], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[49], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12517,7 +12517,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 587 - FFV1_0( w_fp[3], w_fp[91], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[91], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12532,7 +12532,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 588 - VVV1_0( w_fp[102], w_fp[1], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[1], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12551,7 +12551,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 589 - FFV1_0( w_fp[3], w_fp[47], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[105], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12563,7 +12563,7 @@ namespace mg5amcCpu jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[95], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12575,7 +12575,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[107], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12594,7 +12594,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 590 - FFV1_0( w_fp[46], w_fp[47], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12609,7 +12609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 591 - FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12622,7 +12622,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 592 - FFV1_0( w_fp[114], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[114], w_fp[47], w_fp[1], COUPs[1], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12635,7 +12635,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 593 - VVV1_0( w_fp[86], w_fp[54], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[54], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12654,7 +12654,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 594 - FFV1_0( w_fp[53], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12669,7 +12669,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 595 - FFV1_0( w_fp[78], w_fp[112], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[112], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12682,7 +12682,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 596 - FFV1_0( w_fp[53], w_fp[112], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[112], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12695,7 +12695,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 597 - FFV1_0( w_fp[78], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -12710,7 +12710,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 598 - VVV1_0( w_fp[104], w_fp[1], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[1], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12729,7 +12729,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 599 - FFV1_0( w_fp[46], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12741,7 +12741,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12753,7 +12753,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[88], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12772,7 +12772,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 600 - VVV1_0( w_fp[86], w_fp[20], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[20], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -12791,7 +12791,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 601 - FFV1_0( w_fp[28], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12806,7 +12806,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 602 - FFV1_0( w_fp[60], w_fp[112], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[112], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12819,7 +12819,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 603 - FFV1_0( w_fp[28], w_fp[112], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[112], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12832,7 +12832,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 604 - FFV1_0( w_fp[60], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12847,7 +12847,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 605 - VVV1_0( w_fp[102], w_fp[1], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[102], w_fp[1], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12866,7 +12866,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 606 - 
FFV1_0( w_fp[41], w_fp[2], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[105], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12878,7 +12878,7 @@ namespace mg5amcCpu jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12890,7 +12890,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[107], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12909,7 +12909,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 607 - FFV1_0( w_fp[3], w_fp[15], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[15], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12928,7 +12928,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 608 - FFV1_0( w_fp[14], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12947,7 +12947,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 609 - FFV1_0( w_fp[3], w_fp[112], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12966,7 +12966,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 610 - FFV1_0( w_fp[14], w_fp[112], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[112], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -12981,7 +12981,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 611 - FFV1_0( w_fp[99], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13000,7 +13000,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 612 - FFV1_0( w_fp[99], w_fp[15], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[15], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13015,7 +13015,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 613 - FFV1_0( w_fp[3], w_fp[112], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[57], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13027,7 +13027,7 @@ namespace mg5amcCpu jamp_sv[73] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[112], w_fp[81], COUPs[1], &_fp[0] 
); + FFV1_0( w_fp[3], w_fp[112], w_fp[81], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13039,7 +13039,7 @@ namespace mg5amcCpu jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[112], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[112], w_fp[82], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13058,7 +13058,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 614 - FFV1_0( w_fp[99], w_fp[2], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[57], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13070,7 +13070,7 @@ namespace mg5amcCpu jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13082,7 +13082,7 @@ namespace mg5amcCpu jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13101,7 +13101,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 615 - VVV1_0( w_fp[92], w_fp[57], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[57], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13121,7 +13121,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[81], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[81], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13141,7 +13141,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVV1_0( w_fp[92], w_fp[82], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[82], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13165,11 +13165,11 @@ namespace mg5amcCpu // *** DIAGRAM 616 OF 1240 *** // Wavefunction(s) for diagram number 616 - VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 0., 0., w_fp[92] ); - FFV1_2( w_fp[3], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[92] ); + FFV1_2( w_fp[3], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 616 - FFV1_0( w_fp[99], w_fp[87], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[87], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13182,7 +13182,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 617 - FFV1_0( w_fp[99], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[9], 
w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13192,10 +13192,10 @@ namespace mg5amcCpu // *** DIAGRAM 618 OF 1240 *** // Wavefunction(s) for diagram number 618 - VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 0., 0., w_fp[112] ); + VVV1P0_1( w_fp[92], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[112] ); // Amplitude(s) for diagram number 618 - VVV1_0( w_fp[112], w_fp[34], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[34], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13214,7 +13214,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 619 - FFV1_0( w_fp[3], w_fp[9], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13226,10 +13226,10 @@ namespace mg5amcCpu // *** DIAGRAM 620 OF 1240 *** // Wavefunction(s) for diagram number 620 - VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 0., 0., w_fp[86] ); + VVV1P0_1( w_fp[92], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 620 - VVV1_0( w_fp[86], w_fp[34], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[34], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13248,7 +13248,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 621 - FFV1_0( w_fp[3], w_fp[87], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[87], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support 
updates numerators_sv and denominators_sv (#473) #endif @@ -13260,12 +13260,12 @@ namespace mg5amcCpu // *** DIAGRAM 622 OF 1240 *** // Wavefunction(s) for diagram number 622 - VVVV1P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[107] ); - VVVV3P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[105] ); + VVVV1P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[107] ); + VVVV3P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[92], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[105] ); // Amplitude(s) for diagram number 622 - FFV1_0( w_fp[3], w_fp[77], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[107], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13277,7 +13277,7 @@ namespace mg5amcCpu jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[95], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13289,7 +13289,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[105], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13305,10 +13305,10 @@ namespace mg5amcCpu // *** DIAGRAM 623 OF 1240 *** // Wavefunction(s) for diagram number 623 - FFV1_1( w_fp[77], w_fp[92], COUPs[1], 
cIPD[0], cIPD[1], w_fp[102] ); + FFV1_1( w_fp[77], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] ); // Amplitude(s) for diagram number 623 - FFV1_0( w_fp[46], w_fp[102], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[102], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13318,10 +13318,10 @@ namespace mg5amcCpu // *** DIAGRAM 624 OF 1240 *** // Wavefunction(s) for diagram number 624 - FFV1_2( w_fp[46], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[88] ); + FFV1_2( w_fp[46], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[88] ); // Amplitude(s) for diagram number 624 - FFV1_0( w_fp[88], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13334,7 +13334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 625 - FFV1_0( w_fp[46], w_fp[77], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13349,7 +13349,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 626 - FFV1_0( w_fp[38], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13359,10 +13359,10 @@ namespace mg5amcCpu // *** DIAGRAM 627 OF 1240 *** // Wavefunction(s) for diagram number 627 - FFV1_2( w_fp[38], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[90] ); + FFV1_2( w_fp[38], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], 
w_fp[90] ); // Amplitude(s) for diagram number 627 - FFV1_0( w_fp[90], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13375,7 +13375,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 628 - FFV1_0( w_fp[38], w_fp[77], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13390,7 +13390,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 629 - FFV1_0( w_fp[3], w_fp[102], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13405,7 +13405,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 630 - FFV1_0( w_fp[99], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13417,10 +13417,10 @@ namespace mg5amcCpu // *** DIAGRAM 631 OF 1240 *** // Wavefunction(s) for diagram number 631 - VVV1P0_1( w_fp[92], w_fp[24], COUPs[0], 0., 0., w_fp[102] ); + VVV1P0_1( w_fp[92], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[102] ); // Amplitude(s) for diagram number 631 - FFV1_0( w_fp[3], w_fp[77], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -13436,10 +13436,10 @@ namespace mg5amcCpu // *** DIAGRAM 632 OF 1240 *** // Wavefunction(s) for diagram number 632 - FFV1_1( w_fp[2], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[96] ); + FFV1_1( w_fp[2], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[96] ); // Amplitude(s) for diagram number 632 - FFV1_0( w_fp[56], w_fp[96], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[96], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13452,7 +13452,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 633 - FFV1_0( w_fp[22], w_fp[96], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[96], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13465,7 +13465,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 634 - VVV1_0( w_fp[112], w_fp[103], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[103], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13484,7 +13484,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 635 - FFV1_0( w_fp[22], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13499,7 +13499,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 636 - VVV1_0( w_fp[86], w_fp[103], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[103], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13518,7 +13518,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 637 - FFV1_0( w_fp[56], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13533,7 +13533,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 638 - FFV1_0( w_fp[52], w_fp[2], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[107], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13545,7 +13545,7 @@ namespace mg5amcCpu jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13557,7 +13557,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[105], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13573,10 +13573,10 @@ namespace mg5amcCpu // *** DIAGRAM 639 OF 1240 *** // Wavefunction(s) for diagram number 639 - FFV1_2( w_fp[52], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[104] ); + FFV1_2( w_fp[52], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] ); // 
Amplitude(s) for diagram number 639 - FFV1_0( w_fp[104], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13586,10 +13586,10 @@ namespace mg5amcCpu // *** DIAGRAM 640 OF 1240 *** // Wavefunction(s) for diagram number 640 - FFV1_1( w_fp[33], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[114] ); + FFV1_1( w_fp[33], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] ); // Amplitude(s) for diagram number 640 - FFV1_0( w_fp[52], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13602,7 +13602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 641 - FFV1_0( w_fp[52], w_fp[33], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13617,7 +13617,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 642 - FFV1_0( w_fp[104], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13627,10 +13627,10 @@ namespace mg5amcCpu // *** DIAGRAM 643 OF 1240 *** // Wavefunction(s) for diagram number 643 - FFV1_1( w_fp[39], w_fp[92], COUPs[1], cIPD[0], cIPD[1], w_fp[106] ); + FFV1_1( w_fp[39], w_fp[92], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[106] ); // Amplitude(s) for diagram number 643 - FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[52], w_fp[106], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13643,7 +13643,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 644 - FFV1_0( w_fp[52], w_fp[39], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13658,7 +13658,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 645 - FFV1_0( w_fp[104], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13673,7 +13673,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 646 - FFV1_0( w_fp[52], w_fp[96], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[96], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13688,7 +13688,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 647 - FFV1_0( w_fp[52], w_fp[2], w_fp[102], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[102], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13707,7 +13707,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 648 - FFV1_0( w_fp[65], w_fp[96], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[96], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13722,7 +13722,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 649 - FFV1_0( w_fp[3], w_fp[96], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13741,7 +13741,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 650 - FFV1_0( w_fp[99], w_fp[93], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[93], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13756,7 +13756,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 651 - FFV1_0( w_fp[99], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13775,7 +13775,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 652 - FFV1_0( w_fp[3], w_fp[93], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[93], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13794,7 +13794,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 653 - FFV1_0( w_fp[65], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13813,7 +13813,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 654 - VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13833,7 +13833,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[96] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13853,7 +13853,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[61], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13877,10 +13877,10 @@ namespace mg5amcCpu // *** DIAGRAM 655 OF 1240 *** // Wavefunction(s) for diagram number 655 - VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 655 - VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13904,10 +13904,10 @@ namespace mg5amcCpu // *** DIAGRAM 656 OF 1240 *** // Wavefunction(s) for diagram number 656 - VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 0., 0., w_fp[113] ); + VVV1P0_1( w_fp[92], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[113] ); // Amplitude(s) 
for diagram number 656 - VVV1_0( w_fp[61], w_fp[5], w_fp[113], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[113], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13934,7 +13934,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 657 - VVV1_0( w_fp[61], w_fp[8], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[8], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13961,7 +13961,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 658 - FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13980,7 +13980,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 659 - FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[106], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -13995,7 +13995,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 660 - FFV1_0( w_fp[99], w_fp[39], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14010,7 +14010,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 661 - FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] 
); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14029,7 +14029,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 662 - FFV1_0( w_fp[38], w_fp[96], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[96], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14044,7 +14044,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 663 - FFV1_0( w_fp[90], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14059,7 +14059,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 664 - FFV1_0( w_fp[71], w_fp[96], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[96], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14074,7 +14074,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 665 - FFV1_0( w_fp[3], w_fp[96], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14093,7 +14093,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 666 - FFV1_0( w_fp[99], w_fp[94], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[94], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -14108,7 +14108,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 667 - FFV1_0( w_fp[99], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14127,7 +14127,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 668 - FFV1_0( w_fp[3], w_fp[94], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[94], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14146,7 +14146,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 669 - FFV1_0( w_fp[71], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14165,7 +14165,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 670 - VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14185,7 +14185,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[97] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14205,7 +14205,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; 
jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[66], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14229,10 +14229,10 @@ namespace mg5amcCpu // *** DIAGRAM 671 OF 1240 *** // Wavefunction(s) for diagram number 671 - VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 671 - VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14259,7 +14259,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 672 - VVV1_0( w_fp[66], w_fp[4], w_fp[113], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[113], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14286,7 +14286,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 673 - VVV1_0( w_fp[66], w_fp[8], w_fp[112], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[8], w_fp[112], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14313,7 +14313,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 674 - FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -14332,7 +14332,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 675 - FFV1_0( w_fp[3], w_fp[114], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14347,7 +14347,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 676 - FFV1_0( w_fp[99], w_fp[33], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[33], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14362,7 +14362,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 677 - FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14381,7 +14381,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 678 - FFV1_0( w_fp[46], w_fp[96], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[96], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14396,7 +14396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 679 - FFV1_0( w_fp[88], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14408,10 +14408,10 @@ namespace mg5amcCpu // *** DIAGRAM 680 OF 1240 *** // Wavefunction(s) for diagram number 
680 - VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[92], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[104] ); // Amplitude(s) for diagram number 680 - VVV1_0( w_fp[104], w_fp[13], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[13], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14438,7 +14438,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 681 - VVV1_0( w_fp[104], w_fp[10], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[10], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14465,7 +14465,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 682 - VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14485,7 +14485,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14505,7 +14505,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[104], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -14532,7 +14532,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 683 - VVV1_0( w_fp[112], w_fp[108], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[108], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14559,7 +14559,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 684 - VVV1_0( w_fp[112], w_fp[1], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[1], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14586,7 +14586,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 685 - VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14606,7 +14606,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14626,7 +14626,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[112], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -14653,7 +14653,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 686 - VVV1_0( w_fp[86], w_fp[108], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[108], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14680,7 +14680,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 687 - VVV1_0( w_fp[86], w_fp[1], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14707,7 +14707,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 688 - VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14727,7 +14727,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14747,7 +14747,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[86], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-14771,12 +14771,12 @@ namespace mg5amcCpu // *** DIAGRAM 689 OF 1240 *** // Wavefunction(s) for diagram number 689 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[98] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[62] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[101] ); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[98] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[62] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[101] ); // Amplitude(s) for diagram number 689 - VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14796,7 +14796,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[96] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14816,7 +14816,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[101], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[101], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14840,12 +14840,12 @@ namespace mg5amcCpu // *** DIAGRAM 690 OF 1240 *** // Wavefunction(s) for diagram number 690 - VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[109] ); - VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[111] 
); + VVVV1P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] ); + VVVV3P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[92], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 690 - VVV1_0( w_fp[8], w_fp[4], w_fp[109], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[109], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14865,7 +14865,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[97] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[110], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[110], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14885,7 +14885,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[111], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14912,7 +14912,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 691 - VVV1_0( w_fp[1], w_fp[8], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14932,7 +14932,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[99] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
// Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14952,7 +14952,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[105], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14979,7 +14979,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 692 - VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -14999,7 +14999,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[96] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15019,7 +15019,7 @@ namespace mg5amcCpu jamp_sv[97] += amp_sv[0]; jamp_sv[99] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[92], w_fp[1], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15046,7 +15046,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 693 - VVV1_0( w_fp[8], w_fp[24], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[24], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // 
Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15073,7 +15073,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 694 - VVV1_0( w_fp[1], w_fp[24], w_fp[113], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[113], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15100,7 +15100,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 695 - VVV1_0( w_fp[1], w_fp[8], w_fp[102], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[102], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15127,7 +15127,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 696 - VVV1_0( w_fp[104], w_fp[37], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[37], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15146,7 +15146,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 697 - FFV1_0( w_fp[3], w_fp[35], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[35], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15161,7 +15161,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 698 - FFV1_0( w_fp[99], w_fp[100], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[100], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15174,7 +15174,7 @@ namespace 
mg5amcCpu // (none) // Amplitude(s) for diagram number 699 - FFV1_0( w_fp[99], w_fp[35], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[35], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15187,7 +15187,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 700 - FFV1_0( w_fp[3], w_fp[100], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[100], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15202,7 +15202,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 701 - VVV1_0( w_fp[86], w_fp[1], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15221,7 +15221,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 702 - FFV1_0( w_fp[3], w_fp[33], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[109], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15233,7 +15233,7 @@ namespace mg5amcCpu jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[110], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15245,7 +15245,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += 
cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15264,7 +15264,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 703 - FFV1_0( w_fp[38], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15279,7 +15279,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 704 - FFV1_0( w_fp[38], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15292,7 +15292,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 705 - FFV1_0( w_fp[90], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15305,7 +15305,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 706 - VVV1_0( w_fp[104], w_fp[45], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[45], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15324,7 +15324,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 707 - FFV1_0( w_fp[3], w_fp[43], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[3], w_fp[43], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15339,7 +15339,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 708 - FFV1_0( w_fp[99], w_fp[89], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[89], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15352,7 +15352,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 709 - FFV1_0( w_fp[99], w_fp[43], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[43], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15365,7 +15365,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 710 - FFV1_0( w_fp[3], w_fp[89], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[89], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15380,7 +15380,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 711 - VVV1_0( w_fp[112], w_fp[1], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[1], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15399,7 +15399,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 712 - FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support 
updates numerators_sv and denominators_sv (#473) #endif @@ -15411,7 +15411,7 @@ namespace mg5amcCpu jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15423,7 +15423,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15442,7 +15442,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 713 - FFV1_0( w_fp[46], w_fp[39], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15457,7 +15457,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 714 - FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[106], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15470,7 +15470,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 715 - FFV1_0( w_fp[88], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[88], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15483,7 +15483,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 716 - VVV1_0( w_fp[104], w_fp[54], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[54], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15502,7 +15502,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 717 - FFV1_0( w_fp[7], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15517,7 +15517,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 718 - FFV1_0( w_fp[78], w_fp[96], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[96], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15530,7 +15530,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 719 - FFV1_0( w_fp[7], w_fp[96], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[96], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15543,7 +15543,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 720 - FFV1_0( w_fp[78], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15558,7 +15558,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 721 - VVV1_0( w_fp[86], w_fp[1], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[86], w_fp[1], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15577,7 +15577,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 722 - FFV1_0( w_fp[46], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15589,7 +15589,7 @@ namespace mg5amcCpu jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15601,7 +15601,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15620,7 +15620,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 723 - VVV1_0( w_fp[104], w_fp[23], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[104], w_fp[23], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15639,7 +15639,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 724 - FFV1_0( w_fp[25], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15654,7 +15654,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 725 - FFV1_0( w_fp[58], w_fp[96], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[96], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15667,7 +15667,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 726 - FFV1_0( w_fp[25], w_fp[96], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[96], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15680,7 +15680,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 727 - FFV1_0( w_fp[58], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15695,7 +15695,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 728 - VVV1_0( w_fp[112], w_fp[1], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[112], w_fp[1], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15714,7 +15714,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 729 - FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15726,7 +15726,7 @@ namespace mg5amcCpu jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15738,7 +15738,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15757,7 +15757,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 730 - FFV1_0( w_fp[3], w_fp[17], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15776,7 +15776,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 731 - FFV1_0( w_fp[26], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15795,7 +15795,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 732 - FFV1_0( w_fp[3], w_fp[96], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], 
w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15814,7 +15814,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 733 - FFV1_0( w_fp[26], w_fp[96], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[96], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15829,7 +15829,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 734 - FFV1_0( w_fp[99], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15848,7 +15848,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 735 - FFV1_0( w_fp[99], w_fp[17], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[17], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15863,7 +15863,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 736 - FFV1_0( w_fp[3], w_fp[96], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[73], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15875,7 +15875,7 @@ namespace mg5amcCpu jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[96], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[79], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
// Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15887,7 +15887,7 @@ namespace mg5amcCpu jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[96], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[96], w_fp[80], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15906,7 +15906,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 737 - FFV1_0( w_fp[99], w_fp[2], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[73], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15918,7 +15918,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15930,7 +15930,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15949,7 +15949,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 738 - VVV1_0( w_fp[92], w_fp[73], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( 
w_fp[92], w_fp[73], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15969,7 +15969,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[92], w_fp[79], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[79], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -15989,7 +15989,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[92], w_fp[80], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[92], w_fp[80], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16013,10 +16013,10 @@ namespace mg5amcCpu // *** DIAGRAM 739 OF 1240 *** // Wavefunction(s) for diagram number 739 - FFV1_1( w_fp[77], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[92] ); + FFV1_1( w_fp[77], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[92] ); // Amplitude(s) for diagram number 739 - FFV1_0( w_fp[7], w_fp[92], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[92], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16028,7 +16028,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 740 - FFV1_0( w_fp[53], w_fp[92], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[92], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16037,10 +16037,10 @@ namespace mg5amcCpu // 
*** DIAGRAM 741 OF 1240 *** // Wavefunction(s) for diagram number 741 - FFV1_2( w_fp[46], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[99] ); + FFV1_2( w_fp[46], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[99] ); // Amplitude(s) for diagram number 741 - FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16052,7 +16052,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 742 - FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[85], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16064,7 +16064,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 743 - FFV1_0( w_fp[53], w_fp[9], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[9], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16076,7 +16076,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 744 - FFV1_0( w_fp[7], w_fp[85], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[85], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16088,7 +16088,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 745 - FFV1_0( w_fp[46], w_fp[92], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[92], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif 
@@ -16101,7 +16101,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 746 - FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16111,10 +16111,10 @@ namespace mg5amcCpu // *** DIAGRAM 747 OF 1240 *** // Wavefunction(s) for diagram number 747 - VVV1P0_1( w_fp[0], w_fp[29], COUPs[0], 0., 0., w_fp[96] ); + VVV1P0_1( w_fp[0], w_fp[29], COUPs[0], 1.0, 0., 0., w_fp[96] ); // Amplitude(s) for diagram number 747 - FFV1_0( w_fp[46], w_fp[77], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16129,7 +16129,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 748 - FFV1_0( w_fp[25], w_fp[92], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[92], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16141,7 +16141,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 749 - FFV1_0( w_fp[48], w_fp[92], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[92], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16150,10 +16150,10 @@ namespace mg5amcCpu // *** DIAGRAM 750 OF 1240 *** // Wavefunction(s) for diagram number 750 - FFV1_2( w_fp[38], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[104] ); + FFV1_2( w_fp[38], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[104] ); // Amplitude(s) for diagram number 750 - FFV1_0( w_fp[104], 
w_fp[87], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[87], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16165,7 +16165,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 751 - FFV1_0( w_fp[104], w_fp[85], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[85], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16177,7 +16177,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 752 - FFV1_0( w_fp[48], w_fp[87], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[87], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16189,7 +16189,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 753 - FFV1_0( w_fp[25], w_fp[85], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[85], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16201,7 +16201,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 754 - FFV1_0( w_fp[38], w_fp[92], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[92], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16214,7 +16214,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 755 - FFV1_0( w_fp[104], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[77], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // 
Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16224,10 +16224,10 @@ namespace mg5amcCpu // *** DIAGRAM 756 OF 1240 *** // Wavefunction(s) for diagram number 756 - VVV1P0_1( w_fp[0], w_fp[27], COUPs[0], 0., 0., w_fp[101] ); + VVV1P0_1( w_fp[0], w_fp[27], COUPs[0], 1.0, 0., 0., w_fp[101] ); // Amplitude(s) for diagram number 756 - FFV1_0( w_fp[38], w_fp[77], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16242,7 +16242,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 757 - FFV1_0( w_fp[28], w_fp[92], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[92], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16254,7 +16254,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 758 - FFV1_0( w_fp[40], w_fp[92], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[92], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16263,10 +16263,10 @@ namespace mg5amcCpu // *** DIAGRAM 759 OF 1240 *** // Wavefunction(s) for diagram number 759 - FFV1_2( w_fp[41], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[62] ); + FFV1_2( w_fp[41], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[62] ); // Amplitude(s) for diagram number 759 - FFV1_0( w_fp[62], w_fp[87], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[87], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-16278,7 +16278,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 760 - FFV1_0( w_fp[62], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16290,7 +16290,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 761 - FFV1_0( w_fp[40], w_fp[87], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[87], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16302,7 +16302,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 762 - FFV1_0( w_fp[28], w_fp[9], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[9], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16314,7 +16314,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 763 - FFV1_0( w_fp[41], w_fp[92], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[92], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16327,7 +16327,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 764 - FFV1_0( w_fp[62], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16337,10 +16337,10 @@ namespace mg5amcCpu // *** DIAGRAM 765 OF 1240 *** // Wavefunction(s) for diagram number 765 - VVV1P0_1( w_fp[0], w_fp[24], 
COUPs[0], 0., 0., w_fp[98] ); + VVV1P0_1( w_fp[0], w_fp[24], COUPs[0], 1.0, 0., 0., w_fp[98] ); // Amplitude(s) for diagram number 765 - FFV1_0( w_fp[41], w_fp[77], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16355,7 +16355,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 766 - FFV1_0( w_fp[26], w_fp[92], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[92], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16368,7 +16368,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 767 - FFV1_0( w_fp[3], w_fp[92], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16383,7 +16383,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 768 - VVV1_0( w_fp[98], w_fp[34], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[34], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16402,7 +16402,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 769 - FFV1_0( w_fp[3], w_fp[85], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[85], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16417,7 +16417,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 770 - VVV1_0( 
w_fp[0], w_fp[34], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[34], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16436,7 +16436,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 771 - FFV1_0( w_fp[26], w_fp[85], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[85], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16446,12 +16446,12 @@ namespace mg5amcCpu // *** DIAGRAM 772 OF 1240 *** // Wavefunction(s) for diagram number 772 - VVVV1P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 0., 0., w_fp[85] ); - VVVV3P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 0., 0., w_fp[112] ); - VVVV4P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[85] ); + VVVV3P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[112] ); + VVVV4P0_1( w_fp[0], w_fp[24], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 772 - FFV1_0( w_fp[3], w_fp[77], w_fp[85], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[85], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16463,7 +16463,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16475,7 +16475,7 @@ namespace 
mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16494,7 +16494,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 773 - FFV1_0( w_fp[14], w_fp[92], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[92], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16507,7 +16507,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 774 - FFV1_0( w_fp[3], w_fp[92], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16522,7 +16522,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 775 - VVV1_0( w_fp[101], w_fp[34], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[34], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16541,7 +16541,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 776 - FFV1_0( w_fp[3], w_fp[9], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16556,7 +16556,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 777 - VVV1_0( 
w_fp[0], w_fp[34], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[34], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16575,7 +16575,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 778 - FFV1_0( w_fp[14], w_fp[9], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[9], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16585,12 +16585,12 @@ namespace mg5amcCpu // *** DIAGRAM 779 OF 1240 *** // Wavefunction(s) for diagram number 779 - VVVV1P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 0., 0., w_fp[9] ); - VVVV3P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 0., 0., w_fp[110] ); - VVVV4P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 0., 0., w_fp[109] ); + VVVV1P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[9] ); + VVVV3P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[110] ); + VVVV4P0_1( w_fp[0], w_fp[27], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[109] ); // Amplitude(s) for diagram number 779 - FFV1_0( w_fp[3], w_fp[77], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16602,7 +16602,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[110], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16614,7 +16614,7 @@ namespace 
mg5amcCpu jamp_sv[37] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[109], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16633,7 +16633,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 780 - FFV1_0( w_fp[12], w_fp[92], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[92], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16646,7 +16646,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 781 - FFV1_0( w_fp[3], w_fp[92], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16661,7 +16661,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 782 - VVV1_0( w_fp[96], w_fp[34], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[34], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16680,7 +16680,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 783 - FFV1_0( w_fp[3], w_fp[87], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[87], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16695,7 +16695,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 784 - VVV1_0( 
w_fp[0], w_fp[34], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[34], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16714,7 +16714,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 785 - FFV1_0( w_fp[12], w_fp[87], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[87], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16724,12 +16724,12 @@ namespace mg5amcCpu // *** DIAGRAM 786 OF 1240 *** // Wavefunction(s) for diagram number 786 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 0., 0., w_fp[87] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 0., 0., w_fp[34] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 0., 0., w_fp[86] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[87] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[34] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[86] ); // Amplitude(s) for diagram number 786 - FFV1_0( w_fp[3], w_fp[77], w_fp[87], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[87], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16741,7 +16741,7 @@ namespace mg5amcCpu jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[34], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[34], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16753,7 +16753,7 @@ namespace 
mg5amcCpu jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16772,7 +16772,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 787 - FFV1_0( w_fp[3], w_fp[92], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[30], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16780,7 +16780,7 @@ namespace mg5amcCpu jamp_sv[25] -= amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[29] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[92], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[31], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16788,7 +16788,7 @@ namespace mg5amcCpu jamp_sv[26] += amp_sv[0]; jamp_sv[27] -= amp_sv[0]; jamp_sv[28] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[92], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[92], w_fp[32], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16800,12 +16800,12 @@ namespace mg5amcCpu // *** DIAGRAM 788 OF 1240 *** // Wavefunction(s) for diagram number 788 - VVV1P0_1( w_fp[0], w_fp[30], COUPs[0], 0., 0., w_fp[92] ); - VVV1P0_1( w_fp[0], w_fp[31], COUPs[0], 0., 0., w_fp[88] ); - VVV1P0_1( w_fp[0], w_fp[32], COUPs[0], 0., 0., w_fp[106] ); + VVV1P0_1( w_fp[0], w_fp[30], COUPs[0], 1.0, 0., 0., w_fp[92] ); + VVV1P0_1( w_fp[0], w_fp[31], COUPs[0], 1.0, 0., 
0., w_fp[88] ); + VVV1P0_1( w_fp[0], w_fp[32], COUPs[0], 1.0, 0., 0., w_fp[106] ); // Amplitude(s) for diagram number 788 - FFV1_0( w_fp[3], w_fp[77], w_fp[92], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[92], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16817,7 +16817,7 @@ namespace mg5amcCpu jamp_sv[35] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[88], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16829,7 +16829,7 @@ namespace mg5amcCpu jamp_sv[39] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[106], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[106], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16845,10 +16845,10 @@ namespace mg5amcCpu // *** DIAGRAM 789 OF 1240 *** // Wavefunction(s) for diagram number 789 - FFV1_2( w_fp[52], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[90] ); + FFV1_2( w_fp[52], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[90] ); // Amplitude(s) for diagram number 789 - FFV1_0( w_fp[90], w_fp[35], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[35], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16860,7 +16860,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 790 - FFV1_0( w_fp[90], w_fp[36], 
w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[36], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16869,10 +16869,10 @@ namespace mg5amcCpu // *** DIAGRAM 791 OF 1240 *** // Wavefunction(s) for diagram number 791 - FFV1_1( w_fp[33], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[114] ); + FFV1_1( w_fp[33], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[114] ); // Amplitude(s) for diagram number 791 - FFV1_0( w_fp[22], w_fp[114], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[114], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16884,7 +16884,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 792 - FFV1_0( w_fp[21], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16896,7 +16896,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 793 - FFV1_0( w_fp[22], w_fp[36], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[36], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16908,7 +16908,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 794 - FFV1_0( w_fp[21], w_fp[35], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[35], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16920,7 +16920,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 795 - FFV1_0( w_fp[90], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16933,7 +16933,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 796 - FFV1_0( w_fp[52], w_fp[114], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[114], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16946,7 +16946,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 797 - FFV1_0( w_fp[52], w_fp[33], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16961,7 +16961,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 798 - FFV1_0( w_fp[90], w_fp[43], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[43], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16973,7 +16973,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 799 - FFV1_0( w_fp[90], w_fp[44], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[44], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16982,10 +16982,10 @@ namespace mg5amcCpu // *** DIAGRAM 800 OF 1240 *** // Wavefunction(s) for diagram number 800 - FFV1_1( w_fp[39], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[102] ); + FFV1_1( 
w_fp[39], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[102] ); // Amplitude(s) for diagram number 800 - FFV1_0( w_fp[56], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -16997,7 +16997,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 801 - FFV1_0( w_fp[21], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17009,7 +17009,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 802 - FFV1_0( w_fp[56], w_fp[44], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[44], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17021,7 +17021,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 803 - FFV1_0( w_fp[21], w_fp[43], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[43], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17033,7 +17033,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 804 - FFV1_0( w_fp[90], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17046,7 +17046,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 805 - FFV1_0( w_fp[52], w_fp[102], w_fp[27], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[102], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17059,7 +17059,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 806 - FFV1_0( w_fp[52], w_fp[39], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17074,7 +17074,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 807 - FFV1_0( w_fp[90], w_fp[49], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[49], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17086,7 +17086,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 808 - FFV1_0( w_fp[90], w_fp[50], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[50], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17095,10 +17095,10 @@ namespace mg5amcCpu // *** DIAGRAM 809 OF 1240 *** // Wavefunction(s) for diagram number 809 - FFV1_1( w_fp[47], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[113] ); + FFV1_1( w_fp[47], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[113] ); // Amplitude(s) for diagram number 809 - FFV1_0( w_fp[56], w_fp[113], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[113], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17110,7 +17110,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 810 - FFV1_0( w_fp[22], w_fp[113], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[113], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17122,7 +17122,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 811 - FFV1_0( w_fp[56], w_fp[50], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[50], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17134,7 +17134,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 812 - FFV1_0( w_fp[22], w_fp[49], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[49], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17146,7 +17146,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 813 - FFV1_0( w_fp[90], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17159,7 +17159,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 814 - FFV1_0( w_fp[52], w_fp[113], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[113], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17172,7 +17172,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 815 - FFV1_0( w_fp[52], w_fp[47], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[98], COUPs[1], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17187,7 +17187,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 816 - FFV1_0( w_fp[90], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17200,7 +17200,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 817 - FFV1_0( w_fp[90], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17215,7 +17215,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 818 - VVV1_0( w_fp[98], w_fp[103], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[103], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17234,7 +17234,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 819 - FFV1_0( w_fp[21], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17249,7 +17249,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 820 - VVV1_0( w_fp[0], w_fp[103], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[103], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -17268,7 +17268,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 821 - FFV1_0( w_fp[21], w_fp[17], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[17], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17281,7 +17281,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 822 - FFV1_0( w_fp[52], w_fp[2], w_fp[85], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[85], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17293,7 +17293,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[112], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[112], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17305,7 +17305,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17324,7 +17324,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 823 - FFV1_0( w_fp[90], w_fp[15], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[15], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -17337,7 +17337,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 824 - FFV1_0( w_fp[90], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17352,7 +17352,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 825 - VVV1_0( w_fp[101], w_fp[103], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[103], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17371,7 +17371,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 826 - FFV1_0( w_fp[22], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17386,7 +17386,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 827 - VVV1_0( w_fp[0], w_fp[103], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[103], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17405,7 +17405,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 828 - FFV1_0( w_fp[22], w_fp[15], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[15], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17418,7 +17418,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 829 - 
FFV1_0( w_fp[52], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17430,7 +17430,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[110], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17442,7 +17442,7 @@ namespace mg5amcCpu jamp_sv[77] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[109], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17461,7 +17461,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 830 - FFV1_0( w_fp[90], w_fp[18], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[18], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17474,7 +17474,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 831 - FFV1_0( w_fp[90], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17489,7 +17489,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 832 - VVV1_0( w_fp[96], w_fp[103], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[103], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17508,7 +17508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 833 - FFV1_0( w_fp[56], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17523,7 +17523,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 834 - VVV1_0( w_fp[0], w_fp[103], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[103], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17542,7 +17542,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 835 - FFV1_0( w_fp[56], w_fp[18], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[56], w_fp[18], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17555,7 +17555,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 836 - FFV1_0( w_fp[52], w_fp[2], w_fp[87], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[87], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17567,7 +17567,7 @@ namespace mg5amcCpu jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[34], COUPs[1], &_fp[0] 
); + FFV1_0( w_fp[52], w_fp[2], w_fp[34], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17579,7 +17579,7 @@ namespace mg5amcCpu jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[86], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[86], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17598,7 +17598,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 837 - FFV1_0( w_fp[90], w_fp[2], w_fp[30], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[30], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17606,7 +17606,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - FFV1_0( w_fp[90], w_fp[2], w_fp[31], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[31], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17614,7 +17614,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - FFV1_0( w_fp[90], w_fp[2], w_fp[32], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[90], w_fp[2], w_fp[32], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17629,7 +17629,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 838 - FFV1_0( w_fp[52], w_fp[2], w_fp[92], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[52], w_fp[2], w_fp[92], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17641,7 +17641,7 @@ namespace mg5amcCpu jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[88], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17653,7 +17653,7 @@ namespace mg5amcCpu jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[106], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[106], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17669,10 +17669,10 @@ namespace mg5amcCpu // *** DIAGRAM 839 OF 1240 *** // Wavefunction(s) for diagram number 839 - VVV1P0_1( w_fp[0], w_fp[61], COUPs[0], 0., 0., w_fp[90] ); + VVV1P0_1( w_fp[0], w_fp[61], COUPs[0], 1.0, 0., 0., w_fp[90] ); // Amplitude(s) for diagram number 839 - VVV1_0( w_fp[90], w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17699,7 +17699,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 840 - VVV1_0( w_fp[90], w_fp[11], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[11], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -17726,7 +17726,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 841 - VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17746,7 +17746,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17766,7 +17766,7 @@ namespace mg5amcCpu jamp_sv[115] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[5], w_fp[6], w_fp[90], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17790,10 +17790,10 @@ namespace mg5amcCpu // *** DIAGRAM 842 OF 1240 *** // Wavefunction(s) for diagram number 842 - VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 0., 0., w_fp[56] ); + VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[56] ); // Amplitude(s) for diagram number 842 - VVV1_0( w_fp[56], w_fp[63], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[63], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17820,7 +17820,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 843 - VVV1_0( w_fp[56], w_fp[64], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( 
w_fp[56], w_fp[64], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17847,7 +17847,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 844 - VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17867,7 +17867,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17887,7 +17887,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[61], w_fp[5], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17914,7 +17914,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 845 - VVV1_0( w_fp[0], w_fp[63], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[63], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17941,7 +17941,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 846 - VVV1_0( w_fp[0], w_fp[64], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[64], w_fp[10], 
COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17965,12 +17965,12 @@ namespace mg5amcCpu // *** DIAGRAM 847 OF 1240 *** // Wavefunction(s) for diagram number 847 - VVVV1P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 0., 0., w_fp[103] ); - VVVV3P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 0., 0., w_fp[22] ); - VVVV4P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[103] ); + VVVV3P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[22] ); + VVVV4P0_1( w_fp[0], w_fp[61], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 847 - VVV1_0( w_fp[8], w_fp[6], w_fp[103], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[103], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -17990,7 +17990,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18010,7 +18010,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18034,12 +18034,12 @@ namespace mg5amcCpu // *** DIAGRAM 848 OF 1240 *** // Wavefunction(s) for diagram number 848 - VVVV1P0_1( w_fp[0], 
w_fp[61], w_fp[6], COUPs[2], 0., 0., w_fp[105] ); - VVVV3P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 0., 0., w_fp[107] ); + VVVV1P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[105] ); + VVVV3P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[0], w_fp[61], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[107] ); // Amplitude(s) for diagram number 848 - VVV1_0( w_fp[8], w_fp[5], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[105], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18059,7 +18059,7 @@ namespace mg5amcCpu jamp_sv[95] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18079,7 +18079,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[98] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18103,12 +18103,12 @@ namespace mg5amcCpu // *** DIAGRAM 849 OF 1240 *** // Wavefunction(s) for diagram number 849 - VVVV1P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 0., 0., w_fp[115] ); - VVVV3P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 0., 0., w_fp[116] ); - VVVV4P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 0., 0., w_fp[117] ); + VVVV1P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[115] ); + VVVV3P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 
0., 0., w_fp[116] ); + VVVV4P0_1( w_fp[0], w_fp[8], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[117] ); // Amplitude(s) for diagram number 849 - VVV1_0( w_fp[61], w_fp[6], w_fp[115], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[115], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18128,7 +18128,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVV1_0( w_fp[61], w_fp[6], w_fp[116], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[116], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18148,7 +18148,7 @@ namespace mg5amcCpu jamp_sv[105] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[61], w_fp[6], w_fp[117], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[6], w_fp[117], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18172,12 +18172,12 @@ namespace mg5amcCpu // *** DIAGRAM 850 OF 1240 *** // Wavefunction(s) for diagram number 850 - VVVV1P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 0., 0., w_fp[118] ); - VVVV3P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 0., 0., w_fp[119] ); - VVVV4P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 0., 0., w_fp[120] ); + VVVV1P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[118] ); + VVVV3P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[119] ); + VVVV4P0_1( w_fp[0], w_fp[8], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[120] ); // Amplitude(s) for diagram number 850 - VVV1_0( w_fp[61], w_fp[5], w_fp[118], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[118], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code 
base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18197,7 +18197,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[61], w_fp[5], w_fp[119], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[119], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18217,7 +18217,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[100] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[61], w_fp[5], w_fp[120], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[5], w_fp[120], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18244,7 +18244,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 851 - VVVV1_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18264,7 +18264,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18284,7 +18284,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[61], w_fp[8], w_fp[29], COUPs[2], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18311,7 +18311,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 852 - VVV1_0( w_fp[8], w_fp[29], w_fp[90], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[29], w_fp[90], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18338,7 +18338,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 853 - VVV1_0( w_fp[61], w_fp[29], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[29], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18365,7 +18365,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 854 - VVV1_0( w_fp[61], w_fp[8], w_fp[96], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[61], w_fp[8], w_fp[96], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18392,7 +18392,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 855 - VVV1_0( w_fp[90], w_fp[45], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[45], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18411,7 +18411,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 856 - FFV1_0( w_fp[3], w_fp[44], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[44], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-18426,7 +18426,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 857 - FFV1_0( w_fp[65], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18439,7 +18439,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 858 - FFV1_0( w_fp[3], w_fp[102], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18454,7 +18454,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 859 - FFV1_0( w_fp[65], w_fp[44], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[44], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18467,7 +18467,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 860 - VVV1_0( w_fp[0], w_fp[64], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[64], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18486,7 +18486,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 861 - FFV1_0( w_fp[3], w_fp[39], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[105], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18498,7 +18498,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += 
cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[95], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18510,7 +18510,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[107], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18529,7 +18529,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 862 - FFV1_0( w_fp[41], w_fp[39], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18544,7 +18544,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 863 - FFV1_0( w_fp[41], w_fp[102], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[102], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18557,7 +18557,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 864 - FFV1_0( w_fp[62], w_fp[39], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[39], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18570,7 +18570,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 865 - VVV1_0( w_fp[90], w_fp[51], 
w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[51], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18589,7 +18589,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 866 - FFV1_0( w_fp[3], w_fp[50], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[50], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18604,7 +18604,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 867 - FFV1_0( w_fp[65], w_fp[113], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[113], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18617,7 +18617,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 868 - FFV1_0( w_fp[3], w_fp[113], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18632,7 +18632,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 869 - FFV1_0( w_fp[65], w_fp[50], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[50], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18645,7 +18645,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 870 - VVV1_0( w_fp[0], w_fp[63], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[63], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code 
base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18664,7 +18664,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 871 - FFV1_0( w_fp[3], w_fp[47], w_fp[103], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[103], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18676,7 +18676,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18688,7 +18688,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18707,7 +18707,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 872 - FFV1_0( w_fp[38], w_fp[47], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18722,7 +18722,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 873 - FFV1_0( w_fp[38], w_fp[113], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[113], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18735,7 +18735,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 874 - FFV1_0( w_fp[104], w_fp[47], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[47], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18748,7 +18748,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 875 - VVV1_0( w_fp[90], w_fp[23], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[23], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18767,7 +18767,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 876 - FFV1_0( w_fp[48], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18782,7 +18782,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 877 - FFV1_0( w_fp[104], w_fp[93], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[93], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18795,7 +18795,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 878 - FFV1_0( w_fp[104], w_fp[2], w_fp[64], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[64], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-18810,7 +18810,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 879 - FFV1_0( w_fp[48], w_fp[93], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[93], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18823,7 +18823,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 880 - VVV1_0( w_fp[0], w_fp[64], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[64], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18842,7 +18842,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 881 - FFV1_0( w_fp[38], w_fp[2], w_fp[105], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[105], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18854,7 +18854,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[95], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[95], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18866,7 +18866,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[107], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[107], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -18885,7 +18885,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 882 - VVV1_0( w_fp[90], w_fp[20], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[90], w_fp[20], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18904,7 +18904,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 883 - FFV1_0( w_fp[40], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18919,7 +18919,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 884 - FFV1_0( w_fp[62], w_fp[93], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[93], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18932,7 +18932,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 885 - FFV1_0( w_fp[62], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18947,7 +18947,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 886 - FFV1_0( w_fp[40], w_fp[93], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[93], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18960,7 +18960,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 887 - VVV1_0( w_fp[0], w_fp[63], w_fp[20], 
COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[63], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18979,7 +18979,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 888 - FFV1_0( w_fp[41], w_fp[2], w_fp[103], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[103], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -18991,7 +18991,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19003,7 +19003,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19022,7 +19022,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 889 - FFV1_0( w_fp[3], w_fp[18], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[18], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19041,7 +19041,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 890 - FFV1_0( w_fp[12], w_fp[2], 
w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19060,7 +19060,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 891 - FFV1_0( w_fp[3], w_fp[93], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[93], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19079,7 +19079,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 892 - FFV1_0( w_fp[65], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[2], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19098,7 +19098,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 893 - FFV1_0( w_fp[12], w_fp[93], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[93], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19113,7 +19113,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 894 - FFV1_0( w_fp[65], w_fp[18], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[65], w_fp[18], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19125,10 +19125,10 @@ namespace mg5amcCpu // *** DIAGRAM 895 OF 1240 *** // Wavefunction(s) for diagram number 895 - VVV1P0_1( w_fp[0], w_fp[66], COUPs[0], 0., 0., w_fp[65] ); + VVV1P0_1( w_fp[0], w_fp[66], COUPs[0], 1.0, 0., 0., w_fp[65] ); // Amplitude(s) for diagram 
number 895 - VVV1_0( w_fp[65], w_fp[13], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[13], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19155,7 +19155,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 896 - VVV1_0( w_fp[65], w_fp[11], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[11], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19182,7 +19182,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 897 - VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19202,7 +19202,7 @@ namespace mg5amcCpu jamp_sv[100] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19222,7 +19222,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[6], w_fp[65], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19249,7 +19249,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 898 - VVV1_0( w_fp[56], 
w_fp[69], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[69], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19276,7 +19276,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 899 - VVV1_0( w_fp[56], w_fp[70], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[70], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19303,7 +19303,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 900 - VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19323,7 +19323,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19343,7 +19343,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[107] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[66], w_fp[4], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19370,7 +19370,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 901 - VVV1_0( w_fp[0], w_fp[69], w_fp[11], COUPs[0], 
&_fp[0] ); + VVV1_0( w_fp[0], w_fp[69], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19397,7 +19397,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 902 - VVV1_0( w_fp[0], w_fp[70], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[70], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19421,12 +19421,12 @@ namespace mg5amcCpu // *** DIAGRAM 903 OF 1240 *** // Wavefunction(s) for diagram number 903 - VVVV1P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 0., 0., w_fp[93] ); - VVVV3P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 0., 0., w_fp[90] ); - VVVV4P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[93] ); + VVVV3P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[90] ); + VVVV4P0_1( w_fp[0], w_fp[66], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 903 - VVV1_0( w_fp[8], w_fp[6], w_fp[93], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[93], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19446,7 +19446,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[90], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[90], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19466,7 +19466,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - 
VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19490,12 +19490,12 @@ namespace mg5amcCpu // *** DIAGRAM 904 OF 1240 *** // Wavefunction(s) for diagram number 904 - VVVV1P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 0., 0., w_fp[22] ); - VVVV3P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 0., 0., w_fp[103] ); - VVVV4P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 0., 0., w_fp[63] ); + VVVV1P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[22] ); + VVVV3P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[103] ); + VVVV4P0_1( w_fp[0], w_fp[66], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[63] ); // Amplitude(s) for diagram number 904 - VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19515,7 +19515,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[103], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[103], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19535,7 +19535,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[97] += amp_sv[0]; jamp_sv[100] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[63], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[63], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19559,12 +19559,12 @@ namespace mg5amcCpu // *** 
DIAGRAM 905 OF 1240 *** // Wavefunction(s) for diagram number 905 - VVVV1P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 0., 0., w_fp[107] ); - VVVV3P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 0., 0., w_fp[95] ); - VVVV4P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 0., 0., w_fp[105] ); + VVVV1P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[107] ); + VVVV3P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[95] ); + VVVV4P0_1( w_fp[0], w_fp[8], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[105] ); // Amplitude(s) for diagram number 905 - VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19584,7 +19584,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVV1_0( w_fp[66], w_fp[6], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19604,7 +19604,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[66], w_fp[6], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[6], w_fp[105], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19631,7 +19631,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 906 - VVV1_0( w_fp[66], w_fp[4], w_fp[118], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[118], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-19651,7 +19651,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[66], w_fp[4], w_fp[119], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[119], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19671,7 +19671,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[99] += amp_sv[0]; jamp_sv[100] -= amp_sv[0]; - VVV1_0( w_fp[66], w_fp[4], w_fp[120], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[4], w_fp[120], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19698,7 +19698,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 907 - VVVV1_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19718,7 +19718,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19738,7 +19738,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[66], w_fp[8], w_fp[27], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv 
and denominators_sv (#473) #endif @@ -19765,7 +19765,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 908 - VVV1_0( w_fp[8], w_fp[27], w_fp[65], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[27], w_fp[65], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19792,7 +19792,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 909 - VVV1_0( w_fp[66], w_fp[27], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[27], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19819,7 +19819,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 910 - VVV1_0( w_fp[66], w_fp[8], w_fp[101], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[66], w_fp[8], w_fp[101], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19846,7 +19846,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 911 - VVV1_0( w_fp[65], w_fp[37], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[37], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19865,7 +19865,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 912 - FFV1_0( w_fp[3], w_fp[36], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[36], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19880,7 +19880,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 913 - FFV1_0( w_fp[71], 
w_fp[114], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[114], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19893,7 +19893,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 914 - FFV1_0( w_fp[3], w_fp[114], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19908,7 +19908,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 915 - FFV1_0( w_fp[71], w_fp[36], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[36], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19921,7 +19921,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 916 - VVV1_0( w_fp[0], w_fp[70], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[70], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19940,7 +19940,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 917 - FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19952,7 +19952,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[103], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], 
w_fp[103], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19964,7 +19964,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19983,7 +19983,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 918 - FFV1_0( w_fp[41], w_fp[33], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -19998,7 +19998,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 919 - FFV1_0( w_fp[41], w_fp[114], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[114], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20011,7 +20011,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 920 - FFV1_0( w_fp[62], w_fp[33], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[33], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20024,7 +20024,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 921 - VVV1_0( w_fp[65], w_fp[51], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[51], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20043,7 +20043,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 922 - FFV1_0( w_fp[3], w_fp[49], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[49], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20058,7 +20058,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 923 - FFV1_0( w_fp[71], w_fp[113], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[113], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20071,7 +20071,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 924 - FFV1_0( w_fp[3], w_fp[113], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20086,7 +20086,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 925 - FFV1_0( w_fp[71], w_fp[49], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[49], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20099,7 +20099,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 926 - VVV1_0( w_fp[0], w_fp[69], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[69], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-20118,7 +20118,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 927 - FFV1_0( w_fp[3], w_fp[47], w_fp[93], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[93], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20130,7 +20130,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20142,7 +20142,7 @@ namespace mg5amcCpu jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20161,7 +20161,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 928 - FFV1_0( w_fp[46], w_fp[47], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20176,7 +20176,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 929 - FFV1_0( w_fp[46], w_fp[113], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[113], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -20189,7 +20189,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 930 - FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[47], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20202,7 +20202,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 931 - VVV1_0( w_fp[65], w_fp[54], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[54], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20221,7 +20221,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 932 - FFV1_0( w_fp[53], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20236,7 +20236,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 933 - FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[94], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20249,7 +20249,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 934 - FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[70], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20264,7 +20264,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 935 - FFV1_0( w_fp[53], w_fp[94], w_fp[0], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[94], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20277,7 +20277,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 936 - VVV1_0( w_fp[0], w_fp[70], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[70], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20296,7 +20296,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 937 - FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20308,7 +20308,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[103], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[103], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20320,7 +20320,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[63], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[63], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20339,7 +20339,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 938 - VVV1_0( w_fp[65], 
w_fp[20], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[65], w_fp[20], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20358,7 +20358,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 939 - FFV1_0( w_fp[28], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20373,7 +20373,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 940 - FFV1_0( w_fp[62], w_fp[94], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[94], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20386,7 +20386,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 941 - FFV1_0( w_fp[62], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20401,7 +20401,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 942 - FFV1_0( w_fp[28], w_fp[94], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[94], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20414,7 +20414,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 943 - VVV1_0( w_fp[0], w_fp[69], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[69], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the 
code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20433,7 +20433,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 944 - FFV1_0( w_fp[41], w_fp[2], w_fp[93], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[93], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20445,7 +20445,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20457,7 +20457,7 @@ namespace mg5amcCpu jamp_sv[50] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20476,7 +20476,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 945 - FFV1_0( w_fp[3], w_fp[15], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[15], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20495,7 +20495,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 946 - FFV1_0( w_fp[14], w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // 
Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20514,7 +20514,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 947 - FFV1_0( w_fp[3], w_fp[94], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[94], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20533,7 +20533,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 948 - FFV1_0( w_fp[71], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20552,7 +20552,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 949 - FFV1_0( w_fp[14], w_fp[94], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[94], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20567,7 +20567,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 950 - FFV1_0( w_fp[71], w_fp[15], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[15], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20579,10 +20579,10 @@ namespace mg5amcCpu // *** DIAGRAM 951 OF 1240 *** // Wavefunction(s) for diagram number 951 - VVV1P0_1( w_fp[0], w_fp[72], COUPs[0], 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[0], w_fp[72], COUPs[0], 1.0, 0., 0., w_fp[71] ); // Amplitude(s) for diagram number 951 - VVV1_0( w_fp[71], w_fp[13], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[13], w_fp[5], COUPs[0], 1.0, &_fp[0] 
); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20609,7 +20609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 952 - VVV1_0( w_fp[71], w_fp[10], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[10], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20636,7 +20636,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 953 - VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20656,7 +20656,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20676,7 +20676,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[8], w_fp[4], w_fp[5], w_fp[71], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20703,7 +20703,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 954 - VVV1_0( w_fp[56], w_fp[74], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[74], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL 
// Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20730,7 +20730,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 955 - VVV1_0( w_fp[56], w_fp[75], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[75], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20757,7 +20757,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 956 - VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20777,7 +20777,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; - VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20797,7 +20797,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[72], w_fp[4], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20824,7 +20824,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 957 - VVV1_0( w_fp[0], w_fp[74], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[74], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20851,7 +20851,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 958 - VVV1_0( w_fp[0], w_fp[75], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[75], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20875,12 +20875,12 @@ namespace mg5amcCpu // *** DIAGRAM 959 OF 1240 *** // Wavefunction(s) for diagram number 959 - VVVV1P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 0., 0., w_fp[94] ); - VVVV3P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 0., 0., w_fp[65] ); - VVVV4P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[94] ); + VVVV3P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[65] ); + VVVV4P0_1( w_fp[0], w_fp[72], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 959 - VVV1_0( w_fp[8], w_fp[5], w_fp[94], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[94], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20900,7 +20900,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[65], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[65], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20920,7 +20920,7 @@ namespace mg5amcCpu jamp_sv[93] -= amp_sv[0]; jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20944,12 +20944,12 @@ namespace mg5amcCpu // *** DIAGRAM 960 OF 1240 *** // Wavefunction(s) for diagram number 960 - VVVV1P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 0., 0., w_fp[90] ); - VVVV3P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 0., 0., w_fp[93] ); - VVVV4P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 0., 0., w_fp[69] ); + VVVV1P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[90] ); + VVVV3P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[93] ); + VVVV4P0_1( w_fp[0], w_fp[72], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[69] ); // Amplitude(s) for diagram number 960 - VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[90], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20969,7 +20969,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[93], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[93], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -20989,7 +20989,7 @@ namespace mg5amcCpu jamp_sv[76] -= amp_sv[0]; jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[69], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[69], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21016,7 +21016,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 961 - VVV1_0( w_fp[72], w_fp[5], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], 
w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21036,7 +21036,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[72], w_fp[5], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21056,7 +21056,7 @@ namespace mg5amcCpu jamp_sv[93] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - VVV1_0( w_fp[72], w_fp[5], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[5], w_fp[105], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21083,7 +21083,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 962 - VVV1_0( w_fp[72], w_fp[4], w_fp[115], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[115], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21103,7 +21103,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVV1_0( w_fp[72], w_fp[4], w_fp[116], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[116], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21123,7 +21123,7 @@ namespace mg5amcCpu jamp_sv[76] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; - VVV1_0( w_fp[72], w_fp[4], w_fp[117], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[4], w_fp[117], COUPs[0], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21150,7 +21150,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 963 - VVVV1_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21170,7 +21170,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21190,7 +21190,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[72], w_fp[8], w_fp[24], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21217,7 +21217,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 964 - VVV1_0( w_fp[8], w_fp[24], w_fp[71], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[24], w_fp[71], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21244,7 +21244,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 965 - VVV1_0( w_fp[72], w_fp[24], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[24], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21271,7 +21271,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 966 - VVV1_0( w_fp[72], w_fp[8], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[72], w_fp[8], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21298,7 +21298,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 967 - VVV1_0( w_fp[71], w_fp[37], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[37], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21317,7 +21317,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 968 - FFV1_0( w_fp[3], w_fp[35], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[35], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21332,7 +21332,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 969 - FFV1_0( w_fp[76], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21345,7 +21345,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 970 - FFV1_0( w_fp[3], w_fp[114], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-21360,7 +21360,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 971 - FFV1_0( w_fp[76], w_fp[35], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[35], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21373,7 +21373,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 972 - VVV1_0( w_fp[0], w_fp[75], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[75], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21392,7 +21392,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 973 - FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21404,7 +21404,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[93], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[93], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21416,7 +21416,7 @@ namespace mg5amcCpu jamp_sv[61] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) 
#endif @@ -21435,7 +21435,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 974 - FFV1_0( w_fp[38], w_fp[33], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21450,7 +21450,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 975 - FFV1_0( w_fp[38], w_fp[114], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[114], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21463,7 +21463,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 976 - FFV1_0( w_fp[104], w_fp[33], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[33], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21476,7 +21476,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 977 - VVV1_0( w_fp[71], w_fp[45], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[45], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21495,7 +21495,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 978 - FFV1_0( w_fp[3], w_fp[43], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[43], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21510,7 +21510,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 979 - FFV1_0( w_fp[76], w_fp[102], w_fp[4], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21523,7 +21523,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 980 - FFV1_0( w_fp[3], w_fp[102], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21538,7 +21538,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 981 - FFV1_0( w_fp[76], w_fp[43], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[43], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21551,7 +21551,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 982 - VVV1_0( w_fp[0], w_fp[74], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[74], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21570,7 +21570,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 983 - FFV1_0( w_fp[3], w_fp[39], w_fp[94], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[94], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21582,7 +21582,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[65], COUPs[1], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21594,7 +21594,7 @@ namespace mg5amcCpu jamp_sv[85] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21613,7 +21613,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 984 - FFV1_0( w_fp[46], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21628,7 +21628,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 985 - FFV1_0( w_fp[46], w_fp[102], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[102], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21641,7 +21641,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 986 - FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[39], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21654,7 +21654,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 987 - VVV1_0( w_fp[71], w_fp[54], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[54], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the 
code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21673,7 +21673,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 988 - FFV1_0( w_fp[7], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21688,7 +21688,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 989 - FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[97], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21701,7 +21701,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 990 - FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[75], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21716,7 +21716,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 991 - FFV1_0( w_fp[7], w_fp[97], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[97], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21729,7 +21729,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 992 - VVV1_0( w_fp[0], w_fp[75], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[75], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21748,7 +21748,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 993 - FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[90], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21760,7 +21760,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[93], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[93], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21772,7 +21772,7 @@ namespace mg5amcCpu jamp_sv[76] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[69], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[69], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21791,7 +21791,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 994 - VVV1_0( w_fp[71], w_fp[23], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[23], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21810,7 +21810,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 995 - FFV1_0( w_fp[25], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21825,7 +21825,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 996 - FFV1_0( w_fp[104], w_fp[97], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[97], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21838,7 +21838,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 997 - FFV1_0( w_fp[104], w_fp[2], w_fp[74], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[74], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21853,7 +21853,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 998 - FFV1_0( w_fp[25], w_fp[97], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[97], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21866,7 +21866,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 999 - VVV1_0( w_fp[0], w_fp[74], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[74], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21885,7 +21885,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1000 - FFV1_0( w_fp[38], w_fp[2], w_fp[94], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[94], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21897,7 +21897,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], 
w_fp[2], w_fp[65], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[65], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21909,7 +21909,7 @@ namespace mg5amcCpu jamp_sv[52] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21928,7 +21928,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1001 - FFV1_0( w_fp[3], w_fp[17], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21947,7 +21947,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1002 - FFV1_0( w_fp[26], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21966,7 +21966,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1003 - FFV1_0( w_fp[3], w_fp[97], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[97], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -21985,7 +21985,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1004 - FFV1_0( w_fp[76], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[2], 
w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22004,7 +22004,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1005 - FFV1_0( w_fp[26], w_fp[97], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[97], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22019,7 +22019,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1006 - FFV1_0( w_fp[76], w_fp[17], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[76], w_fp[17], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22034,7 +22034,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1007 - VVV1_0( w_fp[56], w_fp[59], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[59], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22061,7 +22061,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1008 - VVV1_0( w_fp[56], w_fp[1], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[1], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22088,7 +22088,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1009 - VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -22108,7 +22108,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22128,7 +22128,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[24], w_fp[6], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22155,7 +22155,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1010 - VVV1_0( w_fp[98], w_fp[108], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[108], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22182,7 +22182,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1011 - VVV1_0( w_fp[98], w_fp[1], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[1], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22209,7 +22209,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1012 - VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -22229,7 +22229,7 @@ namespace mg5amcCpu jamp_sv[101] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22249,7 +22249,7 @@ namespace mg5amcCpu jamp_sv[103] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[6], w_fp[98], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22276,7 +22276,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1013 - VVV1_0( w_fp[0], w_fp[108], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[108], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22303,7 +22303,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1014 - VVV1_0( w_fp[0], w_fp[59], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[59], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22327,12 +22327,12 @@ namespace mg5amcCpu // *** DIAGRAM 1015 OF 1240 *** // Wavefunction(s) for diagram number 1015 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 0., 0., w_fp[11] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 0., 0., w_fp[42] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 0., 0., w_fp[76] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[8], 
COUPs[2], 1.0, 0., 0., w_fp[11] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[42] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[8], COUPs[2], 1.0, 0., 0., w_fp[76] ); // Amplitude(s) for diagram number 1015 - VVV1_0( w_fp[24], w_fp[6], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22352,7 +22352,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVV1_0( w_fp[24], w_fp[6], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22372,7 +22372,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[24], w_fp[6], w_fp[76], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[6], w_fp[76], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22396,12 +22396,12 @@ namespace mg5amcCpu // *** DIAGRAM 1016 OF 1240 *** // Wavefunction(s) for diagram number 1016 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 0., 0., w_fp[97] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 0., 0., w_fp[71] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[97] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[71] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[24], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 1016 - VVV1_0( w_fp[8], w_fp[6], w_fp[97], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[97], 
COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22421,7 +22421,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22441,7 +22441,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22468,7 +22468,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1017 - VVV1_0( w_fp[1], w_fp[24], w_fp[118], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[118], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22488,7 +22488,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[24], w_fp[119], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[119], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22508,7 +22508,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[24], w_fp[120], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[24], w_fp[120], COUPs[0], 1.0, &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22535,7 +22535,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1018 - VVV1_0( w_fp[1], w_fp[8], w_fp[85], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[85], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22555,7 +22555,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[112], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[112], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22575,7 +22575,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[111], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22602,7 +22602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1019 - VVV1_0( w_fp[56], w_fp[68], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[68], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22629,7 +22629,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1020 - VVV1_0( w_fp[56], w_fp[1], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[1], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -22656,7 +22656,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1021 - VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22676,7 +22676,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22696,7 +22696,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[27], w_fp[5], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22723,7 +22723,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1022 - VVV1_0( w_fp[101], w_fp[108], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[108], w_fp[5], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22750,7 +22750,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1023 - VVV1_0( w_fp[101], w_fp[1], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[1], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -22777,7 +22777,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1024 - VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22797,7 +22797,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22817,7 +22817,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[5], w_fp[101], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22844,7 +22844,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1025 - VVV1_0( w_fp[0], w_fp[108], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[108], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22871,7 +22871,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1026 - VVV1_0( w_fp[0], w_fp[68], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[68], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -22898,7 +22898,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1027 - VVV1_0( w_fp[27], w_fp[5], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22918,7 +22918,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[27], w_fp[5], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22938,7 +22938,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[27], w_fp[5], w_fp[76], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[5], w_fp[76], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22962,12 +22962,12 @@ namespace mg5amcCpu // *** DIAGRAM 1028 OF 1240 *** // Wavefunction(s) for diagram number 1028 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 0., 0., w_fp[10] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 0., 0., w_fp[16] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[10] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[16] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[27], COUPs[2], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 1028 - VVV1_0( w_fp[8], w_fp[5], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -22987,7 +22987,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23007,7 +23007,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23034,7 +23034,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1029 - VVV1_0( w_fp[1], w_fp[27], w_fp[115], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[115], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23054,7 +23054,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[27], w_fp[116], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[116], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23074,7 +23074,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[27], w_fp[117], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[27], w_fp[117], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -23101,7 +23101,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1030 - VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23121,7 +23121,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[110], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23141,7 +23141,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[109], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23168,7 +23168,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1031 - VVV1_0( w_fp[56], w_fp[67], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[67], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23195,7 +23195,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1032 - VVV1_0( w_fp[56], w_fp[1], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[56], w_fp[1], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23222,7 +23222,7 @@ namespace mg5amcCpu // 
(none) // Amplitude(s) for diagram number 1033 - VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23242,7 +23242,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23262,7 +23262,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[4], w_fp[29], w_fp[56], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23289,7 +23289,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1034 - VVV1_0( w_fp[96], w_fp[108], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[108], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23316,7 +23316,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1035 - VVV1_0( w_fp[96], w_fp[1], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[1], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23343,7 +23343,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 1036 - VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23363,7 +23363,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23383,7 +23383,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[1], w_fp[8], w_fp[4], w_fp[96], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23410,7 +23410,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1037 - VVV1_0( w_fp[0], w_fp[108], w_fp[19], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[108], w_fp[19], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23437,7 +23437,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1038 - VVV1_0( w_fp[0], w_fp[67], w_fp[13], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[67], w_fp[13], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23464,7 +23464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1039 - VVV1_0( 
w_fp[4], w_fp[29], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23484,7 +23484,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVV1_0( w_fp[4], w_fp[29], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23504,7 +23504,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVV1_0( w_fp[4], w_fp[29], w_fp[76], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[29], w_fp[76], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23528,12 +23528,12 @@ namespace mg5amcCpu // *** DIAGRAM 1040 OF 1240 *** // Wavefunction(s) for diagram number 1040 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 0., 0., w_fp[76] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 0., 0., w_fp[42] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 0., 0., w_fp[11] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[76] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[42] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[29], COUPs[2], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 1040 - VVV1_0( w_fp[8], w_fp[4], w_fp[76], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[76], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23553,7 +23553,7 @@ namespace mg5amcCpu jamp_sv[71] += 
amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[42], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[42], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23573,7 +23573,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[90] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23600,7 +23600,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1041 - VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[107], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23620,7 +23620,7 @@ namespace mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[29], w_fp[95], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[95], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23640,7 +23640,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[29], w_fp[105], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[29], w_fp[105], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23667,7 +23667,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 
1042 - VVV1_0( w_fp[1], w_fp[8], w_fp[87], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[87], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23687,7 +23687,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[34], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[34], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23707,7 +23707,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[86], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[86], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23734,7 +23734,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1043 - VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23754,7 +23754,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[118] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23774,7 +23774,7 @@ namespace mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[119] += 
amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[30], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23794,7 +23794,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23814,7 +23814,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23834,7 +23834,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[31], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23854,7 +23854,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23874,7 +23874,7 @@ 
namespace mg5amcCpu jamp_sv[113] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23894,7 +23894,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[8], w_fp[32], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23921,7 +23921,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1044 - VVV1_0( w_fp[1], w_fp[30], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[30], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23941,7 +23941,7 @@ namespace mg5amcCpu jamp_sv[71] -= amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[31], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[31], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23961,7 +23961,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[95] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[32], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[32], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -23988,7 
+23988,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1045 - VVV1_0( w_fp[1], w_fp[8], w_fp[92], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[92], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24008,7 +24008,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[88], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24028,7 +24028,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[106], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[106], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24055,7 +24055,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1046 - FFV1_0( w_fp[58], w_fp[114], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[114], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24067,7 +24067,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1047 - FFV1_0( w_fp[48], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24079,7 +24079,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1048 - FFV1_0( 
w_fp[104], w_fp[100], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[100], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24091,7 +24091,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1049 - FFV1_0( w_fp[104], w_fp[36], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[36], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24103,7 +24103,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1050 - FFV1_0( w_fp[48], w_fp[100], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[48], w_fp[100], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24115,7 +24115,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1051 - FFV1_0( w_fp[58], w_fp[36], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[36], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24127,7 +24127,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1052 - FFV1_0( w_fp[60], w_fp[114], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[114], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24139,7 +24139,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1053 - FFV1_0( w_fp[40], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24151,7 +24151,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1054 - FFV1_0( w_fp[62], w_fp[100], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[100], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24163,7 +24163,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1055 - FFV1_0( w_fp[62], w_fp[35], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[35], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24175,7 +24175,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1056 - FFV1_0( w_fp[40], w_fp[100], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[40], w_fp[100], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24187,7 +24187,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1057 - FFV1_0( w_fp[60], w_fp[35], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[35], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24199,7 +24199,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1058 - FFV1_0( w_fp[3], w_fp[114], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif 
@@ -24214,7 +24214,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1059 - FFV1_0( w_fp[12], w_fp[114], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[114], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24227,7 +24227,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1060 - FFV1_0( w_fp[3], w_fp[100], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[100], w_fp[96], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24242,7 +24242,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1061 - VVV1_0( w_fp[96], w_fp[1], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[1], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24261,7 +24261,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1062 - FFV1_0( w_fp[12], w_fp[100], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[100], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24274,7 +24274,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1063 - VVV1_0( w_fp[0], w_fp[67], w_fp[37], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[67], w_fp[37], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24293,7 +24293,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1064 - FFV1_0( w_fp[3], w_fp[33], w_fp[76], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[76], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24305,7 +24305,7 @@ namespace mg5amcCpu jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24317,7 +24317,7 @@ namespace mg5amcCpu jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24336,7 +24336,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1065 - FFV1_0( w_fp[78], w_fp[102], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[102], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24348,7 +24348,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1066 - FFV1_0( w_fp[53], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24360,7 +24360,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1067 - FFV1_0( w_fp[99], w_fp[89], 
w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[89], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24372,7 +24372,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1068 - FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[44], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24384,7 +24384,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1069 - FFV1_0( w_fp[53], w_fp[89], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[53], w_fp[89], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24396,7 +24396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1070 - FFV1_0( w_fp[78], w_fp[44], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[44], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24408,7 +24408,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1071 - FFV1_0( w_fp[60], w_fp[102], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[102], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24420,7 +24420,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1072 - FFV1_0( w_fp[28], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the 
code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24432,7 +24432,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1073 - FFV1_0( w_fp[62], w_fp[89], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[89], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24444,7 +24444,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1074 - FFV1_0( w_fp[62], w_fp[43], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[43], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24456,7 +24456,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1075 - FFV1_0( w_fp[28], w_fp[89], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[28], w_fp[89], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24468,7 +24468,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1076 - FFV1_0( w_fp[60], w_fp[43], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[43], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24480,7 +24480,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1077 - FFV1_0( w_fp[3], w_fp[102], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24495,7 +24495,7 @@ namespace mg5amcCpu 
// (none) // Amplitude(s) for diagram number 1078 - FFV1_0( w_fp[14], w_fp[102], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[102], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24508,7 +24508,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1079 - FFV1_0( w_fp[3], w_fp[89], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[89], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24523,7 +24523,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1080 - VVV1_0( w_fp[101], w_fp[1], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[1], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24542,7 +24542,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1081 - FFV1_0( w_fp[14], w_fp[89], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[89], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24555,7 +24555,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1082 - VVV1_0( w_fp[0], w_fp[68], w_fp[45], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[68], w_fp[45], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24574,7 +24574,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1083 - FFV1_0( w_fp[3], w_fp[39], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], 
w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24586,7 +24586,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24598,7 +24598,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24617,7 +24617,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1084 - FFV1_0( w_fp[78], w_fp[113], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[113], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24629,7 +24629,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1085 - FFV1_0( w_fp[7], w_fp[113], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[113], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24641,7 +24641,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1086 - FFV1_0( w_fp[99], w_fp[91], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[99], w_fp[91], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24653,7 +24653,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1087 - FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[50], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24665,7 +24665,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1088 - FFV1_0( w_fp[7], w_fp[91], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[7], w_fp[91], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24677,7 +24677,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1089 - FFV1_0( w_fp[78], w_fp[50], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[50], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24689,7 +24689,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1090 - FFV1_0( w_fp[58], w_fp[113], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[113], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24701,7 +24701,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1091 - FFV1_0( w_fp[25], w_fp[113], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[113], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -24713,7 +24713,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1092 - FFV1_0( w_fp[104], w_fp[91], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[91], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24725,7 +24725,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1093 - FFV1_0( w_fp[104], w_fp[49], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[49], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24737,7 +24737,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1094 - FFV1_0( w_fp[25], w_fp[91], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[25], w_fp[91], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24749,7 +24749,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1095 - FFV1_0( w_fp[58], w_fp[49], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[49], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24761,7 +24761,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1096 - FFV1_0( w_fp[3], w_fp[113], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24776,7 +24776,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 1097 - FFV1_0( w_fp[26], w_fp[113], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[113], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24789,7 +24789,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1098 - FFV1_0( w_fp[3], w_fp[91], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[91], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24804,7 +24804,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1099 - VVV1_0( w_fp[98], w_fp[1], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[1], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24823,7 +24823,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1100 - FFV1_0( w_fp[26], w_fp[91], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[26], w_fp[91], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24836,7 +24836,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1101 - VVV1_0( w_fp[0], w_fp[59], w_fp[51], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[59], w_fp[51], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24855,7 +24855,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1102 - FFV1_0( w_fp[3], w_fp[47], w_fp[97], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[97], COUPs[1], 1.0, &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24867,7 +24867,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24879,7 +24879,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24898,7 +24898,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1103 - FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[67], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24913,7 +24913,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1104 - FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[18], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24926,7 +24926,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1105 - FFV1_0( w_fp[78], w_fp[2], w_fp[96], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[2], w_fp[96], COUPs[1], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24941,7 +24941,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1106 - VVV1_0( w_fp[96], w_fp[1], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[96], w_fp[1], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24960,7 +24960,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1107 - FFV1_0( w_fp[78], w_fp[18], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[78], w_fp[18], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24973,7 +24973,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1108 - VVV1_0( w_fp[0], w_fp[67], w_fp[54], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[67], w_fp[54], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -24992,7 +24992,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1109 - FFV1_0( w_fp[46], w_fp[2], w_fp[76], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[76], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25004,7 +25004,7 @@ namespace mg5amcCpu jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[42], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[42], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code 
base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25016,7 +25016,7 @@ namespace mg5amcCpu jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25035,7 +25035,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1110 - FFV1_0( w_fp[104], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25050,7 +25050,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1111 - FFV1_0( w_fp[104], w_fp[15], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[15], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25063,7 +25063,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1112 - FFV1_0( w_fp[58], w_fp[2], w_fp[101], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[2], w_fp[101], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25078,7 +25078,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1113 - VVV1_0( w_fp[101], w_fp[1], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[101], w_fp[1], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -25097,7 +25097,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1114 - FFV1_0( w_fp[58], w_fp[15], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[58], w_fp[15], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25110,7 +25110,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1115 - VVV1_0( w_fp[0], w_fp[68], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[68], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25129,7 +25129,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1116 - FFV1_0( w_fp[38], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25141,7 +25141,7 @@ namespace mg5amcCpu jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25153,7 +25153,7 @@ namespace mg5amcCpu jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel 
support updates numerators_sv and denominators_sv (#473) #endif @@ -25172,7 +25172,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1117 - FFV1_0( w_fp[62], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25187,7 +25187,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1118 - FFV1_0( w_fp[62], w_fp[17], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[17], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25200,7 +25200,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1119 - FFV1_0( w_fp[60], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25215,7 +25215,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1120 - VVV1_0( w_fp[98], w_fp[1], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[1], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25234,7 +25234,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1121 - FFV1_0( w_fp[60], w_fp[17], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[17], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25247,7 +25247,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram 
number 1122 - VVV1_0( w_fp[0], w_fp[59], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[59], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25266,7 +25266,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1123 - FFV1_0( w_fp[41], w_fp[2], w_fp[97], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[97], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25278,7 +25278,7 @@ namespace mg5amcCpu jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25290,7 +25290,7 @@ namespace mg5amcCpu jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25306,12 +25306,12 @@ namespace mg5amcCpu // *** DIAGRAM 1124 OF 1240 *** // Wavefunction(s) for diagram number 1124 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[71] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[97] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[0], w_fp[1], 
w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[71] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[97] ); // Amplitude(s) for diagram number 1124 - VVVV1_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25331,7 +25331,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25351,7 +25351,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[21], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25371,7 +25371,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVVV1_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25391,7 +25391,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV3_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25411,7 +25411,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV4_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[71], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25431,7 +25431,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVVV1_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25451,7 +25451,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV3_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25471,7 +25471,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[118] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[97], w_fp[8], w_fp[5], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25495,12 +25495,12 @@ namespace mg5amcCpu // *** DIAGRAM 1125 OF 1240 *** // Wavefunction(s) for diagram number 1125 - VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 0., 0., w_fp[59] 
); - VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 0., 0., w_fp[20] ); - VVV1P0_1( w_fp[97], w_fp[5], COUPs[0], 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[59] ); + VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] ); + VVV1P0_1( w_fp[97], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[60] ); // Amplitude(s) for diagram number 1125 - VVV1_0( w_fp[8], w_fp[6], w_fp[59], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[59], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25520,7 +25520,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25540,7 +25540,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25564,12 +25564,12 @@ namespace mg5amcCpu // *** DIAGRAM 1126 OF 1240 *** // Wavefunction(s) for diagram number 1126 - VVV1P0_1( w_fp[21], w_fp[6], COUPs[0], 0., 0., w_fp[17] ); - VVV1P0_1( w_fp[71], w_fp[6], COUPs[0], 0., 0., w_fp[98] ); - VVV1P0_1( w_fp[97], w_fp[6], COUPs[0], 0., 0., w_fp[111] ); + VVV1P0_1( w_fp[21], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[71], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[98] ); + VVV1P0_1( w_fp[97], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 1126 - VVV1_0( w_fp[8], 
w_fp[5], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25589,7 +25589,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25609,7 +25609,7 @@ namespace mg5amcCpu jamp_sv[102] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[111], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25636,7 +25636,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1127 - VVV1_0( w_fp[21], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[21], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25656,7 +25656,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[71], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25676,7 +25676,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[97], w_fp[8], w_fp[29], COUPs[0], 
&_fp[0] ); + VVV1_0( w_fp[97], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25700,12 +25700,12 @@ namespace mg5amcCpu // *** DIAGRAM 1128 OF 1240 *** // Wavefunction(s) for diagram number 1128 - FFV1_2( w_fp[3], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - FFV1_2( w_fp[3], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); - FFV1_2( w_fp[3], w_fp[97], COUPs[1], cIPD[0], cIPD[1], w_fp[68] ); + FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); + FFV1_2( w_fp[3], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] ); // Amplitude(s) for diagram number 1128 - FFV1_0( w_fp[16], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25713,7 +25713,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; - FFV1_0( w_fp[10], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25721,7 +25721,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[93] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; - FFV1_0( w_fp[68], w_fp[39], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[68], w_fp[39], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25736,7 +25736,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1129 - FFV1_0( 
w_fp[3], w_fp[39], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[17], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25748,7 +25748,7 @@ namespace mg5amcCpu jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25760,7 +25760,7 @@ namespace mg5amcCpu jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25779,7 +25779,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1130 - FFV1_0( w_fp[41], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25787,7 +25787,7 @@ namespace mg5amcCpu jamp_sv[74] -= amp_sv[0]; jamp_sv[80] -= amp_sv[0]; jamp_sv[86] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25795,7 +25795,7 @@ namespace mg5amcCpu jamp_sv[78] += 
amp_sv[0]; jamp_sv[80] -= amp_sv[0]; jamp_sv[84] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[39], w_fp[97], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[39], w_fp[97], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25810,7 +25810,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1131 - FFV1_0( w_fp[16], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25818,7 +25818,7 @@ namespace mg5amcCpu jamp_sv[115] -= amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - FFV1_0( w_fp[10], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25826,7 +25826,7 @@ namespace mg5amcCpu jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - FFV1_0( w_fp[68], w_fp[47], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[68], w_fp[47], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25841,7 +25841,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1132 - FFV1_0( w_fp[3], w_fp[47], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25853,7 +25853,7 @@ namespace mg5amcCpu jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 
) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25865,7 +25865,7 @@ namespace mg5amcCpu jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25884,7 +25884,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1133 - FFV1_0( w_fp[38], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25892,7 +25892,7 @@ namespace mg5amcCpu jamp_sv[98] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[47], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25900,7 +25900,7 @@ namespace mg5amcCpu jamp_sv[102] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[47], w_fp[97], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[47], w_fp[97], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25912,12 +25912,12 @@ namespace 
mg5amcCpu // *** DIAGRAM 1134 OF 1240 *** // Wavefunction(s) for diagram number 1134 - FFV1_1( w_fp[2], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); - FFV1_1( w_fp[2], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); - FFV1_1( w_fp[2], w_fp[97], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); + FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + FFV1_1( w_fp[2], w_fp[97], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); // Amplitude(s) for diagram number 1134 - FFV1_0( w_fp[38], w_fp[23], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[23], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25925,7 +25925,7 @@ namespace mg5amcCpu jamp_sv[7] -= amp_sv[0]; jamp_sv[31] -= amp_sv[0]; jamp_sv[55] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[21], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[21], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25933,7 +25933,7 @@ namespace mg5amcCpu jamp_sv[25] += amp_sv[0]; jamp_sv[31] -= amp_sv[0]; jamp_sv[49] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[71], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[71], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25948,7 +25948,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1135 - FFV1_0( w_fp[38], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-25960,7 +25960,7 @@ namespace mg5amcCpu jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25972,7 +25972,7 @@ namespace mg5amcCpu jamp_sv[102] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25991,7 +25991,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1136 - FFV1_0( w_fp[41], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -25999,7 +25999,7 @@ namespace mg5amcCpu jamp_sv[6] -= amp_sv[0]; jamp_sv[30] -= amp_sv[0]; jamp_sv[54] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[21], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[21], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26007,7 +26007,7 @@ namespace mg5amcCpu jamp_sv[24] += amp_sv[0]; jamp_sv[30] -= amp_sv[0]; jamp_sv[48] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[71], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[71], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26022,7 +26022,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1137 - FFV1_0( w_fp[41], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26034,7 +26034,7 @@ namespace mg5amcCpu jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26046,7 +26046,7 @@ namespace mg5amcCpu jamp_sv[78] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26065,7 +26065,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1138 - FFV1_0( w_fp[3], w_fp[23], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26077,7 +26077,7 @@ namespace mg5amcCpu jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[54] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[55] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[21], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[21], w_fp[29], COUPs[1], 
1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26089,7 +26089,7 @@ namespace mg5amcCpu jamp_sv[31] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[48] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[49] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[71], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[71], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26108,7 +26108,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1139 - FFV1_0( w_fp[16], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26120,7 +26120,7 @@ namespace mg5amcCpu jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[10], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26132,7 +26132,7 @@ namespace mg5amcCpu jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[68], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[68], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26148,12 +26148,12 @@ namespace mg5amcCpu // *** DIAGRAM 1140 OF 1240 *** // Wavefunction(s) for diagram 
number 1140 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[68] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[29] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[10] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[68] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[29] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 1140 - VVVV1_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26173,7 +26173,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVVV3_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26193,7 +26193,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVVV4_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[68], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26213,7 +26213,7 @@ namespace mg5amcCpu jamp_sv[100] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVVV1_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -26233,7 +26233,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVVV3_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26253,7 +26253,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV4_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[29], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26273,7 +26273,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV1_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26293,7 +26293,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; - VVVV3_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26313,7 +26313,7 @@ namespace mg5amcCpu jamp_sv[113] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV4_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[10], w_fp[8], w_fp[4], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26337,12 +26337,12 @@ namespace mg5amcCpu // *** DIAGRAM 1141 OF 1240 *** // Wavefunction(s) for diagram number 1141 - VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 0., 0., w_fp[16] ); - VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 0., 0., w_fp[71] ); - VVV1P0_1( w_fp[10], w_fp[4], COUPs[0], 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[10], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 1141 - VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26362,7 +26362,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[71], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26382,7 +26382,7 @@ namespace mg5amcCpu jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26406,12 +26406,12 @@ namespace mg5amcCpu // *** DIAGRAM 1142 OF 1240 *** // Wavefunction(s) for diagram number 1142 - VVV1P0_1( w_fp[68], w_fp[6], COUPs[0], 0., 0., w_fp[23] ); - VVV1P0_1( w_fp[29], w_fp[6], COUPs[0], 0., 0., w_fp[60] 
); - VVV1P0_1( w_fp[10], w_fp[6], COUPs[0], 0., 0., w_fp[20] ); + VVV1P0_1( w_fp[68], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[29], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[10], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[20] ); // Amplitude(s) for diagram number 1142 - VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26431,7 +26431,7 @@ namespace mg5amcCpu jamp_sv[100] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[60], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[60], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26451,7 +26451,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[20], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[20], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26478,7 +26478,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1143 - VVV1_0( w_fp[68], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[68], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26498,7 +26498,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - VVV1_0( w_fp[29], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[29], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26518,7 +26518,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - VVV1_0( w_fp[10], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[10], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26542,12 +26542,12 @@ namespace mg5amcCpu // *** DIAGRAM 1144 OF 1240 *** // Wavefunction(s) for diagram number 1144 - FFV1_2( w_fp[3], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[59] ); - FFV1_2( w_fp[3], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[111] ); - FFV1_2( w_fp[3], w_fp[10], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); + FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] ); + FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[111] ); + FFV1_2( w_fp[3], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 1144 - FFV1_0( w_fp[59], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[59], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26555,7 +26555,7 @@ namespace mg5amcCpu jamp_sv[67] -= amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[71] += amp_sv[0]; - FFV1_0( w_fp[111], w_fp[33], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[111], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26563,7 +26563,7 @@ namespace mg5amcCpu jamp_sv[68] += amp_sv[0]; jamp_sv[69] -= amp_sv[0]; jamp_sv[70] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[33], w_fp[6], COUPs[1], &_fp[0] 
); + FFV1_0( w_fp[98], w_fp[33], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26578,7 +26578,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1145 - FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26590,7 +26590,7 @@ namespace mg5amcCpu jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[60], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26602,7 +26602,7 @@ namespace mg5amcCpu jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26621,7 +26621,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1146 - FFV1_0( w_fp[41], w_fp[33], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26629,7 +26629,7 @@ namespace mg5amcCpu jamp_sv[50] -= amp_sv[0]; jamp_sv[56] -= amp_sv[0]; jamp_sv[62] += amp_sv[0]; - FFV1_0( 
w_fp[41], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26637,7 +26637,7 @@ namespace mg5amcCpu jamp_sv[54] += amp_sv[0]; jamp_sv[56] -= amp_sv[0]; jamp_sv[60] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[33], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[33], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26652,7 +26652,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1147 - FFV1_0( w_fp[59], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[59], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26660,7 +26660,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[113] += amp_sv[0]; - FFV1_0( w_fp[111], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[111], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26668,7 +26668,7 @@ namespace mg5amcCpu jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[47], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[47], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26683,7 +26683,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1148 - FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], &_fp[0] ); + 
FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26695,7 +26695,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26707,7 +26707,7 @@ namespace mg5amcCpu jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26726,7 +26726,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1149 - FFV1_0( w_fp[46], w_fp[47], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26734,7 +26734,7 @@ namespace mg5amcCpu jamp_sv[100] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[47], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26742,7 +26742,7 @@ namespace mg5amcCpu jamp_sv[103] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; 
jamp_sv[114] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[47], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[47], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26754,12 +26754,12 @@ namespace mg5amcCpu // *** DIAGRAM 1150 OF 1240 *** // Wavefunction(s) for diagram number 1150 - FFV1_1( w_fp[2], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[17] ); - FFV1_1( w_fp[2], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[68] ); - FFV1_1( w_fp[2], w_fp[10], COUPs[1], cIPD[0], cIPD[1], w_fp[29] ); + FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[17] ); + FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] ); + FFV1_1( w_fp[2], w_fp[10], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] ); // Amplitude(s) for diagram number 1150 - FFV1_0( w_fp[46], w_fp[17], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[17], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26767,7 +26767,7 @@ namespace mg5amcCpu jamp_sv[13] -= amp_sv[0]; jamp_sv[37] -= amp_sv[0]; jamp_sv[79] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[68], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[68], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26775,7 +26775,7 @@ namespace mg5amcCpu jamp_sv[27] += amp_sv[0]; jamp_sv[37] -= amp_sv[0]; jamp_sv[73] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[29], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[29], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26790,7 +26790,7 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1151 - FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26802,7 +26802,7 @@ namespace mg5amcCpu jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26814,7 +26814,7 @@ namespace mg5amcCpu jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26833,7 +26833,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1152 - FFV1_0( w_fp[41], w_fp[17], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[17], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26841,7 +26841,7 @@ namespace mg5amcCpu jamp_sv[12] -= amp_sv[0]; jamp_sv[36] -= amp_sv[0]; jamp_sv[78] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[68], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[68], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -26849,7 +26849,7 @@ namespace mg5amcCpu jamp_sv[26] += amp_sv[0]; jamp_sv[36] -= amp_sv[0]; jamp_sv[72] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[29], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[29], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26864,7 +26864,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1153 - FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26876,7 +26876,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26888,7 +26888,7 @@ namespace mg5amcCpu jamp_sv[56] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26907,7 +26907,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1154 - FFV1_0( w_fp[3], w_fp[17], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[17], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26919,7 +26919,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[78] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[79] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[68], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[68], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26931,7 +26931,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[72] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[73] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[29], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[29], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26950,7 +26950,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1155 - FFV1_0( w_fp[59], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[59], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26962,7 +26962,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[111], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[111], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26974,7 +26974,7 @@ namespace mg5amcCpu jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[111] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[98], 
w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -26990,12 +26990,12 @@ namespace mg5amcCpu // *** DIAGRAM 1156 OF 1240 *** // Wavefunction(s) for diagram number 1156 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[98] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[27] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 0., 0., w_fp[111] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[98] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[27] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[111] ); // Amplitude(s) for diagram number 1156 - VVVV1_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27015,7 +27015,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVVV3_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27035,7 +27035,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; - VVVV4_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[98], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27055,7 
+27055,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - VVVV1_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27075,7 +27075,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[97] -= amp_sv[0]; - VVVV3_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27095,7 +27095,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - VVVV4_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[27], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27115,7 +27115,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVVV1_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27135,7 +27135,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[102] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - VVVV3_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with 
multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27155,7 +27155,7 @@ namespace mg5amcCpu jamp_sv[92] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[102] -= amp_sv[0]; - VVVV4_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[111], w_fp[8], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27179,12 +27179,12 @@ namespace mg5amcCpu // *** DIAGRAM 1157 OF 1240 *** // Wavefunction(s) for diagram number 1157 - VVV1P0_1( w_fp[98], w_fp[4], COUPs[0], 0., 0., w_fp[59] ); - VVV1P0_1( w_fp[27], w_fp[4], COUPs[0], 0., 0., w_fp[29] ); - VVV1P0_1( w_fp[111], w_fp[4], COUPs[0], 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[98], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[59] ); + VVV1P0_1( w_fp[27], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[29] ); + VVV1P0_1( w_fp[111], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 1157 - VVV1_0( w_fp[8], w_fp[5], w_fp[59], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[59], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27204,7 +27204,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[29], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[29], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27224,7 +27224,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[68], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // 
Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27248,12 +27248,12 @@ namespace mg5amcCpu // *** DIAGRAM 1158 OF 1240 *** // Wavefunction(s) for diagram number 1158 - VVV1P0_1( w_fp[98], w_fp[5], COUPs[0], 0., 0., w_fp[17] ); - VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 0., 0., w_fp[21] ); - VVV1P0_1( w_fp[111], w_fp[5], COUPs[0], 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[98], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[27], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[111], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[71] ); // Amplitude(s) for diagram number 1158 - VVV1_0( w_fp[8], w_fp[4], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27273,7 +27273,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27293,7 +27293,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[97] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[71], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[71], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27320,7 +27320,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1159 - VVV1_0( w_fp[98], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[98], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27340,7 +27340,7 @@ namespace mg5amcCpu jamp_sv[89] += amp_sv[0]; jamp_sv[102] += amp_sv[0]; jamp_sv[103] -= amp_sv[0]; - VVV1_0( w_fp[27], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[27], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27360,7 +27360,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[96] += amp_sv[0]; jamp_sv[97] -= amp_sv[0]; - VVV1_0( w_fp[111], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[111], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27384,12 +27384,12 @@ namespace mg5amcCpu // *** DIAGRAM 1160 OF 1240 *** // Wavefunction(s) for diagram number 1160 - FFV1_2( w_fp[3], w_fp[98], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - FFV1_2( w_fp[3], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); - FFV1_2( w_fp[3], w_fp[111], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); + FFV1_2( w_fp[3], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_2( w_fp[3], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); + FFV1_2( w_fp[3], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); // Amplitude(s) for diagram number 1160 - FFV1_0( w_fp[16], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27397,7 +27397,7 @@ namespace mg5amcCpu jamp_sv[61] -= amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[65] += amp_sv[0]; - FFV1_0( w_fp[20], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[33], w_fp[5], COUPs[1], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27405,7 +27405,7 @@ namespace mg5amcCpu jamp_sv[62] += amp_sv[0]; jamp_sv[63] -= amp_sv[0]; jamp_sv[64] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[33], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[33], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27420,7 +27420,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1161 - FFV1_0( w_fp[3], w_fp[33], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[17], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27432,7 +27432,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27444,7 +27444,7 @@ namespace mg5amcCpu jamp_sv[63] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27463,7 +27463,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1162 - FFV1_0( w_fp[38], w_fp[33], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[38], w_fp[33], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27471,7 +27471,7 @@ namespace mg5amcCpu jamp_sv[52] -= amp_sv[0]; jamp_sv[58] -= amp_sv[0]; jamp_sv[68] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[33], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27479,7 +27479,7 @@ namespace mg5amcCpu jamp_sv[55] += amp_sv[0]; jamp_sv[58] -= amp_sv[0]; jamp_sv[66] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[33], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[33], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27494,7 +27494,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1163 - FFV1_0( w_fp[16], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27502,7 +27502,7 @@ namespace mg5amcCpu jamp_sv[85] -= amp_sv[0]; jamp_sv[87] -= amp_sv[0]; jamp_sv[89] += amp_sv[0]; - FFV1_0( w_fp[20], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[39], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27510,7 +27510,7 @@ namespace mg5amcCpu jamp_sv[86] += amp_sv[0]; jamp_sv[87] -= amp_sv[0]; jamp_sv[88] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[39], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[39], w_fp[4], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27525,7 +27525,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1164 - FFV1_0( w_fp[3], w_fp[39], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27537,7 +27537,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27549,7 +27549,7 @@ namespace mg5amcCpu jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27568,7 +27568,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1165 - FFV1_0( w_fp[46], w_fp[39], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27576,7 +27576,7 @@ namespace mg5amcCpu jamp_sv[76] -= amp_sv[0]; jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[39], w_fp[27], COUPs[1], &_fp[0] ); 
+ FFV1_0( w_fp[46], w_fp[39], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27584,7 +27584,7 @@ namespace mg5amcCpu jamp_sv[79] += amp_sv[0]; jamp_sv[82] -= amp_sv[0]; jamp_sv[90] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[39], w_fp[111], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[39], w_fp[111], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27596,12 +27596,12 @@ namespace mg5amcCpu // *** DIAGRAM 1166 OF 1240 *** // Wavefunction(s) for diagram number 1166 - FFV1_1( w_fp[2], w_fp[98], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); - FFV1_1( w_fp[2], w_fp[27], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); - FFV1_1( w_fp[2], w_fp[111], COUPs[1], cIPD[0], cIPD[1], w_fp[27] ); + FFV1_1( w_fp[2], w_fp[98], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[27], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); + FFV1_1( w_fp[2], w_fp[111], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] ); // Amplitude(s) for diagram number 1166 - FFV1_0( w_fp[46], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27609,7 +27609,7 @@ namespace mg5amcCpu jamp_sv[19] -= amp_sv[0]; jamp_sv[43] -= amp_sv[0]; jamp_sv[103] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[98], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[98], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27617,7 +27617,7 @@ namespace mg5amcCpu jamp_sv[29] += amp_sv[0]; jamp_sv[43] -= amp_sv[0]; jamp_sv[97] += 
amp_sv[0]; - FFV1_0( w_fp[46], w_fp[27], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[27], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27632,7 +27632,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1167 - FFV1_0( w_fp[46], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27644,7 +27644,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27656,7 +27656,7 @@ namespace mg5amcCpu jamp_sv[82] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27675,7 +27675,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1168 - FFV1_0( w_fp[38], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27683,7 +27683,7 @@ namespace mg5amcCpu jamp_sv[18] -= amp_sv[0]; 
jamp_sv[42] -= amp_sv[0]; jamp_sv[102] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[98], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[98], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27691,7 +27691,7 @@ namespace mg5amcCpu jamp_sv[28] += amp_sv[0]; jamp_sv[42] -= amp_sv[0]; jamp_sv[96] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[27], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[27], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27706,7 +27706,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1169 - FFV1_0( w_fp[38], w_fp[2], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27718,7 +27718,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27730,7 +27730,7 @@ namespace mg5amcCpu jamp_sv[58] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-27749,7 +27749,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1170 - FFV1_0( w_fp[3], w_fp[23], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27761,7 +27761,7 @@ namespace mg5amcCpu jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[102] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[103] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[98], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[98], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27773,7 +27773,7 @@ namespace mg5amcCpu jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[96] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[97] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[27], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[27], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27792,7 +27792,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1171 - FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27804,7 +27804,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[20], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base 
generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27816,7 +27816,7 @@ namespace mg5amcCpu jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[87] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[60], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27832,15 +27832,15 @@ namespace mg5amcCpu // *** DIAGRAM 1172 OF 1240 *** // Wavefunction(s) for diagram number 1172 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[60] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[20] ); - FFV1_2( w_fp[3], w_fp[60], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - FFV1_2( w_fp[3], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[27] ); - FFV1_2( w_fp[3], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[98] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[60] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[20] ); + FFV1_2( w_fp[3], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_2( w_fp[3], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] ); + FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[98] ); // Amplitude(s) for diagram number 1172 - FFV1_0( w_fp[16], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27848,7 +27848,7 @@ namespace mg5amcCpu jamp_sv[43] -= amp_sv[0]; jamp_sv[45] -= amp_sv[0]; jamp_sv[47] += amp_sv[0]; - FFV1_0( w_fp[27], w_fp[77], 
w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[27], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27856,7 +27856,7 @@ namespace mg5amcCpu jamp_sv[44] += amp_sv[0]; jamp_sv[45] -= amp_sv[0]; jamp_sv[46] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[77], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27868,12 +27868,12 @@ namespace mg5amcCpu // *** DIAGRAM 1173 OF 1240 *** // Wavefunction(s) for diagram number 1173 - VVV1P0_1( w_fp[60], w_fp[6], COUPs[0], 0., 0., w_fp[23] ); - VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 0., 0., w_fp[68] ); - VVV1P0_1( w_fp[20], w_fp[6], COUPs[0], 0., 0., w_fp[29] ); + VVV1P0_1( w_fp[60], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[24], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[20], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[29] ); // Amplitude(s) for diagram number 1173 - FFV1_0( w_fp[3], w_fp[77], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27885,7 +27885,7 @@ namespace mg5amcCpu jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[47] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27897,7 +27897,7 @@ namespace mg5amcCpu jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[45] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27916,7 +27916,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1174 - FFV1_0( w_fp[41], w_fp[77], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[60], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27924,7 +27924,7 @@ namespace mg5amcCpu jamp_sv[26] -= amp_sv[0]; jamp_sv[32] -= amp_sv[0]; jamp_sv[38] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27932,7 +27932,7 @@ namespace mg5amcCpu jamp_sv[30] += amp_sv[0]; jamp_sv[32] -= amp_sv[0]; jamp_sv[36] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[77], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[77], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27944,12 +27944,12 @@ namespace mg5amcCpu // *** DIAGRAM 1175 OF 1240 *** // Wavefunction(s) for diagram number 1175 - FFV1_1( w_fp[2], w_fp[60], COUPs[1], cIPD[0], cIPD[1], w_fp[59] ); - FFV1_1( w_fp[2], w_fp[24], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); - FFV1_1( w_fp[2], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_1( w_fp[2], w_fp[60], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[59] ); + FFV1_1( w_fp[2], w_fp[24], COUPs[1], 1.0, cIPD[0], cIPD[1], 
w_fp[71] ); + FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 1175 - FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[59], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27957,7 +27957,7 @@ namespace mg5amcCpu jamp_sv[15] -= amp_sv[0]; jamp_sv[61] -= amp_sv[0]; jamp_sv[85] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[71], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[71], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27965,7 +27965,7 @@ namespace mg5amcCpu jamp_sv[51] += amp_sv[0]; jamp_sv[61] -= amp_sv[0]; jamp_sv[75] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[21], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[21], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27980,7 +27980,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1176 - FFV1_0( w_fp[52], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -27992,7 +27992,7 @@ namespace mg5amcCpu jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -28004,7 +28004,7 @@ namespace mg5amcCpu jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28023,7 +28023,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1177 - FFV1_0( w_fp[52], w_fp[47], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[60], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28031,7 +28031,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28039,7 +28039,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[47], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[47], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28054,7 +28054,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1178 - FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[59], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -28066,7 +28066,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[85] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[71], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[71], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28078,7 +28078,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[75] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[21], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[21], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28097,7 +28097,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1179 - FFV1_0( w_fp[16], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28109,7 +28109,7 @@ namespace mg5amcCpu jamp_sv[103] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[27], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[27], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28121,7 +28121,7 @@ namespace mg5amcCpu jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[105] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[98], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[98], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28140,7 +28140,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1180 - VVV1_0( w_fp[60], w_fp[72], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[60], w_fp[72], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28160,7 +28160,7 @@ namespace mg5amcCpu jamp_sv[103] += amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVV1_0( w_fp[24], w_fp[72], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[72], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28180,7 +28180,7 @@ namespace mg5amcCpu jamp_sv[104] -= amp_sv[0]; jamp_sv[105] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[20], w_fp[72], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[20], w_fp[72], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28207,7 +28207,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1181 - VVVV1_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28227,7 +28227,7 @@ namespace mg5amcCpu jamp_sv[107] += amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVVV3_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[60], w_fp[1], w_fp[8], 
w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28247,7 +28247,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - VVVV4_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[60], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28267,7 +28267,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV1_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28287,7 +28287,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV3_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28307,7 +28307,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - VVVV4_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[24], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28327,7 +28327,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV1_0( 
w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28347,7 +28347,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[115] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVVV3_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28367,7 +28367,7 @@ namespace mg5amcCpu jamp_sv[104] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[20], w_fp[1], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28391,12 +28391,12 @@ namespace mg5amcCpu // *** DIAGRAM 1182 OF 1240 *** // Wavefunction(s) for diagram number 1182 - VVV1P0_1( w_fp[60], w_fp[1], COUPs[0], 0., 0., w_fp[72] ); - VVV1P0_1( w_fp[24], w_fp[1], COUPs[0], 0., 0., w_fp[60] ); - VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 0., 0., w_fp[24] ); + VVV1P0_1( w_fp[60], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[72] ); + VVV1P0_1( w_fp[24], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 1182 - VVV1_0( w_fp[8], w_fp[6], w_fp[72], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[72], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28416,7 +28416,7 @@ namespace mg5amcCpu jamp_sv[107] += 
amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[118] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[60], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28436,7 +28436,7 @@ namespace mg5amcCpu jamp_sv[109] -= amp_sv[0]; jamp_sv[112] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[24], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[24], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28463,7 +28463,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1183 - VVV1_0( w_fp[1], w_fp[8], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28483,7 +28483,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[118] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[68], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28503,7 +28503,7 @@ namespace mg5amcCpu jamp_sv[109] += amp_sv[0]; jamp_sv[112] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[29], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[29], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28530,7 +28530,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1184 - 
FFV1_0( w_fp[3], w_fp[47], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28542,7 +28542,7 @@ namespace mg5amcCpu jamp_sv[107] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[118] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[60], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28554,7 +28554,7 @@ namespace mg5amcCpu jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[112] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28573,7 +28573,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1185 - FFV1_0( w_fp[16], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28581,7 +28581,7 @@ namespace mg5amcCpu jamp_sv[103] -= amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[107] += amp_sv[0]; - FFV1_0( w_fp[27], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[27], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28589,7 +28589,7 @@ namespace mg5amcCpu 
jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[98], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28604,7 +28604,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1186 - FFV1_0( w_fp[41], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28616,7 +28616,7 @@ namespace mg5amcCpu jamp_sv[38] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[84] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28628,7 +28628,7 @@ namespace mg5amcCpu jamp_sv[50] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[60] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[74] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28647,7 +28647,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1187 - FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[59], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-28655,7 +28655,7 @@ namespace mg5amcCpu jamp_sv[14] -= amp_sv[0]; jamp_sv[60] -= amp_sv[0]; jamp_sv[84] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[71], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[71], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28663,7 +28663,7 @@ namespace mg5amcCpu jamp_sv[50] += amp_sv[0]; jamp_sv[60] -= amp_sv[0]; jamp_sv[74] += amp_sv[0]; - FFV1_0( w_fp[41], w_fp[21], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[21], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28675,15 +28675,15 @@ namespace mg5amcCpu // *** DIAGRAM 1188 OF 1240 *** // Wavefunction(s) for diagram number 1188 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[71] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 0., 0., w_fp[59] ); - FFV1_2( w_fp[3], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[24] ); - FFV1_2( w_fp[3], w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[60] ); - FFV1_2( w_fp[3], w_fp[59], COUPs[1], cIPD[0], cIPD[1], w_fp[72] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[71] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[59] ); + FFV1_2( w_fp[3], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] ); + FFV1_2( w_fp[3], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[60] ); + FFV1_2( w_fp[3], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[72] ); // Amplitude(s) for diagram number 1188 - FFV1_0( w_fp[24], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[24], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code 
base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28691,7 +28691,7 @@ namespace mg5amcCpu jamp_sv[37] -= amp_sv[0]; jamp_sv[39] -= amp_sv[0]; jamp_sv[41] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28699,7 +28699,7 @@ namespace mg5amcCpu jamp_sv[38] += amp_sv[0]; jamp_sv[39] -= amp_sv[0]; jamp_sv[40] += amp_sv[0]; - FFV1_0( w_fp[72], w_fp[77], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[72], w_fp[77], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28711,12 +28711,12 @@ namespace mg5amcCpu // *** DIAGRAM 1189 OF 1240 *** // Wavefunction(s) for diagram number 1189 - VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 0., 0., w_fp[98] ); - VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 0., 0., w_fp[27] ); - VVV1P0_1( w_fp[59], w_fp[5], COUPs[0], 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[21], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[98] ); + VVV1P0_1( w_fp[71], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[27] ); + VVV1P0_1( w_fp[59], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[16] ); // Amplitude(s) for diagram number 1189 - FFV1_0( w_fp[3], w_fp[77], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28728,7 +28728,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[41] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[27], COUPs[1], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28740,7 +28740,7 @@ namespace mg5amcCpu jamp_sv[39] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[42] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28759,7 +28759,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1190 - FFV1_0( w_fp[38], w_fp[77], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28767,7 +28767,7 @@ namespace mg5amcCpu jamp_sv[28] -= amp_sv[0]; jamp_sv[34] -= amp_sv[0]; jamp_sv[44] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[77], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28775,7 +28775,7 @@ namespace mg5amcCpu jamp_sv[31] += amp_sv[0]; jamp_sv[34] -= amp_sv[0]; jamp_sv[42] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[77], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[77], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28787,12 +28787,12 @@ namespace mg5amcCpu // *** DIAGRAM 1191 OF 1240 *** // Wavefunction(s) for diagram number 1191 - FFV1_1( w_fp[2], w_fp[21], COUPs[1], cIPD[0], cIPD[1], w_fp[29] ); - FFV1_1( w_fp[2], 
w_fp[71], COUPs[1], cIPD[0], cIPD[1], w_fp[68] ); - FFV1_1( w_fp[2], w_fp[59], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[21], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[29] ); + FFV1_1( w_fp[2], w_fp[71], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[68] ); + FFV1_1( w_fp[2], w_fp[59], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 1191 - FFV1_0( w_fp[52], w_fp[29], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[29], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28800,7 +28800,7 @@ namespace mg5amcCpu jamp_sv[21] -= amp_sv[0]; jamp_sv[67] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[68], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[68], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28808,7 +28808,7 @@ namespace mg5amcCpu jamp_sv[53] += amp_sv[0]; jamp_sv[67] -= amp_sv[0]; jamp_sv[99] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28823,7 +28823,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1192 - FFV1_0( w_fp[52], w_fp[2], w_fp[98], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[98], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28835,7 +28835,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] -= cxtype( 0, 1 ) * amp_sv[0]; - 
FFV1_0( w_fp[52], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28847,7 +28847,7 @@ namespace mg5amcCpu jamp_sv[88] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28866,7 +28866,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1193 - FFV1_0( w_fp[52], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28874,7 +28874,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28882,7 +28882,7 @@ namespace mg5amcCpu jamp_sv[85] += amp_sv[0]; jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[39], w_fp[59], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[39], w_fp[59], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28897,7 +28897,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1194 - FFV1_0( 
w_fp[3], w_fp[29], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[29], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28909,7 +28909,7 @@ namespace mg5amcCpu jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[109] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[68], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[68], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28921,7 +28921,7 @@ namespace mg5amcCpu jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[99] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[23], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28940,7 +28940,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1195 - FFV1_0( w_fp[24], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[24], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28952,7 +28952,7 @@ namespace mg5amcCpu jamp_sv[79] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[60], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28964,7 
+28964,7 @@ namespace mg5amcCpu jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[81] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[72], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[72], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -28983,7 +28983,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1196 - VVV1_0( w_fp[21], w_fp[66], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[21], w_fp[66], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29003,7 +29003,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVV1_0( w_fp[71], w_fp[66], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[71], w_fp[66], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29023,7 +29023,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[98] -= amp_sv[0]; jamp_sv[99] += amp_sv[0]; - VVV1_0( w_fp[59], w_fp[66], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[59], w_fp[66], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29050,7 +29050,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1197 - VVVV1_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ 
-29070,7 +29070,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - VVVV3_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29090,7 +29090,7 @@ namespace mg5amcCpu jamp_sv[83] += amp_sv[0]; jamp_sv[108] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; - VVVV4_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[21], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29110,7 +29110,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; - VVVV1_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29130,7 +29130,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVVV3_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29150,7 +29150,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV4_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[71], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated 
with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29170,7 +29170,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVVV1_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29190,7 +29190,7 @@ namespace mg5amcCpu jamp_sv[94] += amp_sv[0]; jamp_sv[98] += amp_sv[0]; jamp_sv[108] -= amp_sv[0]; - VVVV3_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29210,7 +29210,7 @@ namespace mg5amcCpu jamp_sv[99] -= amp_sv[0]; jamp_sv[108] -= amp_sv[0]; jamp_sv[109] += amp_sv[0]; - VVVV4_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[59], w_fp[1], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29234,12 +29234,12 @@ namespace mg5amcCpu // *** DIAGRAM 1198 OF 1240 *** // Wavefunction(s) for diagram number 1198 - VVV1P0_1( w_fp[21], w_fp[1], COUPs[0], 0., 0., w_fp[66] ); - VVV1P0_1( w_fp[71], w_fp[1], COUPs[0], 0., 0., w_fp[21] ); - VVV1P0_1( w_fp[59], w_fp[1], COUPs[0], 0., 0., w_fp[71] ); + VVV1P0_1( w_fp[21], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[66] ); + VVV1P0_1( w_fp[71], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[59], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[71] ); // Amplitude(s) for diagram number 1198 - VVV1_0( w_fp[8], w_fp[5], w_fp[66], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[66], COUPs[0], 1.0, &_fp[0] ); 
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29259,7 +29259,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[94] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29279,7 +29279,7 @@ namespace mg5amcCpu jamp_sv[88] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[71], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[71], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29306,7 +29306,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1199 - VVV1_0( w_fp[1], w_fp[8], w_fp[98], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[98], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29326,7 +29326,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[94] += amp_sv[0]; jamp_sv[109] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[27], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[27], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29346,7 +29346,7 @@ namespace mg5amcCpu jamp_sv[88] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here 
the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29373,7 +29373,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1200 - FFV1_0( w_fp[3], w_fp[39], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29385,7 +29385,7 @@ namespace mg5amcCpu jamp_sv[83] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[94] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29397,7 +29397,7 @@ namespace mg5amcCpu jamp_sv[85] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[88] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29416,7 +29416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1201 - FFV1_0( w_fp[24], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[24], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29424,7 +29424,7 @@ namespace mg5amcCpu jamp_sv[79] -= amp_sv[0]; jamp_sv[81] -= amp_sv[0]; jamp_sv[83] += amp_sv[0]; - FFV1_0( w_fp[60], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[60], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29432,7 +29432,7 @@ namespace mg5amcCpu jamp_sv[80] += amp_sv[0]; jamp_sv[81] -= amp_sv[0]; jamp_sv[82] += amp_sv[0]; - FFV1_0( w_fp[72], w_fp[39], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[72], w_fp[39], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29447,7 +29447,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1202 - FFV1_0( w_fp[38], w_fp[2], w_fp[66], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[66], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29459,7 +29459,7 @@ namespace mg5amcCpu jamp_sv[44] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[108] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29471,7 +29471,7 @@ namespace mg5amcCpu jamp_sv[52] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[66] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[98] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[71], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[71], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29490,7 +29490,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1203 - FFV1_0( w_fp[38], w_fp[29], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[29], 
w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29498,7 +29498,7 @@ namespace mg5amcCpu jamp_sv[20] -= amp_sv[0]; jamp_sv[66] -= amp_sv[0]; jamp_sv[108] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[68], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[68], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29506,7 +29506,7 @@ namespace mg5amcCpu jamp_sv[52] += amp_sv[0]; jamp_sv[66] -= amp_sv[0]; jamp_sv[98] += amp_sv[0]; - FFV1_0( w_fp[38], w_fp[23], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[23], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29518,15 +29518,15 @@ namespace mg5amcCpu // *** DIAGRAM 1204 OF 1240 *** // Wavefunction(s) for diagram number 1204 - VVVV1P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[23] ); - VVVV3P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[68] ); - VVVV4P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 0., 0., w_fp[29] ); - FFV1_2( w_fp[3], w_fp[23], COUPs[1], cIPD[0], cIPD[1], w_fp[71] ); - FFV1_2( w_fp[3], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); - FFV1_2( w_fp[3], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[66] ); + VVVV1P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[23] ); + VVVV3P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[68] ); + VVVV4P0_1( w_fp[0], w_fp[5], w_fp[6], COUPs[2], 1.0, 0., 0., w_fp[29] ); + FFV1_2( w_fp[3], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[71] ); + FFV1_2( w_fp[3], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[3], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[66] ); // Amplitude(s) for diagram 
number 1204 - FFV1_0( w_fp[71], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29534,7 +29534,7 @@ namespace mg5amcCpu jamp_sv[31] -= amp_sv[0]; jamp_sv[33] -= amp_sv[0]; jamp_sv[35] += amp_sv[0]; - FFV1_0( w_fp[21], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29542,7 +29542,7 @@ namespace mg5amcCpu jamp_sv[32] += amp_sv[0]; jamp_sv[33] -= amp_sv[0]; jamp_sv[34] += amp_sv[0]; - FFV1_0( w_fp[66], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[66], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29554,12 +29554,12 @@ namespace mg5amcCpu // *** DIAGRAM 1205 OF 1240 *** // Wavefunction(s) for diagram number 1205 - VVV1P0_1( w_fp[23], w_fp[4], COUPs[0], 0., 0., w_fp[72] ); - VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 0., 0., w_fp[60] ); - VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 0., 0., w_fp[24] ); + VVV1P0_1( w_fp[23], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[72] ); + VVV1P0_1( w_fp[68], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[60] ); + VVV1P0_1( w_fp[29], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 1205 - FFV1_0( w_fp[3], w_fp[77], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29571,7 +29571,7 @@ namespace mg5amcCpu jamp_sv[35] += cxtype( 0, 1 ) * amp_sv[0]; 
jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[60], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29583,7 +29583,7 @@ namespace mg5amcCpu jamp_sv[37] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[40] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[43] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29602,7 +29602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1206 - FFV1_0( w_fp[46], w_fp[77], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29610,7 +29610,7 @@ namespace mg5amcCpu jamp_sv[29] -= amp_sv[0]; jamp_sv[40] -= amp_sv[0]; jamp_sv[46] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[77], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29618,7 +29618,7 @@ namespace mg5amcCpu jamp_sv[37] += amp_sv[0]; jamp_sv[40] -= amp_sv[0]; jamp_sv[43] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29630,12 
+29630,12 @@ namespace mg5amcCpu // *** DIAGRAM 1207 OF 1240 *** // Wavefunction(s) for diagram number 1207 - FFV1_1( w_fp[2], w_fp[23], COUPs[1], cIPD[0], cIPD[1], w_fp[77] ); - FFV1_1( w_fp[2], w_fp[68], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); - FFV1_1( w_fp[2], w_fp[29], COUPs[1], cIPD[0], cIPD[1], w_fp[27] ); + FFV1_1( w_fp[2], w_fp[23], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[77] ); + FFV1_1( w_fp[2], w_fp[68], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[2], w_fp[29], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[27] ); // Amplitude(s) for diagram number 1207 - FFV1_0( w_fp[52], w_fp[77], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[77], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29643,7 +29643,7 @@ namespace mg5amcCpu jamp_sv[23] -= amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[16], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[16], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29651,7 +29651,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[91] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[27], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[27], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29666,7 +29666,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1208 - FFV1_0( w_fp[52], w_fp[2], w_fp[72], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[72], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and 
denominators_sv (#473) #endif @@ -29678,7 +29678,7 @@ namespace mg5amcCpu jamp_sv[70] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[60], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[60], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29690,7 +29690,7 @@ namespace mg5amcCpu jamp_sv[77] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[91] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[52], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29709,7 +29709,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1209 - FFV1_0( w_fp[52], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29717,7 +29717,7 @@ namespace mg5amcCpu jamp_sv[53] -= amp_sv[0]; jamp_sv[64] -= amp_sv[0]; jamp_sv[70] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[33], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29725,7 +29725,7 @@ namespace mg5amcCpu jamp_sv[61] += amp_sv[0]; jamp_sv[64] -= amp_sv[0]; jamp_sv[67] += amp_sv[0]; - FFV1_0( w_fp[52], w_fp[33], w_fp[29], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[52], w_fp[33], w_fp[29], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the 
code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29740,7 +29740,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1210 - FFV1_0( w_fp[3], w_fp[77], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[77], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29752,7 +29752,7 @@ namespace mg5amcCpu jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[115] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[16], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[16], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29764,7 +29764,7 @@ namespace mg5amcCpu jamp_sv[91] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[101] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[27], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[27], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29783,7 +29783,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1211 - FFV1_0( w_fp[71], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29795,7 +29795,7 @@ namespace mg5amcCpu jamp_sv[55] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[21], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], 
w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29807,7 +29807,7 @@ namespace mg5amcCpu jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[57] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[66], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[66], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29826,7 +29826,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1212 - VVV1_0( w_fp[23], w_fp[61], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[23], w_fp[61], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29846,7 +29846,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVV1_0( w_fp[68], w_fp[61], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[68], w_fp[61], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29866,7 +29866,7 @@ namespace mg5amcCpu jamp_sv[91] -= amp_sv[0]; jamp_sv[100] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVV1_0( w_fp[29], w_fp[61], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[29], w_fp[61], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29893,7 +29893,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1213 - VVVV1_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[23], 
w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29913,7 +29913,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - VVVV3_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29933,7 +29933,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[114] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV4_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[23], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29953,7 +29953,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVVV1_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29973,7 +29973,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV3_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -29993,7 +29993,7 @@ namespace mg5amcCpu jamp_sv[91] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - 
VVVV4_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[68], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30013,7 +30013,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVVV1_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30033,7 +30033,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[100] += amp_sv[0]; jamp_sv[114] -= amp_sv[0]; - VVVV3_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30053,7 +30053,7 @@ namespace mg5amcCpu jamp_sv[101] -= amp_sv[0]; jamp_sv[114] -= amp_sv[0]; jamp_sv[115] += amp_sv[0]; - VVVV4_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[29], w_fp[1], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30077,12 +30077,12 @@ namespace mg5amcCpu // *** DIAGRAM 1214 OF 1240 *** // Wavefunction(s) for diagram number 1214 - VVV1P0_1( w_fp[23], w_fp[1], COUPs[0], 0., 0., w_fp[61] ); - VVV1P0_1( w_fp[68], w_fp[1], COUPs[0], 0., 0., w_fp[23] ); - VVV1P0_1( w_fp[29], w_fp[1], COUPs[0], 0., 0., w_fp[68] ); + VVV1P0_1( w_fp[23], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[61] ); + VVV1P0_1( w_fp[68], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[23] ); + VVV1P0_1( 
w_fp[29], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[68] ); // Amplitude(s) for diagram number 1214 - VVV1_0( w_fp[8], w_fp[4], w_fp[61], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[61], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30102,7 +30102,7 @@ namespace mg5amcCpu jamp_sv[70] -= amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30122,7 +30122,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[68], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[68], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30149,7 +30149,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1215 - VVV1_0( w_fp[1], w_fp[8], w_fp[72], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[72], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30169,7 +30169,7 @@ namespace mg5amcCpu jamp_sv[70] += amp_sv[0]; jamp_sv[91] += amp_sv[0]; jamp_sv[115] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[60], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[60], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30189,7 +30189,7 @@ namespace mg5amcCpu jamp_sv[77] -= amp_sv[0]; 
jamp_sv[91] += amp_sv[0]; jamp_sv[101] -= amp_sv[0]; - VVV1_0( w_fp[1], w_fp[8], w_fp[24], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[24], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30216,7 +30216,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1216 - FFV1_0( w_fp[3], w_fp[33], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30228,7 +30228,7 @@ namespace mg5amcCpu jamp_sv[59] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[70] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30240,7 +30240,7 @@ namespace mg5amcCpu jamp_sv[61] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[64] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[67] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30259,7 +30259,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1217 - FFV1_0( w_fp[71], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[71], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30267,7 +30267,7 @@ namespace 
mg5amcCpu jamp_sv[55] -= amp_sv[0]; jamp_sv[57] -= amp_sv[0]; jamp_sv[59] += amp_sv[0]; - FFV1_0( w_fp[21], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30275,7 +30275,7 @@ namespace mg5amcCpu jamp_sv[56] += amp_sv[0]; jamp_sv[57] -= amp_sv[0]; jamp_sv[58] += amp_sv[0]; - FFV1_0( w_fp[66], w_fp[33], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[66], w_fp[33], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30290,7 +30290,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1218 - FFV1_0( w_fp[46], w_fp[2], w_fp[61], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[61], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30302,7 +30302,7 @@ namespace mg5amcCpu jamp_sv[46] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[114] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30314,7 +30314,7 @@ namespace mg5amcCpu jamp_sv[76] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[90] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[100] -= cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[68], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[68], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv 
and denominators_sv (#473) #endif @@ -30333,7 +30333,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1219 - FFV1_0( w_fp[46], w_fp[77], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[77], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30341,7 +30341,7 @@ namespace mg5amcCpu jamp_sv[22] -= amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[114] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[16], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[16], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30349,7 +30349,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[90] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - FFV1_0( w_fp[46], w_fp[27], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[27], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30364,7 +30364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1220 - VVVV1_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30384,7 +30384,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv 
(#473) #endif @@ -30404,7 +30404,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[73], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30424,7 +30424,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30444,7 +30444,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30464,7 +30464,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[79], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30484,7 +30484,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here 
the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30504,7 +30504,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30524,7 +30524,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[117] += amp_sv[0]; jamp_sv[119] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[80], w_fp[8], w_fp[6], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30548,12 +30548,12 @@ namespace mg5amcCpu // *** DIAGRAM 1221 OF 1240 *** // Wavefunction(s) for diagram number 1221 - VVV1P0_1( w_fp[0], w_fp[73], COUPs[0], 0., 0., w_fp[27] ); - VVV1P0_1( w_fp[0], w_fp[79], COUPs[0], 0., 0., w_fp[1] ); - VVV1P0_1( w_fp[0], w_fp[80], COUPs[0], 0., 0., w_fp[16] ); + VVV1P0_1( w_fp[0], w_fp[73], COUPs[0], 1.0, 0., 0., w_fp[27] ); + VVV1P0_1( w_fp[0], w_fp[79], COUPs[0], 1.0, 0., 0., w_fp[1] ); + VVV1P0_1( w_fp[0], w_fp[80], COUPs[0], 1.0, 0., 0., w_fp[16] ); // Amplitude(s) for diagram number 1221 - VVV1_0( w_fp[8], w_fp[6], w_fp[27], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[27], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30573,7 +30573,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[1], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[1], COUPs[0], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30593,7 +30593,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[6], w_fp[16], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30620,7 +30620,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1222 - VVV1_0( w_fp[73], w_fp[6], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[73], w_fp[6], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30640,7 +30640,7 @@ namespace mg5amcCpu jamp_sv[107] -= amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[119] += amp_sv[0]; - VVV1_0( w_fp[79], w_fp[6], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[79], w_fp[6], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30660,7 +30660,7 @@ namespace mg5amcCpu jamp_sv[111] += amp_sv[0]; jamp_sv[113] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVV1_0( w_fp[80], w_fp[6], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[80], w_fp[6], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30687,7 +30687,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1223 - FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here 
the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30699,7 +30699,7 @@ namespace mg5amcCpu jamp_sv[107] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[119] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30711,7 +30711,7 @@ namespace mg5amcCpu jamp_sv[111] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[113] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[117] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[47], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30730,7 +30730,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1224 - FFV1_0( w_fp[3], w_fp[113], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[73], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30738,7 +30738,7 @@ namespace mg5amcCpu jamp_sv[97] -= amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[101] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[113], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], w_fp[79], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30746,7 +30746,7 @@ namespace mg5amcCpu jamp_sv[98] += amp_sv[0]; jamp_sv[99] -= amp_sv[0]; jamp_sv[100] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[113], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[113], 
w_fp[80], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30761,7 +30761,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1225 - FFV1_0( w_fp[41], w_fp[2], w_fp[27], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[27], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30773,7 +30773,7 @@ namespace mg5amcCpu jamp_sv[38] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[86] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30785,7 +30785,7 @@ namespace mg5amcCpu jamp_sv[56] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[62] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[80] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[41], w_fp[2], w_fp[16], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30804,7 +30804,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1226 - FFV1_0( w_fp[62], w_fp[2], w_fp[73], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[73], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30812,7 +30812,7 @@ namespace mg5amcCpu jamp_sv[38] -= amp_sv[0]; jamp_sv[62] -= amp_sv[0]; jamp_sv[86] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[2], w_fp[79], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30820,7 +30820,7 @@ namespace mg5amcCpu jamp_sv[56] += amp_sv[0]; jamp_sv[62] -= amp_sv[0]; jamp_sv[80] += amp_sv[0]; - FFV1_0( w_fp[62], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[62], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30835,7 +30835,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1227 - VVVV1_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30855,7 +30855,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30875,7 +30875,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[57], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30895,7 +30895,7 @@ namespace mg5amcCpu jamp_sv[77] += amp_sv[0]; jamp_sv[110] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVVV1_0( w_fp[0], 
w_fp[81], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30915,7 +30915,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30935,7 +30935,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[81], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30955,7 +30955,7 @@ namespace mg5amcCpu jamp_sv[76] += amp_sv[0]; jamp_sv[104] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30975,7 +30975,7 @@ namespace mg5amcCpu jamp_sv[95] -= amp_sv[0]; jamp_sv[104] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -30995,7 +30995,7 @@ namespace mg5amcCpu jamp_sv[95] 
-= amp_sv[0]; jamp_sv[105] -= amp_sv[0]; jamp_sv[111] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[82], w_fp[8], w_fp[5], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31019,12 +31019,12 @@ namespace mg5amcCpu // *** DIAGRAM 1228 OF 1240 *** // Wavefunction(s) for diagram number 1228 - VVV1P0_1( w_fp[0], w_fp[57], COUPs[0], 0., 0., w_fp[62] ); - VVV1P0_1( w_fp[0], w_fp[81], COUPs[0], 0., 0., w_fp[80] ); - VVV1P0_1( w_fp[0], w_fp[82], COUPs[0], 0., 0., w_fp[79] ); + VVV1P0_1( w_fp[0], w_fp[57], COUPs[0], 1.0, 0., 0., w_fp[62] ); + VVV1P0_1( w_fp[0], w_fp[81], COUPs[0], 1.0, 0., 0., w_fp[80] ); + VVV1P0_1( w_fp[0], w_fp[82], COUPs[0], 1.0, 0., 0., w_fp[79] ); // Amplitude(s) for diagram number 1228 - VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[62], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31044,7 +31044,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[110] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[80], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[80], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31064,7 +31064,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[104] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[5], w_fp[79], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[5], w_fp[79], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31091,7 +31091,7 @@ 
namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1229 - VVV1_0( w_fp[57], w_fp[5], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[57], w_fp[5], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31111,7 +31111,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[95] += amp_sv[0]; jamp_sv[111] -= amp_sv[0]; - VVV1_0( w_fp[81], w_fp[5], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[81], w_fp[5], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31131,7 +31131,7 @@ namespace mg5amcCpu jamp_sv[89] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[105] -= amp_sv[0]; - VVV1_0( w_fp[82], w_fp[5], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[82], w_fp[5], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31158,7 +31158,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1230 - FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31170,7 +31170,7 @@ namespace mg5amcCpu jamp_sv[83] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[95] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[80], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31182,7 +31182,7 @@ namespace 
mg5amcCpu jamp_sv[87] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[89] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[93] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[39], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[39], w_fp[79], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31201,7 +31201,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1231 - FFV1_0( w_fp[3], w_fp[102], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[57], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31209,7 +31209,7 @@ namespace mg5amcCpu jamp_sv[73] -= amp_sv[0]; jamp_sv[75] -= amp_sv[0]; jamp_sv[77] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[81], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31217,7 +31217,7 @@ namespace mg5amcCpu jamp_sv[74] += amp_sv[0]; jamp_sv[75] -= amp_sv[0]; jamp_sv[76] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[102], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[102], w_fp[82], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31232,7 +31232,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1232 - FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[62], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31244,7 +31244,7 @@ namespace 
mg5amcCpu jamp_sv[44] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[110] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[80], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[80], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31256,7 +31256,7 @@ namespace mg5amcCpu jamp_sv[58] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[68] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[104] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[38], w_fp[2], w_fp[79], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[38], w_fp[2], w_fp[79], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31275,7 +31275,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1233 - FFV1_0( w_fp[104], w_fp[2], w_fp[57], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[57], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31283,7 +31283,7 @@ namespace mg5amcCpu jamp_sv[44] -= amp_sv[0]; jamp_sv[68] -= amp_sv[0]; jamp_sv[110] += amp_sv[0]; - FFV1_0( w_fp[104], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31291,7 +31291,7 @@ namespace mg5amcCpu jamp_sv[58] += amp_sv[0]; jamp_sv[68] -= amp_sv[0]; jamp_sv[104] += amp_sv[0]; - FFV1_0( w_fp[104], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[104], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates 
numerators_sv and denominators_sv (#473) #endif @@ -31306,7 +31306,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1234 - VVVV1_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31326,7 +31326,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31346,7 +31346,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[55], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31366,7 +31366,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[116] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31386,7 +31386,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here 
the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31406,7 +31406,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[83], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31426,7 +31426,7 @@ namespace mg5amcCpu jamp_sv[93] += amp_sv[0]; jamp_sv[106] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVVV1_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31446,7 +31446,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[106] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31466,7 +31466,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[107] -= amp_sv[0]; jamp_sv[117] += amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[84], w_fp[8], w_fp[4], COUPs[2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31490,12 +31490,12 @@ namespace mg5amcCpu // *** DIAGRAM 1235 OF 1240 *** // Wavefunction(s) for diagram number 1235 - VVV1P0_1( w_fp[0], w_fp[55], COUPs[0], 0., 0., w_fp[104] ); - VVV1P0_1( w_fp[0], w_fp[83], 
COUPs[0], 0., 0., w_fp[82] ); - VVV1P0_1( w_fp[0], w_fp[84], COUPs[0], 0., 0., w_fp[81] ); + VVV1P0_1( w_fp[0], w_fp[55], COUPs[0], 1.0, 0., 0., w_fp[104] ); + VVV1P0_1( w_fp[0], w_fp[83], COUPs[0], 1.0, 0., 0., w_fp[82] ); + VVV1P0_1( w_fp[0], w_fp[84], COUPs[0], 1.0, 0., 0., w_fp[81] ); // Amplitude(s) for diagram number 1235 - VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[104], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31515,7 +31515,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[116] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[82], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[82], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31535,7 +31535,7 @@ namespace mg5amcCpu jamp_sv[82] -= amp_sv[0]; jamp_sv[92] += amp_sv[0]; jamp_sv[106] -= amp_sv[0]; - VVV1_0( w_fp[8], w_fp[4], w_fp[81], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[8], w_fp[4], w_fp[81], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31562,7 +31562,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1236 - VVV1_0( w_fp[55], w_fp[4], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[55], w_fp[4], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31582,7 +31582,7 @@ namespace mg5amcCpu jamp_sv[71] += amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[117] -= amp_sv[0]; - VVV1_0( w_fp[83], w_fp[4], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[83], w_fp[4], w_fp[56], 
COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31602,7 +31602,7 @@ namespace mg5amcCpu jamp_sv[83] -= amp_sv[0]; jamp_sv[93] += amp_sv[0]; jamp_sv[107] -= amp_sv[0]; - VVV1_0( w_fp[84], w_fp[4], w_fp[56], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[84], w_fp[4], w_fp[56], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31629,7 +31629,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1237 - FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31641,7 +31641,7 @@ namespace mg5amcCpu jamp_sv[59] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[71] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[82], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31653,7 +31653,7 @@ namespace mg5amcCpu jamp_sv[63] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[65] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[69] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[3], w_fp[33], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[33], w_fp[81], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31672,7 +31672,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1238 - FFV1_0( w_fp[3], w_fp[114], w_fp[55], COUPs[1], &_fp[0] 
); + FFV1_0( w_fp[3], w_fp[114], w_fp[55], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31680,7 +31680,7 @@ namespace mg5amcCpu jamp_sv[49] -= amp_sv[0]; jamp_sv[51] -= amp_sv[0]; jamp_sv[53] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[114], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[83], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31688,7 +31688,7 @@ namespace mg5amcCpu jamp_sv[50] += amp_sv[0]; jamp_sv[51] -= amp_sv[0]; jamp_sv[52] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[114], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[114], w_fp[84], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31703,7 +31703,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1239 - FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[104], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31715,7 +31715,7 @@ namespace mg5amcCpu jamp_sv[46] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[116] += cxtype( 0, 1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[82], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[82], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31727,7 +31727,7 @@ namespace mg5amcCpu jamp_sv[82] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[92] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[106] += cxtype( 0, 
1 ) * amp_sv[0]; - FFV1_0( w_fp[46], w_fp[2], w_fp[81], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[46], w_fp[2], w_fp[81], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31746,7 +31746,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 1240 - FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[55], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31754,7 +31754,7 @@ namespace mg5amcCpu jamp_sv[46] -= amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[116] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[83], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -31762,7 +31762,7 @@ namespace mg5amcCpu jamp_sv[82] += amp_sv[0]; jamp_sv[92] -= amp_sv[0]; jamp_sv[106] += amp_sv[0]; - FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[99], w_fp[2], w_fp[84], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -32518,13 +32518,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // 
nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so 
$(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttggg.sa/mg5.in b/epochX/cudacpp/gg_ttggg.sa/mg5.in index 644e3be9b4..2a135334ff 100644 --- a/epochX/cudacpp/gg_ttggg.sa/mg5.in +++ b/epochX/cudacpp/gg_ttggg.sa/mg5.in @@ -1,3 +1,4 @@ +set stdout_level DEBUG +set zerowidth_tchannel F generate g g > t t~ g g g -output standalone_cudacpp gg_ttggg.sa --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp - +output standalone_cudacpp gg_ttggg.sa diff --git a/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h index 9cea8bcbe7..9b946c21e1 100644 --- a/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -886,6 +888,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -896,6 +899,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -908,6 +912,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], 
const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -920,6 +925,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -934,6 +940,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -945,6 +952,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -959,6 +967,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -970,6 +979,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -984,6 +994,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -995,6 +1006,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -1008,6 +1020,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1042,6 +1055,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double 
Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1080,6 +1094,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1103,6 +1118,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1134,6 +1150,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1165,6 +1182,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1197,6 +1215,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1225,6 +1244,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1260,6 +1280,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1288,6 +1309,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1323,6 +1345,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1351,6 +1374,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt 
b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index c3d29c5b80..63bb0f3c9e 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004678249359130859  +DEBUG: model prefixing takes 0.005384206771850586  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,21 +169,21 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.070 s +8 processes with 40 diagrams generated in 0.077 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  INFO: initialize a new directory: CODEGEN_mad_gq_ttq INFO: remove old information in CODEGEN_mad_gq_ttq -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g u > t t~ u WEIGHTED<=3 @1 INFO: Processing color information for process: g u > t t~ u @1 @@ 
-196,118 +196,71 @@ INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1710]  
-DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams 
for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  INFO: 
Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.028 s -Wrote files for 32 helas calls in 0.210 s +Generated helas calls for 
2 subprocesses (10 diagrams) in 0.031 s +Wrote files for 32 helas calls in 0.216 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.125 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +ALOHA: aloha creates 2 routines in 0.143 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.113 s +ALOHA: aloha creates 4 routines in 0.129 s FFV1 FFV1 FFV1 FFV1 VVV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. 
The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * * W E L C O M E to * @@ -328,14 +281,15 @@ INFO: Generate web pages * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. 
Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -359,27 +313,28 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py +Hunk #1 succeeded at 4188 (offset 1 line). patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 513 (offset 44 lines). +Hunk #1 succeeded at 527 (offset 58 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -387,11 +342,9 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 247 (offset 26 lines). Hunk #4 succeeded at 281 (offset 32 lines). Hunk #5 succeeded at 326 (offset 32 lines). -Hunk #6 succeeded at 441 (offset 45 lines). -Hunk #7 succeeded at 531 (offset 61 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 517 (offset 48 lines). +Hunk #1 succeeded at 527 (offset 58 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -399,14 +352,12 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 247 (offset 26 lines). Hunk #4 succeeded at 281 (offset 32 lines). Hunk #5 succeeded at 326 (offset 32 lines). -Hunk #6 succeeded at 441 (offset 45 lines). -Hunk #7 succeeded at 531 (offset 61 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/README Run "open index.html" to see more information about this process. 
quit -real 0m2.498s -user 0m2.119s -sys 0m0.353s +real 0m2.606s +user 0m2.256s +sys 0m0.312s diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/ident_card.dat b/epochX/cudacpp/gq_ttq.mad/Cards/ident_card.dat index b37758a42a..0ba87b008f 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/ident_card.dat +++ b/epochX/cudacpp/gq_ttq.mad/Cards/ident_card.dat @@ -2,32 +2,32 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc -mass 5 mdl_MB -mass 6 mdl_MT -mass 15 mdl_MTA -mass 23 mdl_MZ -mass 25 mdl_MH -sminputs 1 aEWM1 -sminputs 2 mdl_Gf -sminputs 3 aS -yukawa 5 mdl_ymb -yukawa 6 mdl_ymt -yukawa 15 mdl_ymtau -decay 6 mdl_WT -decay 23 mdl_WZ -decay 24 mdl_WW +decay 23 mdl_WZ +decay 24 mdl_WW decay 25 mdl_WH +decay 6 mdl_WT +mass 15 mdl_MTA +mass 23 mdl_MZ +mass 25 mdl_MH +mass 5 mdl_MB +mass 6 mdl_MT +sminputs 1 aEWM1 +sminputs 2 mdl_Gf +sminputs 3 aS +yukawa 15 mdl_ymtau +yukawa 5 mdl_ymb +yukawa 6 mdl_ymt diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt index 27acb12a1e..cdeedc7863 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -mg5_path = /data/stephan/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -mg5_path = /data/stephan/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat index 477d0d8c12..dc07af3836 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat @@ -35,9 +35,10 @@ set loop_color_flows False set max_npoint_for_channel 0 set default_unset_couplings 99 
set max_t_for_channel 99 -set zerowidth_tchannel True set nlo_mixed_expansion True -import model sm +set stdout_level DEBUG +set zerowidth_tchannel F +define q = u c d s u~ c~ d~ s~ define p = g u c d s u~ c~ d~ s~ define j = g u c d s u~ c~ d~ s~ define l+ = e+ mu+ @@ -46,5 +47,5 @@ define vl = ve vm vt define vl~ = ve~ vm~ vt~ define q = u c d s u~ c~ d~ s~ generate g q > t t~ q -output madevent gq_ttq.mad_gen --hel_recycling=False --vector_size=163\ -84 --me_exporter=standalone_cudacpp +output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --v\ +ector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_file.inc index dccc9da9d2..4457933199 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1P0_3.o FFV1_0.o FFV1_1.o FFV1_2.o VVV1_0.o +ALOHARoutine = FFV1_1.o FFV1_0.o VVV1_0.o FFV1_2.o FFV1P0_3.o diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! 
Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! + const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index 47666e308a..c526dd6b31 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -243,26 +243,19 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); -#if not( defined __CUDACC__ and defined MGONGPU_TEST_DIVERGENCE ) - imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz -#else - if( ( blockDim.x * blockIdx.x + threadIdx.x ) % 2 == 0 ) - imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz - else - ixxxxx( momenta, 0, cHel[ihel][1], +1, w_fp[1], 1 ); -#endif + ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); + oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - FFV1_2( w_fp[1], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 0., 0., w_fp[6] ); + FFV1_2( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[4], 
w_fp[6], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -273,11 +266,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -288,10 +281,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -302,10 +295,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[4], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); + FFV1_1( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -319,7 +312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -858,13 +851,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index ca1b7c1dc5..81ab70f6d1 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION D2,U2,S2,C2 DOUBLE PRECISION 
XPQ(-7:7),PD(0:MAXPROC) @@ -129,14 +130,27 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)), QSCALE) + U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)), QSCALE) + S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)), QSCALE) + C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. 
Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + 
smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f index f0a88f8da0..e6d01dad0b 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f @@ -463,11 +463,11 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1P0_3(W(1,4),W(1,3),GC_11(IVEC),ZERO, FK_ZERO,W(1,7)) C Amplitude(s) for diagram number 1 CALL FFV1_0(W(1,6),W(1,5),W(1,7),GC_11(IVEC),AMP(1)) - CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,6)) + CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,6)) CALL FFV1P0_3(W(1,2),W(1,5),GC_11(IVEC),ZERO, FK_ZERO,W(1,8)) C Amplitude(s) for diagram number 2 CALL FFV1_0(W(1,4),W(1,6),W(1,8),GC_11(IVEC),AMP(2)) - CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,6)) + CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,6)) C Amplitude(s) for diagram number 3 CALL FFV1_0(W(1,6),W(1,3),W(1,8),GC_11(IVEC),AMP(3)) CALL FFV1_1(W(1,5),W(1,1),GC_11(IVEC),ZERO, FK_ZERO,W(1,6)) 
diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index 04a5cc423c..8d92e4e769 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -243,19 +243,19 @@ namespace mg5amcCpu // 
Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - ixzxxx( momenta, cHel[ihel][4], -1, w_fp[4], 4 ); + ixxxxx( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 ); - FFV1_2( w_fp[4], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 0., 0., w_fp[6] ); + FFV1_2( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -266,11 +266,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[4], w_fp[1], COUPs[0], 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[4], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -281,10 +281,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], 
w_fp[2], w_fp[7], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -295,10 +295,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[1], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); + FFV1_1( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -312,7 +312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -851,13 +851,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle 
DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index 33e638e237..b58c5d70bd 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,18 +130,27 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git 
a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
+ constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput 
is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f index f711933894..7a2e329e64 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f @@ -463,11 +463,11 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1P0_3(W(1,4),W(1,3),GC_11(IVEC),ZERO, FK_ZERO,W(1,7)) C Amplitude(s) for diagram number 1 CALL FFV1_0(W(1,6),W(1,2),W(1,7),GC_11(IVEC),AMP(1)) - CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,6)) + CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,6)) CALL FFV1P0_3(W(1,5),W(1,2),GC_11(IVEC),ZERO, FK_ZERO,W(1,8)) C Amplitude(s) for diagram number 2 CALL FFV1_0(W(1,4),W(1,6),W(1,8),GC_11(IVEC),AMP(2)) - CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,6)) + CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,6)) C Amplitude(s) for diagram number 3 CALL FFV1_0(W(1,6),W(1,3),W(1,8),GC_11(IVEC),AMP(3)) CALL FFV1_1(W(1,2),W(1,1),GC_11(IVEC),ZERO, FK_ZERO,W(1,6)) diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 
2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM 
reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile b/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile index 74db44d848..74b19033a8 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 +LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/proc_characteristics b/epochX/cudacpp/gq_ttq.mad/SubProcesses/proc_characteristics index 8b6947bc5a..76a6154ffb 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/proc_characteristics +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/proc_characteristics @@ -8,7 +8,7 @@ ninitial = 2 grouped_matrix = True has_loops = False - bias_module = None + bias_module = dummy max_n_matched_jets = 1 colored_pdgs = [1, 2, 3, 4, 5, 6, 21] complex_mass_scheme = False diff --git a/epochX/cudacpp/gq_ttq.mad/bin/generate_events b/epochX/cudacpp/gq_ttq.mad/bin/generate_events index 107313b25d..5577cc66a0 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/generate_events +++ b/epochX/cudacpp/gq_ttq.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME - +import misc as misc import logging import logging.config @@ -160,17 +160,31 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv + argument = sys.argv + + # check for plugin customization of the launch command + launch_interface = ME.MadEventCmdShell + if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + try: if '-h' in argument or '--help' in argument: - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', '2']: argument = 
treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py index 7624b9f557..e9f421ae5f 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py @@ -1002,13 +1002,14 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() + self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - self.plugin_input(finput) + def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/gq_ttq.mad/bin/internal/ufomodel/py3_model.pkl index afc2ca4e273b368050537e3f722b85c825bbf510..27a1caae3c115073669b90622e9351ab04166d39 100644 GIT binary patch delta 54 zcmX?lj_Le4rVZZ9G>RD*81z#TOA_@H%Mx=Ei;FY$-2+0642+ERa}!h2ixLYmOwtQV KBPM4qy8{3ztQ2Sf delta 44 zcmX?qj_K$*rVZZ9 t t~ q output madevent gq_ttq.mad --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h b/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h index 901400d447..0dd5f20f71 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + 
const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -885,6 +887,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -897,6 +900,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -910,6 +914,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //========================================================================== @@ -921,6 +926,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -944,6 +950,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -975,6 +982,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1006,6 +1014,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1037,6 +1046,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); diff --git a/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.cc b/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.cc index d5eda63ee0..3452d1e8da 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.cc +++ b/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.cc @@ -111,8 +111,8 @@ Parameters_sm::setDependentParameters() 
// now computed event-by-event (running void Parameters_sm::setDependentCouplings() // now computed event-by-event (running alphas #373) { - GC_11 = mdl_complexi * G; GC_10 = -G; + GC_11 = mdl_complexi * G; } */ @@ -195,7 +195,7 @@ void Parameters_sm::printDependentCouplings() // now computed event-by-event (running alphas #373) { std::cout << "sm model couplings dependent on event kinematics:" << std::endl; - std::cout << std::setw( 20 ) << "GC_11 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_11 << std::endl; std::cout << std::setw( 20 ) << "GC_10 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_10 << std::endl; + std::cout << std::setw( 20 ) << "GC_11 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_11 << std::endl; } */ diff --git a/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h b/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h index 0c77cf58f0..4f6f322ed9 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h +++ b/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h @@ -54,7 +54,7 @@ namespace mg5amcCpu //double mdl_sqrt__aS, G, mdl_G__exp__2; // now computed event-by-event (running alphas #373) // Model couplings dependent on aS - //cxsmpl GC_11, GC_10; // now computed event-by-event (running alphas #373) + //cxsmpl GC_10, GC_11; // now computed event-by-event (running alphas #373) // Set parameters that are unchanged during the run void setIndependentParameters( SLHAReader& slha ); @@ -194,8 +194,8 @@ namespace mg5amcCpu //constexpr double mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) // Model couplings dependent on aS - //constexpr cxsmpl GC_11 = mdl_complexi * G; // now computed event-by-event (running alphas #373) //constexpr cxsmpl GC_10 = -G; // now computed event-by-event (running alphas #373) + //constexpr cxsmpl GC_11 = mdl_complexi * G; // now computed event-by-event (running alphas #373) // Print parameters that are unchanged during the 
run void printIndependentParameters(); @@ -226,12 +226,12 @@ namespace mg5amcCpu namespace Parameters_sm_dependentCouplings { constexpr size_t ndcoup = 2; // #couplings that vary event by event because they depend on the running alphas QCD - constexpr size_t idcoup_GC_11 = 0; - constexpr size_t idcoup_GC_10 = 1; + constexpr size_t idcoup_GC_10 = 0; + constexpr size_t idcoup_GC_11 = 1; struct DependentCouplings_sv { - cxtype_sv GC_11; cxtype_sv GC_10; + cxtype_sv GC_11; }; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-variable" // e.g. <> @@ -257,8 +257,8 @@ namespace mg5amcCpu //const fptype_sv G = 2. * mdl_sqrt__aS * constexpr_sqrt( M_PI ); const fptype_sv mdl_G__exp__2 = ( ( G ) * ( G ) ); // Model couplings dependent on aS - out.GC_11 = cI * G; out.GC_10 = -G; + out.GC_11 = cI * G; } // End SM implementation - no special handling of vectors of floats as in EFT (#439) return out; @@ -293,12 +293,12 @@ namespace mg5amcCpu using namespace Parameters_sm_dependentCouplings; const fptype_sv& gs_sv = G_ACCESS::kernelAccessConst( gs ); DependentCouplings_sv couplings_sv = computeDependentCouplings_fromG( gs_sv ); - fptype* GC_11s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_11 ); fptype* GC_10s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_10 ); - cxtype_sv_ref GC_11s_sv = C_ACCESS::kernelAccess( GC_11s ); + fptype* GC_11s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_11 ); cxtype_sv_ref GC_10s_sv = C_ACCESS::kernelAccess( GC_10s ); - GC_11s_sv = couplings_sv.GC_11; + cxtype_sv_ref GC_11s_sv = C_ACCESS::kernelAccess( GC_11s ); GC_10s_sv = couplings_sv.GC_10; + GC_11s_sv = couplings_sv.GC_11; mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/gq_ttq.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt b/epochX/cudacpp/gq_ttq.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt index dd90c94acf..d596b33ae7 100644 --- a/epochX/cudacpp/gq_ttq.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt +++ 
b/epochX/cudacpp/gq_ttq.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt @@ -4,7 +4,7 @@ Event 0 Batch 0 2 2.647483690509011e+02 7.527657265342380e+01 -2.528976247704283e+02 -2.163164141117315e+01 3 6.252973211776936e+02 -5.721080498766041e+02 -1.578766990348905e+01 2.518727230515587e+02 4 6.099543097714056e+02 4.968314772231802e+02 2.686852946739174e+02 -2.302410816403857e+02 - ME 3.498510462248670e-04 + ME 6.254927412618323e-05 Event 1 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -12,7 +12,7 @@ Event 1 Batch 0 2 2.542827954151951e+02 1.482213322085297e+02 -1.988618298139058e+02 -5.607271498295615e+01 3 6.883656117507998e+02 1.265478873489434e+02 5.602777828023585e+02 3.793700749224233e+02 4 5.573515928340058e+02 -2.747692195574731e+02 -3.614159529884527e+02 -3.232973599394667e+02 - ME 7.257243108248426e-04 + ME 8.120933129385430e-05 Event 2 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -20,7 +20,7 @@ Event 2 Batch 0 2 4.301460683791099e+02 -3.656995432079240e+02 -2.257802895903974e+02 -1.768459985405173e+01 3 5.058528987551350e+02 2.755467101243707e+02 -2.034821274188550e+02 3.722313656043856e+02 4 5.640010328657550e+02 9.015283308355326e+01 4.292624170092524e+02 -3.545467657503340e+02 - ME 8.130044127338102e-04 + ME 1.104115154253218e-04 Event 3 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -28,7 +28,7 @@ Event 3 Batch 0 2 6.758793342627306e+02 1.455349847705337e+02 4.360940220328824e+02 -4.954335945799966e+02 3 3.008019460079605e+02 -1.607139834787174e+02 2.732727402256846e+01 2.527964523704278e+02 4 5.233187197293092e+02 1.517899870818368e+01 -4.634212960554508e+02 2.426371422095687e+02 - ME 7.753277710143621e-05 + ME 4.288074098478053e-05 Event 4 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -36,7 +36,7 @@ Event 4 Batch 0 2 
3.540811678028369e+02 5.414642718170588e+01 -3.497885023717100e+02 -9.467915537920108e+00 3 7.415000547748695e+02 1.453779348794601e+00 7.277337852109665e+02 1.422102514562805e+02 4 4.044187774222938e+02 -5.560020653050046e+01 -3.779452828392566e+02 -1.327423359183605e+02 - ME 2.015528729476554e-04 + ME 1.304731284254719e-05 Event 5 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -44,7 +44,7 @@ Event 5 Batch 0 2 4.747467875786874e+02 2.462969907607520e+02 3.713870243947702e+02 1.636886763636381e+02 3 3.438196236093862e+02 -2.056491112573935e+02 2.636029701703988e+02 8.021128807897365e+01 4 6.814335888119255e+02 -4.064787950335840e+01 -6.349899945651691e+02 -2.438999644426124e+02 - ME 6.140777519977192e-04 + ME 1.932390649640220e-04 Event 6 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -52,7 +52,7 @@ Event 6 Batch 0 2 5.623951200922340e+02 4.644673798421034e+02 3.089047820108764e+02 -7.166700647426805e+01 3 2.268243199894467e+02 1.761899852590787e+02 -7.114332369064562e+01 -1.238748914321566e+02 4 7.107805599183188e+02 -6.406573651011822e+02 -2.377614583202307e+02 1.955418979064247e+02 - ME 8.375373201653861e-04 + ME 1.929702539767979e-04 Event 7 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -60,7 +60,7 @@ Event 7 Batch 0 2 4.922243378496302e+02 2.878585072835456e+02 -1.441537488072182e+02 -3.723465794939189e+02 3 2.873990637609374e+02 -5.400981623596619e+01 -8.913204919452846e+01 -2.678369642286231e+02 4 7.203765983894325e+02 -2.338486910475794e+02 2.332857980017467e+02 6.401835437225419e+02 - ME 2.045598717079573e-03 + ME 6.280412585349807e-04 Event 8 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -68,7 +68,7 @@ Event 8 Batch 0 2 3.353309706037128e+02 -7.529439061162444e+01 -4.917829145606096e+01 -3.230466069128648e+02 3 7.169322705461503e+02 
-1.597426278178964e+02 -1.460012137440150e+01 6.987567601563110e+02 4 4.477367588501368e+02 2.350370184295208e+02 6.377841283046249e+01 -3.757101532434461e+02 - ME 5.176104304710922e-03 + ME 1.424871539111113e-03 Event 9 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -76,7 +76,7 @@ Event 9 Batch 0 2 2.557626120875720e+02 2.000882245504951e+02 -5.276260741790070e+01 -1.503174088272977e+02 3 7.044202058180884e+02 -6.969679478438196e+02 -1.019614549623775e+02 6.882422911146106e+00 4 5.398171820943397e+02 4.968797232933244e+02 1.547240623802783e+02 1.434349859161515e+02 - ME 6.498215193902510e-05 + ME 1.126010180174107e-05 Event 10 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -84,7 +84,7 @@ Event 10 Batch 0 2 3.466796552973448e+02 1.172124288883391e+02 -1.804077050554743e+02 2.718475489457261e+02 3 5.174471655316495e+02 -1.610456139025784e+02 -4.497410659869822e+02 -1.988689340353916e+02 4 6.358731791710053e+02 4.383318501423926e+01 6.301487710424565e+02 -7.297861491033444e+01 - ME 2.111165581639245e-04 + ME 8.292383053707579e-05 Event 11 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -92,7 +92,7 @@ Event 11 Batch 0 2 5.730783827248506e+02 -3.059484875398849e+01 3.466457017175528e+02 -4.553235612803233e+02 3 4.410994673708892e+02 -3.026218886155176e+02 -1.990641070399019e+01 3.203005892260318e+02 4 4.858221499042607e+02 3.332167373695061e+02 -3.267392910135624e+02 1.350229720542913e+02 - ME 5.129802099928076e-05 + ME 2.195851954305949e-05 Event 12 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -100,7 +100,7 @@ Event 12 Batch 0 2 2.275003875859171e+02 -1.247450244086003e+02 1.654605359856639e+02 9.390376067217456e+01 3 6.138170466352969e+02 3.363961838598331e+02 -2.139358085817026e+01 5.129827374509639e+02 4 6.586825657787861e+02 -2.116511594512328e+02 
-1.440669551274935e+02 -6.068864981231385e+02 - ME 5.249882090061186e-02 + ME 3.843244876666358e-03 Event 13 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -108,7 +108,7 @@ Event 13 Batch 0 2 2.867684047377951e+02 7.055192702127012e+01 -2.028354730671929e+02 1.900429278217245e+02 3 6.990707050557395e+02 -5.605742285334717e+02 2.413419117565430e+02 -3.408965629057132e+02 4 5.141608902064654e+02 4.900223015122016e+02 -3.850643868935023e+01 1.508536350839886e+02 - ME 6.422048006176975e-05 + ME 1.780264803426774e-05 Event 14 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -116,7 +116,7 @@ Event 14 Batch 0 2 3.551549262960330e+02 1.090410064132905e+02 3.205839746298526e+02 1.071027348074892e+02 3 5.276349775014137e+02 3.895763694332612e+02 -2.529209653865598e+02 2.503196099590423e+02 4 6.172100962025531e+02 -4.986173758465519e+02 -6.766300924329285e+01 -3.574223447665315e+02 - ME 7.422587439250419e-04 + ME 1.172793340377339e-04 Event 15 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -124,7 +124,7 @@ Event 15 Batch 0 2 5.846731991828425e+02 7.106081559720657e+01 3.900476102503054e+02 4.297161529048979e+02 3 2.829885923647302e+02 -2.767806781033229e+02 5.223342094943639e+01 -2.732525156618249e+01 4 6.323382084524278e+02 2.057198625061163e+02 -4.422810311997417e+02 -4.023909013387152e+02 - ME 1.255922738422332e-03 + ME 2.768931482482754e-04 Event 16 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -132,7 +132,7 @@ Event 16 Batch 0 2 7.471577506095512e+02 1.666056475215676e+02 -5.784682380714994e+02 -4.425627187781379e+02 3 6.589296733908160e+02 -1.235441202519038e+02 5.251239647671507e+02 3.783780998595698e+02 4 9.391257599963087e+01 -4.306152726966400e+01 5.334427330434855e+01 6.418461891856485e+01 - ME 5.526726502577864e-05 + ME 3.619360847906487e-05 Event 17 
Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -140,7 +140,7 @@ Event 17 Batch 0 2 3.567490993131759e+02 3.856364495163717e+01 -1.708845728849435e+02 -3.107752047682324e+02 3 6.453207560475681e+02 4.468356462873772e+02 2.282834847349605e+02 4.057874246326636e+02 4 4.979301446392561e+02 -4.853992912390142e+02 -5.739891185001719e+01 -9.501221986443127e+01 - ME 1.327369996555111e-04 + ME 3.400819398697452e-05 Event 18 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -148,7 +148,7 @@ Event 18 Batch 0 2 4.856701782481425e+02 2.509110753153842e+02 -3.498523763974107e+02 -2.247720379690150e+02 3 3.014847498930008e+02 -1.059425909901355e+02 -2.435847754696140e+02 -1.426032222348426e+02 4 7.128450718588564e+02 -1.449684843252488e+02 5.934371518670247e+02 3.673752602038576e+02 - ME 1.018512933050835e-03 + ME 1.704840743724005e-04 Event 19 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -156,7 +156,7 @@ Event 19 Batch 0 2 5.848213503304410e+02 -3.141116763848333e+02 -1.950442390378232e+02 4.531088295091878e+02 3 5.769300027107226e+02 5.020221748138873e+02 2.252239828724832e+02 -1.734823378963534e+02 4 3.382486469588368e+02 -1.879104984290540e+02 -3.017974383465995e+01 -2.796264916128346e+02 - ME 4.267017342507976e-03 + ME 1.566312636528492e-04 Event 20 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -164,7 +164,7 @@ Event 20 Batch 0 2 5.550938429889906e+02 -4.478597170519693e+02 -1.958065402362923e+02 -2.630791652090858e+02 3 5.585686897587655e+02 3.351111310173187e+02 -1.360174455686903e+02 4.256744830831253e+02 4 3.863374672522434e+02 1.127485860346507e+02 3.318239858049826e+02 -1.625953178740396e+02 - ME 2.768271682113988e-04 + ME 4.443882992804106e-05 Event 21 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-172,7 +172,7 @@ Event 21 Batch 0 2 6.296556563991993e+02 -3.477135312394776e+02 -1.376147989324512e+02 -5.065804111325866e+02 3 3.137568007204202e+02 1.080474571851863e+02 -2.382188236683311e+02 1.732653140250679e+02 4 5.565875428803801e+02 2.396660740542913e+02 3.758336226007823e+02 3.333150971075189e+02 - ME 5.519034669639832e-05 + ME 2.195742323347977e-05 Event 22 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -180,7 +180,7 @@ Event 22 Batch 0 2 5.583338925767162e+02 2.471586228668332e+02 -1.597599499756147e+02 -4.744745610949311e+02 3 5.378723432497920e+02 9.149532098241385e+00 4.314513680009925e+02 3.210493120152684e+02 4 4.037937641734921e+02 -2.563081549650745e+02 -2.716914180253778e+02 1.534252490796627e+02 - ME 3.705224437539572e-05 + ME 1.393143104564022e-05 Event 23 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -188,7 +188,7 @@ Event 23 Batch 0 2 6.057340011976822e+02 6.848115528115159e+01 -5.207204912425279e+02 -3.017849923015605e+02 3 6.884459352783615e+02 -2.949639632364767e+01 6.680977958792448e+02 1.635026102131439e+02 4 2.058200635239559e+02 -3.898475895750391e+01 -1.473773046367171e+02 1.382823820884168e+02 - ME 2.946248744974782e-05 + ME 1.074117284514867e-05 Event 24 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -196,7 +196,7 @@ Event 24 Batch 0 2 4.702316790647315e+02 -1.210575128627593e+02 4.313728504035306e+02 -1.427598490831810e+02 3 7.180482366151732e+02 1.040047389253588e+02 -7.104588047260974e+02 4.956931953573291e+00 4 3.117200843200960e+02 1.705277393740069e+01 2.790859543225674e+02 1.378029171296075e+02 - ME 3.146557994448562e-05 + ME 5.213387311993420e-06 Event 25 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -204,7 +204,7 @@ Event 25 Batch 0 2 6.261365010744016e+02 -5.354018140499276e+02 -2.095559720530078e+02 
2.479477970595020e+02 3 5.483958991041942e+02 5.199465180092641e+02 -9.843995208133505e+01 -1.438862620216537e+02 4 3.254675998214045e+02 1.545529604066345e+01 3.079959241343431e+02 -1.040615350378483e+02 - ME 1.657640191611339e-04 + ME 1.695323153210731e-05 Event 26 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -212,7 +212,7 @@ Event 26 Batch 0 2 4.635816356180677e+02 1.904702824079147e+02 -2.351549941335565e+02 -3.511853259118595e+02 3 3.686385821486527e+02 -2.712527815845713e+02 -6.015354190959191e+01 -2.422764621809819e+02 4 6.677797822332798e+02 8.078249917665664e+01 2.953085360431485e+02 5.934617880928415e+02 - ME 3.250975879010065e-04 + ME 1.052251904460155e-04 Event 27 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -220,7 +220,7 @@ Event 27 Batch 0 2 2.851713673150520e+02 1.387976072955998e+02 1.520424011317634e+02 -1.973348453858079e+02 3 6.747356481771329e+02 2.426633222154767e+02 -4.300238522839811e+02 4.598501858640580e+02 4 5.400929845078149e+02 -3.814609295110765e+02 2.779814511522176e+02 -2.625153404782502e+02 - ME 4.155279516527712e-04 + ME 7.957109124083736e-05 Event 28 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -228,7 +228,7 @@ Event 28 Batch 0 2 1.977804200471008e+02 -1.803202618401224e+02 -8.082809162516925e+01 -8.277519444290659e+00 3 7.197523834069627e+02 3.152541965091956e+02 6.467033971658861e+02 -2.080867841663842e+01 4 5.824671965459364e+02 -1.349339346690732e+02 -5.658753055407169e+02 2.908619786092899e+01 - ME 1.172809031809504e-04 + ME 1.748013159755222e-05 Event 29 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -236,7 +236,7 @@ Event 29 Batch 0 2 6.123364628491765e+02 -3.746492624245139e+02 3.785128791537567e+02 -3.021950929683376e+02 3 4.056577755659300e+02 1.796205570313495e+00 -8.781658530568643e+01 
3.960344074293251e+02 4 4.820057615848937e+02 3.728530568542006e+02 -2.906962938480702e+02 -9.383931446098750e+01 - ME 5.496242925842306e-04 + ME 3.085570985177973e-04 Event 30 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -244,7 +244,7 @@ Event 30 Batch 0 2 7.349194950356053e+02 7.241679607953656e+02 1.425637322816703e+01 1.244354634469208e+02 3 7.321421454671275e+02 -7.253765693071590e+02 -2.895970851972107e+01 -9.498573130653318e+01 4 3.293835949726734e+01 1.208608511793152e+00 1.470333529155409e+01 -2.944973214038765e+01 - ME 5.147061682527938e-02 + ME 3.267107835672361e-04 Event 31 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -252,7 +252,7 @@ Event 31 Batch 0 2 1.718338270585457e+02 -1.344914872264095e+02 -1.021614404532311e+02 3.165350011824393e+01 3 6.313115253715935e+02 -2.849940593920691e+02 -7.916450257599642e+01 -5.577325610990745e+02 4 6.968546475698608e+02 4.194855466184786e+02 1.813259430292275e+02 5.260790609808306e+02 - ME 4.645345268703414e-04 + ME 1.685680846028125e-04 Event 32 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -260,7 +260,7 @@ Event 32 Batch 0 2 7.235176898898732e+02 -4.762113006241282e+02 -2.880822916693121e+01 5.439400065022983e+02 3 6.603902828461299e+02 4.672103814637360e+02 1.031050210016798e+02 -4.551913221650266e+02 4 1.160920272639969e+02 9.000919160392018e+00 -7.429679183474862e+01 -8.874868433727177e+01 - ME 4.476006843186700e-03 + ME 2.173072900368875e-04 Event 33 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -268,7 +268,7 @@ Event 33 Batch 0 2 4.786737271642286e+02 2.009638309376703e+02 4.090184839380260e+02 1.464443769121513e+02 3 3.795793219608408e+02 -6.057523839522271e+00 -8.244277697544294e+01 3.704685635647950e+02 4 6.417469508749314e+02 -1.949063070981495e+02 -3.265757069625828e+02 
-5.169129404769461e+02 - ME 1.351709676586880e-02 + ME 3.322437827682699e-03 Event 34 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -276,7 +276,7 @@ Event 34 Batch 0 2 6.621583515140109e+02 -5.051303032557109e+02 -1.429543729176959e+02 4.035605363216953e+02 3 3.008522892707525e+02 8.677543723835062e+01 2.726747894692539e+02 -9.290092916351111e+01 4 5.369893592152367e+02 4.183548660173603e+02 -1.297204165515579e+02 -3.106596071581844e+02 - ME 6.460854093057828e-04 + ME 9.294666462955388e-05 Event 35 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -284,7 +284,7 @@ Event 35 Batch 0 2 6.158114977149372e+02 2.502256147979830e+02 4.233348779616202e+00 5.626659943296695e+02 3 1.476397433483021e+02 -1.670550278282843e+01 -6.055370982200890e+01 1.336101351676488e+02 4 7.365487589367605e+02 -2.335201120151546e+02 5.632036104239269e+01 -6.962761294973184e+02 - ME 2.101231899117793e+00 + ME 5.450893768264864e-01 Event 36 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -292,7 +292,7 @@ Event 36 Batch 0 2 7.182456511154913e+02 -7.463771462544163e+01 -6.667773110518942e+02 2.563475070450518e+02 3 4.860008755751825e+02 -7.840660561780868e+01 4.141081959217036e+02 -2.419992919944378e+02 4 2.957534733093268e+02 1.530443202432501e+02 2.526691151301903e+02 -1.434821505061448e+01 - ME 9.644531209480271e-05 + ME 1.793136635525090e-05 Event 37 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -300,7 +300,7 @@ Event 37 Batch 0 2 5.672182018814327e+02 -2.031706828392718e+00 -5.267408190306547e+02 2.104197478372323e+02 3 4.664069288608281e+02 3.712365792892206e+02 2.604523782658950e+02 -1.090109358856581e+02 4 4.663748692577387e+02 -3.692048724608279e+02 2.662884407647597e+02 -1.014088119515743e+02 - ME 1.216876552012178e-04 + ME 1.885829354904198e-05 Event 38 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -308,7 +308,7 @@ Event 38 Batch 0 2 5.068057345787187e+02 4.883513201966852e+02 -7.570036138649985e+01 -1.124032737511800e+02 3 3.871140338254017e+02 -1.153787089711745e+02 -3.599073977747533e+02 -8.373585688177315e+01 4 6.060802315958797e+02 -3.729726112255107e+02 4.356077591612532e+02 1.961391306329531e+02 - ME 1.006736553113524e-04 + ME 2.004468492837133e-05 Event 39 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -316,7 +316,7 @@ Event 39 Batch 0 2 4.960337392567769e+02 -3.669089247616476e+02 2.651961920161227e+02 -2.027271347192069e+02 3 2.837821967046824e+02 -2.822567153069604e+02 -2.935613327724534e+01 -1.303560381865560e+00 4 7.201840640385411e+02 6.491656400686079e+02 -2.358400587388775e+02 2.040306951010725e+02 - ME 1.372807525012575e-03 + ME 2.738639406673165e-04 Event 40 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -324,7 +324,7 @@ Event 40 Batch 0 2 3.080730228651936e+02 -3.065830270999447e+02 -2.484308296331460e+01 1.728167064871203e+01 3 6.842346640746094e+02 4.630487823766367e+02 8.554554725666550e+01 -4.964321303112498e+02 4 5.076923130601962e+02 -1.564657552766919e+02 -6.070246429335075e+01 4.791504596625378e+02 - ME 4.192363154074847e-05 + ME 4.316353181637933e-05 Event 41 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -332,7 +332,7 @@ Event 41 Batch 0 2 1.602650851118221e+02 -1.258781096038287e+02 -9.817642232798531e+01 1.417706342452912e+01 3 7.146392966623014e+02 6.799675591776853e+02 -1.019163870176435e+02 1.948499239342933e+02 4 6.250956182258764e+02 -5.540894495738563e+02 2.000928093456288e+02 -2.090269873588226e+02 - ME 4.523507186168379e-04 + ME 6.118266190948034e-05 Event 42 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -340,7 +340,7 
@@ Event 42 Batch 0 2 1.687893235969910e+02 1.289401357197518e+02 4.788693514682045e+01 9.783209393213438e+01 3 7.042017295435162e+02 -1.022058447296739e+02 -6.640064324330017e+02 -2.110675220936915e+02 4 6.270089468594927e+02 -2.673429099007782e+01 6.161194972861812e+02 1.132354281615572e+02 - ME 1.686356189272381e-04 + ME 4.091574289077424e-05 Event 43 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -348,7 +348,7 @@ Event 43 Batch 0 2 4.729783670130408e+02 -7.983817933050123e+01 9.052957805204315e+01 4.573169538528310e+02 3 5.638402597824536e+02 4.785250044669658e+02 7.435095949863268e+01 -2.887933404236804e+02 4 4.631813732045056e+02 -3.986868251364646e+02 -1.648805375506758e+02 -1.685236134291506e+02 - ME 5.938757690519573e-04 + ME 2.654067897204875e-04 Event 44 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -356,7 +356,7 @@ Event 44 Batch 0 2 1.774791104122977e+02 -1.952605982635784e+01 6.371003613266313e+01 1.644949814321787e+02 3 7.194816205691247e+02 -3.678871192485065e+02 2.644831693887214e+01 -6.177486190667772e+02 4 6.030392690185777e+02 3.874131790748646e+02 -9.015835307153536e+01 4.532536376345985e+02 - ME 2.092333697371024e-04 + ME 1.390282437939369e-04 Event 45 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -364,7 +364,7 @@ Event 45 Batch 0 2 7.477488480180839e+02 -3.787655987618923e+02 1.634662296474455e+02 6.236535517992064e+02 3 7.458113398274099e+02 3.819163358711198e+02 -1.661042992235261e+02 -6.186952632673017e+02 4 6.439812154506046e+00 -3.150737109227506e+00 2.638069576080606e+00 -4.958288531904773e+00 - ME 9.377954359926730e-02 + ME 4.591622113024210e-03 Event 46 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -372,7 +372,7 @@ Event 46 Batch 0 2 3.243146757688279e+02 -4.392587631431587e+00 -2.496903827548322e+02 
-2.069188895501946e+02 3 5.341608950426614e+02 -2.704482657861201e+02 2.711825143656835e+02 -3.723515022507137e+02 4 6.415244291885106e+02 2.748408534175518e+02 -2.149213161085120e+01 5.792703918009084e+02 - ME 1.879047912263320e-04 + ME 7.845213441237594e-05 Event 47 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -380,7 +380,7 @@ Event 47 Batch 0 2 6.742198761450968e+02 -3.282965096491567e+02 5.301803926793563e+02 -2.563251730900704e+02 3 6.484148720042493e+02 3.527030795571956e+02 -3.975273148506379e+02 3.715029176935211e+02 4 1.773652518506536e+02 -2.440656990803885e+01 -1.326530778287185e+02 -1.151777446034508e+02 - ME 1.136665455996279e-03 + ME 5.254395938575492e-05 Event 48 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -388,7 +388,7 @@ Event 48 Batch 0 2 7.321401810535270e+02 -1.843482647928687e+02 4.412348098999295e+02 5.543976952635381e+02 3 7.293058265076229e+02 2.182722651304250e+02 -4.435200216702997e+02 -5.362221528717154e+02 4 3.855399243885009e+01 -3.392400033755636e+01 2.285211770370227e+00 -1.817554239182278e+01 - ME 2.278442596973106e-03 + ME 2.330290263553363e-04 Event 49 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -396,7 +396,7 @@ Event 49 Batch 0 2 3.511117284856090e+02 -3.272266866652174e+02 5.199533974843238e+01 1.161835877338140e+02 3 7.326526490901410e+02 6.615045961628415e+02 -2.993354007364775e+02 -9.792799058578566e+01 4 4.162356224242500e+02 -3.342779094976241e+02 2.473400609880451e+02 -1.825559714802838e+01 - ME 8.806759903737244e-05 + ME 7.863589115869630e-06 Event 50 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -404,7 +404,7 @@ Event 50 Batch 0 2 7.322170903075255e+02 2.740692406080844e+02 1.952596610981929e+01 -6.787095515302592e+02 3 3.078559130669522e+02 -1.663333363406682e+02 8.625456119089935e+01 
2.442716420418760e+02 4 4.599269966255216e+02 -1.077359042674159e+02 -1.057805273007185e+02 4.344379094883832e+02 - ME 7.579426018596712e-05 + ME 6.765758192049922e-05 Event 51 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -412,7 +412,7 @@ Event 51 Batch 0 2 3.473696038265160e+02 -2.922314643158454e+02 -6.759614889845234e+01 -1.752060888796554e+02 3 5.389399151999496e+02 -2.449040872454050e+02 9.346474502284556e+01 4.708954891311219e+02 4 6.136904809735339e+02 5.371355515612503e+02 -2.586859612439322e+01 -2.956894002514666e+02 - ME 4.687828430739845e-04 + ME 2.035652280642710e-04 Event 52 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -420,7 +420,7 @@ Event 52 Batch 0 2 6.818614816439094e+02 5.970116833066725e+02 3.013730734325877e+02 1.329902280423528e+02 3 2.108623144448950e+02 -4.198344769951654e+00 -1.698802183673395e+02 -1.248439063859965e+02 4 6.072762039111957e+02 -5.928133385367207e+02 -1.314928550652483e+02 -8.146321656356344e+00 - ME 1.636869658416981e-04 + ME 4.047005152694340e-05 Event 53 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -428,7 +428,7 @@ Event 53 Batch 0 2 5.157714002491656e+02 -5.140718537651751e+02 -4.182413977701254e+01 1.003899065692042e+00 3 5.148181840855221e+02 2.868792199999327e+02 1.974924151010656e+02 3.791237552236646e+02 4 4.694104156653124e+02 2.271926337652422e+02 -1.556682753240530e+02 -3.801276542893567e+02 - ME 3.182294022992135e-03 + ME 1.547751010871262e-04 Event 54 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -436,7 +436,7 @@ Event 54 Batch 0 2 6.433410767101752e+02 2.586883950027282e+02 -5.809813083922761e+02 9.710187728524583e+01 3 6.928799734080563e+02 -1.579832568796111e+02 6.405510983559769e+02 -2.117031848853746e+02 4 1.637789498817686e+02 -1.007051381231171e+02 -5.956978996370073e+01 
1.146013076001288e+02 - ME 3.280140142776471e-05 + ME 1.302720215079095e-05 Event 55 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -444,7 +444,7 @@ Event 55 Batch 0 2 7.193759752058201e+02 -3.536444481659258e+02 -7.212523476050659e+01 -6.222823703878202e+02 3 5.307053661742267e+02 2.409461639849982e+02 1.900944302490854e+02 4.329633233142391e+02 4 2.499186586199529e+02 1.126982841809279e+02 -1.179691954885788e+02 1.893190470735813e+02 - ME 3.939174164528502e-05 + ME 3.087450123310173e-05 Event 56 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -452,7 +452,7 @@ Event 56 Batch 0 2 3.858864959547013e+02 1.815174721437793e+02 3.218581876578407e+02 -1.112074732396182e+02 3 4.484505297447187e+02 -3.244105157450006e+02 2.934585578803474e+02 -9.873079412811623e+01 4 6.656629743005793e+02 1.428930436012212e+02 -6.153167455381879e+02 2.099382673677345e+02 - ME 2.326138625268126e-04 + ME 4.275995533811995e-05 Event 57 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -460,7 +460,7 @@ Event 57 Batch 0 2 5.284589752749192e+02 3.868194647882293e+02 -1.709996888155517e+02 3.168575336559793e+02 3 6.299868555278971e+02 -1.587414880613579e+02 2.327134172236622e+02 -5.634971548731005e+02 4 3.415541691971835e+02 -2.280779767268714e+02 -6.171372840811043e+01 2.466396212171210e+02 - ME 3.474853710074164e-05 + ME 2.211478424702745e-05 Event 58 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -468,7 +468,7 @@ Event 58 Batch 0 2 6.172037319760957e+02 -2.246119436411400e+02 -2.286037628748728e+01 5.744278237820342e+02 3 5.117934503257735e+02 1.262762853074207e+02 3.215736628881853e+02 -3.775939815489577e+02 4 3.710028176981306e+02 9.833565833371921e+01 -2.987132866006979e+02 -1.968338422330765e+02 - ME 6.183305374210038e-04 + ME 1.857727050583390e-04 Event 59 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -476,7 +476,7 @@ Event 59 Batch 0 2 7.388935626701858e+02 -3.912134623809441e+02 -5.457789630286015e+02 3.082872805076099e+02 3 1.936051438730608e+02 1.561492575196544e+02 8.304673385628061e+01 -7.876294246644987e+01 4 5.675012934567535e+02 2.350642048612896e+02 4.627322291723209e+02 -2.295243380411600e+02 - ME 4.116991424436793e-04 + ME 6.745345781245190e-05 Event 60 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -484,7 +484,7 @@ Event 60 Batch 0 2 7.258141426633659e+02 -5.584991156701968e+02 1.635894950857984e+02 4.337319270970709e+02 3 2.789580074371136e+02 2.331554478032953e+02 6.512410160032128e+01 -1.386180308029247e+02 4 4.952278498995201e+02 3.253436678669015e+02 -2.287135966861195e+02 -2.951138962941461e+02 - ME 7.295672680059989e-04 + ME 9.170244877267536e-05 Event 61 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -492,15 +492,15 @@ Event 61 Batch 0 2 5.906141202026897e+02 4.485275282318680e+02 -2.043613424290570e+02 3.253990429020988e+02 3 4.163572165237975e+02 -4.021600557528675e+02 -4.112755461437413e+01 9.964509802161204e+01 4 4.930286632735124e+02 -4.636747247900051e+01 2.454888970434311e+02 -4.250441409237108e+02 - ME 5.845307122272604e-03 + ME 1.836685601489136e-04 Event 62 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 1 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 -7.500000000000000e+02 2 7.346180891175762e+02 3.693463141798367e+02 7.549194961263061e+01 -6.305140780380819e+02 3 4.420621433230785e+02 -2.806743363126464e+02 3.467380983154045e+01 3.397625382625571e+02 - 4 3.233197675593453e+02 -8.867197786719018e+01 -1.101657594441711e+02 2.907515397755249e+02 - ME 3.963631774242112e-05 + 4 3.233197675593452e+02 -8.867197786719018e+01 -1.101657594441711e+02 2.907515397755248e+02 + ME 
3.490896135533686e-05 Event 63 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -508,7 +508,7 @@ Event 63 Batch 0 2 6.451039732729313e+02 -2.415045377667665e+02 1.990362537024482e+02 -5.641092662620230e+02 3 3.260870385294104e+02 2.061141051805976e+02 -2.496695602716584e+02 3.892098426606745e+01 4 5.288089881976584e+02 3.539043258616898e+01 5.063330656921013e+01 5.251882819959555e+02 - ME 4.832224458906289e-04 + ME 4.428689394331114e-04 Event 64 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -516,7 +516,7 @@ Event 64 Batch 0 2 5.275973380665291e+02 -6.064553482667328e+01 4.309976929667101e+02 -2.981980196075213e+02 3 5.799838776791826e+02 3.279821268626862e+02 -1.824214634122377e+02 4.421893627315650e+02 4 3.924187842542880e+02 -2.673365920360130e+02 -2.485762295544724e+02 -1.439913431240437e+02 - ME 2.175617604507715e-04 + ME 4.205989960223865e-05 Event 65 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -524,7 +524,7 @@ Event 65 Batch 0 2 6.480172869826541e+02 2.720879118036237e+02 -5.153900904044360e+02 -2.833154199679406e+02 3 7.075023253568394e+02 -3.440299289242928e+02 4.709796137500282e+02 4.004761563708322e+02 4 1.444803876605064e+02 7.194201712066916e+01 4.441047665440794e+01 -1.171607364028916e+02 - ME 4.989956280474397e-03 + ME 1.103463366798231e-04 Event 66 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -532,7 +532,7 @@ Event 66 Batch 0 2 5.472978185025795e+02 4.857452785131266e+02 -2.223654169683454e+02 -1.189119332799752e+02 3 3.203062148499983e+02 1.169702135976477e+02 2.922172461416276e+02 -5.935588816501102e+01 4 6.323959666474225e+02 -6.027154921107744e+02 -6.985182917328234e+01 1.782678214449862e+02 - ME 1.346850069104626e-04 + ME 2.913920636000223e-05 Event 67 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 
7.500000000000000e+02 @@ -540,7 +540,7 @@ Event 67 Batch 0 2 4.264671493042950e+02 1.195959046886511e+02 -2.647539231733031e+02 3.122121220929446e+02 3 5.059969655247565e+02 3.777175441887567e+02 -7.608313561896731e+00 -3.366073372596325e+02 4 5.675358851709483e+02 -4.973134488774080e+02 2.723622367352000e+02 2.439521516668857e+01 - ME 9.763221977220593e-05 + ME 4.009347519102052e-05 Event 68 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -548,7 +548,7 @@ Event 68 Batch 0 2 5.996105691520872e+02 -3.814725562071957e+02 -3.417794545715573e+02 3.117664637712124e+02 3 2.164196744806214e+02 1.292759463548889e+02 -1.184749651041615e+02 1.268419798013013e+02 4 6.839697563672917e+02 2.521966098523068e+02 4.602544196757188e+02 -4.386084435725137e+02 - ME 2.936083529685707e-03 + ME 6.175473672610461e-04 Event 69 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -556,7 +556,7 @@ Event 69 Batch 0 2 4.950546755511076e+02 -1.873718558932053e+02 -4.578972175289678e+02 -1.735101101888631e+01 3 4.768584394819691e+02 -1.830244097668608e+02 2.985566003539791e+02 -3.236664843936508e+02 4 5.280868849669230e+02 3.703962656600661e+02 1.593406171749887e+02 3.410174954125370e+02 - ME 5.234212626720279e-05 + ME 1.367292435278724e-05 Event 70 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -564,7 +564,7 @@ Event 70 Batch 0 2 6.918343395272258e+02 6.895733556028865e+02 -5.391072441382606e+01 -1.473005040127906e+01 3 2.169590284692678e+02 -1.127375202028747e+02 1.807969800614662e+02 4.091361110301506e+01 4 5.912066320035063e+02 -5.768358354000119e+02 -1.268862556476402e+02 -2.618356070173603e+01 - ME 1.591740981760110e-04 + ME 3.526540789264872e-05 Event 71 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -572,7 +572,7 @@ Event 71 Batch 0 2 5.156371334918733e+02 1.547202099034306e+02 
-4.807172487652236e+02 1.041836686949964e+02 3 3.718518305526428e+02 -8.969821893462726e+01 -7.521366892975188e+01 -3.529460545344468e+02 4 6.125110359554843e+02 -6.502199096880338e+01 5.559309176949756e+02 2.487623858394504e+02 - ME 1.125100552069616e-04 + ME 2.860782472746935e-05 Event 72 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -580,7 +580,7 @@ Event 72 Batch 0 2 2.110577464974889e+02 5.009520239746097e+01 -1.453533690489527e+02 -1.445968227848547e+02 3 7.317124633441161e+02 -4.429659627226336e+02 5.264774879404380e+02 2.490095170354977e+02 4 5.572297901583943e+02 3.928707603251725e+02 -3.811241188914850e+02 -1.044126942506430e+02 - ME 1.823320413479066e-04 + ME 2.666441446531882e-05 Event 73 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -588,7 +588,7 @@ Event 73 Batch 0 2 3.932257450488246e+02 3.105005764664288e+01 -2.932679039283983e+02 2.601082794045340e+02 3 5.658879124646472e+02 3.645905401293642e+02 4.244364556305355e+02 8.459646951004230e+01 4 5.408863424865281e+02 -3.956405977760074e+02 -1.311685517021372e+02 -3.447047489145762e+02 - ME 8.953763196089171e-04 + ME 7.825486685913998e-05 Event 74 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -596,7 +596,7 @@ Event 74 Batch 0 2 1.374854102925440e+02 7.785209805930555e+01 4.289805712042688e+01 1.048858692406466e+02 3 6.381281910764947e+02 -1.004137270491618e+02 -1.591026937267357e+02 6.097630724433484e+02 4 7.243863986309617e+02 2.256162898985645e+01 1.162046366063089e+02 -7.146489416839951e+02 - ME 1.395531292378326e+01 + ME 1.919068868336380e+00 Event 75 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -604,7 +604,7 @@ Event 75 Batch 0 2 5.936883054156938e+02 -3.438525101293572e+00 -2.706855443967301e+02 5.283780053968293e+02 3 5.912298912592892e+02 1.109657062166288e+02 
4.832067437414102e+02 -3.221034603433170e+02 4 3.150818033250173e+02 -1.075271811153352e+02 -2.125211993446803e+02 -2.062745450535123e+02 - ME 1.379908325625592e-03 + ME 1.642862842910461e-04 Event 76 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -612,7 +612,7 @@ Event 76 Batch 0 2 6.619486867997672e+02 2.801967015359571e+01 2.136411519593737e+02 6.258980909300584e+02 3 1.201252731414031e+02 2.274423842261747e+01 -8.754996679960182e+01 7.904292618103446e+01 4 7.179260400588295e+02 -5.076390857621322e+01 -1.260911851597719e+02 -7.049410171110928e+02 - ME 5.870483941147637e+00 + ME 7.362202483972824e-01 Event 77 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -620,7 +620,7 @@ Event 77 Batch 0 2 7.456676259451606e+02 -7.346624001550109e+02 6.511229493320701e+01 -1.097804865615983e+02 3 1.284204120828029e+02 1.251494694834492e+02 2.867183268690428e+01 2.708973588335753e+00 4 6.259119619720373e+02 6.095129306715618e+02 -9.378412762011118e+01 1.070715129732624e+02 - ME 1.662775178233579e-04 + ME 4.400761364703354e-05 Event 78 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -628,7 +628,7 @@ Event 78 Batch 0 2 7.040158920877628e+02 6.911264613612161e+02 -6.659640240533211e+01 -1.163937709034254e+02 3 5.185438503615327e+02 -4.976050220224222e+02 -1.270913363611937e+02 7.158742227342900e+01 4 2.774402575507044e+02 -1.935214393387939e+02 1.936877387665258e+02 4.480634862999637e+01 - ME 5.328004946641866e-05 + ME 9.352750539306009e-06 Event 79 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -636,7 +636,7 @@ Event 79 Batch 0 2 6.777589592768838e+02 1.742725197144059e+02 -4.776543849198212e+01 6.532264221831092e+02 3 5.725002211294488e+02 -1.786302554544233e+02 -1.627852110918317e+02 -5.189881598643107e+02 4 2.497408195936665e+02 4.357735740017474e+00 
2.105506495838138e+02 -1.342382623187985e+02 - ME 9.179311580246363e-04 + ME 3.598558866345749e-04 Event 80 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -644,7 +644,7 @@ Event 80 Batch 0 2 6.240819586861880e+02 4.679310297228965e+02 -4.118464023828053e+02 -3.002304821964348e+01 3 6.688675489057649e+02 -5.494372353172420e+02 3.251429131208653e+02 1.994607943266771e+02 4 2.070504924080468e+02 8.150620559434545e+01 8.670348926194001e+01 -1.694377461070337e+02 - ME 3.575286400583300e-03 + ME 5.382869847396148e-05 Event 81 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -652,7 +652,7 @@ Event 81 Batch 0 2 5.198056748722776e+02 1.034797897616987e+02 -2.885605608993972e+02 4.197888462474007e+02 3 5.672098642055398e+02 -4.160331805498524e+02 2.087659545613757e+01 -3.849773895903518e+02 4 4.129844609221831e+02 3.125533907881537e+02 2.676839654432596e+02 -3.481145665704891e+01 - ME 1.018936778946332e-04 + ME 3.612255741613163e-05 Event 82 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -660,7 +660,7 @@ Event 82 Batch 0 2 2.057598609140514e+02 6.385349666266659e+01 -2.765433460911293e+01 1.936364870179372e+02 3 6.235840147705873e+02 4.654039114453895e+02 -3.828889383639962e+02 -1.601633028106901e+02 4 6.706561243153629e+02 -5.292574081080552e+02 4.105432729731107e+02 -3.347318420724690e+01 - ME 6.930850923220120e-04 + ME 3.172622561805068e-04 Event 83 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -668,7 +668,7 @@ Event 83 Batch 0 2 6.583322583736492e+02 1.865539504254553e+02 -1.926584839569474e+02 6.012334775737429e+02 3 3.620902826842561e+02 -3.107067244571256e+02 -1.177956631152976e+01 -1.855584705935048e+02 4 4.795774589420946e+02 1.241527740316703e+02 2.044380502684771e+02 -4.156750069802382e+02 - ME 8.385116111585099e-03 + ME 6.756528802944365e-04 Event 84 
Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -676,7 +676,7 @@ Event 84 Batch 0 2 4.849329564663161e+02 -2.622178945286150e+02 4.068620488841210e+02 -2.941124332559817e+01 3 4.737588937677760e+02 6.014532316188546e+01 -1.333934272225749e+02 4.505954095412368e+02 4 5.413081497659077e+02 2.020725713667296e+02 -2.734686216615461e+02 -4.211841662156386e+02 - ME 5.162990427398554e-03 + ME 1.017468409980153e-03 Event 85 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -684,7 +684,7 @@ Event 85 Batch 0 2 7.085742632080854e+02 -2.174614026040270e+02 -5.283468657604088e+02 -4.190914152061853e+02 3 5.315764222715953e+02 8.528530557199829e+00 3.820092234108129e+02 3.695533927738615e+02 4 2.598493145203187e+02 2.089328720468272e+02 1.463376423495959e+02 4.953802243232388e+01 - ME 6.335517668355978e-05 + ME 1.894143727100354e-05 Event 86 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -692,7 +692,7 @@ Event 86 Batch 0 2 1.724500140939190e+02 1.231518677708316e+02 -1.121928207497684e+01 1.201946443701656e+02 3 7.028475062724231e+02 -6.467096040851287e+01 -4.553168759141600e+02 -5.315061866629339e+02 4 6.247024796336580e+02 -5.848090736231883e+01 4.665361579891369e+02 4.113115422927684e+02 - ME 1.165531323127631e-04 + ME 5.311384036847167e-05 Event 87 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -700,7 +700,7 @@ Event 87 Batch 0 2 1.942099203196796e+02 -7.751148196958454e+01 -1.356691819650310e+02 -1.153400900745028e+02 3 7.314670447251594e+02 1.724617634710876e+02 7.020747158546045e+02 1.113196793791551e+02 4 5.743230349551606e+02 -9.495028150150301e+01 -5.664055338895735e+02 4.020410695347637e+00 - ME 1.237609879052555e-04 + ME 1.874087134673149e-05 Event 88 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -708,7 
+708,7 @@ Event 88 Batch 0 2 6.382497024023744e+02 2.632142028760094e+02 -5.613974181649784e+02 1.513733956108635e+02 3 3.997044228265544e+02 -5.264940326118349e+01 3.435187961344461e+02 1.974500004195773e+02 4 4.620458747710724e+02 -2.105647996148253e+02 2.178786220305324e+02 -3.488233960304407e+02 - ME 1.863821317258467e-03 + ME 9.699609186666195e-05 Event 89 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -716,7 +716,7 @@ Event 89 Batch 0 2 1.419006640093282e+02 -8.677155154367878e+01 6.457545216231642e+01 -9.185046144153740e+01 3 7.131224514048055e+02 5.460003286026870e+02 -4.154556538506974e+02 -1.944836022569670e+02 4 6.449768845858670e+02 -4.592287770590082e+02 3.508802016883808e+02 2.863340636985044e+02 - ME 1.136115495374629e-04 + ME 2.974199953519439e-05 Event 90 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -724,7 +724,7 @@ Event 90 Batch 0 2 5.730615760623938e+02 -6.017783679015001e+01 -5.202921970507185e+02 -2.325386583054727e+02 3 5.389913703864468e+02 -6.302812531165206e+01 2.446311215742109e+02 4.761247390423042e+02 4 3.879470535511588e+02 1.232059621018019e+02 2.756610754765076e+02 -2.435860807368315e+02 - ME 1.094721025518881e-03 + ME 1.667772733247344e-04 Event 91 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -732,7 +732,7 @@ Event 91 Batch 0 2 4.546745139784350e+02 -1.470341619195494e+02 -1.726383255301703e+02 -3.940886669878754e+02 3 5.110976540119647e+02 -2.482119727393537e+02 -1.865817698532448e+02 4.059542728975803e+02 4 5.342278320096005e+02 3.952461346589030e+02 3.592200953834151e+02 -1.186560590970480e+01 - ME 8.789722587847313e-05 + ME 4.420313882846059e-05 Event 92 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -740,7 +740,7 @@ Event 92 Batch 0 2 6.683728375977241e+02 -1.148152650923627e+02 3.458291789782991e+02 
5.603051703379153e+02 3 2.872567998557088e+02 1.635098024620329e+02 7.847331657016402e+01 -2.227620976482501e+02 4 5.443703625465666e+02 -4.869453736967034e+01 -4.243024955484631e+02 -3.375430726896653e+02 - ME 8.270083568815311e-04 + ME 2.265252332392545e-04 Event 93 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -748,7 +748,7 @@ Event 93 Batch 0 2 5.666948073002088e+02 5.408074886689032e+01 5.639942928586390e+02 -1.134525653745258e+01 3 6.168025492529713e+02 2.439040545997395e+02 -5.541969602989467e+02 1.175666879272316e+02 4 3.165026434468199e+02 -2.979848034666298e+02 -9.797332559692304e+00 -1.062214313897791e+02 - ME 1.664960428447917e-04 + ME 1.251778043268437e-05 Event 94 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -756,7 +756,7 @@ Event 94 Batch 0 2 4.964349376711385e+02 8.445930034540567e+01 -2.409007074648561e+02 -4.257712097695705e+02 3 5.660980232871289e+02 1.373833465612049e+02 5.210669225216058e+02 1.734417778711397e+02 4 4.374670390417324e+02 -2.218426469066104e+02 -2.801662150567495e+02 2.523294318984307e+02 - ME 3.431641292834382e-05 + ME 1.007141026120618e-05 Event 95 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -764,7 +764,7 @@ Event 95 Batch 0 2 7.117074025057361e+02 -3.227984571262278e+02 4.276971164854593e+02 -4.684055501468919e+02 3 1.264078228725325e+02 8.675876182178401e+01 5.074873328843479e+01 7.665781760618943e+01 4 6.618847746217315e+02 2.360396953044439e+02 -4.784458497738940e+02 3.917477325407025e+02 - ME 2.121249861094822e-04 + ME 8.653822330208906e-05 Event 96 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -772,7 +772,7 @@ Event 96 Batch 0 2 7.329769441659936e+02 -9.642859092211874e+01 6.903981466332597e+02 -2.265107649915406e+02 3 3.937873938465678e+02 -4.837693103302091e+01 -3.847118583018795e+02 
6.873841850241256e+01 4 3.732356619874385e+02 1.448055219551397e+02 -3.056862883313802e+02 1.577723464891279e+02 - ME 3.473186069800973e-05 + ME 9.822975749896163e-06 Event 97 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -780,7 +780,7 @@ Event 97 Batch 0 2 3.394989963266853e+01 6.003767577498499e+00 -2.078495220615399e+01 2.616364312804199e+01 3 7.377311980366451e+02 -5.308290258162607e+02 4.681853362634530e+02 2.080152802450354e+02 4 7.283189023306861e+02 5.248252582387622e+02 -4.474003840572991e+02 -2.341789233730774e+02 - ME 2.063600678642283e-02 + ME 2.729355315721549e-03 Event 98 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -788,7 +788,7 @@ Event 98 Batch 0 2 2.496912687496082e+02 -2.485814905959506e+02 -5.435228288348340e-01 -2.350907922099247e+01 3 7.458289852530976e+02 7.373315781279124e+02 9.801365830907572e+01 -5.473885205171283e+01 4 5.044797459972945e+02 -4.887500875319618e+02 -9.747013548024091e+01 7.824793127270530e+01 - ME 6.800308216903296e-05 + ME 8.091578731489026e-06 Event 99 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -796,7 +796,7 @@ Event 99 Batch 0 2 1.698125854886770e+02 8.336002034290719e+01 8.774494220182726e+01 -1.191144253093525e+02 3 6.496622934125946e+02 5.714329899004554e+02 -6.230613627727958e+01 3.027265745152471e+02 4 6.805251210987285e+02 -6.547930102433627e+02 -2.543880592454771e+01 -1.836121492058947e+02 - ME 6.115029137493471e-04 + ME 1.856310681395454e-04 Event 100 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -804,7 +804,7 @@ Event 100 Batch 0 2 6.141460480129781e+02 -5.842473718080511e+02 -5.092222124447417e+01 1.823110095657221e+02 3 3.909476383151783e+02 2.539115798088024e+02 -2.930333502072385e+02 -5.000421191795168e+01 4 4.949063136718440e+02 3.303357919992488e+02 3.439555714517127e+02 
-1.323067976477707e+02 - ME 1.550407956048336e-04 + ME 2.380755205932631e-05 Event 101 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -812,7 +812,7 @@ Event 101 Batch 0 2 7.469346538870473e+02 3.524232024688497e+02 -1.488240016505349e+02 -6.415299525912136e+02 3 6.502268999047169e+02 -2.777200960400715e+02 1.351761574712158e+02 5.721835160737410e+02 4 1.028384462082358e+02 -7.470310642877820e+01 1.364784417931910e+01 6.934643651747267e+01 - ME 1.080054053054822e-04 + ME 7.777208667430486e-05 Event 102 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -820,7 +820,7 @@ Event 102 Batch 0 2 7.426790432885583e+02 -3.141071077544728e+02 6.615000409077074e+02 1.238005738162371e+02 3 6.735764515788642e+01 -4.139700837311957e+00 -5.533298776898177e+01 -3.818606686673834e+01 4 6.899633115535552e+02 3.182468085917849e+02 -6.061670531387255e+02 -8.561450694949879e+01 - ME 6.292262541994918e-04 + ME 1.796768498680773e-04 Event 103 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -828,7 +828,7 @@ Event 103 Batch 0 2 4.837874798175253e+02 -2.731724972668680e+02 1.247027290420595e+02 -3.793103501549069e+02 3 4.466406321977809e+02 -2.904538080082218e+02 -1.536665846758871e+02 3.025078850172422e+02 4 5.695718879846930e+02 5.636263052750895e+02 2.896385563382777e+01 7.680246513766473e+01 - ME 8.140894767450013e-05 + ME 2.998858312831636e-05 Event 104 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -836,7 +836,7 @@ Event 104 Batch 0 2 5.788466572679498e+02 3.572346730226224e+02 -3.682137844992378e+02 2.680773207965347e+02 3 2.925711988065158e+02 2.155069407513812e+02 1.697995838195863e+02 -1.016010147279926e+02 4 6.285821439255348e+02 -5.727416137740034e+02 1.984142006796517e+02 -1.664763060685422e+02 - ME 2.849770726480251e-04 + ME 7.634200862908681e-05 Event 105 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -844,7 +844,7 @@ Event 105 Batch 0 2 3.361125455083114e+02 2.619004058447622e+02 4.338373361330959e+01 -2.061496357605196e+02 3 5.299016201311088e+02 2.892532450564946e+02 2.091058919093095e+02 3.916669672191841e+02 4 6.339858343605800e+02 -5.511536509012568e+02 -2.524896255226191e+02 -1.855173314586645e+02 - ME 2.866662317167052e-04 + ME 1.089382545947932e-04 Event 106 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -852,7 +852,7 @@ Event 106 Batch 0 2 3.578050478863485e+02 -2.265838270225943e+02 2.740910124726658e+02 -3.947579646386072e+01 3 5.202885196186892e+02 1.412729374205232e+02 1.631578432376887e+02 4.734148487210871e+02 4 6.219064324949621e+02 8.531088960207101e+01 -4.372488557103545e+02 -4.339390522572265e+02 - ME 1.912263829178338e-03 + ME 4.548955126640399e-04 Event 107 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -860,7 +860,7 @@ Event 107 Batch 0 2 5.409822745993889e+02 9.278463733038997e+01 5.102180459532771e+02 -1.540466750365499e+02 3 2.501852297905710e+02 1.682301834486207e+02 1.474652503315489e+02 1.120056004263085e+02 4 7.088324956100398e+02 -2.610148207790107e+02 -6.576832962848259e+02 4.204107461024153e+01 - ME 7.096163321035572e-04 + ME 2.159102073406285e-04 Event 108 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -868,7 +868,7 @@ Event 108 Batch 0 2 6.835202199428555e+02 6.670011709444186e+02 6.653656309718588e+01 1.337243986739828e+02 3 2.377887385005082e+02 -1.098327419601477e+02 7.667443498831059e+01 -1.964720946353502e+02 4 5.786910415566365e+02 -5.571684289842709e+02 -1.432109980854965e+02 6.274769596136723e+01 - ME 1.143500637563713e-04 + ME 2.960130886583330e-05 Event 109 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -876,7 +876,7 
@@ Event 109 Batch 0 2 5.978180281189351e+02 4.291222314737005e+02 2.249703559956599e+02 3.501840146583366e+02 3 3.585061336071061e+02 -3.227227650115256e+02 1.541688059097761e+02 2.467071262824850e+01 4 5.436758382739589e+02 -1.063994664621746e+02 -3.791391619054360e+02 -3.748547272865851e+02 - ME 1.159187207430584e-03 + ME 1.100286424576873e-04 Event 110 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -884,7 +884,7 @@ Event 110 Batch 0 2 7.073952645543156e+01 -4.753982451958468e+01 4.872856968801237e+01 -1.922426029646691e+01 3 7.438039776014969e+02 1.707202332282495e+02 -7.225114374584515e+02 4.556513803361385e+01 4 6.854564959430718e+02 -1.231804087086648e+02 6.737828677704391e+02 -2.634087773714689e+01 - ME 5.177444310012934e-04 + ME 1.052942530962122e-04 Event 111 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -892,7 +892,7 @@ Event 111 Batch 0 2 5.206822291802364e+02 -3.873336848644893e+02 2.415505427333673e+02 -2.504714268307115e+02 3 5.478000561519707e+02 4.687653961676166e+02 -2.245690260344170e+02 -1.729527606656598e+02 4 4.315177146677929e+02 -8.143171130312743e+01 -1.698151669895031e+01 4.234241874963712e+02 - ME 1.041517236520828e-04 + ME 8.545692640795734e-05 Event 112 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -900,7 +900,7 @@ Event 112 Batch 0 2 3.610471238372959e+02 2.563298943277285e+02 9.635756626046441e+01 -2.352981732387216e+02 3 6.139063356201009e+02 1.031778254919422e+02 -4.257030126280926e+02 4.301305270271111e+02 4 5.250465405426031e+02 -3.595077198196707e+02 3.293454463676283e+02 -1.948323537883896e+02 - ME 2.333567140730066e-04 + ME 5.572029836371622e-05 Event 113 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -908,7 +908,7 @@ Event 113 Batch 0 2 5.886653054136124e+02 3.035646198144377e+02 3.278619896967805e+02 
-3.832517176826292e+02 3 5.420023902452333e+02 -3.658357535838290e+02 -3.990519958595696e+02 2.623541560166928e+01 4 3.693323043411537e+02 6.227113376939163e+01 7.119000616278893e+01 3.570163020809600e+02 - ME 6.906402420910258e-05 + ME 4.986188449478774e-05 Event 114 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -916,7 +916,7 @@ Event 114 Batch 0 2 5.165204340356855e+02 2.346362244736889e+01 6.298471388966840e+00 5.159487827839334e+02 3 5.932916594323345e+02 3.608814360715946e+02 -5.336137507463695e+01 -4.678804824963537e+02 4 3.901879065319798e+02 -3.843450585189634e+02 4.706290368567026e+01 -4.806830028757967e+01 - ME 5.363382776736297e-04 + ME 4.029549711869195e-04 Event 115 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -924,7 +924,7 @@ Event 115 Batch 0 2 5.432307281524777e+02 2.250327918244370e+02 4.870559856477670e+02 -8.506664127290338e+01 3 4.265243530840496e+02 2.057819224248363e+02 -2.472237669715339e+02 2.801021835354204e+02 4 5.302449187634726e+02 -4.308147142492733e+02 -2.398322186762331e+02 -1.950355422625171e+02 - ME 2.364149932043149e-04 + ME 4.159321993514108e-05 Event 116 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -932,7 +932,7 @@ Event 116 Batch 0 2 4.402635748890415e+02 -4.240500842615081e+02 -5.733358735035193e+01 -1.035683405941509e+02 3 4.399967684638562e+02 1.183617589007452e+02 -1.041572505293867e+02 -4.107784286579766e+02 4 6.197396566471035e+02 3.056883253607625e+02 1.614908378797388e+02 5.143467692521278e+02 - ME 1.343295643586522e-04 + ME 4.172733678506819e-05 Event 117 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -940,7 +940,7 @@ Event 117 Batch 0 2 3.074085311587982e+02 -4.270248480828711e+01 -3.034838508096459e+02 2.395944736750828e+01 3 5.360984061023379e+02 3.510554986169303e+02 -1.596589010508530e+02 
-3.723849798683070e+02 4 6.564930627388640e+02 -3.083530138086433e+02 4.631427518604987e+02 3.484255325007987e+02 - ME 1.795895763168496e-04 + ME 4.142391000026985e-05 Event 118 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -948,7 +948,7 @@ Event 118 Batch 0 2 5.403602961735903e+02 4.471526113902045e+02 -1.804334130868151e+02 -2.439007487679592e+02 3 5.654623567965698e+02 -5.534570111367966e+02 -1.157195831079003e+02 6.480112868522320e+00 4 3.941773470298406e+02 1.063043997465919e+02 2.961529961947150e+02 2.374206358994370e+02 - ME 3.055618730902428e-05 + ME 7.288650603673961e-06 Event 119 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -956,7 +956,7 @@ Event 119 Batch 0 2 8.009099446659010e+01 5.775399043490319e+01 -2.629604726664823e+01 4.886268393818209e+01 3 7.131140611332349e+02 2.472685400460709e+02 -2.870014097539109e+02 -6.041689532644716e+02 4 7.067949444001758e+02 -3.050225304809738e+02 3.132974570205592e+02 5.553062693262896e+02 - ME 6.861262467765907e-04 + ME 2.815424392761942e-04 Event 120 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -964,7 +964,7 @@ Event 120 Batch 0 2 5.007248873753321e+02 2.708997263130530e+02 -3.880896283797751e+02 1.634784128397387e+02 3 7.413897277398672e+02 -4.257033276374029e+02 5.921425482134987e+02 -1.334264135464211e+02 4 2.578853848848011e+02 1.548036013243502e+02 -2.040529198337238e+02 -3.005199929331748e+01 - ME 1.034513276694145e-04 + ME 6.003662532288496e-06 Event 121 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -972,7 +972,7 @@ Event 121 Batch 0 2 5.732265116821120e+02 -1.149395375629033e+02 4.260916136383032e+02 3.658189076403451e+02 3 4.323948798659248e+02 -2.148488009071912e+01 -4.178027098651986e+02 1.092914804138530e+02 4 4.943786084519640e+02 1.364244176536226e+02 -8.288903773105691e+00 
-4.751103880541979e+02 - ME 8.074833733477824e-02 + ME 7.661241871407340e-04 Event 122 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -980,7 +980,7 @@ Event 122 Batch 0 2 3.423360304412701e+02 2.648046119434483e+02 2.369247279710451e+01 -2.156644197927059e+02 3 6.059487982275789e+02 2.457729689670163e+01 -4.569077875801422e+02 3.972469964635579e+02 4 5.517151713311508e+02 -2.893819088401499e+02 4.332153147830377e+02 -1.815825766708520e+02 - ME 2.180123533398812e-04 + ME 5.274300345459390e-05 Event 123 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -988,7 +988,7 @@ Event 123 Batch 0 2 1.430133297276668e+02 -4.205671322284506e+01 3.498095937953869e+01 1.321377229770999e+02 3 7.140350670908600e+02 -2.955397919833849e+01 -6.570980288365154e+02 -2.778395577453968e+02 4 6.429516031814733e+02 7.161069242118367e+01 6.221170694569771e+02 1.457018347682969e+02 - ME 5.626335206455025e-04 + ME 2.698780233597045e-04 Event 124 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -996,7 +996,7 @@ Event 124 Batch 0 2 6.053457283343441e+02 5.458657819531910e+02 -1.853964251366731e+01 -2.610177782464909e+02 3 7.499633671623128e+02 -6.784114238502394e+02 2.145325921506613e+01 3.189713933003628e+02 4 1.446909045033435e+02 1.325456418970486e+02 -2.913616701398675e+00 -5.795361505387172e+01 - ME 4.169465060943616e-04 + ME 2.629538535113942e-05 Event 125 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1004,7 +1004,7 @@ Event 125 Batch 0 2 6.695439244882118e+02 9.058534244088493e+01 6.586171675820721e+02 7.941529525294386e+01 3 9.341516463500346e+01 3.490868167113007e+01 5.232133368429144e+01 6.906703243419068e+01 4 7.370409108767834e+02 -1.254940241120154e+02 -7.109385012663632e+02 -1.484823276871337e+02 - ME 1.111472366347957e-02 + ME 4.436636984625360e-03 Event 126 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1012,7 +1012,7 @@ Event 126 Batch 0 2 6.465564354211967e+02 -2.094351601488127e+02 -1.930091683601272e+02 -5.804477571728034e+02 3 1.356182567235447e+02 -2.832094442380729e+01 9.735247446175231e+01 -9.007070211700794e+01 4 7.178253078552584e+02 2.377561045726200e+02 9.565669389837488e+01 6.705184592898115e+02 - ME 1.775660879411100e-03 + ME 1.230970446288030e-03 Event 127 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1020,7 +1020,7 @@ Event 127 Batch 0 2 4.508388003927651e+02 -3.846405138087858e+02 7.756355374444065e+01 2.220162025777267e+02 3 6.162879941073576e+02 2.174727303224461e+02 1.334711143222092e+02 -5.609830344035003e+02 4 4.328732054998774e+02 1.671677834863399e+02 -2.110346680666500e+02 3.389668318257735e+02 - ME 3.922171581774212e-05 + ME 2.127227557837123e-05 Event 128 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1028,7 +1028,7 @@ Event 128 Batch 0 2 7.468963146802857e+02 5.701805835528932e+02 -3.440982003215339e+02 -3.381488363986430e+02 3 1.196664332518719e+02 -9.337643239636876e+01 2.398139841985228e+01 7.089280393650260e+01 4 6.334372520678420e+02 -4.768041511565244e+02 3.201168019016817e+02 2.672560324621404e+02 - ME 2.053620454072734e-04 + ME 7.842790653965437e-05 Event 129 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1036,7 +1036,7 @@ Event 129 Batch 0 2 4.378966182438207e+02 -4.256397208622688e+02 4.624364030548149e+01 9.190104474357973e+01 3 7.127537996732577e+02 5.790589826349546e+02 -1.369827771626340e+02 -3.923574802896586e+02 4 3.493495820829217e+02 -1.534192617726859e+02 9.073913685715252e+01 3.004564355460789e+02 - ME 1.668072874757384e-05 + ME 1.046217618618756e-05 Event 130 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-1044,7 +1044,7 @@ Event 130 Batch 0 2 6.322026526626455e+02 5.905875735566585e+02 -2.387291116192753e+01 -2.243136110600485e+02 3 5.268087771404591e+02 -3.287250458747471e+02 1.913681034684307e+02 3.644798771698754e+02 4 3.409885701968954e+02 -2.618625276819114e+02 -1.674951923065032e+02 -1.401662661098267e+02 - ME 2.766647151388132e-04 + ME 3.412796728096272e-05 Event 131 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1052,7 +1052,7 @@ Event 131 Batch 0 2 2.691964685177017e+02 -2.641651354044939e+02 4.065264362900757e+01 -3.210735842607325e+01 3 5.382709487855662e+02 -3.022535437819008e+02 -4.307865739991411e+02 1.131429946566680e+02 4 6.925325826967319e+02 5.664186791863947e+02 3.901339303701337e+02 -8.103563623059465e+01 - ME 5.354423766199649e-04 + ME 1.516502654737588e-04 Event 132 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1060,7 +1060,7 @@ Event 132 Batch 0 2 1.376388194981169e+02 -2.491804956023667e+01 3.114513197621116e+01 1.317327453336230e+02 3 7.332494677489981e+02 -3.054807357444667e+02 -6.882601889638243e+00 -6.665500220046781e+02 4 6.291117127528858e+02 3.303987853047034e+02 -2.426253008657308e+01 5.348172766710551e+02 - ME 3.625143788027957e-04 + ME 2.459616839911958e-04 Event 133 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1068,7 +1068,7 @@ Event 133 Batch 0 2 5.818916885738672e+02 -3.437736592641007e+02 -2.113522447259726e+02 -4.192228966514222e+02 3 7.075583625851592e+02 3.695171106849944e+02 9.875952986414086e+01 5.952667441040354e+02 4 2.105499488409736e+02 -2.574345142089370e+01 1.125927148618317e+02 -1.760438474526132e+02 - ME 6.644965721204062e-03 + ME 3.278402967978973e-04 Event 134 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1076,7 +1076,7 @@ Event 134 Batch 0 2 7.039051474789593e+02 -1.767404282002263e+02 
5.832845063404937e+02 3.521710697233707e+02 3 6.740856043500099e+02 9.540039380435479e+01 -5.203258634262522e+02 -4.177932056695244e+02 4 1.220092481710302e+02 8.134003439587134e+01 -6.295864291424151e+01 6.562213594615410e+01 - ME 6.394436352069354e-05 + ME 3.621089826286842e-05 Event 135 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1084,7 +1084,7 @@ Event 135 Batch 0 2 7.491379873081086e+02 -6.603965492909807e+02 -9.243924572685610e+01 -3.413782470545817e+02 3 4.360367703469753e+02 3.763875731093294e+02 3.833030381995060e+01 2.167746473012021e+02 4 3.148252423449159e+02 2.840089761816513e+02 5.410894190690560e+01 1.246035997533796e+02 - ME 3.729096801849378e-05 + ME 1.170602675185252e-05 Event 136 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1092,7 +1092,7 @@ Event 136 Batch 0 2 6.907976432034611e+02 -8.965778913807024e+01 -5.375684903631193e+02 -4.244796613161184e+02 3 4.317447428217263e+02 2.541758793770707e+02 2.501815833403360e+02 2.433255445990286e+02 4 3.774576139748129e+02 -1.645180902390004e+02 2.873869070227833e+02 1.811541167170898e+02 - ME 3.295715598818487e-05 + ME 1.221598515374744e-05 Event 137 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1100,7 +1100,7 @@ Event 137 Batch 0 2 5.927917878715718e+02 -5.453882061843875e+02 -2.239274061847312e+02 6.172783069514800e+01 3 3.718333194205911e+02 2.859809174201715e+02 -2.363544177495510e+02 2.472896101988843e+01 4 5.353748927078371e+02 2.594072887642160e+02 4.602818239342820e+02 -8.645679171503701e+01 - ME 1.267334233155001e-04 + ME 2.222722395048600e-05 Event 138 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1108,7 +1108,7 @@ Event 138 Batch 0 2 1.164849493482387e+02 2.012854405109472e+01 -2.573298799707043e+01 -1.118096528381494e+02 3 7.481698498358139e+02 -1.044692284663333e+02 
-4.003634472873074e+00 7.408294509656059e+02 4 6.353452008159477e+02 8.434068441523856e+01 2.973662246994375e+01 -6.290197981274564e+02 - ME 3.545594402685597e+00 + ME 1.183014588836486e-01 Event 139 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1116,7 +1116,7 @@ Event 139 Batch 0 2 3.415587822283577e+02 -2.468214832259765e+02 1.926082427237748e+02 1.365416492148350e+02 3 5.828887331044928e+02 -1.023403009989268e+02 -5.561813319045077e+02 1.412376154306548e+02 4 5.755524846671491e+02 3.491617842249035e+02 3.635730891807333e+02 -2.777792646454897e+02 - ME 4.142320485322521e-04 + ME 5.213154494000113e-05 Event 140 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1124,7 +1124,7 @@ Event 140 Batch 0 2 4.395392082109443e+02 -3.037880820376849e+02 -2.455930383243060e+02 -2.014735126343029e+02 3 4.709796125547878e+02 -2.826270024952004e+02 2.984919122515593e+02 2.298833426397907e+02 4 5.894811792342680e+02 5.864150845328855e+02 -5.289887392725340e+01 -2.840983000548780e+01 - ME 1.220048440917972e-04 + ME 2.990357782498624e-05 Event 141 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1132,7 +1132,7 @@ Event 141 Batch 0 2 3.025838986653694e+02 -2.680006525137058e+02 -6.218827689980458e+01 -1.259574698062632e+02 3 5.104624598690772e+02 -2.829910827131053e+02 4.173533268753467e+02 -7.939880721102661e+01 4 6.869536414655528e+02 5.509917352268112e+02 -3.551650499755422e+02 2.053562770172896e+02 - ME 3.735313583347012e-04 + ME 7.151804808113674e-05 Event 142 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1140,7 +1140,7 @@ Event 142 Batch 0 2 4.390011511178412e+02 -3.153925512561953e+02 3.992377088505197e+01 -3.027468279160259e+02 3 4.597282536099518e+02 2.984856708041211e+02 -2.221794712617382e+02 -2.699863960308454e+02 4 6.012705952722066e+02 1.690688045207421e+01 
1.822557003766862e+02 5.727332239468712e+02 - ME 1.630913878361870e-04 + ME 8.945447985744934e-05 Event 143 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1148,7 +1148,7 @@ Event 143 Batch 0 2 7.103308443495001e+02 -3.626595603160224e+02 2.462759922459802e+02 5.589240443825270e+02 3 3.424564807343295e+02 4.507572778536915e+01 -2.357842367637252e+02 -2.442343416788665e+02 4 4.472126749161695e+02 3.175838325306533e+02 -1.049175548225529e+01 -3.146897027036604e+02 - ME 1.304325296055160e-03 + ME 1.789392510542836e-04 Event 144 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1156,7 +1156,7 @@ Event 144 Batch 0 2 6.893886390440568e+02 -2.470805413393656e+02 1.331686162420120e+02 6.296618309717105e+02 3 7.132719020730987e+02 2.482972988978650e+02 -2.304803220538649e+02 -6.276815106349294e+02 4 9.733945888284487e+01 -1.216757558499225e+00 9.731170581185302e+01 -1.980320336781234e+00 - ME 3.769348793094523e-04 + ME 1.486904409371019e-04 Event 145 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1164,7 +1164,7 @@ Event 145 Batch 0 2 3.784954309743686e+02 2.391836032855264e+02 1.115572896135236e+01 -2.931305935912622e+02 3 7.389406222827198e+02 -4.231861417520660e+02 1.513250860114713e+02 5.865555822189353e+02 4 3.825639467429113e+02 1.840025384665394e+02 -1.624808149728234e+02 -2.934249886276727e+02 - ME 2.193982780219728e-03 + ME 2.016505354100400e-04 Event 146 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1172,7 +1172,7 @@ Event 146 Batch 0 2 4.681255842987410e+02 -3.253195724522379e+01 1.754808059398437e+02 -4.327698247100133e+02 3 2.875849079819393e+02 2.091841587061404e+01 1.879781824316579e+02 -2.166372592748876e+02 4 7.442895077193195e+02 1.161354137460973e+01 -3.634589883715017e+02 6.494070839849006e+02 - ME 5.347932692815789e-02 + ME 
1.210467216316050e-02 Event 147 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1180,7 +1180,7 @@ Event 147 Batch 0 2 2.442136391928777e+02 -1.784444843977844e+02 -1.666832492802189e+02 -3.816014311599316e+00 3 5.551361515401285e+02 1.378338123621512e+02 -5.199472642306259e+02 1.372327560591401e+02 4 7.006502092669938e+02 4.061067203563306e+01 6.866305135108448e+02 -1.334167417475408e+02 - ME 7.450632204513606e-04 + ME 2.360352365747709e-04 Event 148 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1188,7 +1188,7 @@ Event 148 Batch 0 2 4.547263863263726e+02 3.928375677411887e+02 5.145105706241225e+01 2.231759855356057e+02 3 7.397285466814292e+02 -5.611511356388266e+02 -1.533645573573770e+02 -4.569322031694095e+02 4 3.055450669921979e+02 1.683135678976379e+02 1.019135002949646e+02 2.337562176338038e+02 - ME 1.440225905683450e-05 + ME 6.307552439231181e-06 Event 149 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1196,7 +1196,7 @@ Event 149 Batch 0 2 2.343018799311635e+02 9.853424545130945e+01 1.924850318874441e+02 -9.021023174733594e+01 3 7.291173748950658e+02 3.429747374294529e+01 -5.990516617369192e+02 4.142136359886766e+02 4 5.365807451737705e+02 -1.328317191942547e+02 4.065666298494750e+02 -3.240034042413406e+02 - ME 8.405553848068603e-04 + ME 8.298171355094406e-05 Event 150 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1204,7 +1204,7 @@ Event 150 Batch 0 2 4.707648023587808e+02 -8.969278865174961e+01 -3.008719699078221e+02 3.507859183712497e+02 3 6.876639918976698e+02 3.906111988928598e+02 4.609284537794546e+02 -3.284046551871671e+02 4 3.415712057435500e+02 -3.009184102411105e+02 -1.600564838716325e+02 -2.238126318408256e+01 - ME 1.070125715137075e-04 + ME 1.887585788236135e-05 Event 151 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -1212,7 +1212,7 @@ Event 151 Batch 0 2 6.503034458278056e+02 -1.575298496674962e+02 -3.658248853789647e+01 -6.298735108350154e+02 3 6.998690336552314e+02 1.302751858829802e+02 -1.019415103826456e+02 6.800389464387812e+02 4 1.498275205169629e+02 2.725466378451580e+01 1.385239989205421e+02 -5.016543560376590e+01 - ME 6.663776898009472e-04 + ME 4.060174493404880e-04 Event 152 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1220,7 +1220,7 @@ Event 152 Batch 0 2 7.401192382353395e+02 1.493701961830190e+02 6.288419447382046e+02 3.605867993093739e+02 3 7.332111095478891e+02 -1.230079111936445e+02 -6.287602831147091e+02 -3.565502647954901e+02 4 2.666965221677112e+01 -2.636228498937447e+01 -8.166162349550861e-02 -4.036534513883709e+00 - ME 8.446403371723604e-04 + ME 1.210964379505254e-04 Event 153 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1228,7 +1228,7 @@ Event 153 Batch 0 2 5.645797071775899e+02 7.941901905692946e+01 3.691428696980725e+02 -4.197337333594241e+02 3 6.079979027943974e+02 1.021455738177839e+02 -5.566920170809548e+02 2.220849604771994e+02 4 3.274223900280123e+02 -1.815645928747133e+02 1.875491473828823e+02 1.976487728822249e+02 - ME 2.846663840296023e-05 + ME 9.895323747190810e-06 Event 154 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1236,7 +1236,7 @@ Event 154 Batch 0 2 6.022174885419887e+02 -5.152457849782368e+02 -1.493252664732707e+02 -2.736597328082223e+02 3 3.617627670199851e+02 1.925398333816265e+02 -2.626238171638091e+02 1.575736108034646e+02 4 5.360197444380261e+02 3.227059515966102e+02 4.119490836370796e+02 1.160861220047577e+02 - ME 6.437319974597944e-05 + ME 1.660411512586943e-05 Event 155 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1244,7 +1244,7 @@ Event 155 Batch 0 2 
6.202229507100907e+02 -2.107861924791831e+02 -3.212541876154504e+02 4.868690137883067e+02 3 2.943040328093193e+02 2.940980302320592e+02 1.073731199058907e+01 2.433613089266508e+00 4 5.854730164805898e+02 -8.331183775287627e+01 3.105168756248616e+02 -4.893026268775732e+02 - ME 5.904510654775639e-03 + ME 4.918845171174253e-04 Event 156 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1252,7 +1252,7 @@ Event 156 Batch 0 2 4.945486805149833e+02 4.540818864859257e+02 -1.431706201593249e+02 -1.337542944644701e+02 3 5.997303202813281e+02 -3.624214233270367e+02 -5.726286247273350e+01 4.743923835389624e+02 4 4.057209992036886e+02 -9.166046315888883e+01 2.004334826320584e+02 -3.406380890744924e+02 - ME 4.701306652347430e-03 + ME 1.986837824231628e-04 Event 157 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1260,7 +1260,7 @@ Event 157 Batch 0 2 4.617003083190191e+02 3.118400043328062e+02 3.404502064148864e+02 -4.079626411035589e+00 3 5.720097526413113e+02 -4.999240316044806e+01 -4.329264075474301e+02 -3.705005295422582e+02 4 4.662899390396696e+02 -2.618476011723578e+02 9.247620113254365e+01 3.745801559532937e+02 - ME 3.907978340087068e-05 + ME 1.403598809900552e-05 Event 158 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1268,7 +1268,7 @@ Event 158 Batch 0 2 6.784877363061535e+02 -5.707102180762959e+02 -3.102223423027389e+02 -1.959529373021938e+02 3 5.650909444059712e+02 5.525284805868615e+02 7.765167789879932e+01 8.950011457818250e+01 4 2.564213192878751e+02 1.818173748943443e+01 2.325706644039396e+02 1.064528227240114e+02 - ME 3.503179830087694e-05 + ME 8.470133063482862e-06 Event 159 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1276,7 +1276,7 @@ Event 159 Batch 0 2 5.369491563274252e+02 2.154713482252002e+02 -2.912667909729743e+02 3.962955349875316e+02 3 
6.066564496499102e+02 -4.020061311781470e+01 5.572389608252350e+02 -2.364332868806716e+02 4 3.563943940226648e+02 -1.752707351073854e+02 -2.659721698522608e+02 -1.598622481068599e+02 - ME 3.198473025834927e-04 + ME 3.562393617300492e-05 Event 160 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1284,7 +1284,7 @@ Event 160 Batch 0 2 6.492474755438517e+02 3.490068395973682e+02 1.460348644657111e+02 -5.276270735801970e+02 3 2.857818814470013e+02 -2.550253586192556e+02 1.227259509083862e+02 3.964456076362119e+01 4 5.649706430091471e+02 -9.398148097811273e+01 -2.687608153740973e+02 4.879825128165764e+02 - ME 6.719464076924620e-05 + ME 3.516238941302227e-05 Event 161 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1292,7 +1292,7 @@ Event 161 Batch 0 2 6.770282049439580e+02 -2.863253153105184e+02 -4.911270786072976e+02 -3.676672364525180e+02 3 1.598243093356544e+02 -7.505362471426160e+01 1.299195075310522e+02 -5.506073768810752e+01 4 6.631474857203874e+02 3.613789400247800e+02 3.612075710762453e+02 4.227279741406256e+02 - ME 1.577168105051119e-04 + ME 5.970757951131334e-05 Event 162 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1300,7 +1300,7 @@ Event 162 Batch 0 2 5.178592782584632e+02 -3.271131571456631e+02 3.943743741889439e+02 -7.512700901574514e+01 3 3.730686930366258e+02 -2.885924195736573e+01 -1.360208443078026e+02 -3.461874113706257e+02 4 6.090720287049110e+02 3.559723991030290e+02 -2.583535298811414e+02 4.213144203863710e+02 - ME 1.031749267713353e-04 + ME 2.768303103320498e-05 Event 163 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1308,7 +1308,7 @@ Event 163 Batch 0 2 5.388642316037673e+02 3.152159924116781e+02 3.539969933522669e+01 -4.356149670486711e+02 3 5.364171791816749e+02 -5.299694218906361e+02 3.369785517714305e+01 7.576448071880543e+01 4 
4.247185892145582e+02 2.147534294789580e+02 -6.909755451236977e+01 3.598504863298658e+02 - ME 3.508094027565679e-05 + ME 1.485600561394433e-05 Event 164 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1316,7 +1316,7 @@ Event 164 Batch 0 2 6.862697092177667e+02 4.132218376422068e+02 1.310202162324327e+02 -5.320221138485150e+02 3 4.476895523579005e+02 -2.769046850483522e+02 1.374187337517142e+02 3.238299280529301e+02 4 3.660407384243329e+02 -1.363171525938544e+02 -2.684389499841469e+02 2.081921857955847e+02 - ME 3.375894779915149e-05 + ME 1.755563256840939e-05 Event 165 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1324,7 +1324,7 @@ Event 165 Batch 0 2 2.382444910715278e+02 -2.158277263671036e+02 -9.471372817531817e+00 -1.004446273032522e+02 3 7.304591383576048e+02 4.619003715882296e+02 -1.223345688256177e+02 5.524969256086772e+02 4 5.312963705708673e+02 -2.460726452211260e+02 1.318059416431495e+02 -4.520522983054250e+02 - ME 6.966498968932957e-03 + ME 4.549138184301779e-04 Event 166 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1332,7 +1332,7 @@ Event 166 Batch 0 2 2.131352071380649e+02 -7.633553084455029e+01 -1.899581415396244e+02 5.929087379418958e+01 3 7.305557876753161e+02 8.980971292745940e+01 7.136333043711877e+02 1.279589045828712e+02 4 5.563090051866194e+02 -1.347418208290915e+01 -5.236751628315633e+02 -1.872497783770607e+02 - ME 3.314006956523505e-04 + ME 3.352199959657985e-05 Event 167 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1340,7 +1340,7 @@ Event 167 Batch 0 2 4.122964103002419e+02 -3.405127102276982e+02 6.366431608201744e+01 2.235761145061386e+02 3 4.697083356610920e+02 -2.521100678451879e+02 -2.856113063438232e+01 -3.952855880214881e+02 4 6.179952540386658e+02 5.926227780728861e+02 -3.510318544763516e+01 1.717094735153495e+02 - 
ME 1.146777177775239e-04 + ME 3.829535931496594e-05 Event 168 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1348,7 +1348,7 @@ Event 168 Batch 0 2 7.156643283953484e+02 -3.999734570317170e+02 4.816586825103861e+02 3.467009924560655e+02 3 6.192344221355605e+02 2.722545660880235e+02 -4.999454120042317e+02 -2.436869012025525e+02 4 1.651012494690919e+02 1.277188909436936e+02 1.828672949384504e+01 -1.030140912535133e+02 - ME 1.017624049822302e-03 + ME 5.027887292283473e-05 Event 169 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1356,7 +1356,7 @@ Event 169 Batch 0 2 3.626022684949455e+02 7.511110909567982e+01 -2.030941161665286e+02 -2.908461902563517e+02 3 5.580565590514408e+02 -2.529981754432838e+02 -3.439969378312538e+02 3.592842232626199e+02 4 5.793411724536141e+02 1.778870663476037e+02 5.470910539977822e+02 -6.843803300626824e+01 - ME 1.371698416063432e-04 + ME 4.350242525242475e-05 Event 170 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1364,7 +1364,7 @@ Event 170 Batch 0 2 6.602909342483501e+02 4.699653539595539e+02 -3.020118498241596e+02 3.520021683086903e+02 3 1.039297502933440e+02 3.247420585022842e+01 -9.851348423194945e+01 6.473976746580508e+00 4 7.357793154583061e+02 -5.024395598097824e+02 4.005253340561092e+02 -3.584761450552709e+02 - ME 1.673719496447659e-02 + ME 9.967260301798612e-03 Event 171 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1372,7 +1372,7 @@ Event 171 Batch 0 2 1.506693011949600e+02 -3.657300520509282e+01 -1.244227366169959e+02 -7.669834565089053e+01 3 6.344013325830570e+02 -2.026333084464634e+02 -4.956100871165362e+02 3.402578943089165e+02 4 7.149293662219835e+02 2.392063136515561e+02 6.200328237335323e+02 -2.635595486580261e+02 - ME 2.133207113512388e-03 + ME 9.157902172934166e-04 Event 172 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1380,7 +1380,7 @@ Event 172 Batch 0 2 5.352445157558213e+02 -2.018352690102651e+02 3.892440882325296e+02 -3.069825004886504e+02 3 6.716112180685394e+02 2.825227203806547e+02 -5.978593235713698e+02 1.175022124175027e+02 4 2.931442661756383e+02 -8.068745137038898e+01 2.086152353388391e+02 1.894802880711483e+02 - ME 2.630379932615259e-05 + ME 8.067092159940342e-06 Event 173 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1388,7 +1388,7 @@ Event 173 Batch 0 2 6.571348515648592e+02 -2.769863586381786e+02 5.805753619381593e+02 1.343019708712704e+02 3 5.332990408103321e+02 1.871824832342877e+02 -4.782426732337677e+02 1.437168410371092e+02 4 3.095661076248081e+02 8.980387540389081e+01 -1.023326887043915e+02 -2.780188119083794e+02 - ME 9.985413945498126e-03 + ME 1.269359653092767e-04 Event 174 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1396,7 +1396,7 @@ Event 174 Batch 0 2 6.091496911716730e+02 -4.752584064243671e+02 3.135726231883978e+01 -3.797492797588730e+02 3 6.417481529658018e+02 3.309293137608124e+02 9.015643604119191e+01 5.424004960996682e+02 4 2.491021558625255e+02 1.443290926635548e+02 -1.215136983600317e+02 -1.626512163407953e+02 - ME 1.319192968737130e-03 + ME 1.362612102685676e-04 Event 175 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1404,7 +1404,7 @@ Event 175 Batch 0 2 5.399801778396885e+02 1.966672297646830e+02 2.343185748302537e+02 -4.449667388535759e+02 3 6.987953575798327e+02 -1.857207036318898e+02 -9.664246188148675e+01 6.666955876403318e+02 4 2.612244645804785e+02 -1.094652613279307e+01 -1.376761129487668e+02 -2.217288487867561e+02 - ME 9.528877211334405e-03 + ME 9.613528518728674e-04 Event 176 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-1412,7 +1412,7 @@ Event 176 Batch 0 2 6.615757321243968e+02 -4.129469954321281e+02 4.686878756164518e+02 -2.179194886871010e+02 3 1.607981401590110e+02 -6.355407199259605e+01 7.929314438200207e+00 1.474925346731048e+02 4 6.776261277165921e+02 4.765010674247242e+02 -4.766171900546519e+02 7.042695401399614e+01 - ME 6.965204353376922e-04 + ME 3.097907077728356e-04 Event 177 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1420,7 +1420,7 @@ Event 177 Batch 0 2 4.314334067424883e+02 -3.493619040652741e+02 -2.026482683689240e+01 -2.523299055494341e+02 3 4.840006500668400e+02 -1.846595828310067e+02 -1.450727057198388e+02 4.232155216776995e+02 4 5.845659431906716e+02 5.340214868962809e+02 1.653375325567312e+02 -1.708856161282654e+02 - ME 2.160100049311594e-04 + ME 1.084300812640113e-04 Event 178 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1428,7 +1428,7 @@ Event 178 Batch 0 2 4.528135981327372e+02 -2.544528544607913e+02 1.436928116455424e+02 3.458992272209776e+02 3 3.053350882587867e+02 -1.380299578048218e+02 2.072032295570572e+02 1.767599177741536e+02 4 7.418513136084770e+02 3.924828122656132e+02 -3.508960412025996e+02 -5.226591449951313e+02 - ME 7.384409254828141e-02 + ME 5.382438151181503e-02 Event 179 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1436,7 +1436,7 @@ Event 179 Batch 0 2 7.433145319259943e+02 -2.538538580850882e+02 -6.778753511348521e+02 -1.689962142519080e+02 3 1.647945947160298e+02 1.009041857568576e+02 1.171651165877689e+02 5.699069397138987e+01 4 5.918908733579761e+02 1.529496723282306e+02 5.607102345470832e+02 1.120055202805181e+02 - ME 1.335347052581446e-04 + ME 3.739915465576335e-05 Event 180 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1444,7 +1444,7 @@ Event 180 Batch 0 2 2.396120216689867e+02 1.204528233788652e+02 
-1.081248155319049e+02 1.766750195544080e+02 3 5.541470271917004e+02 2.767127195685322e+02 2.999096875483201e+02 3.749175614572557e+02 4 7.062409511393131e+02 -3.971655429473975e+02 -1.917848720164151e+02 -5.515925810116636e+02 - ME 1.316593054412419e-02 + ME 2.792447184071457e-03 Event 181 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1452,7 +1452,7 @@ Event 181 Batch 0 2 2.165494222755782e+02 1.336973493521793e+02 -1.495065670853883e+02 -8.164837697364385e+01 3 6.960869932595207e+02 -2.848973600545249e+02 2.209041937252092e+01 6.347303441548928e+02 4 5.873635844649011e+02 1.512000107023455e+02 1.274161477128675e+02 -5.530819671812490e+02 - ME 6.164296623062663e-02 + ME 3.488874737600980e-03 Event 182 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1460,7 +1460,7 @@ Event 182 Batch 0 2 6.472681881349898e+02 4.279258056181361e+02 3.994050733201775e+02 -2.762448183472868e+02 3 5.337197582091030e+02 -3.479343829022644e+02 -4.034091782989213e+02 -3.254965992745409e+01 4 3.190120536559070e+02 -7.999142271587166e+01 4.004104978744005e+00 3.087944782747408e+02 - ME 6.393158381765308e-05 + ME 5.523679400573375e-05 Event 183 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1468,7 +1468,7 @@ Event 183 Batch 0 2 6.165307808531154e+02 -3.276949594572818e+02 8.808524820164887e+01 -5.147496540405800e+02 3 2.975460412740734e+02 -1.030095950018341e+02 -2.375020297789284e+02 1.466814775843215e+02 4 5.859231778728107e+02 4.307045544591158e+02 1.494167815772794e+02 3.680681764562588e+02 - ME 6.887775529805495e-05 + ME 2.562496117427957e-05 Event 184 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1476,7 +1476,7 @@ Event 184 Batch 0 2 5.645337360463252e+02 -3.940276919793660e+02 3.776398996283964e+02 1.443212503288767e+02 3 5.368100353438223e+02 2.392766596964613e+02 
-1.719264331693737e+02 -4.487237410122139e+02 4 3.986562286098531e+02 1.547510322829050e+02 -2.057134664590229e+02 3.044024906833372e+02 - ME 3.553984578535888e-05 + ME 1.712138666139329e-05 Event 185 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1484,7 +1484,7 @@ Event 185 Batch 0 2 6.347397779710931e+02 2.522092504724420e+02 -1.599825720327363e+02 5.600809373302327e+02 3 4.566768168089404e+02 -3.359958684022406e+02 -1.272903681003782e+02 -2.818823400219340e+02 4 4.085834052199659e+02 8.378661792979838e+01 2.872729401331145e+02 -2.781985973082986e+02 - ME 1.184197550833168e-03 + ME 1.836859309200860e-04 Event 186 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1492,7 +1492,7 @@ Event 186 Batch 0 2 7.089823220133230e+02 -5.197119220861886e+02 4.248734840868308e+02 -2.281183322067745e+02 3 5.364076825758043e+02 3.588264146200084e+02 -3.973752875032956e+02 3.270606945152315e+01 4 2.546099954108725e+02 1.608855074661802e+02 -2.749819658353518e+01 1.954122627552515e+02 - ME 2.583895514537347e-05 + ME 1.318469173008218e-05 Event 187 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1500,7 +1500,7 @@ Event 187 Batch 0 2 4.835105223217566e+02 -2.128653471696258e+02 1.375287019182911e+02 -4.117725407538514e+02 3 7.240136612790383e+02 4.407273454759851e+02 -4.896543389042274e+01 5.723264583716990e+02 4 2.924758163992057e+02 -2.278619983063593e+02 -8.856326802786833e+01 -1.605539176178473e+02 - ME 5.307563978210835e-04 + ME 9.185777086042985e-05 Event 188 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1508,7 +1508,7 @@ Event 188 Batch 0 2 6.611118500396009e+02 3.502021063704277e+02 -2.011693879247277e+02 -5.234102027267809e+02 3 3.072944371702247e+02 -6.894916504330918e+01 -1.599953986835475e+02 2.531350551695447e+02 4 5.315937127901742e+02 
-2.812529413271184e+02 3.611647866082752e+02 2.702751475572362e+02 - ME 6.863567490702385e-05 + ME 3.862980709292737e-05 Event 189 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1516,7 +1516,7 @@ Event 189 Batch 0 2 7.498478362545707e+02 6.780504955298834e+02 -3.199144947524264e+02 -1.319162971889924e+01 3 3.253008430749361e+02 -2.985087551774363e+02 1.291384938207140e+02 6.034152914782593e+00 4 4.248513206704935e+02 -3.795417403524470e+02 1.907760009317124e+02 7.157476804116639e+00 - ME 8.583750584152986e-05 + ME 1.504471760657040e-05 Event 190 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1524,7 +1524,7 @@ Event 190 Batch 0 2 4.938867893347995e+02 3.689671478502748e+02 -1.218724623869293e+02 3.048516153777389e+02 3 5.264063001598521e+02 6.631942569346465e+01 1.276367949726208e+02 -5.063735530147588e+02 4 4.797069105053494e+02 -4.352865735437401e+02 -5.764332585691415e+00 2.015219376370201e+02 - ME 4.759343488474735e-05 + ME 2.269926034328256e-05 Event 191 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1532,7 +1532,7 @@ Event 191 Batch 0 2 3.681793141805986e+02 -3.225132888415706e+02 1.579589482507471e+02 -8.117977937027918e+01 3 5.431126642386394e+02 4.058413736814005e+01 9.147123993851424e+01 5.338139246166097e+02 4 5.887080215807621e+02 2.819291514734305e+02 -2.494301881892614e+02 -4.526341452463304e+02 - ME 4.908990110546420e-03 + ME 1.427494731558637e-03 Event 192 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1540,7 +1540,7 @@ Event 192 Batch 0 2 6.054165399887861e+02 1.497087111729466e+02 8.905021611535379e+01 5.798159601983524e+02 3 2.106656439489222e+02 1.451894976721945e+02 -1.487249448604451e+02 3.436443048222171e+01 4 6.839178160622922e+02 -2.948982088451411e+02 5.967472874509133e+01 -6.141803906805740e+02 - ME 4.294450320853435e-02 + 
ME 6.984876913518998e-03 Event 193 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1548,7 +1548,7 @@ Event 193 Batch 0 2 2.753169163933055e+02 -1.695475157411122e+02 -2.139406274107579e+02 3.581134319495643e+01 3 5.760219428901971e+02 -3.264616044953138e+02 1.527507522369444e+02 -4.493231656306969e+02 4 6.486611407164972e+02 4.960091202364260e+02 6.118987517381347e+01 4.135118224357404e+02 - ME 1.537583375796735e-04 + ME 4.273063058931925e-05 Event 194 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1556,7 +1556,7 @@ Event 194 Batch 0 2 3.445934948105150e+02 -2.970257025567896e+02 -8.183019525038441e+01 1.543509890854414e+02 3 7.485441862377920e+02 6.623797851941252e+02 1.083400559332054e+02 -3.314119056355291e+02 4 4.068623189516925e+02 -3.653540826373358e+02 -2.650986068282081e+01 1.770609165500877e+02 - ME 3.024610065690235e-05 + ME 4.921158833271929e-06 Event 195 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1564,7 +1564,7 @@ Event 195 Batch 0 2 2.012122274303647e+02 -5.190018365965096e+01 1.322177369426910e+02 -1.425173724194237e+02 3 7.122630330184543e+02 -3.054768058087834e+02 -2.528097616133813e+02 5.916838461125119e+02 4 5.865247395511832e+02 3.573769894684365e+02 1.205920246706904e+02 -4.491664736930883e+02 - ME 3.011639483286710e-03 + ME 4.696445912229638e-04 Event 196 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1572,7 +1572,7 @@ Event 196 Batch 0 2 4.490485793345989e+02 3.485190427929747e+02 -2.661098616642627e+01 -2.819059396826192e+02 3 5.531554978829222e+02 -3.330165694254377e+02 4.416170126965178e+02 7.442003978758296e+00 4 4.977959227824785e+02 -1.550247336753688e+01 -4.150060265300915e+02 2.744639357038610e+02 - ME 4.340266456570635e-05 + ME 9.363355109875406e-06 Event 197 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -1580,7 +1580,7 @@ Event 197 Batch 0 2 3.951249254444253e+02 -2.278358800090239e+02 3.101157211704546e+02 -8.968142489336992e+01 3 3.607080640108546e+02 -2.889948719219027e+02 2.155030307719242e+02 -1.227661082778765e+01 4 7.441670105447209e+02 5.168307519309257e+02 -5.256187519423792e+02 1.019580357211576e+02 - ME 3.377741088449004e-02 + ME 6.597373610109231e-03 Event 198 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1588,7 +1588,7 @@ Event 198 Batch 0 2 3.750236904637998e+02 1.183014344420310e+02 -1.005952209347265e+02 -3.413621838211424e+02 3 4.381296266085964e+02 -2.726825461625328e+02 1.003845461170281e+02 -3.279096546785175e+02 4 6.868466829276033e+02 1.543811117205018e+02 2.106748176980602e-01 6.692718384996598e+02 - ME 9.606390506705955e-04 + ME 6.145502577419889e-04 Event 199 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1596,7 +1596,7 @@ Event 199 Batch 0 2 2.454478562244572e+02 -2.058455361543722e+02 -1.131056012155068e+02 -7.126982772660261e+01 3 5.321797086694488e+02 -9.806778012582416e+01 -4.820333037417012e+02 -2.030808875905193e+02 4 7.223724351060940e+02 3.039133162801963e+02 5.951389049572081e+02 2.743507153171219e+02 - ME 1.577081887352965e-03 + ME 3.088173795554332e-04 Event 200 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1604,7 +1604,7 @@ Event 200 Batch 0 2 3.952431318363244e+02 3.031309873729303e+02 9.337877017948550e+01 2.358159092128122e+02 3 6.094031244332663e+02 -7.796753338981905e+01 -5.315426896439308e+02 -2.876727322709444e+02 4 4.953537437304092e+02 -2.251634539831113e+02 4.381639194644453e+02 5.185682305813224e+01 - ME 6.703240553489506e-05 + ME 1.668296552597111e-05 Event 201 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1612,7 +1612,7 @@ Event 201 Batch 0 2 
6.497938633639732e+02 3.771120671245744e+02 3.553445817627057e+02 -3.921081252746440e+02 3 3.369790646193914e+02 -2.140351778515325e+02 1.061239955238163e+02 2.376584318047305e+02 4 5.132270720166357e+02 -1.630768892730420e+02 -4.614685772865220e+02 1.544496934699135e+02 - ME 6.283412004793947e-05 + ME 2.404518058628388e-05 Event 202 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1620,7 +1620,7 @@ Event 202 Batch 0 2 7.267802742470179e+02 6.523432021666289e+02 -1.481957728499301e+02 2.840702844913056e+02 3 3.546086620137576e+02 -3.102429173963679e+02 -5.939291787501398e+01 -1.611493614224694e+02 4 4.186110637392242e+02 -3.421002847702610e+02 2.075886907249440e+02 -1.229209230688360e+02 - ME 1.894138330341389e-04 + ME 2.830403199974809e-05 Event 203 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1628,7 +1628,7 @@ Event 203 Batch 0 2 4.830190702985662e+02 2.789429895135886e+02 -3.943102945050296e+02 -4.197918611657844e+00 3 5.247163710833165e+02 -4.266462829986153e+02 3.263988520595893e+01 3.037019215942698e+02 4 4.922645586181170e+02 1.477032934850268e+02 3.616704092990706e+02 -2.995040029826120e+02 - ME 5.831910678002871e-04 + ME 5.153190919865371e-05 Event 204 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1636,7 +1636,7 @@ Event 204 Batch 0 2 6.952375769935185e+02 3.823764713153302e+01 6.531840992713522e+02 -2.350397908115460e+02 3 6.250862947179036e+02 1.031861473443961e+02 -5.506835576815644e+02 2.771878679515999e+02 4 1.796761282885781e+02 -1.414237944759291e+02 -1.025005415897879e+02 -4.214807714005369e+01 - ME 1.802858800889920e-04 + ME 1.903000177287069e-05 Event 205 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1644,7 +1644,7 @@ Event 205 Batch 0 2 5.625197268936781e+02 2.955060596751036e+02 4.395356105446072e+02 -1.895074112086703e+02 3 
3.144813194259642e+02 -1.941101430078122e+02 -7.073026664887073e+00 -2.473251401357733e+02 4 6.229989536803572e+02 -1.013959166672914e+02 -4.324625838797200e+02 4.368325513444433e+02 - ME 1.140145509231641e-04 + ME 3.163472493443465e-05 Event 206 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1652,7 +1652,7 @@ Event 206 Batch 0 2 5.487698581700869e+02 -4.771827558939671e+02 -2.639484985605369e+02 6.145050708573941e+01 3 4.357856725513919e+02 1.877155863290790e+02 1.701172104948722e+02 3.545872893148349e+02 4 5.154444692785200e+02 2.894671695648880e+02 9.383128806566407e+01 -4.160377964005746e+02 - ME 4.167786087259531e-03 + ME 3.341888001113221e-04 Event 207 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1660,7 +1660,7 @@ Event 207 Batch 0 2 5.289473514933904e+02 -3.230637718239221e+02 -3.258094337294262e+02 2.631792409740627e+02 3 3.730441408755686e+02 -1.145152671243400e+02 -7.298530142052728e+01 -3.474497523579300e+02 4 5.980085076310412e+02 4.375790389482623e+02 3.987947351499535e+02 8.427051138386733e+01 - ME 1.161501350367753e-04 + ME 3.789028948405571e-05 Event 208 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1668,7 +1668,7 @@ Event 208 Batch 0 2 3.144460531270953e+02 3.105028133645123e+02 -3.495125011961062e+01 3.525242310830974e+01 3 7.230517599976935e+02 -6.554206809343713e+02 2.220922910679198e+02 2.095294558946058e+02 4 4.625021868752117e+02 3.449178675698588e+02 -1.871410409483092e+02 -2.447818790029155e+02 - ME 4.858457850437588e-04 + ME 2.941989209837521e-05 Event 209 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1676,7 +1676,7 @@ Event 209 Batch 0 2 2.827014058170527e+02 -6.682954863774688e+01 -1.958656753088385e+02 -1.925890275057887e+02 3 5.969812148172332e+02 5.625717004655273e+02 1.060136244597389e+02 -1.692949027847388e+02 4 
6.203173793657136e+02 -4.957421518277804e+02 8.985205084909943e+01 3.618839302905275e+02 - ME 1.004351001266980e-04 + ME 2.261939336541961e-05 Event 210 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1684,7 +1684,7 @@ Event 210 Batch 0 2 3.369223392964550e+02 -2.366581006943837e+02 8.850719545688517e+01 -2.228813191927023e+02 3 6.926279093100447e+02 9.835546321295956e+01 -1.581805884470998e+02 6.671120783270956e+02 4 4.704497513935005e+02 1.383026374814242e+02 6.967339299021461e+01 -4.442307591343933e+02 - ME 5.974710408786874e-02 + ME 3.044010300440331e-03 Event 211 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1692,7 +1692,7 @@ Event 211 Batch 0 2 5.754314663824422e+02 -1.965408456680789e+02 -5.399725108422632e+02 3.037689947684008e+01 3 6.656941886103589e+02 4.112771407945243e+02 5.114655840792436e+02 1.113679599883347e+02 4 2.588743450071987e+02 -2.147362951264454e+02 2.850692676301957e+01 -1.417448594651748e+02 - ME 4.382347812376007e-04 + ME 1.754510489093768e-05 Event 212 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1700,7 +1700,7 @@ Event 212 Batch 0 2 5.922157374848572e+02 8.073316194509509e+00 4.947261155542873e+02 -3.254233732830556e+02 3 3.635572903001510e+02 8.951663862813328e+01 4.011175755255380e+01 3.500738802669425e+02 4 5.442269722149914e+02 -9.758995482264278e+01 -5.348378731068407e+02 -2.465050698388706e+01 - ME 3.041427876287276e-04 + ME 1.919214373141161e-04 Event 213 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1708,7 +1708,7 @@ Event 213 Batch 0 2 7.434820262506830e+02 2.991548764052629e+02 2.111623598614188e+02 -6.470566753063675e+02 3 5.607612173038236e+02 -2.664197873565705e+02 -1.905271140771768e+02 4.551626726109781e+02 4 1.957567564454930e+02 -3.273508904869271e+01 -2.063524578424195e+01 1.918940026953895e+02 - ME 
1.827786070323022e-04 + ME 1.896082550340891e-04 Event 214 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1716,7 +1716,7 @@ Event 214 Batch 0 2 5.400874280734793e+02 3.457358963402696e+02 2.445843697627679e+02 -3.351710101016577e+02 3 3.400793067879315e+02 1.482066942304564e+02 1.256466447865830e+02 2.791086371729012e+02 4 6.198332651385892e+02 -4.939425905707261e+02 -3.702310145493508e+02 5.606237292875651e+01 - ME 1.356968066378560e-04 + ME 6.515553919952984e-05 Event 215 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1724,7 +1724,7 @@ Event 215 Batch 0 2 3.916345321859864e+02 3.271767110560381e+02 -1.945589530122144e+02 9.208594000107233e+01 3 6.136750729169615e+02 -1.269585669220027e+02 2.644680756040779e+02 -5.390132228350478e+02 4 4.946903948970534e+02 -2.002181441340350e+02 -6.990912259186331e+01 4.469272828339764e+02 - ME 6.207321332343461e-05 + ME 3.427926940877871e-05 Event 216 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1732,7 +1732,7 @@ Event 216 Batch 0 2 3.767411090262154e+02 1.602503356822860e+02 2.758455349572533e+02 -2.004069210086422e+02 3 4.061922956351256e+02 3.340053729931861e+02 2.237650079776778e+02 5.798114391563544e+01 4 7.170665953386593e+02 -4.942557086754721e+02 -4.996105429349309e+02 1.424257770930068e+02 - ME 1.232271832865728e-03 + ME 2.360785017217177e-04 Event 217 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1740,7 +1740,7 @@ Event 217 Batch 0 2 6.474118977458852e+02 -5.378641111590873e+02 -3.279650037002520e+02 1.492759847325320e+02 3 5.088298200539713e+02 3.261878344469131e+02 1.555821256186315e+02 -3.581947579501665e+02 4 3.437582822001433e+02 2.116762767121744e+02 1.723828780816206e+02 2.089187732176345e+02 - ME 3.357118960820415e-05 + ME 1.388331578224744e-05 Event 218 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1748,7 +1748,7 @@ Event 218 Batch 0 2 6.658501161076259e+02 -6.577627036244854e+02 -3.020200479570956e+01 9.895676706252418e+01 3 2.516345839620714e+02 1.565221509782131e+02 -1.156477271957936e+02 1.595192254662914e+02 4 5.825152999303023e+02 5.012405526462722e+02 1.458497319915031e+02 -2.584759925288157e+02 - ME 5.956187308313417e-04 + ME 1.036808356896783e-04 Event 219 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1756,7 +1756,7 @@ Event 219 Batch 0 2 4.328556070633435e+02 6.122246558068494e+01 -1.687441385117925e+02 3.938796795879554e+02 3 6.500677455605621e+02 -3.703058656885360e+02 4.356876543064814e+02 -3.092537914719426e+02 4 4.170766473760945e+02 3.090834001078509e+02 -2.669435157946888e+02 -8.462588811601287e+01 - ME 2.797067114354785e-04 + ME 9.046106878448173e-05 Event 220 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1764,7 +1764,7 @@ Event 220 Batch 0 2 3.686297280598666e+02 -3.497113779929074e+02 -8.765282776369953e+01 7.685577594963354e+01 3 4.155522773953191e+02 -1.777404948015450e+02 -1.525848366500187e+02 3.432344379292750e+02 4 7.158179945448145e+02 5.274518727944524e+02 2.402376644137182e+02 -4.200902138789084e+02 - ME 3.485410710153060e-03 + ME 1.676729229638681e-03 Event 221 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1772,7 +1772,7 @@ Event 221 Batch 0 2 5.295220830718469e+02 3.654688468413813e+01 4.204675060608333e+02 3.197890523886257e+02 3 7.127556392876786e+02 -1.727486268095863e+02 -4.342549693537605e+02 -5.381460163035255e+02 4 2.577222776404743e+02 1.362017421254481e+02 1.378746329292729e+01 2.183569639148998e+02 - ME 2.819264207321091e-05 + ME 2.031931825964470e-05 Event 222 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1780,7 +1780,7 @@ Event 
222 Batch 0 2 2.464305981122427e+02 -2.054199106396077e+02 6.127423271580306e+01 1.215572638876956e+02 3 6.926647117218595e+02 4.702892479611936e+02 3.872350261814336e+02 -3.296383785530530e+02 4 5.609046901658980e+02 -2.648693373215859e+02 -4.485092588972366e+02 2.080811146653574e+02 - ME 6.319142394583372e-05 + ME 1.678695785515194e-05 Event 223 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1788,7 +1788,7 @@ Event 223 Batch 0 2 2.463384302181125e+02 -1.209251938955738e+02 -2.140981972257043e+02 -1.488897673935926e+01 3 6.819620845265065e+02 -2.400891875757811e+02 5.819023806457059e+02 2.623339210620683e+02 4 5.716994852553812e+02 3.610143814713547e+02 -3.678041834200016e+02 -2.474449443227091e+02 - ME 3.931927185620913e-04 + ME 4.810915220985587e-05 Event 224 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1796,7 +1796,7 @@ Event 224 Batch 0 2 2.236851263016067e+02 -8.671871524968952e+01 1.717231909970332e+02 1.141317038679677e+02 3 5.308972974363861e+02 -3.715833295102001e+01 4.680039348616383e+02 2.478780257941054e+02 4 7.454175762620068e+02 1.238770482007099e+02 -6.397271258586715e+02 -3.620097296620728e+02 - ME 8.708656265179471e-02 + ME 6.017706528853119e-02 Event 225 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1804,7 +1804,7 @@ Event 225 Batch 0 2 5.094176014319268e+02 1.569347096242780e+02 -1.561291130928888e+00 -4.846394040251013e+02 3 7.252311334449815e+02 -3.845161955462210e+02 -4.374219820797174e+01 6.133466494377277e+02 4 2.653512651230916e+02 2.275814859219426e+02 4.530348933890067e+01 -1.287072454126262e+02 - ME 3.974215742688118e-04 + ME 1.151501859389029e-04 Event 226 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1812,7 +1812,7 @@ Event 226 Batch 0 2 6.863217264048350e+02 -2.391756120967483e+02 -6.171186323675804e+02 
1.816511279850093e+02 3 5.332348374442744e+02 1.096335504493486e+02 4.112484130583279e+02 -3.212391931833643e+02 4 2.804434361508906e+02 1.295420616473995e+02 2.058702193092524e+02 1.395880651983551e+02 - ME 3.797053871351767e-05 + ME 1.438206074993319e-05 Event 227 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1820,7 +1820,7 @@ Event 227 Batch 0 2 7.243206345463230e+02 -5.280189925476210e+02 -1.406011303275692e+02 4.754657162080069e+02 3 5.487499634657129e+02 3.840442912861271e+02 -1.353123555187442e+01 -3.917312987222202e+02 4 2.269294019879644e+02 1.439747012614939e+02 1.541323658794436e+02 -8.373441748578679e+01 - ME 2.903986554770466e-04 + ME 5.165623507180856e-05 Event 228 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1828,7 +1828,7 @@ Event 228 Batch 0 2 2.119578664379945e+02 1.625437651479949e+01 -1.806612394559917e+02 1.096514885776142e+02 3 6.254097456672617e+02 -3.200704000326812e+01 3.158243706171928e+02 5.388579277416935e+02 4 6.626323878947439e+02 1.575266348846865e+01 -1.351631311612011e+02 -6.485094163193077e+02 - ME 8.951233069377997e-01 + ME 3.800526374221887e-02 Event 229 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1836,7 +1836,7 @@ Event 229 Batch 0 2 5.921227120343664e+02 -3.877491982207575e+02 4.449193714386763e+02 -4.802726626309342e+01 3 4.688278331283221e+02 3.470549659129084e+02 -1.517581364471262e+02 -2.762641051115459e+02 4 4.390494548373113e+02 4.069423230784909e+01 -2.931612349915501e+02 3.242913713746393e+02 - ME 3.492131538818778e-05 + ME 1.250052930035257e-05 Event 230 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1844,7 +1844,7 @@ Event 230 Batch 0 2 4.261952284727868e+02 2.153699775439378e+02 -1.171086083390750e+02 3.486312082969335e+02 3 3.540619701921573e+02 3.070144260847319e+01 1.307424531367546e+02 
3.276029778648147e+02 4 7.197428013350559e+02 -2.460714201524109e+02 -1.363384479767965e+01 -6.762341861617483e+02 - ME 3.186738302883428e-01 + ME 4.711214236813061e-02 Event 231 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1852,7 +1852,7 @@ Event 231 Batch 0 2 4.205236024420392e+02 7.533931576750228e+01 -3.260217181731272e+02 -2.547036061581322e+02 3 5.397543491930860e+02 8.423195081267914e+01 -1.158376015978276e+02 5.204050211049134e+02 4 5.397220483648740e+02 -1.595712665801811e+02 4.418593197709548e+02 -2.657014149467809e+02 - ME 5.532186388062512e-04 + ME 3.265984123744224e-04 Event 232 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1860,7 +1860,7 @@ Event 232 Batch 0 2 4.295782852421121e+02 3.239064445356881e+02 9.240815775655221e-01 2.821724019337124e+02 3 7.183371274312143e+02 -6.155391061575082e+02 -1.955291718271078e+02 -3.144649112405858e+02 4 3.520845873266736e+02 2.916326616218201e+02 1.946050902495422e+02 3.229250930687335e+01 - ME 6.730603828970119e-05 + ME 1.049779024540051e-05 Event 233 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1868,7 +1868,7 @@ Event 233 Batch 0 2 3.640046126075324e+02 -2.220120664068515e+02 -1.165482463207536e+02 2.638683509799470e+02 3 4.682121509308883e+02 -1.009786196736112e+02 3.762431872847591e+02 2.597441061312976e+02 4 6.677832364615790e+02 3.229906860804628e+02 -2.596949409640055e+02 -5.236124571112447e+02 - ME 5.385640989777132e-03 + ME 7.598357868514145e-04 Event 234 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1876,7 +1876,7 @@ Event 234 Batch 0 2 8.690043548936441e+01 -2.607433849884744e+01 -7.258333015587984e+01 4.004341073848801e+01 3 6.785651905172676e+02 -3.574930335951373e+02 -4.725723606052789e+01 5.748184081539155e+02 4 7.345343739933678e+02 3.835673720939847e+02 1.198405662164078e+02 
-6.148618188924036e+02 - ME 1.962113644780599e-01 + ME 8.152211059226219e-02 Event 235 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1884,7 +1884,7 @@ Event 235 Batch 0 2 3.000566282865331e+02 1.219146462304108e+01 -2.126850238006026e+02 2.113064812540423e+02 3 7.160981218147422e+02 2.575873756248088e+02 2.779062108697769e+02 -6.076293293985470e+02 4 4.838452498987246e+02 -2.697788402478500e+02 -6.522118706917435e+01 3.963228481445046e+02 - ME 3.940402333844027e-05 + ME 2.498899672933017e-05 Event 236 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1892,7 +1892,7 @@ Event 236 Batch 0 2 1.510518772182422e+02 -9.497518588910037e+01 1.467158067736534e+01 1.165380984781943e+02 3 6.955499852411461e+02 5.933480346078575e+02 3.495450158124774e+02 9.770452249822526e+01 4 6.533981375406115e+02 -4.983728487187572e+02 -3.642165964898426e+02 -2.142426209764196e+02 - ME 1.121647028585911e-03 + ME 2.623118294900277e-04 Event 237 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1900,7 +1900,7 @@ Event 237 Batch 0 2 2.173874152942701e+02 2.069918593916189e+02 -3.850229167793934e+01 -5.412237993169356e+01 3 7.305677895866185e+02 -6.701932224704495e+02 -2.421540700080861e+02 1.610333695687662e+02 4 5.520447951191120e+02 4.632013630788306e+02 2.806563616860255e+02 -1.069109896370727e+02 - ME 1.822378225061386e-04 + ME 2.170005261464319e-05 Event 238 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1908,7 +1908,7 @@ Event 238 Batch 0 2 6.349573912113930e+02 -3.336495545457479e+02 -4.785400196851591e+02 2.506956580500139e+02 3 5.768887318987100e+02 4.812119270965607e+02 2.334547330568691e+02 -2.161818165921041e+02 4 2.881538768898968e+02 -1.475623725508129e+02 2.450852866282900e+02 -3.451384145790988e+01 - ME 9.810731053503000e-05 + ME 1.383744831772315e-05 Event 239 Batch 
0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1916,7 +1916,7 @@ Event 239 Batch 0 2 5.349076725903783e+02 -5.331874414268931e+02 1.887721601290929e+01 -3.848403846142781e+01 3 3.658437465440003e+02 8.335465236419728e+01 1.670818061666301e+01 -3.558292926602242e+02 4 5.992485808656214e+02 4.498327890626960e+02 -3.558539662957234e+01 3.943133311216517e+02 - ME 9.226736931333760e-05 + ME 2.560110521983184e-05 Event 240 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1924,7 +1924,7 @@ Event 240 Batch 0 2 2.870582387324442e+02 1.830793600232297e+02 -1.562409872742485e+02 1.564389154054251e+02 3 6.007192677438852e+02 3.433229388031108e+02 4.688113613010560e+02 -1.523446941819630e+02 4 6.122224935236703e+02 -5.264022988263405e+02 -3.125703740268075e+02 -4.094221223461989e+00 - ME 1.424405912705748e-04 + ME 3.548113744927254e-05 Event 241 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1932,7 +1932,7 @@ Event 241 Batch 0 2 7.424696267657401e+02 4.823783107714221e+02 2.498315161211407e+02 5.061190823507636e+02 3 2.455726236162737e+02 -1.827879695947952e+02 -1.199757723946156e+02 -1.118046764652876e+02 4 5.119577496179861e+02 -2.995903411766270e+02 -1.298557437265251e+02 -3.943144058854759e+02 - ME 2.705973755259623e-03 + ME 2.366266620918590e-04 Event 242 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1940,7 +1940,7 @@ Event 242 Batch 0 2 7.249130370348905e+02 1.676828147928013e+02 6.059046362201677e+02 -3.609168279440810e+02 3 6.240672718074169e+02 -4.529413961306761e+01 -5.490982345027019e+02 2.930862151720549e+02 4 1.510196911576933e+02 -1.223886751797337e+02 -5.680640171746593e+01 6.783061277202641e+01 - ME 4.587322306592483e-05 + ME 1.668420503127583e-05 Event 243 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 
@@ -1948,7 +1948,7 @@ Event 243 Batch 0 2 4.655090712555229e+02 2.096323612054770e+02 2.113490506800235e+02 3.578890153850057e+02 3 5.764797256412519e+02 6.697224883641857e+01 -5.382210340689440e+02 -1.953502251008744e+02 4 4.580112031032257e+02 -2.766046100418949e+02 3.268719833889206e+02 -1.625387902841314e+02 - ME 2.309042201876567e-04 + ME 3.999521919602606e-05 Event 244 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1956,7 +1956,7 @@ Event 244 Batch 0 2 5.237109195354749e+02 1.305098338947756e+02 -4.868141165486322e+02 -1.423106687020528e+02 3 5.804450110242352e+02 -4.045654344879671e+02 2.643676733537771e+02 3.214855413949400e+02 4 3.958440694402901e+02 2.740556005931916e+02 2.224464431948551e+02 -1.791748726928872e+02 - ME 2.644202232750943e-04 + ME 2.634847163425152e-05 Event 245 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1964,7 +1964,7 @@ Event 245 Batch 0 2 2.629169357520612e+02 2.457511487795889e+02 -4.402365929491729e+01 -8.242333044139184e+01 3 6.931386101565748e+02 -5.195573187661655e+02 4.004017488088275e+02 -2.240084037645317e+02 4 5.439444540913644e+02 2.738061699865766e+02 -3.563780895139104e+02 3.064317342059234e+02 - ME 4.288053786412853e-05 + ME 1.052590061693975e-05 Event 246 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1972,7 +1972,7 @@ Event 246 Batch 0 2 6.300937687157445e+02 -5.459948028041557e+02 3.085954426748102e+02 6.063567799240802e+01 3 1.673910408536145e+02 -3.546130270298926e+01 7.662824936562275e+01 -1.445350060290698e+02 4 7.025151904306430e+02 5.814561055071442e+02 -3.852236920404341e+02 8.389932803666261e+01 - ME 6.282756509154168e-04 + ME 1.915763997923398e-04 Event 247 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1980,7 +1980,7 @@ Event 247 Batch 0 2 2.577847506495701e+02 2.418237207037818e+02 
-8.449121421856779e+01 2.890502538162603e+01 3 5.130193185035739e+02 4.381905811488919e+02 1.366496386102691e+02 2.291390669832418e+02 4 7.291959308468561e+02 -6.800143018526737e+02 -5.215842439170134e+01 -2.580440923648679e+02 - ME 4.005872724472581e-03 + ME 1.831864018495938e-03 Event 248 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1988,7 +1988,7 @@ Event 248 Batch 0 2 7.033207479153643e+02 -5.040306065309413e+02 -2.020637997366072e+02 4.469714117975369e+02 3 1.758360012551320e+02 -1.471306652922549e+01 -4.035460943683606e+00 -1.751728862172264e+02 4 6.208432508295037e+02 5.187436730601667e+02 2.060992606802909e+02 -2.717985255803103e+02 - ME 5.592865021063005e-04 + ME 1.512538512828554e-04 Event 249 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1996,7 +1996,7 @@ Event 249 Batch 0 2 3.018816177222694e+02 5.523075638651412e+01 1.752331212074551e+02 2.395316845419020e+02 3 6.597415560701297e+02 6.315352823685419e+01 -6.561001191322722e+02 -2.834054254405022e+01 4 5.383768262076012e+02 -1.183842846233684e+02 4.808669979248172e+02 -2.111911419978518e+02 - ME 4.868100986861644e-04 + ME 9.225490912808109e-05 Event 250 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2004,7 +2004,7 @@ Event 250 Batch 0 2 2.166381935101301e+02 -1.289072913913530e+02 -1.189615590004073e+02 -1.271344351215279e+02 3 6.815426093761062e+02 -2.511966318704653e+02 5.323234433390903e+02 3.435583388650892e+02 4 6.018191971137635e+02 3.801039232618182e+02 -4.133618843386827e+02 -2.164239037435611e+02 - ME 3.468666532553966e-04 + ME 6.586594805989363e-05 Event 251 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2012,7 +2012,7 @@ Event 251 Batch 0 2 6.676961532387151e+02 -3.991265595084280e+01 -4.419965947723094e+02 4.988628500443886e+02 3 7.150412702460949e+02 3.921851524844908e+01 
5.505653759000154e+02 -4.545587894617490e+02 4 1.172625765151894e+02 6.941407023942340e-01 -1.085687811277060e+02 -4.430406058263954e+01 - ME 5.615833562023813e-04 + ME 4.930952510857648e-05 Event 252 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2020,7 +2020,7 @@ Event 252 Batch 0 2 2.112668789066533e+02 -1.147554660376938e+02 3.364589711187055e+01 -1.741632301749357e+02 3 7.393007599584276e+02 2.529046383258835e+02 -3.593132473314827e+02 5.945576909606565e+02 4 5.494323611349191e+02 -1.381491722881897e+02 3.256673502196121e+02 -4.203944607857206e+02 - ME 2.709805393201018e-03 + ME 3.541023077707110e-04 Event 253 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2028,7 +2028,7 @@ Event 253 Batch 0 2 7.299659304470913e+01 -4.405884533650594e+01 -5.451291667290519e+01 2.038780663930336e+01 3 7.253475305576840e+02 3.245698054519170e+02 -1.402290280555607e+02 -6.333397991328418e+02 4 7.016558763976062e+02 -2.805109601154107e+02 1.947419447284657e+02 6.129519924935382e+02 - ME 6.484723438037138e-04 + ME 3.511004874943257e-04 Event 254 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2036,7 +2036,7 @@ Event 254 Batch 0 2 1.982520535096858e+02 -6.164633378269741e+01 1.773450413210087e+02 -6.365801262063783e+01 3 7.183815394471145e+02 -1.984891252513599e+02 -6.893152145826987e+02 -3.896971029099802e+01 4 5.833664070431995e+02 2.601354590340572e+02 5.119701732616900e+02 1.026277229116358e+02 - ME 9.210498573936143e-05 + ME 1.539519794804785e-05 Event 255 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2044,7 +2044,7 @@ Event 255 Batch 0 2 5.347080663542586e+02 -5.063606624096446e+02 1.592577719822621e+02 6.440929941880935e+01 3 2.475406015289465e+02 -1.856063881081879e+02 3.468010668896048e+00 -1.637516137347836e+02 4 7.177513321167953e+02 6.919670505178326e+02 
-1.627257826511582e+02 9.934231431597431e+01 - ME 1.305481727349711e-03 + ME 3.137689362725149e-04 Event 0 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2052,7 +2052,7 @@ Event 0 Batch 1 2 5.775677821222389e+02 4.314431287975208e+02 -2.652567205762379e+02 -2.776332864556192e+02 3 6.023469575940325e+02 -3.228069847179709e+02 5.005558924007591e+02 8.978477890465942e+01 4 3.200852602837275e+02 -1.086361440795499e+02 -2.352991718245218e+02 1.878485075509607e+02 - ME 2.846168667868940e-05 + ME 7.533072458757011e-06 Event 1 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2060,7 +2060,7 @@ Event 1 Batch 1 2 7.241206267812560e+02 3.541578305635416e+02 -4.894807402105655e+02 3.991635230623179e+02 3 7.375567605136832e+02 -3.903081173548693e+02 4.920451519627784e+02 -3.867054653560791e+02 4 3.832261270506111e+01 3.615028679132773e+01 -2.564411752212873e+00 -1.245805770623896e+01 - ME 1.002871021831580e-03 + ME 7.043932941624384e-05 Event 2 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2068,7 +2068,7 @@ Event 2 Batch 1 2 4.849204091734790e+02 2.108660079931152e+02 4.054727376659824e+02 1.620962335024329e+02 3 2.728468517759738e+02 4.961449545460115e+01 2.005017763154939e+02 1.782774356422519e+02 4 7.422327390505470e+02 -2.604805034477164e+02 -6.059745139814763e+02 -3.403736691446848e+02 - ME 2.729395913593408e-02 + ME 1.721146206228212e-02 Event 3 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2076,7 +2076,7 @@ Event 3 Batch 1 2 4.264155576764489e+02 -4.170952165204416e+02 -7.054834331799705e+01 5.370977042744418e+01 3 7.108631972082329e+02 6.832597695609467e+02 -1.727180704166534e+02 -9.301097030017993e+01 4 3.627212451153183e+02 -2.661645530405051e+02 2.432664137346505e+02 3.930119987273574e+01 - ME 5.466137525204964e-05 + ME 5.739226791327231e-06 Event 4 
Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2084,7 +2084,7 @@ Event 4 Batch 1 2 7.183269968238449e+02 -3.584978055671311e+02 -5.048824553914336e+02 -3.640971079361008e+02 3 7.387431276480253e+02 4.013538934928407e+02 5.036810263913359e+02 3.618865629982628e+02 4 4.292987552812846e+01 -4.285608792570924e+01 1.201429000097643e+00 2.210544937839338e+00 - ME 3.145606575501715e-04 + ME 5.884725836744927e-05 Event 5 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2092,7 +2092,7 @@ Event 5 Batch 1 2 4.529780005473896e+02 -8.443182436392424e+01 4.445408460134587e+02 -2.106590230986445e+01 3 4.683757780543924e+02 -6.076819021151039e+01 -1.335482427838441e+02 -4.448010379662153e+02 4 5.786462213982179e+02 1.452000145754347e+02 -3.109926032296145e+02 4.658669402760799e+02 - ME 8.481958952475706e-05 + ME 2.851579396246287e-05 Event 6 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2100,7 +2100,7 @@ Event 6 Batch 1 2 6.238848262005389e+02 -1.065131260140052e+02 -4.741487807795934e+02 -3.912418229627633e+02 3 1.729069432107234e+02 -1.460869767542721e+02 -8.199113358821990e+01 4.281191710484079e+01 4 7.032082305887380e+02 2.526001027682771e+02 5.561399143678132e+02 3.484299058579224e+02 - ME 4.868510537699180e-04 + ME 1.468701510222534e-04 Event 7 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2108,7 +2108,7 @@ Event 7 Batch 1 2 6.977203086376783e+02 -6.126072843634399e+02 -1.744636661244187e+02 2.847602033865263e+02 3 1.614193396272251e+02 -4.571584237043670e+00 8.497734613495712e+01 -1.371646983269120e+02 4 6.408603517350967e+02 6.171788686004836e+02 8.948631998946138e+01 -1.475955050596143e+02 - ME 3.540796080305845e-04 + ME 9.523334397108766e-05 Event 8 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-2116,7 +2116,7 @@ Event 8 Batch 1 2 6.871091945484288e+02 4.059708628308462e+02 2.886614153103366e+02 4.732666173272762e+02 3 5.653302025665631e+02 -2.838835484844413e+02 -7.353399035097291e+01 -4.833229987253825e+02 4 2.475606028850081e+02 -1.220873143464048e+02 -2.151274249593637e+02 1.005638139810634e+01 - ME 8.785466054587446e-05 + ME 3.726341895116938e-05 Event 9 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2124,7 +2124,7 @@ Event 9 Batch 1 2 1.618579955503452e+02 1.385215220188489e+01 1.601201234527701e+02 -1.917484467788566e+01 3 7.196660585644588e+02 -4.527189715496824e+02 -4.214090439733052e+02 3.679391067910628e+02 4 6.184759458851959e+02 4.388668193477974e+02 2.612889205205349e+02 -3.487642621131772e+02 - ME 1.054640649369016e-03 + ME 1.276556148007894e-04 Event 10 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2132,7 +2132,7 @@ Event 10 Batch 1 2 7.832785200561162e+01 1.027681340851886e+01 -7.242726264265977e+01 -2.799877018853974e+01 3 7.448007230566494e+02 2.520540107528716e+02 6.813719334665398e+02 1.641011304445167e+02 4 6.768714249377393e+02 -2.623308241613905e+02 -6.089446708238800e+02 -1.361023602559769e+02 - ME 5.876642887714617e-04 + ME 1.087112534498832e-04 Event 11 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2140,7 +2140,7 @@ Event 11 Batch 1 2 5.478627446486676e+02 2.070882322301630e+02 -4.708081692757452e+02 1.887000762823861e+02 3 6.997827604382593e+02 -4.209013422316021e+02 4.569873120768409e+02 -3.220257264800591e+02 4 2.523544949130733e+02 2.138131100014392e+02 1.382085719890436e+01 1.333256501976729e+02 - ME 2.703695959900953e-05 + ME 7.092902148917371e-06 Event 12 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2148,7 +2148,7 @@ Event 12 Batch 1 2 5.802868936311938e+02 -4.467002255894120e+01 5.211262762381961e+02 
-2.513262266832405e+02 3 5.208038834706859e+02 2.151797013176283e+01 -4.993650129388666e+02 -1.463155694111945e+02 4 3.989092228981199e+02 2.315205242717860e+01 -2.176126329932955e+01 3.976417960944350e+02 - ME 5.046437564325244e-04 + ME 4.980323856672599e-04 Event 13 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2156,7 +2156,7 @@ Event 13 Batch 1 2 5.774880087360024e+02 1.576445054854711e+02 5.481077151088400e+02 -9.065617884226717e+01 3 5.915098138161557e+02 -3.018001633277128e+02 -3.808656371901898e+02 3.372564123391869e+02 4 3.310021774478421e+02 1.441556578422419e+02 -1.672420779186502e+02 -2.466002334969197e+02 - ME 1.505341700965184e-03 + ME 5.587942683639647e-05 Event 14 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2164,7 +2164,7 @@ Event 14 Batch 1 2 2.531797527967491e+02 -8.400833666640553e+01 -2.384535242035555e+02 -1.350938161690895e+01 3 5.261064571264828e+02 -1.751971590790252e+02 -3.334570051994592e+02 3.672878780523887e+02 4 7.207137900767681e+02 2.592054957454308e+02 5.719105294030147e+02 -3.537784964354798e+02 - ME 3.373121845959189e-03 + ME 1.659114310450813e-03 Event 15 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2172,7 +2172,7 @@ Event 15 Batch 1 2 4.605848765362425e+02 3.563504404614684e+02 1.735853700506503e+02 2.345653669687875e+02 3 4.216445088607453e+02 1.370719005416187e+02 -3.933730877164850e+02 6.521502736890037e+01 4 6.177706146030118e+02 -4.934223410030871e+02 2.197877176658347e+02 -2.997803943376878e+02 - ME 4.613631402771334e-04 + ME 9.110622752737525e-05 Event 16 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2180,7 +2180,7 @@ Event 16 Batch 1 2 4.972484926572777e+02 -1.474122335888775e+02 -4.748950276275915e+02 -6.399787981958280e-01 3 5.072511849723048e+02 4.846784046822065e+02 1.224000792205880e+02 
-8.607455661990267e+01 4 4.955003223704169e+02 -3.372661710933285e+02 3.524949484070036e+02 8.671453541809866e+01 - ME 5.856804747367533e-05 + ME 1.035537635543116e-05 Event 17 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2188,7 +2188,7 @@ Event 17 Batch 1 2 3.182636773520259e+02 -9.176062613973060e+01 -1.890905041641619e+02 2.389906630959087e+02 3 6.376303990615819e+02 -4.240378519397394e+02 2.706855745366566e+02 -3.917827786765570e+02 4 5.441059235863918e+02 5.157984780794702e+02 -8.159507037249479e+01 1.527921155806483e+02 - ME 7.445984612273079e-05 + ME 2.964570775197734e-05 Event 18 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2196,7 +2196,7 @@ Event 18 Batch 1 2 5.532560008158404e+02 -4.148613005881325e+02 1.689647846464811e+02 -3.247047971041214e+02 3 3.650144721835348e+02 -1.597348634907620e+02 -2.160675866909894e+02 2.470529017650751e+02 4 5.817295270006244e+02 5.745961640788944e+02 4.710280204450838e+01 7.765189533904635e+01 - ME 9.119298978738387e-05 + ME 3.148325734685632e-05 Event 19 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2204,7 +2204,7 @@ Event 19 Batch 1 2 3.263687475619531e+02 -1.904667433734991e+02 2.390747946355329e+02 -1.143775398573919e+02 3 7.331345945903582e+02 2.597391859223821e+02 -6.739404183465077e+02 1.258022320965774e+02 4 4.404966578476884e+02 -6.927244254888298e+01 4.348656237109747e+02 -1.142469223918529e+01 - ME 8.793129888044293e-05 + ME 9.665339952809457e-06 Event 20 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2212,7 +2212,7 @@ Event 20 Batch 1 2 9.588718605412237e+01 4.259536217794532e+01 8.056474827260676e+01 -2.982128277051557e+01 3 7.250265356668370e+02 3.120913743414047e+02 -4.446787057645155e+02 4.801284204484703e+02 4 6.790862782790414e+02 -3.546867365193502e+02 3.641139574919093e+02 
-4.503071376779550e+02 - ME 3.686389281265799e-03 + ME 6.402422614019696e-04 Event 21 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2220,7 +2220,7 @@ Event 21 Batch 1 2 1.825278201605081e+02 -1.533737674675502e+02 8.574830442242751e+01 4.939757963742074e+01 3 7.183016103669913e+02 1.713205736990392e+02 -6.275703015775031e+02 -3.045685162014731e+02 4 5.991705694725008e+02 -1.794680623148897e+01 5.418219971550755e+02 2.551709365640523e+02 - ME 7.470861105912214e-05 + ME 1.806434468406198e-05 Event 22 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2228,7 +2228,7 @@ Event 22 Batch 1 2 2.349542451120770e+02 9.235159917618290e+01 -2.156570331301489e+02 -1.291214495308476e+01 3 7.360601907662837e+02 -2.182033070539752e+02 6.568866822530020e+02 -2.503433799808774e+02 4 5.289855641216395e+02 1.258517078777923e+02 -4.412296491228531e+02 2.632555249339621e+02 - ME 3.893602972207037e-05 + ME 8.007442232312076e-06 Event 23 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2236,7 +2236,7 @@ Event 23 Batch 1 2 2.350908908124364e+02 -7.377772511691019e+00 -2.298431804723787e+02 -4.884063683135331e+01 3 6.797114625392685e+02 -5.485955088721076e+02 3.603976926464840e+02 1.765336882516069e+02 4 5.851976466482949e+02 5.559732813837987e+02 -1.305545121741055e+02 -1.276930514202538e+02 - ME 2.057468423101862e-04 + ME 3.185713653214173e-05 Event 24 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2244,7 +2244,7 @@ Event 24 Batch 1 2 4.355364173804401e+02 2.538053291625626e+02 -2.665393838801487e+02 -2.328767540869265e+02 3 4.093863144993796e+02 -1.953012891316528e+02 -3.573484670764558e+02 4.191221827828568e+01 4 6.550772681201798e+02 -5.850404003090968e+01 6.238878509566048e+02 1.909645358086408e+02 - ME 1.895168702655672e-04 + ME 3.721637657688893e-05 Event 25 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2252,7 +2252,7 @@ Event 25 Batch 1 2 7.365386968907909e+02 3.875876454009267e+02 3.151568854896985e+02 5.412404333367775e+02 3 5.208510884285567e+02 -2.430585576296288e+02 -1.518636440371932e+02 -4.349089876054084e+02 4 2.426102146806534e+02 -1.445290877712977e+02 -1.632932414525050e+02 -1.063314457313693e+02 - ME 3.717867207603688e-04 + ME 7.982561935336398e-05 Event 26 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2260,7 +2260,7 @@ Event 26 Batch 1 2 7.198867014174701e+02 5.189601929589824e+02 4.797253921416957e+02 -1.370428003807496e+02 3 3.889101953712928e+02 -1.847394503243419e+02 -2.837815501141775e+02 1.912864537085460e+02 4 3.912031032112371e+02 -3.342207426346404e+02 -1.959438420275183e+02 -5.424365332779646e+01 - ME 1.222836766708484e-04 + ME 1.928349098758061e-05 Event 27 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2268,7 +2268,7 @@ Event 27 Batch 1 2 6.732032222628646e+02 5.870808395006010e+02 -9.126179303429218e+01 3.165595544104447e+02 3 1.177373967283342e+02 7.847176641415683e+01 5.304379211899001e+00 -8.761358356661104e+01 4 7.090593810088013e+02 -6.655526059147578e+02 8.595741382239324e+01 -2.289459708438336e+02 - ME 1.603290018002586e-03 + ME 6.795383824785976e-04 Event 28 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2276,7 +2276,7 @@ Event 28 Batch 1 2 6.475300414228806e+02 3.136396845517189e+02 3.816259196370642e+02 -4.186728559156669e+02 3 7.290923529036073e+02 -2.791764769994177e+02 -4.112865540505715e+02 5.333662195995520e+02 4 1.233776056735125e+02 -3.446320755230100e+01 2.966063441350738e+01 -1.146933636838856e+02 - ME 5.037107889244314e-02 + ME 6.311296815400830e-04 Event 29 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2284,7 
+2284,7 @@ Event 29 Batch 1 2 3.156754590345620e+02 -2.870540678871016e+02 4.159516713841874e+01 -1.245825012466667e+02 3 4.770060274033896e+02 -2.355061130652810e+02 -3.231858413754910e+02 -2.600433287405434e+02 4 7.073185135620483e+02 5.225601809523826e+02 2.815906742370723e+02 3.846258299872100e+02 - ME 7.956699356695784e-04 + ME 1.321807869823317e-04 Event 30 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2292,7 +2292,7 @@ Event 30 Batch 1 2 6.091290614220995e+02 1.543004089904798e+02 4.216196287493766e+00 -5.892468251447810e+02 3 2.079357839022729e+02 2.034647466922837e+02 4.185675980476618e+01 9.348729279626889e+00 4 6.829351546756266e+02 -3.577651556827627e+02 -4.607295609226003e+01 5.798980958651539e+02 - ME 3.902231064020147e-04 + ME 1.448382779935031e-04 Event 31 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2300,7 +2300,7 @@ Event 31 Batch 1 2 6.901710072855793e+02 1.433309098684656e+01 6.447948515477649e+02 -2.457034416076623e+02 3 5.898919363861644e+02 1.120085307876391e+02 -4.815950471622465e+02 3.217029626736535e+02 4 2.199370563282564e+02 -1.263416217744856e+02 -1.631998043855182e+02 -7.599952106599136e+01 - ME 2.415465849322543e-04 + ME 2.376400497996635e-05 Event 32 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2308,7 +2308,7 @@ Event 32 Batch 1 2 6.144498311923271e+02 5.832947925341469e+02 -1.925283703230110e+02 1.576726595169125e+01 3 2.478450424037004e+02 5.004284035329792e+01 2.389954177960992e+02 4.247433867565734e+01 4 6.377051264039724e+02 -6.333376328874447e+02 -4.646704747308818e+01 -5.824160462734862e+01 - ME 2.160220890176678e-04 + ME 5.390650629646604e-05 Event 33 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2316,7 +2316,7 @@ Event 33 Batch 1 2 6.134536717469736e+02 -1.625429495269566e+02 -1.853973484494194e+02 
5.617232593785355e+02 3 5.361644687950269e+02 -3.755831293394986e+01 -9.992652347025609e+01 -5.254297294928764e+02 4 3.503818594579993e+02 2.001012624609065e+02 2.853238719196754e+02 -3.629352988565911e+01 - ME 1.224582992507153e-04 + ME 1.005452860076771e-04 Event 34 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2324,7 +2324,7 @@ Event 34 Batch 1 2 3.840838099420727e+02 -2.442269925519278e+02 -3.827314394217582e+01 -2.939535943332559e+02 3 6.022630974514659e+02 3.956891925431131e+01 5.086724982658299e+02 3.200116071158652e+02 4 5.136530926064613e+02 2.046580732976165e+02 -4.703993543236541e+02 -2.605801278260916e+01 - ME 9.608243105510499e-05 + ME 2.313941306740064e-05 Event 35 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2332,7 +2332,7 @@ Event 35 Batch 1 2 3.454350783663418e+02 -3.439607925797615e+02 2.363778141880094e+01 -2.139209721976717e+01 3 6.705698302143294e+02 5.215327591153251e+02 4.060443141865528e+02 -1.131171661597076e+02 4 4.839950914193290e+02 -1.775719665355635e+02 -4.296820956053536e+02 1.345092633794747e+02 - ME 4.862206803317224e-05 + ME 7.982017052260048e-06 Event 36 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2340,7 +2340,7 @@ Event 36 Batch 1 2 7.098652154429357e+02 2.489290984574327e+02 -1.674080692141068e+02 -6.433641786725617e+02 3 6.178479130357197e+02 -1.435715807033598e+02 2.588953561477193e+02 5.423065917191846e+02 4 1.722868715213448e+02 -1.053575177540730e+02 -9.148728693361247e+01 1.010575869533772e+02 - ME 6.680529568232270e-05 + ME 5.562249548714765e-05 Event 37 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2348,7 +2348,7 @@ Event 37 Batch 1 2 6.906872786346031e+02 1.495946561071237e+02 1.712833879510068e+02 6.521750966909805e+02 3 3.682276595245592e+02 -1.358558710218083e+02 1.194309698061993e+02 
-3.207351477449753e+02 4 4.410850618408380e+02 -1.373878508531530e+01 -2.907143577572061e+02 -3.314399489460051e+02 - ME 2.014943348935539e-03 + ME 5.542438863722841e-04 Event 38 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2356,7 +2356,7 @@ Event 38 Batch 1 2 6.131720166645955e+02 -5.222102655174087e+02 6.340623138461877e+00 3.213038392347352e+02 3 4.540063357567760e+02 2.932429176443922e+02 -3.207297067242505e+02 -1.313879727496968e+02 4 4.328216475786277e+02 2.289673478730168e+02 3.143890835857886e+02 -1.899158664850380e+02 - ME 2.589645049118943e-04 + ME 3.150821423911933e-05 Event 39 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2364,7 +2364,7 @@ Event 39 Batch 1 2 2.929747896182304e+02 2.510117592312210e+02 -1.378648144805472e+02 6.181113983529403e+01 3 6.287164314722783e+02 3.864928360025993e+01 6.254120614625328e+02 5.148142827864510e+01 4 5.783087789094894e+02 -2.896610428314818e+02 -4.875472469819856e+02 -1.132925681139394e+02 - ME 1.708238325115053e-04 + ME 2.723120294663496e-05 Event 40 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2372,7 +2372,7 @@ Event 40 Batch 1 2 1.143487538112954e+02 -3.203572478439017e+01 1.022340126870988e+02 3.996944439980560e+01 3 7.361483923235807e+02 5.924235295921244e+02 -3.838567751530157e+02 -2.088128187524163e+02 4 6.495028538651248e+02 -5.603878048077345e+02 2.816227624659169e+02 1.688433743526105e+02 - ME 2.026369815874481e-04 + ME 4.279185076498264e-05 Event 41 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2380,7 +2380,7 @@ Event 41 Batch 1 2 6.384898508133350e+02 5.540399192408263e+02 -3.014826159773289e+02 -9.908223727147148e+01 3 3.510407251698805e+02 -1.719168197014114e+02 2.065966849440144e+02 -2.258140996521069e+02 4 5.104694240167846e+02 -3.821230995394149e+02 9.488593103331458e+01 
3.248963369235784e+02 - ME 4.455092331482675e-05 + ME 1.488395965626735e-05 Event 42 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2388,7 +2388,7 @@ Event 42 Batch 1 2 3.291654598309212e+02 -1.090829060981258e+02 2.972891943885482e+02 -8.983292515941632e+01 3 6.884965239796815e+02 4.933628807557017e+02 -2.919492821202986e+02 3.812953554581829e+02 4 4.823380161893969e+02 -3.842799746575757e+02 -5.339912268249619e+00 -2.914624302987665e+02 - ME 6.690811667999076e-04 + ME 5.767145017550451e-05 Event 43 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2396,7 +2396,7 @@ Event 43 Batch 1 2 3.674173006007981e+02 2.791827424102563e+02 1.079644067383057e+02 2.130637369397045e+02 3 7.392205647816575e+02 -6.110484627794917e+02 -4.247874240022372e+01 -4.138385868609020e+02 4 3.933621346175442e+02 3.318657203692355e+02 -6.548566433808202e+01 2.007748499211975e+02 - ME 2.734436884563990e-05 + ME 6.513986915725277e-06 Event 44 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2404,7 +2404,7 @@ Event 44 Batch 1 2 2.081359682230012e+02 -1.082501549908087e+02 1.771964605001424e+02 1.427934167997762e+01 3 7.449563315308093e+02 5.092828751965591e+02 -5.388739609944279e+02 7.215083562608928e+01 4 5.469077002461893e+02 -4.010327202057504e+02 3.616775004942854e+02 -8.643017730606689e+01 - ME 1.760644262839344e-04 + ME 1.838899544278803e-05 Event 45 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2412,7 +2412,7 @@ Event 45 Batch 1 2 5.180982465404422e+02 4.470261481799612e+02 -3.368837017252423e+01 -2.597277606009553e+02 3 3.377595659674062e+02 -7.316527185649456e+01 2.454727770679006e+02 -2.201624016839132e+02 4 6.441421874921515e+02 -3.738608763234666e+02 -2.117844068953763e+02 4.798901622848684e+02 - ME 1.645403798734011e-04 + ME 4.091340785269233e-05 Event 46 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2420,7 +2420,7 @@ Event 46 Batch 1 2 6.296560291524888e+02 2.172411497655985e+02 5.821614514430422e+02 -1.017892054705761e+02 3 6.224001894826197e+02 1.405102091633609e+01 -6.218608257778048e+02 2.176414579432105e+01 4 2.479437813648912e+02 -2.312921706819346e+02 3.969937433476264e+01 8.002505967625511e+01 - ME 4.041878897626609e-05 + ME 7.434320230190137e-06 Event 47 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2428,7 +2428,7 @@ Event 47 Batch 1 2 5.458843469271557e+02 -1.019033861791133e+02 -1.559739004096151e+02 5.131058004898495e+02 3 2.573134207008558e+02 6.791700498899543e+01 -2.412204887508016e+02 5.839651284901167e+01 4 6.968022323719882e+02 3.398638119011781e+01 3.971943891604168e+02 -5.715023133388611e+02 - ME 1.408798022766008e-02 + ME 4.005478861198618e-03 Event 48 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2436,7 +2436,7 @@ Event 48 Batch 1 2 6.623920218006384e+02 -6.284562032939594e+02 -1.837527125398962e+02 -1.002044496053409e+02 3 1.251779629744606e+02 -7.502448682133647e+01 9.550779386908961e+01 3.031682869117444e+01 4 7.124300152249010e+02 7.034806901152959e+02 8.824491867080658e+01 6.988762091416655e+01 - ME 8.682321044518227e-04 + ME 3.004757451335502e-04 Event 49 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2444,7 +2444,7 @@ Event 49 Batch 1 2 2.397494808364364e+02 2.393958238941666e+02 -4.144666783354266e+00 -1.233996761053010e+01 3 6.782491241100328e+02 -3.516321535544010e+02 -2.705899831712919e+02 5.129890485673947e+02 4 5.820013950535307e+02 1.122363296602344e+02 2.747346499546462e+02 -5.006490809568646e+02 - ME 9.041285542966720e-03 + ME 6.040872325723622e-04 Event 50 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2452,7 
+2452,7 @@ Event 50 Batch 1 2 4.764898792162554e+02 4.667163214316568e+02 5.900817880915086e+01 -7.573978570375913e+01 3 5.114228101321805e+02 -2.035689445851523e+02 -4.549677995197112e+02 -1.145306811477843e+02 4 5.120873106515638e+02 -2.631473768465044e+02 3.959596207105603e+02 1.902704668515434e+02 - ME 5.157319121365441e-05 + ME 9.692662313613028e-06 Event 51 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2460,7 +2460,7 @@ Event 51 Batch 1 2 4.678795643859630e+02 4.629737719234085e+02 5.365495313512251e+01 4.108186077915564e+01 3 6.311645871918951e+02 -4.500610707732837e+02 -4.345770688214700e+02 8.340587481742408e+01 4 4.009558484221416e+02 -1.291270115012470e+01 3.809221156863474e+02 -1.244877355965797e+02 - ME 1.517985021504320e-04 + ME 1.293558494013996e-05 Event 52 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2468,7 +2468,7 @@ Event 52 Batch 1 2 3.696230029266819e+02 2.516704934433110e+02 2.514038675722595e+02 1.003953305301004e+02 3 6.696174214325739e+02 -2.754912388418390e+01 -6.493999246431116e+02 -1.609604756850079e+02 4 4.607595756407442e+02 -2.241213695591271e+02 3.979960570708519e+02 6.056514515490756e+01 - ME 5.727699238559496e-05 + ME 8.655753222194317e-06 Event 53 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2476,7 +2476,7 @@ Event 53 Batch 1 2 7.284624742442375e+01 -4.271742504396477e+01 -2.683807109937144e+01 -5.255012179908527e+01 3 7.493542950735829e+02 3.356513586119740e+02 2.501807367708783e+02 6.215139772812374e+02 4 6.777994575019936e+02 -2.929339335680093e+02 -2.233426656715069e+02 -5.689638554821522e+02 - ME 1.612275481129464e-02 + ME 2.372423861687152e-03 Event 54 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2484,7 +2484,7 @@ Event 54 Batch 1 2 7.460259847230064e+02 2.055186857047568e+01 6.233229443227743e+02 
4.093908861479223e+02 3 5.756222844616437e+02 2.606063779094539e+01 -4.696411468594731e+02 -3.318117699890848e+02 4 1.783517308153497e+02 -4.661250636142109e+01 -1.536817974633012e+02 -7.757911615883735e+01 - ME 4.374243668355642e-04 + ME 5.046268590690708e-05 Event 55 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2492,7 +2492,7 @@ Event 55 Batch 1 2 5.967428482894213e+02 -8.165820254184375e+01 5.098287527914877e+02 -2.991798919868828e+02 3 5.942526243827265e+02 5.606061544962815e+01 -2.905196430116550e+02 5.153559216750568e+02 4 3.090045273278509e+02 2.559758709221549e+01 -2.193091097798325e+02 -2.161760296881746e+02 - ME 1.779007466146034e-03 + ME 1.849048785615045e-04 Event 56 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2500,7 +2500,7 @@ Event 56 Batch 1 2 5.610874267302015e+02 -4.199055433713192e+02 3.580252469767042e+02 1.015694718309908e+02 3 6.303091265298390e+02 2.130872195586830e+02 -5.453843477211296e+02 -2.333224059286980e+02 4 3.086034467399593e+02 2.068183238126362e+02 1.873591007444254e+02 1.317529340977073e+02 - ME 3.258989367177766e-05 + ME 7.213009143835112e-06 Event 57 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2508,7 +2508,7 @@ Event 57 Batch 1 2 6.552053965855981e+02 4.516249927537604e+02 7.110694105335197e+00 4.746350341729917e+02 3 6.035190443408458e+02 -3.717228873476765e+02 2.148772607224587e+02 -4.241286299324850e+02 4 2.412755590735562e+02 -7.990210540608396e+01 -2.219879548277939e+02 -5.050640424050685e+01 - ME 1.623545585873121e-04 + ME 3.752873989265266e-05 Event 58 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2516,7 +2516,7 @@ Event 58 Batch 1 2 2.959982971085279e+02 1.850007048157144e+02 -2.304987961744356e+02 1.612563397119956e+01 3 7.018897389129390e+02 -3.764226030262936e+02 4.376344751014918e+02 
3.992884868423144e+02 4 5.021119639785326e+02 1.914218982105791e+02 -2.071356789270567e+02 -4.154141208135139e+02 - ME 4.558573859477246e-03 + ME 1.901193343270815e-04 Event 59 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2524,7 +2524,7 @@ Event 59 Batch 1 2 5.521089721327345e+02 1.223876815062619e+02 -3.629066091228882e+01 -5.371485459866160e+02 3 4.098988410471214e+02 -5.841964900319319e+01 -3.626461945087767e+02 1.819119075553315e+02 4 5.379921868201441e+02 -6.396803250306872e+01 3.989368554210655e+02 3.552366384312845e+02 - ME 5.148841296796537e-05 + ME 1.780280399801712e-05 Event 60 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2532,7 +2532,7 @@ Event 60 Batch 1 2 7.143828168925960e+02 -4.584044193456332e+02 -2.419772079280938e+02 -4.915844060170314e+02 3 1.284110307517517e+02 8.324300347118127e+01 -7.889851197070540e+01 5.774963203893758e+01 4 6.572061523556514e+02 3.751614158744520e+02 3.208757198987992e+02 4.338347739780938e+02 - ME 1.673517837789511e-04 + ME 7.144001898958308e-05 Event 61 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2540,7 +2540,7 @@ Event 61 Batch 1 2 4.394390210968651e+02 -2.137451655543886e+02 -3.779414621253704e+02 -6.767502250635177e+01 3 4.431311911324728e+02 3.845666395406355e+02 -2.150363068358313e+02 4.725610065709574e+01 4 6.174297877706618e+02 -1.708214739862469e+02 5.929777689612018e+02 2.041892184925626e+01 - ME 1.368591177943825e-04 + ME 2.870354731125455e-05 Event 62 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2548,7 +2548,7 @@ Event 62 Batch 1 2 7.301725729481176e+02 4.281927891852710e+02 5.652737593150771e+02 -1.739784429324868e+02 3 7.567373964415995e+01 2.589885732647599e+01 -5.696550981957816e+01 4.255225906941358e+01 4 6.941536874077224e+02 -4.540916465117469e+02 -5.083082494954988e+02 
1.314261838630732e+02 - ME 8.513592598060080e-04 + ME 2.379197431250548e-04 Event 63 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2556,7 +2556,7 @@ Event 63 Batch 1 2 4.361152320236988e+02 -3.738769057978321e+02 1.427754799584550e+02 -1.732850750548248e+02 3 5.817148313055657e+02 5.081993893256957e+02 2.829214478037172e+02 -8.998890070513914e+00 4 4.821699366707353e+02 -1.343224835278637e+02 -4.256969277621721e+02 1.822839651253387e+02 - ME 4.544766189571194e-05 + ME 8.350404272725701e-06 Event 64 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2564,7 +2564,7 @@ Event 64 Batch 1 2 6.097675704107204e+02 3.288514690970509e+02 4.971291587853200e+02 -1.285916042465611e+02 3 5.709532610348123e+02 -6.501292612520263e+01 -4.768258747557200e+02 3.072426254385416e+02 4 3.192791685544673e+02 -2.638385429718484e+02 -2.030328402960006e+01 -1.786510211919805e+02 - ME 4.598138986874043e-04 + ME 3.000969253297957e-05 Event 65 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2572,7 +2572,7 @@ Event 65 Batch 1 2 6.258641293880484e+02 3.743515439843765e+02 -1.622018320411498e+02 -4.746128903155367e+02 3 7.438702198751357e+02 -4.029113627030089e+02 2.325939036896868e+02 5.804355380128616e+02 4 1.302656507368158e+02 2.855981871863233e+01 -7.039207164853700e+01 -1.058226476973252e+02 - ME 6.427333508548903e-03 + ME 3.162776051460646e-04 Event 66 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2580,7 +2580,7 @@ Event 66 Batch 1 2 3.731957242404369e+02 1.596860493342637e+01 -3.714568973276624e+02 3.224632809376674e+01 3 6.079923612940432e+02 4.451199598539357e+02 3.189341902600864e+02 -2.642043054431177e+02 4 5.188119144655197e+02 -4.610885647873621e+02 5.252270706757586e+01 2.319579773493509e+02 - ME 4.681392980523237e-05 + ME 1.034065067393998e-05 Event 67 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2588,7 +2588,7 @@ Event 67 Batch 1 2 7.084256499213539e+02 6.318790977834966e+02 -2.229764540025608e+02 2.299504472951746e+02 3 5.168612394424738e+01 1.130069959366449e+01 -1.428140623590627e+01 4.837138651102398e+01 4 7.398882261343989e+02 -6.431797973771612e+02 2.372578602384670e+02 -2.783218338061985e+02 - ME 5.878400132197954e-02 + ME 1.479715191731530e-02 Event 68 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2596,7 +2596,7 @@ Event 68 Batch 1 2 5.644037677826096e+02 -7.446914007305443e+01 3.170710956176409e+02 4.609467220707991e+02 3 4.303832728799333e+02 -1.588265612792408e+02 -3.994808673830752e+02 -2.046757440246668e+01 4 5.052129593374568e+02 2.332957013522950e+02 8.240977176543441e+01 -4.404791476683325e+02 - ME 8.108482137897523e-03 + ME 3.274273226082449e-04 Event 69 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2604,7 +2604,7 @@ Event 69 Batch 1 2 2.379282923937934e+02 -4.413455715133102e+01 1.058497776082811e+02 -2.084654354245804e+02 3 5.822935131976616e+02 -5.806422676829345e+02 4.095409019445288e+01 -1.559022092337181e+01 4 6.797781944085444e+02 6.247768248342655e+02 -1.468038678027338e+02 2.240556563479522e+02 - ME 3.039802585689931e-04 + ME 6.379305675073031e-05 Event 70 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2612,7 +2612,7 @@ Event 70 Batch 1 2 5.861861307468000e+02 1.831219916849830e+02 2.904683423406074e+02 -4.750880530376756e+02 3 4.633200606614189e+02 -4.245314712871158e+02 -1.339518705596282e+02 1.284344380284135e+02 4 4.504938085917810e+02 2.414094796021329e+02 -1.565164717809791e+02 3.466536150092620e+02 - ME 3.530491740557932e-05 + ME 1.325653453486623e-05 Event 71 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2620,7 
+2620,7 @@ Event 71 Batch 1 2 7.383412459951699e+02 5.748049255568963e+02 -1.639684737984460e+02 -4.334298474879633e+02 3 3.973981306646684e+02 -3.228684354469153e+02 -4.837114091238284e+00 2.316416412804533e+02 4 3.642606233401616e+02 -2.519364901099809e+02 1.688055878896842e+02 2.017882062075102e+02 - ME 3.103530482016079e-05 + ME 1.333441808219846e-05 Event 72 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2628,7 +2628,7 @@ Event 72 Batch 1 2 3.538199915090663e+02 3.512029503136998e+02 -6.467835580753929e+00 -4.246458742680748e+01 3 5.344234504985296e+02 1.310173344785605e+01 3.836805260246265e+01 5.328833470497182e+02 4 6.117565579924039e+02 -3.643046837615559e+02 -3.190021702170876e+01 -4.904187596229107e+02 - ME 9.376669006106200e-03 + ME 2.994704399169685e-03 Event 73 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2636,7 +2636,7 @@ Event 73 Batch 1 2 4.694927197571710e+02 1.451947293992222e+02 -1.807863847612341e+02 4.082379055705570e+02 3 5.537325951281179e+02 -5.796379956652479e+01 5.401382741253894e+02 -1.072876026015002e+02 4 4.767746851147115e+02 -8.723092983269744e+01 -3.593518893641554e+02 -3.009503029690568e+02 - ME 1.077472469645428e-03 + ME 1.535829386616431e-04 Event 74 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2644,7 +2644,7 @@ Event 74 Batch 1 2 6.258444305735198e+02 -3.349227552763227e+02 4.941036656040852e+02 1.880679848209580e+02 3 5.555040664889822e+02 3.765538795180102e+01 -5.474422011270130e+02 -8.645158222500005e+01 4 3.186515029374982e+02 2.972673673245214e+02 5.333853552292791e+01 -1.016164025959578e+02 - ME 1.623439923565115e-04 + ME 1.487896902219418e-05 Event 75 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2652,7 +2652,7 @@ Event 75 Batch 1 2 3.943316317993887e+02 5.588489849751632e+01 -2.552251009651266e+02 
-2.953548066221912e+02 3 5.467466262348042e+02 -3.021648543602057e+02 -2.377479281839000e+02 3.887212326756534e+02 4 5.589217419658066e+02 2.462799558626894e+02 4.929730291490265e+02 -9.336642605346221e+01 - ME 1.348649436679123e-04 + ME 4.632408498797698e-05 Event 76 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2660,7 +2660,7 @@ Event 76 Batch 1 2 5.517772830004059e+02 2.282681125856672e+02 -4.885490190451381e+02 -1.169260227747471e+02 3 4.245403880864563e+02 -2.793100283061228e+02 1.521744876196477e+02 -2.811821020654221e+02 4 5.236823289131380e+02 5.104191572045557e+01 3.363745314254903e+02 3.981081248401691e+02 - ME 5.074216551061466e-05 + ME 1.645260485784409e-05 Event 77 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2668,7 +2668,7 @@ Event 77 Batch 1 2 3.781543446472003e+02 -5.926925448310480e+01 -1.775497893613220e+02 3.285786605157444e+02 3 6.702964816234122e+02 -6.066564226432872e+01 -1.057468051743550e+02 -6.591165802199176e+02 4 4.515491737293867e+02 1.199348967474336e+02 2.832965945356770e+02 3.305379197041734e+02 - ME 6.321080405055773e-05 + ME 5.041095643414513e-05 Event 78 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2676,7 +2676,7 @@ Event 78 Batch 1 2 4.564262045363139e+02 1.882572856930395e+02 1.751822011208171e+02 -3.770878823051468e+02 3 3.809544602625751e+02 -2.816334489555117e+02 1.992812047321844e+02 -1.615422627793184e+02 4 6.626193352011103e+02 9.337616326247226e+01 -3.744634058530013e+02 5.386301450844651e+02 - ME 2.572921643188974e-04 + ME 6.222463480998997e-05 Event 79 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2684,7 +2684,7 @@ Event 79 Batch 1 2 6.126536521478922e+02 6.075062399138452e+02 -4.178945028651393e+01 6.733726903166659e+01 3 2.872846052831658e+02 -1.084163947926161e+02 2.139961846825774e+01 
2.651799127051085e+02 4 6.000617425689430e+02 -4.990898451212283e+02 2.038983181825616e+01 -3.325171817367756e+02 - ME 1.996659951821530e-03 + ME 6.289823950094716e-04 Event 80 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2692,7 +2692,7 @@ Event 80 Batch 1 2 4.171281258707700e+02 -2.756641813219371e+02 1.445082905894664e+01 3.127240094205691e+02 3 3.805235327384960e+02 -2.955852199231463e+02 2.395269588958384e+02 7.373784162959287e+00 4 7.023483413907342e+02 5.712494012450838e+02 -2.539777879547846e+02 -3.200977935835284e+02 - ME 1.297520069620947e-03 + ME 5.629434448779270e-04 Event 81 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2700,7 +2700,7 @@ Event 81 Batch 1 2 7.471091333863935e+02 -9.753029041192970e+01 7.407154559164039e+02 -7.162458282065091e-01 3 6.775352561453885e+02 9.550863422814814e+01 -6.702673865908516e+02 -2.595678293896889e+01 4 7.535561046821789e+01 2.021656183781575e+00 -7.044806932555213e+01 2.667302876717550e+01 - ME 1.022399816924924e-04 + ME 2.904529061551848e-05 Event 82 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2708,7 +2708,7 @@ Event 82 Batch 1 2 4.309094465924175e+02 3.042233433179616e+02 2.799835808203350e+02 -1.214096495919827e+02 3 5.540384887187945e+02 -4.824447657759213e+02 1.988969596446625e+02 1.861335391629672e+02 4 5.150520646887885e+02 1.782214224579596e+02 -4.788805404649973e+02 -6.472388957098450e+01 - ME 1.053635072607165e-04 + ME 1.778678120024833e-05 Event 83 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2716,7 +2716,7 @@ Event 83 Batch 1 2 4.869534474909295e+02 -4.727010820510885e+02 1.062322962656182e+02 4.890855018466118e+01 3 3.520990385354405e+02 -1.437544586613779e+02 -3.142298368411062e+02 6.758696761482639e+01 4 6.609475139736298e+02 6.164555407124665e+02 2.079975405754878e+02 
-1.164955177994876e+02 - ME 2.998516055200512e-04 + ME 7.948516811691567e-05 Event 84 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2724,7 +2724,7 @@ Event 84 Batch 1 2 1.391975815431583e+01 -3.682657486111166e-01 -1.138840508663312e+01 -7.995516055627093e+00 3 7.493632094786751e+02 -3.452281541586202e+01 3.833012084573049e+02 6.429880080772211e+02 4 7.367170323670085e+02 3.489108116447313e+01 -3.719128033706718e+02 -6.349924920215940e+02 - ME 3.806217512266510e-01 + ME 8.671177508029917e-02 Event 85 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2732,7 +2732,7 @@ Event 85 Batch 1 2 7.362448947738020e+02 6.409220704967113e+02 3.243429451315054e+02 1.614840505254833e+02 3 1.517836214454495e+02 -1.266859291808411e+02 -6.780846852200752e+01 4.889738933094901e+01 4 6.119714837807480e+02 -5.142361413158706e+02 -2.565344766094980e+02 -2.103814398564324e+02 - ME 5.694785892689211e-04 + ME 1.062305495679385e-04 Event 86 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2740,7 +2740,7 @@ Event 86 Batch 1 2 5.451728369778392e+02 -6.605005893803180e+01 1.066920544886257e+02 -5.305352178712969e+02 3 3.158718592284829e+02 -1.755596039144849e+02 2.550395858012225e+02 6.251932981237656e+01 4 6.389553037936773e+02 2.416096628525165e+02 -3.617316402898481e+02 4.680158880589203e+02 - ME 1.469986179099727e-04 + ME 4.057626974930324e-05 Event 87 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2748,7 +2748,7 @@ Event 87 Batch 1 2 3.414211232216659e+02 1.437256906952883e+02 1.534640422371205e+02 -2.689983214749668e+02 3 5.081668091119999e+02 4.794742948200324e+02 -1.464748766741243e+02 8.296394996143997e+01 4 6.504120676663341e+02 -6.231999855153207e+02 -6.989165562996117e+00 1.860343715135268e+02 - ME 1.823135893899652e-04 + ME 3.656584417835253e-05 Event 88 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2756,7 +2756,7 @@ Event 88 Batch 1 2 2.925516585730864e+02 1.655911293372511e+01 2.598275245766865e+02 -1.334238591297045e+02 3 7.159840369510271e+02 -1.056844973272874e+02 -3.694097043713192e+02 6.041526284885822e+02 4 4.914643044758866e+02 8.912538439356234e+01 1.095821797946327e+02 -4.707287693588777e+02 - ME 8.728488941697977e-02 + ME 2.327745727475104e-03 Event 89 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2764,7 +2764,7 @@ Event 89 Batch 1 2 6.333634651097186e+02 1.209853522660007e+02 5.372166546881791e+02 -3.129058794565919e+02 3 6.221307427802806e+02 5.757192259699385e+01 -4.327483989541182e+02 4.432391657372765e+02 4 2.445057921100010e+02 -1.785572748629945e+02 -1.044682557340609e+02 -1.303332862806847e+02 - ME 5.497507832908574e-04 + ME 5.047204144927262e-05 Event 90 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2772,7 +2772,7 @@ Event 90 Batch 1 2 3.111538587406461e+02 2.628215106651484e+02 -6.985334981761831e+01 -1.512021390726355e+02 3 5.216486323898988e+02 1.252715366480781e+02 4.457714554600226e+02 -2.402335265468457e+02 4 6.671975088694549e+02 -3.880930473132266e+02 -3.759181056424042e+02 3.914356656194811e+02 - ME 2.329075524537458e-04 + ME 4.503542584588689e-05 Event 91 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2780,7 +2780,7 @@ Event 91 Batch 1 2 3.007803348469016e+02 8.390513937949677e+01 2.884042062049404e+02 -1.586667134655829e+01 3 6.256884422056424e+02 2.364580673743878e+02 -3.590826126759745e+02 -4.545693416378727e+02 4 5.735312229474563e+02 -3.203632067538847e+02 7.067840647103421e+01 4.704360129844310e+02 - ME 6.478111274774788e-05 + ME 2.635583378174906e-05 Event 92 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2788,7 
+2788,7 @@ Event 92 Batch 1 2 6.843865618656529e+02 -2.264962467301474e+02 -5.909185329480341e+02 2.605757158639088e+02 3 6.645516272550811e+02 3.453347116263074e+02 4.983670680340538e+02 -2.720350487207341e+02 4 1.510618108792659e+02 -1.188384648961601e+02 9.255146491398015e+01 1.145933285682523e+01 - ME 9.365402433981294e-05 + ME 1.711437740567050e-05 Event 93 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2796,7 +2796,7 @@ Event 93 Batch 1 2 5.579763469381434e+02 2.180908585044468e+02 5.135246110359701e+02 8.151996049100932e+00 3 3.333821836060117e+02 1.681122988324202e+02 -1.261705574188212e+02 2.587719570738210e+02 4 6.086414694558448e+02 -3.862031573368670e+02 -3.873540536171486e+02 -2.669239531229223e+02 - ME 5.183695239236329e-04 + ME 1.157787815150910e-04 Event 94 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2804,7 +2804,7 @@ Event 94 Batch 1 2 4.534979734151987e+02 1.139662723650677e+02 2.686183171543304e+01 4.381216071501101e+02 3 3.856184698299744e+02 1.545134372854228e+02 -3.452526490806396e+02 7.501873282757614e+01 4 6.608835567548277e+02 -2.684797096504910e+02 3.183908173652065e+02 -5.131403399776862e+02 - ME 6.944325623628402e-03 + ME 1.545010233607317e-03 Event 95 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2812,7 +2812,7 @@ Event 95 Batch 1 2 2.828073115974175e+02 -5.711637476392460e+01 5.915078172645698e+01 -2.705898746219725e+02 3 6.809618671276158e+02 3.772100991821226e+02 3.247893528880094e+02 4.646864338535512e+02 4 5.362308212749670e+02 -3.200937244181981e+02 -3.839401346144663e+02 -1.940965592315787e+02 - ME 2.560512106670314e-04 + ME 6.408796328924562e-05 Event 96 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2820,7 +2820,7 @@ Event 96 Batch 1 2 4.639832102051440e+02 -4.275497908582962e+02 -1.317248975374901e+02 
-1.230046627491649e+02 3 7.474114851375481e+02 6.594176555428718e+02 2.654537688070380e+02 2.309254864669502e+02 4 2.886053046573076e+02 -2.318678646845757e+02 -1.337288712695479e+02 -1.079208237177853e+02 - ME 2.440162169445852e-04 + ME 1.445191791082226e-05 Event 97 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2828,7 +2828,7 @@ Event 97 Batch 1 2 5.095921959312568e+02 3.190102848863560e+02 3.100341192456060e+02 2.485869851668986e+02 3 4.555541331018014e+02 -2.788120391899956e+02 2.221549471930723e+02 -2.836205112936887e+02 4 5.348536709669415e+02 -4.019824569636059e+01 -5.321890664386783e+02 3.503352612679014e+01 - ME 8.198891770965733e-05 + ME 2.250661525403011e-05 Event 98 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2836,7 +2836,7 @@ Event 98 Batch 1 2 5.299941952467790e+02 -2.570048161992350e+02 -4.630296380940593e+02 -2.111695271961878e+01 3 7.352146396921255e+02 2.361229278157243e+02 6.962552486063584e+02 3.893348873424185e+00 4 2.347911650610957e+02 2.088188838351074e+01 -2.332256105122990e+02 1.722360384619465e+01 - ME 6.760444392591968e-05 + ME 5.654417419793765e-06 Event 99 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2844,7 +2844,7 @@ Event 99 Batch 1 2 4.290897291078425e+02 3.747236205606835e+02 2.040795775432686e+02 -4.529602465443949e+01 3 6.438744429739487e+02 -5.215755139094103e+02 2.133414139578182e+01 3.769325350988583e+02 4 4.270358279182090e+02 1.468518933487271e+02 -2.254137189390505e+02 -3.316365104444187e+02 - ME 2.024851967866169e-03 + ME 8.457850707842401e-05 Event 100 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2852,7 +2852,7 @@ Event 100 Batch 1 2 5.119062275524872e+02 -4.721600394809319e+02 -1.845880136125884e+02 7.099400083769524e+01 3 4.523854579707449e+02 2.836789572262426e+02 -3.060214184981774e+02 
-1.747276258374610e+02 4 5.357083144767672e+02 1.884810822546894e+02 4.906094321107658e+02 1.037336249997658e+02 - ME 6.898305006855298e-05 + ME 1.420495101373495e-05 Event 101 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2860,7 +2860,7 @@ Event 101 Batch 1 2 6.024072815192737e+02 -3.080418730730875e+02 -4.692284526425155e+02 2.186993289696520e+02 3 3.347434020484399e+02 8.940653726951260e+01 -3.939923552329941e+01 -3.201676381969582e+02 4 5.628493164322859e+02 2.186353358035749e+02 5.086276881658150e+02 1.014683092273061e+02 - ME 9.290725627447436e-05 + ME 2.743452031293993e-05 Event 102 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2868,7 +2868,7 @@ Event 102 Batch 1 2 5.910857738801296e+02 3.707548039128416e+02 -7.516477307090547e+01 -4.541734518311494e+02 3 2.311218706704979e+02 4.536804143672514e+01 -2.262982016400413e+02 1.217307902336991e+01 4 6.777923554493723e+02 -4.161228453495667e+02 3.014629747109467e+02 4.420003728077793e+02 - ME 2.633339755449651e-04 + ME 7.158169676479796e-05 Event 103 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2876,7 +2876,7 @@ Event 103 Batch 1 2 6.627949406417042e+02 7.189602123685950e+01 -6.391860825813610e+02 -1.599038689489492e+02 3 5.519979886399102e+02 1.442810582977179e+02 4.734454174874869e+02 2.444057944057306e+02 4 2.852070707183856e+02 -2.161770795345774e+02 1.657406650938741e+02 -8.450192545678139e+01 - ME 1.652798222861839e-04 + ME 1.658567428345252e-05 Event 104 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2884,7 +2884,7 @@ Event 104 Batch 1 2 4.368180791462563e+02 -3.483499330357901e+02 -2.596280064690262e+02 4.533935023690698e+01 3 4.635715977792429e+02 1.873023362819025e+02 -2.251347602994603e+02 -3.593477435519053e+02 4 5.996103230745010e+02 1.610475967538876e+02 4.847627667684865e+02 
3.140083933149983e+02 - ME 9.158171748371188e-05 + ME 2.162124469235967e-05 Event 105 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2892,7 +2892,7 @@ Event 105 Batch 1 2 5.701708357490469e+02 2.288495716262106e+02 -4.521314661478370e+02 -2.613422905391967e+02 3 3.711008490497917e+02 -3.362590561223710e+02 -8.126001400906793e+01 1.343223639771668e+02 4 5.587283152011612e+02 1.074094844961603e+02 5.333914801569049e+02 1.270199265620299e+02 - ME 7.043372303967046e-05 + ME 1.720246557093887e-05 Event 106 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2900,7 +2900,7 @@ Event 106 Batch 1 2 6.775588183099673e+02 5.149765831731705e+02 3.445381345095063e+02 -2.741870619150275e+02 3 7.044100837534635e+02 -4.546975847980706e+02 -4.392260662935809e+02 3.106833358270535e+02 4 1.180310979365712e+02 -6.027899837509908e+01 9.468793178407486e+01 -3.649627391202603e+01 - ME 3.259673897057837e-04 + ME 2.786544600802367e-05 Event 107 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2908,7 +2908,7 @@ Event 107 Batch 1 2 6.046880513041550e+02 2.289413119004024e+02 -5.349774474143721e+02 -1.644160754103499e+02 3 3.366746442316215e+02 -7.166101576320902e+01 2.452245434825371e+01 3.280444544890399e+02 4 5.586373044642238e+02 -1.572802961371935e+02 5.104549930661184e+02 -1.636283790786902e+02 - ME 8.859556065170558e-04 + ME 4.667002706670146e-04 Event 108 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2916,7 +2916,7 @@ Event 108 Batch 1 2 6.239206451413978e+02 -2.218030564243363e+02 5.011455197099735e+02 -2.982172759400455e+02 3 2.841199272340513e+02 1.209406641294798e+02 7.967327320293104e+01 2.444374323800143e+02 4 5.919594276245514e+02 1.008623922948564e+02 -5.808187929129044e+02 5.377984356003120e+01 - ME 1.727643234936365e-04 + ME 7.961277501126149e-05 Event 109 Batch 1 
0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2924,7 +2924,7 @@ Event 109 Batch 1 2 3.093404598873124e+02 1.546999830656544e+02 1.629193992247174e+02 2.126421988200774e+02 3 5.287372542258961e+02 -2.136116696975048e+02 -1.865832176193536e+02 4.462284633214169e+02 4 6.619222858867909e+02 5.891168663185049e+01 2.366381839463621e+01 -6.588706621414941e+02 - ME 1.686695657867669e+01 + ME 2.902408960420708e-01 Event 110 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2932,7 +2932,7 @@ Event 110 Batch 1 2 4.920948406187608e+02 -8.595212543403569e+01 -4.824913009925944e+02 -4.440392734262522e+01 3 4.634042325716594e+02 -2.085760624772916e+00 1.255608851371819e+02 4.460645653843308e+02 4 5.445009268095798e+02 8.803788605880843e+01 3.569304158554124e+02 -4.016606380417056e+02 - ME 4.151412887207382e-03 + ME 1.043536440561108e-03 Event 111 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2940,7 +2940,7 @@ Event 111 Batch 1 2 4.637454700443120e+02 1.543048221589588e+02 -4.372769385391800e+02 6.225902899506631e+00 3 3.246747011850293e+02 -5.128652792678845e+01 -2.274142471268230e+02 2.259781269206006e+02 4 7.115798287706589e+02 -1.030182942321705e+02 6.646911856660031e+02 -2.322040298201072e+02 - ME 1.240833065187375e-03 + ME 5.219332617201280e-04 Event 112 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2948,7 +2948,7 @@ Event 112 Batch 1 2 6.923761777814550e+02 3.939190124845535e+02 4.398224952082178e+01 -5.676954684419625e+02 3 5.277418353503033e+02 -4.270527740856185e+02 4.970714905179168e+01 3.060499505927539e+02 4 2.798819868682421e+02 3.313376160106501e+01 -9.368939857261346e+01 2.616455178492087e+02 - ME 5.385735959435035e-05 + ME 4.381536575941429e-05 Event 113 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-2956,7 +2956,7 @@ Event 113 Batch 1 2 7.174898838850694e+02 -6.130145063482008e+02 3.726797356942233e+02 1.071275347265524e+01 3 1.705115822510491e+02 3.993583199494100e+01 -1.624320619120163e+02 3.309311510932528e+01 4 6.119985338638814e+02 5.730786743532599e+02 -2.102476737822071e+02 -4.380586858198049e+01 - ME 2.197559713387976e-04 + ME 4.914674319256647e-05 Event 114 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2964,7 +2964,7 @@ Event 114 Batch 1 2 6.772826088252357e+02 -1.430288042596954e+02 -3.410390118171982e+02 5.674036356844296e+02 3 6.725037798358682e+02 3.626161999767239e+01 2.510744134018114e+02 -6.228226615527174e+02 4 1.502136113388951e+02 1.067671842620232e+02 8.996459841538707e+01 5.541902586828807e+01 - ME 8.926156406775035e-05 + ME 7.986648389935193e-05 Event 115 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2972,7 +2972,7 @@ Event 115 Batch 1 2 9.320551230331124e+01 1.288474310894606e+01 -2.581623869377880e+01 8.862715576190526e+01 3 6.672654287607164e+02 1.525114284892182e+02 2.829200767588875e+02 5.847560574856374e+02 4 7.395290589359720e+02 -1.653961715981643e+02 -2.571038380651088e+02 -6.733832132475428e+02 - ME 1.800237703627863e+00 + ME 4.304938165075599e-01 Event 116 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2980,7 +2980,7 @@ Event 116 Batch 1 2 4.951202926530015e+02 -4.575339943514647e+02 4.220102313368785e+01 1.844608951947751e+02 3 3.101750696753587e+02 -4.711582585559527e+01 2.172188132736168e+02 2.163438466008694e+02 4 6.947046376716394e+02 5.046498202070600e+02 -2.594198364073050e+02 -4.008047417956444e+02 - ME 1.933367100533606e-03 + ME 5.988625984136040e-04 Event 117 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2988,7 +2988,7 @@ Event 117 Batch 1 2 6.543248494478489e+02 1.390926466871539e+02 
9.107024539473488e+01 6.328510524967589e+02 3 5.040443237953712e+02 6.874740772121054e+01 1.336336536624387e+02 -4.811200690999848e+02 4 3.416308267567792e+02 -2.078400544083643e+02 -2.247038990571737e+02 -1.517309833967742e+02 - ME 4.207453923038474e-04 + ME 3.026560085299302e-04 Event 118 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2996,7 +2996,7 @@ Event 118 Batch 1 2 5.829230400014206e+02 5.307803371482089e+02 -3.192285892796672e+01 2.388565162167381e+02 3 3.965113090906140e+02 -5.470249758902820e+01 2.256187790844517e+02 -3.214420966810604e+02 4 5.205656509079653e+02 -4.760778395591807e+02 -1.936959201564850e+02 8.258558046432242e+01 - ME 7.464562943747175e-05 + ME 2.168340782914014e-05 Event 119 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3004,7 +3004,7 @@ Event 119 Batch 1 2 3.549567073991255e+02 2.281637891139605e+02 1.474502150787006e+02 2.284600261271838e+02 3 4.727085372220640e+02 7.463684946128350e+01 -3.092948822053327e+02 3.495988811576870e+02 4 6.723347553788102e+02 -3.028006385752440e+02 1.618446671266322e+02 -5.780589072848707e+02 - ME 1.455012849105755e-02 + ME 1.664672733965846e-03 Event 120 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3012,7 +3012,7 @@ Event 120 Batch 1 2 7.192117275853698e+02 4.094232477570927e+02 -5.552624156333899e+02 -2.032775518283800e+02 3 3.685061529232585e+02 -2.522084621786424e+02 1.741347663658646e+02 2.046087962197375e+02 4 4.122821194913712e+02 -1.572147855784500e+02 3.811276492675253e+02 -1.331244391357209e+00 - ME 9.281995463485567e-05 + ME 1.900262756274459e-05 Event 121 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3020,7 +3020,7 @@ Event 121 Batch 1 2 1.923953846467517e+02 -5.182078839520096e+01 -1.486351786617837e+02 -1.106262789198433e+02 3 6.582127150877787e+02 -3.509182841037630e+02 
-1.191939510078701e+02 5.439606035624541e+02 4 6.493919002654695e+02 4.027390724989639e+02 2.678291296696539e+02 -4.333343246426108e+02 - ME 1.925188892577692e-03 + ME 5.360055113881300e-04 Event 122 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3028,7 +3028,7 @@ Event 122 Batch 1 2 6.905732817636248e+02 3.462508192534570e+02 -5.375670569609784e+02 -2.608131264380775e+02 3 7.097575386120018e+02 -2.677396278645660e+02 5.849221766424142e+02 2.998954860604125e+02 4 9.966917962437387e+01 -7.851119138889094e+01 -4.735511968143584e+01 -3.908235962233509e+01 - ME 5.007312135859238e-04 + ME 3.451011759976180e-05 Event 123 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3036,7 +3036,7 @@ Event 123 Batch 1 2 4.035126033432560e+02 2.481103298242076e+01 -3.878573016343356e+02 -1.085059780294573e+02 3 3.541388771651666e+02 1.572344474048876e+02 -3.105653677404273e+02 -6.512161875550808e+01 4 7.423485194915780e+02 -1.820454803873083e+02 6.984226693747627e+02 1.736275967849660e+02 - ME 2.043564129780385e-02 + ME 3.471230489499830e-03 Event 124 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3044,7 +3044,7 @@ Event 124 Batch 1 2 5.353042728143347e+02 -4.785252055946481e+02 -2.279396245170433e+02 7.488537693644093e+01 3 7.454081943698113e+02 6.785307544150930e+02 3.069354144183444e+02 -3.193811081429426e+01 4 2.192875328158541e+02 -2.000055488204448e+02 -7.899578990130104e+01 -4.294726612214667e+01 - ME 1.399009675490331e-04 + ME 6.765427234678898e-06 Event 125 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3052,7 +3052,7 @@ Event 125 Batch 1 2 7.351681880566981e+02 -1.932492970253984e+01 -4.393064933429818e+02 -5.891592456452273e+02 3 6.537497908129355e+02 -2.883189353576726e+01 3.454898907503182e+02 5.542510679217788e+02 4 1.110820211303664e+02 
4.815682323830688e+01 9.381660259266363e+01 3.490817772344844e+01 - ME 1.431077255619906e-04 + ME 6.639428548470109e-05 Event 126 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3060,7 +3060,7 @@ Event 126 Batch 1 2 5.568747108147126e+02 1.149185667256990e+02 4.264979152236775e+02 -3.391204725116689e+02 3 6.934211462641822e+02 -1.939160042589616e+02 -6.294239612595663e+02 2.169215212257340e+02 4 2.497041429211053e+02 7.899743753326281e+01 2.029260460358889e+02 1.221989512859350e+02 - ME 3.344185566612618e-05 + ME 9.143592130512915e-06 Event 127 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3068,7 +3068,7 @@ Event 127 Batch 1 2 7.108931196972316e+02 4.270547743949553e+02 5.664613189451065e+02 -4.598718776252147e+01 3 4.445675167124290e+02 -1.247884466860518e+02 -4.129475031266345e+02 1.074359351009545e+02 4 3.445393635903407e+02 -3.022663277089035e+02 -1.535138158184720e+02 -6.144874733843321e+01 - ME 1.180920695556687e-04 + ME 1.427738327825488e-05 Event 128 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3076,7 +3076,7 @@ Event 128 Batch 1 2 5.312407894292422e+02 -7.192118124205533e+01 -4.398126160332176e+02 -2.891521793453568e+02 3 5.717192413787027e+02 3.434745903572437e+02 1.811915566412192e+02 4.195923218357252e+02 4 3.970399691920551e+02 -2.715534091151883e+02 2.586210593919984e+02 -1.304401424903685e+02 - ME 1.848006274423395e-04 + ME 3.532660248239223e-05 Event 129 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3084,7 +3084,7 @@ Event 129 Batch 1 2 6.644129951428383e+02 -3.595672586482287e+02 4.645590915434784e+02 3.103882489514914e+02 3 1.967652372382455e+02 -5.204943416929049e+01 8.794498000645085e+00 -1.895522930301724e+02 4 6.388217676189169e+02 4.116166928175192e+02 -4.733535895441232e+02 -1.208359559213191e+02 - ME 3.082956717278722e-04 
+ ME 9.192558188476414e-05 Event 130 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3092,7 +3092,7 @@ Event 130 Batch 1 2 7.302263990443511e+02 -1.919590472356484e+02 3.836584700935805e+02 -5.909217345563752e+02 3 4.156541164903923e+02 2.203243106780774e+02 -1.767969453775071e+02 3.049071707664833e+02 4 3.541194844652567e+02 -2.836526344242890e+01 -2.068615247160734e+02 2.860145637898919e+02 - ME 3.110012368642411e-05 + ME 2.258971422042701e-05 Event 131 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3100,7 +3100,7 @@ Event 131 Batch 1 2 2.308323688168238e+02 -1.780469473698228e+02 1.469011263880862e+02 1.710582294195638e+00 3 7.308075033948297e+02 5.219262643529272e+02 -3.840435213624620e+02 3.379099810545737e+02 4 5.383601277883465e+02 -3.438793169831044e+02 2.371423949743758e+02 -3.396205633487694e+02 - ME 1.061667055612532e-03 + ME 7.770640764079256e-05 Event 132 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3108,7 +3108,7 @@ Event 132 Batch 1 2 5.909630762789660e+02 -4.293852116769707e+02 -3.988922148105424e+02 7.583335995300355e+01 3 5.415993952096327e+02 2.260703809971038e+02 3.221145619770360e+02 -3.721079100067703e+02 4 3.674375285114020e+02 2.033148306798666e+02 7.677765283350686e+01 2.962745500537670e+02 - ME 3.321676569401813e-05 + ME 1.628447412544396e-05 Event 133 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3116,7 +3116,7 @@ Event 133 Batch 1 2 4.506052863582997e+02 2.189991325227701e+02 -3.914006430783634e+02 -4.347459771134355e+01 3 4.043998006859111e+02 3.160348074769272e+02 8.738893432792010e+01 2.366946839598570e+02 4 6.449949129557901e+02 -5.350339399996973e+02 3.040117087504433e+02 -1.932200862485142e+02 - ME 3.121497332919934e-04 + ME 8.705579101282482e-05 Event 134 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -3124,7 +3124,7 @@ Event 134 Batch 1 2 7.151470882937614e+02 -1.041377497037516e+01 -4.186394096729767e+01 7.138447461686595e+02 3 3.416424731356660e+02 1.638631808685801e+02 3.081581136487586e+01 -2.981925940995343e+02 4 4.432104385705719e+02 -1.534494058982047e+02 1.104812960242199e+01 -4.156521520691248e+02 - ME 5.534325530265236e-02 + ME 6.342792451335309e-03 Event 135 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3132,7 +3132,7 @@ Event 135 Batch 1 2 7.115730144432832e+02 -3.219296530898238e+02 2.184242454110169e+02 -5.958089478700319e+02 3 1.627059459894212e+02 -6.880794311551747e+01 -3.259803939022061e+01 1.437917231708342e+02 4 6.257210395672955e+02 3.907375962053413e+02 -1.858262060207963e+02 4.520172246991979e+02 - ME 2.112989182930814e-04 + ME 1.277979532321233e-04 Event 136 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3140,7 +3140,7 @@ Event 136 Batch 1 2 7.195404287114588e+02 -4.369992732083461e+02 -4.270318019286997e+02 3.800182941743402e+02 3 6.668605996318223e+02 3.634158794560479e+02 4.690430049045651e+02 -3.043527845290675e+02 4 1.135989716567186e+02 7.358339375229815e+01 -4.201120297586535e+01 -7.566550964527264e+01 - ME 1.804344388349211e-03 + ME 7.515399240093053e-05 Event 137 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3148,7 +3148,7 @@ Event 137 Batch 1 2 6.722782806744999e+02 -6.045581260407005e+02 -2.538460778300668e+02 1.484241478840623e+02 3 6.869263774705689e+02 6.661257235671316e+02 1.481819739565761e+02 -7.865412297735662e+01 4 1.407953418549304e+02 -6.156759752643097e+01 1.056641038734908e+02 -6.977002490670534e+01 - ME 5.192812231664224e-04 + ME 2.119149330726453e-05 Event 138 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3156,7 +3156,7 @@ Event 138 Batch 1 2 
6.463287544295633e+02 8.684709774942756e+01 2.409249839962013e+02 -5.934253049048401e+02 3 3.917330799270068e+02 1.767690441671677e+02 4.696120064017492e+01 3.464132742372293e+02 4 4.619381656434300e+02 -2.636161419165952e+02 -2.878861846363762e+02 2.470120306676108e+02 - ME 5.804753959762886e-05 + ME 4.203806696206548e-05 Event 139 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3164,7 +3164,7 @@ Event 139 Batch 1 2 2.994802063237944e+02 -1.272876183039153e+02 6.552211336810879e+00 2.710042891410713e+02 3 7.257546970836092e+02 -8.848613612326799e+00 5.127896146768584e+00 -7.256826352181574e+02 4 4.747650965925943e+02 1.361362319162416e+02 -1.168010748357900e+01 4.546783460770868e+02 - ME 1.724196014694060e-04 + ME 1.500396153249019e-04 Event 140 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3172,7 +3172,7 @@ Event 140 Batch 1 2 7.326756101999780e+02 5.655005379385240e+02 4.343799907428446e+02 1.683351270988810e+02 3 7.428339005597779e+02 -5.680473426214219e+02 -4.534832054058505e+02 -1.532233754243464e+02 4 2.449048924024402e+01 2.546804682897962e+00 1.910321466300584e+01 -1.511175167453447e+01 - ME 4.669436438173466e-03 + ME 1.024603362434272e-04 Event 141 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3180,7 +3180,7 @@ Event 141 Batch 1 2 7.363238871411332e+02 -6.772722174663238e+02 -2.824373475598683e+02 -6.086341204880675e+01 3 5.504260535970963e+02 4.650298533191528e+02 2.914345410616540e+02 4.221355560271704e+01 4 2.132500592617708e+02 2.122423641471711e+02 -8.997193501785816e+00 1.864985644608987e+01 - ME 7.300791864660033e-05 + ME 1.166401869382226e-05 Event 142 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3188,7 +3188,7 @@ Event 142 Batch 1 2 5.862280565156834e+02 4.248793793115829e+01 -2.479279504752411e+02 -5.295184989682986e+02 3 
4.287264749982929e+02 -3.025296967755320e+02 2.785471849307642e+02 1.212173201341831e+02 4 4.850454684860405e+02 2.600417588443628e+02 -3.061923445551928e+01 4.083011788341197e+02 - ME 4.569028399965169e-05 + ME 1.949810022878841e-05 Event 143 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3196,7 +3196,7 @@ Event 143 Batch 1 2 2.464531733710510e+02 4.046044690030688e+01 -2.103865804466287e+02 1.218179201483223e+02 3 5.378449948854583e+02 4.607829603950880e+02 -2.747641700963839e+02 3.822241180409925e+01 4 7.157018317434903e+02 -5.012434072953949e+02 4.851507505430126e+02 -1.600403319524219e+02 - ME 1.284493741497843e-03 + ME 4.863434295951330e-04 Event 144 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3204,7 +3204,7 @@ Event 144 Batch 1 2 5.367418008803521e+02 -1.343004856786532e+02 -4.048537736989352e+02 -3.258044847458254e+02 3 6.294877130859599e+02 3.313530054622211e+02 5.282137272543231e+02 8.631468610520756e+01 4 3.337704860336884e+02 -1.970525197835678e+02 -1.233599535553879e+02 2.394897986406179e+02 - ME 2.612855607885159e-05 + ME 8.754930746282009e-06 Event 145 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3212,7 +3212,7 @@ Event 145 Batch 1 2 6.805380148481771e+01 -3.411514819754512e+01 -4.339750646760406e+01 -3.980116822894492e+01 3 6.831461500979880e+02 -3.834019790669201e+02 -2.756424954453614e+02 -4.936727656514237e+02 4 7.488000484171945e+02 4.175171272644653e+02 3.190400019129655e+02 5.334739338803686e+02 - ME 4.832444287218038e-01 + ME 4.117012994651258e-01 Event 146 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3220,7 +3220,7 @@ Event 146 Batch 1 2 5.031746658797123e+02 4.202301876294930e+02 2.767377273314875e+02 2.750283520766640e+00 3 4.317115817339341e+02 -1.098088257924671e+02 -5.455162180567243e+01 4.139336083717602e+02 4 
5.651137523863538e+02 -3.104213618370259e+02 -2.221861055258150e+02 -4.166838918925268e+02 - ME 4.446377084117306e-03 + ME 1.122040831263755e-03 Event 147 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3228,7 +3228,7 @@ Event 147 Batch 1 2 4.251223043705630e+02 -4.223502783198938e+02 -4.694338569631599e+01 1.206377286808446e+01 3 5.457819748703678e+02 2.791608945230574e+02 -4.384138579515959e+02 -1.665546403390879e+02 4 5.290957207590696e+02 1.431893837968364e+02 4.853572436479118e+02 1.544908674710035e+02 - ME 5.820013407126093e-05 + ME 1.117959404473985e-05 Event 148 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3236,7 +3236,7 @@ Event 148 Batch 1 2 6.905785821272525e+02 6.249608768654489e+02 -6.243387159972350e+01 -2.870970082698929e+02 3 1.361638260920089e+02 2.862044352088506e+01 1.704210379179796e+01 1.320266050727362e+02 4 6.732575917807402e+02 -6.535813203863343e+02 4.539176780792534e+01 1.550704031971573e+02 - ME 9.573948308169230e-04 + ME 5.047601105033982e-04 Event 149 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3244,7 +3244,7 @@ Event 149 Batch 1 2 6.694705528096943e+02 -5.216497821741067e+02 -3.785079074709545e+02 1.811189935345937e+02 3 2.821401257551277e+02 1.148500354702071e-01 2.786662494166578e+02 -4.413795199872407e+01 4 5.483893214351779e+02 5.215349321386365e+02 9.984165805429673e+01 -1.369810415358697e+02 - ME 1.943324414096923e-04 + ME 3.486097449584098e-05 Event 150 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3252,7 +3252,7 @@ Event 150 Batch 1 2 4.637486188995366e+02 -4.033412855298819e+02 -2.279949807412008e+02 -1.992178895453991e+01 3 3.756800751656199e+02 6.230662615514293e+01 -2.632310737913946e+02 -2.606967683041707e+02 4 6.605713059348438e+02 3.410346593747391e+02 4.912260545325952e+02 2.806185572587107e+02 - ME 
2.156945366470290e-04 + ME 4.211370643652993e-05 Event 151 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3260,7 +3260,7 @@ Event 151 Batch 1 2 3.821954355913596e+02 -2.528320044280690e+02 2.861764538722267e+02 1.588602445142563e+01 3 6.796189325418250e+02 2.911670128135291e+02 -4.900375979142738e+02 3.700902818893582e+02 4 4.381856318668152e+02 -3.833500838546018e+01 2.038611440420471e+02 -3.859763063407838e+02 - ME 8.197229841786387e-03 + ME 1.923941526207248e-04 Event 152 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3268,7 +3268,7 @@ Event 152 Batch 1 2 6.751133298339792e+02 -2.999578895043981e+02 -2.855974213275218e+02 -5.331391803034741e+02 3 4.976977783498468e+02 -3.003988119418482e+00 1.843802943840355e+02 4.622747685874795e+02 4 3.271888918161745e+02 3.029618776238166e+02 1.012171269434863e+02 7.086441171599445e+01 - ME 1.204579535049519e-04 + ME 6.977738125195056e-05 Event 153 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3276,7 +3276,7 @@ Event 153 Batch 1 2 1.729293620257127e+02 1.558357805102956e+02 -7.193392860849491e+01 2.110174585940510e+01 3 6.524550819255464e+02 2.410158908712478e+02 5.786677971610501e+02 1.809766692333240e+02 4 6.746155560487412e+02 -3.968516713815435e+02 -5.067338685525552e+02 -2.020784150927291e+02 - ME 5.985591428637023e-04 + ME 1.391654510317005e-04 Event 154 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3284,7 +3284,7 @@ Event 154 Batch 1 2 6.585658455851002e+02 -2.410305357139302e+02 -2.116446673272157e+02 -5.751693564652295e+02 3 5.764400833248005e+02 3.388133979948972e+02 3.092747322371399e+02 3.490527051926400e+02 4 2.649940710900988e+02 -9.778286228096688e+01 -9.763006490992416e+01 2.261166512725894e+02 - ME 3.655181799213059e-05 + ME 2.686434432328395e-05 Event 155 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3292,7 +3292,7 @@ Event 155 Batch 1 2 5.686586231936359e+02 -1.693366246265498e+02 -1.542203680657918e+02 5.204938187588979e+02 3 1.882190564276536e+02 -1.089234770645493e+02 -9.145416397064866e+01 1.232810822434430e+02 4 7.431223203787102e+02 2.782601016910992e+02 2.456745320364404e+02 -6.437749010023409e+02 - ME 6.696396361607482e-01 + ME 4.701119881405690e-01 Event 156 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3300,7 +3300,7 @@ Event 156 Batch 1 2 6.143652095725128e+02 2.879464601546110e+02 5.379391909976823e+02 -7.178351904348040e+01 3 6.287751645293085e+02 -4.584164185734781e+02 -4.225140875260598e+02 -8.181956094447702e+01 4 2.568596258981782e+02 1.704699584188668e+02 -1.154251034716223e+02 1.536030799879581e+02 - ME 2.899571701789112e-05 + ME 7.769660148731367e-06 Event 157 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3308,7 +3308,7 @@ Event 157 Batch 1 2 5.050842109798973e+02 4.185498850973046e+02 -1.305174306570672e+02 -2.507812875014723e+02 3 5.170424494038050e+02 -3.084595065654854e+02 3.930456446728388e+02 -1.330441599566699e+02 4 4.778733396162975e+02 -1.100903785318191e+02 -2.625282140157716e+02 3.838254474581424e+02 - ME 4.033251359625283e-05 + ME 1.243977993100618e-05 Event 158 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3316,7 +3316,7 @@ Event 158 Batch 1 2 4.312542366204098e+02 -3.114503370626313e+02 2.737030704635235e+02 1.185982013584742e+02 3 6.944315393047829e+02 2.166643175309468e+02 -6.173965008138002e+02 -2.326226495269423e+02 4 3.743142240748070e+02 9.478601953168439e+01 3.436934303502764e+02 1.140244481684682e+02 - ME 3.680357310121394e-05 + ME 5.864250821924803e-06 Event 159 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3324,7 +3324,7 @@ 
Event 159 Batch 1 2 5.860112473308646e+02 -1.581297551692178e+02 4.935632758462007e+02 2.734948907463652e+02 3 3.772013313646349e+02 -2.371132827856262e+02 -1.305099443644436e+02 -2.627266448837395e+02 4 5.367874213045002e+02 3.952430379548442e+02 -3.630533314817573e+02 -1.076824586262577e+01 - ME 1.030382455754272e-04 + ME 2.805189658646002e-05 Event 160 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3332,7 +3332,7 @@ Event 160 Batch 1 2 5.883409724804535e+02 -3.739819298758817e+02 -2.887651121595530e+02 3.505671490956299e+02 3 4.300332553173178e+02 1.788055146224819e+02 3.829208006453583e+02 7.955406370837679e+01 4 4.816257722022287e+02 1.951764152533999e+02 -9.415568848580530e+01 -4.301212128040066e+02 - ME 9.797271586219467e-03 + ME 2.307516153071828e-04 Event 161 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3340,7 +3340,7 @@ Event 161 Batch 1 2 6.868305165969147e+02 4.119610488151656e+00 5.515184990814985e+02 4.093244831537709e+02 3 3.260821955312833e+02 -1.956999890649130e+02 -2.483451099187458e+02 -7.972338993006402e+01 4 4.870872878718022e+02 1.915803785767614e+02 -3.031733891627526e+02 -3.296010932237070e+02 - ME 1.075603053132144e-03 + ME 9.860610555787331e-05 Event 162 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3348,7 +3348,7 @@ Event 162 Batch 1 2 2.159818802305119e+02 -2.018126805027919e+02 4.096951387107715e+01 -6.512536763314942e+01 3 6.870078865581224e+02 4.896730732821633e+02 -2.356527215298929e+02 -4.203188222421333e+02 4 5.970102332113654e+02 -2.878603927793715e+02 1.946832076588156e+02 4.854441898752826e+02 - ME 5.344822454174306e-05 + ME 2.809071549115161e-05 Event 163 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3356,7 +3356,7 @@ Event 163 Batch 1 2 4.889699854403287e+02 -4.067839821807834e+01 -2.740835242435768e+02 
4.028835269878222e+02 3 4.282392920294498e+02 4.007468150560176e+02 -8.832740907173851e+01 -1.224301852772270e+02 4 5.827907225302220e+02 -3.600684168379390e+02 3.624109333153153e+02 -2.804533417105952e+02 - ME 4.336231422638298e-04 + ME 1.173701793303044e-04 Event 164 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3364,7 +3364,7 @@ Event 164 Batch 1 2 6.224346677404150e+02 -1.282049393554146e+02 5.480608628970117e+02 -2.657399098565701e+02 3 7.444531740822750e+02 1.794330131141779e+02 -6.708967511266460e+02 2.681638893170603e+02 4 1.331121581773107e+02 -5.122807375876333e+01 1.228358882296343e+02 -2.423979460490191e+00 - ME 1.368953177788070e-04 + ME 1.571413941583783e-05 Event 165 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3372,7 +3372,7 @@ Event 165 Batch 1 2 6.980339706506675e+02 -5.154669325341684e+01 -4.947847840614098e+02 4.896757907618869e+02 3 1.362964882116331e+02 4.252532371924361e+01 -5.641238783031591e+01 -1.165588780002596e+02 4 6.656695411377010e+02 9.021369534174053e+00 5.511971718917263e+02 -3.731169127616273e+02 - ME 1.450267418906797e-03 + ME 4.238311927693088e-04 Event 166 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3380,7 +3380,7 @@ Event 166 Batch 1 2 3.060640747281171e+02 -1.981167412190918e+02 -9.095380261170779e+01 -2.148310510107333e+02 3 5.580104478575086e+02 -3.585720992432471e+02 -1.558095186186280e+02 3.981521109704927e+02 4 6.359254774143739e+02 5.566888404623389e+02 2.467633212303362e+02 -1.833210599597597e+02 - ME 3.000804338470548e-04 + ME 1.099447007687216e-04 Event 167 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3388,7 +3388,7 @@ Event 167 Batch 1 2 2.833153623322893e+02 2.526850217013923e+02 8.687924899084067e+01 9.417998957332070e+01 3 6.595685044563415e+02 -8.780626893611850e+01 -2.875856231737449e+02 
-5.870393347553995e+02 4 5.571161332113688e+02 -1.648787527652738e+02 2.007063741829043e+02 4.928593451820789e+02 - ME 7.367447958524992e-05 + ME 4.244421486768831e-05 Event 168 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3396,7 +3396,7 @@ Event 168 Batch 1 2 6.026267479353969e+02 -5.987968578530475e+02 5.775180228477150e+00 6.758674164241529e+01 3 4.991211680715713e+02 3.812575567959843e+02 3.220701575873951e+02 -5.952259631185711e+00 4 3.982520839930309e+02 2.175393010570631e+02 -3.278453378158730e+02 -6.163448201122968e+01 - ME 9.606399998327532e-05 + ME 1.203107058680061e-05 Event 169 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3404,7 +3404,7 @@ Event 169 Batch 1 2 5.510662376679772e+02 -9.251111075413947e+01 -5.291920243323356e+02 -1.227660134875281e+02 3 5.034535790022877e+02 -2.816014265681677e+02 3.283802195198170e+02 2.575511098657944e+02 4 4.454801833297348e+02 3.741125373223072e+02 2.008118048125185e+02 -1.347850963782663e+02 - ME 1.532484123791625e-04 + ME 2.085195230877358e-05 Event 170 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3412,7 +3412,7 @@ Event 170 Batch 1 2 2.814808559369750e+02 3.658097943502287e+01 -1.412301634042880e+02 -2.407225480659935e+02 3 6.646522150540470e+02 2.753499086551696e+02 -1.631412967142655e+02 5.825203104495404e+02 4 5.538669290089779e+02 -3.119308880901926e+02 3.043714601185535e+02 -3.417977623835468e+02 - ME 7.823510217753851e-04 + ME 2.587160315460459e-04 Event 171 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3420,7 +3420,7 @@ Event 171 Batch 1 2 1.777965289077954e+02 -6.143496808852239e+01 -1.603735842336773e+00 1.668375809551635e+02 3 7.439290290569696e+02 2.163074211412066e+01 -1.907051550939623e+01 -7.433699124308462e+02 4 5.782744420352348e+02 3.980422597440174e+01 2.067425135173305e+01 
5.765323314756826e+02 - ME 2.063755640794395e-03 + ME 1.981167274383509e-03 Event 172 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3428,7 +3428,7 @@ Event 172 Batch 1 2 1.369499454750680e+02 -1.250080331667568e+01 -3.518152151649629e+01 -1.317622025690455e+02 3 6.692885586315896e+02 -2.346283187163472e+02 -6.130705295376303e+02 1.305421486874673e+02 4 6.937614958933425e+02 2.471291220330227e+02 6.482520510541266e+02 1.220053881578238e+00 - ME 5.039586079692636e-04 + ME 1.548169060571347e-04 Event 173 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3436,7 +3436,7 @@ Event 173 Batch 1 2 7.088772083623137e+02 4.973951266878932e+01 3.171232495758680e+01 -7.064185769505260e+02 3 5.785136264307895e+02 8.584813303397833e+01 5.766505028397120e+01 5.691949191590089e+02 4 2.126091652068944e+02 -1.355876457027672e+02 -8.937737524155732e+01 1.372236577915166e+02 - ME 1.743760900867476e-04 + ME 1.732961413682620e-04 Event 174 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3444,7 +3444,7 @@ Event 174 Batch 1 2 4.367208701713482e+02 -3.923163287174704e+01 4.325755195957351e+02 -4.543585887727652e+01 3 3.528978856725088e+02 9.622572295106905e+01 1.987077746703234e+02 -2.753048278549415e+02 4 7.103812441561454e+02 -5.699409007932221e+01 -6.312832942660567e+02 3.207406867322186e+02 - ME 9.353677491192390e-04 + ME 1.541208918572365e-04 Event 175 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3452,7 +3452,7 @@ Event 175 Batch 1 2 6.418562164876806e+02 1.962785648722137e+02 -6.110736372974047e+02 -6.567908015856712e+00 3 4.843421844702149e+02 -1.886631806266161e+02 3.569879071908527e+02 -2.674942804112337e+02 4 3.738015990421035e+02 -7.615384245597569e+00 2.540857301065516e+02 2.740621884270906e+02 - ME 3.029111560812189e-05 + ME 1.279055979705581e-05 Event 176 Batch 1 
0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3460,7 +3460,7 @@ Event 176 Batch 1 2 6.288652703123263e+02 4.005522031116294e+02 3.691482793515075e+02 3.142594606996526e+02 3 7.209127580467475e+02 -4.124575135572966e+02 -5.165298058232565e+02 -2.877341896975221e+02 4 1.502219716409257e+02 1.190531044566666e+01 1.473815264717492e+02 -2.652527100213051e+01 - ME 1.719274466020296e-04 + ME 1.300720357566141e-05 Event 177 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3468,7 +3468,7 @@ Event 177 Batch 1 2 4.716578040000077e+02 -4.521622645932388e+02 -1.012739918234145e+01 1.338200520767543e+02 3 3.021382980750606e+02 -2.714821202364266e+02 6.773215888881064e+01 -1.140059832109250e+02 4 7.262038979249317e+02 7.236443848296653e+02 -5.760475970646905e+01 -1.981406886582933e+01 - ME 2.354271252348000e-03 + ME 6.442260552556652e-04 Event 178 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3476,7 +3476,7 @@ Event 178 Batch 1 2 7.350088877399502e+02 -3.684484945749095e+02 -2.561732769425163e+02 -5.821159885132296e+02 3 1.415495174310248e+02 7.181268644032879e+01 1.095010133995263e+02 5.374692563910759e+01 4 6.234415948290248e+02 2.966358081345808e+02 1.466722635429900e+02 5.283690628741219e+02 - ME 1.035408980291912e-04 + ME 6.828487731379645e-05 Event 179 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3484,7 +3484,7 @@ Event 179 Batch 1 2 7.426064621425413e+02 6.748632301344054e+01 7.201624948975951e+02 -1.681544967131679e+02 3 5.821031882499326e+02 8.394276920418550e-01 -5.588194474899291e+02 1.629854049874919e+02 4 1.752903496075256e+02 -6.832575070548241e+01 -1.613430474076661e+02 5.169091725675888e+00 - ME 9.197132478706931e-05 + ME 1.412410550503903e-05 Event 180 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-3492,7 +3492,7 @@ Event 180 Batch 1 2 6.099515195485484e+02 2.272495331206023e+02 1.762692760011278e+02 -5.378918555193875e+02 3 5.718889655176699e+02 4.324570510796980e+01 -3.278409766521432e+02 4.665909256493895e+02 4 3.181595149337819e+02 -2.704952382285720e+02 1.515717006510154e+02 7.130092986999803e+01 - ME 5.401477812349802e-05 + ME 3.043963963928669e-05 Event 181 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3500,7 +3500,7 @@ Event 181 Batch 1 2 1.206370886915177e+02 -8.151225636567759e+01 1.767749325039422e+01 8.715827822142556e+01 3 6.451493408002739e+02 -6.748216257939080e+01 4.373428479320614e+02 4.694625256943417e+02 4 7.342135705082084e+02 1.489944189450684e+02 -4.550203411824557e+02 -5.566208039157672e+02 - ME 7.131653341377736e-02 + ME 2.625479922313071e-02 Event 182 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3508,7 +3508,7 @@ Event 182 Batch 1 2 4.626866082364760e+02 -3.084610429505738e+02 3.306629079434072e+02 9.794245113140897e+01 3 4.974966719253473e+02 3.582955998671217e+02 1.664640547097976e+02 -3.023523113558579e+02 4 5.398167198381765e+02 -4.983455691654795e+01 -4.971269626532048e+02 2.044098602244489e+02 - ME 5.959042767905828e-05 + ME 1.414799589613471e-05 Event 183 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3516,7 +3516,7 @@ Event 183 Batch 1 2 3.304723045950491e+02 3.244647182058462e+00 3.209425641774955e+02 7.872284845075714e+01 3 4.379804819457451e+02 2.312428523500660e+02 3.131807483468383e+02 2.006775141049615e+02 4 7.315472134592065e+02 -2.344874995321247e+02 -6.341233125243344e+02 -2.794003625557186e+02 - ME 4.899988668912175e-03 + ME 2.330806393221907e-03 Event 184 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3524,7 +3524,7 @@ Event 184 Batch 1 2 7.470051035005908e+02 -4.953964753944513e+02 
-4.028924750569613e+02 3.876552725878485e+02 3 2.183325716323390e+02 1.119040172022777e+02 1.451703047217021e+02 -1.186262424448778e+02 4 5.346623248670695e+02 3.834924581921736e+02 2.577221703352594e+02 -2.690290301429710e+02 - ME 5.441344453720516e-04 + ME 7.987999480474686e-05 Event 185 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3532,7 +3532,7 @@ Event 185 Batch 1 2 4.448583927494090e+02 2.810173563272025e+02 -3.384637477435971e+02 6.610995769032235e+01 3 6.236443795626774e+02 -1.690803760724666e+02 5.125139620028374e+02 3.125277225134823e+02 4 4.314972276879136e+02 -1.119369802547359e+02 -1.740502142592404e+02 -3.786376802038046e+02 - ME 6.949230823829164e-03 + ME 1.405605442011058e-04 Event 186 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3540,7 +3540,7 @@ Event 186 Batch 1 2 6.802792190696962e+02 -1.681815241656754e+02 5.427923640013703e+02 3.739936368565512e+02 3 6.331554869749547e+02 3.172201723440435e+02 -4.588808692389625e+02 -2.994755095011972e+02 4 1.865652939553488e+02 -1.490386481783679e+02 -8.391149476240778e+01 -7.451812735535422e+01 - ME 3.276943053321406e-04 + ME 3.045129627255903e-05 Event 187 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3548,7 +3548,7 @@ Event 187 Batch 1 2 7.472897115267965e+02 -6.988402471604775e+02 -2.391684329048669e+02 1.134137672609268e+02 3 6.826908170748527e+02 6.328852277257668e+02 2.212839847556716e+02 -1.286718241709738e+02 4 7.001947139835140e+01 6.595501943471052e+01 1.788444814919547e+01 1.525805691004725e+01 - ME 1.461490870437387e-04 + ME 3.485925693242860e-05 Event 188 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3556,7 +3556,7 @@ Event 188 Batch 1 2 6.496068877140275e+02 -5.024316730938291e+02 -3.980061777252906e+02 -1.055585379310702e+02 3 4.885976180718368e+02 4.424928723138696e+02 
1.459942636040002e+02 -1.470148473169288e+02 4 3.617954942141354e+02 5.993880077995960e+01 2.520119141212904e+02 2.525733852479991e+02 - ME 2.843805826594158e-05 + ME 1.006519408431335e-05 Event 189 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3564,7 +3564,7 @@ Event 189 Batch 1 2 4.082379946778654e+02 2.679237131173331e+02 -7.718184435750955e+01 2.981913934867987e+02 3 5.864211573889181e+02 -5.780822197382728e+02 -6.394893886953379e+01 7.497502433004084e+01 4 5.053408479332167e+02 3.101585066209396e+02 1.411307832270433e+02 -3.731664178168398e+02 - ME 1.937644878671120e-03 + ME 1.322787627040098e-04 Event 190 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3572,7 +3572,7 @@ Event 190 Batch 1 2 6.472516823166364e+02 6.463779961822676e+02 -3.289365889632791e+01 6.945035458816692e+00 3 4.318767277050750e+02 -3.286790725415815e+02 -7.183748821760624e+00 -2.800642229191639e+02 4 4.208715899782885e+02 -3.176989236406859e+02 4.007740771808847e+01 2.731191874603472e+02 - ME 3.409584379294133e-05 + ME 1.272332211942340e-05 Event 191 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3580,7 +3580,7 @@ Event 191 Batch 1 2 6.757500036387052e+02 6.222744522021635e+02 -2.261571472854044e+02 1.351499844096745e+02 3 3.644673602666567e+02 -2.020102809038697e+02 1.114149692296405e+02 -2.821613151026251e+02 4 4.597826360946380e+02 -4.202641712982938e+02 1.147421780557637e+02 1.470113306929507e+02 - ME 5.389305783035389e-05 + ME 1.560703181590231e-05 Event 192 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3588,7 +3588,7 @@ Event 192 Batch 1 2 7.394562478491531e+02 -7.307873850878615e+02 3.988568028534699e+01 1.056147375500683e+02 3 8.098058518630978e+01 5.419286926826393e+01 4.244928426361276e+00 -6.002473390399248e+01 4 6.795631669645365e+02 6.765945158195976e+02 
-4.413060871170821e+01 -4.559000364607596e+01 - ME 4.204295748489254e-04 + ME 1.231033846344155e-04 Event 193 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3596,7 +3596,7 @@ Event 193 Batch 1 2 5.607395612273153e+02 -3.164229781907934e+02 -3.517992386171808e+02 -3.009030576558548e+02 3 3.741643617741927e+02 -2.156271676189966e+02 1.666697084176705e+02 2.563690747778811e+02 4 5.650960769984922e+02 5.320501458097899e+02 1.851295301995104e+02 4.453398287797368e+01 - ME 9.141090879934244e-05 + ME 3.026844143728605e-05 Event 194 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3604,7 +3604,7 @@ Event 194 Batch 1 2 5.729373416862012e+02 -2.155045544874616e+02 -1.679805246197324e+02 5.035846779262559e+02 3 2.831035485618876e+02 -2.543279085173982e+02 1.042261812492671e+02 -6.783684323208054e+01 4 6.439591097519118e+02 4.698324630048598e+02 6.375434337046515e+01 -4.357478346941756e+02 - ME 1.781231321893996e-03 + ME 5.497724763810379e-04 Event 195 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3612,7 +3612,7 @@ Event 195 Batch 1 2 5.572874060171201e+02 -5.433144409127298e+02 3.646295232533866e+01 1.185290019729285e+02 3 6.765845568040619e+02 5.574999049241243e+02 -1.212989803269169e+01 -3.831623469093195e+02 4 2.661280371788181e+02 -1.418546401139455e+01 -2.433305429264712e+01 2.646333449363910e+02 - ME 3.395618115588225e-04 + ME 3.378534889977447e-04 Event 196 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3620,7 +3620,7 @@ Event 196 Batch 1 2 5.405888343305829e+02 3.940239871950471e+02 -8.826690628749978e+01 -3.594305754554688e+02 3 6.983754392688073e+02 -3.888370902622853e+02 -5.513072771506098e+01 5.774898910559966e+02 4 2.610357264006097e+02 -5.186896932761887e+00 1.433976340025607e+02 -2.180593156005277e+02 - ME 5.539073969003598e-03 + ME 
2.676929502290073e-04 Event 197 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3628,7 +3628,7 @@ Event 197 Batch 1 2 2.783346334111661e+02 2.282410890438732e+02 -1.474467226896361e+02 6.029624695020830e+01 3 6.434654504578666e+02 1.172104173128919e+01 6.205939438823057e+02 1.696277097949658e+02 4 5.781999161309674e+02 -2.399621307751624e+02 -4.731472211926695e+02 -2.299239567451741e+02 - ME 3.321087064690878e-04 + ME 4.280180350752636e-05 Event 198 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3636,7 +3636,7 @@ Event 198 Batch 1 2 4.349536439683943e+02 1.774777254208009e+02 -9.709992209949135e+01 3.850427697141142e+02 3 4.134500153047116e+02 7.095914770071803e+01 -4.041194890923881e+02 -5.092301099466194e+01 4 6.515963407268921e+02 -2.484368731215197e+02 5.012194111918782e+02 -3.341197587194521e+02 - ME 7.849443582399766e-04 + ME 2.926862112764983e-04 Event 199 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3644,7 +3644,7 @@ Event 199 Batch 1 2 6.682109290882580e+02 2.136897997740939e+02 -5.035763266519416e+02 3.837361052354048e+02 3 1.424120473397155e+02 8.952788458880865e+01 -4.686863299276860e+01 -1.003458038481504e+02 4 6.893770235720265e+02 -3.032176843629025e+02 5.504449596447103e+02 -2.833903013872543e+02 - ME 1.167594898598604e-03 + ME 4.183851150998592e-04 Event 200 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3652,7 +3652,7 @@ Event 200 Batch 1 2 5.959952693237885e+02 -4.878566955018547e+02 -2.510837703973929e+01 -3.414319479966339e+02 3 4.479637599869168e+02 4.499951041477978e+01 7.146287716862105e+01 4.399313940955211e+02 4 4.560409706892941e+02 4.428571850870749e+02 -4.635450012888173e+01 -9.849944609888662e+01 - ME 5.545496796633981e-04 + ME 3.228844805909175e-04 Event 201 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -3660,7 +3660,7 @@ Event 201 Batch 1 2 5.203096708642927e+02 -1.112696379946441e+02 1.367824427202020e+02 4.895219960522141e+02 3 2.871951825199399e+02 -2.582762312778227e+02 1.200876310962787e+02 3.678888524092984e+01 4 6.924951466157675e+02 3.695458692724667e+02 -2.568700738164807e+02 -5.263108812931440e+02 - ME 6.577575910850049e-03 + ME 2.285182473348715e-03 Event 202 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3668,7 +3668,7 @@ Event 202 Batch 1 2 2.158792376054218e+02 2.112389782008981e+01 -7.195062193526132e+01 -2.024369881546198e+02 3 5.463652944256570e+02 2.787950008966254e+02 -3.108926376755554e+02 -3.523267663221479e+02 4 7.377554679689213e+02 -2.999188987167153e+02 3.828432596108168e+02 5.547637544767679e+02 - ME 8.695282964050810e-03 + ME 1.952686275320307e-03 Event 203 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3676,7 +3676,7 @@ Event 203 Batch 1 2 7.124273471334275e+02 4.879265047129839e+02 -1.059167473143779e+02 -5.081949365946950e+02 3 6.746108110440506e+02 -5.248642991835990e+02 4.352799102536777e+01 4.215714978711400e+02 4 1.129618418225217e+02 3.693779447061509e+01 6.238875628901040e+01 8.662343872355494e+01 - ME 5.361938367485652e-05 + ME 4.211918129012132e-05 Event 204 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3684,7 +3684,7 @@ Event 204 Batch 1 2 7.084787759842808e+02 4.992472551829619e+02 -4.528122431715626e+02 -2.183012291454193e+02 3 1.034373169902747e+02 -8.959882065299325e+01 -3.938861547415055e+01 -3.346441176487074e+01 4 6.880839070254444e+02 -4.096484345299685e+02 4.922008586457131e+02 2.517656409102901e+02 - ME 2.988048706021647e-04 + ME 1.033102023766027e-04 Event 205 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3692,7 +3692,7 @@ Event 205 Batch 1 2 
6.496569846879349e+02 -5.869603795046561e+02 -2.345911576090251e+02 1.499956646614410e+02 3 2.543878192344406e+02 -1.851019090219859e+00 2.474675926596849e+02 -5.890268997594536e+01 4 5.959551960776247e+02 5.888113985948760e+02 -1.287643505065981e+01 -9.109297468549572e+01 - ME 1.871447246980874e-04 + ME 4.134215827558992e-05 Event 206 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3700,7 +3700,7 @@ Event 206 Batch 1 2 6.172060642836410e+02 2.978040691523503e+02 4.166709400833434e+02 3.444435946201744e+02 3 7.205754982426181e+02 -2.468045809177361e+02 -5.690387091428452e+02 -3.667580878490107e+02 4 1.622184374737409e+02 -5.099948823461420e+01 1.523677690595017e+02 2.231449322883641e+01 - ME 7.356489425273393e-05 + ME 1.138691716042452e-05 Event 207 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3708,7 +3708,7 @@ Event 207 Batch 1 2 5.250113096394139e+02 -1.091977068802181e+02 -4.322753509449321e+02 2.772196909074646e+02 3 5.240251005653129e+02 3.541948269240045e+02 3.738549241960732e+02 9.685466564450643e+01 4 4.509635897952731e+02 -2.449971200437864e+02 5.842042674885889e+01 -3.740743565519710e+02 - ME 3.378615964480245e-03 + ME 9.518274156960593e-05 Event 208 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3716,7 +3716,7 @@ Event 208 Batch 1 2 4.449444343820048e+02 1.928662436733418e+02 -3.595193210859464e+02 1.775500478872298e+02 3 4.894053462810564e+02 -2.195789585225567e+02 2.295326432211599e+02 3.723136307450180e+02 4 5.656502193369389e+02 2.671271484921488e+01 1.299866778647865e+02 -5.498636786322478e+02 - ME 2.068943926258950e-01 + ME 2.179806976662403e-03 Event 209 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3724,7 +3724,7 @@ Event 209 Batch 1 2 4.949423498078044e+02 -2.830370809537592e+02 -1.684680620467476e+02 -3.694271951395289e+02 3 
6.326444171345161e+02 3.898538983719823e+02 -1.748162179498052e+02 4.665749526039372e+02 4 3.724132330576786e+02 -1.068168174182231e+02 3.432842799965525e+02 -9.714775746440780e+01 - ME 1.473942246791387e-04 + ME 3.638076645868775e-05 Event 210 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3732,7 +3732,7 @@ Event 210 Batch 1 2 5.469464199121014e+02 -4.947084169679945e+02 2.319240083666633e+02 -2.500445517953792e+01 3 2.929141603572806e+02 -5.602902696925145e+01 2.099470855189298e+01 2.867379913571110e+02 4 6.601394197306178e+02 5.507374439372461e+02 -2.529187169185561e+02 -2.617335361775729e+02 - ME 1.577330101330874e-03 + ME 7.792286450853471e-04 Event 211 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3740,7 +3740,7 @@ Event 211 Batch 1 2 5.484404249965427e+02 1.659778109685243e+01 3.514591842057613e+02 -4.206992456262192e+02 3 4.635537606517395e+02 -3.607884938122542e+02 -3.140996451540818e+01 2.893564685231623e+02 4 4.880058143517181e+02 3.441907127154018e+02 -3.200492196903532e+02 1.313427771030569e+02 - ME 4.999214184618137e-05 + ME 1.717788621912363e-05 Event 212 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3748,7 +3748,7 @@ Event 212 Batch 1 2 6.930853388432640e+02 -3.424793196872474e+02 -8.152110066892747e+01 5.970171795281683e+02 3 9.131624224772825e+01 6.738328155058525e+01 1.365968298972706e+01 6.009627714210347e+01 4 7.155984189090078e+02 2.750960381366621e+02 6.786141767920034e+01 -6.571134566702718e+02 - ME 3.224436999651524e-01 + ME 4.440767413899675e-02 Event 213 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3756,7 +3756,7 @@ Event 213 Batch 1 2 7.316448870278512e+02 4.203233031264803e+02 4.913598772661251e+02 -3.423419819067778e+02 3 4.750162603483208e+02 -1.726357548525294e+02 -3.708603862154638e+02 2.414537588813190e+02 4 
2.933388526238279e+02 -2.476875482739507e+02 -1.204994910506614e+02 1.008882230254589e+02 - ME 4.008080891216109e-05 + ME 1.166473784051930e-05 Event 214 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3764,7 +3764,7 @@ Event 214 Batch 1 2 4.805779599533694e+02 3.904513572450257e+02 -1.742898429406511e+02 2.193763065287195e+02 3 6.164938851206517e+02 -5.563771061772993e+02 2.227142270499353e+02 1.445946028815716e+02 4 4.029281549259790e+02 1.659257489322735e+02 -4.842438410928419e+01 -3.639709094102910e+02 - ME 1.130096726278085e-02 + ME 1.644694060635318e-04 Event 215 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3772,7 +3772,7 @@ Event 215 Batch 1 2 4.610896439725640e+02 -3.106576460930037e+02 -3.050258363865880e+02 -1.518378274323046e+02 3 7.153470686812809e+02 2.726436938726979e+02 6.046054769368644e+02 2.680280994976061e+02 4 3.235632873461531e+02 3.801395222030658e+01 -2.995796405502758e+02 -1.161902720653026e+02 - ME 2.130646114222361e-04 + ME 1.638803663744001e-05 Event 216 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3780,7 +3780,7 @@ Event 216 Batch 1 2 5.309452696424389e+02 -4.912950836090372e+02 -3.608909251460832e+01 -1.980646298023531e+02 3 6.627369363365399e+02 4.479096066616000e+02 2.308759280187052e+02 4.304573578259469e+02 4 3.063177940210212e+02 4.338547694743724e+01 -1.947868355040969e+02 -2.323927280235938e+02 - ME 1.881406502208647e-03 + ME 7.684209531203918e-05 Event 217 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3788,7 +3788,7 @@ Event 217 Batch 1 2 4.608032244164870e+02 2.215832851737383e+02 3.318832460795877e+02 -2.304212888079594e+02 3 3.107022283044695e+02 -4.724697178681157e+01 2.830528592337836e+02 -1.190994425256424e+02 4 7.284945472790432e+02 -1.743363133869267e+02 -6.149361053133712e+02 3.495207313336019e+02 - 
ME 2.894775763457067e-03 + ME 4.426756984161849e-04 Event 218 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3796,7 +3796,7 @@ Event 218 Batch 1 2 6.336891602166270e+02 5.249943224110900e+02 1.648031440577737e+02 -3.142973702098814e+02 3 5.195346944320743e+02 -3.655895580768890e+02 -3.610279413409480e+02 7.693763263116504e+01 4 3.467761453512956e+02 -1.594047643342018e+02 1.962247972831736e+02 2.373597375787177e+02 - ME 2.703962034458943e-05 + ME 8.957256945094420e-06 Event 219 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3804,7 +3804,7 @@ Event 219 Batch 1 2 2.579228498517417e+02 -4.166553381892272e+01 1.191899344508913e+02 2.249042891828000e+02 3 7.453266221408651e+02 -3.354388163550532e+01 -3.947818065141064e+02 -6.312954196904914e+02 4 4.967505280073930e+02 7.520941545442813e+01 2.755918720632151e+02 4.063911305076915e+02 - ME 6.103184694489295e-05 + ME 4.019449398167179e-05 Event 220 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3812,7 +3812,7 @@ Event 220 Batch 1 2 4.940336288355577e+02 -2.383755021420815e+02 -2.918661661143953e+02 3.194690712363630e+02 3 7.129224521449780e+02 2.727447507998269e+02 2.535039959962389e+02 -6.079510240944473e+02 4 2.930439190194635e+02 -3.436924865774512e+01 3.836217011815621e+01 2.884819528580837e+02 - ME 1.761519882509421e-04 + ME 1.677977866215262e-04 Event 221 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3820,7 +3820,7 @@ Event 221 Batch 1 2 3.305414381337777e+02 -2.712796684963201e+02 -1.199910663213094e+02 -1.458325333632650e+02 3 7.388441803280767e+02 5.510455284380058e+02 4.375213740715825e+02 2.254209298704556e+02 4 4.306143815381457e+02 -2.797658599416856e+02 -3.175303077502730e+02 -7.958839650719051e+01 - ME 1.338118621913618e-04 + ME 1.392897982206581e-05 Event 222 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3828,7 +3828,7 @@ Event 222 Batch 1 2 4.657562074797755e+02 2.823280548971349e+02 2.956503281023745e+02 2.231828795335844e+02 3 4.791948192186352e+02 -3.228825926298714e+02 2.575611801233854e+02 -2.429747818931873e+02 4 5.550489733015891e+02 4.055453773273638e+01 -5.532115082257600e+02 1.979190235960287e+01 - ME 9.040551632672907e-05 + ME 2.328731171682892e-05 Event 223 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3836,7 +3836,7 @@ Event 223 Batch 1 2 1.612164685986321e+02 -4.527922182271191e+01 -1.095260585492910e+01 1.543391792239740e+02 3 6.984218503485876e+02 -4.629950983513680e+02 2.605715575888556e+02 -4.533553609726805e+02 4 6.403616810527805e+02 5.082743201740799e+02 -2.496189517339264e+02 2.990161817487066e+02 - ME 4.148580235863498e-04 + ME 2.446487784841432e-04 Event 224 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3844,7 +3844,7 @@ Event 224 Batch 1 2 1.663853414671972e+02 -1.350882138037309e+02 9.706071747767010e+01 3.804401292344658e+00 3 6.436745581417563e+02 -4.469273298203079e+02 -4.412749113764766e+02 -1.408877256838118e+02 4 6.899401003910457e+02 5.820155436240389e+02 3.442141938988058e+02 1.370833243914657e+02 - ME 3.449215697364171e-04 + ME 9.431632941984795e-05 Event 225 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3852,7 +3852,7 @@ Event 225 Batch 1 2 6.702356777533546e+02 6.117158080352369e+02 -2.649249521350114e+02 -6.952987609335720e+01 3 6.901224376513153e+02 -6.564819557015361e+02 1.560869289536550e+02 1.446972404640001e+02 4 1.396418845953297e+02 4.476614766629927e+01 1.088380231813564e+02 -7.516736437064299e+01 - ME 6.407468428023662e-04 + ME 2.456039108263569e-05 Event 226 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3860,7 +3860,7 @@ Event 
226 Batch 1 2 7.307777643673112e+02 -4.569648094661606e+02 4.416236342013199e+02 -3.608155616351098e+02 3 1.446420186345137e+02 4.133161435221925e+01 -3.411742569426914e+01 1.343466131828505e+02 4 6.245802169981752e+02 4.156331951139413e+02 -4.075062085070508e+02 2.264689484522593e+02 - ME 4.858390443010437e-04 + ME 2.774761612267077e-04 Event 227 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3868,7 +3868,7 @@ Event 227 Batch 1 2 7.408615397889290e+02 -4.398089081634772e+02 -5.325812259979131e+02 2.679574278743413e+02 3 4.035753807128123e+02 3.000971513323747e+02 2.468113220276344e+02 -1.090823496201683e+02 4 3.555630794982585e+02 1.397117568311025e+02 2.857699039702786e+02 -1.588750782541728e+02 - ME 3.215647103618368e-04 + ME 3.077346064218035e-05 Event 228 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3876,7 +3876,7 @@ Event 228 Batch 1 2 5.775455372723294e+02 -3.656199842755111e+02 -6.289501053880601e+01 4.426342647953073e+02 3 3.247306314578497e+02 8.776645762339835e+01 3.116872137482897e+02 2.445634292125525e+01 4 5.977238312698206e+02 2.778535266521127e+02 -2.487922032094836e+02 -4.670906077165625e+02 - ME 3.156934429573604e-03 + ME 3.399241079583280e-04 Event 229 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3884,7 +3884,7 @@ Event 229 Batch 1 2 3.665477125629453e+02 -2.081014917770363e+02 2.317985113364040e+02 -1.931850016112187e+02 3 6.187040836990479e+02 -2.134593092471877e+02 -3.484367286517815e+02 4.645661552545953e+02 4 5.147482037380067e+02 4.215608010242241e+02 1.166382173153775e+02 -2.713811536433765e+02 - ME 4.392210547845218e-04 + ME 8.330968691049859e-05 Event 230 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3892,7 +3892,7 @@ Event 230 Batch 1 2 5.913978529013565e+02 -4.986092821675885e+02 -3.028328044703767e+02 
9.712104143419764e+01 3 3.439186614041002e+02 -6.573524045766426e+01 3.216488491089061e+02 -1.024741025375549e+02 4 5.646834856945436e+02 5.643445226252528e+02 -1.881604463852933e+01 5.353061103357447e+00 - ME 1.067159092411647e-04 + ME 2.296146042402505e-05 Event 231 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3900,7 +3900,7 @@ Event 231 Batch 1 2 5.760768557894827e+02 -7.075794524290799e+01 5.609870884449791e+02 1.102331327656218e+02 3 6.038619762337338e+02 -2.467027894308989e+02 -5.464177649873398e+02 -7.221250677108812e+01 4 3.200611679767834e+02 3.174607346738069e+02 -1.456932345763944e+01 -3.802062599453370e+01 - ME 8.750887998909065e-05 + ME 9.438631267217403e-06 Event 232 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3908,7 +3908,7 @@ Event 232 Batch 1 2 7.230187249684843e+02 -2.426041066061352e+02 1.884455685697195e+02 -6.545132479937492e+02 3 4.821326920133732e+02 2.438648429837413e+02 -1.563760752388986e+01 4.156168142598493e+02 4 2.948485830181424e+02 -1.260736377606032e+00 -1.728079610458298e+02 2.388964337338999e+02 - ME 4.549716999825542e-05 + ME 3.745272037455064e-05 Event 233 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3916,7 +3916,7 @@ Event 233 Batch 1 2 3.540260977608100e+02 -1.904526694678991e+02 -1.042089619355360e+02 -2.796475475319170e+02 3 4.925592302096041e+02 1.195034224421750e+02 3.554637678715695e+02 -3.193415679485398e+02 4 6.534146720295859e+02 7.094924702572415e+01 -2.512548059360335e+02 5.989891154804569e+02 - ME 2.494643034161164e-04 + ME 1.035644942794080e-04 Event 234 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3924,7 +3924,7 @@ Event 234 Batch 1 2 1.866526101194276e+02 7.776953530733704e+01 -1.047503781897390e+01 1.693557493124073e+02 3 6.012752698516817e+02 5.974840035795012e+02 -4.570329760029643e+01 
4.955829083294186e+01 4 7.120721200288899e+02 -6.752535388868379e+02 5.617833541927040e+01 -2.189140401453492e+02 - ME 2.154454342135980e-03 + ME 6.655948749153013e-04 Event 235 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3932,7 +3932,7 @@ Event 235 Batch 1 2 5.032945404607945e+02 1.612889276925247e+02 2.561838854094329e+02 -4.020710050699558e+02 3 7.153634726767370e+02 -3.739069589148947e+02 -1.979140468542061e+02 5.768609140624169e+02 4 2.813419868624690e+02 2.126180312223700e+02 -5.826983855522722e+01 -1.747899089924609e+02 - ME 8.184939555880423e-04 + ME 1.137471703441233e-04 Event 236 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3940,7 +3940,7 @@ Event 236 Batch 1 2 6.980797829886610e+02 -9.803971882836288e+00 4.740144261428889e+02 5.123764137440797e+02 3 5.519387921056282e+02 -1.638876688381594e+02 -3.209728652821290e+02 -4.180355032606608e+02 4 2.499814249057108e+02 1.736916407209956e+02 -1.530415608607599e+02 -9.434091048341891e+01 - ME 2.813360227943072e-04 + ME 5.842524801707843e-05 Event 237 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3948,7 +3948,7 @@ Event 237 Batch 1 2 1.604490925133743e+02 6.212857081252698e+01 9.075394990141041e+01 1.168232534834160e+02 3 6.578242662283152e+02 5.348507070161563e+02 -3.810396531957998e+02 3.842224792439630e+01 4 6.817266412583107e+02 -5.969792778286832e+02 2.902857032943894e+02 -1.552455014078122e+02 - ME 8.205069948818567e-04 + ME 1.834055676127939e-04 Event 238 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3956,7 +3956,7 @@ Event 238 Batch 1 2 2.789018340499539e+02 1.069933592962543e+02 -2.572713415352736e+02 1.225197647611563e+01 3 4.761759619803052e+02 7.755191627191856e+01 -4.591043622469822e+02 -9.976187456245104e+01 4 7.449222039697408e+02 -1.845452755681728e+02 7.163757037822556e+02 
8.750989808633538e+01 - ME 4.130258343824905e-02 + ME 9.445005309896021e-03 Event 239 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3964,7 +3964,7 @@ Event 239 Batch 1 2 4.581461811054764e+02 -3.899520773556200e+02 2.006122777919944e+02 1.326273524830990e+02 3 3.013476461129690e+02 -2.996604136348060e+02 3.145663680794619e+01 4.951799549362093e+00 4 7.405061727815548e+02 6.896124909904260e+02 -2.320689145999406e+02 -1.375791520324611e+02 - ME 1.351152256907066e-02 + ME 4.970363634614722e-03 Event 240 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3972,7 +3972,7 @@ Event 240 Batch 1 2 5.932490652975304e+02 -4.094504138983958e+01 -3.300190662632461e+02 4.912793227530680e+02 3 3.147487537014150e+02 3.081803657249563e+02 4.097350029662016e+01 -4.912038692507519e+01 4 5.920021810010543e+02 -2.672353243351168e+02 2.890455659666260e+02 -4.421589358279927e+02 - ME 2.300291351402201e-03 + ME 3.420638167820422e-04 Event 241 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3980,7 +3980,7 @@ Event 241 Batch 1 2 4.438703186026563e+01 1.425431959717181e+01 -4.430288595443099e+00 -4.180186016371768e+01 3 7.139617398095604e+02 -8.415544716076485e+01 -5.657765076565163e+02 -4.272659242311072e+02 4 7.416512283301737e+02 6.990112756359306e+01 5.702067962519594e+02 4.690677843948249e+02 - ME 9.657825758456334e-03 + ME 9.983667466725972e-03 Event 242 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3988,7 +3988,7 @@ Event 242 Batch 1 2 3.798759956195423e+02 -1.259218082844715e+02 -3.429343473884153e+02 1.041417477651927e+02 3 6.208895880511435e+02 5.354328139337265e+02 1.248673426784089e+02 -2.884852319370315e+02 4 4.992344163293142e+02 -4.095110056492549e+02 2.180670047100064e+02 1.843434841718389e+02 - ME 4.523810239016752e-05 + ME 1.030886114253601e-05 Event 243 Batch 1 
0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3996,7 +3996,7 @@ Event 243 Batch 1 2 2.320641800899440e+02 1.658639294991472e+02 7.783463994856535e+01 1.424243988788334e+02 3 6.251485586341132e+02 -2.328139095298017e+02 -4.262931976140131e+02 3.935511574875350e+02 4 6.427872612759426e+02 6.694998003065477e+01 3.484585576654476e+02 -5.359755563663684e+02 - ME 1.068434238404496e-02 + ME 8.493072129055412e-04 Event 244 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4004,7 +4004,7 @@ Event 244 Batch 1 2 6.609991843787810e+02 -2.293678857540617e+02 -4.971623496474938e+02 -3.703240376037023e+02 3 1.091403980947070e+02 1.154537470975927e+01 -9.115666825632124e+00 -1.081445118228680e+02 4 7.298604175265119e+02 2.178225110443025e+02 5.062780164731259e+02 4.784685494265703e+02 - ME 2.129811247265830e-03 + ME 9.635755455313371e-04 Event 245 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4012,7 +4012,7 @@ Event 245 Batch 1 2 4.893629130846664e+02 -3.546974954177181e+02 3.112856868655738e+02 -1.294873298810978e+02 3 7.129026631852477e+02 5.703735458058533e+02 -4.257115617679147e+02 -4.091322034012423e+01 4 2.977344237300874e+02 -2.156760503881352e+02 1.144258749023406e+02 1.704005502212233e+02 - ME 2.548352504440589e-05 + ME 5.312368446054512e-06 Event 246 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4020,7 +4020,7 @@ Event 246 Batch 1 2 3.999457395350199e+02 9.605025124341067e+01 9.072234098128430e+01 3.774922524438975e+02 3 3.675469088581873e+02 -1.615841482674670e+01 2.570183669846762e+02 2.622426259669196e+02 4 7.325073516067924e+02 -7.989183641666393e+01 -3.477407079659604e+02 -6.397348784108170e+02 - ME 1.294421983622042e-01 + ME 5.023802198964801e-02 Event 247 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-4028,7 +4028,7 @@ Event 247 Batch 1 2 6.711864521923226e+02 3.763073240556692e+02 5.338170415278108e+02 1.546719678644905e+02 3 5.231557804938882e+02 -1.057595517177888e+02 -5.121603131388773e+02 -1.409615302513522e+01 4 3.056577673137891e+02 -2.705477723378804e+02 -2.165672838893370e+01 -1.405758148393554e+02 - ME 2.873345328272106e-04 + ME 1.980507958825256e-05 Event 248 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4036,7 +4036,7 @@ Event 248 Batch 1 2 6.307803946875938e+02 -6.240065811552291e+01 -3.654556314590158e+02 5.103256270499047e+02 3 3.935347424219227e+02 -2.188782290807617e+02 2.916853933646314e+01 -3.257470040392325e+02 4 4.756848628904837e+02 2.812788871962847e+02 3.362870921225527e+02 -1.845786230106721e+02 - ME 2.418190194667681e-04 + ME 8.712398839363553e-05 Event 249 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4044,7 +4044,7 @@ Event 249 Batch 1 2 4.326970760901858e+02 -4.070406664121577e+02 -1.467447404863359e+02 3.261392852829594e+00 3 4.839435229991528e+02 2.335311811831339e+01 2.018595963184923e+02 -4.392136936630267e+02 4 5.833594009106607e+02 3.836875482938447e+02 -5.511485583215654e+01 4.359523008101972e+02 - ME 8.354140201035124e-05 + ME 2.487145538635957e-05 Event 250 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4052,7 +4052,7 @@ Event 250 Batch 1 2 7.010671671345858e+02 -6.122994886156980e+02 -2.473946684860857e+02 2.353303785738851e+02 3 5.574643785654457e+02 3.902114201641945e+02 2.260985614407801e+02 -3.276904354069721e+02 4 2.414684542999681e+02 2.220880684515034e+02 2.129610704530562e+01 9.236005683308701e+01 - ME 4.704118057291807e-05 + ME 1.645582299148298e-05 Event 251 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4060,7 +4060,7 @@ Event 251 Batch 1 2 7.364006127103795e+02 5.379960890463808e+02 
4.302640987755426e+02 2.602285070392761e+02 3 3.051282143252570e+01 -2.901685968644106e+00 1.337962970917706e+01 -2.726899336532026e+01 4 7.330865658570956e+02 -5.350944030777371e+02 -4.436437284847198e+02 -2.329595136739561e+02 - ME 8.340546584740779e-03 + ME 6.389613086136084e-03 Event 252 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4068,7 +4068,7 @@ Event 252 Batch 1 2 5.965625584838610e+02 -7.369842915522101e+01 -5.671364104158780e+02 -1.697401534860145e+02 3 6.549338760881149e+02 -1.514014639568436e+02 6.313240788068730e+02 8.628954906696529e+01 4 2.485035654280235e+02 2.250998931120648e+02 -6.418766839099484e+01 8.345060441904938e+01 - ME 3.985162011735342e-05 + ME 7.225550854378042e-06 Event 253 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4076,7 +4076,7 @@ Event 253 Batch 1 2 5.728678540484714e+02 3.212236187283236e+01 -4.622666283104808e+02 -3.368312580807653e+02 3 7.160302400837320e+02 1.132435775281999e+02 5.206369974620781e+02 4.783433011307397e+02 4 2.111019058677967e+02 -1.453659394010323e+02 -5.837036915159722e+01 -1.415120430499744e+02 - ME 1.248429186447426e-03 + ME 7.499676590470843e-05 Event 254 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4084,7 +4084,7 @@ Event 254 Batch 1 2 5.579357369440610e+02 1.333150067790222e+02 -6.785864805882139e+01 5.375077668373273e+02 3 6.202682598689536e+02 -4.039338689731095e+02 2.012068793592834e+02 -4.255419314189536e+02 4 3.217960031869852e+02 2.706188621940872e+02 -1.333482313004621e+02 -1.119658354183736e+02 - ME 6.088720978226072e-04 + ME 2.226893396847405e-04 Event 255 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4092,5 +4092,5 @@ Event 255 Batch 1 2 7.263612771087843e+02 3.396063850675520e+02 -6.401091575508393e+02 5.028393902637355e+01 3 1.540578578981475e+02 -3.080387127739228e+01 
1.060177193258910e+02 -1.074485378375538e+02 4 6.195808649930684e+02 -3.088025137901597e+02 5.340914382249483e+02 5.716459881118030e+01 - ME 1.547064591142216e-04 + ME 4.003666322732326e-05 diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 386e592a4e..71c2006493 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.004611015319824219  +DEBUG: model prefixing takes 0.005433082580566406  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,15 +169,15 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.071 s +8 processes with 40 diagrams generated in 0.077 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g u > t t~ u WEIGHTED<=3 @1 INFO: Processing color information for process: g u > t t~ u @1 @@ -189,96 +189,44 @@ INFO: Processing color information for process: g u~ > t t~ u~ @1 INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  -DEBUG: type(subproc_group)= [output.py at line 188]  -DEBUG: type(fortran_model)= [output.py at line 189]  -DEBUG: type(me)= me=0 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: proc_id =  0 [model_handling.py at line 1046]  -INFO: Creating files in directory 
/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1339]  -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  -DEBUG: type(subproc_group)= [output.py at line 188]  -DEBUG: type(fortran_model)= [output.py at line 189]  -DEBUG: type(me)= me=1 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: proc_id =  0 [model_handling.py at line 1046]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is not yet defined: this is standalone_cudacpp mode [model_handling.py at line 1302]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file 
[model_handling.py at line 1454]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1336]  -Generated helas calls for 2 subprocesses (10 diagrams) in 0.027 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  +DEBUG: type(subproc_group)= [output.py at line 190]  +DEBUG: type(fortran_model)= [output.py at line 191]  +DEBUG: type(me)= me=0 [output.py at line 192]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. 
+DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  +DEBUG: type(subproc_group)= [output.py at line 190]  +DEBUG: type(fortran_model)= [output.py at line 191]  +DEBUG: type(me)= me=1 [output.py at line 192]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. +Generated helas calls for 2 subprocesses (10 diagrams) in 0.029 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.123 s +ALOHA: aloha creates 2 routines in 0.142 s FFV1 FFV1 FFV1 FFV1 VVV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 2 , keys size = 2 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
-DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.730s -user 0m0.605s -sys 0m0.063s +real 0m0.651s +user 0m0.593s +sys 0m0.051s diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! 
+ const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc index 0b2899d317..037662f7db 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc @@ -243,26 +243,19 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); -#if not( defined __CUDACC__ and defined MGONGPU_TEST_DIVERGENCE ) - imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz -#else - if( ( blockDim.x * blockIdx.x + threadIdx.x ) % 2 == 0 ) - imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz - else - ixxxxx( momenta, 0, cHel[ihel][1], +1, w_fp[1], 1 ); -#endif + ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); + oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - FFV1_2( w_fp[1], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 0., 0., w_fp[6] ); + FFV1_2( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -272,11 +265,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[1], w_fp[4], COUPs[0], 0., 0., 
w_fp[7] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -286,10 +279,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -299,10 +292,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[4], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); + FFV1_1( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -315,7 +308,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -853,13 +846,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for 
cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc index e37fd43d6a..12179b9801 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc @@ -243,19 +243,19 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - ixzxxx( momenta, cHel[ihel][4], -1, w_fp[4], 4 ); + ixxxxx( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 ); - FFV1_2( w_fp[4], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 0., 0., w_fp[6] ); + FFV1_2( w_fp[4], w_fp[0], COUPs[0], 1.0, 0., 0., 
w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -265,11 +265,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[4], w_fp[1], COUPs[0], 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[4], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -279,10 +279,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[0], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[0], 1.0, cIPD[0], cIPD[1], w_fp[5] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -292,10 +292,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[1], w_fp[0], COUPs[0], 0., 0., w_fp[5] ); + FFV1_1( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[0], &_fp[0] ); + FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[0], 1.0, 
&_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -308,7 +308,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -846,13 +846,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove 
(added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gq_ttq.sa/mg5.in b/epochX/cudacpp/gq_ttq.sa/mg5.in index ae4d2d2c15..c0952db410 100644 --- a/epochX/cudacpp/gq_ttq.sa/mg5.in +++ b/epochX/cudacpp/gq_ttq.sa/mg5.in @@ -1,4 +1,5 @@ +set stdout_level DEBUG +set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ generate g q > t t~ q -output standalone_cudacpp gq_ttq.sa --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp - +output standalone_cudacpp gq_ttq.sa diff --git a/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h b/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h index 901400d447..0dd5f20f71 100644 --- 
a/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -885,6 +887,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -897,6 +900,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -910,6 +914,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //========================================================================== @@ -921,6 +926,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -944,6 +950,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -975,6 +982,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1006,6 +1014,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1037,6 +1046,7 @@ namespace 
mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); diff --git a/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc b/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc index d5eda63ee0..3452d1e8da 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc +++ b/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc @@ -111,8 +111,8 @@ Parameters_sm::setDependentParameters() // now computed event-by-event (running void Parameters_sm::setDependentCouplings() // now computed event-by-event (running alphas #373) { - GC_11 = mdl_complexi * G; GC_10 = -G; + GC_11 = mdl_complexi * G; } */ @@ -195,7 +195,7 @@ void Parameters_sm::printDependentCouplings() // now computed event-by-event (running alphas #373) { std::cout << "sm model couplings dependent on event kinematics:" << std::endl; - std::cout << std::setw( 20 ) << "GC_11 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_11 << std::endl; std::cout << std::setw( 20 ) << "GC_10 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_10 << std::endl; + std::cout << std::setw( 20 ) << "GC_11 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_11 << std::endl; } */ diff --git a/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h b/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h index 0c77cf58f0..4f6f322ed9 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h +++ b/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h @@ -54,7 +54,7 @@ namespace mg5amcCpu //double mdl_sqrt__aS, G, mdl_G__exp__2; // now computed event-by-event (running alphas #373) // Model couplings dependent on aS - //cxsmpl GC_11, GC_10; // now computed event-by-event (running alphas #373) + //cxsmpl GC_10, GC_11; // now computed event-by-event (running alphas #373) // Set parameters that are unchanged during the run void setIndependentParameters( SLHAReader& slha ); @@ -194,8 +194,8 @@ namespace mg5amcCpu //constexpr double 
mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) // Model couplings dependent on aS - //constexpr cxsmpl GC_11 = mdl_complexi * G; // now computed event-by-event (running alphas #373) //constexpr cxsmpl GC_10 = -G; // now computed event-by-event (running alphas #373) + //constexpr cxsmpl GC_11 = mdl_complexi * G; // now computed event-by-event (running alphas #373) // Print parameters that are unchanged during the run void printIndependentParameters(); @@ -226,12 +226,12 @@ namespace mg5amcCpu namespace Parameters_sm_dependentCouplings { constexpr size_t ndcoup = 2; // #couplings that vary event by event because they depend on the running alphas QCD - constexpr size_t idcoup_GC_11 = 0; - constexpr size_t idcoup_GC_10 = 1; + constexpr size_t idcoup_GC_10 = 0; + constexpr size_t idcoup_GC_11 = 1; struct DependentCouplings_sv { - cxtype_sv GC_11; cxtype_sv GC_10; + cxtype_sv GC_11; }; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-variable" // e.g. <> @@ -257,8 +257,8 @@ namespace mg5amcCpu //const fptype_sv G = 2. 
* mdl_sqrt__aS * constexpr_sqrt( M_PI ); const fptype_sv mdl_G__exp__2 = ( ( G ) * ( G ) ); // Model couplings dependent on aS - out.GC_11 = cI * G; out.GC_10 = -G; + out.GC_11 = cI * G; } // End SM implementation - no special handling of vectors of floats as in EFT (#439) return out; @@ -293,12 +293,12 @@ namespace mg5amcCpu using namespace Parameters_sm_dependentCouplings; const fptype_sv& gs_sv = G_ACCESS::kernelAccessConst( gs ); DependentCouplings_sv couplings_sv = computeDependentCouplings_fromG( gs_sv ); - fptype* GC_11s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_11 ); fptype* GC_10s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_10 ); - cxtype_sv_ref GC_11s_sv = C_ACCESS::kernelAccess( GC_11s ); + fptype* GC_11s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_11 ); cxtype_sv_ref GC_10s_sv = C_ACCESS::kernelAccess( GC_10s ); - GC_11s_sv = couplings_sv.GC_11; + cxtype_sv_ref GC_11s_sv = C_ACCESS::kernelAccess( GC_11s ); GC_10s_sv = couplings_sv.GC_10; + GC_11s_sv = couplings_sv.GC_11; mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/gq_ttq.sa/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt b/epochX/cudacpp/gq_ttq.sa/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt index dd90c94acf..d596b33ae7 100644 --- a/epochX/cudacpp/gq_ttq.sa/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt +++ b/epochX/cudacpp/gq_ttq.sa/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt @@ -4,7 +4,7 @@ Event 0 Batch 0 2 2.647483690509011e+02 7.527657265342380e+01 -2.528976247704283e+02 -2.163164141117315e+01 3 6.252973211776936e+02 -5.721080498766041e+02 -1.578766990348905e+01 2.518727230515587e+02 4 6.099543097714056e+02 4.968314772231802e+02 2.686852946739174e+02 -2.302410816403857e+02 - ME 3.498510462248670e-04 + ME 6.254927412618323e-05 Event 1 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -12,7 +12,7 @@ Event 1 Batch 0 2 2.542827954151951e+02 1.482213322085297e+02 -1.988618298139058e+02 
-5.607271498295615e+01 3 6.883656117507998e+02 1.265478873489434e+02 5.602777828023585e+02 3.793700749224233e+02 4 5.573515928340058e+02 -2.747692195574731e+02 -3.614159529884527e+02 -3.232973599394667e+02 - ME 7.257243108248426e-04 + ME 8.120933129385430e-05 Event 2 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -20,7 +20,7 @@ Event 2 Batch 0 2 4.301460683791099e+02 -3.656995432079240e+02 -2.257802895903974e+02 -1.768459985405173e+01 3 5.058528987551350e+02 2.755467101243707e+02 -2.034821274188550e+02 3.722313656043856e+02 4 5.640010328657550e+02 9.015283308355326e+01 4.292624170092524e+02 -3.545467657503340e+02 - ME 8.130044127338102e-04 + ME 1.104115154253218e-04 Event 3 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -28,7 +28,7 @@ Event 3 Batch 0 2 6.758793342627306e+02 1.455349847705337e+02 4.360940220328824e+02 -4.954335945799966e+02 3 3.008019460079605e+02 -1.607139834787174e+02 2.732727402256846e+01 2.527964523704278e+02 4 5.233187197293092e+02 1.517899870818368e+01 -4.634212960554508e+02 2.426371422095687e+02 - ME 7.753277710143621e-05 + ME 4.288074098478053e-05 Event 4 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -36,7 +36,7 @@ Event 4 Batch 0 2 3.540811678028369e+02 5.414642718170588e+01 -3.497885023717100e+02 -9.467915537920108e+00 3 7.415000547748695e+02 1.453779348794601e+00 7.277337852109665e+02 1.422102514562805e+02 4 4.044187774222938e+02 -5.560020653050046e+01 -3.779452828392566e+02 -1.327423359183605e+02 - ME 2.015528729476554e-04 + ME 1.304731284254719e-05 Event 5 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -44,7 +44,7 @@ Event 5 Batch 0 2 4.747467875786874e+02 2.462969907607520e+02 3.713870243947702e+02 1.636886763636381e+02 3 3.438196236093862e+02 -2.056491112573935e+02 2.636029701703988e+02 8.021128807897365e+01 4 
6.814335888119255e+02 -4.064787950335840e+01 -6.349899945651691e+02 -2.438999644426124e+02 - ME 6.140777519977192e-04 + ME 1.932390649640220e-04 Event 6 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -52,7 +52,7 @@ Event 6 Batch 0 2 5.623951200922340e+02 4.644673798421034e+02 3.089047820108764e+02 -7.166700647426805e+01 3 2.268243199894467e+02 1.761899852590787e+02 -7.114332369064562e+01 -1.238748914321566e+02 4 7.107805599183188e+02 -6.406573651011822e+02 -2.377614583202307e+02 1.955418979064247e+02 - ME 8.375373201653861e-04 + ME 1.929702539767979e-04 Event 7 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -60,7 +60,7 @@ Event 7 Batch 0 2 4.922243378496302e+02 2.878585072835456e+02 -1.441537488072182e+02 -3.723465794939189e+02 3 2.873990637609374e+02 -5.400981623596619e+01 -8.913204919452846e+01 -2.678369642286231e+02 4 7.203765983894325e+02 -2.338486910475794e+02 2.332857980017467e+02 6.401835437225419e+02 - ME 2.045598717079573e-03 + ME 6.280412585349807e-04 Event 8 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -68,7 +68,7 @@ Event 8 Batch 0 2 3.353309706037128e+02 -7.529439061162444e+01 -4.917829145606096e+01 -3.230466069128648e+02 3 7.169322705461503e+02 -1.597426278178964e+02 -1.460012137440150e+01 6.987567601563110e+02 4 4.477367588501368e+02 2.350370184295208e+02 6.377841283046249e+01 -3.757101532434461e+02 - ME 5.176104304710922e-03 + ME 1.424871539111113e-03 Event 9 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -76,7 +76,7 @@ Event 9 Batch 0 2 2.557626120875720e+02 2.000882245504951e+02 -5.276260741790070e+01 -1.503174088272977e+02 3 7.044202058180884e+02 -6.969679478438196e+02 -1.019614549623775e+02 6.882422911146106e+00 4 5.398171820943397e+02 4.968797232933244e+02 1.547240623802783e+02 1.434349859161515e+02 - ME 6.498215193902510e-05 + ME 
1.126010180174107e-05 Event 10 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -84,7 +84,7 @@ Event 10 Batch 0 2 3.466796552973448e+02 1.172124288883391e+02 -1.804077050554743e+02 2.718475489457261e+02 3 5.174471655316495e+02 -1.610456139025784e+02 -4.497410659869822e+02 -1.988689340353916e+02 4 6.358731791710053e+02 4.383318501423926e+01 6.301487710424565e+02 -7.297861491033444e+01 - ME 2.111165581639245e-04 + ME 8.292383053707579e-05 Event 11 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -92,7 +92,7 @@ Event 11 Batch 0 2 5.730783827248506e+02 -3.059484875398849e+01 3.466457017175528e+02 -4.553235612803233e+02 3 4.410994673708892e+02 -3.026218886155176e+02 -1.990641070399019e+01 3.203005892260318e+02 4 4.858221499042607e+02 3.332167373695061e+02 -3.267392910135624e+02 1.350229720542913e+02 - ME 5.129802099928076e-05 + ME 2.195851954305949e-05 Event 12 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -100,7 +100,7 @@ Event 12 Batch 0 2 2.275003875859171e+02 -1.247450244086003e+02 1.654605359856639e+02 9.390376067217456e+01 3 6.138170466352969e+02 3.363961838598331e+02 -2.139358085817026e+01 5.129827374509639e+02 4 6.586825657787861e+02 -2.116511594512328e+02 -1.440669551274935e+02 -6.068864981231385e+02 - ME 5.249882090061186e-02 + ME 3.843244876666358e-03 Event 13 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -108,7 +108,7 @@ Event 13 Batch 0 2 2.867684047377951e+02 7.055192702127012e+01 -2.028354730671929e+02 1.900429278217245e+02 3 6.990707050557395e+02 -5.605742285334717e+02 2.413419117565430e+02 -3.408965629057132e+02 4 5.141608902064654e+02 4.900223015122016e+02 -3.850643868935023e+01 1.508536350839886e+02 - ME 6.422048006176975e-05 + ME 1.780264803426774e-05 Event 14 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 
7.500000000000000e+02 @@ -116,7 +116,7 @@ Event 14 Batch 0 2 3.551549262960330e+02 1.090410064132905e+02 3.205839746298526e+02 1.071027348074892e+02 3 5.276349775014137e+02 3.895763694332612e+02 -2.529209653865598e+02 2.503196099590423e+02 4 6.172100962025531e+02 -4.986173758465519e+02 -6.766300924329285e+01 -3.574223447665315e+02 - ME 7.422587439250419e-04 + ME 1.172793340377339e-04 Event 15 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -124,7 +124,7 @@ Event 15 Batch 0 2 5.846731991828425e+02 7.106081559720657e+01 3.900476102503054e+02 4.297161529048979e+02 3 2.829885923647302e+02 -2.767806781033229e+02 5.223342094943639e+01 -2.732525156618249e+01 4 6.323382084524278e+02 2.057198625061163e+02 -4.422810311997417e+02 -4.023909013387152e+02 - ME 1.255922738422332e-03 + ME 2.768931482482754e-04 Event 16 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -132,7 +132,7 @@ Event 16 Batch 0 2 7.471577506095512e+02 1.666056475215676e+02 -5.784682380714994e+02 -4.425627187781379e+02 3 6.589296733908160e+02 -1.235441202519038e+02 5.251239647671507e+02 3.783780998595698e+02 4 9.391257599963087e+01 -4.306152726966400e+01 5.334427330434855e+01 6.418461891856485e+01 - ME 5.526726502577864e-05 + ME 3.619360847906487e-05 Event 17 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -140,7 +140,7 @@ Event 17 Batch 0 2 3.567490993131759e+02 3.856364495163717e+01 -1.708845728849435e+02 -3.107752047682324e+02 3 6.453207560475681e+02 4.468356462873772e+02 2.282834847349605e+02 4.057874246326636e+02 4 4.979301446392561e+02 -4.853992912390142e+02 -5.739891185001719e+01 -9.501221986443127e+01 - ME 1.327369996555111e-04 + ME 3.400819398697452e-05 Event 18 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -148,7 +148,7 @@ Event 18 Batch 0 2 4.856701782481425e+02 2.509110753153842e+02 
-3.498523763974107e+02 -2.247720379690150e+02 3 3.014847498930008e+02 -1.059425909901355e+02 -2.435847754696140e+02 -1.426032222348426e+02 4 7.128450718588564e+02 -1.449684843252488e+02 5.934371518670247e+02 3.673752602038576e+02 - ME 1.018512933050835e-03 + ME 1.704840743724005e-04 Event 19 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -156,7 +156,7 @@ Event 19 Batch 0 2 5.848213503304410e+02 -3.141116763848333e+02 -1.950442390378232e+02 4.531088295091878e+02 3 5.769300027107226e+02 5.020221748138873e+02 2.252239828724832e+02 -1.734823378963534e+02 4 3.382486469588368e+02 -1.879104984290540e+02 -3.017974383465995e+01 -2.796264916128346e+02 - ME 4.267017342507976e-03 + ME 1.566312636528492e-04 Event 20 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -164,7 +164,7 @@ Event 20 Batch 0 2 5.550938429889906e+02 -4.478597170519693e+02 -1.958065402362923e+02 -2.630791652090858e+02 3 5.585686897587655e+02 3.351111310173187e+02 -1.360174455686903e+02 4.256744830831253e+02 4 3.863374672522434e+02 1.127485860346507e+02 3.318239858049826e+02 -1.625953178740396e+02 - ME 2.768271682113988e-04 + ME 4.443882992804106e-05 Event 21 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -172,7 +172,7 @@ Event 21 Batch 0 2 6.296556563991993e+02 -3.477135312394776e+02 -1.376147989324512e+02 -5.065804111325866e+02 3 3.137568007204202e+02 1.080474571851863e+02 -2.382188236683311e+02 1.732653140250679e+02 4 5.565875428803801e+02 2.396660740542913e+02 3.758336226007823e+02 3.333150971075189e+02 - ME 5.519034669639832e-05 + ME 2.195742323347977e-05 Event 22 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -180,7 +180,7 @@ Event 22 Batch 0 2 5.583338925767162e+02 2.471586228668332e+02 -1.597599499756147e+02 -4.744745610949311e+02 3 5.378723432497920e+02 9.149532098241385e+00 
4.314513680009925e+02 3.210493120152684e+02 4 4.037937641734921e+02 -2.563081549650745e+02 -2.716914180253778e+02 1.534252490796627e+02 - ME 3.705224437539572e-05 + ME 1.393143104564022e-05 Event 23 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -188,7 +188,7 @@ Event 23 Batch 0 2 6.057340011976822e+02 6.848115528115159e+01 -5.207204912425279e+02 -3.017849923015605e+02 3 6.884459352783615e+02 -2.949639632364767e+01 6.680977958792448e+02 1.635026102131439e+02 4 2.058200635239559e+02 -3.898475895750391e+01 -1.473773046367171e+02 1.382823820884168e+02 - ME 2.946248744974782e-05 + ME 1.074117284514867e-05 Event 24 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -196,7 +196,7 @@ Event 24 Batch 0 2 4.702316790647315e+02 -1.210575128627593e+02 4.313728504035306e+02 -1.427598490831810e+02 3 7.180482366151732e+02 1.040047389253588e+02 -7.104588047260974e+02 4.956931953573291e+00 4 3.117200843200960e+02 1.705277393740069e+01 2.790859543225674e+02 1.378029171296075e+02 - ME 3.146557994448562e-05 + ME 5.213387311993420e-06 Event 25 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -204,7 +204,7 @@ Event 25 Batch 0 2 6.261365010744016e+02 -5.354018140499276e+02 -2.095559720530078e+02 2.479477970595020e+02 3 5.483958991041942e+02 5.199465180092641e+02 -9.843995208133505e+01 -1.438862620216537e+02 4 3.254675998214045e+02 1.545529604066345e+01 3.079959241343431e+02 -1.040615350378483e+02 - ME 1.657640191611339e-04 + ME 1.695323153210731e-05 Event 26 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -212,7 +212,7 @@ Event 26 Batch 0 2 4.635816356180677e+02 1.904702824079147e+02 -2.351549941335565e+02 -3.511853259118595e+02 3 3.686385821486527e+02 -2.712527815845713e+02 -6.015354190959191e+01 -2.422764621809819e+02 4 6.677797822332798e+02 8.078249917665664e+01 
2.953085360431485e+02 5.934617880928415e+02 - ME 3.250975879010065e-04 + ME 1.052251904460155e-04 Event 27 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -220,7 +220,7 @@ Event 27 Batch 0 2 2.851713673150520e+02 1.387976072955998e+02 1.520424011317634e+02 -1.973348453858079e+02 3 6.747356481771329e+02 2.426633222154767e+02 -4.300238522839811e+02 4.598501858640580e+02 4 5.400929845078149e+02 -3.814609295110765e+02 2.779814511522176e+02 -2.625153404782502e+02 - ME 4.155279516527712e-04 + ME 7.957109124083736e-05 Event 28 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -228,7 +228,7 @@ Event 28 Batch 0 2 1.977804200471008e+02 -1.803202618401224e+02 -8.082809162516925e+01 -8.277519444290659e+00 3 7.197523834069627e+02 3.152541965091956e+02 6.467033971658861e+02 -2.080867841663842e+01 4 5.824671965459364e+02 -1.349339346690732e+02 -5.658753055407169e+02 2.908619786092899e+01 - ME 1.172809031809504e-04 + ME 1.748013159755222e-05 Event 29 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -236,7 +236,7 @@ Event 29 Batch 0 2 6.123364628491765e+02 -3.746492624245139e+02 3.785128791537567e+02 -3.021950929683376e+02 3 4.056577755659300e+02 1.796205570313495e+00 -8.781658530568643e+01 3.960344074293251e+02 4 4.820057615848937e+02 3.728530568542006e+02 -2.906962938480702e+02 -9.383931446098750e+01 - ME 5.496242925842306e-04 + ME 3.085570985177973e-04 Event 30 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -244,7 +244,7 @@ Event 30 Batch 0 2 7.349194950356053e+02 7.241679607953656e+02 1.425637322816703e+01 1.244354634469208e+02 3 7.321421454671275e+02 -7.253765693071590e+02 -2.895970851972107e+01 -9.498573130653318e+01 4 3.293835949726734e+01 1.208608511793152e+00 1.470333529155409e+01 -2.944973214038765e+01 - ME 5.147061682527938e-02 + ME 3.267107835672361e-04 Event 31 
Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -252,7 +252,7 @@ Event 31 Batch 0 2 1.718338270585457e+02 -1.344914872264095e+02 -1.021614404532311e+02 3.165350011824393e+01 3 6.313115253715935e+02 -2.849940593920691e+02 -7.916450257599642e+01 -5.577325610990745e+02 4 6.968546475698608e+02 4.194855466184786e+02 1.813259430292275e+02 5.260790609808306e+02 - ME 4.645345268703414e-04 + ME 1.685680846028125e-04 Event 32 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -260,7 +260,7 @@ Event 32 Batch 0 2 7.235176898898732e+02 -4.762113006241282e+02 -2.880822916693121e+01 5.439400065022983e+02 3 6.603902828461299e+02 4.672103814637360e+02 1.031050210016798e+02 -4.551913221650266e+02 4 1.160920272639969e+02 9.000919160392018e+00 -7.429679183474862e+01 -8.874868433727177e+01 - ME 4.476006843186700e-03 + ME 2.173072900368875e-04 Event 33 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -268,7 +268,7 @@ Event 33 Batch 0 2 4.786737271642286e+02 2.009638309376703e+02 4.090184839380260e+02 1.464443769121513e+02 3 3.795793219608408e+02 -6.057523839522271e+00 -8.244277697544294e+01 3.704685635647950e+02 4 6.417469508749314e+02 -1.949063070981495e+02 -3.265757069625828e+02 -5.169129404769461e+02 - ME 1.351709676586880e-02 + ME 3.322437827682699e-03 Event 34 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -276,7 +276,7 @@ Event 34 Batch 0 2 6.621583515140109e+02 -5.051303032557109e+02 -1.429543729176959e+02 4.035605363216953e+02 3 3.008522892707525e+02 8.677543723835062e+01 2.726747894692539e+02 -9.290092916351111e+01 4 5.369893592152367e+02 4.183548660173603e+02 -1.297204165515579e+02 -3.106596071581844e+02 - ME 6.460854093057828e-04 + ME 9.294666462955388e-05 Event 35 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -284,7 
+284,7 @@ Event 35 Batch 0 2 6.158114977149372e+02 2.502256147979830e+02 4.233348779616202e+00 5.626659943296695e+02 3 1.476397433483021e+02 -1.670550278282843e+01 -6.055370982200890e+01 1.336101351676488e+02 4 7.365487589367605e+02 -2.335201120151546e+02 5.632036104239269e+01 -6.962761294973184e+02 - ME 2.101231899117793e+00 + ME 5.450893768264864e-01 Event 36 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -292,7 +292,7 @@ Event 36 Batch 0 2 7.182456511154913e+02 -7.463771462544163e+01 -6.667773110518942e+02 2.563475070450518e+02 3 4.860008755751825e+02 -7.840660561780868e+01 4.141081959217036e+02 -2.419992919944378e+02 4 2.957534733093268e+02 1.530443202432501e+02 2.526691151301903e+02 -1.434821505061448e+01 - ME 9.644531209480271e-05 + ME 1.793136635525090e-05 Event 37 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -300,7 +300,7 @@ Event 37 Batch 0 2 5.672182018814327e+02 -2.031706828392718e+00 -5.267408190306547e+02 2.104197478372323e+02 3 4.664069288608281e+02 3.712365792892206e+02 2.604523782658950e+02 -1.090109358856581e+02 4 4.663748692577387e+02 -3.692048724608279e+02 2.662884407647597e+02 -1.014088119515743e+02 - ME 1.216876552012178e-04 + ME 1.885829354904198e-05 Event 38 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -308,7 +308,7 @@ Event 38 Batch 0 2 5.068057345787187e+02 4.883513201966852e+02 -7.570036138649985e+01 -1.124032737511800e+02 3 3.871140338254017e+02 -1.153787089711745e+02 -3.599073977747533e+02 -8.373585688177315e+01 4 6.060802315958797e+02 -3.729726112255107e+02 4.356077591612532e+02 1.961391306329531e+02 - ME 1.006736553113524e-04 + ME 2.004468492837133e-05 Event 39 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -316,7 +316,7 @@ Event 39 Batch 0 2 4.960337392567769e+02 -3.669089247616476e+02 2.651961920161227e+02 
-2.027271347192069e+02 3 2.837821967046824e+02 -2.822567153069604e+02 -2.935613327724534e+01 -1.303560381865560e+00 4 7.201840640385411e+02 6.491656400686079e+02 -2.358400587388775e+02 2.040306951010725e+02 - ME 1.372807525012575e-03 + ME 2.738639406673165e-04 Event 40 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -324,7 +324,7 @@ Event 40 Batch 0 2 3.080730228651936e+02 -3.065830270999447e+02 -2.484308296331460e+01 1.728167064871203e+01 3 6.842346640746094e+02 4.630487823766367e+02 8.554554725666550e+01 -4.964321303112498e+02 4 5.076923130601962e+02 -1.564657552766919e+02 -6.070246429335075e+01 4.791504596625378e+02 - ME 4.192363154074847e-05 + ME 4.316353181637933e-05 Event 41 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -332,7 +332,7 @@ Event 41 Batch 0 2 1.602650851118221e+02 -1.258781096038287e+02 -9.817642232798531e+01 1.417706342452912e+01 3 7.146392966623014e+02 6.799675591776853e+02 -1.019163870176435e+02 1.948499239342933e+02 4 6.250956182258764e+02 -5.540894495738563e+02 2.000928093456288e+02 -2.090269873588226e+02 - ME 4.523507186168379e-04 + ME 6.118266190948034e-05 Event 42 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -340,7 +340,7 @@ Event 42 Batch 0 2 1.687893235969910e+02 1.289401357197518e+02 4.788693514682045e+01 9.783209393213438e+01 3 7.042017295435162e+02 -1.022058447296739e+02 -6.640064324330017e+02 -2.110675220936915e+02 4 6.270089468594927e+02 -2.673429099007782e+01 6.161194972861812e+02 1.132354281615572e+02 - ME 1.686356189272381e-04 + ME 4.091574289077424e-05 Event 43 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -348,7 +348,7 @@ Event 43 Batch 0 2 4.729783670130408e+02 -7.983817933050123e+01 9.052957805204315e+01 4.573169538528310e+02 3 5.638402597824536e+02 4.785250044669658e+02 7.435095949863268e+01 
-2.887933404236804e+02 4 4.631813732045056e+02 -3.986868251364646e+02 -1.648805375506758e+02 -1.685236134291506e+02 - ME 5.938757690519573e-04 + ME 2.654067897204875e-04 Event 44 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -356,7 +356,7 @@ Event 44 Batch 0 2 1.774791104122977e+02 -1.952605982635784e+01 6.371003613266313e+01 1.644949814321787e+02 3 7.194816205691247e+02 -3.678871192485065e+02 2.644831693887214e+01 -6.177486190667772e+02 4 6.030392690185777e+02 3.874131790748646e+02 -9.015835307153536e+01 4.532536376345985e+02 - ME 2.092333697371024e-04 + ME 1.390282437939369e-04 Event 45 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -364,7 +364,7 @@ Event 45 Batch 0 2 7.477488480180839e+02 -3.787655987618923e+02 1.634662296474455e+02 6.236535517992064e+02 3 7.458113398274099e+02 3.819163358711198e+02 -1.661042992235261e+02 -6.186952632673017e+02 4 6.439812154506046e+00 -3.150737109227506e+00 2.638069576080606e+00 -4.958288531904773e+00 - ME 9.377954359926730e-02 + ME 4.591622113024210e-03 Event 46 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -372,7 +372,7 @@ Event 46 Batch 0 2 3.243146757688279e+02 -4.392587631431587e+00 -2.496903827548322e+02 -2.069188895501946e+02 3 5.341608950426614e+02 -2.704482657861201e+02 2.711825143656835e+02 -3.723515022507137e+02 4 6.415244291885106e+02 2.748408534175518e+02 -2.149213161085120e+01 5.792703918009084e+02 - ME 1.879047912263320e-04 + ME 7.845213441237594e-05 Event 47 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -380,7 +380,7 @@ Event 47 Batch 0 2 6.742198761450968e+02 -3.282965096491567e+02 5.301803926793563e+02 -2.563251730900704e+02 3 6.484148720042493e+02 3.527030795571956e+02 -3.975273148506379e+02 3.715029176935211e+02 4 1.773652518506536e+02 -2.440656990803885e+01 -1.326530778287185e+02 
-1.151777446034508e+02 - ME 1.136665455996279e-03 + ME 5.254395938575492e-05 Event 48 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -388,7 +388,7 @@ Event 48 Batch 0 2 7.321401810535270e+02 -1.843482647928687e+02 4.412348098999295e+02 5.543976952635381e+02 3 7.293058265076229e+02 2.182722651304250e+02 -4.435200216702997e+02 -5.362221528717154e+02 4 3.855399243885009e+01 -3.392400033755636e+01 2.285211770370227e+00 -1.817554239182278e+01 - ME 2.278442596973106e-03 + ME 2.330290263553363e-04 Event 49 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -396,7 +396,7 @@ Event 49 Batch 0 2 3.511117284856090e+02 -3.272266866652174e+02 5.199533974843238e+01 1.161835877338140e+02 3 7.326526490901410e+02 6.615045961628415e+02 -2.993354007364775e+02 -9.792799058578566e+01 4 4.162356224242500e+02 -3.342779094976241e+02 2.473400609880451e+02 -1.825559714802838e+01 - ME 8.806759903737244e-05 + ME 7.863589115869630e-06 Event 50 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -404,7 +404,7 @@ Event 50 Batch 0 2 7.322170903075255e+02 2.740692406080844e+02 1.952596610981929e+01 -6.787095515302592e+02 3 3.078559130669522e+02 -1.663333363406682e+02 8.625456119089935e+01 2.442716420418760e+02 4 4.599269966255216e+02 -1.077359042674159e+02 -1.057805273007185e+02 4.344379094883832e+02 - ME 7.579426018596712e-05 + ME 6.765758192049922e-05 Event 51 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -412,7 +412,7 @@ Event 51 Batch 0 2 3.473696038265160e+02 -2.922314643158454e+02 -6.759614889845234e+01 -1.752060888796554e+02 3 5.389399151999496e+02 -2.449040872454050e+02 9.346474502284556e+01 4.708954891311219e+02 4 6.136904809735339e+02 5.371355515612503e+02 -2.586859612439322e+01 -2.956894002514666e+02 - ME 4.687828430739845e-04 + ME 2.035652280642710e-04 Event 52 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -420,7 +420,7 @@ Event 52 Batch 0 2 6.818614816439094e+02 5.970116833066725e+02 3.013730734325877e+02 1.329902280423528e+02 3 2.108623144448950e+02 -4.198344769951654e+00 -1.698802183673395e+02 -1.248439063859965e+02 4 6.072762039111957e+02 -5.928133385367207e+02 -1.314928550652483e+02 -8.146321656356344e+00 - ME 1.636869658416981e-04 + ME 4.047005152694340e-05 Event 53 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -428,7 +428,7 @@ Event 53 Batch 0 2 5.157714002491656e+02 -5.140718537651751e+02 -4.182413977701254e+01 1.003899065692042e+00 3 5.148181840855221e+02 2.868792199999327e+02 1.974924151010656e+02 3.791237552236646e+02 4 4.694104156653124e+02 2.271926337652422e+02 -1.556682753240530e+02 -3.801276542893567e+02 - ME 3.182294022992135e-03 + ME 1.547751010871262e-04 Event 54 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -436,7 +436,7 @@ Event 54 Batch 0 2 6.433410767101752e+02 2.586883950027282e+02 -5.809813083922761e+02 9.710187728524583e+01 3 6.928799734080563e+02 -1.579832568796111e+02 6.405510983559769e+02 -2.117031848853746e+02 4 1.637789498817686e+02 -1.007051381231171e+02 -5.956978996370073e+01 1.146013076001288e+02 - ME 3.280140142776471e-05 + ME 1.302720215079095e-05 Event 55 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -444,7 +444,7 @@ Event 55 Batch 0 2 7.193759752058201e+02 -3.536444481659258e+02 -7.212523476050659e+01 -6.222823703878202e+02 3 5.307053661742267e+02 2.409461639849982e+02 1.900944302490854e+02 4.329633233142391e+02 4 2.499186586199529e+02 1.126982841809279e+02 -1.179691954885788e+02 1.893190470735813e+02 - ME 3.939174164528502e-05 + ME 3.087450123310173e-05 Event 56 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -452,7 +452,7 @@ 
Event 56 Batch 0 2 3.858864959547013e+02 1.815174721437793e+02 3.218581876578407e+02 -1.112074732396182e+02 3 4.484505297447187e+02 -3.244105157450006e+02 2.934585578803474e+02 -9.873079412811623e+01 4 6.656629743005793e+02 1.428930436012212e+02 -6.153167455381879e+02 2.099382673677345e+02 - ME 2.326138625268126e-04 + ME 4.275995533811995e-05 Event 57 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -460,7 +460,7 @@ Event 57 Batch 0 2 5.284589752749192e+02 3.868194647882293e+02 -1.709996888155517e+02 3.168575336559793e+02 3 6.299868555278971e+02 -1.587414880613579e+02 2.327134172236622e+02 -5.634971548731005e+02 4 3.415541691971835e+02 -2.280779767268714e+02 -6.171372840811043e+01 2.466396212171210e+02 - ME 3.474853710074164e-05 + ME 2.211478424702745e-05 Event 58 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -468,7 +468,7 @@ Event 58 Batch 0 2 6.172037319760957e+02 -2.246119436411400e+02 -2.286037628748728e+01 5.744278237820342e+02 3 5.117934503257735e+02 1.262762853074207e+02 3.215736628881853e+02 -3.775939815489577e+02 4 3.710028176981306e+02 9.833565833371921e+01 -2.987132866006979e+02 -1.968338422330765e+02 - ME 6.183305374210038e-04 + ME 1.857727050583390e-04 Event 59 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -476,7 +476,7 @@ Event 59 Batch 0 2 7.388935626701858e+02 -3.912134623809441e+02 -5.457789630286015e+02 3.082872805076099e+02 3 1.936051438730608e+02 1.561492575196544e+02 8.304673385628061e+01 -7.876294246644987e+01 4 5.675012934567535e+02 2.350642048612896e+02 4.627322291723209e+02 -2.295243380411600e+02 - ME 4.116991424436793e-04 + ME 6.745345781245190e-05 Event 60 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -484,7 +484,7 @@ Event 60 Batch 0 2 7.258141426633659e+02 -5.584991156701968e+02 1.635894950857984e+02 4.337319270970709e+02 3 
2.789580074371136e+02 2.331554478032953e+02 6.512410160032128e+01 -1.386180308029247e+02 4 4.952278498995201e+02 3.253436678669015e+02 -2.287135966861195e+02 -2.951138962941461e+02 - ME 7.295672680059989e-04 + ME 9.170244877267536e-05 Event 61 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -492,15 +492,15 @@ Event 61 Batch 0 2 5.906141202026897e+02 4.485275282318680e+02 -2.043613424290570e+02 3.253990429020988e+02 3 4.163572165237975e+02 -4.021600557528675e+02 -4.112755461437413e+01 9.964509802161204e+01 4 4.930286632735124e+02 -4.636747247900051e+01 2.454888970434311e+02 -4.250441409237108e+02 - ME 5.845307122272604e-03 + ME 1.836685601489136e-04 Event 62 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 1 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 -7.500000000000000e+02 2 7.346180891175762e+02 3.693463141798367e+02 7.549194961263061e+01 -6.305140780380819e+02 3 4.420621433230785e+02 -2.806743363126464e+02 3.467380983154045e+01 3.397625382625571e+02 - 4 3.233197675593453e+02 -8.867197786719018e+01 -1.101657594441711e+02 2.907515397755249e+02 - ME 3.963631774242112e-05 + 4 3.233197675593452e+02 -8.867197786719018e+01 -1.101657594441711e+02 2.907515397755248e+02 + ME 3.490896135533686e-05 Event 63 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -508,7 +508,7 @@ Event 63 Batch 0 2 6.451039732729313e+02 -2.415045377667665e+02 1.990362537024482e+02 -5.641092662620230e+02 3 3.260870385294104e+02 2.061141051805976e+02 -2.496695602716584e+02 3.892098426606745e+01 4 5.288089881976584e+02 3.539043258616898e+01 5.063330656921013e+01 5.251882819959555e+02 - ME 4.832224458906289e-04 + ME 4.428689394331114e-04 Event 64 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -516,7 +516,7 @@ Event 64 Batch 0 2 5.275973380665291e+02 -6.064553482667328e+01 
4.309976929667101e+02 -2.981980196075213e+02 3 5.799838776791826e+02 3.279821268626862e+02 -1.824214634122377e+02 4.421893627315650e+02 4 3.924187842542880e+02 -2.673365920360130e+02 -2.485762295544724e+02 -1.439913431240437e+02 - ME 2.175617604507715e-04 + ME 4.205989960223865e-05 Event 65 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -524,7 +524,7 @@ Event 65 Batch 0 2 6.480172869826541e+02 2.720879118036237e+02 -5.153900904044360e+02 -2.833154199679406e+02 3 7.075023253568394e+02 -3.440299289242928e+02 4.709796137500282e+02 4.004761563708322e+02 4 1.444803876605064e+02 7.194201712066916e+01 4.441047665440794e+01 -1.171607364028916e+02 - ME 4.989956280474397e-03 + ME 1.103463366798231e-04 Event 66 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -532,7 +532,7 @@ Event 66 Batch 0 2 5.472978185025795e+02 4.857452785131266e+02 -2.223654169683454e+02 -1.189119332799752e+02 3 3.203062148499983e+02 1.169702135976477e+02 2.922172461416276e+02 -5.935588816501102e+01 4 6.323959666474225e+02 -6.027154921107744e+02 -6.985182917328234e+01 1.782678214449862e+02 - ME 1.346850069104626e-04 + ME 2.913920636000223e-05 Event 67 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -540,7 +540,7 @@ Event 67 Batch 0 2 4.264671493042950e+02 1.195959046886511e+02 -2.647539231733031e+02 3.122121220929446e+02 3 5.059969655247565e+02 3.777175441887567e+02 -7.608313561896731e+00 -3.366073372596325e+02 4 5.675358851709483e+02 -4.973134488774080e+02 2.723622367352000e+02 2.439521516668857e+01 - ME 9.763221977220593e-05 + ME 4.009347519102052e-05 Event 68 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -548,7 +548,7 @@ Event 68 Batch 0 2 5.996105691520872e+02 -3.814725562071957e+02 -3.417794545715573e+02 3.117664637712124e+02 3 2.164196744806214e+02 1.292759463548889e+02 
-1.184749651041615e+02 1.268419798013013e+02 4 6.839697563672917e+02 2.521966098523068e+02 4.602544196757188e+02 -4.386084435725137e+02 - ME 2.936083529685707e-03 + ME 6.175473672610461e-04 Event 69 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -556,7 +556,7 @@ Event 69 Batch 0 2 4.950546755511076e+02 -1.873718558932053e+02 -4.578972175289678e+02 -1.735101101888631e+01 3 4.768584394819691e+02 -1.830244097668608e+02 2.985566003539791e+02 -3.236664843936508e+02 4 5.280868849669230e+02 3.703962656600661e+02 1.593406171749887e+02 3.410174954125370e+02 - ME 5.234212626720279e-05 + ME 1.367292435278724e-05 Event 70 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -564,7 +564,7 @@ Event 70 Batch 0 2 6.918343395272258e+02 6.895733556028865e+02 -5.391072441382606e+01 -1.473005040127906e+01 3 2.169590284692678e+02 -1.127375202028747e+02 1.807969800614662e+02 4.091361110301506e+01 4 5.912066320035063e+02 -5.768358354000119e+02 -1.268862556476402e+02 -2.618356070173603e+01 - ME 1.591740981760110e-04 + ME 3.526540789264872e-05 Event 71 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -572,7 +572,7 @@ Event 71 Batch 0 2 5.156371334918733e+02 1.547202099034306e+02 -4.807172487652236e+02 1.041836686949964e+02 3 3.718518305526428e+02 -8.969821893462726e+01 -7.521366892975188e+01 -3.529460545344468e+02 4 6.125110359554843e+02 -6.502199096880338e+01 5.559309176949756e+02 2.487623858394504e+02 - ME 1.125100552069616e-04 + ME 2.860782472746935e-05 Event 72 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -580,7 +580,7 @@ Event 72 Batch 0 2 2.110577464974889e+02 5.009520239746097e+01 -1.453533690489527e+02 -1.445968227848547e+02 3 7.317124633441161e+02 -4.429659627226336e+02 5.264774879404380e+02 2.490095170354977e+02 4 5.572297901583943e+02 3.928707603251725e+02 
-3.811241188914850e+02 -1.044126942506430e+02 - ME 1.823320413479066e-04 + ME 2.666441446531882e-05 Event 73 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -588,7 +588,7 @@ Event 73 Batch 0 2 3.932257450488246e+02 3.105005764664288e+01 -2.932679039283983e+02 2.601082794045340e+02 3 5.658879124646472e+02 3.645905401293642e+02 4.244364556305355e+02 8.459646951004230e+01 4 5.408863424865281e+02 -3.956405977760074e+02 -1.311685517021372e+02 -3.447047489145762e+02 - ME 8.953763196089171e-04 + ME 7.825486685913998e-05 Event 74 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -596,7 +596,7 @@ Event 74 Batch 0 2 1.374854102925440e+02 7.785209805930555e+01 4.289805712042688e+01 1.048858692406466e+02 3 6.381281910764947e+02 -1.004137270491618e+02 -1.591026937267357e+02 6.097630724433484e+02 4 7.243863986309617e+02 2.256162898985645e+01 1.162046366063089e+02 -7.146489416839951e+02 - ME 1.395531292378326e+01 + ME 1.919068868336380e+00 Event 75 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -604,7 +604,7 @@ Event 75 Batch 0 2 5.936883054156938e+02 -3.438525101293572e+00 -2.706855443967301e+02 5.283780053968293e+02 3 5.912298912592892e+02 1.109657062166288e+02 4.832067437414102e+02 -3.221034603433170e+02 4 3.150818033250173e+02 -1.075271811153352e+02 -2.125211993446803e+02 -2.062745450535123e+02 - ME 1.379908325625592e-03 + ME 1.642862842910461e-04 Event 76 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -612,7 +612,7 @@ Event 76 Batch 0 2 6.619486867997672e+02 2.801967015359571e+01 2.136411519593737e+02 6.258980909300584e+02 3 1.201252731414031e+02 2.274423842261747e+01 -8.754996679960182e+01 7.904292618103446e+01 4 7.179260400588295e+02 -5.076390857621322e+01 -1.260911851597719e+02 -7.049410171110928e+02 - ME 5.870483941147637e+00 + ME 7.362202483972824e-01 Event 77 
Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -620,7 +620,7 @@ Event 77 Batch 0 2 7.456676259451606e+02 -7.346624001550109e+02 6.511229493320701e+01 -1.097804865615983e+02 3 1.284204120828029e+02 1.251494694834492e+02 2.867183268690428e+01 2.708973588335753e+00 4 6.259119619720373e+02 6.095129306715618e+02 -9.378412762011118e+01 1.070715129732624e+02 - ME 1.662775178233579e-04 + ME 4.400761364703354e-05 Event 78 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -628,7 +628,7 @@ Event 78 Batch 0 2 7.040158920877628e+02 6.911264613612161e+02 -6.659640240533211e+01 -1.163937709034254e+02 3 5.185438503615327e+02 -4.976050220224222e+02 -1.270913363611937e+02 7.158742227342900e+01 4 2.774402575507044e+02 -1.935214393387939e+02 1.936877387665258e+02 4.480634862999637e+01 - ME 5.328004946641866e-05 + ME 9.352750539306009e-06 Event 79 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -636,7 +636,7 @@ Event 79 Batch 0 2 6.777589592768838e+02 1.742725197144059e+02 -4.776543849198212e+01 6.532264221831092e+02 3 5.725002211294488e+02 -1.786302554544233e+02 -1.627852110918317e+02 -5.189881598643107e+02 4 2.497408195936665e+02 4.357735740017474e+00 2.105506495838138e+02 -1.342382623187985e+02 - ME 9.179311580246363e-04 + ME 3.598558866345749e-04 Event 80 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -644,7 +644,7 @@ Event 80 Batch 0 2 6.240819586861880e+02 4.679310297228965e+02 -4.118464023828053e+02 -3.002304821964348e+01 3 6.688675489057649e+02 -5.494372353172420e+02 3.251429131208653e+02 1.994607943266771e+02 4 2.070504924080468e+02 8.150620559434545e+01 8.670348926194001e+01 -1.694377461070337e+02 - ME 3.575286400583300e-03 + ME 5.382869847396148e-05 Event 81 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -652,7 
+652,7 @@ Event 81 Batch 0 2 5.198056748722776e+02 1.034797897616987e+02 -2.885605608993972e+02 4.197888462474007e+02 3 5.672098642055398e+02 -4.160331805498524e+02 2.087659545613757e+01 -3.849773895903518e+02 4 4.129844609221831e+02 3.125533907881537e+02 2.676839654432596e+02 -3.481145665704891e+01 - ME 1.018936778946332e-04 + ME 3.612255741613163e-05 Event 82 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -660,7 +660,7 @@ Event 82 Batch 0 2 2.057598609140514e+02 6.385349666266659e+01 -2.765433460911293e+01 1.936364870179372e+02 3 6.235840147705873e+02 4.654039114453895e+02 -3.828889383639962e+02 -1.601633028106901e+02 4 6.706561243153629e+02 -5.292574081080552e+02 4.105432729731107e+02 -3.347318420724690e+01 - ME 6.930850923220120e-04 + ME 3.172622561805068e-04 Event 83 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -668,7 +668,7 @@ Event 83 Batch 0 2 6.583322583736492e+02 1.865539504254553e+02 -1.926584839569474e+02 6.012334775737429e+02 3 3.620902826842561e+02 -3.107067244571256e+02 -1.177956631152976e+01 -1.855584705935048e+02 4 4.795774589420946e+02 1.241527740316703e+02 2.044380502684771e+02 -4.156750069802382e+02 - ME 8.385116111585099e-03 + ME 6.756528802944365e-04 Event 84 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -676,7 +676,7 @@ Event 84 Batch 0 2 4.849329564663161e+02 -2.622178945286150e+02 4.068620488841210e+02 -2.941124332559817e+01 3 4.737588937677760e+02 6.014532316188546e+01 -1.333934272225749e+02 4.505954095412368e+02 4 5.413081497659077e+02 2.020725713667296e+02 -2.734686216615461e+02 -4.211841662156386e+02 - ME 5.162990427398554e-03 + ME 1.017468409980153e-03 Event 85 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -684,7 +684,7 @@ Event 85 Batch 0 2 7.085742632080854e+02 -2.174614026040270e+02 -5.283468657604088e+02 
-4.190914152061853e+02 3 5.315764222715953e+02 8.528530557199829e+00 3.820092234108129e+02 3.695533927738615e+02 4 2.598493145203187e+02 2.089328720468272e+02 1.463376423495959e+02 4.953802243232388e+01 - ME 6.335517668355978e-05 + ME 1.894143727100354e-05 Event 86 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -692,7 +692,7 @@ Event 86 Batch 0 2 1.724500140939190e+02 1.231518677708316e+02 -1.121928207497684e+01 1.201946443701656e+02 3 7.028475062724231e+02 -6.467096040851287e+01 -4.553168759141600e+02 -5.315061866629339e+02 4 6.247024796336580e+02 -5.848090736231883e+01 4.665361579891369e+02 4.113115422927684e+02 - ME 1.165531323127631e-04 + ME 5.311384036847167e-05 Event 87 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -700,7 +700,7 @@ Event 87 Batch 0 2 1.942099203196796e+02 -7.751148196958454e+01 -1.356691819650310e+02 -1.153400900745028e+02 3 7.314670447251594e+02 1.724617634710876e+02 7.020747158546045e+02 1.113196793791551e+02 4 5.743230349551606e+02 -9.495028150150301e+01 -5.664055338895735e+02 4.020410695347637e+00 - ME 1.237609879052555e-04 + ME 1.874087134673149e-05 Event 88 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -708,7 +708,7 @@ Event 88 Batch 0 2 6.382497024023744e+02 2.632142028760094e+02 -5.613974181649784e+02 1.513733956108635e+02 3 3.997044228265544e+02 -5.264940326118349e+01 3.435187961344461e+02 1.974500004195773e+02 4 4.620458747710724e+02 -2.105647996148253e+02 2.178786220305324e+02 -3.488233960304407e+02 - ME 1.863821317258467e-03 + ME 9.699609186666195e-05 Event 89 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -716,7 +716,7 @@ Event 89 Batch 0 2 1.419006640093282e+02 -8.677155154367878e+01 6.457545216231642e+01 -9.185046144153740e+01 3 7.131224514048055e+02 5.460003286026870e+02 -4.154556538506974e+02 
-1.944836022569670e+02 4 6.449768845858670e+02 -4.592287770590082e+02 3.508802016883808e+02 2.863340636985044e+02 - ME 1.136115495374629e-04 + ME 2.974199953519439e-05 Event 90 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -724,7 +724,7 @@ Event 90 Batch 0 2 5.730615760623938e+02 -6.017783679015001e+01 -5.202921970507185e+02 -2.325386583054727e+02 3 5.389913703864468e+02 -6.302812531165206e+01 2.446311215742109e+02 4.761247390423042e+02 4 3.879470535511588e+02 1.232059621018019e+02 2.756610754765076e+02 -2.435860807368315e+02 - ME 1.094721025518881e-03 + ME 1.667772733247344e-04 Event 91 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -732,7 +732,7 @@ Event 91 Batch 0 2 4.546745139784350e+02 -1.470341619195494e+02 -1.726383255301703e+02 -3.940886669878754e+02 3 5.110976540119647e+02 -2.482119727393537e+02 -1.865817698532448e+02 4.059542728975803e+02 4 5.342278320096005e+02 3.952461346589030e+02 3.592200953834151e+02 -1.186560590970480e+01 - ME 8.789722587847313e-05 + ME 4.420313882846059e-05 Event 92 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -740,7 +740,7 @@ Event 92 Batch 0 2 6.683728375977241e+02 -1.148152650923627e+02 3.458291789782991e+02 5.603051703379153e+02 3 2.872567998557088e+02 1.635098024620329e+02 7.847331657016402e+01 -2.227620976482501e+02 4 5.443703625465666e+02 -4.869453736967034e+01 -4.243024955484631e+02 -3.375430726896653e+02 - ME 8.270083568815311e-04 + ME 2.265252332392545e-04 Event 93 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -748,7 +748,7 @@ Event 93 Batch 0 2 5.666948073002088e+02 5.408074886689032e+01 5.639942928586390e+02 -1.134525653745258e+01 3 6.168025492529713e+02 2.439040545997395e+02 -5.541969602989467e+02 1.175666879272316e+02 4 3.165026434468199e+02 -2.979848034666298e+02 -9.797332559692304e+00 
-1.062214313897791e+02 - ME 1.664960428447917e-04 + ME 1.251778043268437e-05 Event 94 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -756,7 +756,7 @@ Event 94 Batch 0 2 4.964349376711385e+02 8.445930034540567e+01 -2.409007074648561e+02 -4.257712097695705e+02 3 5.660980232871289e+02 1.373833465612049e+02 5.210669225216058e+02 1.734417778711397e+02 4 4.374670390417324e+02 -2.218426469066104e+02 -2.801662150567495e+02 2.523294318984307e+02 - ME 3.431641292834382e-05 + ME 1.007141026120618e-05 Event 95 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -764,7 +764,7 @@ Event 95 Batch 0 2 7.117074025057361e+02 -3.227984571262278e+02 4.276971164854593e+02 -4.684055501468919e+02 3 1.264078228725325e+02 8.675876182178401e+01 5.074873328843479e+01 7.665781760618943e+01 4 6.618847746217315e+02 2.360396953044439e+02 -4.784458497738940e+02 3.917477325407025e+02 - ME 2.121249861094822e-04 + ME 8.653822330208906e-05 Event 96 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -772,7 +772,7 @@ Event 96 Batch 0 2 7.329769441659936e+02 -9.642859092211874e+01 6.903981466332597e+02 -2.265107649915406e+02 3 3.937873938465678e+02 -4.837693103302091e+01 -3.847118583018795e+02 6.873841850241256e+01 4 3.732356619874385e+02 1.448055219551397e+02 -3.056862883313802e+02 1.577723464891279e+02 - ME 3.473186069800973e-05 + ME 9.822975749896163e-06 Event 97 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -780,7 +780,7 @@ Event 97 Batch 0 2 3.394989963266853e+01 6.003767577498499e+00 -2.078495220615399e+01 2.616364312804199e+01 3 7.377311980366451e+02 -5.308290258162607e+02 4.681853362634530e+02 2.080152802450354e+02 4 7.283189023306861e+02 5.248252582387622e+02 -4.474003840572991e+02 -2.341789233730774e+02 - ME 2.063600678642283e-02 + ME 2.729355315721549e-03 Event 98 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -788,7 +788,7 @@ Event 98 Batch 0 2 2.496912687496082e+02 -2.485814905959506e+02 -5.435228288348340e-01 -2.350907922099247e+01 3 7.458289852530976e+02 7.373315781279124e+02 9.801365830907572e+01 -5.473885205171283e+01 4 5.044797459972945e+02 -4.887500875319618e+02 -9.747013548024091e+01 7.824793127270530e+01 - ME 6.800308216903296e-05 + ME 8.091578731489026e-06 Event 99 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -796,7 +796,7 @@ Event 99 Batch 0 2 1.698125854886770e+02 8.336002034290719e+01 8.774494220182726e+01 -1.191144253093525e+02 3 6.496622934125946e+02 5.714329899004554e+02 -6.230613627727958e+01 3.027265745152471e+02 4 6.805251210987285e+02 -6.547930102433627e+02 -2.543880592454771e+01 -1.836121492058947e+02 - ME 6.115029137493471e-04 + ME 1.856310681395454e-04 Event 100 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -804,7 +804,7 @@ Event 100 Batch 0 2 6.141460480129781e+02 -5.842473718080511e+02 -5.092222124447417e+01 1.823110095657221e+02 3 3.909476383151783e+02 2.539115798088024e+02 -2.930333502072385e+02 -5.000421191795168e+01 4 4.949063136718440e+02 3.303357919992488e+02 3.439555714517127e+02 -1.323067976477707e+02 - ME 1.550407956048336e-04 + ME 2.380755205932631e-05 Event 101 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -812,7 +812,7 @@ Event 101 Batch 0 2 7.469346538870473e+02 3.524232024688497e+02 -1.488240016505349e+02 -6.415299525912136e+02 3 6.502268999047169e+02 -2.777200960400715e+02 1.351761574712158e+02 5.721835160737410e+02 4 1.028384462082358e+02 -7.470310642877820e+01 1.364784417931910e+01 6.934643651747267e+01 - ME 1.080054053054822e-04 + ME 7.777208667430486e-05 Event 102 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -820,7 
+820,7 @@ Event 102 Batch 0 2 7.426790432885583e+02 -3.141071077544728e+02 6.615000409077074e+02 1.238005738162371e+02 3 6.735764515788642e+01 -4.139700837311957e+00 -5.533298776898177e+01 -3.818606686673834e+01 4 6.899633115535552e+02 3.182468085917849e+02 -6.061670531387255e+02 -8.561450694949879e+01 - ME 6.292262541994918e-04 + ME 1.796768498680773e-04 Event 103 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -828,7 +828,7 @@ Event 103 Batch 0 2 4.837874798175253e+02 -2.731724972668680e+02 1.247027290420595e+02 -3.793103501549069e+02 3 4.466406321977809e+02 -2.904538080082218e+02 -1.536665846758871e+02 3.025078850172422e+02 4 5.695718879846930e+02 5.636263052750895e+02 2.896385563382777e+01 7.680246513766473e+01 - ME 8.140894767450013e-05 + ME 2.998858312831636e-05 Event 104 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -836,7 +836,7 @@ Event 104 Batch 0 2 5.788466572679498e+02 3.572346730226224e+02 -3.682137844992378e+02 2.680773207965347e+02 3 2.925711988065158e+02 2.155069407513812e+02 1.697995838195863e+02 -1.016010147279926e+02 4 6.285821439255348e+02 -5.727416137740034e+02 1.984142006796517e+02 -1.664763060685422e+02 - ME 2.849770726480251e-04 + ME 7.634200862908681e-05 Event 105 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -844,7 +844,7 @@ Event 105 Batch 0 2 3.361125455083114e+02 2.619004058447622e+02 4.338373361330959e+01 -2.061496357605196e+02 3 5.299016201311088e+02 2.892532450564946e+02 2.091058919093095e+02 3.916669672191841e+02 4 6.339858343605800e+02 -5.511536509012568e+02 -2.524896255226191e+02 -1.855173314586645e+02 - ME 2.866662317167052e-04 + ME 1.089382545947932e-04 Event 106 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -852,7 +852,7 @@ Event 106 Batch 0 2 3.578050478863485e+02 -2.265838270225943e+02 2.740910124726658e+02 
-3.947579646386072e+01 3 5.202885196186892e+02 1.412729374205232e+02 1.631578432376887e+02 4.734148487210871e+02 4 6.219064324949621e+02 8.531088960207101e+01 -4.372488557103545e+02 -4.339390522572265e+02 - ME 1.912263829178338e-03 + ME 4.548955126640399e-04 Event 107 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -860,7 +860,7 @@ Event 107 Batch 0 2 5.409822745993889e+02 9.278463733038997e+01 5.102180459532771e+02 -1.540466750365499e+02 3 2.501852297905710e+02 1.682301834486207e+02 1.474652503315489e+02 1.120056004263085e+02 4 7.088324956100398e+02 -2.610148207790107e+02 -6.576832962848259e+02 4.204107461024153e+01 - ME 7.096163321035572e-04 + ME 2.159102073406285e-04 Event 108 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -868,7 +868,7 @@ Event 108 Batch 0 2 6.835202199428555e+02 6.670011709444186e+02 6.653656309718588e+01 1.337243986739828e+02 3 2.377887385005082e+02 -1.098327419601477e+02 7.667443498831059e+01 -1.964720946353502e+02 4 5.786910415566365e+02 -5.571684289842709e+02 -1.432109980854965e+02 6.274769596136723e+01 - ME 1.143500637563713e-04 + ME 2.960130886583330e-05 Event 109 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -876,7 +876,7 @@ Event 109 Batch 0 2 5.978180281189351e+02 4.291222314737005e+02 2.249703559956599e+02 3.501840146583366e+02 3 3.585061336071061e+02 -3.227227650115256e+02 1.541688059097761e+02 2.467071262824850e+01 4 5.436758382739589e+02 -1.063994664621746e+02 -3.791391619054360e+02 -3.748547272865851e+02 - ME 1.159187207430584e-03 + ME 1.100286424576873e-04 Event 110 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -884,7 +884,7 @@ Event 110 Batch 0 2 7.073952645543156e+01 -4.753982451958468e+01 4.872856968801237e+01 -1.922426029646691e+01 3 7.438039776014969e+02 1.707202332282495e+02 -7.225114374584515e+02 
4.556513803361385e+01 4 6.854564959430718e+02 -1.231804087086648e+02 6.737828677704391e+02 -2.634087773714689e+01 - ME 5.177444310012934e-04 + ME 1.052942530962122e-04 Event 111 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -892,7 +892,7 @@ Event 111 Batch 0 2 5.206822291802364e+02 -3.873336848644893e+02 2.415505427333673e+02 -2.504714268307115e+02 3 5.478000561519707e+02 4.687653961676166e+02 -2.245690260344170e+02 -1.729527606656598e+02 4 4.315177146677929e+02 -8.143171130312743e+01 -1.698151669895031e+01 4.234241874963712e+02 - ME 1.041517236520828e-04 + ME 8.545692640795734e-05 Event 112 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -900,7 +900,7 @@ Event 112 Batch 0 2 3.610471238372959e+02 2.563298943277285e+02 9.635756626046441e+01 -2.352981732387216e+02 3 6.139063356201009e+02 1.031778254919422e+02 -4.257030126280926e+02 4.301305270271111e+02 4 5.250465405426031e+02 -3.595077198196707e+02 3.293454463676283e+02 -1.948323537883896e+02 - ME 2.333567140730066e-04 + ME 5.572029836371622e-05 Event 113 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -908,7 +908,7 @@ Event 113 Batch 0 2 5.886653054136124e+02 3.035646198144377e+02 3.278619896967805e+02 -3.832517176826292e+02 3 5.420023902452333e+02 -3.658357535838290e+02 -3.990519958595696e+02 2.623541560166928e+01 4 3.693323043411537e+02 6.227113376939163e+01 7.119000616278893e+01 3.570163020809600e+02 - ME 6.906402420910258e-05 + ME 4.986188449478774e-05 Event 114 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -916,7 +916,7 @@ Event 114 Batch 0 2 5.165204340356855e+02 2.346362244736889e+01 6.298471388966840e+00 5.159487827839334e+02 3 5.932916594323345e+02 3.608814360715946e+02 -5.336137507463695e+01 -4.678804824963537e+02 4 3.901879065319798e+02 -3.843450585189634e+02 4.706290368567026e+01 
-4.806830028757967e+01 - ME 5.363382776736297e-04 + ME 4.029549711869195e-04 Event 115 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -924,7 +924,7 @@ Event 115 Batch 0 2 5.432307281524777e+02 2.250327918244370e+02 4.870559856477670e+02 -8.506664127290338e+01 3 4.265243530840496e+02 2.057819224248363e+02 -2.472237669715339e+02 2.801021835354204e+02 4 5.302449187634726e+02 -4.308147142492733e+02 -2.398322186762331e+02 -1.950355422625171e+02 - ME 2.364149932043149e-04 + ME 4.159321993514108e-05 Event 116 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -932,7 +932,7 @@ Event 116 Batch 0 2 4.402635748890415e+02 -4.240500842615081e+02 -5.733358735035193e+01 -1.035683405941509e+02 3 4.399967684638562e+02 1.183617589007452e+02 -1.041572505293867e+02 -4.107784286579766e+02 4 6.197396566471035e+02 3.056883253607625e+02 1.614908378797388e+02 5.143467692521278e+02 - ME 1.343295643586522e-04 + ME 4.172733678506819e-05 Event 117 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -940,7 +940,7 @@ Event 117 Batch 0 2 3.074085311587982e+02 -4.270248480828711e+01 -3.034838508096459e+02 2.395944736750828e+01 3 5.360984061023379e+02 3.510554986169303e+02 -1.596589010508530e+02 -3.723849798683070e+02 4 6.564930627388640e+02 -3.083530138086433e+02 4.631427518604987e+02 3.484255325007987e+02 - ME 1.795895763168496e-04 + ME 4.142391000026985e-05 Event 118 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -948,7 +948,7 @@ Event 118 Batch 0 2 5.403602961735903e+02 4.471526113902045e+02 -1.804334130868151e+02 -2.439007487679592e+02 3 5.654623567965698e+02 -5.534570111367966e+02 -1.157195831079003e+02 6.480112868522320e+00 4 3.941773470298406e+02 1.063043997465919e+02 2.961529961947150e+02 2.374206358994370e+02 - ME 3.055618730902428e-05 + ME 7.288650603673961e-06 Event 119 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -956,7 +956,7 @@ Event 119 Batch 0 2 8.009099446659010e+01 5.775399043490319e+01 -2.629604726664823e+01 4.886268393818209e+01 3 7.131140611332349e+02 2.472685400460709e+02 -2.870014097539109e+02 -6.041689532644716e+02 4 7.067949444001758e+02 -3.050225304809738e+02 3.132974570205592e+02 5.553062693262896e+02 - ME 6.861262467765907e-04 + ME 2.815424392761942e-04 Event 120 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -964,7 +964,7 @@ Event 120 Batch 0 2 5.007248873753321e+02 2.708997263130530e+02 -3.880896283797751e+02 1.634784128397387e+02 3 7.413897277398672e+02 -4.257033276374029e+02 5.921425482134987e+02 -1.334264135464211e+02 4 2.578853848848011e+02 1.548036013243502e+02 -2.040529198337238e+02 -3.005199929331748e+01 - ME 1.034513276694145e-04 + ME 6.003662532288496e-06 Event 121 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -972,7 +972,7 @@ Event 121 Batch 0 2 5.732265116821120e+02 -1.149395375629033e+02 4.260916136383032e+02 3.658189076403451e+02 3 4.323948798659248e+02 -2.148488009071912e+01 -4.178027098651986e+02 1.092914804138530e+02 4 4.943786084519640e+02 1.364244176536226e+02 -8.288903773105691e+00 -4.751103880541979e+02 - ME 8.074833733477824e-02 + ME 7.661241871407340e-04 Event 122 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -980,7 +980,7 @@ Event 122 Batch 0 2 3.423360304412701e+02 2.648046119434483e+02 2.369247279710451e+01 -2.156644197927059e+02 3 6.059487982275789e+02 2.457729689670163e+01 -4.569077875801422e+02 3.972469964635579e+02 4 5.517151713311508e+02 -2.893819088401499e+02 4.332153147830377e+02 -1.815825766708520e+02 - ME 2.180123533398812e-04 + ME 5.274300345459390e-05 Event 123 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -988,7 
+988,7 @@ Event 123 Batch 0 2 1.430133297276668e+02 -4.205671322284506e+01 3.498095937953869e+01 1.321377229770999e+02 3 7.140350670908600e+02 -2.955397919833849e+01 -6.570980288365154e+02 -2.778395577453968e+02 4 6.429516031814733e+02 7.161069242118367e+01 6.221170694569771e+02 1.457018347682969e+02 - ME 5.626335206455025e-04 + ME 2.698780233597045e-04 Event 124 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -996,7 +996,7 @@ Event 124 Batch 0 2 6.053457283343441e+02 5.458657819531910e+02 -1.853964251366731e+01 -2.610177782464909e+02 3 7.499633671623128e+02 -6.784114238502394e+02 2.145325921506613e+01 3.189713933003628e+02 4 1.446909045033435e+02 1.325456418970486e+02 -2.913616701398675e+00 -5.795361505387172e+01 - ME 4.169465060943616e-04 + ME 2.629538535113942e-05 Event 125 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1004,7 +1004,7 @@ Event 125 Batch 0 2 6.695439244882118e+02 9.058534244088493e+01 6.586171675820721e+02 7.941529525294386e+01 3 9.341516463500346e+01 3.490868167113007e+01 5.232133368429144e+01 6.906703243419068e+01 4 7.370409108767834e+02 -1.254940241120154e+02 -7.109385012663632e+02 -1.484823276871337e+02 - ME 1.111472366347957e-02 + ME 4.436636984625360e-03 Event 126 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1012,7 +1012,7 @@ Event 126 Batch 0 2 6.465564354211967e+02 -2.094351601488127e+02 -1.930091683601272e+02 -5.804477571728034e+02 3 1.356182567235447e+02 -2.832094442380729e+01 9.735247446175231e+01 -9.007070211700794e+01 4 7.178253078552584e+02 2.377561045726200e+02 9.565669389837488e+01 6.705184592898115e+02 - ME 1.775660879411100e-03 + ME 1.230970446288030e-03 Event 127 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1020,7 +1020,7 @@ Event 127 Batch 0 2 4.508388003927651e+02 -3.846405138087858e+02 7.756355374444065e+01 
2.220162025777267e+02 3 6.162879941073576e+02 2.174727303224461e+02 1.334711143222092e+02 -5.609830344035003e+02 4 4.328732054998774e+02 1.671677834863399e+02 -2.110346680666500e+02 3.389668318257735e+02 - ME 3.922171581774212e-05 + ME 2.127227557837123e-05 Event 128 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1028,7 +1028,7 @@ Event 128 Batch 0 2 7.468963146802857e+02 5.701805835528932e+02 -3.440982003215339e+02 -3.381488363986430e+02 3 1.196664332518719e+02 -9.337643239636876e+01 2.398139841985228e+01 7.089280393650260e+01 4 6.334372520678420e+02 -4.768041511565244e+02 3.201168019016817e+02 2.672560324621404e+02 - ME 2.053620454072734e-04 + ME 7.842790653965437e-05 Event 129 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1036,7 +1036,7 @@ Event 129 Batch 0 2 4.378966182438207e+02 -4.256397208622688e+02 4.624364030548149e+01 9.190104474357973e+01 3 7.127537996732577e+02 5.790589826349546e+02 -1.369827771626340e+02 -3.923574802896586e+02 4 3.493495820829217e+02 -1.534192617726859e+02 9.073913685715252e+01 3.004564355460789e+02 - ME 1.668072874757384e-05 + ME 1.046217618618756e-05 Event 130 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1044,7 +1044,7 @@ Event 130 Batch 0 2 6.322026526626455e+02 5.905875735566585e+02 -2.387291116192753e+01 -2.243136110600485e+02 3 5.268087771404591e+02 -3.287250458747471e+02 1.913681034684307e+02 3.644798771698754e+02 4 3.409885701968954e+02 -2.618625276819114e+02 -1.674951923065032e+02 -1.401662661098267e+02 - ME 2.766647151388132e-04 + ME 3.412796728096272e-05 Event 131 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1052,7 +1052,7 @@ Event 131 Batch 0 2 2.691964685177017e+02 -2.641651354044939e+02 4.065264362900757e+01 -3.210735842607325e+01 3 5.382709487855662e+02 -3.022535437819008e+02 -4.307865739991411e+02 
1.131429946566680e+02 4 6.925325826967319e+02 5.664186791863947e+02 3.901339303701337e+02 -8.103563623059465e+01 - ME 5.354423766199649e-04 + ME 1.516502654737588e-04 Event 132 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1060,7 +1060,7 @@ Event 132 Batch 0 2 1.376388194981169e+02 -2.491804956023667e+01 3.114513197621116e+01 1.317327453336230e+02 3 7.332494677489981e+02 -3.054807357444667e+02 -6.882601889638243e+00 -6.665500220046781e+02 4 6.291117127528858e+02 3.303987853047034e+02 -2.426253008657308e+01 5.348172766710551e+02 - ME 3.625143788027957e-04 + ME 2.459616839911958e-04 Event 133 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1068,7 +1068,7 @@ Event 133 Batch 0 2 5.818916885738672e+02 -3.437736592641007e+02 -2.113522447259726e+02 -4.192228966514222e+02 3 7.075583625851592e+02 3.695171106849944e+02 9.875952986414086e+01 5.952667441040354e+02 4 2.105499488409736e+02 -2.574345142089370e+01 1.125927148618317e+02 -1.760438474526132e+02 - ME 6.644965721204062e-03 + ME 3.278402967978973e-04 Event 134 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1076,7 +1076,7 @@ Event 134 Batch 0 2 7.039051474789593e+02 -1.767404282002263e+02 5.832845063404937e+02 3.521710697233707e+02 3 6.740856043500099e+02 9.540039380435479e+01 -5.203258634262522e+02 -4.177932056695244e+02 4 1.220092481710302e+02 8.134003439587134e+01 -6.295864291424151e+01 6.562213594615410e+01 - ME 6.394436352069354e-05 + ME 3.621089826286842e-05 Event 135 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1084,7 +1084,7 @@ Event 135 Batch 0 2 7.491379873081086e+02 -6.603965492909807e+02 -9.243924572685610e+01 -3.413782470545817e+02 3 4.360367703469753e+02 3.763875731093294e+02 3.833030381995060e+01 2.167746473012021e+02 4 3.148252423449159e+02 2.840089761816513e+02 5.410894190690560e+01 
1.246035997533796e+02 - ME 3.729096801849378e-05 + ME 1.170602675185252e-05 Event 136 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1092,7 +1092,7 @@ Event 136 Batch 0 2 6.907976432034611e+02 -8.965778913807024e+01 -5.375684903631193e+02 -4.244796613161184e+02 3 4.317447428217263e+02 2.541758793770707e+02 2.501815833403360e+02 2.433255445990286e+02 4 3.774576139748129e+02 -1.645180902390004e+02 2.873869070227833e+02 1.811541167170898e+02 - ME 3.295715598818487e-05 + ME 1.221598515374744e-05 Event 137 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1100,7 +1100,7 @@ Event 137 Batch 0 2 5.927917878715718e+02 -5.453882061843875e+02 -2.239274061847312e+02 6.172783069514800e+01 3 3.718333194205911e+02 2.859809174201715e+02 -2.363544177495510e+02 2.472896101988843e+01 4 5.353748927078371e+02 2.594072887642160e+02 4.602818239342820e+02 -8.645679171503701e+01 - ME 1.267334233155001e-04 + ME 2.222722395048600e-05 Event 138 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1108,7 +1108,7 @@ Event 138 Batch 0 2 1.164849493482387e+02 2.012854405109472e+01 -2.573298799707043e+01 -1.118096528381494e+02 3 7.481698498358139e+02 -1.044692284663333e+02 -4.003634472873074e+00 7.408294509656059e+02 4 6.353452008159477e+02 8.434068441523856e+01 2.973662246994375e+01 -6.290197981274564e+02 - ME 3.545594402685597e+00 + ME 1.183014588836486e-01 Event 139 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1116,7 +1116,7 @@ Event 139 Batch 0 2 3.415587822283577e+02 -2.468214832259765e+02 1.926082427237748e+02 1.365416492148350e+02 3 5.828887331044928e+02 -1.023403009989268e+02 -5.561813319045077e+02 1.412376154306548e+02 4 5.755524846671491e+02 3.491617842249035e+02 3.635730891807333e+02 -2.777792646454897e+02 - ME 4.142320485322521e-04 + ME 5.213154494000113e-05 Event 140 Batch 0 
0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1124,7 +1124,7 @@ Event 140 Batch 0 2 4.395392082109443e+02 -3.037880820376849e+02 -2.455930383243060e+02 -2.014735126343029e+02 3 4.709796125547878e+02 -2.826270024952004e+02 2.984919122515593e+02 2.298833426397907e+02 4 5.894811792342680e+02 5.864150845328855e+02 -5.289887392725340e+01 -2.840983000548780e+01 - ME 1.220048440917972e-04 + ME 2.990357782498624e-05 Event 141 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1132,7 +1132,7 @@ Event 141 Batch 0 2 3.025838986653694e+02 -2.680006525137058e+02 -6.218827689980458e+01 -1.259574698062632e+02 3 5.104624598690772e+02 -2.829910827131053e+02 4.173533268753467e+02 -7.939880721102661e+01 4 6.869536414655528e+02 5.509917352268112e+02 -3.551650499755422e+02 2.053562770172896e+02 - ME 3.735313583347012e-04 + ME 7.151804808113674e-05 Event 142 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1140,7 +1140,7 @@ Event 142 Batch 0 2 4.390011511178412e+02 -3.153925512561953e+02 3.992377088505197e+01 -3.027468279160259e+02 3 4.597282536099518e+02 2.984856708041211e+02 -2.221794712617382e+02 -2.699863960308454e+02 4 6.012705952722066e+02 1.690688045207421e+01 1.822557003766862e+02 5.727332239468712e+02 - ME 1.630913878361870e-04 + ME 8.945447985744934e-05 Event 143 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1148,7 +1148,7 @@ Event 143 Batch 0 2 7.103308443495001e+02 -3.626595603160224e+02 2.462759922459802e+02 5.589240443825270e+02 3 3.424564807343295e+02 4.507572778536915e+01 -2.357842367637252e+02 -2.442343416788665e+02 4 4.472126749161695e+02 3.175838325306533e+02 -1.049175548225529e+01 -3.146897027036604e+02 - ME 1.304325296055160e-03 + ME 1.789392510542836e-04 Event 144 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 
@@ -1156,7 +1156,7 @@ Event 144 Batch 0 2 6.893886390440568e+02 -2.470805413393656e+02 1.331686162420120e+02 6.296618309717105e+02 3 7.132719020730987e+02 2.482972988978650e+02 -2.304803220538649e+02 -6.276815106349294e+02 4 9.733945888284487e+01 -1.216757558499225e+00 9.731170581185302e+01 -1.980320336781234e+00 - ME 3.769348793094523e-04 + ME 1.486904409371019e-04 Event 145 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1164,7 +1164,7 @@ Event 145 Batch 0 2 3.784954309743686e+02 2.391836032855264e+02 1.115572896135236e+01 -2.931305935912622e+02 3 7.389406222827198e+02 -4.231861417520660e+02 1.513250860114713e+02 5.865555822189353e+02 4 3.825639467429113e+02 1.840025384665394e+02 -1.624808149728234e+02 -2.934249886276727e+02 - ME 2.193982780219728e-03 + ME 2.016505354100400e-04 Event 146 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1172,7 +1172,7 @@ Event 146 Batch 0 2 4.681255842987410e+02 -3.253195724522379e+01 1.754808059398437e+02 -4.327698247100133e+02 3 2.875849079819393e+02 2.091841587061404e+01 1.879781824316579e+02 -2.166372592748876e+02 4 7.442895077193195e+02 1.161354137460973e+01 -3.634589883715017e+02 6.494070839849006e+02 - ME 5.347932692815789e-02 + ME 1.210467216316050e-02 Event 147 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1180,7 +1180,7 @@ Event 147 Batch 0 2 2.442136391928777e+02 -1.784444843977844e+02 -1.666832492802189e+02 -3.816014311599316e+00 3 5.551361515401285e+02 1.378338123621512e+02 -5.199472642306259e+02 1.372327560591401e+02 4 7.006502092669938e+02 4.061067203563306e+01 6.866305135108448e+02 -1.334167417475408e+02 - ME 7.450632204513606e-04 + ME 2.360352365747709e-04 Event 148 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1188,7 +1188,7 @@ Event 148 Batch 0 2 4.547263863263726e+02 3.928375677411887e+02 
5.145105706241225e+01 2.231759855356057e+02 3 7.397285466814292e+02 -5.611511356388266e+02 -1.533645573573770e+02 -4.569322031694095e+02 4 3.055450669921979e+02 1.683135678976379e+02 1.019135002949646e+02 2.337562176338038e+02 - ME 1.440225905683450e-05 + ME 6.307552439231181e-06 Event 149 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1196,7 +1196,7 @@ Event 149 Batch 0 2 2.343018799311635e+02 9.853424545130945e+01 1.924850318874441e+02 -9.021023174733594e+01 3 7.291173748950658e+02 3.429747374294529e+01 -5.990516617369192e+02 4.142136359886766e+02 4 5.365807451737705e+02 -1.328317191942547e+02 4.065666298494750e+02 -3.240034042413406e+02 - ME 8.405553848068603e-04 + ME 8.298171355094406e-05 Event 150 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1204,7 +1204,7 @@ Event 150 Batch 0 2 4.707648023587808e+02 -8.969278865174961e+01 -3.008719699078221e+02 3.507859183712497e+02 3 6.876639918976698e+02 3.906111988928598e+02 4.609284537794546e+02 -3.284046551871671e+02 4 3.415712057435500e+02 -3.009184102411105e+02 -1.600564838716325e+02 -2.238126318408256e+01 - ME 1.070125715137075e-04 + ME 1.887585788236135e-05 Event 151 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1212,7 +1212,7 @@ Event 151 Batch 0 2 6.503034458278056e+02 -1.575298496674962e+02 -3.658248853789647e+01 -6.298735108350154e+02 3 6.998690336552314e+02 1.302751858829802e+02 -1.019415103826456e+02 6.800389464387812e+02 4 1.498275205169629e+02 2.725466378451580e+01 1.385239989205421e+02 -5.016543560376590e+01 - ME 6.663776898009472e-04 + ME 4.060174493404880e-04 Event 152 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1220,7 +1220,7 @@ Event 152 Batch 0 2 7.401192382353395e+02 1.493701961830190e+02 6.288419447382046e+02 3.605867993093739e+02 3 7.332111095478891e+02 -1.230079111936445e+02 
-6.287602831147091e+02 -3.565502647954901e+02 4 2.666965221677112e+01 -2.636228498937447e+01 -8.166162349550861e-02 -4.036534513883709e+00 - ME 8.446403371723604e-04 + ME 1.210964379505254e-04 Event 153 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1228,7 +1228,7 @@ Event 153 Batch 0 2 5.645797071775899e+02 7.941901905692946e+01 3.691428696980725e+02 -4.197337333594241e+02 3 6.079979027943974e+02 1.021455738177839e+02 -5.566920170809548e+02 2.220849604771994e+02 4 3.274223900280123e+02 -1.815645928747133e+02 1.875491473828823e+02 1.976487728822249e+02 - ME 2.846663840296023e-05 + ME 9.895323747190810e-06 Event 154 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1236,7 +1236,7 @@ Event 154 Batch 0 2 6.022174885419887e+02 -5.152457849782368e+02 -1.493252664732707e+02 -2.736597328082223e+02 3 3.617627670199851e+02 1.925398333816265e+02 -2.626238171638091e+02 1.575736108034646e+02 4 5.360197444380261e+02 3.227059515966102e+02 4.119490836370796e+02 1.160861220047577e+02 - ME 6.437319974597944e-05 + ME 1.660411512586943e-05 Event 155 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1244,7 +1244,7 @@ Event 155 Batch 0 2 6.202229507100907e+02 -2.107861924791831e+02 -3.212541876154504e+02 4.868690137883067e+02 3 2.943040328093193e+02 2.940980302320592e+02 1.073731199058907e+01 2.433613089266508e+00 4 5.854730164805898e+02 -8.331183775287627e+01 3.105168756248616e+02 -4.893026268775732e+02 - ME 5.904510654775639e-03 + ME 4.918845171174253e-04 Event 156 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1252,7 +1252,7 @@ Event 156 Batch 0 2 4.945486805149833e+02 4.540818864859257e+02 -1.431706201593249e+02 -1.337542944644701e+02 3 5.997303202813281e+02 -3.624214233270367e+02 -5.726286247273350e+01 4.743923835389624e+02 4 4.057209992036886e+02 -9.166046315888883e+01 
2.004334826320584e+02 -3.406380890744924e+02 - ME 4.701306652347430e-03 + ME 1.986837824231628e-04 Event 157 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1260,7 +1260,7 @@ Event 157 Batch 0 2 4.617003083190191e+02 3.118400043328062e+02 3.404502064148864e+02 -4.079626411035589e+00 3 5.720097526413113e+02 -4.999240316044806e+01 -4.329264075474301e+02 -3.705005295422582e+02 4 4.662899390396696e+02 -2.618476011723578e+02 9.247620113254365e+01 3.745801559532937e+02 - ME 3.907978340087068e-05 + ME 1.403598809900552e-05 Event 158 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1268,7 +1268,7 @@ Event 158 Batch 0 2 6.784877363061535e+02 -5.707102180762959e+02 -3.102223423027389e+02 -1.959529373021938e+02 3 5.650909444059712e+02 5.525284805868615e+02 7.765167789879932e+01 8.950011457818250e+01 4 2.564213192878751e+02 1.818173748943443e+01 2.325706644039396e+02 1.064528227240114e+02 - ME 3.503179830087694e-05 + ME 8.470133063482862e-06 Event 159 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1276,7 +1276,7 @@ Event 159 Batch 0 2 5.369491563274252e+02 2.154713482252002e+02 -2.912667909729743e+02 3.962955349875316e+02 3 6.066564496499102e+02 -4.020061311781470e+01 5.572389608252350e+02 -2.364332868806716e+02 4 3.563943940226648e+02 -1.752707351073854e+02 -2.659721698522608e+02 -1.598622481068599e+02 - ME 3.198473025834927e-04 + ME 3.562393617300492e-05 Event 160 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1284,7 +1284,7 @@ Event 160 Batch 0 2 6.492474755438517e+02 3.490068395973682e+02 1.460348644657111e+02 -5.276270735801970e+02 3 2.857818814470013e+02 -2.550253586192556e+02 1.227259509083862e+02 3.964456076362119e+01 4 5.649706430091471e+02 -9.398148097811273e+01 -2.687608153740973e+02 4.879825128165764e+02 - ME 6.719464076924620e-05 + ME 
3.516238941302227e-05 Event 161 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1292,7 +1292,7 @@ Event 161 Batch 0 2 6.770282049439580e+02 -2.863253153105184e+02 -4.911270786072976e+02 -3.676672364525180e+02 3 1.598243093356544e+02 -7.505362471426160e+01 1.299195075310522e+02 -5.506073768810752e+01 4 6.631474857203874e+02 3.613789400247800e+02 3.612075710762453e+02 4.227279741406256e+02 - ME 1.577168105051119e-04 + ME 5.970757951131334e-05 Event 162 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1300,7 +1300,7 @@ Event 162 Batch 0 2 5.178592782584632e+02 -3.271131571456631e+02 3.943743741889439e+02 -7.512700901574514e+01 3 3.730686930366258e+02 -2.885924195736573e+01 -1.360208443078026e+02 -3.461874113706257e+02 4 6.090720287049110e+02 3.559723991030290e+02 -2.583535298811414e+02 4.213144203863710e+02 - ME 1.031749267713353e-04 + ME 2.768303103320498e-05 Event 163 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1308,7 +1308,7 @@ Event 163 Batch 0 2 5.388642316037673e+02 3.152159924116781e+02 3.539969933522669e+01 -4.356149670486711e+02 3 5.364171791816749e+02 -5.299694218906361e+02 3.369785517714305e+01 7.576448071880543e+01 4 4.247185892145582e+02 2.147534294789580e+02 -6.909755451236977e+01 3.598504863298658e+02 - ME 3.508094027565679e-05 + ME 1.485600561394433e-05 Event 164 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1316,7 +1316,7 @@ Event 164 Batch 0 2 6.862697092177667e+02 4.132218376422068e+02 1.310202162324327e+02 -5.320221138485150e+02 3 4.476895523579005e+02 -2.769046850483522e+02 1.374187337517142e+02 3.238299280529301e+02 4 3.660407384243329e+02 -1.363171525938544e+02 -2.684389499841469e+02 2.081921857955847e+02 - ME 3.375894779915149e-05 + ME 1.755563256840939e-05 Event 165 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -1324,7 +1324,7 @@ Event 165 Batch 0 2 2.382444910715278e+02 -2.158277263671036e+02 -9.471372817531817e+00 -1.004446273032522e+02 3 7.304591383576048e+02 4.619003715882296e+02 -1.223345688256177e+02 5.524969256086772e+02 4 5.312963705708673e+02 -2.460726452211260e+02 1.318059416431495e+02 -4.520522983054250e+02 - ME 6.966498968932957e-03 + ME 4.549138184301779e-04 Event 166 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1332,7 +1332,7 @@ Event 166 Batch 0 2 2.131352071380649e+02 -7.633553084455029e+01 -1.899581415396244e+02 5.929087379418958e+01 3 7.305557876753161e+02 8.980971292745940e+01 7.136333043711877e+02 1.279589045828712e+02 4 5.563090051866194e+02 -1.347418208290915e+01 -5.236751628315633e+02 -1.872497783770607e+02 - ME 3.314006956523505e-04 + ME 3.352199959657985e-05 Event 167 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1340,7 +1340,7 @@ Event 167 Batch 0 2 4.122964103002419e+02 -3.405127102276982e+02 6.366431608201744e+01 2.235761145061386e+02 3 4.697083356610920e+02 -2.521100678451879e+02 -2.856113063438232e+01 -3.952855880214881e+02 4 6.179952540386658e+02 5.926227780728861e+02 -3.510318544763516e+01 1.717094735153495e+02 - ME 1.146777177775239e-04 + ME 3.829535931496594e-05 Event 168 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1348,7 +1348,7 @@ Event 168 Batch 0 2 7.156643283953484e+02 -3.999734570317170e+02 4.816586825103861e+02 3.467009924560655e+02 3 6.192344221355605e+02 2.722545660880235e+02 -4.999454120042317e+02 -2.436869012025525e+02 4 1.651012494690919e+02 1.277188909436936e+02 1.828672949384504e+01 -1.030140912535133e+02 - ME 1.017624049822302e-03 + ME 5.027887292283473e-05 Event 169 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1356,7 +1356,7 @@ Event 169 Batch 0 2 
3.626022684949455e+02 7.511110909567982e+01 -2.030941161665286e+02 -2.908461902563517e+02 3 5.580565590514408e+02 -2.529981754432838e+02 -3.439969378312538e+02 3.592842232626199e+02 4 5.793411724536141e+02 1.778870663476037e+02 5.470910539977822e+02 -6.843803300626824e+01 - ME 1.371698416063432e-04 + ME 4.350242525242475e-05 Event 170 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1364,7 +1364,7 @@ Event 170 Batch 0 2 6.602909342483501e+02 4.699653539595539e+02 -3.020118498241596e+02 3.520021683086903e+02 3 1.039297502933440e+02 3.247420585022842e+01 -9.851348423194945e+01 6.473976746580508e+00 4 7.357793154583061e+02 -5.024395598097824e+02 4.005253340561092e+02 -3.584761450552709e+02 - ME 1.673719496447659e-02 + ME 9.967260301798612e-03 Event 171 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1372,7 +1372,7 @@ Event 171 Batch 0 2 1.506693011949600e+02 -3.657300520509282e+01 -1.244227366169959e+02 -7.669834565089053e+01 3 6.344013325830570e+02 -2.026333084464634e+02 -4.956100871165362e+02 3.402578943089165e+02 4 7.149293662219835e+02 2.392063136515561e+02 6.200328237335323e+02 -2.635595486580261e+02 - ME 2.133207113512388e-03 + ME 9.157902172934166e-04 Event 172 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1380,7 +1380,7 @@ Event 172 Batch 0 2 5.352445157558213e+02 -2.018352690102651e+02 3.892440882325296e+02 -3.069825004886504e+02 3 6.716112180685394e+02 2.825227203806547e+02 -5.978593235713698e+02 1.175022124175027e+02 4 2.931442661756383e+02 -8.068745137038898e+01 2.086152353388391e+02 1.894802880711483e+02 - ME 2.630379932615259e-05 + ME 8.067092159940342e-06 Event 173 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1388,7 +1388,7 @@ Event 173 Batch 0 2 6.571348515648592e+02 -2.769863586381786e+02 5.805753619381593e+02 1.343019708712704e+02 3 
5.332990408103321e+02 1.871824832342877e+02 -4.782426732337677e+02 1.437168410371092e+02 4 3.095661076248081e+02 8.980387540389081e+01 -1.023326887043915e+02 -2.780188119083794e+02 - ME 9.985413945498126e-03 + ME 1.269359653092767e-04 Event 174 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1396,7 +1396,7 @@ Event 174 Batch 0 2 6.091496911716730e+02 -4.752584064243671e+02 3.135726231883978e+01 -3.797492797588730e+02 3 6.417481529658018e+02 3.309293137608124e+02 9.015643604119191e+01 5.424004960996682e+02 4 2.491021558625255e+02 1.443290926635548e+02 -1.215136983600317e+02 -1.626512163407953e+02 - ME 1.319192968737130e-03 + ME 1.362612102685676e-04 Event 175 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1404,7 +1404,7 @@ Event 175 Batch 0 2 5.399801778396885e+02 1.966672297646830e+02 2.343185748302537e+02 -4.449667388535759e+02 3 6.987953575798327e+02 -1.857207036318898e+02 -9.664246188148675e+01 6.666955876403318e+02 4 2.612244645804785e+02 -1.094652613279307e+01 -1.376761129487668e+02 -2.217288487867561e+02 - ME 9.528877211334405e-03 + ME 9.613528518728674e-04 Event 176 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1412,7 +1412,7 @@ Event 176 Batch 0 2 6.615757321243968e+02 -4.129469954321281e+02 4.686878756164518e+02 -2.179194886871010e+02 3 1.607981401590110e+02 -6.355407199259605e+01 7.929314438200207e+00 1.474925346731048e+02 4 6.776261277165921e+02 4.765010674247242e+02 -4.766171900546519e+02 7.042695401399614e+01 - ME 6.965204353376922e-04 + ME 3.097907077728356e-04 Event 177 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1420,7 +1420,7 @@ Event 177 Batch 0 2 4.314334067424883e+02 -3.493619040652741e+02 -2.026482683689240e+01 -2.523299055494341e+02 3 4.840006500668400e+02 -1.846595828310067e+02 -1.450727057198388e+02 4.232155216776995e+02 4 
5.845659431906716e+02 5.340214868962809e+02 1.653375325567312e+02 -1.708856161282654e+02 - ME 2.160100049311594e-04 + ME 1.084300812640113e-04 Event 178 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1428,7 +1428,7 @@ Event 178 Batch 0 2 4.528135981327372e+02 -2.544528544607913e+02 1.436928116455424e+02 3.458992272209776e+02 3 3.053350882587867e+02 -1.380299578048218e+02 2.072032295570572e+02 1.767599177741536e+02 4 7.418513136084770e+02 3.924828122656132e+02 -3.508960412025996e+02 -5.226591449951313e+02 - ME 7.384409254828141e-02 + ME 5.382438151181503e-02 Event 179 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1436,7 +1436,7 @@ Event 179 Batch 0 2 7.433145319259943e+02 -2.538538580850882e+02 -6.778753511348521e+02 -1.689962142519080e+02 3 1.647945947160298e+02 1.009041857568576e+02 1.171651165877689e+02 5.699069397138987e+01 4 5.918908733579761e+02 1.529496723282306e+02 5.607102345470832e+02 1.120055202805181e+02 - ME 1.335347052581446e-04 + ME 3.739915465576335e-05 Event 180 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1444,7 +1444,7 @@ Event 180 Batch 0 2 2.396120216689867e+02 1.204528233788652e+02 -1.081248155319049e+02 1.766750195544080e+02 3 5.541470271917004e+02 2.767127195685322e+02 2.999096875483201e+02 3.749175614572557e+02 4 7.062409511393131e+02 -3.971655429473975e+02 -1.917848720164151e+02 -5.515925810116636e+02 - ME 1.316593054412419e-02 + ME 2.792447184071457e-03 Event 181 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1452,7 +1452,7 @@ Event 181 Batch 0 2 2.165494222755782e+02 1.336973493521793e+02 -1.495065670853883e+02 -8.164837697364385e+01 3 6.960869932595207e+02 -2.848973600545249e+02 2.209041937252092e+01 6.347303441548928e+02 4 5.873635844649011e+02 1.512000107023455e+02 1.274161477128675e+02 -5.530819671812490e+02 - ME 
6.164296623062663e-02 + ME 3.488874737600980e-03 Event 182 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1460,7 +1460,7 @@ Event 182 Batch 0 2 6.472681881349898e+02 4.279258056181361e+02 3.994050733201775e+02 -2.762448183472868e+02 3 5.337197582091030e+02 -3.479343829022644e+02 -4.034091782989213e+02 -3.254965992745409e+01 4 3.190120536559070e+02 -7.999142271587166e+01 4.004104978744005e+00 3.087944782747408e+02 - ME 6.393158381765308e-05 + ME 5.523679400573375e-05 Event 183 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1468,7 +1468,7 @@ Event 183 Batch 0 2 6.165307808531154e+02 -3.276949594572818e+02 8.808524820164887e+01 -5.147496540405800e+02 3 2.975460412740734e+02 -1.030095950018341e+02 -2.375020297789284e+02 1.466814775843215e+02 4 5.859231778728107e+02 4.307045544591158e+02 1.494167815772794e+02 3.680681764562588e+02 - ME 6.887775529805495e-05 + ME 2.562496117427957e-05 Event 184 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1476,7 +1476,7 @@ Event 184 Batch 0 2 5.645337360463252e+02 -3.940276919793660e+02 3.776398996283964e+02 1.443212503288767e+02 3 5.368100353438223e+02 2.392766596964613e+02 -1.719264331693737e+02 -4.487237410122139e+02 4 3.986562286098531e+02 1.547510322829050e+02 -2.057134664590229e+02 3.044024906833372e+02 - ME 3.553984578535888e-05 + ME 1.712138666139329e-05 Event 185 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1484,7 +1484,7 @@ Event 185 Batch 0 2 6.347397779710931e+02 2.522092504724420e+02 -1.599825720327363e+02 5.600809373302327e+02 3 4.566768168089404e+02 -3.359958684022406e+02 -1.272903681003782e+02 -2.818823400219340e+02 4 4.085834052199659e+02 8.378661792979838e+01 2.872729401331145e+02 -2.781985973082986e+02 - ME 1.184197550833168e-03 + ME 1.836859309200860e-04 Event 186 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1492,7 +1492,7 @@ Event 186 Batch 0 2 7.089823220133230e+02 -5.197119220861886e+02 4.248734840868308e+02 -2.281183322067745e+02 3 5.364076825758043e+02 3.588264146200084e+02 -3.973752875032956e+02 3.270606945152315e+01 4 2.546099954108725e+02 1.608855074661802e+02 -2.749819658353518e+01 1.954122627552515e+02 - ME 2.583895514537347e-05 + ME 1.318469173008218e-05 Event 187 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1500,7 +1500,7 @@ Event 187 Batch 0 2 4.835105223217566e+02 -2.128653471696258e+02 1.375287019182911e+02 -4.117725407538514e+02 3 7.240136612790383e+02 4.407273454759851e+02 -4.896543389042274e+01 5.723264583716990e+02 4 2.924758163992057e+02 -2.278619983063593e+02 -8.856326802786833e+01 -1.605539176178473e+02 - ME 5.307563978210835e-04 + ME 9.185777086042985e-05 Event 188 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1508,7 +1508,7 @@ Event 188 Batch 0 2 6.611118500396009e+02 3.502021063704277e+02 -2.011693879247277e+02 -5.234102027267809e+02 3 3.072944371702247e+02 -6.894916504330918e+01 -1.599953986835475e+02 2.531350551695447e+02 4 5.315937127901742e+02 -2.812529413271184e+02 3.611647866082752e+02 2.702751475572362e+02 - ME 6.863567490702385e-05 + ME 3.862980709292737e-05 Event 189 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1516,7 +1516,7 @@ Event 189 Batch 0 2 7.498478362545707e+02 6.780504955298834e+02 -3.199144947524264e+02 -1.319162971889924e+01 3 3.253008430749361e+02 -2.985087551774363e+02 1.291384938207140e+02 6.034152914782593e+00 4 4.248513206704935e+02 -3.795417403524470e+02 1.907760009317124e+02 7.157476804116639e+00 - ME 8.583750584152986e-05 + ME 1.504471760657040e-05 Event 190 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1524,7 +1524,7 @@ 
Event 190 Batch 0 2 4.938867893347995e+02 3.689671478502748e+02 -1.218724623869293e+02 3.048516153777389e+02 3 5.264063001598521e+02 6.631942569346465e+01 1.276367949726208e+02 -5.063735530147588e+02 4 4.797069105053494e+02 -4.352865735437401e+02 -5.764332585691415e+00 2.015219376370201e+02 - ME 4.759343488474735e-05 + ME 2.269926034328256e-05 Event 191 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1532,7 +1532,7 @@ Event 191 Batch 0 2 3.681793141805986e+02 -3.225132888415706e+02 1.579589482507471e+02 -8.117977937027918e+01 3 5.431126642386394e+02 4.058413736814005e+01 9.147123993851424e+01 5.338139246166097e+02 4 5.887080215807621e+02 2.819291514734305e+02 -2.494301881892614e+02 -4.526341452463304e+02 - ME 4.908990110546420e-03 + ME 1.427494731558637e-03 Event 192 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1540,7 +1540,7 @@ Event 192 Batch 0 2 6.054165399887861e+02 1.497087111729466e+02 8.905021611535379e+01 5.798159601983524e+02 3 2.106656439489222e+02 1.451894976721945e+02 -1.487249448604451e+02 3.436443048222171e+01 4 6.839178160622922e+02 -2.948982088451411e+02 5.967472874509133e+01 -6.141803906805740e+02 - ME 4.294450320853435e-02 + ME 6.984876913518998e-03 Event 193 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1548,7 +1548,7 @@ Event 193 Batch 0 2 2.753169163933055e+02 -1.695475157411122e+02 -2.139406274107579e+02 3.581134319495643e+01 3 5.760219428901971e+02 -3.264616044953138e+02 1.527507522369444e+02 -4.493231656306969e+02 4 6.486611407164972e+02 4.960091202364260e+02 6.118987517381347e+01 4.135118224357404e+02 - ME 1.537583375796735e-04 + ME 4.273063058931925e-05 Event 194 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1556,7 +1556,7 @@ Event 194 Batch 0 2 3.445934948105150e+02 -2.970257025567896e+02 -8.183019525038441e+01 
1.543509890854414e+02 3 7.485441862377920e+02 6.623797851941252e+02 1.083400559332054e+02 -3.314119056355291e+02 4 4.068623189516925e+02 -3.653540826373358e+02 -2.650986068282081e+01 1.770609165500877e+02 - ME 3.024610065690235e-05 + ME 4.921158833271929e-06 Event 195 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1564,7 +1564,7 @@ Event 195 Batch 0 2 2.012122274303647e+02 -5.190018365965096e+01 1.322177369426910e+02 -1.425173724194237e+02 3 7.122630330184543e+02 -3.054768058087834e+02 -2.528097616133813e+02 5.916838461125119e+02 4 5.865247395511832e+02 3.573769894684365e+02 1.205920246706904e+02 -4.491664736930883e+02 - ME 3.011639483286710e-03 + ME 4.696445912229638e-04 Event 196 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1572,7 +1572,7 @@ Event 196 Batch 0 2 4.490485793345989e+02 3.485190427929747e+02 -2.661098616642627e+01 -2.819059396826192e+02 3 5.531554978829222e+02 -3.330165694254377e+02 4.416170126965178e+02 7.442003978758296e+00 4 4.977959227824785e+02 -1.550247336753688e+01 -4.150060265300915e+02 2.744639357038610e+02 - ME 4.340266456570635e-05 + ME 9.363355109875406e-06 Event 197 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1580,7 +1580,7 @@ Event 197 Batch 0 2 3.951249254444253e+02 -2.278358800090239e+02 3.101157211704546e+02 -8.968142489336992e+01 3 3.607080640108546e+02 -2.889948719219027e+02 2.155030307719242e+02 -1.227661082778765e+01 4 7.441670105447209e+02 5.168307519309257e+02 -5.256187519423792e+02 1.019580357211576e+02 - ME 3.377741088449004e-02 + ME 6.597373610109231e-03 Event 198 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1588,7 +1588,7 @@ Event 198 Batch 0 2 3.750236904637998e+02 1.183014344420310e+02 -1.005952209347265e+02 -3.413621838211424e+02 3 4.381296266085964e+02 -2.726825461625328e+02 1.003845461170281e+02 
-3.279096546785175e+02 4 6.868466829276033e+02 1.543811117205018e+02 2.106748176980602e-01 6.692718384996598e+02 - ME 9.606390506705955e-04 + ME 6.145502577419889e-04 Event 199 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1596,7 +1596,7 @@ Event 199 Batch 0 2 2.454478562244572e+02 -2.058455361543722e+02 -1.131056012155068e+02 -7.126982772660261e+01 3 5.321797086694488e+02 -9.806778012582416e+01 -4.820333037417012e+02 -2.030808875905193e+02 4 7.223724351060940e+02 3.039133162801963e+02 5.951389049572081e+02 2.743507153171219e+02 - ME 1.577081887352965e-03 + ME 3.088173795554332e-04 Event 200 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1604,7 +1604,7 @@ Event 200 Batch 0 2 3.952431318363244e+02 3.031309873729303e+02 9.337877017948550e+01 2.358159092128122e+02 3 6.094031244332663e+02 -7.796753338981905e+01 -5.315426896439308e+02 -2.876727322709444e+02 4 4.953537437304092e+02 -2.251634539831113e+02 4.381639194644453e+02 5.185682305813224e+01 - ME 6.703240553489506e-05 + ME 1.668296552597111e-05 Event 201 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1612,7 +1612,7 @@ Event 201 Batch 0 2 6.497938633639732e+02 3.771120671245744e+02 3.553445817627057e+02 -3.921081252746440e+02 3 3.369790646193914e+02 -2.140351778515325e+02 1.061239955238163e+02 2.376584318047305e+02 4 5.132270720166357e+02 -1.630768892730420e+02 -4.614685772865220e+02 1.544496934699135e+02 - ME 6.283412004793947e-05 + ME 2.404518058628388e-05 Event 202 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1620,7 +1620,7 @@ Event 202 Batch 0 2 7.267802742470179e+02 6.523432021666289e+02 -1.481957728499301e+02 2.840702844913056e+02 3 3.546086620137576e+02 -3.102429173963679e+02 -5.939291787501398e+01 -1.611493614224694e+02 4 4.186110637392242e+02 -3.421002847702610e+02 2.075886907249440e+02 
-1.229209230688360e+02 - ME 1.894138330341389e-04 + ME 2.830403199974809e-05 Event 203 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1628,7 +1628,7 @@ Event 203 Batch 0 2 4.830190702985662e+02 2.789429895135886e+02 -3.943102945050296e+02 -4.197918611657844e+00 3 5.247163710833165e+02 -4.266462829986153e+02 3.263988520595893e+01 3.037019215942698e+02 4 4.922645586181170e+02 1.477032934850268e+02 3.616704092990706e+02 -2.995040029826120e+02 - ME 5.831910678002871e-04 + ME 5.153190919865371e-05 Event 204 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1636,7 +1636,7 @@ Event 204 Batch 0 2 6.952375769935185e+02 3.823764713153302e+01 6.531840992713522e+02 -2.350397908115460e+02 3 6.250862947179036e+02 1.031861473443961e+02 -5.506835576815644e+02 2.771878679515999e+02 4 1.796761282885781e+02 -1.414237944759291e+02 -1.025005415897879e+02 -4.214807714005369e+01 - ME 1.802858800889920e-04 + ME 1.903000177287069e-05 Event 205 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1644,7 +1644,7 @@ Event 205 Batch 0 2 5.625197268936781e+02 2.955060596751036e+02 4.395356105446072e+02 -1.895074112086703e+02 3 3.144813194259642e+02 -1.941101430078122e+02 -7.073026664887073e+00 -2.473251401357733e+02 4 6.229989536803572e+02 -1.013959166672914e+02 -4.324625838797200e+02 4.368325513444433e+02 - ME 1.140145509231641e-04 + ME 3.163472493443465e-05 Event 206 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1652,7 +1652,7 @@ Event 206 Batch 0 2 5.487698581700869e+02 -4.771827558939671e+02 -2.639484985605369e+02 6.145050708573941e+01 3 4.357856725513919e+02 1.877155863290790e+02 1.701172104948722e+02 3.545872893148349e+02 4 5.154444692785200e+02 2.894671695648880e+02 9.383128806566407e+01 -4.160377964005746e+02 - ME 4.167786087259531e-03 + ME 3.341888001113221e-04 Event 207 Batch 
0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1660,7 +1660,7 @@ Event 207 Batch 0 2 5.289473514933904e+02 -3.230637718239221e+02 -3.258094337294262e+02 2.631792409740627e+02 3 3.730441408755686e+02 -1.145152671243400e+02 -7.298530142052728e+01 -3.474497523579300e+02 4 5.980085076310412e+02 4.375790389482623e+02 3.987947351499535e+02 8.427051138386733e+01 - ME 1.161501350367753e-04 + ME 3.789028948405571e-05 Event 208 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1668,7 +1668,7 @@ Event 208 Batch 0 2 3.144460531270953e+02 3.105028133645123e+02 -3.495125011961062e+01 3.525242310830974e+01 3 7.230517599976935e+02 -6.554206809343713e+02 2.220922910679198e+02 2.095294558946058e+02 4 4.625021868752117e+02 3.449178675698588e+02 -1.871410409483092e+02 -2.447818790029155e+02 - ME 4.858457850437588e-04 + ME 2.941989209837521e-05 Event 209 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1676,7 +1676,7 @@ Event 209 Batch 0 2 2.827014058170527e+02 -6.682954863774688e+01 -1.958656753088385e+02 -1.925890275057887e+02 3 5.969812148172332e+02 5.625717004655273e+02 1.060136244597389e+02 -1.692949027847388e+02 4 6.203173793657136e+02 -4.957421518277804e+02 8.985205084909943e+01 3.618839302905275e+02 - ME 1.004351001266980e-04 + ME 2.261939336541961e-05 Event 210 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1684,7 +1684,7 @@ Event 210 Batch 0 2 3.369223392964550e+02 -2.366581006943837e+02 8.850719545688517e+01 -2.228813191927023e+02 3 6.926279093100447e+02 9.835546321295956e+01 -1.581805884470998e+02 6.671120783270956e+02 4 4.704497513935005e+02 1.383026374814242e+02 6.967339299021461e+01 -4.442307591343933e+02 - ME 5.974710408786874e-02 + ME 3.044010300440331e-03 Event 211 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 
@@ -1692,7 +1692,7 @@ Event 211 Batch 0 2 5.754314663824422e+02 -1.965408456680789e+02 -5.399725108422632e+02 3.037689947684008e+01 3 6.656941886103589e+02 4.112771407945243e+02 5.114655840792436e+02 1.113679599883347e+02 4 2.588743450071987e+02 -2.147362951264454e+02 2.850692676301957e+01 -1.417448594651748e+02 - ME 4.382347812376007e-04 + ME 1.754510489093768e-05 Event 212 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1700,7 +1700,7 @@ Event 212 Batch 0 2 5.922157374848572e+02 8.073316194509509e+00 4.947261155542873e+02 -3.254233732830556e+02 3 3.635572903001510e+02 8.951663862813328e+01 4.011175755255380e+01 3.500738802669425e+02 4 5.442269722149914e+02 -9.758995482264278e+01 -5.348378731068407e+02 -2.465050698388706e+01 - ME 3.041427876287276e-04 + ME 1.919214373141161e-04 Event 213 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1708,7 +1708,7 @@ Event 213 Batch 0 2 7.434820262506830e+02 2.991548764052629e+02 2.111623598614188e+02 -6.470566753063675e+02 3 5.607612173038236e+02 -2.664197873565705e+02 -1.905271140771768e+02 4.551626726109781e+02 4 1.957567564454930e+02 -3.273508904869271e+01 -2.063524578424195e+01 1.918940026953895e+02 - ME 1.827786070323022e-04 + ME 1.896082550340891e-04 Event 214 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1716,7 +1716,7 @@ Event 214 Batch 0 2 5.400874280734793e+02 3.457358963402696e+02 2.445843697627679e+02 -3.351710101016577e+02 3 3.400793067879315e+02 1.482066942304564e+02 1.256466447865830e+02 2.791086371729012e+02 4 6.198332651385892e+02 -4.939425905707261e+02 -3.702310145493508e+02 5.606237292875651e+01 - ME 1.356968066378560e-04 + ME 6.515553919952984e-05 Event 215 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1724,7 +1724,7 @@ Event 215 Batch 0 2 3.916345321859864e+02 3.271767110560381e+02 
-1.945589530122144e+02 9.208594000107233e+01 3 6.136750729169615e+02 -1.269585669220027e+02 2.644680756040779e+02 -5.390132228350478e+02 4 4.946903948970534e+02 -2.002181441340350e+02 -6.990912259186331e+01 4.469272828339764e+02 - ME 6.207321332343461e-05 + ME 3.427926940877871e-05 Event 216 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1732,7 +1732,7 @@ Event 216 Batch 0 2 3.767411090262154e+02 1.602503356822860e+02 2.758455349572533e+02 -2.004069210086422e+02 3 4.061922956351256e+02 3.340053729931861e+02 2.237650079776778e+02 5.798114391563544e+01 4 7.170665953386593e+02 -4.942557086754721e+02 -4.996105429349309e+02 1.424257770930068e+02 - ME 1.232271832865728e-03 + ME 2.360785017217177e-04 Event 217 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1740,7 +1740,7 @@ Event 217 Batch 0 2 6.474118977458852e+02 -5.378641111590873e+02 -3.279650037002520e+02 1.492759847325320e+02 3 5.088298200539713e+02 3.261878344469131e+02 1.555821256186315e+02 -3.581947579501665e+02 4 3.437582822001433e+02 2.116762767121744e+02 1.723828780816206e+02 2.089187732176345e+02 - ME 3.357118960820415e-05 + ME 1.388331578224744e-05 Event 218 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1748,7 +1748,7 @@ Event 218 Batch 0 2 6.658501161076259e+02 -6.577627036244854e+02 -3.020200479570956e+01 9.895676706252418e+01 3 2.516345839620714e+02 1.565221509782131e+02 -1.156477271957936e+02 1.595192254662914e+02 4 5.825152999303023e+02 5.012405526462722e+02 1.458497319915031e+02 -2.584759925288157e+02 - ME 5.956187308313417e-04 + ME 1.036808356896783e-04 Event 219 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1756,7 +1756,7 @@ Event 219 Batch 0 2 4.328556070633435e+02 6.122246558068494e+01 -1.687441385117925e+02 3.938796795879554e+02 3 6.500677455605621e+02 -3.703058656885360e+02 
4.356876543064814e+02 -3.092537914719426e+02 4 4.170766473760945e+02 3.090834001078509e+02 -2.669435157946888e+02 -8.462588811601287e+01 - ME 2.797067114354785e-04 + ME 9.046106878448173e-05 Event 220 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1764,7 +1764,7 @@ Event 220 Batch 0 2 3.686297280598666e+02 -3.497113779929074e+02 -8.765282776369953e+01 7.685577594963354e+01 3 4.155522773953191e+02 -1.777404948015450e+02 -1.525848366500187e+02 3.432344379292750e+02 4 7.158179945448145e+02 5.274518727944524e+02 2.402376644137182e+02 -4.200902138789084e+02 - ME 3.485410710153060e-03 + ME 1.676729229638681e-03 Event 221 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1772,7 +1772,7 @@ Event 221 Batch 0 2 5.295220830718469e+02 3.654688468413813e+01 4.204675060608333e+02 3.197890523886257e+02 3 7.127556392876786e+02 -1.727486268095863e+02 -4.342549693537605e+02 -5.381460163035255e+02 4 2.577222776404743e+02 1.362017421254481e+02 1.378746329292729e+01 2.183569639148998e+02 - ME 2.819264207321091e-05 + ME 2.031931825964470e-05 Event 222 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1780,7 +1780,7 @@ Event 222 Batch 0 2 2.464305981122427e+02 -2.054199106396077e+02 6.127423271580306e+01 1.215572638876956e+02 3 6.926647117218595e+02 4.702892479611936e+02 3.872350261814336e+02 -3.296383785530530e+02 4 5.609046901658980e+02 -2.648693373215859e+02 -4.485092588972366e+02 2.080811146653574e+02 - ME 6.319142394583372e-05 + ME 1.678695785515194e-05 Event 223 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1788,7 +1788,7 @@ Event 223 Batch 0 2 2.463384302181125e+02 -1.209251938955738e+02 -2.140981972257043e+02 -1.488897673935926e+01 3 6.819620845265065e+02 -2.400891875757811e+02 5.819023806457059e+02 2.623339210620683e+02 4 5.716994852553812e+02 3.610143814713547e+02 
-3.678041834200016e+02 -2.474449443227091e+02 - ME 3.931927185620913e-04 + ME 4.810915220985587e-05 Event 224 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1796,7 +1796,7 @@ Event 224 Batch 0 2 2.236851263016067e+02 -8.671871524968952e+01 1.717231909970332e+02 1.141317038679677e+02 3 5.308972974363861e+02 -3.715833295102001e+01 4.680039348616383e+02 2.478780257941054e+02 4 7.454175762620068e+02 1.238770482007099e+02 -6.397271258586715e+02 -3.620097296620728e+02 - ME 8.708656265179471e-02 + ME 6.017706528853119e-02 Event 225 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1804,7 +1804,7 @@ Event 225 Batch 0 2 5.094176014319268e+02 1.569347096242780e+02 -1.561291130928888e+00 -4.846394040251013e+02 3 7.252311334449815e+02 -3.845161955462210e+02 -4.374219820797174e+01 6.133466494377277e+02 4 2.653512651230916e+02 2.275814859219426e+02 4.530348933890067e+01 -1.287072454126262e+02 - ME 3.974215742688118e-04 + ME 1.151501859389029e-04 Event 226 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1812,7 +1812,7 @@ Event 226 Batch 0 2 6.863217264048350e+02 -2.391756120967483e+02 -6.171186323675804e+02 1.816511279850093e+02 3 5.332348374442744e+02 1.096335504493486e+02 4.112484130583279e+02 -3.212391931833643e+02 4 2.804434361508906e+02 1.295420616473995e+02 2.058702193092524e+02 1.395880651983551e+02 - ME 3.797053871351767e-05 + ME 1.438206074993319e-05 Event 227 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1820,7 +1820,7 @@ Event 227 Batch 0 2 7.243206345463230e+02 -5.280189925476210e+02 -1.406011303275692e+02 4.754657162080069e+02 3 5.487499634657129e+02 3.840442912861271e+02 -1.353123555187442e+01 -3.917312987222202e+02 4 2.269294019879644e+02 1.439747012614939e+02 1.541323658794436e+02 -8.373441748578679e+01 - ME 2.903986554770466e-04 + ME 
5.165623507180856e-05 Event 228 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1828,7 +1828,7 @@ Event 228 Batch 0 2 2.119578664379945e+02 1.625437651479949e+01 -1.806612394559917e+02 1.096514885776142e+02 3 6.254097456672617e+02 -3.200704000326812e+01 3.158243706171928e+02 5.388579277416935e+02 4 6.626323878947439e+02 1.575266348846865e+01 -1.351631311612011e+02 -6.485094163193077e+02 - ME 8.951233069377997e-01 + ME 3.800526374221887e-02 Event 229 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1836,7 +1836,7 @@ Event 229 Batch 0 2 5.921227120343664e+02 -3.877491982207575e+02 4.449193714386763e+02 -4.802726626309342e+01 3 4.688278331283221e+02 3.470549659129084e+02 -1.517581364471262e+02 -2.762641051115459e+02 4 4.390494548373113e+02 4.069423230784909e+01 -2.931612349915501e+02 3.242913713746393e+02 - ME 3.492131538818778e-05 + ME 1.250052930035257e-05 Event 230 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1844,7 +1844,7 @@ Event 230 Batch 0 2 4.261952284727868e+02 2.153699775439378e+02 -1.171086083390750e+02 3.486312082969335e+02 3 3.540619701921573e+02 3.070144260847319e+01 1.307424531367546e+02 3.276029778648147e+02 4 7.197428013350559e+02 -2.460714201524109e+02 -1.363384479767965e+01 -6.762341861617483e+02 - ME 3.186738302883428e-01 + ME 4.711214236813061e-02 Event 231 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1852,7 +1852,7 @@ Event 231 Batch 0 2 4.205236024420392e+02 7.533931576750228e+01 -3.260217181731272e+02 -2.547036061581322e+02 3 5.397543491930860e+02 8.423195081267914e+01 -1.158376015978276e+02 5.204050211049134e+02 4 5.397220483648740e+02 -1.595712665801811e+02 4.418593197709548e+02 -2.657014149467809e+02 - ME 5.532186388062512e-04 + ME 3.265984123744224e-04 Event 232 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -1860,7 +1860,7 @@ Event 232 Batch 0 2 4.295782852421121e+02 3.239064445356881e+02 9.240815775655221e-01 2.821724019337124e+02 3 7.183371274312143e+02 -6.155391061575082e+02 -1.955291718271078e+02 -3.144649112405858e+02 4 3.520845873266736e+02 2.916326616218201e+02 1.946050902495422e+02 3.229250930687335e+01 - ME 6.730603828970119e-05 + ME 1.049779024540051e-05 Event 233 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1868,7 +1868,7 @@ Event 233 Batch 0 2 3.640046126075324e+02 -2.220120664068515e+02 -1.165482463207536e+02 2.638683509799470e+02 3 4.682121509308883e+02 -1.009786196736112e+02 3.762431872847591e+02 2.597441061312976e+02 4 6.677832364615790e+02 3.229906860804628e+02 -2.596949409640055e+02 -5.236124571112447e+02 - ME 5.385640989777132e-03 + ME 7.598357868514145e-04 Event 234 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1876,7 +1876,7 @@ Event 234 Batch 0 2 8.690043548936441e+01 -2.607433849884744e+01 -7.258333015587984e+01 4.004341073848801e+01 3 6.785651905172676e+02 -3.574930335951373e+02 -4.725723606052789e+01 5.748184081539155e+02 4 7.345343739933678e+02 3.835673720939847e+02 1.198405662164078e+02 -6.148618188924036e+02 - ME 1.962113644780599e-01 + ME 8.152211059226219e-02 Event 235 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1884,7 +1884,7 @@ Event 235 Batch 0 2 3.000566282865331e+02 1.219146462304108e+01 -2.126850238006026e+02 2.113064812540423e+02 3 7.160981218147422e+02 2.575873756248088e+02 2.779062108697769e+02 -6.076293293985470e+02 4 4.838452498987246e+02 -2.697788402478500e+02 -6.522118706917435e+01 3.963228481445046e+02 - ME 3.940402333844027e-05 + ME 2.498899672933017e-05 Event 236 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1892,7 +1892,7 @@ Event 236 Batch 0 2 
1.510518772182422e+02 -9.497518588910037e+01 1.467158067736534e+01 1.165380984781943e+02 3 6.955499852411461e+02 5.933480346078575e+02 3.495450158124774e+02 9.770452249822526e+01 4 6.533981375406115e+02 -4.983728487187572e+02 -3.642165964898426e+02 -2.142426209764196e+02 - ME 1.121647028585911e-03 + ME 2.623118294900277e-04 Event 237 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1900,7 +1900,7 @@ Event 237 Batch 0 2 2.173874152942701e+02 2.069918593916189e+02 -3.850229167793934e+01 -5.412237993169356e+01 3 7.305677895866185e+02 -6.701932224704495e+02 -2.421540700080861e+02 1.610333695687662e+02 4 5.520447951191120e+02 4.632013630788306e+02 2.806563616860255e+02 -1.069109896370727e+02 - ME 1.822378225061386e-04 + ME 2.170005261464319e-05 Event 238 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1908,7 +1908,7 @@ Event 238 Batch 0 2 6.349573912113930e+02 -3.336495545457479e+02 -4.785400196851591e+02 2.506956580500139e+02 3 5.768887318987100e+02 4.812119270965607e+02 2.334547330568691e+02 -2.161818165921041e+02 4 2.881538768898968e+02 -1.475623725508129e+02 2.450852866282900e+02 -3.451384145790988e+01 - ME 9.810731053503000e-05 + ME 1.383744831772315e-05 Event 239 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1916,7 +1916,7 @@ Event 239 Batch 0 2 5.349076725903783e+02 -5.331874414268931e+02 1.887721601290929e+01 -3.848403846142781e+01 3 3.658437465440003e+02 8.335465236419728e+01 1.670818061666301e+01 -3.558292926602242e+02 4 5.992485808656214e+02 4.498327890626960e+02 -3.558539662957234e+01 3.943133311216517e+02 - ME 9.226736931333760e-05 + ME 2.560110521983184e-05 Event 240 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1924,7 +1924,7 @@ Event 240 Batch 0 2 2.870582387324442e+02 1.830793600232297e+02 -1.562409872742485e+02 1.564389154054251e+02 3 
6.007192677438852e+02 3.433229388031108e+02 4.688113613010560e+02 -1.523446941819630e+02 4 6.122224935236703e+02 -5.264022988263405e+02 -3.125703740268075e+02 -4.094221223461989e+00 - ME 1.424405912705748e-04 + ME 3.548113744927254e-05 Event 241 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1932,7 +1932,7 @@ Event 241 Batch 0 2 7.424696267657401e+02 4.823783107714221e+02 2.498315161211407e+02 5.061190823507636e+02 3 2.455726236162737e+02 -1.827879695947952e+02 -1.199757723946156e+02 -1.118046764652876e+02 4 5.119577496179861e+02 -2.995903411766270e+02 -1.298557437265251e+02 -3.943144058854759e+02 - ME 2.705973755259623e-03 + ME 2.366266620918590e-04 Event 242 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1940,7 +1940,7 @@ Event 242 Batch 0 2 7.249130370348905e+02 1.676828147928013e+02 6.059046362201677e+02 -3.609168279440810e+02 3 6.240672718074169e+02 -4.529413961306761e+01 -5.490982345027019e+02 2.930862151720549e+02 4 1.510196911576933e+02 -1.223886751797337e+02 -5.680640171746593e+01 6.783061277202641e+01 - ME 4.587322306592483e-05 + ME 1.668420503127583e-05 Event 243 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1948,7 +1948,7 @@ Event 243 Batch 0 2 4.655090712555229e+02 2.096323612054770e+02 2.113490506800235e+02 3.578890153850057e+02 3 5.764797256412519e+02 6.697224883641857e+01 -5.382210340689440e+02 -1.953502251008744e+02 4 4.580112031032257e+02 -2.766046100418949e+02 3.268719833889206e+02 -1.625387902841314e+02 - ME 2.309042201876567e-04 + ME 3.999521919602606e-05 Event 244 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1956,7 +1956,7 @@ Event 244 Batch 0 2 5.237109195354749e+02 1.305098338947756e+02 -4.868141165486322e+02 -1.423106687020528e+02 3 5.804450110242352e+02 -4.045654344879671e+02 2.643676733537771e+02 3.214855413949400e+02 4 
3.958440694402901e+02 2.740556005931916e+02 2.224464431948551e+02 -1.791748726928872e+02 - ME 2.644202232750943e-04 + ME 2.634847163425152e-05 Event 245 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1964,7 +1964,7 @@ Event 245 Batch 0 2 2.629169357520612e+02 2.457511487795889e+02 -4.402365929491729e+01 -8.242333044139184e+01 3 6.931386101565748e+02 -5.195573187661655e+02 4.004017488088275e+02 -2.240084037645317e+02 4 5.439444540913644e+02 2.738061699865766e+02 -3.563780895139104e+02 3.064317342059234e+02 - ME 4.288053786412853e-05 + ME 1.052590061693975e-05 Event 246 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1972,7 +1972,7 @@ Event 246 Batch 0 2 6.300937687157445e+02 -5.459948028041557e+02 3.085954426748102e+02 6.063567799240802e+01 3 1.673910408536145e+02 -3.546130270298926e+01 7.662824936562275e+01 -1.445350060290698e+02 4 7.025151904306430e+02 5.814561055071442e+02 -3.852236920404341e+02 8.389932803666261e+01 - ME 6.282756509154168e-04 + ME 1.915763997923398e-04 Event 247 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1980,7 +1980,7 @@ Event 247 Batch 0 2 2.577847506495701e+02 2.418237207037818e+02 -8.449121421856779e+01 2.890502538162603e+01 3 5.130193185035739e+02 4.381905811488919e+02 1.366496386102691e+02 2.291390669832418e+02 4 7.291959308468561e+02 -6.800143018526737e+02 -5.215842439170134e+01 -2.580440923648679e+02 - ME 4.005872724472581e-03 + ME 1.831864018495938e-03 Event 248 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1988,7 +1988,7 @@ Event 248 Batch 0 2 7.033207479153643e+02 -5.040306065309413e+02 -2.020637997366072e+02 4.469714117975369e+02 3 1.758360012551320e+02 -1.471306652922549e+01 -4.035460943683606e+00 -1.751728862172264e+02 4 6.208432508295037e+02 5.187436730601667e+02 2.060992606802909e+02 -2.717985255803103e+02 - 
ME 5.592865021063005e-04 + ME 1.512538512828554e-04 Event 249 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1996,7 +1996,7 @@ Event 249 Batch 0 2 3.018816177222694e+02 5.523075638651412e+01 1.752331212074551e+02 2.395316845419020e+02 3 6.597415560701297e+02 6.315352823685419e+01 -6.561001191322722e+02 -2.834054254405022e+01 4 5.383768262076012e+02 -1.183842846233684e+02 4.808669979248172e+02 -2.111911419978518e+02 - ME 4.868100986861644e-04 + ME 9.225490912808109e-05 Event 250 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2004,7 +2004,7 @@ Event 250 Batch 0 2 2.166381935101301e+02 -1.289072913913530e+02 -1.189615590004073e+02 -1.271344351215279e+02 3 6.815426093761062e+02 -2.511966318704653e+02 5.323234433390903e+02 3.435583388650892e+02 4 6.018191971137635e+02 3.801039232618182e+02 -4.133618843386827e+02 -2.164239037435611e+02 - ME 3.468666532553966e-04 + ME 6.586594805989363e-05 Event 251 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2012,7 +2012,7 @@ Event 251 Batch 0 2 6.676961532387151e+02 -3.991265595084280e+01 -4.419965947723094e+02 4.988628500443886e+02 3 7.150412702460949e+02 3.921851524844908e+01 5.505653759000154e+02 -4.545587894617490e+02 4 1.172625765151894e+02 6.941407023942340e-01 -1.085687811277060e+02 -4.430406058263954e+01 - ME 5.615833562023813e-04 + ME 4.930952510857648e-05 Event 252 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2020,7 +2020,7 @@ Event 252 Batch 0 2 2.112668789066533e+02 -1.147554660376938e+02 3.364589711187055e+01 -1.741632301749357e+02 3 7.393007599584276e+02 2.529046383258835e+02 -3.593132473314827e+02 5.945576909606565e+02 4 5.494323611349191e+02 -1.381491722881897e+02 3.256673502196121e+02 -4.203944607857206e+02 - ME 2.709805393201018e-03 + ME 3.541023077707110e-04 Event 253 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2028,7 +2028,7 @@ Event 253 Batch 0 2 7.299659304470913e+01 -4.405884533650594e+01 -5.451291667290519e+01 2.038780663930336e+01 3 7.253475305576840e+02 3.245698054519170e+02 -1.402290280555607e+02 -6.333397991328418e+02 4 7.016558763976062e+02 -2.805109601154107e+02 1.947419447284657e+02 6.129519924935382e+02 - ME 6.484723438037138e-04 + ME 3.511004874943257e-04 Event 254 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2036,7 +2036,7 @@ Event 254 Batch 0 2 1.982520535096858e+02 -6.164633378269741e+01 1.773450413210087e+02 -6.365801262063783e+01 3 7.183815394471145e+02 -1.984891252513599e+02 -6.893152145826987e+02 -3.896971029099802e+01 4 5.833664070431995e+02 2.601354590340572e+02 5.119701732616900e+02 1.026277229116358e+02 - ME 9.210498573936143e-05 + ME 1.539519794804785e-05 Event 255 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2044,7 +2044,7 @@ Event 255 Batch 0 2 5.347080663542586e+02 -5.063606624096446e+02 1.592577719822621e+02 6.440929941880935e+01 3 2.475406015289465e+02 -1.856063881081879e+02 3.468010668896048e+00 -1.637516137347836e+02 4 7.177513321167953e+02 6.919670505178326e+02 -1.627257826511582e+02 9.934231431597431e+01 - ME 1.305481727349711e-03 + ME 3.137689362725149e-04 Event 0 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2052,7 +2052,7 @@ Event 0 Batch 1 2 5.775677821222389e+02 4.314431287975208e+02 -2.652567205762379e+02 -2.776332864556192e+02 3 6.023469575940325e+02 -3.228069847179709e+02 5.005558924007591e+02 8.978477890465942e+01 4 3.200852602837275e+02 -1.086361440795499e+02 -2.352991718245218e+02 1.878485075509607e+02 - ME 2.846168667868940e-05 + ME 7.533072458757011e-06 Event 1 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2060,7 
+2060,7 @@ Event 1 Batch 1 2 7.241206267812560e+02 3.541578305635416e+02 -4.894807402105655e+02 3.991635230623179e+02 3 7.375567605136832e+02 -3.903081173548693e+02 4.920451519627784e+02 -3.867054653560791e+02 4 3.832261270506111e+01 3.615028679132773e+01 -2.564411752212873e+00 -1.245805770623896e+01 - ME 1.002871021831580e-03 + ME 7.043932941624384e-05 Event 2 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2068,7 +2068,7 @@ Event 2 Batch 1 2 4.849204091734790e+02 2.108660079931152e+02 4.054727376659824e+02 1.620962335024329e+02 3 2.728468517759738e+02 4.961449545460115e+01 2.005017763154939e+02 1.782774356422519e+02 4 7.422327390505470e+02 -2.604805034477164e+02 -6.059745139814763e+02 -3.403736691446848e+02 - ME 2.729395913593408e-02 + ME 1.721146206228212e-02 Event 3 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2076,7 +2076,7 @@ Event 3 Batch 1 2 4.264155576764489e+02 -4.170952165204416e+02 -7.054834331799705e+01 5.370977042744418e+01 3 7.108631972082329e+02 6.832597695609467e+02 -1.727180704166534e+02 -9.301097030017993e+01 4 3.627212451153183e+02 -2.661645530405051e+02 2.432664137346505e+02 3.930119987273574e+01 - ME 5.466137525204964e-05 + ME 5.739226791327231e-06 Event 4 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2084,7 +2084,7 @@ Event 4 Batch 1 2 7.183269968238449e+02 -3.584978055671311e+02 -5.048824553914336e+02 -3.640971079361008e+02 3 7.387431276480253e+02 4.013538934928407e+02 5.036810263913359e+02 3.618865629982628e+02 4 4.292987552812846e+01 -4.285608792570924e+01 1.201429000097643e+00 2.210544937839338e+00 - ME 3.145606575501715e-04 + ME 5.884725836744927e-05 Event 5 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2092,7 +2092,7 @@ Event 5 Batch 1 2 4.529780005473896e+02 -8.443182436392424e+01 4.445408460134587e+02 
-2.106590230986445e+01 3 4.683757780543924e+02 -6.076819021151039e+01 -1.335482427838441e+02 -4.448010379662153e+02 4 5.786462213982179e+02 1.452000145754347e+02 -3.109926032296145e+02 4.658669402760799e+02 - ME 8.481958952475706e-05 + ME 2.851579396246287e-05 Event 6 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2100,7 +2100,7 @@ Event 6 Batch 1 2 6.238848262005389e+02 -1.065131260140052e+02 -4.741487807795934e+02 -3.912418229627633e+02 3 1.729069432107234e+02 -1.460869767542721e+02 -8.199113358821990e+01 4.281191710484079e+01 4 7.032082305887380e+02 2.526001027682771e+02 5.561399143678132e+02 3.484299058579224e+02 - ME 4.868510537699180e-04 + ME 1.468701510222534e-04 Event 7 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2108,7 +2108,7 @@ Event 7 Batch 1 2 6.977203086376783e+02 -6.126072843634399e+02 -1.744636661244187e+02 2.847602033865263e+02 3 1.614193396272251e+02 -4.571584237043670e+00 8.497734613495712e+01 -1.371646983269120e+02 4 6.408603517350967e+02 6.171788686004836e+02 8.948631998946138e+01 -1.475955050596143e+02 - ME 3.540796080305845e-04 + ME 9.523334397108766e-05 Event 8 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2116,7 +2116,7 @@ Event 8 Batch 1 2 6.871091945484288e+02 4.059708628308462e+02 2.886614153103366e+02 4.732666173272762e+02 3 5.653302025665631e+02 -2.838835484844413e+02 -7.353399035097291e+01 -4.833229987253825e+02 4 2.475606028850081e+02 -1.220873143464048e+02 -2.151274249593637e+02 1.005638139810634e+01 - ME 8.785466054587446e-05 + ME 3.726341895116938e-05 Event 9 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2124,7 +2124,7 @@ Event 9 Batch 1 2 1.618579955503452e+02 1.385215220188489e+01 1.601201234527701e+02 -1.917484467788566e+01 3 7.196660585644588e+02 -4.527189715496824e+02 -4.214090439733052e+02 
3.679391067910628e+02 4 6.184759458851959e+02 4.388668193477974e+02 2.612889205205349e+02 -3.487642621131772e+02 - ME 1.054640649369016e-03 + ME 1.276556148007894e-04 Event 10 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2132,7 +2132,7 @@ Event 10 Batch 1 2 7.832785200561162e+01 1.027681340851886e+01 -7.242726264265977e+01 -2.799877018853974e+01 3 7.448007230566494e+02 2.520540107528716e+02 6.813719334665398e+02 1.641011304445167e+02 4 6.768714249377393e+02 -2.623308241613905e+02 -6.089446708238800e+02 -1.361023602559769e+02 - ME 5.876642887714617e-04 + ME 1.087112534498832e-04 Event 11 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2140,7 +2140,7 @@ Event 11 Batch 1 2 5.478627446486676e+02 2.070882322301630e+02 -4.708081692757452e+02 1.887000762823861e+02 3 6.997827604382593e+02 -4.209013422316021e+02 4.569873120768409e+02 -3.220257264800591e+02 4 2.523544949130733e+02 2.138131100014392e+02 1.382085719890436e+01 1.333256501976729e+02 - ME 2.703695959900953e-05 + ME 7.092902148917371e-06 Event 12 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2148,7 +2148,7 @@ Event 12 Batch 1 2 5.802868936311938e+02 -4.467002255894120e+01 5.211262762381961e+02 -2.513262266832405e+02 3 5.208038834706859e+02 2.151797013176283e+01 -4.993650129388666e+02 -1.463155694111945e+02 4 3.989092228981199e+02 2.315205242717860e+01 -2.176126329932955e+01 3.976417960944350e+02 - ME 5.046437564325244e-04 + ME 4.980323856672599e-04 Event 13 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2156,7 +2156,7 @@ Event 13 Batch 1 2 5.774880087360024e+02 1.576445054854711e+02 5.481077151088400e+02 -9.065617884226717e+01 3 5.915098138161557e+02 -3.018001633277128e+02 -3.808656371901898e+02 3.372564123391869e+02 4 3.310021774478421e+02 1.441556578422419e+02 -1.672420779186502e+02 
-2.466002334969197e+02 - ME 1.505341700965184e-03 + ME 5.587942683639647e-05 Event 14 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2164,7 +2164,7 @@ Event 14 Batch 1 2 2.531797527967491e+02 -8.400833666640553e+01 -2.384535242035555e+02 -1.350938161690895e+01 3 5.261064571264828e+02 -1.751971590790252e+02 -3.334570051994592e+02 3.672878780523887e+02 4 7.207137900767681e+02 2.592054957454308e+02 5.719105294030147e+02 -3.537784964354798e+02 - ME 3.373121845959189e-03 + ME 1.659114310450813e-03 Event 15 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2172,7 +2172,7 @@ Event 15 Batch 1 2 4.605848765362425e+02 3.563504404614684e+02 1.735853700506503e+02 2.345653669687875e+02 3 4.216445088607453e+02 1.370719005416187e+02 -3.933730877164850e+02 6.521502736890037e+01 4 6.177706146030118e+02 -4.934223410030871e+02 2.197877176658347e+02 -2.997803943376878e+02 - ME 4.613631402771334e-04 + ME 9.110622752737525e-05 Event 16 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2180,7 +2180,7 @@ Event 16 Batch 1 2 4.972484926572777e+02 -1.474122335888775e+02 -4.748950276275915e+02 -6.399787981958280e-01 3 5.072511849723048e+02 4.846784046822065e+02 1.224000792205880e+02 -8.607455661990267e+01 4 4.955003223704169e+02 -3.372661710933285e+02 3.524949484070036e+02 8.671453541809866e+01 - ME 5.856804747367533e-05 + ME 1.035537635543116e-05 Event 17 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2188,7 +2188,7 @@ Event 17 Batch 1 2 3.182636773520259e+02 -9.176062613973060e+01 -1.890905041641619e+02 2.389906630959087e+02 3 6.376303990615819e+02 -4.240378519397394e+02 2.706855745366566e+02 -3.917827786765570e+02 4 5.441059235863918e+02 5.157984780794702e+02 -8.159507037249479e+01 1.527921155806483e+02 - ME 7.445984612273079e-05 + ME 2.964570775197734e-05 Event 18 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2196,7 +2196,7 @@ Event 18 Batch 1 2 5.532560008158404e+02 -4.148613005881325e+02 1.689647846464811e+02 -3.247047971041214e+02 3 3.650144721835348e+02 -1.597348634907620e+02 -2.160675866909894e+02 2.470529017650751e+02 4 5.817295270006244e+02 5.745961640788944e+02 4.710280204450838e+01 7.765189533904635e+01 - ME 9.119298978738387e-05 + ME 3.148325734685632e-05 Event 19 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2204,7 +2204,7 @@ Event 19 Batch 1 2 3.263687475619531e+02 -1.904667433734991e+02 2.390747946355329e+02 -1.143775398573919e+02 3 7.331345945903582e+02 2.597391859223821e+02 -6.739404183465077e+02 1.258022320965774e+02 4 4.404966578476884e+02 -6.927244254888298e+01 4.348656237109747e+02 -1.142469223918529e+01 - ME 8.793129888044293e-05 + ME 9.665339952809457e-06 Event 20 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2212,7 +2212,7 @@ Event 20 Batch 1 2 9.588718605412237e+01 4.259536217794532e+01 8.056474827260676e+01 -2.982128277051557e+01 3 7.250265356668370e+02 3.120913743414047e+02 -4.446787057645155e+02 4.801284204484703e+02 4 6.790862782790414e+02 -3.546867365193502e+02 3.641139574919093e+02 -4.503071376779550e+02 - ME 3.686389281265799e-03 + ME 6.402422614019696e-04 Event 21 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2220,7 +2220,7 @@ Event 21 Batch 1 2 1.825278201605081e+02 -1.533737674675502e+02 8.574830442242751e+01 4.939757963742074e+01 3 7.183016103669913e+02 1.713205736990392e+02 -6.275703015775031e+02 -3.045685162014731e+02 4 5.991705694725008e+02 -1.794680623148897e+01 5.418219971550755e+02 2.551709365640523e+02 - ME 7.470861105912214e-05 + ME 1.806434468406198e-05 Event 22 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2228,7 
+2228,7 @@ Event 22 Batch 1 2 2.349542451120770e+02 9.235159917618290e+01 -2.156570331301489e+02 -1.291214495308476e+01 3 7.360601907662837e+02 -2.182033070539752e+02 6.568866822530020e+02 -2.503433799808774e+02 4 5.289855641216395e+02 1.258517078777923e+02 -4.412296491228531e+02 2.632555249339621e+02 - ME 3.893602972207037e-05 + ME 8.007442232312076e-06 Event 23 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2236,7 +2236,7 @@ Event 23 Batch 1 2 2.350908908124364e+02 -7.377772511691019e+00 -2.298431804723787e+02 -4.884063683135331e+01 3 6.797114625392685e+02 -5.485955088721076e+02 3.603976926464840e+02 1.765336882516069e+02 4 5.851976466482949e+02 5.559732813837987e+02 -1.305545121741055e+02 -1.276930514202538e+02 - ME 2.057468423101862e-04 + ME 3.185713653214173e-05 Event 24 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2244,7 +2244,7 @@ Event 24 Batch 1 2 4.355364173804401e+02 2.538053291625626e+02 -2.665393838801487e+02 -2.328767540869265e+02 3 4.093863144993796e+02 -1.953012891316528e+02 -3.573484670764558e+02 4.191221827828568e+01 4 6.550772681201798e+02 -5.850404003090968e+01 6.238878509566048e+02 1.909645358086408e+02 - ME 1.895168702655672e-04 + ME 3.721637657688893e-05 Event 25 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2252,7 +2252,7 @@ Event 25 Batch 1 2 7.365386968907909e+02 3.875876454009267e+02 3.151568854896985e+02 5.412404333367775e+02 3 5.208510884285567e+02 -2.430585576296288e+02 -1.518636440371932e+02 -4.349089876054084e+02 4 2.426102146806534e+02 -1.445290877712977e+02 -1.632932414525050e+02 -1.063314457313693e+02 - ME 3.717867207603688e-04 + ME 7.982561935336398e-05 Event 26 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2260,7 +2260,7 @@ Event 26 Batch 1 2 7.198867014174701e+02 5.189601929589824e+02 4.797253921416957e+02 
-1.370428003807496e+02 3 3.889101953712928e+02 -1.847394503243419e+02 -2.837815501141775e+02 1.912864537085460e+02 4 3.912031032112371e+02 -3.342207426346404e+02 -1.959438420275183e+02 -5.424365332779646e+01 - ME 1.222836766708484e-04 + ME 1.928349098758061e-05 Event 27 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2268,7 +2268,7 @@ Event 27 Batch 1 2 6.732032222628646e+02 5.870808395006010e+02 -9.126179303429218e+01 3.165595544104447e+02 3 1.177373967283342e+02 7.847176641415683e+01 5.304379211899001e+00 -8.761358356661104e+01 4 7.090593810088013e+02 -6.655526059147578e+02 8.595741382239324e+01 -2.289459708438336e+02 - ME 1.603290018002586e-03 + ME 6.795383824785976e-04 Event 28 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2276,7 +2276,7 @@ Event 28 Batch 1 2 6.475300414228806e+02 3.136396845517189e+02 3.816259196370642e+02 -4.186728559156669e+02 3 7.290923529036073e+02 -2.791764769994177e+02 -4.112865540505715e+02 5.333662195995520e+02 4 1.233776056735125e+02 -3.446320755230100e+01 2.966063441350738e+01 -1.146933636838856e+02 - ME 5.037107889244314e-02 + ME 6.311296815400830e-04 Event 29 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2284,7 +2284,7 @@ Event 29 Batch 1 2 3.156754590345620e+02 -2.870540678871016e+02 4.159516713841874e+01 -1.245825012466667e+02 3 4.770060274033896e+02 -2.355061130652810e+02 -3.231858413754910e+02 -2.600433287405434e+02 4 7.073185135620483e+02 5.225601809523826e+02 2.815906742370723e+02 3.846258299872100e+02 - ME 7.956699356695784e-04 + ME 1.321807869823317e-04 Event 30 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2292,7 +2292,7 @@ Event 30 Batch 1 2 6.091290614220995e+02 1.543004089904798e+02 4.216196287493766e+00 -5.892468251447810e+02 3 2.079357839022729e+02 2.034647466922837e+02 4.185675980476618e+01 
9.348729279626889e+00 4 6.829351546756266e+02 -3.577651556827627e+02 -4.607295609226003e+01 5.798980958651539e+02 - ME 3.902231064020147e-04 + ME 1.448382779935031e-04 Event 31 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2300,7 +2300,7 @@ Event 31 Batch 1 2 6.901710072855793e+02 1.433309098684656e+01 6.447948515477649e+02 -2.457034416076623e+02 3 5.898919363861644e+02 1.120085307876391e+02 -4.815950471622465e+02 3.217029626736535e+02 4 2.199370563282564e+02 -1.263416217744856e+02 -1.631998043855182e+02 -7.599952106599136e+01 - ME 2.415465849322543e-04 + ME 2.376400497996635e-05 Event 32 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2308,7 +2308,7 @@ Event 32 Batch 1 2 6.144498311923271e+02 5.832947925341469e+02 -1.925283703230110e+02 1.576726595169125e+01 3 2.478450424037004e+02 5.004284035329792e+01 2.389954177960992e+02 4.247433867565734e+01 4 6.377051264039724e+02 -6.333376328874447e+02 -4.646704747308818e+01 -5.824160462734862e+01 - ME 2.160220890176678e-04 + ME 5.390650629646604e-05 Event 33 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2316,7 +2316,7 @@ Event 33 Batch 1 2 6.134536717469736e+02 -1.625429495269566e+02 -1.853973484494194e+02 5.617232593785355e+02 3 5.361644687950269e+02 -3.755831293394986e+01 -9.992652347025609e+01 -5.254297294928764e+02 4 3.503818594579993e+02 2.001012624609065e+02 2.853238719196754e+02 -3.629352988565911e+01 - ME 1.224582992507153e-04 + ME 1.005452860076771e-04 Event 34 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2324,7 +2324,7 @@ Event 34 Batch 1 2 3.840838099420727e+02 -2.442269925519278e+02 -3.827314394217582e+01 -2.939535943332559e+02 3 6.022630974514659e+02 3.956891925431131e+01 5.086724982658299e+02 3.200116071158652e+02 4 5.136530926064613e+02 2.046580732976165e+02 -4.703993543236541e+02 
-2.605801278260916e+01 - ME 9.608243105510499e-05 + ME 2.313941306740064e-05 Event 35 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2332,7 +2332,7 @@ Event 35 Batch 1 2 3.454350783663418e+02 -3.439607925797615e+02 2.363778141880094e+01 -2.139209721976717e+01 3 6.705698302143294e+02 5.215327591153251e+02 4.060443141865528e+02 -1.131171661597076e+02 4 4.839950914193290e+02 -1.775719665355635e+02 -4.296820956053536e+02 1.345092633794747e+02 - ME 4.862206803317224e-05 + ME 7.982017052260048e-06 Event 36 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2340,7 +2340,7 @@ Event 36 Batch 1 2 7.098652154429357e+02 2.489290984574327e+02 -1.674080692141068e+02 -6.433641786725617e+02 3 6.178479130357197e+02 -1.435715807033598e+02 2.588953561477193e+02 5.423065917191846e+02 4 1.722868715213448e+02 -1.053575177540730e+02 -9.148728693361247e+01 1.010575869533772e+02 - ME 6.680529568232270e-05 + ME 5.562249548714765e-05 Event 37 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2348,7 +2348,7 @@ Event 37 Batch 1 2 6.906872786346031e+02 1.495946561071237e+02 1.712833879510068e+02 6.521750966909805e+02 3 3.682276595245592e+02 -1.358558710218083e+02 1.194309698061993e+02 -3.207351477449753e+02 4 4.410850618408380e+02 -1.373878508531530e+01 -2.907143577572061e+02 -3.314399489460051e+02 - ME 2.014943348935539e-03 + ME 5.542438863722841e-04 Event 38 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2356,7 +2356,7 @@ Event 38 Batch 1 2 6.131720166645955e+02 -5.222102655174087e+02 6.340623138461877e+00 3.213038392347352e+02 3 4.540063357567760e+02 2.932429176443922e+02 -3.207297067242505e+02 -1.313879727496968e+02 4 4.328216475786277e+02 2.289673478730168e+02 3.143890835857886e+02 -1.899158664850380e+02 - ME 2.589645049118943e-04 + ME 3.150821423911933e-05 Event 39 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2364,7 +2364,7 @@ Event 39 Batch 1 2 2.929747896182304e+02 2.510117592312210e+02 -1.378648144805472e+02 6.181113983529403e+01 3 6.287164314722783e+02 3.864928360025993e+01 6.254120614625328e+02 5.148142827864510e+01 4 5.783087789094894e+02 -2.896610428314818e+02 -4.875472469819856e+02 -1.132925681139394e+02 - ME 1.708238325115053e-04 + ME 2.723120294663496e-05 Event 40 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2372,7 +2372,7 @@ Event 40 Batch 1 2 1.143487538112954e+02 -3.203572478439017e+01 1.022340126870988e+02 3.996944439980560e+01 3 7.361483923235807e+02 5.924235295921244e+02 -3.838567751530157e+02 -2.088128187524163e+02 4 6.495028538651248e+02 -5.603878048077345e+02 2.816227624659169e+02 1.688433743526105e+02 - ME 2.026369815874481e-04 + ME 4.279185076498264e-05 Event 41 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2380,7 +2380,7 @@ Event 41 Batch 1 2 6.384898508133350e+02 5.540399192408263e+02 -3.014826159773289e+02 -9.908223727147148e+01 3 3.510407251698805e+02 -1.719168197014114e+02 2.065966849440144e+02 -2.258140996521069e+02 4 5.104694240167846e+02 -3.821230995394149e+02 9.488593103331458e+01 3.248963369235784e+02 - ME 4.455092331482675e-05 + ME 1.488395965626735e-05 Event 42 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2388,7 +2388,7 @@ Event 42 Batch 1 2 3.291654598309212e+02 -1.090829060981258e+02 2.972891943885482e+02 -8.983292515941632e+01 3 6.884965239796815e+02 4.933628807557017e+02 -2.919492821202986e+02 3.812953554581829e+02 4 4.823380161893969e+02 -3.842799746575757e+02 -5.339912268249619e+00 -2.914624302987665e+02 - ME 6.690811667999076e-04 + ME 5.767145017550451e-05 Event 43 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2396,7 
+2396,7 @@ Event 43 Batch 1 2 3.674173006007981e+02 2.791827424102563e+02 1.079644067383057e+02 2.130637369397045e+02 3 7.392205647816575e+02 -6.110484627794917e+02 -4.247874240022372e+01 -4.138385868609020e+02 4 3.933621346175442e+02 3.318657203692355e+02 -6.548566433808202e+01 2.007748499211975e+02 - ME 2.734436884563990e-05 + ME 6.513986915725277e-06 Event 44 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2404,7 +2404,7 @@ Event 44 Batch 1 2 2.081359682230012e+02 -1.082501549908087e+02 1.771964605001424e+02 1.427934167997762e+01 3 7.449563315308093e+02 5.092828751965591e+02 -5.388739609944279e+02 7.215083562608928e+01 4 5.469077002461893e+02 -4.010327202057504e+02 3.616775004942854e+02 -8.643017730606689e+01 - ME 1.760644262839344e-04 + ME 1.838899544278803e-05 Event 45 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2412,7 +2412,7 @@ Event 45 Batch 1 2 5.180982465404422e+02 4.470261481799612e+02 -3.368837017252423e+01 -2.597277606009553e+02 3 3.377595659674062e+02 -7.316527185649456e+01 2.454727770679006e+02 -2.201624016839132e+02 4 6.441421874921515e+02 -3.738608763234666e+02 -2.117844068953763e+02 4.798901622848684e+02 - ME 1.645403798734011e-04 + ME 4.091340785269233e-05 Event 46 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2420,7 +2420,7 @@ Event 46 Batch 1 2 6.296560291524888e+02 2.172411497655985e+02 5.821614514430422e+02 -1.017892054705761e+02 3 6.224001894826197e+02 1.405102091633609e+01 -6.218608257778048e+02 2.176414579432105e+01 4 2.479437813648912e+02 -2.312921706819346e+02 3.969937433476264e+01 8.002505967625511e+01 - ME 4.041878897626609e-05 + ME 7.434320230190137e-06 Event 47 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2428,7 +2428,7 @@ Event 47 Batch 1 2 5.458843469271557e+02 -1.019033861791133e+02 -1.559739004096151e+02 
5.131058004898495e+02 3 2.573134207008558e+02 6.791700498899543e+01 -2.412204887508016e+02 5.839651284901167e+01 4 6.968022323719882e+02 3.398638119011781e+01 3.971943891604168e+02 -5.715023133388611e+02 - ME 1.408798022766008e-02 + ME 4.005478861198618e-03 Event 48 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2436,7 +2436,7 @@ Event 48 Batch 1 2 6.623920218006384e+02 -6.284562032939594e+02 -1.837527125398962e+02 -1.002044496053409e+02 3 1.251779629744606e+02 -7.502448682133647e+01 9.550779386908961e+01 3.031682869117444e+01 4 7.124300152249010e+02 7.034806901152959e+02 8.824491867080658e+01 6.988762091416655e+01 - ME 8.682321044518227e-04 + ME 3.004757451335502e-04 Event 49 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2444,7 +2444,7 @@ Event 49 Batch 1 2 2.397494808364364e+02 2.393958238941666e+02 -4.144666783354266e+00 -1.233996761053010e+01 3 6.782491241100328e+02 -3.516321535544010e+02 -2.705899831712919e+02 5.129890485673947e+02 4 5.820013950535307e+02 1.122363296602344e+02 2.747346499546462e+02 -5.006490809568646e+02 - ME 9.041285542966720e-03 + ME 6.040872325723622e-04 Event 50 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2452,7 +2452,7 @@ Event 50 Batch 1 2 4.764898792162554e+02 4.667163214316568e+02 5.900817880915086e+01 -7.573978570375913e+01 3 5.114228101321805e+02 -2.035689445851523e+02 -4.549677995197112e+02 -1.145306811477843e+02 4 5.120873106515638e+02 -2.631473768465044e+02 3.959596207105603e+02 1.902704668515434e+02 - ME 5.157319121365441e-05 + ME 9.692662313613028e-06 Event 51 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2460,7 +2460,7 @@ Event 51 Batch 1 2 4.678795643859630e+02 4.629737719234085e+02 5.365495313512251e+01 4.108186077915564e+01 3 6.311645871918951e+02 -4.500610707732837e+02 -4.345770688214700e+02 
8.340587481742408e+01 4 4.009558484221416e+02 -1.291270115012470e+01 3.809221156863474e+02 -1.244877355965797e+02 - ME 1.517985021504320e-04 + ME 1.293558494013996e-05 Event 52 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2468,7 +2468,7 @@ Event 52 Batch 1 2 3.696230029266819e+02 2.516704934433110e+02 2.514038675722595e+02 1.003953305301004e+02 3 6.696174214325739e+02 -2.754912388418390e+01 -6.493999246431116e+02 -1.609604756850079e+02 4 4.607595756407442e+02 -2.241213695591271e+02 3.979960570708519e+02 6.056514515490756e+01 - ME 5.727699238559496e-05 + ME 8.655753222194317e-06 Event 53 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2476,7 +2476,7 @@ Event 53 Batch 1 2 7.284624742442375e+01 -4.271742504396477e+01 -2.683807109937144e+01 -5.255012179908527e+01 3 7.493542950735829e+02 3.356513586119740e+02 2.501807367708783e+02 6.215139772812374e+02 4 6.777994575019936e+02 -2.929339335680093e+02 -2.233426656715069e+02 -5.689638554821522e+02 - ME 1.612275481129464e-02 + ME 2.372423861687152e-03 Event 54 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2484,7 +2484,7 @@ Event 54 Batch 1 2 7.460259847230064e+02 2.055186857047568e+01 6.233229443227743e+02 4.093908861479223e+02 3 5.756222844616437e+02 2.606063779094539e+01 -4.696411468594731e+02 -3.318117699890848e+02 4 1.783517308153497e+02 -4.661250636142109e+01 -1.536817974633012e+02 -7.757911615883735e+01 - ME 4.374243668355642e-04 + ME 5.046268590690708e-05 Event 55 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2492,7 +2492,7 @@ Event 55 Batch 1 2 5.967428482894213e+02 -8.165820254184375e+01 5.098287527914877e+02 -2.991798919868828e+02 3 5.942526243827265e+02 5.606061544962815e+01 -2.905196430116550e+02 5.153559216750568e+02 4 3.090045273278509e+02 2.559758709221549e+01 -2.193091097798325e+02 
-2.161760296881746e+02 - ME 1.779007466146034e-03 + ME 1.849048785615045e-04 Event 56 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2500,7 +2500,7 @@ Event 56 Batch 1 2 5.610874267302015e+02 -4.199055433713192e+02 3.580252469767042e+02 1.015694718309908e+02 3 6.303091265298390e+02 2.130872195586830e+02 -5.453843477211296e+02 -2.333224059286980e+02 4 3.086034467399593e+02 2.068183238126362e+02 1.873591007444254e+02 1.317529340977073e+02 - ME 3.258989367177766e-05 + ME 7.213009143835112e-06 Event 57 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2508,7 +2508,7 @@ Event 57 Batch 1 2 6.552053965855981e+02 4.516249927537604e+02 7.110694105335197e+00 4.746350341729917e+02 3 6.035190443408458e+02 -3.717228873476765e+02 2.148772607224587e+02 -4.241286299324850e+02 4 2.412755590735562e+02 -7.990210540608396e+01 -2.219879548277939e+02 -5.050640424050685e+01 - ME 1.623545585873121e-04 + ME 3.752873989265266e-05 Event 58 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2516,7 +2516,7 @@ Event 58 Batch 1 2 2.959982971085279e+02 1.850007048157144e+02 -2.304987961744356e+02 1.612563397119956e+01 3 7.018897389129390e+02 -3.764226030262936e+02 4.376344751014918e+02 3.992884868423144e+02 4 5.021119639785326e+02 1.914218982105791e+02 -2.071356789270567e+02 -4.154141208135139e+02 - ME 4.558573859477246e-03 + ME 1.901193343270815e-04 Event 59 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2524,7 +2524,7 @@ Event 59 Batch 1 2 5.521089721327345e+02 1.223876815062619e+02 -3.629066091228882e+01 -5.371485459866160e+02 3 4.098988410471214e+02 -5.841964900319319e+01 -3.626461945087767e+02 1.819119075553315e+02 4 5.379921868201441e+02 -6.396803250306872e+01 3.989368554210655e+02 3.552366384312845e+02 - ME 5.148841296796537e-05 + ME 1.780280399801712e-05 Event 60 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2532,7 +2532,7 @@ Event 60 Batch 1 2 7.143828168925960e+02 -4.584044193456332e+02 -2.419772079280938e+02 -4.915844060170314e+02 3 1.284110307517517e+02 8.324300347118127e+01 -7.889851197070540e+01 5.774963203893758e+01 4 6.572061523556514e+02 3.751614158744520e+02 3.208757198987992e+02 4.338347739780938e+02 - ME 1.673517837789511e-04 + ME 7.144001898958308e-05 Event 61 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2540,7 +2540,7 @@ Event 61 Batch 1 2 4.394390210968651e+02 -2.137451655543886e+02 -3.779414621253704e+02 -6.767502250635177e+01 3 4.431311911324728e+02 3.845666395406355e+02 -2.150363068358313e+02 4.725610065709574e+01 4 6.174297877706618e+02 -1.708214739862469e+02 5.929777689612018e+02 2.041892184925626e+01 - ME 1.368591177943825e-04 + ME 2.870354731125455e-05 Event 62 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2548,7 +2548,7 @@ Event 62 Batch 1 2 7.301725729481176e+02 4.281927891852710e+02 5.652737593150771e+02 -1.739784429324868e+02 3 7.567373964415995e+01 2.589885732647599e+01 -5.696550981957816e+01 4.255225906941358e+01 4 6.941536874077224e+02 -4.540916465117469e+02 -5.083082494954988e+02 1.314261838630732e+02 - ME 8.513592598060080e-04 + ME 2.379197431250548e-04 Event 63 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2556,7 +2556,7 @@ Event 63 Batch 1 2 4.361152320236988e+02 -3.738769057978321e+02 1.427754799584550e+02 -1.732850750548248e+02 3 5.817148313055657e+02 5.081993893256957e+02 2.829214478037172e+02 -8.998890070513914e+00 4 4.821699366707353e+02 -1.343224835278637e+02 -4.256969277621721e+02 1.822839651253387e+02 - ME 4.544766189571194e-05 + ME 8.350404272725701e-06 Event 64 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2564,7 
+2564,7 @@ Event 64 Batch 1 2 6.097675704107204e+02 3.288514690970509e+02 4.971291587853200e+02 -1.285916042465611e+02 3 5.709532610348123e+02 -6.501292612520263e+01 -4.768258747557200e+02 3.072426254385416e+02 4 3.192791685544673e+02 -2.638385429718484e+02 -2.030328402960006e+01 -1.786510211919805e+02 - ME 4.598138986874043e-04 + ME 3.000969253297957e-05 Event 65 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2572,7 +2572,7 @@ Event 65 Batch 1 2 6.258641293880484e+02 3.743515439843765e+02 -1.622018320411498e+02 -4.746128903155367e+02 3 7.438702198751357e+02 -4.029113627030089e+02 2.325939036896868e+02 5.804355380128616e+02 4 1.302656507368158e+02 2.855981871863233e+01 -7.039207164853700e+01 -1.058226476973252e+02 - ME 6.427333508548903e-03 + ME 3.162776051460646e-04 Event 66 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2580,7 +2580,7 @@ Event 66 Batch 1 2 3.731957242404369e+02 1.596860493342637e+01 -3.714568973276624e+02 3.224632809376674e+01 3 6.079923612940432e+02 4.451199598539357e+02 3.189341902600864e+02 -2.642043054431177e+02 4 5.188119144655197e+02 -4.610885647873621e+02 5.252270706757586e+01 2.319579773493509e+02 - ME 4.681392980523237e-05 + ME 1.034065067393998e-05 Event 67 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2588,7 +2588,7 @@ Event 67 Batch 1 2 7.084256499213539e+02 6.318790977834966e+02 -2.229764540025608e+02 2.299504472951746e+02 3 5.168612394424738e+01 1.130069959366449e+01 -1.428140623590627e+01 4.837138651102398e+01 4 7.398882261343989e+02 -6.431797973771612e+02 2.372578602384670e+02 -2.783218338061985e+02 - ME 5.878400132197954e-02 + ME 1.479715191731530e-02 Event 68 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2596,7 +2596,7 @@ Event 68 Batch 1 2 5.644037677826096e+02 -7.446914007305443e+01 3.170710956176409e+02 
4.609467220707991e+02 3 4.303832728799333e+02 -1.588265612792408e+02 -3.994808673830752e+02 -2.046757440246668e+01 4 5.052129593374568e+02 2.332957013522950e+02 8.240977176543441e+01 -4.404791476683325e+02 - ME 8.108482137897523e-03 + ME 3.274273226082449e-04 Event 69 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2604,7 +2604,7 @@ Event 69 Batch 1 2 2.379282923937934e+02 -4.413455715133102e+01 1.058497776082811e+02 -2.084654354245804e+02 3 5.822935131976616e+02 -5.806422676829345e+02 4.095409019445288e+01 -1.559022092337181e+01 4 6.797781944085444e+02 6.247768248342655e+02 -1.468038678027338e+02 2.240556563479522e+02 - ME 3.039802585689931e-04 + ME 6.379305675073031e-05 Event 70 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2612,7 +2612,7 @@ Event 70 Batch 1 2 5.861861307468000e+02 1.831219916849830e+02 2.904683423406074e+02 -4.750880530376756e+02 3 4.633200606614189e+02 -4.245314712871158e+02 -1.339518705596282e+02 1.284344380284135e+02 4 4.504938085917810e+02 2.414094796021329e+02 -1.565164717809791e+02 3.466536150092620e+02 - ME 3.530491740557932e-05 + ME 1.325653453486623e-05 Event 71 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2620,7 +2620,7 @@ Event 71 Batch 1 2 7.383412459951699e+02 5.748049255568963e+02 -1.639684737984460e+02 -4.334298474879633e+02 3 3.973981306646684e+02 -3.228684354469153e+02 -4.837114091238284e+00 2.316416412804533e+02 4 3.642606233401616e+02 -2.519364901099809e+02 1.688055878896842e+02 2.017882062075102e+02 - ME 3.103530482016079e-05 + ME 1.333441808219846e-05 Event 72 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2628,7 +2628,7 @@ Event 72 Batch 1 2 3.538199915090663e+02 3.512029503136998e+02 -6.467835580753929e+00 -4.246458742680748e+01 3 5.344234504985296e+02 1.310173344785605e+01 3.836805260246265e+01 
5.328833470497182e+02 4 6.117565579924039e+02 -3.643046837615559e+02 -3.190021702170876e+01 -4.904187596229107e+02 - ME 9.376669006106200e-03 + ME 2.994704399169685e-03 Event 73 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2636,7 +2636,7 @@ Event 73 Batch 1 2 4.694927197571710e+02 1.451947293992222e+02 -1.807863847612341e+02 4.082379055705570e+02 3 5.537325951281179e+02 -5.796379956652479e+01 5.401382741253894e+02 -1.072876026015002e+02 4 4.767746851147115e+02 -8.723092983269744e+01 -3.593518893641554e+02 -3.009503029690568e+02 - ME 1.077472469645428e-03 + ME 1.535829386616431e-04 Event 74 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2644,7 +2644,7 @@ Event 74 Batch 1 2 6.258444305735198e+02 -3.349227552763227e+02 4.941036656040852e+02 1.880679848209580e+02 3 5.555040664889822e+02 3.765538795180102e+01 -5.474422011270130e+02 -8.645158222500005e+01 4 3.186515029374982e+02 2.972673673245214e+02 5.333853552292791e+01 -1.016164025959578e+02 - ME 1.623439923565115e-04 + ME 1.487896902219418e-05 Event 75 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2652,7 +2652,7 @@ Event 75 Batch 1 2 3.943316317993887e+02 5.588489849751632e+01 -2.552251009651266e+02 -2.953548066221912e+02 3 5.467466262348042e+02 -3.021648543602057e+02 -2.377479281839000e+02 3.887212326756534e+02 4 5.589217419658066e+02 2.462799558626894e+02 4.929730291490265e+02 -9.336642605346221e+01 - ME 1.348649436679123e-04 + ME 4.632408498797698e-05 Event 76 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2660,7 +2660,7 @@ Event 76 Batch 1 2 5.517772830004059e+02 2.282681125856672e+02 -4.885490190451381e+02 -1.169260227747471e+02 3 4.245403880864563e+02 -2.793100283061228e+02 1.521744876196477e+02 -2.811821020654221e+02 4 5.236823289131380e+02 5.104191572045557e+01 3.363745314254903e+02 
3.981081248401691e+02 - ME 5.074216551061466e-05 + ME 1.645260485784409e-05 Event 77 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2668,7 +2668,7 @@ Event 77 Batch 1 2 3.781543446472003e+02 -5.926925448310480e+01 -1.775497893613220e+02 3.285786605157444e+02 3 6.702964816234122e+02 -6.066564226432872e+01 -1.057468051743550e+02 -6.591165802199176e+02 4 4.515491737293867e+02 1.199348967474336e+02 2.832965945356770e+02 3.305379197041734e+02 - ME 6.321080405055773e-05 + ME 5.041095643414513e-05 Event 78 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2676,7 +2676,7 @@ Event 78 Batch 1 2 4.564262045363139e+02 1.882572856930395e+02 1.751822011208171e+02 -3.770878823051468e+02 3 3.809544602625751e+02 -2.816334489555117e+02 1.992812047321844e+02 -1.615422627793184e+02 4 6.626193352011103e+02 9.337616326247226e+01 -3.744634058530013e+02 5.386301450844651e+02 - ME 2.572921643188974e-04 + ME 6.222463480998997e-05 Event 79 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2684,7 +2684,7 @@ Event 79 Batch 1 2 6.126536521478922e+02 6.075062399138452e+02 -4.178945028651393e+01 6.733726903166659e+01 3 2.872846052831658e+02 -1.084163947926161e+02 2.139961846825774e+01 2.651799127051085e+02 4 6.000617425689430e+02 -4.990898451212283e+02 2.038983181825616e+01 -3.325171817367756e+02 - ME 1.996659951821530e-03 + ME 6.289823950094716e-04 Event 80 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2692,7 +2692,7 @@ Event 80 Batch 1 2 4.171281258707700e+02 -2.756641813219371e+02 1.445082905894664e+01 3.127240094205691e+02 3 3.805235327384960e+02 -2.955852199231463e+02 2.395269588958384e+02 7.373784162959287e+00 4 7.023483413907342e+02 5.712494012450838e+02 -2.539777879547846e+02 -3.200977935835284e+02 - ME 1.297520069620947e-03 + ME 5.629434448779270e-04 Event 81 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2700,7 +2700,7 @@ Event 81 Batch 1 2 7.471091333863935e+02 -9.753029041192970e+01 7.407154559164039e+02 -7.162458282065091e-01 3 6.775352561453885e+02 9.550863422814814e+01 -6.702673865908516e+02 -2.595678293896889e+01 4 7.535561046821789e+01 2.021656183781575e+00 -7.044806932555213e+01 2.667302876717550e+01 - ME 1.022399816924924e-04 + ME 2.904529061551848e-05 Event 82 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2708,7 +2708,7 @@ Event 82 Batch 1 2 4.309094465924175e+02 3.042233433179616e+02 2.799835808203350e+02 -1.214096495919827e+02 3 5.540384887187945e+02 -4.824447657759213e+02 1.988969596446625e+02 1.861335391629672e+02 4 5.150520646887885e+02 1.782214224579596e+02 -4.788805404649973e+02 -6.472388957098450e+01 - ME 1.053635072607165e-04 + ME 1.778678120024833e-05 Event 83 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2716,7 +2716,7 @@ Event 83 Batch 1 2 4.869534474909295e+02 -4.727010820510885e+02 1.062322962656182e+02 4.890855018466118e+01 3 3.520990385354405e+02 -1.437544586613779e+02 -3.142298368411062e+02 6.758696761482639e+01 4 6.609475139736298e+02 6.164555407124665e+02 2.079975405754878e+02 -1.164955177994876e+02 - ME 2.998516055200512e-04 + ME 7.948516811691567e-05 Event 84 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2724,7 +2724,7 @@ Event 84 Batch 1 2 1.391975815431583e+01 -3.682657486111166e-01 -1.138840508663312e+01 -7.995516055627093e+00 3 7.493632094786751e+02 -3.452281541586202e+01 3.833012084573049e+02 6.429880080772211e+02 4 7.367170323670085e+02 3.489108116447313e+01 -3.719128033706718e+02 -6.349924920215940e+02 - ME 3.806217512266510e-01 + ME 8.671177508029917e-02 Event 85 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2732,7 
+2732,7 @@ Event 85 Batch 1 2 7.362448947738020e+02 6.409220704967113e+02 3.243429451315054e+02 1.614840505254833e+02 3 1.517836214454495e+02 -1.266859291808411e+02 -6.780846852200752e+01 4.889738933094901e+01 4 6.119714837807480e+02 -5.142361413158706e+02 -2.565344766094980e+02 -2.103814398564324e+02 - ME 5.694785892689211e-04 + ME 1.062305495679385e-04 Event 86 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2740,7 +2740,7 @@ Event 86 Batch 1 2 5.451728369778392e+02 -6.605005893803180e+01 1.066920544886257e+02 -5.305352178712969e+02 3 3.158718592284829e+02 -1.755596039144849e+02 2.550395858012225e+02 6.251932981237656e+01 4 6.389553037936773e+02 2.416096628525165e+02 -3.617316402898481e+02 4.680158880589203e+02 - ME 1.469986179099727e-04 + ME 4.057626974930324e-05 Event 87 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2748,7 +2748,7 @@ Event 87 Batch 1 2 3.414211232216659e+02 1.437256906952883e+02 1.534640422371205e+02 -2.689983214749668e+02 3 5.081668091119999e+02 4.794742948200324e+02 -1.464748766741243e+02 8.296394996143997e+01 4 6.504120676663341e+02 -6.231999855153207e+02 -6.989165562996117e+00 1.860343715135268e+02 - ME 1.823135893899652e-04 + ME 3.656584417835253e-05 Event 88 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2756,7 +2756,7 @@ Event 88 Batch 1 2 2.925516585730864e+02 1.655911293372511e+01 2.598275245766865e+02 -1.334238591297045e+02 3 7.159840369510271e+02 -1.056844973272874e+02 -3.694097043713192e+02 6.041526284885822e+02 4 4.914643044758866e+02 8.912538439356234e+01 1.095821797946327e+02 -4.707287693588777e+02 - ME 8.728488941697977e-02 + ME 2.327745727475104e-03 Event 89 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2764,7 +2764,7 @@ Event 89 Batch 1 2 6.333634651097186e+02 1.209853522660007e+02 5.372166546881791e+02 
-3.129058794565919e+02 3 6.221307427802806e+02 5.757192259699385e+01 -4.327483989541182e+02 4.432391657372765e+02 4 2.445057921100010e+02 -1.785572748629945e+02 -1.044682557340609e+02 -1.303332862806847e+02 - ME 5.497507832908574e-04 + ME 5.047204144927262e-05 Event 90 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2772,7 +2772,7 @@ Event 90 Batch 1 2 3.111538587406461e+02 2.628215106651484e+02 -6.985334981761831e+01 -1.512021390726355e+02 3 5.216486323898988e+02 1.252715366480781e+02 4.457714554600226e+02 -2.402335265468457e+02 4 6.671975088694549e+02 -3.880930473132266e+02 -3.759181056424042e+02 3.914356656194811e+02 - ME 2.329075524537458e-04 + ME 4.503542584588689e-05 Event 91 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2780,7 +2780,7 @@ Event 91 Batch 1 2 3.007803348469016e+02 8.390513937949677e+01 2.884042062049404e+02 -1.586667134655829e+01 3 6.256884422056424e+02 2.364580673743878e+02 -3.590826126759745e+02 -4.545693416378727e+02 4 5.735312229474563e+02 -3.203632067538847e+02 7.067840647103421e+01 4.704360129844310e+02 - ME 6.478111274774788e-05 + ME 2.635583378174906e-05 Event 92 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2788,7 +2788,7 @@ Event 92 Batch 1 2 6.843865618656529e+02 -2.264962467301474e+02 -5.909185329480341e+02 2.605757158639088e+02 3 6.645516272550811e+02 3.453347116263074e+02 4.983670680340538e+02 -2.720350487207341e+02 4 1.510618108792659e+02 -1.188384648961601e+02 9.255146491398015e+01 1.145933285682523e+01 - ME 9.365402433981294e-05 + ME 1.711437740567050e-05 Event 93 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2796,7 +2796,7 @@ Event 93 Batch 1 2 5.579763469381434e+02 2.180908585044468e+02 5.135246110359701e+02 8.151996049100932e+00 3 3.333821836060117e+02 1.681122988324202e+02 -1.261705574188212e+02 
2.587719570738210e+02 4 6.086414694558448e+02 -3.862031573368670e+02 -3.873540536171486e+02 -2.669239531229223e+02 - ME 5.183695239236329e-04 + ME 1.157787815150910e-04 Event 94 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2804,7 +2804,7 @@ Event 94 Batch 1 2 4.534979734151987e+02 1.139662723650677e+02 2.686183171543304e+01 4.381216071501101e+02 3 3.856184698299744e+02 1.545134372854228e+02 -3.452526490806396e+02 7.501873282757614e+01 4 6.608835567548277e+02 -2.684797096504910e+02 3.183908173652065e+02 -5.131403399776862e+02 - ME 6.944325623628402e-03 + ME 1.545010233607317e-03 Event 95 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2812,7 +2812,7 @@ Event 95 Batch 1 2 2.828073115974175e+02 -5.711637476392460e+01 5.915078172645698e+01 -2.705898746219725e+02 3 6.809618671276158e+02 3.772100991821226e+02 3.247893528880094e+02 4.646864338535512e+02 4 5.362308212749670e+02 -3.200937244181981e+02 -3.839401346144663e+02 -1.940965592315787e+02 - ME 2.560512106670314e-04 + ME 6.408796328924562e-05 Event 96 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2820,7 +2820,7 @@ Event 96 Batch 1 2 4.639832102051440e+02 -4.275497908582962e+02 -1.317248975374901e+02 -1.230046627491649e+02 3 7.474114851375481e+02 6.594176555428718e+02 2.654537688070380e+02 2.309254864669502e+02 4 2.886053046573076e+02 -2.318678646845757e+02 -1.337288712695479e+02 -1.079208237177853e+02 - ME 2.440162169445852e-04 + ME 1.445191791082226e-05 Event 97 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2828,7 +2828,7 @@ Event 97 Batch 1 2 5.095921959312568e+02 3.190102848863560e+02 3.100341192456060e+02 2.485869851668986e+02 3 4.555541331018014e+02 -2.788120391899956e+02 2.221549471930723e+02 -2.836205112936887e+02 4 5.348536709669415e+02 -4.019824569636059e+01 -5.321890664386783e+02 
3.503352612679014e+01 - ME 8.198891770965733e-05 + ME 2.250661525403011e-05 Event 98 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2836,7 +2836,7 @@ Event 98 Batch 1 2 5.299941952467790e+02 -2.570048161992350e+02 -4.630296380940593e+02 -2.111695271961878e+01 3 7.352146396921255e+02 2.361229278157243e+02 6.962552486063584e+02 3.893348873424185e+00 4 2.347911650610957e+02 2.088188838351074e+01 -2.332256105122990e+02 1.722360384619465e+01 - ME 6.760444392591968e-05 + ME 5.654417419793765e-06 Event 99 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2844,7 +2844,7 @@ Event 99 Batch 1 2 4.290897291078425e+02 3.747236205606835e+02 2.040795775432686e+02 -4.529602465443949e+01 3 6.438744429739487e+02 -5.215755139094103e+02 2.133414139578182e+01 3.769325350988583e+02 4 4.270358279182090e+02 1.468518933487271e+02 -2.254137189390505e+02 -3.316365104444187e+02 - ME 2.024851967866169e-03 + ME 8.457850707842401e-05 Event 100 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2852,7 +2852,7 @@ Event 100 Batch 1 2 5.119062275524872e+02 -4.721600394809319e+02 -1.845880136125884e+02 7.099400083769524e+01 3 4.523854579707449e+02 2.836789572262426e+02 -3.060214184981774e+02 -1.747276258374610e+02 4 5.357083144767672e+02 1.884810822546894e+02 4.906094321107658e+02 1.037336249997658e+02 - ME 6.898305006855298e-05 + ME 1.420495101373495e-05 Event 101 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2860,7 +2860,7 @@ Event 101 Batch 1 2 6.024072815192737e+02 -3.080418730730875e+02 -4.692284526425155e+02 2.186993289696520e+02 3 3.347434020484399e+02 8.940653726951260e+01 -3.939923552329941e+01 -3.201676381969582e+02 4 5.628493164322859e+02 2.186353358035749e+02 5.086276881658150e+02 1.014683092273061e+02 - ME 9.290725627447436e-05 + ME 2.743452031293993e-05 Event 102 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2868,7 +2868,7 @@ Event 102 Batch 1 2 5.910857738801296e+02 3.707548039128416e+02 -7.516477307090547e+01 -4.541734518311494e+02 3 2.311218706704979e+02 4.536804143672514e+01 -2.262982016400413e+02 1.217307902336991e+01 4 6.777923554493723e+02 -4.161228453495667e+02 3.014629747109467e+02 4.420003728077793e+02 - ME 2.633339755449651e-04 + ME 7.158169676479796e-05 Event 103 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2876,7 +2876,7 @@ Event 103 Batch 1 2 6.627949406417042e+02 7.189602123685950e+01 -6.391860825813610e+02 -1.599038689489492e+02 3 5.519979886399102e+02 1.442810582977179e+02 4.734454174874869e+02 2.444057944057306e+02 4 2.852070707183856e+02 -2.161770795345774e+02 1.657406650938741e+02 -8.450192545678139e+01 - ME 1.652798222861839e-04 + ME 1.658567428345252e-05 Event 104 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2884,7 +2884,7 @@ Event 104 Batch 1 2 4.368180791462563e+02 -3.483499330357901e+02 -2.596280064690262e+02 4.533935023690698e+01 3 4.635715977792429e+02 1.873023362819025e+02 -2.251347602994603e+02 -3.593477435519053e+02 4 5.996103230745010e+02 1.610475967538876e+02 4.847627667684865e+02 3.140083933149983e+02 - ME 9.158171748371188e-05 + ME 2.162124469235967e-05 Event 105 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2892,7 +2892,7 @@ Event 105 Batch 1 2 5.701708357490469e+02 2.288495716262106e+02 -4.521314661478370e+02 -2.613422905391967e+02 3 3.711008490497917e+02 -3.362590561223710e+02 -8.126001400906793e+01 1.343223639771668e+02 4 5.587283152011612e+02 1.074094844961603e+02 5.333914801569049e+02 1.270199265620299e+02 - ME 7.043372303967046e-05 + ME 1.720246557093887e-05 Event 106 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-2900,7 +2900,7 @@ Event 106 Batch 1 2 6.775588183099673e+02 5.149765831731705e+02 3.445381345095063e+02 -2.741870619150275e+02 3 7.044100837534635e+02 -4.546975847980706e+02 -4.392260662935809e+02 3.106833358270535e+02 4 1.180310979365712e+02 -6.027899837509908e+01 9.468793178407486e+01 -3.649627391202603e+01 - ME 3.259673897057837e-04 + ME 2.786544600802367e-05 Event 107 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2908,7 +2908,7 @@ Event 107 Batch 1 2 6.046880513041550e+02 2.289413119004024e+02 -5.349774474143721e+02 -1.644160754103499e+02 3 3.366746442316215e+02 -7.166101576320902e+01 2.452245434825371e+01 3.280444544890399e+02 4 5.586373044642238e+02 -1.572802961371935e+02 5.104549930661184e+02 -1.636283790786902e+02 - ME 8.859556065170558e-04 + ME 4.667002706670146e-04 Event 108 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2916,7 +2916,7 @@ Event 108 Batch 1 2 6.239206451413978e+02 -2.218030564243363e+02 5.011455197099735e+02 -2.982172759400455e+02 3 2.841199272340513e+02 1.209406641294798e+02 7.967327320293104e+01 2.444374323800143e+02 4 5.919594276245514e+02 1.008623922948564e+02 -5.808187929129044e+02 5.377984356003120e+01 - ME 1.727643234936365e-04 + ME 7.961277501126149e-05 Event 109 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2924,7 +2924,7 @@ Event 109 Batch 1 2 3.093404598873124e+02 1.546999830656544e+02 1.629193992247174e+02 2.126421988200774e+02 3 5.287372542258961e+02 -2.136116696975048e+02 -1.865832176193536e+02 4.462284633214169e+02 4 6.619222858867909e+02 5.891168663185049e+01 2.366381839463621e+01 -6.588706621414941e+02 - ME 1.686695657867669e+01 + ME 2.902408960420708e-01 Event 110 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2932,7 +2932,7 @@ Event 110 Batch 1 2 4.920948406187608e+02 -8.595212543403569e+01 
-4.824913009925944e+02 -4.440392734262522e+01 3 4.634042325716594e+02 -2.085760624772916e+00 1.255608851371819e+02 4.460645653843308e+02 4 5.445009268095798e+02 8.803788605880843e+01 3.569304158554124e+02 -4.016606380417056e+02 - ME 4.151412887207382e-03 + ME 1.043536440561108e-03 Event 111 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2940,7 +2940,7 @@ Event 111 Batch 1 2 4.637454700443120e+02 1.543048221589588e+02 -4.372769385391800e+02 6.225902899506631e+00 3 3.246747011850293e+02 -5.128652792678845e+01 -2.274142471268230e+02 2.259781269206006e+02 4 7.115798287706589e+02 -1.030182942321705e+02 6.646911856660031e+02 -2.322040298201072e+02 - ME 1.240833065187375e-03 + ME 5.219332617201280e-04 Event 112 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2948,7 +2948,7 @@ Event 112 Batch 1 2 6.923761777814550e+02 3.939190124845535e+02 4.398224952082178e+01 -5.676954684419625e+02 3 5.277418353503033e+02 -4.270527740856185e+02 4.970714905179168e+01 3.060499505927539e+02 4 2.798819868682421e+02 3.313376160106501e+01 -9.368939857261346e+01 2.616455178492087e+02 - ME 5.385735959435035e-05 + ME 4.381536575941429e-05 Event 113 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2956,7 +2956,7 @@ Event 113 Batch 1 2 7.174898838850694e+02 -6.130145063482008e+02 3.726797356942233e+02 1.071275347265524e+01 3 1.705115822510491e+02 3.993583199494100e+01 -1.624320619120163e+02 3.309311510932528e+01 4 6.119985338638814e+02 5.730786743532599e+02 -2.102476737822071e+02 -4.380586858198049e+01 - ME 2.197559713387976e-04 + ME 4.914674319256647e-05 Event 114 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2964,7 +2964,7 @@ Event 114 Batch 1 2 6.772826088252357e+02 -1.430288042596954e+02 -3.410390118171982e+02 5.674036356844296e+02 3 6.725037798358682e+02 3.626161999767239e+01 
2.510744134018114e+02 -6.228226615527174e+02 4 1.502136113388951e+02 1.067671842620232e+02 8.996459841538707e+01 5.541902586828807e+01 - ME 8.926156406775035e-05 + ME 7.986648389935193e-05 Event 115 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2972,7 +2972,7 @@ Event 115 Batch 1 2 9.320551230331124e+01 1.288474310894606e+01 -2.581623869377880e+01 8.862715576190526e+01 3 6.672654287607164e+02 1.525114284892182e+02 2.829200767588875e+02 5.847560574856374e+02 4 7.395290589359720e+02 -1.653961715981643e+02 -2.571038380651088e+02 -6.733832132475428e+02 - ME 1.800237703627863e+00 + ME 4.304938165075599e-01 Event 116 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2980,7 +2980,7 @@ Event 116 Batch 1 2 4.951202926530015e+02 -4.575339943514647e+02 4.220102313368785e+01 1.844608951947751e+02 3 3.101750696753587e+02 -4.711582585559527e+01 2.172188132736168e+02 2.163438466008694e+02 4 6.947046376716394e+02 5.046498202070600e+02 -2.594198364073050e+02 -4.008047417956444e+02 - ME 1.933367100533606e-03 + ME 5.988625984136040e-04 Event 117 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2988,7 +2988,7 @@ Event 117 Batch 1 2 6.543248494478489e+02 1.390926466871539e+02 9.107024539473488e+01 6.328510524967589e+02 3 5.040443237953712e+02 6.874740772121054e+01 1.336336536624387e+02 -4.811200690999848e+02 4 3.416308267567792e+02 -2.078400544083643e+02 -2.247038990571737e+02 -1.517309833967742e+02 - ME 4.207453923038474e-04 + ME 3.026560085299302e-04 Event 118 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2996,7 +2996,7 @@ Event 118 Batch 1 2 5.829230400014206e+02 5.307803371482089e+02 -3.192285892796672e+01 2.388565162167381e+02 3 3.965113090906140e+02 -5.470249758902820e+01 2.256187790844517e+02 -3.214420966810604e+02 4 5.205656509079653e+02 -4.760778395591807e+02 
-1.936959201564850e+02 8.258558046432242e+01 - ME 7.464562943747175e-05 + ME 2.168340782914014e-05 Event 119 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3004,7 +3004,7 @@ Event 119 Batch 1 2 3.549567073991255e+02 2.281637891139605e+02 1.474502150787006e+02 2.284600261271838e+02 3 4.727085372220640e+02 7.463684946128350e+01 -3.092948822053327e+02 3.495988811576870e+02 4 6.723347553788102e+02 -3.028006385752440e+02 1.618446671266322e+02 -5.780589072848707e+02 - ME 1.455012849105755e-02 + ME 1.664672733965846e-03 Event 120 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3012,7 +3012,7 @@ Event 120 Batch 1 2 7.192117275853698e+02 4.094232477570927e+02 -5.552624156333899e+02 -2.032775518283800e+02 3 3.685061529232585e+02 -2.522084621786424e+02 1.741347663658646e+02 2.046087962197375e+02 4 4.122821194913712e+02 -1.572147855784500e+02 3.811276492675253e+02 -1.331244391357209e+00 - ME 9.281995463485567e-05 + ME 1.900262756274459e-05 Event 121 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3020,7 +3020,7 @@ Event 121 Batch 1 2 1.923953846467517e+02 -5.182078839520096e+01 -1.486351786617837e+02 -1.106262789198433e+02 3 6.582127150877787e+02 -3.509182841037630e+02 -1.191939510078701e+02 5.439606035624541e+02 4 6.493919002654695e+02 4.027390724989639e+02 2.678291296696539e+02 -4.333343246426108e+02 - ME 1.925188892577692e-03 + ME 5.360055113881300e-04 Event 122 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3028,7 +3028,7 @@ Event 122 Batch 1 2 6.905732817636248e+02 3.462508192534570e+02 -5.375670569609784e+02 -2.608131264380775e+02 3 7.097575386120018e+02 -2.677396278645660e+02 5.849221766424142e+02 2.998954860604125e+02 4 9.966917962437387e+01 -7.851119138889094e+01 -4.735511968143584e+01 -3.908235962233509e+01 - ME 5.007312135859238e-04 + ME 
3.451011759976180e-05 Event 123 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3036,7 +3036,7 @@ Event 123 Batch 1 2 4.035126033432560e+02 2.481103298242076e+01 -3.878573016343356e+02 -1.085059780294573e+02 3 3.541388771651666e+02 1.572344474048876e+02 -3.105653677404273e+02 -6.512161875550808e+01 4 7.423485194915780e+02 -1.820454803873083e+02 6.984226693747627e+02 1.736275967849660e+02 - ME 2.043564129780385e-02 + ME 3.471230489499830e-03 Event 124 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3044,7 +3044,7 @@ Event 124 Batch 1 2 5.353042728143347e+02 -4.785252055946481e+02 -2.279396245170433e+02 7.488537693644093e+01 3 7.454081943698113e+02 6.785307544150930e+02 3.069354144183444e+02 -3.193811081429426e+01 4 2.192875328158541e+02 -2.000055488204448e+02 -7.899578990130104e+01 -4.294726612214667e+01 - ME 1.399009675490331e-04 + ME 6.765427234678898e-06 Event 125 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3052,7 +3052,7 @@ Event 125 Batch 1 2 7.351681880566981e+02 -1.932492970253984e+01 -4.393064933429818e+02 -5.891592456452273e+02 3 6.537497908129355e+02 -2.883189353576726e+01 3.454898907503182e+02 5.542510679217788e+02 4 1.110820211303664e+02 4.815682323830688e+01 9.381660259266363e+01 3.490817772344844e+01 - ME 1.431077255619906e-04 + ME 6.639428548470109e-05 Event 126 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3060,7 +3060,7 @@ Event 126 Batch 1 2 5.568747108147126e+02 1.149185667256990e+02 4.264979152236775e+02 -3.391204725116689e+02 3 6.934211462641822e+02 -1.939160042589616e+02 -6.294239612595663e+02 2.169215212257340e+02 4 2.497041429211053e+02 7.899743753326281e+01 2.029260460358889e+02 1.221989512859350e+02 - ME 3.344185566612618e-05 + ME 9.143592130512915e-06 Event 127 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -3068,7 +3068,7 @@ Event 127 Batch 1 2 7.108931196972316e+02 4.270547743949553e+02 5.664613189451065e+02 -4.598718776252147e+01 3 4.445675167124290e+02 -1.247884466860518e+02 -4.129475031266345e+02 1.074359351009545e+02 4 3.445393635903407e+02 -3.022663277089035e+02 -1.535138158184720e+02 -6.144874733843321e+01 - ME 1.180920695556687e-04 + ME 1.427738327825488e-05 Event 128 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3076,7 +3076,7 @@ Event 128 Batch 1 2 5.312407894292422e+02 -7.192118124205533e+01 -4.398126160332176e+02 -2.891521793453568e+02 3 5.717192413787027e+02 3.434745903572437e+02 1.811915566412192e+02 4.195923218357252e+02 4 3.970399691920551e+02 -2.715534091151883e+02 2.586210593919984e+02 -1.304401424903685e+02 - ME 1.848006274423395e-04 + ME 3.532660248239223e-05 Event 129 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3084,7 +3084,7 @@ Event 129 Batch 1 2 6.644129951428383e+02 -3.595672586482287e+02 4.645590915434784e+02 3.103882489514914e+02 3 1.967652372382455e+02 -5.204943416929049e+01 8.794498000645085e+00 -1.895522930301724e+02 4 6.388217676189169e+02 4.116166928175192e+02 -4.733535895441232e+02 -1.208359559213191e+02 - ME 3.082956717278722e-04 + ME 9.192558188476414e-05 Event 130 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3092,7 +3092,7 @@ Event 130 Batch 1 2 7.302263990443511e+02 -1.919590472356484e+02 3.836584700935805e+02 -5.909217345563752e+02 3 4.156541164903923e+02 2.203243106780774e+02 -1.767969453775071e+02 3.049071707664833e+02 4 3.541194844652567e+02 -2.836526344242890e+01 -2.068615247160734e+02 2.860145637898919e+02 - ME 3.110012368642411e-05 + ME 2.258971422042701e-05 Event 131 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3100,7 +3100,7 @@ Event 131 Batch 1 2 
2.308323688168238e+02 -1.780469473698228e+02 1.469011263880862e+02 1.710582294195638e+00 3 7.308075033948297e+02 5.219262643529272e+02 -3.840435213624620e+02 3.379099810545737e+02 4 5.383601277883465e+02 -3.438793169831044e+02 2.371423949743758e+02 -3.396205633487694e+02 - ME 1.061667055612532e-03 + ME 7.770640764079256e-05 Event 132 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3108,7 +3108,7 @@ Event 132 Batch 1 2 5.909630762789660e+02 -4.293852116769707e+02 -3.988922148105424e+02 7.583335995300355e+01 3 5.415993952096327e+02 2.260703809971038e+02 3.221145619770360e+02 -3.721079100067703e+02 4 3.674375285114020e+02 2.033148306798666e+02 7.677765283350686e+01 2.962745500537670e+02 - ME 3.321676569401813e-05 + ME 1.628447412544396e-05 Event 133 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3116,7 +3116,7 @@ Event 133 Batch 1 2 4.506052863582997e+02 2.189991325227701e+02 -3.914006430783634e+02 -4.347459771134355e+01 3 4.043998006859111e+02 3.160348074769272e+02 8.738893432792010e+01 2.366946839598570e+02 4 6.449949129557901e+02 -5.350339399996973e+02 3.040117087504433e+02 -1.932200862485142e+02 - ME 3.121497332919934e-04 + ME 8.705579101282482e-05 Event 134 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3124,7 +3124,7 @@ Event 134 Batch 1 2 7.151470882937614e+02 -1.041377497037516e+01 -4.186394096729767e+01 7.138447461686595e+02 3 3.416424731356660e+02 1.638631808685801e+02 3.081581136487586e+01 -2.981925940995343e+02 4 4.432104385705719e+02 -1.534494058982047e+02 1.104812960242199e+01 -4.156521520691248e+02 - ME 5.534325530265236e-02 + ME 6.342792451335309e-03 Event 135 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3132,7 +3132,7 @@ Event 135 Batch 1 2 7.115730144432832e+02 -3.219296530898238e+02 2.184242454110169e+02 -5.958089478700319e+02 3 
1.627059459894212e+02 -6.880794311551747e+01 -3.259803939022061e+01 1.437917231708342e+02 4 6.257210395672955e+02 3.907375962053413e+02 -1.858262060207963e+02 4.520172246991979e+02 - ME 2.112989182930814e-04 + ME 1.277979532321233e-04 Event 136 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3140,7 +3140,7 @@ Event 136 Batch 1 2 7.195404287114588e+02 -4.369992732083461e+02 -4.270318019286997e+02 3.800182941743402e+02 3 6.668605996318223e+02 3.634158794560479e+02 4.690430049045651e+02 -3.043527845290675e+02 4 1.135989716567186e+02 7.358339375229815e+01 -4.201120297586535e+01 -7.566550964527264e+01 - ME 1.804344388349211e-03 + ME 7.515399240093053e-05 Event 137 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3148,7 +3148,7 @@ Event 137 Batch 1 2 6.722782806744999e+02 -6.045581260407005e+02 -2.538460778300668e+02 1.484241478840623e+02 3 6.869263774705689e+02 6.661257235671316e+02 1.481819739565761e+02 -7.865412297735662e+01 4 1.407953418549304e+02 -6.156759752643097e+01 1.056641038734908e+02 -6.977002490670534e+01 - ME 5.192812231664224e-04 + ME 2.119149330726453e-05 Event 138 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3156,7 +3156,7 @@ Event 138 Batch 1 2 6.463287544295633e+02 8.684709774942756e+01 2.409249839962013e+02 -5.934253049048401e+02 3 3.917330799270068e+02 1.767690441671677e+02 4.696120064017492e+01 3.464132742372293e+02 4 4.619381656434300e+02 -2.636161419165952e+02 -2.878861846363762e+02 2.470120306676108e+02 - ME 5.804753959762886e-05 + ME 4.203806696206548e-05 Event 139 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3164,7 +3164,7 @@ Event 139 Batch 1 2 2.994802063237944e+02 -1.272876183039153e+02 6.552211336810879e+00 2.710042891410713e+02 3 7.257546970836092e+02 -8.848613612326799e+00 5.127896146768584e+00 -7.256826352181574e+02 4 
4.747650965925943e+02 1.361362319162416e+02 -1.168010748357900e+01 4.546783460770868e+02 - ME 1.724196014694060e-04 + ME 1.500396153249019e-04 Event 140 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3172,7 +3172,7 @@ Event 140 Batch 1 2 7.326756101999780e+02 5.655005379385240e+02 4.343799907428446e+02 1.683351270988810e+02 3 7.428339005597779e+02 -5.680473426214219e+02 -4.534832054058505e+02 -1.532233754243464e+02 4 2.449048924024402e+01 2.546804682897962e+00 1.910321466300584e+01 -1.511175167453447e+01 - ME 4.669436438173466e-03 + ME 1.024603362434272e-04 Event 141 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3180,7 +3180,7 @@ Event 141 Batch 1 2 7.363238871411332e+02 -6.772722174663238e+02 -2.824373475598683e+02 -6.086341204880675e+01 3 5.504260535970963e+02 4.650298533191528e+02 2.914345410616540e+02 4.221355560271704e+01 4 2.132500592617708e+02 2.122423641471711e+02 -8.997193501785816e+00 1.864985644608987e+01 - ME 7.300791864660033e-05 + ME 1.166401869382226e-05 Event 142 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3188,7 +3188,7 @@ Event 142 Batch 1 2 5.862280565156834e+02 4.248793793115829e+01 -2.479279504752411e+02 -5.295184989682986e+02 3 4.287264749982929e+02 -3.025296967755320e+02 2.785471849307642e+02 1.212173201341831e+02 4 4.850454684860405e+02 2.600417588443628e+02 -3.061923445551928e+01 4.083011788341197e+02 - ME 4.569028399965169e-05 + ME 1.949810022878841e-05 Event 143 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3196,7 +3196,7 @@ Event 143 Batch 1 2 2.464531733710510e+02 4.046044690030688e+01 -2.103865804466287e+02 1.218179201483223e+02 3 5.378449948854583e+02 4.607829603950880e+02 -2.747641700963839e+02 3.822241180409925e+01 4 7.157018317434903e+02 -5.012434072953949e+02 4.851507505430126e+02 -1.600403319524219e+02 - ME 
1.284493741497843e-03 + ME 4.863434295951330e-04 Event 144 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3204,7 +3204,7 @@ Event 144 Batch 1 2 5.367418008803521e+02 -1.343004856786532e+02 -4.048537736989352e+02 -3.258044847458254e+02 3 6.294877130859599e+02 3.313530054622211e+02 5.282137272543231e+02 8.631468610520756e+01 4 3.337704860336884e+02 -1.970525197835678e+02 -1.233599535553879e+02 2.394897986406179e+02 - ME 2.612855607885159e-05 + ME 8.754930746282009e-06 Event 145 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3212,7 +3212,7 @@ Event 145 Batch 1 2 6.805380148481771e+01 -3.411514819754512e+01 -4.339750646760406e+01 -3.980116822894492e+01 3 6.831461500979880e+02 -3.834019790669201e+02 -2.756424954453614e+02 -4.936727656514237e+02 4 7.488000484171945e+02 4.175171272644653e+02 3.190400019129655e+02 5.334739338803686e+02 - ME 4.832444287218038e-01 + ME 4.117012994651258e-01 Event 146 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3220,7 +3220,7 @@ Event 146 Batch 1 2 5.031746658797123e+02 4.202301876294930e+02 2.767377273314875e+02 2.750283520766640e+00 3 4.317115817339341e+02 -1.098088257924671e+02 -5.455162180567243e+01 4.139336083717602e+02 4 5.651137523863538e+02 -3.104213618370259e+02 -2.221861055258150e+02 -4.166838918925268e+02 - ME 4.446377084117306e-03 + ME 1.122040831263755e-03 Event 147 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3228,7 +3228,7 @@ Event 147 Batch 1 2 4.251223043705630e+02 -4.223502783198938e+02 -4.694338569631599e+01 1.206377286808446e+01 3 5.457819748703678e+02 2.791608945230574e+02 -4.384138579515959e+02 -1.665546403390879e+02 4 5.290957207590696e+02 1.431893837968364e+02 4.853572436479118e+02 1.544908674710035e+02 - ME 5.820013407126093e-05 + ME 1.117959404473985e-05 Event 148 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3236,7 +3236,7 @@ Event 148 Batch 1 2 6.905785821272525e+02 6.249608768654489e+02 -6.243387159972350e+01 -2.870970082698929e+02 3 1.361638260920089e+02 2.862044352088506e+01 1.704210379179796e+01 1.320266050727362e+02 4 6.732575917807402e+02 -6.535813203863343e+02 4.539176780792534e+01 1.550704031971573e+02 - ME 9.573948308169230e-04 + ME 5.047601105033982e-04 Event 149 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3244,7 +3244,7 @@ Event 149 Batch 1 2 6.694705528096943e+02 -5.216497821741067e+02 -3.785079074709545e+02 1.811189935345937e+02 3 2.821401257551277e+02 1.148500354702071e-01 2.786662494166578e+02 -4.413795199872407e+01 4 5.483893214351779e+02 5.215349321386365e+02 9.984165805429673e+01 -1.369810415358697e+02 - ME 1.943324414096923e-04 + ME 3.486097449584098e-05 Event 150 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3252,7 +3252,7 @@ Event 150 Batch 1 2 4.637486188995366e+02 -4.033412855298819e+02 -2.279949807412008e+02 -1.992178895453991e+01 3 3.756800751656199e+02 6.230662615514293e+01 -2.632310737913946e+02 -2.606967683041707e+02 4 6.605713059348438e+02 3.410346593747391e+02 4.912260545325952e+02 2.806185572587107e+02 - ME 2.156945366470290e-04 + ME 4.211370643652993e-05 Event 151 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3260,7 +3260,7 @@ Event 151 Batch 1 2 3.821954355913596e+02 -2.528320044280690e+02 2.861764538722267e+02 1.588602445142563e+01 3 6.796189325418250e+02 2.911670128135291e+02 -4.900375979142738e+02 3.700902818893582e+02 4 4.381856318668152e+02 -3.833500838546018e+01 2.038611440420471e+02 -3.859763063407838e+02 - ME 8.197229841786387e-03 + ME 1.923941526207248e-04 Event 152 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3268,7 +3268,7 @@ Event 
152 Batch 1 2 6.751133298339792e+02 -2.999578895043981e+02 -2.855974213275218e+02 -5.331391803034741e+02 3 4.976977783498468e+02 -3.003988119418482e+00 1.843802943840355e+02 4.622747685874795e+02 4 3.271888918161745e+02 3.029618776238166e+02 1.012171269434863e+02 7.086441171599445e+01 - ME 1.204579535049519e-04 + ME 6.977738125195056e-05 Event 153 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3276,7 +3276,7 @@ Event 153 Batch 1 2 1.729293620257127e+02 1.558357805102956e+02 -7.193392860849491e+01 2.110174585940510e+01 3 6.524550819255464e+02 2.410158908712478e+02 5.786677971610501e+02 1.809766692333240e+02 4 6.746155560487412e+02 -3.968516713815435e+02 -5.067338685525552e+02 -2.020784150927291e+02 - ME 5.985591428637023e-04 + ME 1.391654510317005e-04 Event 154 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3284,7 +3284,7 @@ Event 154 Batch 1 2 6.585658455851002e+02 -2.410305357139302e+02 -2.116446673272157e+02 -5.751693564652295e+02 3 5.764400833248005e+02 3.388133979948972e+02 3.092747322371399e+02 3.490527051926400e+02 4 2.649940710900988e+02 -9.778286228096688e+01 -9.763006490992416e+01 2.261166512725894e+02 - ME 3.655181799213059e-05 + ME 2.686434432328395e-05 Event 155 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3292,7 +3292,7 @@ Event 155 Batch 1 2 5.686586231936359e+02 -1.693366246265498e+02 -1.542203680657918e+02 5.204938187588979e+02 3 1.882190564276536e+02 -1.089234770645493e+02 -9.145416397064866e+01 1.232810822434430e+02 4 7.431223203787102e+02 2.782601016910992e+02 2.456745320364404e+02 -6.437749010023409e+02 - ME 6.696396361607482e-01 + ME 4.701119881405690e-01 Event 156 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3300,7 +3300,7 @@ Event 156 Batch 1 2 6.143652095725128e+02 2.879464601546110e+02 5.379391909976823e+02 
-7.178351904348040e+01 3 6.287751645293085e+02 -4.584164185734781e+02 -4.225140875260598e+02 -8.181956094447702e+01 4 2.568596258981782e+02 1.704699584188668e+02 -1.154251034716223e+02 1.536030799879581e+02 - ME 2.899571701789112e-05 + ME 7.769660148731367e-06 Event 157 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3308,7 +3308,7 @@ Event 157 Batch 1 2 5.050842109798973e+02 4.185498850973046e+02 -1.305174306570672e+02 -2.507812875014723e+02 3 5.170424494038050e+02 -3.084595065654854e+02 3.930456446728388e+02 -1.330441599566699e+02 4 4.778733396162975e+02 -1.100903785318191e+02 -2.625282140157716e+02 3.838254474581424e+02 - ME 4.033251359625283e-05 + ME 1.243977993100618e-05 Event 158 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3316,7 +3316,7 @@ Event 158 Batch 1 2 4.312542366204098e+02 -3.114503370626313e+02 2.737030704635235e+02 1.185982013584742e+02 3 6.944315393047829e+02 2.166643175309468e+02 -6.173965008138002e+02 -2.326226495269423e+02 4 3.743142240748070e+02 9.478601953168439e+01 3.436934303502764e+02 1.140244481684682e+02 - ME 3.680357310121394e-05 + ME 5.864250821924803e-06 Event 159 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3324,7 +3324,7 @@ Event 159 Batch 1 2 5.860112473308646e+02 -1.581297551692178e+02 4.935632758462007e+02 2.734948907463652e+02 3 3.772013313646349e+02 -2.371132827856262e+02 -1.305099443644436e+02 -2.627266448837395e+02 4 5.367874213045002e+02 3.952430379548442e+02 -3.630533314817573e+02 -1.076824586262577e+01 - ME 1.030382455754272e-04 + ME 2.805189658646002e-05 Event 160 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3332,7 +3332,7 @@ Event 160 Batch 1 2 5.883409724804535e+02 -3.739819298758817e+02 -2.887651121595530e+02 3.505671490956299e+02 3 4.300332553173178e+02 1.788055146224819e+02 3.829208006453583e+02 
7.955406370837679e+01 4 4.816257722022287e+02 1.951764152533999e+02 -9.415568848580530e+01 -4.301212128040066e+02 - ME 9.797271586219467e-03 + ME 2.307516153071828e-04 Event 161 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3340,7 +3340,7 @@ Event 161 Batch 1 2 6.868305165969147e+02 4.119610488151656e+00 5.515184990814985e+02 4.093244831537709e+02 3 3.260821955312833e+02 -1.956999890649130e+02 -2.483451099187458e+02 -7.972338993006402e+01 4 4.870872878718022e+02 1.915803785767614e+02 -3.031733891627526e+02 -3.296010932237070e+02 - ME 1.075603053132144e-03 + ME 9.860610555787331e-05 Event 162 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3348,7 +3348,7 @@ Event 162 Batch 1 2 2.159818802305119e+02 -2.018126805027919e+02 4.096951387107715e+01 -6.512536763314942e+01 3 6.870078865581224e+02 4.896730732821633e+02 -2.356527215298929e+02 -4.203188222421333e+02 4 5.970102332113654e+02 -2.878603927793715e+02 1.946832076588156e+02 4.854441898752826e+02 - ME 5.344822454174306e-05 + ME 2.809071549115161e-05 Event 163 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3356,7 +3356,7 @@ Event 163 Batch 1 2 4.889699854403287e+02 -4.067839821807834e+01 -2.740835242435768e+02 4.028835269878222e+02 3 4.282392920294498e+02 4.007468150560176e+02 -8.832740907173851e+01 -1.224301852772270e+02 4 5.827907225302220e+02 -3.600684168379390e+02 3.624109333153153e+02 -2.804533417105952e+02 - ME 4.336231422638298e-04 + ME 1.173701793303044e-04 Event 164 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3364,7 +3364,7 @@ Event 164 Batch 1 2 6.224346677404150e+02 -1.282049393554146e+02 5.480608628970117e+02 -2.657399098565701e+02 3 7.444531740822750e+02 1.794330131141779e+02 -6.708967511266460e+02 2.681638893170603e+02 4 1.331121581773107e+02 -5.122807375876333e+01 1.228358882296343e+02 
-2.423979460490191e+00 - ME 1.368953177788070e-04 + ME 1.571413941583783e-05 Event 165 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3372,7 +3372,7 @@ Event 165 Batch 1 2 6.980339706506675e+02 -5.154669325341684e+01 -4.947847840614098e+02 4.896757907618869e+02 3 1.362964882116331e+02 4.252532371924361e+01 -5.641238783031591e+01 -1.165588780002596e+02 4 6.656695411377010e+02 9.021369534174053e+00 5.511971718917263e+02 -3.731169127616273e+02 - ME 1.450267418906797e-03 + ME 4.238311927693088e-04 Event 166 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3380,7 +3380,7 @@ Event 166 Batch 1 2 3.060640747281171e+02 -1.981167412190918e+02 -9.095380261170779e+01 -2.148310510107333e+02 3 5.580104478575086e+02 -3.585720992432471e+02 -1.558095186186280e+02 3.981521109704927e+02 4 6.359254774143739e+02 5.566888404623389e+02 2.467633212303362e+02 -1.833210599597597e+02 - ME 3.000804338470548e-04 + ME 1.099447007687216e-04 Event 167 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3388,7 +3388,7 @@ Event 167 Batch 1 2 2.833153623322893e+02 2.526850217013923e+02 8.687924899084067e+01 9.417998957332070e+01 3 6.595685044563415e+02 -8.780626893611850e+01 -2.875856231737449e+02 -5.870393347553995e+02 4 5.571161332113688e+02 -1.648787527652738e+02 2.007063741829043e+02 4.928593451820789e+02 - ME 7.367447958524992e-05 + ME 4.244421486768831e-05 Event 168 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3396,7 +3396,7 @@ Event 168 Batch 1 2 6.026267479353969e+02 -5.987968578530475e+02 5.775180228477150e+00 6.758674164241529e+01 3 4.991211680715713e+02 3.812575567959843e+02 3.220701575873951e+02 -5.952259631185711e+00 4 3.982520839930309e+02 2.175393010570631e+02 -3.278453378158730e+02 -6.163448201122968e+01 - ME 9.606399998327532e-05 + ME 1.203107058680061e-05 Event 169 Batch 
1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3404,7 +3404,7 @@ Event 169 Batch 1 2 5.510662376679772e+02 -9.251111075413947e+01 -5.291920243323356e+02 -1.227660134875281e+02 3 5.034535790022877e+02 -2.816014265681677e+02 3.283802195198170e+02 2.575511098657944e+02 4 4.454801833297348e+02 3.741125373223072e+02 2.008118048125185e+02 -1.347850963782663e+02 - ME 1.532484123791625e-04 + ME 2.085195230877358e-05 Event 170 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3412,7 +3412,7 @@ Event 170 Batch 1 2 2.814808559369750e+02 3.658097943502287e+01 -1.412301634042880e+02 -2.407225480659935e+02 3 6.646522150540470e+02 2.753499086551696e+02 -1.631412967142655e+02 5.825203104495404e+02 4 5.538669290089779e+02 -3.119308880901926e+02 3.043714601185535e+02 -3.417977623835468e+02 - ME 7.823510217753851e-04 + ME 2.587160315460459e-04 Event 171 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3420,7 +3420,7 @@ Event 171 Batch 1 2 1.777965289077954e+02 -6.143496808852239e+01 -1.603735842336773e+00 1.668375809551635e+02 3 7.439290290569696e+02 2.163074211412066e+01 -1.907051550939623e+01 -7.433699124308462e+02 4 5.782744420352348e+02 3.980422597440174e+01 2.067425135173305e+01 5.765323314756826e+02 - ME 2.063755640794395e-03 + ME 1.981167274383509e-03 Event 172 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3428,7 +3428,7 @@ Event 172 Batch 1 2 1.369499454750680e+02 -1.250080331667568e+01 -3.518152151649629e+01 -1.317622025690455e+02 3 6.692885586315896e+02 -2.346283187163472e+02 -6.130705295376303e+02 1.305421486874673e+02 4 6.937614958933425e+02 2.471291220330227e+02 6.482520510541266e+02 1.220053881578238e+00 - ME 5.039586079692636e-04 + ME 1.548169060571347e-04 Event 173 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 
@@ -3436,7 +3436,7 @@ Event 173 Batch 1 2 7.088772083623137e+02 4.973951266878932e+01 3.171232495758680e+01 -7.064185769505260e+02 3 5.785136264307895e+02 8.584813303397833e+01 5.766505028397120e+01 5.691949191590089e+02 4 2.126091652068944e+02 -1.355876457027672e+02 -8.937737524155732e+01 1.372236577915166e+02 - ME 1.743760900867476e-04 + ME 1.732961413682620e-04 Event 174 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3444,7 +3444,7 @@ Event 174 Batch 1 2 4.367208701713482e+02 -3.923163287174704e+01 4.325755195957351e+02 -4.543585887727652e+01 3 3.528978856725088e+02 9.622572295106905e+01 1.987077746703234e+02 -2.753048278549415e+02 4 7.103812441561454e+02 -5.699409007932221e+01 -6.312832942660567e+02 3.207406867322186e+02 - ME 9.353677491192390e-04 + ME 1.541208918572365e-04 Event 175 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3452,7 +3452,7 @@ Event 175 Batch 1 2 6.418562164876806e+02 1.962785648722137e+02 -6.110736372974047e+02 -6.567908015856712e+00 3 4.843421844702149e+02 -1.886631806266161e+02 3.569879071908527e+02 -2.674942804112337e+02 4 3.738015990421035e+02 -7.615384245597569e+00 2.540857301065516e+02 2.740621884270906e+02 - ME 3.029111560812189e-05 + ME 1.279055979705581e-05 Event 176 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3460,7 +3460,7 @@ Event 176 Batch 1 2 6.288652703123263e+02 4.005522031116294e+02 3.691482793515075e+02 3.142594606996526e+02 3 7.209127580467475e+02 -4.124575135572966e+02 -5.165298058232565e+02 -2.877341896975221e+02 4 1.502219716409257e+02 1.190531044566666e+01 1.473815264717492e+02 -2.652527100213051e+01 - ME 1.719274466020296e-04 + ME 1.300720357566141e-05 Event 177 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3468,7 +3468,7 @@ Event 177 Batch 1 2 4.716578040000077e+02 -4.521622645932388e+02 
-1.012739918234145e+01 1.338200520767543e+02 3 3.021382980750606e+02 -2.714821202364266e+02 6.773215888881064e+01 -1.140059832109250e+02 4 7.262038979249317e+02 7.236443848296653e+02 -5.760475970646905e+01 -1.981406886582933e+01 - ME 2.354271252348000e-03 + ME 6.442260552556652e-04 Event 178 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3476,7 +3476,7 @@ Event 178 Batch 1 2 7.350088877399502e+02 -3.684484945749095e+02 -2.561732769425163e+02 -5.821159885132296e+02 3 1.415495174310248e+02 7.181268644032879e+01 1.095010133995263e+02 5.374692563910759e+01 4 6.234415948290248e+02 2.966358081345808e+02 1.466722635429900e+02 5.283690628741219e+02 - ME 1.035408980291912e-04 + ME 6.828487731379645e-05 Event 179 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3484,7 +3484,7 @@ Event 179 Batch 1 2 7.426064621425413e+02 6.748632301344054e+01 7.201624948975951e+02 -1.681544967131679e+02 3 5.821031882499326e+02 8.394276920418550e-01 -5.588194474899291e+02 1.629854049874919e+02 4 1.752903496075256e+02 -6.832575070548241e+01 -1.613430474076661e+02 5.169091725675888e+00 - ME 9.197132478706931e-05 + ME 1.412410550503903e-05 Event 180 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3492,7 +3492,7 @@ Event 180 Batch 1 2 6.099515195485484e+02 2.272495331206023e+02 1.762692760011278e+02 -5.378918555193875e+02 3 5.718889655176699e+02 4.324570510796980e+01 -3.278409766521432e+02 4.665909256493895e+02 4 3.181595149337819e+02 -2.704952382285720e+02 1.515717006510154e+02 7.130092986999803e+01 - ME 5.401477812349802e-05 + ME 3.043963963928669e-05 Event 181 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3500,7 +3500,7 @@ Event 181 Batch 1 2 1.206370886915177e+02 -8.151225636567759e+01 1.767749325039422e+01 8.715827822142556e+01 3 6.451493408002739e+02 -6.748216257939080e+01 
4.373428479320614e+02 4.694625256943417e+02 4 7.342135705082084e+02 1.489944189450684e+02 -4.550203411824557e+02 -5.566208039157672e+02 - ME 7.131653341377736e-02 + ME 2.625479922313071e-02 Event 182 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3508,7 +3508,7 @@ Event 182 Batch 1 2 4.626866082364760e+02 -3.084610429505738e+02 3.306629079434072e+02 9.794245113140897e+01 3 4.974966719253473e+02 3.582955998671217e+02 1.664640547097976e+02 -3.023523113558579e+02 4 5.398167198381765e+02 -4.983455691654795e+01 -4.971269626532048e+02 2.044098602244489e+02 - ME 5.959042767905828e-05 + ME 1.414799589613471e-05 Event 183 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3516,7 +3516,7 @@ Event 183 Batch 1 2 3.304723045950491e+02 3.244647182058462e+00 3.209425641774955e+02 7.872284845075714e+01 3 4.379804819457451e+02 2.312428523500660e+02 3.131807483468383e+02 2.006775141049615e+02 4 7.315472134592065e+02 -2.344874995321247e+02 -6.341233125243344e+02 -2.794003625557186e+02 - ME 4.899988668912175e-03 + ME 2.330806393221907e-03 Event 184 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3524,7 +3524,7 @@ Event 184 Batch 1 2 7.470051035005908e+02 -4.953964753944513e+02 -4.028924750569613e+02 3.876552725878485e+02 3 2.183325716323390e+02 1.119040172022777e+02 1.451703047217021e+02 -1.186262424448778e+02 4 5.346623248670695e+02 3.834924581921736e+02 2.577221703352594e+02 -2.690290301429710e+02 - ME 5.441344453720516e-04 + ME 7.987999480474686e-05 Event 185 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3532,7 +3532,7 @@ Event 185 Batch 1 2 4.448583927494090e+02 2.810173563272025e+02 -3.384637477435971e+02 6.610995769032235e+01 3 6.236443795626774e+02 -1.690803760724666e+02 5.125139620028374e+02 3.125277225134823e+02 4 4.314972276879136e+02 -1.119369802547359e+02 
-1.740502142592404e+02 -3.786376802038046e+02 - ME 6.949230823829164e-03 + ME 1.405605442011058e-04 Event 186 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3540,7 +3540,7 @@ Event 186 Batch 1 2 6.802792190696962e+02 -1.681815241656754e+02 5.427923640013703e+02 3.739936368565512e+02 3 6.331554869749547e+02 3.172201723440435e+02 -4.588808692389625e+02 -2.994755095011972e+02 4 1.865652939553488e+02 -1.490386481783679e+02 -8.391149476240778e+01 -7.451812735535422e+01 - ME 3.276943053321406e-04 + ME 3.045129627255903e-05 Event 187 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3548,7 +3548,7 @@ Event 187 Batch 1 2 7.472897115267965e+02 -6.988402471604775e+02 -2.391684329048669e+02 1.134137672609268e+02 3 6.826908170748527e+02 6.328852277257668e+02 2.212839847556716e+02 -1.286718241709738e+02 4 7.001947139835140e+01 6.595501943471052e+01 1.788444814919547e+01 1.525805691004725e+01 - ME 1.461490870437387e-04 + ME 3.485925693242860e-05 Event 188 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3556,7 +3556,7 @@ Event 188 Batch 1 2 6.496068877140275e+02 -5.024316730938291e+02 -3.980061777252906e+02 -1.055585379310702e+02 3 4.885976180718368e+02 4.424928723138696e+02 1.459942636040002e+02 -1.470148473169288e+02 4 3.617954942141354e+02 5.993880077995960e+01 2.520119141212904e+02 2.525733852479991e+02 - ME 2.843805826594158e-05 + ME 1.006519408431335e-05 Event 189 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3564,7 +3564,7 @@ Event 189 Batch 1 2 4.082379946778654e+02 2.679237131173331e+02 -7.718184435750955e+01 2.981913934867987e+02 3 5.864211573889181e+02 -5.780822197382728e+02 -6.394893886953379e+01 7.497502433004084e+01 4 5.053408479332167e+02 3.101585066209396e+02 1.411307832270433e+02 -3.731664178168398e+02 - ME 1.937644878671120e-03 + ME 
1.322787627040098e-04 Event 190 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3572,7 +3572,7 @@ Event 190 Batch 1 2 6.472516823166364e+02 6.463779961822676e+02 -3.289365889632791e+01 6.945035458816692e+00 3 4.318767277050750e+02 -3.286790725415815e+02 -7.183748821760624e+00 -2.800642229191639e+02 4 4.208715899782885e+02 -3.176989236406859e+02 4.007740771808847e+01 2.731191874603472e+02 - ME 3.409584379294133e-05 + ME 1.272332211942340e-05 Event 191 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3580,7 +3580,7 @@ Event 191 Batch 1 2 6.757500036387052e+02 6.222744522021635e+02 -2.261571472854044e+02 1.351499844096745e+02 3 3.644673602666567e+02 -2.020102809038697e+02 1.114149692296405e+02 -2.821613151026251e+02 4 4.597826360946380e+02 -4.202641712982938e+02 1.147421780557637e+02 1.470113306929507e+02 - ME 5.389305783035389e-05 + ME 1.560703181590231e-05 Event 192 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3588,7 +3588,7 @@ Event 192 Batch 1 2 7.394562478491531e+02 -7.307873850878615e+02 3.988568028534699e+01 1.056147375500683e+02 3 8.098058518630978e+01 5.419286926826393e+01 4.244928426361276e+00 -6.002473390399248e+01 4 6.795631669645365e+02 6.765945158195976e+02 -4.413060871170821e+01 -4.559000364607596e+01 - ME 4.204295748489254e-04 + ME 1.231033846344155e-04 Event 193 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3596,7 +3596,7 @@ Event 193 Batch 1 2 5.607395612273153e+02 -3.164229781907934e+02 -3.517992386171808e+02 -3.009030576558548e+02 3 3.741643617741927e+02 -2.156271676189966e+02 1.666697084176705e+02 2.563690747778811e+02 4 5.650960769984922e+02 5.320501458097899e+02 1.851295301995104e+02 4.453398287797368e+01 - ME 9.141090879934244e-05 + ME 3.026844143728605e-05 Event 194 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -3604,7 +3604,7 @@ Event 194 Batch 1 2 5.729373416862012e+02 -2.155045544874616e+02 -1.679805246197324e+02 5.035846779262559e+02 3 2.831035485618876e+02 -2.543279085173982e+02 1.042261812492671e+02 -6.783684323208054e+01 4 6.439591097519118e+02 4.698324630048598e+02 6.375434337046515e+01 -4.357478346941756e+02 - ME 1.781231321893996e-03 + ME 5.497724763810379e-04 Event 195 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3612,7 +3612,7 @@ Event 195 Batch 1 2 5.572874060171201e+02 -5.433144409127298e+02 3.646295232533866e+01 1.185290019729285e+02 3 6.765845568040619e+02 5.574999049241243e+02 -1.212989803269169e+01 -3.831623469093195e+02 4 2.661280371788181e+02 -1.418546401139455e+01 -2.433305429264712e+01 2.646333449363910e+02 - ME 3.395618115588225e-04 + ME 3.378534889977447e-04 Event 196 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3620,7 +3620,7 @@ Event 196 Batch 1 2 5.405888343305829e+02 3.940239871950471e+02 -8.826690628749978e+01 -3.594305754554688e+02 3 6.983754392688073e+02 -3.888370902622853e+02 -5.513072771506098e+01 5.774898910559966e+02 4 2.610357264006097e+02 -5.186896932761887e+00 1.433976340025607e+02 -2.180593156005277e+02 - ME 5.539073969003598e-03 + ME 2.676929502290073e-04 Event 197 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3628,7 +3628,7 @@ Event 197 Batch 1 2 2.783346334111661e+02 2.282410890438732e+02 -1.474467226896361e+02 6.029624695020830e+01 3 6.434654504578666e+02 1.172104173128919e+01 6.205939438823057e+02 1.696277097949658e+02 4 5.781999161309674e+02 -2.399621307751624e+02 -4.731472211926695e+02 -2.299239567451741e+02 - ME 3.321087064690878e-04 + ME 4.280180350752636e-05 Event 198 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3636,7 +3636,7 @@ Event 198 Batch 1 2 
4.349536439683943e+02 1.774777254208009e+02 -9.709992209949135e+01 3.850427697141142e+02 3 4.134500153047116e+02 7.095914770071803e+01 -4.041194890923881e+02 -5.092301099466194e+01 4 6.515963407268921e+02 -2.484368731215197e+02 5.012194111918782e+02 -3.341197587194521e+02 - ME 7.849443582399766e-04 + ME 2.926862112764983e-04 Event 199 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3644,7 +3644,7 @@ Event 199 Batch 1 2 6.682109290882580e+02 2.136897997740939e+02 -5.035763266519416e+02 3.837361052354048e+02 3 1.424120473397155e+02 8.952788458880865e+01 -4.686863299276860e+01 -1.003458038481504e+02 4 6.893770235720265e+02 -3.032176843629025e+02 5.504449596447103e+02 -2.833903013872543e+02 - ME 1.167594898598604e-03 + ME 4.183851150998592e-04 Event 200 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3652,7 +3652,7 @@ Event 200 Batch 1 2 5.959952693237885e+02 -4.878566955018547e+02 -2.510837703973929e+01 -3.414319479966339e+02 3 4.479637599869168e+02 4.499951041477978e+01 7.146287716862105e+01 4.399313940955211e+02 4 4.560409706892941e+02 4.428571850870749e+02 -4.635450012888173e+01 -9.849944609888662e+01 - ME 5.545496796633981e-04 + ME 3.228844805909175e-04 Event 201 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3660,7 +3660,7 @@ Event 201 Batch 1 2 5.203096708642927e+02 -1.112696379946441e+02 1.367824427202020e+02 4.895219960522141e+02 3 2.871951825199399e+02 -2.582762312778227e+02 1.200876310962787e+02 3.678888524092984e+01 4 6.924951466157675e+02 3.695458692724667e+02 -2.568700738164807e+02 -5.263108812931440e+02 - ME 6.577575910850049e-03 + ME 2.285182473348715e-03 Event 202 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3668,7 +3668,7 @@ Event 202 Batch 1 2 2.158792376054218e+02 2.112389782008981e+01 -7.195062193526132e+01 -2.024369881546198e+02 3 
5.463652944256570e+02 2.787950008966254e+02 -3.108926376755554e+02 -3.523267663221479e+02 4 7.377554679689213e+02 -2.999188987167153e+02 3.828432596108168e+02 5.547637544767679e+02 - ME 8.695282964050810e-03 + ME 1.952686275320307e-03 Event 203 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3676,7 +3676,7 @@ Event 203 Batch 1 2 7.124273471334275e+02 4.879265047129839e+02 -1.059167473143779e+02 -5.081949365946950e+02 3 6.746108110440506e+02 -5.248642991835990e+02 4.352799102536777e+01 4.215714978711400e+02 4 1.129618418225217e+02 3.693779447061509e+01 6.238875628901040e+01 8.662343872355494e+01 - ME 5.361938367485652e-05 + ME 4.211918129012132e-05 Event 204 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3684,7 +3684,7 @@ Event 204 Batch 1 2 7.084787759842808e+02 4.992472551829619e+02 -4.528122431715626e+02 -2.183012291454193e+02 3 1.034373169902747e+02 -8.959882065299325e+01 -3.938861547415055e+01 -3.346441176487074e+01 4 6.880839070254444e+02 -4.096484345299685e+02 4.922008586457131e+02 2.517656409102901e+02 - ME 2.988048706021647e-04 + ME 1.033102023766027e-04 Event 205 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3692,7 +3692,7 @@ Event 205 Batch 1 2 6.496569846879349e+02 -5.869603795046561e+02 -2.345911576090251e+02 1.499956646614410e+02 3 2.543878192344406e+02 -1.851019090219859e+00 2.474675926596849e+02 -5.890268997594536e+01 4 5.959551960776247e+02 5.888113985948760e+02 -1.287643505065981e+01 -9.109297468549572e+01 - ME 1.871447246980874e-04 + ME 4.134215827558992e-05 Event 206 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3700,7 +3700,7 @@ Event 206 Batch 1 2 6.172060642836410e+02 2.978040691523503e+02 4.166709400833434e+02 3.444435946201744e+02 3 7.205754982426181e+02 -2.468045809177361e+02 -5.690387091428452e+02 -3.667580878490107e+02 4 
1.622184374737409e+02 -5.099948823461420e+01 1.523677690595017e+02 2.231449322883641e+01 - ME 7.356489425273393e-05 + ME 1.138691716042452e-05 Event 207 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3708,7 +3708,7 @@ Event 207 Batch 1 2 5.250113096394139e+02 -1.091977068802181e+02 -4.322753509449321e+02 2.772196909074646e+02 3 5.240251005653129e+02 3.541948269240045e+02 3.738549241960732e+02 9.685466564450643e+01 4 4.509635897952731e+02 -2.449971200437864e+02 5.842042674885889e+01 -3.740743565519710e+02 - ME 3.378615964480245e-03 + ME 9.518274156960593e-05 Event 208 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3716,7 +3716,7 @@ Event 208 Batch 1 2 4.449444343820048e+02 1.928662436733418e+02 -3.595193210859464e+02 1.775500478872298e+02 3 4.894053462810564e+02 -2.195789585225567e+02 2.295326432211599e+02 3.723136307450180e+02 4 5.656502193369389e+02 2.671271484921488e+01 1.299866778647865e+02 -5.498636786322478e+02 - ME 2.068943926258950e-01 + ME 2.179806976662403e-03 Event 209 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3724,7 +3724,7 @@ Event 209 Batch 1 2 4.949423498078044e+02 -2.830370809537592e+02 -1.684680620467476e+02 -3.694271951395289e+02 3 6.326444171345161e+02 3.898538983719823e+02 -1.748162179498052e+02 4.665749526039372e+02 4 3.724132330576786e+02 -1.068168174182231e+02 3.432842799965525e+02 -9.714775746440780e+01 - ME 1.473942246791387e-04 + ME 3.638076645868775e-05 Event 210 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3732,7 +3732,7 @@ Event 210 Batch 1 2 5.469464199121014e+02 -4.947084169679945e+02 2.319240083666633e+02 -2.500445517953792e+01 3 2.929141603572806e+02 -5.602902696925145e+01 2.099470855189298e+01 2.867379913571110e+02 4 6.601394197306178e+02 5.507374439372461e+02 -2.529187169185561e+02 -2.617335361775729e+02 - ME 
1.577330101330874e-03 + ME 7.792286450853471e-04 Event 211 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3740,7 +3740,7 @@ Event 211 Batch 1 2 5.484404249965427e+02 1.659778109685243e+01 3.514591842057613e+02 -4.206992456262192e+02 3 4.635537606517395e+02 -3.607884938122542e+02 -3.140996451540818e+01 2.893564685231623e+02 4 4.880058143517181e+02 3.441907127154018e+02 -3.200492196903532e+02 1.313427771030569e+02 - ME 4.999214184618137e-05 + ME 1.717788621912363e-05 Event 212 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3748,7 +3748,7 @@ Event 212 Batch 1 2 6.930853388432640e+02 -3.424793196872474e+02 -8.152110066892747e+01 5.970171795281683e+02 3 9.131624224772825e+01 6.738328155058525e+01 1.365968298972706e+01 6.009627714210347e+01 4 7.155984189090078e+02 2.750960381366621e+02 6.786141767920034e+01 -6.571134566702718e+02 - ME 3.224436999651524e-01 + ME 4.440767413899675e-02 Event 213 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3756,7 +3756,7 @@ Event 213 Batch 1 2 7.316448870278512e+02 4.203233031264803e+02 4.913598772661251e+02 -3.423419819067778e+02 3 4.750162603483208e+02 -1.726357548525294e+02 -3.708603862154638e+02 2.414537588813190e+02 4 2.933388526238279e+02 -2.476875482739507e+02 -1.204994910506614e+02 1.008882230254589e+02 - ME 4.008080891216109e-05 + ME 1.166473784051930e-05 Event 214 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3764,7 +3764,7 @@ Event 214 Batch 1 2 4.805779599533694e+02 3.904513572450257e+02 -1.742898429406511e+02 2.193763065287195e+02 3 6.164938851206517e+02 -5.563771061772993e+02 2.227142270499353e+02 1.445946028815716e+02 4 4.029281549259790e+02 1.659257489322735e+02 -4.842438410928419e+01 -3.639709094102910e+02 - ME 1.130096726278085e-02 + ME 1.644694060635318e-04 Event 215 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3772,7 +3772,7 @@ Event 215 Batch 1 2 4.610896439725640e+02 -3.106576460930037e+02 -3.050258363865880e+02 -1.518378274323046e+02 3 7.153470686812809e+02 2.726436938726979e+02 6.046054769368644e+02 2.680280994976061e+02 4 3.235632873461531e+02 3.801395222030658e+01 -2.995796405502758e+02 -1.161902720653026e+02 - ME 2.130646114222361e-04 + ME 1.638803663744001e-05 Event 216 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3780,7 +3780,7 @@ Event 216 Batch 1 2 5.309452696424389e+02 -4.912950836090372e+02 -3.608909251460832e+01 -1.980646298023531e+02 3 6.627369363365399e+02 4.479096066616000e+02 2.308759280187052e+02 4.304573578259469e+02 4 3.063177940210212e+02 4.338547694743724e+01 -1.947868355040969e+02 -2.323927280235938e+02 - ME 1.881406502208647e-03 + ME 7.684209531203918e-05 Event 217 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3788,7 +3788,7 @@ Event 217 Batch 1 2 4.608032244164870e+02 2.215832851737383e+02 3.318832460795877e+02 -2.304212888079594e+02 3 3.107022283044695e+02 -4.724697178681157e+01 2.830528592337836e+02 -1.190994425256424e+02 4 7.284945472790432e+02 -1.743363133869267e+02 -6.149361053133712e+02 3.495207313336019e+02 - ME 2.894775763457067e-03 + ME 4.426756984161849e-04 Event 218 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3796,7 +3796,7 @@ Event 218 Batch 1 2 6.336891602166270e+02 5.249943224110900e+02 1.648031440577737e+02 -3.142973702098814e+02 3 5.195346944320743e+02 -3.655895580768890e+02 -3.610279413409480e+02 7.693763263116504e+01 4 3.467761453512956e+02 -1.594047643342018e+02 1.962247972831736e+02 2.373597375787177e+02 - ME 2.703962034458943e-05 + ME 8.957256945094420e-06 Event 219 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3804,7 +3804,7 @@ 
Event 219 Batch 1 2 2.579228498517417e+02 -4.166553381892272e+01 1.191899344508913e+02 2.249042891828000e+02 3 7.453266221408651e+02 -3.354388163550532e+01 -3.947818065141064e+02 -6.312954196904914e+02 4 4.967505280073930e+02 7.520941545442813e+01 2.755918720632151e+02 4.063911305076915e+02 - ME 6.103184694489295e-05 + ME 4.019449398167179e-05 Event 220 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3812,7 +3812,7 @@ Event 220 Batch 1 2 4.940336288355577e+02 -2.383755021420815e+02 -2.918661661143953e+02 3.194690712363630e+02 3 7.129224521449780e+02 2.727447507998269e+02 2.535039959962389e+02 -6.079510240944473e+02 4 2.930439190194635e+02 -3.436924865774512e+01 3.836217011815621e+01 2.884819528580837e+02 - ME 1.761519882509421e-04 + ME 1.677977866215262e-04 Event 221 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3820,7 +3820,7 @@ Event 221 Batch 1 2 3.305414381337777e+02 -2.712796684963201e+02 -1.199910663213094e+02 -1.458325333632650e+02 3 7.388441803280767e+02 5.510455284380058e+02 4.375213740715825e+02 2.254209298704556e+02 4 4.306143815381457e+02 -2.797658599416856e+02 -3.175303077502730e+02 -7.958839650719051e+01 - ME 1.338118621913618e-04 + ME 1.392897982206581e-05 Event 222 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3828,7 +3828,7 @@ Event 222 Batch 1 2 4.657562074797755e+02 2.823280548971349e+02 2.956503281023745e+02 2.231828795335844e+02 3 4.791948192186352e+02 -3.228825926298714e+02 2.575611801233854e+02 -2.429747818931873e+02 4 5.550489733015891e+02 4.055453773273638e+01 -5.532115082257600e+02 1.979190235960287e+01 - ME 9.040551632672907e-05 + ME 2.328731171682892e-05 Event 223 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3836,7 +3836,7 @@ Event 223 Batch 1 2 1.612164685986321e+02 -4.527922182271191e+01 -1.095260585492910e+01 
1.543391792239740e+02 3 6.984218503485876e+02 -4.629950983513680e+02 2.605715575888556e+02 -4.533553609726805e+02 4 6.403616810527805e+02 5.082743201740799e+02 -2.496189517339264e+02 2.990161817487066e+02 - ME 4.148580235863498e-04 + ME 2.446487784841432e-04 Event 224 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3844,7 +3844,7 @@ Event 224 Batch 1 2 1.663853414671972e+02 -1.350882138037309e+02 9.706071747767010e+01 3.804401292344658e+00 3 6.436745581417563e+02 -4.469273298203079e+02 -4.412749113764766e+02 -1.408877256838118e+02 4 6.899401003910457e+02 5.820155436240389e+02 3.442141938988058e+02 1.370833243914657e+02 - ME 3.449215697364171e-04 + ME 9.431632941984795e-05 Event 225 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3852,7 +3852,7 @@ Event 225 Batch 1 2 6.702356777533546e+02 6.117158080352369e+02 -2.649249521350114e+02 -6.952987609335720e+01 3 6.901224376513153e+02 -6.564819557015361e+02 1.560869289536550e+02 1.446972404640001e+02 4 1.396418845953297e+02 4.476614766629927e+01 1.088380231813564e+02 -7.516736437064299e+01 - ME 6.407468428023662e-04 + ME 2.456039108263569e-05 Event 226 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3860,7 +3860,7 @@ Event 226 Batch 1 2 7.307777643673112e+02 -4.569648094661606e+02 4.416236342013199e+02 -3.608155616351098e+02 3 1.446420186345137e+02 4.133161435221925e+01 -3.411742569426914e+01 1.343466131828505e+02 4 6.245802169981752e+02 4.156331951139413e+02 -4.075062085070508e+02 2.264689484522593e+02 - ME 4.858390443010437e-04 + ME 2.774761612267077e-04 Event 227 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3868,7 +3868,7 @@ Event 227 Batch 1 2 7.408615397889290e+02 -4.398089081634772e+02 -5.325812259979131e+02 2.679574278743413e+02 3 4.035753807128123e+02 3.000971513323747e+02 2.468113220276344e+02 
-1.090823496201683e+02 4 3.555630794982585e+02 1.397117568311025e+02 2.857699039702786e+02 -1.588750782541728e+02 - ME 3.215647103618368e-04 + ME 3.077346064218035e-05 Event 228 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3876,7 +3876,7 @@ Event 228 Batch 1 2 5.775455372723294e+02 -3.656199842755111e+02 -6.289501053880601e+01 4.426342647953073e+02 3 3.247306314578497e+02 8.776645762339835e+01 3.116872137482897e+02 2.445634292125525e+01 4 5.977238312698206e+02 2.778535266521127e+02 -2.487922032094836e+02 -4.670906077165625e+02 - ME 3.156934429573604e-03 + ME 3.399241079583280e-04 Event 229 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3884,7 +3884,7 @@ Event 229 Batch 1 2 3.665477125629453e+02 -2.081014917770363e+02 2.317985113364040e+02 -1.931850016112187e+02 3 6.187040836990479e+02 -2.134593092471877e+02 -3.484367286517815e+02 4.645661552545953e+02 4 5.147482037380067e+02 4.215608010242241e+02 1.166382173153775e+02 -2.713811536433765e+02 - ME 4.392210547845218e-04 + ME 8.330968691049859e-05 Event 230 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3892,7 +3892,7 @@ Event 230 Batch 1 2 5.913978529013565e+02 -4.986092821675885e+02 -3.028328044703767e+02 9.712104143419764e+01 3 3.439186614041002e+02 -6.573524045766426e+01 3.216488491089061e+02 -1.024741025375549e+02 4 5.646834856945436e+02 5.643445226252528e+02 -1.881604463852933e+01 5.353061103357447e+00 - ME 1.067159092411647e-04 + ME 2.296146042402505e-05 Event 231 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3900,7 +3900,7 @@ Event 231 Batch 1 2 5.760768557894827e+02 -7.075794524290799e+01 5.609870884449791e+02 1.102331327656218e+02 3 6.038619762337338e+02 -2.467027894308989e+02 -5.464177649873398e+02 -7.221250677108812e+01 4 3.200611679767834e+02 3.174607346738069e+02 -1.456932345763944e+01 
-3.802062599453370e+01 - ME 8.750887998909065e-05 + ME 9.438631267217403e-06 Event 232 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3908,7 +3908,7 @@ Event 232 Batch 1 2 7.230187249684843e+02 -2.426041066061352e+02 1.884455685697195e+02 -6.545132479937492e+02 3 4.821326920133732e+02 2.438648429837413e+02 -1.563760752388986e+01 4.156168142598493e+02 4 2.948485830181424e+02 -1.260736377606032e+00 -1.728079610458298e+02 2.388964337338999e+02 - ME 4.549716999825542e-05 + ME 3.745272037455064e-05 Event 233 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3916,7 +3916,7 @@ Event 233 Batch 1 2 3.540260977608100e+02 -1.904526694678991e+02 -1.042089619355360e+02 -2.796475475319170e+02 3 4.925592302096041e+02 1.195034224421750e+02 3.554637678715695e+02 -3.193415679485398e+02 4 6.534146720295859e+02 7.094924702572415e+01 -2.512548059360335e+02 5.989891154804569e+02 - ME 2.494643034161164e-04 + ME 1.035644942794080e-04 Event 234 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3924,7 +3924,7 @@ Event 234 Batch 1 2 1.866526101194276e+02 7.776953530733704e+01 -1.047503781897390e+01 1.693557493124073e+02 3 6.012752698516817e+02 5.974840035795012e+02 -4.570329760029643e+01 4.955829083294186e+01 4 7.120721200288899e+02 -6.752535388868379e+02 5.617833541927040e+01 -2.189140401453492e+02 - ME 2.154454342135980e-03 + ME 6.655948749153013e-04 Event 235 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3932,7 +3932,7 @@ Event 235 Batch 1 2 5.032945404607945e+02 1.612889276925247e+02 2.561838854094329e+02 -4.020710050699558e+02 3 7.153634726767370e+02 -3.739069589148947e+02 -1.979140468542061e+02 5.768609140624169e+02 4 2.813419868624690e+02 2.126180312223700e+02 -5.826983855522722e+01 -1.747899089924609e+02 - ME 8.184939555880423e-04 + ME 1.137471703441233e-04 Event 236 Batch 
1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3940,7 +3940,7 @@ Event 236 Batch 1 2 6.980797829886610e+02 -9.803971882836288e+00 4.740144261428889e+02 5.123764137440797e+02 3 5.519387921056282e+02 -1.638876688381594e+02 -3.209728652821290e+02 -4.180355032606608e+02 4 2.499814249057108e+02 1.736916407209956e+02 -1.530415608607599e+02 -9.434091048341891e+01 - ME 2.813360227943072e-04 + ME 5.842524801707843e-05 Event 237 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3948,7 +3948,7 @@ Event 237 Batch 1 2 1.604490925133743e+02 6.212857081252698e+01 9.075394990141041e+01 1.168232534834160e+02 3 6.578242662283152e+02 5.348507070161563e+02 -3.810396531957998e+02 3.842224792439630e+01 4 6.817266412583107e+02 -5.969792778286832e+02 2.902857032943894e+02 -1.552455014078122e+02 - ME 8.205069948818567e-04 + ME 1.834055676127939e-04 Event 238 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3956,7 +3956,7 @@ Event 238 Batch 1 2 2.789018340499539e+02 1.069933592962543e+02 -2.572713415352736e+02 1.225197647611563e+01 3 4.761759619803052e+02 7.755191627191856e+01 -4.591043622469822e+02 -9.976187456245104e+01 4 7.449222039697408e+02 -1.845452755681728e+02 7.163757037822556e+02 8.750989808633538e+01 - ME 4.130258343824905e-02 + ME 9.445005309896021e-03 Event 239 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3964,7 +3964,7 @@ Event 239 Batch 1 2 4.581461811054764e+02 -3.899520773556200e+02 2.006122777919944e+02 1.326273524830990e+02 3 3.013476461129690e+02 -2.996604136348060e+02 3.145663680794619e+01 4.951799549362093e+00 4 7.405061727815548e+02 6.896124909904260e+02 -2.320689145999406e+02 -1.375791520324611e+02 - ME 1.351152256907066e-02 + ME 4.970363634614722e-03 Event 240 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-3972,7 +3972,7 @@ Event 240 Batch 1 2 5.932490652975304e+02 -4.094504138983958e+01 -3.300190662632461e+02 4.912793227530680e+02 3 3.147487537014150e+02 3.081803657249563e+02 4.097350029662016e+01 -4.912038692507519e+01 4 5.920021810010543e+02 -2.672353243351168e+02 2.890455659666260e+02 -4.421589358279927e+02 - ME 2.300291351402201e-03 + ME 3.420638167820422e-04 Event 241 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3980,7 +3980,7 @@ Event 241 Batch 1 2 4.438703186026563e+01 1.425431959717181e+01 -4.430288595443099e+00 -4.180186016371768e+01 3 7.139617398095604e+02 -8.415544716076485e+01 -5.657765076565163e+02 -4.272659242311072e+02 4 7.416512283301737e+02 6.990112756359306e+01 5.702067962519594e+02 4.690677843948249e+02 - ME 9.657825758456334e-03 + ME 9.983667466725972e-03 Event 242 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3988,7 +3988,7 @@ Event 242 Batch 1 2 3.798759956195423e+02 -1.259218082844715e+02 -3.429343473884153e+02 1.041417477651927e+02 3 6.208895880511435e+02 5.354328139337265e+02 1.248673426784089e+02 -2.884852319370315e+02 4 4.992344163293142e+02 -4.095110056492549e+02 2.180670047100064e+02 1.843434841718389e+02 - ME 4.523810239016752e-05 + ME 1.030886114253601e-05 Event 243 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3996,7 +3996,7 @@ Event 243 Batch 1 2 2.320641800899440e+02 1.658639294991472e+02 7.783463994856535e+01 1.424243988788334e+02 3 6.251485586341132e+02 -2.328139095298017e+02 -4.262931976140131e+02 3.935511574875350e+02 4 6.427872612759426e+02 6.694998003065477e+01 3.484585576654476e+02 -5.359755563663684e+02 - ME 1.068434238404496e-02 + ME 8.493072129055412e-04 Event 244 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4004,7 +4004,7 @@ Event 244 Batch 1 2 6.609991843787810e+02 -2.293678857540617e+02 
-4.971623496474938e+02 -3.703240376037023e+02 3 1.091403980947070e+02 1.154537470975927e+01 -9.115666825632124e+00 -1.081445118228680e+02 4 7.298604175265119e+02 2.178225110443025e+02 5.062780164731259e+02 4.784685494265703e+02 - ME 2.129811247265830e-03 + ME 9.635755455313371e-04 Event 245 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4012,7 +4012,7 @@ Event 245 Batch 1 2 4.893629130846664e+02 -3.546974954177181e+02 3.112856868655738e+02 -1.294873298810978e+02 3 7.129026631852477e+02 5.703735458058533e+02 -4.257115617679147e+02 -4.091322034012423e+01 4 2.977344237300874e+02 -2.156760503881352e+02 1.144258749023406e+02 1.704005502212233e+02 - ME 2.548352504440589e-05 + ME 5.312368446054512e-06 Event 246 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4020,7 +4020,7 @@ Event 246 Batch 1 2 3.999457395350199e+02 9.605025124341067e+01 9.072234098128430e+01 3.774922524438975e+02 3 3.675469088581873e+02 -1.615841482674670e+01 2.570183669846762e+02 2.622426259669196e+02 4 7.325073516067924e+02 -7.989183641666393e+01 -3.477407079659604e+02 -6.397348784108170e+02 - ME 1.294421983622042e-01 + ME 5.023802198964801e-02 Event 247 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4028,7 +4028,7 @@ Event 247 Batch 1 2 6.711864521923226e+02 3.763073240556692e+02 5.338170415278108e+02 1.546719678644905e+02 3 5.231557804938882e+02 -1.057595517177888e+02 -5.121603131388773e+02 -1.409615302513522e+01 4 3.056577673137891e+02 -2.705477723378804e+02 -2.165672838893370e+01 -1.405758148393554e+02 - ME 2.873345328272106e-04 + ME 1.980507958825256e-05 Event 248 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4036,7 +4036,7 @@ Event 248 Batch 1 2 6.307803946875938e+02 -6.240065811552291e+01 -3.654556314590158e+02 5.103256270499047e+02 3 3.935347424219227e+02 -2.188782290807617e+02 
2.916853933646314e+01 -3.257470040392325e+02 4 4.756848628904837e+02 2.812788871962847e+02 3.362870921225527e+02 -1.845786230106721e+02 - ME 2.418190194667681e-04 + ME 8.712398839363553e-05 Event 249 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4044,7 +4044,7 @@ Event 249 Batch 1 2 4.326970760901858e+02 -4.070406664121577e+02 -1.467447404863359e+02 3.261392852829594e+00 3 4.839435229991528e+02 2.335311811831339e+01 2.018595963184923e+02 -4.392136936630267e+02 4 5.833594009106607e+02 3.836875482938447e+02 -5.511485583215654e+01 4.359523008101972e+02 - ME 8.354140201035124e-05 + ME 2.487145538635957e-05 Event 250 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4052,7 +4052,7 @@ Event 250 Batch 1 2 7.010671671345858e+02 -6.122994886156980e+02 -2.473946684860857e+02 2.353303785738851e+02 3 5.574643785654457e+02 3.902114201641945e+02 2.260985614407801e+02 -3.276904354069721e+02 4 2.414684542999681e+02 2.220880684515034e+02 2.129610704530562e+01 9.236005683308701e+01 - ME 4.704118057291807e-05 + ME 1.645582299148298e-05 Event 251 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4060,7 +4060,7 @@ Event 251 Batch 1 2 7.364006127103795e+02 5.379960890463808e+02 4.302640987755426e+02 2.602285070392761e+02 3 3.051282143252570e+01 -2.901685968644106e+00 1.337962970917706e+01 -2.726899336532026e+01 4 7.330865658570956e+02 -5.350944030777371e+02 -4.436437284847198e+02 -2.329595136739561e+02 - ME 8.340546584740779e-03 + ME 6.389613086136084e-03 Event 252 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4068,7 +4068,7 @@ Event 252 Batch 1 2 5.965625584838610e+02 -7.369842915522101e+01 -5.671364104158780e+02 -1.697401534860145e+02 3 6.549338760881149e+02 -1.514014639568436e+02 6.313240788068730e+02 8.628954906696529e+01 4 2.485035654280235e+02 2.250998931120648e+02 
-6.418766839099484e+01 8.345060441904938e+01 - ME 3.985162011735342e-05 + ME 7.225550854378042e-06 Event 253 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4076,7 +4076,7 @@ Event 253 Batch 1 2 5.728678540484714e+02 3.212236187283236e+01 -4.622666283104808e+02 -3.368312580807653e+02 3 7.160302400837320e+02 1.132435775281999e+02 5.206369974620781e+02 4.783433011307397e+02 4 2.111019058677967e+02 -1.453659394010323e+02 -5.837036915159722e+01 -1.415120430499744e+02 - ME 1.248429186447426e-03 + ME 7.499676590470843e-05 Event 254 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4084,7 +4084,7 @@ Event 254 Batch 1 2 5.579357369440610e+02 1.333150067790222e+02 -6.785864805882139e+01 5.375077668373273e+02 3 6.202682598689536e+02 -4.039338689731095e+02 2.012068793592834e+02 -4.255419314189536e+02 4 3.217960031869852e+02 2.706188621940872e+02 -1.333482313004621e+02 -1.119658354183736e+02 - ME 6.088720978226072e-04 + ME 2.226893396847405e-04 Event 255 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4092,5 +4092,5 @@ Event 255 Batch 1 2 7.263612771087843e+02 3.396063850675520e+02 -6.401091575508393e+02 5.028393902637355e+01 3 1.540578578981475e+02 -3.080387127739228e+01 1.060177193258910e+02 -1.074485378375538e+02 4 6.195808649930684e+02 -3.088025137901597e+02 5.340914382249483e+02 5.716459881118030e+01 - ME 1.547064591142216e-04 + ME 4.003666322732326e-05 diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index d13feee76f..64ce042fd4 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -51,16 +51,16 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor 
"vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F set auto_convert_model T; import model heft; generate g g > h save options auto_convert_model -save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt INFO: Restrict model heft with file models/heft/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: s u w+ at order: QED=1  @@ -127,74 +127,43 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying process: g g > h HIG<=1 HIW<=1 WEIGHTED<=2 @1 INFO: Process has 1 diagrams -1 processes with 1 diagrams generated in 0.003 s +1 processes with 1 diagrams generated in 0.004 s Total: 1 processes with 1 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_heft_gg_h Load PLUGIN.CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > h HIG<=1 HIW<=1 WEIGHTED<=2 @1 INFO: Processing color information for process: g g > h HIG<=1 HIW<=1 @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 187]  -DEBUG: type(subproc_group)= [output.py at line 188]  -DEBUG: type(fortran_model)= [output.py at line 189]  -DEBUG: type(me)= me=0 [output.py at line 190]  -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: proc_id =  0 [model_handling.py at line 1046]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is not yet defined: this is 
standalone_cudacpp mode [model_handling.py at line 1302]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  False [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True False [model_handling.py at line 1164]  -DEBUG: multi_channel_map =  None [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_heft_gg_h.txt [model_handling.py at line 1336]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 189]  +DEBUG: type(subproc_group)= [output.py at line 190]  +DEBUG: type(fortran_model)= [output.py at line 191]  +DEBUG: type(me)= me=0 [output.py at line 192]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/. 
Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines -ALOHA: aloha creates 1 routines in 0.054 s +ALOHA: aloha creates 1 routines in 0.060 s VVS3 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h -INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h +INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 59 , keys size = 59 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 5 , keys size = 5 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 1 , keys size = 1 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 5 , keys size = 5 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 1 , keys size = 1 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size 
= 1 , keys size = 1 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./Parameters_heft.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./Parameters_heft.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./Parameters_heft.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./Parameters_heft.cc INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 204]  +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.497s -user 0m0.397s -sys 0m0.051s +real 0m0.428s +user 0m0.366s +sys 0m0.055s diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/MatrixElementKernels.cc +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! 
const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! + const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.cc b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.cc index e120b9206f..6cc0be1461 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.cc +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.cc @@ -245,7 +245,7 @@ namespace mg5amcCpu sxxxxx( momenta, +1, w_fp[2], 2 ); // Amplitude(s) for diagram number 1 - VVS3_0( w_fp[0], w_fp[1], w_fp[2], COUPs[0], &_fp[0] ); + VVS3_0( w_fp[0], w_fp[1], w_fp[2], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -745,13 +745,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last 
observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: standalone_cudacpp static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: 
cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/heft_gg_h.sa/src/HelAmps_heft.h b/epochX/cudacpp/heft_gg_h.sa/src/HelAmps_heft.h index d35dba2369..a2e9b6a70c 100644 --- a/epochX/cudacpp/heft_gg_h.sa/src/HelAmps_heft.h +++ b/epochX/cudacpp/heft_gg_h.sa/src/HelAmps_heft.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allS3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //========================================================================== @@ -874,6 +875,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allS3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 1134659ef0..e2ec882498 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -51,9 +51,9 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j.mg +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0048046112060546875  +DEBUG: model prefixing takes 0.005310535430908203  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.027 s +5 processes with 7 diagrams generated in 0.029 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.125 s +13 processes with 76 diagrams generated in 0.135 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,21 +378,21 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.848 s +65 processes with 1119 diagrams generated in 1.797 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 153]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 155]  INFO: initialize a new directory: CODEGEN_mad_pp_tt012j INFO: remove old information in CODEGEN_mad_pp_tt012j -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 158]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 160]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Processing color 
information for process: g g > t t~ g g @2 @@ -496,710 +496,328 @@ INFO: Combined process c c~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: 
type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: [24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9], 10: [10], 11: [11], 12: [12], 13: [13], 14: [14], 15: [15], 16: [16], 17: [17], 18: [18], 19: [19], 20: [20], 21: [21], 22: [22], 23: [23], 24: 
[24], 25: [25], 26: [26], 27: [27], 28: [28], 29: [29], 30: [30], 31: [32], 32: [33], 33: [34], 34: [35], 35: [36], 36: [37], 37: [38], 38: [39], 39: [40], 40: [41], 41: [42], 42: [43], 43: [44], 44: [45], 45: [46], 46: [48], 47: [49], 48: [50], 49: [51], 50: [52], 51: [53], 52: [54], 53: [55], 54: [56], 55: [58], 56: [59], 57: [60], 58: [61], 59: [62], 60: [63], 61: [64], 62: [65], 63: [66], 64: [67], 65: [68], 66: [69], 67: [70], 68: [71], 69: [72], 70: [74], 71: [75], 72: [76], 73: [77], 74: [78], 75: [79], 76: [80], 77: [81], 78: [82], 79: [83], 80: [84], 81: [85], 82: [86], 83: [87], 84: [88], 85: [89], 86: [90], 87: [91], 88: [93], 89: [94], 90: [95], 91: [96], 92: [97], 93: [98], 94: [100], 95: [101], 96: [102], 97: [103], 98: [104], 99: [105], 100: [107], 101: [108], 102: [109], 103: [110], 104: [111], 105: [112]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 10, 14: 11, 15: 12, 16: 13, 17: 14, 18: 15, 19: 16, 20: 17, 21: 18, 22: 19, 23: 20, 24: 21, 25: 22, 26: 23, 27: 24, 28: 25, 29: 26, 30: 27, 31: 28, 32: 29, 33: 30, 37: 31, 38: 32, 39: 33, 40: 34, 41: 35, 42: 36, 43: 37, 44: 38, 45: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 55: 46, 56: 47, 57: 48, 58: 49, 59: 50, 60: 51, 61: 52, 62: 53, 63: 54, 67: 55, 68: 56, 69: 57, 70: 58, 71: 59, 72: 60, 73: 61, 74: 62, 75: 63, 76: 64, 77: 65, 78: 66, 79: 67, 80: 68, 81: 69, 85: 70, 86: 71, 87: 72, 88: 73, 89: 74, 90: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 100: 85, 101: 86, 102: 87, 106: 88, 107: 89, 108: 90, 109: 91, 110: 92, 111: 93, 115: 94, 116: 95, 117: 96, 118: 97, 119: 98, 120: 99, 124: 100, 125: 101, 126: 102, 127: 103, 128: 104, 129: 105} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( 
momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxgg.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 
79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 
2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxuux.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 
33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 
2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxgu.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 
33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  2 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 
2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxgux.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 
33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  3 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14], 16: [15], 17: [16], 18: [17], 19: [18], 20: [19], 21: [20], 22: [21], 23: [22], 24: [23], 25: [24], 26: [25], 27: [26], 28: [27], 29: [28], 30: [29], 31: [30], 32: [31], 33: [32], 34: [34], 35: [35]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 
2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 37: 34, 38: 35} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 5, 1, 5, 5) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxgg.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 
33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  4 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
-DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13], 15: [14]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], 
%d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttxg.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  5 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group 
gg_ttxg INFO: Creating files in directory P2_uu_ttxuu -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 
1710]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uu_ttxuu.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  6 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory 
P2_uux_ttxuux -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1710]  INFO: Created files CPPProcess.h and 
CPPProcess.cc in directory ./. -DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxuux.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  7 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux -DEBUG: Entering 
PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6], 8: [7], 9: [8], 10: [9], 11: [10], 12: [11], 13: [12], 14: [13]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1710]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxux_ttxuxux.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  8 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ 
[model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1710]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uc_ttxuc.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  9 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  
-DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1710]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxccx.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  10 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 
1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1710]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_ucx_ttxucx.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  11 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 
1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5, 6, 7] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4], 6: [5], 7: [6]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1710]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uxcx_ttxuxcx.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  12 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at 
line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gu_ttxu.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  13 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: 
kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  14 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  
-DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3, 4, 5] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3, 4, 5] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2], 4: [3], 5: [4]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 4, 1, 4, 4) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttxg.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  15 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: 
kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 [model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1, 2, 3] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1, 2, 3] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0], 2: [1], 3: [2]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1710]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 0, -1, 0, 0) [model_handling.py at line 1823]  -DEBUG: call =  vxxxxx( momenta,m_pars->%s, cHel[ihel][%d],%+d, w_sv[%d], %d ); [model_handling.py at line 1822]  -DEBUG: ('ZERO', 1, -1, 1, 1) [model_handling.py at line 1823]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Copying test reference file: ', template_ref =  Copying test reference file: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_gg_ttx.txt [model_handling.py at line 1336]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  16 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx -DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1039]  -DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1040]  -DEBUG: proc_id =  1 
[model_handling.py at line 1046]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6180]  +DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . -DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1298]  -DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1300]  FileWriter for ././CPPProcess.h -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_h_file [model_handling.py at line 1454]  FileWriter for ././CPPProcess.cc -DEBUG: Entering PLUGIN_OneProcessExporter.write_process_cc_file [model_handling.py at line 1476]  -DEBUG: Entering PLUGIN_OneProcessExporter.get_sigmaKin_lines [model_handling.py at line 1144]  -DEBUG: self.include_multi_channel =  [1] [model_handling.py at line 1145]  -DEBUG: self.support_multichannel =  True [model_handling.py at line 1146]  -DEBUG: type(self.helas_call_writer) =  [model_handling.py at line 1163]  -DEBUG: self.support_multichannel, self.include_multi_channel =  True [1] [model_handling.py at line 1164]  -DEBUG: multi_channel =  {1: [0]} [model_handling.py at line 1170]  -DEBUG: multi_channel_map =  {1: [0]} [model_handling.py at line 1655]  -DEBUG: diag_to_config =  {1: 1} [model_handling.py at line 1710]  INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: Entering PLUGIN_OneProcessExporter.edit_CMakeLists [model_handling.py at line 1344]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_check_sa [model_handling.py at line 1353]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_mgonGPU [model_handling.py at line 1370]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_processidfile [model_handling.py at line 1390]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_coloramps [model_handling.py at line 1402]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_testxxx [model_handling.py at line 1420]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memorybuffers [model_handling.py at line 1431]  -DEBUG: Entering PLUGIN_OneProcessExporter.edit_memoryaccesscouplings [model_handling.py at line 1442]  -DEBUG: 'Test reference file does not exist and will not be copied: ', template_ref =  Test reference file does not exist and will not be copied: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/madgraph/iolibs/template_files/../../../test/ref/dump_CPUTest.Sigma_sm_uux_ttx.txt [model_handling.py at line 1339]  DEBUG: proc_id =  1 [export_cpp.py at line 710]  DEBUG: config_map =  [1] [export_cpp.py at line 711]  DEBUG: subproc_number =  17 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.145 s -Wrote files for 810 helas calls in 3.136 s +Generated helas calls for 18 subprocesses (372 
diagrams) in 1.287 s +Wrote files for 810 helas calls in 3.227 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.287 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 195]  +ALOHA: aloha creates 5 routines in 0.329 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.671 s +ALOHA: aloha creates 10 routines in 0.308 s VVV1 VVV1 FFV1 @@ -1212,27 +830,22 @@ ALOHA: aloha creates 10 routines in 0.671 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 48 , keys size = 48 [model_handling.py at line 729]  super_write_set_parameters_onlyfixMajorana (hardcoded=True) -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -DEBUG: 'pardef_lines size =', len(pardef_lines), ', keys size =', len(pardef_lines.keys()) =  pardef_lines size = 3 , keys size = 3 [model_handling.py at line 729]  -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. and /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. 
The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 204]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) ************************************************************ * * * W E L C O M E to * @@ -1253,14 +866,15 @@ INFO: Generate web pages * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. 
Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP quit INFO: launch in debug mode @@ -1284,31 +898,33 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -No valid eps viewer found. Please set in ./input/mg5_configuration.txt -Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file Source/make_opts patching file bin/internal/banner.py +Hunk #1 succeeded at 4188 (offset 1 line). patching file bin/internal/gen_ximprove.py patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 528 (offset 59 lines). +Hunk #1 succeeded at 538 (offset 69 lines). 
patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1316,21 +932,18 @@ Hunk #2 succeeded at 146 (offset 3 lines). Hunk #3 succeeded at 224 (offset 3 lines). Hunk #4 succeeded at 252 (offset 3 lines). Hunk #5 succeeded at 297 (offset 3 lines). -Hunk #6 succeeded at 402 (offset 6 lines). -Hunk #7 succeeded at 466 (offset -4 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 159 (offset 16 lines). Hunk #3 succeeded at 237 (offset 16 lines). Hunk #4 succeeded at 265 (offset 16 lines). Hunk #5 succeeded at 310 (offset 16 lines). -Hunk #6 succeeded at 434 (offset 38 lines). -Hunk #7 succeeded at 588 (offset 118 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 513 (offset 44 lines). +Hunk #1 succeeded at 527 (offset 58 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1338,11 +951,9 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 240 (offset 19 lines). 
Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). -Hunk #6 succeeded at 428 (offset 32 lines). -Hunk #7 succeeded at 518 (offset 48 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 517 (offset 48 lines). +Hunk #1 succeeded at 527 (offset 58 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1350,11 +961,9 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 240 (offset 19 lines). Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). -Hunk #6 succeeded at 428 (offset 32 lines). -Hunk #7 succeeded at 518 (offset 48 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 528 (offset 59 lines). +Hunk #1 succeeded at 538 (offset 69 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1362,21 +971,18 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 240 (offset 19 lines). Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). 
-Hunk #6 succeeded at 428 (offset 32 lines). -Hunk #7 succeeded at 518 (offset 48 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 191 (offset 48 lines). Hunk #3 succeeded at 269 (offset 48 lines). Hunk #4 succeeded at 297 (offset 48 lines). Hunk #5 succeeded at 342 (offset 48 lines). -Hunk #6 succeeded at 830 (offset 434 lines). -Hunk #7 succeeded at 1717 (offset 1247 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 502 (offset 33 lines). +Hunk #1 succeeded at 516 (offset 47 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1384,11 +990,9 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -Hunk #6 succeeded at 540 (offset 144 lines). -Hunk #7 succeeded at 813 (offset 343 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 513 (offset 44 lines). +Hunk #1 succeeded at 527 (offset 58 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1396,11 +1000,9 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -Hunk #6 succeeded at 540 (offset 144 lines). -Hunk #7 succeeded at 815 (offset 345 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 517 (offset 48 lines). +Hunk #1 succeeded at 527 (offset 58 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1408,11 +1010,9 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -Hunk #6 succeeded at 538 (offset 142 lines). -Hunk #7 succeeded at 812 (offset 342 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 540 (offset 71 lines). +Hunk #1 succeeded at 554 (offset 85 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 77 (offset 5 lines). @@ -1420,11 +1020,9 @@ Hunk #2 succeeded at 196 (offset 53 lines). Hunk #3 succeeded at 274 (offset 53 lines). Hunk #4 succeeded at 302 (offset 53 lines). Hunk #5 succeeded at 347 (offset 53 lines). -Hunk #6 succeeded at 472 (offset 76 lines). -Hunk #7 succeeded at 581 (offset 111 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 616 (offset 147 lines). +Hunk #1 succeeded at 626 (offset 157 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 83 (offset 11 lines). @@ -1432,11 +1030,9 @@ Hunk #2 succeeded at 202 (offset 59 lines). Hunk #3 succeeded at 280 (offset 59 lines). Hunk #4 succeeded at 308 (offset 59 lines). Hunk #5 succeeded at 353 (offset 59 lines). -Hunk #6 succeeded at 484 (offset 88 lines). -Hunk #7 succeeded at 593 (offset 123 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 524 (offset 55 lines). +Hunk #1 succeeded at 538 (offset 69 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1444,11 +1040,9 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -Hunk #6 succeeded at 468 (offset 72 lines). -Hunk #7 succeeded at 620 (offset 150 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 616 (offset 147 lines). +Hunk #1 succeeded at 626 (offset 157 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 83 (offset 11 lines). @@ -1456,11 +1050,9 @@ Hunk #2 succeeded at 202 (offset 59 lines). Hunk #3 succeeded at 280 (offset 59 lines). Hunk #4 succeeded at 308 (offset 59 lines). Hunk #5 succeeded at 353 (offset 59 lines). -Hunk #6 succeeded at 484 (offset 88 lines). -Hunk #7 succeeded at 593 (offset 123 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 528 (offset 59 lines). +Hunk #1 succeeded at 538 (offset 69 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1468,11 +1060,9 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -Hunk #6 succeeded at 540 (offset 144 lines). -Hunk #7 succeeded at 821 (offset 351 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 528 (offset 59 lines). +Hunk #1 succeeded at 538 (offset 69 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1480,11 +1070,9 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -Hunk #6 succeeded at 468 (offset 72 lines). -Hunk #7 succeeded at 620 (offset 150 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 546 (offset 77 lines). +Hunk #1 succeeded at 554 (offset 85 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 77 (offset 5 lines). @@ -1492,11 +1080,9 @@ Hunk #2 succeeded at 196 (offset 53 lines). Hunk #3 succeeded at 274 (offset 53 lines). Hunk #4 succeeded at 302 (offset 53 lines). Hunk #5 succeeded at 347 (offset 53 lines). -Hunk #6 succeeded at 472 (offset 76 lines). -Hunk #7 succeeded at 581 (offset 111 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 532 (offset 63 lines). +Hunk #1 succeeded at 538 (offset 69 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1504,14 +1090,12 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -Hunk #6 succeeded at 468 (offset 72 lines). -Hunk #7 succeeded at 620 (offset 150 lines). 
-Output to directory /data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuMOD/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/README Run "open index.html" to see more information about this process. quit -real 0m9.562s -user 0m8.185s -sys 0m0.659s +real 0m9.420s +user 0m8.812s +sys 0m0.563s diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/ident_card.dat b/epochX/cudacpp/pp_tt012j.mad/Cards/ident_card.dat index b37758a42a..0ba87b008f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/ident_card.dat +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/ident_card.dat @@ -2,32 +2,32 @@ ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc -mass 5 mdl_MB -mass 6 mdl_MT -mass 15 mdl_MTA -mass 23 mdl_MZ -mass 25 mdl_MH -sminputs 1 aEWM1 -sminputs 2 mdl_Gf -sminputs 3 aS -yukawa 5 mdl_ymb -yukawa 6 mdl_ymt -yukawa 15 mdl_ymtau -decay 6 mdl_WT -decay 23 mdl_WZ -decay 24 mdl_WW +decay 23 mdl_WZ +decay 24 mdl_WW decay 25 mdl_WH +decay 6 mdl_WT +mass 15 mdl_MTA +mass 23 mdl_MZ +mass 25 mdl_MH +mass 5 mdl_MB +mass 6 mdl_MT +sminputs 1 aEWM1 +sminputs 2 mdl_Gf +sminputs 3 aS +yukawa 15 mdl_ymtau +yukawa 5 mdl_ymb +yukawa 6 mdl_ymt diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt b/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt index 27acb12a1e..cdeedc7863 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -mg5_path = /data/stephan/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo 
# MG5 MAIN DIRECTORY -mg5_path = /data/stephan/madgraph4gpu/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat index 09bece911a..944298ae75 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat @@ -35,9 +35,10 @@ set loop_color_flows False set max_npoint_for_channel 0 set default_unset_couplings 99 set max_t_for_channel 99 -set zerowidth_tchannel True set nlo_mixed_expansion True -import model sm +set stdout_level DEBUG +set zerowidth_tchannel F +define j = p define p = g u c d s u~ c~ d~ s~ define j = g u c d s u~ c~ d~ s~ define l+ = e+ mu+ @@ -48,5 +49,5 @@ define j = p generate p p > t t~ @0 add process p p > t t~ j @1 add process p p > t t~ j j @2 -output madevent pp_tt012j.mad_gen --hel_recycling=False --vector_size=\ -16384 --me_exporter=standalone_cudacpp +output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False \ +--vector_size=16384 --me_exporter=standalone_cudacpp diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_file.inc index fa0f3d86f5..ec923afd6d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1P0_3.o FFV1_0.o FFV1_1.o FFV1_2.o VVV1P0_1.o VVV1_0.o VVVV1P0_1.o VVVV1_0.o VVVV3P0_1.o VVVV3_0.o VVVV4P0_1.o VVVV4_0.o +ALOHARoutine = FFV1_1.o VVVV4_0.o VVVV4P0_1.o FFV1_0.o VVV1_0.o FFV1_2.o VVVV3_0.o VVVV1_0.o VVVV3P0_1.o VVVV1P0_1.o VVV1P0_1.o FFV1P0_3.o diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MatrixElementKernels.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MatrixElementKernels.cc index 30257195b6..74b5239ebf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MatrixElementKernels.cc +++ 
b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MatrixElementKernels.cc @@ -112,10 +112,17 @@ namespace mg5amcCpu // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu bool ok = true; // this is just an assumption! const std::string tag = "arm neon (128bit as in SSE4.2)"; -#else +#elif defined( __x86_64__ ) || defined( __i386__ ) bool known = true; bool ok = __builtin_cpu_supports( "sse4.2" ); const std::string tag = "nehalem (SSE4.2)"; +#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted + bool known = false; // __builtin_cpu_supports is not supported + // See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html + // See https://stackoverflow.com/q/62783908 + // See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + bool ok = true; // this is just an assumption! 
+ const std::string tag = "arm neon (128bit as in SSE4.2)"; #endif #else bool known = true; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc index 44f313bf0a..0317bbc95a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc @@ -246,10 +246,10 @@ namespace mg5amcCpu ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[4] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -260,10 +260,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 3 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -273,10 +273,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 3 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[4] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[4], w_fp[2], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += 
cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -794,13 +794,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f index b68450743c..ee723193db 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f @@ -39,6 +39,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -126,11 +127,24 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + 
QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+ +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += 
smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f index 8a43211040..d803e4f19f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f @@ -416,10 +416,10 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL VVV1P0_1(W(1,1),W(1,2),GC_10(IVEC),ZERO, FK_ZERO,W(1,5)) C Amplitude(s) for diagram number 1 CALL FFV1_0(W(1,4),W(1,3),W(1,5),GC_11(IVEC),AMP(1)) - CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,5)) + CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,5)) C Amplitude(s) for diagram number 2 CALL FFV1_0(W(1,4),W(1,5),W(1,2),GC_11(IVEC),AMP(2)) - CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,5)) + CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,5)) C Amplitude(s) for diagram number 3 CALL FFV1_0(W(1,5),W(1,3),W(1,2),GC_11(IVEC),AMP(3)) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/ompnumthreads.cc 
deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc index 89ce64642c..75110e8fec 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc @@ -241,18 +241,18 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 1 *** // Wavefunction(s) for diagram number 1 - ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz + 
ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 0., 0., w_fp[4] ); + FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -771,19 +771,18 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int 
helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f index 8310241f21..f205954b28 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,21 +130,30 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + 
CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
+ constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput 
is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+ +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc index 8cc007dff8..f7f5899260 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc @@ -248,11 +248,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[6], w_fp[4], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -265,10 +265,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 16 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[7] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[7], w_fp[5], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[7], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -279,10 +279,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 16 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -293,11 +293,11 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 16 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[5], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -307,10 +307,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 16 *** // Wavefunction(s) for diagram number 5 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -324,7 +324,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[5], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -334,11 +334,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 16 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[11], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,7 +351,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -365,7 +365,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[7], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -375,10 +375,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 16 *** // Wavefunction(s) for diagram number 10 - VVV1P0_1( w_fp[0], 
w_fp[4], COUPs[0], 0., 0., w_fp[5] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -392,7 +392,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -406,7 +406,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 12 - VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -422,7 +422,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[11], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -435,7 +435,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[7], w_fp[0], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -448,7 +448,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[0], 
w_fp[10], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -461,22 +461,22 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 16 *** // Wavefunction(s) for diagram number 16 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[10] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[6] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[9] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[10] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[6] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -1015,13 +1015,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and 
#396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index 668cc26192..b8615bc68f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -39,6 +39,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -126,11 +127,24 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc deleted 
file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
+ constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput 
is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f index 7188daef76..fc924825c2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f @@ -462,8 +462,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_2(W(1,4),W(1,5),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,9)) C Amplitude(s) for diagram number 3 CALL FFV1_0(W(1,9),W(1,3),W(1,6),GC_11(IVEC),AMP(3)) - CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,6)) - CALL FFV1_2(W(1,4),W(1,2),GC_11(IVEC),MDL_MT, ZERO,W(1,10)) + CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,6)) + CALL FFV1_2(W(1,4),W(1,2),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,10)) C Amplitude(s) for diagram number 4 CALL FFV1_0(W(1,10),W(1,6),W(1,5),GC_11(IVEC),AMP(4)) CALL VVV1P0_1(W(1,2),W(1,5),GC_10(IVEC),ZERO, FK_ZERO,W(1,11)) @@ -471,8 +471,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,4),W(1,6),W(1,11),GC_11(IVEC),AMP(5)) C Amplitude(s) for diagram number 6 CALL FFV1_0(W(1,9),W(1,6),W(1,2),GC_11(IVEC),AMP(6)) - CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,6)) - CALL FFV1_1(W(1,3),W(1,2),GC_11(IVEC),MDL_MT, ZERO,W(1,12)) + CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,6)) + CALL FFV1_1(W(1,3),W(1,2),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,12)) C Amplitude(s) for diagram number 7 CALL FFV1_0(W(1,6),W(1,12),W(1,5),GC_11(IVEC),AMP(7)) C Amplitude(s) for diagram number 8 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index 2d43c24604..90a457ac40 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -243,19 +243,19 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz + ixxxxx( momenta, 0., 
cHel[ihel][1], +1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); + oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - FFV1_2( w_fp[1], w_fp[0], COUPs[1], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); + FFV1_2( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[4], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -266,11 +266,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[1], w_fp[4], COUPs[1], 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -281,10 +281,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] 
); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -295,10 +295,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[4], w_fp[0], COUPs[1], 0., 0., w_fp[5] ); + FFV1_1( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[5], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -312,7 +312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -851,19 +851,18 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( 
process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 96,96 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 96 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index ca1b7c1dc5..81ab70f6d1 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION D2,U2,S2,C2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,14 +130,27 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)), QSCALE) + U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)), QSCALE) + S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)), QSCALE) + C2=PDG2PDF(LPP(IB(2)),4, 
IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
+ constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput 
is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f index f61dd29bde..d61f0e1a21 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f @@ -450,11 +450,11 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1P0_3(W(1,4),W(1,3),GC_11(IVEC),ZERO, FK_ZERO,W(1,7)) C Amplitude(s) for diagram number 1 CALL FFV1_0(W(1,6),W(1,5),W(1,7),GC_11(IVEC),AMP(1)) - CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,6)) + CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,6)) CALL FFV1P0_3(W(1,2),W(1,5),GC_11(IVEC),ZERO, FK_ZERO,W(1,8)) C Amplitude(s) for diagram number 2 CALL FFV1_0(W(1,4),W(1,6),W(1,8),GC_11(IVEC),AMP(2)) - CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,6)) + CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,6)) C Amplitude(s) for diagram number 3 CALL FFV1_0(W(1,6),W(1,3),W(1,8),GC_11(IVEC),AMP(3)) CALL FFV1_1(W(1,5),W(1,1),GC_11(IVEC),ZERO, FK_ZERO,W(1,6)) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc @@ -0,0 +1,25 @@ +// 
Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index a676bbeb6e..9a73b3ed94 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -243,19 +243,19 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - ixzxxx( momenta, cHel[ihel][4], -1, w_fp[4], 4 ); + ixxxxx( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 ); - FFV1_2( w_fp[4], w_fp[0], COUPs[1], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); + FFV1_2( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] ); + FFV1P0_3( w_fp[3], 
w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -266,11 +266,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[4], w_fp[1], COUPs[1], 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -281,10 +281,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -295,10 +295,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[1], w_fp[0], COUPs[1], 0., 0., w_fp[5] ); + FFV1_1( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[4], w_fp[5], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -312,7 +312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[7], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -851,19 +851,18 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 96,96 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 96 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index 33e638e237..b58c5d70bd 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,18 +130,27 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc +++ /dev/null @@ -1 +0,0 @@ 
-../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
+ constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput 
is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f index bc8b23e7ba..b082becd2a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f @@ -450,11 +450,11 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1P0_3(W(1,4),W(1,3),GC_11(IVEC),ZERO, FK_ZERO,W(1,7)) C Amplitude(s) for diagram number 1 CALL FFV1_0(W(1,6),W(1,2),W(1,7),GC_11(IVEC),AMP(1)) - CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,6)) + CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,6)) CALL FFV1P0_3(W(1,5),W(1,2),GC_11(IVEC),ZERO, FK_ZERO,W(1,8)) C Amplitude(s) for diagram number 2 CALL FFV1_0(W(1,4),W(1,6),W(1,8),GC_11(IVEC),AMP(2)) - CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,6)) + CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,6)) C Amplitude(s) for diagram number 3 CALL FFV1_0(W(1,6),W(1,3),W(1,8),GC_11(IVEC),AMP(3)) CALL FFV1_1(W(1,2),W(1,1),GC_11(IVEC),ZERO, FK_ZERO,W(1,6)) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc @@ -0,0 
+1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc index 7edb26013e..dc1a3e9d26 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc @@ -241,9 +241,9 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 5 *** // Wavefunction(s) for diagram number 1 - ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); @@ -251,11 +251,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - FFV1_2( w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[5] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[6] ); + FFV1_2( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 
0., w_fp[5] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[1], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -266,11 +266,11 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 5 *** // Wavefunction(s) for diagram number 2 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); - FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); + FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -281,10 +281,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 5 *** // Wavefunction(s) for diagram number 3 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[5] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[5] ); // Amplitude(s) for diagram number 3 - FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -295,10 +295,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 5 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[1], w_fp[4], COUPs[1], 0., 0., w_fp[5] ); + FFV1_1( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[5] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[0], w_fp[5], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[0], w_fp[5], w_fp[6], 
COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -312,7 +312,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 5 - VVV1_0( w_fp[4], w_fp[7], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[7], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -851,19 +851,18 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f index f2902c7183..d85b1143a0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,21 +130,30 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, 
IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
+ constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput 
is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+ +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc index f1e1f21142..cbc45ff652 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc @@ -250,11 +250,11 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); // Amplitude(s) for diagram number 1 - VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -263,7 +263,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 
0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -272,7 +272,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[6], w_fp[7], w_fp[4], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -285,10 +285,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 123 *** // Wavefunction(s) for diagram number 2 - VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[6], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 2 - VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -305,10 +305,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 123 *** // Wavefunction(s) for diagram number 3 - VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 0., 0., w_fp[9] ); + VVV1P0_1( w_fp[6], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[9], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -325,10 +325,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 123 *** // Wavefunction(s) for diagram number 4 - VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 0., 0., w_fp[10] ); + VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 4 - VVV1_0( w_fp[6], w_fp[7], w_fp[10], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], 
w_fp[10], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -345,11 +345,11 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 123 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -363,7 +363,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -376,10 +376,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 123 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[11], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -390,10 +390,10 @@ namespace mg5amcCpu // *** DIAGRAM 8 OF 123 *** // Wavefunction(s) for diagram number 8 - FFV1_1( w_fp[2], w_fp[5], COUPs[1], cIPD[0], 
cIPD[1], w_fp[14] ); + FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -407,7 +407,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -420,10 +420,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 123 *** // Wavefunction(s) for diagram number 10 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[15] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[15] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[14], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -434,10 +434,10 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 123 *** // Wavefunction(s) for diagram number 11 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[16], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -451,7 +451,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 12 - FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -467,7 +467,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[16], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -481,7 +481,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -497,7 +497,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[16], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -513,7 +513,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -526,12 +526,12 @@ namespace mg5amcCpu // *** DIAGRAM 17 OF 123 *** // Wavefunction(s) for diagram number 17 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], 
cIPD[0], cIPD[1], w_fp[16] ); - FFV1_1( w_fp[12], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[16] ); + FFV1_1( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[8], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -541,10 +541,10 @@ namespace mg5amcCpu // *** DIAGRAM 18 OF 123 *** // Wavefunction(s) for diagram number 18 - FFV1_1( w_fp[12], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -557,7 +557,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[12], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -568,11 +568,11 @@ namespace mg5amcCpu // *** DIAGRAM 20 OF 123 *** // Wavefunction(s) for diagram number 20 - VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 0., 0., w_fp[17] ); + VVV1P0_1( w_fp[1], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[12], COUPs[1], 1.0, 0., 0., w_fp[17] ); // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[6], 
w_fp[5], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -588,7 +588,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -602,7 +602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[12], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -613,10 +613,10 @@ namespace mg5amcCpu // *** DIAGRAM 23 OF 123 *** // Wavefunction(s) for diagram number 23 - VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 0., 0., w_fp[18] ); + VVV1P0_1( w_fp[1], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[18] ); // Amplitude(s) for diagram number 23 - VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -632,7 +632,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -646,7 +646,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for 
diagram number 25 - FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[12], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -657,10 +657,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 123 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[12], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[19] ); + FFV1_1( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[19] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[19], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -673,7 +673,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[9], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -686,7 +686,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 28 - FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[19], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -699,7 +699,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[8], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -712,7 
+712,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 30 - FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[19], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -726,7 +726,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 31 - VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[17], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -739,22 +739,22 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 123 *** // Wavefunction(s) for diagram number 32 - VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[17] ); - VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[19] ); - VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[8] ); + VVVV1P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[17] ); + VVVV3P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[19] ); + VVVV4P0_1( w_fp[1], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[17], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[1] -= amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[5] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[19], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[3] -= amp_sv[0]; jamp_sv[4] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[12], w_fp[8], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[2] += amp_sv[0]; jamp_sv[4] += amp_sv[0]; @@ -763,12 +763,12 @@ namespace 
mg5amcCpu // *** DIAGRAM 33 OF 123 *** // Wavefunction(s) for diagram number 33 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[12], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -778,10 +778,10 @@ namespace mg5amcCpu // *** DIAGRAM 34 OF 123 *** // Wavefunction(s) for diagram number 34 - FFV1_2( w_fp[12], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[12], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 34 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 34 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -794,7 +794,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -805,10 +805,10 @@ namespace mg5amcCpu // *** DIAGRAM 36 OF 123 *** // Wavefunction(s) for diagram number 36 - FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 0., 0., w_fp[22] ); + FFV1P0_3( w_fp[12], w_fp[2], COUPs[1], 1.0, 0., 0., 
w_fp[22] ); // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -824,7 +824,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 37 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 37 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -838,7 +838,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 38 - FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 38 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -852,7 +852,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 39 - VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 39 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -868,7 +868,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 40 - FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 40 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -882,7 +882,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 41 - FFV1_0( w_fp[12], w_fp[11], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[11], 
w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 41 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -893,10 +893,10 @@ namespace mg5amcCpu // *** DIAGRAM 42 OF 123 *** // Wavefunction(s) for diagram number 42 - FFV1_2( w_fp[12], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_2( w_fp[12], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 42 - FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[23], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 42 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -909,7 +909,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 43 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 43 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -922,7 +922,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 44 - FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[23], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 44 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -935,7 +935,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 45 - FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[20], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 45 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -948,7 +948,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 46 - FFV1_0( w_fp[23], w_fp[2], 
w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[23], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 46 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -962,7 +962,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 47 - VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 47 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -978,17 +978,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 48 - FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[17], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] += amp_sv[0]; jamp_sv[11] -= amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[19], COUPs[1], 1.0, &_fp[0] ); jamp_sv[11] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[17] -= amp_sv[0]; jamp_sv[21] += amp_sv[0]; - FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); jamp_sv[9] -= amp_sv[0]; jamp_sv[15] += amp_sv[0]; jamp_sv[21] += amp_sv[0]; @@ -997,11 +997,11 @@ namespace mg5amcCpu // *** DIAGRAM 49 OF 123 *** // Wavefunction(s) for diagram number 49 - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[12] ); - FFV1_2( w_fp[3], w_fp[12], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[12] ); + FFV1_2( w_fp[3], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 49 - FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 49 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1012,10 +1012,10 @@ namespace mg5amcCpu // *** DIAGRAM 50 OF 123 *** // Wavefunction(s) for diagram number 50 - VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[12], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 50 - FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 50 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1031,7 +1031,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 51 - FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[9], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 51 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1042,10 +1042,10 @@ namespace mg5amcCpu // *** DIAGRAM 52 OF 123 *** // Wavefunction(s) for diagram number 52 - FFV1_1( w_fp[2], w_fp[12], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); + FFV1_1( w_fp[2], w_fp[12], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 52 - FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 52 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1059,7 +1059,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 53 - FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 53 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1075,7 +1075,7 @@ namespace mg5amcCpu 
// (none) // Amplitude(s) for diagram number 54 - FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[14], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 54 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1089,7 +1089,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 55 - FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 55 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1105,7 +1105,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 56 - FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 56 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1121,7 +1121,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 57 - VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[18], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 57 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1141,7 +1141,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 58 - VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1150,7 +1150,7 @@ namespace mg5amcCpu jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( 
w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1159,7 +1159,7 @@ namespace mg5amcCpu jamp_sv[13] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[12], w_fp[1], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1172,10 +1172,10 @@ namespace mg5amcCpu // *** DIAGRAM 59 OF 123 *** // Wavefunction(s) for diagram number 59 - VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 0., 0., w_fp[21] ); + VVV1P0_1( w_fp[12], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 59 - VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[21], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 59 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1195,7 +1195,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 60 - VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 60 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1215,7 +1215,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 61 - FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 61 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] 
); @@ -1231,7 +1231,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 62 - FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 62 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1245,7 +1245,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 63 - FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 63 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1261,7 +1261,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 64 - FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 64 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1272,11 +1272,11 @@ namespace mg5amcCpu // *** DIAGRAM 65 OF 123 *** // Wavefunction(s) for diagram number 65 - VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 0., 0., w_fp[20] ); - FFV1_2( w_fp[3], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + VVV1P0_1( w_fp[0], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[20] ); + FFV1_2( w_fp[3], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 65 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 65 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1287,10 +1287,10 @@ namespace mg5amcCpu // *** DIAGRAM 66 OF 123 *** // Wavefunction(s) for diagram number 66 - VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 0., 0., w_fp[22] ); + 
VVV1P0_1( w_fp[20], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 66 - FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 66 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1306,7 +1306,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 67 - FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[9], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 67 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1317,10 +1317,10 @@ namespace mg5amcCpu // *** DIAGRAM 68 OF 123 *** // Wavefunction(s) for diagram number 68 - FFV1_1( w_fp[2], w_fp[20], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[2], w_fp[20], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 68 - FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 68 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1334,7 +1334,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 69 - FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 69 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1350,7 +1350,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 70 - FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[11], w_fp[20], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 70 ) numerators_sv += 
cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1364,7 +1364,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 71 - FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 71 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1380,7 +1380,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 72 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 72 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1396,7 +1396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 73 - VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[20], w_fp[6], w_fp[7], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 73 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1416,7 +1416,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 74 - VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1425,7 +1425,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ 
-1434,7 +1434,7 @@ namespace mg5amcCpu jamp_sv[15] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[20], w_fp[1], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[5] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1447,10 +1447,10 @@ namespace mg5amcCpu // *** DIAGRAM 75 OF 123 *** // Wavefunction(s) for diagram number 75 - VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 0., 0., w_fp[12] ); + VVV1P0_1( w_fp[20], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 75 - VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[12], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 75 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1470,7 +1470,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 76 - VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 76 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1490,7 +1490,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 77 - FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 77 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1506,7 +1506,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 78 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( 
channelId == 78 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1520,7 +1520,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 79 - FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 79 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1536,7 +1536,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 80 - FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[23], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 80 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1547,10 +1547,10 @@ namespace mg5amcCpu // *** DIAGRAM 81 OF 123 *** // Wavefunction(s) for diagram number 81 - FFV1_1( w_fp[9], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[23] ); + FFV1_1( w_fp[9], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[23] ); // Amplitude(s) for diagram number 81 - FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[23], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 81 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1560,10 +1560,10 @@ namespace mg5amcCpu // *** DIAGRAM 82 OF 123 *** // Wavefunction(s) for diagram number 82 - FFV1_2( w_fp[15], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[12] ); + FFV1_2( w_fp[15], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[12] ); // Amplitude(s) for diagram number 82 - FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 82 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += 
cxabs2( amp_sv[0] ); @@ -1576,7 +1576,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 83 - FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[23], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 83 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1586,10 +1586,10 @@ namespace mg5amcCpu // *** DIAGRAM 84 OF 123 *** // Wavefunction(s) for diagram number 84 - FFV1_2( w_fp[13], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[21] ); + FFV1_2( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[21] ); // Amplitude(s) for diagram number 84 - FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[9], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 84 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1602,7 +1602,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 85 - FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[23], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 85 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1613,10 +1613,10 @@ namespace mg5amcCpu // *** DIAGRAM 86 OF 123 *** // Wavefunction(s) for diagram number 86 - VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 0., 0., w_fp[23] ); + VVV1P0_1( w_fp[0], w_fp[10], COUPs[0], 1.0, 0., 0., w_fp[23] ); // Amplitude(s) for diagram number 86 - FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 86 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1629,10 +1629,10 @@ namespace mg5amcCpu // *** DIAGRAM 87 OF 123 *** // Wavefunction(s) for 
diagram number 87 - FFV1_2( w_fp[16], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[22] ); + FFV1_2( w_fp[16], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[22] ); // Amplitude(s) for diagram number 87 - FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[11], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 87 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1642,10 +1642,10 @@ namespace mg5amcCpu // *** DIAGRAM 88 OF 123 *** // Wavefunction(s) for diagram number 88 - FFV1_1( w_fp[11], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[20] ); + FFV1_1( w_fp[11], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[20] ); // Amplitude(s) for diagram number 88 - FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[20], w_fp[5], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 88 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1658,7 +1658,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 89 - FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[14], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 89 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1668,10 +1668,10 @@ namespace mg5amcCpu // *** DIAGRAM 90 OF 123 *** // Wavefunction(s) for diagram number 90 - FFV1_1( w_fp[14], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[24] ); + FFV1_1( w_fp[14], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[24] ); // Amplitude(s) for diagram number 90 - FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[24], w_fp[4], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 90 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( 
amp_sv[0] ); @@ -1684,7 +1684,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 91 - FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[22], w_fp[2], w_fp[10], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 91 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1698,7 +1698,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 92 - FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[23], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 92 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1714,7 +1714,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 93 - VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1723,7 +1723,7 @@ namespace mg5amcCpu jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1732,7 +1732,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[6], w_fp[7], w_fp[5], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ 
-1745,10 +1745,10 @@ namespace mg5amcCpu // *** DIAGRAM 94 OF 123 *** // Wavefunction(s) for diagram number 94 - VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 0., 0., w_fp[22] ); + VVV1P0_1( w_fp[0], w_fp[6], COUPs[0], 1.0, 0., 0., w_fp[22] ); // Amplitude(s) for diagram number 94 - VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[5], w_fp[22], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 94 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1765,10 +1765,10 @@ namespace mg5amcCpu // *** DIAGRAM 95 OF 123 *** // Wavefunction(s) for diagram number 95 - VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 0., 0., w_fp[25] ); + VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[25] ); // Amplitude(s) for diagram number 95 - VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[5], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 95 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1788,7 +1788,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 96 - FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 96 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1804,7 +1804,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 97 - FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[24], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 97 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1818,7 +1818,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 98 - FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 
&_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[22], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 98 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1834,7 +1834,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 99 - FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 99 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1848,7 +1848,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 100 - VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1857,7 +1857,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1866,7 +1866,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[18], w_fp[7], w_fp[4], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] += cxtype( 0, 1 ) * amp_sv[0]; @@ -1879,10 +1879,10 @@ namespace mg5amcCpu // *** DIAGRAM 101 OF 123 *** // Wavefunction(s) for diagram number 101 - VVV1P0_1( w_fp[0], w_fp[18], 
COUPs[0], 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[18], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 101 - VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[7], w_fp[4], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 101 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1902,7 +1902,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 102 - VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[18], w_fp[4], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 102 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1922,7 +1922,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 103 - FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 103 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1938,7 +1938,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 104 - FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[20], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 104 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1952,7 +1952,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 105 - FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 105 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1968,7 +1968,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 106 
- FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[2], w_fp[18], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 106 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1982,7 +1982,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 107 - VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -1991,7 +1991,7 @@ namespace mg5amcCpu jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2000,7 +2000,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[10], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2016,7 +2016,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 108 - VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[10], w_fp[25], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 108 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2036,7 +2036,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 109 - VVV1_0( 
w_fp[1], w_fp[7], w_fp[23], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[23], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 109 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2056,7 +2056,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 110 - FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[20], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 110 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2069,7 +2069,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 111 - FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[21], w_fp[11], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 111 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2082,7 +2082,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 112 - FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[24], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 112 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2095,7 +2095,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 113 - FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[14], w_fp[1], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 113 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -2105,12 +2105,12 @@ namespace mg5amcCpu // *** DIAGRAM 114 OF 123 *** // Wavefunction(s) for diagram number 114 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[12] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], 
COUPs[2], 0., 0., w_fp[24] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 0., 0., w_fp[21] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[12] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[24] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[4], COUPs[2], 1.0, 0., 0., w_fp[21] ); // Amplitude(s) for diagram number 114 - VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[12], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2119,7 +2119,7 @@ namespace mg5amcCpu jamp_sv[19] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2128,7 +2128,7 @@ namespace mg5amcCpu jamp_sv[20] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[21], w_fp[7], w_fp[5], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2144,17 +2144,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 115 - FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[12], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] += amp_sv[0]; jamp_sv[19] -= amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[23] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[19] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[21] -= amp_sv[0]; jamp_sv[22] += 
amp_sv[0]; - FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[14], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[18] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; jamp_sv[22] += amp_sv[0]; @@ -2166,17 +2166,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 116 - FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[12], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] += amp_sv[0]; jamp_sv[2] -= amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; - FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[2] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[8] -= amp_sv[0]; jamp_sv[12] += amp_sv[0]; - FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[0] -= amp_sv[0]; jamp_sv[6] += amp_sv[0]; jamp_sv[12] += amp_sv[0]; @@ -2185,12 +2185,12 @@ namespace mg5amcCpu // *** DIAGRAM 117 OF 123 *** // Wavefunction(s) for diagram number 117 - VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[21] ); - VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[13] ); - VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); + VVVV1P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[21] ); + VVVV3P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] ); + VVVV4P0_1( w_fp[0], w_fp[1], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); // Amplitude(s) for diagram number 117 - VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[21], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2199,7 +2199,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[20] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[13], w_fp[7], 
w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[13], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[10] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2208,7 +2208,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[18] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[7], w_fp[4], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[12] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2224,17 +2224,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 118 - FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] += amp_sv[0]; jamp_sv[13] -= amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[17] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[13], COUPs[1], 1.0, &_fp[0] ); jamp_sv[13] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[15] -= amp_sv[0]; jamp_sv[16] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[12] -= amp_sv[0]; jamp_sv[14] += amp_sv[0]; jamp_sv[16] += amp_sv[0]; @@ -2246,17 +2246,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 119 - FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[21], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] += amp_sv[0]; jamp_sv[4] -= amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[20] += amp_sv[0]; - FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[15], w_fp[2], w_fp[13], COUPs[1], 1.0, &_fp[0] ); jamp_sv[4] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[10] -= amp_sv[0]; jamp_sv[18] += amp_sv[0]; - FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], &_fp[0] 
); + FFV1_0( w_fp[15], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[1] -= amp_sv[0]; jamp_sv[7] += amp_sv[0]; jamp_sv[18] += amp_sv[0]; @@ -2265,22 +2265,22 @@ namespace mg5amcCpu // *** DIAGRAM 120 OF 123 *** // Wavefunction(s) for diagram number 120 - VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[24] ); - VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[15] ); - VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 0., 0., w_fp[13] ); + VVVV1P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[24] ); + VVVV3P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[15] ); + VVVV4P0_1( w_fp[0], w_fp[4], w_fp[5], COUPs[2], 1.0, 0., 0., w_fp[13] ); // Amplitude(s) for diagram number 120 - FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] += amp_sv[0]; jamp_sv[7] -= amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[11] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[15], COUPs[1], 1.0, &_fp[0] ); jamp_sv[7] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[9] -= amp_sv[0]; jamp_sv[10] += amp_sv[0]; - FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[13], COUPs[1], 1.0, &_fp[0] ); jamp_sv[6] -= amp_sv[0]; jamp_sv[8] += amp_sv[0]; jamp_sv[10] += amp_sv[0]; @@ -2292,17 +2292,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 121 - FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[24], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] += amp_sv[0]; jamp_sv[5] -= amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[22] += amp_sv[0]; - FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[16], w_fp[2], w_fp[15], COUPs[1], 1.0, &_fp[0] ); jamp_sv[5] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[16] -= amp_sv[0]; jamp_sv[19] += amp_sv[0]; - FFV1_0( w_fp[16], w_fp[2], w_fp[13], COUPs[1], &_fp[0] ); + FFV1_0( 
w_fp[16], w_fp[2], w_fp[13], COUPs[1], 1.0, &_fp[0] ); jamp_sv[3] -= amp_sv[0]; jamp_sv[13] += amp_sv[0]; jamp_sv[19] += amp_sv[0]; @@ -2314,7 +2314,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 122 - VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[24], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2323,7 +2323,7 @@ namespace mg5amcCpu jamp_sv[11] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[22] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[15], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[5] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[7] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2332,7 +2332,7 @@ namespace mg5amcCpu jamp_sv[13] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[16] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[19] -= cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[13], w_fp[1], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[6] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[8] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2348,7 +2348,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 123 - VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[17], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2357,7 +2357,7 @@ namespace mg5amcCpu jamp_sv[11] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[23] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[19], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[1] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] 
-= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[3] += cxtype( 0, 1 ) * amp_sv[0]; @@ -2366,7 +2366,7 @@ namespace mg5amcCpu jamp_sv[15] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[17] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[21] += cxtype( 0, 1 ) * amp_sv[0]; - VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[0], 1.0, &_fp[0] ); jamp_sv[0] += cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[2] -= cxtype( 0, 1 ) * amp_sv[0]; jamp_sv[4] -= cxtype( 0, 1 ) * amp_sv[0]; @@ -2961,13 +2961,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f index 3f0b6e29c5..4d2e1b4f8c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f @@ -39,6 +39,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER 
I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -126,11 +127,24 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+ +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += 
smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f index 2f7edac864..2e8e377de8 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f @@ -894,12 +894,12 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,4),W(1,17),W(1,11),GC_11(IVEC),AMP(17)) C Amplitude(s) for diagram number 16 CALL FFV1_0(W(1,13),W(1,3),W(1,11),GC_11(IVEC),AMP(18)) - CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,13)) - CALL FFV1_2(W(1,4),W(1,2),GC_11(IVEC),MDL_MT, ZERO,W(1,17)) - CALL FFV1_1(W(1,13),W(1,5),GC_11(IVEC),MDL_MT, ZERO,W(1,9)) + CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,13)) + CALL FFV1_2(W(1,4),W(1,2),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,17)) + CALL FFV1_1(W(1,13),W(1,5),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,9)) C Amplitude(s) for diagram number 17 CALL FFV1_0(W(1,17),W(1,9),W(1,6),GC_11(IVEC),AMP(19)) - CALL FFV1_1(W(1,13),W(1,6),GC_11(IVEC),MDL_MT, ZERO,W(1,10)) + CALL 
FFV1_1(W(1,13),W(1,6),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,10)) C Amplitude(s) for diagram number 18 CALL FFV1_0(W(1,17),W(1,10),W(1,5),GC_11(IVEC),AMP(20)) C Amplitude(s) for diagram number 19 @@ -942,12 +942,12 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,4),W(1,13),W(1,18),GC_11(IVEC),AMP(34)) CALL FFV1_0(W(1,4),W(1,13),W(1,20),GC_11(IVEC),AMP(35)) CALL FFV1_0(W(1,4),W(1,13),W(1,9),GC_11(IVEC),AMP(36)) - CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,13)) - CALL FFV1_1(W(1,3),W(1,2),GC_11(IVEC),MDL_MT, ZERO,W(1,10)) - CALL FFV1_2(W(1,13),W(1,5),GC_11(IVEC),MDL_MT, ZERO,W(1,21)) + CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,13)) + CALL FFV1_1(W(1,3),W(1,2),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,10)) + CALL FFV1_2(W(1,13),W(1,5),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,21)) C Amplitude(s) for diagram number 33 CALL FFV1_0(W(1,21),W(1,10),W(1,6),GC_11(IVEC),AMP(37)) - CALL FFV1_2(W(1,13),W(1,6),GC_11(IVEC),MDL_MT, ZERO,W(1,22)) + CALL FFV1_2(W(1,13),W(1,6),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,22)) C Amplitude(s) for diagram number 34 CALL FFV1_0(W(1,22),W(1,10),W(1,5),GC_11(IVEC),AMP(38)) C Amplitude(s) for diagram number 35 @@ -983,7 +983,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,13),W(1,3),W(1,20),GC_11(IVEC),AMP(53)) CALL FFV1_0(W(1,13),W(1,3),W(1,9),GC_11(IVEC),AMP(54)) CALL VVV1P0_1(W(1,1),W(1,5),GC_10(IVEC),ZERO, FK_ZERO,W(1,13)) - CALL FFV1_2(W(1,4),W(1,13),GC_11(IVEC),MDL_MT, ZERO,W(1,23)) + CALL FFV1_2(W(1,4),W(1,13),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,23)) C Amplitude(s) for diagram number 49 CALL FFV1_0(W(1,23),W(1,10),W(1,6),GC_11(IVEC),AMP(55)) CALL VVV1P0_1(W(1,13),W(1,6),GC_10(IVEC),ZERO, FK_ZERO,W(1,24)) @@ -991,7 +991,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,4),W(1,10),W(1,24),GC_11(IVEC),AMP(56)) C Amplitude(s) for diagram number 51 CALL FFV1_0(W(1,14),W(1,10),W(1,13),GC_11(IVEC),AMP(57)) - CALL 
FFV1_1(W(1,3),W(1,13),GC_11(IVEC),MDL_MT, ZERO,W(1,21)) + CALL FFV1_1(W(1,3),W(1,13),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,21)) C Amplitude(s) for diagram number 52 CALL FFV1_0(W(1,17),W(1,21),W(1,6),GC_11(IVEC),AMP(58)) C Amplitude(s) for diagram number 53 @@ -1022,7 +1022,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C Amplitude(s) for diagram number 64 CALL FFV1_0(W(1,14),W(1,21),W(1,2),GC_11(IVEC),AMP(72)) CALL VVV1P0_1(W(1,1),W(1,6),GC_10(IVEC),ZERO, FK_ZERO,W(1,21)) - CALL FFV1_2(W(1,4),W(1,21),GC_11(IVEC),MDL_MT, ZERO,W(1,22)) + CALL FFV1_2(W(1,4),W(1,21),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,22)) C Amplitude(s) for diagram number 65 CALL FFV1_0(W(1,22),W(1,10),W(1,5),GC_11(IVEC),AMP(73)) CALL VVV1P0_1(W(1,21),W(1,5),GC_10(IVEC),ZERO, FK_ZERO,W(1,23)) @@ -1030,7 +1030,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,4),W(1,10),W(1,23),GC_11(IVEC),AMP(74)) C Amplitude(s) for diagram number 67 CALL FFV1_0(W(1,16),W(1,10),W(1,21),GC_11(IVEC),AMP(75)) - CALL FFV1_1(W(1,3),W(1,21),GC_11(IVEC),MDL_MT, ZERO,W(1,24)) + CALL FFV1_1(W(1,3),W(1,21),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,24)) C Amplitude(s) for diagram number 68 CALL FFV1_0(W(1,17),W(1,24),W(1,5),GC_11(IVEC),AMP(76)) C Amplitude(s) for diagram number 69 @@ -1063,12 +1063,12 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_1(W(1,10),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,24)) C Amplitude(s) for diagram number 81 CALL FFV1_0(W(1,16),W(1,24),W(1,6),GC_11(IVEC),AMP(91)) - CALL FFV1_2(W(1,16),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,13)) + CALL FFV1_2(W(1,16),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,13)) C Amplitude(s) for diagram number 82 CALL FFV1_0(W(1,13),W(1,10),W(1,6),GC_11(IVEC),AMP(92)) C Amplitude(s) for diagram number 83 CALL FFV1_0(W(1,14),W(1,24),W(1,5),GC_11(IVEC),AMP(93)) - CALL FFV1_2(W(1,14),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,22)) + CALL FFV1_2(W(1,14),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,22)) C Amplitude(s) for diagram number 84 
CALL FFV1_0(W(1,22),W(1,10),W(1,5),GC_11(IVEC),AMP(94)) C Amplitude(s) for diagram number 85 @@ -1079,12 +1079,12 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_2(W(1,17),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,23)) C Amplitude(s) for diagram number 87 CALL FFV1_0(W(1,23),W(1,12),W(1,6),GC_11(IVEC),AMP(97)) - CALL FFV1_1(W(1,12),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,21)) + CALL FFV1_1(W(1,12),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,21)) C Amplitude(s) for diagram number 88 CALL FFV1_0(W(1,17),W(1,21),W(1,6),GC_11(IVEC),AMP(98)) C Amplitude(s) for diagram number 89 CALL FFV1_0(W(1,23),W(1,15),W(1,5),GC_11(IVEC),AMP(99)) - CALL FFV1_1(W(1,15),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,25)) + CALL FFV1_1(W(1,15),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,25)) C Amplitude(s) for diagram number 90 CALL FFV1_0(W(1,17),W(1,25),W(1,5),GC_11(IVEC),AMP(100)) C Amplitude(s) for diagram number 91 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+ +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc index e5e62a0af2..5723ed5665 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc @@ -249,23 +249,16 @@ namespace mg5amcCpu ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); -#if not( defined __CUDACC__ and defined MGONGPU_TEST_DIVERGENCE ) - oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); -#else - if( ( blockDim.x * blockIdx.x + threadIdx.x ) % 2 == 0 ) - oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); - else - oxxxxx( momenta, 0, cHel[ihel][4], +1, w_fp[4], 4 ) -#endif + oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); + ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); - VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[4], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[0], w_fp[1], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 1 
- FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -278,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 36 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); + FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[4], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[4], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -294,10 +287,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 36 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -310,10 +303,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 36 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -326,10 +319,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 36 *** 
// Wavefunction(s) for diagram number 5 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -342,11 +335,11 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 36 *** // Wavefunction(s) for diagram number 6 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[3], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[6] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -357,11 +350,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 36 *** // Wavefunction(s) for diagram number 7 - FFV1_1( w_fp[4], w_fp[1], COUPs[1], 0., 0., w_fp[10] ); - FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 0., 0., w_fp[11] ); + FFV1_1( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[5], w_fp[10], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -372,10 +365,10 @@ namespace mg5amcCpu // *** DIAGRAM 8 OF 36 *** // Wavefunction(s) for diagram number 8 - FFV1_2( w_fp[5], 
w_fp[1], COUPs[1], 0., 0., w_fp[12] ); + FFV1_2( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[12], w_fp[4], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[4], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -386,10 +379,10 @@ namespace mg5amcCpu // *** DIAGRAM 9 OF 36 *** // Wavefunction(s) for diagram number 9 - FFV1_1( w_fp[9], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_1( w_fp[9], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -403,7 +396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 10 - VVV1_0( w_fp[1], w_fp[8], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -414,11 +407,11 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 36 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); - FFV1_1( w_fp[2], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[2], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( 
amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -429,10 +422,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 36 *** // Wavefunction(s) for diagram number 12 - FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[5], w_fp[10], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -446,7 +439,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[12], w_fp[4], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[4], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -457,10 +450,10 @@ namespace mg5amcCpu // *** DIAGRAM 14 OF 36 *** // Wavefunction(s) for diagram number 14 - FFV1_2( w_fp[11], w_fp[1], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); + FFV1_2( w_fp[11], w_fp[1], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -474,7 +467,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[9], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -485,11 +478,11 @@ namespace mg5amcCpu // *** DIAGRAM 16 
OF 36 *** // Wavefunction(s) for diagram number 16 - FFV1_1( w_fp[4], w_fp[0], COUPs[1], 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[5], w_fp[9], COUPs[1], 0., 0., w_fp[14] ); + FFV1_1( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -503,7 +496,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -517,7 +510,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -528,10 +521,10 @@ namespace mg5amcCpu // *** DIAGRAM 19 OF 36 *** // Wavefunction(s) for diagram number 19 - FFV1_1( w_fp[9], w_fp[1], COUPs[1], 0., 0., w_fp[11] ); + FFV1_1( w_fp[9], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[5], w_fp[11], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[11], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -545,7 +538,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 20 - 
VVV1_0( w_fp[1], w_fp[7], w_fp[14], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[14], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -556,11 +549,11 @@ namespace mg5amcCpu // *** DIAGRAM 21 OF 36 *** // Wavefunction(s) for diagram number 21 - FFV1_2( w_fp[5], w_fp[0], COUPs[1], 0., 0., w_fp[14] ); - FFV1P0_3( w_fp[14], w_fp[4], COUPs[1], 0., 0., w_fp[11] ); + FFV1_2( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] ); + FFV1P0_3( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -574,7 +567,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -588,7 +581,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 23 - FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -599,10 +592,10 @@ namespace mg5amcCpu // *** DIAGRAM 24 OF 36 *** // Wavefunction(s) for diagram number 24 - FFV1_2( w_fp[14], w_fp[1], COUPs[1], 0., 0., w_fp[9] ); + FFV1_2( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[9], w_fp[4], 
w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[4], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -616,7 +609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 25 - VVV1_0( w_fp[1], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -627,10 +620,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 36 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[13], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -641,10 +634,10 @@ namespace mg5amcCpu // *** DIAGRAM 27 OF 36 *** // Wavefunction(s) for diagram number 27 - VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 0., 0., w_fp[11] ); + VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -655,10 +648,10 @@ namespace mg5amcCpu // *** DIAGRAM 28 OF 36 *** // Wavefunction(s) for diagram number 28 - FFV1_2( w_fp[6], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] 
); // Amplitude(s) for diagram number 28 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -672,7 +665,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -683,10 +676,10 @@ namespace mg5amcCpu // *** DIAGRAM 30 OF 36 *** // Wavefunction(s) for diagram number 30 - FFV1_1( w_fp[10], w_fp[0], COUPs[1], 0., 0., w_fp[6] ); + FFV1_1( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 30 - FFV1_0( w_fp[5], w_fp[6], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[6], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -697,10 +690,10 @@ namespace mg5amcCpu // *** DIAGRAM 31 OF 36 *** // Wavefunction(s) for diagram number 31 - VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 31 - FFV1_0( w_fp[5], w_fp[10], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -711,10 +704,10 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 36 *** // Wavefunction(s) for diagram number 32 - FFV1_2( w_fp[12], w_fp[0], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[12], w_fp[0], 
COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[10], w_fp[4], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[4], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -728,7 +721,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[12], w_fp[4], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[4], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -742,17 +735,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 34 - VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += 1. / 2. * amp_sv[0]; jamp_sv[2] -= 1. / 2. * amp_sv[0]; jamp_sv[9] -= 1. / 2. * amp_sv[0]; jamp_sv[10] += 1. / 2. * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[1] += 1. / 2. * amp_sv[0]; jamp_sv[5] -= 1. / 2. * amp_sv[0]; jamp_sv[6] -= 1. / 2. * amp_sv[0]; jamp_sv[10] += 1. / 2. * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[1], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] += 1. / 2. * amp_sv[0]; jamp_sv[5] -= 1. / 2. * amp_sv[0]; jamp_sv[6] -= 1. / 2. 
* amp_sv[0]; @@ -764,7 +757,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - VVV1_0( w_fp[1], w_fp[8], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[8], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -780,7 +773,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[1], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[1], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1363,13 +1356,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f index 
67decfd0d7..67adf83921 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION G2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,11 +130,24 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + G2=PDG2PDF(LPP(IB(2)),0, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. 
+// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned 
int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f index d599711adb..41e5e36e39 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f @@ -576,8 +576,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_2(W(1,4),W(1,7),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,10)) C Amplitude(s) for diagram number 5 CALL FFV1_0(W(1,10),W(1,3),W(1,9),GC_11(IVEC),AMP(5)) - CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,10)) - CALL FFV1_2(W(1,4),W(1,2),GC_11(IVEC),MDL_MT, ZERO,W(1,7)) + CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,10)) + CALL FFV1_2(W(1,4),W(1,2),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,7)) C Amplitude(s) for diagram number 6 CALL FFV1_0(W(1,7),W(1,10),W(1,9),GC_11(IVEC),AMP(6)) CALL FFV1_1(W(1,5),W(1,2),GC_11(IVEC),ZERO, FK_ZERO,W(1,11)) @@ -592,8 +592,8 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL 
FFV1_0(W(1,4),W(1,14),W(1,9),GC_11(IVEC),AMP(9)) C Amplitude(s) for diagram number 10 CALL VVV1_0(W(1,2),W(1,9),W(1,12),GC_10(IVEC),AMP(10)) - CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,12)) - CALL FFV1_1(W(1,3),W(1,2),GC_11(IVEC),MDL_MT, ZERO,W(1,14)) + CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,12)) + CALL FFV1_1(W(1,3),W(1,2),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,14)) C Amplitude(s) for diagram number 11 CALL FFV1_0(W(1,12),W(1,14),W(1,9),GC_11(IVEC),AMP(11)) CALL FFV1P0_3(W(1,12),W(1,3),GC_11(IVEC),ZERO, FK_ZERO,W(1,10)) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+ +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc index 59c382aca4..b8f74ecafe 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc @@ -243,7 +243,7 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); @@ -251,14 +251,14 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - oxzxxx( momenta, cHel[ihel][5], +1, w_fp[5], 5 ); + oxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[1], w_fp[8], w_fp[7], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 36 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); + FFV1_2( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[5], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[5], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -287,10 +287,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 36 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -303,10 +303,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 36 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -319,10 +319,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 36 *** // Wavefunction(s) for diagram 
number 5 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -335,11 +335,11 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 36 *** // Wavefunction(s) for diagram number 6 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[6] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -350,11 +350,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 36 *** // Wavefunction(s) for diagram number 7 - FFV1_1( w_fp[5], w_fp[4], COUPs[1], 0., 0., w_fp[10] ); - FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 0., 0., w_fp[11] ); + FFV1_1( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[1], w_fp[10], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[10], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -365,10 +365,10 @@ namespace mg5amcCpu // *** DIAGRAM 8 OF 36 *** // Wavefunction(s) for diagram number 8 - FFV1_2( w_fp[1], w_fp[4], COUPs[1], 0., 0., 
w_fp[12] ); + FFV1_2( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[12], w_fp[5], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[5], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -379,10 +379,10 @@ namespace mg5amcCpu // *** DIAGRAM 9 OF 36 *** // Wavefunction(s) for diagram number 9 - FFV1_1( w_fp[9], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -396,7 +396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 10 - VVV1_0( w_fp[4], w_fp[8], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[8], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -407,11 +407,11 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 36 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 
0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -422,10 +422,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 36 *** // Wavefunction(s) for diagram number 12 - FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[1], w_fp[10], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[10], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -439,7 +439,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[12], w_fp[5], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[5], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -450,10 +450,10 @@ namespace mg5amcCpu // *** DIAGRAM 14 OF 36 *** // Wavefunction(s) for diagram number 14 - FFV1_2( w_fp[11], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); + FFV1_2( w_fp[11], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -467,7 +467,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[4], w_fp[8], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[8], w_fp[9], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -478,11 +478,11 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 36 *** // Wavefunction(s) 
for diagram number 16 - FFV1_1( w_fp[5], w_fp[0], COUPs[1], 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[1], w_fp[9], COUPs[1], 0., 0., w_fp[14] ); + FFV1_1( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -496,7 +496,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -510,7 +510,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -521,10 +521,10 @@ namespace mg5amcCpu // *** DIAGRAM 19 OF 36 *** // Wavefunction(s) for diagram number 19 - FFV1_1( w_fp[9], w_fp[4], COUPs[1], 0., 0., w_fp[11] ); + FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[1], w_fp[11], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[11], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -538,7 +538,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[4], w_fp[7], 
w_fp[14], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[7], w_fp[14], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -549,11 +549,11 @@ namespace mg5amcCpu // *** DIAGRAM 21 OF 36 *** // Wavefunction(s) for diagram number 21 - FFV1_2( w_fp[1], w_fp[0], COUPs[1], 0., 0., w_fp[14] ); - FFV1P0_3( w_fp[14], w_fp[5], COUPs[1], 0., 0., w_fp[11] ); + FFV1_2( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] ); + FFV1P0_3( w_fp[14], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -567,7 +567,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -581,7 +581,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 23 - FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -592,10 +592,10 @@ namespace mg5amcCpu // *** DIAGRAM 24 OF 36 *** // Wavefunction(s) for diagram number 24 - FFV1_2( w_fp[14], w_fp[4], COUPs[1], 0., 0., w_fp[9] ); + FFV1_2( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[9], w_fp[5], w_fp[7], COUPs[1], &_fp[0] 
); + FFV1_0( w_fp[9], w_fp[5], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -609,7 +609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 25 - VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -620,10 +620,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 36 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[13], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -634,10 +634,10 @@ namespace mg5amcCpu // *** DIAGRAM 27 OF 36 *** // Wavefunction(s) for diagram number 27 - VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 0., 0., w_fp[11] ); + VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -648,10 +648,10 @@ namespace mg5amcCpu // *** DIAGRAM 28 OF 36 *** // Wavefunction(s) for diagram number 28 - FFV1_2( w_fp[6], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for 
diagram number 28 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -665,7 +665,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -676,10 +676,10 @@ namespace mg5amcCpu // *** DIAGRAM 30 OF 36 *** // Wavefunction(s) for diagram number 30 - FFV1_1( w_fp[10], w_fp[0], COUPs[1], 0., 0., w_fp[6] ); + FFV1_1( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 30 - FFV1_0( w_fp[1], w_fp[6], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[6], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -690,10 +690,10 @@ namespace mg5amcCpu // *** DIAGRAM 31 OF 36 *** // Wavefunction(s) for diagram number 31 - VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 31 - FFV1_0( w_fp[1], w_fp[10], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -704,10 +704,10 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 36 *** // Wavefunction(s) for diagram number 32 - FFV1_2( w_fp[12], w_fp[0], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[12], w_fp[0], COUPs[1], 1.0, 0., 0., 
w_fp[10] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[10], w_fp[5], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[5], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -721,7 +721,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[12], w_fp[5], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[5], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -735,17 +735,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 34 - VVVV1_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= 1. / 2. * amp_sv[0]; jamp_sv[5] += 1. / 2. * amp_sv[0]; jamp_sv[8] -= 1. / 2. * amp_sv[0]; jamp_sv[10] += 1. / 2. * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += 1. / 2. * amp_sv[0]; jamp_sv[2] -= 1. / 2. * amp_sv[0]; jamp_sv[7] += 1. / 2. * amp_sv[0]; jamp_sv[8] -= 1. / 2. * amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] += 1. / 2. * amp_sv[0]; jamp_sv[5] -= 1. / 2. * amp_sv[0]; jamp_sv[7] += 1. / 2. 
* amp_sv[0]; @@ -757,7 +757,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - VVV1_0( w_fp[4], w_fp[8], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[8], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -773,7 +773,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1356,19 +1356,18 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 96,96 }; // assume nprocesses == 1 (#272 and #343) + constexpr int 
helcolDenominators[1] = { 96 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f index 58e83991fd..83a2a24681 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION D2,U2,S2,C2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,14 +130,27 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)), QSCALE) + U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)), QSCALE) + S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)), QSCALE) + C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc deleted file mode 
120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
+ constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput 
is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f index 7508bec3da..a2b48f860a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f @@ -570,13 +570,13 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1P0_3(W(1,2),W(1,6),GC_11(IVEC),ZERO, FK_ZERO,W(1,9)) C Amplitude(s) for diagram number 3 CALL VVV1_0(W(1,7),W(1,8),W(1,9),GC_10(IVEC),AMP(3)) - CALL FFV1_1(W(1,3),W(1,7),GC_11(IVEC),MDL_MT, ZERO,W(1,10)) + CALL FFV1_1(W(1,3),W(1,7),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,10)) C Amplitude(s) for diagram number 4 CALL FFV1_0(W(1,4),W(1,10),W(1,9),GC_11(IVEC),AMP(4)) - CALL FFV1_2(W(1,4),W(1,7),GC_11(IVEC),MDL_MT, ZERO,W(1,10)) + CALL FFV1_2(W(1,4),W(1,7),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,10)) C Amplitude(s) for diagram number 5 CALL FFV1_0(W(1,10),W(1,3),W(1,9),GC_11(IVEC),AMP(5)) - CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,10)) + CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,10)) CALL FFV1_2(W(1,4),W(1,5),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,7)) C Amplitude(s) for diagram number 6 CALL FFV1_0(W(1,7),W(1,10),W(1,9),GC_11(IVEC),AMP(6)) @@ -587,12 +587,12 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_2(W(1,2),W(1,5),GC_11(IVEC),ZERO, FK_ZERO,W(1,13)) C Amplitude(s) for diagram number 8 CALL FFV1_0(W(1,13),W(1,6),W(1,12),GC_11(IVEC),AMP(8)) - CALL FFV1_1(W(1,10),W(1,5),GC_11(IVEC),MDL_MT, ZERO,W(1,14)) + CALL FFV1_1(W(1,10),W(1,5),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,14)) C Amplitude(s) for diagram number 9 CALL FFV1_0(W(1,4),W(1,14),W(1,9),GC_11(IVEC),AMP(9)) C Amplitude(s) for diagram number 10 CALL 
VVV1_0(W(1,5),W(1,9),W(1,12),GC_10(IVEC),AMP(10)) - CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,12)) + CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,12)) CALL FFV1_1(W(1,3),W(1,5),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,14)) C Amplitude(s) for diagram number 11 CALL FFV1_0(W(1,12),W(1,14),W(1,9),GC_11(IVEC),AMP(11)) @@ -601,7 +601,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,2),W(1,11),W(1,10),GC_11(IVEC),AMP(12)) C Amplitude(s) for diagram number 13 CALL FFV1_0(W(1,13),W(1,6),W(1,10),GC_11(IVEC),AMP(13)) - CALL FFV1_2(W(1,12),W(1,5),GC_11(IVEC),MDL_MT, ZERO,W(1,15)) + CALL FFV1_2(W(1,12),W(1,5),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,15)) C Amplitude(s) for diagram number 14 CALL FFV1_0(W(1,15),W(1,3),W(1,9),GC_11(IVEC),AMP(14)) C Amplitude(s) for diagram number 15 @@ -632,13 +632,13 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,10),W(1,6),W(1,8),GC_11(IVEC),AMP(24)) C Amplitude(s) for diagram number 25 CALL VVV1_0(W(1,5),W(1,8),W(1,12),GC_10(IVEC),AMP(25)) - CALL FFV1_1(W(1,14),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,12)) + CALL FFV1_1(W(1,14),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,12)) C Amplitude(s) for diagram number 26 CALL FFV1_0(W(1,4),W(1,12),W(1,9),GC_11(IVEC),AMP(26)) CALL VVV1P0_1(W(1,1),W(1,9),GC_10(IVEC),ZERO, FK_ZERO,W(1,12)) C Amplitude(s) for diagram number 27 CALL FFV1_0(W(1,4),W(1,14),W(1,12),GC_11(IVEC),AMP(27)) - CALL FFV1_2(W(1,7),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,14)) + CALL FFV1_2(W(1,7),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,14)) C Amplitude(s) for diagram number 28 CALL FFV1_0(W(1,14),W(1,3),W(1,9),GC_11(IVEC),AMP(28)) C Amplitude(s) for diagram number 29 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/ompnumthreads.cc +++ /dev/null @@ -1 
+0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc index 607cb95cfd..2495941a73 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc @@ -243,7 +243,7 @@ namespace mg5amcCpu // Wavefunction(s) for diagram number 1 vxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 
2 ); @@ -251,14 +251,14 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); + ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); - VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[0], w_fp[4], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 36 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); + FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -287,10 +287,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 36 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -303,10 +303,10 
@@ namespace mg5amcCpu // *** DIAGRAM 4 OF 36 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -319,10 +319,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 36 *** // Wavefunction(s) for diagram number 5 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -335,11 +335,11 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 36 *** // Wavefunction(s) for diagram number 6 - FFV1_1( w_fp[2], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[6] ); + FFV1_1( w_fp[2], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -350,11 +350,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 36 *** // Wavefunction(s) for diagram number 7 - FFV1_1( w_fp[1], w_fp[4], COUPs[1], 0., 0., w_fp[10] ); - FFV1P0_3( w_fp[3], 
w_fp[9], COUPs[1], 0., 0., w_fp[11] ); + FFV1_1( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[5], w_fp[10], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -365,10 +365,10 @@ namespace mg5amcCpu // *** DIAGRAM 8 OF 36 *** // Wavefunction(s) for diagram number 8 - FFV1_2( w_fp[5], w_fp[4], COUPs[1], 0., 0., w_fp[12] ); + FFV1_2( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[12], w_fp[1], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[1], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -379,10 +379,10 @@ namespace mg5amcCpu // *** DIAGRAM 9 OF 36 *** // Wavefunction(s) for diagram number 9 - FFV1_1( w_fp[9], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -396,7 +396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 10 - VVV1_0( w_fp[4], w_fp[8], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[8], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -407,11 +407,11 @@ 
namespace mg5amcCpu // *** DIAGRAM 11 OF 36 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[3], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[3], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -422,10 +422,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 36 *** // Wavefunction(s) for diagram number 12 - FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[5], w_fp[10], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -439,7 +439,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[12], w_fp[1], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[1], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -450,10 +450,10 @@ namespace mg5amcCpu // *** DIAGRAM 14 OF 36 *** // Wavefunction(s) for diagram number 14 - FFV1_2( w_fp[11], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); + FFV1_2( w_fp[11], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], 
w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -467,7 +467,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[4], w_fp[8], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[8], w_fp[9], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -478,11 +478,11 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 36 *** // Wavefunction(s) for diagram number 16 - FFV1_1( w_fp[1], w_fp[0], COUPs[1], 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[5], w_fp[9], COUPs[1], 0., 0., w_fp[14] ); + FFV1_1( w_fp[1], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -496,7 +496,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -510,7 +510,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -521,10 +521,10 @@ 
namespace mg5amcCpu // *** DIAGRAM 19 OF 36 *** // Wavefunction(s) for diagram number 19 - FFV1_1( w_fp[9], w_fp[4], COUPs[1], 0., 0., w_fp[11] ); + FFV1_1( w_fp[9], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[5], w_fp[11], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[11], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -538,7 +538,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[4], w_fp[7], w_fp[14], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[7], w_fp[14], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -549,11 +549,11 @@ namespace mg5amcCpu // *** DIAGRAM 21 OF 36 *** // Wavefunction(s) for diagram number 21 - FFV1_2( w_fp[5], w_fp[0], COUPs[1], 0., 0., w_fp[14] ); - FFV1P0_3( w_fp[14], w_fp[1], COUPs[1], 0., 0., w_fp[11] ); + FFV1_2( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[14] ); + FFV1P0_3( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -567,7 +567,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -581,7 +581,7 @@ namespace mg5amcCpu // (none) // 
Amplitude(s) for diagram number 23 - FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -592,10 +592,10 @@ namespace mg5amcCpu // *** DIAGRAM 24 OF 36 *** // Wavefunction(s) for diagram number 24 - FFV1_2( w_fp[14], w_fp[4], COUPs[1], 0., 0., w_fp[9] ); + FFV1_2( w_fp[14], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[9], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -609,7 +609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 25 - VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -620,10 +620,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 36 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[13], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[13], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -634,10 +634,10 @@ namespace mg5amcCpu // *** DIAGRAM 27 OF 36 *** // Wavefunction(s) for diagram number 27 - VVV1P0_1( w_fp[0], w_fp[8], COUPs[0], 0., 0., w_fp[11] ); + VVV1P0_1( 
w_fp[0], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -648,10 +648,10 @@ namespace mg5amcCpu // *** DIAGRAM 28 OF 36 *** // Wavefunction(s) for diagram number 28 - FFV1_2( w_fp[6], w_fp[0], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[6], w_fp[0], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 28 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -665,7 +665,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -676,10 +676,10 @@ namespace mg5amcCpu // *** DIAGRAM 30 OF 36 *** // Wavefunction(s) for diagram number 30 - FFV1_1( w_fp[10], w_fp[0], COUPs[1], 0., 0., w_fp[6] ); + FFV1_1( w_fp[10], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 30 - FFV1_0( w_fp[5], w_fp[6], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[6], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -690,10 +690,10 @@ namespace mg5amcCpu // *** DIAGRAM 31 OF 36 *** // Wavefunction(s) for diagram number 31 - VVV1P0_1( w_fp[0], 
w_fp[7], COUPs[0], 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[0], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 31 - FFV1_0( w_fp[5], w_fp[10], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -704,10 +704,10 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 36 *** // Wavefunction(s) for diagram number 32 - FFV1_2( w_fp[12], w_fp[0], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[12], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -721,7 +721,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[12], w_fp[1], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[1], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -735,17 +735,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 34 - VVVV1_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[4] -= 1. / 2. * amp_sv[0]; jamp_sv[7] += 1. / 2. * amp_sv[0]; jamp_sv[8] -= 1. / 2. * amp_sv[0]; jamp_sv[11] += 1. / 2. * amp_sv[0]; - VVVV3_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= 1. / 2. * amp_sv[0]; jamp_sv[3] -= 1. / 2. * amp_sv[0]; jamp_sv[7] += 1. / 2. * amp_sv[0]; jamp_sv[11] += 1. / 2. 
* amp_sv[0]; - VVVV4_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[0], w_fp[4], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[0] -= 1. / 2. * amp_sv[0]; jamp_sv[3] -= 1. / 2. * amp_sv[0]; jamp_sv[4] += 1. / 2. * amp_sv[0]; @@ -757,7 +757,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - VVV1_0( w_fp[4], w_fp[8], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[8], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -773,7 +773,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[4], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1356,19 +1356,18 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp 
exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 96,96 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 96 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f index 56e24ed83e..8cb3f9af60 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION G1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,18 +130,27 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + 
UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
+ constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput 
is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f index d1e36d2d51..7ce63300ba 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f @@ -568,13 +568,13 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1P0_3(W(1,6),W(1,2),GC_11(IVEC),ZERO, FK_ZERO,W(1,9)) C Amplitude(s) for diagram number 3 CALL VVV1_0(W(1,7),W(1,8),W(1,9),GC_10(IVEC),AMP(3)) - CALL FFV1_1(W(1,3),W(1,7),GC_11(IVEC),MDL_MT, ZERO,W(1,10)) + CALL FFV1_1(W(1,3),W(1,7),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,10)) C Amplitude(s) for diagram number 4 CALL FFV1_0(W(1,4),W(1,10),W(1,9),GC_11(IVEC),AMP(4)) - CALL FFV1_2(W(1,4),W(1,7),GC_11(IVEC),MDL_MT, ZERO,W(1,10)) + CALL FFV1_2(W(1,4),W(1,7),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,10)) C Amplitude(s) for diagram number 5 CALL FFV1_0(W(1,10),W(1,3),W(1,9),GC_11(IVEC),AMP(5)) - CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,10)) + CALL FFV1_1(W(1,3),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,10)) CALL FFV1_2(W(1,4),W(1,5),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,7)) C Amplitude(s) for diagram number 6 CALL FFV1_0(W(1,7),W(1,10),W(1,9),GC_11(IVEC),AMP(6)) @@ -585,12 +585,12 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_2(W(1,6),W(1,5),GC_11(IVEC),ZERO, FK_ZERO,W(1,13)) C Amplitude(s) for diagram number 8 CALL FFV1_0(W(1,13),W(1,2),W(1,12),GC_11(IVEC),AMP(8)) - CALL FFV1_1(W(1,10),W(1,5),GC_11(IVEC),MDL_MT, ZERO,W(1,14)) + CALL FFV1_1(W(1,10),W(1,5),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,14)) C Amplitude(s) for diagram number 9 CALL FFV1_0(W(1,4),W(1,14),W(1,9),GC_11(IVEC),AMP(9)) C Amplitude(s) for diagram number 10 CALL 
VVV1_0(W(1,5),W(1,9),W(1,12),GC_10(IVEC),AMP(10)) - CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,12)) + CALL FFV1_2(W(1,4),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,12)) CALL FFV1_1(W(1,3),W(1,5),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,14)) C Amplitude(s) for diagram number 11 CALL FFV1_0(W(1,12),W(1,14),W(1,9),GC_11(IVEC),AMP(11)) @@ -599,7 +599,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,6),W(1,11),W(1,10),GC_11(IVEC),AMP(12)) C Amplitude(s) for diagram number 13 CALL FFV1_0(W(1,13),W(1,2),W(1,10),GC_11(IVEC),AMP(13)) - CALL FFV1_2(W(1,12),W(1,5),GC_11(IVEC),MDL_MT, ZERO,W(1,15)) + CALL FFV1_2(W(1,12),W(1,5),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,15)) C Amplitude(s) for diagram number 14 CALL FFV1_0(W(1,15),W(1,3),W(1,9),GC_11(IVEC),AMP(14)) C Amplitude(s) for diagram number 15 @@ -630,13 +630,13 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,10),W(1,2),W(1,8),GC_11(IVEC),AMP(24)) C Amplitude(s) for diagram number 25 CALL VVV1_0(W(1,5),W(1,8),W(1,12),GC_10(IVEC),AMP(25)) - CALL FFV1_1(W(1,14),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,12)) + CALL FFV1_1(W(1,14),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,12)) C Amplitude(s) for diagram number 26 CALL FFV1_0(W(1,4),W(1,12),W(1,9),GC_11(IVEC),AMP(26)) CALL VVV1P0_1(W(1,1),W(1,9),GC_10(IVEC),ZERO, FK_ZERO,W(1,12)) C Amplitude(s) for diagram number 27 CALL FFV1_0(W(1,4),W(1,14),W(1,12),GC_11(IVEC),AMP(27)) - CALL FFV1_2(W(1,7),W(1,1),GC_11(IVEC),MDL_MT, ZERO,W(1,14)) + CALL FFV1_2(W(1,7),W(1,1),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,14)) C Amplitude(s) for diagram number 28 CALL FFV1_0(W(1,14),W(1,3),W(1,9),GC_11(IVEC),AMP(28)) C Amplitude(s) for diagram number 29 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/ompnumthreads.cc +++ /dev/null 
@@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc index 0ac5734c21..529477ff3e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc @@ -243,24 +243,24 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 7 *** // Wavefunction(s) for diagram number 1 - ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); - imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses 
pz + ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); + oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - oxzxxx( momenta, cHel[ihel][5], +1, w_fp[5], 5 ); + oxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); - FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -273,10 +273,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 7 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -289,10 +289,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 7 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], 
w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -303,10 +303,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 7 *** // Wavefunction(s) for diagram number 4 - FFV1_2( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[5], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[5], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -319,10 +319,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 7 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); + FFV1_1( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[1], w_fp[3], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[3], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -335,10 +335,10 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 7 *** // Wavefunction(s) for diagram number 6 - FFV1_2( w_fp[0], w_fp[7], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,10 +351,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 7 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[0], w_fp[8], COUPs[1], 0., 0., w_fp[3] ); + 
FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[3], w_fp[4], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -931,19 +931,18 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f index c68a9f5a67..3488dfd2e6 100644 --- 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f @@ -44,6 +44,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,C1 DOUBLE PRECISION D2,S2,C2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -131,15 +132,28 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)), QSCALE) + S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)), QSCALE) + C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
+ constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput 
is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f index 4508401458..efcaed5bd1 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f @@ -493,10 +493,10 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL OXXXXX(P(0,6),ZERO,NHEL(6),+1*IC(6),W(1,6)) CALL FFV1P0_3(W(1,1),W(1,5),GC_11(IVEC),ZERO, FK_ZERO,W(1,7)) CALL FFV1P0_3(W(1,2),W(1,6),GC_11(IVEC),ZERO, FK_ZERO,W(1,8)) - CALL FFV1_1(W(1,3),W(1,7),GC_11(IVEC),MDL_MT, ZERO,W(1,9)) + CALL FFV1_1(W(1,3),W(1,7),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,9)) C Amplitude(s) for diagram number 1 CALL FFV1_0(W(1,4),W(1,9),W(1,8),GC_11(IVEC),AMP(1)) - CALL FFV1_2(W(1,4),W(1,7),GC_11(IVEC),MDL_MT, ZERO,W(1,9)) + CALL FFV1_2(W(1,4),W(1,7),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,9)) C Amplitude(s) for diagram number 2 CALL FFV1_0(W(1,9),W(1,3),W(1,8),GC_11(IVEC),AMP(2)) CALL FFV1P0_3(W(1,4),W(1,3),GC_11(IVEC),ZERO, FK_ZERO,W(1,9)) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and 
UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc index 5d8331468c..e54a24ea57 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc @@ -249,24 +249,24 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 7 *** // Wavefunction(s) for diagram number 1 - ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); + oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); + ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); - FFV1P0_3( 
w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -279,10 +279,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 7 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -295,10 +295,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 7 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -309,10 +309,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 7 *** // Wavefunction(s) for diagram number 4 - FFV1_2( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[5], w_fp[6], COUPs[1], 
1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -325,10 +325,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 7 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); + FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -341,10 +341,10 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 7 *** // Wavefunction(s) for diagram number 6 - FFV1_2( w_fp[0], w_fp[7], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -357,10 +357,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 7 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[0], w_fp[8], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[3], w_fp[4], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -937,19 +937,18 @@ namespace 
mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f index 1044310fc4..0b6e873ee4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f @@ -50,6 +50,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -137,21 +138,30 @@ DOUBLE PRECISION 
FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc new file mode 100644 index 
0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
+ constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput 
is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f index f444d0a2c6..3172975ef4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f @@ -505,10 +505,10 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL IXXXXX(P(0,6),ZERO,NHEL(6),-1*IC(6),W(1,6)) CALL FFV1P0_3(W(1,1),W(1,5),GC_11(IVEC),ZERO, FK_ZERO,W(1,7)) CALL FFV1P0_3(W(1,6),W(1,2),GC_11(IVEC),ZERO, FK_ZERO,W(1,8)) - CALL FFV1_1(W(1,3),W(1,7),GC_11(IVEC),MDL_MT, ZERO,W(1,9)) + CALL FFV1_1(W(1,3),W(1,7),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,9)) C Amplitude(s) for diagram number 1 CALL FFV1_0(W(1,4),W(1,9),W(1,8),GC_11(IVEC),AMP(1)) - CALL FFV1_2(W(1,4),W(1,7),GC_11(IVEC),MDL_MT, ZERO,W(1,9)) + CALL FFV1_2(W(1,4),W(1,7),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,9)) C Amplitude(s) for diagram number 2 CALL FFV1_0(W(1,9),W(1,3),W(1,8),GC_11(IVEC),AMP(2)) CALL FFV1P0_3(W(1,4),W(1,3),GC_11(IVEC),ZERO, FK_ZERO,W(1,9)) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 
2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc index d5d7e9e858..8638bbefa2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc @@ -241,24 +241,24 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 14 *** // Wavefunction(s) for diagram number 1 - ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); - imzxxx( momenta, cHel[ihel][1], +1, w_fp[1], 1 ); // NB: imzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][1], +1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); + oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - oxzxxx( momenta, cHel[ihel][5], +1, w_fp[5], 5 ); + oxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); - 
FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 14 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -287,10 +287,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 14 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -301,10 +301,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 14 *** // Wavefunction(s) for diagram number 4 - FFV1_2( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[9] ); + FFV1_2( w_fp[1], 
w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[9], w_fp[5], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[5], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -317,10 +317,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 14 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[9] ); + FFV1_1( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[1], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -333,12 +333,12 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 14 *** // Wavefunction(s) for diagram number 6 - FFV1P0_3( w_fp[0], w_fp[5], COUPs[1], 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[1], w_fp[4], COUPs[1], 0., 0., w_fp[6] ); - FFV1_1( w_fp[2], w_fp[9], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); + FFV1P0_3( w_fp[0], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1_1( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,10 +351,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 14 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[9], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); + FFV1_2( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); // Amplitude(s) for diagram number 
7 - FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -370,7 +370,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -381,10 +381,10 @@ namespace mg5amcCpu // *** DIAGRAM 9 OF 14 *** // Wavefunction(s) for diagram number 9 - FFV1_2( w_fp[1], w_fp[9], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[10], w_fp[4], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[4], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -397,10 +397,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 14 *** // Wavefunction(s) for diagram number 10 - FFV1_1( w_fp[4], w_fp[9], COUPs[1], 0., 0., w_fp[10] ); + FFV1_1( w_fp[4], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[1], w_fp[10], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[1], w_fp[10], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -413,10 +413,10 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 14 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[0], w_fp[6], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for 
diagram number 11 - FFV1_0( w_fp[10], w_fp[5], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[5], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -429,10 +429,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 14 *** // Wavefunction(s) for diagram number 12 - FFV1_2( w_fp[0], w_fp[8], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[10], w_fp[5], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[5], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -445,10 +445,10 @@ namespace mg5amcCpu // *** DIAGRAM 13 OF 14 *** // Wavefunction(s) for diagram number 13 - FFV1_2( w_fp[0], w_fp[7], COUPs[1], 0., 0., w_fp[6] ); + FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[6], w_fp[4], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[4], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -464,7 +464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[10], w_fp[4], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[4], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1041,13 +1041,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues 
#272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f index 25de63622f..5ed7bc881f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION D2,U2,S2,C2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,17 +130,30 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + 
QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) - C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)))) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + D2=PDG2PDF(LPP(IB(2)),1, IB(2),XBK(IB(2)), QSCALE) + U2=PDG2PDF(LPP(IB(2)),2, IB(2),XBK(IB(2)), QSCALE) + S2=PDG2PDF(LPP(IB(2)),3, IB(2),XBK(IB(2)), QSCALE) + C2=PDG2PDF(LPP(IB(2)),4, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+ +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += 
smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f index 0834ca2262..77fe909abc 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f @@ -489,10 +489,10 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL OXXXXX(P(0,6),ZERO,NHEL(6),+1*IC(6),W(1,6)) CALL FFV1P0_3(W(1,1),W(1,5),GC_11(IVEC),ZERO, FK_ZERO,W(1,7)) CALL FFV1P0_3(W(1,2),W(1,6),GC_11(IVEC),ZERO, FK_ZERO,W(1,8)) - CALL FFV1_1(W(1,3),W(1,7),GC_11(IVEC),MDL_MT, ZERO,W(1,9)) + CALL FFV1_1(W(1,3),W(1,7),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,9)) C Amplitude(s) for diagram number 1 CALL FFV1_0(W(1,4),W(1,9),W(1,8),GC_11(IVEC),AMP(1)) - CALL FFV1_2(W(1,4),W(1,7),GC_11(IVEC),MDL_MT, ZERO,W(1,9)) + CALL FFV1_2(W(1,4),W(1,7),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,9)) C Amplitude(s) for diagram number 2 CALL FFV1_0(W(1,9),W(1,3),W(1,8),GC_11(IVEC),AMP(2)) CALL FFV1P0_3(W(1,4),W(1,3),GC_11(IVEC),ZERO, FK_ZERO,W(1,9)) @@ -506,10 +506,10 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, 
IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,2),W(1,10),W(1,9),GC_11(IVEC),AMP(5)) CALL FFV1P0_3(W(1,1),W(1,6),GC_11(IVEC),ZERO, FK_ZERO,W(1,10)) CALL FFV1P0_3(W(1,2),W(1,5),GC_11(IVEC),ZERO, FK_ZERO,W(1,7)) - CALL FFV1_1(W(1,3),W(1,10),GC_11(IVEC),MDL_MT, ZERO,W(1,11)) + CALL FFV1_1(W(1,3),W(1,10),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,11)) C Amplitude(s) for diagram number 6 CALL FFV1_0(W(1,4),W(1,11),W(1,7),GC_11(IVEC),AMP(6)) - CALL FFV1_2(W(1,4),W(1,10),GC_11(IVEC),MDL_MT, ZERO,W(1,11)) + CALL FFV1_2(W(1,4),W(1,10),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,11)) C Amplitude(s) for diagram number 7 CALL FFV1_0(W(1,11),W(1,3),W(1,7),GC_11(IVEC),AMP(7)) C Amplitude(s) for diagram number 8 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+ +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc index 58687f7276..c071cc6900 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc @@ -249,24 +249,24 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 7 *** // Wavefunction(s) for diagram number 1 - ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); + oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); + ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); - FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] ); + 
FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -279,10 +279,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 7 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -295,10 +295,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 7 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -309,10 +309,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 7 *** // Wavefunction(s) for diagram number 4 - FFV1_2( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[4], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) 
numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -325,10 +325,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 7 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[4], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); + FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -341,10 +341,10 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 7 *** // Wavefunction(s) for diagram number 6 - FFV1_2( w_fp[0], w_fp[7], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -357,10 +357,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 7 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[0], w_fp[8], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[3], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -937,19 +937,18 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, 
#396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f index 300733b34c..a32595dce6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f @@ -50,6 +50,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -137,21 +138,30 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - S1=PDG2PDF(LPP(IB(1)),3, 
IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). 
+// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void 
counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+ +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc index 0622603ad2..2eb6b491fa 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc @@ -241,9 +241,9 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 36 *** // Wavefunction(s) for diagram number 1 - ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); @@ -253,12 +253,12 @@ namespace mg5amcCpu vxxxxx( momenta, 0., cHel[ihel][5], +1, w_fp[5], 5 ); - VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); + VVV1P0_1( w_fp[4], w_fp[5], COUPs[0], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[0], w_fp[8], w_fp[7], 
COUPs[1], &_fp[0] ); + FFV1_0( w_fp[0], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 36 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[0], w_fp[6], COUPs[1], 0., 0., w_fp[8] ); + FFV1_2( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -287,10 +287,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 36 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -303,10 +303,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 36 *** // Wavefunction(s) for diagram number 4 - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -319,10 +319,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 36 *** // Wavefunction(s) for diagram 
number 5 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -335,11 +335,11 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 36 *** // Wavefunction(s) for diagram number 6 - FFV1_1( w_fp[2], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[9] ); - FFV1_2( w_fp[3], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[6] ); + FFV1_1( w_fp[2], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[9] ); + FFV1_2( w_fp[3], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[6] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -350,11 +350,11 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 36 *** // Wavefunction(s) for diagram number 7 - FFV1_1( w_fp[1], w_fp[5], COUPs[1], 0., 0., w_fp[10] ); - FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 0., 0., w_fp[11] ); + FFV1_1( w_fp[1], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[10] ); + FFV1P0_3( w_fp[3], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[0], w_fp[10], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[0], w_fp[10], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -365,10 +365,10 @@ namespace mg5amcCpu // *** DIAGRAM 8 OF 36 *** // Wavefunction(s) for diagram number 8 - FFV1_2( w_fp[0], w_fp[5], COUPs[1], 0., 0., 
w_fp[12] ); + FFV1_2( w_fp[0], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[12] ); // Amplitude(s) for diagram number 8 - FFV1_0( w_fp[12], w_fp[1], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[1], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -379,10 +379,10 @@ namespace mg5amcCpu // *** DIAGRAM 9 OF 36 *** // Wavefunction(s) for diagram number 9 - FFV1_1( w_fp[9], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_1( w_fp[9], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -396,7 +396,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 10 - VVV1_0( w_fp[5], w_fp[8], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[8], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -407,11 +407,11 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 36 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[3], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); - FFV1_1( w_fp[2], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[3], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[2], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[11], w_fp[13], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 
0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -422,10 +422,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 36 *** // Wavefunction(s) for diagram number 12 - FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[11], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[0], w_fp[10], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[0], w_fp[10], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -439,7 +439,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[12], w_fp[1], w_fp[9], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[1], w_fp[9], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -450,10 +450,10 @@ namespace mg5amcCpu // *** DIAGRAM 14 OF 36 *** // Wavefunction(s) for diagram number 14 - FFV1_2( w_fp[11], w_fp[5], COUPs[1], cIPD[0], cIPD[1], w_fp[14] ); + FFV1_2( w_fp[11], w_fp[5], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[14] ); // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -467,7 +467,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 15 - VVV1_0( w_fp[5], w_fp[8], w_fp[9], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[8], w_fp[9], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 15 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -478,11 +478,11 @@ namespace mg5amcCpu // *** DIAGRAM 16 OF 36 *** // Wavefunction(s) 
for diagram number 16 - FFV1_1( w_fp[1], w_fp[4], COUPs[1], 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[0], w_fp[9], COUPs[1], 0., 0., w_fp[14] ); + FFV1_1( w_fp[1], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[0], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[14] ); // Amplitude(s) for diagram number 16 - FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 16 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -496,7 +496,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 17 - FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[14], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 17 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -510,7 +510,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 18 - FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[9], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 18 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -521,10 +521,10 @@ namespace mg5amcCpu // *** DIAGRAM 19 OF 36 *** // Wavefunction(s) for diagram number 19 - FFV1_1( w_fp[9], w_fp[5], COUPs[1], 0., 0., w_fp[11] ); + FFV1_1( w_fp[9], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 19 - FFV1_0( w_fp[0], w_fp[11], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[0], w_fp[11], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 19 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -538,7 +538,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 20 - VVV1_0( w_fp[5], w_fp[7], 
w_fp[14], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[7], w_fp[14], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 20 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -549,11 +549,11 @@ namespace mg5amcCpu // *** DIAGRAM 21 OF 36 *** // Wavefunction(s) for diagram number 21 - FFV1_2( w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[14] ); - FFV1P0_3( w_fp[14], w_fp[1], COUPs[1], 0., 0., w_fp[11] ); + FFV1_2( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[14] ); + FFV1P0_3( w_fp[14], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 21 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 21 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -567,7 +567,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 22 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 22 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -581,7 +581,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 23 - FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[14], w_fp[10], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 23 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -592,10 +592,10 @@ namespace mg5amcCpu // *** DIAGRAM 24 OF 36 *** // Wavefunction(s) for diagram number 24 - FFV1_2( w_fp[14], w_fp[5], COUPs[1], 0., 0., w_fp[9] ); + FFV1_2( w_fp[14], w_fp[5], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 24 - FFV1_0( w_fp[9], w_fp[1], w_fp[7], COUPs[1], &_fp[0] 
); + FFV1_0( w_fp[9], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 24 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -609,7 +609,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 25 - VVV1_0( w_fp[5], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 25 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -620,10 +620,10 @@ namespace mg5amcCpu // *** DIAGRAM 26 OF 36 *** // Wavefunction(s) for diagram number 26 - FFV1_1( w_fp[13], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[11] ); + FFV1_1( w_fp[13], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[11] ); // Amplitude(s) for diagram number 26 - FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[11], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 26 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -634,10 +634,10 @@ namespace mg5amcCpu // *** DIAGRAM 27 OF 36 *** // Wavefunction(s) for diagram number 27 - VVV1P0_1( w_fp[4], w_fp[8], COUPs[0], 0., 0., w_fp[11] ); + VVV1P0_1( w_fp[4], w_fp[8], COUPs[0], 1.0, 0., 0., w_fp[11] ); // Amplitude(s) for diagram number 27 - FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[13], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 27 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -648,10 +648,10 @@ namespace mg5amcCpu // *** DIAGRAM 28 OF 36 *** // Wavefunction(s) for diagram number 28 - FFV1_2( w_fp[6], w_fp[4], COUPs[1], cIPD[0], cIPD[1], w_fp[13] ); + FFV1_2( w_fp[6], w_fp[4], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[13] ); // Amplitude(s) for 
diagram number 28 - FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[13], w_fp[2], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 28 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -665,7 +665,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 29 - FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[2], w_fp[11], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 29 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -676,10 +676,10 @@ namespace mg5amcCpu // *** DIAGRAM 30 OF 36 *** // Wavefunction(s) for diagram number 30 - FFV1_1( w_fp[10], w_fp[4], COUPs[1], 0., 0., w_fp[6] ); + FFV1_1( w_fp[10], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 30 - FFV1_0( w_fp[0], w_fp[6], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[0], w_fp[6], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 30 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -690,10 +690,10 @@ namespace mg5amcCpu // *** DIAGRAM 31 OF 36 *** // Wavefunction(s) for diagram number 31 - VVV1P0_1( w_fp[4], w_fp[7], COUPs[0], 0., 0., w_fp[6] ); + VVV1P0_1( w_fp[4], w_fp[7], COUPs[0], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 31 - FFV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[0], w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 31 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -704,10 +704,10 @@ namespace mg5amcCpu // *** DIAGRAM 32 OF 36 *** // Wavefunction(s) for diagram number 32 - FFV1_2( w_fp[12], w_fp[4], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[12], w_fp[4], COUPs[1], 1.0, 0., 0., 
w_fp[10] ); // Amplitude(s) for diagram number 32 - FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 32 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -721,7 +721,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 33 - FFV1_0( w_fp[12], w_fp[1], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[12], w_fp[1], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 33 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -735,17 +735,17 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 34 - VVVV1_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV1_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= 1. / 2. * amp_sv[0]; jamp_sv[3] += 1. / 2. * amp_sv[0]; jamp_sv[4] += 1. / 2. * amp_sv[0]; jamp_sv[5] -= 1. / 2. * amp_sv[0]; - VVVV3_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV3_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[2] -= 1. / 2. * amp_sv[0]; jamp_sv[5] -= 1. / 2. * amp_sv[0]; jamp_sv[9] += 1. / 2. * amp_sv[0]; jamp_sv[10] += 1. / 2. * amp_sv[0]; - VVVV4_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], &_fp[0] ); + VVVV4_0( w_fp[4], w_fp[5], w_fp[7], w_fp[8], COUPs[2], 1.0, &_fp[0] ); jamp_sv[3] -= 1. / 2. * amp_sv[0]; jamp_sv[4] -= 1. / 2. * amp_sv[0]; jamp_sv[9] += 1. / 2. 
* amp_sv[0]; @@ -757,7 +757,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 35 - VVV1_0( w_fp[5], w_fp[8], w_fp[6], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[8], w_fp[6], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 35 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -773,7 +773,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 36 - VVV1_0( w_fp[5], w_fp[7], w_fp[11], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[5], w_fp[7], w_fp[11], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 36 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1356,19 +1356,18 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 72,72 }; // assume nprocesses == 1 (#272 and #343) + constexpr int 
helcolDenominators[1] = { 72 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f index c9d97c2911..baaee299a2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,21 +130,30 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + 
CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
+ constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput 
is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+ +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc index 5bf4c02337..8682128442 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc @@ -241,24 +241,24 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 14 *** // Wavefunction(s) for diagram number 1 - ipzxxx( momenta, cHel[ihel][0], +1, w_fp[0], 0 ); // NB: ipzxxx only uses pz + ixxxxx( momenta, 0., cHel[ihel][0], +1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - oxzxxx( momenta, cHel[ihel][4], +1, w_fp[4], 4 ); + oxxxxx( momenta, 0., cHel[ihel][4], +1, w_fp[4], 4 ); - ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); + ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); - FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[0], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] ); + 
FFV1P0_3( w_fp[5], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 14 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -287,10 +287,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 14 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -301,10 +301,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 14 *** // Wavefunction(s) for diagram number 4 - FFV1_2( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[9] ); + FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[9], w_fp[4], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[4], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 
4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -317,10 +317,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 14 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[4], w_fp[6], COUPs[1], 0., 0., w_fp[9] ); + FFV1_1( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -333,12 +333,12 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 14 *** // Wavefunction(s) for diagram number 6 - FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 0., 0., w_fp[6] ); - FFV1_1( w_fp[2], w_fp[9], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); + FFV1P0_3( w_fp[0], w_fp[4], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1_1( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,10 +351,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 14 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[9], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); + FFV1_2( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) 
denominators_sv += cxabs2( amp_sv[0] ); @@ -370,7 +370,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -381,10 +381,10 @@ namespace mg5amcCpu // *** DIAGRAM 9 OF 14 *** // Wavefunction(s) for diagram number 9 - FFV1_2( w_fp[5], w_fp[9], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[10], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -397,10 +397,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 14 *** // Wavefunction(s) for diagram number 10 - FFV1_1( w_fp[1], w_fp[9], COUPs[1], 0., 0., w_fp[10] ); + FFV1_1( w_fp[1], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[5], w_fp[10], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -413,10 +413,10 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 14 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[0], w_fp[6], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[0], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[10], w_fp[4], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[4], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( 
channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -429,10 +429,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 14 *** // Wavefunction(s) for diagram number 12 - FFV1_2( w_fp[0], w_fp[8], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[0], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[10], w_fp[4], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[4], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -445,10 +445,10 @@ namespace mg5amcCpu // *** DIAGRAM 13 OF 14 *** // Wavefunction(s) for diagram number 13 - FFV1_2( w_fp[0], w_fp[7], COUPs[1], 0., 0., w_fp[6] ); + FFV1_2( w_fp[0], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[6], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -464,7 +464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[1], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1041,19 +1041,18 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 
270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and #343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f index c3b97a2a87..c2206e8d5e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION D1,U1,S1,C1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,21 +130,30 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) - C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)))) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + 
QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + D1=PDG2PDF(LPP(IB(1)),1, IB(1),XBK(IB(1)), QSCALE) + U1=PDG2PDF(LPP(IB(1)),2, IB(1),XBK(IB(1)), QSCALE) + S1=PDG2PDF(LPP(IB(1)),3, IB(1),XBK(IB(1)), QSCALE) + C1=PDG2PDF(LPP(IB(1)),4, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+ +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += 
smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f index 4f193a2ce6..3544d80d72 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f @@ -506,10 +506,10 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,6),W(1,10),W(1,9),GC_11(IVEC),AMP(5)) CALL FFV1P0_3(W(1,1),W(1,5),GC_11(IVEC),ZERO, FK_ZERO,W(1,10)) CALL FFV1P0_3(W(1,6),W(1,2),GC_11(IVEC),ZERO, FK_ZERO,W(1,7)) - CALL FFV1_1(W(1,3),W(1,10),GC_11(IVEC),MDL_MT, ZERO,W(1,11)) + CALL FFV1_1(W(1,3),W(1,10),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,11)) C Amplitude(s) for diagram number 6 CALL FFV1_0(W(1,4),W(1,11),W(1,7),GC_11(IVEC),AMP(6)) - CALL FFV1_2(W(1,4),W(1,10),GC_11(IVEC),MDL_MT, ZERO,W(1,11)) + CALL FFV1_2(W(1,4),W(1,10),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,11)) C Amplitude(s) for diagram number 7 CALL FFV1_0(W(1,11),W(1,3),W(1,7),GC_11(IVEC),AMP(7)) C Amplitude(s) for diagram number 8 diff --git 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc index 739b5a1bb2..7d3141cfc4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc @@ 
-243,24 +243,24 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 7 *** // Wavefunction(s) for diagram number 1 - opzxxx( momenta, cHel[ihel][0], -1, w_fp[0], 0 ); // NB: opzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - ixzxxx( momenta, cHel[ihel][4], -1, w_fp[4], 4 ); + ixxxxx( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 ); - ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); + ixxxxx( momenta, 0., cHel[ihel][5], -1, w_fp[5], 5 ); - FFV1P0_3( w_fp[4], w_fp[0], COUPs[1], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -273,10 +273,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 7 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -289,10 +289,10 @@ 
namespace mg5amcCpu // *** DIAGRAM 3 OF 7 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -303,10 +303,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 7 *** // Wavefunction(s) for diagram number 4 - FFV1_2( w_fp[5], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -319,10 +319,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 7 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[3] ); + FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[3], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -335,10 +335,10 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 7 *** // Wavefunction(s) for diagram number 6 - FFV1_2( w_fp[4], w_fp[7], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[4], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[0], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[0], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef 
MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,10 +351,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 7 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[4], w_fp[8], COUPs[1], 0., 0., w_fp[3] ); + FFV1_2( w_fp[4], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[3] ); // Amplitude(s) for diagram number 7 - FFV1_0( w_fp[3], w_fp[0], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[0], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -931,19 +931,18 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 2; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } // Denominators: spins, colors and identical particles - constexpr int helcolDenominators[1] = { 36,36 }; // assume nprocesses == 1 (#272 and #343) + constexpr int helcolDenominators[1] = { 36 }; // assume nprocesses == 1 (#272 and 
#343) #ifdef __CUDACC__ // Remember: in CUDA this is a kernel for one event, in c++ this processes n events diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f index adb807b78c..e92ee65fd7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f @@ -44,6 +44,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION CX1,UX1,DX1 DOUBLE PRECISION CX2,SX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -131,21 +132,28 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - CX1=PDG2PDF(LPP(IB(1)),-4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) - $ )) - UX1=PDG2PDF(LPP(IB(1)),-2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) - $ )) - DX1=PDG2PDF(LPP(IB(1)),-1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) - $ )) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + CX1=PDG2PDF(LPP(IB(1)),-4, IB(1),XBK(IB(1)), QSCALE) + UX1=PDG2PDF(LPP(IB(1)),-2, IB(1),XBK(IB(1)), QSCALE) + DX1=PDG2PDF(LPP(IB(1)),-1, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
+ constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput 
is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f index b856b0ec12..61d4e59741 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f @@ -493,10 +493,10 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL IXXXXX(P(0,6),ZERO,NHEL(6),-1*IC(6),W(1,6)) CALL FFV1P0_3(W(1,5),W(1,1),GC_11(IVEC),ZERO, FK_ZERO,W(1,7)) CALL FFV1P0_3(W(1,6),W(1,2),GC_11(IVEC),ZERO, FK_ZERO,W(1,8)) - CALL FFV1_1(W(1,3),W(1,7),GC_11(IVEC),MDL_MT, ZERO,W(1,9)) + CALL FFV1_1(W(1,3),W(1,7),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,9)) C Amplitude(s) for diagram number 1 CALL FFV1_0(W(1,4),W(1,9),W(1,8),GC_11(IVEC),AMP(1)) - CALL FFV1_2(W(1,4),W(1,7),GC_11(IVEC),MDL_MT, ZERO,W(1,9)) + CALL FFV1_2(W(1,4),W(1,7),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,9)) C Amplitude(s) for diagram number 2 CALL FFV1_0(W(1,9),W(1,3),W(1,8),GC_11(IVEC),AMP(2)) CALL FFV1P0_3(W(1,4),W(1,3),GC_11(IVEC),ZERO, FK_ZERO,W(1,9)) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/ompnumthreads.cc @@ -0,0 +1,25 
@@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc index 129dd8551a..6ec302f68b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc @@ -241,24 +241,24 @@ namespace mg5amcCpu // *** DIAGRAM 1 OF 14 *** // Wavefunction(s) for diagram number 1 - opzxxx( momenta, cHel[ihel][0], -1, w_fp[0], 0 ); // NB: opzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][0], -1, w_fp[0], 0 ); - omzxxx( momenta, cHel[ihel][1], -1, w_fp[1], 1 ); // NB: omzxxx only uses pz + oxxxxx( momenta, 0., cHel[ihel][1], -1, w_fp[1], 1 ); oxxxxx( momenta, cIPD[0], cHel[ihel][2], +1, w_fp[2], 2 ); ixxxxx( momenta, cIPD[0], cHel[ihel][3], -1, w_fp[3], 3 ); - ixzxxx( momenta, cHel[ihel][4], -1, w_fp[4], 4 ); + ixxxxx( momenta, 0., cHel[ihel][4], -1, w_fp[4], 4 ); - ixzxxx( momenta, cHel[ihel][5], -1, w_fp[5], 5 ); + ixxxxx( momenta, 0., 
cHel[ihel][5], -1, w_fp[5], 5 ); - FFV1P0_3( w_fp[4], w_fp[0], COUPs[1], 0., 0., w_fp[6] ); - FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 0., 0., w_fp[7] ); - FFV1_1( w_fp[2], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1P0_3( w_fp[4], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1P0_3( w_fp[5], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[7] ); + FFV1_1( w_fp[2], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[8], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -271,10 +271,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 14 *** // Wavefunction(s) for diagram number 2 - FFV1_2( w_fp[3], w_fp[6], COUPs[1], cIPD[0], cIPD[1], w_fp[8] ); + FFV1_2( w_fp[3], w_fp[6], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[8] ); // Amplitude(s) for diagram number 2 - FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[8], w_fp[2], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -287,10 +287,10 @@ namespace mg5amcCpu // *** DIAGRAM 3 OF 14 *** // Wavefunction(s) for diagram number 3 - FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 0., 0., w_fp[8] ); + FFV1P0_3( w_fp[3], w_fp[2], COUPs[1], 1.0, 0., 0., w_fp[8] ); // Amplitude(s) for diagram number 3 - VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[6], w_fp[7], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 3 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -301,10 +301,10 @@ namespace mg5amcCpu // *** DIAGRAM 4 OF 14 *** // Wavefunction(s) for diagram number 4 - FFV1_2( w_fp[5], w_fp[6], COUPs[1], 0., 0., 
w_fp[9] ); + FFV1_2( w_fp[5], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 4 - FFV1_0( w_fp[9], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[9], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 4 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -317,10 +317,10 @@ namespace mg5amcCpu // *** DIAGRAM 5 OF 14 *** // Wavefunction(s) for diagram number 5 - FFV1_1( w_fp[1], w_fp[6], COUPs[1], 0., 0., w_fp[9] ); + FFV1_1( w_fp[1], w_fp[6], COUPs[1], 1.0, 0., 0., w_fp[9] ); // Amplitude(s) for diagram number 5 - FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[9], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 5 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -333,12 +333,12 @@ namespace mg5amcCpu // *** DIAGRAM 6 OF 14 *** // Wavefunction(s) for diagram number 6 - FFV1P0_3( w_fp[4], w_fp[1], COUPs[1], 0., 0., w_fp[9] ); - FFV1P0_3( w_fp[5], w_fp[0], COUPs[1], 0., 0., w_fp[6] ); - FFV1_1( w_fp[2], w_fp[9], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); + FFV1P0_3( w_fp[4], w_fp[1], COUPs[1], 1.0, 0., 0., w_fp[9] ); + FFV1P0_3( w_fp[5], w_fp[0], COUPs[1], 1.0, 0., 0., w_fp[6] ); + FFV1_1( w_fp[2], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); // Amplitude(s) for diagram number 6 - FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[3], w_fp[10], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 6 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -351,10 +351,10 @@ namespace mg5amcCpu // *** DIAGRAM 7 OF 14 *** // Wavefunction(s) for diagram number 7 - FFV1_2( w_fp[3], w_fp[9], COUPs[1], cIPD[0], cIPD[1], w_fp[10] ); + FFV1_2( w_fp[3], w_fp[9], COUPs[1], 1.0, cIPD[0], cIPD[1], w_fp[10] ); // 
Amplitude(s) for diagram number 7 - FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[2], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 7 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -370,7 +370,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 8 - VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], &_fp[0] ); + VVV1_0( w_fp[9], w_fp[6], w_fp[8], COUPs[0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 8 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -381,10 +381,10 @@ namespace mg5amcCpu // *** DIAGRAM 9 OF 14 *** // Wavefunction(s) for diagram number 9 - FFV1_2( w_fp[5], w_fp[9], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[5], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 9 - FFV1_0( w_fp[10], w_fp[0], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[0], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 9 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -397,10 +397,10 @@ namespace mg5amcCpu // *** DIAGRAM 10 OF 14 *** // Wavefunction(s) for diagram number 10 - FFV1_1( w_fp[0], w_fp[9], COUPs[1], 0., 0., w_fp[10] ); + FFV1_1( w_fp[0], w_fp[9], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 10 - FFV1_0( w_fp[5], w_fp[10], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[5], w_fp[10], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 10 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -413,10 +413,10 @@ namespace mg5amcCpu // *** DIAGRAM 11 OF 14 *** // Wavefunction(s) for diagram number 11 - FFV1_2( w_fp[4], w_fp[6], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[4], w_fp[6], COUPs[1], 1.0, 0., 0., 
w_fp[10] ); // Amplitude(s) for diagram number 11 - FFV1_0( w_fp[10], w_fp[1], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[1], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 11 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -429,10 +429,10 @@ namespace mg5amcCpu // *** DIAGRAM 12 OF 14 *** // Wavefunction(s) for diagram number 12 - FFV1_2( w_fp[4], w_fp[8], COUPs[1], 0., 0., w_fp[10] ); + FFV1_2( w_fp[4], w_fp[8], COUPs[1], 1.0, 0., 0., w_fp[10] ); // Amplitude(s) for diagram number 12 - FFV1_0( w_fp[10], w_fp[1], w_fp[6], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[1], w_fp[6], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 12 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -445,10 +445,10 @@ namespace mg5amcCpu // *** DIAGRAM 13 OF 14 *** // Wavefunction(s) for diagram number 13 - FFV1_2( w_fp[4], w_fp[7], COUPs[1], 0., 0., w_fp[6] ); + FFV1_2( w_fp[4], w_fp[7], COUPs[1], 1.0, 0., 0., w_fp[6] ); // Amplitude(s) for diagram number 13 - FFV1_0( w_fp[6], w_fp[0], w_fp[8], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[6], w_fp[0], w_fp[8], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 13 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -464,7 +464,7 @@ namespace mg5amcCpu // (none) // Amplitude(s) for diagram number 14 - FFV1_0( w_fp[10], w_fp[0], w_fp[7], COUPs[1], &_fp[0] ); + FFV1_0( w_fp[10], w_fp[0], w_fp[7], COUPs[1], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 14 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -1041,13 +1041,12 @@ namespace mg5amcCpu { mgDebugInitialise(); - // SANITY CHECKS for cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360 and #396) + // SANITY CHECKS for 
cudacpp code generation (see issues #272 and #343 and PRs #619, #626, #360, #396 and #754) // These variable are not used anywhere else in the code and their scope is limited to this sanity check { - // nprocesses>1 was last observed for "mirror processes" in uux_ttx in the 270 branch (see issue #343 and PRs #360 and #396) + // nprocesses == 2 may happen for "mirror processes" such as P0_uux_ttx within pp_tt012j (see PR #754) constexpr int nprocesses = 1; - static_assert( nprocesses == 1, "Assume nprocesses == 1" ); - // process_id corresponds to the index of DSIG1 Fortran functions (must be 1 because cudacpp is unable to handle DSIG2) + static_assert( nprocesses == 1 || nprocesses == 2, "Assume nprocesses == 1 or 2" ); constexpr int process_id = 1; // code generation source: madevent + cudacpp exporter static_assert( process_id == 1, "Assume process_id == 1" ); } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f index 68d329862c..cad7f4197d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f @@ -42,6 +42,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C LOCAL VARIABLES C INTEGER I,ITYPE,LP,IPROC + DOUBLE PRECISION QSCALE DOUBLE PRECISION CX1,SX1,UX1,DX1 DOUBLE PRECISION CX2,SX2,UX2,DX2 DOUBLE PRECISION XPQ(-7:7),PD(0:MAXPROC) @@ -129,25 +130,30 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN !LP=SIGN(1,LPP(IB(1))) - CX1=PDG2PDF(LPP(IB(1)),-4, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) - $ )) - SX1=PDG2PDF(LPP(IB(1)),-3, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) - $ )) - UX1=PDG2PDF(LPP(IB(1)),-2, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) - $ )) - DX1=PDG2PDF(LPP(IB(1)),-1, IB(1),XBK(IB(1)),DSQRT(Q2FACT(IB(1)) - $ )) + IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN + QSCALE=0D0 + DO I=3,NEXTERNAL + 
QSCALE=QSCALE+DSQRT(MAX(0D0,(PP(0,I)+PP(3,I))*(PP(0,I) + $ -PP(3,I)))) + ENDDO + QSCALE=QSCALE/2D0 + ELSE + QSCALE=DSQRT(Q2FACT(IB(1))) + ENDIF + CX1=PDG2PDF(LPP(IB(1)),-4, IB(1),XBK(IB(1)), QSCALE) + SX1=PDG2PDF(LPP(IB(1)),-3, IB(1),XBK(IB(1)), QSCALE) + UX1=PDG2PDF(LPP(IB(1)),-2, IB(1),XBK(IB(1)), QSCALE) + DX1=PDG2PDF(LPP(IB(1)),-1, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN !LP=SIGN(1,LPP(IB(2))) - CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) - DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)),DSQRT(Q2FACT(IB(2)) - $ )) + IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN + QSCALE=DSQRT(Q2FACT(IB(2))) + ENDIF + CX2=PDG2PDF(LPP(IB(2)),-4, IB(2),XBK(IB(2)), QSCALE) + SX2=PDG2PDF(LPP(IB(2)),-3, IB(2),XBK(IB(2)), QSCALE) + UX2=PDG2PDF(LPP(IB(2)),-2, IB(2),XBK(IB(2)), QSCALE) + DX2=PDG2PDF(LPP(IB(2)),-1, IB(2),XBK(IB(2)), QSCALE) ENDIF PD(0) = 0D0 IPROC = 0 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc deleted file mode 120000 index 06e29b46f9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc +++ /dev/null @@ -1 +0,0 @@ -../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+ +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += 
smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f index b71e9c09c0..1b50f51264 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f @@ -489,10 +489,10 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL IXXXXX(P(0,6),ZERO,NHEL(6),-1*IC(6),W(1,6)) CALL FFV1P0_3(W(1,5),W(1,1),GC_11(IVEC),ZERO, FK_ZERO,W(1,7)) CALL FFV1P0_3(W(1,6),W(1,2),GC_11(IVEC),ZERO, FK_ZERO,W(1,8)) - CALL FFV1_1(W(1,3),W(1,7),GC_11(IVEC),MDL_MT, ZERO,W(1,9)) + CALL FFV1_1(W(1,3),W(1,7),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,9)) C Amplitude(s) for diagram number 1 CALL FFV1_0(W(1,4),W(1,9),W(1,8),GC_11(IVEC),AMP(1)) - CALL FFV1_2(W(1,4),W(1,7),GC_11(IVEC),MDL_MT, ZERO,W(1,9)) + CALL FFV1_2(W(1,4),W(1,7),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,9)) C Amplitude(s) for diagram number 2 CALL FFV1_0(W(1,9),W(1,3),W(1,8),GC_11(IVEC),AMP(2)) CALL FFV1P0_3(W(1,4),W(1,3),GC_11(IVEC),ZERO, FK_ZERO,W(1,9)) @@ -506,10 +506,10 @@ REAL*8 FUNCTION 
MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) CALL FFV1_0(W(1,6),W(1,10),W(1,9),GC_11(IVEC),AMP(5)) CALL FFV1P0_3(W(1,5),W(1,2),GC_11(IVEC),ZERO, FK_ZERO,W(1,10)) CALL FFV1P0_3(W(1,6),W(1,1),GC_11(IVEC),ZERO, FK_ZERO,W(1,7)) - CALL FFV1_1(W(1,3),W(1,10),GC_11(IVEC),MDL_MT, ZERO,W(1,11)) + CALL FFV1_1(W(1,3),W(1,10),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,11)) C Amplitude(s) for diagram number 6 CALL FFV1_0(W(1,4),W(1,11),W(1,7),GC_11(IVEC),AMP(6)) - CALL FFV1_2(W(1,4),W(1,10),GC_11(IVEC),MDL_MT, ZERO,W(1,11)) + CALL FFV1_2(W(1,4),W(1,10),GC_11(IVEC),MDL_MT, FK_MDL_WT,W(1,11)) C Amplitude(s) for diagram number 7 CALL FFV1_0(W(1,11),W(1,3),W(1,7),GC_11(IVEC),AMP(7)) C Amplitude(s) for diagram number 8 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/ompnumthreads.cc deleted file mode 120000 index 645dc78215..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/ompnumthreads.cc +++ /dev/null @@ -1 +0,0 @@ -../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
+ +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk index 43cee0977e..c6c1826de7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk @@ -27,6 +27,8 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') +###include ../../Source/make_opts # AV remove (added by OM) + #------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA @@ -220,7 +222,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -override OMPFLAGS = # disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -555,6 +558,7 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: 
$(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile index 74db44d848..74b19033a8 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile @@ -51,7 +51,7 @@ CUDACPP_MAKEFILE=cudacpp.mk CUDACPP_MAKEENV:=$(shell echo '$(.VARIABLES)' | tr " " "\n" | egrep "(USEBUILDDIR|AVX|FPTYPE|HELINL|HRDCOD)") ###$(info CUDACPP_MAKEENV=$(CUDACPP_MAKEENV)) ###$(info $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))")) -CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn |& awk '/Building/{print $$3}' | sed s/BUILDDIR=//) +CUDACPP_BUILDDIR:=$(shell $(MAKE) $(foreach v,$(CUDACPP_MAKEENV),$(v)="$($(v))") -f $(CUDACPP_MAKEFILE) -pn 2>&1 | awk '/Building/{print $$3}' | sed s/BUILDDIR=//) ifeq ($(CUDACPP_BUILDDIR),) $(error CUDACPP_BUILDDIR='$(CUDACPP_BUILDDIR)' should not be empty!) else @@ -89,7 +89,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 +LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/proc_characteristics b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/proc_characteristics index 02b80b11e8..8bc9226ddb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/proc_characteristics +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/proc_characteristics @@ -8,7 +8,7 @@ ninitial = 2 grouped_matrix = True has_loops = False - bias_module = None + bias_module = dummy max_n_matched_jets = 2 colored_pdgs = [1, 2, 3, 4, 5, 6, 21] complex_mass_scheme = False diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/generate_events b/epochX/cudacpp/pp_tt012j.mad/bin/generate_events index 107313b25d..5577cc66a0 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/generate_events +++ b/epochX/cudacpp/pp_tt012j.mad/bin/generate_events @@ -46,7 +46,7 @@ if __debug__ and (not os.path.exists(pjoin(root_path,'../..', 'bin','create_rele sys.path.append(pjoin(root_path,'bin','internal')) import madevent_interface as ME - +import misc as misc import logging import logging.config @@ -160,17 +160,31 @@ if '__main__' == __name__: # Check that python version is valid set_configuration() - argument = sys.argv + argument = sys.argv + + # check for plugin customization of the launch command + launch_interface = ME.MadEventCmdShell + if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(root_path, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + launch_interface = launch_plugin.MEINTERFACE + + + try: if '-h' in argument or '--help' in argument: - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.exec_cmd('help generate_events') sys.exit() elif len(argument) > 1 and argument[1] in ['0', '1', 
'2']: argument = treat_old_argument(argument) with ME.MadEventCmdShell.RunWebHandling(root_path, ): - launch = ME.MadEventCmdShell(me_dir=root_path, force_run=True) + launch = launch_interface(me_dir=root_path, force_run=True) launch.run_cmd('generate_events %s' % ' '.join(argument[1:])) launch.run_cmd('quit') except ME.MadEventAlreadyRunning as message: diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py index 7624b9f557..e9f421ae5f 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py @@ -1002,13 +1002,14 @@ def __init__(self, finput=None, **opt): self.allowed_value = {} self.default_setup() + self.plugin_input(finput) # if input is define read that input if isinstance(finput, (file, str, StringIO.StringIO)): self.read(finput, **opt) - self.plugin_input(finput) + def plugin_input(self, finput=None): diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/ufomodel/py3_model.pkl b/epochX/cudacpp/pp_tt012j.mad/bin/internal/ufomodel/py3_model.pkl index afc2ca4e273b368050537e3f722b85c825bbf510..27a1caae3c115073669b90622e9351ab04166d39 100644 GIT binary patch delta 54 zcmX?lj_Le4rVZZ9G>RD*81z#TOA_@H%Mx=Ei;FY$-2+0642+ERa}!h2ixLYmOwtQV KBPM4qy8{3ztQ2Sf delta 44 zcmX?qj_K$*rVZZ9 t t~ @0 add process p p > t t~ j @1 diff --git a/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h b/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h index 9cea8bcbe7..9b946c21e1 100644 --- a/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h @@ -863,6 +863,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -873,6 +874,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, 
const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -886,6 +888,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -896,6 +899,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) ALWAYS_INLINE; @@ -908,6 +912,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) ALWAYS_INLINE; @@ -920,6 +925,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) ALWAYS_INLINE; @@ -934,6 +940,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -945,6 +952,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -959,6 +967,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; //-------------------------------------------------------------------------- @@ -970,6 +979,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -984,6 +994,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) ALWAYS_INLINE; 
//-------------------------------------------------------------------------- @@ -995,6 +1006,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) ALWAYS_INLINE; @@ -1008,6 +1020,7 @@ namespace mg5amcCpu const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1042,6 +1055,7 @@ namespace mg5amcCpu VVV1P0_1( const fptype allV2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1080,6 +1094,7 @@ namespace mg5amcCpu const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1103,6 +1118,7 @@ namespace mg5amcCpu FFV1_1( const fptype allF2[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allF1[] ) @@ -1134,6 +1150,7 @@ namespace mg5amcCpu FFV1_2( const fptype allF1[], const fptype allV3[], const fptype allCOUP[], + const double Ccoeff, const fptype M2, const fptype W2, fptype allF2[] ) @@ -1165,6 +1182,7 @@ namespace mg5amcCpu FFV1P0_3( const fptype allF1[], const fptype allF2[], const fptype allCOUP[], + const double Ccoeff, const fptype M3, const fptype W3, fptype allV3[] ) @@ -1197,6 +1215,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1225,6 +1244,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1260,6 +1280,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1288,6 +1309,7 @@ 
namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) @@ -1323,6 +1345,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, fptype allvertexes[] ) { mgDebug( 0, __FUNCTION__ ); @@ -1351,6 +1374,7 @@ namespace mg5amcCpu const fptype allV3[], const fptype allV4[], const fptype allCOUP[], + const double Ccoeff, const fptype M1, const fptype W1, fptype allV1[] ) diff --git a/epochX/cudacpp/pp_tt012j.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt b/epochX/cudacpp/pp_tt012j.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt index dd90c94acf..d596b33ae7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt +++ b/epochX/cudacpp/pp_tt012j.mad/test/ref/dump_CPUTest.Sigma_sm_gux_ttxux.txt @@ -4,7 +4,7 @@ Event 0 Batch 0 2 2.647483690509011e+02 7.527657265342380e+01 -2.528976247704283e+02 -2.163164141117315e+01 3 6.252973211776936e+02 -5.721080498766041e+02 -1.578766990348905e+01 2.518727230515587e+02 4 6.099543097714056e+02 4.968314772231802e+02 2.686852946739174e+02 -2.302410816403857e+02 - ME 3.498510462248670e-04 + ME 6.254927412618323e-05 Event 1 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -12,7 +12,7 @@ Event 1 Batch 0 2 2.542827954151951e+02 1.482213322085297e+02 -1.988618298139058e+02 -5.607271498295615e+01 3 6.883656117507998e+02 1.265478873489434e+02 5.602777828023585e+02 3.793700749224233e+02 4 5.573515928340058e+02 -2.747692195574731e+02 -3.614159529884527e+02 -3.232973599394667e+02 - ME 7.257243108248426e-04 + ME 8.120933129385430e-05 Event 2 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -20,7 +20,7 @@ Event 2 Batch 0 2 4.301460683791099e+02 -3.656995432079240e+02 -2.257802895903974e+02 -1.768459985405173e+01 3 5.058528987551350e+02 2.755467101243707e+02 
-2.034821274188550e+02 3.722313656043856e+02 4 5.640010328657550e+02 9.015283308355326e+01 4.292624170092524e+02 -3.545467657503340e+02 - ME 8.130044127338102e-04 + ME 1.104115154253218e-04 Event 3 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -28,7 +28,7 @@ Event 3 Batch 0 2 6.758793342627306e+02 1.455349847705337e+02 4.360940220328824e+02 -4.954335945799966e+02 3 3.008019460079605e+02 -1.607139834787174e+02 2.732727402256846e+01 2.527964523704278e+02 4 5.233187197293092e+02 1.517899870818368e+01 -4.634212960554508e+02 2.426371422095687e+02 - ME 7.753277710143621e-05 + ME 4.288074098478053e-05 Event 4 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -36,7 +36,7 @@ Event 4 Batch 0 2 3.540811678028369e+02 5.414642718170588e+01 -3.497885023717100e+02 -9.467915537920108e+00 3 7.415000547748695e+02 1.453779348794601e+00 7.277337852109665e+02 1.422102514562805e+02 4 4.044187774222938e+02 -5.560020653050046e+01 -3.779452828392566e+02 -1.327423359183605e+02 - ME 2.015528729476554e-04 + ME 1.304731284254719e-05 Event 5 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -44,7 +44,7 @@ Event 5 Batch 0 2 4.747467875786874e+02 2.462969907607520e+02 3.713870243947702e+02 1.636886763636381e+02 3 3.438196236093862e+02 -2.056491112573935e+02 2.636029701703988e+02 8.021128807897365e+01 4 6.814335888119255e+02 -4.064787950335840e+01 -6.349899945651691e+02 -2.438999644426124e+02 - ME 6.140777519977192e-04 + ME 1.932390649640220e-04 Event 6 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -52,7 +52,7 @@ Event 6 Batch 0 2 5.623951200922340e+02 4.644673798421034e+02 3.089047820108764e+02 -7.166700647426805e+01 3 2.268243199894467e+02 1.761899852590787e+02 -7.114332369064562e+01 -1.238748914321566e+02 4 7.107805599183188e+02 -6.406573651011822e+02 -2.377614583202307e+02 
1.955418979064247e+02 - ME 8.375373201653861e-04 + ME 1.929702539767979e-04 Event 7 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -60,7 +60,7 @@ Event 7 Batch 0 2 4.922243378496302e+02 2.878585072835456e+02 -1.441537488072182e+02 -3.723465794939189e+02 3 2.873990637609374e+02 -5.400981623596619e+01 -8.913204919452846e+01 -2.678369642286231e+02 4 7.203765983894325e+02 -2.338486910475794e+02 2.332857980017467e+02 6.401835437225419e+02 - ME 2.045598717079573e-03 + ME 6.280412585349807e-04 Event 8 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -68,7 +68,7 @@ Event 8 Batch 0 2 3.353309706037128e+02 -7.529439061162444e+01 -4.917829145606096e+01 -3.230466069128648e+02 3 7.169322705461503e+02 -1.597426278178964e+02 -1.460012137440150e+01 6.987567601563110e+02 4 4.477367588501368e+02 2.350370184295208e+02 6.377841283046249e+01 -3.757101532434461e+02 - ME 5.176104304710922e-03 + ME 1.424871539111113e-03 Event 9 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -76,7 +76,7 @@ Event 9 Batch 0 2 2.557626120875720e+02 2.000882245504951e+02 -5.276260741790070e+01 -1.503174088272977e+02 3 7.044202058180884e+02 -6.969679478438196e+02 -1.019614549623775e+02 6.882422911146106e+00 4 5.398171820943397e+02 4.968797232933244e+02 1.547240623802783e+02 1.434349859161515e+02 - ME 6.498215193902510e-05 + ME 1.126010180174107e-05 Event 10 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -84,7 +84,7 @@ Event 10 Batch 0 2 3.466796552973448e+02 1.172124288883391e+02 -1.804077050554743e+02 2.718475489457261e+02 3 5.174471655316495e+02 -1.610456139025784e+02 -4.497410659869822e+02 -1.988689340353916e+02 4 6.358731791710053e+02 4.383318501423926e+01 6.301487710424565e+02 -7.297861491033444e+01 - ME 2.111165581639245e-04 + ME 8.292383053707579e-05 Event 11 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -92,7 +92,7 @@ Event 11 Batch 0 2 5.730783827248506e+02 -3.059484875398849e+01 3.466457017175528e+02 -4.553235612803233e+02 3 4.410994673708892e+02 -3.026218886155176e+02 -1.990641070399019e+01 3.203005892260318e+02 4 4.858221499042607e+02 3.332167373695061e+02 -3.267392910135624e+02 1.350229720542913e+02 - ME 5.129802099928076e-05 + ME 2.195851954305949e-05 Event 12 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -100,7 +100,7 @@ Event 12 Batch 0 2 2.275003875859171e+02 -1.247450244086003e+02 1.654605359856639e+02 9.390376067217456e+01 3 6.138170466352969e+02 3.363961838598331e+02 -2.139358085817026e+01 5.129827374509639e+02 4 6.586825657787861e+02 -2.116511594512328e+02 -1.440669551274935e+02 -6.068864981231385e+02 - ME 5.249882090061186e-02 + ME 3.843244876666358e-03 Event 13 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -108,7 +108,7 @@ Event 13 Batch 0 2 2.867684047377951e+02 7.055192702127012e+01 -2.028354730671929e+02 1.900429278217245e+02 3 6.990707050557395e+02 -5.605742285334717e+02 2.413419117565430e+02 -3.408965629057132e+02 4 5.141608902064654e+02 4.900223015122016e+02 -3.850643868935023e+01 1.508536350839886e+02 - ME 6.422048006176975e-05 + ME 1.780264803426774e-05 Event 14 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -116,7 +116,7 @@ Event 14 Batch 0 2 3.551549262960330e+02 1.090410064132905e+02 3.205839746298526e+02 1.071027348074892e+02 3 5.276349775014137e+02 3.895763694332612e+02 -2.529209653865598e+02 2.503196099590423e+02 4 6.172100962025531e+02 -4.986173758465519e+02 -6.766300924329285e+01 -3.574223447665315e+02 - ME 7.422587439250419e-04 + ME 1.172793340377339e-04 Event 15 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -124,7 +124,7 @@ Event 15 Batch 0 2 
5.846731991828425e+02 7.106081559720657e+01 3.900476102503054e+02 4.297161529048979e+02 3 2.829885923647302e+02 -2.767806781033229e+02 5.223342094943639e+01 -2.732525156618249e+01 4 6.323382084524278e+02 2.057198625061163e+02 -4.422810311997417e+02 -4.023909013387152e+02 - ME 1.255922738422332e-03 + ME 2.768931482482754e-04 Event 16 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -132,7 +132,7 @@ Event 16 Batch 0 2 7.471577506095512e+02 1.666056475215676e+02 -5.784682380714994e+02 -4.425627187781379e+02 3 6.589296733908160e+02 -1.235441202519038e+02 5.251239647671507e+02 3.783780998595698e+02 4 9.391257599963087e+01 -4.306152726966400e+01 5.334427330434855e+01 6.418461891856485e+01 - ME 5.526726502577864e-05 + ME 3.619360847906487e-05 Event 17 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -140,7 +140,7 @@ Event 17 Batch 0 2 3.567490993131759e+02 3.856364495163717e+01 -1.708845728849435e+02 -3.107752047682324e+02 3 6.453207560475681e+02 4.468356462873772e+02 2.282834847349605e+02 4.057874246326636e+02 4 4.979301446392561e+02 -4.853992912390142e+02 -5.739891185001719e+01 -9.501221986443127e+01 - ME 1.327369996555111e-04 + ME 3.400819398697452e-05 Event 18 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -148,7 +148,7 @@ Event 18 Batch 0 2 4.856701782481425e+02 2.509110753153842e+02 -3.498523763974107e+02 -2.247720379690150e+02 3 3.014847498930008e+02 -1.059425909901355e+02 -2.435847754696140e+02 -1.426032222348426e+02 4 7.128450718588564e+02 -1.449684843252488e+02 5.934371518670247e+02 3.673752602038576e+02 - ME 1.018512933050835e-03 + ME 1.704840743724005e-04 Event 19 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -156,7 +156,7 @@ Event 19 Batch 0 2 5.848213503304410e+02 -3.141116763848333e+02 -1.950442390378232e+02 4.531088295091878e+02 3 
5.769300027107226e+02 5.020221748138873e+02 2.252239828724832e+02 -1.734823378963534e+02 4 3.382486469588368e+02 -1.879104984290540e+02 -3.017974383465995e+01 -2.796264916128346e+02 - ME 4.267017342507976e-03 + ME 1.566312636528492e-04 Event 20 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -164,7 +164,7 @@ Event 20 Batch 0 2 5.550938429889906e+02 -4.478597170519693e+02 -1.958065402362923e+02 -2.630791652090858e+02 3 5.585686897587655e+02 3.351111310173187e+02 -1.360174455686903e+02 4.256744830831253e+02 4 3.863374672522434e+02 1.127485860346507e+02 3.318239858049826e+02 -1.625953178740396e+02 - ME 2.768271682113988e-04 + ME 4.443882992804106e-05 Event 21 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -172,7 +172,7 @@ Event 21 Batch 0 2 6.296556563991993e+02 -3.477135312394776e+02 -1.376147989324512e+02 -5.065804111325866e+02 3 3.137568007204202e+02 1.080474571851863e+02 -2.382188236683311e+02 1.732653140250679e+02 4 5.565875428803801e+02 2.396660740542913e+02 3.758336226007823e+02 3.333150971075189e+02 - ME 5.519034669639832e-05 + ME 2.195742323347977e-05 Event 22 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -180,7 +180,7 @@ Event 22 Batch 0 2 5.583338925767162e+02 2.471586228668332e+02 -1.597599499756147e+02 -4.744745610949311e+02 3 5.378723432497920e+02 9.149532098241385e+00 4.314513680009925e+02 3.210493120152684e+02 4 4.037937641734921e+02 -2.563081549650745e+02 -2.716914180253778e+02 1.534252490796627e+02 - ME 3.705224437539572e-05 + ME 1.393143104564022e-05 Event 23 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -188,7 +188,7 @@ Event 23 Batch 0 2 6.057340011976822e+02 6.848115528115159e+01 -5.207204912425279e+02 -3.017849923015605e+02 3 6.884459352783615e+02 -2.949639632364767e+01 6.680977958792448e+02 1.635026102131439e+02 4 
2.058200635239559e+02 -3.898475895750391e+01 -1.473773046367171e+02 1.382823820884168e+02 - ME 2.946248744974782e-05 + ME 1.074117284514867e-05 Event 24 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -196,7 +196,7 @@ Event 24 Batch 0 2 4.702316790647315e+02 -1.210575128627593e+02 4.313728504035306e+02 -1.427598490831810e+02 3 7.180482366151732e+02 1.040047389253588e+02 -7.104588047260974e+02 4.956931953573291e+00 4 3.117200843200960e+02 1.705277393740069e+01 2.790859543225674e+02 1.378029171296075e+02 - ME 3.146557994448562e-05 + ME 5.213387311993420e-06 Event 25 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -204,7 +204,7 @@ Event 25 Batch 0 2 6.261365010744016e+02 -5.354018140499276e+02 -2.095559720530078e+02 2.479477970595020e+02 3 5.483958991041942e+02 5.199465180092641e+02 -9.843995208133505e+01 -1.438862620216537e+02 4 3.254675998214045e+02 1.545529604066345e+01 3.079959241343431e+02 -1.040615350378483e+02 - ME 1.657640191611339e-04 + ME 1.695323153210731e-05 Event 26 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -212,7 +212,7 @@ Event 26 Batch 0 2 4.635816356180677e+02 1.904702824079147e+02 -2.351549941335565e+02 -3.511853259118595e+02 3 3.686385821486527e+02 -2.712527815845713e+02 -6.015354190959191e+01 -2.422764621809819e+02 4 6.677797822332798e+02 8.078249917665664e+01 2.953085360431485e+02 5.934617880928415e+02 - ME 3.250975879010065e-04 + ME 1.052251904460155e-04 Event 27 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -220,7 +220,7 @@ Event 27 Batch 0 2 2.851713673150520e+02 1.387976072955998e+02 1.520424011317634e+02 -1.973348453858079e+02 3 6.747356481771329e+02 2.426633222154767e+02 -4.300238522839811e+02 4.598501858640580e+02 4 5.400929845078149e+02 -3.814609295110765e+02 2.779814511522176e+02 -2.625153404782502e+02 - ME 
4.155279516527712e-04 + ME 7.957109124083736e-05 Event 28 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -228,7 +228,7 @@ Event 28 Batch 0 2 1.977804200471008e+02 -1.803202618401224e+02 -8.082809162516925e+01 -8.277519444290659e+00 3 7.197523834069627e+02 3.152541965091956e+02 6.467033971658861e+02 -2.080867841663842e+01 4 5.824671965459364e+02 -1.349339346690732e+02 -5.658753055407169e+02 2.908619786092899e+01 - ME 1.172809031809504e-04 + ME 1.748013159755222e-05 Event 29 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -236,7 +236,7 @@ Event 29 Batch 0 2 6.123364628491765e+02 -3.746492624245139e+02 3.785128791537567e+02 -3.021950929683376e+02 3 4.056577755659300e+02 1.796205570313495e+00 -8.781658530568643e+01 3.960344074293251e+02 4 4.820057615848937e+02 3.728530568542006e+02 -2.906962938480702e+02 -9.383931446098750e+01 - ME 5.496242925842306e-04 + ME 3.085570985177973e-04 Event 30 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -244,7 +244,7 @@ Event 30 Batch 0 2 7.349194950356053e+02 7.241679607953656e+02 1.425637322816703e+01 1.244354634469208e+02 3 7.321421454671275e+02 -7.253765693071590e+02 -2.895970851972107e+01 -9.498573130653318e+01 4 3.293835949726734e+01 1.208608511793152e+00 1.470333529155409e+01 -2.944973214038765e+01 - ME 5.147061682527938e-02 + ME 3.267107835672361e-04 Event 31 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -252,7 +252,7 @@ Event 31 Batch 0 2 1.718338270585457e+02 -1.344914872264095e+02 -1.021614404532311e+02 3.165350011824393e+01 3 6.313115253715935e+02 -2.849940593920691e+02 -7.916450257599642e+01 -5.577325610990745e+02 4 6.968546475698608e+02 4.194855466184786e+02 1.813259430292275e+02 5.260790609808306e+02 - ME 4.645345268703414e-04 + ME 1.685680846028125e-04 Event 32 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -260,7 +260,7 @@ Event 32 Batch 0 2 7.235176898898732e+02 -4.762113006241282e+02 -2.880822916693121e+01 5.439400065022983e+02 3 6.603902828461299e+02 4.672103814637360e+02 1.031050210016798e+02 -4.551913221650266e+02 4 1.160920272639969e+02 9.000919160392018e+00 -7.429679183474862e+01 -8.874868433727177e+01 - ME 4.476006843186700e-03 + ME 2.173072900368875e-04 Event 33 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -268,7 +268,7 @@ Event 33 Batch 0 2 4.786737271642286e+02 2.009638309376703e+02 4.090184839380260e+02 1.464443769121513e+02 3 3.795793219608408e+02 -6.057523839522271e+00 -8.244277697544294e+01 3.704685635647950e+02 4 6.417469508749314e+02 -1.949063070981495e+02 -3.265757069625828e+02 -5.169129404769461e+02 - ME 1.351709676586880e-02 + ME 3.322437827682699e-03 Event 34 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -276,7 +276,7 @@ Event 34 Batch 0 2 6.621583515140109e+02 -5.051303032557109e+02 -1.429543729176959e+02 4.035605363216953e+02 3 3.008522892707525e+02 8.677543723835062e+01 2.726747894692539e+02 -9.290092916351111e+01 4 5.369893592152367e+02 4.183548660173603e+02 -1.297204165515579e+02 -3.106596071581844e+02 - ME 6.460854093057828e-04 + ME 9.294666462955388e-05 Event 35 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -284,7 +284,7 @@ Event 35 Batch 0 2 6.158114977149372e+02 2.502256147979830e+02 4.233348779616202e+00 5.626659943296695e+02 3 1.476397433483021e+02 -1.670550278282843e+01 -6.055370982200890e+01 1.336101351676488e+02 4 7.365487589367605e+02 -2.335201120151546e+02 5.632036104239269e+01 -6.962761294973184e+02 - ME 2.101231899117793e+00 + ME 5.450893768264864e-01 Event 36 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -292,7 +292,7 @@ Event 36 Batch 0 2 
7.182456511154913e+02 -7.463771462544163e+01 -6.667773110518942e+02 2.563475070450518e+02 3 4.860008755751825e+02 -7.840660561780868e+01 4.141081959217036e+02 -2.419992919944378e+02 4 2.957534733093268e+02 1.530443202432501e+02 2.526691151301903e+02 -1.434821505061448e+01 - ME 9.644531209480271e-05 + ME 1.793136635525090e-05 Event 37 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -300,7 +300,7 @@ Event 37 Batch 0 2 5.672182018814327e+02 -2.031706828392718e+00 -5.267408190306547e+02 2.104197478372323e+02 3 4.664069288608281e+02 3.712365792892206e+02 2.604523782658950e+02 -1.090109358856581e+02 4 4.663748692577387e+02 -3.692048724608279e+02 2.662884407647597e+02 -1.014088119515743e+02 - ME 1.216876552012178e-04 + ME 1.885829354904198e-05 Event 38 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -308,7 +308,7 @@ Event 38 Batch 0 2 5.068057345787187e+02 4.883513201966852e+02 -7.570036138649985e+01 -1.124032737511800e+02 3 3.871140338254017e+02 -1.153787089711745e+02 -3.599073977747533e+02 -8.373585688177315e+01 4 6.060802315958797e+02 -3.729726112255107e+02 4.356077591612532e+02 1.961391306329531e+02 - ME 1.006736553113524e-04 + ME 2.004468492837133e-05 Event 39 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -316,7 +316,7 @@ Event 39 Batch 0 2 4.960337392567769e+02 -3.669089247616476e+02 2.651961920161227e+02 -2.027271347192069e+02 3 2.837821967046824e+02 -2.822567153069604e+02 -2.935613327724534e+01 -1.303560381865560e+00 4 7.201840640385411e+02 6.491656400686079e+02 -2.358400587388775e+02 2.040306951010725e+02 - ME 1.372807525012575e-03 + ME 2.738639406673165e-04 Event 40 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -324,7 +324,7 @@ Event 40 Batch 0 2 3.080730228651936e+02 -3.065830270999447e+02 -2.484308296331460e+01 1.728167064871203e+01 3 
6.842346640746094e+02 4.630487823766367e+02 8.554554725666550e+01 -4.964321303112498e+02 4 5.076923130601962e+02 -1.564657552766919e+02 -6.070246429335075e+01 4.791504596625378e+02 - ME 4.192363154074847e-05 + ME 4.316353181637933e-05 Event 41 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -332,7 +332,7 @@ Event 41 Batch 0 2 1.602650851118221e+02 -1.258781096038287e+02 -9.817642232798531e+01 1.417706342452912e+01 3 7.146392966623014e+02 6.799675591776853e+02 -1.019163870176435e+02 1.948499239342933e+02 4 6.250956182258764e+02 -5.540894495738563e+02 2.000928093456288e+02 -2.090269873588226e+02 - ME 4.523507186168379e-04 + ME 6.118266190948034e-05 Event 42 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -340,7 +340,7 @@ Event 42 Batch 0 2 1.687893235969910e+02 1.289401357197518e+02 4.788693514682045e+01 9.783209393213438e+01 3 7.042017295435162e+02 -1.022058447296739e+02 -6.640064324330017e+02 -2.110675220936915e+02 4 6.270089468594927e+02 -2.673429099007782e+01 6.161194972861812e+02 1.132354281615572e+02 - ME 1.686356189272381e-04 + ME 4.091574289077424e-05 Event 43 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -348,7 +348,7 @@ Event 43 Batch 0 2 4.729783670130408e+02 -7.983817933050123e+01 9.052957805204315e+01 4.573169538528310e+02 3 5.638402597824536e+02 4.785250044669658e+02 7.435095949863268e+01 -2.887933404236804e+02 4 4.631813732045056e+02 -3.986868251364646e+02 -1.648805375506758e+02 -1.685236134291506e+02 - ME 5.938757690519573e-04 + ME 2.654067897204875e-04 Event 44 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -356,7 +356,7 @@ Event 44 Batch 0 2 1.774791104122977e+02 -1.952605982635784e+01 6.371003613266313e+01 1.644949814321787e+02 3 7.194816205691247e+02 -3.678871192485065e+02 2.644831693887214e+01 -6.177486190667772e+02 4 
6.030392690185777e+02 3.874131790748646e+02 -9.015835307153536e+01 4.532536376345985e+02 - ME 2.092333697371024e-04 + ME 1.390282437939369e-04 Event 45 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -364,7 +364,7 @@ Event 45 Batch 0 2 7.477488480180839e+02 -3.787655987618923e+02 1.634662296474455e+02 6.236535517992064e+02 3 7.458113398274099e+02 3.819163358711198e+02 -1.661042992235261e+02 -6.186952632673017e+02 4 6.439812154506046e+00 -3.150737109227506e+00 2.638069576080606e+00 -4.958288531904773e+00 - ME 9.377954359926730e-02 + ME 4.591622113024210e-03 Event 46 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -372,7 +372,7 @@ Event 46 Batch 0 2 3.243146757688279e+02 -4.392587631431587e+00 -2.496903827548322e+02 -2.069188895501946e+02 3 5.341608950426614e+02 -2.704482657861201e+02 2.711825143656835e+02 -3.723515022507137e+02 4 6.415244291885106e+02 2.748408534175518e+02 -2.149213161085120e+01 5.792703918009084e+02 - ME 1.879047912263320e-04 + ME 7.845213441237594e-05 Event 47 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -380,7 +380,7 @@ Event 47 Batch 0 2 6.742198761450968e+02 -3.282965096491567e+02 5.301803926793563e+02 -2.563251730900704e+02 3 6.484148720042493e+02 3.527030795571956e+02 -3.975273148506379e+02 3.715029176935211e+02 4 1.773652518506536e+02 -2.440656990803885e+01 -1.326530778287185e+02 -1.151777446034508e+02 - ME 1.136665455996279e-03 + ME 5.254395938575492e-05 Event 48 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -388,7 +388,7 @@ Event 48 Batch 0 2 7.321401810535270e+02 -1.843482647928687e+02 4.412348098999295e+02 5.543976952635381e+02 3 7.293058265076229e+02 2.182722651304250e+02 -4.435200216702997e+02 -5.362221528717154e+02 4 3.855399243885009e+01 -3.392400033755636e+01 2.285211770370227e+00 -1.817554239182278e+01 - ME 
2.278442596973106e-03 + ME 2.330290263553363e-04 Event 49 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -396,7 +396,7 @@ Event 49 Batch 0 2 3.511117284856090e+02 -3.272266866652174e+02 5.199533974843238e+01 1.161835877338140e+02 3 7.326526490901410e+02 6.615045961628415e+02 -2.993354007364775e+02 -9.792799058578566e+01 4 4.162356224242500e+02 -3.342779094976241e+02 2.473400609880451e+02 -1.825559714802838e+01 - ME 8.806759903737244e-05 + ME 7.863589115869630e-06 Event 50 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -404,7 +404,7 @@ Event 50 Batch 0 2 7.322170903075255e+02 2.740692406080844e+02 1.952596610981929e+01 -6.787095515302592e+02 3 3.078559130669522e+02 -1.663333363406682e+02 8.625456119089935e+01 2.442716420418760e+02 4 4.599269966255216e+02 -1.077359042674159e+02 -1.057805273007185e+02 4.344379094883832e+02 - ME 7.579426018596712e-05 + ME 6.765758192049922e-05 Event 51 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -412,7 +412,7 @@ Event 51 Batch 0 2 3.473696038265160e+02 -2.922314643158454e+02 -6.759614889845234e+01 -1.752060888796554e+02 3 5.389399151999496e+02 -2.449040872454050e+02 9.346474502284556e+01 4.708954891311219e+02 4 6.136904809735339e+02 5.371355515612503e+02 -2.586859612439322e+01 -2.956894002514666e+02 - ME 4.687828430739845e-04 + ME 2.035652280642710e-04 Event 52 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -420,7 +420,7 @@ Event 52 Batch 0 2 6.818614816439094e+02 5.970116833066725e+02 3.013730734325877e+02 1.329902280423528e+02 3 2.108623144448950e+02 -4.198344769951654e+00 -1.698802183673395e+02 -1.248439063859965e+02 4 6.072762039111957e+02 -5.928133385367207e+02 -1.314928550652483e+02 -8.146321656356344e+00 - ME 1.636869658416981e-04 + ME 4.047005152694340e-05 Event 53 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -428,7 +428,7 @@ Event 53 Batch 0 2 5.157714002491656e+02 -5.140718537651751e+02 -4.182413977701254e+01 1.003899065692042e+00 3 5.148181840855221e+02 2.868792199999327e+02 1.974924151010656e+02 3.791237552236646e+02 4 4.694104156653124e+02 2.271926337652422e+02 -1.556682753240530e+02 -3.801276542893567e+02 - ME 3.182294022992135e-03 + ME 1.547751010871262e-04 Event 54 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -436,7 +436,7 @@ Event 54 Batch 0 2 6.433410767101752e+02 2.586883950027282e+02 -5.809813083922761e+02 9.710187728524583e+01 3 6.928799734080563e+02 -1.579832568796111e+02 6.405510983559769e+02 -2.117031848853746e+02 4 1.637789498817686e+02 -1.007051381231171e+02 -5.956978996370073e+01 1.146013076001288e+02 - ME 3.280140142776471e-05 + ME 1.302720215079095e-05 Event 55 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -444,7 +444,7 @@ Event 55 Batch 0 2 7.193759752058201e+02 -3.536444481659258e+02 -7.212523476050659e+01 -6.222823703878202e+02 3 5.307053661742267e+02 2.409461639849982e+02 1.900944302490854e+02 4.329633233142391e+02 4 2.499186586199529e+02 1.126982841809279e+02 -1.179691954885788e+02 1.893190470735813e+02 - ME 3.939174164528502e-05 + ME 3.087450123310173e-05 Event 56 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -452,7 +452,7 @@ Event 56 Batch 0 2 3.858864959547013e+02 1.815174721437793e+02 3.218581876578407e+02 -1.112074732396182e+02 3 4.484505297447187e+02 -3.244105157450006e+02 2.934585578803474e+02 -9.873079412811623e+01 4 6.656629743005793e+02 1.428930436012212e+02 -6.153167455381879e+02 2.099382673677345e+02 - ME 2.326138625268126e-04 + ME 4.275995533811995e-05 Event 57 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -460,7 +460,7 @@ Event 57 Batch 0 2 
5.284589752749192e+02 3.868194647882293e+02 -1.709996888155517e+02 3.168575336559793e+02 3 6.299868555278971e+02 -1.587414880613579e+02 2.327134172236622e+02 -5.634971548731005e+02 4 3.415541691971835e+02 -2.280779767268714e+02 -6.171372840811043e+01 2.466396212171210e+02 - ME 3.474853710074164e-05 + ME 2.211478424702745e-05 Event 58 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -468,7 +468,7 @@ Event 58 Batch 0 2 6.172037319760957e+02 -2.246119436411400e+02 -2.286037628748728e+01 5.744278237820342e+02 3 5.117934503257735e+02 1.262762853074207e+02 3.215736628881853e+02 -3.775939815489577e+02 4 3.710028176981306e+02 9.833565833371921e+01 -2.987132866006979e+02 -1.968338422330765e+02 - ME 6.183305374210038e-04 + ME 1.857727050583390e-04 Event 59 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -476,7 +476,7 @@ Event 59 Batch 0 2 7.388935626701858e+02 -3.912134623809441e+02 -5.457789630286015e+02 3.082872805076099e+02 3 1.936051438730608e+02 1.561492575196544e+02 8.304673385628061e+01 -7.876294246644987e+01 4 5.675012934567535e+02 2.350642048612896e+02 4.627322291723209e+02 -2.295243380411600e+02 - ME 4.116991424436793e-04 + ME 6.745345781245190e-05 Event 60 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -484,7 +484,7 @@ Event 60 Batch 0 2 7.258141426633659e+02 -5.584991156701968e+02 1.635894950857984e+02 4.337319270970709e+02 3 2.789580074371136e+02 2.331554478032953e+02 6.512410160032128e+01 -1.386180308029247e+02 4 4.952278498995201e+02 3.253436678669015e+02 -2.287135966861195e+02 -2.951138962941461e+02 - ME 7.295672680059989e-04 + ME 9.170244877267536e-05 Event 61 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -492,15 +492,15 @@ Event 61 Batch 0 2 5.906141202026897e+02 4.485275282318680e+02 -2.043613424290570e+02 3.253990429020988e+02 3 
4.163572165237975e+02 -4.021600557528675e+02 -4.112755461437413e+01 9.964509802161204e+01 4 4.930286632735124e+02 -4.636747247900051e+01 2.454888970434311e+02 -4.250441409237108e+02 - ME 5.845307122272604e-03 + ME 1.836685601489136e-04 Event 62 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 1 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 -7.500000000000000e+02 2 7.346180891175762e+02 3.693463141798367e+02 7.549194961263061e+01 -6.305140780380819e+02 3 4.420621433230785e+02 -2.806743363126464e+02 3.467380983154045e+01 3.397625382625571e+02 - 4 3.233197675593453e+02 -8.867197786719018e+01 -1.101657594441711e+02 2.907515397755249e+02 - ME 3.963631774242112e-05 + 4 3.233197675593452e+02 -8.867197786719018e+01 -1.101657594441711e+02 2.907515397755248e+02 + ME 3.490896135533686e-05 Event 63 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -508,7 +508,7 @@ Event 63 Batch 0 2 6.451039732729313e+02 -2.415045377667665e+02 1.990362537024482e+02 -5.641092662620230e+02 3 3.260870385294104e+02 2.061141051805976e+02 -2.496695602716584e+02 3.892098426606745e+01 4 5.288089881976584e+02 3.539043258616898e+01 5.063330656921013e+01 5.251882819959555e+02 - ME 4.832224458906289e-04 + ME 4.428689394331114e-04 Event 64 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -516,7 +516,7 @@ Event 64 Batch 0 2 5.275973380665291e+02 -6.064553482667328e+01 4.309976929667101e+02 -2.981980196075213e+02 3 5.799838776791826e+02 3.279821268626862e+02 -1.824214634122377e+02 4.421893627315650e+02 4 3.924187842542880e+02 -2.673365920360130e+02 -2.485762295544724e+02 -1.439913431240437e+02 - ME 2.175617604507715e-04 + ME 4.205989960223865e-05 Event 65 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -524,7 +524,7 @@ Event 65 Batch 0 2 6.480172869826541e+02 2.720879118036237e+02 
-5.153900904044360e+02 -2.833154199679406e+02 3 7.075023253568394e+02 -3.440299289242928e+02 4.709796137500282e+02 4.004761563708322e+02 4 1.444803876605064e+02 7.194201712066916e+01 4.441047665440794e+01 -1.171607364028916e+02 - ME 4.989956280474397e-03 + ME 1.103463366798231e-04 Event 66 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -532,7 +532,7 @@ Event 66 Batch 0 2 5.472978185025795e+02 4.857452785131266e+02 -2.223654169683454e+02 -1.189119332799752e+02 3 3.203062148499983e+02 1.169702135976477e+02 2.922172461416276e+02 -5.935588816501102e+01 4 6.323959666474225e+02 -6.027154921107744e+02 -6.985182917328234e+01 1.782678214449862e+02 - ME 1.346850069104626e-04 + ME 2.913920636000223e-05 Event 67 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -540,7 +540,7 @@ Event 67 Batch 0 2 4.264671493042950e+02 1.195959046886511e+02 -2.647539231733031e+02 3.122121220929446e+02 3 5.059969655247565e+02 3.777175441887567e+02 -7.608313561896731e+00 -3.366073372596325e+02 4 5.675358851709483e+02 -4.973134488774080e+02 2.723622367352000e+02 2.439521516668857e+01 - ME 9.763221977220593e-05 + ME 4.009347519102052e-05 Event 68 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -548,7 +548,7 @@ Event 68 Batch 0 2 5.996105691520872e+02 -3.814725562071957e+02 -3.417794545715573e+02 3.117664637712124e+02 3 2.164196744806214e+02 1.292759463548889e+02 -1.184749651041615e+02 1.268419798013013e+02 4 6.839697563672917e+02 2.521966098523068e+02 4.602544196757188e+02 -4.386084435725137e+02 - ME 2.936083529685707e-03 + ME 6.175473672610461e-04 Event 69 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -556,7 +556,7 @@ Event 69 Batch 0 2 4.950546755511076e+02 -1.873718558932053e+02 -4.578972175289678e+02 -1.735101101888631e+01 3 4.768584394819691e+02 -1.830244097668608e+02 
2.985566003539791e+02 -3.236664843936508e+02 4 5.280868849669230e+02 3.703962656600661e+02 1.593406171749887e+02 3.410174954125370e+02 - ME 5.234212626720279e-05 + ME 1.367292435278724e-05 Event 70 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -564,7 +564,7 @@ Event 70 Batch 0 2 6.918343395272258e+02 6.895733556028865e+02 -5.391072441382606e+01 -1.473005040127906e+01 3 2.169590284692678e+02 -1.127375202028747e+02 1.807969800614662e+02 4.091361110301506e+01 4 5.912066320035063e+02 -5.768358354000119e+02 -1.268862556476402e+02 -2.618356070173603e+01 - ME 1.591740981760110e-04 + ME 3.526540789264872e-05 Event 71 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -572,7 +572,7 @@ Event 71 Batch 0 2 5.156371334918733e+02 1.547202099034306e+02 -4.807172487652236e+02 1.041836686949964e+02 3 3.718518305526428e+02 -8.969821893462726e+01 -7.521366892975188e+01 -3.529460545344468e+02 4 6.125110359554843e+02 -6.502199096880338e+01 5.559309176949756e+02 2.487623858394504e+02 - ME 1.125100552069616e-04 + ME 2.860782472746935e-05 Event 72 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -580,7 +580,7 @@ Event 72 Batch 0 2 2.110577464974889e+02 5.009520239746097e+01 -1.453533690489527e+02 -1.445968227848547e+02 3 7.317124633441161e+02 -4.429659627226336e+02 5.264774879404380e+02 2.490095170354977e+02 4 5.572297901583943e+02 3.928707603251725e+02 -3.811241188914850e+02 -1.044126942506430e+02 - ME 1.823320413479066e-04 + ME 2.666441446531882e-05 Event 73 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -588,7 +588,7 @@ Event 73 Batch 0 2 3.932257450488246e+02 3.105005764664288e+01 -2.932679039283983e+02 2.601082794045340e+02 3 5.658879124646472e+02 3.645905401293642e+02 4.244364556305355e+02 8.459646951004230e+01 4 5.408863424865281e+02 -3.956405977760074e+02 
-1.311685517021372e+02 -3.447047489145762e+02 - ME 8.953763196089171e-04 + ME 7.825486685913998e-05 Event 74 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -596,7 +596,7 @@ Event 74 Batch 0 2 1.374854102925440e+02 7.785209805930555e+01 4.289805712042688e+01 1.048858692406466e+02 3 6.381281910764947e+02 -1.004137270491618e+02 -1.591026937267357e+02 6.097630724433484e+02 4 7.243863986309617e+02 2.256162898985645e+01 1.162046366063089e+02 -7.146489416839951e+02 - ME 1.395531292378326e+01 + ME 1.919068868336380e+00 Event 75 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -604,7 +604,7 @@ Event 75 Batch 0 2 5.936883054156938e+02 -3.438525101293572e+00 -2.706855443967301e+02 5.283780053968293e+02 3 5.912298912592892e+02 1.109657062166288e+02 4.832067437414102e+02 -3.221034603433170e+02 4 3.150818033250173e+02 -1.075271811153352e+02 -2.125211993446803e+02 -2.062745450535123e+02 - ME 1.379908325625592e-03 + ME 1.642862842910461e-04 Event 76 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -612,7 +612,7 @@ Event 76 Batch 0 2 6.619486867997672e+02 2.801967015359571e+01 2.136411519593737e+02 6.258980909300584e+02 3 1.201252731414031e+02 2.274423842261747e+01 -8.754996679960182e+01 7.904292618103446e+01 4 7.179260400588295e+02 -5.076390857621322e+01 -1.260911851597719e+02 -7.049410171110928e+02 - ME 5.870483941147637e+00 + ME 7.362202483972824e-01 Event 77 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -620,7 +620,7 @@ Event 77 Batch 0 2 7.456676259451606e+02 -7.346624001550109e+02 6.511229493320701e+01 -1.097804865615983e+02 3 1.284204120828029e+02 1.251494694834492e+02 2.867183268690428e+01 2.708973588335753e+00 4 6.259119619720373e+02 6.095129306715618e+02 -9.378412762011118e+01 1.070715129732624e+02 - ME 1.662775178233579e-04 + ME 4.400761364703354e-05 Event 78 
Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -628,7 +628,7 @@ Event 78 Batch 0 2 7.040158920877628e+02 6.911264613612161e+02 -6.659640240533211e+01 -1.163937709034254e+02 3 5.185438503615327e+02 -4.976050220224222e+02 -1.270913363611937e+02 7.158742227342900e+01 4 2.774402575507044e+02 -1.935214393387939e+02 1.936877387665258e+02 4.480634862999637e+01 - ME 5.328004946641866e-05 + ME 9.352750539306009e-06 Event 79 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -636,7 +636,7 @@ Event 79 Batch 0 2 6.777589592768838e+02 1.742725197144059e+02 -4.776543849198212e+01 6.532264221831092e+02 3 5.725002211294488e+02 -1.786302554544233e+02 -1.627852110918317e+02 -5.189881598643107e+02 4 2.497408195936665e+02 4.357735740017474e+00 2.105506495838138e+02 -1.342382623187985e+02 - ME 9.179311580246363e-04 + ME 3.598558866345749e-04 Event 80 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -644,7 +644,7 @@ Event 80 Batch 0 2 6.240819586861880e+02 4.679310297228965e+02 -4.118464023828053e+02 -3.002304821964348e+01 3 6.688675489057649e+02 -5.494372353172420e+02 3.251429131208653e+02 1.994607943266771e+02 4 2.070504924080468e+02 8.150620559434545e+01 8.670348926194001e+01 -1.694377461070337e+02 - ME 3.575286400583300e-03 + ME 5.382869847396148e-05 Event 81 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -652,7 +652,7 @@ Event 81 Batch 0 2 5.198056748722776e+02 1.034797897616987e+02 -2.885605608993972e+02 4.197888462474007e+02 3 5.672098642055398e+02 -4.160331805498524e+02 2.087659545613757e+01 -3.849773895903518e+02 4 4.129844609221831e+02 3.125533907881537e+02 2.676839654432596e+02 -3.481145665704891e+01 - ME 1.018936778946332e-04 + ME 3.612255741613163e-05 Event 82 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -660,7 
+660,7 @@ Event 82 Batch 0 2 2.057598609140514e+02 6.385349666266659e+01 -2.765433460911293e+01 1.936364870179372e+02 3 6.235840147705873e+02 4.654039114453895e+02 -3.828889383639962e+02 -1.601633028106901e+02 4 6.706561243153629e+02 -5.292574081080552e+02 4.105432729731107e+02 -3.347318420724690e+01 - ME 6.930850923220120e-04 + ME 3.172622561805068e-04 Event 83 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -668,7 +668,7 @@ Event 83 Batch 0 2 6.583322583736492e+02 1.865539504254553e+02 -1.926584839569474e+02 6.012334775737429e+02 3 3.620902826842561e+02 -3.107067244571256e+02 -1.177956631152976e+01 -1.855584705935048e+02 4 4.795774589420946e+02 1.241527740316703e+02 2.044380502684771e+02 -4.156750069802382e+02 - ME 8.385116111585099e-03 + ME 6.756528802944365e-04 Event 84 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -676,7 +676,7 @@ Event 84 Batch 0 2 4.849329564663161e+02 -2.622178945286150e+02 4.068620488841210e+02 -2.941124332559817e+01 3 4.737588937677760e+02 6.014532316188546e+01 -1.333934272225749e+02 4.505954095412368e+02 4 5.413081497659077e+02 2.020725713667296e+02 -2.734686216615461e+02 -4.211841662156386e+02 - ME 5.162990427398554e-03 + ME 1.017468409980153e-03 Event 85 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -684,7 +684,7 @@ Event 85 Batch 0 2 7.085742632080854e+02 -2.174614026040270e+02 -5.283468657604088e+02 -4.190914152061853e+02 3 5.315764222715953e+02 8.528530557199829e+00 3.820092234108129e+02 3.695533927738615e+02 4 2.598493145203187e+02 2.089328720468272e+02 1.463376423495959e+02 4.953802243232388e+01 - ME 6.335517668355978e-05 + ME 1.894143727100354e-05 Event 86 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -692,7 +692,7 @@ Event 86 Batch 0 2 1.724500140939190e+02 1.231518677708316e+02 -1.121928207497684e+01 
1.201946443701656e+02 3 7.028475062724231e+02 -6.467096040851287e+01 -4.553168759141600e+02 -5.315061866629339e+02 4 6.247024796336580e+02 -5.848090736231883e+01 4.665361579891369e+02 4.113115422927684e+02 - ME 1.165531323127631e-04 + ME 5.311384036847167e-05 Event 87 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -700,7 +700,7 @@ Event 87 Batch 0 2 1.942099203196796e+02 -7.751148196958454e+01 -1.356691819650310e+02 -1.153400900745028e+02 3 7.314670447251594e+02 1.724617634710876e+02 7.020747158546045e+02 1.113196793791551e+02 4 5.743230349551606e+02 -9.495028150150301e+01 -5.664055338895735e+02 4.020410695347637e+00 - ME 1.237609879052555e-04 + ME 1.874087134673149e-05 Event 88 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -708,7 +708,7 @@ Event 88 Batch 0 2 6.382497024023744e+02 2.632142028760094e+02 -5.613974181649784e+02 1.513733956108635e+02 3 3.997044228265544e+02 -5.264940326118349e+01 3.435187961344461e+02 1.974500004195773e+02 4 4.620458747710724e+02 -2.105647996148253e+02 2.178786220305324e+02 -3.488233960304407e+02 - ME 1.863821317258467e-03 + ME 9.699609186666195e-05 Event 89 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -716,7 +716,7 @@ Event 89 Batch 0 2 1.419006640093282e+02 -8.677155154367878e+01 6.457545216231642e+01 -9.185046144153740e+01 3 7.131224514048055e+02 5.460003286026870e+02 -4.154556538506974e+02 -1.944836022569670e+02 4 6.449768845858670e+02 -4.592287770590082e+02 3.508802016883808e+02 2.863340636985044e+02 - ME 1.136115495374629e-04 + ME 2.974199953519439e-05 Event 90 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -724,7 +724,7 @@ Event 90 Batch 0 2 5.730615760623938e+02 -6.017783679015001e+01 -5.202921970507185e+02 -2.325386583054727e+02 3 5.389913703864468e+02 -6.302812531165206e+01 2.446311215742109e+02 
4.761247390423042e+02 4 3.879470535511588e+02 1.232059621018019e+02 2.756610754765076e+02 -2.435860807368315e+02 - ME 1.094721025518881e-03 + ME 1.667772733247344e-04 Event 91 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -732,7 +732,7 @@ Event 91 Batch 0 2 4.546745139784350e+02 -1.470341619195494e+02 -1.726383255301703e+02 -3.940886669878754e+02 3 5.110976540119647e+02 -2.482119727393537e+02 -1.865817698532448e+02 4.059542728975803e+02 4 5.342278320096005e+02 3.952461346589030e+02 3.592200953834151e+02 -1.186560590970480e+01 - ME 8.789722587847313e-05 + ME 4.420313882846059e-05 Event 92 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -740,7 +740,7 @@ Event 92 Batch 0 2 6.683728375977241e+02 -1.148152650923627e+02 3.458291789782991e+02 5.603051703379153e+02 3 2.872567998557088e+02 1.635098024620329e+02 7.847331657016402e+01 -2.227620976482501e+02 4 5.443703625465666e+02 -4.869453736967034e+01 -4.243024955484631e+02 -3.375430726896653e+02 - ME 8.270083568815311e-04 + ME 2.265252332392545e-04 Event 93 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -748,7 +748,7 @@ Event 93 Batch 0 2 5.666948073002088e+02 5.408074886689032e+01 5.639942928586390e+02 -1.134525653745258e+01 3 6.168025492529713e+02 2.439040545997395e+02 -5.541969602989467e+02 1.175666879272316e+02 4 3.165026434468199e+02 -2.979848034666298e+02 -9.797332559692304e+00 -1.062214313897791e+02 - ME 1.664960428447917e-04 + ME 1.251778043268437e-05 Event 94 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -756,7 +756,7 @@ Event 94 Batch 0 2 4.964349376711385e+02 8.445930034540567e+01 -2.409007074648561e+02 -4.257712097695705e+02 3 5.660980232871289e+02 1.373833465612049e+02 5.210669225216058e+02 1.734417778711397e+02 4 4.374670390417324e+02 -2.218426469066104e+02 -2.801662150567495e+02 
2.523294318984307e+02 - ME 3.431641292834382e-05 + ME 1.007141026120618e-05 Event 95 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -764,7 +764,7 @@ Event 95 Batch 0 2 7.117074025057361e+02 -3.227984571262278e+02 4.276971164854593e+02 -4.684055501468919e+02 3 1.264078228725325e+02 8.675876182178401e+01 5.074873328843479e+01 7.665781760618943e+01 4 6.618847746217315e+02 2.360396953044439e+02 -4.784458497738940e+02 3.917477325407025e+02 - ME 2.121249861094822e-04 + ME 8.653822330208906e-05 Event 96 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -772,7 +772,7 @@ Event 96 Batch 0 2 7.329769441659936e+02 -9.642859092211874e+01 6.903981466332597e+02 -2.265107649915406e+02 3 3.937873938465678e+02 -4.837693103302091e+01 -3.847118583018795e+02 6.873841850241256e+01 4 3.732356619874385e+02 1.448055219551397e+02 -3.056862883313802e+02 1.577723464891279e+02 - ME 3.473186069800973e-05 + ME 9.822975749896163e-06 Event 97 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -780,7 +780,7 @@ Event 97 Batch 0 2 3.394989963266853e+01 6.003767577498499e+00 -2.078495220615399e+01 2.616364312804199e+01 3 7.377311980366451e+02 -5.308290258162607e+02 4.681853362634530e+02 2.080152802450354e+02 4 7.283189023306861e+02 5.248252582387622e+02 -4.474003840572991e+02 -2.341789233730774e+02 - ME 2.063600678642283e-02 + ME 2.729355315721549e-03 Event 98 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -788,7 +788,7 @@ Event 98 Batch 0 2 2.496912687496082e+02 -2.485814905959506e+02 -5.435228288348340e-01 -2.350907922099247e+01 3 7.458289852530976e+02 7.373315781279124e+02 9.801365830907572e+01 -5.473885205171283e+01 4 5.044797459972945e+02 -4.887500875319618e+02 -9.747013548024091e+01 7.824793127270530e+01 - ME 6.800308216903296e-05 + ME 8.091578731489026e-06 Event 99 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -796,7 +796,7 @@ Event 99 Batch 0 2 1.698125854886770e+02 8.336002034290719e+01 8.774494220182726e+01 -1.191144253093525e+02 3 6.496622934125946e+02 5.714329899004554e+02 -6.230613627727958e+01 3.027265745152471e+02 4 6.805251210987285e+02 -6.547930102433627e+02 -2.543880592454771e+01 -1.836121492058947e+02 - ME 6.115029137493471e-04 + ME 1.856310681395454e-04 Event 100 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -804,7 +804,7 @@ Event 100 Batch 0 2 6.141460480129781e+02 -5.842473718080511e+02 -5.092222124447417e+01 1.823110095657221e+02 3 3.909476383151783e+02 2.539115798088024e+02 -2.930333502072385e+02 -5.000421191795168e+01 4 4.949063136718440e+02 3.303357919992488e+02 3.439555714517127e+02 -1.323067976477707e+02 - ME 1.550407956048336e-04 + ME 2.380755205932631e-05 Event 101 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -812,7 +812,7 @@ Event 101 Batch 0 2 7.469346538870473e+02 3.524232024688497e+02 -1.488240016505349e+02 -6.415299525912136e+02 3 6.502268999047169e+02 -2.777200960400715e+02 1.351761574712158e+02 5.721835160737410e+02 4 1.028384462082358e+02 -7.470310642877820e+01 1.364784417931910e+01 6.934643651747267e+01 - ME 1.080054053054822e-04 + ME 7.777208667430486e-05 Event 102 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -820,7 +820,7 @@ Event 102 Batch 0 2 7.426790432885583e+02 -3.141071077544728e+02 6.615000409077074e+02 1.238005738162371e+02 3 6.735764515788642e+01 -4.139700837311957e+00 -5.533298776898177e+01 -3.818606686673834e+01 4 6.899633115535552e+02 3.182468085917849e+02 -6.061670531387255e+02 -8.561450694949879e+01 - ME 6.292262541994918e-04 + ME 1.796768498680773e-04 Event 103 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -828,7 
+828,7 @@ Event 103 Batch 0 2 4.837874798175253e+02 -2.731724972668680e+02 1.247027290420595e+02 -3.793103501549069e+02 3 4.466406321977809e+02 -2.904538080082218e+02 -1.536665846758871e+02 3.025078850172422e+02 4 5.695718879846930e+02 5.636263052750895e+02 2.896385563382777e+01 7.680246513766473e+01 - ME 8.140894767450013e-05 + ME 2.998858312831636e-05 Event 104 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -836,7 +836,7 @@ Event 104 Batch 0 2 5.788466572679498e+02 3.572346730226224e+02 -3.682137844992378e+02 2.680773207965347e+02 3 2.925711988065158e+02 2.155069407513812e+02 1.697995838195863e+02 -1.016010147279926e+02 4 6.285821439255348e+02 -5.727416137740034e+02 1.984142006796517e+02 -1.664763060685422e+02 - ME 2.849770726480251e-04 + ME 7.634200862908681e-05 Event 105 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -844,7 +844,7 @@ Event 105 Batch 0 2 3.361125455083114e+02 2.619004058447622e+02 4.338373361330959e+01 -2.061496357605196e+02 3 5.299016201311088e+02 2.892532450564946e+02 2.091058919093095e+02 3.916669672191841e+02 4 6.339858343605800e+02 -5.511536509012568e+02 -2.524896255226191e+02 -1.855173314586645e+02 - ME 2.866662317167052e-04 + ME 1.089382545947932e-04 Event 106 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -852,7 +852,7 @@ Event 106 Batch 0 2 3.578050478863485e+02 -2.265838270225943e+02 2.740910124726658e+02 -3.947579646386072e+01 3 5.202885196186892e+02 1.412729374205232e+02 1.631578432376887e+02 4.734148487210871e+02 4 6.219064324949621e+02 8.531088960207101e+01 -4.372488557103545e+02 -4.339390522572265e+02 - ME 1.912263829178338e-03 + ME 4.548955126640399e-04 Event 107 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -860,7 +860,7 @@ Event 107 Batch 0 2 5.409822745993889e+02 9.278463733038997e+01 5.102180459532771e+02 
-1.540466750365499e+02 3 2.501852297905710e+02 1.682301834486207e+02 1.474652503315489e+02 1.120056004263085e+02 4 7.088324956100398e+02 -2.610148207790107e+02 -6.576832962848259e+02 4.204107461024153e+01 - ME 7.096163321035572e-04 + ME 2.159102073406285e-04 Event 108 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -868,7 +868,7 @@ Event 108 Batch 0 2 6.835202199428555e+02 6.670011709444186e+02 6.653656309718588e+01 1.337243986739828e+02 3 2.377887385005082e+02 -1.098327419601477e+02 7.667443498831059e+01 -1.964720946353502e+02 4 5.786910415566365e+02 -5.571684289842709e+02 -1.432109980854965e+02 6.274769596136723e+01 - ME 1.143500637563713e-04 + ME 2.960130886583330e-05 Event 109 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -876,7 +876,7 @@ Event 109 Batch 0 2 5.978180281189351e+02 4.291222314737005e+02 2.249703559956599e+02 3.501840146583366e+02 3 3.585061336071061e+02 -3.227227650115256e+02 1.541688059097761e+02 2.467071262824850e+01 4 5.436758382739589e+02 -1.063994664621746e+02 -3.791391619054360e+02 -3.748547272865851e+02 - ME 1.159187207430584e-03 + ME 1.100286424576873e-04 Event 110 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -884,7 +884,7 @@ Event 110 Batch 0 2 7.073952645543156e+01 -4.753982451958468e+01 4.872856968801237e+01 -1.922426029646691e+01 3 7.438039776014969e+02 1.707202332282495e+02 -7.225114374584515e+02 4.556513803361385e+01 4 6.854564959430718e+02 -1.231804087086648e+02 6.737828677704391e+02 -2.634087773714689e+01 - ME 5.177444310012934e-04 + ME 1.052942530962122e-04 Event 111 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -892,7 +892,7 @@ Event 111 Batch 0 2 5.206822291802364e+02 -3.873336848644893e+02 2.415505427333673e+02 -2.504714268307115e+02 3 5.478000561519707e+02 4.687653961676166e+02 -2.245690260344170e+02 
-1.729527606656598e+02 4 4.315177146677929e+02 -8.143171130312743e+01 -1.698151669895031e+01 4.234241874963712e+02 - ME 1.041517236520828e-04 + ME 8.545692640795734e-05 Event 112 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -900,7 +900,7 @@ Event 112 Batch 0 2 3.610471238372959e+02 2.563298943277285e+02 9.635756626046441e+01 -2.352981732387216e+02 3 6.139063356201009e+02 1.031778254919422e+02 -4.257030126280926e+02 4.301305270271111e+02 4 5.250465405426031e+02 -3.595077198196707e+02 3.293454463676283e+02 -1.948323537883896e+02 - ME 2.333567140730066e-04 + ME 5.572029836371622e-05 Event 113 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -908,7 +908,7 @@ Event 113 Batch 0 2 5.886653054136124e+02 3.035646198144377e+02 3.278619896967805e+02 -3.832517176826292e+02 3 5.420023902452333e+02 -3.658357535838290e+02 -3.990519958595696e+02 2.623541560166928e+01 4 3.693323043411537e+02 6.227113376939163e+01 7.119000616278893e+01 3.570163020809600e+02 - ME 6.906402420910258e-05 + ME 4.986188449478774e-05 Event 114 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -916,7 +916,7 @@ Event 114 Batch 0 2 5.165204340356855e+02 2.346362244736889e+01 6.298471388966840e+00 5.159487827839334e+02 3 5.932916594323345e+02 3.608814360715946e+02 -5.336137507463695e+01 -4.678804824963537e+02 4 3.901879065319798e+02 -3.843450585189634e+02 4.706290368567026e+01 -4.806830028757967e+01 - ME 5.363382776736297e-04 + ME 4.029549711869195e-04 Event 115 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -924,7 +924,7 @@ Event 115 Batch 0 2 5.432307281524777e+02 2.250327918244370e+02 4.870559856477670e+02 -8.506664127290338e+01 3 4.265243530840496e+02 2.057819224248363e+02 -2.472237669715339e+02 2.801021835354204e+02 4 5.302449187634726e+02 -4.308147142492733e+02 -2.398322186762331e+02 
-1.950355422625171e+02 - ME 2.364149932043149e-04 + ME 4.159321993514108e-05 Event 116 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -932,7 +932,7 @@ Event 116 Batch 0 2 4.402635748890415e+02 -4.240500842615081e+02 -5.733358735035193e+01 -1.035683405941509e+02 3 4.399967684638562e+02 1.183617589007452e+02 -1.041572505293867e+02 -4.107784286579766e+02 4 6.197396566471035e+02 3.056883253607625e+02 1.614908378797388e+02 5.143467692521278e+02 - ME 1.343295643586522e-04 + ME 4.172733678506819e-05 Event 117 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -940,7 +940,7 @@ Event 117 Batch 0 2 3.074085311587982e+02 -4.270248480828711e+01 -3.034838508096459e+02 2.395944736750828e+01 3 5.360984061023379e+02 3.510554986169303e+02 -1.596589010508530e+02 -3.723849798683070e+02 4 6.564930627388640e+02 -3.083530138086433e+02 4.631427518604987e+02 3.484255325007987e+02 - ME 1.795895763168496e-04 + ME 4.142391000026985e-05 Event 118 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -948,7 +948,7 @@ Event 118 Batch 0 2 5.403602961735903e+02 4.471526113902045e+02 -1.804334130868151e+02 -2.439007487679592e+02 3 5.654623567965698e+02 -5.534570111367966e+02 -1.157195831079003e+02 6.480112868522320e+00 4 3.941773470298406e+02 1.063043997465919e+02 2.961529961947150e+02 2.374206358994370e+02 - ME 3.055618730902428e-05 + ME 7.288650603673961e-06 Event 119 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -956,7 +956,7 @@ Event 119 Batch 0 2 8.009099446659010e+01 5.775399043490319e+01 -2.629604726664823e+01 4.886268393818209e+01 3 7.131140611332349e+02 2.472685400460709e+02 -2.870014097539109e+02 -6.041689532644716e+02 4 7.067949444001758e+02 -3.050225304809738e+02 3.132974570205592e+02 5.553062693262896e+02 - ME 6.861262467765907e-04 + ME 2.815424392761942e-04 Event 120 Batch 0 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -964,7 +964,7 @@ Event 120 Batch 0 2 5.007248873753321e+02 2.708997263130530e+02 -3.880896283797751e+02 1.634784128397387e+02 3 7.413897277398672e+02 -4.257033276374029e+02 5.921425482134987e+02 -1.334264135464211e+02 4 2.578853848848011e+02 1.548036013243502e+02 -2.040529198337238e+02 -3.005199929331748e+01 - ME 1.034513276694145e-04 + ME 6.003662532288496e-06 Event 121 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -972,7 +972,7 @@ Event 121 Batch 0 2 5.732265116821120e+02 -1.149395375629033e+02 4.260916136383032e+02 3.658189076403451e+02 3 4.323948798659248e+02 -2.148488009071912e+01 -4.178027098651986e+02 1.092914804138530e+02 4 4.943786084519640e+02 1.364244176536226e+02 -8.288903773105691e+00 -4.751103880541979e+02 - ME 8.074833733477824e-02 + ME 7.661241871407340e-04 Event 122 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -980,7 +980,7 @@ Event 122 Batch 0 2 3.423360304412701e+02 2.648046119434483e+02 2.369247279710451e+01 -2.156644197927059e+02 3 6.059487982275789e+02 2.457729689670163e+01 -4.569077875801422e+02 3.972469964635579e+02 4 5.517151713311508e+02 -2.893819088401499e+02 4.332153147830377e+02 -1.815825766708520e+02 - ME 2.180123533398812e-04 + ME 5.274300345459390e-05 Event 123 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -988,7 +988,7 @@ Event 123 Batch 0 2 1.430133297276668e+02 -4.205671322284506e+01 3.498095937953869e+01 1.321377229770999e+02 3 7.140350670908600e+02 -2.955397919833849e+01 -6.570980288365154e+02 -2.778395577453968e+02 4 6.429516031814733e+02 7.161069242118367e+01 6.221170694569771e+02 1.457018347682969e+02 - ME 5.626335206455025e-04 + ME 2.698780233597045e-04 Event 124 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -996,7 
+996,7 @@ Event 124 Batch 0 2 6.053457283343441e+02 5.458657819531910e+02 -1.853964251366731e+01 -2.610177782464909e+02 3 7.499633671623128e+02 -6.784114238502394e+02 2.145325921506613e+01 3.189713933003628e+02 4 1.446909045033435e+02 1.325456418970486e+02 -2.913616701398675e+00 -5.795361505387172e+01 - ME 4.169465060943616e-04 + ME 2.629538535113942e-05 Event 125 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1004,7 +1004,7 @@ Event 125 Batch 0 2 6.695439244882118e+02 9.058534244088493e+01 6.586171675820721e+02 7.941529525294386e+01 3 9.341516463500346e+01 3.490868167113007e+01 5.232133368429144e+01 6.906703243419068e+01 4 7.370409108767834e+02 -1.254940241120154e+02 -7.109385012663632e+02 -1.484823276871337e+02 - ME 1.111472366347957e-02 + ME 4.436636984625360e-03 Event 126 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1012,7 +1012,7 @@ Event 126 Batch 0 2 6.465564354211967e+02 -2.094351601488127e+02 -1.930091683601272e+02 -5.804477571728034e+02 3 1.356182567235447e+02 -2.832094442380729e+01 9.735247446175231e+01 -9.007070211700794e+01 4 7.178253078552584e+02 2.377561045726200e+02 9.565669389837488e+01 6.705184592898115e+02 - ME 1.775660879411100e-03 + ME 1.230970446288030e-03 Event 127 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1020,7 +1020,7 @@ Event 127 Batch 0 2 4.508388003927651e+02 -3.846405138087858e+02 7.756355374444065e+01 2.220162025777267e+02 3 6.162879941073576e+02 2.174727303224461e+02 1.334711143222092e+02 -5.609830344035003e+02 4 4.328732054998774e+02 1.671677834863399e+02 -2.110346680666500e+02 3.389668318257735e+02 - ME 3.922171581774212e-05 + ME 2.127227557837123e-05 Event 128 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1028,7 +1028,7 @@ Event 128 Batch 0 2 7.468963146802857e+02 5.701805835528932e+02 
-3.440982003215339e+02 -3.381488363986430e+02 3 1.196664332518719e+02 -9.337643239636876e+01 2.398139841985228e+01 7.089280393650260e+01 4 6.334372520678420e+02 -4.768041511565244e+02 3.201168019016817e+02 2.672560324621404e+02 - ME 2.053620454072734e-04 + ME 7.842790653965437e-05 Event 129 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1036,7 +1036,7 @@ Event 129 Batch 0 2 4.378966182438207e+02 -4.256397208622688e+02 4.624364030548149e+01 9.190104474357973e+01 3 7.127537996732577e+02 5.790589826349546e+02 -1.369827771626340e+02 -3.923574802896586e+02 4 3.493495820829217e+02 -1.534192617726859e+02 9.073913685715252e+01 3.004564355460789e+02 - ME 1.668072874757384e-05 + ME 1.046217618618756e-05 Event 130 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1044,7 +1044,7 @@ Event 130 Batch 0 2 6.322026526626455e+02 5.905875735566585e+02 -2.387291116192753e+01 -2.243136110600485e+02 3 5.268087771404591e+02 -3.287250458747471e+02 1.913681034684307e+02 3.644798771698754e+02 4 3.409885701968954e+02 -2.618625276819114e+02 -1.674951923065032e+02 -1.401662661098267e+02 - ME 2.766647151388132e-04 + ME 3.412796728096272e-05 Event 131 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1052,7 +1052,7 @@ Event 131 Batch 0 2 2.691964685177017e+02 -2.641651354044939e+02 4.065264362900757e+01 -3.210735842607325e+01 3 5.382709487855662e+02 -3.022535437819008e+02 -4.307865739991411e+02 1.131429946566680e+02 4 6.925325826967319e+02 5.664186791863947e+02 3.901339303701337e+02 -8.103563623059465e+01 - ME 5.354423766199649e-04 + ME 1.516502654737588e-04 Event 132 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1060,7 +1060,7 @@ Event 132 Batch 0 2 1.376388194981169e+02 -2.491804956023667e+01 3.114513197621116e+01 1.317327453336230e+02 3 7.332494677489981e+02 -3.054807357444667e+02 
-6.882601889638243e+00 -6.665500220046781e+02 4 6.291117127528858e+02 3.303987853047034e+02 -2.426253008657308e+01 5.348172766710551e+02 - ME 3.625143788027957e-04 + ME 2.459616839911958e-04 Event 133 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1068,7 +1068,7 @@ Event 133 Batch 0 2 5.818916885738672e+02 -3.437736592641007e+02 -2.113522447259726e+02 -4.192228966514222e+02 3 7.075583625851592e+02 3.695171106849944e+02 9.875952986414086e+01 5.952667441040354e+02 4 2.105499488409736e+02 -2.574345142089370e+01 1.125927148618317e+02 -1.760438474526132e+02 - ME 6.644965721204062e-03 + ME 3.278402967978973e-04 Event 134 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1076,7 +1076,7 @@ Event 134 Batch 0 2 7.039051474789593e+02 -1.767404282002263e+02 5.832845063404937e+02 3.521710697233707e+02 3 6.740856043500099e+02 9.540039380435479e+01 -5.203258634262522e+02 -4.177932056695244e+02 4 1.220092481710302e+02 8.134003439587134e+01 -6.295864291424151e+01 6.562213594615410e+01 - ME 6.394436352069354e-05 + ME 3.621089826286842e-05 Event 135 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1084,7 +1084,7 @@ Event 135 Batch 0 2 7.491379873081086e+02 -6.603965492909807e+02 -9.243924572685610e+01 -3.413782470545817e+02 3 4.360367703469753e+02 3.763875731093294e+02 3.833030381995060e+01 2.167746473012021e+02 4 3.148252423449159e+02 2.840089761816513e+02 5.410894190690560e+01 1.246035997533796e+02 - ME 3.729096801849378e-05 + ME 1.170602675185252e-05 Event 136 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1092,7 +1092,7 @@ Event 136 Batch 0 2 6.907976432034611e+02 -8.965778913807024e+01 -5.375684903631193e+02 -4.244796613161184e+02 3 4.317447428217263e+02 2.541758793770707e+02 2.501815833403360e+02 2.433255445990286e+02 4 3.774576139748129e+02 -1.645180902390004e+02 
2.873869070227833e+02 1.811541167170898e+02 - ME 3.295715598818487e-05 + ME 1.221598515374744e-05 Event 137 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1100,7 +1100,7 @@ Event 137 Batch 0 2 5.927917878715718e+02 -5.453882061843875e+02 -2.239274061847312e+02 6.172783069514800e+01 3 3.718333194205911e+02 2.859809174201715e+02 -2.363544177495510e+02 2.472896101988843e+01 4 5.353748927078371e+02 2.594072887642160e+02 4.602818239342820e+02 -8.645679171503701e+01 - ME 1.267334233155001e-04 + ME 2.222722395048600e-05 Event 138 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1108,7 +1108,7 @@ Event 138 Batch 0 2 1.164849493482387e+02 2.012854405109472e+01 -2.573298799707043e+01 -1.118096528381494e+02 3 7.481698498358139e+02 -1.044692284663333e+02 -4.003634472873074e+00 7.408294509656059e+02 4 6.353452008159477e+02 8.434068441523856e+01 2.973662246994375e+01 -6.290197981274564e+02 - ME 3.545594402685597e+00 + ME 1.183014588836486e-01 Event 139 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1116,7 +1116,7 @@ Event 139 Batch 0 2 3.415587822283577e+02 -2.468214832259765e+02 1.926082427237748e+02 1.365416492148350e+02 3 5.828887331044928e+02 -1.023403009989268e+02 -5.561813319045077e+02 1.412376154306548e+02 4 5.755524846671491e+02 3.491617842249035e+02 3.635730891807333e+02 -2.777792646454897e+02 - ME 4.142320485322521e-04 + ME 5.213154494000113e-05 Event 140 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1124,7 +1124,7 @@ Event 140 Batch 0 2 4.395392082109443e+02 -3.037880820376849e+02 -2.455930383243060e+02 -2.014735126343029e+02 3 4.709796125547878e+02 -2.826270024952004e+02 2.984919122515593e+02 2.298833426397907e+02 4 5.894811792342680e+02 5.864150845328855e+02 -5.289887392725340e+01 -2.840983000548780e+01 - ME 1.220048440917972e-04 + ME 
2.990357782498624e-05 Event 141 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1132,7 +1132,7 @@ Event 141 Batch 0 2 3.025838986653694e+02 -2.680006525137058e+02 -6.218827689980458e+01 -1.259574698062632e+02 3 5.104624598690772e+02 -2.829910827131053e+02 4.173533268753467e+02 -7.939880721102661e+01 4 6.869536414655528e+02 5.509917352268112e+02 -3.551650499755422e+02 2.053562770172896e+02 - ME 3.735313583347012e-04 + ME 7.151804808113674e-05 Event 142 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1140,7 +1140,7 @@ Event 142 Batch 0 2 4.390011511178412e+02 -3.153925512561953e+02 3.992377088505197e+01 -3.027468279160259e+02 3 4.597282536099518e+02 2.984856708041211e+02 -2.221794712617382e+02 -2.699863960308454e+02 4 6.012705952722066e+02 1.690688045207421e+01 1.822557003766862e+02 5.727332239468712e+02 - ME 1.630913878361870e-04 + ME 8.945447985744934e-05 Event 143 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1148,7 +1148,7 @@ Event 143 Batch 0 2 7.103308443495001e+02 -3.626595603160224e+02 2.462759922459802e+02 5.589240443825270e+02 3 3.424564807343295e+02 4.507572778536915e+01 -2.357842367637252e+02 -2.442343416788665e+02 4 4.472126749161695e+02 3.175838325306533e+02 -1.049175548225529e+01 -3.146897027036604e+02 - ME 1.304325296055160e-03 + ME 1.789392510542836e-04 Event 144 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1156,7 +1156,7 @@ Event 144 Batch 0 2 6.893886390440568e+02 -2.470805413393656e+02 1.331686162420120e+02 6.296618309717105e+02 3 7.132719020730987e+02 2.482972988978650e+02 -2.304803220538649e+02 -6.276815106349294e+02 4 9.733945888284487e+01 -1.216757558499225e+00 9.731170581185302e+01 -1.980320336781234e+00 - ME 3.769348793094523e-04 + ME 1.486904409371019e-04 Event 145 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -1164,7 +1164,7 @@ Event 145 Batch 0 2 3.784954309743686e+02 2.391836032855264e+02 1.115572896135236e+01 -2.931305935912622e+02 3 7.389406222827198e+02 -4.231861417520660e+02 1.513250860114713e+02 5.865555822189353e+02 4 3.825639467429113e+02 1.840025384665394e+02 -1.624808149728234e+02 -2.934249886276727e+02 - ME 2.193982780219728e-03 + ME 2.016505354100400e-04 Event 146 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1172,7 +1172,7 @@ Event 146 Batch 0 2 4.681255842987410e+02 -3.253195724522379e+01 1.754808059398437e+02 -4.327698247100133e+02 3 2.875849079819393e+02 2.091841587061404e+01 1.879781824316579e+02 -2.166372592748876e+02 4 7.442895077193195e+02 1.161354137460973e+01 -3.634589883715017e+02 6.494070839849006e+02 - ME 5.347932692815789e-02 + ME 1.210467216316050e-02 Event 147 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1180,7 +1180,7 @@ Event 147 Batch 0 2 2.442136391928777e+02 -1.784444843977844e+02 -1.666832492802189e+02 -3.816014311599316e+00 3 5.551361515401285e+02 1.378338123621512e+02 -5.199472642306259e+02 1.372327560591401e+02 4 7.006502092669938e+02 4.061067203563306e+01 6.866305135108448e+02 -1.334167417475408e+02 - ME 7.450632204513606e-04 + ME 2.360352365747709e-04 Event 148 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1188,7 +1188,7 @@ Event 148 Batch 0 2 4.547263863263726e+02 3.928375677411887e+02 5.145105706241225e+01 2.231759855356057e+02 3 7.397285466814292e+02 -5.611511356388266e+02 -1.533645573573770e+02 -4.569322031694095e+02 4 3.055450669921979e+02 1.683135678976379e+02 1.019135002949646e+02 2.337562176338038e+02 - ME 1.440225905683450e-05 + ME 6.307552439231181e-06 Event 149 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1196,7 +1196,7 @@ Event 149 Batch 0 2 
2.343018799311635e+02 9.853424545130945e+01 1.924850318874441e+02 -9.021023174733594e+01 3 7.291173748950658e+02 3.429747374294529e+01 -5.990516617369192e+02 4.142136359886766e+02 4 5.365807451737705e+02 -1.328317191942547e+02 4.065666298494750e+02 -3.240034042413406e+02 - ME 8.405553848068603e-04 + ME 8.298171355094406e-05 Event 150 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1204,7 +1204,7 @@ Event 150 Batch 0 2 4.707648023587808e+02 -8.969278865174961e+01 -3.008719699078221e+02 3.507859183712497e+02 3 6.876639918976698e+02 3.906111988928598e+02 4.609284537794546e+02 -3.284046551871671e+02 4 3.415712057435500e+02 -3.009184102411105e+02 -1.600564838716325e+02 -2.238126318408256e+01 - ME 1.070125715137075e-04 + ME 1.887585788236135e-05 Event 151 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1212,7 +1212,7 @@ Event 151 Batch 0 2 6.503034458278056e+02 -1.575298496674962e+02 -3.658248853789647e+01 -6.298735108350154e+02 3 6.998690336552314e+02 1.302751858829802e+02 -1.019415103826456e+02 6.800389464387812e+02 4 1.498275205169629e+02 2.725466378451580e+01 1.385239989205421e+02 -5.016543560376590e+01 - ME 6.663776898009472e-04 + ME 4.060174493404880e-04 Event 152 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1220,7 +1220,7 @@ Event 152 Batch 0 2 7.401192382353395e+02 1.493701961830190e+02 6.288419447382046e+02 3.605867993093739e+02 3 7.332111095478891e+02 -1.230079111936445e+02 -6.287602831147091e+02 -3.565502647954901e+02 4 2.666965221677112e+01 -2.636228498937447e+01 -8.166162349550861e-02 -4.036534513883709e+00 - ME 8.446403371723604e-04 + ME 1.210964379505254e-04 Event 153 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1228,7 +1228,7 @@ Event 153 Batch 0 2 5.645797071775899e+02 7.941901905692946e+01 3.691428696980725e+02 -4.197337333594241e+02 3 
6.079979027943974e+02 1.021455738177839e+02 -5.566920170809548e+02 2.220849604771994e+02 4 3.274223900280123e+02 -1.815645928747133e+02 1.875491473828823e+02 1.976487728822249e+02 - ME 2.846663840296023e-05 + ME 9.895323747190810e-06 Event 154 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1236,7 +1236,7 @@ Event 154 Batch 0 2 6.022174885419887e+02 -5.152457849782368e+02 -1.493252664732707e+02 -2.736597328082223e+02 3 3.617627670199851e+02 1.925398333816265e+02 -2.626238171638091e+02 1.575736108034646e+02 4 5.360197444380261e+02 3.227059515966102e+02 4.119490836370796e+02 1.160861220047577e+02 - ME 6.437319974597944e-05 + ME 1.660411512586943e-05 Event 155 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1244,7 +1244,7 @@ Event 155 Batch 0 2 6.202229507100907e+02 -2.107861924791831e+02 -3.212541876154504e+02 4.868690137883067e+02 3 2.943040328093193e+02 2.940980302320592e+02 1.073731199058907e+01 2.433613089266508e+00 4 5.854730164805898e+02 -8.331183775287627e+01 3.105168756248616e+02 -4.893026268775732e+02 - ME 5.904510654775639e-03 + ME 4.918845171174253e-04 Event 156 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1252,7 +1252,7 @@ Event 156 Batch 0 2 4.945486805149833e+02 4.540818864859257e+02 -1.431706201593249e+02 -1.337542944644701e+02 3 5.997303202813281e+02 -3.624214233270367e+02 -5.726286247273350e+01 4.743923835389624e+02 4 4.057209992036886e+02 -9.166046315888883e+01 2.004334826320584e+02 -3.406380890744924e+02 - ME 4.701306652347430e-03 + ME 1.986837824231628e-04 Event 157 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1260,7 +1260,7 @@ Event 157 Batch 0 2 4.617003083190191e+02 3.118400043328062e+02 3.404502064148864e+02 -4.079626411035589e+00 3 5.720097526413113e+02 -4.999240316044806e+01 -4.329264075474301e+02 -3.705005295422582e+02 4 
4.662899390396696e+02 -2.618476011723578e+02 9.247620113254365e+01 3.745801559532937e+02 - ME 3.907978340087068e-05 + ME 1.403598809900552e-05 Event 158 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1268,7 +1268,7 @@ Event 158 Batch 0 2 6.784877363061535e+02 -5.707102180762959e+02 -3.102223423027389e+02 -1.959529373021938e+02 3 5.650909444059712e+02 5.525284805868615e+02 7.765167789879932e+01 8.950011457818250e+01 4 2.564213192878751e+02 1.818173748943443e+01 2.325706644039396e+02 1.064528227240114e+02 - ME 3.503179830087694e-05 + ME 8.470133063482862e-06 Event 159 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1276,7 +1276,7 @@ Event 159 Batch 0 2 5.369491563274252e+02 2.154713482252002e+02 -2.912667909729743e+02 3.962955349875316e+02 3 6.066564496499102e+02 -4.020061311781470e+01 5.572389608252350e+02 -2.364332868806716e+02 4 3.563943940226648e+02 -1.752707351073854e+02 -2.659721698522608e+02 -1.598622481068599e+02 - ME 3.198473025834927e-04 + ME 3.562393617300492e-05 Event 160 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1284,7 +1284,7 @@ Event 160 Batch 0 2 6.492474755438517e+02 3.490068395973682e+02 1.460348644657111e+02 -5.276270735801970e+02 3 2.857818814470013e+02 -2.550253586192556e+02 1.227259509083862e+02 3.964456076362119e+01 4 5.649706430091471e+02 -9.398148097811273e+01 -2.687608153740973e+02 4.879825128165764e+02 - ME 6.719464076924620e-05 + ME 3.516238941302227e-05 Event 161 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1292,7 +1292,7 @@ Event 161 Batch 0 2 6.770282049439580e+02 -2.863253153105184e+02 -4.911270786072976e+02 -3.676672364525180e+02 3 1.598243093356544e+02 -7.505362471426160e+01 1.299195075310522e+02 -5.506073768810752e+01 4 6.631474857203874e+02 3.613789400247800e+02 3.612075710762453e+02 4.227279741406256e+02 - ME 
1.577168105051119e-04 + ME 5.970757951131334e-05 Event 162 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1300,7 +1300,7 @@ Event 162 Batch 0 2 5.178592782584632e+02 -3.271131571456631e+02 3.943743741889439e+02 -7.512700901574514e+01 3 3.730686930366258e+02 -2.885924195736573e+01 -1.360208443078026e+02 -3.461874113706257e+02 4 6.090720287049110e+02 3.559723991030290e+02 -2.583535298811414e+02 4.213144203863710e+02 - ME 1.031749267713353e-04 + ME 2.768303103320498e-05 Event 163 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1308,7 +1308,7 @@ Event 163 Batch 0 2 5.388642316037673e+02 3.152159924116781e+02 3.539969933522669e+01 -4.356149670486711e+02 3 5.364171791816749e+02 -5.299694218906361e+02 3.369785517714305e+01 7.576448071880543e+01 4 4.247185892145582e+02 2.147534294789580e+02 -6.909755451236977e+01 3.598504863298658e+02 - ME 3.508094027565679e-05 + ME 1.485600561394433e-05 Event 164 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1316,7 +1316,7 @@ Event 164 Batch 0 2 6.862697092177667e+02 4.132218376422068e+02 1.310202162324327e+02 -5.320221138485150e+02 3 4.476895523579005e+02 -2.769046850483522e+02 1.374187337517142e+02 3.238299280529301e+02 4 3.660407384243329e+02 -1.363171525938544e+02 -2.684389499841469e+02 2.081921857955847e+02 - ME 3.375894779915149e-05 + ME 1.755563256840939e-05 Event 165 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1324,7 +1324,7 @@ Event 165 Batch 0 2 2.382444910715278e+02 -2.158277263671036e+02 -9.471372817531817e+00 -1.004446273032522e+02 3 7.304591383576048e+02 4.619003715882296e+02 -1.223345688256177e+02 5.524969256086772e+02 4 5.312963705708673e+02 -2.460726452211260e+02 1.318059416431495e+02 -4.520522983054250e+02 - ME 6.966498968932957e-03 + ME 4.549138184301779e-04 Event 166 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1332,7 +1332,7 @@ Event 166 Batch 0 2 2.131352071380649e+02 -7.633553084455029e+01 -1.899581415396244e+02 5.929087379418958e+01 3 7.305557876753161e+02 8.980971292745940e+01 7.136333043711877e+02 1.279589045828712e+02 4 5.563090051866194e+02 -1.347418208290915e+01 -5.236751628315633e+02 -1.872497783770607e+02 - ME 3.314006956523505e-04 + ME 3.352199959657985e-05 Event 167 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1340,7 +1340,7 @@ Event 167 Batch 0 2 4.122964103002419e+02 -3.405127102276982e+02 6.366431608201744e+01 2.235761145061386e+02 3 4.697083356610920e+02 -2.521100678451879e+02 -2.856113063438232e+01 -3.952855880214881e+02 4 6.179952540386658e+02 5.926227780728861e+02 -3.510318544763516e+01 1.717094735153495e+02 - ME 1.146777177775239e-04 + ME 3.829535931496594e-05 Event 168 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1348,7 +1348,7 @@ Event 168 Batch 0 2 7.156643283953484e+02 -3.999734570317170e+02 4.816586825103861e+02 3.467009924560655e+02 3 6.192344221355605e+02 2.722545660880235e+02 -4.999454120042317e+02 -2.436869012025525e+02 4 1.651012494690919e+02 1.277188909436936e+02 1.828672949384504e+01 -1.030140912535133e+02 - ME 1.017624049822302e-03 + ME 5.027887292283473e-05 Event 169 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1356,7 +1356,7 @@ Event 169 Batch 0 2 3.626022684949455e+02 7.511110909567982e+01 -2.030941161665286e+02 -2.908461902563517e+02 3 5.580565590514408e+02 -2.529981754432838e+02 -3.439969378312538e+02 3.592842232626199e+02 4 5.793411724536141e+02 1.778870663476037e+02 5.470910539977822e+02 -6.843803300626824e+01 - ME 1.371698416063432e-04 + ME 4.350242525242475e-05 Event 170 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1364,7 +1364,7 @@ 
Event 170 Batch 0 2 6.602909342483501e+02 4.699653539595539e+02 -3.020118498241596e+02 3.520021683086903e+02 3 1.039297502933440e+02 3.247420585022842e+01 -9.851348423194945e+01 6.473976746580508e+00 4 7.357793154583061e+02 -5.024395598097824e+02 4.005253340561092e+02 -3.584761450552709e+02 - ME 1.673719496447659e-02 + ME 9.967260301798612e-03 Event 171 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1372,7 +1372,7 @@ Event 171 Batch 0 2 1.506693011949600e+02 -3.657300520509282e+01 -1.244227366169959e+02 -7.669834565089053e+01 3 6.344013325830570e+02 -2.026333084464634e+02 -4.956100871165362e+02 3.402578943089165e+02 4 7.149293662219835e+02 2.392063136515561e+02 6.200328237335323e+02 -2.635595486580261e+02 - ME 2.133207113512388e-03 + ME 9.157902172934166e-04 Event 172 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1380,7 +1380,7 @@ Event 172 Batch 0 2 5.352445157558213e+02 -2.018352690102651e+02 3.892440882325296e+02 -3.069825004886504e+02 3 6.716112180685394e+02 2.825227203806547e+02 -5.978593235713698e+02 1.175022124175027e+02 4 2.931442661756383e+02 -8.068745137038898e+01 2.086152353388391e+02 1.894802880711483e+02 - ME 2.630379932615259e-05 + ME 8.067092159940342e-06 Event 173 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1388,7 +1388,7 @@ Event 173 Batch 0 2 6.571348515648592e+02 -2.769863586381786e+02 5.805753619381593e+02 1.343019708712704e+02 3 5.332990408103321e+02 1.871824832342877e+02 -4.782426732337677e+02 1.437168410371092e+02 4 3.095661076248081e+02 8.980387540389081e+01 -1.023326887043915e+02 -2.780188119083794e+02 - ME 9.985413945498126e-03 + ME 1.269359653092767e-04 Event 174 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1396,7 +1396,7 @@ Event 174 Batch 0 2 6.091496911716730e+02 -4.752584064243671e+02 3.135726231883978e+01 
-3.797492797588730e+02 3 6.417481529658018e+02 3.309293137608124e+02 9.015643604119191e+01 5.424004960996682e+02 4 2.491021558625255e+02 1.443290926635548e+02 -1.215136983600317e+02 -1.626512163407953e+02 - ME 1.319192968737130e-03 + ME 1.362612102685676e-04 Event 175 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1404,7 +1404,7 @@ Event 175 Batch 0 2 5.399801778396885e+02 1.966672297646830e+02 2.343185748302537e+02 -4.449667388535759e+02 3 6.987953575798327e+02 -1.857207036318898e+02 -9.664246188148675e+01 6.666955876403318e+02 4 2.612244645804785e+02 -1.094652613279307e+01 -1.376761129487668e+02 -2.217288487867561e+02 - ME 9.528877211334405e-03 + ME 9.613528518728674e-04 Event 176 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1412,7 +1412,7 @@ Event 176 Batch 0 2 6.615757321243968e+02 -4.129469954321281e+02 4.686878756164518e+02 -2.179194886871010e+02 3 1.607981401590110e+02 -6.355407199259605e+01 7.929314438200207e+00 1.474925346731048e+02 4 6.776261277165921e+02 4.765010674247242e+02 -4.766171900546519e+02 7.042695401399614e+01 - ME 6.965204353376922e-04 + ME 3.097907077728356e-04 Event 177 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1420,7 +1420,7 @@ Event 177 Batch 0 2 4.314334067424883e+02 -3.493619040652741e+02 -2.026482683689240e+01 -2.523299055494341e+02 3 4.840006500668400e+02 -1.846595828310067e+02 -1.450727057198388e+02 4.232155216776995e+02 4 5.845659431906716e+02 5.340214868962809e+02 1.653375325567312e+02 -1.708856161282654e+02 - ME 2.160100049311594e-04 + ME 1.084300812640113e-04 Event 178 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1428,7 +1428,7 @@ Event 178 Batch 0 2 4.528135981327372e+02 -2.544528544607913e+02 1.436928116455424e+02 3.458992272209776e+02 3 3.053350882587867e+02 -1.380299578048218e+02 2.072032295570572e+02 
1.767599177741536e+02 4 7.418513136084770e+02 3.924828122656132e+02 -3.508960412025996e+02 -5.226591449951313e+02 - ME 7.384409254828141e-02 + ME 5.382438151181503e-02 Event 179 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1436,7 +1436,7 @@ Event 179 Batch 0 2 7.433145319259943e+02 -2.538538580850882e+02 -6.778753511348521e+02 -1.689962142519080e+02 3 1.647945947160298e+02 1.009041857568576e+02 1.171651165877689e+02 5.699069397138987e+01 4 5.918908733579761e+02 1.529496723282306e+02 5.607102345470832e+02 1.120055202805181e+02 - ME 1.335347052581446e-04 + ME 3.739915465576335e-05 Event 180 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1444,7 +1444,7 @@ Event 180 Batch 0 2 2.396120216689867e+02 1.204528233788652e+02 -1.081248155319049e+02 1.766750195544080e+02 3 5.541470271917004e+02 2.767127195685322e+02 2.999096875483201e+02 3.749175614572557e+02 4 7.062409511393131e+02 -3.971655429473975e+02 -1.917848720164151e+02 -5.515925810116636e+02 - ME 1.316593054412419e-02 + ME 2.792447184071457e-03 Event 181 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1452,7 +1452,7 @@ Event 181 Batch 0 2 2.165494222755782e+02 1.336973493521793e+02 -1.495065670853883e+02 -8.164837697364385e+01 3 6.960869932595207e+02 -2.848973600545249e+02 2.209041937252092e+01 6.347303441548928e+02 4 5.873635844649011e+02 1.512000107023455e+02 1.274161477128675e+02 -5.530819671812490e+02 - ME 6.164296623062663e-02 + ME 3.488874737600980e-03 Event 182 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1460,7 +1460,7 @@ Event 182 Batch 0 2 6.472681881349898e+02 4.279258056181361e+02 3.994050733201775e+02 -2.762448183472868e+02 3 5.337197582091030e+02 -3.479343829022644e+02 -4.034091782989213e+02 -3.254965992745409e+01 4 3.190120536559070e+02 -7.999142271587166e+01 4.004104978744005e+00 
3.087944782747408e+02 - ME 6.393158381765308e-05 + ME 5.523679400573375e-05 Event 183 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1468,7 +1468,7 @@ Event 183 Batch 0 2 6.165307808531154e+02 -3.276949594572818e+02 8.808524820164887e+01 -5.147496540405800e+02 3 2.975460412740734e+02 -1.030095950018341e+02 -2.375020297789284e+02 1.466814775843215e+02 4 5.859231778728107e+02 4.307045544591158e+02 1.494167815772794e+02 3.680681764562588e+02 - ME 6.887775529805495e-05 + ME 2.562496117427957e-05 Event 184 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1476,7 +1476,7 @@ Event 184 Batch 0 2 5.645337360463252e+02 -3.940276919793660e+02 3.776398996283964e+02 1.443212503288767e+02 3 5.368100353438223e+02 2.392766596964613e+02 -1.719264331693737e+02 -4.487237410122139e+02 4 3.986562286098531e+02 1.547510322829050e+02 -2.057134664590229e+02 3.044024906833372e+02 - ME 3.553984578535888e-05 + ME 1.712138666139329e-05 Event 185 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1484,7 +1484,7 @@ Event 185 Batch 0 2 6.347397779710931e+02 2.522092504724420e+02 -1.599825720327363e+02 5.600809373302327e+02 3 4.566768168089404e+02 -3.359958684022406e+02 -1.272903681003782e+02 -2.818823400219340e+02 4 4.085834052199659e+02 8.378661792979838e+01 2.872729401331145e+02 -2.781985973082986e+02 - ME 1.184197550833168e-03 + ME 1.836859309200860e-04 Event 186 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1492,7 +1492,7 @@ Event 186 Batch 0 2 7.089823220133230e+02 -5.197119220861886e+02 4.248734840868308e+02 -2.281183322067745e+02 3 5.364076825758043e+02 3.588264146200084e+02 -3.973752875032956e+02 3.270606945152315e+01 4 2.546099954108725e+02 1.608855074661802e+02 -2.749819658353518e+01 1.954122627552515e+02 - ME 2.583895514537347e-05 + ME 1.318469173008218e-05 Event 187 Batch 0 
0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1500,7 +1500,7 @@ Event 187 Batch 0 2 4.835105223217566e+02 -2.128653471696258e+02 1.375287019182911e+02 -4.117725407538514e+02 3 7.240136612790383e+02 4.407273454759851e+02 -4.896543389042274e+01 5.723264583716990e+02 4 2.924758163992057e+02 -2.278619983063593e+02 -8.856326802786833e+01 -1.605539176178473e+02 - ME 5.307563978210835e-04 + ME 9.185777086042985e-05 Event 188 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1508,7 +1508,7 @@ Event 188 Batch 0 2 6.611118500396009e+02 3.502021063704277e+02 -2.011693879247277e+02 -5.234102027267809e+02 3 3.072944371702247e+02 -6.894916504330918e+01 -1.599953986835475e+02 2.531350551695447e+02 4 5.315937127901742e+02 -2.812529413271184e+02 3.611647866082752e+02 2.702751475572362e+02 - ME 6.863567490702385e-05 + ME 3.862980709292737e-05 Event 189 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1516,7 +1516,7 @@ Event 189 Batch 0 2 7.498478362545707e+02 6.780504955298834e+02 -3.199144947524264e+02 -1.319162971889924e+01 3 3.253008430749361e+02 -2.985087551774363e+02 1.291384938207140e+02 6.034152914782593e+00 4 4.248513206704935e+02 -3.795417403524470e+02 1.907760009317124e+02 7.157476804116639e+00 - ME 8.583750584152986e-05 + ME 1.504471760657040e-05 Event 190 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1524,7 +1524,7 @@ Event 190 Batch 0 2 4.938867893347995e+02 3.689671478502748e+02 -1.218724623869293e+02 3.048516153777389e+02 3 5.264063001598521e+02 6.631942569346465e+01 1.276367949726208e+02 -5.063735530147588e+02 4 4.797069105053494e+02 -4.352865735437401e+02 -5.764332585691415e+00 2.015219376370201e+02 - ME 4.759343488474735e-05 + ME 2.269926034328256e-05 Event 191 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-1532,7 +1532,7 @@ Event 191 Batch 0 2 3.681793141805986e+02 -3.225132888415706e+02 1.579589482507471e+02 -8.117977937027918e+01 3 5.431126642386394e+02 4.058413736814005e+01 9.147123993851424e+01 5.338139246166097e+02 4 5.887080215807621e+02 2.819291514734305e+02 -2.494301881892614e+02 -4.526341452463304e+02 - ME 4.908990110546420e-03 + ME 1.427494731558637e-03 Event 192 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1540,7 +1540,7 @@ Event 192 Batch 0 2 6.054165399887861e+02 1.497087111729466e+02 8.905021611535379e+01 5.798159601983524e+02 3 2.106656439489222e+02 1.451894976721945e+02 -1.487249448604451e+02 3.436443048222171e+01 4 6.839178160622922e+02 -2.948982088451411e+02 5.967472874509133e+01 -6.141803906805740e+02 - ME 4.294450320853435e-02 + ME 6.984876913518998e-03 Event 193 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1548,7 +1548,7 @@ Event 193 Batch 0 2 2.753169163933055e+02 -1.695475157411122e+02 -2.139406274107579e+02 3.581134319495643e+01 3 5.760219428901971e+02 -3.264616044953138e+02 1.527507522369444e+02 -4.493231656306969e+02 4 6.486611407164972e+02 4.960091202364260e+02 6.118987517381347e+01 4.135118224357404e+02 - ME 1.537583375796735e-04 + ME 4.273063058931925e-05 Event 194 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1556,7 +1556,7 @@ Event 194 Batch 0 2 3.445934948105150e+02 -2.970257025567896e+02 -8.183019525038441e+01 1.543509890854414e+02 3 7.485441862377920e+02 6.623797851941252e+02 1.083400559332054e+02 -3.314119056355291e+02 4 4.068623189516925e+02 -3.653540826373358e+02 -2.650986068282081e+01 1.770609165500877e+02 - ME 3.024610065690235e-05 + ME 4.921158833271929e-06 Event 195 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1564,7 +1564,7 @@ Event 195 Batch 0 2 2.012122274303647e+02 -5.190018365965096e+01 
1.322177369426910e+02 -1.425173724194237e+02 3 7.122630330184543e+02 -3.054768058087834e+02 -2.528097616133813e+02 5.916838461125119e+02 4 5.865247395511832e+02 3.573769894684365e+02 1.205920246706904e+02 -4.491664736930883e+02 - ME 3.011639483286710e-03 + ME 4.696445912229638e-04 Event 196 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1572,7 +1572,7 @@ Event 196 Batch 0 2 4.490485793345989e+02 3.485190427929747e+02 -2.661098616642627e+01 -2.819059396826192e+02 3 5.531554978829222e+02 -3.330165694254377e+02 4.416170126965178e+02 7.442003978758296e+00 4 4.977959227824785e+02 -1.550247336753688e+01 -4.150060265300915e+02 2.744639357038610e+02 - ME 4.340266456570635e-05 + ME 9.363355109875406e-06 Event 197 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1580,7 +1580,7 @@ Event 197 Batch 0 2 3.951249254444253e+02 -2.278358800090239e+02 3.101157211704546e+02 -8.968142489336992e+01 3 3.607080640108546e+02 -2.889948719219027e+02 2.155030307719242e+02 -1.227661082778765e+01 4 7.441670105447209e+02 5.168307519309257e+02 -5.256187519423792e+02 1.019580357211576e+02 - ME 3.377741088449004e-02 + ME 6.597373610109231e-03 Event 198 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1588,7 +1588,7 @@ Event 198 Batch 0 2 3.750236904637998e+02 1.183014344420310e+02 -1.005952209347265e+02 -3.413621838211424e+02 3 4.381296266085964e+02 -2.726825461625328e+02 1.003845461170281e+02 -3.279096546785175e+02 4 6.868466829276033e+02 1.543811117205018e+02 2.106748176980602e-01 6.692718384996598e+02 - ME 9.606390506705955e-04 + ME 6.145502577419889e-04 Event 199 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1596,7 +1596,7 @@ Event 199 Batch 0 2 2.454478562244572e+02 -2.058455361543722e+02 -1.131056012155068e+02 -7.126982772660261e+01 3 5.321797086694488e+02 -9.806778012582416e+01 
-4.820333037417012e+02 -2.030808875905193e+02 4 7.223724351060940e+02 3.039133162801963e+02 5.951389049572081e+02 2.743507153171219e+02 - ME 1.577081887352965e-03 + ME 3.088173795554332e-04 Event 200 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1604,7 +1604,7 @@ Event 200 Batch 0 2 3.952431318363244e+02 3.031309873729303e+02 9.337877017948550e+01 2.358159092128122e+02 3 6.094031244332663e+02 -7.796753338981905e+01 -5.315426896439308e+02 -2.876727322709444e+02 4 4.953537437304092e+02 -2.251634539831113e+02 4.381639194644453e+02 5.185682305813224e+01 - ME 6.703240553489506e-05 + ME 1.668296552597111e-05 Event 201 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1612,7 +1612,7 @@ Event 201 Batch 0 2 6.497938633639732e+02 3.771120671245744e+02 3.553445817627057e+02 -3.921081252746440e+02 3 3.369790646193914e+02 -2.140351778515325e+02 1.061239955238163e+02 2.376584318047305e+02 4 5.132270720166357e+02 -1.630768892730420e+02 -4.614685772865220e+02 1.544496934699135e+02 - ME 6.283412004793947e-05 + ME 2.404518058628388e-05 Event 202 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1620,7 +1620,7 @@ Event 202 Batch 0 2 7.267802742470179e+02 6.523432021666289e+02 -1.481957728499301e+02 2.840702844913056e+02 3 3.546086620137576e+02 -3.102429173963679e+02 -5.939291787501398e+01 -1.611493614224694e+02 4 4.186110637392242e+02 -3.421002847702610e+02 2.075886907249440e+02 -1.229209230688360e+02 - ME 1.894138330341389e-04 + ME 2.830403199974809e-05 Event 203 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1628,7 +1628,7 @@ Event 203 Batch 0 2 4.830190702985662e+02 2.789429895135886e+02 -3.943102945050296e+02 -4.197918611657844e+00 3 5.247163710833165e+02 -4.266462829986153e+02 3.263988520595893e+01 3.037019215942698e+02 4 4.922645586181170e+02 1.477032934850268e+02 
3.616704092990706e+02 -2.995040029826120e+02 - ME 5.831910678002871e-04 + ME 5.153190919865371e-05 Event 204 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1636,7 +1636,7 @@ Event 204 Batch 0 2 6.952375769935185e+02 3.823764713153302e+01 6.531840992713522e+02 -2.350397908115460e+02 3 6.250862947179036e+02 1.031861473443961e+02 -5.506835576815644e+02 2.771878679515999e+02 4 1.796761282885781e+02 -1.414237944759291e+02 -1.025005415897879e+02 -4.214807714005369e+01 - ME 1.802858800889920e-04 + ME 1.903000177287069e-05 Event 205 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1644,7 +1644,7 @@ Event 205 Batch 0 2 5.625197268936781e+02 2.955060596751036e+02 4.395356105446072e+02 -1.895074112086703e+02 3 3.144813194259642e+02 -1.941101430078122e+02 -7.073026664887073e+00 -2.473251401357733e+02 4 6.229989536803572e+02 -1.013959166672914e+02 -4.324625838797200e+02 4.368325513444433e+02 - ME 1.140145509231641e-04 + ME 3.163472493443465e-05 Event 206 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1652,7 +1652,7 @@ Event 206 Batch 0 2 5.487698581700869e+02 -4.771827558939671e+02 -2.639484985605369e+02 6.145050708573941e+01 3 4.357856725513919e+02 1.877155863290790e+02 1.701172104948722e+02 3.545872893148349e+02 4 5.154444692785200e+02 2.894671695648880e+02 9.383128806566407e+01 -4.160377964005746e+02 - ME 4.167786087259531e-03 + ME 3.341888001113221e-04 Event 207 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1660,7 +1660,7 @@ Event 207 Batch 0 2 5.289473514933904e+02 -3.230637718239221e+02 -3.258094337294262e+02 2.631792409740627e+02 3 3.730441408755686e+02 -1.145152671243400e+02 -7.298530142052728e+01 -3.474497523579300e+02 4 5.980085076310412e+02 4.375790389482623e+02 3.987947351499535e+02 8.427051138386733e+01 - ME 1.161501350367753e-04 + ME 
3.789028948405571e-05 Event 208 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1668,7 +1668,7 @@ Event 208 Batch 0 2 3.144460531270953e+02 3.105028133645123e+02 -3.495125011961062e+01 3.525242310830974e+01 3 7.230517599976935e+02 -6.554206809343713e+02 2.220922910679198e+02 2.095294558946058e+02 4 4.625021868752117e+02 3.449178675698588e+02 -1.871410409483092e+02 -2.447818790029155e+02 - ME 4.858457850437588e-04 + ME 2.941989209837521e-05 Event 209 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1676,7 +1676,7 @@ Event 209 Batch 0 2 2.827014058170527e+02 -6.682954863774688e+01 -1.958656753088385e+02 -1.925890275057887e+02 3 5.969812148172332e+02 5.625717004655273e+02 1.060136244597389e+02 -1.692949027847388e+02 4 6.203173793657136e+02 -4.957421518277804e+02 8.985205084909943e+01 3.618839302905275e+02 - ME 1.004351001266980e-04 + ME 2.261939336541961e-05 Event 210 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1684,7 +1684,7 @@ Event 210 Batch 0 2 3.369223392964550e+02 -2.366581006943837e+02 8.850719545688517e+01 -2.228813191927023e+02 3 6.926279093100447e+02 9.835546321295956e+01 -1.581805884470998e+02 6.671120783270956e+02 4 4.704497513935005e+02 1.383026374814242e+02 6.967339299021461e+01 -4.442307591343933e+02 - ME 5.974710408786874e-02 + ME 3.044010300440331e-03 Event 211 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1692,7 +1692,7 @@ Event 211 Batch 0 2 5.754314663824422e+02 -1.965408456680789e+02 -5.399725108422632e+02 3.037689947684008e+01 3 6.656941886103589e+02 4.112771407945243e+02 5.114655840792436e+02 1.113679599883347e+02 4 2.588743450071987e+02 -2.147362951264454e+02 2.850692676301957e+01 -1.417448594651748e+02 - ME 4.382347812376007e-04 + ME 1.754510489093768e-05 Event 212 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -1700,7 +1700,7 @@ Event 212 Batch 0 2 5.922157374848572e+02 8.073316194509509e+00 4.947261155542873e+02 -3.254233732830556e+02 3 3.635572903001510e+02 8.951663862813328e+01 4.011175755255380e+01 3.500738802669425e+02 4 5.442269722149914e+02 -9.758995482264278e+01 -5.348378731068407e+02 -2.465050698388706e+01 - ME 3.041427876287276e-04 + ME 1.919214373141161e-04 Event 213 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1708,7 +1708,7 @@ Event 213 Batch 0 2 7.434820262506830e+02 2.991548764052629e+02 2.111623598614188e+02 -6.470566753063675e+02 3 5.607612173038236e+02 -2.664197873565705e+02 -1.905271140771768e+02 4.551626726109781e+02 4 1.957567564454930e+02 -3.273508904869271e+01 -2.063524578424195e+01 1.918940026953895e+02 - ME 1.827786070323022e-04 + ME 1.896082550340891e-04 Event 214 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1716,7 +1716,7 @@ Event 214 Batch 0 2 5.400874280734793e+02 3.457358963402696e+02 2.445843697627679e+02 -3.351710101016577e+02 3 3.400793067879315e+02 1.482066942304564e+02 1.256466447865830e+02 2.791086371729012e+02 4 6.198332651385892e+02 -4.939425905707261e+02 -3.702310145493508e+02 5.606237292875651e+01 - ME 1.356968066378560e-04 + ME 6.515553919952984e-05 Event 215 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1724,7 +1724,7 @@ Event 215 Batch 0 2 3.916345321859864e+02 3.271767110560381e+02 -1.945589530122144e+02 9.208594000107233e+01 3 6.136750729169615e+02 -1.269585669220027e+02 2.644680756040779e+02 -5.390132228350478e+02 4 4.946903948970534e+02 -2.002181441340350e+02 -6.990912259186331e+01 4.469272828339764e+02 - ME 6.207321332343461e-05 + ME 3.427926940877871e-05 Event 216 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1732,7 +1732,7 @@ Event 216 Batch 0 2 
3.767411090262154e+02 1.602503356822860e+02 2.758455349572533e+02 -2.004069210086422e+02 3 4.061922956351256e+02 3.340053729931861e+02 2.237650079776778e+02 5.798114391563544e+01 4 7.170665953386593e+02 -4.942557086754721e+02 -4.996105429349309e+02 1.424257770930068e+02 - ME 1.232271832865728e-03 + ME 2.360785017217177e-04 Event 217 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1740,7 +1740,7 @@ Event 217 Batch 0 2 6.474118977458852e+02 -5.378641111590873e+02 -3.279650037002520e+02 1.492759847325320e+02 3 5.088298200539713e+02 3.261878344469131e+02 1.555821256186315e+02 -3.581947579501665e+02 4 3.437582822001433e+02 2.116762767121744e+02 1.723828780816206e+02 2.089187732176345e+02 - ME 3.357118960820415e-05 + ME 1.388331578224744e-05 Event 218 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1748,7 +1748,7 @@ Event 218 Batch 0 2 6.658501161076259e+02 -6.577627036244854e+02 -3.020200479570956e+01 9.895676706252418e+01 3 2.516345839620714e+02 1.565221509782131e+02 -1.156477271957936e+02 1.595192254662914e+02 4 5.825152999303023e+02 5.012405526462722e+02 1.458497319915031e+02 -2.584759925288157e+02 - ME 5.956187308313417e-04 + ME 1.036808356896783e-04 Event 219 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1756,7 +1756,7 @@ Event 219 Batch 0 2 4.328556070633435e+02 6.122246558068494e+01 -1.687441385117925e+02 3.938796795879554e+02 3 6.500677455605621e+02 -3.703058656885360e+02 4.356876543064814e+02 -3.092537914719426e+02 4 4.170766473760945e+02 3.090834001078509e+02 -2.669435157946888e+02 -8.462588811601287e+01 - ME 2.797067114354785e-04 + ME 9.046106878448173e-05 Event 220 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1764,7 +1764,7 @@ Event 220 Batch 0 2 3.686297280598666e+02 -3.497113779929074e+02 -8.765282776369953e+01 7.685577594963354e+01 3 
4.155522773953191e+02 -1.777404948015450e+02 -1.525848366500187e+02 3.432344379292750e+02 4 7.158179945448145e+02 5.274518727944524e+02 2.402376644137182e+02 -4.200902138789084e+02 - ME 3.485410710153060e-03 + ME 1.676729229638681e-03 Event 221 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1772,7 +1772,7 @@ Event 221 Batch 0 2 5.295220830718469e+02 3.654688468413813e+01 4.204675060608333e+02 3.197890523886257e+02 3 7.127556392876786e+02 -1.727486268095863e+02 -4.342549693537605e+02 -5.381460163035255e+02 4 2.577222776404743e+02 1.362017421254481e+02 1.378746329292729e+01 2.183569639148998e+02 - ME 2.819264207321091e-05 + ME 2.031931825964470e-05 Event 222 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1780,7 +1780,7 @@ Event 222 Batch 0 2 2.464305981122427e+02 -2.054199106396077e+02 6.127423271580306e+01 1.215572638876956e+02 3 6.926647117218595e+02 4.702892479611936e+02 3.872350261814336e+02 -3.296383785530530e+02 4 5.609046901658980e+02 -2.648693373215859e+02 -4.485092588972366e+02 2.080811146653574e+02 - ME 6.319142394583372e-05 + ME 1.678695785515194e-05 Event 223 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1788,7 +1788,7 @@ Event 223 Batch 0 2 2.463384302181125e+02 -1.209251938955738e+02 -2.140981972257043e+02 -1.488897673935926e+01 3 6.819620845265065e+02 -2.400891875757811e+02 5.819023806457059e+02 2.623339210620683e+02 4 5.716994852553812e+02 3.610143814713547e+02 -3.678041834200016e+02 -2.474449443227091e+02 - ME 3.931927185620913e-04 + ME 4.810915220985587e-05 Event 224 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1796,7 +1796,7 @@ Event 224 Batch 0 2 2.236851263016067e+02 -8.671871524968952e+01 1.717231909970332e+02 1.141317038679677e+02 3 5.308972974363861e+02 -3.715833295102001e+01 4.680039348616383e+02 2.478780257941054e+02 4 
7.454175762620068e+02 1.238770482007099e+02 -6.397271258586715e+02 -3.620097296620728e+02 - ME 8.708656265179471e-02 + ME 6.017706528853119e-02 Event 225 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1804,7 +1804,7 @@ Event 225 Batch 0 2 5.094176014319268e+02 1.569347096242780e+02 -1.561291130928888e+00 -4.846394040251013e+02 3 7.252311334449815e+02 -3.845161955462210e+02 -4.374219820797174e+01 6.133466494377277e+02 4 2.653512651230916e+02 2.275814859219426e+02 4.530348933890067e+01 -1.287072454126262e+02 - ME 3.974215742688118e-04 + ME 1.151501859389029e-04 Event 226 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1812,7 +1812,7 @@ Event 226 Batch 0 2 6.863217264048350e+02 -2.391756120967483e+02 -6.171186323675804e+02 1.816511279850093e+02 3 5.332348374442744e+02 1.096335504493486e+02 4.112484130583279e+02 -3.212391931833643e+02 4 2.804434361508906e+02 1.295420616473995e+02 2.058702193092524e+02 1.395880651983551e+02 - ME 3.797053871351767e-05 + ME 1.438206074993319e-05 Event 227 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1820,7 +1820,7 @@ Event 227 Batch 0 2 7.243206345463230e+02 -5.280189925476210e+02 -1.406011303275692e+02 4.754657162080069e+02 3 5.487499634657129e+02 3.840442912861271e+02 -1.353123555187442e+01 -3.917312987222202e+02 4 2.269294019879644e+02 1.439747012614939e+02 1.541323658794436e+02 -8.373441748578679e+01 - ME 2.903986554770466e-04 + ME 5.165623507180856e-05 Event 228 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1828,7 +1828,7 @@ Event 228 Batch 0 2 2.119578664379945e+02 1.625437651479949e+01 -1.806612394559917e+02 1.096514885776142e+02 3 6.254097456672617e+02 -3.200704000326812e+01 3.158243706171928e+02 5.388579277416935e+02 4 6.626323878947439e+02 1.575266348846865e+01 -1.351631311612011e+02 -6.485094163193077e+02 - ME 
8.951233069377997e-01 + ME 3.800526374221887e-02 Event 229 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1836,7 +1836,7 @@ Event 229 Batch 0 2 5.921227120343664e+02 -3.877491982207575e+02 4.449193714386763e+02 -4.802726626309342e+01 3 4.688278331283221e+02 3.470549659129084e+02 -1.517581364471262e+02 -2.762641051115459e+02 4 4.390494548373113e+02 4.069423230784909e+01 -2.931612349915501e+02 3.242913713746393e+02 - ME 3.492131538818778e-05 + ME 1.250052930035257e-05 Event 230 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1844,7 +1844,7 @@ Event 230 Batch 0 2 4.261952284727868e+02 2.153699775439378e+02 -1.171086083390750e+02 3.486312082969335e+02 3 3.540619701921573e+02 3.070144260847319e+01 1.307424531367546e+02 3.276029778648147e+02 4 7.197428013350559e+02 -2.460714201524109e+02 -1.363384479767965e+01 -6.762341861617483e+02 - ME 3.186738302883428e-01 + ME 4.711214236813061e-02 Event 231 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1852,7 +1852,7 @@ Event 231 Batch 0 2 4.205236024420392e+02 7.533931576750228e+01 -3.260217181731272e+02 -2.547036061581322e+02 3 5.397543491930860e+02 8.423195081267914e+01 -1.158376015978276e+02 5.204050211049134e+02 4 5.397220483648740e+02 -1.595712665801811e+02 4.418593197709548e+02 -2.657014149467809e+02 - ME 5.532186388062512e-04 + ME 3.265984123744224e-04 Event 232 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1860,7 +1860,7 @@ Event 232 Batch 0 2 4.295782852421121e+02 3.239064445356881e+02 9.240815775655221e-01 2.821724019337124e+02 3 7.183371274312143e+02 -6.155391061575082e+02 -1.955291718271078e+02 -3.144649112405858e+02 4 3.520845873266736e+02 2.916326616218201e+02 1.946050902495422e+02 3.229250930687335e+01 - ME 6.730603828970119e-05 + ME 1.049779024540051e-05 Event 233 Batch 0 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1868,7 +1868,7 @@ Event 233 Batch 0 2 3.640046126075324e+02 -2.220120664068515e+02 -1.165482463207536e+02 2.638683509799470e+02 3 4.682121509308883e+02 -1.009786196736112e+02 3.762431872847591e+02 2.597441061312976e+02 4 6.677832364615790e+02 3.229906860804628e+02 -2.596949409640055e+02 -5.236124571112447e+02 - ME 5.385640989777132e-03 + ME 7.598357868514145e-04 Event 234 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1876,7 +1876,7 @@ Event 234 Batch 0 2 8.690043548936441e+01 -2.607433849884744e+01 -7.258333015587984e+01 4.004341073848801e+01 3 6.785651905172676e+02 -3.574930335951373e+02 -4.725723606052789e+01 5.748184081539155e+02 4 7.345343739933678e+02 3.835673720939847e+02 1.198405662164078e+02 -6.148618188924036e+02 - ME 1.962113644780599e-01 + ME 8.152211059226219e-02 Event 235 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1884,7 +1884,7 @@ Event 235 Batch 0 2 3.000566282865331e+02 1.219146462304108e+01 -2.126850238006026e+02 2.113064812540423e+02 3 7.160981218147422e+02 2.575873756248088e+02 2.779062108697769e+02 -6.076293293985470e+02 4 4.838452498987246e+02 -2.697788402478500e+02 -6.522118706917435e+01 3.963228481445046e+02 - ME 3.940402333844027e-05 + ME 2.498899672933017e-05 Event 236 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1892,7 +1892,7 @@ Event 236 Batch 0 2 1.510518772182422e+02 -9.497518588910037e+01 1.467158067736534e+01 1.165380984781943e+02 3 6.955499852411461e+02 5.933480346078575e+02 3.495450158124774e+02 9.770452249822526e+01 4 6.533981375406115e+02 -4.983728487187572e+02 -3.642165964898426e+02 -2.142426209764196e+02 - ME 1.121647028585911e-03 + ME 2.623118294900277e-04 Event 237 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1900,7 +1900,7 @@ Event 
237 Batch 0 2 2.173874152942701e+02 2.069918593916189e+02 -3.850229167793934e+01 -5.412237993169356e+01 3 7.305677895866185e+02 -6.701932224704495e+02 -2.421540700080861e+02 1.610333695687662e+02 4 5.520447951191120e+02 4.632013630788306e+02 2.806563616860255e+02 -1.069109896370727e+02 - ME 1.822378225061386e-04 + ME 2.170005261464319e-05 Event 238 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1908,7 +1908,7 @@ Event 238 Batch 0 2 6.349573912113930e+02 -3.336495545457479e+02 -4.785400196851591e+02 2.506956580500139e+02 3 5.768887318987100e+02 4.812119270965607e+02 2.334547330568691e+02 -2.161818165921041e+02 4 2.881538768898968e+02 -1.475623725508129e+02 2.450852866282900e+02 -3.451384145790988e+01 - ME 9.810731053503000e-05 + ME 1.383744831772315e-05 Event 239 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1916,7 +1916,7 @@ Event 239 Batch 0 2 5.349076725903783e+02 -5.331874414268931e+02 1.887721601290929e+01 -3.848403846142781e+01 3 3.658437465440003e+02 8.335465236419728e+01 1.670818061666301e+01 -3.558292926602242e+02 4 5.992485808656214e+02 4.498327890626960e+02 -3.558539662957234e+01 3.943133311216517e+02 - ME 9.226736931333760e-05 + ME 2.560110521983184e-05 Event 240 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1924,7 +1924,7 @@ Event 240 Batch 0 2 2.870582387324442e+02 1.830793600232297e+02 -1.562409872742485e+02 1.564389154054251e+02 3 6.007192677438852e+02 3.433229388031108e+02 4.688113613010560e+02 -1.523446941819630e+02 4 6.122224935236703e+02 -5.264022988263405e+02 -3.125703740268075e+02 -4.094221223461989e+00 - ME 1.424405912705748e-04 + ME 3.548113744927254e-05 Event 241 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1932,7 +1932,7 @@ Event 241 Batch 0 2 7.424696267657401e+02 4.823783107714221e+02 2.498315161211407e+02 
5.061190823507636e+02 3 2.455726236162737e+02 -1.827879695947952e+02 -1.199757723946156e+02 -1.118046764652876e+02 4 5.119577496179861e+02 -2.995903411766270e+02 -1.298557437265251e+02 -3.943144058854759e+02 - ME 2.705973755259623e-03 + ME 2.366266620918590e-04 Event 242 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1940,7 +1940,7 @@ Event 242 Batch 0 2 7.249130370348905e+02 1.676828147928013e+02 6.059046362201677e+02 -3.609168279440810e+02 3 6.240672718074169e+02 -4.529413961306761e+01 -5.490982345027019e+02 2.930862151720549e+02 4 1.510196911576933e+02 -1.223886751797337e+02 -5.680640171746593e+01 6.783061277202641e+01 - ME 4.587322306592483e-05 + ME 1.668420503127583e-05 Event 243 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1948,7 +1948,7 @@ Event 243 Batch 0 2 4.655090712555229e+02 2.096323612054770e+02 2.113490506800235e+02 3.578890153850057e+02 3 5.764797256412519e+02 6.697224883641857e+01 -5.382210340689440e+02 -1.953502251008744e+02 4 4.580112031032257e+02 -2.766046100418949e+02 3.268719833889206e+02 -1.625387902841314e+02 - ME 2.309042201876567e-04 + ME 3.999521919602606e-05 Event 244 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1956,7 +1956,7 @@ Event 244 Batch 0 2 5.237109195354749e+02 1.305098338947756e+02 -4.868141165486322e+02 -1.423106687020528e+02 3 5.804450110242352e+02 -4.045654344879671e+02 2.643676733537771e+02 3.214855413949400e+02 4 3.958440694402901e+02 2.740556005931916e+02 2.224464431948551e+02 -1.791748726928872e+02 - ME 2.644202232750943e-04 + ME 2.634847163425152e-05 Event 245 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1964,7 +1964,7 @@ Event 245 Batch 0 2 2.629169357520612e+02 2.457511487795889e+02 -4.402365929491729e+01 -8.242333044139184e+01 3 6.931386101565748e+02 -5.195573187661655e+02 4.004017488088275e+02 
-2.240084037645317e+02 4 5.439444540913644e+02 2.738061699865766e+02 -3.563780895139104e+02 3.064317342059234e+02 - ME 4.288053786412853e-05 + ME 1.052590061693975e-05 Event 246 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1972,7 +1972,7 @@ Event 246 Batch 0 2 6.300937687157445e+02 -5.459948028041557e+02 3.085954426748102e+02 6.063567799240802e+01 3 1.673910408536145e+02 -3.546130270298926e+01 7.662824936562275e+01 -1.445350060290698e+02 4 7.025151904306430e+02 5.814561055071442e+02 -3.852236920404341e+02 8.389932803666261e+01 - ME 6.282756509154168e-04 + ME 1.915763997923398e-04 Event 247 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1980,7 +1980,7 @@ Event 247 Batch 0 2 2.577847506495701e+02 2.418237207037818e+02 -8.449121421856779e+01 2.890502538162603e+01 3 5.130193185035739e+02 4.381905811488919e+02 1.366496386102691e+02 2.291390669832418e+02 4 7.291959308468561e+02 -6.800143018526737e+02 -5.215842439170134e+01 -2.580440923648679e+02 - ME 4.005872724472581e-03 + ME 1.831864018495938e-03 Event 248 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1988,7 +1988,7 @@ Event 248 Batch 0 2 7.033207479153643e+02 -5.040306065309413e+02 -2.020637997366072e+02 4.469714117975369e+02 3 1.758360012551320e+02 -1.471306652922549e+01 -4.035460943683606e+00 -1.751728862172264e+02 4 6.208432508295037e+02 5.187436730601667e+02 2.060992606802909e+02 -2.717985255803103e+02 - ME 5.592865021063005e-04 + ME 1.512538512828554e-04 Event 249 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -1996,7 +1996,7 @@ Event 249 Batch 0 2 3.018816177222694e+02 5.523075638651412e+01 1.752331212074551e+02 2.395316845419020e+02 3 6.597415560701297e+02 6.315352823685419e+01 -6.561001191322722e+02 -2.834054254405022e+01 4 5.383768262076012e+02 -1.183842846233684e+02 4.808669979248172e+02 
-2.111911419978518e+02 - ME 4.868100986861644e-04 + ME 9.225490912808109e-05 Event 250 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2004,7 +2004,7 @@ Event 250 Batch 0 2 2.166381935101301e+02 -1.289072913913530e+02 -1.189615590004073e+02 -1.271344351215279e+02 3 6.815426093761062e+02 -2.511966318704653e+02 5.323234433390903e+02 3.435583388650892e+02 4 6.018191971137635e+02 3.801039232618182e+02 -4.133618843386827e+02 -2.164239037435611e+02 - ME 3.468666532553966e-04 + ME 6.586594805989363e-05 Event 251 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2012,7 +2012,7 @@ Event 251 Batch 0 2 6.676961532387151e+02 -3.991265595084280e+01 -4.419965947723094e+02 4.988628500443886e+02 3 7.150412702460949e+02 3.921851524844908e+01 5.505653759000154e+02 -4.545587894617490e+02 4 1.172625765151894e+02 6.941407023942340e-01 -1.085687811277060e+02 -4.430406058263954e+01 - ME 5.615833562023813e-04 + ME 4.930952510857648e-05 Event 252 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2020,7 +2020,7 @@ Event 252 Batch 0 2 2.112668789066533e+02 -1.147554660376938e+02 3.364589711187055e+01 -1.741632301749357e+02 3 7.393007599584276e+02 2.529046383258835e+02 -3.593132473314827e+02 5.945576909606565e+02 4 5.494323611349191e+02 -1.381491722881897e+02 3.256673502196121e+02 -4.203944607857206e+02 - ME 2.709805393201018e-03 + ME 3.541023077707110e-04 Event 253 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2028,7 +2028,7 @@ Event 253 Batch 0 2 7.299659304470913e+01 -4.405884533650594e+01 -5.451291667290519e+01 2.038780663930336e+01 3 7.253475305576840e+02 3.245698054519170e+02 -1.402290280555607e+02 -6.333397991328418e+02 4 7.016558763976062e+02 -2.805109601154107e+02 1.947419447284657e+02 6.129519924935382e+02 - ME 6.484723438037138e-04 + ME 3.511004874943257e-04 Event 254 
Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2036,7 +2036,7 @@ Event 254 Batch 0 2 1.982520535096858e+02 -6.164633378269741e+01 1.773450413210087e+02 -6.365801262063783e+01 3 7.183815394471145e+02 -1.984891252513599e+02 -6.893152145826987e+02 -3.896971029099802e+01 4 5.833664070431995e+02 2.601354590340572e+02 5.119701732616900e+02 1.026277229116358e+02 - ME 9.210498573936143e-05 + ME 1.539519794804785e-05 Event 255 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2044,7 +2044,7 @@ Event 255 Batch 0 2 5.347080663542586e+02 -5.063606624096446e+02 1.592577719822621e+02 6.440929941880935e+01 3 2.475406015289465e+02 -1.856063881081879e+02 3.468010668896048e+00 -1.637516137347836e+02 4 7.177513321167953e+02 6.919670505178326e+02 -1.627257826511582e+02 9.934231431597431e+01 - ME 1.305481727349711e-03 + ME 3.137689362725149e-04 Event 0 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2052,7 +2052,7 @@ Event 0 Batch 1 2 5.775677821222389e+02 4.314431287975208e+02 -2.652567205762379e+02 -2.776332864556192e+02 3 6.023469575940325e+02 -3.228069847179709e+02 5.005558924007591e+02 8.978477890465942e+01 4 3.200852602837275e+02 -1.086361440795499e+02 -2.352991718245218e+02 1.878485075509607e+02 - ME 2.846168667868940e-05 + ME 7.533072458757011e-06 Event 1 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2060,7 +2060,7 @@ Event 1 Batch 1 2 7.241206267812560e+02 3.541578305635416e+02 -4.894807402105655e+02 3.991635230623179e+02 3 7.375567605136832e+02 -3.903081173548693e+02 4.920451519627784e+02 -3.867054653560791e+02 4 3.832261270506111e+01 3.615028679132773e+01 -2.564411752212873e+00 -1.245805770623896e+01 - ME 1.002871021831580e-03 + ME 7.043932941624384e-05 Event 2 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-2068,7 +2068,7 @@ Event 2 Batch 1 2 4.849204091734790e+02 2.108660079931152e+02 4.054727376659824e+02 1.620962335024329e+02 3 2.728468517759738e+02 4.961449545460115e+01 2.005017763154939e+02 1.782774356422519e+02 4 7.422327390505470e+02 -2.604805034477164e+02 -6.059745139814763e+02 -3.403736691446848e+02 - ME 2.729395913593408e-02 + ME 1.721146206228212e-02 Event 3 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2076,7 +2076,7 @@ Event 3 Batch 1 2 4.264155576764489e+02 -4.170952165204416e+02 -7.054834331799705e+01 5.370977042744418e+01 3 7.108631972082329e+02 6.832597695609467e+02 -1.727180704166534e+02 -9.301097030017993e+01 4 3.627212451153183e+02 -2.661645530405051e+02 2.432664137346505e+02 3.930119987273574e+01 - ME 5.466137525204964e-05 + ME 5.739226791327231e-06 Event 4 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2084,7 +2084,7 @@ Event 4 Batch 1 2 7.183269968238449e+02 -3.584978055671311e+02 -5.048824553914336e+02 -3.640971079361008e+02 3 7.387431276480253e+02 4.013538934928407e+02 5.036810263913359e+02 3.618865629982628e+02 4 4.292987552812846e+01 -4.285608792570924e+01 1.201429000097643e+00 2.210544937839338e+00 - ME 3.145606575501715e-04 + ME 5.884725836744927e-05 Event 5 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2092,7 +2092,7 @@ Event 5 Batch 1 2 4.529780005473896e+02 -8.443182436392424e+01 4.445408460134587e+02 -2.106590230986445e+01 3 4.683757780543924e+02 -6.076819021151039e+01 -1.335482427838441e+02 -4.448010379662153e+02 4 5.786462213982179e+02 1.452000145754347e+02 -3.109926032296145e+02 4.658669402760799e+02 - ME 8.481958952475706e-05 + ME 2.851579396246287e-05 Event 6 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2100,7 +2100,7 @@ Event 6 Batch 1 2 6.238848262005389e+02 -1.065131260140052e+02 -4.741487807795934e+02 
-3.912418229627633e+02 3 1.729069432107234e+02 -1.460869767542721e+02 -8.199113358821990e+01 4.281191710484079e+01 4 7.032082305887380e+02 2.526001027682771e+02 5.561399143678132e+02 3.484299058579224e+02 - ME 4.868510537699180e-04 + ME 1.468701510222534e-04 Event 7 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2108,7 +2108,7 @@ Event 7 Batch 1 2 6.977203086376783e+02 -6.126072843634399e+02 -1.744636661244187e+02 2.847602033865263e+02 3 1.614193396272251e+02 -4.571584237043670e+00 8.497734613495712e+01 -1.371646983269120e+02 4 6.408603517350967e+02 6.171788686004836e+02 8.948631998946138e+01 -1.475955050596143e+02 - ME 3.540796080305845e-04 + ME 9.523334397108766e-05 Event 8 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2116,7 +2116,7 @@ Event 8 Batch 1 2 6.871091945484288e+02 4.059708628308462e+02 2.886614153103366e+02 4.732666173272762e+02 3 5.653302025665631e+02 -2.838835484844413e+02 -7.353399035097291e+01 -4.833229987253825e+02 4 2.475606028850081e+02 -1.220873143464048e+02 -2.151274249593637e+02 1.005638139810634e+01 - ME 8.785466054587446e-05 + ME 3.726341895116938e-05 Event 9 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2124,7 +2124,7 @@ Event 9 Batch 1 2 1.618579955503452e+02 1.385215220188489e+01 1.601201234527701e+02 -1.917484467788566e+01 3 7.196660585644588e+02 -4.527189715496824e+02 -4.214090439733052e+02 3.679391067910628e+02 4 6.184759458851959e+02 4.388668193477974e+02 2.612889205205349e+02 -3.487642621131772e+02 - ME 1.054640649369016e-03 + ME 1.276556148007894e-04 Event 10 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2132,7 +2132,7 @@ Event 10 Batch 1 2 7.832785200561162e+01 1.027681340851886e+01 -7.242726264265977e+01 -2.799877018853974e+01 3 7.448007230566494e+02 2.520540107528716e+02 6.813719334665398e+02 
1.641011304445167e+02 4 6.768714249377393e+02 -2.623308241613905e+02 -6.089446708238800e+02 -1.361023602559769e+02 - ME 5.876642887714617e-04 + ME 1.087112534498832e-04 Event 11 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2140,7 +2140,7 @@ Event 11 Batch 1 2 5.478627446486676e+02 2.070882322301630e+02 -4.708081692757452e+02 1.887000762823861e+02 3 6.997827604382593e+02 -4.209013422316021e+02 4.569873120768409e+02 -3.220257264800591e+02 4 2.523544949130733e+02 2.138131100014392e+02 1.382085719890436e+01 1.333256501976729e+02 - ME 2.703695959900953e-05 + ME 7.092902148917371e-06 Event 12 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2148,7 +2148,7 @@ Event 12 Batch 1 2 5.802868936311938e+02 -4.467002255894120e+01 5.211262762381961e+02 -2.513262266832405e+02 3 5.208038834706859e+02 2.151797013176283e+01 -4.993650129388666e+02 -1.463155694111945e+02 4 3.989092228981199e+02 2.315205242717860e+01 -2.176126329932955e+01 3.976417960944350e+02 - ME 5.046437564325244e-04 + ME 4.980323856672599e-04 Event 13 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2156,7 +2156,7 @@ Event 13 Batch 1 2 5.774880087360024e+02 1.576445054854711e+02 5.481077151088400e+02 -9.065617884226717e+01 3 5.915098138161557e+02 -3.018001633277128e+02 -3.808656371901898e+02 3.372564123391869e+02 4 3.310021774478421e+02 1.441556578422419e+02 -1.672420779186502e+02 -2.466002334969197e+02 - ME 1.505341700965184e-03 + ME 5.587942683639647e-05 Event 14 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2164,7 +2164,7 @@ Event 14 Batch 1 2 2.531797527967491e+02 -8.400833666640553e+01 -2.384535242035555e+02 -1.350938161690895e+01 3 5.261064571264828e+02 -1.751971590790252e+02 -3.334570051994592e+02 3.672878780523887e+02 4 7.207137900767681e+02 2.592054957454308e+02 5.719105294030147e+02 
-3.537784964354798e+02 - ME 3.373121845959189e-03 + ME 1.659114310450813e-03 Event 15 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2172,7 +2172,7 @@ Event 15 Batch 1 2 4.605848765362425e+02 3.563504404614684e+02 1.735853700506503e+02 2.345653669687875e+02 3 4.216445088607453e+02 1.370719005416187e+02 -3.933730877164850e+02 6.521502736890037e+01 4 6.177706146030118e+02 -4.934223410030871e+02 2.197877176658347e+02 -2.997803943376878e+02 - ME 4.613631402771334e-04 + ME 9.110622752737525e-05 Event 16 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2180,7 +2180,7 @@ Event 16 Batch 1 2 4.972484926572777e+02 -1.474122335888775e+02 -4.748950276275915e+02 -6.399787981958280e-01 3 5.072511849723048e+02 4.846784046822065e+02 1.224000792205880e+02 -8.607455661990267e+01 4 4.955003223704169e+02 -3.372661710933285e+02 3.524949484070036e+02 8.671453541809866e+01 - ME 5.856804747367533e-05 + ME 1.035537635543116e-05 Event 17 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2188,7 +2188,7 @@ Event 17 Batch 1 2 3.182636773520259e+02 -9.176062613973060e+01 -1.890905041641619e+02 2.389906630959087e+02 3 6.376303990615819e+02 -4.240378519397394e+02 2.706855745366566e+02 -3.917827786765570e+02 4 5.441059235863918e+02 5.157984780794702e+02 -8.159507037249479e+01 1.527921155806483e+02 - ME 7.445984612273079e-05 + ME 2.964570775197734e-05 Event 18 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2196,7 +2196,7 @@ Event 18 Batch 1 2 5.532560008158404e+02 -4.148613005881325e+02 1.689647846464811e+02 -3.247047971041214e+02 3 3.650144721835348e+02 -1.597348634907620e+02 -2.160675866909894e+02 2.470529017650751e+02 4 5.817295270006244e+02 5.745961640788944e+02 4.710280204450838e+01 7.765189533904635e+01 - ME 9.119298978738387e-05 + ME 3.148325734685632e-05 Event 19 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2204,7 +2204,7 @@ Event 19 Batch 1 2 3.263687475619531e+02 -1.904667433734991e+02 2.390747946355329e+02 -1.143775398573919e+02 3 7.331345945903582e+02 2.597391859223821e+02 -6.739404183465077e+02 1.258022320965774e+02 4 4.404966578476884e+02 -6.927244254888298e+01 4.348656237109747e+02 -1.142469223918529e+01 - ME 8.793129888044293e-05 + ME 9.665339952809457e-06 Event 20 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2212,7 +2212,7 @@ Event 20 Batch 1 2 9.588718605412237e+01 4.259536217794532e+01 8.056474827260676e+01 -2.982128277051557e+01 3 7.250265356668370e+02 3.120913743414047e+02 -4.446787057645155e+02 4.801284204484703e+02 4 6.790862782790414e+02 -3.546867365193502e+02 3.641139574919093e+02 -4.503071376779550e+02 - ME 3.686389281265799e-03 + ME 6.402422614019696e-04 Event 21 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2220,7 +2220,7 @@ Event 21 Batch 1 2 1.825278201605081e+02 -1.533737674675502e+02 8.574830442242751e+01 4.939757963742074e+01 3 7.183016103669913e+02 1.713205736990392e+02 -6.275703015775031e+02 -3.045685162014731e+02 4 5.991705694725008e+02 -1.794680623148897e+01 5.418219971550755e+02 2.551709365640523e+02 - ME 7.470861105912214e-05 + ME 1.806434468406198e-05 Event 22 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2228,7 +2228,7 @@ Event 22 Batch 1 2 2.349542451120770e+02 9.235159917618290e+01 -2.156570331301489e+02 -1.291214495308476e+01 3 7.360601907662837e+02 -2.182033070539752e+02 6.568866822530020e+02 -2.503433799808774e+02 4 5.289855641216395e+02 1.258517078777923e+02 -4.412296491228531e+02 2.632555249339621e+02 - ME 3.893602972207037e-05 + ME 8.007442232312076e-06 Event 23 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2236,7 
+2236,7 @@ Event 23 Batch 1 2 2.350908908124364e+02 -7.377772511691019e+00 -2.298431804723787e+02 -4.884063683135331e+01 3 6.797114625392685e+02 -5.485955088721076e+02 3.603976926464840e+02 1.765336882516069e+02 4 5.851976466482949e+02 5.559732813837987e+02 -1.305545121741055e+02 -1.276930514202538e+02 - ME 2.057468423101862e-04 + ME 3.185713653214173e-05 Event 24 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2244,7 +2244,7 @@ Event 24 Batch 1 2 4.355364173804401e+02 2.538053291625626e+02 -2.665393838801487e+02 -2.328767540869265e+02 3 4.093863144993796e+02 -1.953012891316528e+02 -3.573484670764558e+02 4.191221827828568e+01 4 6.550772681201798e+02 -5.850404003090968e+01 6.238878509566048e+02 1.909645358086408e+02 - ME 1.895168702655672e-04 + ME 3.721637657688893e-05 Event 25 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2252,7 +2252,7 @@ Event 25 Batch 1 2 7.365386968907909e+02 3.875876454009267e+02 3.151568854896985e+02 5.412404333367775e+02 3 5.208510884285567e+02 -2.430585576296288e+02 -1.518636440371932e+02 -4.349089876054084e+02 4 2.426102146806534e+02 -1.445290877712977e+02 -1.632932414525050e+02 -1.063314457313693e+02 - ME 3.717867207603688e-04 + ME 7.982561935336398e-05 Event 26 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2260,7 +2260,7 @@ Event 26 Batch 1 2 7.198867014174701e+02 5.189601929589824e+02 4.797253921416957e+02 -1.370428003807496e+02 3 3.889101953712928e+02 -1.847394503243419e+02 -2.837815501141775e+02 1.912864537085460e+02 4 3.912031032112371e+02 -3.342207426346404e+02 -1.959438420275183e+02 -5.424365332779646e+01 - ME 1.222836766708484e-04 + ME 1.928349098758061e-05 Event 27 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2268,7 +2268,7 @@ Event 27 Batch 1 2 6.732032222628646e+02 5.870808395006010e+02 -9.126179303429218e+01 
3.165595544104447e+02 3 1.177373967283342e+02 7.847176641415683e+01 5.304379211899001e+00 -8.761358356661104e+01 4 7.090593810088013e+02 -6.655526059147578e+02 8.595741382239324e+01 -2.289459708438336e+02 - ME 1.603290018002586e-03 + ME 6.795383824785976e-04 Event 28 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2276,7 +2276,7 @@ Event 28 Batch 1 2 6.475300414228806e+02 3.136396845517189e+02 3.816259196370642e+02 -4.186728559156669e+02 3 7.290923529036073e+02 -2.791764769994177e+02 -4.112865540505715e+02 5.333662195995520e+02 4 1.233776056735125e+02 -3.446320755230100e+01 2.966063441350738e+01 -1.146933636838856e+02 - ME 5.037107889244314e-02 + ME 6.311296815400830e-04 Event 29 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2284,7 +2284,7 @@ Event 29 Batch 1 2 3.156754590345620e+02 -2.870540678871016e+02 4.159516713841874e+01 -1.245825012466667e+02 3 4.770060274033896e+02 -2.355061130652810e+02 -3.231858413754910e+02 -2.600433287405434e+02 4 7.073185135620483e+02 5.225601809523826e+02 2.815906742370723e+02 3.846258299872100e+02 - ME 7.956699356695784e-04 + ME 1.321807869823317e-04 Event 30 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2292,7 +2292,7 @@ Event 30 Batch 1 2 6.091290614220995e+02 1.543004089904798e+02 4.216196287493766e+00 -5.892468251447810e+02 3 2.079357839022729e+02 2.034647466922837e+02 4.185675980476618e+01 9.348729279626889e+00 4 6.829351546756266e+02 -3.577651556827627e+02 -4.607295609226003e+01 5.798980958651539e+02 - ME 3.902231064020147e-04 + ME 1.448382779935031e-04 Event 31 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2300,7 +2300,7 @@ Event 31 Batch 1 2 6.901710072855793e+02 1.433309098684656e+01 6.447948515477649e+02 -2.457034416076623e+02 3 5.898919363861644e+02 1.120085307876391e+02 -4.815950471622465e+02 
3.217029626736535e+02 4 2.199370563282564e+02 -1.263416217744856e+02 -1.631998043855182e+02 -7.599952106599136e+01 - ME 2.415465849322543e-04 + ME 2.376400497996635e-05 Event 32 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2308,7 +2308,7 @@ Event 32 Batch 1 2 6.144498311923271e+02 5.832947925341469e+02 -1.925283703230110e+02 1.576726595169125e+01 3 2.478450424037004e+02 5.004284035329792e+01 2.389954177960992e+02 4.247433867565734e+01 4 6.377051264039724e+02 -6.333376328874447e+02 -4.646704747308818e+01 -5.824160462734862e+01 - ME 2.160220890176678e-04 + ME 5.390650629646604e-05 Event 33 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2316,7 +2316,7 @@ Event 33 Batch 1 2 6.134536717469736e+02 -1.625429495269566e+02 -1.853973484494194e+02 5.617232593785355e+02 3 5.361644687950269e+02 -3.755831293394986e+01 -9.992652347025609e+01 -5.254297294928764e+02 4 3.503818594579993e+02 2.001012624609065e+02 2.853238719196754e+02 -3.629352988565911e+01 - ME 1.224582992507153e-04 + ME 1.005452860076771e-04 Event 34 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2324,7 +2324,7 @@ Event 34 Batch 1 2 3.840838099420727e+02 -2.442269925519278e+02 -3.827314394217582e+01 -2.939535943332559e+02 3 6.022630974514659e+02 3.956891925431131e+01 5.086724982658299e+02 3.200116071158652e+02 4 5.136530926064613e+02 2.046580732976165e+02 -4.703993543236541e+02 -2.605801278260916e+01 - ME 9.608243105510499e-05 + ME 2.313941306740064e-05 Event 35 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2332,7 +2332,7 @@ Event 35 Batch 1 2 3.454350783663418e+02 -3.439607925797615e+02 2.363778141880094e+01 -2.139209721976717e+01 3 6.705698302143294e+02 5.215327591153251e+02 4.060443141865528e+02 -1.131171661597076e+02 4 4.839950914193290e+02 -1.775719665355635e+02 -4.296820956053536e+02 
1.345092633794747e+02 - ME 4.862206803317224e-05 + ME 7.982017052260048e-06 Event 36 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2340,7 +2340,7 @@ Event 36 Batch 1 2 7.098652154429357e+02 2.489290984574327e+02 -1.674080692141068e+02 -6.433641786725617e+02 3 6.178479130357197e+02 -1.435715807033598e+02 2.588953561477193e+02 5.423065917191846e+02 4 1.722868715213448e+02 -1.053575177540730e+02 -9.148728693361247e+01 1.010575869533772e+02 - ME 6.680529568232270e-05 + ME 5.562249548714765e-05 Event 37 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2348,7 +2348,7 @@ Event 37 Batch 1 2 6.906872786346031e+02 1.495946561071237e+02 1.712833879510068e+02 6.521750966909805e+02 3 3.682276595245592e+02 -1.358558710218083e+02 1.194309698061993e+02 -3.207351477449753e+02 4 4.410850618408380e+02 -1.373878508531530e+01 -2.907143577572061e+02 -3.314399489460051e+02 - ME 2.014943348935539e-03 + ME 5.542438863722841e-04 Event 38 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2356,7 +2356,7 @@ Event 38 Batch 1 2 6.131720166645955e+02 -5.222102655174087e+02 6.340623138461877e+00 3.213038392347352e+02 3 4.540063357567760e+02 2.932429176443922e+02 -3.207297067242505e+02 -1.313879727496968e+02 4 4.328216475786277e+02 2.289673478730168e+02 3.143890835857886e+02 -1.899158664850380e+02 - ME 2.589645049118943e-04 + ME 3.150821423911933e-05 Event 39 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2364,7 +2364,7 @@ Event 39 Batch 1 2 2.929747896182304e+02 2.510117592312210e+02 -1.378648144805472e+02 6.181113983529403e+01 3 6.287164314722783e+02 3.864928360025993e+01 6.254120614625328e+02 5.148142827864510e+01 4 5.783087789094894e+02 -2.896610428314818e+02 -4.875472469819856e+02 -1.132925681139394e+02 - ME 1.708238325115053e-04 + ME 2.723120294663496e-05 Event 40 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2372,7 +2372,7 @@ Event 40 Batch 1 2 1.143487538112954e+02 -3.203572478439017e+01 1.022340126870988e+02 3.996944439980560e+01 3 7.361483923235807e+02 5.924235295921244e+02 -3.838567751530157e+02 -2.088128187524163e+02 4 6.495028538651248e+02 -5.603878048077345e+02 2.816227624659169e+02 1.688433743526105e+02 - ME 2.026369815874481e-04 + ME 4.279185076498264e-05 Event 41 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2380,7 +2380,7 @@ Event 41 Batch 1 2 6.384898508133350e+02 5.540399192408263e+02 -3.014826159773289e+02 -9.908223727147148e+01 3 3.510407251698805e+02 -1.719168197014114e+02 2.065966849440144e+02 -2.258140996521069e+02 4 5.104694240167846e+02 -3.821230995394149e+02 9.488593103331458e+01 3.248963369235784e+02 - ME 4.455092331482675e-05 + ME 1.488395965626735e-05 Event 42 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2388,7 +2388,7 @@ Event 42 Batch 1 2 3.291654598309212e+02 -1.090829060981258e+02 2.972891943885482e+02 -8.983292515941632e+01 3 6.884965239796815e+02 4.933628807557017e+02 -2.919492821202986e+02 3.812953554581829e+02 4 4.823380161893969e+02 -3.842799746575757e+02 -5.339912268249619e+00 -2.914624302987665e+02 - ME 6.690811667999076e-04 + ME 5.767145017550451e-05 Event 43 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2396,7 +2396,7 @@ Event 43 Batch 1 2 3.674173006007981e+02 2.791827424102563e+02 1.079644067383057e+02 2.130637369397045e+02 3 7.392205647816575e+02 -6.110484627794917e+02 -4.247874240022372e+01 -4.138385868609020e+02 4 3.933621346175442e+02 3.318657203692355e+02 -6.548566433808202e+01 2.007748499211975e+02 - ME 2.734436884563990e-05 + ME 6.513986915725277e-06 Event 44 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2404,7 
+2404,7 @@ Event 44 Batch 1 2 2.081359682230012e+02 -1.082501549908087e+02 1.771964605001424e+02 1.427934167997762e+01 3 7.449563315308093e+02 5.092828751965591e+02 -5.388739609944279e+02 7.215083562608928e+01 4 5.469077002461893e+02 -4.010327202057504e+02 3.616775004942854e+02 -8.643017730606689e+01 - ME 1.760644262839344e-04 + ME 1.838899544278803e-05 Event 45 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2412,7 +2412,7 @@ Event 45 Batch 1 2 5.180982465404422e+02 4.470261481799612e+02 -3.368837017252423e+01 -2.597277606009553e+02 3 3.377595659674062e+02 -7.316527185649456e+01 2.454727770679006e+02 -2.201624016839132e+02 4 6.441421874921515e+02 -3.738608763234666e+02 -2.117844068953763e+02 4.798901622848684e+02 - ME 1.645403798734011e-04 + ME 4.091340785269233e-05 Event 46 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2420,7 +2420,7 @@ Event 46 Batch 1 2 6.296560291524888e+02 2.172411497655985e+02 5.821614514430422e+02 -1.017892054705761e+02 3 6.224001894826197e+02 1.405102091633609e+01 -6.218608257778048e+02 2.176414579432105e+01 4 2.479437813648912e+02 -2.312921706819346e+02 3.969937433476264e+01 8.002505967625511e+01 - ME 4.041878897626609e-05 + ME 7.434320230190137e-06 Event 47 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2428,7 +2428,7 @@ Event 47 Batch 1 2 5.458843469271557e+02 -1.019033861791133e+02 -1.559739004096151e+02 5.131058004898495e+02 3 2.573134207008558e+02 6.791700498899543e+01 -2.412204887508016e+02 5.839651284901167e+01 4 6.968022323719882e+02 3.398638119011781e+01 3.971943891604168e+02 -5.715023133388611e+02 - ME 1.408798022766008e-02 + ME 4.005478861198618e-03 Event 48 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2436,7 +2436,7 @@ Event 48 Batch 1 2 6.623920218006384e+02 -6.284562032939594e+02 -1.837527125398962e+02 
-1.002044496053409e+02 3 1.251779629744606e+02 -7.502448682133647e+01 9.550779386908961e+01 3.031682869117444e+01 4 7.124300152249010e+02 7.034806901152959e+02 8.824491867080658e+01 6.988762091416655e+01 - ME 8.682321044518227e-04 + ME 3.004757451335502e-04 Event 49 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2444,7 +2444,7 @@ Event 49 Batch 1 2 2.397494808364364e+02 2.393958238941666e+02 -4.144666783354266e+00 -1.233996761053010e+01 3 6.782491241100328e+02 -3.516321535544010e+02 -2.705899831712919e+02 5.129890485673947e+02 4 5.820013950535307e+02 1.122363296602344e+02 2.747346499546462e+02 -5.006490809568646e+02 - ME 9.041285542966720e-03 + ME 6.040872325723622e-04 Event 50 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2452,7 +2452,7 @@ Event 50 Batch 1 2 4.764898792162554e+02 4.667163214316568e+02 5.900817880915086e+01 -7.573978570375913e+01 3 5.114228101321805e+02 -2.035689445851523e+02 -4.549677995197112e+02 -1.145306811477843e+02 4 5.120873106515638e+02 -2.631473768465044e+02 3.959596207105603e+02 1.902704668515434e+02 - ME 5.157319121365441e-05 + ME 9.692662313613028e-06 Event 51 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2460,7 +2460,7 @@ Event 51 Batch 1 2 4.678795643859630e+02 4.629737719234085e+02 5.365495313512251e+01 4.108186077915564e+01 3 6.311645871918951e+02 -4.500610707732837e+02 -4.345770688214700e+02 8.340587481742408e+01 4 4.009558484221416e+02 -1.291270115012470e+01 3.809221156863474e+02 -1.244877355965797e+02 - ME 1.517985021504320e-04 + ME 1.293558494013996e-05 Event 52 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2468,7 +2468,7 @@ Event 52 Batch 1 2 3.696230029266819e+02 2.516704934433110e+02 2.514038675722595e+02 1.003953305301004e+02 3 6.696174214325739e+02 -2.754912388418390e+01 -6.493999246431116e+02 
-1.609604756850079e+02 4 4.607595756407442e+02 -2.241213695591271e+02 3.979960570708519e+02 6.056514515490756e+01 - ME 5.727699238559496e-05 + ME 8.655753222194317e-06 Event 53 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2476,7 +2476,7 @@ Event 53 Batch 1 2 7.284624742442375e+01 -4.271742504396477e+01 -2.683807109937144e+01 -5.255012179908527e+01 3 7.493542950735829e+02 3.356513586119740e+02 2.501807367708783e+02 6.215139772812374e+02 4 6.777994575019936e+02 -2.929339335680093e+02 -2.233426656715069e+02 -5.689638554821522e+02 - ME 1.612275481129464e-02 + ME 2.372423861687152e-03 Event 54 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2484,7 +2484,7 @@ Event 54 Batch 1 2 7.460259847230064e+02 2.055186857047568e+01 6.233229443227743e+02 4.093908861479223e+02 3 5.756222844616437e+02 2.606063779094539e+01 -4.696411468594731e+02 -3.318117699890848e+02 4 1.783517308153497e+02 -4.661250636142109e+01 -1.536817974633012e+02 -7.757911615883735e+01 - ME 4.374243668355642e-04 + ME 5.046268590690708e-05 Event 55 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2492,7 +2492,7 @@ Event 55 Batch 1 2 5.967428482894213e+02 -8.165820254184375e+01 5.098287527914877e+02 -2.991798919868828e+02 3 5.942526243827265e+02 5.606061544962815e+01 -2.905196430116550e+02 5.153559216750568e+02 4 3.090045273278509e+02 2.559758709221549e+01 -2.193091097798325e+02 -2.161760296881746e+02 - ME 1.779007466146034e-03 + ME 1.849048785615045e-04 Event 56 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2500,7 +2500,7 @@ Event 56 Batch 1 2 5.610874267302015e+02 -4.199055433713192e+02 3.580252469767042e+02 1.015694718309908e+02 3 6.303091265298390e+02 2.130872195586830e+02 -5.453843477211296e+02 -2.333224059286980e+02 4 3.086034467399593e+02 2.068183238126362e+02 1.873591007444254e+02 
1.317529340977073e+02 - ME 3.258989367177766e-05 + ME 7.213009143835112e-06 Event 57 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2508,7 +2508,7 @@ Event 57 Batch 1 2 6.552053965855981e+02 4.516249927537604e+02 7.110694105335197e+00 4.746350341729917e+02 3 6.035190443408458e+02 -3.717228873476765e+02 2.148772607224587e+02 -4.241286299324850e+02 4 2.412755590735562e+02 -7.990210540608396e+01 -2.219879548277939e+02 -5.050640424050685e+01 - ME 1.623545585873121e-04 + ME 3.752873989265266e-05 Event 58 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2516,7 +2516,7 @@ Event 58 Batch 1 2 2.959982971085279e+02 1.850007048157144e+02 -2.304987961744356e+02 1.612563397119956e+01 3 7.018897389129390e+02 -3.764226030262936e+02 4.376344751014918e+02 3.992884868423144e+02 4 5.021119639785326e+02 1.914218982105791e+02 -2.071356789270567e+02 -4.154141208135139e+02 - ME 4.558573859477246e-03 + ME 1.901193343270815e-04 Event 59 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2524,7 +2524,7 @@ Event 59 Batch 1 2 5.521089721327345e+02 1.223876815062619e+02 -3.629066091228882e+01 -5.371485459866160e+02 3 4.098988410471214e+02 -5.841964900319319e+01 -3.626461945087767e+02 1.819119075553315e+02 4 5.379921868201441e+02 -6.396803250306872e+01 3.989368554210655e+02 3.552366384312845e+02 - ME 5.148841296796537e-05 + ME 1.780280399801712e-05 Event 60 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2532,7 +2532,7 @@ Event 60 Batch 1 2 7.143828168925960e+02 -4.584044193456332e+02 -2.419772079280938e+02 -4.915844060170314e+02 3 1.284110307517517e+02 8.324300347118127e+01 -7.889851197070540e+01 5.774963203893758e+01 4 6.572061523556514e+02 3.751614158744520e+02 3.208757198987992e+02 4.338347739780938e+02 - ME 1.673517837789511e-04 + ME 7.144001898958308e-05 Event 61 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2540,7 +2540,7 @@ Event 61 Batch 1 2 4.394390210968651e+02 -2.137451655543886e+02 -3.779414621253704e+02 -6.767502250635177e+01 3 4.431311911324728e+02 3.845666395406355e+02 -2.150363068358313e+02 4.725610065709574e+01 4 6.174297877706618e+02 -1.708214739862469e+02 5.929777689612018e+02 2.041892184925626e+01 - ME 1.368591177943825e-04 + ME 2.870354731125455e-05 Event 62 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2548,7 +2548,7 @@ Event 62 Batch 1 2 7.301725729481176e+02 4.281927891852710e+02 5.652737593150771e+02 -1.739784429324868e+02 3 7.567373964415995e+01 2.589885732647599e+01 -5.696550981957816e+01 4.255225906941358e+01 4 6.941536874077224e+02 -4.540916465117469e+02 -5.083082494954988e+02 1.314261838630732e+02 - ME 8.513592598060080e-04 + ME 2.379197431250548e-04 Event 63 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2556,7 +2556,7 @@ Event 63 Batch 1 2 4.361152320236988e+02 -3.738769057978321e+02 1.427754799584550e+02 -1.732850750548248e+02 3 5.817148313055657e+02 5.081993893256957e+02 2.829214478037172e+02 -8.998890070513914e+00 4 4.821699366707353e+02 -1.343224835278637e+02 -4.256969277621721e+02 1.822839651253387e+02 - ME 4.544766189571194e-05 + ME 8.350404272725701e-06 Event 64 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2564,7 +2564,7 @@ Event 64 Batch 1 2 6.097675704107204e+02 3.288514690970509e+02 4.971291587853200e+02 -1.285916042465611e+02 3 5.709532610348123e+02 -6.501292612520263e+01 -4.768258747557200e+02 3.072426254385416e+02 4 3.192791685544673e+02 -2.638385429718484e+02 -2.030328402960006e+01 -1.786510211919805e+02 - ME 4.598138986874043e-04 + ME 3.000969253297957e-05 Event 65 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2572,7 
+2572,7 @@ Event 65 Batch 1 2 6.258641293880484e+02 3.743515439843765e+02 -1.622018320411498e+02 -4.746128903155367e+02 3 7.438702198751357e+02 -4.029113627030089e+02 2.325939036896868e+02 5.804355380128616e+02 4 1.302656507368158e+02 2.855981871863233e+01 -7.039207164853700e+01 -1.058226476973252e+02 - ME 6.427333508548903e-03 + ME 3.162776051460646e-04 Event 66 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2580,7 +2580,7 @@ Event 66 Batch 1 2 3.731957242404369e+02 1.596860493342637e+01 -3.714568973276624e+02 3.224632809376674e+01 3 6.079923612940432e+02 4.451199598539357e+02 3.189341902600864e+02 -2.642043054431177e+02 4 5.188119144655197e+02 -4.610885647873621e+02 5.252270706757586e+01 2.319579773493509e+02 - ME 4.681392980523237e-05 + ME 1.034065067393998e-05 Event 67 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2588,7 +2588,7 @@ Event 67 Batch 1 2 7.084256499213539e+02 6.318790977834966e+02 -2.229764540025608e+02 2.299504472951746e+02 3 5.168612394424738e+01 1.130069959366449e+01 -1.428140623590627e+01 4.837138651102398e+01 4 7.398882261343989e+02 -6.431797973771612e+02 2.372578602384670e+02 -2.783218338061985e+02 - ME 5.878400132197954e-02 + ME 1.479715191731530e-02 Event 68 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2596,7 +2596,7 @@ Event 68 Batch 1 2 5.644037677826096e+02 -7.446914007305443e+01 3.170710956176409e+02 4.609467220707991e+02 3 4.303832728799333e+02 -1.588265612792408e+02 -3.994808673830752e+02 -2.046757440246668e+01 4 5.052129593374568e+02 2.332957013522950e+02 8.240977176543441e+01 -4.404791476683325e+02 - ME 8.108482137897523e-03 + ME 3.274273226082449e-04 Event 69 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2604,7 +2604,7 @@ Event 69 Batch 1 2 2.379282923937934e+02 -4.413455715133102e+01 1.058497776082811e+02 
-2.084654354245804e+02 3 5.822935131976616e+02 -5.806422676829345e+02 4.095409019445288e+01 -1.559022092337181e+01 4 6.797781944085444e+02 6.247768248342655e+02 -1.468038678027338e+02 2.240556563479522e+02 - ME 3.039802585689931e-04 + ME 6.379305675073031e-05 Event 70 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2612,7 +2612,7 @@ Event 70 Batch 1 2 5.861861307468000e+02 1.831219916849830e+02 2.904683423406074e+02 -4.750880530376756e+02 3 4.633200606614189e+02 -4.245314712871158e+02 -1.339518705596282e+02 1.284344380284135e+02 4 4.504938085917810e+02 2.414094796021329e+02 -1.565164717809791e+02 3.466536150092620e+02 - ME 3.530491740557932e-05 + ME 1.325653453486623e-05 Event 71 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2620,7 +2620,7 @@ Event 71 Batch 1 2 7.383412459951699e+02 5.748049255568963e+02 -1.639684737984460e+02 -4.334298474879633e+02 3 3.973981306646684e+02 -3.228684354469153e+02 -4.837114091238284e+00 2.316416412804533e+02 4 3.642606233401616e+02 -2.519364901099809e+02 1.688055878896842e+02 2.017882062075102e+02 - ME 3.103530482016079e-05 + ME 1.333441808219846e-05 Event 72 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2628,7 +2628,7 @@ Event 72 Batch 1 2 3.538199915090663e+02 3.512029503136998e+02 -6.467835580753929e+00 -4.246458742680748e+01 3 5.344234504985296e+02 1.310173344785605e+01 3.836805260246265e+01 5.328833470497182e+02 4 6.117565579924039e+02 -3.643046837615559e+02 -3.190021702170876e+01 -4.904187596229107e+02 - ME 9.376669006106200e-03 + ME 2.994704399169685e-03 Event 73 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2636,7 +2636,7 @@ Event 73 Batch 1 2 4.694927197571710e+02 1.451947293992222e+02 -1.807863847612341e+02 4.082379055705570e+02 3 5.537325951281179e+02 -5.796379956652479e+01 5.401382741253894e+02 
-1.072876026015002e+02 4 4.767746851147115e+02 -8.723092983269744e+01 -3.593518893641554e+02 -3.009503029690568e+02 - ME 1.077472469645428e-03 + ME 1.535829386616431e-04 Event 74 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2644,7 +2644,7 @@ Event 74 Batch 1 2 6.258444305735198e+02 -3.349227552763227e+02 4.941036656040852e+02 1.880679848209580e+02 3 5.555040664889822e+02 3.765538795180102e+01 -5.474422011270130e+02 -8.645158222500005e+01 4 3.186515029374982e+02 2.972673673245214e+02 5.333853552292791e+01 -1.016164025959578e+02 - ME 1.623439923565115e-04 + ME 1.487896902219418e-05 Event 75 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2652,7 +2652,7 @@ Event 75 Batch 1 2 3.943316317993887e+02 5.588489849751632e+01 -2.552251009651266e+02 -2.953548066221912e+02 3 5.467466262348042e+02 -3.021648543602057e+02 -2.377479281839000e+02 3.887212326756534e+02 4 5.589217419658066e+02 2.462799558626894e+02 4.929730291490265e+02 -9.336642605346221e+01 - ME 1.348649436679123e-04 + ME 4.632408498797698e-05 Event 76 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2660,7 +2660,7 @@ Event 76 Batch 1 2 5.517772830004059e+02 2.282681125856672e+02 -4.885490190451381e+02 -1.169260227747471e+02 3 4.245403880864563e+02 -2.793100283061228e+02 1.521744876196477e+02 -2.811821020654221e+02 4 5.236823289131380e+02 5.104191572045557e+01 3.363745314254903e+02 3.981081248401691e+02 - ME 5.074216551061466e-05 + ME 1.645260485784409e-05 Event 77 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2668,7 +2668,7 @@ Event 77 Batch 1 2 3.781543446472003e+02 -5.926925448310480e+01 -1.775497893613220e+02 3.285786605157444e+02 3 6.702964816234122e+02 -6.066564226432872e+01 -1.057468051743550e+02 -6.591165802199176e+02 4 4.515491737293867e+02 1.199348967474336e+02 2.832965945356770e+02 
3.305379197041734e+02 - ME 6.321080405055773e-05 + ME 5.041095643414513e-05 Event 78 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2676,7 +2676,7 @@ Event 78 Batch 1 2 4.564262045363139e+02 1.882572856930395e+02 1.751822011208171e+02 -3.770878823051468e+02 3 3.809544602625751e+02 -2.816334489555117e+02 1.992812047321844e+02 -1.615422627793184e+02 4 6.626193352011103e+02 9.337616326247226e+01 -3.744634058530013e+02 5.386301450844651e+02 - ME 2.572921643188974e-04 + ME 6.222463480998997e-05 Event 79 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2684,7 +2684,7 @@ Event 79 Batch 1 2 6.126536521478922e+02 6.075062399138452e+02 -4.178945028651393e+01 6.733726903166659e+01 3 2.872846052831658e+02 -1.084163947926161e+02 2.139961846825774e+01 2.651799127051085e+02 4 6.000617425689430e+02 -4.990898451212283e+02 2.038983181825616e+01 -3.325171817367756e+02 - ME 1.996659951821530e-03 + ME 6.289823950094716e-04 Event 80 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2692,7 +2692,7 @@ Event 80 Batch 1 2 4.171281258707700e+02 -2.756641813219371e+02 1.445082905894664e+01 3.127240094205691e+02 3 3.805235327384960e+02 -2.955852199231463e+02 2.395269588958384e+02 7.373784162959287e+00 4 7.023483413907342e+02 5.712494012450838e+02 -2.539777879547846e+02 -3.200977935835284e+02 - ME 1.297520069620947e-03 + ME 5.629434448779270e-04 Event 81 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2700,7 +2700,7 @@ Event 81 Batch 1 2 7.471091333863935e+02 -9.753029041192970e+01 7.407154559164039e+02 -7.162458282065091e-01 3 6.775352561453885e+02 9.550863422814814e+01 -6.702673865908516e+02 -2.595678293896889e+01 4 7.535561046821789e+01 2.021656183781575e+00 -7.044806932555213e+01 2.667302876717550e+01 - ME 1.022399816924924e-04 + ME 2.904529061551848e-05 Event 82 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2708,7 +2708,7 @@ Event 82 Batch 1 2 4.309094465924175e+02 3.042233433179616e+02 2.799835808203350e+02 -1.214096495919827e+02 3 5.540384887187945e+02 -4.824447657759213e+02 1.988969596446625e+02 1.861335391629672e+02 4 5.150520646887885e+02 1.782214224579596e+02 -4.788805404649973e+02 -6.472388957098450e+01 - ME 1.053635072607165e-04 + ME 1.778678120024833e-05 Event 83 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2716,7 +2716,7 @@ Event 83 Batch 1 2 4.869534474909295e+02 -4.727010820510885e+02 1.062322962656182e+02 4.890855018466118e+01 3 3.520990385354405e+02 -1.437544586613779e+02 -3.142298368411062e+02 6.758696761482639e+01 4 6.609475139736298e+02 6.164555407124665e+02 2.079975405754878e+02 -1.164955177994876e+02 - ME 2.998516055200512e-04 + ME 7.948516811691567e-05 Event 84 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2724,7 +2724,7 @@ Event 84 Batch 1 2 1.391975815431583e+01 -3.682657486111166e-01 -1.138840508663312e+01 -7.995516055627093e+00 3 7.493632094786751e+02 -3.452281541586202e+01 3.833012084573049e+02 6.429880080772211e+02 4 7.367170323670085e+02 3.489108116447313e+01 -3.719128033706718e+02 -6.349924920215940e+02 - ME 3.806217512266510e-01 + ME 8.671177508029917e-02 Event 85 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2732,7 +2732,7 @@ Event 85 Batch 1 2 7.362448947738020e+02 6.409220704967113e+02 3.243429451315054e+02 1.614840505254833e+02 3 1.517836214454495e+02 -1.266859291808411e+02 -6.780846852200752e+01 4.889738933094901e+01 4 6.119714837807480e+02 -5.142361413158706e+02 -2.565344766094980e+02 -2.103814398564324e+02 - ME 5.694785892689211e-04 + ME 1.062305495679385e-04 Event 86 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2740,7 
+2740,7 @@ Event 86 Batch 1 2 5.451728369778392e+02 -6.605005893803180e+01 1.066920544886257e+02 -5.305352178712969e+02 3 3.158718592284829e+02 -1.755596039144849e+02 2.550395858012225e+02 6.251932981237656e+01 4 6.389553037936773e+02 2.416096628525165e+02 -3.617316402898481e+02 4.680158880589203e+02 - ME 1.469986179099727e-04 + ME 4.057626974930324e-05 Event 87 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2748,7 +2748,7 @@ Event 87 Batch 1 2 3.414211232216659e+02 1.437256906952883e+02 1.534640422371205e+02 -2.689983214749668e+02 3 5.081668091119999e+02 4.794742948200324e+02 -1.464748766741243e+02 8.296394996143997e+01 4 6.504120676663341e+02 -6.231999855153207e+02 -6.989165562996117e+00 1.860343715135268e+02 - ME 1.823135893899652e-04 + ME 3.656584417835253e-05 Event 88 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2756,7 +2756,7 @@ Event 88 Batch 1 2 2.925516585730864e+02 1.655911293372511e+01 2.598275245766865e+02 -1.334238591297045e+02 3 7.159840369510271e+02 -1.056844973272874e+02 -3.694097043713192e+02 6.041526284885822e+02 4 4.914643044758866e+02 8.912538439356234e+01 1.095821797946327e+02 -4.707287693588777e+02 - ME 8.728488941697977e-02 + ME 2.327745727475104e-03 Event 89 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2764,7 +2764,7 @@ Event 89 Batch 1 2 6.333634651097186e+02 1.209853522660007e+02 5.372166546881791e+02 -3.129058794565919e+02 3 6.221307427802806e+02 5.757192259699385e+01 -4.327483989541182e+02 4.432391657372765e+02 4 2.445057921100010e+02 -1.785572748629945e+02 -1.044682557340609e+02 -1.303332862806847e+02 - ME 5.497507832908574e-04 + ME 5.047204144927262e-05 Event 90 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2772,7 +2772,7 @@ Event 90 Batch 1 2 3.111538587406461e+02 2.628215106651484e+02 -6.985334981761831e+01 
-1.512021390726355e+02 3 5.216486323898988e+02 1.252715366480781e+02 4.457714554600226e+02 -2.402335265468457e+02 4 6.671975088694549e+02 -3.880930473132266e+02 -3.759181056424042e+02 3.914356656194811e+02 - ME 2.329075524537458e-04 + ME 4.503542584588689e-05 Event 91 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2780,7 +2780,7 @@ Event 91 Batch 1 2 3.007803348469016e+02 8.390513937949677e+01 2.884042062049404e+02 -1.586667134655829e+01 3 6.256884422056424e+02 2.364580673743878e+02 -3.590826126759745e+02 -4.545693416378727e+02 4 5.735312229474563e+02 -3.203632067538847e+02 7.067840647103421e+01 4.704360129844310e+02 - ME 6.478111274774788e-05 + ME 2.635583378174906e-05 Event 92 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2788,7 +2788,7 @@ Event 92 Batch 1 2 6.843865618656529e+02 -2.264962467301474e+02 -5.909185329480341e+02 2.605757158639088e+02 3 6.645516272550811e+02 3.453347116263074e+02 4.983670680340538e+02 -2.720350487207341e+02 4 1.510618108792659e+02 -1.188384648961601e+02 9.255146491398015e+01 1.145933285682523e+01 - ME 9.365402433981294e-05 + ME 1.711437740567050e-05 Event 93 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2796,7 +2796,7 @@ Event 93 Batch 1 2 5.579763469381434e+02 2.180908585044468e+02 5.135246110359701e+02 8.151996049100932e+00 3 3.333821836060117e+02 1.681122988324202e+02 -1.261705574188212e+02 2.587719570738210e+02 4 6.086414694558448e+02 -3.862031573368670e+02 -3.873540536171486e+02 -2.669239531229223e+02 - ME 5.183695239236329e-04 + ME 1.157787815150910e-04 Event 94 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2804,7 +2804,7 @@ Event 94 Batch 1 2 4.534979734151987e+02 1.139662723650677e+02 2.686183171543304e+01 4.381216071501101e+02 3 3.856184698299744e+02 1.545134372854228e+02 -3.452526490806396e+02 
7.501873282757614e+01 4 6.608835567548277e+02 -2.684797096504910e+02 3.183908173652065e+02 -5.131403399776862e+02 - ME 6.944325623628402e-03 + ME 1.545010233607317e-03 Event 95 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2812,7 +2812,7 @@ Event 95 Batch 1 2 2.828073115974175e+02 -5.711637476392460e+01 5.915078172645698e+01 -2.705898746219725e+02 3 6.809618671276158e+02 3.772100991821226e+02 3.247893528880094e+02 4.646864338535512e+02 4 5.362308212749670e+02 -3.200937244181981e+02 -3.839401346144663e+02 -1.940965592315787e+02 - ME 2.560512106670314e-04 + ME 6.408796328924562e-05 Event 96 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2820,7 +2820,7 @@ Event 96 Batch 1 2 4.639832102051440e+02 -4.275497908582962e+02 -1.317248975374901e+02 -1.230046627491649e+02 3 7.474114851375481e+02 6.594176555428718e+02 2.654537688070380e+02 2.309254864669502e+02 4 2.886053046573076e+02 -2.318678646845757e+02 -1.337288712695479e+02 -1.079208237177853e+02 - ME 2.440162169445852e-04 + ME 1.445191791082226e-05 Event 97 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2828,7 +2828,7 @@ Event 97 Batch 1 2 5.095921959312568e+02 3.190102848863560e+02 3.100341192456060e+02 2.485869851668986e+02 3 4.555541331018014e+02 -2.788120391899956e+02 2.221549471930723e+02 -2.836205112936887e+02 4 5.348536709669415e+02 -4.019824569636059e+01 -5.321890664386783e+02 3.503352612679014e+01 - ME 8.198891770965733e-05 + ME 2.250661525403011e-05 Event 98 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2836,7 +2836,7 @@ Event 98 Batch 1 2 5.299941952467790e+02 -2.570048161992350e+02 -4.630296380940593e+02 -2.111695271961878e+01 3 7.352146396921255e+02 2.361229278157243e+02 6.962552486063584e+02 3.893348873424185e+00 4 2.347911650610957e+02 2.088188838351074e+01 -2.332256105122990e+02 
1.722360384619465e+01 - ME 6.760444392591968e-05 + ME 5.654417419793765e-06 Event 99 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2844,7 +2844,7 @@ Event 99 Batch 1 2 4.290897291078425e+02 3.747236205606835e+02 2.040795775432686e+02 -4.529602465443949e+01 3 6.438744429739487e+02 -5.215755139094103e+02 2.133414139578182e+01 3.769325350988583e+02 4 4.270358279182090e+02 1.468518933487271e+02 -2.254137189390505e+02 -3.316365104444187e+02 - ME 2.024851967866169e-03 + ME 8.457850707842401e-05 Event 100 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2852,7 +2852,7 @@ Event 100 Batch 1 2 5.119062275524872e+02 -4.721600394809319e+02 -1.845880136125884e+02 7.099400083769524e+01 3 4.523854579707449e+02 2.836789572262426e+02 -3.060214184981774e+02 -1.747276258374610e+02 4 5.357083144767672e+02 1.884810822546894e+02 4.906094321107658e+02 1.037336249997658e+02 - ME 6.898305006855298e-05 + ME 1.420495101373495e-05 Event 101 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2860,7 +2860,7 @@ Event 101 Batch 1 2 6.024072815192737e+02 -3.080418730730875e+02 -4.692284526425155e+02 2.186993289696520e+02 3 3.347434020484399e+02 8.940653726951260e+01 -3.939923552329941e+01 -3.201676381969582e+02 4 5.628493164322859e+02 2.186353358035749e+02 5.086276881658150e+02 1.014683092273061e+02 - ME 9.290725627447436e-05 + ME 2.743452031293993e-05 Event 102 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2868,7 +2868,7 @@ Event 102 Batch 1 2 5.910857738801296e+02 3.707548039128416e+02 -7.516477307090547e+01 -4.541734518311494e+02 3 2.311218706704979e+02 4.536804143672514e+01 -2.262982016400413e+02 1.217307902336991e+01 4 6.777923554493723e+02 -4.161228453495667e+02 3.014629747109467e+02 4.420003728077793e+02 - ME 2.633339755449651e-04 + ME 7.158169676479796e-05 Event 103 Batch 1 0 
7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2876,7 +2876,7 @@ Event 103 Batch 1 2 6.627949406417042e+02 7.189602123685950e+01 -6.391860825813610e+02 -1.599038689489492e+02 3 5.519979886399102e+02 1.442810582977179e+02 4.734454174874869e+02 2.444057944057306e+02 4 2.852070707183856e+02 -2.161770795345774e+02 1.657406650938741e+02 -8.450192545678139e+01 - ME 1.652798222861839e-04 + ME 1.658567428345252e-05 Event 104 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2884,7 +2884,7 @@ Event 104 Batch 1 2 4.368180791462563e+02 -3.483499330357901e+02 -2.596280064690262e+02 4.533935023690698e+01 3 4.635715977792429e+02 1.873023362819025e+02 -2.251347602994603e+02 -3.593477435519053e+02 4 5.996103230745010e+02 1.610475967538876e+02 4.847627667684865e+02 3.140083933149983e+02 - ME 9.158171748371188e-05 + ME 2.162124469235967e-05 Event 105 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2892,7 +2892,7 @@ Event 105 Batch 1 2 5.701708357490469e+02 2.288495716262106e+02 -4.521314661478370e+02 -2.613422905391967e+02 3 3.711008490497917e+02 -3.362590561223710e+02 -8.126001400906793e+01 1.343223639771668e+02 4 5.587283152011612e+02 1.074094844961603e+02 5.333914801569049e+02 1.270199265620299e+02 - ME 7.043372303967046e-05 + ME 1.720246557093887e-05 Event 106 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2900,7 +2900,7 @@ Event 106 Batch 1 2 6.775588183099673e+02 5.149765831731705e+02 3.445381345095063e+02 -2.741870619150275e+02 3 7.044100837534635e+02 -4.546975847980706e+02 -4.392260662935809e+02 3.106833358270535e+02 4 1.180310979365712e+02 -6.027899837509908e+01 9.468793178407486e+01 -3.649627391202603e+01 - ME 3.259673897057837e-04 + ME 2.786544600802367e-05 Event 107 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-2908,7 +2908,7 @@ Event 107 Batch 1 2 6.046880513041550e+02 2.289413119004024e+02 -5.349774474143721e+02 -1.644160754103499e+02 3 3.366746442316215e+02 -7.166101576320902e+01 2.452245434825371e+01 3.280444544890399e+02 4 5.586373044642238e+02 -1.572802961371935e+02 5.104549930661184e+02 -1.636283790786902e+02 - ME 8.859556065170558e-04 + ME 4.667002706670146e-04 Event 108 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2916,7 +2916,7 @@ Event 108 Batch 1 2 6.239206451413978e+02 -2.218030564243363e+02 5.011455197099735e+02 -2.982172759400455e+02 3 2.841199272340513e+02 1.209406641294798e+02 7.967327320293104e+01 2.444374323800143e+02 4 5.919594276245514e+02 1.008623922948564e+02 -5.808187929129044e+02 5.377984356003120e+01 - ME 1.727643234936365e-04 + ME 7.961277501126149e-05 Event 109 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2924,7 +2924,7 @@ Event 109 Batch 1 2 3.093404598873124e+02 1.546999830656544e+02 1.629193992247174e+02 2.126421988200774e+02 3 5.287372542258961e+02 -2.136116696975048e+02 -1.865832176193536e+02 4.462284633214169e+02 4 6.619222858867909e+02 5.891168663185049e+01 2.366381839463621e+01 -6.588706621414941e+02 - ME 1.686695657867669e+01 + ME 2.902408960420708e-01 Event 110 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2932,7 +2932,7 @@ Event 110 Batch 1 2 4.920948406187608e+02 -8.595212543403569e+01 -4.824913009925944e+02 -4.440392734262522e+01 3 4.634042325716594e+02 -2.085760624772916e+00 1.255608851371819e+02 4.460645653843308e+02 4 5.445009268095798e+02 8.803788605880843e+01 3.569304158554124e+02 -4.016606380417056e+02 - ME 4.151412887207382e-03 + ME 1.043536440561108e-03 Event 111 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2940,7 +2940,7 @@ Event 111 Batch 1 2 4.637454700443120e+02 1.543048221589588e+02 
-4.372769385391800e+02 6.225902899506631e+00 3 3.246747011850293e+02 -5.128652792678845e+01 -2.274142471268230e+02 2.259781269206006e+02 4 7.115798287706589e+02 -1.030182942321705e+02 6.646911856660031e+02 -2.322040298201072e+02 - ME 1.240833065187375e-03 + ME 5.219332617201280e-04 Event 112 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2948,7 +2948,7 @@ Event 112 Batch 1 2 6.923761777814550e+02 3.939190124845535e+02 4.398224952082178e+01 -5.676954684419625e+02 3 5.277418353503033e+02 -4.270527740856185e+02 4.970714905179168e+01 3.060499505927539e+02 4 2.798819868682421e+02 3.313376160106501e+01 -9.368939857261346e+01 2.616455178492087e+02 - ME 5.385735959435035e-05 + ME 4.381536575941429e-05 Event 113 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2956,7 +2956,7 @@ Event 113 Batch 1 2 7.174898838850694e+02 -6.130145063482008e+02 3.726797356942233e+02 1.071275347265524e+01 3 1.705115822510491e+02 3.993583199494100e+01 -1.624320619120163e+02 3.309311510932528e+01 4 6.119985338638814e+02 5.730786743532599e+02 -2.102476737822071e+02 -4.380586858198049e+01 - ME 2.197559713387976e-04 + ME 4.914674319256647e-05 Event 114 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2964,7 +2964,7 @@ Event 114 Batch 1 2 6.772826088252357e+02 -1.430288042596954e+02 -3.410390118171982e+02 5.674036356844296e+02 3 6.725037798358682e+02 3.626161999767239e+01 2.510744134018114e+02 -6.228226615527174e+02 4 1.502136113388951e+02 1.067671842620232e+02 8.996459841538707e+01 5.541902586828807e+01 - ME 8.926156406775035e-05 + ME 7.986648389935193e-05 Event 115 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2972,7 +2972,7 @@ Event 115 Batch 1 2 9.320551230331124e+01 1.288474310894606e+01 -2.581623869377880e+01 8.862715576190526e+01 3 6.672654287607164e+02 1.525114284892182e+02 
2.829200767588875e+02 5.847560574856374e+02 4 7.395290589359720e+02 -1.653961715981643e+02 -2.571038380651088e+02 -6.733832132475428e+02 - ME 1.800237703627863e+00 + ME 4.304938165075599e-01 Event 116 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2980,7 +2980,7 @@ Event 116 Batch 1 2 4.951202926530015e+02 -4.575339943514647e+02 4.220102313368785e+01 1.844608951947751e+02 3 3.101750696753587e+02 -4.711582585559527e+01 2.172188132736168e+02 2.163438466008694e+02 4 6.947046376716394e+02 5.046498202070600e+02 -2.594198364073050e+02 -4.008047417956444e+02 - ME 1.933367100533606e-03 + ME 5.988625984136040e-04 Event 117 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2988,7 +2988,7 @@ Event 117 Batch 1 2 6.543248494478489e+02 1.390926466871539e+02 9.107024539473488e+01 6.328510524967589e+02 3 5.040443237953712e+02 6.874740772121054e+01 1.336336536624387e+02 -4.811200690999848e+02 4 3.416308267567792e+02 -2.078400544083643e+02 -2.247038990571737e+02 -1.517309833967742e+02 - ME 4.207453923038474e-04 + ME 3.026560085299302e-04 Event 118 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -2996,7 +2996,7 @@ Event 118 Batch 1 2 5.829230400014206e+02 5.307803371482089e+02 -3.192285892796672e+01 2.388565162167381e+02 3 3.965113090906140e+02 -5.470249758902820e+01 2.256187790844517e+02 -3.214420966810604e+02 4 5.205656509079653e+02 -4.760778395591807e+02 -1.936959201564850e+02 8.258558046432242e+01 - ME 7.464562943747175e-05 + ME 2.168340782914014e-05 Event 119 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3004,7 +3004,7 @@ Event 119 Batch 1 2 3.549567073991255e+02 2.281637891139605e+02 1.474502150787006e+02 2.284600261271838e+02 3 4.727085372220640e+02 7.463684946128350e+01 -3.092948822053327e+02 3.495988811576870e+02 4 6.723347553788102e+02 -3.028006385752440e+02 
1.618446671266322e+02 -5.780589072848707e+02 - ME 1.455012849105755e-02 + ME 1.664672733965846e-03 Event 120 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3012,7 +3012,7 @@ Event 120 Batch 1 2 7.192117275853698e+02 4.094232477570927e+02 -5.552624156333899e+02 -2.032775518283800e+02 3 3.685061529232585e+02 -2.522084621786424e+02 1.741347663658646e+02 2.046087962197375e+02 4 4.122821194913712e+02 -1.572147855784500e+02 3.811276492675253e+02 -1.331244391357209e+00 - ME 9.281995463485567e-05 + ME 1.900262756274459e-05 Event 121 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3020,7 +3020,7 @@ Event 121 Batch 1 2 1.923953846467517e+02 -5.182078839520096e+01 -1.486351786617837e+02 -1.106262789198433e+02 3 6.582127150877787e+02 -3.509182841037630e+02 -1.191939510078701e+02 5.439606035624541e+02 4 6.493919002654695e+02 4.027390724989639e+02 2.678291296696539e+02 -4.333343246426108e+02 - ME 1.925188892577692e-03 + ME 5.360055113881300e-04 Event 122 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3028,7 +3028,7 @@ Event 122 Batch 1 2 6.905732817636248e+02 3.462508192534570e+02 -5.375670569609784e+02 -2.608131264380775e+02 3 7.097575386120018e+02 -2.677396278645660e+02 5.849221766424142e+02 2.998954860604125e+02 4 9.966917962437387e+01 -7.851119138889094e+01 -4.735511968143584e+01 -3.908235962233509e+01 - ME 5.007312135859238e-04 + ME 3.451011759976180e-05 Event 123 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3036,7 +3036,7 @@ Event 123 Batch 1 2 4.035126033432560e+02 2.481103298242076e+01 -3.878573016343356e+02 -1.085059780294573e+02 3 3.541388771651666e+02 1.572344474048876e+02 -3.105653677404273e+02 -6.512161875550808e+01 4 7.423485194915780e+02 -1.820454803873083e+02 6.984226693747627e+02 1.736275967849660e+02 - ME 2.043564129780385e-02 + ME 
3.471230489499830e-03 Event 124 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3044,7 +3044,7 @@ Event 124 Batch 1 2 5.353042728143347e+02 -4.785252055946481e+02 -2.279396245170433e+02 7.488537693644093e+01 3 7.454081943698113e+02 6.785307544150930e+02 3.069354144183444e+02 -3.193811081429426e+01 4 2.192875328158541e+02 -2.000055488204448e+02 -7.899578990130104e+01 -4.294726612214667e+01 - ME 1.399009675490331e-04 + ME 6.765427234678898e-06 Event 125 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3052,7 +3052,7 @@ Event 125 Batch 1 2 7.351681880566981e+02 -1.932492970253984e+01 -4.393064933429818e+02 -5.891592456452273e+02 3 6.537497908129355e+02 -2.883189353576726e+01 3.454898907503182e+02 5.542510679217788e+02 4 1.110820211303664e+02 4.815682323830688e+01 9.381660259266363e+01 3.490817772344844e+01 - ME 1.431077255619906e-04 + ME 6.639428548470109e-05 Event 126 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3060,7 +3060,7 @@ Event 126 Batch 1 2 5.568747108147126e+02 1.149185667256990e+02 4.264979152236775e+02 -3.391204725116689e+02 3 6.934211462641822e+02 -1.939160042589616e+02 -6.294239612595663e+02 2.169215212257340e+02 4 2.497041429211053e+02 7.899743753326281e+01 2.029260460358889e+02 1.221989512859350e+02 - ME 3.344185566612618e-05 + ME 9.143592130512915e-06 Event 127 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3068,7 +3068,7 @@ Event 127 Batch 1 2 7.108931196972316e+02 4.270547743949553e+02 5.664613189451065e+02 -4.598718776252147e+01 3 4.445675167124290e+02 -1.247884466860518e+02 -4.129475031266345e+02 1.074359351009545e+02 4 3.445393635903407e+02 -3.022663277089035e+02 -1.535138158184720e+02 -6.144874733843321e+01 - ME 1.180920695556687e-04 + ME 1.427738327825488e-05 Event 128 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -3076,7 +3076,7 @@ Event 128 Batch 1 2 5.312407894292422e+02 -7.192118124205533e+01 -4.398126160332176e+02 -2.891521793453568e+02 3 5.717192413787027e+02 3.434745903572437e+02 1.811915566412192e+02 4.195923218357252e+02 4 3.970399691920551e+02 -2.715534091151883e+02 2.586210593919984e+02 -1.304401424903685e+02 - ME 1.848006274423395e-04 + ME 3.532660248239223e-05 Event 129 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3084,7 +3084,7 @@ Event 129 Batch 1 2 6.644129951428383e+02 -3.595672586482287e+02 4.645590915434784e+02 3.103882489514914e+02 3 1.967652372382455e+02 -5.204943416929049e+01 8.794498000645085e+00 -1.895522930301724e+02 4 6.388217676189169e+02 4.116166928175192e+02 -4.733535895441232e+02 -1.208359559213191e+02 - ME 3.082956717278722e-04 + ME 9.192558188476414e-05 Event 130 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3092,7 +3092,7 @@ Event 130 Batch 1 2 7.302263990443511e+02 -1.919590472356484e+02 3.836584700935805e+02 -5.909217345563752e+02 3 4.156541164903923e+02 2.203243106780774e+02 -1.767969453775071e+02 3.049071707664833e+02 4 3.541194844652567e+02 -2.836526344242890e+01 -2.068615247160734e+02 2.860145637898919e+02 - ME 3.110012368642411e-05 + ME 2.258971422042701e-05 Event 131 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3100,7 +3100,7 @@ Event 131 Batch 1 2 2.308323688168238e+02 -1.780469473698228e+02 1.469011263880862e+02 1.710582294195638e+00 3 7.308075033948297e+02 5.219262643529272e+02 -3.840435213624620e+02 3.379099810545737e+02 4 5.383601277883465e+02 -3.438793169831044e+02 2.371423949743758e+02 -3.396205633487694e+02 - ME 1.061667055612532e-03 + ME 7.770640764079256e-05 Event 132 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3108,7 +3108,7 @@ Event 132 Batch 1 2 
5.909630762789660e+02 -4.293852116769707e+02 -3.988922148105424e+02 7.583335995300355e+01 3 5.415993952096327e+02 2.260703809971038e+02 3.221145619770360e+02 -3.721079100067703e+02 4 3.674375285114020e+02 2.033148306798666e+02 7.677765283350686e+01 2.962745500537670e+02 - ME 3.321676569401813e-05 + ME 1.628447412544396e-05 Event 133 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3116,7 +3116,7 @@ Event 133 Batch 1 2 4.506052863582997e+02 2.189991325227701e+02 -3.914006430783634e+02 -4.347459771134355e+01 3 4.043998006859111e+02 3.160348074769272e+02 8.738893432792010e+01 2.366946839598570e+02 4 6.449949129557901e+02 -5.350339399996973e+02 3.040117087504433e+02 -1.932200862485142e+02 - ME 3.121497332919934e-04 + ME 8.705579101282482e-05 Event 134 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3124,7 +3124,7 @@ Event 134 Batch 1 2 7.151470882937614e+02 -1.041377497037516e+01 -4.186394096729767e+01 7.138447461686595e+02 3 3.416424731356660e+02 1.638631808685801e+02 3.081581136487586e+01 -2.981925940995343e+02 4 4.432104385705719e+02 -1.534494058982047e+02 1.104812960242199e+01 -4.156521520691248e+02 - ME 5.534325530265236e-02 + ME 6.342792451335309e-03 Event 135 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3132,7 +3132,7 @@ Event 135 Batch 1 2 7.115730144432832e+02 -3.219296530898238e+02 2.184242454110169e+02 -5.958089478700319e+02 3 1.627059459894212e+02 -6.880794311551747e+01 -3.259803939022061e+01 1.437917231708342e+02 4 6.257210395672955e+02 3.907375962053413e+02 -1.858262060207963e+02 4.520172246991979e+02 - ME 2.112989182930814e-04 + ME 1.277979532321233e-04 Event 136 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3140,7 +3140,7 @@ Event 136 Batch 1 2 7.195404287114588e+02 -4.369992732083461e+02 -4.270318019286997e+02 3.800182941743402e+02 3 
6.668605996318223e+02 3.634158794560479e+02 4.690430049045651e+02 -3.043527845290675e+02 4 1.135989716567186e+02 7.358339375229815e+01 -4.201120297586535e+01 -7.566550964527264e+01 - ME 1.804344388349211e-03 + ME 7.515399240093053e-05 Event 137 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3148,7 +3148,7 @@ Event 137 Batch 1 2 6.722782806744999e+02 -6.045581260407005e+02 -2.538460778300668e+02 1.484241478840623e+02 3 6.869263774705689e+02 6.661257235671316e+02 1.481819739565761e+02 -7.865412297735662e+01 4 1.407953418549304e+02 -6.156759752643097e+01 1.056641038734908e+02 -6.977002490670534e+01 - ME 5.192812231664224e-04 + ME 2.119149330726453e-05 Event 138 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3156,7 +3156,7 @@ Event 138 Batch 1 2 6.463287544295633e+02 8.684709774942756e+01 2.409249839962013e+02 -5.934253049048401e+02 3 3.917330799270068e+02 1.767690441671677e+02 4.696120064017492e+01 3.464132742372293e+02 4 4.619381656434300e+02 -2.636161419165952e+02 -2.878861846363762e+02 2.470120306676108e+02 - ME 5.804753959762886e-05 + ME 4.203806696206548e-05 Event 139 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3164,7 +3164,7 @@ Event 139 Batch 1 2 2.994802063237944e+02 -1.272876183039153e+02 6.552211336810879e+00 2.710042891410713e+02 3 7.257546970836092e+02 -8.848613612326799e+00 5.127896146768584e+00 -7.256826352181574e+02 4 4.747650965925943e+02 1.361362319162416e+02 -1.168010748357900e+01 4.546783460770868e+02 - ME 1.724196014694060e-04 + ME 1.500396153249019e-04 Event 140 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3172,7 +3172,7 @@ Event 140 Batch 1 2 7.326756101999780e+02 5.655005379385240e+02 4.343799907428446e+02 1.683351270988810e+02 3 7.428339005597779e+02 -5.680473426214219e+02 -4.534832054058505e+02 -1.532233754243464e+02 4 
2.449048924024402e+01 2.546804682897962e+00 1.910321466300584e+01 -1.511175167453447e+01 - ME 4.669436438173466e-03 + ME 1.024603362434272e-04 Event 141 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3180,7 +3180,7 @@ Event 141 Batch 1 2 7.363238871411332e+02 -6.772722174663238e+02 -2.824373475598683e+02 -6.086341204880675e+01 3 5.504260535970963e+02 4.650298533191528e+02 2.914345410616540e+02 4.221355560271704e+01 4 2.132500592617708e+02 2.122423641471711e+02 -8.997193501785816e+00 1.864985644608987e+01 - ME 7.300791864660033e-05 + ME 1.166401869382226e-05 Event 142 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3188,7 +3188,7 @@ Event 142 Batch 1 2 5.862280565156834e+02 4.248793793115829e+01 -2.479279504752411e+02 -5.295184989682986e+02 3 4.287264749982929e+02 -3.025296967755320e+02 2.785471849307642e+02 1.212173201341831e+02 4 4.850454684860405e+02 2.600417588443628e+02 -3.061923445551928e+01 4.083011788341197e+02 - ME 4.569028399965169e-05 + ME 1.949810022878841e-05 Event 143 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3196,7 +3196,7 @@ Event 143 Batch 1 2 2.464531733710510e+02 4.046044690030688e+01 -2.103865804466287e+02 1.218179201483223e+02 3 5.378449948854583e+02 4.607829603950880e+02 -2.747641700963839e+02 3.822241180409925e+01 4 7.157018317434903e+02 -5.012434072953949e+02 4.851507505430126e+02 -1.600403319524219e+02 - ME 1.284493741497843e-03 + ME 4.863434295951330e-04 Event 144 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3204,7 +3204,7 @@ Event 144 Batch 1 2 5.367418008803521e+02 -1.343004856786532e+02 -4.048537736989352e+02 -3.258044847458254e+02 3 6.294877130859599e+02 3.313530054622211e+02 5.282137272543231e+02 8.631468610520756e+01 4 3.337704860336884e+02 -1.970525197835678e+02 -1.233599535553879e+02 2.394897986406179e+02 - ME 
2.612855607885159e-05 + ME 8.754930746282009e-06 Event 145 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3212,7 +3212,7 @@ Event 145 Batch 1 2 6.805380148481771e+01 -3.411514819754512e+01 -4.339750646760406e+01 -3.980116822894492e+01 3 6.831461500979880e+02 -3.834019790669201e+02 -2.756424954453614e+02 -4.936727656514237e+02 4 7.488000484171945e+02 4.175171272644653e+02 3.190400019129655e+02 5.334739338803686e+02 - ME 4.832444287218038e-01 + ME 4.117012994651258e-01 Event 146 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3220,7 +3220,7 @@ Event 146 Batch 1 2 5.031746658797123e+02 4.202301876294930e+02 2.767377273314875e+02 2.750283520766640e+00 3 4.317115817339341e+02 -1.098088257924671e+02 -5.455162180567243e+01 4.139336083717602e+02 4 5.651137523863538e+02 -3.104213618370259e+02 -2.221861055258150e+02 -4.166838918925268e+02 - ME 4.446377084117306e-03 + ME 1.122040831263755e-03 Event 147 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3228,7 +3228,7 @@ Event 147 Batch 1 2 4.251223043705630e+02 -4.223502783198938e+02 -4.694338569631599e+01 1.206377286808446e+01 3 5.457819748703678e+02 2.791608945230574e+02 -4.384138579515959e+02 -1.665546403390879e+02 4 5.290957207590696e+02 1.431893837968364e+02 4.853572436479118e+02 1.544908674710035e+02 - ME 5.820013407126093e-05 + ME 1.117959404473985e-05 Event 148 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3236,7 +3236,7 @@ Event 148 Batch 1 2 6.905785821272525e+02 6.249608768654489e+02 -6.243387159972350e+01 -2.870970082698929e+02 3 1.361638260920089e+02 2.862044352088506e+01 1.704210379179796e+01 1.320266050727362e+02 4 6.732575917807402e+02 -6.535813203863343e+02 4.539176780792534e+01 1.550704031971573e+02 - ME 9.573948308169230e-04 + ME 5.047601105033982e-04 Event 149 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3244,7 +3244,7 @@ Event 149 Batch 1 2 6.694705528096943e+02 -5.216497821741067e+02 -3.785079074709545e+02 1.811189935345937e+02 3 2.821401257551277e+02 1.148500354702071e-01 2.786662494166578e+02 -4.413795199872407e+01 4 5.483893214351779e+02 5.215349321386365e+02 9.984165805429673e+01 -1.369810415358697e+02 - ME 1.943324414096923e-04 + ME 3.486097449584098e-05 Event 150 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3252,7 +3252,7 @@ Event 150 Batch 1 2 4.637486188995366e+02 -4.033412855298819e+02 -2.279949807412008e+02 -1.992178895453991e+01 3 3.756800751656199e+02 6.230662615514293e+01 -2.632310737913946e+02 -2.606967683041707e+02 4 6.605713059348438e+02 3.410346593747391e+02 4.912260545325952e+02 2.806185572587107e+02 - ME 2.156945366470290e-04 + ME 4.211370643652993e-05 Event 151 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3260,7 +3260,7 @@ Event 151 Batch 1 2 3.821954355913596e+02 -2.528320044280690e+02 2.861764538722267e+02 1.588602445142563e+01 3 6.796189325418250e+02 2.911670128135291e+02 -4.900375979142738e+02 3.700902818893582e+02 4 4.381856318668152e+02 -3.833500838546018e+01 2.038611440420471e+02 -3.859763063407838e+02 - ME 8.197229841786387e-03 + ME 1.923941526207248e-04 Event 152 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3268,7 +3268,7 @@ Event 152 Batch 1 2 6.751133298339792e+02 -2.999578895043981e+02 -2.855974213275218e+02 -5.331391803034741e+02 3 4.976977783498468e+02 -3.003988119418482e+00 1.843802943840355e+02 4.622747685874795e+02 4 3.271888918161745e+02 3.029618776238166e+02 1.012171269434863e+02 7.086441171599445e+01 - ME 1.204579535049519e-04 + ME 6.977738125195056e-05 Event 153 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3276,7 +3276,7 @@ Event 
153 Batch 1 2 1.729293620257127e+02 1.558357805102956e+02 -7.193392860849491e+01 2.110174585940510e+01 3 6.524550819255464e+02 2.410158908712478e+02 5.786677971610501e+02 1.809766692333240e+02 4 6.746155560487412e+02 -3.968516713815435e+02 -5.067338685525552e+02 -2.020784150927291e+02 - ME 5.985591428637023e-04 + ME 1.391654510317005e-04 Event 154 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3284,7 +3284,7 @@ Event 154 Batch 1 2 6.585658455851002e+02 -2.410305357139302e+02 -2.116446673272157e+02 -5.751693564652295e+02 3 5.764400833248005e+02 3.388133979948972e+02 3.092747322371399e+02 3.490527051926400e+02 4 2.649940710900988e+02 -9.778286228096688e+01 -9.763006490992416e+01 2.261166512725894e+02 - ME 3.655181799213059e-05 + ME 2.686434432328395e-05 Event 155 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3292,7 +3292,7 @@ Event 155 Batch 1 2 5.686586231936359e+02 -1.693366246265498e+02 -1.542203680657918e+02 5.204938187588979e+02 3 1.882190564276536e+02 -1.089234770645493e+02 -9.145416397064866e+01 1.232810822434430e+02 4 7.431223203787102e+02 2.782601016910992e+02 2.456745320364404e+02 -6.437749010023409e+02 - ME 6.696396361607482e-01 + ME 4.701119881405690e-01 Event 156 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3300,7 +3300,7 @@ Event 156 Batch 1 2 6.143652095725128e+02 2.879464601546110e+02 5.379391909976823e+02 -7.178351904348040e+01 3 6.287751645293085e+02 -4.584164185734781e+02 -4.225140875260598e+02 -8.181956094447702e+01 4 2.568596258981782e+02 1.704699584188668e+02 -1.154251034716223e+02 1.536030799879581e+02 - ME 2.899571701789112e-05 + ME 7.769660148731367e-06 Event 157 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3308,7 +3308,7 @@ Event 157 Batch 1 2 5.050842109798973e+02 4.185498850973046e+02 -1.305174306570672e+02 
-2.507812875014723e+02 3 5.170424494038050e+02 -3.084595065654854e+02 3.930456446728388e+02 -1.330441599566699e+02 4 4.778733396162975e+02 -1.100903785318191e+02 -2.625282140157716e+02 3.838254474581424e+02 - ME 4.033251359625283e-05 + ME 1.243977993100618e-05 Event 158 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3316,7 +3316,7 @@ Event 158 Batch 1 2 4.312542366204098e+02 -3.114503370626313e+02 2.737030704635235e+02 1.185982013584742e+02 3 6.944315393047829e+02 2.166643175309468e+02 -6.173965008138002e+02 -2.326226495269423e+02 4 3.743142240748070e+02 9.478601953168439e+01 3.436934303502764e+02 1.140244481684682e+02 - ME 3.680357310121394e-05 + ME 5.864250821924803e-06 Event 159 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3324,7 +3324,7 @@ Event 159 Batch 1 2 5.860112473308646e+02 -1.581297551692178e+02 4.935632758462007e+02 2.734948907463652e+02 3 3.772013313646349e+02 -2.371132827856262e+02 -1.305099443644436e+02 -2.627266448837395e+02 4 5.367874213045002e+02 3.952430379548442e+02 -3.630533314817573e+02 -1.076824586262577e+01 - ME 1.030382455754272e-04 + ME 2.805189658646002e-05 Event 160 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3332,7 +3332,7 @@ Event 160 Batch 1 2 5.883409724804535e+02 -3.739819298758817e+02 -2.887651121595530e+02 3.505671490956299e+02 3 4.300332553173178e+02 1.788055146224819e+02 3.829208006453583e+02 7.955406370837679e+01 4 4.816257722022287e+02 1.951764152533999e+02 -9.415568848580530e+01 -4.301212128040066e+02 - ME 9.797271586219467e-03 + ME 2.307516153071828e-04 Event 161 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3340,7 +3340,7 @@ Event 161 Batch 1 2 6.868305165969147e+02 4.119610488151656e+00 5.515184990814985e+02 4.093244831537709e+02 3 3.260821955312833e+02 -1.956999890649130e+02 -2.483451099187458e+02 
-7.972338993006402e+01 4 4.870872878718022e+02 1.915803785767614e+02 -3.031733891627526e+02 -3.296010932237070e+02 - ME 1.075603053132144e-03 + ME 9.860610555787331e-05 Event 162 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3348,7 +3348,7 @@ Event 162 Batch 1 2 2.159818802305119e+02 -2.018126805027919e+02 4.096951387107715e+01 -6.512536763314942e+01 3 6.870078865581224e+02 4.896730732821633e+02 -2.356527215298929e+02 -4.203188222421333e+02 4 5.970102332113654e+02 -2.878603927793715e+02 1.946832076588156e+02 4.854441898752826e+02 - ME 5.344822454174306e-05 + ME 2.809071549115161e-05 Event 163 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3356,7 +3356,7 @@ Event 163 Batch 1 2 4.889699854403287e+02 -4.067839821807834e+01 -2.740835242435768e+02 4.028835269878222e+02 3 4.282392920294498e+02 4.007468150560176e+02 -8.832740907173851e+01 -1.224301852772270e+02 4 5.827907225302220e+02 -3.600684168379390e+02 3.624109333153153e+02 -2.804533417105952e+02 - ME 4.336231422638298e-04 + ME 1.173701793303044e-04 Event 164 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3364,7 +3364,7 @@ Event 164 Batch 1 2 6.224346677404150e+02 -1.282049393554146e+02 5.480608628970117e+02 -2.657399098565701e+02 3 7.444531740822750e+02 1.794330131141779e+02 -6.708967511266460e+02 2.681638893170603e+02 4 1.331121581773107e+02 -5.122807375876333e+01 1.228358882296343e+02 -2.423979460490191e+00 - ME 1.368953177788070e-04 + ME 1.571413941583783e-05 Event 165 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3372,7 +3372,7 @@ Event 165 Batch 1 2 6.980339706506675e+02 -5.154669325341684e+01 -4.947847840614098e+02 4.896757907618869e+02 3 1.362964882116331e+02 4.252532371924361e+01 -5.641238783031591e+01 -1.165588780002596e+02 4 6.656695411377010e+02 9.021369534174053e+00 
5.511971718917263e+02 -3.731169127616273e+02 - ME 1.450267418906797e-03 + ME 4.238311927693088e-04 Event 166 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3380,7 +3380,7 @@ Event 166 Batch 1 2 3.060640747281171e+02 -1.981167412190918e+02 -9.095380261170779e+01 -2.148310510107333e+02 3 5.580104478575086e+02 -3.585720992432471e+02 -1.558095186186280e+02 3.981521109704927e+02 4 6.359254774143739e+02 5.566888404623389e+02 2.467633212303362e+02 -1.833210599597597e+02 - ME 3.000804338470548e-04 + ME 1.099447007687216e-04 Event 167 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3388,7 +3388,7 @@ Event 167 Batch 1 2 2.833153623322893e+02 2.526850217013923e+02 8.687924899084067e+01 9.417998957332070e+01 3 6.595685044563415e+02 -8.780626893611850e+01 -2.875856231737449e+02 -5.870393347553995e+02 4 5.571161332113688e+02 -1.648787527652738e+02 2.007063741829043e+02 4.928593451820789e+02 - ME 7.367447958524992e-05 + ME 4.244421486768831e-05 Event 168 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3396,7 +3396,7 @@ Event 168 Batch 1 2 6.026267479353969e+02 -5.987968578530475e+02 5.775180228477150e+00 6.758674164241529e+01 3 4.991211680715713e+02 3.812575567959843e+02 3.220701575873951e+02 -5.952259631185711e+00 4 3.982520839930309e+02 2.175393010570631e+02 -3.278453378158730e+02 -6.163448201122968e+01 - ME 9.606399998327532e-05 + ME 1.203107058680061e-05 Event 169 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3404,7 +3404,7 @@ Event 169 Batch 1 2 5.510662376679772e+02 -9.251111075413947e+01 -5.291920243323356e+02 -1.227660134875281e+02 3 5.034535790022877e+02 -2.816014265681677e+02 3.283802195198170e+02 2.575511098657944e+02 4 4.454801833297348e+02 3.741125373223072e+02 2.008118048125185e+02 -1.347850963782663e+02 - ME 1.532484123791625e-04 + ME 
2.085195230877358e-05 Event 170 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3412,7 +3412,7 @@ Event 170 Batch 1 2 2.814808559369750e+02 3.658097943502287e+01 -1.412301634042880e+02 -2.407225480659935e+02 3 6.646522150540470e+02 2.753499086551696e+02 -1.631412967142655e+02 5.825203104495404e+02 4 5.538669290089779e+02 -3.119308880901926e+02 3.043714601185535e+02 -3.417977623835468e+02 - ME 7.823510217753851e-04 + ME 2.587160315460459e-04 Event 171 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3420,7 +3420,7 @@ Event 171 Batch 1 2 1.777965289077954e+02 -6.143496808852239e+01 -1.603735842336773e+00 1.668375809551635e+02 3 7.439290290569696e+02 2.163074211412066e+01 -1.907051550939623e+01 -7.433699124308462e+02 4 5.782744420352348e+02 3.980422597440174e+01 2.067425135173305e+01 5.765323314756826e+02 - ME 2.063755640794395e-03 + ME 1.981167274383509e-03 Event 172 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3428,7 +3428,7 @@ Event 172 Batch 1 2 1.369499454750680e+02 -1.250080331667568e+01 -3.518152151649629e+01 -1.317622025690455e+02 3 6.692885586315896e+02 -2.346283187163472e+02 -6.130705295376303e+02 1.305421486874673e+02 4 6.937614958933425e+02 2.471291220330227e+02 6.482520510541266e+02 1.220053881578238e+00 - ME 5.039586079692636e-04 + ME 1.548169060571347e-04 Event 173 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3436,7 +3436,7 @@ Event 173 Batch 1 2 7.088772083623137e+02 4.973951266878932e+01 3.171232495758680e+01 -7.064185769505260e+02 3 5.785136264307895e+02 8.584813303397833e+01 5.766505028397120e+01 5.691949191590089e+02 4 2.126091652068944e+02 -1.355876457027672e+02 -8.937737524155732e+01 1.372236577915166e+02 - ME 1.743760900867476e-04 + ME 1.732961413682620e-04 Event 174 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -3444,7 +3444,7 @@ Event 174 Batch 1 2 4.367208701713482e+02 -3.923163287174704e+01 4.325755195957351e+02 -4.543585887727652e+01 3 3.528978856725088e+02 9.622572295106905e+01 1.987077746703234e+02 -2.753048278549415e+02 4 7.103812441561454e+02 -5.699409007932221e+01 -6.312832942660567e+02 3.207406867322186e+02 - ME 9.353677491192390e-04 + ME 1.541208918572365e-04 Event 175 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3452,7 +3452,7 @@ Event 175 Batch 1 2 6.418562164876806e+02 1.962785648722137e+02 -6.110736372974047e+02 -6.567908015856712e+00 3 4.843421844702149e+02 -1.886631806266161e+02 3.569879071908527e+02 -2.674942804112337e+02 4 3.738015990421035e+02 -7.615384245597569e+00 2.540857301065516e+02 2.740621884270906e+02 - ME 3.029111560812189e-05 + ME 1.279055979705581e-05 Event 176 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3460,7 +3460,7 @@ Event 176 Batch 1 2 6.288652703123263e+02 4.005522031116294e+02 3.691482793515075e+02 3.142594606996526e+02 3 7.209127580467475e+02 -4.124575135572966e+02 -5.165298058232565e+02 -2.877341896975221e+02 4 1.502219716409257e+02 1.190531044566666e+01 1.473815264717492e+02 -2.652527100213051e+01 - ME 1.719274466020296e-04 + ME 1.300720357566141e-05 Event 177 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3468,7 +3468,7 @@ Event 177 Batch 1 2 4.716578040000077e+02 -4.521622645932388e+02 -1.012739918234145e+01 1.338200520767543e+02 3 3.021382980750606e+02 -2.714821202364266e+02 6.773215888881064e+01 -1.140059832109250e+02 4 7.262038979249317e+02 7.236443848296653e+02 -5.760475970646905e+01 -1.981406886582933e+01 - ME 2.354271252348000e-03 + ME 6.442260552556652e-04 Event 178 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3476,7 +3476,7 @@ Event 178 Batch 1 2 
7.350088877399502e+02 -3.684484945749095e+02 -2.561732769425163e+02 -5.821159885132296e+02 3 1.415495174310248e+02 7.181268644032879e+01 1.095010133995263e+02 5.374692563910759e+01 4 6.234415948290248e+02 2.966358081345808e+02 1.466722635429900e+02 5.283690628741219e+02 - ME 1.035408980291912e-04 + ME 6.828487731379645e-05 Event 179 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3484,7 +3484,7 @@ Event 179 Batch 1 2 7.426064621425413e+02 6.748632301344054e+01 7.201624948975951e+02 -1.681544967131679e+02 3 5.821031882499326e+02 8.394276920418550e-01 -5.588194474899291e+02 1.629854049874919e+02 4 1.752903496075256e+02 -6.832575070548241e+01 -1.613430474076661e+02 5.169091725675888e+00 - ME 9.197132478706931e-05 + ME 1.412410550503903e-05 Event 180 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3492,7 +3492,7 @@ Event 180 Batch 1 2 6.099515195485484e+02 2.272495331206023e+02 1.762692760011278e+02 -5.378918555193875e+02 3 5.718889655176699e+02 4.324570510796980e+01 -3.278409766521432e+02 4.665909256493895e+02 4 3.181595149337819e+02 -2.704952382285720e+02 1.515717006510154e+02 7.130092986999803e+01 - ME 5.401477812349802e-05 + ME 3.043963963928669e-05 Event 181 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3500,7 +3500,7 @@ Event 181 Batch 1 2 1.206370886915177e+02 -8.151225636567759e+01 1.767749325039422e+01 8.715827822142556e+01 3 6.451493408002739e+02 -6.748216257939080e+01 4.373428479320614e+02 4.694625256943417e+02 4 7.342135705082084e+02 1.489944189450684e+02 -4.550203411824557e+02 -5.566208039157672e+02 - ME 7.131653341377736e-02 + ME 2.625479922313071e-02 Event 182 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3508,7 +3508,7 @@ Event 182 Batch 1 2 4.626866082364760e+02 -3.084610429505738e+02 3.306629079434072e+02 9.794245113140897e+01 3 
4.974966719253473e+02 3.582955998671217e+02 1.664640547097976e+02 -3.023523113558579e+02 4 5.398167198381765e+02 -4.983455691654795e+01 -4.971269626532048e+02 2.044098602244489e+02 - ME 5.959042767905828e-05 + ME 1.414799589613471e-05 Event 183 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3516,7 +3516,7 @@ Event 183 Batch 1 2 3.304723045950491e+02 3.244647182058462e+00 3.209425641774955e+02 7.872284845075714e+01 3 4.379804819457451e+02 2.312428523500660e+02 3.131807483468383e+02 2.006775141049615e+02 4 7.315472134592065e+02 -2.344874995321247e+02 -6.341233125243344e+02 -2.794003625557186e+02 - ME 4.899988668912175e-03 + ME 2.330806393221907e-03 Event 184 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3524,7 +3524,7 @@ Event 184 Batch 1 2 7.470051035005908e+02 -4.953964753944513e+02 -4.028924750569613e+02 3.876552725878485e+02 3 2.183325716323390e+02 1.119040172022777e+02 1.451703047217021e+02 -1.186262424448778e+02 4 5.346623248670695e+02 3.834924581921736e+02 2.577221703352594e+02 -2.690290301429710e+02 - ME 5.441344453720516e-04 + ME 7.987999480474686e-05 Event 185 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3532,7 +3532,7 @@ Event 185 Batch 1 2 4.448583927494090e+02 2.810173563272025e+02 -3.384637477435971e+02 6.610995769032235e+01 3 6.236443795626774e+02 -1.690803760724666e+02 5.125139620028374e+02 3.125277225134823e+02 4 4.314972276879136e+02 -1.119369802547359e+02 -1.740502142592404e+02 -3.786376802038046e+02 - ME 6.949230823829164e-03 + ME 1.405605442011058e-04 Event 186 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3540,7 +3540,7 @@ Event 186 Batch 1 2 6.802792190696962e+02 -1.681815241656754e+02 5.427923640013703e+02 3.739936368565512e+02 3 6.331554869749547e+02 3.172201723440435e+02 -4.588808692389625e+02 -2.994755095011972e+02 4 
1.865652939553488e+02 -1.490386481783679e+02 -8.391149476240778e+01 -7.451812735535422e+01 - ME 3.276943053321406e-04 + ME 3.045129627255903e-05 Event 187 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3548,7 +3548,7 @@ Event 187 Batch 1 2 7.472897115267965e+02 -6.988402471604775e+02 -2.391684329048669e+02 1.134137672609268e+02 3 6.826908170748527e+02 6.328852277257668e+02 2.212839847556716e+02 -1.286718241709738e+02 4 7.001947139835140e+01 6.595501943471052e+01 1.788444814919547e+01 1.525805691004725e+01 - ME 1.461490870437387e-04 + ME 3.485925693242860e-05 Event 188 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3556,7 +3556,7 @@ Event 188 Batch 1 2 6.496068877140275e+02 -5.024316730938291e+02 -3.980061777252906e+02 -1.055585379310702e+02 3 4.885976180718368e+02 4.424928723138696e+02 1.459942636040002e+02 -1.470148473169288e+02 4 3.617954942141354e+02 5.993880077995960e+01 2.520119141212904e+02 2.525733852479991e+02 - ME 2.843805826594158e-05 + ME 1.006519408431335e-05 Event 189 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3564,7 +3564,7 @@ Event 189 Batch 1 2 4.082379946778654e+02 2.679237131173331e+02 -7.718184435750955e+01 2.981913934867987e+02 3 5.864211573889181e+02 -5.780822197382728e+02 -6.394893886953379e+01 7.497502433004084e+01 4 5.053408479332167e+02 3.101585066209396e+02 1.411307832270433e+02 -3.731664178168398e+02 - ME 1.937644878671120e-03 + ME 1.322787627040098e-04 Event 190 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3572,7 +3572,7 @@ Event 190 Batch 1 2 6.472516823166364e+02 6.463779961822676e+02 -3.289365889632791e+01 6.945035458816692e+00 3 4.318767277050750e+02 -3.286790725415815e+02 -7.183748821760624e+00 -2.800642229191639e+02 4 4.208715899782885e+02 -3.176989236406859e+02 4.007740771808847e+01 2.731191874603472e+02 - ME 
3.409584379294133e-05 + ME 1.272332211942340e-05 Event 191 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3580,7 +3580,7 @@ Event 191 Batch 1 2 6.757500036387052e+02 6.222744522021635e+02 -2.261571472854044e+02 1.351499844096745e+02 3 3.644673602666567e+02 -2.020102809038697e+02 1.114149692296405e+02 -2.821613151026251e+02 4 4.597826360946380e+02 -4.202641712982938e+02 1.147421780557637e+02 1.470113306929507e+02 - ME 5.389305783035389e-05 + ME 1.560703181590231e-05 Event 192 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3588,7 +3588,7 @@ Event 192 Batch 1 2 7.394562478491531e+02 -7.307873850878615e+02 3.988568028534699e+01 1.056147375500683e+02 3 8.098058518630978e+01 5.419286926826393e+01 4.244928426361276e+00 -6.002473390399248e+01 4 6.795631669645365e+02 6.765945158195976e+02 -4.413060871170821e+01 -4.559000364607596e+01 - ME 4.204295748489254e-04 + ME 1.231033846344155e-04 Event 193 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3596,7 +3596,7 @@ Event 193 Batch 1 2 5.607395612273153e+02 -3.164229781907934e+02 -3.517992386171808e+02 -3.009030576558548e+02 3 3.741643617741927e+02 -2.156271676189966e+02 1.666697084176705e+02 2.563690747778811e+02 4 5.650960769984922e+02 5.320501458097899e+02 1.851295301995104e+02 4.453398287797368e+01 - ME 9.141090879934244e-05 + ME 3.026844143728605e-05 Event 194 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3604,7 +3604,7 @@ Event 194 Batch 1 2 5.729373416862012e+02 -2.155045544874616e+02 -1.679805246197324e+02 5.035846779262559e+02 3 2.831035485618876e+02 -2.543279085173982e+02 1.042261812492671e+02 -6.783684323208054e+01 4 6.439591097519118e+02 4.698324630048598e+02 6.375434337046515e+01 -4.357478346941756e+02 - ME 1.781231321893996e-03 + ME 5.497724763810379e-04 Event 195 Batch 1 0 7.500000000000000e+02 
0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3612,7 +3612,7 @@ Event 195 Batch 1 2 5.572874060171201e+02 -5.433144409127298e+02 3.646295232533866e+01 1.185290019729285e+02 3 6.765845568040619e+02 5.574999049241243e+02 -1.212989803269169e+01 -3.831623469093195e+02 4 2.661280371788181e+02 -1.418546401139455e+01 -2.433305429264712e+01 2.646333449363910e+02 - ME 3.395618115588225e-04 + ME 3.378534889977447e-04 Event 196 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3620,7 +3620,7 @@ Event 196 Batch 1 2 5.405888343305829e+02 3.940239871950471e+02 -8.826690628749978e+01 -3.594305754554688e+02 3 6.983754392688073e+02 -3.888370902622853e+02 -5.513072771506098e+01 5.774898910559966e+02 4 2.610357264006097e+02 -5.186896932761887e+00 1.433976340025607e+02 -2.180593156005277e+02 - ME 5.539073969003598e-03 + ME 2.676929502290073e-04 Event 197 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3628,7 +3628,7 @@ Event 197 Batch 1 2 2.783346334111661e+02 2.282410890438732e+02 -1.474467226896361e+02 6.029624695020830e+01 3 6.434654504578666e+02 1.172104173128919e+01 6.205939438823057e+02 1.696277097949658e+02 4 5.781999161309674e+02 -2.399621307751624e+02 -4.731472211926695e+02 -2.299239567451741e+02 - ME 3.321087064690878e-04 + ME 4.280180350752636e-05 Event 198 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3636,7 +3636,7 @@ Event 198 Batch 1 2 4.349536439683943e+02 1.774777254208009e+02 -9.709992209949135e+01 3.850427697141142e+02 3 4.134500153047116e+02 7.095914770071803e+01 -4.041194890923881e+02 -5.092301099466194e+01 4 6.515963407268921e+02 -2.484368731215197e+02 5.012194111918782e+02 -3.341197587194521e+02 - ME 7.849443582399766e-04 + ME 2.926862112764983e-04 Event 199 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3644,7 +3644,7 @@ 
Event 199 Batch 1 2 6.682109290882580e+02 2.136897997740939e+02 -5.035763266519416e+02 3.837361052354048e+02 3 1.424120473397155e+02 8.952788458880865e+01 -4.686863299276860e+01 -1.003458038481504e+02 4 6.893770235720265e+02 -3.032176843629025e+02 5.504449596447103e+02 -2.833903013872543e+02 - ME 1.167594898598604e-03 + ME 4.183851150998592e-04 Event 200 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3652,7 +3652,7 @@ Event 200 Batch 1 2 5.959952693237885e+02 -4.878566955018547e+02 -2.510837703973929e+01 -3.414319479966339e+02 3 4.479637599869168e+02 4.499951041477978e+01 7.146287716862105e+01 4.399313940955211e+02 4 4.560409706892941e+02 4.428571850870749e+02 -4.635450012888173e+01 -9.849944609888662e+01 - ME 5.545496796633981e-04 + ME 3.228844805909175e-04 Event 201 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3660,7 +3660,7 @@ Event 201 Batch 1 2 5.203096708642927e+02 -1.112696379946441e+02 1.367824427202020e+02 4.895219960522141e+02 3 2.871951825199399e+02 -2.582762312778227e+02 1.200876310962787e+02 3.678888524092984e+01 4 6.924951466157675e+02 3.695458692724667e+02 -2.568700738164807e+02 -5.263108812931440e+02 - ME 6.577575910850049e-03 + ME 2.285182473348715e-03 Event 202 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3668,7 +3668,7 @@ Event 202 Batch 1 2 2.158792376054218e+02 2.112389782008981e+01 -7.195062193526132e+01 -2.024369881546198e+02 3 5.463652944256570e+02 2.787950008966254e+02 -3.108926376755554e+02 -3.523267663221479e+02 4 7.377554679689213e+02 -2.999188987167153e+02 3.828432596108168e+02 5.547637544767679e+02 - ME 8.695282964050810e-03 + ME 1.952686275320307e-03 Event 203 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3676,7 +3676,7 @@ Event 203 Batch 1 2 7.124273471334275e+02 4.879265047129839e+02 -1.059167473143779e+02 
-5.081949365946950e+02 3 6.746108110440506e+02 -5.248642991835990e+02 4.352799102536777e+01 4.215714978711400e+02 4 1.129618418225217e+02 3.693779447061509e+01 6.238875628901040e+01 8.662343872355494e+01 - ME 5.361938367485652e-05 + ME 4.211918129012132e-05 Event 204 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3684,7 +3684,7 @@ Event 204 Batch 1 2 7.084787759842808e+02 4.992472551829619e+02 -4.528122431715626e+02 -2.183012291454193e+02 3 1.034373169902747e+02 -8.959882065299325e+01 -3.938861547415055e+01 -3.346441176487074e+01 4 6.880839070254444e+02 -4.096484345299685e+02 4.922008586457131e+02 2.517656409102901e+02 - ME 2.988048706021647e-04 + ME 1.033102023766027e-04 Event 205 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3692,7 +3692,7 @@ Event 205 Batch 1 2 6.496569846879349e+02 -5.869603795046561e+02 -2.345911576090251e+02 1.499956646614410e+02 3 2.543878192344406e+02 -1.851019090219859e+00 2.474675926596849e+02 -5.890268997594536e+01 4 5.959551960776247e+02 5.888113985948760e+02 -1.287643505065981e+01 -9.109297468549572e+01 - ME 1.871447246980874e-04 + ME 4.134215827558992e-05 Event 206 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3700,7 +3700,7 @@ Event 206 Batch 1 2 6.172060642836410e+02 2.978040691523503e+02 4.166709400833434e+02 3.444435946201744e+02 3 7.205754982426181e+02 -2.468045809177361e+02 -5.690387091428452e+02 -3.667580878490107e+02 4 1.622184374737409e+02 -5.099948823461420e+01 1.523677690595017e+02 2.231449322883641e+01 - ME 7.356489425273393e-05 + ME 1.138691716042452e-05 Event 207 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3708,7 +3708,7 @@ Event 207 Batch 1 2 5.250113096394139e+02 -1.091977068802181e+02 -4.322753509449321e+02 2.772196909074646e+02 3 5.240251005653129e+02 3.541948269240045e+02 3.738549241960732e+02 
9.685466564450643e+01 4 4.509635897952731e+02 -2.449971200437864e+02 5.842042674885889e+01 -3.740743565519710e+02 - ME 3.378615964480245e-03 + ME 9.518274156960593e-05 Event 208 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3716,7 +3716,7 @@ Event 208 Batch 1 2 4.449444343820048e+02 1.928662436733418e+02 -3.595193210859464e+02 1.775500478872298e+02 3 4.894053462810564e+02 -2.195789585225567e+02 2.295326432211599e+02 3.723136307450180e+02 4 5.656502193369389e+02 2.671271484921488e+01 1.299866778647865e+02 -5.498636786322478e+02 - ME 2.068943926258950e-01 + ME 2.179806976662403e-03 Event 209 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3724,7 +3724,7 @@ Event 209 Batch 1 2 4.949423498078044e+02 -2.830370809537592e+02 -1.684680620467476e+02 -3.694271951395289e+02 3 6.326444171345161e+02 3.898538983719823e+02 -1.748162179498052e+02 4.665749526039372e+02 4 3.724132330576786e+02 -1.068168174182231e+02 3.432842799965525e+02 -9.714775746440780e+01 - ME 1.473942246791387e-04 + ME 3.638076645868775e-05 Event 210 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3732,7 +3732,7 @@ Event 210 Batch 1 2 5.469464199121014e+02 -4.947084169679945e+02 2.319240083666633e+02 -2.500445517953792e+01 3 2.929141603572806e+02 -5.602902696925145e+01 2.099470855189298e+01 2.867379913571110e+02 4 6.601394197306178e+02 5.507374439372461e+02 -2.529187169185561e+02 -2.617335361775729e+02 - ME 1.577330101330874e-03 + ME 7.792286450853471e-04 Event 211 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3740,7 +3740,7 @@ Event 211 Batch 1 2 5.484404249965427e+02 1.659778109685243e+01 3.514591842057613e+02 -4.206992456262192e+02 3 4.635537606517395e+02 -3.607884938122542e+02 -3.140996451540818e+01 2.893564685231623e+02 4 4.880058143517181e+02 3.441907127154018e+02 -3.200492196903532e+02 
1.313427771030569e+02 - ME 4.999214184618137e-05 + ME 1.717788621912363e-05 Event 212 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3748,7 +3748,7 @@ Event 212 Batch 1 2 6.930853388432640e+02 -3.424793196872474e+02 -8.152110066892747e+01 5.970171795281683e+02 3 9.131624224772825e+01 6.738328155058525e+01 1.365968298972706e+01 6.009627714210347e+01 4 7.155984189090078e+02 2.750960381366621e+02 6.786141767920034e+01 -6.571134566702718e+02 - ME 3.224436999651524e-01 + ME 4.440767413899675e-02 Event 213 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3756,7 +3756,7 @@ Event 213 Batch 1 2 7.316448870278512e+02 4.203233031264803e+02 4.913598772661251e+02 -3.423419819067778e+02 3 4.750162603483208e+02 -1.726357548525294e+02 -3.708603862154638e+02 2.414537588813190e+02 4 2.933388526238279e+02 -2.476875482739507e+02 -1.204994910506614e+02 1.008882230254589e+02 - ME 4.008080891216109e-05 + ME 1.166473784051930e-05 Event 214 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3764,7 +3764,7 @@ Event 214 Batch 1 2 4.805779599533694e+02 3.904513572450257e+02 -1.742898429406511e+02 2.193763065287195e+02 3 6.164938851206517e+02 -5.563771061772993e+02 2.227142270499353e+02 1.445946028815716e+02 4 4.029281549259790e+02 1.659257489322735e+02 -4.842438410928419e+01 -3.639709094102910e+02 - ME 1.130096726278085e-02 + ME 1.644694060635318e-04 Event 215 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3772,7 +3772,7 @@ Event 215 Batch 1 2 4.610896439725640e+02 -3.106576460930037e+02 -3.050258363865880e+02 -1.518378274323046e+02 3 7.153470686812809e+02 2.726436938726979e+02 6.046054769368644e+02 2.680280994976061e+02 4 3.235632873461531e+02 3.801395222030658e+01 -2.995796405502758e+02 -1.161902720653026e+02 - ME 2.130646114222361e-04 + ME 1.638803663744001e-05 Event 216 Batch 1 
0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3780,7 +3780,7 @@ Event 216 Batch 1 2 5.309452696424389e+02 -4.912950836090372e+02 -3.608909251460832e+01 -1.980646298023531e+02 3 6.627369363365399e+02 4.479096066616000e+02 2.308759280187052e+02 4.304573578259469e+02 4 3.063177940210212e+02 4.338547694743724e+01 -1.947868355040969e+02 -2.323927280235938e+02 - ME 1.881406502208647e-03 + ME 7.684209531203918e-05 Event 217 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3788,7 +3788,7 @@ Event 217 Batch 1 2 4.608032244164870e+02 2.215832851737383e+02 3.318832460795877e+02 -2.304212888079594e+02 3 3.107022283044695e+02 -4.724697178681157e+01 2.830528592337836e+02 -1.190994425256424e+02 4 7.284945472790432e+02 -1.743363133869267e+02 -6.149361053133712e+02 3.495207313336019e+02 - ME 2.894775763457067e-03 + ME 4.426756984161849e-04 Event 218 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3796,7 +3796,7 @@ Event 218 Batch 1 2 6.336891602166270e+02 5.249943224110900e+02 1.648031440577737e+02 -3.142973702098814e+02 3 5.195346944320743e+02 -3.655895580768890e+02 -3.610279413409480e+02 7.693763263116504e+01 4 3.467761453512956e+02 -1.594047643342018e+02 1.962247972831736e+02 2.373597375787177e+02 - ME 2.703962034458943e-05 + ME 8.957256945094420e-06 Event 219 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3804,7 +3804,7 @@ Event 219 Batch 1 2 2.579228498517417e+02 -4.166553381892272e+01 1.191899344508913e+02 2.249042891828000e+02 3 7.453266221408651e+02 -3.354388163550532e+01 -3.947818065141064e+02 -6.312954196904914e+02 4 4.967505280073930e+02 7.520941545442813e+01 2.755918720632151e+02 4.063911305076915e+02 - ME 6.103184694489295e-05 + ME 4.019449398167179e-05 Event 220 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ 
-3812,7 +3812,7 @@ Event 220 Batch 1 2 4.940336288355577e+02 -2.383755021420815e+02 -2.918661661143953e+02 3.194690712363630e+02 3 7.129224521449780e+02 2.727447507998269e+02 2.535039959962389e+02 -6.079510240944473e+02 4 2.930439190194635e+02 -3.436924865774512e+01 3.836217011815621e+01 2.884819528580837e+02 - ME 1.761519882509421e-04 + ME 1.677977866215262e-04 Event 221 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3820,7 +3820,7 @@ Event 221 Batch 1 2 3.305414381337777e+02 -2.712796684963201e+02 -1.199910663213094e+02 -1.458325333632650e+02 3 7.388441803280767e+02 5.510455284380058e+02 4.375213740715825e+02 2.254209298704556e+02 4 4.306143815381457e+02 -2.797658599416856e+02 -3.175303077502730e+02 -7.958839650719051e+01 - ME 1.338118621913618e-04 + ME 1.392897982206581e-05 Event 222 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3828,7 +3828,7 @@ Event 222 Batch 1 2 4.657562074797755e+02 2.823280548971349e+02 2.956503281023745e+02 2.231828795335844e+02 3 4.791948192186352e+02 -3.228825926298714e+02 2.575611801233854e+02 -2.429747818931873e+02 4 5.550489733015891e+02 4.055453773273638e+01 -5.532115082257600e+02 1.979190235960287e+01 - ME 9.040551632672907e-05 + ME 2.328731171682892e-05 Event 223 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3836,7 +3836,7 @@ Event 223 Batch 1 2 1.612164685986321e+02 -4.527922182271191e+01 -1.095260585492910e+01 1.543391792239740e+02 3 6.984218503485876e+02 -4.629950983513680e+02 2.605715575888556e+02 -4.533553609726805e+02 4 6.403616810527805e+02 5.082743201740799e+02 -2.496189517339264e+02 2.990161817487066e+02 - ME 4.148580235863498e-04 + ME 2.446487784841432e-04 Event 224 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3844,7 +3844,7 @@ Event 224 Batch 1 2 1.663853414671972e+02 -1.350882138037309e+02 
9.706071747767010e+01 3.804401292344658e+00 3 6.436745581417563e+02 -4.469273298203079e+02 -4.412749113764766e+02 -1.408877256838118e+02 4 6.899401003910457e+02 5.820155436240389e+02 3.442141938988058e+02 1.370833243914657e+02 - ME 3.449215697364171e-04 + ME 9.431632941984795e-05 Event 225 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3852,7 +3852,7 @@ Event 225 Batch 1 2 6.702356777533546e+02 6.117158080352369e+02 -2.649249521350114e+02 -6.952987609335720e+01 3 6.901224376513153e+02 -6.564819557015361e+02 1.560869289536550e+02 1.446972404640001e+02 4 1.396418845953297e+02 4.476614766629927e+01 1.088380231813564e+02 -7.516736437064299e+01 - ME 6.407468428023662e-04 + ME 2.456039108263569e-05 Event 226 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3860,7 +3860,7 @@ Event 226 Batch 1 2 7.307777643673112e+02 -4.569648094661606e+02 4.416236342013199e+02 -3.608155616351098e+02 3 1.446420186345137e+02 4.133161435221925e+01 -3.411742569426914e+01 1.343466131828505e+02 4 6.245802169981752e+02 4.156331951139413e+02 -4.075062085070508e+02 2.264689484522593e+02 - ME 4.858390443010437e-04 + ME 2.774761612267077e-04 Event 227 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3868,7 +3868,7 @@ Event 227 Batch 1 2 7.408615397889290e+02 -4.398089081634772e+02 -5.325812259979131e+02 2.679574278743413e+02 3 4.035753807128123e+02 3.000971513323747e+02 2.468113220276344e+02 -1.090823496201683e+02 4 3.555630794982585e+02 1.397117568311025e+02 2.857699039702786e+02 -1.588750782541728e+02 - ME 3.215647103618368e-04 + ME 3.077346064218035e-05 Event 228 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3876,7 +3876,7 @@ Event 228 Batch 1 2 5.775455372723294e+02 -3.656199842755111e+02 -6.289501053880601e+01 4.426342647953073e+02 3 3.247306314578497e+02 8.776645762339835e+01 
3.116872137482897e+02 2.445634292125525e+01 4 5.977238312698206e+02 2.778535266521127e+02 -2.487922032094836e+02 -4.670906077165625e+02 - ME 3.156934429573604e-03 + ME 3.399241079583280e-04 Event 229 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3884,7 +3884,7 @@ Event 229 Batch 1 2 3.665477125629453e+02 -2.081014917770363e+02 2.317985113364040e+02 -1.931850016112187e+02 3 6.187040836990479e+02 -2.134593092471877e+02 -3.484367286517815e+02 4.645661552545953e+02 4 5.147482037380067e+02 4.215608010242241e+02 1.166382173153775e+02 -2.713811536433765e+02 - ME 4.392210547845218e-04 + ME 8.330968691049859e-05 Event 230 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3892,7 +3892,7 @@ Event 230 Batch 1 2 5.913978529013565e+02 -4.986092821675885e+02 -3.028328044703767e+02 9.712104143419764e+01 3 3.439186614041002e+02 -6.573524045766426e+01 3.216488491089061e+02 -1.024741025375549e+02 4 5.646834856945436e+02 5.643445226252528e+02 -1.881604463852933e+01 5.353061103357447e+00 - ME 1.067159092411647e-04 + ME 2.296146042402505e-05 Event 231 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3900,7 +3900,7 @@ Event 231 Batch 1 2 5.760768557894827e+02 -7.075794524290799e+01 5.609870884449791e+02 1.102331327656218e+02 3 6.038619762337338e+02 -2.467027894308989e+02 -5.464177649873398e+02 -7.221250677108812e+01 4 3.200611679767834e+02 3.174607346738069e+02 -1.456932345763944e+01 -3.802062599453370e+01 - ME 8.750887998909065e-05 + ME 9.438631267217403e-06 Event 232 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3908,7 +3908,7 @@ Event 232 Batch 1 2 7.230187249684843e+02 -2.426041066061352e+02 1.884455685697195e+02 -6.545132479937492e+02 3 4.821326920133732e+02 2.438648429837413e+02 -1.563760752388986e+01 4.156168142598493e+02 4 2.948485830181424e+02 -1.260736377606032e+00 
-1.728079610458298e+02 2.388964337338999e+02 - ME 4.549716999825542e-05 + ME 3.745272037455064e-05 Event 233 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3916,7 +3916,7 @@ Event 233 Batch 1 2 3.540260977608100e+02 -1.904526694678991e+02 -1.042089619355360e+02 -2.796475475319170e+02 3 4.925592302096041e+02 1.195034224421750e+02 3.554637678715695e+02 -3.193415679485398e+02 4 6.534146720295859e+02 7.094924702572415e+01 -2.512548059360335e+02 5.989891154804569e+02 - ME 2.494643034161164e-04 + ME 1.035644942794080e-04 Event 234 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3924,7 +3924,7 @@ Event 234 Batch 1 2 1.866526101194276e+02 7.776953530733704e+01 -1.047503781897390e+01 1.693557493124073e+02 3 6.012752698516817e+02 5.974840035795012e+02 -4.570329760029643e+01 4.955829083294186e+01 4 7.120721200288899e+02 -6.752535388868379e+02 5.617833541927040e+01 -2.189140401453492e+02 - ME 2.154454342135980e-03 + ME 6.655948749153013e-04 Event 235 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3932,7 +3932,7 @@ Event 235 Batch 1 2 5.032945404607945e+02 1.612889276925247e+02 2.561838854094329e+02 -4.020710050699558e+02 3 7.153634726767370e+02 -3.739069589148947e+02 -1.979140468542061e+02 5.768609140624169e+02 4 2.813419868624690e+02 2.126180312223700e+02 -5.826983855522722e+01 -1.747899089924609e+02 - ME 8.184939555880423e-04 + ME 1.137471703441233e-04 Event 236 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3940,7 +3940,7 @@ Event 236 Batch 1 2 6.980797829886610e+02 -9.803971882836288e+00 4.740144261428889e+02 5.123764137440797e+02 3 5.519387921056282e+02 -1.638876688381594e+02 -3.209728652821290e+02 -4.180355032606608e+02 4 2.499814249057108e+02 1.736916407209956e+02 -1.530415608607599e+02 -9.434091048341891e+01 - ME 2.813360227943072e-04 + ME 
5.842524801707843e-05 Event 237 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3948,7 +3948,7 @@ Event 237 Batch 1 2 1.604490925133743e+02 6.212857081252698e+01 9.075394990141041e+01 1.168232534834160e+02 3 6.578242662283152e+02 5.348507070161563e+02 -3.810396531957998e+02 3.842224792439630e+01 4 6.817266412583107e+02 -5.969792778286832e+02 2.902857032943894e+02 -1.552455014078122e+02 - ME 8.205069948818567e-04 + ME 1.834055676127939e-04 Event 238 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3956,7 +3956,7 @@ Event 238 Batch 1 2 2.789018340499539e+02 1.069933592962543e+02 -2.572713415352736e+02 1.225197647611563e+01 3 4.761759619803052e+02 7.755191627191856e+01 -4.591043622469822e+02 -9.976187456245104e+01 4 7.449222039697408e+02 -1.845452755681728e+02 7.163757037822556e+02 8.750989808633538e+01 - ME 4.130258343824905e-02 + ME 9.445005309896021e-03 Event 239 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3964,7 +3964,7 @@ Event 239 Batch 1 2 4.581461811054764e+02 -3.899520773556200e+02 2.006122777919944e+02 1.326273524830990e+02 3 3.013476461129690e+02 -2.996604136348060e+02 3.145663680794619e+01 4.951799549362093e+00 4 7.405061727815548e+02 6.896124909904260e+02 -2.320689145999406e+02 -1.375791520324611e+02 - ME 1.351152256907066e-02 + ME 4.970363634614722e-03 Event 240 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3972,7 +3972,7 @@ Event 240 Batch 1 2 5.932490652975304e+02 -4.094504138983958e+01 -3.300190662632461e+02 4.912793227530680e+02 3 3.147487537014150e+02 3.081803657249563e+02 4.097350029662016e+01 -4.912038692507519e+01 4 5.920021810010543e+02 -2.672353243351168e+02 2.890455659666260e+02 -4.421589358279927e+02 - ME 2.300291351402201e-03 + ME 3.420638167820422e-04 Event 241 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 
0.000000000000000e+00 7.500000000000000e+02 @@ -3980,7 +3980,7 @@ Event 241 Batch 1 2 4.438703186026563e+01 1.425431959717181e+01 -4.430288595443099e+00 -4.180186016371768e+01 3 7.139617398095604e+02 -8.415544716076485e+01 -5.657765076565163e+02 -4.272659242311072e+02 4 7.416512283301737e+02 6.990112756359306e+01 5.702067962519594e+02 4.690677843948249e+02 - ME 9.657825758456334e-03 + ME 9.983667466725972e-03 Event 242 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3988,7 +3988,7 @@ Event 242 Batch 1 2 3.798759956195423e+02 -1.259218082844715e+02 -3.429343473884153e+02 1.041417477651927e+02 3 6.208895880511435e+02 5.354328139337265e+02 1.248673426784089e+02 -2.884852319370315e+02 4 4.992344163293142e+02 -4.095110056492549e+02 2.180670047100064e+02 1.843434841718389e+02 - ME 4.523810239016752e-05 + ME 1.030886114253601e-05 Event 243 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -3996,7 +3996,7 @@ Event 243 Batch 1 2 2.320641800899440e+02 1.658639294991472e+02 7.783463994856535e+01 1.424243988788334e+02 3 6.251485586341132e+02 -2.328139095298017e+02 -4.262931976140131e+02 3.935511574875350e+02 4 6.427872612759426e+02 6.694998003065477e+01 3.484585576654476e+02 -5.359755563663684e+02 - ME 1.068434238404496e-02 + ME 8.493072129055412e-04 Event 244 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4004,7 +4004,7 @@ Event 244 Batch 1 2 6.609991843787810e+02 -2.293678857540617e+02 -4.971623496474938e+02 -3.703240376037023e+02 3 1.091403980947070e+02 1.154537470975927e+01 -9.115666825632124e+00 -1.081445118228680e+02 4 7.298604175265119e+02 2.178225110443025e+02 5.062780164731259e+02 4.784685494265703e+02 - ME 2.129811247265830e-03 + ME 9.635755455313371e-04 Event 245 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4012,7 +4012,7 @@ Event 245 Batch 1 2 
4.893629130846664e+02 -3.546974954177181e+02 3.112856868655738e+02 -1.294873298810978e+02 3 7.129026631852477e+02 5.703735458058533e+02 -4.257115617679147e+02 -4.091322034012423e+01 4 2.977344237300874e+02 -2.156760503881352e+02 1.144258749023406e+02 1.704005502212233e+02 - ME 2.548352504440589e-05 + ME 5.312368446054512e-06 Event 246 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4020,7 +4020,7 @@ Event 246 Batch 1 2 3.999457395350199e+02 9.605025124341067e+01 9.072234098128430e+01 3.774922524438975e+02 3 3.675469088581873e+02 -1.615841482674670e+01 2.570183669846762e+02 2.622426259669196e+02 4 7.325073516067924e+02 -7.989183641666393e+01 -3.477407079659604e+02 -6.397348784108170e+02 - ME 1.294421983622042e-01 + ME 5.023802198964801e-02 Event 247 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4028,7 +4028,7 @@ Event 247 Batch 1 2 6.711864521923226e+02 3.763073240556692e+02 5.338170415278108e+02 1.546719678644905e+02 3 5.231557804938882e+02 -1.057595517177888e+02 -5.121603131388773e+02 -1.409615302513522e+01 4 3.056577673137891e+02 -2.705477723378804e+02 -2.165672838893370e+01 -1.405758148393554e+02 - ME 2.873345328272106e-04 + ME 1.980507958825256e-05 Event 248 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4036,7 +4036,7 @@ Event 248 Batch 1 2 6.307803946875938e+02 -6.240065811552291e+01 -3.654556314590158e+02 5.103256270499047e+02 3 3.935347424219227e+02 -2.188782290807617e+02 2.916853933646314e+01 -3.257470040392325e+02 4 4.756848628904837e+02 2.812788871962847e+02 3.362870921225527e+02 -1.845786230106721e+02 - ME 2.418190194667681e-04 + ME 8.712398839363553e-05 Event 249 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4044,7 +4044,7 @@ Event 249 Batch 1 2 4.326970760901858e+02 -4.070406664121577e+02 -1.467447404863359e+02 3.261392852829594e+00 3 
4.839435229991528e+02 2.335311811831339e+01 2.018595963184923e+02 -4.392136936630267e+02 4 5.833594009106607e+02 3.836875482938447e+02 -5.511485583215654e+01 4.359523008101972e+02 - ME 8.354140201035124e-05 + ME 2.487145538635957e-05 Event 250 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4052,7 +4052,7 @@ Event 250 Batch 1 2 7.010671671345858e+02 -6.122994886156980e+02 -2.473946684860857e+02 2.353303785738851e+02 3 5.574643785654457e+02 3.902114201641945e+02 2.260985614407801e+02 -3.276904354069721e+02 4 2.414684542999681e+02 2.220880684515034e+02 2.129610704530562e+01 9.236005683308701e+01 - ME 4.704118057291807e-05 + ME 1.645582299148298e-05 Event 251 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4060,7 +4060,7 @@ Event 251 Batch 1 2 7.364006127103795e+02 5.379960890463808e+02 4.302640987755426e+02 2.602285070392761e+02 3 3.051282143252570e+01 -2.901685968644106e+00 1.337962970917706e+01 -2.726899336532026e+01 4 7.330865658570956e+02 -5.350944030777371e+02 -4.436437284847198e+02 -2.329595136739561e+02 - ME 8.340546584740779e-03 + ME 6.389613086136084e-03 Event 252 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4068,7 +4068,7 @@ Event 252 Batch 1 2 5.965625584838610e+02 -7.369842915522101e+01 -5.671364104158780e+02 -1.697401534860145e+02 3 6.549338760881149e+02 -1.514014639568436e+02 6.313240788068730e+02 8.628954906696529e+01 4 2.485035654280235e+02 2.250998931120648e+02 -6.418766839099484e+01 8.345060441904938e+01 - ME 3.985162011735342e-05 + ME 7.225550854378042e-06 Event 253 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4076,7 +4076,7 @@ Event 253 Batch 1 2 5.728678540484714e+02 3.212236187283236e+01 -4.622666283104808e+02 -3.368312580807653e+02 3 7.160302400837320e+02 1.132435775281999e+02 5.206369974620781e+02 4.783433011307397e+02 4 
2.111019058677967e+02 -1.453659394010323e+02 -5.837036915159722e+01 -1.415120430499744e+02 - ME 1.248429186447426e-03 + ME 7.499676590470843e-05 Event 254 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4084,7 +4084,7 @@ Event 254 Batch 1 2 5.579357369440610e+02 1.333150067790222e+02 -6.785864805882139e+01 5.375077668373273e+02 3 6.202682598689536e+02 -4.039338689731095e+02 2.012068793592834e+02 -4.255419314189536e+02 4 3.217960031869852e+02 2.706188621940872e+02 -1.333482313004621e+02 -1.119658354183736e+02 - ME 6.088720978226072e-04 + ME 2.226893396847405e-04 Event 255 Batch 1 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 @@ -4092,5 +4092,5 @@ Event 255 Batch 1 2 7.263612771087843e+02 3.396063850675520e+02 -6.401091575508393e+02 5.028393902637355e+01 3 1.540578578981475e+02 -3.080387127739228e+01 1.060177193258910e+02 -1.074485378375538e+02 4 6.195808649930684e+02 -3.088025137901597e+02 5.340914382249483e+02 5.716459881118030e+01 - ME 1.547064591142216e-04 + ME 4.003666322732326e-05 From 73a5f23d86a65471cc4b67ee8c8a3cdcd40ee9b2 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 17:25:32 +0200 Subject: [PATCH 042/119] [oct23av] in CODEGEN/generateAndCompare.sh, remove py3_model.pkl during patchMad.sh execution, and add it to gitignore --- epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh | 1 + epochX/cudacpp/CODEGEN/generateAndCompare.sh | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh index 7eeb4162e5..ffd18bf8e8 100755 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh @@ -63,6 +63,7 @@ if [ "${tmadmode}" != "0" ]; then \rm -f ${dir}/bin/internal/run_plot* \rm -f ${dir}/HTML/* \rm -rf ${dir}/bin/internal/__pycache__ + \rm -rf 
${dir}/bin/internal/ufomodel/py3_model.pkl \rm -rf ${dir}/bin/internal/ufomodel/__pycache__ touch ${dir}/HTML/.keep # new file fi diff --git a/epochX/cudacpp/CODEGEN/generateAndCompare.sh b/epochX/cudacpp/CODEGEN/generateAndCompare.sh index fe1bcf5981..ffff6353b6 100755 --- a/epochX/cudacpp/CODEGEN/generateAndCompare.sh +++ b/epochX/cudacpp/CODEGEN/generateAndCompare.sh @@ -271,6 +271,11 @@ results.pkl run_[0-9]* events.lhe* EOF + if [ -d ${OUTDIR}/${proc}.${autosuffix}/bin/internal/ufomodel ]; then # see PR #762 + cat << EOF > ${OUTDIR}/${proc}.${autosuffix}/bin/internal/ufomodel/.gitignore +py3_model.pkl +EOF + fi if [ -f ${OUTDIR}/${proc}.${autosuffix}/SubProcesses/proc_characteristics ]; then sed -i 's/bias_module = None/bias_module = dummy/' ${OUTDIR}/${proc}.${autosuffix}/SubProcesses/proc_characteristics fi From 06d970efa88c0fbf3054036547bdbcd961882876 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 17:29:26 +0200 Subject: [PATCH 043/119] [oct23av] regenerate all 8 mad and 7 sa processes again, removing py3_model.pkl from the repo and adding it to .gitignore --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 16 +-- .../bin/internal/ufomodel/.gitignore | 1 + .../CODEGEN_cudacpp_ee_mumu_log.txt | 10 +- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 12 +-- .../gg_tt.mad/SubProcesses/counters.cc | 99 ------------------- .../gg_tt.mad/SubProcesses/ompnumthreads.cc | 25 ----- .../bin/internal/ufomodel/.gitignore | 1 + .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 10 +- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 18 ++-- .../gg_tt01g.mad/SubProcesses/counters.cc | 99 ------------------- .../SubProcesses/ompnumthreads.cc | 25 ----- .../bin/internal/ufomodel/.gitignore | 1 + .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 16 +-- .../gg_ttg.mad/SubProcesses/counters.cc | 99 ------------------- .../gg_ttg.mad/SubProcesses/ompnumthreads.cc | 25 ----- .../bin/internal/ufomodel/.gitignore | 1 + .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 10 +- 
.../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 20 ++-- .../gg_ttgg.mad/SubProcesses/counters.cc | 99 ------------------- .../gg_ttgg.mad/SubProcesses/ompnumthreads.cc | 25 ----- .../bin/internal/ufomodel/.gitignore | 1 + .../CODEGEN_cudacpp_gg_ttgg_log.txt | 12 +-- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 18 ++-- .../gg_ttggg.mad/SubProcesses/counters.cc | 99 ------------------- .../SubProcesses/ompnumthreads.cc | 25 ----- .../bin/internal/ufomodel/.gitignore | 1 + .../CODEGEN_cudacpp_gg_ttggg_log.txt | 14 +-- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 20 ++-- .../gq_ttq.mad/SubProcesses/counters.cc | 99 ------------------- .../gq_ttq.mad/SubProcesses/ompnumthreads.cc | 25 ----- .../bin/internal/ufomodel/.gitignore | 1 + .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 10 +- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 6 +- .../CODEGEN_mad_pp_tt012j_log.txt | 54 +++++----- .../pp_tt012j.mad/SubProcesses/counters.cc | 99 ------------------- .../SubProcesses/ompnumthreads.cc | 25 ----- .../bin/internal/ufomodel/.gitignore | 1 + 37 files changed, 131 insertions(+), 991 deletions(-) create mode 100644 epochX/cudacpp/ee_mumu.mad/bin/internal/ufomodel/.gitignore delete mode 100644 epochX/cudacpp/gg_tt.mad/SubProcesses/counters.cc delete mode 100644 epochX/cudacpp/gg_tt.mad/SubProcesses/ompnumthreads.cc create mode 100644 epochX/cudacpp/gg_tt.mad/bin/internal/ufomodel/.gitignore delete mode 100644 epochX/cudacpp/gg_tt01g.mad/SubProcesses/counters.cc delete mode 100644 epochX/cudacpp/gg_tt01g.mad/SubProcesses/ompnumthreads.cc create mode 100644 epochX/cudacpp/gg_tt01g.mad/bin/internal/ufomodel/.gitignore delete mode 100644 epochX/cudacpp/gg_ttg.mad/SubProcesses/counters.cc delete mode 100644 epochX/cudacpp/gg_ttg.mad/SubProcesses/ompnumthreads.cc create mode 100644 epochX/cudacpp/gg_ttg.mad/bin/internal/ufomodel/.gitignore delete mode 100644 epochX/cudacpp/gg_ttgg.mad/SubProcesses/counters.cc delete mode 100644 epochX/cudacpp/gg_ttgg.mad/SubProcesses/ompnumthreads.cc 
create mode 100644 epochX/cudacpp/gg_ttgg.mad/bin/internal/ufomodel/.gitignore delete mode 100644 epochX/cudacpp/gg_ttggg.mad/SubProcesses/counters.cc delete mode 100644 epochX/cudacpp/gg_ttggg.mad/SubProcesses/ompnumthreads.cc create mode 100644 epochX/cudacpp/gg_ttggg.mad/bin/internal/ufomodel/.gitignore delete mode 100644 epochX/cudacpp/gq_ttq.mad/SubProcesses/counters.cc delete mode 100644 epochX/cudacpp/gq_ttq.mad/SubProcesses/ompnumthreads.cc create mode 100644 epochX/cudacpp/gq_ttq.mad/bin/internal/ufomodel/.gitignore delete mode 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/counters.cc delete mode 100644 epochX/cudacpp/pp_tt012j.mad/SubProcesses/ompnumthreads.cc create mode 100644 epochX/cudacpp/pp_tt012j.mad/bin/internal/ufomodel/.gitignore diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index be5cee0fb8..3476c5c66b 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005257368087768555  +DEBUG: model prefixing takes 0.005334377288818359  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,7 +174,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,19 +191,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.097 s +Wrote files for 8 helas calls in 0.096 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.197 s +ALOHA: aloha creates 3 routines in 0.199 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.251 s +ALOHA: aloha creates 7 routines in 0.252 s FFV1 FFV1 FFV2 @@ -314,6 +314,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.494s -user 0m2.190s -sys 0m0.291s +real 0m2.484s +user 0m2.173s +sys 0m0.301s diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/ufomodel/.gitignore b/epochX/cudacpp/ee_mumu.mad/bin/internal/ufomodel/.gitignore new file mode 100644 index 0000000000..dc4db554f7 --- /dev/null +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/ufomodel/.gitignore @@ -0,0 +1 @@ +py3_model.pkl diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index b13f728dee..177569f8c4 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00538325309753418  +DEBUG: model prefixing takes 0.005261898040771484  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -181,7 +181,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.262 s +ALOHA: aloha creates 4 routines in 0.268 s FFV1 FFV1 FFV2 @@ -201,6 +201,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.651s -user 0m0.601s -sys 0m0.044s +real 0m0.660s +user 0m0.598s +sys 0m0.056s diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index b5c53c1161..6020b75c49 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t 
t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005456686019897461  +DEBUG: model prefixing takes 0.005294084548950195  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -200,7 +200,7 @@ ALOHA: aloha creates 2 routines in 0.143 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.140 s +ALOHA: aloha creates 4 routines in 0.131 s VVV1 FFV1 FFV1 @@ -303,6 +303,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.913s -user 0m2.038s -sys 0m0.292s +real 0m2.624s +user 0m2.008s +sys 0m0.304s diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/counters.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/counters.cc deleted file mode 100644 index cd6ecc8acd..0000000000 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/counters.cc +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
- -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - 
smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/ompnumthreads.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
- -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/ufomodel/.gitignore b/epochX/cudacpp/gg_tt.mad/bin/internal/ufomodel/.gitignore new file mode 100644 index 0000000000..dc4db554f7 --- /dev/null +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/ufomodel/.gitignore @@ -0,0 +1 @@ +py3_model.pkl diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 23c04c9100..3126ec4e59 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005602359771728516  +DEBUG: model prefixing takes 0.005253314971923828  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -180,7 +180,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.141 s VVV1 FFV1 FFV1 @@ -196,6 +196,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.539s -user 0m0.490s -sys 0m0.045s +real 0m0.542s +user 0m0.474s +sys 0m0.059s diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index b27d021202..1f7f68b2ce 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~; add process g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005415201187133789  +DEBUG: model prefixing takes 0.005590200424194336  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -184,7 +184,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -201,7 +201,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -216,15 +216,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 2 subprocesses (19 diagrams) in 0.041 s -Wrote files for 46 helas calls in 0.238 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s +Wrote files for 46 helas calls in 0.236 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.321 s +ALOHA: aloha creates 5 routines in 0.322 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -349,6 +349,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.906s -user 0m2.575s -sys 0m0.318s +real 0m2.905s +user 0m2.586s +sys 0m0.307s diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/counters.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/counters.cc deleted file mode 100644 index cd6ecc8acd..0000000000 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/counters.cc +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
- constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : 
%9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/ompnumthreads.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/ufomodel/.gitignore b/epochX/cudacpp/gg_tt01g.mad/bin/internal/ufomodel/.gitignore new file mode 100644 index 0000000000..dc4db554f7 --- /dev/null +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/ufomodel/.gitignore @@ -0,0 +1 @@ +py3_model.pkl diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index d367fef872..67ec0f298b 100644 --- 
a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005761861801147461  +DEBUG: model prefixing takes 0.0055065155029296875  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,14 +191,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s -Wrote files for 36 helas calls in 0.146 s +Wrote files for 36 helas calls in 0.148 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.323 s +ALOHA: aloha creates 5 routines in 0.319 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.310 s +ALOHA: aloha creates 10 routines in 0.304 s VVV1 VVV1 FFV1 @@ -318,6 +318,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.947s -user 0m2.470s -sys 0m0.321s +real 0m2.789s +user 0m2.472s +sys 0m0.309s diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/counters.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/counters.cc deleted file mode 100644 index cd6ecc8acd..0000000000 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/counters.cc +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
- -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - 
smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/ompnumthreads.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
- -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/ufomodel/.gitignore b/epochX/cudacpp/gg_ttg.mad/bin/internal/ufomodel/.gitignore new file mode 100644 index 0000000000..dc4db554f7 --- /dev/null +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/ufomodel/.gitignore @@ -0,0 +1 @@ +py3_model.pkl diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 20d22ac1c4..6b39ab9408 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005301237106323242  +DEBUG: model prefixing takes 0.00561976432800293  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.022 s +1 processes with 16 diagrams generated in 0.021 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT @@ -204,6 +204,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.792s -user 0m0.717s -sys 0m0.059s +real 0m0.825s +user 0m0.714s +sys 0m0.058s diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 745da9d88c..ca66753b97 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005432605743408203  +DEBUG: model prefixing takes 0.005291461944580078  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.157 s +1 processes with 123 diagrams generated in 0.155 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,15 +190,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.423 s -Wrote files for 222 helas calls in 0.710 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.416 s +Wrote files for 222 helas calls in 0.716 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.325 s +ALOHA: aloha creates 5 routines in 0.326 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.308 s +ALOHA: aloha creates 10 routines in 0.309 s VVV1 VVV1 FFV1 @@ -321,6 +321,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.903s -user 0m3.552s -sys 0m0.335s +real 0m3.896s +user 0m3.569s +sys 0m0.315s diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/counters.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/counters.cc deleted file mode 100644 index cd6ecc8acd..0000000000 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/counters.cc +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. 
Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC 
= iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/ompnumthreads.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
- -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/ufomodel/.gitignore b/epochX/cudacpp/gg_ttgg.mad/bin/internal/ufomodel/.gitignore new file mode 100644 index 0000000000..dc4db554f7 --- /dev/null +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/ufomodel/.gitignore @@ -0,0 +1 @@ +py3_model.pkl diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 373a89a800..de1c10fb29 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005511283874511719  +DEBUG: model prefixing takes 0.005267620086669922  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.420 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.416 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.315 s +ALOHA: aloha creates 5 routines in 0.312 s VVV1 VVV1 FFV1 @@ -207,6 +207,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m1.438s -user 0m1.366s -sys 0m0.060s +real 0m1.430s +user 0m1.363s +sys 0m0.053s diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 7e024b5fd3..7e0cdd133a 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: 
load vertices -DEBUG: model prefixing takes 0.005335092544555664  +DEBUG: model prefixing takes 0.00532841682434082  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.895 s +1 processes with 1240 diagrams generated in 1.848 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -177,7 +177,7 @@ INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1592 term in 36s. Introduce 2768 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -192,8 +192,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.515 s -Wrote files for 2281 helas calls in 46.436 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.468 s +Wrote files for 2281 helas calls in 46.375 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines @@ -208,7 +208,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.310 s +ALOHA: aloha creates 10 routines in 0.308 s VVV1 VVV1 FFV1 @@ -323,6 +323,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m57.704s -user 0m56.670s -sys 0m0.842s +real 0m57.540s +user 0m56.533s +sys 0m0.804s diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/counters.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/counters.cc deleted file mode 100644 index cd6ecc8acd..0000000000 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/counters.cc +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
- -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - 
smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/ompnumthreads.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. 
- -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/ufomodel/.gitignore b/epochX/cudacpp/gg_ttggg.mad/bin/internal/ufomodel/.gitignore new file mode 100644 index 0000000000..dc4db554f7 --- /dev/null +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/ufomodel/.gitignore @@ -0,0 +1 @@ +py3_model.pkl diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 31573e7e51..93a7080330 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005338430404663086  +DEBUG: model prefixing takes 0.005595684051513672  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.850 s +1 processes with 1240 diagrams generated in 1.844 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.482 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.495 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.364 s +ALOHA: aloha creates 5 routines in 0.349 s VVV1 VVV1 FFV1 @@ -207,6 +207,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m13.206s -user 0m12.699s -sys 0m0.116s +real 0m12.991s +user 0m12.831s +sys 0m0.101s diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt 
index 63bb0f3c9e..470e92412b 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005384206771850586  +DEBUG: model prefixing takes 0.005367279052734375  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.077 s +8 processes with 40 diagrams generated in 0.076 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -197,7 +197,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -214,7 +214,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -230,16 +230,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s -Wrote files for 32 helas calls in 0.216 s +Wrote files for 32 helas calls in 0.214 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.144 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.129 s +ALOHA: aloha creates 4 routines in 0.130 s FFV1 FFV1 FFV1 @@ -358,6 +358,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.606s -user 0m2.256s -sys 0m0.312s +real 0m2.575s +user 0m2.217s +sys 0m0.315s diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/counters.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/counters.cc deleted file mode 100644 index cd6ecc8acd..0000000000 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/counters.cc +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
- constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : 
%9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/ompnumthreads.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/ufomodel/.gitignore b/epochX/cudacpp/gq_ttq.mad/bin/internal/ufomodel/.gitignore new file mode 100644 index 0000000000..dc4db554f7 --- /dev/null +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/ufomodel/.gitignore @@ -0,0 +1 @@ +py3_model.pkl diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 71c2006493..455aaad666 100644 --- 
a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005433082580566406  +DEBUG: model prefixing takes 0.005272865295410156  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.077 s +8 processes with 40 diagrams generated in 0.076 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -227,6 +227,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.651s -user 0m0.593s -sys 0m0.051s +real 0m0.652s +user 0m0.588s +sys 0m0.058s diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index 64ce042fd4..78ccf0d626 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -164,6 +164,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.428s -user 0m0.366s -sys 0m0.055s +real 0m0.460s +user 
0m0.364s +sys 0m0.058s diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index e2ec882498..1c26065d41 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005310535430908203  +DEBUG: model prefixing takes 0.00558018684387207  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.135 s +13 processes with 76 diagrams generated in 0.133 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -497,7 +497,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -514,7 +514,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -531,7 +531,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -548,7 +548,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -565,7 +565,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -582,7 +582,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -599,7 +599,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -616,7 +616,7 @@ INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -633,7 +633,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -650,7 +650,7 @@ INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -667,7 +667,7 @@ INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -684,7 +684,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -701,7 +701,7 @@ INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -718,7 +718,7 @@ INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -735,7 +735,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -752,7 +752,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -769,7 +769,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -786,7 +786,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -801,15 +801,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.287 s -Wrote files for 810 helas calls in 3.227 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.264 s +Wrote files for 810 helas calls in 3.591 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.329 s +ALOHA: aloha creates 5 routines in 0.330 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -817,7 +817,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.308 s +ALOHA: aloha creates 10 routines in 0.306 s VVV1 VVV1 FFV1 @@ -1096,6 +1096,6 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about 
this process. quit -real 0m9.420s -user 0m8.812s -sys 0m0.563s +real 0m9.770s +user 0m8.785s +sys 0m0.562s diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/counters.cc deleted file mode 100644 index cd6ecc8acd..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/counters.cc +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
- constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int matrix1_counter = 0; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : 
%9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/ufomodel/.gitignore b/epochX/cudacpp/pp_tt012j.mad/bin/internal/ufomodel/.gitignore new file mode 100644 index 0000000000..dc4db554f7 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/ufomodel/.gitignore @@ -0,0 +1 @@ +py3_model.pkl From 0af61519f0f4c4ccf5ca60ac745bcf508c4e6b42 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 17:59:24 +0200 Subject: [PATCH 044/119] [oct23av] in CODEGEN, fix a silly issue in my 
previous conflict resolution --- epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py | 1 - 1 file changed, 1 deletion(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index c28fa40e19..e620ddf998 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -208,7 +208,6 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag): self.add_input_for_banner() if 'CUDACPP_CODEGEN_PATCHLEVEL' in os.environ: patchlevel = os.environ['CUDACPP_CODEGEN_PATCHLEVEL'] else: patchlevel = '' -<<<<<<< HEAD # OLDEST implementation (AV) #path = os.path.realpath(os.curdir + os.sep + 'PLUGIN' + os.sep + 'CUDACPP_OUTPUT') #misc.sprint(path) From f237f9ff1f869f35a3ff8f0a3febe982588c28e5 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 18:20:19 +0200 Subject: [PATCH 045/119] [oct23av] in CODEGEN, ensure that patchMad.sh stdout/stderr are always dumped to the screen: this is also needed to avoid silent failures In particular returncode != 0 may silently fail, for instance because 'madevent treatcards run' may silently fail For instance treatcards run run_card missed argument cudacpp_backend. Takes default: CPP run_card missed argument cudacpp_backend. Takes default: CPP Command "treatcards run" interrupted with error: Exception : pass in reset simd Please report this bug on https://bugs.launchpad.net/mg5amcnlo More information is found in 'ME5_debug'. Please attach this file to your report. 
--- .../CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py | 12 +++++++++--- .../CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh | 6 ++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index e620ddf998..cf96ca4ead 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -204,6 +204,7 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag): MG5options are all the options of the main interface outputflags is a list of options provided when doing the output command""" misc.sprint('Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self)) + misc.sprint(self.in_madevent_mode) if self.in_madevent_mode: self.add_input_for_banner() if 'CUDACPP_CODEGEN_PATCHLEVEL' in os.environ: patchlevel = os.environ['CUDACPP_CODEGEN_PATCHLEVEL'] @@ -221,11 +222,16 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag): # logger.info("####### \n stderr is \n %s", stderr) # raise Exception('ERROR! 
the O/S call to patchMad.sh failed') # NEW implementation (OM PR #764) + # **NB** AV: patchMad.sh may silently fail, for instance because 'madevent treatcards run' may silently fail + # **NB** AV: currently, error checking is done by looking for error strings on the full generation log + # **NB** AV: for this reason, but also because I want to always see the output, I change the Popen call to always dump stdout and stderr plugin_path = os.path.dirname(os.path.realpath( __file__ )) - p = subprocess.Popen([pjoin(plugin_path, 'patchMad.sh'), self.dir_path , 'PROD', str(patchlevel)], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ###p = subprocess.Popen([pjoin(plugin_path, 'patchMad.sh'), self.dir_path , 'PROD', str(patchlevel)], + ### stdout=subprocess.PIPE, stderr=subprocess.PIPE) + p = subprocess.Popen([pjoin(plugin_path, 'patchMad.sh'), self.dir_path , 'PROD', str(patchlevel)]) # AV always dump patchMad.sh stdout/stderr stdout, stderr = p.communicate() - if p.returncode != 0: + misc.sprint(p.returncode) + if p.returncode != 0: # AV: WARNING! this may silently fail, for instance because 'madevent treatcards run' may silently fail logger.debug("####### \n stdout is \n %s", stdout) logger.info("####### \n stderr is \n %s", stderr) logger.info("return code is %s\n", p.returncode) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh index 09f268515c..b08e16e31f 100755 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh @@ -49,8 +49,10 @@ fi # These two steps are part of "cd Source; make" but they actually are code-generating steps if [ "${tmadmode}" != "0" ]; then - ${dir}/bin/madevent treatcards run - ${dir}/bin/madevent treatcards param + ${dir}/bin/madevent treatcards run # AV BUG! THIS MAY SILENTLY FAIL (should check if output contains "Please report this bug") + ###echo status=$? 
+ ${dir}/bin/madevent treatcards param # AV BUG! THIS MAY SILENTLY FAIL (should check if output contains "Please report this bug") + ###echo status=$? fi # Cleanup From b940731af1c1f3189957db31b074e89e2ff4c077 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 18:57:22 +0200 Subject: [PATCH 046/119] [oct23av] in CODEGEN, add copyright and license to launch_plugin.py --- .../cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py index 23271b846e..cd9b7737a7 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py @@ -1,3 +1,7 @@ +# Copyright (C) 2020-2023 CERN and UCLouvain. +# Licensed under the GNU Lesser General Public License (version 3 or later). +# Created by: O. Mattelaer (Aug 2023) for the MG5aMC CUDACPP plugin. +# Further modified by: O. Mattelaer, A. Valassi (2023) for the MG5aMC CUDACPP plugin. 
import logging import os From a30fd576308614ae10fa1d815de6f4d9698e9924 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 19:07:37 +0200 Subject: [PATCH 047/119] [oct23av] in CODEGEN, improve formatting and add an optional CUDACPPRunCard, but this does not solve code generation --- .../PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py | 24 +++++-------------- .../PLUGIN/CUDACPP_SA_OUTPUT/output.py | 16 ++++++------- 2 files changed, 14 insertions(+), 26 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py index cd9b7737a7..99729e5a10 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py @@ -23,22 +23,15 @@ import madgraph.various.banner as banner_mod class CPPMEInterface(madevent_interface.MadEventCmdShell): - def compile(self, *args, **opts): """ """ - import multiprocessing if not self.options['nb_core'] or self.options['nb_core'] == 'None': self.options['nb_core'] = multiprocessing.cpu_count() - if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): import pathlib import os pjoin = os.path.join - - - - cudacpp_backend = self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) if cudacpp_backend == 'FORTRAN': @@ -54,15 +47,12 @@ def compile(self, *args, **opts): return misc.compile(nb_core=self.options['nb_core'], *args, **opts) class CPPRunCard(banner_mod.RunCardLO): - def reset_simd(self, old_value, new_value, name): if not hasattr(self, 'path'): raise Exception - if name == "vector_size" and new_value <= int(old_value): # code can handle the new size -> do not recompile return - Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source') subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, 
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) @@ -72,33 +62,31 @@ def plugin_input(self, finput): def default_setup(self): super().default_setup() self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) - def write_one_include_file(self, output_dir, incname, output_file=None): """write one include file at the time""" - if incname == "vector.inc" and 'vector_size' not in self.user_set: return super().write_one_include_file(output_dir, incname, output_file) - def check_validity(self): """ensure that PLUGIN information are consistent""" - super().check_validity() - if self['SDE_strategy'] != 1: logger.warning('SDE_strategy different of 1 is not supported with SMD/GPU mode') self['sde_strategy'] = 1 - if self['hel_recycling']: self['hel_recycling'] = False class GPURunCard(CPPRunCard): - def default_setup(self): super(CPPRunCard, self).default_setup() self.add_param('cudacpp_backend', 'CUDA', include=False, hidden=False) - + +#class CUDACPPRunCard(CPPRunCard): +# def default_setup(self): +# super(CPPRunCard, self).default_setup() +# self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + MEINTERFACE = CPPMEInterface RunCard = CPPRunCard diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index cf96ca4ead..20e609eedf 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -272,11 +272,11 @@ def add_madevent_plugin_fct(self): plugin_path = os.path.dirname(os.path.realpath( __file__ )) ###files.cp(pjoin(plugin_path, 'plugin_interface.py'), pjoin(self.dir_path, 'bin', 'internal')) # AV FIXME (added by OM, but file is missing?) 
files.cp(pjoin(plugin_path, 'launch_plugin.py'), pjoin(self.dir_path, 'bin', 'internal')) - files.ln( pjoin(self.dir_path, 'lib'), pjoin(self.dir_path, 'SubProcesses')) + files.ln(pjoin(self.dir_path, 'lib'), pjoin(self.dir_path, 'SubProcesses')) #------------------------------------------------------------------------------------ -class SIMD_ProcessExporter(PLUGIN_ProcessExporter): +class SIMD_ProcessExporter(PLUGIN_ProcessExporter): def change_output_args(args, cmd): """ """ cmd._export_format = "madevent" @@ -285,11 +285,10 @@ def change_output_args(args, cmd): if 'vector_size' not in ''.join(args): args.append('--vector_size=16') return args - - - -class GPU_ProcessExporter(PLUGIN_ProcessExporter): +#------------------------------------------------------------------------------------ + +class GPU_ProcessExporter(PLUGIN_ProcessExporter): def change_output_args(args, cmd): """ """ cmd._export_format = "madevent" @@ -300,11 +299,12 @@ def change_output_args(args, cmd): return args def finalize(self, matrix_element, cmdhistory, MG5options, outputflag): - misc.sprint("enter dedicated function") out = super().finalize(matrix_element, cmdhistory, MG5options, outputflag) - #change RunCard class to have default for GPU + # OM change RunCard class to have default for GPU text = open(pjoin(self.dir_path, 'bin', 'internal', 'launch_plugin.py'), 'r').read() text = text.replace('RunCard = CPPRunCard', 'RunCard = GPURunCard') open(pjoin(self.dir_path, 'bin', 'internal', 'launch_plugin.py'), 'w').write(text) return out + +#------------------------------------------------------------------------------------ From 2fa4830827be695c673766b82a6599ce5b6755ad Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 19:53:31 +0200 Subject: [PATCH 048/119] [oct23av] in CODEGEN move tmadmode steps (SDE config, runcard/paramcard generation, cleanup) from patchMad.sh to generateAndCompare.sh This is meant to fix various code generation issues introduced in generateAndCompare.sh 
by PR #764 --- .../PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh | 31 ++----------------- epochX/cudacpp/CODEGEN/generateAndCompare.sh | 26 ++++++++++++++-- 2 files changed, 26 insertions(+), 31 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh index b08e16e31f..a88ac5cb0a 100755 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh @@ -42,32 +42,9 @@ if [ ! -e ${dir} ]; then echo "ERROR! Directory $dir does not exist"; exit 1; fi # AV Recover special 'tmad' mode used by generateAndCompare.sh, after OM's changes that commented this out in patchMad.sh tmadmode=0 -if [ "${MG5AMC_TMADMODE}" != "" ]; then +if [ "${CUDACPP_CODEGEN_TMADMODE}" != "" ]; then tmadmode=1 - echo "DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=${MG5AMC_TMADMODE})" -fi - -# These two steps are part of "cd Source; make" but they actually are code-generating steps -if [ "${tmadmode}" != "0" ]; then - ${dir}/bin/madevent treatcards run # AV BUG! THIS MAY SILENTLY FAIL (should check if output contains "Please report this bug") - ###echo status=$? - ${dir}/bin/madevent treatcards param # AV BUG! THIS MAY SILENTLY FAIL (should check if output contains "Please report this bug") - ###echo status=$? -fi - -# Cleanup -if [ "${tmadmode}" != "0" ]; then - \rm -f ${dir}/crossx.html - \rm -f ${dir}/index.html - \rm -f ${dir}/madevent.tar.gz - \rm -f ${dir}/Cards/delphes_trigger.dat - \rm -f ${dir}/Cards/plot_card.dat - \rm -f ${dir}/bin/internal/run_plot* - \rm -f ${dir}/HTML/* - \rm -rf ${dir}/bin/internal/__pycache__ - \rm -rf ${dir}/bin/internal/ufomodel/py3_model.pkl - \rm -rf ${dir}/bin/internal/ufomodel/__pycache__ - touch ${dir}/HTML/.keep # new file + echo "DEBUG! 
Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=${CUDACPP_CODEGEN_TMADMODE})" fi # Exit here for patchlevel 0 (--upstream) @@ -84,10 +61,6 @@ fi # (1) Process-independent patches touch ${dir}/Events/.keep # this file should already be present (mg5amcnlo copies it from Template/LO/Events/.keep) \cp -pr ${scrdir}/MG5aMC_patches/${dir_patches}/fbridge_common.inc ${dir}/SubProcesses # new file -if [ "${tmadmode}" != "0" ]; then - sed -i 's/2 = sde_strategy/1 = sde_strategy/' ${dir}/Cards/run_card.dat # use strategy SDE=1 in multichannel mode (see #419) - sed -i 's/SDE_STRAT = 2/SDE_STRAT = 1/' ${dir}/Source/run_card.inc # use strategy SDE=1 in multichannel mode (see #419) -fi if [ "${patchlevel}" == "2" ]; then cd ${dir} if [ "${tmadmode}" != "0" ]; then diff --git a/epochX/cudacpp/CODEGEN/generateAndCompare.sh b/epochX/cudacpp/CODEGEN/generateAndCompare.sh index ffff6353b6..c1c7886e54 100755 --- a/epochX/cudacpp/CODEGEN/generateAndCompare.sh +++ b/epochX/cudacpp/CODEGEN/generateAndCompare.sh @@ -7,7 +7,7 @@ set -e # fail on error # AV Recover special 'tmad' mode used by generateAndCompare.sh, after OM's changes that commented this out in patchMad.sh -export MG5AMC_TMADMODE=1 +export CUDACPP_CODEGEN_TMADMODE=1 #-------------------------------------------------------------------------------------- @@ -204,9 +204,31 @@ function codeGenAndDiff() cat ${outproc}_log.txt | egrep -v '(Crash Annotation)' > ${outproc}_log.txt.new # remove firefox 'glxtest: libEGL initialize failed' errors \mv ${outproc}_log.txt.new ${outproc}_log.txt fi + # Patches moved here from patchMad.sh after Olivier's PR #764 (THIS IS ONLY NEEDED IN THE MADGRAPH4GPU GIT REPO) + if [ "${OUTBCK}" == "mad" ]; then + # Force the use of strategy SDE=1 in multichannel mode (see #419) + sed -i 's/2 = sde_strategy/1 = sde_strategy/' ${outproc}/Cards/run_card.dat + # Generate run_card.inc and param_card.inc (include stdout and stderr in the code generation log which is later checked for errors) + # These two 
steps are part of "cd Source; make" but they actually are code-generating steps + ${outproc}/bin/madevent treatcards run >> ${outproc}_log.txt 2>&1 # AV BUG! THIS MAY SILENTLY FAIL (check if output contains "Please report this bug") + ${outproc}/bin/madevent treatcards param >> ${outproc}_log.txt 2>&1 # AV BUG! THIS MAY SILENTLY FAIL (check if output contains "Please report this bug") + # Cleanup + \rm -f ${outproc}/crossx.html + \rm -f ${outproc}/index.html + \rm -f ${outproc}/madevent.tar.gz + \rm -f ${outproc}/Cards/delphes_trigger.dat + \rm -f ${outproc}/Cards/plot_card.dat + \rm -f ${outproc}/bin/internal/run_plot* + \rm -f ${outproc}/HTML/* + \rm -rf ${outproc}/bin/internal/__pycache__ + \rm -rf ${outproc}/bin/internal/ufomodel/py3_model.pkl + \rm -rf ${outproc}/bin/internal/ufomodel/__pycache__ + touch ${outproc}/HTML/.keep # new file + fi + # Check the code generation log for errors if [ -d ${outproc} ] && ! grep -q "Please report this bug" ${outproc}_log.txt; then ###cat ${outproc}_log.txt; exit 0 # FOR DEBUGGING - cat ${MG5AMC_HOME}/${outproc}_log.txt | egrep 'INFO: (Try|Creat|Organiz|Process)' + cat ${MG5AMC_HOME}/${outproc}_log.txt | { egrep 'INFO: (Try|Creat|Organiz|Process)' || true; } else echo "*** ERROR! Code generation failed" cat ${MG5AMC_HOME}/${outproc}_log.txt From e8031f4f5a30e253e382425df6b8c6c963db2800 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 20:16:42 +0200 Subject: [PATCH 049/119] [oct23av] in CODEGEN launch_plugin.py, make the exception in reset_simd more user friendly... 
--- .../cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py index 99729e5a10..63a17e871b 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py @@ -49,7 +49,7 @@ def compile(self, *args, **opts): class CPPRunCard(banner_mod.RunCardLO): def reset_simd(self, old_value, new_value, name): if not hasattr(self, 'path'): - raise Exception + raise Exception('INTERNAL ERROR! CPPRunCard instance has no attribute path') if name == "vector_size" and new_value <= int(old_value): # code can handle the new size -> do not recompile return From 226ac3f10ffb8ed07bc85a31151173340e2b58bc Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 21:02:27 +0200 Subject: [PATCH 050/119] [oct23av] in CODEGEN, add a workaround for the exception thrown by reset_simd if path is not set (internal error) --- .../cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py index 63a17e871b..c9d1c7706a 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py @@ -49,7 +49,9 @@ def compile(self, *args, **opts): class CPPRunCard(banner_mod.RunCardLO): def reset_simd(self, old_value, new_value, name): if not hasattr(self, 'path'): - raise Exception('INTERNAL ERROR! CPPRunCard instance has no attribute path') + logger.warning('WARNING! CPPRunCard instance has no attribute path') + return + ###raise Exception('INTERNAL ERROR! 
CPPRunCard instance has no attribute path') if name == "vector_size" and new_value <= int(old_value): # code can handle the new size -> do not recompile return From 70b770cdd13845e9615a3a2fc38407d130b2e06a Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 21:06:42 +0200 Subject: [PATCH 051/119] [oct23av] regenerate all 8 mad and 7 sa processes after merging and patching Olivier's PR #764 Code changes are only in .mad directories and mainly in the python infrastructure. There are several matrix1.f that have changed too however (including ggttg/gg/ggg. but not ggtt). I checked that ggtt and also ggttg tput/tmad tests succeed (logs not kept). --- .../CODEGEN_cudacpp_ee_mumu_log.txt | 15 +- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 69 +- epochX/cudacpp/gg_tt.mad/Source/make_opts | 9 +- epochX/cudacpp/gg_tt.mad/Source/makefile | 4 +- .../gg_tt.mad/SubProcesses/dummy_fct.f | 10 +- .../cudacpp/gg_tt.mad/bin/internal/banner.py | 281 +- .../bin/internal/check_param_card.py | 2 +- .../bin/internal/common_run_interface.py | 11 +- .../gg_tt.mad/bin/internal/extended_cmd.py | 8 +- .../gg_tt.mad/bin/internal/gen_ximprove.py | 9 +- .../gg_tt.mad/bin/internal/launch_plugin.py | 62 +- .../bin/internal/madevent_interface.py | 6 +- epochX/cudacpp/gg_tt.mad/bin/madevent | 6 +- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 11 +- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 99 +- epochX/cudacpp/gg_tt01g.mad/Source/make_opts | 9 +- epochX/cudacpp/gg_tt01g.mad/Source/makefile | 4 +- .../SubProcesses/P2_gg_ttxg/matrix1.f | 49 +- .../gg_tt01g.mad/SubProcesses/dummy_fct.f | 10 +- .../gg_tt01g.mad/bin/internal/banner.py | 281 +- .../bin/internal/check_param_card.py | 2 +- .../bin/internal/common_run_interface.py | 11 +- .../gg_tt01g.mad/bin/internal/extended_cmd.py | 8 +- .../gg_tt01g.mad/bin/internal/gen_ximprove.py | 9 +- .../bin/internal/launch_plugin.py | 62 +- .../bin/internal/madevent_interface.py | 6 +- epochX/cudacpp/gg_tt01g.mad/bin/madevent | 6 +- 
.../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 81 +- epochX/cudacpp/gg_ttg.mad/Source/make_opts | 9 +- epochX/cudacpp/gg_ttg.mad/Source/makefile | 4 +- .../SubProcesses/P1_gg_ttxg/matrix1.f | 49 +- .../gg_ttg.mad/SubProcesses/dummy_fct.f | 10 +- .../cudacpp/gg_ttg.mad/bin/internal/banner.py | 281 +- .../bin/internal/check_param_card.py | 2 +- .../bin/internal/common_run_interface.py | 11 +- .../gg_ttg.mad/bin/internal/extended_cmd.py | 8 +- .../gg_ttg.mad/bin/internal/gen_ximprove.py | 9 +- .../gg_ttg.mad/bin/internal/launch_plugin.py | 62 +- .../bin/internal/madevent_interface.py | 6 +- epochX/cudacpp/gg_ttg.mad/bin/madevent | 6 +- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 15 +- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 83 +- epochX/cudacpp/gg_ttgg.mad/Source/make_opts | 9 +- epochX/cudacpp/gg_ttgg.mad/Source/makefile | 4 +- .../SubProcesses/P1_gg_ttxgg/matrix1.f | 638 +- .../gg_ttgg.mad/SubProcesses/dummy_fct.f | 10 +- .../gg_ttgg.mad/bin/internal/banner.py | 281 +- .../bin/internal/check_param_card.py | 2 +- .../bin/internal/common_run_interface.py | 11 +- .../gg_ttgg.mad/bin/internal/extended_cmd.py | 8 +- .../gg_ttgg.mad/bin/internal/gen_ximprove.py | 9 +- .../gg_ttgg.mad/bin/internal/launch_plugin.py | 62 +- .../bin/internal/madevent_interface.py | 6 +- epochX/cudacpp/gg_ttgg.mad/bin/madevent | 6 +- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 17 +- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 85 +- epochX/cudacpp/gg_ttggg.mad/Source/make_opts | 9 +- epochX/cudacpp/gg_ttggg.mad/Source/makefile | 4 +- .../SubProcesses/P1_gg_ttxggg/matrix1.f | 10181 ++++++++-------- .../gg_ttggg.mad/SubProcesses/dummy_fct.f | 10 +- .../gg_ttggg.mad/bin/internal/banner.py | 281 +- .../bin/internal/check_param_card.py | 2 +- .../bin/internal/common_run_interface.py | 11 +- .../gg_ttggg.mad/bin/internal/extended_cmd.py | 8 +- .../gg_ttggg.mad/bin/internal/gen_ximprove.py | 9 +- .../bin/internal/launch_plugin.py | 62 +- .../bin/internal/madevent_interface.py | 6 +- 
epochX/cudacpp/gg_ttggg.mad/bin/madevent | 6 +- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 17 +- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 113 +- epochX/cudacpp/gq_ttq.mad/Source/make_opts | 9 +- epochX/cudacpp/gq_ttq.mad/Source/makefile | 4 +- .../gq_ttq.mad/SubProcesses/dummy_fct.f | 10 +- .../cudacpp/gq_ttq.mad/bin/internal/banner.py | 281 +- .../bin/internal/check_param_card.py | 2 +- .../bin/internal/common_run_interface.py | 11 +- .../gq_ttq.mad/bin/internal/extended_cmd.py | 8 +- .../gq_ttq.mad/bin/internal/gen_ximprove.py | 9 +- .../gq_ttq.mad/bin/internal/launch_plugin.py | 62 +- .../bin/internal/madevent_interface.py | 6 +- epochX/cudacpp/gq_ttq.mad/bin/madevent | 6 +- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 15 +- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 11 +- .../CODEGEN_mad_pp_tt012j_log.txt | 333 +- epochX/cudacpp/pp_tt012j.mad/Source/make_opts | 9 +- epochX/cudacpp/pp_tt012j.mad/Source/makefile | 4 +- .../SubProcesses/P1_gg_ttxg/matrix1.f | 49 +- .../SubProcesses/P2_gg_ttxgg/matrix1.f | 638 +- .../SubProcesses/P2_gg_ttxuux/matrix1.f | 61 +- .../SubProcesses/P2_gu_ttxgu/matrix1.f | 84 +- .../SubProcesses/P2_gux_ttxgux/matrix1.f | 92 +- .../SubProcesses/P2_uc_ttxuc/matrix1.f | 10 +- .../SubProcesses/P2_ucx_ttxucx/matrix1.f | 14 +- .../SubProcesses/P2_uu_ttxuu/matrix1.f | 30 +- .../SubProcesses/P2_uux_ttxgg/matrix1.f | 78 +- .../SubProcesses/P2_uux_ttxuux/matrix1.f | 24 +- .../SubProcesses/P2_uxcx_ttxuxcx/matrix1.f | 10 +- .../SubProcesses/P2_uxux_ttxuxux/matrix1.f | 30 +- .../pp_tt012j.mad/SubProcesses/dummy_fct.f | 10 +- .../pp_tt012j.mad/bin/internal/banner.py | 281 +- .../bin/internal/check_param_card.py | 2 +- .../bin/internal/common_run_interface.py | 11 +- .../bin/internal/extended_cmd.py | 8 +- .../bin/internal/gen_ximprove.py | 9 +- .../bin/internal/launch_plugin.py | 62 +- .../bin/internal/madevent_interface.py | 6 +- epochX/cudacpp/pp_tt012j.mad/bin/madevent | 6 +- 107 files changed, 8704 insertions(+), 7153 deletions(-) diff --git 
a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 177569f8c4..415537ec64 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005261898040771484  +DEBUG: model prefixing takes 0.005320072174072266  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,14 +174,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. 
-Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s +Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.268 s +ALOHA: aloha creates 4 routines in 0.260 s FFV1 FFV1 FFV2 @@ -198,9 +198,10 @@ FileWriter for / FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.660s -user 0m0.598s -sys 0m0.056s +real 0m0.650s +user 0m0.591s +sys 0m0.055s diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 6020b75c49..5f2b8c9ba8 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005294084548950195  +DEBUG: model prefixing takes 0.005397319793701172  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -184,18 +184,18 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.103 s +Wrote files for 10 helas calls in 0.102 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha 
creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.147 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 @@ -219,8 +219,31 @@ save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CO INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG: self.in_madevent_mode =  True [output.py at line 207]  +DEBUG! Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +patching file Source/genps.inc +patching file Source/makefile +patching file SubProcesses/makefile +patching file bin/internal/gen_ximprove.py +Hunk #1 succeeded at 385 (offset 5 lines). +patching file bin/internal/madevent_interface.py +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). +patching file driver.f +patching file matrix1.f +DEBUG: p.returncode =  0 [output.py at line 233]  +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done. 
+Type "launch" to generate events from this process, or see +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README +Run "open index.html" to see more information about this process. +quit + +real 0m1.975s +user 0m1.493s +sys 0m0.208s ************************************************************ * * * W E L C O M E to * @@ -248,8 +271,7 @@ Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run -run_card missed argument cudacpp_backend. Takes default: CPP -run_card missed argument cudacpp_backend. Takes default: CPP +WARNING! CPPRunCard instance has no attribute path quit INFO: launch in debug mode @@ -283,26 +305,3 @@ treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common -patching file Source/genps.inc -patching file Source/makefile -patching file SubProcesses/makefile -patching file Source/make_opts -patching file bin/internal/banner.py -Hunk #1 succeeded at 4188 (offset 1 line). -patching file bin/internal/gen_ximprove.py -patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 -patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). -patching file driver.f -patching file matrix1.f -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done. 
-Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README -Run "open index.html" to see more information about this process. -quit - -real 0m2.624s -user 0m2.008s -sys 0m0.304s diff --git a/epochX/cudacpp/gg_tt.mad/Source/make_opts b/epochX/cudacpp/gg_tt.mad/Source/make_opts index bd3c24228d..57f5f7bb96 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/make_opts +++ b/epochX/cudacpp/gg_tt.mad/Source/make_opts @@ -1,17 +1,12 @@ -pdlabel1= -pdlabel2= -lhapdf= -PYTHIA8_PATH=NotInstalled -MG5AMC_VERSION=3.5.0_lo_vect GLOBAL_FLAG=-O3 -ffast-math -fbounds-check -ALOHA_FLAG= -MATRIX_FLAG= DEFAULT_CPP_COMPILER=g++ MACFLAG= STDLIB=-lstdc++ STDLIB_FLAG= DEFAULT_F_COMPILER=gfortran DEFAULT_F2PY_COMPILER=f2py3 +MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime +PYTHIA8_PATH=NotInstalled #end_of_make_opts_variables BIASLIBDIR=../../../lib/ diff --git a/epochX/cudacpp/gg_tt.mad/Source/makefile b/epochX/cudacpp/gg_tt.mad/Source/makefile index dbe08b846e..00c73099a0 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/makefile +++ b/epochX/cudacpp/gg_tt.mad/Source/makefile @@ -136,5 +136,7 @@ cleanSource: clean: cleanSource for i in `ls -d ../SubProcesses/P*`; do cd $$i; make clean; cd -; done; -cleanall: cleanSource +cleanavx: + for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done; +cleanall: cleanSource # THIS IS THE ONE for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done; diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/dummy_fct.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/dummy_fct.f index 076cf29d67..4f7a204b8f 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/dummy_fct.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/dummy_fct.f @@ -32,7 +32,7 @@ logical FUNCTION dummy_cuts(P) LOGICAL IS_A_NU(NEXTERNAL),IS_HEAVY(NEXTERNAL) logical do_cuts(nexternal) COMMON /TO_SPECISA/IS_A_J,IS_A_A,IS_A_L,IS_A_B,IS_A_NU,IS_HEAVY, - . 
IS_A_ONIUM, do_cuts + & IS_A_ONIUM, do_cuts dummy_cuts=.true. @@ -118,15 +118,16 @@ double precision function user_dynamical_scale(P) C ************************************************************ -C default for the library implementing a dummt bias function +C default for the library implementing a dummy bias function C ************************************************************ subroutine bias_wgt_custom(p, original_weight, bias_weight) - implicit none + implicit none C C Parameters C include 'nexternal.inc' -C + +C C Arguments C double precision p(0:3, nexternal) @@ -161,3 +162,4 @@ subroutine bias_wgt_custom(p, original_weight, bias_weight) return end subroutine bias_wgt_custom + diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py b/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py index e9f421ae5f..824815f47b 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py @@ -537,7 +537,7 @@ def charge_card(self, tag): self.param_card = param_card_reader.ParamCard(param_card) return self.param_card elif tag == 'mgruncard': - self.run_card = RunCard(self[tag]) + self.run_card = RunCard(self[tag], unknown_warning=False) return self.run_card elif tag == 'mg5proccard': proc_card = self[tag].split('\n') @@ -2625,6 +2625,7 @@ class RunCard(ConfigFile): default_include_file = 'run_card.inc' default_autodef_file = 'run.inc' donewarning = [] + include_as_parameter = [] def plugin_input(self, finput): @@ -2671,18 +2672,40 @@ def __new__(cls, finput=None, **opt): elif isinstance(finput, cls): target_class = finput.__class__ elif isinstance(finput, str): + path = finput if '\n' not in finput: finput = open(finput).read() if 'req_acc_FO' in finput: target_class = RunCardNLO else: target_class = RunCardLO + if MADEVENT and os.path.exists(pjoin(MEDIR, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(MEDIR, 'bin', 'internal')]): + from importlib import reload + try: + 
reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard + elif not MADEVENT and os.path.exists(path.replace('run_card.dat', '../bin/internal/launch_plugin.py')): + misc.sprint('try to use plugin class') + pydir = path.replace('run_card.dat', '../bin/internal/') + with misc.TMP_variable(sys, 'path', sys.path + [pydir]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard + else: return None target_class.fill_post_set_from_blocks() - - return super(RunCard, cls).__new__(target_class, finput, **opt) + out = super(RunCard, cls).__new__(target_class, finput, **opt) + if not isinstance(out, RunCard): #should not happen but in presence of missmatch of library loaded. + out.__init__(finput, **opt) + return out else: return super(RunCard, cls).__new__(cls, finput, **opt) @@ -2710,7 +2733,7 @@ def __init__(self, *args, **opts): self.system_default = {} self.display_block = [] # set some block to be displayed - + self.fct_mod = {} # {param: (fct_pointer, *argument, **opts)} self.cut_class = {} self.warned=False @@ -2747,7 +2770,7 @@ def get_lepton_densities(cls): def add_param(self, name, value, fortran_name=None, include=True, hidden=False, legacy=False, cut=False, system=False, sys_default=None, - autodef=False, + autodef=False, fct_mod=None, **opts): """ add a parameter to the card. value is the default value and defines the type (int/float/bool/str) of the input. @@ -2761,6 +2784,7 @@ def add_param(self, name, value, fortran_name=None, include=True, If a path (Source/PDF/pdf.inc) the definition will be added within that file Default is False (does not add the definition) entry added in the run_card will automatically have this on True. + fct_mod: defines a function to run if the parameter is modify in the include file options of **opts: - allowed: list of valid options. '*' means anything else should be allowed. 
empty list means anything possible as well. @@ -2785,8 +2809,12 @@ def add_param(self, name, value, fortran_name=None, include=True, if autodef: self.definition_path[autodef].append(name) self.user_set.add(name) + # function to trigger if a value is modified in the include file + # main target is action to force correct recompilation (like for compilation flag/...) + if fct_mod: + self.fct_mod[name] = fct_mod - def read(self, finput, consistency=True): + def read(self, finput, consistency=True, unknown_warning=True): """Read the input file, this can be a path to a file, a file object, a str with the content of the file.""" @@ -2794,6 +2822,7 @@ def read(self, finput, consistency=True): if "\n" in finput: finput = finput.split('\n') elif os.path.isfile(finput): + self.path = finput finput = open(finput) else: raise Exception("No such file %s" % finput) @@ -2808,7 +2837,7 @@ def read(self, finput, consistency=True): name = name.lower().strip() if name not in self: #looks like an entry added by a user -> add it nicely - self.add_unknown_entry(name, value) + self.add_unknown_entry(name, value, unknown_warning) else: self.set( name, value, user=True) # parameter not set in the run_card can be set to compatiblity value @@ -2820,7 +2849,7 @@ def read(self, finput, consistency=True): logger.warning(str(error)) else: raise - def add_unknown_entry(self, name, value): + def add_unknown_entry(self, name, value, unknow_warning): """function to add an entry to the run_card when the associated parameter does not exists. This is based on the guess_entry_fromname for the various syntax providing input. This then call add_param accordingly. 
@@ -2859,7 +2888,7 @@ def add_unknown_entry(self, name, value): raise Exception("dictionary need to have at least one entry") default['dict']['__type__'] = default[self.guess_type_from_value(default_value[0])] - if name not in RunCard.donewarning: + if name not in RunCard.donewarning and unknow_warning: logger.warning("Found unexpected entry in run_card: \"%s\" with value \"%s\".\n"+\ " The type was assigned to %s. \n"+\ " The definition of that variable will %sbe automatically added to fortran file %s\n"+\ @@ -2897,7 +2926,16 @@ def valid_line(self, line, tmp): return False else: return True - + + + def reset_simd(self, old_value, new_value, name, *args, **opts): + raise Exception('pass in reset simd') + + def make_clean(self,old_value, new_value, name, dir): + raise Exception('pass make clean for ', dir) + + def make_Ptouch(self,old_value, new_value, name, reset): + raise Exception('pass Ptouch for ', reset) def write(self, output_file, template=None, python_template=False, write_hidden=False, template_options=None, **opt): @@ -3072,6 +3110,77 @@ def write(self, output_file, template=None, python_template=False, else: output_file.write(text) + def get_last_value_include(self, output_dir): + """For paraeter in self.fct_mod + parse the associate inc file to get the value of the previous run. + We return a dictionary {name: old_value} + if inc file does not exist we will return the current value (i.e. 
set has no change) + """ + + #remember that + # default_include_file is a class variable + # self.includepath is on the form include_path : [list of param ] + out = {} + + # setup inc_to_parse to be like self.includepath (include_path : [list of param ]) + # BUT only containing the parameter that need to be tracked for the fct_mod option + inc_to_parse = {} + for inc_file, params in self.includepath.items(): + if not inc_file: + continue + if any(p in params for p in self.fct_mod): + inc_to_parse[inc_file] = [name for name in self.includepath[inc_file] if name in self.fct_mod] + + # now loop over the files and ask the associate function + for inc_file, params in inc_to_parse.items(): + if inc_file is True: + inc_file = self.default_include_file + out.update(self.get_value_from_include(inc_file, params, output_dir)) + + return out + + def get_value_from_include(self, path, list_of_params, output_dir): + """for a given include file return the current value of the requested parameter + return a dictionary {name: value} + if path does not exists return the current value in self for all parameter""" + + #WARNING DOES NOT HANDLE LIST/DICT so far + + # handle case where file is missing + if not os.path.exists(pjoin(output_dir,path)): + misc.sprint("include file not existing", pjoin(output_dir,path)) + out = {name: self[name] for name in list_of_params} + + with open(pjoin(output_dir,path), 'r') as fsock: + text = fsock.read() + + for name in list_of_params: + misc.sprint(name, name in self.fortran_name) + misc.sprint(self.fortran_name[name] if name in self.fortran_name[name] else name) + to_track = [self.fortran_name[name] if name in self.fortran_name else name for name in list_of_params] + pattern = re.compile(r"\(?(%(names)s)\s?=\s?([^)]*)\)?" 
% {'names':'|'.join(to_track)}, re.I) + out = dict(pattern.findall(text)) + misc.sprint(out) + for name in list_of_params: + if name in self.fortran_name: + value = out[self.fortran_name[name]] + del out[self.fortran_name[name]] + out[name] = value + + for name, value in out.items(): + try: + out[name] = self.format_variable(value, type(self[name])) + except Exception: + continue + + if len(out) != len(list_of_params): + misc.sprint(list_of_params) + misc.sprint(to_track) + misc.sprint(self.fortran_name) + misc.sprint(text) + raise Exception + return out + def get_default(self, name, default=None, log_level=None): """return self[name] if exist otherwise default. log control if we @@ -3362,71 +3471,93 @@ def write_include_file(self, output_dir, output_file=None): #ensusre that system only parameter are correctly set self.update_system_parameter_for_include() + value_in_old_include = self.get_last_value_include(output_dir) + + if output_dir: self.write_autodef(output_dir, output_file=None) # check/fix status of customised functions self.edit_dummy_fct_from_file(self["custom_fcts"], os.path.dirname(output_dir)) for incname in self.includepath: - if incname is True: - pathinc = self.default_include_file - elif incname is False: - continue - else: - pathinc = incname + self.write_one_include_file(output_dir, incname, output_file) + + for name,value in value_in_old_include.items(): + if value != self[name]: + self.fct_mod[name][0](value, self[name], name, *self.fct_mod[name][1],**self.fct_mod[name][2]) - if output_file: - fsock = output_file + def write_one_include_file(self, output_dir, incname, output_file=None): + """write one include file at the time""" + + misc.sprint(incname) + if incname is True: + pathinc = self.default_include_file + elif incname is False: + return + else: + pathinc = incname + + if output_file: + fsock = output_file + else: + fsock = file_writers.FortranWriter(pjoin(output_dir,pathinc+'.tmp')) + + + for key in self.includepath[incname]: + 
#define the fortran name + if key in self.fortran_name: + fortran_name = self.fortran_name[key] else: - fsock = file_writers.FortranWriter(pjoin(output_dir,pathinc+'.tmp')) - for key in self.includepath[incname]: - #define the fortran name - if key in self.fortran_name: - fortran_name = self.fortran_name[key] + fortran_name = key + + if incname in self.include_as_parameter: + fsock.writelines('INTEGER %s\n' % fortran_name) + #get the value with warning if the user didn't set it + value = self.get_default(key) + if hasattr(self, 'mod_inc_%s' % key): + value = getattr(self, 'mod_inc_%s' % key)(value) + # Special treatment for strings containing a list of + # strings. Convert it to a list of strings + if isinstance(value, list): + # in case of a list, add the length of the list as 0th + # element in fortran. Only in case of integer or float + # list (not for bool nor string) + targettype = self.list_parameter[key] + if targettype is bool: + pass + elif targettype is int: + line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(len(value))) + fsock.writelines(line) + elif targettype is float: + line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(float(len(value)))) + fsock.writelines(line) + # output the rest of the list in fortran + for i,v in enumerate(value): + line = '%s(%s) = %s \n' % (fortran_name, i+1, self.f77_formatting(v)) + fsock.writelines(line) + elif isinstance(value, dict): + for fortran_name, onevalue in value.items(): + line = '%s = %s \n' % (fortran_name, self.f77_formatting(onevalue)) + fsock.writelines(line) + elif isinstance(incname,str) and 'compile' in incname: + if incname in self.include_as_parameter: + line = 'PARAMETER (%s=%s)' %( fortran_name, value) else: - fortran_name = key - - #get the value with warning if the user didn't set it - value = self.get_default(key) - if hasattr(self, 'mod_inc_%s' % key): - value = getattr(self, 'mod_inc_%s' % key)(value) - # Special treatment for strings containing a list of - # strings. 
Convert it to a list of strings - if isinstance(value, list): - # in case of a list, add the length of the list as 0th - # element in fortran. Only in case of integer or float - # list (not for bool nor string) - targettype = self.list_parameter[key] - if targettype is bool: - pass - elif targettype is int: - line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(len(value))) - fsock.writelines(line) - elif targettype is float: - line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(float(len(value)))) - fsock.writelines(line) - # output the rest of the list in fortran - for i,v in enumerate(value): - line = '%s(%s) = %s \n' % (fortran_name, i+1, self.f77_formatting(v)) - fsock.writelines(line) - elif isinstance(value, dict): - for fortran_name, onevalue in value.items(): - line = '%s = %s \n' % (fortran_name, self.f77_formatting(onevalue)) - fsock.writelines(line) - elif isinstance(incname,str) and 'compile' in incname: line = '%s = %s \n' % (fortran_name, value) - fsock.write(line) + fsock.write(line) + else: + if incname in self.include_as_parameter: + line = 'PARAMETER (%s=%s)' %( fortran_name, self.f77_formatting(value)) else: line = '%s = %s \n' % (fortran_name, self.f77_formatting(value)) - fsock.writelines(line) - if not output_file: - fsock.close() - path = pjoin(output_dir,pathinc) - if not os.path.exists(path) or not filecmp.cmp(path, path+'.tmp'): - files.mv(path+'.tmp', path) - else: - os.remove(path+'.tmp') - + fsock.writelines(line) + if not output_file: + fsock.close() + path = pjoin(output_dir,pathinc) + if not os.path.exists(path) or not filecmp.cmp(path, path+'.tmp'): + files.mv(path+'.tmp', path) + else: + os.remove(path+'.tmp') def write_autodef(self, output_dir, output_file=None): """ Add the definition of variable to run.inc if the variable is set with autodef. @@ -3765,13 +3896,14 @@ def remove_all_cut(self): %(tmin_for_channel)s = tmin_for_channel ! 
limit the non-singular reach of --some-- channel of integration related to T-channel diagram (value between -1 and 0), -1 is no impact %(survey_splitting)s = survey_splitting ! for loop-induced control how many core are used at survey for the computation of a single iteration. %(survey_nchannel_per_job)s = survey_nchannel_per_job ! control how many Channel are integrated inside a single job on cluster/multicore - %(refine_evt_by_job)s = refine_evt_by_job ! control the maximal number of events for the first iteration of the refine (larger means less jobs) + %(refine_evt_by_job)s = refine_evt_by_job ! control the maximal number of events for the first iteration of the refine (larger means less jobs) #********************************************************************* -# Compilation flag. No automatic re-compilation (need manual "make clean" in Source) +# Compilation flag. #********************************************************************* %(global_flag)s = global_flag ! fortran optimization flag use for the all code. %(aloha_flag)s = aloha_flag ! fortran optimization flag for aloha function. Suggestions: '-ffast-math' %(matrix_flag)s = matrix_flag ! fortran optimization flag for matrix.f function. Suggestions: '-O3' + %(vector_size)s = vector_size ! 
size designed for SIMD/OpenMP/GPU (number of events in lockstep) """ template_off = '# To see advanced option for Phase-Space optimization: type "update psoptim"' @@ -3927,9 +4059,12 @@ class RunCardLO(RunCard): "get_dummy_x1_x2": pjoin("SubProcesses","dummy_fct.f"), "dummy_boostframe": pjoin("SubProcesses","dummy_fct.f"), "user_dynamical_scale": pjoin("SubProcesses","dummy_fct.f"), + "bias_wgt_custom": pjoin("SubProcesses","dummy_fct.f"), "user_": pjoin("SubProcesses","dummy_fct.f") # all function starting by user will be added to that file } + include_as_parameter = ['vector.inc'] + if MG5DIR: default_run_card = pjoin(MG5DIR, "internal", "default_run_card_lo.dat") @@ -4163,10 +4298,15 @@ def default_setup(self): self.add_param('hel_splitamp', True, hidden=True, include=False, comment='decide if amplitude aloha call can be splitted in two or not when doing helicity per helicity optimization.') self.add_param('hel_zeroamp', True, hidden=True, include=False, comment='decide if zero amplitude can be removed from the computation when doing helicity per helicity optimization.') self.add_param('SDE_strategy', 1, allowed=[1,2], fortran_name="sde_strat", comment="decide how Multi-channel should behaves \"1\" means full single diagram enhanced (hep-ph/0208156), \"2\" use the product of the denominator") - self.add_param('global_flag', '-O', include=False, hidden=True, comment='global fortran compilation flag, suggestion -fbound-check') - self.add_param('aloha_flag', '', include=False, hidden=True, comment='global fortran compilation flag, suggestion: -ffast-math') - self.add_param('matrix_flag', '', include=False, hidden=True, comment='fortran compilation flag for the matrix-element files, suggestion -O3') - + self.add_param('global_flag', '-O', include=False, hidden=True, comment='global fortran compilation flag, suggestion -fbound-check', + fct_mod=(self.make_clean, ('Source'),{})) + self.add_param('aloha_flag', '', include=False, hidden=True, comment='global fortran 
compilation flag, suggestion: -ffast-math', + fct_mod=(self.make_clean, ('Source/DHELAS'),{})) + self.add_param('matrix_flag', '', include=False, hidden=True, comment='fortran compilation flag for the matrix-element files, suggestion -O3', + fct_mod=(self.make_Ptouch, ('matrix'),{})) + self.add_param('vector_size', 1, include='vector.inc', hidden=True, comment='lockstep size for parralelism run', + fortran_name='VECSIZE_MEMMAX', fct_mod=(self.reset_simd,(),{})) + # parameter allowing to define simple cut via the pdg # Special syntax are related to those. (can not be edit directly) self.add_param('pt_min_pdg',{'__type__':0.}, include=False, cut=True) @@ -4188,8 +4328,7 @@ def default_setup(self): self.add_param('mxxmin4pdg',[-1.], system=True) self.add_param('mxxpart_antipart', [False], system=True) - # CUDACPP parameters - self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + def check_validity(self): """ """ diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/check_param_card.py b/epochX/cudacpp/gg_tt.mad/bin/internal/check_param_card.py index fe874a06a4..71089d7480 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/check_param_card.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/check_param_card.py @@ -85,7 +85,7 @@ def load_str(self, text): self.value= ' '.join(data[len(self.lhacode):]) # check that lhacode are the first entry otherwise return invalid param. 
if ' '.join([str(i) for i in self.lhacode]) != ' '.join(data[:len(self.lhacode)]): - raise InvalidParam + raise InvalidParam("line was %s" % str(data)) else: self.value = data[-1] diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gg_tt.mad/bin/internal/common_run_interface.py index 5d0187e3fa..14c7f310dc 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/common_run_interface.py @@ -749,13 +749,15 @@ def writeRunWeb(me_dir): class RunWebHandling(object): - def __init__(self, me_dir, crashifpresent=True, warnifpresent=True): + def __init__(self, me_dir, crashifpresent=True, warnifpresent=True, force_run=False): """raise error if RunWeb already exists me_dir is the directory where the write RunWeb""" self.remove_run_web = True self.me_dir = me_dir - + if force_run: + self.remove_run_web = False + return if crashifpresent or warnifpresent: if os.path.exists(pjoin(me_dir, 'RunWeb')): pid = open(pjoin(me_dir, 'RunWeb')).read() @@ -6574,7 +6576,7 @@ def reask(self, *args, **opt): fail_due_to_format = 0 #parameter to avoid infinite loop def postcmd(self, stop, line): - if line not in [None, '0', 'done', '']: + if line not in [None, '0', 'done', '',0]: ending_question = cmd.OneLinePathCompletion.postcmd(self,stop,line) else: ending_question = True @@ -7533,7 +7535,8 @@ def open_file(self, answer): else: raise if time.time() - start < .5: - self.mother_interface.ask("Are you really that fast? If you are using an editor that returns directly. Please confirm that you have finised to edit the file", 'y') + self.mother_interface.ask("Are you really that fast? If you are using an editor that returns directly. 
Please confirm that you have finised to edit the file", 'y', + timeout=False) self.reload_card(path) def reload_card(self, path): diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/extended_cmd.py b/epochX/cudacpp/gg_tt.mad/bin/internal/extended_cmd.py index a6a8609dce..2f37070580 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/extended_cmd.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/extended_cmd.py @@ -1108,9 +1108,12 @@ def ask(self, question, default, choices=[], path_msg=None, if alias: choices += list(alias.keys()) + + question_instance = obj(question, allow_arg=choices, default=default, mother_interface=self, **opt) - + if fct_timeout is None: + fct_timeout = lambda x: question_instance.postcmd(x, default) if x and default else False if first_cmd: if isinstance(first_cmd, str): question_instance.onecmd(first_cmd) @@ -2271,6 +2274,9 @@ def postcmd(self, stop, line): if n: self.default(line) return self.postcmd(stop, line) + elif self.value is None and line: + self.default(line) + return self.postcmd(stop, line) if not self.casesensitive: for ans in self.allow_arg: if ans.lower() == self.value.lower(): diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py index 3b8ec31215..a88d60b282 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py @@ -154,10 +154,15 @@ def get_helicity(self, to_submit=True, clean=True): p = misc.Popen(['./gensym'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=Pdir) #sym_input = "%(points)d %(iterations)d %(accuracy)f \n" % self.opts + (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') - nb_channel = max([math.floor(float(d)) for d in stdout.split()]) - + try: + nb_channel = max([math.floor(float(d)) for d in stdout.split()]) + except Exception as error: + misc.sprint(stdout, 'no channel or error for %s' % Pdir) + continue + 
self.cmd.compile(['madevent_forhel'], cwd=Pdir) if not os.path.exists(pjoin(Pdir, 'madevent_forhel')): raise Exception('Error make madevent_forhel not successful') diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_tt.mad/bin/internal/launch_plugin.py index f4c9cb6334..c9d1c7706a 100644 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/launch_plugin.py @@ -1,6 +1,12 @@ +# Copyright (C) 2020-2023 CERN and UCLouvain. +# Licensed under the GNU Lesser General Public License (version 3 or later). +# Created by: O. Mattelaer (Aug 2023) for the MG5aMC CUDACPP plugin. +# Further modified by: O. Mattelaer, A. Valassi (2023) for the MG5aMC CUDACPP plugin. import logging - +import os +import subprocess +pjoin = os.path.join logger = logging.getLogger('cmdprint') # for stdout try: @@ -9,20 +15,23 @@ import internal.madevent_interface as madevent_interface import internal.misc as misc import internal.extended_cmd as extended_cmd + import internal.banner as banner_mod else: import madgraph.interface.madevent_interface as madevent_interface import madgraph.various.misc as misc import madgraph.interface.extended_cmd as extended_cmd + import madgraph.various.banner as banner_mod class CPPMEInterface(madevent_interface.MadEventCmdShell): - def compile(self, *args, **opts): """ """ import multiprocessing if not self.options['nb_core'] or self.options['nb_core'] == 'None': self.options['nb_core'] = multiprocessing.cpu_count() - if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): + import pathlib + import os + pjoin = os.path.join cudacpp_backend = self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) if cudacpp_backend == 'FORTRAN': @@ -36,5 +45,50 @@ def compile(self, *args, **opts): return misc.compile(nb_core=self.options['nb_core'], *args, 
**opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) - + +class CPPRunCard(banner_mod.RunCardLO): + def reset_simd(self, old_value, new_value, name): + if not hasattr(self, 'path'): + logger.warning('WARNING! CPPRunCard instance has no attribute path') + return + ###raise Exception('INTERNAL ERROR! CPPRunCard instance has no attribute path') + if name == "vector_size" and new_value <= int(old_value): + # code can handle the new size -> do not recompile + return + Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source') + subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + def plugin_input(self, finput): + return + + def default_setup(self): + super().default_setup() + self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + + def write_one_include_file(self, output_dir, incname, output_file=None): + """write one include file at the time""" + if incname == "vector.inc" and 'vector_size' not in self.user_set: + return + super().write_one_include_file(output_dir, incname, output_file) + + def check_validity(self): + """ensure that PLUGIN information are consistent""" + super().check_validity() + if self['SDE_strategy'] != 1: + logger.warning('SDE_strategy different of 1 is not supported with SMD/GPU mode') + self['sde_strategy'] = 1 + if self['hel_recycling']: + self['hel_recycling'] = False + +class GPURunCard(CPPRunCard): + def default_setup(self): + super(CPPRunCard, self).default_setup() + self.add_param('cudacpp_backend', 'CUDA', include=False, hidden=False) + +#class CUDACPPRunCard(CPPRunCard): +# def default_setup(self): +# super(CPPRunCard, self).default_setup() +# self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + MEINTERFACE = CPPMEInterface +RunCard = CPPRunCard diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/gg_tt.mad/bin/internal/madevent_interface.py index 
920e07a926..d722702891 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/madevent_interface.py @@ -3796,9 +3796,11 @@ def do_combine_events(self, line): if self.run_card['bias_module'].lower() not in ['dummy', 'none'] and nb_event: self.correct_bias() - + elif self.run_card['custom_fcts']: + self.correct_bias() + logger.info("combine events done in %s", time.time()-start) - + self.to_store.append('event') diff --git a/epochX/cudacpp/gg_tt.mad/bin/madevent b/epochX/cudacpp/gg_tt.mad/bin/madevent index 10b6a71fa2..dff9711b73 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/madevent +++ b/epochX/cudacpp/gg_tt.mad/bin/madevent @@ -173,6 +173,10 @@ if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): launch_interface = launch_plugin.MEINTERFACE +#Source use this executable for compilation always allow it +force_run = False +if (args and args[0] == 'treatcards'): + force_run=True # Call the cmd interface main loop try: @@ -180,7 +184,7 @@ try: launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) - with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): + with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), force_run=force_run): if (args and os.path.isfile(args[0])): # They are an input file input_file = args[0] diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 3126ec4e59..f185804aa5 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005253314971923828  +DEBUG: model prefixing takes 0.005272865295410156  INFO: Restrict model sm with file 
models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -193,9 +193,10 @@ FileWriter for / FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.542s -user 0m0.474s -sys 0m0.059s +real 0m0.547s +user 0m0.471s +sys 0m0.060s diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 1f7f68b2ce..e5d91cbc60 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~; add process g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005590200424194336  +DEBUG: model prefixing takes 0.005311250686645508  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -184,7 +184,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,15 +193,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: 
process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -210,21 +210,21 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s -Wrote files for 46 helas calls in 0.236 s +Wrote files for 46 helas calls in 0.242 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.322 s +ALOHA: aloha creates 5 routines in 0.321 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  
ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -232,7 +232,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.305 s +ALOHA: aloha creates 10 routines in 0.306 s VVV1 VVV1 FFV1 @@ -256,8 +256,40 @@ save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CO INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG: self.in_madevent_mode =  True [output.py at line 207]  +DEBUG! Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +patching file Source/genps.inc +patching file Source/makefile +patching file SubProcesses/makefile +patching file bin/internal/gen_ximprove.py +Hunk #1 succeeded at 385 (offset 5 lines). +patching file bin/internal/madevent_interface.py +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). 
+patching file driver.f +patching file matrix1.f +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). +patching file driver.f +patching file matrix1.f +Hunk #2 succeeded at 159 (offset 16 lines). +Hunk #3 succeeded at 237 (offset 16 lines). +Hunk #4 succeeded at 265 (offset 16 lines). +Hunk #5 succeeded at 310 (offset 16 lines). +DEBUG: p.returncode =  0 [output.py at line 233]  +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done. +Type "launch" to generate events from this process, or see +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/README +Run "open index.html" to see more information about this process. +quit + +real 0m2.268s +user 0m2.030s +sys 0m0.237s ************************************************************ * * * W E L C O M E to * @@ -285,8 +317,7 @@ Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run -run_card missed argument cudacpp_backend. Takes default: CPP -run_card missed argument cudacpp_backend. Takes default: CPP +WARNING! 
CPPRunCard instance has no attribute path quit INFO: launch in debug mode @@ -320,35 +351,3 @@ treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common -patching file Source/genps.inc -patching file Source/makefile -patching file SubProcesses/makefile -patching file Source/make_opts -patching file bin/internal/banner.py -Hunk #1 succeeded at 4188 (offset 1 line). -patching file bin/internal/gen_ximprove.py -patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 -patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). -patching file driver.f -patching file matrix1.f -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 -patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). -patching file driver.f -patching file matrix1.f -Hunk #2 succeeded at 159 (offset 16 lines). -Hunk #3 succeeded at 237 (offset 16 lines). -Hunk #4 succeeded at 265 (offset 16 lines). -Hunk #5 succeeded at 310 (offset 16 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done. -Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/README -Run "open index.html" to see more information about this process. 
-quit - -real 0m2.905s -user 0m2.586s -sys 0m0.307s diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/make_opts b/epochX/cudacpp/gg_tt01g.mad/Source/make_opts index bd3c24228d..57f5f7bb96 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/make_opts +++ b/epochX/cudacpp/gg_tt01g.mad/Source/make_opts @@ -1,17 +1,12 @@ -pdlabel1= -pdlabel2= -lhapdf= -PYTHIA8_PATH=NotInstalled -MG5AMC_VERSION=3.5.0_lo_vect GLOBAL_FLAG=-O3 -ffast-math -fbounds-check -ALOHA_FLAG= -MATRIX_FLAG= DEFAULT_CPP_COMPILER=g++ MACFLAG= STDLIB=-lstdc++ STDLIB_FLAG= DEFAULT_F_COMPILER=gfortran DEFAULT_F2PY_COMPILER=f2py3 +MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime +PYTHIA8_PATH=NotInstalled #end_of_make_opts_variables BIASLIBDIR=../../../lib/ diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/makefile b/epochX/cudacpp/gg_tt01g.mad/Source/makefile index dbe08b846e..00c73099a0 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/makefile +++ b/epochX/cudacpp/gg_tt01g.mad/Source/makefile @@ -136,5 +136,7 @@ cleanSource: clean: cleanSource for i in `ls -d ../SubProcesses/P*`; do cd $$i; make clean; cd -; done; -cleanall: cleanSource +cleanavx: + for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done; +cleanall: cleanSource # THIS IS THE ONE for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done; diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f index eb85b7ebb0..02f406668c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f @@ -359,7 +359,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C LOCAL VARIABLES C INTEGER I,J,M,N - COMPLEX*16 ZTEMP, TMP_JAMP(10) + COMPLEX*16 ZTEMP, TMP_JAMP(9) REAL*8 CF(NCOLOR,NCOLOR) COMPLEX*16 AMP(NGRAPHS), JAMP(NCOLOR,NAMPSO) COMPLEX*16 W(6,NWAVEFUNCS) @@ -508,33 +508,30 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) TMP_JAMP(3) = AMP(15) + 
AMP(16) ! used 4 times TMP_JAMP(2) = AMP(1) + AMP(18) ! used 4 times TMP_JAMP(1) = AMP(12) - AMP(17) ! used 4 times - TMP_JAMP(10) = TMP_JAMP(3) - TMP_JAMP(2) ! used 2 times - TMP_JAMP(9) = TMP_JAMP(1) + ((-0.000000000000000D+00 + TMP_JAMP(9) = TMP_JAMP(3) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(8) ! used 2 times + TMP_JAMP(8) = TMP_JAMP(3) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(5) ! used 2 times + TMP_JAMP(7) = TMP_JAMP(2) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(2) ! used 2 times + TMP_JAMP(6) = TMP_JAMP(2) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(3) ! used 2 times + TMP_JAMP(5) = TMP_JAMP(1) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(11) ! used 2 times - TMP_JAMP(8) = TMP_JAMP(2) - TMP_JAMP(1) ! used 2 times - TMP_JAMP(7) = TMP_JAMP(1) + ((0.000000000000000D+00, + TMP_JAMP(4) = TMP_JAMP(1) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(10) ! used 2 times - TMP_JAMP(6) = TMP_JAMP(3) - TMP_JAMP(1) ! used 2 times - TMP_JAMP(5) = TMP_JAMP(2) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(3) ! used 2 times - TMP_JAMP(4) = TMP_JAMP(3) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(5) ! 
used 2 times - JAMP(1,1) = (-1.000000000000000D+00)*AMP(6)+TMP_JAMP(4)+( - $ -1.000000000000000D+00)*TMP_JAMP(5) - JAMP(2,1) = (-1.000000000000000D+00)*AMP(4)+(-1.000000000000000D - $ +00)*TMP_JAMP(4)+TMP_JAMP(9) - JAMP(3,1) = (-1.000000000000000D+00)*AMP(13)+TMP_JAMP(5)+( - $ -1.000000000000000D+00)*TMP_JAMP(7) - JAMP(4,1) = (-1.000000000000000D+00)*AMP(7)+((0.000000000000000D - $ +00,1.000000000000000D+00))*AMP(8)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*AMP(10)+(-1.000000000000000D+00) - $ *TMP_JAMP(6) - JAMP(5,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(2)+((0.000000000000000D+00,-1.000000000000000D+00))*AMP(11) - $ +(-1.000000000000000D+00)*AMP(14)+TMP_JAMP(8) - JAMP(6,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(2)+((0.000000000000000D+00,-1.000000000000000D+00))*AMP(8) - $ +(-1.000000000000000D+00)*AMP(9)+TMP_JAMP(10) + JAMP(1,1) = (-1.000000000000000D+00)*AMP(6)+(-1.000000000000000D + $ +00)*TMP_JAMP(6)+TMP_JAMP(8) + JAMP(2,1) = (-1.000000000000000D+00)*AMP(4)+TMP_JAMP(5)+( + $ -1.000000000000000D+00)*TMP_JAMP(8) + JAMP(3,1) = (-1.000000000000000D+00)*AMP(13)+( + $ -1.000000000000000D+00)*TMP_JAMP(4)+TMP_JAMP(6) + JAMP(4,1) = (-1.000000000000000D+00)*AMP(7)+TMP_JAMP(4)+( + $ -1.000000000000000D+00)*TMP_JAMP(9) + JAMP(5,1) = (-1.000000000000000D+00)*AMP(14)+( + $ -1.000000000000000D+00)*TMP_JAMP(5)+TMP_JAMP(7) + JAMP(6,1) = (-1.000000000000000D+00)*AMP(9)+(-1.000000000000000D + $ +00)*TMP_JAMP(7)+TMP_JAMP(9) IF(INIT_MODE)THEN DO I=1, NGRAPHS diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/dummy_fct.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/dummy_fct.f index 076cf29d67..4f7a204b8f 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/dummy_fct.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/dummy_fct.f @@ -32,7 +32,7 @@ logical FUNCTION dummy_cuts(P) LOGICAL IS_A_NU(NEXTERNAL),IS_HEAVY(NEXTERNAL) logical do_cuts(nexternal) COMMON /TO_SPECISA/IS_A_J,IS_A_A,IS_A_L,IS_A_B,IS_A_NU,IS_HEAVY, - . 
IS_A_ONIUM, do_cuts + & IS_A_ONIUM, do_cuts dummy_cuts=.true. @@ -118,15 +118,16 @@ double precision function user_dynamical_scale(P) C ************************************************************ -C default for the library implementing a dummt bias function +C default for the library implementing a dummy bias function C ************************************************************ subroutine bias_wgt_custom(p, original_weight, bias_weight) - implicit none + implicit none C C Parameters C include 'nexternal.inc' -C + +C C Arguments C double precision p(0:3, nexternal) @@ -161,3 +162,4 @@ subroutine bias_wgt_custom(p, original_weight, bias_weight) return end subroutine bias_wgt_custom + diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py index e9f421ae5f..824815f47b 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py @@ -537,7 +537,7 @@ def charge_card(self, tag): self.param_card = param_card_reader.ParamCard(param_card) return self.param_card elif tag == 'mgruncard': - self.run_card = RunCard(self[tag]) + self.run_card = RunCard(self[tag], unknown_warning=False) return self.run_card elif tag == 'mg5proccard': proc_card = self[tag].split('\n') @@ -2625,6 +2625,7 @@ class RunCard(ConfigFile): default_include_file = 'run_card.inc' default_autodef_file = 'run.inc' donewarning = [] + include_as_parameter = [] def plugin_input(self, finput): @@ -2671,18 +2672,40 @@ def __new__(cls, finput=None, **opt): elif isinstance(finput, cls): target_class = finput.__class__ elif isinstance(finput, str): + path = finput if '\n' not in finput: finput = open(finput).read() if 'req_acc_FO' in finput: target_class = RunCardNLO else: target_class = RunCardLO + if MADEVENT and os.path.exists(pjoin(MEDIR, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(MEDIR, 'bin', 'internal')]): + from importlib import reload 
+ try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard + elif not MADEVENT and os.path.exists(path.replace('run_card.dat', '../bin/internal/launch_plugin.py')): + misc.sprint('try to use plugin class') + pydir = path.replace('run_card.dat', '../bin/internal/') + with misc.TMP_variable(sys, 'path', sys.path + [pydir]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard + else: return None target_class.fill_post_set_from_blocks() - - return super(RunCard, cls).__new__(target_class, finput, **opt) + out = super(RunCard, cls).__new__(target_class, finput, **opt) + if not isinstance(out, RunCard): #should not happen but in presence of missmatch of library loaded. + out.__init__(finput, **opt) + return out else: return super(RunCard, cls).__new__(cls, finput, **opt) @@ -2710,7 +2733,7 @@ def __init__(self, *args, **opts): self.system_default = {} self.display_block = [] # set some block to be displayed - + self.fct_mod = {} # {param: (fct_pointer, *argument, **opts)} self.cut_class = {} self.warned=False @@ -2747,7 +2770,7 @@ def get_lepton_densities(cls): def add_param(self, name, value, fortran_name=None, include=True, hidden=False, legacy=False, cut=False, system=False, sys_default=None, - autodef=False, + autodef=False, fct_mod=None, **opts): """ add a parameter to the card. value is the default value and defines the type (int/float/bool/str) of the input. @@ -2761,6 +2784,7 @@ def add_param(self, name, value, fortran_name=None, include=True, If a path (Source/PDF/pdf.inc) the definition will be added within that file Default is False (does not add the definition) entry added in the run_card will automatically have this on True. + fct_mod: defines a function to run if the parameter is modify in the include file options of **opts: - allowed: list of valid options. 
'*' means anything else should be allowed. empty list means anything possible as well. @@ -2785,8 +2809,12 @@ def add_param(self, name, value, fortran_name=None, include=True, if autodef: self.definition_path[autodef].append(name) self.user_set.add(name) + # function to trigger if a value is modified in the include file + # main target is action to force correct recompilation (like for compilation flag/...) + if fct_mod: + self.fct_mod[name] = fct_mod - def read(self, finput, consistency=True): + def read(self, finput, consistency=True, unknown_warning=True): """Read the input file, this can be a path to a file, a file object, a str with the content of the file.""" @@ -2794,6 +2822,7 @@ def read(self, finput, consistency=True): if "\n" in finput: finput = finput.split('\n') elif os.path.isfile(finput): + self.path = finput finput = open(finput) else: raise Exception("No such file %s" % finput) @@ -2808,7 +2837,7 @@ def read(self, finput, consistency=True): name = name.lower().strip() if name not in self: #looks like an entry added by a user -> add it nicely - self.add_unknown_entry(name, value) + self.add_unknown_entry(name, value, unknown_warning) else: self.set( name, value, user=True) # parameter not set in the run_card can be set to compatiblity value @@ -2820,7 +2849,7 @@ def read(self, finput, consistency=True): logger.warning(str(error)) else: raise - def add_unknown_entry(self, name, value): + def add_unknown_entry(self, name, value, unknow_warning): """function to add an entry to the run_card when the associated parameter does not exists. This is based on the guess_entry_fromname for the various syntax providing input. This then call add_param accordingly. 
@@ -2859,7 +2888,7 @@ def add_unknown_entry(self, name, value): raise Exception("dictionary need to have at least one entry") default['dict']['__type__'] = default[self.guess_type_from_value(default_value[0])] - if name not in RunCard.donewarning: + if name not in RunCard.donewarning and unknow_warning: logger.warning("Found unexpected entry in run_card: \"%s\" with value \"%s\".\n"+\ " The type was assigned to %s. \n"+\ " The definition of that variable will %sbe automatically added to fortran file %s\n"+\ @@ -2897,7 +2926,16 @@ def valid_line(self, line, tmp): return False else: return True - + + + def reset_simd(self, old_value, new_value, name, *args, **opts): + raise Exception('pass in reset simd') + + def make_clean(self,old_value, new_value, name, dir): + raise Exception('pass make clean for ', dir) + + def make_Ptouch(self,old_value, new_value, name, reset): + raise Exception('pass Ptouch for ', reset) def write(self, output_file, template=None, python_template=False, write_hidden=False, template_options=None, **opt): @@ -3072,6 +3110,77 @@ def write(self, output_file, template=None, python_template=False, else: output_file.write(text) + def get_last_value_include(self, output_dir): + """For paraeter in self.fct_mod + parse the associate inc file to get the value of the previous run. + We return a dictionary {name: old_value} + if inc file does not exist we will return the current value (i.e. 
set has no change) + """ + + #remember that + # default_include_file is a class variable + # self.includepath is on the form include_path : [list of param ] + out = {} + + # setup inc_to_parse to be like self.includepath (include_path : [list of param ]) + # BUT only containing the parameter that need to be tracked for the fct_mod option + inc_to_parse = {} + for inc_file, params in self.includepath.items(): + if not inc_file: + continue + if any(p in params for p in self.fct_mod): + inc_to_parse[inc_file] = [name for name in self.includepath[inc_file] if name in self.fct_mod] + + # now loop over the files and ask the associate function + for inc_file, params in inc_to_parse.items(): + if inc_file is True: + inc_file = self.default_include_file + out.update(self.get_value_from_include(inc_file, params, output_dir)) + + return out + + def get_value_from_include(self, path, list_of_params, output_dir): + """for a given include file return the current value of the requested parameter + return a dictionary {name: value} + if path does not exists return the current value in self for all parameter""" + + #WARNING DOES NOT HANDLE LIST/DICT so far + + # handle case where file is missing + if not os.path.exists(pjoin(output_dir,path)): + misc.sprint("include file not existing", pjoin(output_dir,path)) + out = {name: self[name] for name in list_of_params} + + with open(pjoin(output_dir,path), 'r') as fsock: + text = fsock.read() + + for name in list_of_params: + misc.sprint(name, name in self.fortran_name) + misc.sprint(self.fortran_name[name] if name in self.fortran_name[name] else name) + to_track = [self.fortran_name[name] if name in self.fortran_name else name for name in list_of_params] + pattern = re.compile(r"\(?(%(names)s)\s?=\s?([^)]*)\)?" 
% {'names':'|'.join(to_track)}, re.I) + out = dict(pattern.findall(text)) + misc.sprint(out) + for name in list_of_params: + if name in self.fortran_name: + value = out[self.fortran_name[name]] + del out[self.fortran_name[name]] + out[name] = value + + for name, value in out.items(): + try: + out[name] = self.format_variable(value, type(self[name])) + except Exception: + continue + + if len(out) != len(list_of_params): + misc.sprint(list_of_params) + misc.sprint(to_track) + misc.sprint(self.fortran_name) + misc.sprint(text) + raise Exception + return out + def get_default(self, name, default=None, log_level=None): """return self[name] if exist otherwise default. log control if we @@ -3362,71 +3471,93 @@ def write_include_file(self, output_dir, output_file=None): #ensusre that system only parameter are correctly set self.update_system_parameter_for_include() + value_in_old_include = self.get_last_value_include(output_dir) + + if output_dir: self.write_autodef(output_dir, output_file=None) # check/fix status of customised functions self.edit_dummy_fct_from_file(self["custom_fcts"], os.path.dirname(output_dir)) for incname in self.includepath: - if incname is True: - pathinc = self.default_include_file - elif incname is False: - continue - else: - pathinc = incname + self.write_one_include_file(output_dir, incname, output_file) + + for name,value in value_in_old_include.items(): + if value != self[name]: + self.fct_mod[name][0](value, self[name], name, *self.fct_mod[name][1],**self.fct_mod[name][2]) - if output_file: - fsock = output_file + def write_one_include_file(self, output_dir, incname, output_file=None): + """write one include file at the time""" + + misc.sprint(incname) + if incname is True: + pathinc = self.default_include_file + elif incname is False: + return + else: + pathinc = incname + + if output_file: + fsock = output_file + else: + fsock = file_writers.FortranWriter(pjoin(output_dir,pathinc+'.tmp')) + + + for key in self.includepath[incname]: + 
#define the fortran name + if key in self.fortran_name: + fortran_name = self.fortran_name[key] else: - fsock = file_writers.FortranWriter(pjoin(output_dir,pathinc+'.tmp')) - for key in self.includepath[incname]: - #define the fortran name - if key in self.fortran_name: - fortran_name = self.fortran_name[key] + fortran_name = key + + if incname in self.include_as_parameter: + fsock.writelines('INTEGER %s\n' % fortran_name) + #get the value with warning if the user didn't set it + value = self.get_default(key) + if hasattr(self, 'mod_inc_%s' % key): + value = getattr(self, 'mod_inc_%s' % key)(value) + # Special treatment for strings containing a list of + # strings. Convert it to a list of strings + if isinstance(value, list): + # in case of a list, add the length of the list as 0th + # element in fortran. Only in case of integer or float + # list (not for bool nor string) + targettype = self.list_parameter[key] + if targettype is bool: + pass + elif targettype is int: + line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(len(value))) + fsock.writelines(line) + elif targettype is float: + line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(float(len(value)))) + fsock.writelines(line) + # output the rest of the list in fortran + for i,v in enumerate(value): + line = '%s(%s) = %s \n' % (fortran_name, i+1, self.f77_formatting(v)) + fsock.writelines(line) + elif isinstance(value, dict): + for fortran_name, onevalue in value.items(): + line = '%s = %s \n' % (fortran_name, self.f77_formatting(onevalue)) + fsock.writelines(line) + elif isinstance(incname,str) and 'compile' in incname: + if incname in self.include_as_parameter: + line = 'PARAMETER (%s=%s)' %( fortran_name, value) else: - fortran_name = key - - #get the value with warning if the user didn't set it - value = self.get_default(key) - if hasattr(self, 'mod_inc_%s' % key): - value = getattr(self, 'mod_inc_%s' % key)(value) - # Special treatment for strings containing a list of - # strings. 
Convert it to a list of strings - if isinstance(value, list): - # in case of a list, add the length of the list as 0th - # element in fortran. Only in case of integer or float - # list (not for bool nor string) - targettype = self.list_parameter[key] - if targettype is bool: - pass - elif targettype is int: - line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(len(value))) - fsock.writelines(line) - elif targettype is float: - line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(float(len(value)))) - fsock.writelines(line) - # output the rest of the list in fortran - for i,v in enumerate(value): - line = '%s(%s) = %s \n' % (fortran_name, i+1, self.f77_formatting(v)) - fsock.writelines(line) - elif isinstance(value, dict): - for fortran_name, onevalue in value.items(): - line = '%s = %s \n' % (fortran_name, self.f77_formatting(onevalue)) - fsock.writelines(line) - elif isinstance(incname,str) and 'compile' in incname: line = '%s = %s \n' % (fortran_name, value) - fsock.write(line) + fsock.write(line) + else: + if incname in self.include_as_parameter: + line = 'PARAMETER (%s=%s)' %( fortran_name, self.f77_formatting(value)) else: line = '%s = %s \n' % (fortran_name, self.f77_formatting(value)) - fsock.writelines(line) - if not output_file: - fsock.close() - path = pjoin(output_dir,pathinc) - if not os.path.exists(path) or not filecmp.cmp(path, path+'.tmp'): - files.mv(path+'.tmp', path) - else: - os.remove(path+'.tmp') - + fsock.writelines(line) + if not output_file: + fsock.close() + path = pjoin(output_dir,pathinc) + if not os.path.exists(path) or not filecmp.cmp(path, path+'.tmp'): + files.mv(path+'.tmp', path) + else: + os.remove(path+'.tmp') def write_autodef(self, output_dir, output_file=None): """ Add the definition of variable to run.inc if the variable is set with autodef. @@ -3765,13 +3896,14 @@ def remove_all_cut(self): %(tmin_for_channel)s = tmin_for_channel ! 
limit the non-singular reach of --some-- channel of integration related to T-channel diagram (value between -1 and 0), -1 is no impact %(survey_splitting)s = survey_splitting ! for loop-induced control how many core are used at survey for the computation of a single iteration. %(survey_nchannel_per_job)s = survey_nchannel_per_job ! control how many Channel are integrated inside a single job on cluster/multicore - %(refine_evt_by_job)s = refine_evt_by_job ! control the maximal number of events for the first iteration of the refine (larger means less jobs) + %(refine_evt_by_job)s = refine_evt_by_job ! control the maximal number of events for the first iteration of the refine (larger means less jobs) #********************************************************************* -# Compilation flag. No automatic re-compilation (need manual "make clean" in Source) +# Compilation flag. #********************************************************************* %(global_flag)s = global_flag ! fortran optimization flag use for the all code. %(aloha_flag)s = aloha_flag ! fortran optimization flag for aloha function. Suggestions: '-ffast-math' %(matrix_flag)s = matrix_flag ! fortran optimization flag for matrix.f function. Suggestions: '-O3' + %(vector_size)s = vector_size ! 
size designed for SIMD/OpenMP/GPU (number of events in lockstep) """ template_off = '# To see advanced option for Phase-Space optimization: type "update psoptim"' @@ -3927,9 +4059,12 @@ class RunCardLO(RunCard): "get_dummy_x1_x2": pjoin("SubProcesses","dummy_fct.f"), "dummy_boostframe": pjoin("SubProcesses","dummy_fct.f"), "user_dynamical_scale": pjoin("SubProcesses","dummy_fct.f"), + "bias_wgt_custom": pjoin("SubProcesses","dummy_fct.f"), "user_": pjoin("SubProcesses","dummy_fct.f") # all function starting by user will be added to that file } + include_as_parameter = ['vector.inc'] + if MG5DIR: default_run_card = pjoin(MG5DIR, "internal", "default_run_card_lo.dat") @@ -4163,10 +4298,15 @@ def default_setup(self): self.add_param('hel_splitamp', True, hidden=True, include=False, comment='decide if amplitude aloha call can be splitted in two or not when doing helicity per helicity optimization.') self.add_param('hel_zeroamp', True, hidden=True, include=False, comment='decide if zero amplitude can be removed from the computation when doing helicity per helicity optimization.') self.add_param('SDE_strategy', 1, allowed=[1,2], fortran_name="sde_strat", comment="decide how Multi-channel should behaves \"1\" means full single diagram enhanced (hep-ph/0208156), \"2\" use the product of the denominator") - self.add_param('global_flag', '-O', include=False, hidden=True, comment='global fortran compilation flag, suggestion -fbound-check') - self.add_param('aloha_flag', '', include=False, hidden=True, comment='global fortran compilation flag, suggestion: -ffast-math') - self.add_param('matrix_flag', '', include=False, hidden=True, comment='fortran compilation flag for the matrix-element files, suggestion -O3') - + self.add_param('global_flag', '-O', include=False, hidden=True, comment='global fortran compilation flag, suggestion -fbound-check', + fct_mod=(self.make_clean, ('Source'),{})) + self.add_param('aloha_flag', '', include=False, hidden=True, comment='global fortran 
compilation flag, suggestion: -ffast-math', + fct_mod=(self.make_clean, ('Source/DHELAS'),{})) + self.add_param('matrix_flag', '', include=False, hidden=True, comment='fortran compilation flag for the matrix-element files, suggestion -O3', + fct_mod=(self.make_Ptouch, ('matrix'),{})) + self.add_param('vector_size', 1, include='vector.inc', hidden=True, comment='lockstep size for parralelism run', + fortran_name='VECSIZE_MEMMAX', fct_mod=(self.reset_simd,(),{})) + # parameter allowing to define simple cut via the pdg # Special syntax are related to those. (can not be edit directly) self.add_param('pt_min_pdg',{'__type__':0.}, include=False, cut=True) @@ -4188,8 +4328,7 @@ def default_setup(self): self.add_param('mxxmin4pdg',[-1.], system=True) self.add_param('mxxpart_antipart', [False], system=True) - # CUDACPP parameters - self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + def check_validity(self): """ """ diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/check_param_card.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/check_param_card.py index fe874a06a4..71089d7480 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/check_param_card.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/check_param_card.py @@ -85,7 +85,7 @@ def load_str(self, text): self.value= ' '.join(data[len(self.lhacode):]) # check that lhacode are the first entry otherwise return invalid param. 
if ' '.join([str(i) for i in self.lhacode]) != ' '.join(data[:len(self.lhacode)]): - raise InvalidParam + raise InvalidParam("line was %s" % str(data)) else: self.value = data[-1] diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/common_run_interface.py index 5d0187e3fa..14c7f310dc 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/common_run_interface.py @@ -749,13 +749,15 @@ def writeRunWeb(me_dir): class RunWebHandling(object): - def __init__(self, me_dir, crashifpresent=True, warnifpresent=True): + def __init__(self, me_dir, crashifpresent=True, warnifpresent=True, force_run=False): """raise error if RunWeb already exists me_dir is the directory where the write RunWeb""" self.remove_run_web = True self.me_dir = me_dir - + if force_run: + self.remove_run_web = False + return if crashifpresent or warnifpresent: if os.path.exists(pjoin(me_dir, 'RunWeb')): pid = open(pjoin(me_dir, 'RunWeb')).read() @@ -6574,7 +6576,7 @@ def reask(self, *args, **opt): fail_due_to_format = 0 #parameter to avoid infinite loop def postcmd(self, stop, line): - if line not in [None, '0', 'done', '']: + if line not in [None, '0', 'done', '',0]: ending_question = cmd.OneLinePathCompletion.postcmd(self,stop,line) else: ending_question = True @@ -7533,7 +7535,8 @@ def open_file(self, answer): else: raise if time.time() - start < .5: - self.mother_interface.ask("Are you really that fast? If you are using an editor that returns directly. Please confirm that you have finised to edit the file", 'y') + self.mother_interface.ask("Are you really that fast? If you are using an editor that returns directly. 
Please confirm that you have finised to edit the file", 'y', + timeout=False) self.reload_card(path) def reload_card(self, path): diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/extended_cmd.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/extended_cmd.py index a6a8609dce..2f37070580 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/extended_cmd.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/extended_cmd.py @@ -1108,9 +1108,12 @@ def ask(self, question, default, choices=[], path_msg=None, if alias: choices += list(alias.keys()) + + question_instance = obj(question, allow_arg=choices, default=default, mother_interface=self, **opt) - + if fct_timeout is None: + fct_timeout = lambda x: question_instance.postcmd(x, default) if x and default else False if first_cmd: if isinstance(first_cmd, str): question_instance.onecmd(first_cmd) @@ -2271,6 +2274,9 @@ def postcmd(self, stop, line): if n: self.default(line) return self.postcmd(stop, line) + elif self.value is None and line: + self.default(line) + return self.postcmd(stop, line) if not self.casesensitive: for ans in self.allow_arg: if ans.lower() == self.value.lower(): diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/gen_ximprove.py index 3b8ec31215..a88d60b282 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/gen_ximprove.py @@ -154,10 +154,15 @@ def get_helicity(self, to_submit=True, clean=True): p = misc.Popen(['./gensym'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=Pdir) #sym_input = "%(points)d %(iterations)d %(accuracy)f \n" % self.opts + (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') - nb_channel = max([math.floor(float(d)) for d in stdout.split()]) - + try: + nb_channel = max([math.floor(float(d)) for d in stdout.split()]) + except Exception as error: + misc.sprint(stdout, 'no channel or error for %s' % Pdir) + continue + 
self.cmd.compile(['madevent_forhel'], cwd=Pdir) if not os.path.exists(pjoin(Pdir, 'madevent_forhel')): raise Exception('Error make madevent_forhel not successful') diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py index f4c9cb6334..c9d1c7706a 100644 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py @@ -1,6 +1,12 @@ +# Copyright (C) 2020-2023 CERN and UCLouvain. +# Licensed under the GNU Lesser General Public License (version 3 or later). +# Created by: O. Mattelaer (Aug 2023) for the MG5aMC CUDACPP plugin. +# Further modified by: O. Mattelaer, A. Valassi (2023) for the MG5aMC CUDACPP plugin. import logging - +import os +import subprocess +pjoin = os.path.join logger = logging.getLogger('cmdprint') # for stdout try: @@ -9,20 +15,23 @@ import internal.madevent_interface as madevent_interface import internal.misc as misc import internal.extended_cmd as extended_cmd + import internal.banner as banner_mod else: import madgraph.interface.madevent_interface as madevent_interface import madgraph.various.misc as misc import madgraph.interface.extended_cmd as extended_cmd + import madgraph.various.banner as banner_mod class CPPMEInterface(madevent_interface.MadEventCmdShell): - def compile(self, *args, **opts): """ """ import multiprocessing if not self.options['nb_core'] or self.options['nb_core'] == 'None': self.options['nb_core'] = multiprocessing.cpu_count() - if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): + import pathlib + import os + pjoin = os.path.join cudacpp_backend = self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) if cudacpp_backend == 'FORTRAN': @@ -36,5 +45,50 @@ def compile(self, *args, **opts): return misc.compile(nb_core=self.options['nb_core'], 
*args, **opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) - + +class CPPRunCard(banner_mod.RunCardLO): + def reset_simd(self, old_value, new_value, name): + if not hasattr(self, 'path'): + logger.warning('WARNING! CPPRunCard instance has no attribute path') + return + ###raise Exception('INTERNAL ERROR! CPPRunCard instance has no attribute path') + if name == "vector_size" and new_value <= int(old_value): + # code can handle the new size -> do not recompile + return + Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source') + subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + def plugin_input(self, finput): + return + + def default_setup(self): + super().default_setup() + self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + + def write_one_include_file(self, output_dir, incname, output_file=None): + """write one include file at the time""" + if incname == "vector.inc" and 'vector_size' not in self.user_set: + return + super().write_one_include_file(output_dir, incname, output_file) + + def check_validity(self): + """ensure that PLUGIN information are consistent""" + super().check_validity() + if self['SDE_strategy'] != 1: + logger.warning('SDE_strategy different of 1 is not supported with SMD/GPU mode') + self['sde_strategy'] = 1 + if self['hel_recycling']: + self['hel_recycling'] = False + +class GPURunCard(CPPRunCard): + def default_setup(self): + super(CPPRunCard, self).default_setup() + self.add_param('cudacpp_backend', 'CUDA', include=False, hidden=False) + +#class CUDACPPRunCard(CPPRunCard): +# def default_setup(self): +# super(CPPRunCard, self).default_setup() +# self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + MEINTERFACE = CPPMEInterface +RunCard = CPPRunCard diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/madevent_interface.py index 
920e07a926..d722702891 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/madevent_interface.py @@ -3796,9 +3796,11 @@ def do_combine_events(self, line): if self.run_card['bias_module'].lower() not in ['dummy', 'none'] and nb_event: self.correct_bias() - + elif self.run_card['custom_fcts']: + self.correct_bias() + logger.info("combine events done in %s", time.time()-start) - + self.to_store.append('event') diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/madevent b/epochX/cudacpp/gg_tt01g.mad/bin/madevent index 10b6a71fa2..dff9711b73 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/madevent +++ b/epochX/cudacpp/gg_tt01g.mad/bin/madevent @@ -173,6 +173,10 @@ if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): launch_interface = launch_plugin.MEINTERFACE +#Source use this executable for compilation always allow it +force_run = False +if (args and args[0] == 'treatcards'): + force_run=True # Call the cmd interface main loop try: @@ -180,7 +184,7 @@ try: launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) - with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): + with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), force_run=force_run): if (args and os.path.isfile(args[0])): # They are an input file input_file = args[0] diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index 67ec0f298b..e6f353626a 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0055065155029296875  +DEBUG: model prefixing takes 0.0053670406341552734  INFO: Restrict model sm 
with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.022 s +1 processes with 16 diagrams generated in 0.021 s Total: 1 processes with 16 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -184,21 +184,21 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s -Wrote files for 36 helas calls in 0.148 s +Wrote files for 36 helas calls in 0.147 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.319 s +ALOHA: aloha creates 5 routines in 0.336 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: 
P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.304 s +ALOHA: aloha creates 10 routines in 0.308 s VVV1 VVV1 FFV1 @@ -230,8 +230,35 @@ save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CO INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG: self.in_madevent_mode =  True [output.py at line 207]  +DEBUG! Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +patching file Source/genps.inc +patching file Source/makefile +patching file SubProcesses/makefile +patching file bin/internal/gen_ximprove.py +Hunk #1 succeeded at 385 (offset 5 lines). +patching file bin/internal/madevent_interface.py +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). +patching file driver.f +patching file matrix1.f +Hunk #2 succeeded at 159 (offset 16 lines). +Hunk #3 succeeded at 237 (offset 16 lines). +Hunk #4 succeeded at 265 (offset 16 lines). +Hunk #5 succeeded at 310 (offset 16 lines). 
+DEBUG: p.returncode =  0 [output.py at line 233]  +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done. +Type "launch" to generate events from this process, or see +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/README +Run "open index.html" to see more information about this process. +quit + +real 0m2.253s +user 0m1.939s +sys 0m0.240s ************************************************************ * * * W E L C O M E to * @@ -259,8 +286,7 @@ Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run -run_card missed argument cudacpp_backend. Takes default: CPP -run_card missed argument cudacpp_backend. Takes default: CPP +WARNING! CPPRunCard instance has no attribute path quit INFO: launch in debug mode @@ -294,30 +320,3 @@ treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common -patching file Source/genps.inc -patching file Source/makefile -patching file SubProcesses/makefile -patching file Source/make_opts -patching file bin/internal/banner.py -Hunk #1 succeeded at 4188 (offset 1 line). -patching file bin/internal/gen_ximprove.py -patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 -patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). -patching file driver.f -patching file matrix1.f -Hunk #2 succeeded at 159 (offset 16 lines). -Hunk #3 succeeded at 237 (offset 16 lines). 
-Hunk #4 succeeded at 265 (offset 16 lines). -Hunk #5 succeeded at 310 (offset 16 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done. -Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/README -Run "open index.html" to see more information about this process. -quit - -real 0m2.789s -user 0m2.472s -sys 0m0.309s diff --git a/epochX/cudacpp/gg_ttg.mad/Source/make_opts b/epochX/cudacpp/gg_ttg.mad/Source/make_opts index bd3c24228d..57f5f7bb96 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/make_opts +++ b/epochX/cudacpp/gg_ttg.mad/Source/make_opts @@ -1,17 +1,12 @@ -pdlabel1= -pdlabel2= -lhapdf= -PYTHIA8_PATH=NotInstalled -MG5AMC_VERSION=3.5.0_lo_vect GLOBAL_FLAG=-O3 -ffast-math -fbounds-check -ALOHA_FLAG= -MATRIX_FLAG= DEFAULT_CPP_COMPILER=g++ MACFLAG= STDLIB=-lstdc++ STDLIB_FLAG= DEFAULT_F_COMPILER=gfortran DEFAULT_F2PY_COMPILER=f2py3 +MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime +PYTHIA8_PATH=NotInstalled #end_of_make_opts_variables BIASLIBDIR=../../../lib/ diff --git a/epochX/cudacpp/gg_ttg.mad/Source/makefile b/epochX/cudacpp/gg_ttg.mad/Source/makefile index dbe08b846e..00c73099a0 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/makefile +++ b/epochX/cudacpp/gg_ttg.mad/Source/makefile @@ -136,5 +136,7 @@ cleanSource: clean: cleanSource for i in `ls -d ../SubProcesses/P*`; do cd $$i; make clean; cd -; done; -cleanall: cleanSource +cleanavx: + for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done; +cleanall: cleanSource # THIS IS THE ONE for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done; diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f index fc924825c2..bf665ff6e0 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f @@ -359,7 
+359,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C LOCAL VARIABLES C INTEGER I,J,M,N - COMPLEX*16 ZTEMP, TMP_JAMP(10) + COMPLEX*16 ZTEMP, TMP_JAMP(9) REAL*8 CF(NCOLOR,NCOLOR) COMPLEX*16 AMP(NGRAPHS), JAMP(NCOLOR,NAMPSO) COMPLEX*16 W(6,NWAVEFUNCS) @@ -508,33 +508,30 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) TMP_JAMP(3) = AMP(15) + AMP(16) ! used 4 times TMP_JAMP(2) = AMP(1) + AMP(18) ! used 4 times TMP_JAMP(1) = AMP(12) - AMP(17) ! used 4 times - TMP_JAMP(10) = TMP_JAMP(3) - TMP_JAMP(2) ! used 2 times - TMP_JAMP(9) = TMP_JAMP(1) + ((-0.000000000000000D+00 + TMP_JAMP(9) = TMP_JAMP(3) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(8) ! used 2 times + TMP_JAMP(8) = TMP_JAMP(3) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(5) ! used 2 times + TMP_JAMP(7) = TMP_JAMP(2) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(2) ! used 2 times + TMP_JAMP(6) = TMP_JAMP(2) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(3) ! used 2 times + TMP_JAMP(5) = TMP_JAMP(1) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(11) ! used 2 times - TMP_JAMP(8) = TMP_JAMP(2) - TMP_JAMP(1) ! used 2 times - TMP_JAMP(7) = TMP_JAMP(1) + ((0.000000000000000D+00, + TMP_JAMP(4) = TMP_JAMP(1) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(10) ! used 2 times - TMP_JAMP(6) = TMP_JAMP(3) - TMP_JAMP(1) ! used 2 times - TMP_JAMP(5) = TMP_JAMP(2) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(3) ! used 2 times - TMP_JAMP(4) = TMP_JAMP(3) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(5) ! 
used 2 times - JAMP(1,1) = (-1.000000000000000D+00)*AMP(6)+TMP_JAMP(4)+( - $ -1.000000000000000D+00)*TMP_JAMP(5) - JAMP(2,1) = (-1.000000000000000D+00)*AMP(4)+(-1.000000000000000D - $ +00)*TMP_JAMP(4)+TMP_JAMP(9) - JAMP(3,1) = (-1.000000000000000D+00)*AMP(13)+TMP_JAMP(5)+( - $ -1.000000000000000D+00)*TMP_JAMP(7) - JAMP(4,1) = (-1.000000000000000D+00)*AMP(7)+((0.000000000000000D - $ +00,1.000000000000000D+00))*AMP(8)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*AMP(10)+(-1.000000000000000D+00) - $ *TMP_JAMP(6) - JAMP(5,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(2)+((0.000000000000000D+00,-1.000000000000000D+00))*AMP(11) - $ +(-1.000000000000000D+00)*AMP(14)+TMP_JAMP(8) - JAMP(6,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(2)+((0.000000000000000D+00,-1.000000000000000D+00))*AMP(8) - $ +(-1.000000000000000D+00)*AMP(9)+TMP_JAMP(10) + JAMP(1,1) = (-1.000000000000000D+00)*AMP(6)+(-1.000000000000000D + $ +00)*TMP_JAMP(6)+TMP_JAMP(8) + JAMP(2,1) = (-1.000000000000000D+00)*AMP(4)+TMP_JAMP(5)+( + $ -1.000000000000000D+00)*TMP_JAMP(8) + JAMP(3,1) = (-1.000000000000000D+00)*AMP(13)+( + $ -1.000000000000000D+00)*TMP_JAMP(4)+TMP_JAMP(6) + JAMP(4,1) = (-1.000000000000000D+00)*AMP(7)+TMP_JAMP(4)+( + $ -1.000000000000000D+00)*TMP_JAMP(9) + JAMP(5,1) = (-1.000000000000000D+00)*AMP(14)+( + $ -1.000000000000000D+00)*TMP_JAMP(5)+TMP_JAMP(7) + JAMP(6,1) = (-1.000000000000000D+00)*AMP(9)+(-1.000000000000000D + $ +00)*TMP_JAMP(7)+TMP_JAMP(9) IF(INIT_MODE)THEN DO I=1, NGRAPHS diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/dummy_fct.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/dummy_fct.f index 076cf29d67..4f7a204b8f 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/dummy_fct.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/dummy_fct.f @@ -32,7 +32,7 @@ logical FUNCTION dummy_cuts(P) LOGICAL IS_A_NU(NEXTERNAL),IS_HEAVY(NEXTERNAL) logical do_cuts(nexternal) COMMON /TO_SPECISA/IS_A_J,IS_A_A,IS_A_L,IS_A_B,IS_A_NU,IS_HEAVY, - . 
IS_A_ONIUM, do_cuts + & IS_A_ONIUM, do_cuts dummy_cuts=.true. @@ -118,15 +118,16 @@ double precision function user_dynamical_scale(P) C ************************************************************ -C default for the library implementing a dummt bias function +C default for the library implementing a dummy bias function C ************************************************************ subroutine bias_wgt_custom(p, original_weight, bias_weight) - implicit none + implicit none C C Parameters C include 'nexternal.inc' -C + +C C Arguments C double precision p(0:3, nexternal) @@ -161,3 +162,4 @@ subroutine bias_wgt_custom(p, original_weight, bias_weight) return end subroutine bias_wgt_custom + diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py index e9f421ae5f..824815f47b 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py @@ -537,7 +537,7 @@ def charge_card(self, tag): self.param_card = param_card_reader.ParamCard(param_card) return self.param_card elif tag == 'mgruncard': - self.run_card = RunCard(self[tag]) + self.run_card = RunCard(self[tag], unknown_warning=False) return self.run_card elif tag == 'mg5proccard': proc_card = self[tag].split('\n') @@ -2625,6 +2625,7 @@ class RunCard(ConfigFile): default_include_file = 'run_card.inc' default_autodef_file = 'run.inc' donewarning = [] + include_as_parameter = [] def plugin_input(self, finput): @@ -2671,18 +2672,40 @@ def __new__(cls, finput=None, **opt): elif isinstance(finput, cls): target_class = finput.__class__ elif isinstance(finput, str): + path = finput if '\n' not in finput: finput = open(finput).read() if 'req_acc_FO' in finput: target_class = RunCardNLO else: target_class = RunCardLO + if MADEVENT and os.path.exists(pjoin(MEDIR, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(MEDIR, 'bin', 'internal')]): + from importlib import reload + try: + 
reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard + elif not MADEVENT and os.path.exists(path.replace('run_card.dat', '../bin/internal/launch_plugin.py')): + misc.sprint('try to use plugin class') + pydir = path.replace('run_card.dat', '../bin/internal/') + with misc.TMP_variable(sys, 'path', sys.path + [pydir]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard + else: return None target_class.fill_post_set_from_blocks() - - return super(RunCard, cls).__new__(target_class, finput, **opt) + out = super(RunCard, cls).__new__(target_class, finput, **opt) + if not isinstance(out, RunCard): #should not happen but in presence of missmatch of library loaded. + out.__init__(finput, **opt) + return out else: return super(RunCard, cls).__new__(cls, finput, **opt) @@ -2710,7 +2733,7 @@ def __init__(self, *args, **opts): self.system_default = {} self.display_block = [] # set some block to be displayed - + self.fct_mod = {} # {param: (fct_pointer, *argument, **opts)} self.cut_class = {} self.warned=False @@ -2747,7 +2770,7 @@ def get_lepton_densities(cls): def add_param(self, name, value, fortran_name=None, include=True, hidden=False, legacy=False, cut=False, system=False, sys_default=None, - autodef=False, + autodef=False, fct_mod=None, **opts): """ add a parameter to the card. value is the default value and defines the type (int/float/bool/str) of the input. @@ -2761,6 +2784,7 @@ def add_param(self, name, value, fortran_name=None, include=True, If a path (Source/PDF/pdf.inc) the definition will be added within that file Default is False (does not add the definition) entry added in the run_card will automatically have this on True. + fct_mod: defines a function to run if the parameter is modify in the include file options of **opts: - allowed: list of valid options. '*' means anything else should be allowed. 
empty list means anything possible as well. @@ -2785,8 +2809,12 @@ def add_param(self, name, value, fortran_name=None, include=True, if autodef: self.definition_path[autodef].append(name) self.user_set.add(name) + # function to trigger if a value is modified in the include file + # main target is action to force correct recompilation (like for compilation flag/...) + if fct_mod: + self.fct_mod[name] = fct_mod - def read(self, finput, consistency=True): + def read(self, finput, consistency=True, unknown_warning=True): """Read the input file, this can be a path to a file, a file object, a str with the content of the file.""" @@ -2794,6 +2822,7 @@ def read(self, finput, consistency=True): if "\n" in finput: finput = finput.split('\n') elif os.path.isfile(finput): + self.path = finput finput = open(finput) else: raise Exception("No such file %s" % finput) @@ -2808,7 +2837,7 @@ def read(self, finput, consistency=True): name = name.lower().strip() if name not in self: #looks like an entry added by a user -> add it nicely - self.add_unknown_entry(name, value) + self.add_unknown_entry(name, value, unknown_warning) else: self.set( name, value, user=True) # parameter not set in the run_card can be set to compatiblity value @@ -2820,7 +2849,7 @@ def read(self, finput, consistency=True): logger.warning(str(error)) else: raise - def add_unknown_entry(self, name, value): + def add_unknown_entry(self, name, value, unknow_warning): """function to add an entry to the run_card when the associated parameter does not exists. This is based on the guess_entry_fromname for the various syntax providing input. This then call add_param accordingly. 
@@ -2859,7 +2888,7 @@ def add_unknown_entry(self, name, value): raise Exception("dictionary need to have at least one entry") default['dict']['__type__'] = default[self.guess_type_from_value(default_value[0])] - if name not in RunCard.donewarning: + if name not in RunCard.donewarning and unknow_warning: logger.warning("Found unexpected entry in run_card: \"%s\" with value \"%s\".\n"+\ " The type was assigned to %s. \n"+\ " The definition of that variable will %sbe automatically added to fortran file %s\n"+\ @@ -2897,7 +2926,16 @@ def valid_line(self, line, tmp): return False else: return True - + + + def reset_simd(self, old_value, new_value, name, *args, **opts): + raise Exception('pass in reset simd') + + def make_clean(self,old_value, new_value, name, dir): + raise Exception('pass make clean for ', dir) + + def make_Ptouch(self,old_value, new_value, name, reset): + raise Exception('pass Ptouch for ', reset) def write(self, output_file, template=None, python_template=False, write_hidden=False, template_options=None, **opt): @@ -3072,6 +3110,77 @@ def write(self, output_file, template=None, python_template=False, else: output_file.write(text) + def get_last_value_include(self, output_dir): + """For paraeter in self.fct_mod + parse the associate inc file to get the value of the previous run. + We return a dictionary {name: old_value} + if inc file does not exist we will return the current value (i.e. 
set has no change) + """ + + #remember that + # default_include_file is a class variable + # self.includepath is on the form include_path : [list of param ] + out = {} + + # setup inc_to_parse to be like self.includepath (include_path : [list of param ]) + # BUT only containing the parameter that need to be tracked for the fct_mod option + inc_to_parse = {} + for inc_file, params in self.includepath.items(): + if not inc_file: + continue + if any(p in params for p in self.fct_mod): + inc_to_parse[inc_file] = [name for name in self.includepath[inc_file] if name in self.fct_mod] + + # now loop over the files and ask the associate function + for inc_file, params in inc_to_parse.items(): + if inc_file is True: + inc_file = self.default_include_file + out.update(self.get_value_from_include(inc_file, params, output_dir)) + + return out + + def get_value_from_include(self, path, list_of_params, output_dir): + """for a given include file return the current value of the requested parameter + return a dictionary {name: value} + if path does not exists return the current value in self for all parameter""" + + #WARNING DOES NOT HANDLE LIST/DICT so far + + # handle case where file is missing + if not os.path.exists(pjoin(output_dir,path)): + misc.sprint("include file not existing", pjoin(output_dir,path)) + out = {name: self[name] for name in list_of_params} + + with open(pjoin(output_dir,path), 'r') as fsock: + text = fsock.read() + + for name in list_of_params: + misc.sprint(name, name in self.fortran_name) + misc.sprint(self.fortran_name[name] if name in self.fortran_name[name] else name) + to_track = [self.fortran_name[name] if name in self.fortran_name else name for name in list_of_params] + pattern = re.compile(r"\(?(%(names)s)\s?=\s?([^)]*)\)?" 
% {'names':'|'.join(to_track)}, re.I) + out = dict(pattern.findall(text)) + misc.sprint(out) + for name in list_of_params: + if name in self.fortran_name: + value = out[self.fortran_name[name]] + del out[self.fortran_name[name]] + out[name] = value + + for name, value in out.items(): + try: + out[name] = self.format_variable(value, type(self[name])) + except Exception: + continue + + if len(out) != len(list_of_params): + misc.sprint(list_of_params) + misc.sprint(to_track) + misc.sprint(self.fortran_name) + misc.sprint(text) + raise Exception + return out + def get_default(self, name, default=None, log_level=None): """return self[name] if exist otherwise default. log control if we @@ -3362,71 +3471,93 @@ def write_include_file(self, output_dir, output_file=None): #ensusre that system only parameter are correctly set self.update_system_parameter_for_include() + value_in_old_include = self.get_last_value_include(output_dir) + + if output_dir: self.write_autodef(output_dir, output_file=None) # check/fix status of customised functions self.edit_dummy_fct_from_file(self["custom_fcts"], os.path.dirname(output_dir)) for incname in self.includepath: - if incname is True: - pathinc = self.default_include_file - elif incname is False: - continue - else: - pathinc = incname + self.write_one_include_file(output_dir, incname, output_file) + + for name,value in value_in_old_include.items(): + if value != self[name]: + self.fct_mod[name][0](value, self[name], name, *self.fct_mod[name][1],**self.fct_mod[name][2]) - if output_file: - fsock = output_file + def write_one_include_file(self, output_dir, incname, output_file=None): + """write one include file at the time""" + + misc.sprint(incname) + if incname is True: + pathinc = self.default_include_file + elif incname is False: + return + else: + pathinc = incname + + if output_file: + fsock = output_file + else: + fsock = file_writers.FortranWriter(pjoin(output_dir,pathinc+'.tmp')) + + + for key in self.includepath[incname]: + 
#define the fortran name + if key in self.fortran_name: + fortran_name = self.fortran_name[key] else: - fsock = file_writers.FortranWriter(pjoin(output_dir,pathinc+'.tmp')) - for key in self.includepath[incname]: - #define the fortran name - if key in self.fortran_name: - fortran_name = self.fortran_name[key] + fortran_name = key + + if incname in self.include_as_parameter: + fsock.writelines('INTEGER %s\n' % fortran_name) + #get the value with warning if the user didn't set it + value = self.get_default(key) + if hasattr(self, 'mod_inc_%s' % key): + value = getattr(self, 'mod_inc_%s' % key)(value) + # Special treatment for strings containing a list of + # strings. Convert it to a list of strings + if isinstance(value, list): + # in case of a list, add the length of the list as 0th + # element in fortran. Only in case of integer or float + # list (not for bool nor string) + targettype = self.list_parameter[key] + if targettype is bool: + pass + elif targettype is int: + line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(len(value))) + fsock.writelines(line) + elif targettype is float: + line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(float(len(value)))) + fsock.writelines(line) + # output the rest of the list in fortran + for i,v in enumerate(value): + line = '%s(%s) = %s \n' % (fortran_name, i+1, self.f77_formatting(v)) + fsock.writelines(line) + elif isinstance(value, dict): + for fortran_name, onevalue in value.items(): + line = '%s = %s \n' % (fortran_name, self.f77_formatting(onevalue)) + fsock.writelines(line) + elif isinstance(incname,str) and 'compile' in incname: + if incname in self.include_as_parameter: + line = 'PARAMETER (%s=%s)' %( fortran_name, value) else: - fortran_name = key - - #get the value with warning if the user didn't set it - value = self.get_default(key) - if hasattr(self, 'mod_inc_%s' % key): - value = getattr(self, 'mod_inc_%s' % key)(value) - # Special treatment for strings containing a list of - # strings. 
Convert it to a list of strings - if isinstance(value, list): - # in case of a list, add the length of the list as 0th - # element in fortran. Only in case of integer or float - # list (not for bool nor string) - targettype = self.list_parameter[key] - if targettype is bool: - pass - elif targettype is int: - line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(len(value))) - fsock.writelines(line) - elif targettype is float: - line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(float(len(value)))) - fsock.writelines(line) - # output the rest of the list in fortran - for i,v in enumerate(value): - line = '%s(%s) = %s \n' % (fortran_name, i+1, self.f77_formatting(v)) - fsock.writelines(line) - elif isinstance(value, dict): - for fortran_name, onevalue in value.items(): - line = '%s = %s \n' % (fortran_name, self.f77_formatting(onevalue)) - fsock.writelines(line) - elif isinstance(incname,str) and 'compile' in incname: line = '%s = %s \n' % (fortran_name, value) - fsock.write(line) + fsock.write(line) + else: + if incname in self.include_as_parameter: + line = 'PARAMETER (%s=%s)' %( fortran_name, self.f77_formatting(value)) else: line = '%s = %s \n' % (fortran_name, self.f77_formatting(value)) - fsock.writelines(line) - if not output_file: - fsock.close() - path = pjoin(output_dir,pathinc) - if not os.path.exists(path) or not filecmp.cmp(path, path+'.tmp'): - files.mv(path+'.tmp', path) - else: - os.remove(path+'.tmp') - + fsock.writelines(line) + if not output_file: + fsock.close() + path = pjoin(output_dir,pathinc) + if not os.path.exists(path) or not filecmp.cmp(path, path+'.tmp'): + files.mv(path+'.tmp', path) + else: + os.remove(path+'.tmp') def write_autodef(self, output_dir, output_file=None): """ Add the definition of variable to run.inc if the variable is set with autodef. @@ -3765,13 +3896,14 @@ def remove_all_cut(self): %(tmin_for_channel)s = tmin_for_channel ! 
limit the non-singular reach of --some-- channel of integration related to T-channel diagram (value between -1 and 0), -1 is no impact %(survey_splitting)s = survey_splitting ! for loop-induced control how many core are used at survey for the computation of a single iteration. %(survey_nchannel_per_job)s = survey_nchannel_per_job ! control how many Channel are integrated inside a single job on cluster/multicore - %(refine_evt_by_job)s = refine_evt_by_job ! control the maximal number of events for the first iteration of the refine (larger means less jobs) + %(refine_evt_by_job)s = refine_evt_by_job ! control the maximal number of events for the first iteration of the refine (larger means less jobs) #********************************************************************* -# Compilation flag. No automatic re-compilation (need manual "make clean" in Source) +# Compilation flag. #********************************************************************* %(global_flag)s = global_flag ! fortran optimization flag use for the all code. %(aloha_flag)s = aloha_flag ! fortran optimization flag for aloha function. Suggestions: '-ffast-math' %(matrix_flag)s = matrix_flag ! fortran optimization flag for matrix.f function. Suggestions: '-O3' + %(vector_size)s = vector_size ! 
size designed for SIMD/OpenMP/GPU (number of events in lockstep) """ template_off = '# To see advanced option for Phase-Space optimization: type "update psoptim"' @@ -3927,9 +4059,12 @@ class RunCardLO(RunCard): "get_dummy_x1_x2": pjoin("SubProcesses","dummy_fct.f"), "dummy_boostframe": pjoin("SubProcesses","dummy_fct.f"), "user_dynamical_scale": pjoin("SubProcesses","dummy_fct.f"), + "bias_wgt_custom": pjoin("SubProcesses","dummy_fct.f"), "user_": pjoin("SubProcesses","dummy_fct.f") # all function starting by user will be added to that file } + include_as_parameter = ['vector.inc'] + if MG5DIR: default_run_card = pjoin(MG5DIR, "internal", "default_run_card_lo.dat") @@ -4163,10 +4298,15 @@ def default_setup(self): self.add_param('hel_splitamp', True, hidden=True, include=False, comment='decide if amplitude aloha call can be splitted in two or not when doing helicity per helicity optimization.') self.add_param('hel_zeroamp', True, hidden=True, include=False, comment='decide if zero amplitude can be removed from the computation when doing helicity per helicity optimization.') self.add_param('SDE_strategy', 1, allowed=[1,2], fortran_name="sde_strat", comment="decide how Multi-channel should behaves \"1\" means full single diagram enhanced (hep-ph/0208156), \"2\" use the product of the denominator") - self.add_param('global_flag', '-O', include=False, hidden=True, comment='global fortran compilation flag, suggestion -fbound-check') - self.add_param('aloha_flag', '', include=False, hidden=True, comment='global fortran compilation flag, suggestion: -ffast-math') - self.add_param('matrix_flag', '', include=False, hidden=True, comment='fortran compilation flag for the matrix-element files, suggestion -O3') - + self.add_param('global_flag', '-O', include=False, hidden=True, comment='global fortran compilation flag, suggestion -fbound-check', + fct_mod=(self.make_clean, ('Source'),{})) + self.add_param('aloha_flag', '', include=False, hidden=True, comment='global fortran 
compilation flag, suggestion: -ffast-math', + fct_mod=(self.make_clean, ('Source/DHELAS'),{})) + self.add_param('matrix_flag', '', include=False, hidden=True, comment='fortran compilation flag for the matrix-element files, suggestion -O3', + fct_mod=(self.make_Ptouch, ('matrix'),{})) + self.add_param('vector_size', 1, include='vector.inc', hidden=True, comment='lockstep size for parralelism run', + fortran_name='VECSIZE_MEMMAX', fct_mod=(self.reset_simd,(),{})) + # parameter allowing to define simple cut via the pdg # Special syntax are related to those. (can not be edit directly) self.add_param('pt_min_pdg',{'__type__':0.}, include=False, cut=True) @@ -4188,8 +4328,7 @@ def default_setup(self): self.add_param('mxxmin4pdg',[-1.], system=True) self.add_param('mxxpart_antipart', [False], system=True) - # CUDACPP parameters - self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + def check_validity(self): """ """ diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/check_param_card.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/check_param_card.py index fe874a06a4..71089d7480 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/check_param_card.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/check_param_card.py @@ -85,7 +85,7 @@ def load_str(self, text): self.value= ' '.join(data[len(self.lhacode):]) # check that lhacode are the first entry otherwise return invalid param. 
if ' '.join([str(i) for i in self.lhacode]) != ' '.join(data[:len(self.lhacode)]): - raise InvalidParam + raise InvalidParam("line was %s" % str(data)) else: self.value = data[-1] diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/common_run_interface.py index 5d0187e3fa..14c7f310dc 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/common_run_interface.py @@ -749,13 +749,15 @@ def writeRunWeb(me_dir): class RunWebHandling(object): - def __init__(self, me_dir, crashifpresent=True, warnifpresent=True): + def __init__(self, me_dir, crashifpresent=True, warnifpresent=True, force_run=False): """raise error if RunWeb already exists me_dir is the directory where the write RunWeb""" self.remove_run_web = True self.me_dir = me_dir - + if force_run: + self.remove_run_web = False + return if crashifpresent or warnifpresent: if os.path.exists(pjoin(me_dir, 'RunWeb')): pid = open(pjoin(me_dir, 'RunWeb')).read() @@ -6574,7 +6576,7 @@ def reask(self, *args, **opt): fail_due_to_format = 0 #parameter to avoid infinite loop def postcmd(self, stop, line): - if line not in [None, '0', 'done', '']: + if line not in [None, '0', 'done', '',0]: ending_question = cmd.OneLinePathCompletion.postcmd(self,stop,line) else: ending_question = True @@ -7533,7 +7535,8 @@ def open_file(self, answer): else: raise if time.time() - start < .5: - self.mother_interface.ask("Are you really that fast? If you are using an editor that returns directly. Please confirm that you have finised to edit the file", 'y') + self.mother_interface.ask("Are you really that fast? If you are using an editor that returns directly. 
Please confirm that you have finised to edit the file", 'y', + timeout=False) self.reload_card(path) def reload_card(self, path): diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/extended_cmd.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/extended_cmd.py index a6a8609dce..2f37070580 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/extended_cmd.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/extended_cmd.py @@ -1108,9 +1108,12 @@ def ask(self, question, default, choices=[], path_msg=None, if alias: choices += list(alias.keys()) + + question_instance = obj(question, allow_arg=choices, default=default, mother_interface=self, **opt) - + if fct_timeout is None: + fct_timeout = lambda x: question_instance.postcmd(x, default) if x and default else False if first_cmd: if isinstance(first_cmd, str): question_instance.onecmd(first_cmd) @@ -2271,6 +2274,9 @@ def postcmd(self, stop, line): if n: self.default(line) return self.postcmd(stop, line) + elif self.value is None and line: + self.default(line) + return self.postcmd(stop, line) if not self.casesensitive: for ans in self.allow_arg: if ans.lower() == self.value.lower(): diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/gen_ximprove.py index 3b8ec31215..a88d60b282 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/gen_ximprove.py @@ -154,10 +154,15 @@ def get_helicity(self, to_submit=True, clean=True): p = misc.Popen(['./gensym'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=Pdir) #sym_input = "%(points)d %(iterations)d %(accuracy)f \n" % self.opts + (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') - nb_channel = max([math.floor(float(d)) for d in stdout.split()]) - + try: + nb_channel = max([math.floor(float(d)) for d in stdout.split()]) + except Exception as error: + misc.sprint(stdout, 'no channel or error for %s' % Pdir) + continue + 
self.cmd.compile(['madevent_forhel'], cwd=Pdir) if not os.path.exists(pjoin(Pdir, 'madevent_forhel')): raise Exception('Error make madevent_forhel not successful') diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py index f4c9cb6334..c9d1c7706a 100644 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py @@ -1,6 +1,12 @@ +# Copyright (C) 2020-2023 CERN and UCLouvain. +# Licensed under the GNU Lesser General Public License (version 3 or later). +# Created by: O. Mattelaer (Aug 2023) for the MG5aMC CUDACPP plugin. +# Further modified by: O. Mattelaer, A. Valassi (2023) for the MG5aMC CUDACPP plugin. import logging - +import os +import subprocess +pjoin = os.path.join logger = logging.getLogger('cmdprint') # for stdout try: @@ -9,20 +15,23 @@ import internal.madevent_interface as madevent_interface import internal.misc as misc import internal.extended_cmd as extended_cmd + import internal.banner as banner_mod else: import madgraph.interface.madevent_interface as madevent_interface import madgraph.various.misc as misc import madgraph.interface.extended_cmd as extended_cmd + import madgraph.various.banner as banner_mod class CPPMEInterface(madevent_interface.MadEventCmdShell): - def compile(self, *args, **opts): """ """ import multiprocessing if not self.options['nb_core'] or self.options['nb_core'] == 'None': self.options['nb_core'] = multiprocessing.cpu_count() - if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): + import pathlib + import os + pjoin = os.path.join cudacpp_backend = self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) if cudacpp_backend == 'FORTRAN': @@ -36,5 +45,50 @@ def compile(self, *args, **opts): return misc.compile(nb_core=self.options['nb_core'], *args, 
**opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) - + +class CPPRunCard(banner_mod.RunCardLO): + def reset_simd(self, old_value, new_value, name): + if not hasattr(self, 'path'): + logger.warning('WARNING! CPPRunCard instance has no attribute path') + return + ###raise Exception('INTERNAL ERROR! CPPRunCard instance has no attribute path') + if name == "vector_size" and new_value <= int(old_value): + # code can handle the new size -> do not recompile + return + Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source') + subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + def plugin_input(self, finput): + return + + def default_setup(self): + super().default_setup() + self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + + def write_one_include_file(self, output_dir, incname, output_file=None): + """write one include file at the time""" + if incname == "vector.inc" and 'vector_size' not in self.user_set: + return + super().write_one_include_file(output_dir, incname, output_file) + + def check_validity(self): + """ensure that PLUGIN information are consistent""" + super().check_validity() + if self['SDE_strategy'] != 1: + logger.warning('SDE_strategy different of 1 is not supported with SMD/GPU mode') + self['sde_strategy'] = 1 + if self['hel_recycling']: + self['hel_recycling'] = False + +class GPURunCard(CPPRunCard): + def default_setup(self): + super(CPPRunCard, self).default_setup() + self.add_param('cudacpp_backend', 'CUDA', include=False, hidden=False) + +#class CUDACPPRunCard(CPPRunCard): +# def default_setup(self): +# super(CPPRunCard, self).default_setup() +# self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + MEINTERFACE = CPPMEInterface +RunCard = CPPRunCard diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/madevent_interface.py index 
920e07a926..d722702891 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/madevent_interface.py @@ -3796,9 +3796,11 @@ def do_combine_events(self, line): if self.run_card['bias_module'].lower() not in ['dummy', 'none'] and nb_event: self.correct_bias() - + elif self.run_card['custom_fcts']: + self.correct_bias() + logger.info("combine events done in %s", time.time()-start) - + self.to_store.append('event') diff --git a/epochX/cudacpp/gg_ttg.mad/bin/madevent b/epochX/cudacpp/gg_ttg.mad/bin/madevent index 10b6a71fa2..dff9711b73 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/madevent +++ b/epochX/cudacpp/gg_ttg.mad/bin/madevent @@ -173,6 +173,10 @@ if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): launch_interface = launch_plugin.MEINTERFACE +#Source use this executable for compilation always allow it +force_run = False +if (args and args[0] == 'treatcards'): + force_run=True # Call the cmd interface main loop try: @@ -180,7 +184,7 @@ try: launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) - with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): + with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), force_run=force_run): if (args and os.path.isfile(args[0])): # They are an input file input_file = args[0] diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 6b39ab9408..72297bf232 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00561976432800293  +DEBUG: model prefixing takes 0.005309343338012695  INFO: Restrict model sm with 
file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.036 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.321 s +ALOHA: aloha creates 5 routines in 0.322 s VVV1 VVV1 FFV1 @@ -201,9 +201,10 @@ FileWriter for / FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
-DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.825s -user 0m0.714s -sys 0m0.058s +real 0m0.788s +user 0m0.726s +sys 0m0.046s diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index ca66753b97..b334f66c3b 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005291461944580078  +DEBUG: model prefixing takes 0.00529932975769043  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.155 s +1 processes with 123 diagrams generated in 0.156 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -184,21 +184,21 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.416 s -Wrote files for 222 helas calls in 0.716 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.419 s +Wrote files for 222 helas calls in 0.679 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines 
ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.326 s +ALOHA: aloha creates 5 routines in 0.327 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.309 s +ALOHA: aloha creates 10 routines in 0.310 s VVV1 VVV1 FFV1 @@ -233,8 +233,35 @@ save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CO INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG: self.in_madevent_mode =  True [output.py at line 207]  +DEBUG! Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +patching file Source/genps.inc +patching file Source/makefile +patching file SubProcesses/makefile +patching file bin/internal/gen_ximprove.py +Hunk #1 succeeded at 385 (offset 5 lines). +patching file bin/internal/madevent_interface.py +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). 
+patching file driver.f +patching file matrix1.f +Hunk #2 succeeded at 191 (offset 48 lines). +Hunk #3 succeeded at 269 (offset 48 lines). +Hunk #4 succeeded at 297 (offset 48 lines). +Hunk #5 succeeded at 342 (offset 48 lines). +DEBUG: p.returncode =  0 [output.py at line 233]  +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done. +Type "launch" to generate events from this process, or see +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/README +Run "open index.html" to see more information about this process. +quit + +real 0m3.229s +user 0m3.016s +sys 0m0.210s ************************************************************ * * * W E L C O M E to * @@ -262,8 +289,7 @@ Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run -run_card missed argument cudacpp_backend. Takes default: CPP -run_card missed argument cudacpp_backend. Takes default: CPP +WARNING! CPPRunCard instance has no attribute path quit INFO: launch in debug mode @@ -297,30 +323,3 @@ treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common -patching file Source/genps.inc -patching file Source/makefile -patching file SubProcesses/makefile -patching file Source/make_opts -patching file bin/internal/banner.py -Hunk #1 succeeded at 4188 (offset 1 line). 
-patching file bin/internal/gen_ximprove.py -patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 -patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). -patching file driver.f -patching file matrix1.f -Hunk #2 succeeded at 191 (offset 48 lines). -Hunk #3 succeeded at 269 (offset 48 lines). -Hunk #4 succeeded at 297 (offset 48 lines). -Hunk #5 succeeded at 342 (offset 48 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done. -Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/README -Run "open index.html" to see more information about this process. -quit - -real 0m3.896s -user 0m3.569s -sys 0m0.315s diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/make_opts b/epochX/cudacpp/gg_ttgg.mad/Source/make_opts index bd3c24228d..57f5f7bb96 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/make_opts +++ b/epochX/cudacpp/gg_ttgg.mad/Source/make_opts @@ -1,17 +1,12 @@ -pdlabel1= -pdlabel2= -lhapdf= -PYTHIA8_PATH=NotInstalled -MG5AMC_VERSION=3.5.0_lo_vect GLOBAL_FLAG=-O3 -ffast-math -fbounds-check -ALOHA_FLAG= -MATRIX_FLAG= DEFAULT_CPP_COMPILER=g++ MACFLAG= STDLIB=-lstdc++ STDLIB_FLAG= DEFAULT_F_COMPILER=gfortran DEFAULT_F2PY_COMPILER=f2py3 +MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime +PYTHIA8_PATH=NotInstalled #end_of_make_opts_variables BIASLIBDIR=../../../lib/ diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/makefile b/epochX/cudacpp/gg_ttgg.mad/Source/makefile index dbe08b846e..00c73099a0 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/makefile +++ b/epochX/cudacpp/gg_ttgg.mad/Source/makefile @@ -136,5 +136,7 @@ cleanSource: clean: cleanSource for i in `ls -d ../SubProcesses/P*`; do cd $$i; make clean; cd -; done; 
-cleanall: cleanSource +cleanavx: + for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done; +cleanall: cleanSource # THIS IS THE ONE for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done; diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f index 77f5152327..df931e07c4 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f @@ -391,7 +391,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C LOCAL VARIABLES C INTEGER I,J,M,N - COMPLEX*16 ZTEMP, TMP_JAMP(163) + COMPLEX*16 ZTEMP, TMP_JAMP(155) REAL*8 CF(NCOLOR,NCOLOR) COMPLEX*16 AMP(NGRAPHS), JAMP(NCOLOR,NAMPSO) COMPLEX*16 W(6,NWAVEFUNCS) @@ -1218,362 +1218,318 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) TMP_JAMP(3) = AMP(84) + AMP(86) ! used 8 times TMP_JAMP(2) = AMP(81) - AMP(83) ! used 8 times TMP_JAMP(1) = AMP(82) + AMP(85) ! used 8 times - TMP_JAMP(30) = TMP_JAMP(11) + AMP(121) ! used 8 times - TMP_JAMP(29) = TMP_JAMP(12) - AMP(132) ! used 8 times + TMP_JAMP(30) = TMP_JAMP(15) - AMP(157) ! used 8 times + TMP_JAMP(29) = TMP_JAMP(14) + AMP(159) ! used 8 times TMP_JAMP(28) = TMP_JAMP(13) + AMP(130) ! used 8 times - TMP_JAMP(27) = TMP_JAMP(14) + AMP(159) ! used 8 times - TMP_JAMP(26) = TMP_JAMP(15) - AMP(157) ! used 8 times - TMP_JAMP(25) = TMP_JAMP(8) - AMP(131) ! used 8 times + TMP_JAMP(27) = TMP_JAMP(12) - AMP(132) ! used 8 times + TMP_JAMP(26) = TMP_JAMP(11) + AMP(121) ! used 8 times + TMP_JAMP(25) = TMP_JAMP(10) + AMP(154) ! used 8 times TMP_JAMP(24) = TMP_JAMP(9) - AMP(156) ! used 8 times - TMP_JAMP(23) = TMP_JAMP(10) + AMP(154) ! used 8 times - TMP_JAMP(22) = TMP_JAMP(6) + AMP(114) ! used 8 times - TMP_JAMP(21) = TMP_JAMP(7) + AMP(158) ! used 8 times - TMP_JAMP(20) = TMP_JAMP(4) - AMP(141) ! used 8 times - TMP_JAMP(19) = TMP_JAMP(5) + AMP(139) ! 
used 8 times - TMP_JAMP(18) = TMP_JAMP(2) + AMP(105) ! used 8 times - TMP_JAMP(17) = TMP_JAMP(3) - AMP(155) ! used 8 times + TMP_JAMP(23) = TMP_JAMP(8) - AMP(131) ! used 8 times + TMP_JAMP(22) = TMP_JAMP(7) + AMP(158) ! used 8 times + TMP_JAMP(21) = TMP_JAMP(6) + AMP(114) ! used 8 times + TMP_JAMP(20) = TMP_JAMP(5) + AMP(139) ! used 8 times + TMP_JAMP(19) = TMP_JAMP(4) - AMP(141) ! used 8 times + TMP_JAMP(18) = TMP_JAMP(3) - AMP(155) ! used 8 times + TMP_JAMP(17) = TMP_JAMP(2) + AMP(105) ! used 8 times TMP_JAMP(16) = TMP_JAMP(1) - AMP(140) ! used 8 times - TMP_JAMP(90) = AMP(108) + AMP(133) ! used 4 times - TMP_JAMP(89) = AMP(51) + AMP(52) ! used 4 times - TMP_JAMP(88) = AMP(40) - AMP(54) ! used 4 times - TMP_JAMP(87) = AMP(11) - AMP(135) ! used 4 times - TMP_JAMP(86) = TMP_JAMP(26) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(51) ! used 4 times - TMP_JAMP(85) = TMP_JAMP(28) + TMP_JAMP(27) ! used 4 times - TMP_JAMP(84) = TMP_JAMP(29) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(11) ! used 4 times - TMP_JAMP(83) = TMP_JAMP(30) + TMP_JAMP(29) ! used 4 times - TMP_JAMP(82) = AMP(102) + AMP(151) ! used 4 times - TMP_JAMP(81) = AMP(69) - AMP(134) ! used 4 times - TMP_JAMP(80) = AMP(59) - AMP(153) ! used 4 times - TMP_JAMP(79) = TMP_JAMP(23) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(102) ! used 4 times - TMP_JAMP(78) = TMP_JAMP(24) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(59) ! used 4 times - TMP_JAMP(77) = TMP_JAMP(25) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(69) ! used 4 times - TMP_JAMP(76) = TMP_JAMP(29) + TMP_JAMP(25) ! used 4 times - TMP_JAMP(75) = TMP_JAMP(30) - TMP_JAMP(23) ! used 4 times - TMP_JAMP(74) = AMP(43) - AMP(53) ! used 4 times - TMP_JAMP(73) = TMP_JAMP(21) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(43) ! used 4 times - TMP_JAMP(72) = TMP_JAMP(22) + ((0.000000000000000D+00, + TMP_JAMP(80) = TMP_JAMP(30) + TMP_JAMP(29) ! 
used 4 times + TMP_JAMP(79) = TMP_JAMP(30) - TMP_JAMP(22) ! used 4 times + TMP_JAMP(78) = TMP_JAMP(29) + TMP_JAMP(22) ! used 4 times + TMP_JAMP(77) = TMP_JAMP(28) + TMP_JAMP(27) ! used 4 times + TMP_JAMP(76) = TMP_JAMP(28) - TMP_JAMP(23) ! used 4 times + TMP_JAMP(75) = TMP_JAMP(27) + TMP_JAMP(23) ! used 4 times + TMP_JAMP(74) = TMP_JAMP(27) + TMP_JAMP(19) ! used 4 times + TMP_JAMP(73) = TMP_JAMP(26) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(18) ! used 4 times + TMP_JAMP(72) = TMP_JAMP(26) - TMP_JAMP(25) ! used 4 times + TMP_JAMP(71) = TMP_JAMP(26) - TMP_JAMP(19) ! used 4 times + TMP_JAMP(70) = TMP_JAMP(26) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(17) ! used 4 times + TMP_JAMP(69) = TMP_JAMP(25) + TMP_JAMP(24) ! used 4 times + TMP_JAMP(68) = TMP_JAMP(25) - TMP_JAMP(18) ! used 4 times + TMP_JAMP(67) = TMP_JAMP(24) - TMP_JAMP(23) ! used 4 times + TMP_JAMP(66) = TMP_JAMP(24) + TMP_JAMP(18) ! used 4 times + TMP_JAMP(65) = TMP_JAMP(22) + TMP_JAMP(20) ! used 4 times + TMP_JAMP(64) = TMP_JAMP(21) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(62) ! used 4 times - TMP_JAMP(71) = TMP_JAMP(22) + TMP_JAMP(21) ! used 4 times - TMP_JAMP(70) = TMP_JAMP(27) + TMP_JAMP(21) ! used 4 times - TMP_JAMP(69) = TMP_JAMP(28) - TMP_JAMP(25) ! used 4 times - TMP_JAMP(68) = AMP(119) + AMP(145) ! used 4 times - TMP_JAMP(67) = AMP(14) - AMP(147) ! used 4 times - TMP_JAMP(66) = TMP_JAMP(20) + TMP_JAMP(19) ! used 4 times - TMP_JAMP(65) = TMP_JAMP(22) - TMP_JAMP(19) ! used 4 times - TMP_JAMP(64) = TMP_JAMP(29) + TMP_JAMP(20) ! used 4 times - TMP_JAMP(63) = AMP(77) - AMP(152) ! used 4 times - TMP_JAMP(62) = TMP_JAMP(17) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(77) ! used 4 times - TMP_JAMP(61) = TMP_JAMP(18) + ((-0.000000000000000D+00 + TMP_JAMP(63) = TMP_JAMP(21) - TMP_JAMP(20) ! used 4 times + TMP_JAMP(62) = TMP_JAMP(21) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(61) ! 
used 4 times + TMP_JAMP(61) = TMP_JAMP(20) + TMP_JAMP(19) ! used 4 times + TMP_JAMP(60) = TMP_JAMP(20) - TMP_JAMP(16) ! used 4 times + TMP_JAMP(59) = TMP_JAMP(19) + TMP_JAMP(16) ! used 4 times + TMP_JAMP(58) = TMP_JAMP(18) - TMP_JAMP(16) ! used 4 times + TMP_JAMP(57) = TMP_JAMP(17) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(79) ! used 4 times - TMP_JAMP(60) = TMP_JAMP(24) + TMP_JAMP(17) ! used 4 times - TMP_JAMP(59) = TMP_JAMP(28) - TMP_JAMP(18) ! used 4 times - TMP_JAMP(58) = AMP(89) - AMP(146) ! used 4 times - TMP_JAMP(57) = TMP_JAMP(20) + TMP_JAMP(16) ! used 4 times - TMP_JAMP(56) = AMP(117) + AMP(142) ! used 4 times - TMP_JAMP(55) = AMP(8) - AMP(144) ! used 4 times - TMP_JAMP(54) = TMP_JAMP(19) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(117) ! used 4 times - TMP_JAMP(53) = TMP_JAMP(20) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(8) ! used 4 times - TMP_JAMP(52) = TMP_JAMP(26) - TMP_JAMP(21) ! used 4 times - TMP_JAMP(51) = TMP_JAMP(30) - TMP_JAMP(20) ! used 4 times - TMP_JAMP(50) = AMP(87) - AMP(143) ! used 4 times - TMP_JAMP(49) = TMP_JAMP(16) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(87) ! used 4 times - TMP_JAMP(48) = TMP_JAMP(23) - TMP_JAMP(17) ! used 4 times - TMP_JAMP(47) = TMP_JAMP(18) + ((0.000000000000000D+00, + TMP_JAMP(56) = TMP_JAMP(17) - TMP_JAMP(16) ! used 4 times + TMP_JAMP(55) = TMP_JAMP(17) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(80) ! used 4 times - TMP_JAMP(46) = TMP_JAMP(19) - TMP_JAMP(16) ! used 4 times - TMP_JAMP(45) = TMP_JAMP(27) + TMP_JAMP(18) ! used 4 times - TMP_JAMP(44) = TMP_JAMP(28) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(110) ! used 4 times - TMP_JAMP(43) = TMP_JAMP(29) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(16) ! used 4 times - TMP_JAMP(42) = TMP_JAMP(22) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(61) ! used 4 times - TMP_JAMP(41) = TMP_JAMP(24) - TMP_JAMP(22) ! 
used 4 times - TMP_JAMP(40) = TMP_JAMP(25) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(71) ! used 4 times - TMP_JAMP(39) = AMP(96) + AMP(148) ! used 4 times - TMP_JAMP(38) = TMP_JAMP(23) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(96) ! used 4 times - TMP_JAMP(37) = TMP_JAMP(24) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(56) ! used 4 times - TMP_JAMP(36) = TMP_JAMP(26) + TMP_JAMP(23) ! used 4 times - TMP_JAMP(35) = TMP_JAMP(17) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(74) ! used 4 times - TMP_JAMP(34) = TMP_JAMP(30) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(17) ! used 4 times - TMP_JAMP(33) = TMP_JAMP(26) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(33) ! used 4 times - TMP_JAMP(32) = TMP_JAMP(27) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(22) ! used 4 times - TMP_JAMP(31) = TMP_JAMP(21) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(25) ! used 4 times - TMP_JAMP(98) = TMP_JAMP(43) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(138) ! used 4 times - TMP_JAMP(97) = TMP_JAMP(44) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(136) ! used 4 times - TMP_JAMP(96) = TMP_JAMP(40) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(137) ! used 4 times - TMP_JAMP(95) = TMP_JAMP(37) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(150) ! used 4 times - TMP_JAMP(94) = TMP_JAMP(35) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(149) ! used 4 times - TMP_JAMP(93) = TMP_JAMP(32) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(36) ! used 4 times - TMP_JAMP(92) = TMP_JAMP(33) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(34) ! used 4 times - TMP_JAMP(91) = TMP_JAMP(31) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(35) ! used 4 times - TMP_JAMP(151) = AMP(18) + ((0.000000000000000D+00, + TMP_JAMP(54) = AMP(108) + AMP(133) ! 
used 4 times + TMP_JAMP(53) = AMP(51) + AMP(52) ! used 4 times + TMP_JAMP(52) = AMP(40) - AMP(54) ! used 4 times + TMP_JAMP(51) = AMP(11) - AMP(135) ! used 4 times + TMP_JAMP(50) = AMP(102) + AMP(151) ! used 4 times + TMP_JAMP(49) = AMP(69) - AMP(134) ! used 4 times + TMP_JAMP(48) = AMP(59) - AMP(153) ! used 4 times + TMP_JAMP(47) = AMP(43) - AMP(53) ! used 4 times + TMP_JAMP(46) = AMP(119) + AMP(145) ! used 4 times + TMP_JAMP(45) = AMP(14) - AMP(147) ! used 4 times + TMP_JAMP(44) = AMP(77) - AMP(152) ! used 4 times + TMP_JAMP(43) = AMP(89) - AMP(146) ! used 4 times + TMP_JAMP(42) = AMP(117) + AMP(142) ! used 4 times + TMP_JAMP(41) = AMP(8) - AMP(144) ! used 4 times + TMP_JAMP(40) = AMP(87) - AMP(143) ! used 4 times + TMP_JAMP(39) = AMP(110) + AMP(136) ! used 4 times + TMP_JAMP(38) = AMP(16) - AMP(138) ! used 4 times + TMP_JAMP(37) = AMP(71) - AMP(137) ! used 4 times + TMP_JAMP(36) = AMP(96) + AMP(148) ! used 4 times + TMP_JAMP(35) = AMP(56) - AMP(150) ! used 4 times + TMP_JAMP(34) = AMP(74) - AMP(149) ! used 4 times + TMP_JAMP(33) = AMP(33) + AMP(34) ! used 4 times + TMP_JAMP(32) = AMP(22) - AMP(36) ! used 4 times + TMP_JAMP(31) = AMP(25) - AMP(35) ! used 4 times + TMP_JAMP(142) = TMP_JAMP(80) + TMP_JAMP(77) ! used 2 times + TMP_JAMP(141) = TMP_JAMP(80) + TMP_JAMP(68) ! used 2 times + TMP_JAMP(140) = TMP_JAMP(79) - TMP_JAMP(61) ! used 2 times + TMP_JAMP(139) = TMP_JAMP(79) + TMP_JAMP(69) ! used 2 times + TMP_JAMP(138) = TMP_JAMP(78) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(47) ! used 2 times + TMP_JAMP(137) = TMP_JAMP(77) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(39) ! used 2 times + TMP_JAMP(136) = TMP_JAMP(76) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(54) ! used 2 times + TMP_JAMP(135) = TMP_JAMP(76) + TMP_JAMP(66) ! used 2 times + TMP_JAMP(134) = TMP_JAMP(76) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(39) ! 
used 2 times + TMP_JAMP(133) = TMP_JAMP(75) + TMP_JAMP(61) ! used 2 times + TMP_JAMP(132) = TMP_JAMP(73) + AMP(50) ! used 2 times + TMP_JAMP(131) = TMP_JAMP(70) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(36) ! used 2 times + TMP_JAMP(130) = TMP_JAMP(67) - TMP_JAMP(27) ! used 2 times + TMP_JAMP(129) = TMP_JAMP(61) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(46) ! used 2 times + TMP_JAMP(128) = TMP_JAMP(61) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(41) ! used 2 times + TMP_JAMP(127) = TMP_JAMP(58) - TMP_JAMP(25) ! used 2 times + TMP_JAMP(126) = TMP_JAMP(58) + TMP_JAMP(24) ! used 2 times + TMP_JAMP(125) = TMP_JAMP(58) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(43) ! used 2 times + TMP_JAMP(124) = TMP_JAMP(55) + AMP(111) ! used 2 times + TMP_JAMP(123) = TMP_JAMP(54) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(109) ! used 2 times + TMP_JAMP(122) = TMP_JAMP(53) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(50) ! used 2 times - TMP_JAMP(150) = TMP_JAMP(87) + ((0.000000000000000D+00, + TMP_JAMP(121) = TMP_JAMP(53) - TMP_JAMP(47) ! used 2 times + TMP_JAMP(120) = TMP_JAMP(52) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(42) ! used 2 times + TMP_JAMP(119) = TMP_JAMP(52) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(41) ! used 2 times + TMP_JAMP(118) = TMP_JAMP(51) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(10) ! used 2 times - TMP_JAMP(149) = TMP_JAMP(90) + TMP_JAMP(88) ! used 2 times - TMP_JAMP(148) = TMP_JAMP(82) - AMP(18) ! used 2 times - TMP_JAMP(147) = TMP_JAMP(74) + ((-0.000000000000000D+00 + TMP_JAMP(117) = TMP_JAMP(51) + TMP_JAMP(49) ! used 2 times + TMP_JAMP(116) = TMP_JAMP(51) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(12) ! used 2 times + TMP_JAMP(115) = TMP_JAMP(50) - TMP_JAMP(44) ! 
used 2 times + TMP_JAMP(114) = TMP_JAMP(49) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(70) ! used 2 times + TMP_JAMP(113) = TMP_JAMP(48) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(60) ! used 2 times + TMP_JAMP(112) = TMP_JAMP(48) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(58) ! used 2 times + TMP_JAMP(111) = TMP_JAMP(47) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(44) ! used 2 times - TMP_JAMP(146) = TMP_JAMP(68) + TMP_JAMP(67) ! used 2 times - TMP_JAMP(145) = TMP_JAMP(77) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(134) ! used 2 times - TMP_JAMP(144) = AMP(79) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(109) ! used 2 times - TMP_JAMP(143) = TMP_JAMP(63) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(76) ! used 2 times - TMP_JAMP(142) = TMP_JAMP(90) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(59) ! used 2 times - TMP_JAMP(141) = TMP_JAMP(67) + TMP_JAMP(58) ! used 2 times - TMP_JAMP(140) = AMP(7) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(18) ! used 2 times - TMP_JAMP(139) = TMP_JAMP(54) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(142) ! used 2 times - TMP_JAMP(138) = TMP_JAMP(55) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(51) ! used 2 times - TMP_JAMP(137) = TMP_JAMP(89) - TMP_JAMP(74) ! used 2 times - TMP_JAMP(136) = TMP_JAMP(49) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(143) ! used 2 times - TMP_JAMP(135) = TMP_JAMP(82) - TMP_JAMP(63) ! used 2 times - TMP_JAMP(134) = AMP(41) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(80) ! used 2 times - TMP_JAMP(133) = TMP_JAMP(56) - TMP_JAMP(50) ! used 2 times - TMP_JAMP(132) = TMP_JAMP(88) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(45) ! used 2 times - TMP_JAMP(131) = TMP_JAMP(47) + AMP(111) ! used 2 times - TMP_JAMP(130) = TMP_JAMP(53) - AMP(9) ! 
used 2 times - TMP_JAMP(129) = TMP_JAMP(98) + TMP_JAMP(97) ! used 2 times - TMP_JAMP(128) = AMP(58) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(61) ! used 2 times - TMP_JAMP(127) = TMP_JAMP(80) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(41) ! used 2 times - TMP_JAMP(126) = TMP_JAMP(42) - AMP(72) ! used 2 times - TMP_JAMP(125) = TMP_JAMP(96) - TMP_JAMP(42) ! used 2 times - TMP_JAMP(124) = TMP_JAMP(98) + TMP_JAMP(96) ! used 2 times - TMP_JAMP(123) = TMP_JAMP(36) - AMP(39) ! used 2 times - TMP_JAMP(122) = TMP_JAMP(89) - TMP_JAMP(39) ! used 2 times - TMP_JAMP(121) = TMP_JAMP(95) - AMP(55) ! used 2 times - TMP_JAMP(120) = TMP_JAMP(58) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(46) ! used 2 times - TMP_JAMP(119) = TMP_JAMP(68) - TMP_JAMP(58) ! used 2 times - TMP_JAMP(118) = TMP_JAMP(94) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(58) ! used 2 times - TMP_JAMP(117) = TMP_JAMP(95) + TMP_JAMP(94) ! used 2 times - TMP_JAMP(116) = TMP_JAMP(94) - AMP(73) ! used 2 times - TMP_JAMP(115) = TMP_JAMP(95) + AMP(57) ! used 2 times - TMP_JAMP(114) = TMP_JAMP(96) - TMP_JAMP(95) ! used 2 times - TMP_JAMP(113) = TMP_JAMP(97) - TMP_JAMP(96) ! used 2 times - TMP_JAMP(112) = TMP_JAMP(38) + AMP(95) ! used 2 times - TMP_JAMP(111) = TMP_JAMP(67) + ((-0.000000000000000D+00 + TMP_JAMP(110) = TMP_JAMP(46) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(120) ! used 2 times + TMP_JAMP(109) = TMP_JAMP(46) - TMP_JAMP(43) ! used 2 times + TMP_JAMP(108) = TMP_JAMP(45) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(12) ! used 2 times + TMP_JAMP(107) = TMP_JAMP(45) + TMP_JAMP(43) ! used 2 times + TMP_JAMP(106) = TMP_JAMP(45) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(13) ! used 2 times - TMP_JAMP(110) = TMP_JAMP(67) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(34) ! used 2 times - TMP_JAMP(109) = TMP_JAMP(98) + AMP(15) ! 
used 2 times - TMP_JAMP(108) = TMP_JAMP(98) + TMP_JAMP(34) ! used 2 times - TMP_JAMP(107) = TMP_JAMP(61) - AMP(23) ! used 2 times - TMP_JAMP(106) = TMP_JAMP(93) + TMP_JAMP(92) ! used 2 times - TMP_JAMP(105) = TMP_JAMP(68) + ((-0.000000000000000D+00 + TMP_JAMP(105) = TMP_JAMP(44) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(76) ! used 2 times + TMP_JAMP(104) = TMP_JAMP(42) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(45) ! used 2 times + TMP_JAMP(103) = TMP_JAMP(42) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(118) ! used 2 times + TMP_JAMP(102) = TMP_JAMP(41) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(7) ! used 2 times + TMP_JAMP(101) = TMP_JAMP(40) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(78) ! used 2 times + TMP_JAMP(100) = TMP_JAMP(40) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(88) ! used 2 times + TMP_JAMP(99) = TMP_JAMP(39) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(111) ! used 2 times + TMP_JAMP(98) = TMP_JAMP(39) - TMP_JAMP(37) ! used 2 times + TMP_JAMP(97) = TMP_JAMP(38) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(9) ! used 2 times + TMP_JAMP(96) = TMP_JAMP(38) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(15) ! used 2 times + TMP_JAMP(95) = TMP_JAMP(37) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(72) ! used 2 times + TMP_JAMP(94) = TMP_JAMP(36) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(39) ! used 2 times + TMP_JAMP(93) = TMP_JAMP(35) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(55) ! used 2 times + TMP_JAMP(92) = TMP_JAMP(35) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(57) ! used 2 times + TMP_JAMP(91) = TMP_JAMP(34) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(75) ! used 2 times + TMP_JAMP(90) = TMP_JAMP(34) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(73) ! 
used 2 times + TMP_JAMP(89) = TMP_JAMP(33) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(21) ! used 2 times + TMP_JAMP(88) = TMP_JAMP(33) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(32) ! used 2 times + TMP_JAMP(87) = TMP_JAMP(32) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(23) ! used 2 times + TMP_JAMP(86) = TMP_JAMP(32) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(24) ! used 2 times + TMP_JAMP(85) = TMP_JAMP(31) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(27) ! used 2 times - TMP_JAMP(104) = TMP_JAMP(91) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(68) ! used 2 times - TMP_JAMP(103) = TMP_JAMP(93) + TMP_JAMP(91) ! used 2 times - TMP_JAMP(102) = TMP_JAMP(91) - AMP(26) ! used 2 times - TMP_JAMP(101) = TMP_JAMP(92) - TMP_JAMP(91) ! used 2 times - TMP_JAMP(100) = TMP_JAMP(97) + TMP_JAMP(93) ! used 2 times - TMP_JAMP(99) = TMP_JAMP(92) + TMP_JAMP(34) ! used 2 times - TMP_JAMP(163) = TMP_JAMP(149) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(42) ! used 2 times - TMP_JAMP(162) = TMP_JAMP(144) - TMP_JAMP(142) ! used 2 times - TMP_JAMP(161) = TMP_JAMP(140) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(138) ! used 2 times - TMP_JAMP(160) = TMP_JAMP(135) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(48) ! used 2 times - TMP_JAMP(159) = TMP_JAMP(133) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(46) ! used 2 times - TMP_JAMP(158) = TMP_JAMP(134) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(132) ! used 2 times - TMP_JAMP(157) = TMP_JAMP(130) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(144) ! used 2 times - TMP_JAMP(156) = TMP_JAMP(128) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(127) ! used 2 times - TMP_JAMP(155) = TMP_JAMP(123) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(122) ! 
used 2 times - TMP_JAMP(154) = TMP_JAMP(112) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(148) ! used 2 times - TMP_JAMP(153) = TMP_JAMP(100) + AMP(24) ! used 2 times - TMP_JAMP(152) = TMP_JAMP(99) + AMP(32) ! used 2 times + TMP_JAMP(84) = TMP_JAMP(31) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(26) ! used 2 times + TMP_JAMP(83) = TMP_JAMP(25) + AMP(95) ! used 2 times + TMP_JAMP(82) = AMP(18) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(101) ! used 2 times + TMP_JAMP(81) = AMP(79) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(90) ! used 2 times + TMP_JAMP(155) = TMP_JAMP(131) - TMP_JAMP(83) ! used 2 times + TMP_JAMP(154) = TMP_JAMP(119) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(55) ! used 2 times + TMP_JAMP(153) = TMP_JAMP(114) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(64) ! used 2 times + TMP_JAMP(152) = TMP_JAMP(111) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(64) ! used 2 times + TMP_JAMP(151) = TMP_JAMP(105) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(57) ! used 2 times + TMP_JAMP(150) = TMP_JAMP(103) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(62) ! used 2 times + TMP_JAMP(149) = TMP_JAMP(100) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(55) ! used 2 times + TMP_JAMP(148) = TMP_JAMP(95) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(62) ! used 2 times + TMP_JAMP(147) = TMP_JAMP(94) - TMP_JAMP(53) ! used 2 times + TMP_JAMP(146) = TMP_JAMP(89) - TMP_JAMP(50) ! used 2 times + TMP_JAMP(145) = TMP_JAMP(88) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(70) ! used 2 times + TMP_JAMP(144) = TMP_JAMP(84) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(62) ! used 2 times + TMP_JAMP(143) = TMP_JAMP(81) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(56) ! 
used 2 times JAMP(1,1) = (-1.000000000000000D+00)*AMP(30) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(109) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(152) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(153) - JAMP(2,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(27)+(-1.000000000000000D+00)*AMP(28)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(66)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(104)+TMP_JAMP(111) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(152) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(80) + $ +TMP_JAMP(86)+TMP_JAMP(96)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(137)+TMP_JAMP(145) + JAMP(2,1) = (-1.000000000000000D+00)*AMP(28) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(79) + $ +TMP_JAMP(85)+TMP_JAMP(106)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(129)+(-1.000000000000000D+00) + $ *TMP_JAMP(145) JAMP(3,1) = (-1.000000000000000D+00)*AMP(31) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(72) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(102) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(125) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(153) - JAMP(4,1) = (-1.000000000000000D+00)*AMP(19) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(21) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(26) - $ +AMP(151)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(79)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(101)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(156) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(78) + $ +(-1.000000000000000D+00)*TMP_JAMP(86)+TMP_JAMP(95) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(134) + $ +(-1.000000000000000D+00)*TMP_JAMP(144) + JAMP(4,1) = 
(-1.000000000000000D+00)*AMP(19)+TMP_JAMP(112) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(139) + $ +TMP_JAMP(144)+(-1.000000000000000D+00)*TMP_JAMP(146) JAMP(5,1) = (-1.000000000000000D+00)*AMP(29) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(90) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(103) - $ +(-1.000000000000000D+00)*TMP_JAMP(105)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(107)+TMP_JAMP(120) - JAMP(6,1) = (-1.000000000000000D+00)*AMP(20) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(21) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(76) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(106) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(107) - $ +(-1.000000000000000D+00)*TMP_JAMP(160) - JAMP(7,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(15)+((0.000000000000000D+00,1.000000000000000D+00))*AMP(57) - $ +(-1.000000000000000D+00)*AMP(93)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(108)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(114)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(154) - JAMP(8,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(13)+((0.000000000000000D+00,1.000000000000000D+00))*AMP(75) - $ +(-1.000000000000000D+00)*AMP(91)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(57)+(-1.000000000000000D+00) - $ *TMP_JAMP(110)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(118)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(154) - JAMP(9,1) = (-1.000000000000000D+00)*AMP(94) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(113) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(115) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(116) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(131) - JAMP(10,1) = (-1.000000000000000D+00)*AMP(38) - $ 
+((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(116) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(155) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(158) - JAMP(11,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(55)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(75)+(-1.000000000000000D+00)*AMP(92)+((0.000000000000000D - $ +00,1.000000000000000D+00))*AMP(120)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(46)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(72)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(117)+(-1.000000000000000D+00) - $ *TMP_JAMP(119) - JAMP(12,1) = (-1.000000000000000D+00)*AMP(37)+( - $ -1.000000000000000D+00)*AMP(62)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(71)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(121)+TMP_JAMP(147) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(29) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(65) + $ +(-1.000000000000000D+00)*TMP_JAMP(85)+(-1.000000000000000D+00) + $ *TMP_JAMP(87)+(-1.000000000000000D+00)*TMP_JAMP(109) + $ +TMP_JAMP(143) + JAMP(6,1) = (-1.000000000000000D+00)*AMP(20)+TMP_JAMP(87) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(141) + $ +TMP_JAMP(146)+TMP_JAMP(151) + JAMP(7,1) = (-1.000000000000000D+00)*AMP(93)+( + $ -1.000000000000000D+00)*TMP_JAMP(37)+TMP_JAMP(92)+( + $ -1.000000000000000D+00)*TMP_JAMP(96)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(130)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(155) + JAMP(8,1) = (-1.000000000000000D+00)*AMP(91) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(19) + $ +TMP_JAMP(91)+(-1.000000000000000D+00)*TMP_JAMP(106) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(125) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(155) - JAMP(13,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ 
*AMP(118)+(-1.000000000000000D+00)*AMP(126) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(124) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(126) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(139) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(157) - JAMP(14,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(78)+(-1.000000000000000D+00)*AMP(98)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*AMP(118)+AMP(152) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(62) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(156) - $ +(-1.000000000000000D+00)*TMP_JAMP(159) - JAMP(15,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(88)+(-1.000000000000000D+00)*AMP(127)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(129)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*TMP_JAMP(131)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(136)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(157) - JAMP(16,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(45)+(-1.000000000000000D+00)*AMP(47)+AMP(53) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(88) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(73) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(158) - $ +(-1.000000000000000D+00)*TMP_JAMP(159) - JAMP(17,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(78)+(-1.000000000000000D+00)*AMP(97)+((0.000000000000000D - $ +00,1.000000000000000D+00))*AMP(101)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(136)+(-1.000000000000000D+00) - $ *TMP_JAMP(160)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(161) - JAMP(18,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(45)+(-1.000000000000000D+00)*AMP(46)+((0.000000000000000D - $ +00,1.000000000000000D+00))*AMP(50)+((0.000000000000000D+00 - $ 
,1.000000000000000D+00))*TMP_JAMP(52)+(-1.000000000000000D+00) - $ *TMP_JAMP(137)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(139)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(161) - JAMP(19,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(12)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(90)+(-1.000000000000000D+00)*AMP(128)+(-1.000000000000000D - $ +00)*AMP(135)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(57)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(84)+(-1.000000000000000D+00)*TMP_JAMP(141)+( - $ -1.000000000000000D+00)*TMP_JAMP(162) - JAMP(20,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(60)+(-1.000000000000000D+00)*AMP(100)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(60)+(-1.000000000000000D - $ +00)*TMP_JAMP(80)+(-1.000000000000000D+00)*TMP_JAMP(143) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(145) - $ +TMP_JAMP(162) - JAMP(21,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(12)+(-1.000000000000000D+00)*AMP(62)+((0.000000000000000D - $ +00,1.000000000000000D+00))*AMP(70)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*AMP(120)+(-1.000000000000000D+00) - $ *AMP(129)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(64)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(65)+(-1.000000000000000D+00)*TMP_JAMP(87) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(145) - $ +TMP_JAMP(146) + JAMP(9,1) = (-1.000000000000000D+00)*AMP(94)+( + $ -1.000000000000000D+00)*TMP_JAMP(90)+(-1.000000000000000D+00) + $ *TMP_JAMP(92)+(-1.000000000000000D+00)*TMP_JAMP(98) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(124) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(135) + JAMP(10,1) = (-1.000000000000000D+00)*AMP(38)+TMP_JAMP(90) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(141) + $ 
+(-1.000000000000000D+00)*TMP_JAMP(147)+TMP_JAMP(154) + JAMP(11,1) = AMP(62)+(-1.000000000000000D+00)*AMP(92) + $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(120) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(63)+( + $ -1.000000000000000D+00)*TMP_JAMP(91)+(-1.000000000000000D+00) + $ *TMP_JAMP(93)+(-1.000000000000000D+00)*TMP_JAMP(109) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(126) + JAMP(12,1) = (-1.000000000000000D+00)*AMP(37)+TMP_JAMP(93) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(139) + $ +TMP_JAMP(147)+TMP_JAMP(152) + JAMP(13,1) = (-1.000000000000000D+00)*AMP(126) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(75) + $ +(-1.000000000000000D+00)*TMP_JAMP(97)+TMP_JAMP(103) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(128) + $ +(-1.000000000000000D+00)*TMP_JAMP(148) + JAMP(14,1) = (-1.000000000000000D+00)*AMP(98) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(20)+( + $ -1.000000000000000D+00)*TMP_JAMP(44)+TMP_JAMP(101)+( + $ -1.000000000000000D+00)*TMP_JAMP(112)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(126)+(-1.000000000000000D+00) + $ *TMP_JAMP(150) + JAMP(15,1) = (-1.000000000000000D+00)*AMP(127)+( + $ -1.000000000000000D+00)*TMP_JAMP(41)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(59)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(77)+TMP_JAMP(97)+TMP_JAMP(99) + $ +(-1.000000000000000D+00)*TMP_JAMP(149) + JAMP(16,1) = (-1.000000000000000D+00)*AMP(47) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(60)+( + $ -1.000000000000000D+00)*TMP_JAMP(104)+(-1.000000000000000D+00) + $ *TMP_JAMP(119)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(138)+TMP_JAMP(149) + JAMP(17,1) = (-1.000000000000000D+00)*AMP(97) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(71) + $ +TMP_JAMP(82)+(-1.000000000000000D+00)*TMP_JAMP(101)+( + $ 
-1.000000000000000D+00)*TMP_JAMP(102)+(-1.000000000000000D+00) + $ *TMP_JAMP(115)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(127) + JAMP(18,1) = (-1.000000000000000D+00)*AMP(46)+TMP_JAMP(102) + $ +TMP_JAMP(104)+(-1.000000000000000D+00)*TMP_JAMP(121) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(132) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(140) + JAMP(19,1) = (-1.000000000000000D+00)*AMP(128) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(28) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(74) + $ +(-1.000000000000000D+00)*TMP_JAMP(107)+TMP_JAMP(116) + $ +TMP_JAMP(123)+(-1.000000000000000D+00)*TMP_JAMP(143) + JAMP(20,1) = (-1.000000000000000D+00)*AMP(100)+TMP_JAMP(49)+( + $ -1.000000000000000D+00)*TMP_JAMP(113)+(-1.000000000000000D+00) + $ *TMP_JAMP(123)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(135)+(-1.000000000000000D+00)*TMP_JAMP(151) + JAMP(21,1) = (-1.000000000000000D+00)*AMP(129)+( + $ -1.000000000000000D+00)*TMP_JAMP(51)+TMP_JAMP(108)+TMP_JAMP(110) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(133) + $ +(-1.000000000000000D+00)*TMP_JAMP(153) JAMP(22,1) = (-1.000000000000000D+00)*AMP(49) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(70) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(69) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(70) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(72) - $ +TMP_JAMP(81)+(-1.000000000000000D+00)*TMP_JAMP(147)+( - $ -1.000000000000000D+00)*TMP_JAMP(163) - JAMP(23,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(60)+(-1.000000000000000D+00)*AMP(99)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*AMP(101)+(-1.000000000000000D+00) - $ *AMP(153)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(75)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(76)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ 
*TMP_JAMP(78)+(-1.000000000000000D+00)*TMP_JAMP(81) - $ +TMP_JAMP(148)+(-1.000000000000000D+00)*TMP_JAMP(150) - JAMP(24,1) = (-1.000000000000000D+00)*AMP(48)+AMP(52) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(83) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(85) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(86) - $ +TMP_JAMP(150)+TMP_JAMP(151)+TMP_JAMP(163) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(78) + $ +TMP_JAMP(114)+(-1.000000000000000D+00)*TMP_JAMP(120) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(136) + $ +(-1.000000000000000D+00)*TMP_JAMP(152) + JAMP(23,1) = ((0.000000000000000D+00,1.000000000000000D+00)) + $ *AMP(10)+(-1.000000000000000D+00)*AMP(99)+TMP_JAMP(50) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(72)+( + $ -1.000000000000000D+00)*TMP_JAMP(82)+TMP_JAMP(113)+( + $ -1.000000000000000D+00)*TMP_JAMP(117)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(130) + JAMP(24,1) = (-1.000000000000000D+00)*AMP(48)+TMP_JAMP(54) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(73) + $ +TMP_JAMP(118)+TMP_JAMP(120)+TMP_JAMP(122)+((0.000000000000000D + $ +00,-1.000000000000000D+00))*TMP_JAMP(142) IF(INIT_MODE)THEN DO I=1, NGRAPHS diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/dummy_fct.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/dummy_fct.f index 076cf29d67..4f7a204b8f 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/dummy_fct.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/dummy_fct.f @@ -32,7 +32,7 @@ logical FUNCTION dummy_cuts(P) LOGICAL IS_A_NU(NEXTERNAL),IS_HEAVY(NEXTERNAL) logical do_cuts(nexternal) COMMON /TO_SPECISA/IS_A_J,IS_A_A,IS_A_L,IS_A_B,IS_A_NU,IS_HEAVY, - . IS_A_ONIUM, do_cuts + & IS_A_ONIUM, do_cuts dummy_cuts=.true. 
@@ -118,15 +118,16 @@ double precision function user_dynamical_scale(P) C ************************************************************ -C default for the library implementing a dummt bias function +C default for the library implementing a dummy bias function C ************************************************************ subroutine bias_wgt_custom(p, original_weight, bias_weight) - implicit none + implicit none C C Parameters C include 'nexternal.inc' -C + +C C Arguments C double precision p(0:3, nexternal) @@ -161,3 +162,4 @@ subroutine bias_wgt_custom(p, original_weight, bias_weight) return end subroutine bias_wgt_custom + diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py index e9f421ae5f..824815f47b 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py @@ -537,7 +537,7 @@ def charge_card(self, tag): self.param_card = param_card_reader.ParamCard(param_card) return self.param_card elif tag == 'mgruncard': - self.run_card = RunCard(self[tag]) + self.run_card = RunCard(self[tag], unknown_warning=False) return self.run_card elif tag == 'mg5proccard': proc_card = self[tag].split('\n') @@ -2625,6 +2625,7 @@ class RunCard(ConfigFile): default_include_file = 'run_card.inc' default_autodef_file = 'run.inc' donewarning = [] + include_as_parameter = [] def plugin_input(self, finput): @@ -2671,18 +2672,40 @@ def __new__(cls, finput=None, **opt): elif isinstance(finput, cls): target_class = finput.__class__ elif isinstance(finput, str): + path = finput if '\n' not in finput: finput = open(finput).read() if 'req_acc_FO' in finput: target_class = RunCardNLO else: target_class = RunCardLO + if MADEVENT and os.path.exists(pjoin(MEDIR, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(MEDIR, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + 
import launch_plugin + target_class = launch_plugin.RunCard + elif not MADEVENT and os.path.exists(path.replace('run_card.dat', '../bin/internal/launch_plugin.py')): + misc.sprint('try to use plugin class') + pydir = path.replace('run_card.dat', '../bin/internal/') + with misc.TMP_variable(sys, 'path', sys.path + [pydir]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard + else: return None target_class.fill_post_set_from_blocks() - - return super(RunCard, cls).__new__(target_class, finput, **opt) + out = super(RunCard, cls).__new__(target_class, finput, **opt) + if not isinstance(out, RunCard): #should not happen but in presence of missmatch of library loaded. + out.__init__(finput, **opt) + return out else: return super(RunCard, cls).__new__(cls, finput, **opt) @@ -2710,7 +2733,7 @@ def __init__(self, *args, **opts): self.system_default = {} self.display_block = [] # set some block to be displayed - + self.fct_mod = {} # {param: (fct_pointer, *argument, **opts)} self.cut_class = {} self.warned=False @@ -2747,7 +2770,7 @@ def get_lepton_densities(cls): def add_param(self, name, value, fortran_name=None, include=True, hidden=False, legacy=False, cut=False, system=False, sys_default=None, - autodef=False, + autodef=False, fct_mod=None, **opts): """ add a parameter to the card. value is the default value and defines the type (int/float/bool/str) of the input. @@ -2761,6 +2784,7 @@ def add_param(self, name, value, fortran_name=None, include=True, If a path (Source/PDF/pdf.inc) the definition will be added within that file Default is False (does not add the definition) entry added in the run_card will automatically have this on True. + fct_mod: defines a function to run if the parameter is modify in the include file options of **opts: - allowed: list of valid options. '*' means anything else should be allowed. empty list means anything possible as well. 
@@ -2785,8 +2809,12 @@ def add_param(self, name, value, fortran_name=None, include=True, if autodef: self.definition_path[autodef].append(name) self.user_set.add(name) + # function to trigger if a value is modified in the include file + # main target is action to force correct recompilation (like for compilation flag/...) + if fct_mod: + self.fct_mod[name] = fct_mod - def read(self, finput, consistency=True): + def read(self, finput, consistency=True, unknown_warning=True): """Read the input file, this can be a path to a file, a file object, a str with the content of the file.""" @@ -2794,6 +2822,7 @@ def read(self, finput, consistency=True): if "\n" in finput: finput = finput.split('\n') elif os.path.isfile(finput): + self.path = finput finput = open(finput) else: raise Exception("No such file %s" % finput) @@ -2808,7 +2837,7 @@ def read(self, finput, consistency=True): name = name.lower().strip() if name not in self: #looks like an entry added by a user -> add it nicely - self.add_unknown_entry(name, value) + self.add_unknown_entry(name, value, unknown_warning) else: self.set( name, value, user=True) # parameter not set in the run_card can be set to compatiblity value @@ -2820,7 +2849,7 @@ def read(self, finput, consistency=True): logger.warning(str(error)) else: raise - def add_unknown_entry(self, name, value): + def add_unknown_entry(self, name, value, unknow_warning): """function to add an entry to the run_card when the associated parameter does not exists. This is based on the guess_entry_fromname for the various syntax providing input. This then call add_param accordingly. 
@@ -2859,7 +2888,7 @@ def add_unknown_entry(self, name, value): raise Exception("dictionary need to have at least one entry") default['dict']['__type__'] = default[self.guess_type_from_value(default_value[0])] - if name not in RunCard.donewarning: + if name not in RunCard.donewarning and unknow_warning: logger.warning("Found unexpected entry in run_card: \"%s\" with value \"%s\".\n"+\ " The type was assigned to %s. \n"+\ " The definition of that variable will %sbe automatically added to fortran file %s\n"+\ @@ -2897,7 +2926,16 @@ def valid_line(self, line, tmp): return False else: return True - + + + def reset_simd(self, old_value, new_value, name, *args, **opts): + raise Exception('pass in reset simd') + + def make_clean(self,old_value, new_value, name, dir): + raise Exception('pass make clean for ', dir) + + def make_Ptouch(self,old_value, new_value, name, reset): + raise Exception('pass Ptouch for ', reset) def write(self, output_file, template=None, python_template=False, write_hidden=False, template_options=None, **opt): @@ -3072,6 +3110,77 @@ def write(self, output_file, template=None, python_template=False, else: output_file.write(text) + def get_last_value_include(self, output_dir): + """For paraeter in self.fct_mod + parse the associate inc file to get the value of the previous run. + We return a dictionary {name: old_value} + if inc file does not exist we will return the current value (i.e. 
set has no change) + """ + + #remember that + # default_include_file is a class variable + # self.includepath is on the form include_path : [list of param ] + out = {} + + # setup inc_to_parse to be like self.includepath (include_path : [list of param ]) + # BUT only containing the parameter that need to be tracked for the fct_mod option + inc_to_parse = {} + for inc_file, params in self.includepath.items(): + if not inc_file: + continue + if any(p in params for p in self.fct_mod): + inc_to_parse[inc_file] = [name for name in self.includepath[inc_file] if name in self.fct_mod] + + # now loop over the files and ask the associate function + for inc_file, params in inc_to_parse.items(): + if inc_file is True: + inc_file = self.default_include_file + out.update(self.get_value_from_include(inc_file, params, output_dir)) + + return out + + def get_value_from_include(self, path, list_of_params, output_dir): + """for a given include file return the current value of the requested parameter + return a dictionary {name: value} + if path does not exists return the current value in self for all parameter""" + + #WARNING DOES NOT HANDLE LIST/DICT so far + + # handle case where file is missing + if not os.path.exists(pjoin(output_dir,path)): + misc.sprint("include file not existing", pjoin(output_dir,path)) + out = {name: self[name] for name in list_of_params} + + with open(pjoin(output_dir,path), 'r') as fsock: + text = fsock.read() + + for name in list_of_params: + misc.sprint(name, name in self.fortran_name) + misc.sprint(self.fortran_name[name] if name in self.fortran_name[name] else name) + to_track = [self.fortran_name[name] if name in self.fortran_name else name for name in list_of_params] + pattern = re.compile(r"\(?(%(names)s)\s?=\s?([^)]*)\)?" 
% {'names':'|'.join(to_track)}, re.I) + out = dict(pattern.findall(text)) + misc.sprint(out) + for name in list_of_params: + if name in self.fortran_name: + value = out[self.fortran_name[name]] + del out[self.fortran_name[name]] + out[name] = value + + for name, value in out.items(): + try: + out[name] = self.format_variable(value, type(self[name])) + except Exception: + continue + + if len(out) != len(list_of_params): + misc.sprint(list_of_params) + misc.sprint(to_track) + misc.sprint(self.fortran_name) + misc.sprint(text) + raise Exception + return out + def get_default(self, name, default=None, log_level=None): """return self[name] if exist otherwise default. log control if we @@ -3362,71 +3471,93 @@ def write_include_file(self, output_dir, output_file=None): #ensusre that system only parameter are correctly set self.update_system_parameter_for_include() + value_in_old_include = self.get_last_value_include(output_dir) + + if output_dir: self.write_autodef(output_dir, output_file=None) # check/fix status of customised functions self.edit_dummy_fct_from_file(self["custom_fcts"], os.path.dirname(output_dir)) for incname in self.includepath: - if incname is True: - pathinc = self.default_include_file - elif incname is False: - continue - else: - pathinc = incname + self.write_one_include_file(output_dir, incname, output_file) + + for name,value in value_in_old_include.items(): + if value != self[name]: + self.fct_mod[name][0](value, self[name], name, *self.fct_mod[name][1],**self.fct_mod[name][2]) - if output_file: - fsock = output_file + def write_one_include_file(self, output_dir, incname, output_file=None): + """write one include file at the time""" + + misc.sprint(incname) + if incname is True: + pathinc = self.default_include_file + elif incname is False: + return + else: + pathinc = incname + + if output_file: + fsock = output_file + else: + fsock = file_writers.FortranWriter(pjoin(output_dir,pathinc+'.tmp')) + + + for key in self.includepath[incname]: + 
#define the fortran name + if key in self.fortran_name: + fortran_name = self.fortran_name[key] else: - fsock = file_writers.FortranWriter(pjoin(output_dir,pathinc+'.tmp')) - for key in self.includepath[incname]: - #define the fortran name - if key in self.fortran_name: - fortran_name = self.fortran_name[key] + fortran_name = key + + if incname in self.include_as_parameter: + fsock.writelines('INTEGER %s\n' % fortran_name) + #get the value with warning if the user didn't set it + value = self.get_default(key) + if hasattr(self, 'mod_inc_%s' % key): + value = getattr(self, 'mod_inc_%s' % key)(value) + # Special treatment for strings containing a list of + # strings. Convert it to a list of strings + if isinstance(value, list): + # in case of a list, add the length of the list as 0th + # element in fortran. Only in case of integer or float + # list (not for bool nor string) + targettype = self.list_parameter[key] + if targettype is bool: + pass + elif targettype is int: + line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(len(value))) + fsock.writelines(line) + elif targettype is float: + line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(float(len(value)))) + fsock.writelines(line) + # output the rest of the list in fortran + for i,v in enumerate(value): + line = '%s(%s) = %s \n' % (fortran_name, i+1, self.f77_formatting(v)) + fsock.writelines(line) + elif isinstance(value, dict): + for fortran_name, onevalue in value.items(): + line = '%s = %s \n' % (fortran_name, self.f77_formatting(onevalue)) + fsock.writelines(line) + elif isinstance(incname,str) and 'compile' in incname: + if incname in self.include_as_parameter: + line = 'PARAMETER (%s=%s)' %( fortran_name, value) else: - fortran_name = key - - #get the value with warning if the user didn't set it - value = self.get_default(key) - if hasattr(self, 'mod_inc_%s' % key): - value = getattr(self, 'mod_inc_%s' % key)(value) - # Special treatment for strings containing a list of - # strings. 
Convert it to a list of strings - if isinstance(value, list): - # in case of a list, add the length of the list as 0th - # element in fortran. Only in case of integer or float - # list (not for bool nor string) - targettype = self.list_parameter[key] - if targettype is bool: - pass - elif targettype is int: - line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(len(value))) - fsock.writelines(line) - elif targettype is float: - line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(float(len(value)))) - fsock.writelines(line) - # output the rest of the list in fortran - for i,v in enumerate(value): - line = '%s(%s) = %s \n' % (fortran_name, i+1, self.f77_formatting(v)) - fsock.writelines(line) - elif isinstance(value, dict): - for fortran_name, onevalue in value.items(): - line = '%s = %s \n' % (fortran_name, self.f77_formatting(onevalue)) - fsock.writelines(line) - elif isinstance(incname,str) and 'compile' in incname: line = '%s = %s \n' % (fortran_name, value) - fsock.write(line) + fsock.write(line) + else: + if incname in self.include_as_parameter: + line = 'PARAMETER (%s=%s)' %( fortran_name, self.f77_formatting(value)) else: line = '%s = %s \n' % (fortran_name, self.f77_formatting(value)) - fsock.writelines(line) - if not output_file: - fsock.close() - path = pjoin(output_dir,pathinc) - if not os.path.exists(path) or not filecmp.cmp(path, path+'.tmp'): - files.mv(path+'.tmp', path) - else: - os.remove(path+'.tmp') - + fsock.writelines(line) + if not output_file: + fsock.close() + path = pjoin(output_dir,pathinc) + if not os.path.exists(path) or not filecmp.cmp(path, path+'.tmp'): + files.mv(path+'.tmp', path) + else: + os.remove(path+'.tmp') def write_autodef(self, output_dir, output_file=None): """ Add the definition of variable to run.inc if the variable is set with autodef. @@ -3765,13 +3896,14 @@ def remove_all_cut(self): %(tmin_for_channel)s = tmin_for_channel ! 
limit the non-singular reach of --some-- channel of integration related to T-channel diagram (value between -1 and 0), -1 is no impact %(survey_splitting)s = survey_splitting ! for loop-induced control how many core are used at survey for the computation of a single iteration. %(survey_nchannel_per_job)s = survey_nchannel_per_job ! control how many Channel are integrated inside a single job on cluster/multicore - %(refine_evt_by_job)s = refine_evt_by_job ! control the maximal number of events for the first iteration of the refine (larger means less jobs) + %(refine_evt_by_job)s = refine_evt_by_job ! control the maximal number of events for the first iteration of the refine (larger means less jobs) #********************************************************************* -# Compilation flag. No automatic re-compilation (need manual "make clean" in Source) +# Compilation flag. #********************************************************************* %(global_flag)s = global_flag ! fortran optimization flag use for the all code. %(aloha_flag)s = aloha_flag ! fortran optimization flag for aloha function. Suggestions: '-ffast-math' %(matrix_flag)s = matrix_flag ! fortran optimization flag for matrix.f function. Suggestions: '-O3' + %(vector_size)s = vector_size ! 
size designed for SIMD/OpenMP/GPU (number of events in lockstep) """ template_off = '# To see advanced option for Phase-Space optimization: type "update psoptim"' @@ -3927,9 +4059,12 @@ class RunCardLO(RunCard): "get_dummy_x1_x2": pjoin("SubProcesses","dummy_fct.f"), "dummy_boostframe": pjoin("SubProcesses","dummy_fct.f"), "user_dynamical_scale": pjoin("SubProcesses","dummy_fct.f"), + "bias_wgt_custom": pjoin("SubProcesses","dummy_fct.f"), "user_": pjoin("SubProcesses","dummy_fct.f") # all function starting by user will be added to that file } + include_as_parameter = ['vector.inc'] + if MG5DIR: default_run_card = pjoin(MG5DIR, "internal", "default_run_card_lo.dat") @@ -4163,10 +4298,15 @@ def default_setup(self): self.add_param('hel_splitamp', True, hidden=True, include=False, comment='decide if amplitude aloha call can be splitted in two or not when doing helicity per helicity optimization.') self.add_param('hel_zeroamp', True, hidden=True, include=False, comment='decide if zero amplitude can be removed from the computation when doing helicity per helicity optimization.') self.add_param('SDE_strategy', 1, allowed=[1,2], fortran_name="sde_strat", comment="decide how Multi-channel should behaves \"1\" means full single diagram enhanced (hep-ph/0208156), \"2\" use the product of the denominator") - self.add_param('global_flag', '-O', include=False, hidden=True, comment='global fortran compilation flag, suggestion -fbound-check') - self.add_param('aloha_flag', '', include=False, hidden=True, comment='global fortran compilation flag, suggestion: -ffast-math') - self.add_param('matrix_flag', '', include=False, hidden=True, comment='fortran compilation flag for the matrix-element files, suggestion -O3') - + self.add_param('global_flag', '-O', include=False, hidden=True, comment='global fortran compilation flag, suggestion -fbound-check', + fct_mod=(self.make_clean, ('Source'),{})) + self.add_param('aloha_flag', '', include=False, hidden=True, comment='global fortran 
compilation flag, suggestion: -ffast-math', + fct_mod=(self.make_clean, ('Source/DHELAS'),{})) + self.add_param('matrix_flag', '', include=False, hidden=True, comment='fortran compilation flag for the matrix-element files, suggestion -O3', + fct_mod=(self.make_Ptouch, ('matrix'),{})) + self.add_param('vector_size', 1, include='vector.inc', hidden=True, comment='lockstep size for parralelism run', + fortran_name='VECSIZE_MEMMAX', fct_mod=(self.reset_simd,(),{})) + # parameter allowing to define simple cut via the pdg # Special syntax are related to those. (can not be edit directly) self.add_param('pt_min_pdg',{'__type__':0.}, include=False, cut=True) @@ -4188,8 +4328,7 @@ def default_setup(self): self.add_param('mxxmin4pdg',[-1.], system=True) self.add_param('mxxpart_antipart', [False], system=True) - # CUDACPP parameters - self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + def check_validity(self): """ """ diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/check_param_card.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/check_param_card.py index fe874a06a4..71089d7480 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/check_param_card.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/check_param_card.py @@ -85,7 +85,7 @@ def load_str(self, text): self.value= ' '.join(data[len(self.lhacode):]) # check that lhacode are the first entry otherwise return invalid param. 
if ' '.join([str(i) for i in self.lhacode]) != ' '.join(data[:len(self.lhacode)]): - raise InvalidParam + raise InvalidParam("line was %s" % str(data)) else: self.value = data[-1] diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/common_run_interface.py index 5d0187e3fa..14c7f310dc 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/common_run_interface.py @@ -749,13 +749,15 @@ def writeRunWeb(me_dir): class RunWebHandling(object): - def __init__(self, me_dir, crashifpresent=True, warnifpresent=True): + def __init__(self, me_dir, crashifpresent=True, warnifpresent=True, force_run=False): """raise error if RunWeb already exists me_dir is the directory where the write RunWeb""" self.remove_run_web = True self.me_dir = me_dir - + if force_run: + self.remove_run_web = False + return if crashifpresent or warnifpresent: if os.path.exists(pjoin(me_dir, 'RunWeb')): pid = open(pjoin(me_dir, 'RunWeb')).read() @@ -6574,7 +6576,7 @@ def reask(self, *args, **opt): fail_due_to_format = 0 #parameter to avoid infinite loop def postcmd(self, stop, line): - if line not in [None, '0', 'done', '']: + if line not in [None, '0', 'done', '',0]: ending_question = cmd.OneLinePathCompletion.postcmd(self,stop,line) else: ending_question = True @@ -7533,7 +7535,8 @@ def open_file(self, answer): else: raise if time.time() - start < .5: - self.mother_interface.ask("Are you really that fast? If you are using an editor that returns directly. Please confirm that you have finised to edit the file", 'y') + self.mother_interface.ask("Are you really that fast? If you are using an editor that returns directly. 
Please confirm that you have finised to edit the file", 'y', + timeout=False) self.reload_card(path) def reload_card(self, path): diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/extended_cmd.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/extended_cmd.py index a6a8609dce..2f37070580 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/extended_cmd.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/extended_cmd.py @@ -1108,9 +1108,12 @@ def ask(self, question, default, choices=[], path_msg=None, if alias: choices += list(alias.keys()) + + question_instance = obj(question, allow_arg=choices, default=default, mother_interface=self, **opt) - + if fct_timeout is None: + fct_timeout = lambda x: question_instance.postcmd(x, default) if x and default else False if first_cmd: if isinstance(first_cmd, str): question_instance.onecmd(first_cmd) @@ -2271,6 +2274,9 @@ def postcmd(self, stop, line): if n: self.default(line) return self.postcmd(stop, line) + elif self.value is None and line: + self.default(line) + return self.postcmd(stop, line) if not self.casesensitive: for ans in self.allow_arg: if ans.lower() == self.value.lower(): diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/gen_ximprove.py index 3b8ec31215..a88d60b282 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/gen_ximprove.py @@ -154,10 +154,15 @@ def get_helicity(self, to_submit=True, clean=True): p = misc.Popen(['./gensym'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=Pdir) #sym_input = "%(points)d %(iterations)d %(accuracy)f \n" % self.opts + (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') - nb_channel = max([math.floor(float(d)) for d in stdout.split()]) - + try: + nb_channel = max([math.floor(float(d)) for d in stdout.split()]) + except Exception as error: + misc.sprint(stdout, 'no channel or error for %s' % Pdir) + continue + 
self.cmd.compile(['madevent_forhel'], cwd=Pdir) if not os.path.exists(pjoin(Pdir, 'madevent_forhel')): raise Exception('Error make madevent_forhel not successful') diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py index f4c9cb6334..c9d1c7706a 100644 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py @@ -1,6 +1,12 @@ +# Copyright (C) 2020-2023 CERN and UCLouvain. +# Licensed under the GNU Lesser General Public License (version 3 or later). +# Created by: O. Mattelaer (Aug 2023) for the MG5aMC CUDACPP plugin. +# Further modified by: O. Mattelaer, A. Valassi (2023) for the MG5aMC CUDACPP plugin. import logging - +import os +import subprocess +pjoin = os.path.join logger = logging.getLogger('cmdprint') # for stdout try: @@ -9,20 +15,23 @@ import internal.madevent_interface as madevent_interface import internal.misc as misc import internal.extended_cmd as extended_cmd + import internal.banner as banner_mod else: import madgraph.interface.madevent_interface as madevent_interface import madgraph.various.misc as misc import madgraph.interface.extended_cmd as extended_cmd + import madgraph.various.banner as banner_mod class CPPMEInterface(madevent_interface.MadEventCmdShell): - def compile(self, *args, **opts): """ """ import multiprocessing if not self.options['nb_core'] or self.options['nb_core'] == 'None': self.options['nb_core'] = multiprocessing.cpu_count() - if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): + import pathlib + import os + pjoin = os.path.join cudacpp_backend = self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) if cudacpp_backend == 'FORTRAN': @@ -36,5 +45,50 @@ def compile(self, *args, **opts): return misc.compile(nb_core=self.options['nb_core'], 
*args, **opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) - + +class CPPRunCard(banner_mod.RunCardLO): + def reset_simd(self, old_value, new_value, name): + if not hasattr(self, 'path'): + logger.warning('WARNING! CPPRunCard instance has no attribute path') + return + ###raise Exception('INTERNAL ERROR! CPPRunCard instance has no attribute path') + if name == "vector_size" and new_value <= int(old_value): + # code can handle the new size -> do not recompile + return + Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source') + subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + def plugin_input(self, finput): + return + + def default_setup(self): + super().default_setup() + self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + + def write_one_include_file(self, output_dir, incname, output_file=None): + """write one include file at the time""" + if incname == "vector.inc" and 'vector_size' not in self.user_set: + return + super().write_one_include_file(output_dir, incname, output_file) + + def check_validity(self): + """ensure that PLUGIN information are consistent""" + super().check_validity() + if self['SDE_strategy'] != 1: + logger.warning('SDE_strategy different of 1 is not supported with SMD/GPU mode') + self['sde_strategy'] = 1 + if self['hel_recycling']: + self['hel_recycling'] = False + +class GPURunCard(CPPRunCard): + def default_setup(self): + super(CPPRunCard, self).default_setup() + self.add_param('cudacpp_backend', 'CUDA', include=False, hidden=False) + +#class CUDACPPRunCard(CPPRunCard): +# def default_setup(self): +# super(CPPRunCard, self).default_setup() +# self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + MEINTERFACE = CPPMEInterface +RunCard = CPPRunCard diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/madevent_interface.py index 
920e07a926..d722702891 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/madevent_interface.py @@ -3796,9 +3796,11 @@ def do_combine_events(self, line): if self.run_card['bias_module'].lower() not in ['dummy', 'none'] and nb_event: self.correct_bias() - + elif self.run_card['custom_fcts']: + self.correct_bias() + logger.info("combine events done in %s", time.time()-start) - + self.to_store.append('event') diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/madevent b/epochX/cudacpp/gg_ttgg.mad/bin/madevent index 10b6a71fa2..dff9711b73 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/madevent +++ b/epochX/cudacpp/gg_ttgg.mad/bin/madevent @@ -173,6 +173,10 @@ if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): launch_interface = launch_plugin.MEINTERFACE +#Source use this executable for compilation always allow it +force_run = False +if (args and args[0] == 'treatcards'): + force_run=True # Call the cmd interface main loop try: @@ -180,7 +184,7 @@ try: launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) - with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): + with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), force_run=force_run): if (args and os.path.isfile(args[0])): # They are an input file input_file = args[0] diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index de1c10fb29..06e55809ba 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005267620086669922  +DEBUG: model prefixing takes 0.00528264045715332  INFO: 
Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.155 s +1 processes with 123 diagrams generated in 0.156 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.416 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.423 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.312 s +ALOHA: aloha creates 5 routines in 0.328 s VVV1 VVV1 FFV1 @@ -204,9 +204,10 @@ FileWriter for / FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m1.430s -user 0m1.363s -sys 0m0.053s +real 0m1.515s +user 0m1.393s +sys 0m0.045s diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 7e0cdd133a..f1893e745c 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00532841682434082  +DEBUG: model prefixing takes 0.005330801010131836  INFO: Restrict 
model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.848 s +1 processes with 1240 diagrams generated in 1.864 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,9 +175,9 @@ INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] -INFO: Color-Flow passed to 1592 term in 36s. Introduce 2768 contraction +INFO: Color-Flow passed to 1630 term in 7s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -186,21 +186,21 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 
0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 
742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, 
subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.468 s -Wrote files for 2281 helas calls in 46.375 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.598 s +Wrote files for 2281 helas calls in 18.100 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.315 s +ALOHA: aloha creates 5 routines in 0.313 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -208,7 +208,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.308 s +ALOHA: aloha creates 10 routines in 0.306 s VVV1 VVV1 FFV1 @@ -235,8 +235,35 @@ save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CO INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG! 
Switching on tmad mode (MG5AMC_TMADMODE=1) +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG: self.in_madevent_mode =  True [output.py at line 207]  +DEBUG! Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +patching file Source/genps.inc +patching file Source/makefile +patching file SubProcesses/makefile +patching file bin/internal/gen_ximprove.py +Hunk #1 succeeded at 385 (offset 5 lines). +patching file bin/internal/madevent_interface.py +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +patching file auto_dsig1.f +Hunk #1 succeeded at 483 (offset 14 lines). +patching file driver.f +patching file matrix1.f +Hunk #2 succeeded at 255 (offset 112 lines). +Hunk #3 succeeded at 333 (offset 112 lines). +Hunk #4 succeeded at 361 (offset 112 lines). +Hunk #5 succeeded at 406 (offset 112 lines). +DEBUG: p.returncode =  0 [output.py at line 233]  +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done. +Type "launch" to generate events from this process, or see +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/README +Run "open index.html" to see more information about this process. +quit + +real 0m28.759s +user 0m28.272s +sys 0m0.392s ************************************************************ * * * W E L C O M E to * @@ -264,8 +291,7 @@ Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration.txt No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run -run_card missed argument cudacpp_backend. Takes default: CPP -run_card missed argument cudacpp_backend. Takes default: CPP +WARNING! CPPRunCard instance has no attribute path quit INFO: launch in debug mode @@ -299,30 +325,3 @@ treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common -patching file Source/genps.inc -patching file Source/makefile -patching file SubProcesses/makefile -patching file Source/make_opts -patching file bin/internal/banner.py -Hunk #1 succeeded at 4188 (offset 1 line). -patching file bin/internal/gen_ximprove.py -patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 -patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). -patching file driver.f -patching file matrix1.f -Hunk #2 succeeded at 255 (offset 112 lines). -Hunk #3 succeeded at 333 (offset 112 lines). -Hunk #4 succeeded at 361 (offset 112 lines). -Hunk #5 succeeded at 406 (offset 112 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done. -Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/README -Run "open index.html" to see more information about this process. 
-quit - -real 0m57.540s -user 0m56.533s -sys 0m0.804s diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/make_opts b/epochX/cudacpp/gg_ttggg.mad/Source/make_opts index bd3c24228d..57f5f7bb96 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/make_opts +++ b/epochX/cudacpp/gg_ttggg.mad/Source/make_opts @@ -1,17 +1,12 @@ -pdlabel1= -pdlabel2= -lhapdf= -PYTHIA8_PATH=NotInstalled -MG5AMC_VERSION=3.5.0_lo_vect GLOBAL_FLAG=-O3 -ffast-math -fbounds-check -ALOHA_FLAG= -MATRIX_FLAG= DEFAULT_CPP_COMPILER=g++ MACFLAG= STDLIB=-lstdc++ STDLIB_FLAG= DEFAULT_F_COMPILER=gfortran DEFAULT_F2PY_COMPILER=f2py3 +MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime +PYTHIA8_PATH=NotInstalled #end_of_make_opts_variables BIASLIBDIR=../../../lib/ diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/makefile b/epochX/cudacpp/gg_ttggg.mad/Source/makefile index dbe08b846e..00c73099a0 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/makefile +++ b/epochX/cudacpp/gg_ttggg.mad/Source/makefile @@ -136,5 +136,7 @@ cleanSource: clean: cleanSource for i in `ls -d ../SubProcesses/P*`; do cd $$i; make clean; cd -; done; -cleanall: cleanSource +cleanavx: + for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done; +cleanall: cleanSource # THIS IS THE ONE for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done; diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f index fc156798a8..b8a6a894de 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f @@ -455,7 +455,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C LOCAL VARIABLES C INTEGER I,J,M,N - COMPLEX*16 ZTEMP, TMP_JAMP(2768) + COMPLEX*16 ZTEMP, TMP_JAMP(3030) REAL*8 CF(NCOLOR,NCOLOR) COMPLEX*16 AMP(NGRAPHS), JAMP(NCOLOR,NAMPSO) COMPLEX*16 W(6,NWAVEFUNCS) @@ -13558,5081 +13558,5242 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) 
JAMP(:,:) = (0D0,0D0) C JAMPs contributing to orders ALL_ORDERS=1 - TMP_JAMP(210) = AMP(1370) - AMP(1382) ! used 16 times - TMP_JAMP(209) = AMP(1358) - AMP(1361) ! used 16 times - TMP_JAMP(208) = AMP(1329) + AMP(1811) ! used 16 times - TMP_JAMP(207) = AMP(1327) - AMP(1810) ! used 16 times - TMP_JAMP(206) = AMP(1322) + AMP(1388) ! used 16 times - TMP_JAMP(205) = AMP(1320) + AMP(1324) ! used 16 times - TMP_JAMP(204) = AMP(1310) - AMP(1313) ! used 16 times - TMP_JAMP(203) = AMP(1309) + AMP(1311) ! used 16 times - TMP_JAMP(202) = AMP(1103) + AMP(1359) ! used 16 times - TMP_JAMP(201) = AMP(1102) + AMP(1486) ! used 16 times - TMP_JAMP(200) = AMP(1098) - AMP(1817) ! used 16 times - TMP_JAMP(199) = AMP(1096) + AMP(1816) ! used 16 times - TMP_JAMP(198) = AMP(1089) - AMP(1487) ! used 16 times - TMP_JAMP(197) = AMP(1085) + AMP(1087) ! used 16 times - TMP_JAMP(196) = AMP(1084) - AMP(1100) ! used 16 times - TMP_JAMP(195) = AMP(1080) + AMP(1082) ! used 16 times - TMP_JAMP(194) = AMP(1079) - AMP(1099) ! used 16 times - TMP_JAMP(193) = AMP(1076) + AMP(1078) ! used 16 times - TMP_JAMP(192) = AMP(45) + AMP(1323) ! used 16 times - TMP_JAMP(191) = AMP(43) - AMP(1371) ! used 16 times - TMP_JAMP(190) = AMP(41) - AMP(1492) ! used 16 times - TMP_JAMP(189) = AMP(37) + AMP(40) ! used 16 times - TMP_JAMP(188) = AMP(29) - AMP(1326) ! used 16 times - TMP_JAMP(187) = AMP(25) + AMP(28) ! used 16 times - TMP_JAMP(186) = AMP(18) + AMP(1493) ! used 16 times - TMP_JAMP(185) = AMP(16) + AMP(26) ! used 16 times - TMP_JAMP(184) = AMP(5) - AMP(38) ! used 16 times - TMP_JAMP(183) = AMP(2) + AMP(4) ! used 16 times - TMP_JAMP(182) = AMP(1378) + AMP(1383) ! used 16 times - TMP_JAMP(181) = AMP(1367) + AMP(1369) ! used 16 times - TMP_JAMP(180) = AMP(1332) - AMP(1677) ! used 16 times - TMP_JAMP(179) = AMP(1330) - AMP(1389) ! used 16 times - TMP_JAMP(178) = AMP(1325) - AMP(1675) ! used 16 times - TMP_JAMP(177) = AMP(1319) + AMP(1321) ! used 16 times - TMP_JAMP(176) = AMP(1315) - AMP(1318) ! 
used 16 times - TMP_JAMP(175) = AMP(1314) + AMP(1316) ! used 16 times - TMP_JAMP(174) = AMP(579) - AMP(1380) ! used 16 times - TMP_JAMP(173) = AMP(577) - AMP(1489) ! used 16 times - TMP_JAMP(172) = AMP(573) + AMP(1683) ! used 16 times - TMP_JAMP(171) = AMP(571) + AMP(576) ! used 16 times - TMP_JAMP(170) = AMP(567) + AMP(1490) ! used 16 times - TMP_JAMP(169) = AMP(566) + AMP(1681) ! used 16 times - TMP_JAMP(168) = AMP(556) - AMP(559) ! used 16 times - TMP_JAMP(167) = AMP(555) + AMP(557) ! used 16 times - TMP_JAMP(166) = AMP(554) - AMP(574) ! used 16 times - TMP_JAMP(165) = AMP(551) + AMP(553) ! used 16 times - TMP_JAMP(164) = AMP(1377) + AMP(1865) ! used 16 times - TMP_JAMP(163) = AMP(1357) + AMP(1360) ! used 16 times - TMP_JAMP(162) = AMP(1176) - AMP(1814) ! used 16 times - TMP_JAMP(161) = AMP(1174) + AMP(1813) ! used 16 times - TMP_JAMP(160) = AMP(1173) - AMP(1871) ! used 16 times - TMP_JAMP(159) = AMP(1163) + AMP(1165) ! used 16 times - TMP_JAMP(158) = AMP(1159) + AMP(1161) ! used 16 times - TMP_JAMP(157) = AMP(1158) + AMP(1160) ! used 16 times - TMP_JAMP(156) = AMP(636) - AMP(1172) ! used 16 times - TMP_JAMP(155) = AMP(634) + AMP(1376) ! used 16 times - TMP_JAMP(154) = AMP(575) + AMP(578) ! used 16 times - TMP_JAMP(153) = AMP(565) - AMP(1166) ! used 16 times - TMP_JAMP(152) = AMP(521) + AMP(524) ! used 16 times - TMP_JAMP(151) = AMP(520) + AMP(523) ! used 16 times - TMP_JAMP(150) = AMP(1375) - AMP(1864) ! used 16 times - TMP_JAMP(149) = AMP(1368) + AMP(1372) ! used 16 times - TMP_JAMP(148) = AMP(1171) + AMP(1870) ! used 16 times - TMP_JAMP(147) = AMP(1170) - AMP(1542) ! used 16 times - TMP_JAMP(146) = AMP(1167) - AMP(1541) ! used 16 times - TMP_JAMP(145) = AMP(1164) + AMP(1168) ! used 16 times - TMP_JAMP(144) = AMP(1154) + AMP(1156) ! used 16 times - TMP_JAMP(143) = AMP(1153) + AMP(1155) ! used 16 times - TMP_JAMP(142) = AMP(42) - AMP(1374) ! used 16 times - TMP_JAMP(141) = AMP(24) + AMP(1548) ! used 16 times - TMP_JAMP(140) = AMP(22) + AMP(39) ! 
used 16 times - TMP_JAMP(139) = AMP(17) + AMP(1547) ! used 16 times - TMP_JAMP(138) = AMP(7) + AMP(9) ! used 16 times - TMP_JAMP(137) = AMP(6) + AMP(8) ! used 16 times - TMP_JAMP(136) = AMP(1379) - AMP(1785) ! used 16 times - TMP_JAMP(135) = AMP(1365) + AMP(1780) ! used 16 times - TMP_JAMP(134) = AMP(1362) + AMP(1364) ! used 16 times - TMP_JAMP(133) = AMP(1101) + AMP(1104) ! used 16 times - TMP_JAMP(132) = AMP(732) + AMP(1680) ! used 16 times - TMP_JAMP(131) = AMP(731) + AMP(1791) ! used 16 times - TMP_JAMP(130) = AMP(725) + AMP(1678) ! used 16 times - TMP_JAMP(129) = AMP(724) - AMP(1088) ! used 16 times - TMP_JAMP(128) = AMP(722) - AMP(1782) ! used 16 times - TMP_JAMP(127) = AMP(719) + AMP(721) ! used 16 times - TMP_JAMP(126) = AMP(715) - AMP(718) ! used 16 times - TMP_JAMP(125) = AMP(714) + AMP(716) ! used 16 times - TMP_JAMP(124) = AMP(681) + AMP(684) ! used 16 times - TMP_JAMP(123) = AMP(679) + AMP(682) ! used 16 times - TMP_JAMP(122) = AMP(1373) - AMP(1783) ! used 16 times - TMP_JAMP(121) = AMP(1363) - AMP(1366) ! used 16 times - TMP_JAMP(120) = AMP(729) + AMP(1545) ! used 16 times - TMP_JAMP(119) = AMP(728) + AMP(1789) ! used 16 times - TMP_JAMP(118) = AMP(726) + AMP(1544) ! used 16 times - TMP_JAMP(117) = AMP(720) - AMP(723) ! used 16 times - TMP_JAMP(116) = AMP(710) + AMP(712) ! used 16 times - TMP_JAMP(115) = AMP(709) + AMP(711) ! used 16 times - TMP_JAMP(114) = AMP(1346) + AMP(1385) ! used 16 times - TMP_JAMP(113) = AMP(1334) - AMP(1337) ! used 16 times - TMP_JAMP(112) = AMP(1181) + AMP(1336) ! used 16 times - TMP_JAMP(111) = AMP(1180) + AMP(1540) ! used 16 times - TMP_JAMP(110) = AMP(1162) - AMP(1178) ! used 16 times - TMP_JAMP(109) = AMP(1157) - AMP(1177) ! used 16 times - TMP_JAMP(108) = AMP(44) + AMP(1347) ! used 16 times - TMP_JAMP(107) = AMP(36) - AMP(1546) ! used 16 times - TMP_JAMP(106) = AMP(31) + AMP(34) ! used 16 times - TMP_JAMP(105) = AMP(10) - AMP(33) ! used 16 times - TMP_JAMP(104) = AMP(1354) - AMP(1386) ! 
used 16 times - TMP_JAMP(103) = AMP(1343) + AMP(1345) ! used 16 times - TMP_JAMP(102) = AMP(738) - AMP(1355) ! used 16 times - TMP_JAMP(101) = AMP(736) - AMP(1543) ! used 16 times - TMP_JAMP(100) = AMP(730) + AMP(735) ! used 16 times - TMP_JAMP(99) = AMP(713) - AMP(733) ! used 16 times - TMP_JAMP(98) = AMP(1353) + AMP(1838) ! used 16 times - TMP_JAMP(97) = AMP(1333) + AMP(1335) ! used 16 times - TMP_JAMP(96) = AMP(1095) - AMP(1844) ! used 16 times - TMP_JAMP(95) = AMP(1081) + AMP(1083) ! used 16 times - TMP_JAMP(94) = AMP(795) - AMP(1094) ! used 16 times - TMP_JAMP(93) = AMP(793) + AMP(1352) ! used 16 times - TMP_JAMP(92) = AMP(734) + AMP(737) ! used 16 times - TMP_JAMP(91) = AMP(680) + AMP(683) ! used 16 times - TMP_JAMP(90) = AMP(1351) - AMP(1837) ! used 16 times - TMP_JAMP(89) = AMP(1344) + AMP(1348) ! used 16 times - TMP_JAMP(88) = AMP(1093) + AMP(1843) ! used 16 times - TMP_JAMP(87) = AMP(1092) - AMP(1488) ! used 16 times - TMP_JAMP(86) = AMP(1086) + AMP(1090) ! used 16 times - TMP_JAMP(85) = AMP(1075) + AMP(1077) ! used 16 times - TMP_JAMP(84) = AMP(35) - AMP(1350) ! used 16 times - TMP_JAMP(83) = AMP(21) + AMP(1494) ! used 16 times - TMP_JAMP(82) = AMP(19) + AMP(32) ! used 16 times - TMP_JAMP(81) = AMP(1) + AMP(3) ! used 16 times - TMP_JAMP(80) = AMP(1356) - AMP(1731) ! used 16 times - TMP_JAMP(79) = AMP(1341) + AMP(1726) ! used 16 times - TMP_JAMP(78) = AMP(1338) + AMP(1340) ! used 16 times - TMP_JAMP(77) = AMP(1179) + AMP(1182) ! used 16 times - TMP_JAMP(76) = AMP(572) + AMP(1737) ! used 16 times - TMP_JAMP(75) = AMP(563) - AMP(1728) ! used 16 times - TMP_JAMP(74) = AMP(560) + AMP(562) ! used 16 times - TMP_JAMP(73) = AMP(522) + AMP(525) ! used 16 times - TMP_JAMP(72) = AMP(1349) - AMP(1729) ! used 16 times - TMP_JAMP(71) = AMP(1339) - AMP(1342) ! used 16 times - TMP_JAMP(70) = AMP(570) + AMP(1491) ! used 16 times - TMP_JAMP(69) = AMP(569) + AMP(1735) ! used 16 times - TMP_JAMP(68) = AMP(561) - AMP(564) ! 
used 16 times - TMP_JAMP(67) = AMP(550) + AMP(552) ! used 16 times - TMP_JAMP(66) = AMP(1317) + AMP(1672) ! used 16 times - TMP_JAMP(65) = AMP(1259) + AMP(1312) ! used 16 times - TMP_JAMP(64) = AMP(1257) + AMP(1260) ! used 16 times - TMP_JAMP(63) = AMP(1251) - AMP(1868) ! used 16 times - TMP_JAMP(62) = AMP(1240) - AMP(1256) ! used 16 times - TMP_JAMP(61) = AMP(1237) + AMP(1239) ! used 16 times - TMP_JAMP(60) = AMP(635) - AMP(1250) ! used 16 times - TMP_JAMP(59) = AMP(558) - AMP(1674) ! used 16 times - TMP_JAMP(58) = AMP(540) + AMP(543) ! used 16 times - TMP_JAMP(57) = AMP(539) + AMP(542) ! used 16 times - TMP_JAMP(56) = AMP(1249) + AMP(1867) ! used 16 times - TMP_JAMP(55) = AMP(1242) + AMP(1246) ! used 16 times - TMP_JAMP(54) = AMP(727) - AMP(1247) ! used 16 times - TMP_JAMP(53) = AMP(717) - AMP(1673) ! used 16 times - TMP_JAMP(52) = AMP(699) + AMP(702) ! used 16 times - TMP_JAMP(51) = AMP(697) + AMP(700) ! used 16 times - TMP_JAMP(50) = AMP(1254) - AMP(1841) ! used 16 times - TMP_JAMP(49) = AMP(1236) + AMP(1238) ! used 16 times - TMP_JAMP(48) = AMP(794) - AMP(1253) ! used 16 times - TMP_JAMP(47) = AMP(698) + AMP(701) ! used 16 times - TMP_JAMP(46) = AMP(1252) + AMP(1840) ! used 16 times - TMP_JAMP(45) = AMP(1241) + AMP(1243) ! used 16 times - TMP_JAMP(44) = AMP(568) - AMP(1244) ! used 16 times - TMP_JAMP(43) = AMP(538) + AMP(541) ! used 16 times - TMP_JAMP(42) = AMP(1258) + AMP(1594) ! used 16 times - TMP_JAMP(41) = AMP(1248) - AMP(1596) ! used 16 times - TMP_JAMP(40) = AMP(1235) - AMP(1255) ! used 16 times - TMP_JAMP(39) = AMP(1231) + AMP(1233) ! used 16 times - TMP_JAMP(38) = AMP(30) - AMP(1600) ! used 16 times - TMP_JAMP(37) = AMP(23) + AMP(1602) ! used 16 times - TMP_JAMP(36) = AMP(15) - AMP(27) ! used 16 times - TMP_JAMP(35) = AMP(11) + AMP(13) ! used 16 times - TMP_JAMP(34) = AMP(1245) - AMP(1595) ! used 16 times - TMP_JAMP(33) = AMP(1232) + AMP(1234) ! used 16 times - TMP_JAMP(32) = AMP(20) + AMP(1601) ! used 16 times - TMP_JAMP(31) = AMP(12) + AMP(14) ! 
used 16 times - TMP_JAMP(30) = AMP(954) - AMP(1097) ! used 16 times - TMP_JAMP(29) = AMP(952) + AMP(1328) ! used 16 times - TMP_JAMP(28) = AMP(897) - AMP(1331) ! used 16 times - TMP_JAMP(27) = AMP(893) + AMP(896) ! used 16 times - TMP_JAMP(26) = AMP(890) + AMP(1788) ! used 16 times - TMP_JAMP(25) = AMP(889) + AMP(894) ! used 16 times - TMP_JAMP(24) = AMP(881) - AMP(1781) ! used 16 times - TMP_JAMP(23) = AMP(878) + AMP(880) ! used 16 times - TMP_JAMP(22) = AMP(840) + AMP(843) ! used 16 times - TMP_JAMP(21) = AMP(839) + AMP(842) ! used 16 times - TMP_JAMP(20) = AMP(953) - AMP(1175) ! used 16 times - TMP_JAMP(19) = AMP(887) + AMP(1786) ! used 16 times - TMP_JAMP(18) = AMP(886) - AMP(1169) ! used 16 times - TMP_JAMP(17) = AMP(879) - AMP(882) ! used 16 times - TMP_JAMP(16) = AMP(857) + AMP(860) ! used 16 times - TMP_JAMP(15) = AMP(856) + AMP(859) ! used 16 times - TMP_JAMP(14) = AMP(891) + AMP(1734) ! used 16 times - TMP_JAMP(13) = AMP(876) - AMP(1727) ! used 16 times - TMP_JAMP(12) = AMP(873) + AMP(875) ! used 16 times - TMP_JAMP(11) = AMP(858) + AMP(861) ! used 16 times - TMP_JAMP(10) = AMP(884) + AMP(1732) ! used 16 times - TMP_JAMP(9) = AMP(883) - AMP(1091) ! used 16 times - TMP_JAMP(8) = AMP(874) - AMP(877) ! used 16 times - TMP_JAMP(7) = AMP(838) + AMP(841) ! used 16 times - TMP_JAMP(6) = AMP(895) - AMP(1597) ! used 16 times - TMP_JAMP(5) = AMP(888) + AMP(1599) ! used 16 times - TMP_JAMP(4) = AMP(872) - AMP(892) ! used 16 times - TMP_JAMP(3) = AMP(868) + AMP(870) ! used 16 times - TMP_JAMP(2) = AMP(885) + AMP(1598) ! used 16 times - TMP_JAMP(1) = AMP(869) + AMP(871) ! used 16 times - TMP_JAMP(315) = TMP_JAMP(186) + TMP_JAMP(183) ! used 16 times - TMP_JAMP(314) = TMP_JAMP(188) + TMP_JAMP(185) ! used 16 times - TMP_JAMP(313) = TMP_JAMP(190) - TMP_JAMP(184) ! used 16 times - TMP_JAMP(312) = TMP_JAMP(191) + TMP_JAMP(189) ! used 16 times - TMP_JAMP(311) = TMP_JAMP(192) - TMP_JAMP(187) ! used 16 times - TMP_JAMP(310) = TMP_JAMP(198) + TMP_JAMP(193) ! 
used 16 times - TMP_JAMP(309) = TMP_JAMP(199) - TMP_JAMP(197) ! used 16 times - TMP_JAMP(308) = TMP_JAMP(200) + TMP_JAMP(195) ! used 16 times - TMP_JAMP(307) = TMP_JAMP(201) - TMP_JAMP(194) ! used 16 times - TMP_JAMP(306) = TMP_JAMP(202) - TMP_JAMP(196) ! used 16 times - TMP_JAMP(305) = TMP_JAMP(206) - TMP_JAMP(204) ! used 16 times - TMP_JAMP(304) = TMP_JAMP(207) - TMP_JAMP(205) ! used 16 times - TMP_JAMP(303) = TMP_JAMP(208) + TMP_JAMP(203) ! used 16 times - TMP_JAMP(302) = TMP_JAMP(210) - TMP_JAMP(209) ! used 16 times - TMP_JAMP(301) = TMP_JAMP(169) - TMP_JAMP(168) ! used 16 times - TMP_JAMP(300) = TMP_JAMP(170) + TMP_JAMP(165) ! used 16 times - TMP_JAMP(299) = TMP_JAMP(172) + TMP_JAMP(167) ! used 16 times - TMP_JAMP(298) = TMP_JAMP(173) - TMP_JAMP(166) ! used 16 times - TMP_JAMP(297) = TMP_JAMP(174) + TMP_JAMP(171) ! used 16 times - TMP_JAMP(296) = TMP_JAMP(178) - TMP_JAMP(176) ! used 16 times - TMP_JAMP(295) = TMP_JAMP(179) - TMP_JAMP(177) ! used 16 times - TMP_JAMP(294) = TMP_JAMP(180) + TMP_JAMP(175) ! used 16 times - TMP_JAMP(293) = TMP_JAMP(182) - TMP_JAMP(181) ! used 16 times - TMP_JAMP(292) = TMP_JAMP(153) + TMP_JAMP(151) ! used 16 times - TMP_JAMP(291) = TMP_JAMP(155) - TMP_JAMP(154) ! used 16 times - TMP_JAMP(290) = TMP_JAMP(156) + TMP_JAMP(152) ! used 16 times - TMP_JAMP(289) = TMP_JAMP(160) + TMP_JAMP(158) ! used 16 times - TMP_JAMP(288) = TMP_JAMP(161) - TMP_JAMP(159) ! used 16 times - TMP_JAMP(287) = TMP_JAMP(162) + TMP_JAMP(157) ! used 16 times - TMP_JAMP(286) = TMP_JAMP(164) + TMP_JAMP(163) ! used 16 times - TMP_JAMP(285) = TMP_JAMP(139) + TMP_JAMP(138) ! used 16 times - TMP_JAMP(284) = TMP_JAMP(141) + TMP_JAMP(137) ! used 16 times - TMP_JAMP(283) = TMP_JAMP(142) + TMP_JAMP(140) ! used 16 times - TMP_JAMP(282) = TMP_JAMP(146) + TMP_JAMP(144) ! used 16 times - TMP_JAMP(281) = TMP_JAMP(147) + TMP_JAMP(143) ! used 16 times - TMP_JAMP(280) = TMP_JAMP(148) - TMP_JAMP(145) ! used 16 times - TMP_JAMP(279) = TMP_JAMP(150) - TMP_JAMP(149) ! 
used 16 times - TMP_JAMP(278) = TMP_JAMP(128) + TMP_JAMP(124) ! used 16 times - TMP_JAMP(277) = TMP_JAMP(129) + TMP_JAMP(123) ! used 16 times - TMP_JAMP(276) = TMP_JAMP(130) - TMP_JAMP(126) ! used 16 times - TMP_JAMP(275) = TMP_JAMP(131) + TMP_JAMP(127) ! used 16 times - TMP_JAMP(274) = TMP_JAMP(132) + TMP_JAMP(125) ! used 16 times - TMP_JAMP(273) = TMP_JAMP(135) + TMP_JAMP(133) ! used 16 times - TMP_JAMP(272) = TMP_JAMP(136) + TMP_JAMP(134) ! used 16 times - TMP_JAMP(271) = TMP_JAMP(118) + TMP_JAMP(116) ! used 16 times - TMP_JAMP(270) = TMP_JAMP(119) - TMP_JAMP(117) ! used 16 times - TMP_JAMP(269) = TMP_JAMP(120) + TMP_JAMP(115) ! used 16 times - TMP_JAMP(268) = TMP_JAMP(122) - TMP_JAMP(121) ! used 16 times - TMP_JAMP(267) = TMP_JAMP(107) - TMP_JAMP(105) ! used 16 times - TMP_JAMP(266) = TMP_JAMP(108) - TMP_JAMP(106) ! used 16 times - TMP_JAMP(265) = TMP_JAMP(111) - TMP_JAMP(109) ! used 16 times - TMP_JAMP(264) = TMP_JAMP(112) - TMP_JAMP(110) ! used 16 times - TMP_JAMP(263) = TMP_JAMP(114) - TMP_JAMP(113) ! used 16 times - TMP_JAMP(262) = TMP_JAMP(101) - TMP_JAMP(99) ! used 16 times - TMP_JAMP(261) = TMP_JAMP(102) + TMP_JAMP(100) ! used 16 times - TMP_JAMP(260) = TMP_JAMP(104) - TMP_JAMP(103) ! used 16 times - TMP_JAMP(259) = TMP_JAMP(93) - TMP_JAMP(92) ! used 16 times - TMP_JAMP(258) = TMP_JAMP(94) + TMP_JAMP(91) ! used 16 times - TMP_JAMP(257) = TMP_JAMP(96) + TMP_JAMP(95) ! used 16 times - TMP_JAMP(256) = TMP_JAMP(98) + TMP_JAMP(97) ! used 16 times - TMP_JAMP(255) = TMP_JAMP(83) + TMP_JAMP(81) ! used 16 times - TMP_JAMP(254) = TMP_JAMP(84) + TMP_JAMP(82) ! used 16 times - TMP_JAMP(253) = TMP_JAMP(87) + TMP_JAMP(85) ! used 16 times - TMP_JAMP(252) = TMP_JAMP(88) - TMP_JAMP(86) ! used 16 times - TMP_JAMP(251) = TMP_JAMP(90) - TMP_JAMP(89) ! used 16 times - TMP_JAMP(250) = TMP_JAMP(75) + TMP_JAMP(73) ! used 16 times - TMP_JAMP(249) = TMP_JAMP(76) + TMP_JAMP(74) ! used 16 times - TMP_JAMP(248) = TMP_JAMP(79) + TMP_JAMP(77) ! 
used 16 times - TMP_JAMP(247) = TMP_JAMP(80) + TMP_JAMP(78) ! used 16 times - TMP_JAMP(246) = TMP_JAMP(69) - TMP_JAMP(68) ! used 16 times - TMP_JAMP(245) = TMP_JAMP(70) + TMP_JAMP(67) ! used 16 times - TMP_JAMP(244) = TMP_JAMP(72) - TMP_JAMP(71) ! used 16 times - TMP_JAMP(243) = TMP_JAMP(59) + TMP_JAMP(58) ! used 16 times - TMP_JAMP(242) = TMP_JAMP(60) + TMP_JAMP(57) ! used 16 times - TMP_JAMP(241) = TMP_JAMP(63) + TMP_JAMP(61) ! used 16 times - TMP_JAMP(240) = TMP_JAMP(65) - TMP_JAMP(62) ! used 16 times - TMP_JAMP(239) = TMP_JAMP(66) + TMP_JAMP(64) ! used 16 times - TMP_JAMP(238) = TMP_JAMP(53) + TMP_JAMP(52) ! used 16 times - TMP_JAMP(237) = TMP_JAMP(54) + TMP_JAMP(51) ! used 16 times - TMP_JAMP(236) = TMP_JAMP(56) - TMP_JAMP(55) ! used 16 times - TMP_JAMP(235) = TMP_JAMP(48) + TMP_JAMP(47) ! used 16 times - TMP_JAMP(234) = TMP_JAMP(50) + TMP_JAMP(49) ! used 16 times - TMP_JAMP(233) = TMP_JAMP(44) + TMP_JAMP(43) ! used 16 times - TMP_JAMP(232) = TMP_JAMP(46) - TMP_JAMP(45) ! used 16 times - TMP_JAMP(231) = TMP_JAMP(37) + TMP_JAMP(35) ! used 16 times - TMP_JAMP(230) = TMP_JAMP(38) - TMP_JAMP(36) ! used 16 times - TMP_JAMP(229) = TMP_JAMP(41) + TMP_JAMP(39) ! used 16 times - TMP_JAMP(228) = TMP_JAMP(42) - TMP_JAMP(40) ! used 16 times - TMP_JAMP(227) = TMP_JAMP(32) + TMP_JAMP(31) ! used 16 times - TMP_JAMP(226) = TMP_JAMP(34) + TMP_JAMP(33) ! used 16 times - TMP_JAMP(225) = TMP_JAMP(24) + TMP_JAMP(22) ! used 16 times - TMP_JAMP(224) = TMP_JAMP(26) + TMP_JAMP(23) ! used 16 times - TMP_JAMP(223) = TMP_JAMP(28) + TMP_JAMP(25) ! used 16 times - TMP_JAMP(222) = TMP_JAMP(29) - TMP_JAMP(27) ! used 16 times - TMP_JAMP(221) = TMP_JAMP(30) + TMP_JAMP(21) ! used 16 times - TMP_JAMP(220) = TMP_JAMP(18) + TMP_JAMP(15) ! used 16 times - TMP_JAMP(219) = TMP_JAMP(19) - TMP_JAMP(17) ! used 16 times - TMP_JAMP(218) = TMP_JAMP(20) + TMP_JAMP(16) ! used 16 times - TMP_JAMP(217) = TMP_JAMP(13) + TMP_JAMP(11) ! used 16 times - TMP_JAMP(216) = TMP_JAMP(14) + TMP_JAMP(12) ! 
used 16 times - TMP_JAMP(215) = TMP_JAMP(9) + TMP_JAMP(7) ! used 16 times - TMP_JAMP(214) = TMP_JAMP(10) - TMP_JAMP(8) ! used 16 times - TMP_JAMP(213) = TMP_JAMP(5) + TMP_JAMP(3) ! used 16 times - TMP_JAMP(212) = TMP_JAMP(6) - TMP_JAMP(4) ! used 16 times - TMP_JAMP(211) = TMP_JAMP(2) + TMP_JAMP(1) ! used 16 times - TMP_JAMP(405) = TMP_JAMP(302) - AMP(1390) ! used 16 times - TMP_JAMP(404) = TMP_JAMP(303) + AMP(1822) ! used 16 times - TMP_JAMP(403) = TMP_JAMP(304) - AMP(1819) ! used 16 times - TMP_JAMP(402) = TMP_JAMP(305) + AMP(1392) ! used 16 times - TMP_JAMP(401) = TMP_JAMP(307) + AMP(1501) ! used 16 times - TMP_JAMP(400) = TMP_JAMP(308) - AMP(1824) ! used 16 times - TMP_JAMP(399) = TMP_JAMP(309) + AMP(1821) ! used 16 times - TMP_JAMP(398) = TMP_JAMP(310) - AMP(1495) ! used 16 times - TMP_JAMP(397) = TMP_JAMP(311) + AMP(1387) ! used 16 times - TMP_JAMP(396) = TMP_JAMP(312) + AMP(1381) ! used 16 times - TMP_JAMP(395) = TMP_JAMP(313) - AMP(1503) ! used 16 times - TMP_JAMP(394) = TMP_JAMP(315) + AMP(1497) ! used 16 times - TMP_JAMP(393) = TMP_JAMP(293) + AMP(1393) ! used 16 times - TMP_JAMP(392) = TMP_JAMP(294) - AMP(1687) ! used 16 times - TMP_JAMP(391) = TMP_JAMP(295) - AMP(1395) ! used 16 times - TMP_JAMP(390) = TMP_JAMP(296) - AMP(1684) ! used 16 times - TMP_JAMP(389) = TMP_JAMP(298) - AMP(1502) ! used 16 times - TMP_JAMP(388) = TMP_JAMP(299) + AMP(1689) ! used 16 times - TMP_JAMP(387) = TMP_JAMP(300) + AMP(1496) ! used 16 times - TMP_JAMP(386) = TMP_JAMP(301) + AMP(1686) ! used 16 times - TMP_JAMP(385) = TMP_JAMP(286) + AMP(1876) ! used 16 times - TMP_JAMP(384) = TMP_JAMP(287) - AMP(1823) ! used 16 times - TMP_JAMP(383) = TMP_JAMP(288) + AMP(1820) ! used 16 times - TMP_JAMP(382) = TMP_JAMP(289) - AMP(1878) ! used 16 times - TMP_JAMP(381) = TMP_JAMP(290) + AMP(1872) ! used 16 times - TMP_JAMP(380) = TMP_JAMP(291) + AMP(1866) ! used 16 times - TMP_JAMP(379) = TMP_JAMP(279) - AMP(1873) ! used 16 times - TMP_JAMP(378) = TMP_JAMP(280) + AMP(1875) ! 
used 16 times - TMP_JAMP(377) = TMP_JAMP(281) - AMP(1552) ! used 16 times - TMP_JAMP(376) = TMP_JAMP(282) - AMP(1549) ! used 16 times - TMP_JAMP(375) = TMP_JAMP(284) + AMP(1554) ! used 16 times - TMP_JAMP(374) = TMP_JAMP(285) + AMP(1551) ! used 16 times - TMP_JAMP(373) = TMP_JAMP(272) - AMP(1795) ! used 16 times - TMP_JAMP(372) = TMP_JAMP(273) - AMP(1784) ! used 16 times - TMP_JAMP(371) = TMP_JAMP(274) + AMP(1688) ! used 16 times - TMP_JAMP(370) = TMP_JAMP(275) + AMP(1797) ! used 16 times - TMP_JAMP(369) = TMP_JAMP(276) + AMP(1685) ! used 16 times - TMP_JAMP(368) = TMP_JAMP(278) + AMP(1790) ! used 16 times - TMP_JAMP(367) = TMP_JAMP(268) - AMP(1792) ! used 16 times - TMP_JAMP(366) = TMP_JAMP(269) + AMP(1553) ! used 16 times - TMP_JAMP(365) = TMP_JAMP(270) + AMP(1794) ! used 16 times - TMP_JAMP(364) = TMP_JAMP(271) + AMP(1550) ! used 16 times - TMP_JAMP(363) = TMP_JAMP(263) + AMP(1391) ! used 16 times - TMP_JAMP(362) = TMP_JAMP(265) + AMP(1555) ! used 16 times - TMP_JAMP(361) = TMP_JAMP(266) + AMP(1384) ! used 16 times - TMP_JAMP(360) = TMP_JAMP(267) - AMP(1557) ! used 16 times - TMP_JAMP(359) = TMP_JAMP(260) - AMP(1394) ! used 16 times - TMP_JAMP(358) = TMP_JAMP(262) - AMP(1556) ! used 16 times - TMP_JAMP(357) = TMP_JAMP(256) + AMP(1849) ! used 16 times - TMP_JAMP(356) = TMP_JAMP(257) - AMP(1851) ! used 16 times - TMP_JAMP(355) = TMP_JAMP(258) + AMP(1845) ! used 16 times - TMP_JAMP(354) = TMP_JAMP(259) + AMP(1839) ! used 16 times - TMP_JAMP(353) = TMP_JAMP(251) - AMP(1846) ! used 16 times - TMP_JAMP(352) = TMP_JAMP(252) + AMP(1848) ! used 16 times - TMP_JAMP(351) = TMP_JAMP(253) - AMP(1498) ! used 16 times - TMP_JAMP(350) = TMP_JAMP(255) + AMP(1500) ! used 16 times - TMP_JAMP(349) = TMP_JAMP(247) - AMP(1741) ! used 16 times - TMP_JAMP(348) = TMP_JAMP(248) - AMP(1730) ! used 16 times - TMP_JAMP(347) = TMP_JAMP(249) + AMP(1743) ! used 16 times - TMP_JAMP(346) = TMP_JAMP(250) + AMP(1736) ! used 16 times - TMP_JAMP(345) = TMP_JAMP(244) - AMP(1738) ! 
used 16 times - TMP_JAMP(344) = TMP_JAMP(245) + AMP(1499) ! used 16 times - TMP_JAMP(343) = TMP_JAMP(246) + AMP(1740) ! used 16 times - TMP_JAMP(342) = TMP_JAMP(239) - AMP(1676) ! used 16 times - TMP_JAMP(341) = TMP_JAMP(241) - AMP(1877) ! used 16 times - TMP_JAMP(340) = TMP_JAMP(242) + AMP(1869) ! used 16 times - TMP_JAMP(339) = TMP_JAMP(243) + AMP(1682) ! used 16 times - TMP_JAMP(338) = TMP_JAMP(236) + AMP(1874) ! used 16 times - TMP_JAMP(337) = TMP_JAMP(238) + AMP(1679) ! used 16 times - TMP_JAMP(336) = TMP_JAMP(234) - AMP(1850) ! used 16 times - TMP_JAMP(335) = TMP_JAMP(235) + AMP(1842) ! used 16 times - TMP_JAMP(334) = TMP_JAMP(232) + AMP(1847) ! used 16 times - TMP_JAMP(333) = TMP_JAMP(228) + AMP(1609) ! used 16 times - TMP_JAMP(332) = TMP_JAMP(229) - AMP(1606) ! used 16 times - TMP_JAMP(331) = TMP_JAMP(230) - AMP(1611) ! used 16 times - TMP_JAMP(330) = TMP_JAMP(231) + AMP(1608) ! used 16 times - TMP_JAMP(329) = TMP_JAMP(226) - AMP(1603) ! used 16 times - TMP_JAMP(328) = TMP_JAMP(227) + AMP(1605) ! used 16 times - TMP_JAMP(327) = TMP_JAMP(221) + AMP(1818) ! used 16 times - TMP_JAMP(326) = TMP_JAMP(222) + AMP(1812) ! used 16 times - TMP_JAMP(325) = TMP_JAMP(224) + AMP(1796) ! used 16 times - TMP_JAMP(324) = TMP_JAMP(225) + AMP(1787) ! used 16 times - TMP_JAMP(323) = TMP_JAMP(218) + AMP(1815) ! used 16 times - TMP_JAMP(322) = TMP_JAMP(219) + AMP(1793) ! used 16 times - TMP_JAMP(321) = TMP_JAMP(216) + AMP(1742) ! used 16 times - TMP_JAMP(320) = TMP_JAMP(217) + AMP(1733) ! used 16 times - TMP_JAMP(319) = TMP_JAMP(214) + AMP(1739) ! used 16 times - TMP_JAMP(318) = TMP_JAMP(212) - AMP(1610) ! used 16 times - TMP_JAMP(317) = TMP_JAMP(213) + AMP(1607) ! used 16 times - TMP_JAMP(316) = TMP_JAMP(211) + AMP(1604) ! used 16 times - TMP_JAMP(1030) = AMP(1455) + AMP(1456) ! used 8 times - TMP_JAMP(1029) = AMP(1147) + AMP(1537) ! used 8 times - TMP_JAMP(1028) = AMP(1125) - AMP(1516) ! used 8 times - TMP_JAMP(1027) = AMP(1122) + AMP(1123) ! 
used 8 times - TMP_JAMP(1026) = AMP(1117) + AMP(1125) ! used 8 times - TMP_JAMP(1025) = AMP(439) - AMP(442) ! used 8 times - TMP_JAMP(1024) = AMP(421) - AMP(424) ! used 8 times - TMP_JAMP(1023) = AMP(420) + AMP(422) ! used 8 times - TMP_JAMP(1022) = AMP(353) + AMP(440) ! used 8 times - TMP_JAMP(1021) = AMP(341) - AMP(353) ! used 8 times - TMP_JAMP(1020) = AMP(339) - AMP(447) ! used 8 times - TMP_JAMP(1019) = AMP(337) + AMP(339) ! used 8 times - TMP_JAMP(1018) = AMP(152) - AMP(1539) ! used 8 times - TMP_JAMP(1017) = AMP(151) + AMP(157) ! used 8 times - TMP_JAMP(1016) = AMP(139) - AMP(159) ! used 8 times - TMP_JAMP(1015) = AMP(99) - AMP(1458) ! used 8 times - TMP_JAMP(1014) = AMP(90) + AMP(1518) ! used 8 times - TMP_JAMP(1013) = AMP(88) + AMP(99) ! used 8 times - TMP_JAMP(1012) = AMP(84) + AMP(90) ! used 8 times - TMP_JAMP(1011) = TMP_JAMP(306) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(341) ! used 8 times - TMP_JAMP(1010) = TMP_JAMP(314) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(88) ! used 8 times - TMP_JAMP(1009) = TMP_JAMP(394) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(84) ! used 8 times - TMP_JAMP(1008) = TMP_JAMP(395) - TMP_JAMP(394) ! used 8 times - TMP_JAMP(1007) = TMP_JAMP(397) - TMP_JAMP(396) ! used 8 times - TMP_JAMP(1006) = TMP_JAMP(398) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1117) ! used 8 times - TMP_JAMP(1005) = TMP_JAMP(400) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(337) ! used 8 times - TMP_JAMP(1004) = TMP_JAMP(400) - TMP_JAMP(399) ! used 8 times - TMP_JAMP(1003) = TMP_JAMP(401) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1147) ! used 8 times - TMP_JAMP(1002) = TMP_JAMP(401) - TMP_JAMP(398) ! used 8 times - TMP_JAMP(1001) = TMP_JAMP(404) - TMP_JAMP(403) ! used 8 times - TMP_JAMP(1000) = TMP_JAMP(405) + TMP_JAMP(402) ! used 8 times - TMP_JAMP(999) = AMP(1457) - AMP(1690) ! used 8 times - TMP_JAMP(998) = AMP(1453) - AMP(1457) ! 
used 8 times - TMP_JAMP(997) = AMP(1064) - AMP(1066) ! used 8 times - TMP_JAMP(996) = AMP(1046) - AMP(1048) ! used 8 times - TMP_JAMP(995) = AMP(1044) + AMP(1050) ! used 8 times - TMP_JAMP(994) = AMP(622) - AMP(1538) ! used 8 times - TMP_JAMP(993) = AMP(597) - AMP(599) ! used 8 times - TMP_JAMP(992) = AMP(592) + AMP(600) ! used 8 times - TMP_JAMP(991) = AMP(513) - AMP(1068) ! used 8 times - TMP_JAMP(990) = AMP(504) + AMP(1662) ! used 8 times - TMP_JAMP(989) = AMP(502) + AMP(513) ! used 8 times - TMP_JAMP(988) = AMP(498) + AMP(504) ! used 8 times - TMP_JAMP(987) = TMP_JAMP(297) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(502) ! used 8 times - TMP_JAMP(986) = TMP_JAMP(387) + TMP_JAMP(386) ! used 8 times - TMP_JAMP(985) = TMP_JAMP(388) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(498) ! used 8 times - TMP_JAMP(984) = TMP_JAMP(388) - TMP_JAMP(386) ! used 8 times - TMP_JAMP(983) = TMP_JAMP(389) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(622) ! used 8 times - TMP_JAMP(982) = TMP_JAMP(389) - TMP_JAMP(387) ! used 8 times - TMP_JAMP(981) = TMP_JAMP(390) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1453) ! used 8 times - TMP_JAMP(980) = TMP_JAMP(392) - TMP_JAMP(390) ! used 8 times - TMP_JAMP(979) = TMP_JAMP(393) + TMP_JAMP(391) ! used 8 times - TMP_JAMP(978) = TMP_JAMP(394) + TMP_JAMP(387) ! used 8 times - TMP_JAMP(977) = TMP_JAMP(395) + TMP_JAMP(389) ! used 8 times - TMP_JAMP(976) = TMP_JAMP(396) - TMP_JAMP(393) ! used 8 times - TMP_JAMP(975) = TMP_JAMP(397) + TMP_JAMP(391) ! used 8 times - TMP_JAMP(974) = AMP(1201) - AMP(1826) ! used 8 times - TMP_JAMP(973) = AMP(1200) + AMP(1201) ! used 8 times - TMP_JAMP(972) = AMP(626) + AMP(631) ! used 8 times - TMP_JAMP(971) = AMP(598) - AMP(1202) ! used 8 times - TMP_JAMP(970) = AMP(526) + AMP(598) ! used 8 times - TMP_JAMP(969) = AMP(517) - AMP(633) ! used 8 times - TMP_JAMP(968) = AMP(441) + AMP(463) ! used 8 times - TMP_JAMP(967) = AMP(438) + AMP(441) ! 
used 8 times - TMP_JAMP(966) = AMP(356) + AMP(358) ! used 8 times - TMP_JAMP(965) = AMP(355) + AMP(357) ! used 8 times - TMP_JAMP(964) = TMP_JAMP(292) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(526) ! used 8 times - TMP_JAMP(963) = TMP_JAMP(380) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(626) ! used 8 times - TMP_JAMP(962) = TMP_JAMP(381) - TMP_JAMP(380) ! used 8 times - TMP_JAMP(961) = TMP_JAMP(383) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1200) ! used 8 times - TMP_JAMP(960) = TMP_JAMP(384) - TMP_JAMP(383) ! used 8 times - TMP_JAMP(959) = TMP_JAMP(385) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(438) ! used 8 times - TMP_JAMP(958) = TMP_JAMP(385) + TMP_JAMP(380) ! used 8 times - TMP_JAMP(957) = TMP_JAMP(385) + TMP_JAMP(382) ! used 8 times - TMP_JAMP(956) = TMP_JAMP(387) - TMP_JAMP(292) ! used 8 times - TMP_JAMP(955) = TMP_JAMP(398) - TMP_JAMP(387) ! used 8 times - TMP_JAMP(954) = TMP_JAMP(399) + TMP_JAMP(383) ! used 8 times - TMP_JAMP(953) = TMP_JAMP(400) + TMP_JAMP(384) ! used 8 times - TMP_JAMP(952) = TMP_JAMP(401) - TMP_JAMP(389) ! used 8 times - TMP_JAMP(951) = AMP(1464) + AMP(1465) ! used 8 times - TMP_JAMP(950) = AMP(1212) + AMP(1213) ! used 8 times - TMP_JAMP(949) = AMP(1207) + AMP(1215) ! used 8 times - TMP_JAMP(948) = AMP(1203) - AMP(1570) ! used 8 times - TMP_JAMP(947) = AMP(1195) + AMP(1203) ! used 8 times - TMP_JAMP(946) = AMP(111) - AMP(1467) ! used 8 times - TMP_JAMP(945) = AMP(108) + AMP(1581) ! used 8 times - TMP_JAMP(944) = AMP(106) + AMP(111) ! used 8 times - TMP_JAMP(943) = AMP(102) + AMP(108) ! used 8 times - TMP_JAMP(942) = AMP(89) + AMP(1572) ! used 8 times - TMP_JAMP(941) = AMP(86) + AMP(89) ! used 8 times - TMP_JAMP(940) = TMP_JAMP(283) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(106) ! used 8 times - TMP_JAMP(939) = TMP_JAMP(374) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(86) ! 
used 8 times - TMP_JAMP(938) = TMP_JAMP(375) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(102) ! used 8 times - TMP_JAMP(937) = TMP_JAMP(376) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1195) ! used 8 times - TMP_JAMP(936) = TMP_JAMP(377) + TMP_JAMP(376) ! used 8 times - TMP_JAMP(935) = TMP_JAMP(379) + TMP_JAMP(378) ! used 8 times - TMP_JAMP(934) = TMP_JAMP(380) + TMP_JAMP(379) ! used 8 times - TMP_JAMP(933) = TMP_JAMP(381) - TMP_JAMP(378) ! used 8 times - TMP_JAMP(932) = TMP_JAMP(394) + TMP_JAMP(374) ! used 8 times - TMP_JAMP(931) = TMP_JAMP(395) + TMP_JAMP(283) ! used 8 times - TMP_JAMP(930) = AMP(1150) + AMP(1774) ! used 8 times - TMP_JAMP(929) = AMP(1067) - AMP(1768) ! used 8 times - TMP_JAMP(928) = AMP(1062) + AMP(1067) ! used 8 times - TMP_JAMP(927) = AMP(758) + AMP(1691) ! used 8 times - TMP_JAMP(926) = AMP(757) - AMP(1124) ! used 8 times - TMP_JAMP(925) = AMP(756) - AMP(758) ! used 8 times - TMP_JAMP(924) = AMP(685) + AMP(757) ! used 8 times - TMP_JAMP(923) = AMP(678) - AMP(1776) ! used 8 times - TMP_JAMP(922) = AMP(663) + AMP(1661) ! used 8 times - TMP_JAMP(921) = AMP(659) + AMP(662) ! used 8 times - TMP_JAMP(920) = AMP(657) + AMP(663) ! used 8 times - TMP_JAMP(919) = TMP_JAMP(277) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(685) ! used 8 times - TMP_JAMP(918) = TMP_JAMP(368) + ((0.000000000000000D+00 + TMP_JAMP(15) = AMP(37) + AMP(40) ! used 16 times + TMP_JAMP(14) = AMP(25) + AMP(28) ! used 16 times + TMP_JAMP(13) = AMP(16) + AMP(26) ! used 16 times + TMP_JAMP(12) = AMP(5) - AMP(38) ! used 16 times + TMP_JAMP(11) = AMP(2) + AMP(4) ! used 16 times + TMP_JAMP(10) = AMP(22) + AMP(39) ! used 16 times + TMP_JAMP(9) = AMP(7) + AMP(9) ! used 16 times + TMP_JAMP(8) = AMP(6) + AMP(8) ! used 16 times + TMP_JAMP(7) = AMP(31) + AMP(34) ! used 16 times + TMP_JAMP(6) = AMP(10) - AMP(33) ! used 16 times + TMP_JAMP(5) = AMP(19) + AMP(32) ! used 16 times + TMP_JAMP(4) = AMP(1) + AMP(3) ! 
used 16 times + TMP_JAMP(3) = AMP(15) - AMP(27) ! used 16 times + TMP_JAMP(2) = AMP(11) + AMP(13) ! used 16 times + TMP_JAMP(1) = AMP(12) + AMP(14) ! used 16 times + TMP_JAMP(30) = TMP_JAMP(15) + AMP(43) ! used 16 times + TMP_JAMP(29) = TMP_JAMP(14) - AMP(45) ! used 16 times + TMP_JAMP(28) = TMP_JAMP(13) + AMP(29) ! used 16 times + TMP_JAMP(27) = TMP_JAMP(12) - AMP(41) ! used 16 times + TMP_JAMP(26) = TMP_JAMP(11) + AMP(18) ! used 16 times + TMP_JAMP(25) = TMP_JAMP(10) + AMP(42) ! used 16 times + TMP_JAMP(24) = TMP_JAMP(9) + AMP(17) ! used 16 times + TMP_JAMP(23) = TMP_JAMP(8) + AMP(24) ! used 16 times + TMP_JAMP(22) = TMP_JAMP(7) - AMP(44) ! used 16 times + TMP_JAMP(21) = TMP_JAMP(6) - AMP(36) ! used 16 times + TMP_JAMP(20) = TMP_JAMP(5) + AMP(35) ! used 16 times + TMP_JAMP(19) = TMP_JAMP(4) + AMP(21) ! used 16 times + TMP_JAMP(18) = TMP_JAMP(3) - AMP(30) ! used 16 times + TMP_JAMP(17) = TMP_JAMP(2) + AMP(23) ! used 16 times + TMP_JAMP(16) = TMP_JAMP(1) + AMP(20) ! used 16 times + TMP_JAMP(113) = TMP_JAMP(30) + TMP_JAMP(29) ! used 8 times + TMP_JAMP(112) = TMP_JAMP(30) - TMP_JAMP(22) ! used 8 times + TMP_JAMP(111) = TMP_JAMP(30) - TMP_JAMP(25) ! used 8 times + TMP_JAMP(110) = TMP_JAMP(29) + TMP_JAMP(28) ! used 8 times + TMP_JAMP(109) = TMP_JAMP(29) + TMP_JAMP(22) ! used 8 times + TMP_JAMP(108) = TMP_JAMP(28) - TMP_JAMP(26) ! used 8 times + TMP_JAMP(107) = TMP_JAMP(28) + TMP_JAMP(24) ! used 8 times + TMP_JAMP(106) = TMP_JAMP(27) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(152) ! used 8 times + TMP_JAMP(105) = TMP_JAMP(27) + TMP_JAMP(26) ! used 8 times + TMP_JAMP(104) = TMP_JAMP(27) - TMP_JAMP(25) ! used 8 times + TMP_JAMP(103) = TMP_JAMP(27) - TMP_JAMP(19) ! used 8 times + TMP_JAMP(102) = TMP_JAMP(27) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(153) ! used 8 times + TMP_JAMP(101) = TMP_JAMP(26) + TMP_JAMP(24) ! used 8 times + TMP_JAMP(100) = TMP_JAMP(26) + TMP_JAMP(19) ! used 8 times + TMP_JAMP(99) = TMP_JAMP(25) - TMP_JAMP(23) ! 
used 8 times + TMP_JAMP(98) = TMP_JAMP(25) + TMP_JAMP(17) ! used 8 times + TMP_JAMP(97) = TMP_JAMP(24) + TMP_JAMP(23) ! used 8 times + TMP_JAMP(96) = TMP_JAMP(23) + TMP_JAMP(17) ! used 8 times + TMP_JAMP(95) = TMP_JAMP(22) + TMP_JAMP(20) ! used 8 times + TMP_JAMP(94) = TMP_JAMP(21) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(146) ! used 8 times + TMP_JAMP(93) = TMP_JAMP(21) - TMP_JAMP(20) ! used 8 times + TMP_JAMP(92) = TMP_JAMP(21) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(147) ! used 8 times + TMP_JAMP(91) = TMP_JAMP(20) - TMP_JAMP(19) ! used 8 times + TMP_JAMP(90) = TMP_JAMP(20) + TMP_JAMP(16) ! used 8 times + TMP_JAMP(89) = TMP_JAMP(19) + TMP_JAMP(16) ! used 8 times + TMP_JAMP(88) = TMP_JAMP(18) - TMP_JAMP(17) ! used 8 times + TMP_JAMP(87) = TMP_JAMP(18) + TMP_JAMP(16) ! used 8 times + TMP_JAMP(86) = TMP_JAMP(18) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(140) ! used 8 times + TMP_JAMP(85) = TMP_JAMP(17) + TMP_JAMP(16) ! used 8 times + TMP_JAMP(84) = AMP(439) - AMP(442) ! used 8 times + TMP_JAMP(83) = AMP(421) - AMP(424) ! used 8 times + TMP_JAMP(82) = AMP(420) + AMP(422) ! used 8 times + TMP_JAMP(81) = AMP(341) - AMP(353) ! used 8 times + TMP_JAMP(80) = AMP(337) + AMP(339) ! used 8 times + TMP_JAMP(79) = AMP(151) + AMP(157) ! used 8 times + TMP_JAMP(78) = AMP(139) - AMP(159) ! used 8 times + TMP_JAMP(77) = AMP(88) + AMP(99) ! used 8 times + TMP_JAMP(76) = AMP(84) + AMP(90) ! used 8 times + TMP_JAMP(75) = AMP(438) + AMP(441) ! used 8 times + TMP_JAMP(74) = AMP(356) + AMP(358) ! used 8 times + TMP_JAMP(73) = AMP(355) + AMP(357) ! used 8 times + TMP_JAMP(72) = AMP(106) + AMP(111) ! used 8 times + TMP_JAMP(71) = AMP(102) + AMP(108) ! used 8 times + TMP_JAMP(70) = AMP(86) + AMP(89) ! used 8 times + TMP_JAMP(69) = AMP(430) - AMP(433) ! used 8 times + TMP_JAMP(68) = AMP(359) - AMP(371) ! used 8 times + TMP_JAMP(67) = AMP(145) - AMP(158) ! used 8 times + TMP_JAMP(66) = AMP(429) + AMP(431) ! 
used 8 times + TMP_JAMP(65) = AMP(338) + AMP(340) ! used 8 times + TMP_JAMP(64) = AMP(118) + AMP(123) ! used 8 times + TMP_JAMP(63) = AMP(114) + AMP(120) ! used 8 times + TMP_JAMP(62) = AMP(377) - AMP(389) ! used 8 times + TMP_JAMP(61) = AMP(374) + AMP(376) ! used 8 times + TMP_JAMP(60) = AMP(373) + AMP(375) ! used 8 times + TMP_JAMP(59) = AMP(104) + AMP(107) ! used 8 times + TMP_JAMP(58) = AMP(116) + AMP(119) ! used 8 times + TMP_JAMP(57) = AMP(70) + AMP(81) ! used 8 times + TMP_JAMP(56) = AMP(66) + AMP(72) ! used 8 times + TMP_JAMP(55) = AMP(68) + AMP(71) ! used 8 times + TMP_JAMP(54) = AMP(130) + AMP(135) ! used 8 times + TMP_JAMP(53) = AMP(126) + AMP(132) ! used 8 times + TMP_JAMP(52) = AMP(128) + AMP(131) ! used 8 times + TMP_JAMP(51) = AMP(52) + AMP(63) ! used 8 times + TMP_JAMP(50) = AMP(48) + AMP(54) ! used 8 times + TMP_JAMP(49) = AMP(50) + AMP(53) ! used 8 times + TMP_JAMP(48) = AMP(149) + AMP(154) ! used 8 times + TMP_JAMP(47) = AMP(137) - AMP(156) ! used 8 times + TMP_JAMP(46) = AMP(143) - AMP(155) ! used 8 times + TMP_JAMP(45) = AMP(280) - AMP(283) ! used 8 times + TMP_JAMP(44) = AMP(262) - AMP(265) ! used 8 times + TMP_JAMP(43) = AMP(261) + AMP(263) ! used 8 times + TMP_JAMP(42) = AMP(182) - AMP(194) ! used 8 times + TMP_JAMP(41) = AMP(178) + AMP(180) ! used 8 times + TMP_JAMP(40) = AMP(279) + AMP(282) ! used 8 times + TMP_JAMP(39) = AMP(197) + AMP(199) ! used 8 times + TMP_JAMP(38) = AMP(196) + AMP(198) ! used 8 times + TMP_JAMP(37) = AMP(271) - AMP(274) ! used 8 times + TMP_JAMP(36) = AMP(200) - AMP(212) ! used 8 times + TMP_JAMP(35) = AMP(270) + AMP(272) ! used 8 times + TMP_JAMP(34) = AMP(179) + AMP(181) ! used 8 times + TMP_JAMP(33) = AMP(218) - AMP(230) ! used 8 times + TMP_JAMP(32) = AMP(215) + AMP(217) ! used 8 times + TMP_JAMP(31) = AMP(214) + AMP(216) ! used 8 times + TMP_JAMP(140) = TMP_JAMP(82) + AMP(445) ! used 8 times + TMP_JAMP(139) = TMP_JAMP(81) - AMP(440) ! used 8 times + TMP_JAMP(138) = TMP_JAMP(80) - AMP(447) ! 
used 8 times + TMP_JAMP(137) = TMP_JAMP(75) + AMP(463) ! used 8 times + TMP_JAMP(136) = TMP_JAMP(74) - AMP(465) ! used 8 times + TMP_JAMP(135) = TMP_JAMP(73) - AMP(446) ! used 8 times + TMP_JAMP(134) = TMP_JAMP(68) - AMP(432) ! used 8 times + TMP_JAMP(133) = TMP_JAMP(66) + AMP(454) ! used 8 times + TMP_JAMP(132) = TMP_JAMP(65) - AMP(456) ! used 8 times + TMP_JAMP(131) = TMP_JAMP(62) - AMP(423) ! used 8 times + TMP_JAMP(130) = TMP_JAMP(61) - AMP(464) ! used 8 times + TMP_JAMP(129) = TMP_JAMP(60) - AMP(455) ! used 8 times + TMP_JAMP(128) = TMP_JAMP(45) + AMP(316) ! used 8 times + TMP_JAMP(127) = TMP_JAMP(44) - AMP(318) ! used 8 times + TMP_JAMP(126) = TMP_JAMP(43) + AMP(286) ! used 8 times + TMP_JAMP(125) = TMP_JAMP(42) - AMP(281) ! used 8 times + TMP_JAMP(124) = TMP_JAMP(41) - AMP(288) ! used 8 times + TMP_JAMP(123) = TMP_JAMP(40) + AMP(304) ! used 8 times + TMP_JAMP(122) = TMP_JAMP(39) - AMP(306) ! used 8 times + TMP_JAMP(121) = TMP_JAMP(38) - AMP(287) ! used 8 times + TMP_JAMP(120) = TMP_JAMP(37) - AMP(317) ! used 8 times + TMP_JAMP(119) = TMP_JAMP(36) - AMP(273) ! used 8 times + TMP_JAMP(118) = TMP_JAMP(35) + AMP(295) ! used 8 times + TMP_JAMP(117) = TMP_JAMP(34) - AMP(297) ! used 8 times + TMP_JAMP(116) = TMP_JAMP(33) - AMP(264) ! used 8 times + TMP_JAMP(115) = TMP_JAMP(32) - AMP(305) ! used 8 times + TMP_JAMP(114) = TMP_JAMP(31) - AMP(296) ! used 8 times + TMP_JAMP(312) = TMP_JAMP(140) + TMP_JAMP(138) ! used 4 times + TMP_JAMP(311) = TMP_JAMP(140) - TMP_JAMP(135) ! used 4 times + TMP_JAMP(310) = TMP_JAMP(139) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(352) ! used 4 times + TMP_JAMP(309) = TMP_JAMP(139) - TMP_JAMP(138) ! used 4 times + TMP_JAMP(308) = TMP_JAMP(139) - TMP_JAMP(137) ! used 4 times + TMP_JAMP(307) = TMP_JAMP(139) + TMP_JAMP(132) ! used 4 times + TMP_JAMP(306) = TMP_JAMP(139) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(354) ! used 4 times + TMP_JAMP(305) = TMP_JAMP(138) + TMP_JAMP(135) ! 
used 4 times + TMP_JAMP(304) = TMP_JAMP(138) + TMP_JAMP(132) ! used 4 times + TMP_JAMP(303) = TMP_JAMP(137) + TMP_JAMP(136) ! used 4 times + TMP_JAMP(302) = TMP_JAMP(137) - TMP_JAMP(84) ! used 4 times + TMP_JAMP(301) = TMP_JAMP(136) + TMP_JAMP(135) ! used 4 times + TMP_JAMP(300) = TMP_JAMP(136) + TMP_JAMP(130) ! used 4 times + TMP_JAMP(299) = TMP_JAMP(134) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(370) ! used 4 times + TMP_JAMP(298) = TMP_JAMP(134) - TMP_JAMP(133) ! used 4 times + TMP_JAMP(297) = TMP_JAMP(134) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(372) ! used 4 times + TMP_JAMP(296) = TMP_JAMP(133) + TMP_JAMP(132) ! used 4 times + TMP_JAMP(295) = TMP_JAMP(133) - TMP_JAMP(129) ! used 4 times + TMP_JAMP(294) = TMP_JAMP(132) + TMP_JAMP(129) ! used 4 times + TMP_JAMP(293) = TMP_JAMP(131) + TMP_JAMP(130) ! used 4 times + TMP_JAMP(292) = TMP_JAMP(131) - TMP_JAMP(129) ! used 4 times + TMP_JAMP(291) = TMP_JAMP(131) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(388) ! used 4 times + TMP_JAMP(290) = TMP_JAMP(130) + TMP_JAMP(129) ! used 4 times + TMP_JAMP(289) = TMP_JAMP(128) + TMP_JAMP(127) ! used 4 times + TMP_JAMP(288) = TMP_JAMP(128) - TMP_JAMP(120) ! used 4 times + TMP_JAMP(287) = TMP_JAMP(128) - TMP_JAMP(123) ! used 4 times + TMP_JAMP(286) = TMP_JAMP(127) + TMP_JAMP(126) ! used 4 times + TMP_JAMP(285) = TMP_JAMP(127) + TMP_JAMP(120) ! used 4 times + TMP_JAMP(284) = TMP_JAMP(126) + TMP_JAMP(124) ! used 4 times + TMP_JAMP(283) = TMP_JAMP(126) - TMP_JAMP(121) ! used 4 times + TMP_JAMP(282) = TMP_JAMP(125) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(195) ! used 4 times + TMP_JAMP(281) = TMP_JAMP(125) - TMP_JAMP(124) ! used 4 times + TMP_JAMP(280) = TMP_JAMP(125) - TMP_JAMP(123) ! used 4 times + TMP_JAMP(279) = TMP_JAMP(125) + TMP_JAMP(117) ! used 4 times + TMP_JAMP(278) = TMP_JAMP(125) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(193) ! 
used 4 times + TMP_JAMP(277) = TMP_JAMP(124) + TMP_JAMP(121) ! used 4 times + TMP_JAMP(276) = TMP_JAMP(124) + TMP_JAMP(117) ! used 4 times + TMP_JAMP(275) = TMP_JAMP(123) + TMP_JAMP(122) ! used 4 times + TMP_JAMP(274) = TMP_JAMP(123) - TMP_JAMP(115) ! used 4 times + TMP_JAMP(273) = TMP_JAMP(122) + TMP_JAMP(121) ! used 4 times + TMP_JAMP(272) = TMP_JAMP(122) + TMP_JAMP(115) ! used 4 times + TMP_JAMP(271) = TMP_JAMP(120) + TMP_JAMP(118) ! used 4 times + TMP_JAMP(270) = TMP_JAMP(119) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(213) ! used 4 times + TMP_JAMP(269) = TMP_JAMP(119) - TMP_JAMP(118) ! used 4 times + TMP_JAMP(268) = TMP_JAMP(119) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(211) ! used 4 times + TMP_JAMP(267) = TMP_JAMP(118) + TMP_JAMP(117) ! used 4 times + TMP_JAMP(266) = TMP_JAMP(118) - TMP_JAMP(114) ! used 4 times + TMP_JAMP(265) = TMP_JAMP(117) + TMP_JAMP(114) ! used 4 times + TMP_JAMP(264) = TMP_JAMP(116) + TMP_JAMP(115) ! used 4 times + TMP_JAMP(263) = TMP_JAMP(116) - TMP_JAMP(114) ! used 4 times + TMP_JAMP(262) = TMP_JAMP(116) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(231) ! used 4 times + TMP_JAMP(261) = TMP_JAMP(115) + TMP_JAMP(114) ! used 4 times + TMP_JAMP(260) = TMP_JAMP(112) - TMP_JAMP(91) ! used 4 times + TMP_JAMP(259) = TMP_JAMP(112) - TMP_JAMP(99) ! used 4 times + TMP_JAMP(258) = TMP_JAMP(109) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(67) ! used 4 times + TMP_JAMP(257) = TMP_JAMP(109) + TMP_JAMP(107) ! used 4 times + TMP_JAMP(256) = TMP_JAMP(108) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(54) ! used 4 times + TMP_JAMP(255) = TMP_JAMP(107) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(54) ! used 4 times + TMP_JAMP(254) = TMP_JAMP(102) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(51) ! used 4 times + TMP_JAMP(253) = TMP_JAMP(101) - TMP_JAMP(91) ! 
used 4 times + TMP_JAMP(252) = TMP_JAMP(97) + TMP_JAMP(26) ! used 4 times + TMP_JAMP(251) = TMP_JAMP(97) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(71) ! used 4 times + TMP_JAMP(250) = TMP_JAMP(91) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(64) ! used 4 times + TMP_JAMP(249) = TMP_JAMP(91) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(56) ! used 4 times + TMP_JAMP(248) = TMP_JAMP(86) + AMP(134) ! used 4 times + TMP_JAMP(247) = TMP_JAMP(85) + TMP_JAMP(25) ! used 4 times + TMP_JAMP(246) = TMP_JAMP(85) + TMP_JAMP(23) ! used 4 times + TMP_JAMP(245) = TMP_JAMP(85) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(58) ! used 4 times + TMP_JAMP(244) = TMP_JAMP(84) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(437) ! used 4 times + TMP_JAMP(243) = TMP_JAMP(84) + TMP_JAMP(83) ! used 4 times + TMP_JAMP(242) = TMP_JAMP(84) - TMP_JAMP(69) ! used 4 times + TMP_JAMP(241) = TMP_JAMP(83) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(419) ! used 4 times + TMP_JAMP(240) = TMP_JAMP(83) + TMP_JAMP(69) ! used 4 times + TMP_JAMP(239) = TMP_JAMP(79) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(437) ! used 4 times + TMP_JAMP(238) = TMP_JAMP(79) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(150) ! used 4 times + TMP_JAMP(237) = TMP_JAMP(79) - TMP_JAMP(67) ! used 4 times + TMP_JAMP(236) = TMP_JAMP(78) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(419) ! used 4 times + TMP_JAMP(235) = TMP_JAMP(78) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(98) ! used 4 times + TMP_JAMP(234) = TMP_JAMP(78) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(138) ! used 4 times + TMP_JAMP(233) = TMP_JAMP(77) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(98) ! used 4 times + TMP_JAMP(232) = TMP_JAMP(77) - TMP_JAMP(76) ! 
used 4 times + TMP_JAMP(231) = TMP_JAMP(77) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(28) ! used 4 times + TMP_JAMP(230) = TMP_JAMP(76) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(85) ! used 4 times + TMP_JAMP(229) = TMP_JAMP(76) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(96) ! used 4 times + TMP_JAMP(228) = TMP_JAMP(72) - TMP_JAMP(71) ! used 4 times + TMP_JAMP(227) = TMP_JAMP(72) + TMP_JAMP(59) ! used 4 times + TMP_JAMP(226) = TMP_JAMP(71) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(103) ! used 4 times + TMP_JAMP(225) = TMP_JAMP(70) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(93) ! used 4 times + TMP_JAMP(224) = TMP_JAMP(70) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(87) ! used 4 times + TMP_JAMP(223) = TMP_JAMP(69) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(428) ! used 4 times + TMP_JAMP(222) = TMP_JAMP(67) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(428) ! used 4 times + TMP_JAMP(221) = TMP_JAMP(67) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(144) ! used 4 times + TMP_JAMP(220) = TMP_JAMP(64) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(122) ! used 4 times + TMP_JAMP(219) = TMP_JAMP(64) + TMP_JAMP(58) ! used 4 times + TMP_JAMP(218) = TMP_JAMP(63) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(96) ! used 4 times + TMP_JAMP(217) = TMP_JAMP(63) + TMP_JAMP(58) ! used 4 times + TMP_JAMP(216) = TMP_JAMP(63) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(115) ! used 4 times + TMP_JAMP(215) = TMP_JAMP(59) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(105) ! used 4 times + TMP_JAMP(214) = TMP_JAMP(58) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(117) ! used 4 times + TMP_JAMP(213) = TMP_JAMP(57) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(80) ! 
used 4 times + TMP_JAMP(212) = TMP_JAMP(57) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(79) ! used 4 times + TMP_JAMP(211) = TMP_JAMP(56) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(67) ! used 4 times + TMP_JAMP(210) = TMP_JAMP(55) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(75) ! used 4 times + TMP_JAMP(209) = TMP_JAMP(55) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(69) ! used 4 times + TMP_JAMP(208) = TMP_JAMP(54) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(134) ! used 4 times + TMP_JAMP(207) = TMP_JAMP(54) + TMP_JAMP(52) ! used 4 times + TMP_JAMP(206) = TMP_JAMP(53) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(78) ! used 4 times + TMP_JAMP(205) = TMP_JAMP(53) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(127) ! used 4 times + TMP_JAMP(204) = TMP_JAMP(52) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(129) ! used 4 times + TMP_JAMP(203) = TMP_JAMP(51) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(62) ! used 4 times + TMP_JAMP(202) = TMP_JAMP(50) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(49) ! used 4 times + TMP_JAMP(201) = TMP_JAMP(50) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(60) ! used 4 times + TMP_JAMP(200) = TMP_JAMP(49) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(57) ! used 4 times + TMP_JAMP(199) = TMP_JAMP(49) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(51) ! used 4 times + TMP_JAMP(198) = TMP_JAMP(48) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(109) ! used 4 times + TMP_JAMP(197) = TMP_JAMP(48) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(148) ! used 4 times + TMP_JAMP(196) = TMP_JAMP(47) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(29) ! used 4 times + TMP_JAMP(195) = TMP_JAMP(47) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(133) ! 
used 4 times + TMP_JAMP(194) = TMP_JAMP(46) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(121) ! used 4 times + TMP_JAMP(193) = TMP_JAMP(46) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(142) ! used 4 times + TMP_JAMP(192) = TMP_JAMP(28) - AMP(97) ! used 4 times + TMP_JAMP(191) = TMP_JAMP(25) - AMP(61) ! used 4 times + TMP_JAMP(190) = AMP(416) + AMP(451) ! used 4 times + TMP_JAMP(189) = AMP(350) - AMP(453) ! used 4 times + TMP_JAMP(188) = AMP(85) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(152) ! used 4 times + TMP_JAMP(187) = AMP(443) + AMP(466) ! used 4 times + TMP_JAMP(186) = AMP(368) - AMP(452) ! used 4 times + TMP_JAMP(185) = AMP(361) - AMP(468) ! used 4 times + TMP_JAMP(184) = AMP(110) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(152) ! used 4 times + TMP_JAMP(183) = AMP(434) + AMP(457) ! used 4 times + TMP_JAMP(182) = AMP(343) - AMP(459) ! used 4 times + TMP_JAMP(181) = AMP(379) - AMP(467) ! used 4 times + TMP_JAMP(180) = AMP(381) - AMP(458) ! used 4 times + TMP_JAMP(179) = AMP(97) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(141) ! used 4 times + TMP_JAMP(178) = AMP(117) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(141) ! used 4 times + TMP_JAMP(177) = AMP(407) + AMP(460) ! used 4 times + TMP_JAMP(176) = AMP(347) - AMP(462) ! used 4 times + TMP_JAMP(175) = AMP(386) - AMP(461) ! used 4 times + TMP_JAMP(174) = AMP(425) + AMP(448) ! used 4 times + TMP_JAMP(173) = AMP(345) - AMP(450) ! used 4 times + TMP_JAMP(172) = AMP(363) - AMP(449) ! used 4 times + TMP_JAMP(171) = AMP(398) + AMP(469) ! used 4 times + TMP_JAMP(170) = AMP(365) - AMP(471) ! used 4 times + TMP_JAMP(169) = AMP(383) - AMP(470) ! used 4 times + TMP_JAMP(168) = AMP(331) + AMP(334) ! used 4 times + TMP_JAMP(167) = AMP(325) - AMP(336) ! used 4 times + TMP_JAMP(166) = AMP(328) - AMP(335) ! used 4 times + TMP_JAMP(165) = AMP(136) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(141) ! 
used 4 times + TMP_JAMP(164) = AMP(266) + AMP(289) ! used 4 times + TMP_JAMP(163) = AMP(186) - AMP(291) ! used 4 times + TMP_JAMP(162) = AMP(172) + AMP(175) ! used 4 times + TMP_JAMP(161) = AMP(166) - AMP(177) ! used 4 times + TMP_JAMP(160) = AMP(239) + AMP(310) ! used 4 times + TMP_JAMP(159) = AMP(206) - AMP(312) ! used 4 times + TMP_JAMP(158) = AMP(204) - AMP(290) ! used 4 times + TMP_JAMP(157) = AMP(169) - AMP(176) ! used 4 times + TMP_JAMP(156) = AMP(248) + AMP(301) ! used 4 times + TMP_JAMP(155) = AMP(188) - AMP(303) ! used 4 times + TMP_JAMP(154) = AMP(224) - AMP(311) ! used 4 times + TMP_JAMP(153) = AMP(227) - AMP(302) ! used 4 times + TMP_JAMP(152) = AMP(275) + AMP(298) ! used 4 times + TMP_JAMP(151) = AMP(184) - AMP(300) ! used 4 times + TMP_JAMP(150) = AMP(222) - AMP(299) ! used 4 times + TMP_JAMP(149) = AMP(257) + AMP(292) ! used 4 times + TMP_JAMP(148) = AMP(191) - AMP(294) ! used 4 times + TMP_JAMP(147) = AMP(209) - AMP(293) ! used 4 times + TMP_JAMP(146) = AMP(284) + AMP(307) ! used 4 times + TMP_JAMP(145) = AMP(202) - AMP(309) ! used 4 times + TMP_JAMP(144) = AMP(220) - AMP(308) ! used 4 times + TMP_JAMP(143) = AMP(278) + AMP(313) ! used 4 times + TMP_JAMP(142) = AMP(260) - AMP(315) ! used 4 times + TMP_JAMP(141) = AMP(269) - AMP(314) ! used 4 times + TMP_JAMP(324) = TMP_JAMP(254) - TMP_JAMP(191) ! used 4 times + TMP_JAMP(323) = TMP_JAMP(234) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(86) ! used 4 times + TMP_JAMP(322) = TMP_JAMP(224) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(94) ! used 4 times + TMP_JAMP(321) = TMP_JAMP(221) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(94) ! used 4 times + TMP_JAMP(320) = TMP_JAMP(215) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(88) ! used 4 times + TMP_JAMP(319) = TMP_JAMP(212) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(92) ! 
used 4 times + TMP_JAMP(318) = TMP_JAMP(209) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(86) ! used 4 times + TMP_JAMP(317) = TMP_JAMP(204) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(92) ! used 4 times + TMP_JAMP(316) = TMP_JAMP(198) - TMP_JAMP(72) ! used 4 times + TMP_JAMP(315) = TMP_JAMP(197) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(102) ! used 4 times + TMP_JAMP(314) = TMP_JAMP(196) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(165) ! used 4 times + TMP_JAMP(313) = TMP_JAMP(193) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(92) ! used 4 times + TMP_JAMP(325) = TMP_JAMP(190) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(140) ! used 3 times + TMP_JAMP(531) = TMP_JAMP(325) + TMP_JAMP(189) ! used 2 times + TMP_JAMP(530) = TMP_JAMP(325) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(417) ! used 2 times + TMP_JAMP(529) = TMP_JAMP(324) - TMP_JAMP(245) ! used 2 times + TMP_JAMP(528) = TMP_JAMP(324) + TMP_JAMP(252) ! used 2 times + TMP_JAMP(527) = TMP_JAMP(322) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(253) ! used 2 times + TMP_JAMP(526) = TMP_JAMP(321) + TMP_JAMP(299) ! used 2 times + TMP_JAMP(525) = TMP_JAMP(321) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(259) ! used 2 times + TMP_JAMP(524) = TMP_JAMP(320) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(251) ! used 2 times + TMP_JAMP(523) = TMP_JAMP(320) - TMP_JAMP(314) ! used 2 times + TMP_JAMP(522) = TMP_JAMP(318) + TMP_JAMP(234) ! used 2 times + TMP_JAMP(521) = TMP_JAMP(317) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(249) ! used 2 times + TMP_JAMP(520) = TMP_JAMP(316) + TMP_JAMP(314) ! used 2 times + TMP_JAMP(519) = TMP_JAMP(316) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(160) ! 
used 2 times + TMP_JAMP(518) = TMP_JAMP(315) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(250) ! used 2 times + TMP_JAMP(517) = TMP_JAMP(315) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(256) ! used 2 times + TMP_JAMP(516) = TMP_JAMP(315) - TMP_JAMP(278) ! used 2 times + TMP_JAMP(515) = TMP_JAMP(314) + TMP_JAMP(214) ! used 2 times + TMP_JAMP(514) = TMP_JAMP(313) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(259) ! used 2 times + TMP_JAMP(513) = TMP_JAMP(313) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(255) ! used 2 times + TMP_JAMP(512) = TMP_JAMP(313) + TMP_JAMP(268) ! used 2 times + TMP_JAMP(511) = TMP_JAMP(312) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(174) ! used 2 times + TMP_JAMP(510) = TMP_JAMP(311) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(174) ! used 2 times + TMP_JAMP(509) = TMP_JAMP(306) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(171) ! used 2 times + TMP_JAMP(508) = TMP_JAMP(305) + TMP_JAMP(296) ! used 2 times + TMP_JAMP(507) = TMP_JAMP(302) - TMP_JAMP(239) ! used 2 times + TMP_JAMP(506) = TMP_JAMP(301) + TMP_JAMP(138) ! used 2 times + TMP_JAMP(505) = TMP_JAMP(296) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(176) ! used 2 times + TMP_JAMP(504) = TMP_JAMP(296) - TMP_JAMP(242) ! used 2 times + TMP_JAMP(503) = TMP_JAMP(293) - AMP(378) ! used 2 times + TMP_JAMP(502) = TMP_JAMP(291) + AMP(418) ! used 2 times + TMP_JAMP(501) = TMP_JAMP(291) - AMP(426) ! used 2 times + TMP_JAMP(500) = TMP_JAMP(290) + TMP_JAMP(136) ! used 2 times + TMP_JAMP(499) = TMP_JAMP(290) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(180) ! used 2 times + TMP_JAMP(498) = TMP_JAMP(288) - TMP_JAMP(267) ! used 2 times + TMP_JAMP(497) = TMP_JAMP(288) - TMP_JAMP(275) ! used 2 times + TMP_JAMP(496) = TMP_JAMP(285) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(157) ! 
used 2 times + TMP_JAMP(495) = TMP_JAMP(285) + TMP_JAMP(283) ! used 2 times + TMP_JAMP(494) = TMP_JAMP(278) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(146) ! used 2 times + TMP_JAMP(493) = TMP_JAMP(277) + TMP_JAMP(267) ! used 2 times + TMP_JAMP(492) = TMP_JAMP(273) + TMP_JAMP(124) ! used 2 times + TMP_JAMP(491) = TMP_JAMP(267) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(151) ! used 2 times + TMP_JAMP(490) = TMP_JAMP(262) - AMP(258) ! used 2 times + TMP_JAMP(489) = TMP_JAMP(261) - TMP_JAMP(123) ! used 2 times + TMP_JAMP(488) = TMP_JAMP(261) + TMP_JAMP(122) ! used 2 times + TMP_JAMP(487) = TMP_JAMP(261) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(153) ! used 2 times + TMP_JAMP(486) = TMP_JAMP(257) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(233) ! used 2 times + TMP_JAMP(485) = TMP_JAMP(256) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(205) ! used 2 times + TMP_JAMP(484) = TMP_JAMP(255) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(204) ! used 2 times + TMP_JAMP(483) = TMP_JAMP(250) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(216) ! used 2 times + TMP_JAMP(482) = TMP_JAMP(246) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(226) ! used 2 times + TMP_JAMP(481) = TMP_JAMP(246) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(219) ! used 2 times + TMP_JAMP(480) = TMP_JAMP(240) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(167) ! used 2 times + TMP_JAMP(479) = TMP_JAMP(239) - TMP_JAMP(203) ! used 2 times + TMP_JAMP(478) = TMP_JAMP(238) - AMP(436) ! used 2 times + TMP_JAMP(477) = TMP_JAMP(238) + TMP_JAMP(235) ! used 2 times + TMP_JAMP(476) = TMP_JAMP(234) + TMP_JAMP(213) ! used 2 times + TMP_JAMP(475) = TMP_JAMP(232) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(188) ! used 2 times + TMP_JAMP(474) = TMP_JAMP(231) + TMP_JAMP(225) ! 
used 2 times + TMP_JAMP(473) = TMP_JAMP(230) + TMP_JAMP(228) ! used 2 times + TMP_JAMP(472) = TMP_JAMP(229) - TMP_JAMP(217) ! used 2 times + TMP_JAMP(471) = TMP_JAMP(227) - TMP_JAMP(211) ! used 2 times + TMP_JAMP(470) = TMP_JAMP(226) + AMP(101) ! used 2 times + TMP_JAMP(469) = TMP_JAMP(225) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(184) ! used 2 times + TMP_JAMP(468) = TMP_JAMP(224) + AMP(82) ! used 2 times + TMP_JAMP(467) = TMP_JAMP(223) + AMP(427) ! used 2 times + TMP_JAMP(466) = TMP_JAMP(222) + TMP_JAMP(213) ! used 2 times + TMP_JAMP(465) = TMP_JAMP(220) - TMP_JAMP(218) ! used 2 times + TMP_JAMP(464) = TMP_JAMP(216) - TMP_JAMP(200) ! used 2 times + TMP_JAMP(463) = TMP_JAMP(216) - TMP_JAMP(194) ! used 2 times + TMP_JAMP(462) = TMP_JAMP(214) + TMP_JAMP(194) ! used 2 times + TMP_JAMP(461) = TMP_JAMP(213) - TMP_JAMP(211) ! used 2 times + TMP_JAMP(460) = TMP_JAMP(212) + TMP_JAMP(206) ! used 2 times + TMP_JAMP(459) = TMP_JAMP(210) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(184) ! used 2 times + TMP_JAMP(458) = TMP_JAMP(210) - TMP_JAMP(59) ! used 2 times + TMP_JAMP(457) = TMP_JAMP(208) - TMP_JAMP(206) ! used 2 times + TMP_JAMP(456) = TMP_JAMP(207) - TMP_JAMP(201) ! used 2 times + TMP_JAMP(455) = TMP_JAMP(205) - TMP_JAMP(201) ! used 2 times + TMP_JAMP(454) = TMP_JAMP(205) - TMP_JAMP(195) ! used 2 times + TMP_JAMP(453) = TMP_JAMP(204) + TMP_JAMP(195) ! used 2 times + TMP_JAMP(452) = TMP_JAMP(203) - AMP(397) ! used 2 times + TMP_JAMP(451) = TMP_JAMP(203) - TMP_JAMP(79) ! used 2 times + TMP_JAMP(450) = TMP_JAMP(202) + AMP(47) ! used 2 times + TMP_JAMP(449) = TMP_JAMP(202) + TMP_JAMP(200) ! used 2 times + TMP_JAMP(448) = TMP_JAMP(199) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(113) ! used 2 times + TMP_JAMP(447) = TMP_JAMP(199) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(107) ! 
used 2 times + TMP_JAMP(446) = TMP_JAMP(195) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(149) ! used 2 times + TMP_JAMP(445) = TMP_JAMP(194) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(156) ! used 2 times + TMP_JAMP(444) = TMP_JAMP(192) + TMP_JAMP(178) ! used 2 times + TMP_JAMP(443) = TMP_JAMP(189) + TMP_JAMP(186) ! used 2 times + TMP_JAMP(442) = TMP_JAMP(189) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(351) ! used 2 times + TMP_JAMP(441) = TMP_JAMP(187) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(332) ! used 2 times + TMP_JAMP(440) = TMP_JAMP(186) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(369) ! used 2 times + TMP_JAMP(439) = TMP_JAMP(186) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(367) ! used 2 times + TMP_JAMP(438) = TMP_JAMP(185) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(369) ! used 2 times + TMP_JAMP(437) = TMP_JAMP(185) + TMP_JAMP(181) ! used 2 times + TMP_JAMP(436) = TMP_JAMP(185) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(360) ! used 2 times + TMP_JAMP(435) = TMP_JAMP(183) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(435) ! used 2 times + TMP_JAMP(434) = TMP_JAMP(183) - TMP_JAMP(180) ! used 2 times + TMP_JAMP(433) = TMP_JAMP(182) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(351) ! used 2 times + TMP_JAMP(432) = TMP_JAMP(182) + TMP_JAMP(180) ! used 2 times + TMP_JAMP(431) = TMP_JAMP(182) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(342) ! used 2 times + TMP_JAMP(430) = TMP_JAMP(181) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(137) ! used 2 times + TMP_JAMP(429) = TMP_JAMP(180) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(380) ! used 2 times + TMP_JAMP(428) = TMP_JAMP(179) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(91) ! 
used 2 times + TMP_JAMP(427) = TMP_JAMP(177) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(406) ! used 2 times + TMP_JAMP(426) = TMP_JAMP(177) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(408) ! used 2 times + TMP_JAMP(425) = TMP_JAMP(176) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(346) ! used 2 times + TMP_JAMP(424) = TMP_JAMP(175) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(387) ! used 2 times + TMP_JAMP(423) = TMP_JAMP(175) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(385) ! used 2 times + TMP_JAMP(422) = TMP_JAMP(174) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(426) ! used 2 times + TMP_JAMP(421) = TMP_JAMP(174) - TMP_JAMP(172) ! used 2 times + TMP_JAMP(420) = TMP_JAMP(173) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(348) ! used 2 times + TMP_JAMP(419) = TMP_JAMP(173) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(344) ! used 2 times + TMP_JAMP(418) = TMP_JAMP(172) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(362) ! used 2 times + TMP_JAMP(417) = TMP_JAMP(171) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(397) ! used 2 times + TMP_JAMP(416) = TMP_JAMP(171) + TMP_JAMP(170) ! used 2 times + TMP_JAMP(415) = TMP_JAMP(170) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(366) ! used 2 times + TMP_JAMP(414) = TMP_JAMP(169) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(384) ! used 2 times + TMP_JAMP(413) = TMP_JAMP(169) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(382) ! used 2 times + TMP_JAMP(412) = TMP_JAMP(168) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(332) ! used 2 times + TMP_JAMP(411) = TMP_JAMP(168) + TMP_JAMP(167) ! used 2 times + TMP_JAMP(410) = TMP_JAMP(168) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(333) ! 
used 2 times + TMP_JAMP(409) = TMP_JAMP(167) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(327) ! used 2 times + TMP_JAMP(408) = TMP_JAMP(166) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(330) ! used 2 times + TMP_JAMP(407) = TMP_JAMP(166) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(329) ! used 2 times + TMP_JAMP(406) = TMP_JAMP(164) + TMP_JAMP(163) ! used 2 times + TMP_JAMP(405) = TMP_JAMP(163) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(185) ! used 2 times + TMP_JAMP(404) = TMP_JAMP(163) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(189) ! used 2 times + TMP_JAMP(403) = TMP_JAMP(162) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(174) ! used 2 times + TMP_JAMP(402) = TMP_JAMP(162) - TMP_JAMP(157) ! used 2 times + TMP_JAMP(401) = TMP_JAMP(161) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(168) ! used 2 times + TMP_JAMP(400) = TMP_JAMP(161) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(167) ! used 2 times + TMP_JAMP(399) = TMP_JAMP(160) + TMP_JAMP(159) ! used 2 times + TMP_JAMP(398) = TMP_JAMP(160) - TMP_JAMP(154) ! used 2 times + TMP_JAMP(397) = TMP_JAMP(159) + TMP_JAMP(154) ! used 2 times + TMP_JAMP(396) = TMP_JAMP(159) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(205) ! used 2 times + TMP_JAMP(395) = TMP_JAMP(158) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(207) ! used 2 times + TMP_JAMP(394) = TMP_JAMP(158) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(203) ! used 2 times + TMP_JAMP(393) = TMP_JAMP(157) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(170) ! used 2 times + TMP_JAMP(392) = TMP_JAMP(156) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(249) ! used 2 times + TMP_JAMP(391) = TMP_JAMP(156) - TMP_JAMP(153) ! used 2 times + TMP_JAMP(390) = TMP_JAMP(155) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(189) ! 
used 2 times + TMP_JAMP(389) = TMP_JAMP(155) + TMP_JAMP(153) ! used 2 times + TMP_JAMP(388) = TMP_JAMP(155) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(187) ! used 2 times + TMP_JAMP(387) = TMP_JAMP(154) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(223) ! used 2 times + TMP_JAMP(386) = TMP_JAMP(153) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(226) ! used 2 times + TMP_JAMP(385) = TMP_JAMP(152) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(171) ! used 2 times + TMP_JAMP(384) = TMP_JAMP(152) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(276) ! used 2 times + TMP_JAMP(383) = TMP_JAMP(151) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(183) ! used 2 times + TMP_JAMP(382) = TMP_JAMP(150) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(225) ! used 2 times + TMP_JAMP(381) = TMP_JAMP(150) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(221) ! used 2 times + TMP_JAMP(380) = TMP_JAMP(149) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(258) ! used 2 times + TMP_JAMP(379) = TMP_JAMP(149) - TMP_JAMP(147) ! used 2 times + TMP_JAMP(378) = TMP_JAMP(148) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(192) ! used 2 times + TMP_JAMP(377) = TMP_JAMP(148) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(190) ! used 2 times + TMP_JAMP(376) = TMP_JAMP(147) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(208) ! used 2 times + TMP_JAMP(375) = TMP_JAMP(146) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(173) ! used 2 times + TMP_JAMP(374) = TMP_JAMP(145) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(201) ! used 2 times + TMP_JAMP(373) = TMP_JAMP(145) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(210) ! used 2 times + TMP_JAMP(372) = TMP_JAMP(144) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(228) ! 
used 2 times + TMP_JAMP(371) = TMP_JAMP(144) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(219) ! used 2 times + TMP_JAMP(370) = TMP_JAMP(143) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(238) ! used 2 times + TMP_JAMP(369) = TMP_JAMP(143) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(277) ! used 2 times + TMP_JAMP(368) = TMP_JAMP(142) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(127) ! used 2 times + TMP_JAMP(367) = TMP_JAMP(142) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(256) ! used 2 times + TMP_JAMP(366) = TMP_JAMP(141) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(247) ! used 2 times + TMP_JAMP(365) = TMP_JAMP(141) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(268) ! used 2 times + TMP_JAMP(364) = TMP_JAMP(137) + AMP(399) ! used 2 times + TMP_JAMP(363) = TMP_JAMP(136) - AMP(364) ! used 2 times + TMP_JAMP(362) = TMP_JAMP(126) + AMP(267) ! used 2 times + TMP_JAMP(361) = TMP_JAMP(123) + AMP(285) ! used 2 times + TMP_JAMP(360) = TMP_JAMP(113) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(125) ! used 2 times + TMP_JAMP(359) = TMP_JAMP(112) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(113) ! used 2 times + TMP_JAMP(358) = TMP_JAMP(111) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(100) ! used 2 times + TMP_JAMP(357) = TMP_JAMP(110) - TMP_JAMP(105) ! used 2 times + TMP_JAMP(356) = TMP_JAMP(109) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(124) ! used 2 times + TMP_JAMP(355) = TMP_JAMP(108) - TMP_JAMP(89) ! used 2 times + TMP_JAMP(354) = TMP_JAMP(106) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(65) ! used 2 times + TMP_JAMP(353) = TMP_JAMP(104) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(92) ! used 2 times + TMP_JAMP(352) = TMP_JAMP(103) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(74) ! 
used 2 times + TMP_JAMP(351) = TMP_JAMP(101) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(76) ! used 2 times + TMP_JAMP(350) = TMP_JAMP(100) + TMP_JAMP(87) ! used 2 times + TMP_JAMP(349) = TMP_JAMP(98) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(46) ! used 2 times + TMP_JAMP(348) = TMP_JAMP(96) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(59) ! used 2 times + TMP_JAMP(347) = TMP_JAMP(95) + TMP_JAMP(87) ! used 2 times + TMP_JAMP(346) = TMP_JAMP(93) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(146) ! used 2 times + TMP_JAMP(345) = TMP_JAMP(90) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(64) ! used 2 times + TMP_JAMP(344) = TMP_JAMP(77) - AMP(94) ! used 2 times + TMP_JAMP(343) = TMP_JAMP(76) - AMP(95) ! used 2 times + TMP_JAMP(342) = TMP_JAMP(64) + AMP(112) ! used 2 times + TMP_JAMP(341) = TMP_JAMP(56) - AMP(77) ! used 2 times + TMP_JAMP(340) = TMP_JAMP(52) - AMP(58) ! used 2 times + TMP_JAMP(339) = TMP_JAMP(30) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(83) ! used 2 times + TMP_JAMP(338) = TMP_JAMP(20) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(73) ! used 2 times + TMP_JAMP(337) = TMP_JAMP(19) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(55) ! used 2 times + TMP_JAMP(336) = AMP(349) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(352) ! used 2 times + TMP_JAMP(335) = AMP(352) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(444) ! used 2 times + TMP_JAMP(334) = AMP(378) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(390) ! used 2 times + TMP_JAMP(333) = AMP(380) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(390) ! used 2 times + TMP_JAMP(332) = AMP(56) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(122) ! used 2 times + TMP_JAMP(331) = AMP(326) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(390) ! 
used 2 times + TMP_JAMP(330) = AMP(185) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(195) ! used 2 times + TMP_JAMP(329) = AMP(195) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(240) ! used 2 times + TMP_JAMP(328) = AMP(223) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(229) ! used 2 times + TMP_JAMP(327) = AMP(226) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(229) ! used 2 times + TMP_JAMP(326) = AMP(229) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(259) ! used 2 times + TMP_JAMP(578) = TMP_JAMP(525) - TMP_JAMP(450) ! used 2 times + TMP_JAMP(577) = TMP_JAMP(519) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(370) ! used 2 times + TMP_JAMP(576) = TMP_JAMP(516) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(369) ! used 2 times + TMP_JAMP(575) = TMP_JAMP(512) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(365) ! used 2 times + TMP_JAMP(574) = TMP_JAMP(509) - TMP_JAMP(364) ! used 2 times + TMP_JAMP(573) = TMP_JAMP(494) - TMP_JAMP(361) ! used 2 times + TMP_JAMP(572) = TMP_JAMP(486) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(468) ! used 2 times + TMP_JAMP(571) = TMP_JAMP(482) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(458) ! used 2 times + TMP_JAMP(570) = TMP_JAMP(481) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(449) ! used 2 times + TMP_JAMP(569) = TMP_JAMP(477) + TMP_JAMP(475) ! used 2 times + TMP_JAMP(568) = TMP_JAMP(474) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(428) ! used 2 times + TMP_JAMP(567) = TMP_JAMP(473) + TMP_JAMP(469) ! used 2 times + TMP_JAMP(566) = TMP_JAMP(472) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(444) ! used 2 times + TMP_JAMP(565) = TMP_JAMP(471) - TMP_JAMP(459) ! used 2 times + TMP_JAMP(564) = TMP_JAMP(465) + TMP_JAMP(343) ! 
used 2 times + TMP_JAMP(563) = TMP_JAMP(464) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(337) ! used 2 times + TMP_JAMP(562) = TMP_JAMP(460) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(351) ! used 2 times + TMP_JAMP(561) = TMP_JAMP(457) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(355) ! used 2 times + TMP_JAMP(560) = TMP_JAMP(456) - TMP_JAMP(447) ! used 2 times + TMP_JAMP(559) = TMP_JAMP(455) + TMP_JAMP(340) ! used 2 times + TMP_JAMP(558) = TMP_JAMP(446) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(367) ! used 2 times + TMP_JAMP(557) = TMP_JAMP(445) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(366) ! used 2 times + TMP_JAMP(556) = TMP_JAMP(426) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(297) ! used 2 times + TMP_JAMP(555) = TMP_JAMP(418) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(297) ! used 2 times + TMP_JAMP(554) = TMP_JAMP(410) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(306) ! used 2 times + TMP_JAMP(553) = TMP_JAMP(408) + TMP_JAMP(183) ! used 2 times + TMP_JAMP(552) = TMP_JAMP(407) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(297) ! used 2 times + TMP_JAMP(551) = TMP_JAMP(400) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(262) ! used 2 times + TMP_JAMP(550) = TMP_JAMP(394) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(270) ! used 2 times + TMP_JAMP(549) = TMP_JAMP(393) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(270) ! used 2 times + TMP_JAMP(548) = TMP_JAMP(384) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(268) ! used 2 times + TMP_JAMP(547) = TMP_JAMP(381) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(262) ! used 2 times + TMP_JAMP(546) = TMP_JAMP(375) - TMP_JAMP(162) ! used 2 times + TMP_JAMP(545) = TMP_JAMP(368) - TMP_JAMP(326) ! 
used 2 times + TMP_JAMP(544) = TMP_JAMP(362) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(164) ! used 2 times + TMP_JAMP(543) = TMP_JAMP(357) + TMP_JAMP(339) ! used 2 times + TMP_JAMP(542) = TMP_JAMP(354) - TMP_JAMP(260) ! used 2 times + TMP_JAMP(541) = TMP_JAMP(353) + TMP_JAMP(252) ! used 2 times + TMP_JAMP(540) = TMP_JAMP(352) - TMP_JAMP(247) ! used 2 times + TMP_JAMP(539) = TMP_JAMP(350) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(344) ! used 2 times + TMP_JAMP(538) = TMP_JAMP(349) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(323) ! used 2 times + TMP_JAMP(537) = TMP_JAMP(348) - TMP_JAMP(248) ! used 2 times + TMP_JAMP(536) = TMP_JAMP(347) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(342) ! used 2 times + TMP_JAMP(535) = TMP_JAMP(346) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(332) ! used 2 times + TMP_JAMP(534) = TMP_JAMP(341) + TMP_JAMP(318) ! used 2 times + TMP_JAMP(533) = TMP_JAMP(338) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(319) ! used 2 times + TMP_JAMP(532) = TMP_JAMP(335) - TMP_JAMP(187) ! used 2 times + TMP_JAMP(593) = TMP_JAMP(571) + TMP_JAMP(533) ! used 2 times + TMP_JAMP(592) = TMP_JAMP(570) - TMP_JAMP(535) ! used 2 times + TMP_JAMP(591) = TMP_JAMP(569) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(543) ! used 2 times + TMP_JAMP(590) = TMP_JAMP(568) - TMP_JAMP(524) ! used 2 times + TMP_JAMP(589) = TMP_JAMP(567) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(541) ! used 2 times + TMP_JAMP(588) = TMP_JAMP(566) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(539) ! used 2 times + TMP_JAMP(587) = TMP_JAMP(565) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(540) ! used 2 times + TMP_JAMP(586) = TMP_JAMP(564) + TMP_JAMP(527) ! 
used 2 times + TMP_JAMP(585) = TMP_JAMP(563) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(529) ! used 2 times + TMP_JAMP(584) = TMP_JAMP(562) + TMP_JAMP(521) ! used 2 times + TMP_JAMP(583) = TMP_JAMP(561) + TMP_JAMP(534) ! used 2 times + TMP_JAMP(582) = TMP_JAMP(560) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(537) ! used 2 times + TMP_JAMP(581) = TMP_JAMP(559) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(528) ! used 2 times + TMP_JAMP(580) = TMP_JAMP(542) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(461) ! used 2 times + TMP_JAMP(579) = TMP_JAMP(538) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(448) ! used 2 times + TMP_JAMP(638) = AMP(571) + AMP(576) ! used 16 times + TMP_JAMP(637) = AMP(556) - AMP(559) ! used 16 times + TMP_JAMP(636) = AMP(555) + AMP(557) ! used 16 times + TMP_JAMP(635) = AMP(554) - AMP(574) ! used 16 times + TMP_JAMP(634) = AMP(551) + AMP(553) ! used 16 times + TMP_JAMP(633) = AMP(575) + AMP(578) ! used 16 times + TMP_JAMP(632) = AMP(521) + AMP(524) ! used 16 times + TMP_JAMP(631) = AMP(520) + AMP(523) ! used 16 times + TMP_JAMP(630) = AMP(719) + AMP(721) ! used 16 times + TMP_JAMP(629) = AMP(715) - AMP(718) ! used 16 times + TMP_JAMP(628) = AMP(714) + AMP(716) ! used 16 times + TMP_JAMP(627) = AMP(681) + AMP(684) ! used 16 times + TMP_JAMP(626) = AMP(679) + AMP(682) ! used 16 times + TMP_JAMP(625) = AMP(720) - AMP(723) ! used 16 times + TMP_JAMP(624) = AMP(710) + AMP(712) ! used 16 times + TMP_JAMP(623) = AMP(709) + AMP(711) ! used 16 times + TMP_JAMP(622) = AMP(730) + AMP(735) ! used 16 times + TMP_JAMP(621) = AMP(713) - AMP(733) ! used 16 times + TMP_JAMP(620) = AMP(734) + AMP(737) ! used 16 times + TMP_JAMP(619) = AMP(680) + AMP(683) ! used 16 times + TMP_JAMP(618) = AMP(560) + AMP(562) ! used 16 times + TMP_JAMP(617) = AMP(522) + AMP(525) ! used 16 times + TMP_JAMP(616) = AMP(561) - AMP(564) ! 
used 16 times + TMP_JAMP(615) = AMP(550) + AMP(552) ! used 16 times + TMP_JAMP(614) = AMP(540) + AMP(543) ! used 16 times + TMP_JAMP(613) = AMP(539) + AMP(542) ! used 16 times + TMP_JAMP(612) = AMP(699) + AMP(702) ! used 16 times + TMP_JAMP(611) = AMP(697) + AMP(700) ! used 16 times + TMP_JAMP(610) = AMP(698) + AMP(701) ! used 16 times + TMP_JAMP(609) = AMP(538) + AMP(541) ! used 16 times + TMP_JAMP(608) = AMP(893) + AMP(896) ! used 16 times + TMP_JAMP(607) = AMP(889) + AMP(894) ! used 16 times + TMP_JAMP(606) = AMP(878) + AMP(880) ! used 16 times + TMP_JAMP(605) = AMP(840) + AMP(843) ! used 16 times + TMP_JAMP(604) = AMP(839) + AMP(842) ! used 16 times + TMP_JAMP(603) = AMP(879) - AMP(882) ! used 16 times + TMP_JAMP(602) = AMP(857) + AMP(860) ! used 16 times + TMP_JAMP(601) = AMP(856) + AMP(859) ! used 16 times + TMP_JAMP(600) = AMP(873) + AMP(875) ! used 16 times + TMP_JAMP(599) = AMP(858) + AMP(861) ! used 16 times + TMP_JAMP(598) = AMP(874) - AMP(877) ! used 16 times + TMP_JAMP(597) = AMP(838) + AMP(841) ! used 16 times + TMP_JAMP(596) = AMP(872) - AMP(892) ! used 16 times + TMP_JAMP(595) = AMP(868) + AMP(870) ! used 16 times + TMP_JAMP(594) = AMP(869) + AMP(871) ! used 16 times + TMP_JAMP(680) = TMP_JAMP(638) + AMP(579) ! used 16 times + TMP_JAMP(679) = TMP_JAMP(637) - AMP(566) ! used 16 times + TMP_JAMP(678) = TMP_JAMP(636) + AMP(573) ! used 16 times + TMP_JAMP(677) = TMP_JAMP(635) - AMP(577) ! used 16 times + TMP_JAMP(676) = TMP_JAMP(634) + AMP(567) ! used 16 times + TMP_JAMP(675) = TMP_JAMP(633) - AMP(634) ! used 16 times + TMP_JAMP(674) = TMP_JAMP(632) + AMP(636) ! used 16 times + TMP_JAMP(673) = TMP_JAMP(631) + AMP(565) ! used 16 times + TMP_JAMP(672) = TMP_JAMP(630) + AMP(731) ! used 16 times + TMP_JAMP(671) = TMP_JAMP(629) - AMP(725) ! used 16 times + TMP_JAMP(670) = TMP_JAMP(628) + AMP(732) ! used 16 times + TMP_JAMP(669) = TMP_JAMP(627) + AMP(722) ! used 16 times + TMP_JAMP(668) = TMP_JAMP(626) + AMP(724) ! 
used 16 times + TMP_JAMP(667) = TMP_JAMP(625) - AMP(728) ! used 16 times + TMP_JAMP(666) = TMP_JAMP(624) + AMP(726) ! used 16 times + TMP_JAMP(665) = TMP_JAMP(623) + AMP(729) ! used 16 times + TMP_JAMP(664) = TMP_JAMP(622) + AMP(738) ! used 16 times + TMP_JAMP(663) = TMP_JAMP(621) - AMP(736) ! used 16 times + TMP_JAMP(662) = TMP_JAMP(620) - AMP(793) ! used 16 times + TMP_JAMP(661) = TMP_JAMP(619) + AMP(795) ! used 16 times + TMP_JAMP(660) = TMP_JAMP(618) + AMP(572) ! used 16 times + TMP_JAMP(659) = TMP_JAMP(617) + AMP(563) ! used 16 times + TMP_JAMP(658) = TMP_JAMP(616) - AMP(569) ! used 16 times + TMP_JAMP(657) = TMP_JAMP(615) + AMP(570) ! used 16 times + TMP_JAMP(656) = TMP_JAMP(614) + AMP(558) ! used 16 times + TMP_JAMP(655) = TMP_JAMP(613) + AMP(635) ! used 16 times + TMP_JAMP(654) = TMP_JAMP(612) + AMP(717) ! used 16 times + TMP_JAMP(653) = TMP_JAMP(611) + AMP(727) ! used 16 times + TMP_JAMP(652) = TMP_JAMP(610) + AMP(794) ! used 16 times + TMP_JAMP(651) = TMP_JAMP(609) + AMP(568) ! used 16 times + TMP_JAMP(650) = TMP_JAMP(607) + AMP(897) ! used 16 times + TMP_JAMP(649) = TMP_JAMP(606) + AMP(890) ! used 16 times + TMP_JAMP(648) = TMP_JAMP(605) + AMP(881) ! used 16 times + TMP_JAMP(647) = TMP_JAMP(603) - AMP(887) ! used 16 times + TMP_JAMP(646) = TMP_JAMP(601) + AMP(886) ! used 16 times + TMP_JAMP(645) = TMP_JAMP(600) + AMP(891) ! used 16 times + TMP_JAMP(644) = TMP_JAMP(599) + AMP(876) ! used 16 times + TMP_JAMP(643) = TMP_JAMP(598) - AMP(884) ! used 16 times + TMP_JAMP(642) = TMP_JAMP(597) + AMP(883) ! used 16 times + TMP_JAMP(641) = TMP_JAMP(596) - AMP(895) ! used 16 times + TMP_JAMP(640) = TMP_JAMP(595) + AMP(888) ! used 16 times + TMP_JAMP(639) = TMP_JAMP(594) + AMP(885) ! used 16 times + TMP_JAMP(835) = TMP_JAMP(680) - TMP_JAMP(678) ! used 8 times + TMP_JAMP(834) = TMP_JAMP(680) + TMP_JAMP(660) ! used 8 times + TMP_JAMP(833) = TMP_JAMP(680) + TMP_JAMP(675) ! used 8 times + TMP_JAMP(832) = TMP_JAMP(679) - TMP_JAMP(676) ! 
used 8 times + TMP_JAMP(831) = TMP_JAMP(679) + TMP_JAMP(678) ! used 8 times + TMP_JAMP(830) = TMP_JAMP(679) + TMP_JAMP(671) ! used 8 times + TMP_JAMP(829) = TMP_JAMP(679) - TMP_JAMP(673) ! used 8 times + TMP_JAMP(828) = TMP_JAMP(678) + TMP_JAMP(670) ! used 8 times + TMP_JAMP(827) = TMP_JAMP(678) + TMP_JAMP(660) ! used 8 times + TMP_JAMP(826) = TMP_JAMP(677) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(622) ! used 8 times + TMP_JAMP(825) = TMP_JAMP(677) + TMP_JAMP(676) ! used 8 times + TMP_JAMP(824) = TMP_JAMP(677) - TMP_JAMP(675) ! used 8 times + TMP_JAMP(823) = TMP_JAMP(677) - TMP_JAMP(657) ! used 8 times + TMP_JAMP(822) = TMP_JAMP(677) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(623) ! used 8 times + TMP_JAMP(821) = TMP_JAMP(676) - TMP_JAMP(673) ! used 8 times + TMP_JAMP(820) = TMP_JAMP(676) + TMP_JAMP(657) ! used 8 times + TMP_JAMP(819) = TMP_JAMP(675) + TMP_JAMP(674) ! used 8 times + TMP_JAMP(818) = TMP_JAMP(675) - TMP_JAMP(655) ! used 8 times + TMP_JAMP(817) = TMP_JAMP(674) + TMP_JAMP(673) ! used 8 times + TMP_JAMP(816) = TMP_JAMP(674) + TMP_JAMP(655) ! used 8 times + TMP_JAMP(815) = TMP_JAMP(672) + TMP_JAMP(670) ! used 8 times + TMP_JAMP(814) = TMP_JAMP(672) + TMP_JAMP(664) ! used 8 times + TMP_JAMP(813) = TMP_JAMP(672) + TMP_JAMP(667) ! used 8 times + TMP_JAMP(812) = TMP_JAMP(671) - TMP_JAMP(668) ! used 8 times + TMP_JAMP(811) = TMP_JAMP(671) + TMP_JAMP(670) ! used 8 times + TMP_JAMP(810) = TMP_JAMP(671) - TMP_JAMP(666) ! used 8 times + TMP_JAMP(809) = TMP_JAMP(670) - TMP_JAMP(664) ! used 8 times + TMP_JAMP(808) = TMP_JAMP(669) + ((0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(678) ! used 8 times - TMP_JAMP(917) = TMP_JAMP(369) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(756) ! used 8 times - TMP_JAMP(916) = TMP_JAMP(371) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(657) ! used 8 times - TMP_JAMP(915) = TMP_JAMP(371) - TMP_JAMP(369) ! 
used 8 times - TMP_JAMP(914) = TMP_JAMP(372) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1150) ! used 8 times - TMP_JAMP(913) = TMP_JAMP(373) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1062) ! used 8 times - TMP_JAMP(912) = TMP_JAMP(373) + TMP_JAMP(370) ! used 8 times - TMP_JAMP(911) = TMP_JAMP(386) + TMP_JAMP(369) ! used 8 times - TMP_JAMP(910) = TMP_JAMP(388) + TMP_JAMP(371) ! used 8 times - TMP_JAMP(909) = TMP_JAMP(398) - TMP_JAMP(277) ! used 8 times - TMP_JAMP(908) = TMP_JAMP(401) - TMP_JAMP(372) ! used 8 times - TMP_JAMP(907) = AMP(1462) - AMP(1466) ! used 8 times - TMP_JAMP(906) = AMP(771) + AMP(1580) ! used 8 times - TMP_JAMP(905) = AMP(768) - AMP(770) ! used 8 times - TMP_JAMP(904) = AMP(763) + AMP(771) ! used 8 times - TMP_JAMP(903) = AMP(751) + AMP(759) ! used 8 times - TMP_JAMP(902) = TMP_JAMP(366) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(763) ! used 8 times - TMP_JAMP(901) = TMP_JAMP(367) + TMP_JAMP(365) ! used 8 times - TMP_JAMP(900) = TMP_JAMP(368) + TMP_JAMP(365) ! used 8 times - TMP_JAMP(899) = TMP_JAMP(372) + TMP_JAMP(367) ! used 8 times - TMP_JAMP(898) = TMP_JAMP(374) + TMP_JAMP(364) ! used 8 times - TMP_JAMP(897) = TMP_JAMP(375) + TMP_JAMP(366) ! used 8 times - TMP_JAMP(896) = AMP(1225) + AMP(1591) ! used 8 times - TMP_JAMP(895) = AMP(430) - AMP(433) ! used 8 times - TMP_JAMP(894) = AMP(371) + AMP(432) ! used 8 times - TMP_JAMP(893) = AMP(359) - AMP(371) ! used 8 times - TMP_JAMP(892) = AMP(146) - AMP(1593) ! used 8 times - TMP_JAMP(891) = AMP(145) - AMP(158) ! used 8 times - TMP_JAMP(890) = TMP_JAMP(264) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(359) ! used 8 times - TMP_JAMP(889) = TMP_JAMP(362) + TMP_JAMP(360) ! used 8 times - TMP_JAMP(888) = TMP_JAMP(363) + TMP_JAMP(361) ! used 8 times - TMP_JAMP(887) = TMP_JAMP(374) + TMP_JAMP(314) ! used 8 times - TMP_JAMP(886) = TMP_JAMP(374) - TMP_JAMP(360) ! used 8 times - TMP_JAMP(885) = TMP_JAMP(376) - TMP_JAMP(362) ! 
used 8 times - TMP_JAMP(884) = TMP_JAMP(384) + TMP_JAMP(264) ! used 8 times - TMP_JAMP(883) = TMP_JAMP(397) + TMP_JAMP(361) ! used 8 times - TMP_JAMP(882) = TMP_JAMP(402) + TMP_JAMP(363) ! used 8 times - TMP_JAMP(881) = TMP_JAMP(403) - TMP_JAMP(383) ! used 8 times - TMP_JAMP(880) = TMP_JAMP(404) - TMP_JAMP(384) ! used 8 times - TMP_JAMP(879) = AMP(1057) - AMP(1073) ! used 8 times - TMP_JAMP(878) = AMP(1055) - AMP(1057) ! used 8 times - TMP_JAMP(877) = AMP(781) - AMP(1592) ! used 8 times - TMP_JAMP(876) = AMP(672) - AMP(1058) ! used 8 times - TMP_JAMP(875) = AMP(661) + AMP(672) ! used 8 times - TMP_JAMP(874) = TMP_JAMP(261) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(661) ! used 8 times - TMP_JAMP(873) = TMP_JAMP(358) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(781) ! used 8 times - TMP_JAMP(872) = TMP_JAMP(359) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1055) ! used 8 times - TMP_JAMP(871) = TMP_JAMP(360) + TMP_JAMP(358) ! used 8 times - TMP_JAMP(870) = TMP_JAMP(361) + TMP_JAMP(359) ! used 8 times - TMP_JAMP(869) = TMP_JAMP(364) - TMP_JAMP(358) ! used 8 times - TMP_JAMP(868) = TMP_JAMP(390) - TMP_JAMP(369) ! used 8 times - TMP_JAMP(867) = TMP_JAMP(391) + TMP_JAMP(359) ! used 8 times - TMP_JAMP(866) = TMP_JAMP(392) - TMP_JAMP(371) ! used 8 times - TMP_JAMP(865) = AMP(785) + AMP(790) ! used 8 times - TMP_JAMP(864) = AMP(676) - AMP(792) ! used 8 times - TMP_JAMP(863) = AMP(429) + AMP(431) ! used 8 times - TMP_JAMP(862) = AMP(340) - AMP(456) ! used 8 times - TMP_JAMP(861) = AMP(338) + AMP(340) ! used 8 times - TMP_JAMP(860) = TMP_JAMP(355) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(676) ! used 8 times - TMP_JAMP(859) = TMP_JAMP(355) - TMP_JAMP(354) ! used 8 times - TMP_JAMP(858) = TMP_JAMP(356) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(338) ! used 8 times - TMP_JAMP(857) = TMP_JAMP(357) + TMP_JAMP(354) ! used 8 times - TMP_JAMP(856) = TMP_JAMP(357) + TMP_JAMP(356) ! 
used 8 times - TMP_JAMP(855) = TMP_JAMP(362) - TMP_JAMP(358) ! used 8 times - TMP_JAMP(854) = TMP_JAMP(376) - TMP_JAMP(364) ! used 8 times - TMP_JAMP(853) = TMP_JAMP(399) + TMP_JAMP(277) ! used 8 times - TMP_JAMP(852) = TMP_JAMP(400) + TMP_JAMP(356) ! used 8 times - TMP_JAMP(851) = AMP(1473) + AMP(1474) ! used 8 times - TMP_JAMP(850) = AMP(1134) + AMP(1135) ! used 8 times - TMP_JAMP(849) = AMP(1129) + AMP(1137) ! used 8 times - TMP_JAMP(848) = AMP(123) - AMP(1476) ! used 8 times - TMP_JAMP(847) = AMP(120) + AMP(1527) ! used 8 times - TMP_JAMP(846) = AMP(118) + AMP(123) ! used 8 times - TMP_JAMP(845) = AMP(114) + AMP(120) ! used 8 times - TMP_JAMP(844) = TMP_JAMP(254) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(118) ! used 8 times - TMP_JAMP(843) = TMP_JAMP(350) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(114) ! used 8 times - TMP_JAMP(842) = TMP_JAMP(351) + TMP_JAMP(350) ! used 8 times - TMP_JAMP(841) = TMP_JAMP(353) + TMP_JAMP(352) ! used 8 times - TMP_JAMP(840) = TMP_JAMP(354) + TMP_JAMP(353) ! used 8 times - TMP_JAMP(839) = TMP_JAMP(355) - TMP_JAMP(352) ! used 8 times - TMP_JAMP(838) = TMP_JAMP(360) + TMP_JAMP(254) ! used 8 times - TMP_JAMP(837) = TMP_JAMP(394) + TMP_JAMP(350) ! used 8 times - TMP_JAMP(836) = TMP_JAMP(398) + TMP_JAMP(351) ! used 8 times - TMP_JAMP(835) = AMP(1228) + AMP(1720) ! used 8 times - TMP_JAMP(834) = AMP(1053) + AMP(1059) ! used 8 times - TMP_JAMP(833) = AMP(519) - AMP(1722) ! used 8 times - TMP_JAMP(832) = AMP(503) + AMP(1716) ! used 8 times - TMP_JAMP(831) = AMP(500) + AMP(503) ! used 8 times - TMP_JAMP(830) = TMP_JAMP(346) + ((0.000000000000000D+00 + TMP_JAMP(807) = TMP_JAMP(669) - TMP_JAMP(668) ! used 8 times + TMP_JAMP(806) = TMP_JAMP(669) - TMP_JAMP(667) ! used 8 times + TMP_JAMP(805) = TMP_JAMP(669) + TMP_JAMP(661) ! used 8 times + TMP_JAMP(804) = TMP_JAMP(669) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(677) ! used 8 times + TMP_JAMP(803) = TMP_JAMP(668) - TMP_JAMP(666) ! 
used 8 times + TMP_JAMP(802) = TMP_JAMP(668) + TMP_JAMP(661) ! used 8 times + TMP_JAMP(801) = TMP_JAMP(667) - TMP_JAMP(665) ! used 8 times + TMP_JAMP(800) = TMP_JAMP(667) + TMP_JAMP(647) ! used 8 times + TMP_JAMP(799) = TMP_JAMP(667) - TMP_JAMP(653) ! used 8 times + TMP_JAMP(798) = TMP_JAMP(666) + TMP_JAMP(665) ! used 8 times + TMP_JAMP(797) = TMP_JAMP(665) - TMP_JAMP(653) ! used 8 times + TMP_JAMP(796) = TMP_JAMP(664) + TMP_JAMP(662) ! used 8 times + TMP_JAMP(795) = TMP_JAMP(663) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(781) ! used 8 times + TMP_JAMP(794) = TMP_JAMP(663) - TMP_JAMP(662) ! used 8 times + TMP_JAMP(793) = TMP_JAMP(663) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(782) ! used 8 times + TMP_JAMP(792) = TMP_JAMP(662) + TMP_JAMP(661) ! used 8 times + TMP_JAMP(791) = TMP_JAMP(662) - TMP_JAMP(652) ! used 8 times + TMP_JAMP(790) = TMP_JAMP(661) + TMP_JAMP(652) ! used 8 times + TMP_JAMP(789) = TMP_JAMP(660) + TMP_JAMP(658) ! used 8 times + TMP_JAMP(788) = TMP_JAMP(659) + ((0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(519) ! used 8 times - TMP_JAMP(829) = TMP_JAMP(347) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(500) ! used 8 times - TMP_JAMP(828) = TMP_JAMP(347) + TMP_JAMP(346) ! used 8 times - TMP_JAMP(827) = TMP_JAMP(348) + TMP_JAMP(346) ! used 8 times - TMP_JAMP(826) = TMP_JAMP(349) - TMP_JAMP(261) ! used 8 times - TMP_JAMP(825) = TMP_JAMP(349) + TMP_JAMP(347) ! used 8 times - TMP_JAMP(824) = TMP_JAMP(349) + TMP_JAMP(348) ! used 8 times - TMP_JAMP(823) = TMP_JAMP(362) - TMP_JAMP(348) ! used 8 times - TMP_JAMP(822) = TMP_JAMP(386) + TMP_JAMP(292) ! used 8 times - TMP_JAMP(821) = TMP_JAMP(388) + TMP_JAMP(347) ! used 8 times - TMP_JAMP(820) = AMP(1471) - AMP(1475) ! used 8 times - TMP_JAMP(819) = AMP(612) + AMP(1526) ! used 8 times - TMP_JAMP(818) = AMP(609) - AMP(611) ! used 8 times - TMP_JAMP(817) = AMP(604) + AMP(612) ! 
used 8 times - TMP_JAMP(816) = TMP_JAMP(344) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(604) ! used 8 times - TMP_JAMP(815) = TMP_JAMP(345) + TMP_JAMP(343) ! used 8 times - TMP_JAMP(814) = TMP_JAMP(346) + TMP_JAMP(343) ! used 8 times - TMP_JAMP(813) = TMP_JAMP(348) + TMP_JAMP(345) ! used 8 times - TMP_JAMP(812) = TMP_JAMP(350) + TMP_JAMP(344) ! used 8 times - TMP_JAMP(811) = TMP_JAMP(387) + TMP_JAMP(344) ! used 8 times - TMP_JAMP(810) = AMP(1305) + AMP(1669) ! used 8 times - TMP_JAMP(809) = AMP(536) - AMP(1671) ! used 8 times - TMP_JAMP(808) = AMP(535) - AMP(632) ! used 8 times - TMP_JAMP(807) = AMP(389) + AMP(423) ! used 8 times - TMP_JAMP(806) = AMP(377) - AMP(389) ! used 8 times - TMP_JAMP(805) = AMP(374) + AMP(376) ! used 8 times - TMP_JAMP(804) = TMP_JAMP(240) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(377) ! used 8 times - TMP_JAMP(803) = TMP_JAMP(341) - TMP_JAMP(340) ! used 8 times - TMP_JAMP(802) = TMP_JAMP(342) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1305) ! used 8 times - TMP_JAMP(801) = TMP_JAMP(342) + TMP_JAMP(339) ! used 8 times - TMP_JAMP(800) = TMP_JAMP(381) + TMP_JAMP(340) ! used 8 times - TMP_JAMP(799) = TMP_JAMP(382) + TMP_JAMP(341) ! used 8 times - TMP_JAMP(798) = TMP_JAMP(386) + TMP_JAMP(339) ! used 8 times - TMP_JAMP(797) = TMP_JAMP(390) + TMP_JAMP(342) ! used 8 times - TMP_JAMP(796) = TMP_JAMP(403) - TMP_JAMP(390) ! used 8 times - TMP_JAMP(795) = TMP_JAMP(404) + TMP_JAMP(240) ! used 8 times - TMP_JAMP(794) = AMP(1291) - AMP(1886) ! used 8 times - TMP_JAMP(793) = AMP(1290) + AMP(1291) ! used 8 times - TMP_JAMP(792) = AMP(769) - AMP(1292) ! used 8 times - TMP_JAMP(791) = AMP(706) + AMP(769) ! used 8 times - TMP_JAMP(790) = AMP(695) - AMP(1670) ! used 8 times - TMP_JAMP(789) = TMP_JAMP(237) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(706) ! used 8 times - TMP_JAMP(788) = TMP_JAMP(338) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1290) ! 
used 8 times - TMP_JAMP(787) = TMP_JAMP(339) + TMP_JAMP(337) ! used 8 times - TMP_JAMP(786) = TMP_JAMP(340) - TMP_JAMP(338) ! used 8 times - TMP_JAMP(785) = TMP_JAMP(369) + TMP_JAMP(337) ! used 8 times - TMP_JAMP(784) = TMP_JAMP(377) - TMP_JAMP(366) ! used 8 times - TMP_JAMP(783) = TMP_JAMP(378) + TMP_JAMP(338) ! used 8 times - TMP_JAMP(782) = AMP(694) - AMP(791) ! used 8 times - TMP_JAMP(781) = AMP(373) + AMP(375) ! used 8 times - TMP_JAMP(780) = TMP_JAMP(342) - TMP_JAMP(337) ! used 8 times - TMP_JAMP(779) = TMP_JAMP(355) + TMP_JAMP(335) ! used 8 times - TMP_JAMP(778) = TMP_JAMP(356) + TMP_JAMP(336) ! used 8 times - TMP_JAMP(777) = AMP(1300) - AMP(1859) ! used 8 times - TMP_JAMP(776) = AMP(1299) + AMP(1300) ! used 8 times - TMP_JAMP(775) = AMP(610) - AMP(1301) ! used 8 times - TMP_JAMP(774) = AMP(547) + AMP(610) ! used 8 times - TMP_JAMP(773) = TMP_JAMP(233) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(547) ! used 8 times - TMP_JAMP(772) = TMP_JAMP(334) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1299) ! used 8 times - TMP_JAMP(771) = TMP_JAMP(335) - TMP_JAMP(334) ! used 8 times - TMP_JAMP(770) = TMP_JAMP(339) - TMP_JAMP(233) ! used 8 times - TMP_JAMP(769) = TMP_JAMP(344) - TMP_JAMP(233) ! used 8 times - TMP_JAMP(768) = TMP_JAMP(351) - TMP_JAMP(344) ! used 8 times - TMP_JAMP(767) = TMP_JAMP(352) + TMP_JAMP(334) ! used 8 times - TMP_JAMP(766) = AMP(1304) + AMP(1642) ! used 8 times - TMP_JAMP(765) = AMP(1285) + AMP(1293) ! used 8 times - TMP_JAMP(764) = AMP(141) - AMP(1644) ! used 8 times - TMP_JAMP(763) = AMP(107) + AMP(1635) ! used 8 times - TMP_JAMP(762) = AMP(104) + AMP(107) ! used 8 times - TMP_JAMP(761) = TMP_JAMP(330) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(104) ! used 8 times - TMP_JAMP(760) = TMP_JAMP(331) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(141) ! used 8 times - TMP_JAMP(759) = TMP_JAMP(331) + TMP_JAMP(314) ! used 8 times - TMP_JAMP(758) = TMP_JAMP(332) - TMP_JAMP(237) ! 
used 8 times - TMP_JAMP(757) = TMP_JAMP(333) + TMP_JAMP(332) ! used 8 times - TMP_JAMP(756) = TMP_JAMP(342) - TMP_JAMP(333) ! used 8 times - TMP_JAMP(755) = TMP_JAMP(375) + TMP_JAMP(330) ! used 8 times - TMP_JAMP(754) = AMP(1302) - AMP(1639) ! used 8 times - TMP_JAMP(753) = AMP(1294) + AMP(1302) ! used 8 times - TMP_JAMP(752) = AMP(116) + AMP(119) ! used 8 times - TMP_JAMP(751) = TMP_JAMP(329) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1294) ! used 8 times - TMP_JAMP(750) = TMP_JAMP(329) - TMP_JAMP(233) ! used 8 times - TMP_JAMP(749) = TMP_JAMP(333) - TMP_JAMP(329) ! used 8 times - TMP_JAMP(748) = TMP_JAMP(350) + TMP_JAMP(328) ! used 8 times - TMP_JAMP(747) = AMP(942) + AMP(946) ! used 8 times - TMP_JAMP(746) = AMP(837) - AMP(1775) ! used 8 times - TMP_JAMP(745) = AMP(833) - AMP(948) ! used 8 times - TMP_JAMP(744) = AMP(831) - AMP(1049) ! used 8 times - TMP_JAMP(743) = AMP(821) + AMP(1769) ! used 8 times - TMP_JAMP(742) = AMP(820) + AMP(831) ! used 8 times - TMP_JAMP(741) = AMP(818) + AMP(821) ! used 8 times - TMP_JAMP(740) = TMP_JAMP(223) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(820) ! used 8 times - TMP_JAMP(739) = TMP_JAMP(325) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(818) ! used 8 times - TMP_JAMP(738) = TMP_JAMP(325) + TMP_JAMP(324) ! used 8 times - TMP_JAMP(737) = TMP_JAMP(327) - TMP_JAMP(326) ! used 8 times - TMP_JAMP(736) = TMP_JAMP(368) + TMP_JAMP(324) ! used 8 times - TMP_JAMP(735) = TMP_JAMP(370) + TMP_JAMP(325) ! used 8 times - TMP_JAMP(734) = TMP_JAMP(392) - TMP_JAMP(223) ! used 8 times - TMP_JAMP(733) = TMP_JAMP(399) - TMP_JAMP(327) ! used 8 times - TMP_JAMP(732) = TMP_JAMP(403) + TMP_JAMP(326) ! used 8 times - TMP_JAMP(731) = AMP(929) + AMP(1805) ! used 8 times - TMP_JAMP(730) = AMP(928) - AMP(1214) ! used 8 times - TMP_JAMP(729) = AMP(927) - AMP(929) ! used 8 times - TMP_JAMP(728) = AMP(865) + AMP(928) ! used 8 times - TMP_JAMP(727) = AMP(851) - AMP(947) ! 
used 8 times - TMP_JAMP(726) = TMP_JAMP(220) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(865) ! used 8 times - TMP_JAMP(725) = TMP_JAMP(322) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(927) ! used 8 times - TMP_JAMP(724) = TMP_JAMP(324) + TMP_JAMP(322) ! used 8 times - TMP_JAMP(723) = TMP_JAMP(327) + TMP_JAMP(323) ! used 8 times - TMP_JAMP(722) = TMP_JAMP(365) + TMP_JAMP(322) ! used 8 times - TMP_JAMP(721) = TMP_JAMP(377) - TMP_JAMP(220) ! used 8 times - TMP_JAMP(720) = TMP_JAMP(383) - TMP_JAMP(323) ! used 8 times - TMP_JAMP(719) = AMP(855) - AMP(1721) ! used 8 times - TMP_JAMP(718) = AMP(816) + AMP(822) ! used 8 times - TMP_JAMP(717) = TMP_JAMP(326) + TMP_JAMP(323) ! used 8 times - TMP_JAMP(716) = TMP_JAMP(346) + TMP_JAMP(320) ! used 8 times - TMP_JAMP(715) = TMP_JAMP(347) + TMP_JAMP(321) ! used 8 times - TMP_JAMP(714) = AMP(938) + AMP(1751) ! used 8 times - TMP_JAMP(713) = AMP(936) - AMP(938) ! used 8 times - TMP_JAMP(712) = AMP(847) + AMP(937) ! used 8 times - TMP_JAMP(711) = TMP_JAMP(319) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(936) ! used 8 times - TMP_JAMP(710) = TMP_JAMP(320) + TMP_JAMP(319) ! used 8 times - TMP_JAMP(709) = TMP_JAMP(343) + TMP_JAMP(319) ! used 8 times - TMP_JAMP(708) = TMP_JAMP(351) - TMP_JAMP(215) ! used 8 times - TMP_JAMP(707) = AMP(941) - AMP(1643) ! used 8 times - TMP_JAMP(706) = AMP(930) + AMP(1634) ! used 8 times - TMP_JAMP(705) = AMP(922) + AMP(930) ! used 8 times - TMP_JAMP(704) = TMP_JAMP(317) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(922) ! used 8 times - TMP_JAMP(703) = TMP_JAMP(330) + TMP_JAMP(317) ! used 8 times - TMP_JAMP(702) = TMP_JAMP(331) + TMP_JAMP(318) ! used 8 times - TMP_JAMP(701) = AMP(939) + AMP(1640) ! used 8 times - TMP_JAMP(700) = AMP(931) + AMP(939) ! used 8 times - TMP_JAMP(699) = TMP_JAMP(316) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(931) ! used 8 times - TMP_JAMP(698) = TMP_JAMP(318) - TMP_JAMP(316) ! 
used 8 times - TMP_JAMP(697) = TMP_JAMP(328) + TMP_JAMP(316) ! used 8 times - TMP_JAMP(696) = AMP(1435) + AMP(1852) ! used 8 times - TMP_JAMP(695) = AMP(1434) + AMP(1435) ! used 8 times - TMP_JAMP(694) = AMP(1113) - AMP(1507) ! used 8 times - TMP_JAMP(693) = AMP(1111) - AMP(1854) ! used 8 times - TMP_JAMP(692) = AMP(1110) + AMP(1111) ! used 8 times - TMP_JAMP(691) = AMP(1105) + AMP(1113) ! used 8 times - TMP_JAMP(690) = AMP(81) - AMP(1437) ! used 8 times - TMP_JAMP(689) = AMP(72) + AMP(1509) ! used 8 times - TMP_JAMP(688) = AMP(70) + AMP(81) ! used 8 times - TMP_JAMP(687) = AMP(66) + AMP(72) ! used 8 times - TMP_JAMP(686) = TMP_JAMP(254) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(70) ! used 8 times - TMP_JAMP(685) = TMP_JAMP(350) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(66) ! used 8 times - TMP_JAMP(684) = TMP_JAMP(351) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1105) ! used 8 times - TMP_JAMP(683) = TMP_JAMP(352) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1110) ! used 8 times - TMP_JAMP(682) = TMP_JAMP(353) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1434) ! used 8 times - TMP_JAMP(681) = TMP_JAMP(356) - TMP_JAMP(352) ! used 8 times - TMP_JAMP(680) = TMP_JAMP(357) - TMP_JAMP(353) ! used 8 times - TMP_JAMP(679) = TMP_JAMP(395) + TMP_JAMP(350) ! used 8 times - TMP_JAMP(678) = TMP_JAMP(396) + TMP_JAMP(361) ! used 8 times - TMP_JAMP(677) = TMP_JAMP(401) + TMP_JAMP(351) ! used 8 times - TMP_JAMP(676) = TMP_JAMP(405) - TMP_JAMP(363) ! used 8 times - TMP_JAMP(675) = AMP(1436) - AMP(1744) ! used 8 times - TMP_JAMP(674) = AMP(1432) - AMP(1436) ! used 8 times - TMP_JAMP(673) = AMP(588) + AMP(1508) ! used 8 times - TMP_JAMP(672) = AMP(587) + AMP(1746) ! used 8 times - TMP_JAMP(671) = AMP(585) - AMP(587) ! used 8 times - TMP_JAMP(670) = AMP(580) + AMP(588) ! used 8 times - TMP_JAMP(669) = TMP_JAMP(343) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(585) ! 
used 8 times - TMP_JAMP(668) = TMP_JAMP(344) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(580) ! used 8 times - TMP_JAMP(667) = TMP_JAMP(345) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1432) ! used 8 times - TMP_JAMP(666) = TMP_JAMP(347) - TMP_JAMP(343) ! used 8 times - TMP_JAMP(665) = TMP_JAMP(349) - TMP_JAMP(345) ! used 8 times - TMP_JAMP(664) = TMP_JAMP(389) + TMP_JAMP(344) ! used 8 times - TMP_JAMP(663) = TMP_JAMP(393) - TMP_JAMP(359) ! used 8 times - TMP_JAMP(662) = AMP(1279) - AMP(1853) ! used 8 times - TMP_JAMP(661) = AMP(1278) + AMP(1279) ! used 8 times - TMP_JAMP(660) = AMP(586) - AMP(1280) ! used 8 times - TMP_JAMP(659) = AMP(544) + AMP(586) ! used 8 times - TMP_JAMP(658) = TMP_JAMP(233) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(544) ! used 8 times - TMP_JAMP(657) = TMP_JAMP(334) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1278) ! used 8 times - TMP_JAMP(656) = TMP_JAMP(336) - TMP_JAMP(334) ! used 8 times - TMP_JAMP(655) = TMP_JAMP(380) + TMP_JAMP(340) ! used 8 times - TMP_JAMP(654) = TMP_JAMP(385) - TMP_JAMP(341) ! used 8 times - TMP_JAMP(653) = AMP(1281) - AMP(1624) ! used 8 times - TMP_JAMP(652) = AMP(1273) + AMP(1281) ! used 8 times - TMP_JAMP(651) = AMP(71) + AMP(1626) ! used 8 times - TMP_JAMP(650) = AMP(68) + AMP(71) ! used 8 times - TMP_JAMP(649) = TMP_JAMP(328) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(68) ! used 8 times - TMP_JAMP(648) = TMP_JAMP(329) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1273) ! used 8 times - TMP_JAMP(647) = TMP_JAMP(330) + TMP_JAMP(283) ! used 8 times - TMP_JAMP(646) = TMP_JAMP(330) + TMP_JAMP(328) ! used 8 times - TMP_JAMP(645) = TMP_JAMP(332) + TMP_JAMP(329) ! used 8 times - TMP_JAMP(644) = TMP_JAMP(379) - TMP_JAMP(338) ! used 8 times - TMP_JAMP(643) = AMP(917) + AMP(1745) ! used 8 times - TMP_JAMP(642) = AMP(916) - AMP(1112) ! used 8 times - TMP_JAMP(641) = AMP(915) - AMP(917) ! 
used 8 times - TMP_JAMP(640) = AMP(844) + AMP(916) ! used 8 times - TMP_JAMP(639) = TMP_JAMP(215) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(844) ! used 8 times - TMP_JAMP(638) = TMP_JAMP(319) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(915) ! used 8 times - TMP_JAMP(637) = TMP_JAMP(321) - TMP_JAMP(319) ! used 8 times - TMP_JAMP(636) = TMP_JAMP(372) - TMP_JAMP(324) ! used 8 times - TMP_JAMP(635) = TMP_JAMP(373) - TMP_JAMP(325) ! used 8 times - TMP_JAMP(634) = AMP(918) + AMP(1625) ! used 8 times - TMP_JAMP(633) = AMP(910) + AMP(918) ! used 8 times - TMP_JAMP(632) = TMP_JAMP(316) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(910) ! used 8 times - TMP_JAMP(631) = TMP_JAMP(317) + TMP_JAMP(316) ! used 8 times - TMP_JAMP(630) = TMP_JAMP(367) - TMP_JAMP(322) ! used 8 times - TMP_JAMP(629) = AMP(1303) + AMP(1645) ! used 8 times - TMP_JAMP(628) = AMP(140) - AMP(1647) ! used 8 times - TMP_JAMP(627) = TMP_JAMP(328) + TMP_JAMP(254) ! used 8 times - TMP_JAMP(626) = TMP_JAMP(331) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(140) ! used 8 times - TMP_JAMP(625) = TMP_JAMP(333) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1303) ! used 8 times - TMP_JAMP(624) = TMP_JAMP(353) - TMP_JAMP(334) ! used 8 times - TMP_JAMP(623) = TMP_JAMP(357) - TMP_JAMP(336) ! used 8 times - TMP_JAMP(622) = TMP_JAMP(397) + TMP_JAMP(331) ! used 8 times - TMP_JAMP(621) = TMP_JAMP(402) + TMP_JAMP(240) ! used 8 times - TMP_JAMP(620) = AMP(940) - AMP(1646) ! used 8 times - TMP_JAMP(619) = TMP_JAMP(318) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(940) ! used 8 times - TMP_JAMP(618) = TMP_JAMP(345) - TMP_JAMP(319) ! used 8 times - TMP_JAMP(617) = TMP_JAMP(349) - TMP_JAMP(321) ! used 8 times - TMP_JAMP(616) = TMP_JAMP(391) - TMP_JAMP(223) ! used 8 times - TMP_JAMP(615) = AMP(944) + AMP(949) ! used 8 times - TMP_JAMP(614) = AMP(835) - AMP(951) ! 
used 8 times - TMP_JAMP(613) = TMP_JAMP(326) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(944) ! used 8 times - TMP_JAMP(612) = TMP_JAMP(327) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(835) ! used 8 times - TMP_JAMP(611) = TMP_JAMP(329) - TMP_JAMP(316) ! used 8 times - TMP_JAMP(610) = TMP_JAMP(333) - TMP_JAMP(318) ! used 8 times - TMP_JAMP(609) = TMP_JAMP(352) + TMP_JAMP(215) ! used 8 times - TMP_JAMP(608) = TMP_JAMP(400) - TMP_JAMP(327) ! used 8 times - TMP_JAMP(607) = TMP_JAMP(404) + TMP_JAMP(326) ! used 8 times - TMP_JAMP(606) = AMP(1483) + AMP(1831) ! used 8 times - TMP_JAMP(605) = AMP(1146) - AMP(1531) ! used 8 times - TMP_JAMP(604) = AMP(1144) - AMP(1833) ! used 8 times - TMP_JAMP(603) = AMP(135) - AMP(1485) ! used 8 times - TMP_JAMP(602) = AMP(132) + AMP(1533) ! used 8 times - TMP_JAMP(601) = TMP_JAMP(314) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(130) ! used 8 times - TMP_JAMP(600) = TMP_JAMP(394) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(126) ! used 8 times - TMP_JAMP(599) = TMP_JAMP(398) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1138) ! used 8 times - TMP_JAMP(598) = TMP_JAMP(399) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1143) ! used 8 times - TMP_JAMP(597) = TMP_JAMP(403) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1482) ! used 8 times - TMP_JAMP(596) = AMP(1306) + AMP(1666) ! used 8 times - TMP_JAMP(595) = AMP(537) - AMP(1668) ! used 8 times - TMP_JAMP(594) = TMP_JAMP(339) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(537) ! used 8 times - TMP_JAMP(593) = TMP_JAMP(342) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1306) ! used 8 times - TMP_JAMP(592) = TMP_JAMP(343) + TMP_JAMP(233) ! used 8 times - TMP_JAMP(591) = TMP_JAMP(388) + TMP_JAMP(339) ! used 8 times - TMP_JAMP(590) = TMP_JAMP(392) + TMP_JAMP(342) ! used 8 times - TMP_JAMP(589) = AMP(1484) - AMP(1696) ! 
used 8 times - TMP_JAMP(588) = AMP(621) + AMP(1532) ! used 8 times - TMP_JAMP(587) = AMP(620) + AMP(1698) ! used 8 times - TMP_JAMP(586) = TMP_JAMP(386) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(618) ! used 8 times - TMP_JAMP(585) = TMP_JAMP(387) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(613) ! used 8 times - TMP_JAMP(584) = TMP_JAMP(390) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1480) ! used 8 times - TMP_JAMP(583) = AMP(1227) + AMP(1723) ! used 8 times - TMP_JAMP(582) = AMP(518) - AMP(1725) ! used 8 times - TMP_JAMP(581) = TMP_JAMP(346) + ((-0.000000000000000D+00, + TMP_JAMP(787) = TMP_JAMP(659) - TMP_JAMP(658) ! used 8 times + TMP_JAMP(786) = TMP_JAMP(659) + ((-0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(518) ! used 8 times - TMP_JAMP(580) = TMP_JAMP(348) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1227) ! used 8 times - TMP_JAMP(579) = TMP_JAMP(353) - TMP_JAMP(345) ! used 8 times - TMP_JAMP(578) = TMP_JAMP(381) + TMP_JAMP(346) ! used 8 times - TMP_JAMP(577) = TMP_JAMP(382) - TMP_JAMP(264) ! used 8 times - TMP_JAMP(576) = AMP(854) - AMP(1724) ! used 8 times - TMP_JAMP(575) = TMP_JAMP(320) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(854) ! used 8 times - TMP_JAMP(574) = TMP_JAMP(332) - TMP_JAMP(317) ! used 8 times - TMP_JAMP(573) = TMP_JAMP(378) + TMP_JAMP(220) ! used 8 times - TMP_JAMP(572) = AMP(853) - AMP(950) ! used 8 times - TMP_JAMP(571) = TMP_JAMP(323) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(853) ! used 8 times - TMP_JAMP(570) = TMP_JAMP(348) - TMP_JAMP(320) ! used 8 times - TMP_JAMP(569) = TMP_JAMP(384) - TMP_JAMP(323) ! used 8 times - TMP_JAMP(568) = AMP(1222) - AMP(1832) ! used 8 times - TMP_JAMP(567) = AMP(619) - AMP(1223) ! used 8 times - TMP_JAMP(566) = TMP_JAMP(292) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(529) ! 
used 8 times - TMP_JAMP(565) = TMP_JAMP(383) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1221) ! used 8 times - TMP_JAMP(564) = AMP(1226) + AMP(1588) ! used 8 times - TMP_JAMP(563) = AMP(147) - AMP(1590) ! used 8 times - TMP_JAMP(562) = TMP_JAMP(360) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(147) ! used 8 times - TMP_JAMP(561) = TMP_JAMP(362) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1226) ! used 8 times - TMP_JAMP(560) = TMP_JAMP(375) + TMP_JAMP(360) ! used 8 times - TMP_JAMP(559) = TMP_JAMP(377) + TMP_JAMP(362) ! used 8 times - TMP_JAMP(558) = AMP(1224) - AMP(1585) ! used 8 times - TMP_JAMP(557) = AMP(131) + AMP(1587) ! used 8 times - TMP_JAMP(556) = TMP_JAMP(374) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(128) ! used 8 times - TMP_JAMP(555) = TMP_JAMP(376) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1216) ! used 8 times - TMP_JAMP(554) = AMP(783) + AMP(787) ! used 8 times - TMP_JAMP(553) = AMP(674) - AMP(789) ! used 8 times - TMP_JAMP(552) = TMP_JAMP(354) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(783) ! used 8 times - TMP_JAMP(551) = TMP_JAMP(355) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(674) ! used 8 times - TMP_JAMP(550) = TMP_JAMP(368) + TMP_JAMP(355) ! used 8 times - TMP_JAMP(549) = TMP_JAMP(370) + TMP_JAMP(261) ! used 8 times - TMP_JAMP(548) = AMP(692) - AMP(788) ! used 8 times - TMP_JAMP(547) = TMP_JAMP(335) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(692) ! used 8 times - TMP_JAMP(546) = TMP_JAMP(365) + TMP_JAMP(237) ! used 8 times - TMP_JAMP(545) = AMP(696) - AMP(1667) ! used 8 times - TMP_JAMP(544) = TMP_JAMP(337) + ((-0.000000000000000D+00 + TMP_JAMP(785) = TMP_JAMP(658) - TMP_JAMP(657) ! used 8 times + TMP_JAMP(784) = TMP_JAMP(658) + TMP_JAMP(643) ! used 8 times + TMP_JAMP(783) = TMP_JAMP(658) - TMP_JAMP(651) ! used 8 times + TMP_JAMP(782) = TMP_JAMP(657) - TMP_JAMP(651) ! 
used 8 times + TMP_JAMP(781) = TMP_JAMP(656) + TMP_JAMP(655) ! used 8 times + TMP_JAMP(780) = TMP_JAMP(656) - TMP_JAMP(651) ! used 8 times + TMP_JAMP(779) = TMP_JAMP(656) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(537) ! used 8 times + TMP_JAMP(778) = TMP_JAMP(655) + TMP_JAMP(651) ! used 8 times + TMP_JAMP(777) = TMP_JAMP(654) - TMP_JAMP(653) ! used 8 times + TMP_JAMP(776) = TMP_JAMP(654) + TMP_JAMP(652) ! used 8 times + TMP_JAMP(775) = TMP_JAMP(654) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(696) ! used 8 times - TMP_JAMP(543) = TMP_JAMP(354) + TMP_JAMP(335) ! used 8 times - TMP_JAMP(542) = TMP_JAMP(371) + TMP_JAMP(337) ! used 8 times - TMP_JAMP(541) = AMP(779) + AMP(1697) ! used 8 times - TMP_JAMP(540) = AMP(778) - AMP(1145) ! used 8 times - TMP_JAMP(539) = TMP_JAMP(277) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(688) ! used 8 times - TMP_JAMP(538) = TMP_JAMP(369) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(777) ! used 8 times - TMP_JAMP(537) = AMP(782) - AMP(1589) ! used 8 times - TMP_JAMP(536) = TMP_JAMP(358) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(782) ! used 8 times - TMP_JAMP(535) = TMP_JAMP(366) + TMP_JAMP(358) ! used 8 times - TMP_JAMP(534) = AMP(780) + AMP(1586) ! used 8 times - TMP_JAMP(533) = TMP_JAMP(364) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(772) ! used 8 times - TMP_JAMP(532) = AMP(1414) + AMP(1879) ! used 8 times - TMP_JAMP(531) = AMP(1413) + AMP(1414) ! used 8 times - TMP_JAMP(530) = AMP(1191) - AMP(1561) ! used 8 times - TMP_JAMP(529) = AMP(1189) - AMP(1881) ! used 8 times - TMP_JAMP(528) = AMP(1188) + AMP(1189) ! used 8 times - TMP_JAMP(527) = AMP(63) - AMP(1416) ! used 8 times - TMP_JAMP(526) = AMP(54) + AMP(1563) ! used 8 times - TMP_JAMP(525) = AMP(52) + AMP(63) ! used 8 times - TMP_JAMP(524) = TMP_JAMP(283) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(52) ! 
used 8 times - TMP_JAMP(523) = TMP_JAMP(375) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(48) ! used 8 times - TMP_JAMP(522) = TMP_JAMP(377) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1183) ! used 8 times - TMP_JAMP(521) = TMP_JAMP(378) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1188) ! used 8 times - TMP_JAMP(520) = TMP_JAMP(379) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1413) ! used 8 times - TMP_JAMP(519) = TMP_JAMP(382) - TMP_JAMP(378) ! used 8 times - TMP_JAMP(518) = TMP_JAMP(396) - TMP_JAMP(283) ! used 8 times - TMP_JAMP(517) = TMP_JAMP(405) + TMP_JAMP(385) ! used 8 times - TMP_JAMP(516) = AMP(1415) - AMP(1798) ! used 8 times - TMP_JAMP(515) = AMP(1411) - AMP(1415) ! used 8 times - TMP_JAMP(514) = AMP(747) + AMP(1562) ! used 8 times - TMP_JAMP(513) = AMP(746) + AMP(1800) ! used 8 times - TMP_JAMP(512) = AMP(744) - AMP(746) ! used 8 times - TMP_JAMP(511) = TMP_JAMP(365) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(744) ! used 8 times - TMP_JAMP(510) = TMP_JAMP(366) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(739) ! used 8 times - TMP_JAMP(509) = TMP_JAMP(367) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1411) ! used 8 times - TMP_JAMP(508) = TMP_JAMP(370) - TMP_JAMP(365) ! used 8 times - TMP_JAMP(507) = TMP_JAMP(393) + TMP_JAMP(373) ! used 8 times - TMP_JAMP(506) = AMP(1267) - AMP(1880) ! used 8 times - TMP_JAMP(505) = AMP(1266) + AMP(1267) ! used 8 times - TMP_JAMP(504) = AMP(745) - AMP(1268) ! used 8 times - TMP_JAMP(503) = TMP_JAMP(237) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(703) ! used 8 times - TMP_JAMP(502) = TMP_JAMP(338) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1266) ! used 8 times - TMP_JAMP(501) = TMP_JAMP(341) - TMP_JAMP(338) ! used 8 times - TMP_JAMP(500) = AMP(1269) - AMP(1615) ! used 8 times - TMP_JAMP(499) = AMP(53) + AMP(1617) ! 
used 8 times - TMP_JAMP(498) = TMP_JAMP(330) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(50) ! used 8 times - TMP_JAMP(497) = TMP_JAMP(332) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1261) ! used 8 times - TMP_JAMP(496) = AMP(905) + AMP(1799) ! used 8 times - TMP_JAMP(495) = AMP(904) - AMP(1190) ! used 8 times - TMP_JAMP(494) = AMP(903) - AMP(905) ! used 8 times - TMP_JAMP(493) = TMP_JAMP(220) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(862) ! used 8 times - TMP_JAMP(492) = TMP_JAMP(322) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(903) ! used 8 times - TMP_JAMP(491) = TMP_JAMP(325) - TMP_JAMP(322) ! used 8 times - TMP_JAMP(490) = AMP(906) + AMP(1616) ! used 8 times - TMP_JAMP(489) = TMP_JAMP(317) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(898) ! used 8 times - TMP_JAMP(488) = AMP(1149) + AMP(1777) ! used 8 times - TMP_JAMP(487) = TMP_JAMP(368) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(677) ! used 8 times - TMP_JAMP(486) = TMP_JAMP(372) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1149) ! used 8 times - TMP_JAMP(485) = TMP_JAMP(372) + TMP_JAMP(306) ! used 8 times - TMP_JAMP(484) = TMP_JAMP(379) - TMP_JAMP(367) ! used 8 times - TMP_JAMP(483) = TMP_JAMP(324) + ((0.000000000000000D+00, + TMP_JAMP(774) = TMP_JAMP(653) + TMP_JAMP(652) ! used 8 times + TMP_JAMP(773) = TMP_JAMP(650) + TMP_JAMP(649) ! used 8 times + TMP_JAMP(772) = TMP_JAMP(650) - TMP_JAMP(645) ! used 8 times + TMP_JAMP(771) = TMP_JAMP(649) + TMP_JAMP(645) ! used 8 times + TMP_JAMP(770) = TMP_JAMP(649) + TMP_JAMP(647) ! used 8 times + TMP_JAMP(769) = TMP_JAMP(648) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(837) ! used 8 times + TMP_JAMP(768) = TMP_JAMP(648) - TMP_JAMP(642) ! used 8 times + TMP_JAMP(767) = TMP_JAMP(648) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(836) ! 
used 8 times - TMP_JAMP(482) = TMP_JAMP(395) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(153) ! used 8 times - TMP_JAMP(481) = TMP_JAMP(401) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1148) ! used 8 times - TMP_JAMP(480) = AMP(624) + AMP(628) ! used 8 times - TMP_JAMP(479) = TMP_JAMP(380) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(624) ! used 8 times - TMP_JAMP(478) = TMP_JAMP(380) - TMP_JAMP(297) ! used 8 times - TMP_JAMP(477) = TMP_JAMP(381) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(515) ! used 8 times - TMP_JAMP(476) = TMP_JAMP(340) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(533) ! used 8 times - TMP_JAMP(475) = TMP_JAMP(389) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(623) ! used 8 times - TMP_JAMP(474) = AMP(1006) + AMP(1012) ! used 8 times - TMP_JAMP(473) = AMP(1004) - AMP(1006) ! used 8 times - TMP_JAMP(472) = AMP(990) - AMP(1651) ! used 8 times - TMP_JAMP(471) = AMP(988) - AMP(1014) ! used 8 times - TMP_JAMP(470) = AMP(986) - AMP(988) ! used 8 times - TMP_JAMP(469) = AMP(495) - AMP(1008) ! used 8 times - TMP_JAMP(468) = AMP(486) + AMP(1653) ! used 8 times - TMP_JAMP(467) = TMP_JAMP(297) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(484) ! used 8 times - TMP_JAMP(466) = TMP_JAMP(388) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(480) ! used 8 times - TMP_JAMP(465) = TMP_JAMP(391) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(986) ! used 8 times - TMP_JAMP(464) = TMP_JAMP(392) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(984) ! used 8 times - TMP_JAMP(463) = TMP_JAMP(393) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1004) ! used 8 times - TMP_JAMP(462) = TMP_JAMP(402) - TMP_JAMP(391) ! used 8 times - TMP_JAMP(461) = TMP_JAMP(405) - TMP_JAMP(393) ! used 8 times - TMP_JAMP(460) = AMP(1007) - AMP(1759) ! used 8 times - TMP_JAMP(459) = AMP(645) + AMP(1652) ! 
used 8 times - TMP_JAMP(458) = AMP(644) + AMP(1761) ! used 8 times - TMP_JAMP(457) = TMP_JAMP(370) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(641) ! used 8 times - TMP_JAMP(456) = TMP_JAMP(371) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(639) ! used 8 times - TMP_JAMP(455) = TMP_JAMP(373) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1002) ! used 8 times - TMP_JAMP(454) = AMP(997) - AMP(1013) ! used 8 times - TMP_JAMP(453) = AMP(995) - AMP(997) ! used 8 times - TMP_JAMP(452) = AMP(654) - AMP(998) ! used 8 times - TMP_JAMP(451) = TMP_JAMP(261) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(643) ! used 8 times - TMP_JAMP(450) = TMP_JAMP(359) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(995) ! used 8 times - TMP_JAMP(449) = TMP_JAMP(363) - TMP_JAMP(359) ! used 8 times - TMP_JAMP(448) = AMP(999) - AMP(1705) ! used 8 times - TMP_JAMP(447) = AMP(485) + AMP(1707) ! used 8 times - TMP_JAMP(446) = TMP_JAMP(347) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(482) ! used 8 times - TMP_JAMP(445) = TMP_JAMP(349) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(993) ! used 8 times - TMP_JAMP(444) = AMP(813) - AMP(989) ! used 8 times - TMP_JAMP(443) = AMP(803) + AMP(1760) ! used 8 times - TMP_JAMP(442) = TMP_JAMP(223) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(802) ! used 8 times - TMP_JAMP(441) = TMP_JAMP(325) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(800) ! used 8 times - TMP_JAMP(440) = AMP(804) + AMP(1706) ! used 8 times - TMP_JAMP(439) = TMP_JAMP(321) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(798) ! used 8 times - TMP_JAMP(438) = TMP_JAMP(396) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(149) ! used 8 times - TMP_JAMP(437) = TMP_JAMP(397) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(137) ! 
used 8 times - TMP_JAMP(436) = TMP_JAMP(361) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(143) ! used 8 times - TMP_JAMP(435) = AMP(283) - AMP(316) ! used 8 times - TMP_JAMP(434) = AMP(265) + AMP(318) ! used 8 times - TMP_JAMP(433) = AMP(263) + AMP(286) ! used 8 times - TMP_JAMP(432) = AMP(194) + AMP(281) ! used 8 times - TMP_JAMP(431) = AMP(180) - AMP(288) ! used 8 times - TMP_JAMP(430) = TMP_JAMP(306) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(182) ! used 8 times - TMP_JAMP(429) = TMP_JAMP(400) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(178) ! used 8 times - TMP_JAMP(428) = TMP_JAMP(402) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(262) ! used 8 times - TMP_JAMP(427) = TMP_JAMP(404) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(261) ! used 8 times - TMP_JAMP(426) = TMP_JAMP(405) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(280) ! used 8 times - TMP_JAMP(425) = AMP(282) + AMP(304) ! used 8 times - TMP_JAMP(424) = AMP(199) - AMP(306) ! used 8 times - TMP_JAMP(423) = AMP(198) - AMP(287) ! used 8 times - TMP_JAMP(422) = TMP_JAMP(382) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(197) ! used 8 times - TMP_JAMP(421) = TMP_JAMP(384) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(196) ! used 8 times - TMP_JAMP(420) = TMP_JAMP(385) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(279) ! used 8 times - TMP_JAMP(419) = AMP(274) + AMP(317) ! used 8 times - TMP_JAMP(418) = AMP(212) + AMP(273) ! used 8 times - TMP_JAMP(417) = TMP_JAMP(264) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(200) ! used 8 times - TMP_JAMP(416) = TMP_JAMP(363) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(271) ! used 8 times - TMP_JAMP(415) = AMP(272) + AMP(295) ! used 8 times - TMP_JAMP(414) = AMP(181) - AMP(297) ! 
used 8 times - TMP_JAMP(413) = TMP_JAMP(356) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(179) ! used 8 times - TMP_JAMP(412) = TMP_JAMP(357) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(270) ! used 8 times - TMP_JAMP(411) = AMP(230) + AMP(264) ! used 8 times - TMP_JAMP(410) = AMP(217) - AMP(305) ! used 8 times - TMP_JAMP(409) = TMP_JAMP(240) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(218) ! used 8 times - TMP_JAMP(408) = TMP_JAMP(341) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(215) ! used 8 times - TMP_JAMP(407) = AMP(216) - AMP(296) ! used 8 times - TMP_JAMP(406) = TMP_JAMP(336) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(214) ! used 8 times - TMP_JAMP(1123) = TMP_JAMP(1023) + AMP(445) ! used 8 times - TMP_JAMP(1122) = TMP_JAMP(1024) - AMP(477) ! used 8 times - TMP_JAMP(1121) = TMP_JAMP(1025) + AMP(475) ! used 8 times - TMP_JAMP(1120) = TMP_JAMP(1027) - AMP(1827) ! used 8 times - TMP_JAMP(1119) = TMP_JAMP(1030) + AMP(1825) ! used 8 times - TMP_JAMP(1118) = TMP_JAMP(992) + AMP(1517) ! used 8 times - TMP_JAMP(1117) = TMP_JAMP(993) - AMP(1692) ! used 8 times - TMP_JAMP(1116) = TMP_JAMP(995) - AMP(1660) ! used 8 times - TMP_JAMP(1115) = TMP_JAMP(996) + AMP(1074) ! used 8 times - TMP_JAMP(1114) = TMP_JAMP(997) - AMP(1072) ! used 8 times - TMP_JAMP(1113) = TMP_JAMP(965) - AMP(446) ! used 8 times - TMP_JAMP(1112) = TMP_JAMP(966) - AMP(465) ! used 8 times - TMP_JAMP(1111) = TMP_JAMP(949) - AMP(1579) ! used 8 times - TMP_JAMP(1110) = TMP_JAMP(950) - AMP(1887) ! used 8 times - TMP_JAMP(1109) = TMP_JAMP(951) + AMP(1885) ! used 8 times - TMP_JAMP(1108) = TMP_JAMP(921) + AMP(1770) ! used 8 times - TMP_JAMP(1107) = TMP_JAMP(903) + AMP(1571) ! used 8 times - TMP_JAMP(1106) = TMP_JAMP(905) - AMP(1806) ! used 8 times - TMP_JAMP(1105) = TMP_JAMP(907) + AMP(1804) ! used 8 times - TMP_JAMP(1104) = TMP_JAMP(895) - AMP(476) ! used 8 times - TMP_JAMP(1103) = TMP_JAMP(863) + AMP(454) ! 
used 8 times - TMP_JAMP(1102) = TMP_JAMP(849) - AMP(1525) ! used 8 times - TMP_JAMP(1101) = TMP_JAMP(850) - AMP(1860) ! used 8 times - TMP_JAMP(1100) = TMP_JAMP(851) + AMP(1858) ! used 8 times - TMP_JAMP(1099) = TMP_JAMP(834) - AMP(1714) ! used 8 times - TMP_JAMP(1098) = TMP_JAMP(818) - AMP(1752) ! used 8 times - TMP_JAMP(1097) = TMP_JAMP(820) + AMP(1750) ! used 8 times - TMP_JAMP(1096) = TMP_JAMP(805) - AMP(464) ! used 8 times - TMP_JAMP(1095) = TMP_JAMP(781) - AMP(455) ! used 8 times - TMP_JAMP(1094) = TMP_JAMP(765) - AMP(1633) ! used 8 times - TMP_JAMP(1093) = TMP_JAMP(752) + AMP(1641) ! used 8 times - TMP_JAMP(1092) = TMP_JAMP(718) + AMP(1715) ! used 8 times - TMP_JAMP(1091) = TMP_JAMP(712) - AMP(1136) ! used 8 times - TMP_JAMP(1090) = TMP_JAMP(602) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(600) ! used 8 times - TMP_JAMP(1089) = TMP_JAMP(603) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(601) ! used 8 times - TMP_JAMP(1088) = TMP_JAMP(604) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(598) ! used 8 times - TMP_JAMP(1087) = TMP_JAMP(605) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(599) ! used 8 times - TMP_JAMP(1086) = TMP_JAMP(606) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(597) ! used 8 times - TMP_JAMP(1085) = TMP_JAMP(587) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(586) ! used 8 times - TMP_JAMP(1084) = TMP_JAMP(588) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(585) ! used 8 times - TMP_JAMP(1083) = TMP_JAMP(589) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(584) ! used 8 times - TMP_JAMP(1082) = TMP_JAMP(567) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(566) ! used 8 times - TMP_JAMP(1081) = TMP_JAMP(568) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(565) ! 
used 8 times - TMP_JAMP(1080) = TMP_JAMP(557) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(556) ! used 8 times - TMP_JAMP(1079) = TMP_JAMP(558) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(555) ! used 8 times - TMP_JAMP(1078) = TMP_JAMP(540) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(539) ! used 8 times - TMP_JAMP(1077) = TMP_JAMP(541) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(538) ! used 8 times - TMP_JAMP(1076) = TMP_JAMP(534) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(533) ! used 8 times - TMP_JAMP(1075) = TMP_JAMP(526) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(523) ! used 8 times - TMP_JAMP(1074) = TMP_JAMP(530) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(522) ! used 8 times - TMP_JAMP(1073) = TMP_JAMP(514) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(510) ! used 8 times - TMP_JAMP(1072) = TMP_JAMP(504) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(503) ! used 8 times - TMP_JAMP(1071) = TMP_JAMP(499) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(498) ! used 8 times - TMP_JAMP(1070) = TMP_JAMP(500) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(497) ! used 8 times - TMP_JAMP(1069) = TMP_JAMP(495) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(493) ! used 8 times - TMP_JAMP(1068) = TMP_JAMP(490) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(489) ! used 8 times - TMP_JAMP(1067) = TMP_JAMP(487) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1779) ! used 8 times - TMP_JAMP(1066) = TMP_JAMP(483) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1778) ! used 8 times - TMP_JAMP(1065) = TMP_JAMP(481) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1534) ! 
used 8 times - TMP_JAMP(1064) = TMP_JAMP(482) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1536) ! used 8 times - TMP_JAMP(1063) = TMP_JAMP(477) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(630) ! used 8 times - TMP_JAMP(1062) = TMP_JAMP(476) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(629) ! used 8 times - TMP_JAMP(1061) = TMP_JAMP(475) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1535) ! used 8 times - TMP_JAMP(1060) = TMP_JAMP(468) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(466) ! used 8 times - TMP_JAMP(1059) = TMP_JAMP(469) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(467) ! used 8 times - TMP_JAMP(1058) = TMP_JAMP(472) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(464) ! used 8 times - TMP_JAMP(1057) = TMP_JAMP(458) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(457) ! used 8 times - TMP_JAMP(1056) = TMP_JAMP(459) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(456) ! used 8 times - TMP_JAMP(1055) = TMP_JAMP(460) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(455) ! used 8 times - TMP_JAMP(1054) = TMP_JAMP(452) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(451) ! used 8 times - TMP_JAMP(1053) = TMP_JAMP(447) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(446) ! used 8 times - TMP_JAMP(1052) = TMP_JAMP(448) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(445) ! used 8 times - TMP_JAMP(1051) = TMP_JAMP(443) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(441) ! used 8 times - TMP_JAMP(1050) = TMP_JAMP(444) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(442) ! used 8 times - TMP_JAMP(1049) = TMP_JAMP(440) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(439) ! 
used 8 times - TMP_JAMP(1048) = TMP_JAMP(437) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(156) ! used 8 times - TMP_JAMP(1047) = TMP_JAMP(438) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(154) ! used 8 times - TMP_JAMP(1046) = TMP_JAMP(436) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(155) ! used 8 times - TMP_JAMP(1045) = TMP_JAMP(431) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(429) ! used 8 times - TMP_JAMP(1044) = TMP_JAMP(432) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(430) ! used 8 times - TMP_JAMP(1043) = TMP_JAMP(433) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(427) ! used 8 times - TMP_JAMP(1042) = TMP_JAMP(434) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(428) ! used 8 times - TMP_JAMP(1041) = TMP_JAMP(435) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(426) ! used 8 times - TMP_JAMP(1040) = TMP_JAMP(423) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(421) ! used 8 times - TMP_JAMP(1039) = TMP_JAMP(424) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(422) ! used 8 times - TMP_JAMP(1038) = TMP_JAMP(425) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(420) ! used 8 times - TMP_JAMP(1037) = TMP_JAMP(418) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(417) ! used 8 times - TMP_JAMP(1036) = TMP_JAMP(419) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(416) ! used 8 times - TMP_JAMP(1035) = TMP_JAMP(414) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(413) ! used 8 times - TMP_JAMP(1034) = TMP_JAMP(415) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(412) ! used 8 times - TMP_JAMP(1033) = TMP_JAMP(410) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(408) ! 
used 8 times - TMP_JAMP(1032) = TMP_JAMP(411) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(409) ! used 8 times - TMP_JAMP(1031) = TMP_JAMP(407) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(406) ! used 8 times - TMP_JAMP(1140) = TMP_JAMP(1012) + AMP(1518) ! used 7 times - TMP_JAMP(1139) = TMP_JAMP(1019) - AMP(447) ! used 7 times - TMP_JAMP(1138) = TMP_JAMP(988) + AMP(1662) ! used 7 times - TMP_JAMP(1137) = TMP_JAMP(998) + AMP(1690) ! used 7 times - TMP_JAMP(1136) = TMP_JAMP(947) - AMP(1570) ! used 7 times - TMP_JAMP(1135) = TMP_JAMP(920) + AMP(1661) ! used 7 times - TMP_JAMP(1134) = TMP_JAMP(925) - AMP(1691) ! used 7 times - TMP_JAMP(1133) = TMP_JAMP(861) - AMP(456) ! used 7 times - TMP_JAMP(1132) = TMP_JAMP(845) + AMP(1527) ! used 7 times - TMP_JAMP(1131) = TMP_JAMP(831) + AMP(1716) ! used 7 times - TMP_JAMP(1130) = TMP_JAMP(774) - AMP(1301) ! used 7 times - TMP_JAMP(1129) = TMP_JAMP(753) - AMP(1639) ! used 7 times - TMP_JAMP(1128) = TMP_JAMP(741) + AMP(1769) ! used 7 times - TMP_JAMP(1127) = TMP_JAMP(705) + AMP(1634) ! used 7 times - TMP_JAMP(1126) = TMP_JAMP(700) + AMP(1640) ! used 7 times - TMP_JAMP(1125) = TMP_JAMP(695) + AMP(1852) ! used 7 times - TMP_JAMP(1124) = TMP_JAMP(674) + AMP(1744) ! used 7 times - TMP_JAMP(1158) = TMP_JAMP(1026) - AMP(1516) ! used 6 times - TMP_JAMP(1157) = TMP_JAMP(967) + AMP(463) ! used 6 times - TMP_JAMP(1156) = TMP_JAMP(973) - AMP(1826) ! used 6 times - TMP_JAMP(1155) = TMP_JAMP(941) + AMP(1572) ! used 6 times - TMP_JAMP(1154) = TMP_JAMP(928) - AMP(1768) ! used 6 times - TMP_JAMP(1153) = TMP_JAMP(904) + AMP(1580) ! used 6 times - TMP_JAMP(1152) = TMP_JAMP(943) + AMP(1581) ! used 6 times - TMP_JAMP(1151) = TMP_JAMP(878) + AMP(1073) ! used 6 times - TMP_JAMP(1150) = TMP_JAMP(846) - AMP(1476) ! used 6 times - TMP_JAMP(1149) = TMP_JAMP(817) + AMP(1526) ! used 6 times - TMP_JAMP(1148) = TMP_JAMP(762) + AMP(1635) ! used 6 times - TMP_JAMP(1147) = TMP_JAMP(742) - AMP(1049) ! 
used 6 times - TMP_JAMP(1146) = TMP_JAMP(728) - AMP(1214) ! used 6 times - TMP_JAMP(1145) = TMP_JAMP(692) - AMP(1854) ! used 6 times - TMP_JAMP(1144) = TMP_JAMP(671) - AMP(1746) ! used 6 times - TMP_JAMP(1143) = TMP_JAMP(650) + AMP(1626) ! used 6 times - TMP_JAMP(1142) = TMP_JAMP(652) - AMP(1624) ! used 6 times - TMP_JAMP(1141) = TMP_JAMP(633) + AMP(1625) ! used 6 times - TMP_JAMP(1168) = TMP_JAMP(1015) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1010) ! used 5 times - TMP_JAMP(1167) = TMP_JAMP(971) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(964) ! used 5 times - TMP_JAMP(1166) = TMP_JAMP(924) - AMP(1124) ! used 5 times - TMP_JAMP(1165) = TMP_JAMP(894) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(890) ! used 5 times - TMP_JAMP(1164) = TMP_JAMP(876) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(874) ! used 5 times - TMP_JAMP(1163) = TMP_JAMP(793) - AMP(1886) ! used 5 times - TMP_JAMP(1162) = TMP_JAMP(776) - AMP(1859) ! used 5 times - TMP_JAMP(1161) = TMP_JAMP(791) - AMP(1292) ! used 5 times - TMP_JAMP(1160) = TMP_JAMP(729) - AMP(1805) ! used 5 times - TMP_JAMP(1159) = TMP_JAMP(713) - AMP(1751) ! used 5 times - TMP_JAMP(1837) = AMP(437) + AMP(472) ! used 4 times - TMP_JAMP(1836) = AMP(419) - AMP(474) ! used 4 times - TMP_JAMP(1835) = AMP(416) + AMP(451) ! used 4 times - TMP_JAMP(1834) = AMP(350) - AMP(453) ! used 4 times - TMP_JAMP(1833) = AMP(85) - AMP(1515) ! used 4 times - TMP_JAMP(1832) = TMP_JAMP(1011) + AMP(352) ! used 4 times - TMP_JAMP(1831) = TMP_JAMP(1018) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(85) ! used 4 times - TMP_JAMP(1830) = TMP_JAMP(1022) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1011) ! used 4 times - TMP_JAMP(1829) = TMP_JAMP(1029) + TMP_JAMP(1022) ! used 4 times - TMP_JAMP(1828) = TMP_JAMP(1119) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(416) ! 
used 4 times - TMP_JAMP(1827) = TMP_JAMP(1120) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(350) ! used 4 times - TMP_JAMP(1826) = TMP_JAMP(1120) + TMP_JAMP(1119) ! used 4 times - TMP_JAMP(1825) = TMP_JAMP(1121) + TMP_JAMP(1017) ! used 4 times - TMP_JAMP(1824) = TMP_JAMP(1122) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(419) ! used 4 times - TMP_JAMP(1823) = TMP_JAMP(1123) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(416) ! used 4 times - TMP_JAMP(1822) = TMP_JAMP(1123) + TMP_JAMP(1122) ! used 4 times - TMP_JAMP(1821) = TMP_JAMP(1158) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1118) ! used 4 times - TMP_JAMP(1820) = TMP_JAMP(1168) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(98) ! used 4 times - TMP_JAMP(1819) = TMP_JAMP(1168) + TMP_JAMP(1016) ! used 4 times - TMP_JAMP(1818) = AMP(1061) + AMP(1069) ! used 4 times - TMP_JAMP(1817) = AMP(1043) - AMP(1071) ! used 4 times - TMP_JAMP(1816) = AMP(1041) + AMP(1663) ! used 4 times - TMP_JAMP(1815) = AMP(593) - AMP(1514) ! used 4 times - TMP_JAMP(1814) = AMP(510) - AMP(1665) ! used 4 times - TMP_JAMP(1813) = TMP_JAMP(987) + AMP(511) ! used 4 times - TMP_JAMP(1812) = TMP_JAMP(991) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(987) ! used 4 times - TMP_JAMP(1811) = TMP_JAMP(994) - TMP_JAMP(991) ! used 4 times - TMP_JAMP(1810) = TMP_JAMP(1016) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1043) ! used 4 times - TMP_JAMP(1809) = TMP_JAMP(1114) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1061) ! used 4 times - TMP_JAMP(1808) = TMP_JAMP(1114) + TMP_JAMP(1017) ! used 4 times - TMP_JAMP(1807) = TMP_JAMP(1115) + TMP_JAMP(1016) ! used 4 times - TMP_JAMP(1806) = TMP_JAMP(1116) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1041) ! used 4 times - TMP_JAMP(1805) = TMP_JAMP(1116) + TMP_JAMP(1115) ! 
used 4 times - TMP_JAMP(1804) = TMP_JAMP(1118) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(593) ! used 4 times - TMP_JAMP(1803) = TMP_JAMP(1137) + TMP_JAMP(1116) ! used 4 times - TMP_JAMP(1802) = TMP_JAMP(1137) + TMP_JAMP(1117) ! used 4 times - TMP_JAMP(1801) = TMP_JAMP(1140) + TMP_JAMP(1118) ! used 4 times - TMP_JAMP(1800) = AMP(443) + AMP(466) ! used 4 times - TMP_JAMP(1799) = AMP(368) - AMP(452) ! used 4 times - TMP_JAMP(1798) = AMP(361) - AMP(468) ! used 4 times - TMP_JAMP(1797) = TMP_JAMP(972) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(627) ! used 4 times - TMP_JAMP(1796) = TMP_JAMP(1029) - TMP_JAMP(994) ! used 4 times - TMP_JAMP(1795) = TMP_JAMP(1112) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(361) ! used 4 times - TMP_JAMP(1794) = TMP_JAMP(1112) - TMP_JAMP(969) ! used 4 times - TMP_JAMP(1793) = TMP_JAMP(1139) + TMP_JAMP(1113) ! used 4 times - TMP_JAMP(1792) = TMP_JAMP(1157) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(443) ! used 4 times - TMP_JAMP(1791) = TMP_JAMP(1157) - TMP_JAMP(972) ! used 4 times - TMP_JAMP(1790) = AMP(1459) + AMP(1888) ! used 4 times - TMP_JAMP(1789) = AMP(1210) - AMP(1890) ! used 4 times - TMP_JAMP(1788) = AMP(1204) + AMP(1573) ! used 4 times - TMP_JAMP(1787) = TMP_JAMP(944) - AMP(1467) ! used 4 times - TMP_JAMP(1786) = TMP_JAMP(969) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1210) ! used 4 times - TMP_JAMP(1785) = TMP_JAMP(1018) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(110) ! used 4 times - TMP_JAMP(1784) = TMP_JAMP(1109) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1459) ! used 4 times - TMP_JAMP(1783) = TMP_JAMP(1111) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1204) ! used 4 times - TMP_JAMP(1782) = TMP_JAMP(1111) - TMP_JAMP(1110) ! used 4 times - TMP_JAMP(1781) = TMP_JAMP(1155) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(93) ! 
used 4 times - TMP_JAMP(1780) = AMP(1063) + AMP(1765) ! used 4 times - TMP_JAMP(1779) = AMP(669) - AMP(1664) ! used 4 times - TMP_JAMP(1778) = AMP(660) - AMP(1767) ! used 4 times - TMP_JAMP(1777) = TMP_JAMP(930) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1152) ! used 4 times - TMP_JAMP(1776) = TMP_JAMP(1108) + ((0.000000000000000D+00, + TMP_JAMP(766) = TMP_JAMP(647) - TMP_JAMP(646) ! used 8 times + TMP_JAMP(765) = TMP_JAMP(647) - TMP_JAMP(640) ! used 8 times + TMP_JAMP(764) = TMP_JAMP(646) - TMP_JAMP(640) ! used 8 times + TMP_JAMP(763) = TMP_JAMP(645) + TMP_JAMP(643) ! used 8 times + TMP_JAMP(762) = TMP_JAMP(644) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(855) ! used 8 times + TMP_JAMP(761) = TMP_JAMP(644) - TMP_JAMP(643) ! used 8 times + TMP_JAMP(760) = TMP_JAMP(644) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(854) ! used 8 times + TMP_JAMP(759) = TMP_JAMP(643) - TMP_JAMP(642) ! used 8 times + TMP_JAMP(758) = TMP_JAMP(643) - TMP_JAMP(639) ! used 8 times + TMP_JAMP(757) = TMP_JAMP(642) - TMP_JAMP(639) ! used 8 times + TMP_JAMP(756) = TMP_JAMP(641) - TMP_JAMP(640) ! used 8 times + TMP_JAMP(755) = TMP_JAMP(641) + TMP_JAMP(639) ! used 8 times + TMP_JAMP(754) = TMP_JAMP(641) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(940) ! used 8 times + TMP_JAMP(753) = TMP_JAMP(640) + TMP_JAMP(639) ! used 8 times + TMP_JAMP(752) = TMP_JAMP(608) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(942) ! used 8 times + TMP_JAMP(751) = TMP_JAMP(608) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(944) ! used 8 times + TMP_JAMP(750) = TMP_JAMP(604) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(833) ! used 8 times + TMP_JAMP(749) = TMP_JAMP(604) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(835) ! used 8 times + TMP_JAMP(748) = TMP_JAMP(602) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(851) ! 
used 8 times + TMP_JAMP(747) = TMP_JAMP(602) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(853) ! used 8 times + TMP_JAMP(746) = AMP(597) - AMP(599) ! used 8 times + TMP_JAMP(745) = AMP(592) + AMP(600) ! used 8 times + TMP_JAMP(744) = AMP(502) + AMP(513) ! used 8 times + TMP_JAMP(743) = AMP(498) + AMP(504) ! used 8 times + TMP_JAMP(742) = AMP(626) + AMP(631) ! used 8 times + TMP_JAMP(741) = AMP(526) + AMP(598) ! used 8 times + TMP_JAMP(740) = AMP(517) - AMP(633) ! used 8 times + TMP_JAMP(739) = AMP(756) - AMP(758) ! used 8 times + TMP_JAMP(738) = AMP(685) + AMP(757) ! used 8 times + TMP_JAMP(737) = AMP(659) + AMP(662) ! used 8 times + TMP_JAMP(736) = AMP(657) + AMP(663) ! used 8 times + TMP_JAMP(735) = AMP(768) - AMP(770) ! used 8 times + TMP_JAMP(734) = AMP(763) + AMP(771) ! used 8 times + TMP_JAMP(733) = AMP(751) + AMP(759) ! used 8 times + TMP_JAMP(732) = AMP(661) + AMP(672) ! used 8 times + TMP_JAMP(731) = AMP(785) + AMP(790) ! used 8 times + TMP_JAMP(730) = AMP(676) - AMP(792) ! used 8 times + TMP_JAMP(729) = AMP(500) + AMP(503) ! used 8 times + TMP_JAMP(728) = AMP(609) - AMP(611) ! used 8 times + TMP_JAMP(727) = AMP(604) + AMP(612) ! used 8 times + TMP_JAMP(726) = AMP(535) - AMP(632) ! used 8 times + TMP_JAMP(725) = AMP(706) + AMP(769) ! used 8 times + TMP_JAMP(724) = AMP(694) - AMP(791) ! used 8 times + TMP_JAMP(723) = AMP(547) + AMP(610) ! used 8 times + TMP_JAMP(722) = AMP(820) + AMP(831) ! used 8 times + TMP_JAMP(721) = AMP(818) + AMP(821) ! used 8 times + TMP_JAMP(720) = AMP(927) - AMP(929) ! used 8 times + TMP_JAMP(719) = AMP(865) + AMP(928) ! used 8 times + TMP_JAMP(718) = AMP(816) + AMP(822) ! used 8 times + TMP_JAMP(717) = AMP(936) - AMP(938) ! used 8 times + TMP_JAMP(716) = AMP(847) + AMP(937) ! used 8 times + TMP_JAMP(715) = AMP(922) + AMP(930) ! used 8 times + TMP_JAMP(714) = AMP(931) + AMP(939) ! used 8 times + TMP_JAMP(713) = AMP(585) - AMP(587) ! used 8 times + TMP_JAMP(712) = AMP(580) + AMP(588) ! 
used 8 times + TMP_JAMP(711) = AMP(544) + AMP(586) ! used 8 times + TMP_JAMP(710) = AMP(915) - AMP(917) ! used 8 times + TMP_JAMP(709) = AMP(844) + AMP(916) ! used 8 times + TMP_JAMP(708) = AMP(910) + AMP(918) ! used 8 times + TMP_JAMP(707) = AMP(618) - AMP(620) ! used 8 times + TMP_JAMP(706) = AMP(613) + AMP(621) ! used 8 times + TMP_JAMP(705) = AMP(529) + AMP(619) ! used 8 times + TMP_JAMP(704) = AMP(783) + AMP(787) ! used 8 times + TMP_JAMP(703) = AMP(674) - AMP(789) ! used 8 times + TMP_JAMP(702) = AMP(692) - AMP(788) ! used 8 times + TMP_JAMP(701) = AMP(777) - AMP(779) ! used 8 times + TMP_JAMP(700) = AMP(688) + AMP(778) ! used 8 times + TMP_JAMP(699) = AMP(772) + AMP(780) ! used 8 times + TMP_JAMP(698) = AMP(744) - AMP(746) ! used 8 times + TMP_JAMP(697) = AMP(739) + AMP(747) ! used 8 times + TMP_JAMP(696) = AMP(703) + AMP(745) ! used 8 times + TMP_JAMP(695) = AMP(903) - AMP(905) ! used 8 times + TMP_JAMP(694) = AMP(862) + AMP(904) ! used 8 times + TMP_JAMP(693) = AMP(898) + AMP(906) ! used 8 times + TMP_JAMP(692) = AMP(624) + AMP(628) ! used 8 times + TMP_JAMP(691) = AMP(515) - AMP(630) ! used 8 times + TMP_JAMP(690) = AMP(533) - AMP(629) ! used 8 times + TMP_JAMP(689) = AMP(484) + AMP(495) ! used 8 times + TMP_JAMP(688) = AMP(480) + AMP(486) ! used 8 times + TMP_JAMP(687) = AMP(641) + AMP(644) ! used 8 times + TMP_JAMP(686) = AMP(639) + AMP(645) ! used 8 times + TMP_JAMP(685) = AMP(643) + AMP(654) ! used 8 times + TMP_JAMP(684) = AMP(482) + AMP(485) ! used 8 times + TMP_JAMP(683) = AMP(802) + AMP(813) ! used 8 times + TMP_JAMP(682) = AMP(800) + AMP(803) ! used 8 times + TMP_JAMP(681) = AMP(798) + AMP(804) ! used 8 times + TMP_JAMP(1043) = TMP_JAMP(834) + TMP_JAMP(785) ! used 4 times + TMP_JAMP(1042) = TMP_JAMP(834) + TMP_JAMP(819) ! used 4 times + TMP_JAMP(1041) = TMP_JAMP(831) - TMP_JAMP(680) ! used 4 times + TMP_JAMP(1040) = TMP_JAMP(829) + TMP_JAMP(827) ! 
used 4 times + TMP_JAMP(1039) = TMP_JAMP(829) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(707) ! used 4 times + TMP_JAMP(1038) = TMP_JAMP(827) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(729) ! used 4 times + TMP_JAMP(1037) = TMP_JAMP(822) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(692) ! used 4 times + TMP_JAMP(1036) = TMP_JAMP(821) - TMP_JAMP(785) ! used 4 times + TMP_JAMP(1035) = TMP_JAMP(817) - TMP_JAMP(676) ! used 4 times + TMP_JAMP(1034) = TMP_JAMP(817) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(741) ! used 4 times + TMP_JAMP(1033) = TMP_JAMP(814) + TMP_JAMP(771) ! used 4 times + TMP_JAMP(1032) = TMP_JAMP(814) + TMP_JAMP(792) ! used 4 times + TMP_JAMP(1031) = TMP_JAMP(814) + TMP_JAMP(801) ! used 4 times + TMP_JAMP(1030) = TMP_JAMP(811) + TMP_JAMP(672) ! used 4 times + TMP_JAMP(1029) = TMP_JAMP(810) + TMP_JAMP(809) ! used 4 times + TMP_JAMP(1028) = TMP_JAMP(810) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(701) ! used 4 times + TMP_JAMP(1027) = TMP_JAMP(809) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(732) ! used 4 times + TMP_JAMP(1026) = TMP_JAMP(804) + TMP_JAMP(767) ! used 4 times + TMP_JAMP(1025) = TMP_JAMP(803) + TMP_JAMP(792) ! used 4 times + TMP_JAMP(1024) = TMP_JAMP(798) - TMP_JAMP(668) ! used 4 times + TMP_JAMP(1023) = TMP_JAMP(798) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(734) ! used 4 times + TMP_JAMP(1022) = TMP_JAMP(792) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(731) ! used 4 times + TMP_JAMP(1021) = TMP_JAMP(792) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(703) ! used 4 times + TMP_JAMP(1020) = TMP_JAMP(786) + TMP_JAMP(760) ! used 4 times + TMP_JAMP(1019) = TMP_JAMP(785) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(728) ! used 4 times + TMP_JAMP(1018) = TMP_JAMP(785) + TMP_JAMP(759) ! 
used 4 times + TMP_JAMP(1017) = TMP_JAMP(785) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(712) ! used 4 times + TMP_JAMP(1016) = TMP_JAMP(779) - AMP(617) ! used 4 times + TMP_JAMP(1015) = TMP_JAMP(779) + TMP_JAMP(775) ! used 4 times + TMP_JAMP(1014) = TMP_JAMP(778) - TMP_JAMP(675) ! used 4 times + TMP_JAMP(1013) = TMP_JAMP(778) + TMP_JAMP(674) ! used 4 times + TMP_JAMP(1012) = TMP_JAMP(778) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(723) ! used 4 times + TMP_JAMP(1011) = TMP_JAMP(775) - AMP(776) ! used 4 times + TMP_JAMP(1010) = TMP_JAMP(774) - TMP_JAMP(665) ! used 4 times + TMP_JAMP(1009) = TMP_JAMP(774) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(724) ! used 4 times + TMP_JAMP(1008) = TMP_JAMP(773) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(721) ! used 4 times + TMP_JAMP(1007) = TMP_JAMP(772) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(718) ! used 4 times + TMP_JAMP(1006) = TMP_JAMP(771) + TMP_JAMP(759) ! used 4 times + TMP_JAMP(1005) = TMP_JAMP(771) + TMP_JAMP(766) ! used 4 times + TMP_JAMP(1004) = TMP_JAMP(769) - AMP(832) ! used 4 times + TMP_JAMP(1003) = TMP_JAMP(769) + AMP(819) ! used 4 times + TMP_JAMP(1002) = TMP_JAMP(766) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(720) ! used 4 times + TMP_JAMP(1001) = TMP_JAMP(759) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(717) ! used 4 times + TMP_JAMP(1000) = TMP_JAMP(753) - TMP_JAMP(646) ! used 4 times + TMP_JAMP(999) = TMP_JAMP(752) + AMP(943) ! used 4 times + TMP_JAMP(998) = TMP_JAMP(751) + TMP_JAMP(749) ! used 4 times + TMP_JAMP(997) = TMP_JAMP(751) - TMP_JAMP(747) ! used 4 times + TMP_JAMP(996) = TMP_JAMP(750) - AMP(832) ! used 4 times + TMP_JAMP(995) = TMP_JAMP(750) + TMP_JAMP(748) ! used 4 times + TMP_JAMP(994) = TMP_JAMP(749) - AMP(834) ! used 4 times + TMP_JAMP(993) = TMP_JAMP(748) - AMP(850) ! 
used 4 times + TMP_JAMP(992) = TMP_JAMP(747) - AMP(852) ! used 4 times + TMP_JAMP(991) = TMP_JAMP(746) - TMP_JAMP(745) ! used 4 times + TMP_JAMP(990) = TMP_JAMP(746) + TMP_JAMP(739) ! used 4 times + TMP_JAMP(989) = TMP_JAMP(746) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(596) ! used 4 times + TMP_JAMP(988) = TMP_JAMP(745) - TMP_JAMP(741) ! used 4 times + TMP_JAMP(987) = TMP_JAMP(745) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(601) ! used 4 times + TMP_JAMP(986) = TMP_JAMP(744) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(511) ! used 4 times + TMP_JAMP(985) = TMP_JAMP(744) + TMP_JAMP(729) ! used 4 times + TMP_JAMP(984) = TMP_JAMP(743) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(510) ! used 4 times + TMP_JAMP(983) = TMP_JAMP(743) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(499) ! used 4 times + TMP_JAMP(982) = TMP_JAMP(742) - TMP_JAMP(726) ! used 4 times + TMP_JAMP(981) = TMP_JAMP(742) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(494) ! used 4 times + TMP_JAMP(980) = TMP_JAMP(741) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(528) ! used 4 times + TMP_JAMP(979) = TMP_JAMP(741) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(527) ! used 4 times + TMP_JAMP(978) = TMP_JAMP(740) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(528) ! used 4 times + TMP_JAMP(977) = TMP_JAMP(740) + TMP_JAMP(726) ! used 4 times + TMP_JAMP(976) = TMP_JAMP(740) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(516) ! used 4 times + TMP_JAMP(975) = TMP_JAMP(739) - TMP_JAMP(738) ! used 4 times + TMP_JAMP(974) = TMP_JAMP(739) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(755) ! used 4 times + TMP_JAMP(973) = TMP_JAMP(738) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(686) ! used 4 times + TMP_JAMP(972) = TMP_JAMP(738) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(687) ! 
used 4 times + TMP_JAMP(971) = TMP_JAMP(737) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(660) ! used 4 times - TMP_JAMP(1775) = TMP_JAMP(1134) + ((0.000000000000000D+00 + TMP_JAMP(970) = TMP_JAMP(737) + TMP_JAMP(721) ! used 4 times + TMP_JAMP(969) = TMP_JAMP(737) + TMP_JAMP(732) ! used 4 times + TMP_JAMP(968) = TMP_JAMP(736) + ((0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(669) ! used 4 times - TMP_JAMP(1774) = TMP_JAMP(1134) + TMP_JAMP(1117) ! used 4 times - TMP_JAMP(1773) = TMP_JAMP(1138) + TMP_JAMP(1135) ! used 4 times - TMP_JAMP(1772) = TMP_JAMP(1154) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1063) ! used 4 times - TMP_JAMP(1771) = AMP(1461) + AMP(1807) ! used 4 times - TMP_JAMP(1770) = AMP(767) - AMP(1809) ! used 4 times - TMP_JAMP(1769) = AMP(760) - AMP(1574) ! used 4 times - TMP_JAMP(1768) = TMP_JAMP(923) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(767) ! used 4 times - TMP_JAMP(1767) = TMP_JAMP(1105) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1461) ! used 4 times - TMP_JAMP(1766) = TMP_JAMP(1106) + TMP_JAMP(923) ! used 4 times - TMP_JAMP(1765) = TMP_JAMP(1106) + TMP_JAMP(1105) ! used 4 times - TMP_JAMP(1764) = TMP_JAMP(1107) + ((-0.000000000000000D+00, + TMP_JAMP(967) = TMP_JAMP(736) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(658) ! used 4 times + TMP_JAMP(966) = TMP_JAMP(735) - TMP_JAMP(734) ! used 4 times + TMP_JAMP(965) = TMP_JAMP(735) + TMP_JAMP(720) ! used 4 times + TMP_JAMP(964) = TMP_JAMP(735) - TMP_JAMP(725) ! used 4 times + TMP_JAMP(963) = TMP_JAMP(735) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(648) ! used 4 times + TMP_JAMP(962) = TMP_JAMP(734) - TMP_JAMP(725) ! used 4 times + TMP_JAMP(961) = TMP_JAMP(734) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(764) ! used 4 times + TMP_JAMP(960) = TMP_JAMP(733) + ((-0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(760) ! 
used 4 times - TMP_JAMP(1763) = AMP(1196) + AMP(1567) ! used 4 times - TMP_JAMP(1762) = AMP(428) - AMP(473) ! used 4 times - TMP_JAMP(1761) = AMP(87) - AMP(1569) ! used 4 times - TMP_JAMP(1760) = TMP_JAMP(892) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(87) ! used 4 times - TMP_JAMP(1759) = TMP_JAMP(896) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(370) ! used 4 times - TMP_JAMP(1758) = TMP_JAMP(1016) + TMP_JAMP(891) ! used 4 times - TMP_JAMP(1757) = TMP_JAMP(1104) + TMP_JAMP(891) ! used 4 times - TMP_JAMP(1756) = TMP_JAMP(1122) + TMP_JAMP(1104) ! used 4 times - TMP_JAMP(1755) = TMP_JAMP(1123) - TMP_JAMP(1113) ! used 4 times - TMP_JAMP(1754) = TMP_JAMP(1136) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1196) ! used 4 times - TMP_JAMP(1753) = TMP_JAMP(1136) - TMP_JAMP(896) ! used 4 times - TMP_JAMP(1752) = AMP(1052) - AMP(1070) ! used 4 times - TMP_JAMP(1751) = AMP(752) - AMP(1568) ! used 4 times - TMP_JAMP(1750) = TMP_JAMP(892) + TMP_JAMP(877) ! used 4 times - TMP_JAMP(1749) = TMP_JAMP(1107) + ((0.000000000000000D+00 + TMP_JAMP(959) = TMP_JAMP(733) + ((0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(752) ! used 4 times - TMP_JAMP(1748) = TMP_JAMP(1134) - TMP_JAMP(1107) ! used 4 times - TMP_JAMP(1747) = TMP_JAMP(1151) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1052) ! used 4 times - TMP_JAMP(1746) = TMP_JAMP(1151) + TMP_JAMP(891) ! used 4 times - TMP_JAMP(1745) = AMP(434) + AMP(457) ! used 4 times - TMP_JAMP(1744) = AMP(343) - AMP(459) ! used 4 times - TMP_JAMP(1743) = TMP_JAMP(877) + ((0.000000000000000D+00, + TMP_JAMP(958) = TMP_JAMP(732) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(670) ! used 4 times + TMP_JAMP(957) = TMP_JAMP(731) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(786) ! used 4 times - TMP_JAMP(1742) = TMP_JAMP(877) + TMP_JAMP(865) ! 
used 4 times - TMP_JAMP(1741) = TMP_JAMP(1133) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(343) ! used 4 times - TMP_JAMP(1740) = TMP_JAMP(1133) + TMP_JAMP(1103) ! used 4 times - TMP_JAMP(1739) = TMP_JAMP(1136) - TMP_JAMP(1107) ! used 4 times - TMP_JAMP(1738) = AMP(1468) + AMP(1861) ! used 4 times - TMP_JAMP(1737) = AMP(1132) - AMP(1863) ! used 4 times - TMP_JAMP(1736) = AMP(1126) + AMP(1519) ! used 4 times - TMP_JAMP(1735) = AMP(96) - AMP(1521) ! used 4 times - TMP_JAMP(1734) = TMP_JAMP(1100) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1468) ! used 4 times - TMP_JAMP(1733) = TMP_JAMP(1100) - TMP_JAMP(865) ! used 4 times - TMP_JAMP(1732) = TMP_JAMP(1102) - TMP_JAMP(1101) ! used 4 times - TMP_JAMP(1731) = TMP_JAMP(1140) - TMP_JAMP(1132) ! used 4 times - TMP_JAMP(1730) = TMP_JAMP(1150) - TMP_JAMP(892) ! used 4 times - TMP_JAMP(1729) = AMP(1054) + AMP(1711) ! used 4 times - TMP_JAMP(1728) = AMP(501) - AMP(1713) ! used 4 times - TMP_JAMP(1727) = TMP_JAMP(896) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1230) ! used 4 times - TMP_JAMP(1726) = TMP_JAMP(896) - TMP_JAMP(835) ! used 4 times - TMP_JAMP(1725) = TMP_JAMP(1131) + ((0.000000000000000D+00, + TMP_JAMP(956) = TMP_JAMP(731) - TMP_JAMP(724) ! used 4 times + TMP_JAMP(955) = TMP_JAMP(730) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(687) ! used 4 times + TMP_JAMP(954) = TMP_JAMP(730) + TMP_JAMP(724) ! used 4 times + TMP_JAMP(953) = TMP_JAMP(730) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(675) ! used 4 times + TMP_JAMP(952) = TMP_JAMP(729) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(501) ! used 4 times - TMP_JAMP(1724) = TMP_JAMP(1131) + TMP_JAMP(1099) ! used 4 times - TMP_JAMP(1723) = AMP(1470) + AMP(1753) ! used 4 times - TMP_JAMP(1722) = AMP(608) - AMP(1755) ! used 4 times - TMP_JAMP(1721) = AMP(601) - AMP(1520) ! 
used 4 times - TMP_JAMP(1720) = TMP_JAMP(835) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1470) ! used 4 times - TMP_JAMP(1719) = TMP_JAMP(1097) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1470) ! used 4 times - TMP_JAMP(1718) = TMP_JAMP(1098) + ((-0.000000000000000D+00 + TMP_JAMP(951) = TMP_JAMP(729) + TMP_JAMP(718) ! used 4 times + TMP_JAMP(950) = TMP_JAMP(728) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(608) ! used 4 times - TMP_JAMP(1717) = TMP_JAMP(1098) + TMP_JAMP(1097) ! used 4 times - TMP_JAMP(1716) = TMP_JAMP(1149) - TMP_JAMP(1118) ! used 4 times - TMP_JAMP(1715) = AMP(1452) + AMP(1693) ! used 4 times - TMP_JAMP(1714) = AMP(596) - AMP(1695) ! used 4 times - TMP_JAMP(1713) = AMP(379) - AMP(467) ! used 4 times - TMP_JAMP(1712) = TMP_JAMP(806) - AMP(423) ! used 4 times - TMP_JAMP(1711) = TMP_JAMP(969) + TMP_JAMP(808) ! used 4 times - TMP_JAMP(1710) = TMP_JAMP(1096) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(379) ! used 4 times - TMP_JAMP(1709) = TMP_JAMP(1112) + TMP_JAMP(1096) ! used 4 times - TMP_JAMP(1708) = TMP_JAMP(1117) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(596) ! used 4 times - TMP_JAMP(1707) = TMP_JAMP(1117) + TMP_JAMP(809) ! used 4 times - TMP_JAMP(1706) = TMP_JAMP(1137) - TMP_JAMP(1119) ! used 4 times - TMP_JAMP(1705) = AMP(1288) - AMP(1889) ! used 4 times - TMP_JAMP(1704) = AMP(755) - AMP(1694) ! used 4 times - TMP_JAMP(1703) = TMP_JAMP(790) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(708) ! used 4 times - TMP_JAMP(1702) = TMP_JAMP(808) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1288) ! used 4 times - TMP_JAMP(1701) = TMP_JAMP(809) + TMP_JAMP(790) ! used 4 times - TMP_JAMP(1700) = AMP(381) - AMP(458) ! used 4 times - TMP_JAMP(1699) = TMP_JAMP(790) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(693) ! used 4 times - TMP_JAMP(1698) = TMP_JAMP(810) - TMP_JAMP(790) ! 
used 4 times - TMP_JAMP(1697) = TMP_JAMP(1095) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(381) ! used 4 times - TMP_JAMP(1696) = TMP_JAMP(1095) - TMP_JAMP(782) ! used 4 times - TMP_JAMP(1695) = AMP(1297) - AMP(1862) ! used 4 times - TMP_JAMP(1694) = TMP_JAMP(782) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1297) ! used 4 times - TMP_JAMP(1693) = TMP_JAMP(809) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(549) ! used 4 times - TMP_JAMP(1692) = TMP_JAMP(1130) - TMP_JAMP(809) ! used 4 times - TMP_JAMP(1691) = AMP(1286) + AMP(1630) ! used 4 times - TMP_JAMP(1690) = TMP_JAMP(764) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(97) ! used 4 times - TMP_JAMP(1689) = TMP_JAMP(810) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1307) ! used 4 times - TMP_JAMP(1688) = TMP_JAMP(1094) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1286) ! used 4 times - TMP_JAMP(1687) = TMP_JAMP(1094) + TMP_JAMP(766) ! used 4 times - TMP_JAMP(1686) = TMP_JAMP(1148) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(105) ! used 4 times - TMP_JAMP(1685) = AMP(1295) + AMP(1636) ! used 4 times - TMP_JAMP(1684) = AMP(117) - AMP(1638) ! used 4 times - TMP_JAMP(1683) = TMP_JAMP(766) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1295) ! used 4 times - TMP_JAMP(1682) = TMP_JAMP(1093) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(117) ! used 4 times - TMP_JAMP(1681) = TMP_JAMP(1129) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1295) ! used 4 times - TMP_JAMP(1680) = TMP_JAMP(1130) - TMP_JAMP(1129) ! used 4 times - TMP_JAMP(1679) = AMP(1120) - AMP(1830) ! used 4 times - TMP_JAMP(1678) = AMP(819) - AMP(1766) ! used 4 times - TMP_JAMP(1677) = TMP_JAMP(733) + AMP(1120) ! used 4 times - TMP_JAMP(1676) = TMP_JAMP(923) + TMP_JAMP(746) ! used 4 times - TMP_JAMP(1675) = TMP_JAMP(1119) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1450) ! 
used 4 times - TMP_JAMP(1674) = TMP_JAMP(1120) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(733) ! used 4 times - TMP_JAMP(1673) = TMP_JAMP(1120) - TMP_JAMP(745) ! used 4 times - TMP_JAMP(1672) = TMP_JAMP(1128) + TMP_JAMP(1108) ! used 4 times - TMP_JAMP(1671) = TMP_JAMP(1147) + ((-0.000000000000000D+00 + TMP_JAMP(949) = TMP_JAMP(728) + TMP_JAMP(717) ! used 4 times + TMP_JAMP(948) = TMP_JAMP(728) - TMP_JAMP(723) ! used 4 times + TMP_JAMP(947) = TMP_JAMP(727) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(601) ! used 4 times + TMP_JAMP(946) = TMP_JAMP(727) - TMP_JAMP(723) ! used 4 times + TMP_JAMP(945) = TMP_JAMP(727) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(605) ! used 4 times + TMP_JAMP(944) = TMP_JAMP(726) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(534) ! used 4 times + TMP_JAMP(943) = TMP_JAMP(725) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(708) ! used 4 times + TMP_JAMP(942) = TMP_JAMP(724) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(693) ! used 4 times + TMP_JAMP(941) = TMP_JAMP(723) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(549) ! used 4 times + TMP_JAMP(940) = TMP_JAMP(722) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(830) ! used 4 times - TMP_JAMP(1670) = TMP_JAMP(1147) - TMP_JAMP(747) ! used 4 times - TMP_JAMP(1669) = AMP(1198) - AMP(1829) ! used 4 times - TMP_JAMP(1668) = AMP(926) - AMP(1808) ! used 4 times - TMP_JAMP(1667) = TMP_JAMP(746) + ((0.000000000000000D+00 + TMP_JAMP(939) = TMP_JAMP(722) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(829) ! used 4 times + TMP_JAMP(938) = TMP_JAMP(721) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(819) ! used 4 times + TMP_JAMP(937) = TMP_JAMP(721) + TMP_JAMP(718) ! used 4 times + TMP_JAMP(936) = TMP_JAMP(720) + ((0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(926) ! 
used 4 times - TMP_JAMP(1666) = TMP_JAMP(1146) + ((-0.000000000000000D+00 + TMP_JAMP(935) = TMP_JAMP(720) - TMP_JAMP(715) ! used 4 times + TMP_JAMP(934) = TMP_JAMP(719) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(866) ! used 4 times - TMP_JAMP(1665) = TMP_JAMP(1146) + TMP_JAMP(727) ! used 4 times - TMP_JAMP(1664) = AMP(817) - AMP(1712) ! used 4 times - TMP_JAMP(1663) = TMP_JAMP(727) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(850) ! used 4 times - TMP_JAMP(1662) = TMP_JAMP(727) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(717) ! used 4 times - TMP_JAMP(1661) = TMP_JAMP(747) - TMP_JAMP(727) ! used 4 times - TMP_JAMP(1660) = TMP_JAMP(1092) + ((-0.000000000000000D+00, + TMP_JAMP(933) = TMP_JAMP(719) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(867) ! used 4 times + TMP_JAMP(932) = TMP_JAMP(718) + ((-0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(817) ! used 4 times - TMP_JAMP(1659) = TMP_JAMP(1092) + TMP_JAMP(719) ! used 4 times - TMP_JAMP(1658) = AMP(935) - AMP(1754) ! used 4 times - TMP_JAMP(1657) = TMP_JAMP(719) + ((0.000000000000000D+00 + TMP_JAMP(931) = TMP_JAMP(717) + ((0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(935) ! used 4 times - TMP_JAMP(1656) = TMP_JAMP(745) + ((-0.000000000000000D+00 + TMP_JAMP(930) = TMP_JAMP(717) - TMP_JAMP(714) ! used 4 times + TMP_JAMP(929) = TMP_JAMP(716) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(848) ! used 4 times - TMP_JAMP(1655) = TMP_JAMP(1102) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(708) ! used 4 times - TMP_JAMP(1654) = TMP_JAMP(1102) - TMP_JAMP(1091) ! used 4 times - TMP_JAMP(1653) = AMP(923) - AMP(1631) ! used 4 times - TMP_JAMP(1652) = TMP_JAMP(747) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(943) ! used 4 times - TMP_JAMP(1651) = TMP_JAMP(747) + TMP_JAMP(707) ! used 4 times - TMP_JAMP(1650) = AMP(932) - AMP(1637) ! 
used 4 times - TMP_JAMP(1649) = TMP_JAMP(707) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(932) ! used 4 times - TMP_JAMP(1648) = TMP_JAMP(1126) + ((-0.000000000000000D+00, + TMP_JAMP(928) = TMP_JAMP(716) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(849) ! used 4 times + TMP_JAMP(927) = TMP_JAMP(715) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(923) ! used 4 times + TMP_JAMP(926) = TMP_JAMP(715) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(646) ! used 4 times + TMP_JAMP(925) = TMP_JAMP(714) + ((-0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(932) ! used 4 times - TMP_JAMP(1647) = TMP_JAMP(1126) - TMP_JAMP(1091) ! used 4 times - TMP_JAMP(1646) = AMP(1506) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1509) ! used 4 times - TMP_JAMP(1645) = AMP(1504) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1507) ! used 4 times - TMP_JAMP(1644) = AMP(407) + AMP(460) ! used 4 times - TMP_JAMP(1643) = AMP(347) - AMP(462) ! used 4 times - TMP_JAMP(1642) = TMP_JAMP(677) - AMP(1106) ! used 4 times - TMP_JAMP(1641) = TMP_JAMP(679) - AMP(67) ! used 4 times - TMP_JAMP(1640) = TMP_JAMP(690) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(686) ! used 4 times - TMP_JAMP(1639) = TMP_JAMP(891) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(80) ! used 4 times - TMP_JAMP(1638) = TMP_JAMP(1017) - TMP_JAMP(891) ! used 4 times - TMP_JAMP(1637) = TMP_JAMP(1018) + TMP_JAMP(687) ! used 4 times - TMP_JAMP(1636) = TMP_JAMP(1029) + TMP_JAMP(691) ! used 4 times - TMP_JAMP(1635) = TMP_JAMP(1103) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(407) ! used 4 times - TMP_JAMP(1634) = TMP_JAMP(1121) - TMP_JAMP(1104) ! used 4 times - TMP_JAMP(1633) = TMP_JAMP(1125) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(407) ! used 4 times - TMP_JAMP(1632) = TMP_JAMP(1125) + TMP_JAMP(1103) ! 
used 4 times - TMP_JAMP(1631) = TMP_JAMP(1145) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(347) ! used 4 times - TMP_JAMP(1630) = AMP(1505) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1508) ! used 4 times - TMP_JAMP(1629) = AMP(1032) + AMP(1717) ! used 4 times - TMP_JAMP(1628) = AMP(507) - AMP(1719) ! used 4 times - TMP_JAMP(1627) = TMP_JAMP(664) - AMP(581) ! used 4 times - TMP_JAMP(1626) = TMP_JAMP(994) + TMP_JAMP(670) ! used 4 times - TMP_JAMP(1625) = TMP_JAMP(1099) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1032) ! used 4 times - TMP_JAMP(1624) = TMP_JAMP(1124) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1032) ! used 4 times - TMP_JAMP(1623) = TMP_JAMP(1124) + TMP_JAMP(1099) ! used 4 times - TMP_JAMP(1622) = TMP_JAMP(1144) + ((-0.000000000000000D+00 + TMP_JAMP(924) = TMP_JAMP(714) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(642) ! used 4 times + TMP_JAMP(923) = TMP_JAMP(713) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(507) ! used 4 times - TMP_JAMP(1621) = AMP(386) - AMP(461) ! used 4 times - TMP_JAMP(1620) = TMP_JAMP(660) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(658) ! used 4 times - TMP_JAMP(1619) = TMP_JAMP(661) - AMP(1853) ! used 4 times - TMP_JAMP(1618) = TMP_JAMP(808) + ((0.000000000000000D+00, + TMP_JAMP(922) = TMP_JAMP(713) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(584) ! used 4 times + TMP_JAMP(921) = TMP_JAMP(712) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(581) ! used 4 times + TMP_JAMP(920) = TMP_JAMP(711) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(546) ! used 4 times - TMP_JAMP(1617) = TMP_JAMP(972) - TMP_JAMP(808) ! used 4 times - TMP_JAMP(1616) = TMP_JAMP(1095) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(386) ! used 4 times - TMP_JAMP(1615) = TMP_JAMP(1096) + TMP_JAMP(1095) ! used 4 times - TMP_JAMP(1614) = AMP(1282) + AMP(1627) ! 
used 4 times - TMP_JAMP(1613) = AMP(75) - AMP(1629) ! used 4 times - TMP_JAMP(1612) = TMP_JAMP(946) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(940) ! used 4 times - TMP_JAMP(1611) = TMP_JAMP(1142) - TMP_JAMP(1094) ! used 4 times - TMP_JAMP(1610) = TMP_JAMP(1143) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(75) ! used 4 times - TMP_JAMP(1609) = AMP(828) - AMP(1718) ! used 4 times - TMP_JAMP(1608) = TMP_JAMP(641) - AMP(1745) ! used 4 times - TMP_JAMP(1607) = TMP_JAMP(642) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(639) ! used 4 times - TMP_JAMP(1606) = TMP_JAMP(746) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(845) ! used 4 times - TMP_JAMP(1605) = TMP_JAMP(930) - TMP_JAMP(746) ! used 4 times - TMP_JAMP(1604) = TMP_JAMP(1092) + ((0.000000000000000D+00 + TMP_JAMP(919) = TMP_JAMP(711) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(545) ! used 4 times + TMP_JAMP(918) = TMP_JAMP(710) + ((0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(828) ! used 4 times - TMP_JAMP(1603) = TMP_JAMP(1128) + TMP_JAMP(1092) ! used 4 times - TMP_JAMP(1602) = AMP(919) - AMP(1628) ! used 4 times - TMP_JAMP(1601) = TMP_JAMP(1141) - TMP_JAMP(1127) ! used 4 times - TMP_JAMP(1600) = AMP(1274) + AMP(1621) ! used 4 times - TMP_JAMP(1599) = AMP(69) - AMP(1623) ! used 4 times - TMP_JAMP(1598) = TMP_JAMP(622) + AMP(138) ! used 4 times - TMP_JAMP(1597) = TMP_JAMP(625) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1645) ! used 4 times - TMP_JAMP(1596) = TMP_JAMP(627) - TMP_JAMP(624) ! used 4 times - TMP_JAMP(1595) = TMP_JAMP(1016) - TMP_JAMP(628) ! used 4 times - TMP_JAMP(1594) = TMP_JAMP(1103) - TMP_JAMP(1095) ! used 4 times - TMP_JAMP(1593) = TMP_JAMP(1143) + TMP_JAMP(688) ! used 4 times - TMP_JAMP(1592) = AMP(911) - AMP(1622) ! used 4 times - TMP_JAMP(1591) = TMP_JAMP(619) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1646) ! 
used 4 times - TMP_JAMP(1590) = TMP_JAMP(627) - TMP_JAMP(618) ! used 4 times - TMP_JAMP(1589) = TMP_JAMP(1099) - TMP_JAMP(1092) ! used 4 times - TMP_JAMP(1588) = TMP_JAMP(1147) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(616) ! used 4 times - TMP_JAMP(1587) = TMP_JAMP(1147) + TMP_JAMP(1115) ! used 4 times - TMP_JAMP(1586) = TMP_JAMP(607) - AMP(425) ! used 4 times - TMP_JAMP(1585) = TMP_JAMP(608) - AMP(345) ! used 4 times - TMP_JAMP(1584) = TMP_JAMP(609) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1112) ! used 4 times - TMP_JAMP(1583) = TMP_JAMP(611) + TMP_JAMP(609) ! used 4 times - TMP_JAMP(1582) = TMP_JAMP(611) - TMP_JAMP(610) ! used 4 times - TMP_JAMP(1581) = TMP_JAMP(629) - TMP_JAMP(620) ! used 4 times - TMP_JAMP(1580) = TMP_JAMP(807) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(804) ! used 4 times - TMP_JAMP(1579) = TMP_JAMP(1123) - TMP_JAMP(615) ! used 4 times - TMP_JAMP(1578) = TMP_JAMP(1139) - TMP_JAMP(614) ! used 4 times - TMP_JAMP(1577) = TMP_JAMP(1142) - TMP_JAMP(1141) ! used 4 times - TMP_JAMP(1576) = TMP_JAMP(1145) - TMP_JAMP(640) ! used 4 times - TMP_JAMP(1575) = AMP(1477) + AMP(1834) ! used 4 times - TMP_JAMP(1574) = AMP(1141) - AMP(1836) ! used 4 times - TMP_JAMP(1573) = AMP(1114) + AMP(1510) ! used 4 times - TMP_JAMP(1572) = AMP(949) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1477) ! used 4 times - TMP_JAMP(1571) = AMP(78) - AMP(1512) ! used 4 times - TMP_JAMP(1570) = TMP_JAMP(612) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(951) ! used 4 times - TMP_JAMP(1569) = TMP_JAMP(613) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(949) ! used 4 times - TMP_JAMP(1568) = TMP_JAMP(626) - AMP(134) ! used 4 times - TMP_JAMP(1567) = TMP_JAMP(685) + AMP(78) ! used 4 times - TMP_JAMP(1566) = TMP_JAMP(689) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(685) ! 
used 4 times - TMP_JAMP(1565) = TMP_JAMP(694) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(684) ! used 4 times - TMP_JAMP(1564) = TMP_JAMP(1086) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(613) ! used 4 times - TMP_JAMP(1563) = TMP_JAMP(1088) - TMP_JAMP(1087) ! used 4 times - TMP_JAMP(1562) = TMP_JAMP(1090) - TMP_JAMP(689) ! used 4 times - TMP_JAMP(1561) = TMP_JAMP(1090) - TMP_JAMP(1089) ! used 4 times - TMP_JAMP(1560) = TMP_JAMP(590) + AMP(1045) ! used 4 times - TMP_JAMP(1559) = TMP_JAMP(591) + AMP(499) ! used 4 times - TMP_JAMP(1558) = TMP_JAMP(611) - TMP_JAMP(592) ! used 4 times - TMP_JAMP(1557) = TMP_JAMP(1116) + TMP_JAMP(596) ! used 4 times - TMP_JAMP(1556) = TMP_JAMP(1138) + TMP_JAMP(595) ! used 4 times - TMP_JAMP(1555) = TMP_JAMP(1144) - TMP_JAMP(659) ! used 4 times - TMP_JAMP(1554) = AMP(1668) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1701) ! used 4 times - TMP_JAMP(1553) = AMP(1666) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1699) ! used 4 times - TMP_JAMP(1552) = AMP(1479) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1666) ! used 4 times - TMP_JAMP(1551) = AMP(617) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1668) ! used 4 times - TMP_JAMP(1550) = AMP(589) - AMP(1511) ! used 4 times - TMP_JAMP(1549) = TMP_JAMP(593) - AMP(1479) ! used 4 times - TMP_JAMP(1548) = TMP_JAMP(594) - AMP(617) ! used 4 times - TMP_JAMP(1547) = TMP_JAMP(668) + AMP(589) ! used 4 times - TMP_JAMP(1546) = TMP_JAMP(673) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(668) ! used 4 times - TMP_JAMP(1545) = TMP_JAMP(1083) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(593) ! used 4 times - TMP_JAMP(1544) = TMP_JAMP(1084) - TMP_JAMP(673) ! used 4 times - TMP_JAMP(1543) = TMP_JAMP(1085) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(594) ! used 4 times - TMP_JAMP(1542) = TMP_JAMP(1089) - TMP_JAMP(1083) ! 
used 4 times - TMP_JAMP(1541) = TMP_JAMP(1090) + TMP_JAMP(1084) ! used 4 times - TMP_JAMP(1540) = AMP(1431) + AMP(1747) ! used 4 times - TMP_JAMP(1539) = AMP(584) - AMP(1749) ! used 4 times - TMP_JAMP(1538) = TMP_JAMP(578) - AMP(516) ! used 4 times - TMP_JAMP(1537) = TMP_JAMP(580) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1723) ! used 4 times - TMP_JAMP(1536) = TMP_JAMP(592) - TMP_JAMP(579) ! used 4 times - TMP_JAMP(1535) = TMP_JAMP(662) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(657) ! used 4 times - TMP_JAMP(1534) = TMP_JAMP(969) - TMP_JAMP(582) ! used 4 times - TMP_JAMP(1533) = TMP_JAMP(1125) - TMP_JAMP(1124) ! used 4 times - TMP_JAMP(1532) = AMP(914) - AMP(1748) ! used 4 times - TMP_JAMP(1531) = TMP_JAMP(575) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1724) ! used 4 times - TMP_JAMP(1530) = TMP_JAMP(643) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(638) ! used 4 times - TMP_JAMP(1529) = TMP_JAMP(1127) - TMP_JAMP(1094) ! used 4 times - TMP_JAMP(1528) = TMP_JAMP(1146) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(573) ! used 4 times - TMP_JAMP(1527) = TMP_JAMP(1146) - TMP_JAMP(1110) ! used 4 times - TMP_JAMP(1526) = TMP_JAMP(569) - AMP(363) ! used 4 times - TMP_JAMP(1525) = TMP_JAMP(583) - TMP_JAMP(576) ! used 4 times - TMP_JAMP(1524) = TMP_JAMP(609) + TMP_JAMP(579) ! used 4 times - TMP_JAMP(1523) = TMP_JAMP(1113) - TMP_JAMP(572) ! used 4 times - TMP_JAMP(1522) = AMP(1219) - AMP(1835) ! used 4 times - TMP_JAMP(1521) = AMP(950) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1219) ! used 4 times - TMP_JAMP(1520) = AMP(531) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1725) ! used 4 times - TMP_JAMP(1519) = TMP_JAMP(571) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(950) ! used 4 times - TMP_JAMP(1518) = TMP_JAMP(581) + AMP(531) ! 
used 4 times - TMP_JAMP(1517) = TMP_JAMP(1081) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(571) ! used 4 times - TMP_JAMP(1516) = TMP_JAMP(1082) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(581) ! used 4 times - TMP_JAMP(1515) = TMP_JAMP(1082) - TMP_JAMP(1081) ! used 4 times - TMP_JAMP(1514) = TMP_JAMP(1087) - TMP_JAMP(1084) ! used 4 times - TMP_JAMP(1513) = TMP_JAMP(1088) + TMP_JAMP(1081) ! used 4 times - TMP_JAMP(1512) = TMP_JAMP(559) - AMP(1208) ! used 4 times - TMP_JAMP(1511) = TMP_JAMP(560) - AMP(103) ! used 4 times - TMP_JAMP(1510) = TMP_JAMP(1111) + TMP_JAMP(564) ! used 4 times - TMP_JAMP(1509) = TMP_JAMP(1152) + TMP_JAMP(563) ! used 4 times - TMP_JAMP(1508) = AMP(1584) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1590) ! used 4 times - TMP_JAMP(1507) = AMP(1582) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1588) ! used 4 times - TMP_JAMP(1506) = TMP_JAMP(561) + AMP(1217) ! used 4 times - TMP_JAMP(1505) = TMP_JAMP(562) + AMP(129) ! used 4 times - TMP_JAMP(1504) = TMP_JAMP(1080) + TMP_JAMP(1079) ! used 4 times - TMP_JAMP(1503) = TMP_JAMP(1082) - TMP_JAMP(1079) ! used 4 times - TMP_JAMP(1502) = TMP_JAMP(1090) + TMP_JAMP(1080) ! used 4 times - TMP_JAMP(1501) = AMP(1429) + AMP(1855) ! used 4 times - TMP_JAMP(1500) = AMP(1108) - AMP(1857) ! used 4 times - TMP_JAMP(1499) = TMP_JAMP(550) - AMP(673) ! used 4 times - TMP_JAMP(1498) = TMP_JAMP(552) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(787) ! used 4 times - TMP_JAMP(1497) = TMP_JAMP(923) - TMP_JAMP(553) ! used 4 times - TMP_JAMP(1496) = AMP(1276) - AMP(1856) ! used 4 times - TMP_JAMP(1495) = TMP_JAMP(547) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(788) ! used 4 times - TMP_JAMP(1494) = TMP_JAMP(1161) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(546) ! used 4 times - TMP_JAMP(1493) = TMP_JAMP(1161) - TMP_JAMP(1106) ! used 4 times - TMP_JAMP(1492) = TMP_JAMP(542) + AMP(658) ! 
used 4 times - TMP_JAMP(1491) = TMP_JAMP(554) - TMP_JAMP(548) ! used 4 times - TMP_JAMP(1490) = TMP_JAMP(1135) + TMP_JAMP(545) ! used 4 times - TMP_JAMP(1489) = AMP(1667) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1700) ! used 4 times - TMP_JAMP(1488) = AMP(776) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1667) ! used 4 times - TMP_JAMP(1487) = AMP(689) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(789) ! used 4 times - TMP_JAMP(1486) = TMP_JAMP(544) - AMP(776) ! used 4 times - TMP_JAMP(1485) = TMP_JAMP(551) + AMP(689) ! used 4 times - TMP_JAMP(1484) = TMP_JAMP(1077) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(544) ! used 4 times - TMP_JAMP(1483) = TMP_JAMP(1078) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(551) ! used 4 times - TMP_JAMP(1482) = TMP_JAMP(1085) + TMP_JAMP(1077) ! used 4 times - TMP_JAMP(1481) = TMP_JAMP(1087) - TMP_JAMP(1078) ! used 4 times - TMP_JAMP(1480) = TMP_JAMP(535) - AMP(764) ! used 4 times - TMP_JAMP(1479) = TMP_JAMP(1153) + TMP_JAMP(537) ! used 4 times - TMP_JAMP(1478) = AMP(1583) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1589) ! used 4 times - TMP_JAMP(1477) = TMP_JAMP(536) + AMP(773) ! used 4 times - TMP_JAMP(1476) = TMP_JAMP(1080) + TMP_JAMP(1076) ! used 4 times - TMP_JAMP(1475) = AMP(471) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1881) ! used 4 times - TMP_JAMP(1474) = AMP(398) + AMP(469) ! used 4 times - TMP_JAMP(1473) = AMP(49) - AMP(1560) ! used 4 times - TMP_JAMP(1472) = TMP_JAMP(518) - AMP(62) ! used 4 times - TMP_JAMP(1471) = TMP_JAMP(519) + AMP(365) ! used 4 times - TMP_JAMP(1470) = TMP_JAMP(532) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(520) ! used 4 times - TMP_JAMP(1469) = TMP_JAMP(892) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(49) ! used 4 times - TMP_JAMP(1468) = TMP_JAMP(896) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1184) ! 
used 4 times - TMP_JAMP(1467) = TMP_JAMP(1017) - TMP_JAMP(525) ! used 4 times - TMP_JAMP(1466) = TMP_JAMP(1075) + TMP_JAMP(892) ! used 4 times - TMP_JAMP(1465) = TMP_JAMP(1075) + TMP_JAMP(1074) ! used 4 times - TMP_JAMP(1464) = TMP_JAMP(1112) + TMP_JAMP(528) ! used 4 times - TMP_JAMP(1463) = TMP_JAMP(1157) - TMP_JAMP(1121) ! used 4 times - TMP_JAMP(1462) = AMP(1773) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1800) ! used 4 times - TMP_JAMP(1461) = AMP(1023) + AMP(1771) ! used 4 times - TMP_JAMP(1460) = AMP(740) - AMP(1559) ! used 4 times - TMP_JAMP(1459) = TMP_JAMP(508) - AMP(666) ! used 4 times - TMP_JAMP(1458) = TMP_JAMP(516) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(509) ! used 4 times - TMP_JAMP(1457) = TMP_JAMP(877) + ((0.000000000000000D+00 + TMP_JAMP(917) = TMP_JAMP(710) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(914) ! used 4 times + TMP_JAMP(916) = TMP_JAMP(709) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(845) ! used 4 times + TMP_JAMP(915) = TMP_JAMP(709) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(846) ! used 4 times + TMP_JAMP(914) = TMP_JAMP(708) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(919) ! used 4 times + TMP_JAMP(913) = TMP_JAMP(708) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(911) ! used 4 times + TMP_JAMP(912) = TMP_JAMP(707) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(617) ! used 4 times + TMP_JAMP(911) = TMP_JAMP(707) - TMP_JAMP(706) ! used 4 times + TMP_JAMP(910) = TMP_JAMP(707) + TMP_JAMP(701) ! used 4 times + TMP_JAMP(909) = TMP_JAMP(707) - TMP_JAMP(705) ! used 4 times + TMP_JAMP(908) = TMP_JAMP(706) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(589) ! used 4 times + TMP_JAMP(907) = TMP_JAMP(706) - TMP_JAMP(705) ! used 4 times + TMP_JAMP(906) = TMP_JAMP(705) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(531) ! 
used 4 times + TMP_JAMP(905) = TMP_JAMP(704) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(671) ! used 4 times + TMP_JAMP(904) = TMP_JAMP(704) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(784) ! used 4 times + TMP_JAMP(903) = TMP_JAMP(703) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(673) ! used 4 times + TMP_JAMP(902) = TMP_JAMP(702) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(707) ! used 4 times + TMP_JAMP(901) = TMP_JAMP(702) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(691) ! used 4 times + TMP_JAMP(900) = TMP_JAMP(701) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(776) ! used 4 times + TMP_JAMP(899) = TMP_JAMP(701) - TMP_JAMP(700) ! used 4 times + TMP_JAMP(898) = TMP_JAMP(701) - TMP_JAMP(699) ! used 4 times + TMP_JAMP(897) = TMP_JAMP(700) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(689) ! used 4 times + TMP_JAMP(896) = TMP_JAMP(700) - TMP_JAMP(699) ! used 4 times + TMP_JAMP(895) = TMP_JAMP(699) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(773) ! used 4 times + TMP_JAMP(894) = TMP_JAMP(698) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(666) ! used 4 times + TMP_JAMP(893) = TMP_JAMP(698) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(743) ! used 4 times + TMP_JAMP(892) = TMP_JAMP(697) + ((0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(740) ! used 4 times - TMP_JAMP(1456) = TMP_JAMP(1073) + TMP_JAMP(877) ! used 4 times - TMP_JAMP(1455) = TMP_JAMP(1075) + TMP_JAMP(1073) ! used 4 times - TMP_JAMP(1454) = TMP_JAMP(1108) + TMP_JAMP(512) ! used 4 times - TMP_JAMP(1453) = TMP_JAMP(1154) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(507) ! used 4 times - TMP_JAMP(1452) = TMP_JAMP(1154) - TMP_JAMP(1114) ! used 4 times - TMP_JAMP(1451) = AMP(470) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1880) ! used 4 times - TMP_JAMP(1450) = TMP_JAMP(501) + AMP(383) ! 
used 4 times - TMP_JAMP(1449) = TMP_JAMP(782) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(543) ! used 4 times - TMP_JAMP(1448) = TMP_JAMP(865) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(543) ! used 4 times - TMP_JAMP(1447) = TMP_JAMP(865) - TMP_JAMP(782) ! used 4 times - TMP_JAMP(1446) = TMP_JAMP(1072) + TMP_JAMP(782) ! used 4 times - TMP_JAMP(1445) = TMP_JAMP(1074) - TMP_JAMP(1073) ! used 4 times - TMP_JAMP(1444) = TMP_JAMP(1096) + TMP_JAMP(505) ! used 4 times - TMP_JAMP(1443) = AMP(1270) + AMP(1618) ! used 4 times - TMP_JAMP(1442) = AMP(57) - AMP(1620) ! used 4 times - TMP_JAMP(1441) = TMP_JAMP(1070) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1270) ! used 4 times - TMP_JAMP(1440) = TMP_JAMP(1072) - TMP_JAMP(1070) ! used 4 times - TMP_JAMP(1439) = TMP_JAMP(1075) + TMP_JAMP(1071) ! used 4 times - TMP_JAMP(1438) = TMP_JAMP(1093) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(57) ! used 4 times - TMP_JAMP(1437) = TMP_JAMP(1093) - TMP_JAMP(1071) ! used 4 times - TMP_JAMP(1436) = TMP_JAMP(1150) + TMP_JAMP(1093) ! used 4 times - TMP_JAMP(1435) = AMP(1772) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1799) ! used 4 times - TMP_JAMP(1434) = TMP_JAMP(491) - AMP(825) ! used 4 times - TMP_JAMP(1433) = TMP_JAMP(719) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(570) ! used 4 times - TMP_JAMP(1432) = TMP_JAMP(835) - TMP_JAMP(719) ! used 4 times - TMP_JAMP(1431) = TMP_JAMP(1069) - TMP_JAMP(719) ! used 4 times - TMP_JAMP(1430) = TMP_JAMP(1074) - TMP_JAMP(1069) ! used 4 times - TMP_JAMP(1429) = TMP_JAMP(1128) + TMP_JAMP(494) ! used 4 times - TMP_JAMP(1428) = AMP(907) - AMP(1619) ! used 4 times - TMP_JAMP(1427) = TMP_JAMP(1071) + TMP_JAMP(1068) ! used 4 times - TMP_JAMP(1426) = AMP(1262) + AMP(1612) ! used 4 times - TMP_JAMP(1425) = TMP_JAMP(1070) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1262) ! 
used 4 times - TMP_JAMP(1424) = TMP_JAMP(1071) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(51) ! used 4 times - TMP_JAMP(1423) = AMP(899) - AMP(1613) ! used 4 times - TMP_JAMP(1422) = TMP_JAMP(1068) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(899) ! used 4 times - TMP_JAMP(1421) = TMP_JAMP(1069) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(864) ! used 4 times - TMP_JAMP(1420) = TMP_JAMP(1070) - TMP_JAMP(1068) ! used 4 times - TMP_JAMP(1419) = AMP(1192) + AMP(1564) ! used 4 times - TMP_JAMP(1418) = AMP(60) - AMP(1566) ! used 4 times - TMP_JAMP(1417) = TMP_JAMP(1074) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1192) ! used 4 times - TMP_JAMP(1416) = TMP_JAMP(1075) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(60) ! used 4 times - TMP_JAMP(1415) = TMP_JAMP(1079) - TMP_JAMP(1074) ! used 4 times - TMP_JAMP(1414) = TMP_JAMP(1080) - TMP_JAMP(1075) ! used 4 times - TMP_JAMP(1413) = TMP_JAMP(1086) - TMP_JAMP(1081) ! used 4 times - TMP_JAMP(1412) = TMP_JAMP(1089) + TMP_JAMP(1080) ! used 4 times - TMP_JAMP(1411) = TMP_JAMP(1072) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(704) ! used 4 times - TMP_JAMP(1410) = AMP(748) - AMP(1565) ! used 4 times - TMP_JAMP(1409) = TMP_JAMP(1073) + ((-0.000000000000000D+00, + TMP_JAMP(891) = TMP_JAMP(697) + ((-0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(748) ! used 4 times - TMP_JAMP(1408) = TMP_JAMP(1076) - TMP_JAMP(1073) ! used 4 times - TMP_JAMP(1407) = TMP_JAMP(1083) - TMP_JAMP(1077) ! used 4 times - TMP_JAMP(1406) = AMP(1410) + AMP(1801) ! used 4 times - TMP_JAMP(1405) = AMP(743) - AMP(1803) ! used 4 times - TMP_JAMP(1404) = TMP_JAMP(484) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1798) ! used 4 times - TMP_JAMP(1403) = TMP_JAMP(485) - AMP(354) ! used 4 times - TMP_JAMP(1402) = TMP_JAMP(511) - AMP(743) ! 
used 4 times - TMP_JAMP(1401) = TMP_JAMP(513) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(511) ! used 4 times - TMP_JAMP(1400) = TMP_JAMP(531) - TMP_JAMP(515) ! used 4 times - TMP_JAMP(1399) = TMP_JAMP(1021) + TMP_JAMP(488) ! used 4 times - TMP_JAMP(1398) = TMP_JAMP(1067) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(513) ! used 4 times - TMP_JAMP(1397) = AMP(902) - AMP(1802) ! used 4 times - TMP_JAMP(1396) = TMP_JAMP(496) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(492) ! used 4 times - TMP_JAMP(1395) = TMP_JAMP(1067) + TMP_JAMP(1066) ! used 4 times - TMP_JAMP(1394) = TMP_JAMP(1091) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(849) ! used 4 times - TMP_JAMP(1393) = TMP_JAMP(1091) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(609) ! used 4 times - TMP_JAMP(1392) = TMP_JAMP(1091) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1066) ! used 4 times - TMP_JAMP(1391) = TMP_JAMP(1101) - TMP_JAMP(1091) ! used 4 times - TMP_JAMP(1390) = TMP_JAMP(1129) - TMP_JAMP(1126) ! used 4 times - TMP_JAMP(1389) = TMP_JAMP(1066) - AMP(834) ! used 4 times - TMP_JAMP(1388) = TMP_JAMP(1067) + AMP(690) ! used 4 times - TMP_JAMP(1387) = TMP_JAMP(1078) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(690) ! used 4 times - TMP_JAMP(1386) = TMP_JAMP(1078) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1067) ! used 4 times - TMP_JAMP(1385) = TMP_JAMP(1079) - TMP_JAMP(1076) ! used 4 times - TMP_JAMP(1384) = TMP_JAMP(1088) - TMP_JAMP(1078) ! used 4 times - TMP_JAMP(1383) = AMP(1130) + AMP(1522) ! used 4 times - TMP_JAMP(1382) = AMP(115) - AMP(1524) ! used 4 times - TMP_JAMP(1381) = TMP_JAMP(486) + AMP(1151) ! used 4 times - TMP_JAMP(1380) = TMP_JAMP(524) - AMP(61) ! used 4 times - TMP_JAMP(1379) = TMP_JAMP(527) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(524) ! 
used 4 times - TMP_JAMP(1378) = TMP_JAMP(1064) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(527) ! used 4 times - TMP_JAMP(1377) = TMP_JAMP(1102) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1130) ! used 4 times - TMP_JAMP(1376) = TMP_JAMP(1102) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1065) ! used 4 times - TMP_JAMP(1375) = AMP(1139) + AMP(1528) ! used 4 times - TMP_JAMP(1374) = AMP(127) - AMP(1530) ! used 4 times - TMP_JAMP(1373) = TMP_JAMP(1087) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1139) ! used 4 times - TMP_JAMP(1372) = TMP_JAMP(1087) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1065) ! used 4 times - TMP_JAMP(1371) = TMP_JAMP(1090) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(127) ! used 4 times - TMP_JAMP(1370) = TMP_JAMP(1090) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1064) ! used 4 times - TMP_JAMP(1369) = AMP(1408) + AMP(1882) ! used 4 times - TMP_JAMP(1368) = AMP(1186) - AMP(1884) ! used 4 times - TMP_JAMP(1367) = TMP_JAMP(478) + AMP(512) ! used 4 times - TMP_JAMP(1366) = TMP_JAMP(521) + AMP(1186) ! used 4 times - TMP_JAMP(1365) = TMP_JAMP(529) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(521) ! used 4 times - TMP_JAMP(1364) = TMP_JAMP(989) - TMP_JAMP(480) ! used 4 times - TMP_JAMP(1363) = TMP_JAMP(1063) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(529) ! used 4 times - TMP_JAMP(1362) = AMP(1264) - AMP(1883) ! used 4 times - TMP_JAMP(1361) = TMP_JAMP(502) + AMP(1264) ! used 4 times - TMP_JAMP(1360) = TMP_JAMP(506) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(502) ! used 4 times - TMP_JAMP(1359) = TMP_JAMP(1062) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(506) ! used 4 times - TMP_JAMP(1358) = TMP_JAMP(1063) + TMP_JAMP(1062) ! 
used 4 times - TMP_JAMP(1357) = TMP_JAMP(1130) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(592) ! used 4 times - TMP_JAMP(1356) = TMP_JAMP(1130) - TMP_JAMP(1098) ! used 4 times - TMP_JAMP(1355) = TMP_JAMP(1062) - AMP(532) ! used 4 times - TMP_JAMP(1354) = TMP_JAMP(1063) + AMP(530) ! used 4 times - TMP_JAMP(1353) = TMP_JAMP(1082) + ((-0.000000000000000D+00 + TMP_JAMP(890) = TMP_JAMP(696) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(705) ! used 4 times + TMP_JAMP(889) = TMP_JAMP(696) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(704) ! used 4 times + TMP_JAMP(888) = TMP_JAMP(695) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(825) ! used 4 times + TMP_JAMP(887) = TMP_JAMP(695) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(902) ! used 4 times + TMP_JAMP(886) = TMP_JAMP(694) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(863) ! used 4 times + TMP_JAMP(885) = TMP_JAMP(694) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(864) ! used 4 times + TMP_JAMP(884) = TMP_JAMP(693) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(907) ! used 4 times + TMP_JAMP(883) = TMP_JAMP(693) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(899) ! used 4 times + TMP_JAMP(882) = TMP_JAMP(692) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(512) ! used 4 times + TMP_JAMP(881) = TMP_JAMP(691) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(514) ! used 4 times + TMP_JAMP(880) = TMP_JAMP(691) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(530) ! used 4 times - TMP_JAMP(1352) = TMP_JAMP(1082) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1063) ! used 4 times - TMP_JAMP(1351) = TMP_JAMP(1085) + TMP_JAMP(1082) ! used 4 times - TMP_JAMP(1350) = AMP(625) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(628) ! used 4 times - TMP_JAMP(1349) = AMP(605) - AMP(1523) ! 
used 4 times - TMP_JAMP(1348) = TMP_JAMP(479) - AMP(625) ! used 4 times - TMP_JAMP(1347) = TMP_JAMP(1061) - TMP_JAMP(479) ! used 4 times - TMP_JAMP(1346) = TMP_JAMP(1064) + TMP_JAMP(1061) ! used 4 times - TMP_JAMP(1345) = AMP(614) - AMP(1529) ! used 4 times - TMP_JAMP(1344) = TMP_JAMP(1084) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(614) ! used 4 times - TMP_JAMP(1343) = TMP_JAMP(1084) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1061) ! used 4 times - TMP_JAMP(1342) = AMP(985) + AMP(1648) ! used 4 times - TMP_JAMP(1341) = AMP(481) - AMP(1650) ! used 4 times - TMP_JAMP(1340) = AMP(336) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1014) ! used 4 times - TMP_JAMP(1339) = AMP(334) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1012) ! used 4 times - TMP_JAMP(1338) = TMP_JAMP(461) + AMP(331) ! used 4 times - TMP_JAMP(1337) = TMP_JAMP(462) + AMP(325) ! used 4 times - TMP_JAMP(1336) = TMP_JAMP(809) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(481) ! used 4 times - TMP_JAMP(1335) = TMP_JAMP(810) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(985) ! used 4 times - TMP_JAMP(1334) = TMP_JAMP(1058) + TMP_JAMP(810) ! used 4 times - TMP_JAMP(1333) = TMP_JAMP(1060) + TMP_JAMP(809) ! used 4 times - TMP_JAMP(1332) = TMP_JAMP(1060) - TMP_JAMP(1059) ! used 4 times - TMP_JAMP(1331) = TMP_JAMP(1121) + TMP_JAMP(473) ! used 4 times - TMP_JAMP(1330) = TMP_JAMP(1122) + TMP_JAMP(470) ! used 4 times - TMP_JAMP(1329) = AMP(963) + AMP(1762) ! used 4 times - TMP_JAMP(1328) = AMP(648) - AMP(1764) ! used 4 times - TMP_JAMP(1327) = AMP(640) - AMP(1649) ! used 4 times - TMP_JAMP(1326) = TMP_JAMP(790) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(640) ! used 4 times - TMP_JAMP(1325) = TMP_JAMP(1056) + TMP_JAMP(790) ! used 4 times - TMP_JAMP(1324) = TMP_JAMP(1059) - TMP_JAMP(1055) ! used 4 times - TMP_JAMP(1323) = TMP_JAMP(1060) + TMP_JAMP(1056) ! 
used 4 times - TMP_JAMP(1322) = TMP_JAMP(1105) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(963) ! used 4 times - TMP_JAMP(1321) = TMP_JAMP(1105) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(484) ! used 4 times - TMP_JAMP(1320) = TMP_JAMP(1105) + TMP_JAMP(1055) ! used 4 times - TMP_JAMP(1319) = TMP_JAMP(1106) + ((0.000000000000000D+00, + TMP_JAMP(879) = TMP_JAMP(690) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(548) ! used 4 times + TMP_JAMP(878) = TMP_JAMP(690) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(532) ! used 4 times + TMP_JAMP(877) = TMP_JAMP(689) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(494) ! used 4 times + TMP_JAMP(876) = TMP_JAMP(689) - TMP_JAMP(688) ! used 4 times + TMP_JAMP(875) = TMP_JAMP(689) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(493) ! used 4 times + TMP_JAMP(874) = TMP_JAMP(688) + TMP_JAMP(686) ! used 4 times + TMP_JAMP(873) = TMP_JAMP(688) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(492) ! used 4 times + TMP_JAMP(872) = TMP_JAMP(687) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(648) ! used 4 times - TMP_JAMP(1318) = TMP_JAMP(1106) + TMP_JAMP(1057) ! used 4 times - TMP_JAMP(1317) = TMP_JAMP(1109) - TMP_JAMP(1105) ! used 4 times - TMP_JAMP(1316) = AMP(335) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1013) ! used 4 times - TMP_JAMP(1315) = TMP_JAMP(449) + AMP(328) ! used 4 times - TMP_JAMP(1314) = TMP_JAMP(865) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(653) ! used 4 times - TMP_JAMP(1313) = TMP_JAMP(1054) - TMP_JAMP(865) ! used 4 times - TMP_JAMP(1312) = TMP_JAMP(1058) - TMP_JAMP(1056) ! used 4 times - TMP_JAMP(1311) = TMP_JAMP(1104) + TMP_JAMP(453) ! used 4 times - TMP_JAMP(1310) = AMP(972) + AMP(1708) ! used 4 times - TMP_JAMP(1309) = AMP(489) - AMP(1710) ! used 4 times - TMP_JAMP(1308) = TMP_JAMP(1054) - TMP_JAMP(1052) ! 
used 4 times - TMP_JAMP(1307) = TMP_JAMP(1097) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(972) ! used 4 times - TMP_JAMP(1306) = TMP_JAMP(1097) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(579) ! used 4 times - TMP_JAMP(1305) = TMP_JAMP(1097) + TMP_JAMP(1052) ! used 4 times - TMP_JAMP(1304) = TMP_JAMP(1098) + ((0.000000000000000D+00, + TMP_JAMP(871) = TMP_JAMP(687) + TMP_JAMP(686) ! used 4 times + TMP_JAMP(870) = TMP_JAMP(687) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(642) ! used 4 times + TMP_JAMP(869) = TMP_JAMP(686) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(651) ! used 4 times + TMP_JAMP(868) = TMP_JAMP(685) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(653) ! used 4 times + TMP_JAMP(867) = TMP_JAMP(685) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(652) ! used 4 times + TMP_JAMP(866) = TMP_JAMP(684) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(489) ! used 4 times - TMP_JAMP(1303) = TMP_JAMP(1098) + TMP_JAMP(1053) ! used 4 times - TMP_JAMP(1302) = TMP_JAMP(1100) - TMP_JAMP(1097) ! used 4 times - TMP_JAMP(1301) = AMP(807) - AMP(1763) ! used 4 times - TMP_JAMP(1300) = TMP_JAMP(707) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(610) ! used 4 times - TMP_JAMP(1299) = TMP_JAMP(766) - TMP_JAMP(707) ! used 4 times - TMP_JAMP(1298) = TMP_JAMP(1050) - TMP_JAMP(707) ! used 4 times - TMP_JAMP(1297) = TMP_JAMP(1051) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(807) ! used 4 times - TMP_JAMP(1296) = TMP_JAMP(1057) + TMP_JAMP(1051) ! used 4 times - TMP_JAMP(1295) = TMP_JAMP(1058) - TMP_JAMP(1050) ! used 4 times - TMP_JAMP(1294) = AMP(810) - AMP(1709) ! used 4 times - TMP_JAMP(1293) = TMP_JAMP(1053) + TMP_JAMP(1049) ! used 4 times - TMP_JAMP(1292) = AMP(994) + AMP(1702) ! used 4 times - TMP_JAMP(1291) = AMP(483) - AMP(1704) ! 
used 4 times - TMP_JAMP(1290) = TMP_JAMP(1052) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(994) ! used 4 times - TMP_JAMP(1289) = TMP_JAMP(1053) + ((-0.000000000000000D+00 + TMP_JAMP(865) = TMP_JAMP(684) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(483) ! used 4 times - TMP_JAMP(1288) = TMP_JAMP(1059) + TMP_JAMP(1053) ! used 4 times - TMP_JAMP(1287) = AMP(799) - AMP(1703) ! used 4 times - TMP_JAMP(1286) = TMP_JAMP(1049) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(799) ! used 4 times - TMP_JAMP(1285) = TMP_JAMP(1055) - TMP_JAMP(1051) ! used 4 times - TMP_JAMP(1284) = TMP_JAMP(1050) + ((0.000000000000000D+00, + TMP_JAMP(864) = TMP_JAMP(683) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(650) ! used 4 times + TMP_JAMP(863) = TMP_JAMP(683) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(812) ! used 4 times - TMP_JAMP(1283) = TMP_JAMP(1052) - TMP_JAMP(1049) ! used 4 times - TMP_JAMP(1282) = AMP(981) + AMP(1654) ! used 4 times - TMP_JAMP(1281) = AMP(492) - AMP(1656) ! used 4 times - TMP_JAMP(1280) = TMP_JAMP(1058) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(981) ! used 4 times - TMP_JAMP(1279) = TMP_JAMP(1083) - TMP_JAMP(1058) ! used 4 times - TMP_JAMP(1278) = TMP_JAMP(1085) - TMP_JAMP(1060) ! used 4 times - TMP_JAMP(1277) = TMP_JAMP(1086) + TMP_JAMP(1083) ! used 4 times - TMP_JAMP(1276) = TMP_JAMP(1054) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(652) ! used 4 times - TMP_JAMP(1275) = TMP_JAMP(1057) + TMP_JAMP(1054) ! used 4 times - TMP_JAMP(1274) = AMP(651) - AMP(1655) ! used 4 times - TMP_JAMP(1273) = TMP_JAMP(1077) - TMP_JAMP(1056) ! used 4 times - TMP_JAMP(1272) = AMP(642) - AMP(1758) ! used 4 times - TMP_JAMP(1271) = TMP_JAMP(1055) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1003) ! used 4 times - TMP_JAMP(1270) = TMP_JAMP(1067) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1057) ! 
used 4 times - TMP_JAMP(1269) = AMP(801) - AMP(1757) ! used 4 times - TMP_JAMP(1268) = TMP_JAMP(1066) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1051) ! used 4 times - TMP_JAMP(1267) = TMP_JAMP(1059) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(493) ! used 4 times - TMP_JAMP(1266) = TMP_JAMP(1061) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1059) ! used 4 times - TMP_JAMP(1265) = TMP_JAMP(1065) - TMP_JAMP(1061) ! used 4 times - TMP_JAMP(1264) = AMP(1001) + AMP(1009) ! used 4 times - TMP_JAMP(1263) = AMP(983) - AMP(1011) ! used 4 times - TMP_JAMP(1262) = TMP_JAMP(471) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(465) ! used 4 times - TMP_JAMP(1261) = TMP_JAMP(474) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(463) ! used 4 times - TMP_JAMP(1260) = TMP_JAMP(1048) - TMP_JAMP(1047) ! used 4 times - TMP_JAMP(1259) = AMP(992) - AMP(1010) ! used 4 times - TMP_JAMP(1258) = TMP_JAMP(450) + AMP(992) ! used 4 times - TMP_JAMP(1257) = TMP_JAMP(454) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(450) ! used 4 times - TMP_JAMP(1256) = TMP_JAMP(1046) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(454) ! used 4 times - TMP_JAMP(1255) = TMP_JAMP(1048) + TMP_JAMP(1046) ! used 4 times - TMP_JAMP(1254) = TMP_JAMP(1046) + AMP(142) ! used 4 times - TMP_JAMP(1253) = TMP_JAMP(1047) + TMP_JAMP(1046) ! used 4 times - TMP_JAMP(1252) = TMP_JAMP(1089) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(133) ! used 4 times - TMP_JAMP(1251) = TMP_JAMP(1089) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1048) ! used 4 times - TMP_JAMP(1250) = TMP_JAMP(1064) + AMP(148) ! used 4 times - TMP_JAMP(1249) = TMP_JAMP(1064) + TMP_JAMP(1047) ! used 4 times - TMP_JAMP(1248) = AMP(266) + AMP(289) ! used 4 times - TMP_JAMP(1247) = AMP(186) - AMP(291) ! used 4 times - TMP_JAMP(1246) = AMP(172) + AMP(175) ! 
used 4 times - TMP_JAMP(1245) = AMP(166) - AMP(177) ! used 4 times - TMP_JAMP(1244) = TMP_JAMP(745) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(186) ! used 4 times - TMP_JAMP(1243) = TMP_JAMP(747) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(266) ! used 4 times - TMP_JAMP(1242) = TMP_JAMP(1043) - TMP_JAMP(747) ! used 4 times - TMP_JAMP(1241) = TMP_JAMP(1044) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(195) ! used 4 times - TMP_JAMP(1240) = TMP_JAMP(1045) - TMP_JAMP(745) ! used 4 times - TMP_JAMP(1239) = TMP_JAMP(1045) + TMP_JAMP(1044) ! used 4 times - TMP_JAMP(1238) = TMP_JAMP(1114) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(172) ! used 4 times - TMP_JAMP(1237) = TMP_JAMP(1114) - TMP_JAMP(1041) ! used 4 times - TMP_JAMP(1236) = TMP_JAMP(1115) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(166) ! used 4 times - TMP_JAMP(1235) = TMP_JAMP(1115) - TMP_JAMP(1042) ! used 4 times - TMP_JAMP(1234) = AMP(239) + AMP(310) ! used 4 times - TMP_JAMP(1233) = AMP(206) - AMP(312) ! used 4 times - TMP_JAMP(1232) = AMP(204) - AMP(290) ! used 4 times - TMP_JAMP(1231) = TMP_JAMP(727) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(204) ! used 4 times - TMP_JAMP(1230) = TMP_JAMP(1040) - TMP_JAMP(727) ! used 4 times - TMP_JAMP(1229) = TMP_JAMP(1045) + TMP_JAMP(1040) ! used 4 times - TMP_JAMP(1228) = TMP_JAMP(1109) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(239) ! used 4 times - TMP_JAMP(1227) = TMP_JAMP(1109) + TMP_JAMP(1038) ! used 4 times - TMP_JAMP(1226) = TMP_JAMP(1110) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(206) ! used 4 times - TMP_JAMP(1225) = TMP_JAMP(1110) + TMP_JAMP(1039) ! used 4 times - TMP_JAMP(1224) = AMP(169) - AMP(176) ! used 4 times - TMP_JAMP(1223) = TMP_JAMP(835) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(213) ! 
used 4 times - TMP_JAMP(1222) = TMP_JAMP(1036) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(169) ! used 4 times - TMP_JAMP(1221) = TMP_JAMP(1037) - TMP_JAMP(835) ! used 4 times - TMP_JAMP(1220) = TMP_JAMP(1037) + TMP_JAMP(1036) ! used 4 times - TMP_JAMP(1219) = TMP_JAMP(1040) + TMP_JAMP(1037) ! used 4 times - TMP_JAMP(1218) = TMP_JAMP(1042) + TMP_JAMP(1036) ! used 4 times - TMP_JAMP(1217) = TMP_JAMP(1043) - TMP_JAMP(1040) ! used 4 times - TMP_JAMP(1216) = AMP(248) + AMP(301) ! used 4 times - TMP_JAMP(1215) = AMP(188) - AMP(303) ! used 4 times - TMP_JAMP(1214) = TMP_JAMP(1100) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(248) ! used 4 times - TMP_JAMP(1213) = TMP_JAMP(1100) + TMP_JAMP(1034) ! used 4 times - TMP_JAMP(1212) = TMP_JAMP(1101) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(188) ! used 4 times - TMP_JAMP(1211) = TMP_JAMP(1101) + TMP_JAMP(1035) ! used 4 times - TMP_JAMP(1210) = AMP(224) - AMP(311) ! used 4 times - TMP_JAMP(1209) = TMP_JAMP(766) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(229) ! used 4 times - TMP_JAMP(1208) = TMP_JAMP(1032) + TMP_JAMP(766) ! used 4 times - TMP_JAMP(1207) = TMP_JAMP(1033) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(224) ! used 4 times - TMP_JAMP(1206) = TMP_JAMP(1039) + TMP_JAMP(1033) ! used 4 times - TMP_JAMP(1205) = TMP_JAMP(1043) + TMP_JAMP(1032) ! used 4 times - TMP_JAMP(1204) = AMP(227) - AMP(302) ! used 4 times - TMP_JAMP(1203) = TMP_JAMP(1035) + TMP_JAMP(1031) ! used 4 times - TMP_JAMP(1202) = AMP(275) + AMP(298) ! used 4 times - TMP_JAMP(1201) = AMP(184) - AMP(300) ! used 4 times - TMP_JAMP(1200) = TMP_JAMP(1034) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(275) ! used 4 times - TMP_JAMP(1199) = TMP_JAMP(1035) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(184) ! used 4 times - TMP_JAMP(1198) = TMP_JAMP(1041) - TMP_JAMP(1036) ! used 4 times - TMP_JAMP(1197) = TMP_JAMP(1044) - TMP_JAMP(1035) ! 
used 4 times - TMP_JAMP(1196) = AMP(222) - AMP(299) ! used 4 times - TMP_JAMP(1195) = TMP_JAMP(1031) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(222) ! used 4 times - TMP_JAMP(1194) = TMP_JAMP(1038) - TMP_JAMP(1033) ! used 4 times - TMP_JAMP(1193) = TMP_JAMP(1032) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(231) ! used 4 times - TMP_JAMP(1192) = TMP_JAMP(1034) - TMP_JAMP(1031) ! used 4 times - TMP_JAMP(1191) = TMP_JAMP(1042) + TMP_JAMP(1032) ! used 4 times - TMP_JAMP(1190) = AMP(257) + AMP(292) ! used 4 times - TMP_JAMP(1189) = AMP(191) - AMP(294) ! used 4 times - TMP_JAMP(1188) = TMP_JAMP(1086) + TMP_JAMP(1043) ! used 4 times - TMP_JAMP(1187) = TMP_JAMP(1088) + TMP_JAMP(1045) ! used 4 times - TMP_JAMP(1186) = TMP_JAMP(1037) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(211) ! used 4 times - TMP_JAMP(1185) = TMP_JAMP(1039) - TMP_JAMP(1037) ! used 4 times - TMP_JAMP(1184) = AMP(209) - AMP(293) ! used 4 times - TMP_JAMP(1183) = TMP_JAMP(1081) + TMP_JAMP(1040) ! used 4 times - TMP_JAMP(1182) = AMP(284) + AMP(307) ! used 4 times - TMP_JAMP(1181) = AMP(202) - AMP(309) ! used 4 times - TMP_JAMP(1180) = TMP_JAMP(1038) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(284) ! used 4 times - TMP_JAMP(1179) = TMP_JAMP(1041) + TMP_JAMP(1038) ! used 4 times - TMP_JAMP(1178) = TMP_JAMP(1063) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1039) ! used 4 times - TMP_JAMP(1177) = AMP(220) - AMP(308) ! used 4 times - TMP_JAMP(1176) = TMP_JAMP(1062) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1033) ! used 4 times - TMP_JAMP(1175) = TMP_JAMP(1065) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1044) ! used 4 times - TMP_JAMP(1174) = AMP(278) + AMP(313) ! used 4 times - TMP_JAMP(1173) = AMP(260) - AMP(315) ! used 4 times - TMP_JAMP(1172) = TMP_JAMP(1047) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1041) ! 
used 4 times - TMP_JAMP(1171) = TMP_JAMP(1048) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1042) ! used 4 times - TMP_JAMP(1170) = AMP(269) - AMP(314) ! used 4 times - TMP_JAMP(1169) = TMP_JAMP(1046) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1036) ! used 4 times - TMP_JAMP(1905) = TMP_JAMP(1821) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1513) ! used 4 times - TMP_JAMP(1904) = TMP_JAMP(1781) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1575) ! used 4 times - TMP_JAMP(1903) = TMP_JAMP(1739) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(854) ! used 4 times - TMP_JAMP(1902) = TMP_JAMP(1735) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1731) ! used 4 times - TMP_JAMP(1901) = TMP_JAMP(1721) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1716) ! used 4 times - TMP_JAMP(1900) = TMP_JAMP(1686) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1632) ! used 4 times - TMP_JAMP(1899) = TMP_JAMP(1675) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1828) ! used 4 times - TMP_JAMP(1898) = TMP_JAMP(1645) - TMP_JAMP(1642) ! used 4 times - TMP_JAMP(1897) = TMP_JAMP(1646) + TMP_JAMP(1641) ! used 4 times - TMP_JAMP(1896) = TMP_JAMP(1630) + TMP_JAMP(1627) ! used 4 times - TMP_JAMP(1895) = TMP_JAMP(1617) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(655) ! used 4 times - TMP_JAMP(1894) = TMP_JAMP(1605) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(636) ! used 4 times - TMP_JAMP(1893) = TMP_JAMP(1593) - AMP(1437) ! used 4 times - TMP_JAMP(1892) = TMP_JAMP(1594) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(623) ! used 4 times - TMP_JAMP(1891) = TMP_JAMP(1598) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1595) ! used 4 times - TMP_JAMP(1890) = TMP_JAMP(1589) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(617) ! 
used 4 times - TMP_JAMP(1889) = TMP_JAMP(1585) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1578) ! used 4 times - TMP_JAMP(1888) = TMP_JAMP(1586) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1579) ! used 4 times - TMP_JAMP(1887) = TMP_JAMP(1568) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1647) ! used 4 times - TMP_JAMP(1886) = TMP_JAMP(1573) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1565) ! used 4 times - TMP_JAMP(1885) = TMP_JAMP(1574) - TMP_JAMP(1570) ! used 4 times - TMP_JAMP(1884) = TMP_JAMP(1555) + AMP(1280) ! used 4 times - TMP_JAMP(1883) = TMP_JAMP(1559) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1556) ! used 4 times - TMP_JAMP(1882) = TMP_JAMP(1560) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1557) ! used 4 times - TMP_JAMP(1881) = TMP_JAMP(1538) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1534) ! used 4 times - TMP_JAMP(1880) = TMP_JAMP(1529) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(574) ! used 4 times - TMP_JAMP(1879) = TMP_JAMP(1525) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(570) ! used 4 times - TMP_JAMP(1878) = TMP_JAMP(1526) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1523) ! used 4 times - TMP_JAMP(1877) = TMP_JAMP(1511) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1509) ! used 4 times - TMP_JAMP(1876) = TMP_JAMP(1512) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1510) ! used 4 times - TMP_JAMP(1875) = TMP_JAMP(1507) + TMP_JAMP(1506) ! used 4 times - TMP_JAMP(1874) = TMP_JAMP(1508) - TMP_JAMP(1505) ! used 4 times - TMP_JAMP(1873) = TMP_JAMP(1499) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1497) ! used 4 times - TMP_JAMP(1872) = TMP_JAMP(1491) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(543) ! 
used 4 times - TMP_JAMP(1871) = TMP_JAMP(1492) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1490) ! used 4 times - TMP_JAMP(1870) = TMP_JAMP(1480) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1479) ! used 4 times - TMP_JAMP(1869) = TMP_JAMP(1478) - TMP_JAMP(1477) ! used 4 times - TMP_JAMP(1868) = TMP_JAMP(1468) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1558) ! used 4 times - TMP_JAMP(1867) = TMP_JAMP(1472) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1467) ! used 4 times - TMP_JAMP(1866) = TMP_JAMP(1475) - TMP_JAMP(1471) ! used 4 times - TMP_JAMP(1865) = TMP_JAMP(1462) + TMP_JAMP(1459) ! used 4 times - TMP_JAMP(1864) = TMP_JAMP(1451) - TMP_JAMP(1450) ! used 4 times - TMP_JAMP(1863) = TMP_JAMP(1435) + TMP_JAMP(1434) ! used 4 times - TMP_JAMP(1862) = TMP_JAMP(1424) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1614) ! used 4 times - TMP_JAMP(1861) = TMP_JAMP(1403) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1399) ! used 4 times - TMP_JAMP(1860) = TMP_JAMP(1404) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1400) ! used 4 times - TMP_JAMP(1859) = TMP_JAMP(1397) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1396) ! used 4 times - TMP_JAMP(1858) = TMP_JAMP(1381) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1777) ! used 4 times - TMP_JAMP(1857) = TMP_JAMP(1367) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1364) ! used 4 times - TMP_JAMP(1856) = TMP_JAMP(1339) + TMP_JAMP(1338) ! used 4 times - TMP_JAMP(1855) = TMP_JAMP(1340) - TMP_JAMP(1337) ! used 4 times - TMP_JAMP(1854) = TMP_JAMP(1316) - TMP_JAMP(1315) ! used 4 times - TMP_JAMP(1853) = TMP_JAMP(1281) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1278) ! used 4 times - TMP_JAMP(1852) = TMP_JAMP(1274) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1273) ! 
used 4 times - TMP_JAMP(1851) = TMP_JAMP(1271) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1756) ! used 4 times - TMP_JAMP(1850) = TMP_JAMP(1272) + TMP_JAMP(1270) ! used 4 times - TMP_JAMP(1849) = TMP_JAMP(1269) + TMP_JAMP(1268) ! used 4 times - TMP_JAMP(1848) = TMP_JAMP(1263) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1262) ! used 4 times - TMP_JAMP(1847) = TMP_JAMP(1264) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1261) ! used 4 times - TMP_JAMP(1846) = TMP_JAMP(1189) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1187) ! used 4 times - TMP_JAMP(1845) = TMP_JAMP(1190) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1188) ! used 4 times - TMP_JAMP(1844) = TMP_JAMP(1184) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1183) ! used 4 times - TMP_JAMP(1843) = TMP_JAMP(1181) + TMP_JAMP(1178) ! used 4 times - TMP_JAMP(1842) = TMP_JAMP(1177) + TMP_JAMP(1176) ! used 4 times - TMP_JAMP(1841) = TMP_JAMP(1175) + AMP(193) ! used 4 times - TMP_JAMP(1840) = TMP_JAMP(1173) - TMP_JAMP(1171) ! used 4 times - TMP_JAMP(1839) = TMP_JAMP(1174) + TMP_JAMP(1172) ! used 4 times - TMP_JAMP(1838) = TMP_JAMP(1170) - TMP_JAMP(1169) ! used 4 times - TMP_JAMP(1928) = TMP_JAMP(1897) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1637) ! used 4 times - TMP_JAMP(1927) = TMP_JAMP(1898) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1636) ! used 4 times - TMP_JAMP(1926) = TMP_JAMP(1896) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1626) ! used 4 times - TMP_JAMP(1925) = TMP_JAMP(1888) - AMP(448) ! used 4 times - TMP_JAMP(1924) = TMP_JAMP(1889) + AMP(450) ! used 4 times - TMP_JAMP(1923) = TMP_JAMP(1882) + AMP(1657) ! used 4 times - TMP_JAMP(1922) = TMP_JAMP(1883) - AMP(1659) ! used 4 times - TMP_JAMP(1921) = TMP_JAMP(1878) + AMP(449) ! used 4 times - TMP_JAMP(1920) = TMP_JAMP(1876) - AMP(1576) ! 
used 4 times - TMP_JAMP(1919) = TMP_JAMP(1877) + AMP(1578) ! used 4 times - TMP_JAMP(1918) = TMP_JAMP(1871) - AMP(1658) ! used 4 times - TMP_JAMP(1917) = TMP_JAMP(1870) + AMP(1577) ! used 4 times - TMP_JAMP(1916) = TMP_JAMP(1866) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1464) ! used 4 times - TMP_JAMP(1915) = TMP_JAMP(1867) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1416) ! used 4 times - TMP_JAMP(1914) = TMP_JAMP(1865) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1454) ! used 4 times - TMP_JAMP(1913) = TMP_JAMP(1864) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1444) ! used 4 times - TMP_JAMP(1912) = TMP_JAMP(1863) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1429) ! used 4 times - TMP_JAMP(1911) = TMP_JAMP(1860) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1879) ! used 4 times - TMP_JAMP(1910) = TMP_JAMP(1861) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(440) ! used 4 times - TMP_JAMP(1909) = TMP_JAMP(1857) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1068) ! used 4 times - TMP_JAMP(1908) = TMP_JAMP(1855) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1330) ! used 4 times - TMP_JAMP(1907) = TMP_JAMP(1856) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1331) ! used 4 times - TMP_JAMP(1906) = TMP_JAMP(1854) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1311) ! used 4 times - TMP_JAMP(1989) = TMP_JAMP(1167) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(528) ! used 3 times - TMP_JAMP(1988) = TMP_JAMP(1799) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1156) ! used 3 times - TMP_JAMP(1987) = TMP_JAMP(1827) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(453) ! used 3 times - TMP_JAMP(1986) = TMP_JAMP(1830) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(352) ! 
used 3 times - TMP_JAMP(1985) = TMP_JAMP(970) - AMP(1202) ! used 3 times - TMP_JAMP(1984) = TMP_JAMP(1166) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(909) ! used 3 times - TMP_JAMP(1983) = TMP_JAMP(1812) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(511) ! used 3 times - TMP_JAMP(1982) = TMP_JAMP(1165) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(370) ! used 3 times - TMP_JAMP(1981) = TMP_JAMP(1164) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(670) ! used 3 times - TMP_JAMP(1980) = TMP_JAMP(893) - AMP(432) ! used 3 times - TMP_JAMP(1979) = TMP_JAMP(1166) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(687) ! used 3 times - TMP_JAMP(1978) = TMP_JAMP(926) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(919) ! used 3 times - TMP_JAMP(1977) = TMP_JAMP(1737) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(864) ! used 3 times - TMP_JAMP(1976) = TMP_JAMP(1902) + TMP_JAMP(837) ! used 3 times - TMP_JAMP(1975) = TMP_JAMP(875) - AMP(1058) ! used 3 times - TMP_JAMP(1974) = TMP_JAMP(833) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(527) ! used 3 times - TMP_JAMP(1973) = TMP_JAMP(792) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(789) ! used 3 times - TMP_JAMP(1972) = TMP_JAMP(1704) - TMP_JAMP(785) ! used 3 times - TMP_JAMP(1971) = TMP_JAMP(1708) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1695) ! used 3 times - TMP_JAMP(1970) = TMP_JAMP(864) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(779) ! used 3 times - TMP_JAMP(1969) = TMP_JAMP(1690) - TMP_JAMP(1013) ! used 3 times - TMP_JAMP(1968) = TMP_JAMP(1669) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1156) ! used 3 times - TMP_JAMP(1967) = TMP_JAMP(833) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(716) ! 
used 3 times - TMP_JAMP(1966) = TMP_JAMP(1571) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1566) ! used 3 times - TMP_JAMP(1965) = TMP_JAMP(1581) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(610) ! used 3 times - TMP_JAMP(1964) = TMP_JAMP(1553) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1549) ! used 3 times - TMP_JAMP(1963) = TMP_JAMP(1532) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1530) ! used 3 times - TMP_JAMP(1962) = TMP_JAMP(1576) + AMP(1112) ! used 3 times - TMP_JAMP(1961) = TMP_JAMP(1522) - TMP_JAMP(1519) ! used 3 times - TMP_JAMP(1960) = TMP_JAMP(1535) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1496) ! used 3 times - TMP_JAMP(1959) = TMP_JAMP(1489) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1486) ! used 3 times - TMP_JAMP(1958) = TMP_JAMP(1554) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1548) ! used 3 times - TMP_JAMP(1957) = TMP_JAMP(1469) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1560) ! used 3 times - TMP_JAMP(1956) = TMP_JAMP(1446) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(705) ! used 3 times - TMP_JAMP(1955) = TMP_JAMP(1575) + TMP_JAMP(1569) ! used 3 times - TMP_JAMP(1954) = TMP_JAMP(864) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(675) ! used 3 times - TMP_JAMP(1953) = TMP_JAMP(1405) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1401) ! used 3 times - TMP_JAMP(1952) = TMP_JAMP(1379) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(61) ! used 3 times - TMP_JAMP(1951) = TMP_JAMP(833) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(514) ! used 3 times - TMP_JAMP(1950) = TMP_JAMP(1362) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1360) ! used 3 times - TMP_JAMP(1949) = TMP_JAMP(1368) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1365) ! 
used 3 times - TMP_JAMP(1948) = TMP_JAMP(1348) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(628) ! used 3 times - TMP_JAMP(1947) = TMP_JAMP(1382) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1132) ! used 3 times - TMP_JAMP(1946) = TMP_JAMP(1336) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1650) ! used 3 times - TMP_JAMP(1945) = TMP_JAMP(794) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(788) ! used 3 times - TMP_JAMP(1944) = TMP_JAMP(1326) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1649) ! used 3 times - TMP_JAMP(1943) = TMP_JAMP(1313) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(653) ! used 3 times - TMP_JAMP(1942) = TMP_JAMP(1335) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1648) ! used 3 times - TMP_JAMP(1941) = TMP_JAMP(777) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(772) ! used 3 times - TMP_JAMP(1940) = TMP_JAMP(731) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(725) ! used 3 times - TMP_JAMP(1939) = TMP_JAMP(1319) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1764) ! used 3 times - TMP_JAMP(1938) = TMP_JAMP(714) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(711) ! used 3 times - TMP_JAMP(1937) = TMP_JAMP(1377) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1522) ! used 3 times - TMP_JAMP(1936) = TMP_JAMP(1373) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1528) ! used 3 times - TMP_JAMP(1935) = TMP_JAMP(760) + AMP(136) ! used 3 times - TMP_JAMP(1934) = TMP_JAMP(1259) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1257) ! used 3 times - TMP_JAMP(1933) = TMP_JAMP(1231) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(290) ! used 3 times - TMP_JAMP(1932) = TMP_JAMP(1236) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(177) ! 
used 3 times - TMP_JAMP(1931) = TMP_JAMP(1243) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(289) ! used 3 times - TMP_JAMP(1930) = TMP_JAMP(1226) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(312) ! used 3 times - TMP_JAMP(1929) = TMP_JAMP(1238) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(175) ! used 3 times - TMP_JAMP(1991) = TMP_JAMP(1969) + AMP(1458) ! used 3 times - TMP_JAMP(1990) = TMP_JAMP(1935) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1644) ! used 3 times - TMP_JAMP(2641) = AMP(150) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(436) ! used 2 times - TMP_JAMP(2640) = TMP_JAMP(1831) - AMP(83) ! used 2 times - TMP_JAMP(2639) = TMP_JAMP(1836) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(415) ! used 2 times - TMP_JAMP(2638) = TMP_JAMP(1837) + AMP(150) ! used 2 times - TMP_JAMP(2637) = TMP_JAMP(1905) + AMP(349) ! used 2 times - TMP_JAMP(2636) = AMP(150) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1060) ! used 2 times - TMP_JAMP(2635) = TMP_JAMP(1814) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1802) ! used 2 times - TMP_JAMP(2634) = TMP_JAMP(1815) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1801) ! used 2 times - TMP_JAMP(2633) = TMP_JAMP(1820) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1816) ! used 2 times - TMP_JAMP(2632) = TMP_JAMP(1796) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1514) ! used 2 times - TMP_JAMP(2631) = TMP_JAMP(1798) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(369) ! used 2 times - TMP_JAMP(2630) = TMP_JAMP(1800) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1791) ! used 2 times - TMP_JAMP(2629) = TMP_JAMP(1804) + TMP_JAMP(1796) ! used 2 times - TMP_JAMP(2628) = TMP_JAMP(1905) - TMP_JAMP(1804) ! used 2 times - TMP_JAMP(2627) = TMP_JAMP(1986) + AMP(444) ! 
used 2 times - TMP_JAMP(2626) = TMP_JAMP(1987) + TMP_JAMP(1793) ! used 2 times - TMP_JAMP(2625) = TMP_JAMP(1989) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1988) ! used 2 times - TMP_JAMP(2624) = TMP_JAMP(931) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(92) ! used 2 times - TMP_JAMP(2623) = TMP_JAMP(945) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(938) ! used 2 times - TMP_JAMP(2622) = TMP_JAMP(1782) + AMP(1206) ! used 2 times - TMP_JAMP(2621) = TMP_JAMP(1784) - AMP(1460) ! used 2 times - TMP_JAMP(2620) = TMP_JAMP(1785) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(931) ! used 2 times - TMP_JAMP(2619) = TMP_JAMP(1787) - TMP_JAMP(1785) ! used 2 times - TMP_JAMP(2618) = TMP_JAMP(1788) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1786) ! used 2 times - TMP_JAMP(2617) = TMP_JAMP(1833) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1787) ! used 2 times - TMP_JAMP(2616) = TMP_JAMP(1904) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1833) ! used 2 times - TMP_JAMP(2615) = AMP(1065) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1765) ! used 2 times - TMP_JAMP(2614) = TMP_JAMP(1772) - AMP(1065) ! used 2 times - TMP_JAMP(2613) = TMP_JAMP(1774) + TMP_JAMP(1773) ! used 2 times - TMP_JAMP(2612) = TMP_JAMP(1776) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1767) ! used 2 times - TMP_JAMP(2611) = TMP_JAMP(1777) - AMP(1121) ! used 2 times - TMP_JAMP(2610) = TMP_JAMP(1777) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(686) ! used 2 times - TMP_JAMP(2609) = TMP_JAMP(1814) + TMP_JAMP(1779) ! used 2 times - TMP_JAMP(2608) = TMP_JAMP(1983) - TMP_JAMP(1772) ! used 2 times - TMP_JAMP(2607) = TMP_JAMP(1984) - TMP_JAMP(1905) ! used 2 times - TMP_JAMP(2606) = AMP(686) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(761) ! 
used 2 times - TMP_JAMP(2605) = TMP_JAMP(1152) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(898) ! used 2 times - TMP_JAMP(2604) = TMP_JAMP(1904) - TMP_JAMP(1152) ! used 2 times - TMP_JAMP(2603) = AMP(82) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(144) ! used 2 times - TMP_JAMP(2602) = TMP_JAMP(886) - TMP_JAMP(883) ! used 2 times - TMP_JAMP(2601) = TMP_JAMP(1763) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(367) ! used 2 times - TMP_JAMP(2600) = TMP_JAMP(1820) + TMP_JAMP(1155) ! used 2 times - TMP_JAMP(2599) = TMP_JAMP(1836) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1756) ! used 2 times - TMP_JAMP(2598) = TMP_JAMP(1982) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1762) ! used 2 times - TMP_JAMP(2597) = TMP_JAMP(1988) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1755) ! used 2 times - TMP_JAMP(2596) = TMP_JAMP(1746) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(144) ! used 2 times - TMP_JAMP(2595) = TMP_JAMP(1752) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1746) ! used 2 times - TMP_JAMP(2594) = TMP_JAMP(1761) - TMP_JAMP(869) ! used 2 times - TMP_JAMP(2593) = TMP_JAMP(1779) - TMP_JAMP(1751) ! used 2 times - TMP_JAMP(2592) = TMP_JAMP(1816) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1803) ! used 2 times - TMP_JAMP(2591) = TMP_JAMP(1981) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1752) ! used 2 times - TMP_JAMP(2590) = TMP_JAMP(853) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(351) ! used 2 times - TMP_JAMP(2589) = TMP_JAMP(855) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(435) ! used 2 times - TMP_JAMP(2588) = TMP_JAMP(860) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(792) ! 
used 2 times - TMP_JAMP(2587) = TMP_JAMP(974) + ((-0.000000000000000D+00 + TMP_JAMP(862) = TMP_JAMP(682) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(807) ! used 4 times + TMP_JAMP(861) = TMP_JAMP(682) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(801) ! used 4 times + TMP_JAMP(860) = TMP_JAMP(681) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(810) ! used 4 times + TMP_JAMP(859) = TMP_JAMP(681) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(799) ! used 4 times + TMP_JAMP(858) = TMP_JAMP(676) - AMP(614) ! used 4 times + TMP_JAMP(857) = TMP_JAMP(675) + AMP(625) ! used 4 times + TMP_JAMP(856) = TMP_JAMP(668) - AMP(690) ! used 4 times + TMP_JAMP(855) = AMP(475) - AMP(477) ! used 4 times + TMP_JAMP(854) = AMP(474) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(477) ! used 4 times + TMP_JAMP(853) = AMP(472) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(475) ! used 4 times + TMP_JAMP(852) = AMP(593) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(622) ! used 4 times + TMP_JAMP(851) = AMP(622) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(627) ! used 4 times + TMP_JAMP(850) = AMP(678) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(686) ! used 4 times + TMP_JAMP(849) = AMP(678) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(767) ! used 4 times + TMP_JAMP(848) = AMP(476) + AMP(477) ! used 4 times + TMP_JAMP(847) = AMP(473) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(476) ! used 4 times + TMP_JAMP(846) = AMP(534) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(536) ! used 4 times + TMP_JAMP(845) = AMP(695) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(708) ! used 4 times + TMP_JAMP(844) = AMP(536) + AMP(695) ! used 4 times + TMP_JAMP(843) = AMP(693) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(695) ! 
used 4 times + TMP_JAMP(842) = AMP(536) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(549) ! used 4 times + TMP_JAMP(841) = AMP(923) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(941) ! used 4 times + TMP_JAMP(840) = AMP(932) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(941) ! used 4 times + TMP_JAMP(839) = AMP(475) + AMP(476) ! used 4 times + TMP_JAMP(838) = AMP(481) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(536) ! used 4 times + TMP_JAMP(837) = AMP(640) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(695) ! used 4 times + TMP_JAMP(836) = AMP(811) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(941) ! used 4 times + TMP_JAMP(1062) = TMP_JAMP(1037) - TMP_JAMP(857) ! used 4 times + TMP_JAMP(1061) = TMP_JAMP(993) + TMP_JAMP(762) ! used 4 times + TMP_JAMP(1060) = TMP_JAMP(979) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(788) ! used 4 times + TMP_JAMP(1059) = TMP_JAMP(959) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(795) ! used 4 times + TMP_JAMP(1058) = TMP_JAMP(958) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(795) ! used 4 times + TMP_JAMP(1057) = TMP_JAMP(952) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(788) ! used 4 times + TMP_JAMP(1056) = TMP_JAMP(940) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(752) ! used 4 times + TMP_JAMP(1055) = TMP_JAMP(932) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(762) ! used 4 times + TMP_JAMP(1054) = TMP_JAMP(929) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(750) ! used 4 times + TMP_JAMP(1053) = TMP_JAMP(914) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(753) ! used 4 times + TMP_JAMP(1052) = TMP_JAMP(913) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(754) ! 
used 4 times + TMP_JAMP(1051) = TMP_JAMP(904) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(793) ! used 4 times + TMP_JAMP(1050) = TMP_JAMP(895) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(793) ! used 4 times + TMP_JAMP(1049) = TMP_JAMP(883) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(754) ! used 4 times + TMP_JAMP(1048) = TMP_JAMP(882) - TMP_JAMP(744) ! used 4 times + TMP_JAMP(1047) = TMP_JAMP(875) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(822) ! used 4 times + TMP_JAMP(1046) = TMP_JAMP(867) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(793) ! used 4 times + TMP_JAMP(1045) = TMP_JAMP(864) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(836) ! used 4 times + TMP_JAMP(1044) = TMP_JAMP(863) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(751) ! used 4 times + TMP_JAMP(1065) = TMP_JAMP(944) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(781) ! used 3 times + TMP_JAMP(1064) = TMP_JAMP(943) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(777) ! used 3 times + TMP_JAMP(1063) = TMP_JAMP(893) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(667) ! used 3 times + TMP_JAMP(1304) = TMP_JAMP(1065) + TMP_JAMP(1064) ! used 2 times + TMP_JAMP(1303) = TMP_JAMP(1065) - TMP_JAMP(981) ! used 2 times + TMP_JAMP(1302) = TMP_JAMP(1063) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(804) ! used 2 times + TMP_JAMP(1301) = TMP_JAMP(1063) - TMP_JAMP(896) ! used 2 times + TMP_JAMP(1300) = TMP_JAMP(1062) + TMP_JAMP(1012) ! used 2 times + TMP_JAMP(1299) = TMP_JAMP(1062) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(907) ! used 2 times + TMP_JAMP(1298) = TMP_JAMP(1061) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1054) ! 
used 2 times + TMP_JAMP(1297) = TMP_JAMP(1059) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1025) ! used 2 times + TMP_JAMP(1296) = TMP_JAMP(1058) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1031) ! used 2 times + TMP_JAMP(1295) = TMP_JAMP(1057) + TMP_JAMP(1055) ! used 2 times + TMP_JAMP(1294) = TMP_JAMP(1056) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(996) ! used 2 times + TMP_JAMP(1293) = TMP_JAMP(1055) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(993) ! used 2 times + TMP_JAMP(1292) = TMP_JAMP(1054) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(999) ! used 2 times + TMP_JAMP(1291) = TMP_JAMP(1053) + TMP_JAMP(933) ! used 2 times + TMP_JAMP(1290) = TMP_JAMP(1052) + TMP_JAMP(939) ! used 2 times + TMP_JAMP(1289) = TMP_JAMP(1050) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1021) ! used 2 times + TMP_JAMP(1288) = TMP_JAMP(1049) - TMP_JAMP(939) ! used 2 times + TMP_JAMP(1287) = TMP_JAMP(1048) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1042) ! used 2 times + TMP_JAMP(1286) = TMP_JAMP(1047) - TMP_JAMP(945) ! used 2 times + TMP_JAMP(1285) = TMP_JAMP(1047) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1041) ! used 2 times + TMP_JAMP(1284) = TMP_JAMP(1046) + TMP_JAMP(895) ! used 2 times + TMP_JAMP(1283) = TMP_JAMP(1045) + TMP_JAMP(925) ! used 2 times + TMP_JAMP(1282) = TMP_JAMP(1044) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(992) ! used 2 times + TMP_JAMP(1281) = TMP_JAMP(1044) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(994) ! used 2 times + TMP_JAMP(1280) = TMP_JAMP(1043) + TMP_JAMP(1006) ! used 2 times + TMP_JAMP(1279) = TMP_JAMP(1041) + TMP_JAMP(1030) ! used 2 times + TMP_JAMP(1278) = TMP_JAMP(1040) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(984) ! used 2 times + TMP_JAMP(1277) = TMP_JAMP(1040) + TMP_JAMP(1029) ! 
used 2 times + TMP_JAMP(1276) = TMP_JAMP(1035) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(988) ! used 2 times + TMP_JAMP(1275) = TMP_JAMP(1031) + TMP_JAMP(1005) ! used 2 times + TMP_JAMP(1274) = TMP_JAMP(1031) + ((0.000000000000000D+00 $ ,1.000000000000000D+00)) * TMP_JAMP(961) ! used 2 times - TMP_JAMP(2586) = TMP_JAMP(1744) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1740) ! used 2 times - TMP_JAMP(2585) = TMP_JAMP(1745) + TMP_JAMP(1744) ! used 2 times - TMP_JAMP(2584) = TMP_JAMP(1903) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1763) ! used 2 times - TMP_JAMP(2583) = TMP_JAMP(1987) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(853) ! used 2 times - TMP_JAMP(2582) = TMP_JAMP(1987) - TMP_JAMP(1979) ! used 2 times - TMP_JAMP(2581) = AMP(122) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1469) ! used 2 times - TMP_JAMP(2580) = AMP(95) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(122) ! used 2 times - TMP_JAMP(2579) = TMP_JAMP(939) - TMP_JAMP(838) ! used 2 times - TMP_JAMP(2578) = TMP_JAMP(1158) - AMP(1128) ! used 2 times - TMP_JAMP(2577) = TMP_JAMP(1730) + TMP_JAMP(942) ! used 2 times - TMP_JAMP(2576) = TMP_JAMP(1736) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1732) ! used 2 times - TMP_JAMP(2575) = TMP_JAMP(1976) - TMP_JAMP(1761) ! used 2 times - TMP_JAMP(2574) = AMP(1056) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1230) ! used 2 times - TMP_JAMP(2573) = TMP_JAMP(822) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(509) ! used 2 times - TMP_JAMP(2572) = TMP_JAMP(830) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1722) ! used 2 times - TMP_JAMP(2571) = TMP_JAMP(1728) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1724) ! used 2 times - TMP_JAMP(2570) = TMP_JAMP(1729) + TMP_JAMP(1728) ! 
used 2 times - TMP_JAMP(2569) = TMP_JAMP(1985) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1814) ! used 2 times - TMP_JAMP(2568) = AMP(122) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1472) ! used 2 times - TMP_JAMP(2567) = TMP_JAMP(811) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(602) ! used 2 times - TMP_JAMP(2566) = TMP_JAMP(1901) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1167) ! used 2 times - TMP_JAMP(2565) = AMP(378) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(390) ! used 2 times - TMP_JAMP(2564) = TMP_JAMP(795) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(417) ! used 2 times - TMP_JAMP(2563) = TMP_JAMP(795) - AMP(390) ! used 2 times - TMP_JAMP(2562) = TMP_JAMP(798) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(594) ! used 2 times - TMP_JAMP(2561) = TMP_JAMP(798) - AMP(534) ! used 2 times - TMP_JAMP(2560) = TMP_JAMP(1706) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(796) ! used 2 times - TMP_JAMP(2559) = TMP_JAMP(1715) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1712) ! used 2 times - TMP_JAMP(2558) = TMP_JAMP(1798) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1709) ! used 2 times - TMP_JAMP(2557) = TMP_JAMP(1798) + TMP_JAMP(1713) ! used 2 times - TMP_JAMP(2556) = TMP_JAMP(1989) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(800) ! used 2 times - TMP_JAMP(2555) = AMP(534) - AMP(708) ! used 2 times - TMP_JAMP(2554) = TMP_JAMP(1163) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(783) ! used 2 times - TMP_JAMP(2553) = TMP_JAMP(1903) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1769) ! used 2 times - TMP_JAMP(2552) = TMP_JAMP(1972) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(762) ! 
used 2 times - TMP_JAMP(2551) = TMP_JAMP(1972) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1134) ! used 2 times - TMP_JAMP(2550) = TMP_JAMP(1972) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1971) ! used 2 times - TMP_JAMP(2549) = TMP_JAMP(1973) - TMP_JAMP(1163) ! used 2 times - TMP_JAMP(2548) = AMP(380) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(390) ! used 2 times - TMP_JAMP(2547) = TMP_JAMP(1700) - TMP_JAMP(780) ! used 2 times - TMP_JAMP(2546) = TMP_JAMP(1741) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(778) ! used 2 times - TMP_JAMP(2545) = TMP_JAMP(1979) + TMP_JAMP(1970) ! used 2 times - TMP_JAMP(2544) = AMP(549) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1296) ! used 2 times - TMP_JAMP(2543) = TMP_JAMP(768) + TMP_JAMP(767) ! used 2 times - TMP_JAMP(2542) = TMP_JAMP(1737) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1162) ! used 2 times - TMP_JAMP(2541) = TMP_JAMP(1971) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1901) ! used 2 times - TMP_JAMP(2540) = TMP_JAMP(759) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1454) ! used 2 times - TMP_JAMP(2539) = TMP_JAMP(1689) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(759) ! used 2 times - TMP_JAMP(2538) = TMP_JAMP(1691) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1289) ! used 2 times - TMP_JAMP(2537) = TMP_JAMP(1715) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1689) ! used 2 times - TMP_JAMP(2536) = TMP_JAMP(1769) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(906) ! used 2 times - TMP_JAMP(2535) = TMP_JAMP(1900) - AMP(91) ! used 2 times - TMP_JAMP(2534) = TMP_JAMP(1991) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1715) ! used 2 times - TMP_JAMP(2533) = TMP_JAMP(1991) - TMP_JAMP(1904) ! used 2 times - TMP_JAMP(2532) = TMP_JAMP(749) + AMP(1636) ! 
used 2 times - TMP_JAMP(2531) = TMP_JAMP(1680) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(748) ! used 2 times - TMP_JAMP(2530) = TMP_JAMP(1682) - AMP(94) ! used 2 times - TMP_JAMP(2529) = AMP(1119) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1830) ! used 2 times - TMP_JAMP(2528) = AMP(830) + AMP(832) ! used 2 times - TMP_JAMP(2527) = AMP(814) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(830) ! used 2 times - TMP_JAMP(2526) = AMP(686) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1119) ! used 2 times - TMP_JAMP(2525) = AMP(686) - AMP(832) ! used 2 times - TMP_JAMP(2524) = TMP_JAMP(1673) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(736) ! used 2 times - TMP_JAMP(2523) = TMP_JAMP(1678) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(814) ! used 2 times - TMP_JAMP(2522) = TMP_JAMP(1778) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1672) ! used 2 times - TMP_JAMP(2521) = TMP_JAMP(1778) + TMP_JAMP(1678) ! used 2 times - TMP_JAMP(2520) = TMP_JAMP(1899) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(734) ! used 2 times - TMP_JAMP(2519) = TMP_JAMP(1978) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1677) ! used 2 times - TMP_JAMP(2518) = AMP(832) - AMP(866) ! used 2 times - TMP_JAMP(2517) = TMP_JAMP(721) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1205) ! used 2 times - TMP_JAMP(2516) = TMP_JAMP(1160) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(722) ! used 2 times - TMP_JAMP(2515) = TMP_JAMP(1770) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1160) ! used 2 times - TMP_JAMP(2514) = TMP_JAMP(1968) + TMP_JAMP(720) ! used 2 times - TMP_JAMP(2513) = TMP_JAMP(1968) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1783) ! 
used 2 times - TMP_JAMP(2512) = AMP(815) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(850) ! used 2 times - TMP_JAMP(2511) = TMP_JAMP(1664) + TMP_JAMP(717) ! used 2 times - TMP_JAMP(2510) = TMP_JAMP(1725) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(715) ! used 2 times - TMP_JAMP(2509) = TMP_JAMP(1967) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(527) ! used 2 times - TMP_JAMP(2508) = TMP_JAMP(1028) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1006) ! used 2 times - TMP_JAMP(2507) = TMP_JAMP(1159) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(709) ! used 2 times - TMP_JAMP(2506) = TMP_JAMP(1674) - TMP_JAMP(1656) ! used 2 times - TMP_JAMP(2505) = TMP_JAMP(1736) + TMP_JAMP(1679) ! used 2 times - TMP_JAMP(2504) = AMP(925) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(943) ! used 2 times - TMP_JAMP(2503) = TMP_JAMP(1653) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(925) ! used 2 times - TMP_JAMP(2502) = TMP_JAMP(1899) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(702) ! used 2 times - TMP_JAMP(2501) = TMP_JAMP(1900) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1653) ! used 2 times - TMP_JAMP(2500) = AMP(346) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(462) ! used 2 times - TMP_JAMP(2499) = AMP(65) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(80) ! used 2 times - TMP_JAMP(2498) = TMP_JAMP(681) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(346) ! used 2 times - TMP_JAMP(2497) = TMP_JAMP(1133) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(681) ! used 2 times - TMP_JAMP(2496) = TMP_JAMP(1631) + TMP_JAMP(1133) ! used 2 times - TMP_JAMP(2495) = TMP_JAMP(1638) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(678) ! used 2 times - TMP_JAMP(2494) = TMP_JAMP(1837) - TMP_JAMP(1762) ! 
used 2 times - TMP_JAMP(2493) = TMP_JAMP(1928) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1640) ! used 2 times - TMP_JAMP(2492) = TMP_JAMP(1986) - TMP_JAMP(1631) ! used 2 times - TMP_JAMP(2491) = TMP_JAMP(1986) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1927) ! used 2 times - TMP_JAMP(2490) = TMP_JAMP(666) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(505) ! used 2 times - TMP_JAMP(2489) = TMP_JAMP(1622) + TMP_JAMP(1131) ! used 2 times - TMP_JAMP(2488) = TMP_JAMP(1747) + AMP(1030) ! used 2 times - TMP_JAMP(2487) = TMP_JAMP(1983) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1926) ! used 2 times - TMP_JAMP(2486) = AMP(583) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(627) ! used 2 times - TMP_JAMP(2485) = AMP(546) + ((0.000000000000000D+00, + TMP_JAMP(1273) = TMP_JAMP(1030) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(899) ! used 2 times + TMP_JAMP(1272) = TMP_JAMP(1029) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(968) ! used 2 times + TMP_JAMP(1271) = TMP_JAMP(1022) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(953) ! used 2 times + TMP_JAMP(1270) = TMP_JAMP(1020) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(976) ! used 2 times + TMP_JAMP(1269) = TMP_JAMP(1020) + TMP_JAMP(992) ! used 2 times + TMP_JAMP(1268) = TMP_JAMP(1013) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(976) ! used 2 times + TMP_JAMP(1267) = TMP_JAMP(1013) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(948) ! used 2 times + TMP_JAMP(1266) = TMP_JAMP(1010) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(961) ! used 2 times + TMP_JAMP(1265) = TMP_JAMP(1010) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(956) ! used 2 times + TMP_JAMP(1264) = TMP_JAMP(1006) + TMP_JAMP(1003) ! 
used 2 times + TMP_JAMP(1263) = TMP_JAMP(1005) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(888) ! used 2 times + TMP_JAMP(1262) = TMP_JAMP(1004) + TMP_JAMP(995) ! used 2 times + TMP_JAMP(1261) = TMP_JAMP(1003) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(937) ! used 2 times + TMP_JAMP(1260) = TMP_JAMP(1000) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(930) ! used 2 times + TMP_JAMP(1259) = TMP_JAMP(998) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(915) ! used 2 times + TMP_JAMP(1258) = TMP_JAMP(997) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(885) ! used 2 times + TMP_JAMP(1257) = TMP_JAMP(994) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(887) ! used 2 times + TMP_JAMP(1256) = TMP_JAMP(994) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(861) ! used 2 times + TMP_JAMP(1255) = TMP_JAMP(992) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(859) ! used 2 times + TMP_JAMP(1254) = TMP_JAMP(991) - TMP_JAMP(986) ! used 2 times + TMP_JAMP(1253) = TMP_JAMP(989) - AMP(594) ! used 2 times + TMP_JAMP(1252) = TMP_JAMP(989) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(679) ! used 2 times + TMP_JAMP(1251) = TMP_JAMP(987) - TMP_JAMP(946) ! used 2 times + TMP_JAMP(1250) = TMP_JAMP(985) + TMP_JAMP(923) ! used 2 times + TMP_JAMP(1249) = TMP_JAMP(984) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(852) ! used 2 times + TMP_JAMP(1248) = TMP_JAMP(984) + TMP_JAMP(968) ! used 2 times + TMP_JAMP(1247) = TMP_JAMP(983) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(779) ! used 2 times + TMP_JAMP(1246) = TMP_JAMP(983) + TMP_JAMP(967) ! used 2 times + TMP_JAMP(1245) = TMP_JAMP(982) + TMP_JAMP(921) ! used 2 times + TMP_JAMP(1244) = TMP_JAMP(981) - TMP_JAMP(876) ! used 2 times + TMP_JAMP(1243) = TMP_JAMP(979) + AMP(509) ! 
used 2 times + TMP_JAMP(1242) = TMP_JAMP(978) + TMP_JAMP(851) ! used 2 times + TMP_JAMP(1241) = TMP_JAMP(976) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(786) ! used 2 times + TMP_JAMP(1240) = TMP_JAMP(976) - TMP_JAMP(877) ! used 2 times + TMP_JAMP(1239) = TMP_JAMP(975) + TMP_JAMP(971) ! used 2 times + TMP_JAMP(1238) = TMP_JAMP(974) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(830) ! used 2 times + TMP_JAMP(1237) = TMP_JAMP(974) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(671) ! used 2 times + TMP_JAMP(1236) = TMP_JAMP(973) - TMP_JAMP(966) ! used 2 times + TMP_JAMP(1235) = TMP_JAMP(972) + TMP_JAMP(954) ! used 2 times + TMP_JAMP(1234) = TMP_JAMP(969) - TMP_JAMP(905) ! used 2 times + TMP_JAMP(1233) = TMP_JAMP(968) + TMP_JAMP(850) ! used 2 times + TMP_JAMP(1232) = TMP_JAMP(967) + TMP_JAMP(905) ! used 2 times + TMP_JAMP(1231) = TMP_JAMP(967) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(775) ! used 2 times + TMP_JAMP(1230) = TMP_JAMP(963) + TMP_JAMP(871) ! used 2 times + TMP_JAMP(1229) = TMP_JAMP(960) + TMP_JAMP(849) ! used 2 times + TMP_JAMP(1228) = TMP_JAMP(960) - AMP(762) ! used 2 times + TMP_JAMP(1227) = TMP_JAMP(959) - AMP(667) ! used 2 times + TMP_JAMP(1226) = TMP_JAMP(957) + TMP_JAMP(955) ! used 2 times + TMP_JAMP(1225) = TMP_JAMP(953) + TMP_JAMP(890) ! used 2 times + TMP_JAMP(1224) = TMP_JAMP(953) + TMP_JAMP(928) ! used 2 times + TMP_JAMP(1223) = TMP_JAMP(950) - TMP_JAMP(947) ! used 2 times + TMP_JAMP(1222) = TMP_JAMP(945) - TMP_JAMP(879) ! used 2 times + TMP_JAMP(1221) = TMP_JAMP(942) + TMP_JAMP(868) ! used 2 times + TMP_JAMP(1220) = TMP_JAMP(941) - TMP_JAMP(866) ! used 2 times + TMP_JAMP(1219) = TMP_JAMP(939) - TMP_JAMP(918) ! used 2 times + TMP_JAMP(1218) = TMP_JAMP(939) + TMP_JAMP(888) ! used 2 times + TMP_JAMP(1217) = TMP_JAMP(938) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(773) ! 
used 2 times + TMP_JAMP(1216) = TMP_JAMP(937) + TMP_JAMP(918) ! used 2 times + TMP_JAMP(1215) = TMP_JAMP(936) - TMP_JAMP(934) ! used 2 times + TMP_JAMP(1214) = TMP_JAMP(935) - TMP_JAMP(916) ! used 2 times + TMP_JAMP(1213) = TMP_JAMP(934) - TMP_JAMP(926) ! used 2 times + TMP_JAMP(1212) = TMP_JAMP(933) - TMP_JAMP(926) ! used 2 times + TMP_JAMP(1211) = TMP_JAMP(933) - TMP_JAMP(862) ! used 2 times + TMP_JAMP(1210) = TMP_JAMP(931) - AMP(933) ! used 2 times + TMP_JAMP(1209) = TMP_JAMP(928) - TMP_JAMP(924) ! used 2 times + TMP_JAMP(1208) = TMP_JAMP(927) - TMP_JAMP(862) ! used 2 times + TMP_JAMP(1207) = TMP_JAMP(926) + AMP(921) ! used 2 times + TMP_JAMP(1206) = TMP_JAMP(924) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(840) ! used 2 times + TMP_JAMP(1205) = TMP_JAMP(924) - TMP_JAMP(884) ! used 2 times + TMP_JAMP(1204) = TMP_JAMP(923) + TMP_JAMP(918) ! used 2 times + TMP_JAMP(1203) = TMP_JAMP(923) - TMP_JAMP(919) ! used 2 times + TMP_JAMP(1202) = TMP_JAMP(922) - TMP_JAMP(920) ! used 2 times + TMP_JAMP(1201) = TMP_JAMP(922) + TMP_JAMP(917) ! used 2 times + TMP_JAMP(1200) = TMP_JAMP(922) + TMP_JAMP(908) ! used 2 times + TMP_JAMP(1199) = TMP_JAMP(921) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(826) ! used 2 times + TMP_JAMP(1198) = TMP_JAMP(920) - TMP_JAMP(851) ! used 2 times + TMP_JAMP(1197) = TMP_JAMP(919) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(779) ! used 2 times + TMP_JAMP(1196) = TMP_JAMP(919) + TMP_JAMP(901) ! used 2 times + TMP_JAMP(1195) = TMP_JAMP(917) + AMP(921) ! used 2 times + TMP_JAMP(1194) = TMP_JAMP(917) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(760) ! used 2 times + TMP_JAMP(1193) = TMP_JAMP(916) + AMP(827) ! used 2 times + TMP_JAMP(1192) = TMP_JAMP(915) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(759) ! used 2 times + TMP_JAMP(1191) = TMP_JAMP(912) - TMP_JAMP(908) ! used 2 times + TMP_JAMP(1190) = TMP_JAMP(911) + TMP_JAMP(873) ! 
used 2 times + TMP_JAMP(1189) = TMP_JAMP(909) - TMP_JAMP(880) ! used 2 times + TMP_JAMP(1188) = TMP_JAMP(908) - TMP_JAMP(906) ! used 2 times + TMP_JAMP(1187) = TMP_JAMP(906) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(786) ! used 2 times + TMP_JAMP(1186) = TMP_JAMP(906) - TMP_JAMP(873) ! used 2 times + TMP_JAMP(1185) = TMP_JAMP(905) - TMP_JAMP(901) ! used 2 times + TMP_JAMP(1184) = TMP_JAMP(904) + TMP_JAMP(897) ! used 2 times + TMP_JAMP(1183) = TMP_JAMP(903) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(808) ! used 2 times + TMP_JAMP(1182) = TMP_JAMP(903) + TMP_JAMP(902) ! used 2 times + TMP_JAMP(1181) = TMP_JAMP(902) + TMP_JAMP(725) ! used 2 times + TMP_JAMP(1180) = TMP_JAMP(901) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(775) ! used 2 times + TMP_JAMP(1179) = TMP_JAMP(900) - TMP_JAMP(897) ! used 2 times + TMP_JAMP(1178) = TMP_JAMP(898) + TMP_JAMP(891) ! used 2 times + TMP_JAMP(1177) = TMP_JAMP(896) + TMP_JAMP(891) ! used 2 times + TMP_JAMP(1176) = TMP_JAMP(894) + TMP_JAMP(737) ! used 2 times + TMP_JAMP(1175) = TMP_JAMP(894) + TMP_JAMP(888) ! used 2 times + TMP_JAMP(1174) = TMP_JAMP(892) - AMP(664) ! used 2 times + TMP_JAMP(1173) = TMP_JAMP(892) - TMP_JAMP(890) ! used 2 times + TMP_JAMP(1172) = TMP_JAMP(891) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(856) ! used 2 times + TMP_JAMP(1171) = TMP_JAMP(889) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(815) ! used 2 times + TMP_JAMP(1170) = TMP_JAMP(889) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(810) ! used 2 times + TMP_JAMP(1169) = TMP_JAMP(887) - TMP_JAMP(884) ! used 2 times + TMP_JAMP(1168) = TMP_JAMP(887) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(767) ! used 2 times + TMP_JAMP(1167) = TMP_JAMP(886) + AMP(824) ! used 2 times + TMP_JAMP(1166) = TMP_JAMP(886) - TMP_JAMP(884) ! 
used 2 times + TMP_JAMP(1165) = TMP_JAMP(885) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(766) ! used 2 times + TMP_JAMP(1164) = TMP_JAMP(881) - AMP(496) ! used 2 times + TMP_JAMP(1163) = TMP_JAMP(881) + TMP_JAMP(879) ! used 2 times + TMP_JAMP(1162) = TMP_JAMP(880) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(858) ! used 2 times + TMP_JAMP(1161) = TMP_JAMP(878) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(835) ! used 2 times + TMP_JAMP(1160) = TMP_JAMP(878) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(829) ! used 2 times + TMP_JAMP(1159) = TMP_JAMP(877) + TMP_JAMP(865) ! used 2 times + TMP_JAMP(1158) = TMP_JAMP(876) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(838) ! used 2 times + TMP_JAMP(1157) = TMP_JAMP(872) + AMP(646) ! used 2 times + TMP_JAMP(1156) = TMP_JAMP(870) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(804) ! used 2 times + TMP_JAMP(1155) = TMP_JAMP(870) + TMP_JAMP(861) ! used 2 times + TMP_JAMP(1154) = TMP_JAMP(870) + TMP_JAMP(869) ! used 2 times + TMP_JAMP(1153) = TMP_JAMP(869) + AMP(649) ! used 2 times + TMP_JAMP(1152) = TMP_JAMP(868) + AMP(637) ! used 2 times + TMP_JAMP(1151) = TMP_JAMP(866) + AMP(487) ! used 2 times + TMP_JAMP(1150) = TMP_JAMP(865) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(786) ! used 2 times + TMP_JAMP(1149) = TMP_JAMP(865) - AMP(491) ! used 2 times + TMP_JAMP(1148) = TMP_JAMP(862) - AMP(806) ! used 2 times + TMP_JAMP(1147) = TMP_JAMP(861) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(767) ! used 2 times + TMP_JAMP(1146) = TMP_JAMP(860) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(763) ! used 2 times + TMP_JAMP(1145) = TMP_JAMP(860) - AMP(809) ! used 2 times + TMP_JAMP(1144) = TMP_JAMP(859) - AMP(806) ! used 2 times + TMP_JAMP(1143) = TMP_JAMP(859) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(760) ! 
used 2 times + TMP_JAMP(1142) = TMP_JAMP(858) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(490) ! used 2 times + TMP_JAMP(1141) = TMP_JAMP(856) - TMP_JAMP(804) ! used 2 times + TMP_JAMP(1140) = TMP_JAMP(856) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(650) ! used 2 times + TMP_JAMP(1139) = TMP_JAMP(854) - TMP_JAMP(853) ! used 2 times + TMP_JAMP(1138) = TMP_JAMP(854) + TMP_JAMP(847) ! used 2 times + TMP_JAMP(1137) = TMP_JAMP(849) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(805) ! used 2 times + TMP_JAMP(1136) = TMP_JAMP(843) - TMP_JAMP(802) ! used 2 times + TMP_JAMP(1135) = TMP_JAMP(842) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(820) ! used 2 times + TMP_JAMP(1134) = TMP_JAMP(841) + TMP_JAMP(756) ! used 2 times + TMP_JAMP(1133) = TMP_JAMP(838) + TMP_JAMP(678) ! used 2 times + TMP_JAMP(1132) = TMP_JAMP(838) + TMP_JAMP(789) ! used 2 times + TMP_JAMP(1131) = TMP_JAMP(837) + TMP_JAMP(828) ! used 2 times + TMP_JAMP(1130) = TMP_JAMP(837) + TMP_JAMP(670) ! used 2 times + TMP_JAMP(1129) = TMP_JAMP(833) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(479) ! used 2 times + TMP_JAMP(1128) = TMP_JAMP(832) - TMP_JAMP(782) ! used 2 times + TMP_JAMP(1127) = TMP_JAMP(827) - TMP_JAMP(772) ! used 2 times + TMP_JAMP(1126) = TMP_JAMP(825) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(508) ! used 2 times + TMP_JAMP(1125) = TMP_JAMP(824) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(742) ! used 2 times + TMP_JAMP(1124) = TMP_JAMP(823) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(583) ! used 2 times - TMP_JAMP(2484) = TMP_JAMP(654) - AMP(466) ! used 2 times - TMP_JAMP(2483) = TMP_JAMP(1619) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(656) ! used 2 times - TMP_JAMP(2482) = TMP_JAMP(1620) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(546) ! 
used 2 times - TMP_JAMP(2481) = TMP_JAMP(1713) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(387) ! used 2 times - TMP_JAMP(2480) = TMP_JAMP(1713) - TMP_JAMP(1621) ! used 2 times - TMP_JAMP(2479) = TMP_JAMP(1792) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(654) ! used 2 times - TMP_JAMP(2478) = TMP_JAMP(1895) - TMP_JAMP(1792) ! used 2 times - TMP_JAMP(2477) = TMP_JAMP(1926) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1620) ! used 2 times - TMP_JAMP(2476) = TMP_JAMP(1927) + TMP_JAMP(1926) ! used 2 times - TMP_JAMP(2475) = AMP(110) + AMP(1629) ! used 2 times - TMP_JAMP(2474) = TMP_JAMP(646) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(74) ! used 2 times - TMP_JAMP(2473) = TMP_JAMP(1163) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(644) ! used 2 times - TMP_JAMP(2472) = TMP_JAMP(1610) - TMP_JAMP(1148) ! used 2 times - TMP_JAMP(2471) = TMP_JAMP(1611) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(645) ! used 2 times - TMP_JAMP(2470) = TMP_JAMP(1614) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1611) ! used 2 times - TMP_JAMP(2469) = TMP_JAMP(1705) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1163) ! used 2 times - TMP_JAMP(2468) = TMP_JAMP(1895) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1705) ! used 2 times - TMP_JAMP(2467) = TMP_JAMP(1928) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1612) ! used 2 times - TMP_JAMP(2466) = AMP(1109) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1152) ! used 2 times - TMP_JAMP(2465) = AMP(845) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1109) ! used 2 times - TMP_JAMP(2464) = TMP_JAMP(1607) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(845) ! used 2 times - TMP_JAMP(2463) = TMP_JAMP(1608) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(637) ! 
used 2 times - TMP_JAMP(2462) = TMP_JAMP(1678) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(827) ! used 2 times - TMP_JAMP(2461) = TMP_JAMP(1678) - TMP_JAMP(1609) ! used 2 times - TMP_JAMP(2460) = TMP_JAMP(1927) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1607) ! used 2 times - TMP_JAMP(2459) = TMP_JAMP(1927) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1894) ! used 2 times - TMP_JAMP(2458) = TMP_JAMP(1160) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(630) ! used 2 times - TMP_JAMP(2457) = TMP_JAMP(1601) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(631) ! used 2 times - TMP_JAMP(2456) = TMP_JAMP(1602) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1601) ! used 2 times - TMP_JAMP(2455) = TMP_JAMP(1668) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1160) ! used 2 times - TMP_JAMP(2454) = TMP_JAMP(1894) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1668) ! used 2 times - TMP_JAMP(2453) = TMP_JAMP(621) + AMP(388) ! used 2 times - TMP_JAMP(2452) = TMP_JAMP(653) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(648) ! used 2 times - TMP_JAMP(2451) = TMP_JAMP(1619) - AMP(385) ! used 2 times - TMP_JAMP(2450) = TMP_JAMP(1619) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1596) ! used 2 times - TMP_JAMP(2449) = TMP_JAMP(1619) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1600) ! used 2 times - TMP_JAMP(2448) = TMP_JAMP(1639) + AMP(64) ! used 2 times - TMP_JAMP(2447) = TMP_JAMP(1639) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1599) ! used 2 times - TMP_JAMP(2446) = TMP_JAMP(1712) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(621) ! used 2 times - TMP_JAMP(2445) = TMP_JAMP(1712) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1597) ! 
used 2 times - TMP_JAMP(2444) = TMP_JAMP(1762) - TMP_JAMP(888) ! used 2 times - TMP_JAMP(2443) = TMP_JAMP(1892) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1621) ! used 2 times - TMP_JAMP(2442) = TMP_JAMP(1893) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1891) ! used 2 times - TMP_JAMP(2441) = TMP_JAMP(616) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1042) ! used 2 times - TMP_JAMP(2440) = TMP_JAMP(616) - AMP(829) ! used 2 times - TMP_JAMP(2439) = TMP_JAMP(634) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(632) ! used 2 times - TMP_JAMP(2438) = TMP_JAMP(1591) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1587) ! used 2 times - TMP_JAMP(2437) = TMP_JAMP(1599) + TMP_JAMP(1592) ! used 2 times - TMP_JAMP(2436) = TMP_JAMP(1608) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1590) ! used 2 times - TMP_JAMP(2435) = TMP_JAMP(1608) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1592) ! used 2 times - TMP_JAMP(2434) = TMP_JAMP(1817) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1587) ! used 2 times - TMP_JAMP(2433) = TMP_JAMP(1890) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1609) ! used 2 times - TMP_JAMP(2432) = TMP_JAMP(1891) + TMP_JAMP(1817) ! used 2 times - TMP_JAMP(2431) = TMP_JAMP(1893) - TMP_JAMP(1608) ! used 2 times - TMP_JAMP(2430) = AMP(846) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(913) ! used 2 times - TMP_JAMP(2429) = AMP(426) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(945) ! used 2 times - TMP_JAMP(2428) = TMP_JAMP(1580) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(388) ! used 2 times - TMP_JAMP(2427) = TMP_JAMP(1592) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(913) ! used 2 times - TMP_JAMP(2426) = TMP_JAMP(1600) - TMP_JAMP(1592) ! 
used 2 times - TMP_JAMP(2425) = TMP_JAMP(1643) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(348) ! used 2 times - TMP_JAMP(2424) = TMP_JAMP(1925) + TMP_JAMP(1924) ! used 2 times - TMP_JAMP(2423) = TMP_JAMP(649) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(77) ! used 2 times - TMP_JAMP(2422) = TMP_JAMP(651) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(649) ! used 2 times - TMP_JAMP(2421) = TMP_JAMP(1591) + AMP(945) ! used 2 times - TMP_JAMP(2420) = TMP_JAMP(1599) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(651) ! used 2 times - TMP_JAMP(2419) = TMP_JAMP(1607) + AMP(1116) ! used 2 times - TMP_JAMP(2418) = TMP_JAMP(1607) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(846) ! used 2 times - TMP_JAMP(2417) = TMP_JAMP(1886) + TMP_JAMP(1885) ! used 2 times - TMP_JAMP(2416) = TMP_JAMP(1966) - TMP_JAMP(1599) ! used 2 times - TMP_JAMP(2415) = TMP_JAMP(1966) - TMP_JAMP(1887) ! used 2 times - TMP_JAMP(2414) = AMP(1277) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1308) ! used 2 times - TMP_JAMP(2413) = AMP(829) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1047) ! used 2 times - TMP_JAMP(2412) = TMP_JAMP(744) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(740) ! used 2 times - TMP_JAMP(2411) = TMP_JAMP(1577) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1558) ! used 2 times - TMP_JAMP(2410) = TMP_JAMP(1600) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1277) ! used 2 times - TMP_JAMP(2409) = TMP_JAMP(1600) - AMP(545) ! used 2 times - TMP_JAMP(2408) = TMP_JAMP(1628) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(506) ! used 2 times - TMP_JAMP(2407) = TMP_JAMP(1922) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1884) ! used 2 times - TMP_JAMP(2406) = TMP_JAMP(1965) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1923) ! 
used 2 times - TMP_JAMP(2405) = TMP_JAMP(1550) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1546) ! used 2 times - TMP_JAMP(2404) = TMP_JAMP(1597) - AMP(1308) ! used 2 times - TMP_JAMP(2403) = TMP_JAMP(1620) - AMP(590) ! used 2 times - TMP_JAMP(2402) = TMP_JAMP(1620) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(545) ! used 2 times - TMP_JAMP(2401) = TMP_JAMP(1964) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1887) ! used 2 times - TMP_JAMP(2400) = AMP(360) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(372) ! used 2 times - TMP_JAMP(2399) = TMP_JAMP(1536) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1533) ! used 2 times - TMP_JAMP(2398) = TMP_JAMP(1537) - TMP_JAMP(577) ! used 2 times - TMP_JAMP(2397) = TMP_JAMP(1540) + TMP_JAMP(1539) ! used 2 times - TMP_JAMP(2396) = TMP_JAMP(1884) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1536) ! used 2 times - TMP_JAMP(2395) = TMP_JAMP(1884) - TMP_JAMP(1618) ! used 2 times - TMP_JAMP(2394) = TMP_JAMP(1980) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(577) ! used 2 times - TMP_JAMP(2393) = TMP_JAMP(1980) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1881) ! used 2 times - TMP_JAMP(2392) = TMP_JAMP(573) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1209) ! used 2 times - TMP_JAMP(2391) = TMP_JAMP(573) - AMP(867) ! used 2 times - TMP_JAMP(2390) = TMP_JAMP(1531) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1527) ! used 2 times - TMP_JAMP(2389) = TMP_JAMP(1602) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(921) ! used 2 times - TMP_JAMP(2388) = TMP_JAMP(1602) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1577) ! used 2 times - TMP_JAMP(2387) = TMP_JAMP(1789) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1527) ! 
used 2 times - TMP_JAMP(2386) = TMP_JAMP(1880) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1602) ! used 2 times - TMP_JAMP(2385) = TMP_JAMP(1880) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1614) ! used 2 times - TMP_JAMP(2384) = TMP_JAMP(1881) - TMP_JAMP(1789) ! used 2 times - TMP_JAMP(2383) = TMP_JAMP(1963) + TMP_JAMP(1539) ! used 2 times - TMP_JAMP(2382) = AMP(372) - AMP(852) ! used 2 times - TMP_JAMP(2381) = AMP(362) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(372) ! used 2 times - TMP_JAMP(2380) = TMP_JAMP(1165) - AMP(362) ! used 2 times - TMP_JAMP(2379) = TMP_JAMP(1879) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1540) ! used 2 times - TMP_JAMP(2378) = TMP_JAMP(1921) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1165) ! used 2 times - TMP_JAMP(2377) = TMP_JAMP(1924) + TMP_JAMP(1921) ! used 2 times - TMP_JAMP(2376) = TMP_JAMP(1962) + TMP_JAMP(1533) ! used 2 times - TMP_JAMP(2375) = TMP_JAMP(1963) - AMP(846) ! used 2 times - TMP_JAMP(2374) = AMP(852) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1218) ! used 2 times - TMP_JAMP(2373) = TMP_JAMP(669) + ((0.000000000000000D+00, + TMP_JAMP(1123) = TMP_JAMP(821) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(591) ! used 2 times - TMP_JAMP(2372) = TMP_JAMP(1520) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1516) ! used 2 times - TMP_JAMP(2371) = TMP_JAMP(1539) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(672) ! used 2 times - TMP_JAMP(2370) = TMP_JAMP(1961) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1218) ! used 2 times - TMP_JAMP(2369) = TMP_JAMP(1961) - TMP_JAMP(1531) ! used 2 times - TMP_JAMP(2368) = AMP(1229) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1433) ! used 2 times - TMP_JAMP(2367) = AMP(1211) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1229) ! 
used 2 times - TMP_JAMP(2366) = TMP_JAMP(726) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1211) ! used 2 times - TMP_JAMP(2365) = TMP_JAMP(726) - AMP(867) ! used 2 times - TMP_JAMP(2364) = TMP_JAMP(761) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(73) ! used 2 times - TMP_JAMP(2363) = TMP_JAMP(763) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(761) ! used 2 times - TMP_JAMP(2362) = TMP_JAMP(1540) - AMP(79) ! used 2 times - TMP_JAMP(2361) = TMP_JAMP(1613) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(763) ! used 2 times - TMP_JAMP(2360) = TMP_JAMP(1879) + TMP_JAMP(730) ! used 2 times - TMP_JAMP(2359) = TMP_JAMP(1893) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1613) ! used 2 times - TMP_JAMP(2358) = TMP_JAMP(1919) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1893) ! used 2 times - TMP_JAMP(2357) = TMP_JAMP(1920) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(730) ! used 2 times - TMP_JAMP(2356) = TMP_JAMP(1920) + TMP_JAMP(1919) ! used 2 times - TMP_JAMP(2355) = TMP_JAMP(1518) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1725) ! used 2 times - TMP_JAMP(2354) = TMP_JAMP(1537) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1220) ! used 2 times - TMP_JAMP(2353) = TMP_JAMP(1547) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1544) ! used 2 times - TMP_JAMP(2352) = TMP_JAMP(1640) - AMP(76) ! used 2 times - TMP_JAMP(2351) = TMP_JAMP(1875) - TMP_JAMP(1874) ! used 2 times - TMP_JAMP(2350) = AMP(671) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1031) ! used 2 times - TMP_JAMP(2349) = TMP_JAMP(549) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(655) ! used 2 times - TMP_JAMP(2348) = TMP_JAMP(1500) + TMP_JAMP(1498) ! used 2 times - TMP_JAMP(2347) = TMP_JAMP(1533) + AMP(1031) ! 
used 2 times - TMP_JAMP(2346) = TMP_JAMP(1533) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1501) ! used 2 times - TMP_JAMP(2345) = TMP_JAMP(1962) + TMP_JAMP(1606) ! used 2 times - TMP_JAMP(2344) = TMP_JAMP(1975) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(549) ! used 2 times - TMP_JAMP(2343) = TMP_JAMP(1975) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1873) ! used 2 times - TMP_JAMP(2342) = TMP_JAMP(546) + ((0.000000000000000D+00, + TMP_JAMP(1122) = TMP_JAMP(818) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(497) ! used 2 times + TMP_JAMP(1121) = TMP_JAMP(817) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(616) ! used 2 times + TMP_JAMP(1120) = TMP_JAMP(816) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(615) ! used 2 times + TMP_JAMP(1119) = TMP_JAMP(813) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(647) ! used 2 times + TMP_JAMP(1118) = TMP_JAMP(812) - TMP_JAMP(790) ! used 2 times + TMP_JAMP(1117) = TMP_JAMP(807) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(668) ! used 2 times + TMP_JAMP(1116) = TMP_JAMP(806) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(761) ! used 2 times + TMP_JAMP(1115) = TMP_JAMP(803) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(775) ! used 2 times + TMP_JAMP(1114) = TMP_JAMP(799) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(665) ! used 2 times + TMP_JAMP(1113) = TMP_JAMP(798) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(750) ! used 2 times + TMP_JAMP(1112) = TMP_JAMP(797) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(749) ! used 2 times + TMP_JAMP(1111) = TMP_JAMP(796) - TMP_JAMP(776) ! used 2 times + TMP_JAMP(1110) = TMP_JAMP(794) - AMP(786) ! used 2 times + TMP_JAMP(1109) = TMP_JAMP(791) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(656) ! 
used 2 times + TMP_JAMP(1108) = TMP_JAMP(787) - AMP(608) ! used 2 times + TMP_JAMP(1107) = TMP_JAMP(783) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(506) ! used 2 times + TMP_JAMP(1106) = TMP_JAMP(780) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(603) ! used 2 times + TMP_JAMP(1105) = TMP_JAMP(780) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(488) ! used 2 times + TMP_JAMP(1104) = TMP_JAMP(776) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(753) ! used 2 times + TMP_JAMP(1103) = TMP_JAMP(774) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(765) ! used 2 times - TMP_JAMP(2341) = TMP_JAMP(1495) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1493) ! used 2 times - TMP_JAMP(2340) = TMP_JAMP(1614) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1283) ! used 2 times - TMP_JAMP(2339) = TMP_JAMP(1614) - AMP(707) ! used 2 times - TMP_JAMP(2338) = TMP_JAMP(1770) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1493) ! used 2 times - TMP_JAMP(2337) = TMP_JAMP(1873) - TMP_JAMP(1770) ! used 2 times - TMP_JAMP(2336) = TMP_JAMP(1960) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1500) ! used 2 times - TMP_JAMP(2335) = AMP(671) - AMP(691) ! used 2 times - TMP_JAMP(2334) = TMP_JAMP(1164) - AMP(656) ! used 2 times - TMP_JAMP(2333) = TMP_JAMP(1872) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1501) ! used 2 times - TMP_JAMP(2332) = TMP_JAMP(1918) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1164) ! used 2 times - TMP_JAMP(2331) = TMP_JAMP(1922) + TMP_JAMP(1918) ! used 2 times - TMP_JAMP(2330) = TMP_JAMP(1960) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(545) ! used 2 times - TMP_JAMP(2329) = AMP(789) - AMP(1115) ! used 2 times - TMP_JAMP(2328) = AMP(691) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(774) ! 
used 2 times - TMP_JAMP(2327) = TMP_JAMP(693) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(683) ! used 2 times - TMP_JAMP(2326) = TMP_JAMP(1485) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1481) ! used 2 times - TMP_JAMP(2325) = TMP_JAMP(1886) + TMP_JAMP(1500) ! used 2 times - TMP_JAMP(2324) = TMP_JAMP(1959) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1495) ! used 2 times - TMP_JAMP(2323) = AMP(784) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1430) ! used 2 times - TMP_JAMP(2322) = AMP(766) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(784) ! used 2 times - TMP_JAMP(2321) = TMP_JAMP(1501) + AMP(79) ! used 2 times - TMP_JAMP(2320) = TMP_JAMP(1872) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(707) ! used 2 times - TMP_JAMP(2319) = TMP_JAMP(1973) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1917) ! used 2 times - TMP_JAMP(2318) = TMP_JAMP(1498) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(775) ! used 2 times - TMP_JAMP(2317) = TMP_JAMP(1874) + TMP_JAMP(1869) ! used 2 times - TMP_JAMP(2316) = AMP(47) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(144) ! used 2 times - TMP_JAMP(2315) = TMP_JAMP(517) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(397) ! used 2 times - TMP_JAMP(2314) = TMP_JAMP(1463) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(517) ! used 2 times - TMP_JAMP(2313) = TMP_JAMP(1470) + TMP_JAMP(1463) ! used 2 times - TMP_JAMP(2312) = TMP_JAMP(1837) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1470) ! used 2 times - TMP_JAMP(2311) = TMP_JAMP(1837) - TMP_JAMP(1474) ! used 2 times - TMP_JAMP(2310) = TMP_JAMP(1868) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(889) ! used 2 times - TMP_JAMP(2309) = TMP_JAMP(1957) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1915) ! 
used 2 times - TMP_JAMP(2308) = TMP_JAMP(1982) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1916) ! used 2 times - TMP_JAMP(2307) = TMP_JAMP(1452) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(507) ! used 2 times - TMP_JAMP(2306) = TMP_JAMP(1457) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1559) ! used 2 times - TMP_JAMP(2305) = TMP_JAMP(1461) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1458) ! used 2 times - TMP_JAMP(2304) = TMP_JAMP(1818) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1458) ! used 2 times - TMP_JAMP(2303) = TMP_JAMP(1915) - TMP_JAMP(1818) ! used 2 times - TMP_JAMP(2302) = TMP_JAMP(1957) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(871) ! used 2 times - TMP_JAMP(2301) = TMP_JAMP(1981) + AMP(664) ! used 2 times - TMP_JAMP(2300) = TMP_JAMP(1981) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1914) ! used 2 times - TMP_JAMP(2299) = AMP(742) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(786) ! used 2 times - TMP_JAMP(2298) = TMP_JAMP(1868) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(855) ! used 2 times - TMP_JAMP(2297) = TMP_JAMP(1892) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1700) ! used 2 times - TMP_JAMP(2296) = TMP_JAMP(1892) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1745) ! used 2 times - TMP_JAMP(2295) = TMP_JAMP(1913) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(384) ! used 2 times - TMP_JAMP(2294) = TMP_JAMP(1956) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1913) ! used 2 times - TMP_JAMP(2293) = TMP_JAMP(1162) - AMP(1272) ! used 2 times - TMP_JAMP(2292) = TMP_JAMP(1442) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(56) ! used 2 times - TMP_JAMP(2291) = TMP_JAMP(1460) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1456) ! 
used 2 times - TMP_JAMP(2290) = TMP_JAMP(1596) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1436) ! used 2 times - TMP_JAMP(2289) = TMP_JAMP(1695) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1162) ! used 2 times - TMP_JAMP(2288) = TMP_JAMP(1695) + TMP_JAMP(1443) ! used 2 times - TMP_JAMP(2287) = AMP(1187) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1230) ! used 2 times - TMP_JAMP(2286) = AMP(824) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(863) ! used 2 times - TMP_JAMP(2285) = TMP_JAMP(1430) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(570) ! used 2 times - TMP_JAMP(2284) = TMP_JAMP(1868) - TMP_JAMP(1432) ! used 2 times - TMP_JAMP(2283) = TMP_JAMP(1890) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1664) ! used 2 times - TMP_JAMP(2282) = TMP_JAMP(1890) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1729) ! used 2 times - TMP_JAMP(2281) = TMP_JAMP(1912) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(824) ! used 2 times - TMP_JAMP(2280) = TMP_JAMP(1914) + TMP_JAMP(1912) ! used 2 times - TMP_JAMP(2279) = TMP_JAMP(1159) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(863) ! used 2 times - TMP_JAMP(2278) = TMP_JAMP(1590) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1436) ! used 2 times - TMP_JAMP(2277) = TMP_JAMP(1658) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1159) ! used 2 times - TMP_JAMP(2276) = TMP_JAMP(1658) - TMP_JAMP(1428) ! used 2 times - TMP_JAMP(2275) = AMP(382) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(388) ! used 2 times - TMP_JAMP(2274) = TMP_JAMP(1425) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1612) ! used 2 times - TMP_JAMP(2273) = TMP_JAMP(1597) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1425) ! 
used 2 times - TMP_JAMP(2272) = TMP_JAMP(1862) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1597) ! used 2 times - TMP_JAMP(2271) = TMP_JAMP(1913) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(382) ! used 2 times - TMP_JAMP(2270) = TMP_JAMP(1913) + TMP_JAMP(1474) ! used 2 times - TMP_JAMP(2269) = TMP_JAMP(1915) - TMP_JAMP(1891) ! used 2 times - TMP_JAMP(2268) = AMP(823) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(829) ! used 2 times - TMP_JAMP(2267) = TMP_JAMP(1422) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1613) ! used 2 times - TMP_JAMP(2266) = TMP_JAMP(1591) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1422) ! used 2 times - TMP_JAMP(2265) = TMP_JAMP(1862) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1591) ! used 2 times - TMP_JAMP(2264) = TMP_JAMP(1912) + ((-0.000000000000000D+00 + TMP_JAMP(1102) = TMP_JAMP(773) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(796) ! used 2 times + TMP_JAMP(1101) = TMP_JAMP(772) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(815) ! used 2 times + TMP_JAMP(1100) = TMP_JAMP(772) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(797) ! used 2 times + TMP_JAMP(1099) = TMP_JAMP(770) + TMP_JAMP(756) ! used 2 times + TMP_JAMP(1098) = TMP_JAMP(769) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(814) ! used 2 times + TMP_JAMP(1097) = TMP_JAMP(768) - AMP(926) ! used 2 times + TMP_JAMP(1096) = TMP_JAMP(766) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(924) ! used 2 times + TMP_JAMP(1095) = TMP_JAMP(765) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(823) ! used 2 times - TMP_JAMP(2263) = TMP_JAMP(1912) + TMP_JAMP(1461) ! used 2 times - TMP_JAMP(2262) = AMP(901) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(945) ! 
used 2 times - TMP_JAMP(2261) = TMP_JAMP(1423) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1420) ! used 2 times - TMP_JAMP(2260) = TMP_JAMP(1916) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1421) ! used 2 times - TMP_JAMP(2259) = TMP_JAMP(1921) - TMP_JAMP(1916) ! used 2 times - TMP_JAMP(2258) = TMP_JAMP(1925) - TMP_JAMP(1921) ! used 2 times - TMP_JAMP(2257) = TMP_JAMP(1965) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1426) ! used 2 times - TMP_JAMP(2256) = AMP(59) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1566) ! used 2 times - TMP_JAMP(2255) = TMP_JAMP(1416) - TMP_JAMP(1412) ! used 2 times - TMP_JAMP(2254) = TMP_JAMP(1419) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1415) ! used 2 times - TMP_JAMP(2253) = TMP_JAMP(1421) + AMP(1194) ! used 2 times - TMP_JAMP(2252) = TMP_JAMP(1887) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1862) ! used 2 times - TMP_JAMP(2251) = TMP_JAMP(1961) - TMP_JAMP(1955) ! used 2 times - TMP_JAMP(2250) = AMP(1265) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1308) ! used 2 times - TMP_JAMP(2249) = TMP_JAMP(1914) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1411) ! used 2 times - TMP_JAMP(2248) = TMP_JAMP(1918) + TMP_JAMP(1914) ! used 2 times - TMP_JAMP(2247) = TMP_JAMP(1923) - TMP_JAMP(1918) ! used 2 times - TMP_JAMP(2246) = TMP_JAMP(1410) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1408) ! used 2 times - TMP_JAMP(2245) = TMP_JAMP(1411) - AMP(749) ! used 2 times - TMP_JAMP(2244) = TMP_JAMP(1964) + TMP_JAMP(1959) ! used 2 times - TMP_JAMP(2243) = TMP_JAMP(959) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(399) ! used 2 times - TMP_JAMP(2242) = TMP_JAMP(1406) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(968) ! used 2 times - TMP_JAMP(2241) = TMP_JAMP(1911) - TMP_JAMP(1474) ! 
used 2 times - TMP_JAMP(2240) = TMP_JAMP(1954) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1910) ! used 2 times - TMP_JAMP(2239) = TMP_JAMP(1956) + AMP(741) ! used 2 times - TMP_JAMP(2238) = AMP(849) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1131) ! used 2 times - TMP_JAMP(2237) = AMP(849) + ((0.000000000000000D+00, + TMP_JAMP(1094) = TMP_JAMP(764) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(901) ! used 2 times + TMP_JAMP(1093) = TMP_JAMP(761) - AMP(935) ! used 2 times + TMP_JAMP(1092) = TMP_JAMP(758) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(826) ! used 2 times + TMP_JAMP(1091) = TMP_JAMP(757) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(913) ! used 2 times + TMP_JAMP(1090) = TMP_JAMP(755) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(934) ! used 2 times + TMP_JAMP(1089) = TMP_JAMP(755) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(808) ! used 2 times + TMP_JAMP(1088) = TMP_JAMP(753) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(909) ! used 2 times - TMP_JAMP(2236) = TMP_JAMP(1420) - TMP_JAMP(1390) ! used 2 times - TMP_JAMP(2235) = TMP_JAMP(1443) - TMP_JAMP(1428) ! used 2 times - TMP_JAMP(2234) = TMP_JAMP(1737) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1391) ! used 2 times - TMP_JAMP(2233) = TMP_JAMP(1859) - TMP_JAMP(1428) ! used 2 times - TMP_JAMP(2232) = TMP_JAMP(1953) + TMP_JAMP(1859) ! used 2 times - TMP_JAMP(2231) = TMP_JAMP(1389) + ((-0.000000000000000D+00 + TMP_JAMP(1087) = TMP_JAMP(749) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(912) ! used 2 times + TMP_JAMP(1086) = TMP_JAMP(748) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(925) ! used 2 times + TMP_JAMP(1085) = TMP_JAMP(747) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(900) ! 
used 2 times - TMP_JAMP(2230) = TMP_JAMP(1859) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1421) ! used 2 times - TMP_JAMP(2229) = TMP_JAMP(1924) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(344) ! used 2 times - TMP_JAMP(2228) = TMP_JAMP(1924) + TMP_JAMP(1910) ! used 2 times - TMP_JAMP(2227) = TMP_JAMP(1385) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1564) ! used 2 times - TMP_JAMP(2226) = TMP_JAMP(1417) - TMP_JAMP(1409) ! used 2 times - TMP_JAMP(2225) = TMP_JAMP(1885) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1384) ! used 2 times - TMP_JAMP(2224) = TMP_JAMP(1953) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(750) ! used 2 times - TMP_JAMP(2223) = TMP_JAMP(708) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1133) ! used 2 times - TMP_JAMP(2222) = TMP_JAMP(1406) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1412) ! used 2 times - TMP_JAMP(2221) = TMP_JAMP(1458) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1406) ! used 2 times - TMP_JAMP(2220) = TMP_JAMP(1858) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1458) ! used 2 times - TMP_JAMP(2219) = TMP_JAMP(1370) - AMP(58) ! used 2 times - TMP_JAMP(2218) = TMP_JAMP(1374) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1370) ! used 2 times - TMP_JAMP(2217) = TMP_JAMP(1414) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1374) ! used 2 times - TMP_JAMP(2216) = TMP_JAMP(1418) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1414) ! used 2 times - TMP_JAMP(2215) = TMP_JAMP(1858) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1142) ! used 2 times - TMP_JAMP(2214) = TMP_JAMP(1952) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1418) ! used 2 times - TMP_JAMP(2213) = TMP_JAMP(913) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1022) ! 
used 2 times - TMP_JAMP(2212) = TMP_JAMP(1369) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(929) ! used 2 times - TMP_JAMP(2211) = TMP_JAMP(1431) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(863) ! used 2 times - TMP_JAMP(2210) = TMP_JAMP(1911) - TMP_JAMP(1461) ! used 2 times - TMP_JAMP(2209) = TMP_JAMP(1951) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1909) ! used 2 times - TMP_JAMP(2208) = AMP(548) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(606) ! used 2 times - TMP_JAMP(2207) = TMP_JAMP(1722) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1356) ! used 2 times - TMP_JAMP(2206) = TMP_JAMP(1950) + TMP_JAMP(1949) ! used 2 times - TMP_JAMP(2205) = TMP_JAMP(1355) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1263) ! used 2 times - TMP_JAMP(2204) = TMP_JAMP(1922) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(497) ! used 2 times - TMP_JAMP(2203) = TMP_JAMP(1922) + TMP_JAMP(1909) ! used 2 times - TMP_JAMP(2202) = TMP_JAMP(1950) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1411) ! used 2 times - TMP_JAMP(2201) = TMP_JAMP(1949) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1193) ! used 2 times - TMP_JAMP(2200) = TMP_JAMP(1958) + TMP_JAMP(1351) ! used 2 times - TMP_JAMP(2199) = AMP(548) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(607) ! used 2 times - TMP_JAMP(2198) = TMP_JAMP(819) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(816) ! used 2 times - TMP_JAMP(2197) = TMP_JAMP(1369) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1409) ! used 2 times - TMP_JAMP(2196) = TMP_JAMP(1470) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1369) ! used 2 times - TMP_JAMP(2195) = TMP_JAMP(1947) + TMP_JAMP(1349) ! used 2 times - TMP_JAMP(2194) = TMP_JAMP(1952) + TMP_JAMP(1470) ! 
used 2 times - TMP_JAMP(2193) = TMP_JAMP(1952) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1948) ! used 2 times - TMP_JAMP(2192) = TMP_JAMP(1343) + AMP(616) ! used 2 times - TMP_JAMP(2191) = TMP_JAMP(1948) - TMP_JAMP(1345) ! used 2 times - TMP_JAMP(2190) = AMP(479) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(534) ! used 2 times - TMP_JAMP(2189) = AMP(332) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(494) ! used 2 times - TMP_JAMP(2188) = AMP(326) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(390) ! used 2 times - TMP_JAMP(2187) = TMP_JAMP(1580) - AMP(326) ! used 2 times - TMP_JAMP(2186) = TMP_JAMP(1895) + AMP(479) ! used 2 times - TMP_JAMP(2185) = TMP_JAMP(1908) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1580) ! used 2 times - TMP_JAMP(2184) = TMP_JAMP(1908) - TMP_JAMP(1907) ! used 2 times - TMP_JAMP(2183) = TMP_JAMP(1946) + TMP_JAMP(1895) ! used 2 times - TMP_JAMP(2182) = AMP(494) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(962) ! used 2 times - TMP_JAMP(2181) = TMP_JAMP(1317) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(484) ! used 2 times - TMP_JAMP(2180) = TMP_JAMP(1323) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(787) ! used 2 times - TMP_JAMP(2179) = TMP_JAMP(1494) + AMP(647) ! used 2 times - TMP_JAMP(2178) = TMP_JAMP(1790) - TMP_JAMP(1329) ! used 2 times - TMP_JAMP(2177) = TMP_JAMP(1946) + TMP_JAMP(1323) ! used 2 times - TMP_JAMP(2176) = TMP_JAMP(1946) + TMP_JAMP(1944) ! used 2 times - TMP_JAMP(2175) = AMP(638) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(693) ! used 2 times - TMP_JAMP(2174) = TMP_JAMP(1745) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(330) ! used 2 times - TMP_JAMP(2173) = TMP_JAMP(1906) - TMP_JAMP(1745) ! used 2 times - TMP_JAMP(2172) = TMP_JAMP(1942) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(780) ! 
used 2 times - TMP_JAMP(2171) = TMP_JAMP(1943) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1906) ! used 2 times - TMP_JAMP(2170) = TMP_JAMP(1944) - TMP_JAMP(1449) ! used 2 times - TMP_JAMP(2169) = TMP_JAMP(1302) + AMP(971) ! used 2 times - TMP_JAMP(2168) = TMP_JAMP(1309) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1303) ! used 2 times - TMP_JAMP(2167) = TMP_JAMP(1310) + TMP_JAMP(1309) ! used 2 times - TMP_JAMP(2166) = TMP_JAMP(1357) + AMP(488) ! used 2 times - TMP_JAMP(2165) = TMP_JAMP(1738) - TMP_JAMP(1310) ! used 2 times - TMP_JAMP(2164) = AMP(987) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1307) ! used 2 times - TMP_JAMP(2163) = TMP_JAMP(1295) - AMP(987) ! used 2 times - TMP_JAMP(2162) = TMP_JAMP(1295) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(811) ! used 2 times - TMP_JAMP(2161) = TMP_JAMP(1653) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(805) ! used 2 times - TMP_JAMP(2160) = TMP_JAMP(1880) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1691) ! used 2 times - TMP_JAMP(2159) = TMP_JAMP(1940) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1301) ! used 2 times - TMP_JAMP(2158) = TMP_JAMP(1942) - TMP_JAMP(1299) ! used 2 times - TMP_JAMP(2157) = AMP(808) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(811) ! used 2 times - TMP_JAMP(2156) = TMP_JAMP(1304) + TMP_JAMP(1293) ! used 2 times - TMP_JAMP(2155) = TMP_JAMP(1685) - TMP_JAMP(1650) ! used 2 times - TMP_JAMP(2154) = TMP_JAMP(1938) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1294) ! used 2 times - TMP_JAMP(2153) = AMP(329) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(372) ! used 2 times - TMP_JAMP(2152) = TMP_JAMP(1288) + AMP(478) ! used 2 times - TMP_JAMP(2151) = TMP_JAMP(1537) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1290) ! used 2 times - TMP_JAMP(2150) = TMP_JAMP(1881) + TMP_JAMP(1291) ! 
used 2 times - TMP_JAMP(2149) = TMP_JAMP(1906) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(329) ! used 2 times - TMP_JAMP(2148) = TMP_JAMP(1907) + TMP_JAMP(1906) ! used 2 times - TMP_JAMP(2147) = AMP(806) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(867) ! used 2 times - TMP_JAMP(2146) = TMP_JAMP(1301) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1285) ! used 2 times - TMP_JAMP(2145) = TMP_JAMP(1531) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1286) ! used 2 times - TMP_JAMP(2144) = AMP(797) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(852) ! used 2 times - TMP_JAMP(2143) = TMP_JAMP(1284) - AMP(327) ! used 2 times - TMP_JAMP(2142) = TMP_JAMP(1287) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1283) ! used 2 times - TMP_JAMP(2141) = TMP_JAMP(1879) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1292) ! used 2 times - TMP_JAMP(2140) = TMP_JAMP(1925) + TMP_JAMP(1908) ! used 2 times - TMP_JAMP(2139) = AMP(980) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1654) ! used 2 times - TMP_JAMP(2138) = AMP(491) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1704) ! used 2 times - TMP_JAMP(2137) = TMP_JAMP(1277) + AMP(980) ! used 2 times - TMP_JAMP(2136) = TMP_JAMP(1280) - TMP_JAMP(1277) ! used 2 times - TMP_JAMP(2135) = TMP_JAMP(1289) - AMP(491) ! used 2 times - TMP_JAMP(2134) = TMP_JAMP(1853) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1289) ! used 2 times - TMP_JAMP(2133) = TMP_JAMP(1955) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1280) ! used 2 times - TMP_JAMP(2132) = TMP_JAMP(1955) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1284) ! used 2 times - TMP_JAMP(2131) = AMP(996) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1229) ! used 2 times - TMP_JAMP(2130) = TMP_JAMP(1275) + AMP(646) ! 
used 2 times - TMP_JAMP(2129) = TMP_JAMP(1939) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1917) ! used 2 times - TMP_JAMP(2128) = TMP_JAMP(1276) - AMP(649) ! used 2 times - TMP_JAMP(2127) = TMP_JAMP(1853) + TMP_JAMP(1852) ! used 2 times - TMP_JAMP(2126) = TMP_JAMP(1869) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1385) ! used 2 times - TMP_JAMP(2125) = TMP_JAMP(1869) + TMP_JAMP(1852) ! used 2 times - TMP_JAMP(2124) = TMP_JAMP(1875) + TMP_JAMP(1869) ! used 2 times - TMP_JAMP(2123) = TMP_JAMP(859) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(637) ! used 2 times - TMP_JAMP(2122) = TMP_JAMP(1851) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1850) ! used 2 times - TMP_JAMP(2121) = TMP_JAMP(1910) - TMP_JAMP(1907) ! used 2 times - TMP_JAMP(2120) = TMP_JAMP(1954) - TMP_JAMP(1943) ! used 2 times - TMP_JAMP(2119) = TMP_JAMP(1294) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1283) ! used 2 times - TMP_JAMP(2118) = TMP_JAMP(1849) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(809) ! used 2 times - TMP_JAMP(2117) = TMP_JAMP(1850) + TMP_JAMP(1849) ! used 2 times - TMP_JAMP(2116) = AMP(796) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(834) ! used 2 times - TMP_JAMP(2115) = TMP_JAMP(1284) + AMP(796) ! used 2 times - TMP_JAMP(2114) = TMP_JAMP(1849) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1284) ! used 2 times - TMP_JAMP(2113) = TMP_JAMP(1851) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1849) ! used 2 times - TMP_JAMP(2112) = TMP_JAMP(1852) - TMP_JAMP(1850) ! used 2 times - TMP_JAMP(2111) = TMP_JAMP(1267) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1265) ! used 2 times - TMP_JAMP(2110) = TMP_JAMP(1294) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1159) ! 
used 2 times - TMP_JAMP(2109) = TMP_JAMP(1858) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1851) ! used 2 times - TMP_JAMP(2108) = TMP_JAMP(1937) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1349) ! used 2 times - TMP_JAMP(2107) = AMP(490) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1529) ! used 2 times - TMP_JAMP(2106) = TMP_JAMP(1853) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1344) ! used 2 times - TMP_JAMP(2105) = AMP(109) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(961) ! used 2 times - TMP_JAMP(2104) = TMP_JAMP(647) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(100) ! used 2 times - TMP_JAMP(2103) = TMP_JAMP(1298) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(811) ! used 2 times - TMP_JAMP(2102) = TMP_JAMP(1847) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(961) ! used 2 times - TMP_JAMP(2101) = TMP_JAMP(1848) + TMP_JAMP(1847) ! used 2 times - TMP_JAMP(2100) = TMP_JAMP(1900) + TMP_JAMP(1787) ! used 2 times - TMP_JAMP(2099) = AMP(121) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(970) ! used 2 times - TMP_JAMP(2098) = TMP_JAMP(1436) + AMP(112) ! used 2 times - TMP_JAMP(2097) = TMP_JAMP(1684) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1436) ! used 2 times - TMP_JAMP(2096) = TMP_JAMP(1934) + TMP_JAMP(1848) ! used 2 times - TMP_JAMP(2095) = TMP_JAMP(1990) + TMP_JAMP(1684) ! used 2 times - TMP_JAMP(2094) = AMP(101) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(109) ! used 2 times - TMP_JAMP(2093) = TMP_JAMP(1919) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1612) ! used 2 times - TMP_JAMP(2092) = TMP_JAMP(1934) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(991) ! used 2 times - TMP_JAMP(2091) = AMP(133) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(979) ! 
used 2 times - TMP_JAMP(2090) = TMP_JAMP(1279) - TMP_JAMP(1251) ! used 2 times - TMP_JAMP(2089) = TMP_JAMP(1848) + TMP_JAMP(1282) ! used 2 times - TMP_JAMP(2088) = TMP_JAMP(1874) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(124) ! used 2 times - TMP_JAMP(2087) = AMP(493) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1000) ! used 2 times - TMP_JAMP(2086) = TMP_JAMP(844) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(113) ! used 2 times - TMP_JAMP(2085) = TMP_JAMP(1249) + AMP(148) ! used 2 times - TMP_JAMP(2084) = TMP_JAMP(1349) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1149) ! used 2 times - TMP_JAMP(2083) = TMP_JAMP(1847) - TMP_JAMP(1266) ! used 2 times - TMP_JAMP(2082) = TMP_JAMP(1947) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(848) ! used 2 times - TMP_JAMP(2081) = TMP_JAMP(1371) + AMP(125) ! used 2 times - TMP_JAMP(2080) = AMP(185) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(291) ! used 2 times - TMP_JAMP(2079) = AMP(174) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(195) ! used 2 times - TMP_JAMP(2078) = TMP_JAMP(1588) - AMP(168) ! used 2 times - TMP_JAMP(2077) = TMP_JAMP(1780) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1453) ! used 2 times - TMP_JAMP(2076) = TMP_JAMP(1894) + TMP_JAMP(1244) ! used 2 times - TMP_JAMP(2075) = TMP_JAMP(1234) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1227) ! used 2 times - TMP_JAMP(2074) = TMP_JAMP(1528) - AMP(207) ! used 2 times - TMP_JAMP(2073) = TMP_JAMP(1771) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1321) ! used 2 times - TMP_JAMP(2072) = TMP_JAMP(1933) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(723) ! used 2 times - TMP_JAMP(2071) = AMP(203) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(850) ! 
used 2 times - TMP_JAMP(2070) = TMP_JAMP(879) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(872) ! used 2 times - TMP_JAMP(2069) = TMP_JAMP(1221) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(213) ! used 2 times - TMP_JAMP(2068) = TMP_JAMP(1729) + TMP_JAMP(1224) ! used 2 times - TMP_JAMP(2067) = TMP_JAMP(1931) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(717) ! used 2 times - TMP_JAMP(2066) = TMP_JAMP(1933) - TMP_JAMP(1433) ! used 2 times - TMP_JAMP(2065) = TMP_JAMP(1213) + TMP_JAMP(1211) ! used 2 times - TMP_JAMP(2064) = TMP_JAMP(1216) + TMP_JAMP(1215) ! used 2 times - TMP_JAMP(2063) = TMP_JAMP(1240) + AMP(189) ! used 2 times - TMP_JAMP(2062) = TMP_JAMP(1393) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1247) ! used 2 times - TMP_JAMP(2061) = TMP_JAMP(1723) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1306) ! used 2 times - TMP_JAMP(2060) = AMP(267) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(943) ! used 2 times - TMP_JAMP(2059) = TMP_JAMP(1209) + TMP_JAMP(1205) ! used 2 times - TMP_JAMP(2058) = TMP_JAMP(1931) + TMP_JAMP(1300) ! used 2 times - TMP_JAMP(2057) = TMP_JAMP(1945) + ((0.000000000000000D+00 + TMP_JAMP(1084) = TMP_JAMP(745) + AMP(602) ! used 2 times + TMP_JAMP(1083) = TMP_JAMP(742) - AMP(478) ! used 2 times + TMP_JAMP(1082) = TMP_JAMP(738) - AMP(754) ! used 2 times + TMP_JAMP(1081) = TMP_JAMP(731) + AMP(638) ! used 2 times + TMP_JAMP(1080) = TMP_JAMP(726) + AMP(582) ! used 2 times + TMP_JAMP(1079) = TMP_JAMP(720) + AMP(805) ! used 2 times + TMP_JAMP(1078) = TMP_JAMP(712) + AMP(590) ! used 2 times + TMP_JAMP(1077) = TMP_JAMP(703) + AMP(774) ! used 2 times + TMP_JAMP(1076) = TMP_JAMP(662) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(766) ! used 2 times + TMP_JAMP(1075) = TMP_JAMP(661) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(741) ! 
used 2 times + TMP_JAMP(1074) = TMP_JAMP(657) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(607) ! used 2 times + TMP_JAMP(1073) = AMP(593) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(595) ! used 2 times + TMP_JAMP(1072) = AMP(472) + AMP(473) ! used 2 times + TMP_JAMP(1071) = AMP(505) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(511) ! used 2 times + TMP_JAMP(1070) = AMP(837) + AMP(920) ! used 2 times + TMP_JAMP(1069) = AMP(655) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(660) ! used 2 times + TMP_JAMP(1068) = AMP(742) + AMP(781) ! used 2 times + TMP_JAMP(1067) = AMP(855) + AMP(908) ! used 2 times + TMP_JAMP(1066) = AMP(519) - AMP(606) ! used 2 times + TMP_JAMP(1376) = TMP_JAMP(1298) + ((0.000000000000000D+00 $ ,1.000000000000000D+00)) * TMP_JAMP(1210) ! used 2 times - TMP_JAMP(2056) = TMP_JAMP(1212) + TMP_JAMP(1203) ! used 2 times - TMP_JAMP(2055) = TMP_JAMP(1941) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1204) ! used 2 times - TMP_JAMP(2054) = AMP(183) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(195) ! used 2 times - TMP_JAMP(2053) = AMP(171) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(671) ! used 2 times - TMP_JAMP(2052) = TMP_JAMP(1197) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(914) ! used 2 times - TMP_JAMP(2051) = TMP_JAMP(1498) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1200) ! used 2 times - TMP_JAMP(2050) = TMP_JAMP(1873) + TMP_JAMP(1201) ! used 2 times - TMP_JAMP(2049) = TMP_JAMP(1929) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1224) ! used 2 times - TMP_JAMP(2048) = AMP(225) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(707) ! used 2 times - TMP_JAMP(2047) = TMP_JAMP(1194) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(310) ! 
used 2 times - TMP_JAMP(2046) = TMP_JAMP(1228) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1210) ! used 2 times - TMP_JAMP(2045) = TMP_JAMP(1495) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1195) ! used 2 times - TMP_JAMP(2044) = AMP(221) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(691) ! used 2 times - TMP_JAMP(2043) = TMP_JAMP(1191) - AMP(167) ! used 2 times - TMP_JAMP(2042) = TMP_JAMP(1196) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1192) ! used 2 times - TMP_JAMP(2041) = TMP_JAMP(1872) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1202) ! used 2 times - TMP_JAMP(2040) = TMP_JAMP(1932) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1923) ! used 2 times - TMP_JAMP(2039) = AMP(192) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(300) ! used 2 times - TMP_JAMP(2038) = TMP_JAMP(1193) + AMP(258) ! used 2 times - TMP_JAMP(2037) = TMP_JAMP(1199) + AMP(192) ! used 2 times - TMP_JAMP(2036) = TMP_JAMP(1483) - TMP_JAMP(1199) ! used 2 times - TMP_JAMP(2035) = TMP_JAMP(1487) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1483) ! used 2 times - TMP_JAMP(2034) = TMP_JAMP(1846) - TMP_JAMP(1487) ! used 2 times - TMP_JAMP(2033) = TMP_JAMP(1846) + TMP_JAMP(1845) ! used 2 times - TMP_JAMP(2032) = AMP(276) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(784) ! used 2 times - TMP_JAMP(2031) = TMP_JAMP(1185) - AMP(205) ! used 2 times - TMP_JAMP(2030) = TMP_JAMP(1930) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1920) ! used 2 times - TMP_JAMP(2029) = TMP_JAMP(1186) - AMP(208) ! used 2 times - TMP_JAMP(2028) = TMP_JAMP(1875) + TMP_JAMP(1844) ! used 2 times - TMP_JAMP(2027) = TMP_JAMP(1182) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1179) ! used 2 times - TMP_JAMP(2026) = TMP_JAMP(1929) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1909) ! 
used 2 times - TMP_JAMP(2025) = TMP_JAMP(1951) - AMP(201) ! used 2 times - TMP_JAMP(2024) = TMP_JAMP(1951) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1843) ! used 2 times - TMP_JAMP(2023) = AMP(228) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(548) ! used 2 times - TMP_JAMP(2022) = TMP_JAMP(1192) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(301) ! used 2 times - TMP_JAMP(2021) = TMP_JAMP(1214) + TMP_JAMP(1192) ! used 2 times - TMP_JAMP(2020) = TMP_JAMP(1214) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1204) ! used 2 times - TMP_JAMP(2019) = TMP_JAMP(1842) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(228) ! used 2 times - TMP_JAMP(2018) = TMP_JAMP(1843) + TMP_JAMP(1842) ! used 2 times - TMP_JAMP(2017) = TMP_JAMP(1842) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(219) ! used 2 times - TMP_JAMP(2016) = TMP_JAMP(1843) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(210) ! used 2 times - TMP_JAMP(2015) = TMP_JAMP(1844) - TMP_JAMP(1843) ! used 2 times - TMP_JAMP(2014) = TMP_JAMP(1845) - TMP_JAMP(1844) ! used 2 times - TMP_JAMP(2013) = TMP_JAMP(1180) + AMP(285) ! used 2 times - TMP_JAMP(2012) = TMP_JAMP(1204) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1162) ! used 2 times - TMP_JAMP(2011) = TMP_JAMP(1937) + AMP(187) ! used 2 times - TMP_JAMP(2010) = TMP_JAMP(1937) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1841) ! used 2 times - TMP_JAMP(2009) = TMP_JAMP(1841) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(190) ! used 2 times - TMP_JAMP(2008) = TMP_JAMP(1936) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1846) ! used 2 times - TMP_JAMP(2007) = AMP(229) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(259) ! used 2 times - TMP_JAMP(2006) = AMP(109) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(238) ! 
used 2 times - TMP_JAMP(2005) = TMP_JAMP(1208) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(229) ! used 2 times - TMP_JAMP(2004) = TMP_JAMP(1839) - AMP(109) ! used 2 times - TMP_JAMP(2003) = TMP_JAMP(1990) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(1208) ! used 2 times - TMP_JAMP(2002) = TMP_JAMP(1990) - TMP_JAMP(1840) ! used 2 times - TMP_JAMP(2001) = AMP(121) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(247) ! used 2 times - TMP_JAMP(2000) = TMP_JAMP(1838) + AMP(121) ! used 2 times - TMP_JAMP(1999) = TMP_JAMP(1840) + TMP_JAMP(1838) ! used 2 times - TMP_JAMP(1998) = AMP(142) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(268) ! used 2 times - TMP_JAMP(1997) = TMP_JAMP(1838) - AMP(142) ! used 2 times - TMP_JAMP(1996) = TMP_JAMP(1839) - TMP_JAMP(1838) ! used 2 times - TMP_JAMP(1995) = TMP_JAMP(1252) - AMP(256) ! used 2 times - TMP_JAMP(1994) = TMP_JAMP(1845) + TMP_JAMP(1840) ! used 2 times - TMP_JAMP(1993) = TMP_JAMP(1250) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(277) ! used 2 times - TMP_JAMP(1992) = TMP_JAMP(1841) + TMP_JAMP(1839) ! used 2 times - TMP_JAMP(2744) = TMP_JAMP(2640) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1515) ! used 2 times - TMP_JAMP(2743) = TMP_JAMP(2621) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1888) ! used 2 times - TMP_JAMP(2742) = TMP_JAMP(2622) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(2618) ! used 2 times - TMP_JAMP(2741) = TMP_JAMP(2602) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(2600) ! used 2 times - TMP_JAMP(2740) = TMP_JAMP(2592) + TMP_JAMP(868) ! used 2 times - TMP_JAMP(2739) = TMP_JAMP(2593) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(667) ! used 2 times - TMP_JAMP(2738) = TMP_JAMP(2578) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(2576) ! 
used 2 times - TMP_JAMP(2737) = TMP_JAMP(2579) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(2577) ! used 2 times - TMP_JAMP(2736) = TMP_JAMP(2573) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(2569) ! used 2 times - TMP_JAMP(2735) = TMP_JAMP(2567) + TMP_JAMP(2566) ! used 2 times - TMP_JAMP(2734) = TMP_JAMP(2568) - AMP(1753) ! used 2 times - TMP_JAMP(2733) = TMP_JAMP(2560) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(2559) ! used 2 times - TMP_JAMP(2732) = TMP_JAMP(2555) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1287) ! used 2 times - TMP_JAMP(2731) = TMP_JAMP(2545) + AMP(753) ! used 2 times - TMP_JAMP(2730) = TMP_JAMP(2546) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(459) ! used 2 times - TMP_JAMP(2729) = TMP_JAMP(2541) + AMP(603) ! used 2 times - TMP_JAMP(2728) = TMP_JAMP(2536) + TMP_JAMP(902) ! used 2 times - TMP_JAMP(2727) = TMP_JAMP(2530) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1638) ! used 2 times - TMP_JAMP(2726) = TMP_JAMP(2520) + AMP(1040) ! used 2 times - TMP_JAMP(2725) = TMP_JAMP(2524) - TMP_JAMP(2519) ! used 2 times - TMP_JAMP(2724) = TMP_JAMP(2517) + TMP_JAMP(2513) ! used 2 times - TMP_JAMP(2723) = TMP_JAMP(2518) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(924) ! used 2 times - TMP_JAMP(2722) = TMP_JAMP(2509) + AMP(1197) ! used 2 times - TMP_JAMP(2721) = TMP_JAMP(2510) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1713) ! used 2 times - TMP_JAMP(2720) = TMP_JAMP(2505) + ((-0.000000000000000D+00 + TMP_JAMP(1375) = TMP_JAMP(1287) + TMP_JAMP(1164) ! used 2 times + TMP_JAMP(1374) = TMP_JAMP(1286) + TMP_JAMP(1151) ! used 2 times + TMP_JAMP(1373) = TMP_JAMP(1284) - TMP_JAMP(1153) ! used 2 times + TMP_JAMP(1372) = TMP_JAMP(1276) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1242) ! 
used 2 times + TMP_JAMP(1371) = TMP_JAMP(1267) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1163) ! used 2 times + TMP_JAMP(1370) = TMP_JAMP(1266) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1181) ! used 2 times + TMP_JAMP(1369) = TMP_JAMP(1265) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1173) ! used 2 times + TMP_JAMP(1368) = TMP_JAMP(1260) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1166) ! used 2 times + TMP_JAMP(1367) = TMP_JAMP(1249) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1126) ! used 2 times + TMP_JAMP(1366) = TMP_JAMP(1246) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1015) ! used 2 times + TMP_JAMP(1365) = TMP_JAMP(1245) - TMP_JAMP(1198) ! used 2 times + TMP_JAMP(1364) = TMP_JAMP(1243) - TMP_JAMP(1057) ! used 2 times + TMP_JAMP(1363) = TMP_JAMP(1236) - TMP_JAMP(1229) ! used 2 times + TMP_JAMP(1362) = TMP_JAMP(1234) - TMP_JAMP(1183) ! used 2 times + TMP_JAMP(1361) = TMP_JAMP(1233) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1117) ! used 2 times + TMP_JAMP(1360) = TMP_JAMP(1227) + TMP_JAMP(1058) ! used 2 times + TMP_JAMP(1359) = TMP_JAMP(1226) + TMP_JAMP(1082) ! used 2 times + TMP_JAMP(1358) = TMP_JAMP(1222) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1074) ! used 2 times + TMP_JAMP(1357) = TMP_JAMP(1217) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1098) ! used 2 times + TMP_JAMP(1356) = TMP_JAMP(1215) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1096) ! used 2 times + TMP_JAMP(1355) = TMP_JAMP(1214) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1097) ! used 2 times + TMP_JAMP(1354) = TMP_JAMP(1213) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1134) ! used 2 times + TMP_JAMP(1353) = TMP_JAMP(1208) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1099) ! 
used 2 times + TMP_JAMP(1352) = TMP_JAMP(1206) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1090) ! used 2 times + TMP_JAMP(1351) = TMP_JAMP(1202) - TMP_JAMP(1080) ! used 2 times + TMP_JAMP(1350) = TMP_JAMP(1199) - TMP_JAMP(1071) ! used 2 times + TMP_JAMP(1349) = TMP_JAMP(1192) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1087) ! used 2 times + TMP_JAMP(1348) = TMP_JAMP(1190) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1142) ! used 2 times + TMP_JAMP(1347) = TMP_JAMP(1189) - TMP_JAMP(1160) ! used 2 times + TMP_JAMP(1346) = TMP_JAMP(1186) - TMP_JAMP(1149) ! used 2 times + TMP_JAMP(1345) = TMP_JAMP(1184) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1115) ! used 2 times + TMP_JAMP(1344) = TMP_JAMP(1182) - TMP_JAMP(1137) ! used 2 times + TMP_JAMP(1343) = TMP_JAMP(1178) - TMP_JAMP(1170) ! used 2 times + TMP_JAMP(1342) = TMP_JAMP(1175) + TMP_JAMP(970) ! used 2 times + TMP_JAMP(1341) = TMP_JAMP(1171) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1114) ! used 2 times + TMP_JAMP(1340) = TMP_JAMP(1165) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1085) ! used 2 times + TMP_JAMP(1339) = TMP_JAMP(1162) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1121) ! used 2 times + TMP_JAMP(1338) = TMP_JAMP(1157) + TMP_JAMP(1046) ! used 2 times + TMP_JAMP(1337) = TMP_JAMP(1154) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1140) ! used 2 times + TMP_JAMP(1336) = TMP_JAMP(1146) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1089) ! used 2 times + TMP_JAMP(1335) = TMP_JAMP(1136) - TMP_JAMP(1104) ! used 2 times + TMP_JAMP(1334) = TMP_JAMP(1135) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1106) ! used 2 times + TMP_JAMP(1333) = TMP_JAMP(1132) + TMP_JAMP(1105) ! 
used 2 times + TMP_JAMP(1332) = TMP_JAMP(1128) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1078) ! used 2 times + TMP_JAMP(1331) = TMP_JAMP(1125) - TMP_JAMP(1073) ! used 2 times + TMP_JAMP(1330) = TMP_JAMP(1124) + TMP_JAMP(1014) ! used 2 times + TMP_JAMP(1329) = TMP_JAMP(1123) - TMP_JAMP(1017) ! used 2 times + TMP_JAMP(1328) = TMP_JAMP(1122) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1048) ! used 2 times + TMP_JAMP(1327) = TMP_JAMP(1120) + TMP_JAMP(1016) ! used 2 times + TMP_JAMP(1326) = TMP_JAMP(1119) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1064) ! used 2 times + TMP_JAMP(1325) = TMP_JAMP(1118) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1077) ! used 2 times + TMP_JAMP(1324) = TMP_JAMP(1116) + TMP_JAMP(1024) ! used 2 times + TMP_JAMP(1323) = TMP_JAMP(1112) + TMP_JAMP(1011) ! used 2 times + TMP_JAMP(1322) = TMP_JAMP(1111) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1081) ! used 2 times + TMP_JAMP(1321) = TMP_JAMP(1110) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1068) ! used 2 times + TMP_JAMP(1320) = TMP_JAMP(1109) - TMP_JAMP(1027) ! used 2 times + TMP_JAMP(1319) = TMP_JAMP(1108) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1066) ! used 2 times + TMP_JAMP(1318) = TMP_JAMP(1103) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(964) ! used 2 times + TMP_JAMP(1317) = TMP_JAMP(1101) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1056) ! used 2 times + TMP_JAMP(1316) = TMP_JAMP(1094) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1049) ! used 2 times + TMP_JAMP(1315) = TMP_JAMP(1093) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1067) ! used 2 times + TMP_JAMP(1314) = TMP_JAMP(1091) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1052) ! used 2 times + TMP_JAMP(1313) = TMP_JAMP(1086) - TMP_JAMP(999) ! 
used 2 times + TMP_JAMP(1312) = TMP_JAMP(1084) - TMP_JAMP(1060) ! used 2 times + TMP_JAMP(1311) = TMP_JAMP(1083) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1042) ! used 2 times + TMP_JAMP(1310) = TMP_JAMP(1079) + TMP_JAMP(1045) ! used 2 times + TMP_JAMP(1309) = TMP_JAMP(1076) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1051) ! used 2 times + TMP_JAMP(1308) = TMP_JAMP(1075) + TMP_JAMP(1009) ! used 2 times + TMP_JAMP(1307) = TMP_JAMP(1072) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(839) ! used 2 times + TMP_JAMP(1306) = TMP_JAMP(1070) + TMP_JAMP(1053) ! used 2 times + TMP_JAMP(1305) = TMP_JAMP(1069) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1032) ! used 2 times + TMP_JAMP(1416) = TMP_JAMP(1372) - TMP_JAMP(1331) ! used 2 times + TMP_JAMP(1415) = TMP_JAMP(1371) + TMP_JAMP(1319) ! used 2 times + TMP_JAMP(1414) = TMP_JAMP(1370) - TMP_JAMP(1309) ! used 2 times + TMP_JAMP(1413) = TMP_JAMP(1369) + TMP_JAMP(1321) ! used 2 times + TMP_JAMP(1412) = TMP_JAMP(1368) + TMP_JAMP(1315) ! used 2 times + TMP_JAMP(1411) = TMP_JAMP(1367) + TMP_JAMP(1254) ! used 2 times + TMP_JAMP(1410) = TMP_JAMP(1365) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1330) ! used 2 times + TMP_JAMP(1409) = TMP_JAMP(1363) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1324) ! used 2 times + TMP_JAMP(1408) = TMP_JAMP(1362) + TMP_JAMP(1305) ! used 2 times + TMP_JAMP(1407) = TMP_JAMP(1361) + TMP_JAMP(1239) ! used 2 times + TMP_JAMP(1406) = TMP_JAMP(1359) - TMP_JAMP(1297) ! used 2 times + TMP_JAMP(1405) = TMP_JAMP(1358) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1300) ! used 2 times + TMP_JAMP(1404) = TMP_JAMP(1357) + TMP_JAMP(1294) ! used 2 times + TMP_JAMP(1403) = TMP_JAMP(1356) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1262) ! used 2 times + TMP_JAMP(1402) = TMP_JAMP(1355) + TMP_JAMP(1306) ! 
used 2 times + TMP_JAMP(1401) = TMP_JAMP(1354) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1313) ! used 2 times + TMP_JAMP(1400) = TMP_JAMP(1353) - TMP_JAMP(1310) ! used 2 times + TMP_JAMP(1399) = TMP_JAMP(1352) - TMP_JAMP(1292) ! used 2 times + TMP_JAMP(1398) = TMP_JAMP(1348) - TMP_JAMP(1285) ! used 2 times + TMP_JAMP(1397) = TMP_JAMP(1347) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1327) ! used 2 times + TMP_JAMP(1396) = TMP_JAMP(1345) - TMP_JAMP(1289) ! used 2 times + TMP_JAMP(1395) = TMP_JAMP(1344) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1318) ! used 2 times + TMP_JAMP(1394) = TMP_JAMP(1343) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1323) ! used 2 times + TMP_JAMP(1393) = TMP_JAMP(1339) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1299) ! used 2 times + TMP_JAMP(1392) = TMP_JAMP(1338) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1274) ! used 2 times + TMP_JAMP(1391) = TMP_JAMP(1337) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1273) ! used 2 times + TMP_JAMP(1390) = TMP_JAMP(1336) - TMP_JAMP(1283) ! used 2 times + TMP_JAMP(1389) = TMP_JAMP(1335) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1235) ! used 2 times + TMP_JAMP(1388) = TMP_JAMP(1334) - TMP_JAMP(1251) ! used 2 times + TMP_JAMP(1387) = TMP_JAMP(1333) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1220) ! used 2 times + TMP_JAMP(1386) = TMP_JAMP(1332) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1191) ! used 2 times + TMP_JAMP(1385) = TMP_JAMP(1328) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1161) ! used 2 times + TMP_JAMP(1384) = TMP_JAMP(1326) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1230) ! used 2 times + TMP_JAMP(1383) = TMP_JAMP(1325) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1179) ! 
used 2 times + TMP_JAMP(1382) = TMP_JAMP(1322) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1221) ! used 2 times + TMP_JAMP(1381) = TMP_JAMP(1317) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1293) ! used 2 times + TMP_JAMP(1380) = TMP_JAMP(1316) - TMP_JAMP(1258) ! used 2 times + TMP_JAMP(1379) = TMP_JAMP(1314) + TMP_JAMP(1259) ! used 2 times + TMP_JAMP(1378) = TMP_JAMP(1312) + TMP_JAMP(1223) ! used 2 times + TMP_JAMP(1377) = TMP_JAMP(1308) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1225) ! used 2 times + TMP_JAMP(1419) = TMP_JAMP(1180) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(547) ! used 2 times + TMP_JAMP(1418) = TMP_JAMP(1139) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(241) ! used 2 times + TMP_JAMP(1417) = TMP_JAMP(855) + TMP_JAMP(243) ! used 2 times + TMP_JAMP(1479) = AMP(1371) - AMP(1381) ! used 16 times + TMP_JAMP(1478) = AMP(1370) - AMP(1382) ! used 16 times + TMP_JAMP(1477) = AMP(1358) - AMP(1361) ! used 16 times + TMP_JAMP(1476) = AMP(1323) + AMP(1387) ! used 16 times + TMP_JAMP(1475) = AMP(1322) + AMP(1388) ! used 16 times + TMP_JAMP(1474) = AMP(1320) + AMP(1324) ! used 16 times + TMP_JAMP(1473) = AMP(1310) - AMP(1313) ! used 16 times + TMP_JAMP(1472) = AMP(1309) + AMP(1311) ! used 16 times + TMP_JAMP(1471) = AMP(1103) + AMP(1359) ! used 16 times + TMP_JAMP(1470) = AMP(1085) + AMP(1087) ! used 16 times + TMP_JAMP(1469) = AMP(1084) - AMP(1100) ! used 16 times + TMP_JAMP(1468) = AMP(1080) + AMP(1082) ! used 16 times + TMP_JAMP(1467) = AMP(1079) - AMP(1099) ! used 16 times + TMP_JAMP(1466) = AMP(1076) + AMP(1078) ! used 16 times + TMP_JAMP(1465) = AMP(1378) + AMP(1383) ! used 16 times + TMP_JAMP(1464) = AMP(1367) + AMP(1369) ! used 16 times + TMP_JAMP(1463) = AMP(1330) - AMP(1389) ! used 16 times + TMP_JAMP(1462) = AMP(1319) + AMP(1321) ! used 16 times + TMP_JAMP(1461) = AMP(1315) - AMP(1318) ! 
used 16 times + TMP_JAMP(1460) = AMP(1314) + AMP(1316) ! used 16 times + TMP_JAMP(1459) = AMP(1357) + AMP(1360) ! used 16 times + TMP_JAMP(1458) = AMP(1163) + AMP(1165) ! used 16 times + TMP_JAMP(1457) = AMP(1159) + AMP(1161) ! used 16 times + TMP_JAMP(1456) = AMP(1158) + AMP(1160) ! used 16 times + TMP_JAMP(1455) = AMP(1368) + AMP(1372) ! used 16 times + TMP_JAMP(1454) = AMP(1164) + AMP(1168) ! used 16 times + TMP_JAMP(1453) = AMP(1154) + AMP(1156) ! used 16 times + TMP_JAMP(1452) = AMP(1153) + AMP(1155) ! used 16 times + TMP_JAMP(1451) = AMP(1362) + AMP(1364) ! used 16 times + TMP_JAMP(1450) = AMP(1101) + AMP(1104) ! used 16 times + TMP_JAMP(1449) = AMP(1363) - AMP(1366) ! used 16 times + TMP_JAMP(1448) = AMP(1347) + AMP(1384) ! used 16 times + TMP_JAMP(1447) = AMP(1346) + AMP(1385) ! used 16 times + TMP_JAMP(1446) = AMP(1334) - AMP(1337) ! used 16 times + TMP_JAMP(1445) = AMP(1181) + AMP(1336) ! used 16 times + TMP_JAMP(1444) = AMP(1162) - AMP(1178) ! used 16 times + TMP_JAMP(1443) = AMP(1157) - AMP(1177) ! used 16 times + TMP_JAMP(1442) = AMP(1354) - AMP(1386) ! used 16 times + TMP_JAMP(1441) = AMP(1343) + AMP(1345) ! used 16 times + TMP_JAMP(1440) = AMP(1333) + AMP(1335) ! used 16 times + TMP_JAMP(1439) = AMP(1081) + AMP(1083) ! used 16 times + TMP_JAMP(1438) = AMP(1344) + AMP(1348) ! used 16 times + TMP_JAMP(1437) = AMP(1086) + AMP(1090) ! used 16 times + TMP_JAMP(1436) = AMP(1075) + AMP(1077) ! used 16 times + TMP_JAMP(1435) = AMP(1338) + AMP(1340) ! used 16 times + TMP_JAMP(1434) = AMP(1179) + AMP(1182) ! used 16 times + TMP_JAMP(1433) = AMP(1339) - AMP(1342) ! used 16 times + TMP_JAMP(1432) = AMP(1259) + AMP(1312) ! used 16 times + TMP_JAMP(1431) = AMP(1257) + AMP(1260) ! used 16 times + TMP_JAMP(1430) = AMP(1240) - AMP(1256) ! used 16 times + TMP_JAMP(1429) = AMP(1237) + AMP(1239) ! used 16 times + TMP_JAMP(1428) = AMP(1242) + AMP(1246) ! used 16 times + TMP_JAMP(1427) = AMP(1236) + AMP(1238) ! used 16 times + TMP_JAMP(1426) = AMP(1241) + AMP(1243) ! 
used 16 times + TMP_JAMP(1425) = AMP(1235) - AMP(1255) ! used 16 times + TMP_JAMP(1424) = AMP(1231) + AMP(1233) ! used 16 times + TMP_JAMP(1423) = AMP(1232) + AMP(1234) ! used 16 times + TMP_JAMP(1422) = AMP(954) - AMP(1097) ! used 16 times + TMP_JAMP(1421) = AMP(952) + AMP(1328) ! used 16 times + TMP_JAMP(1420) = AMP(953) - AMP(1175) ! used 16 times + TMP_JAMP(1524) = TMP_JAMP(1477) + AMP(1390) ! used 16 times + TMP_JAMP(1523) = TMP_JAMP(1474) - AMP(1327) ! used 16 times + TMP_JAMP(1522) = TMP_JAMP(1473) - AMP(1392) ! used 16 times + TMP_JAMP(1521) = TMP_JAMP(1472) + AMP(1329) ! used 16 times + TMP_JAMP(1520) = TMP_JAMP(1471) - TMP_JAMP(1469) ! used 16 times + TMP_JAMP(1519) = TMP_JAMP(1470) - AMP(1096) ! used 16 times + TMP_JAMP(1518) = TMP_JAMP(1468) + AMP(1098) ! used 16 times + TMP_JAMP(1517) = TMP_JAMP(1467) - AMP(1102) ! used 16 times + TMP_JAMP(1516) = TMP_JAMP(1466) + AMP(1089) ! used 16 times + TMP_JAMP(1515) = TMP_JAMP(1464) - AMP(1393) ! used 16 times + TMP_JAMP(1514) = TMP_JAMP(1462) + AMP(1395) ! used 16 times + TMP_JAMP(1513) = TMP_JAMP(1461) - AMP(1325) ! used 16 times + TMP_JAMP(1512) = TMP_JAMP(1460) + AMP(1332) ! used 16 times + TMP_JAMP(1511) = TMP_JAMP(1459) + AMP(1377) ! used 16 times + TMP_JAMP(1510) = TMP_JAMP(1458) - AMP(1174) ! used 16 times + TMP_JAMP(1509) = TMP_JAMP(1457) + AMP(1173) ! used 16 times + TMP_JAMP(1508) = TMP_JAMP(1456) + AMP(1176) ! used 16 times + TMP_JAMP(1507) = TMP_JAMP(1455) - AMP(1375) ! used 16 times + TMP_JAMP(1506) = TMP_JAMP(1454) - AMP(1171) ! used 16 times + TMP_JAMP(1505) = TMP_JAMP(1453) + AMP(1167) ! used 16 times + TMP_JAMP(1504) = TMP_JAMP(1452) + AMP(1170) ! used 16 times + TMP_JAMP(1503) = TMP_JAMP(1451) + AMP(1379) ! used 16 times + TMP_JAMP(1502) = TMP_JAMP(1450) + AMP(1365) ! used 16 times + TMP_JAMP(1501) = TMP_JAMP(1449) - AMP(1373) ! used 16 times + TMP_JAMP(1500) = TMP_JAMP(1446) - AMP(1391) ! used 16 times + TMP_JAMP(1499) = TMP_JAMP(1445) - TMP_JAMP(1444) ! 
used 16 times + TMP_JAMP(1498) = TMP_JAMP(1443) - AMP(1180) ! used 16 times + TMP_JAMP(1497) = TMP_JAMP(1441) + AMP(1394) ! used 16 times + TMP_JAMP(1496) = TMP_JAMP(1440) + AMP(1353) ! used 16 times + TMP_JAMP(1495) = TMP_JAMP(1439) + AMP(1095) ! used 16 times + TMP_JAMP(1494) = TMP_JAMP(1438) - AMP(1351) ! used 16 times + TMP_JAMP(1493) = TMP_JAMP(1437) - AMP(1093) ! used 16 times + TMP_JAMP(1492) = TMP_JAMP(1436) + AMP(1092) ! used 16 times + TMP_JAMP(1491) = TMP_JAMP(1435) + AMP(1356) ! used 16 times + TMP_JAMP(1490) = TMP_JAMP(1434) + AMP(1341) ! used 16 times + TMP_JAMP(1489) = TMP_JAMP(1433) - AMP(1349) ! used 16 times + TMP_JAMP(1488) = TMP_JAMP(1432) - TMP_JAMP(1430) ! used 16 times + TMP_JAMP(1487) = TMP_JAMP(1431) + AMP(1317) ! used 16 times + TMP_JAMP(1486) = TMP_JAMP(1429) + AMP(1251) ! used 16 times + TMP_JAMP(1485) = TMP_JAMP(1428) - AMP(1249) ! used 16 times + TMP_JAMP(1484) = TMP_JAMP(1427) + AMP(1254) ! used 16 times + TMP_JAMP(1483) = TMP_JAMP(1426) - AMP(1252) ! used 16 times + TMP_JAMP(1482) = TMP_JAMP(1425) - AMP(1258) ! used 16 times + TMP_JAMP(1481) = TMP_JAMP(1424) + AMP(1248) ! used 16 times + TMP_JAMP(1480) = TMP_JAMP(1423) + AMP(1245) ! used 16 times + TMP_JAMP(1530) = TMP_JAMP(1524) - TMP_JAMP(1478) ! used 16 times + TMP_JAMP(1529) = TMP_JAMP(1522) - TMP_JAMP(1475) ! used 16 times + TMP_JAMP(1528) = TMP_JAMP(1515) - TMP_JAMP(1465) ! used 16 times + TMP_JAMP(1527) = TMP_JAMP(1514) - TMP_JAMP(1463) ! used 16 times + TMP_JAMP(1526) = TMP_JAMP(1500) - TMP_JAMP(1447) ! used 16 times + TMP_JAMP(1525) = TMP_JAMP(1497) - TMP_JAMP(1442) ! used 16 times + TMP_JAMP(1708) = TMP_JAMP(1530) + TMP_JAMP(1520) ! used 8 times + TMP_JAMP(1707) = TMP_JAMP(1530) + TMP_JAMP(1529) ! used 8 times + TMP_JAMP(1706) = TMP_JAMP(1530) - TMP_JAMP(1526) ! used 8 times + TMP_JAMP(1705) = TMP_JAMP(1530) - TMP_JAMP(1511) ! used 8 times + TMP_JAMP(1704) = TMP_JAMP(1529) + TMP_JAMP(1521) ! used 8 times + TMP_JAMP(1703) = TMP_JAMP(1529) + TMP_JAMP(1526) ! 
used 8 times + TMP_JAMP(1702) = TMP_JAMP(1529) - TMP_JAMP(1488) ! used 8 times + TMP_JAMP(1701) = TMP_JAMP(1528) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1072) ! used 8 times + TMP_JAMP(1700) = TMP_JAMP(1528) - TMP_JAMP(1479) ! used 8 times + TMP_JAMP(1699) = TMP_JAMP(1528) - TMP_JAMP(1503) ! used 8 times + TMP_JAMP(1698) = TMP_JAMP(1528) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1012) ! used 8 times + TMP_JAMP(1697) = TMP_JAMP(1527) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1074) ! used 8 times + TMP_JAMP(1696) = TMP_JAMP(1527) - TMP_JAMP(1476) ! used 8 times + TMP_JAMP(1695) = TMP_JAMP(1527) + TMP_JAMP(1512) ! used 8 times + TMP_JAMP(1694) = TMP_JAMP(1527) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1014) ! used 8 times + TMP_JAMP(1693) = TMP_JAMP(1526) - TMP_JAMP(1448) ! used 8 times + TMP_JAMP(1692) = TMP_JAMP(1526) + TMP_JAMP(1496) ! used 8 times + TMP_JAMP(1691) = TMP_JAMP(1525) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1073) ! used 8 times + TMP_JAMP(1690) = TMP_JAMP(1525) + TMP_JAMP(1491) ! used 8 times + TMP_JAMP(1689) = TMP_JAMP(1525) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1013) ! used 8 times + TMP_JAMP(1688) = TMP_JAMP(1523) + TMP_JAMP(1519) ! used 8 times + TMP_JAMP(1687) = TMP_JAMP(1523) + TMP_JAMP(1521) ! used 8 times + TMP_JAMP(1686) = TMP_JAMP(1523) - TMP_JAMP(1510) ! used 8 times + TMP_JAMP(1685) = TMP_JAMP(1523) - TMP_JAMP(1513) ! used 8 times + TMP_JAMP(1684) = TMP_JAMP(1521) + TMP_JAMP(1518) ! used 8 times + TMP_JAMP(1683) = TMP_JAMP(1521) - TMP_JAMP(1508) ! used 8 times + TMP_JAMP(1682) = TMP_JAMP(1521) + TMP_JAMP(1488) ! used 8 times + TMP_JAMP(1681) = TMP_JAMP(1520) + TMP_JAMP(1518) ! used 8 times + TMP_JAMP(1680) = TMP_JAMP(1520) + TMP_JAMP(1511) ! used 8 times + TMP_JAMP(1679) = TMP_JAMP(1520) - TMP_JAMP(1495) ! used 8 times + TMP_JAMP(1678) = TMP_JAMP(1519) + TMP_JAMP(1518) ! 
used 8 times + TMP_JAMP(1677) = TMP_JAMP(1519) + TMP_JAMP(1510) ! used 8 times + TMP_JAMP(1676) = TMP_JAMP(1518) + TMP_JAMP(1508) ! used 8 times + TMP_JAMP(1675) = TMP_JAMP(1518) + TMP_JAMP(1495) ! used 8 times + TMP_JAMP(1674) = TMP_JAMP(1517) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1147) ! used 8 times + TMP_JAMP(1673) = TMP_JAMP(1517) + TMP_JAMP(1516) ! used 8 times + TMP_JAMP(1672) = TMP_JAMP(1517) - TMP_JAMP(1492) ! used 8 times + TMP_JAMP(1671) = TMP_JAMP(1517) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1148) ! used 8 times + TMP_JAMP(1670) = TMP_JAMP(1516) + TMP_JAMP(1492) ! used 8 times + TMP_JAMP(1669) = TMP_JAMP(1513) - AMP(1326) ! used 8 times + TMP_JAMP(1668) = TMP_JAMP(1513) + TMP_JAMP(1512) ! used 8 times + TMP_JAMP(1667) = TMP_JAMP(1511) + AMP(1376) ! used 8 times + TMP_JAMP(1666) = TMP_JAMP(1511) - TMP_JAMP(1486) ! used 8 times + TMP_JAMP(1665) = TMP_JAMP(1511) + TMP_JAMP(1507) ! used 8 times + TMP_JAMP(1664) = TMP_JAMP(1510) - TMP_JAMP(1505) ! used 8 times + TMP_JAMP(1663) = TMP_JAMP(1509) + TMP_JAMP(1508) ! used 8 times + TMP_JAMP(1662) = TMP_JAMP(1509) + TMP_JAMP(1486) ! used 8 times + TMP_JAMP(1661) = TMP_JAMP(1509) - TMP_JAMP(1499) ! used 8 times + TMP_JAMP(1660) = TMP_JAMP(1509) + TMP_JAMP(1506) ! used 8 times + TMP_JAMP(1659) = TMP_JAMP(1508) + TMP_JAMP(1499) ! used 8 times + TMP_JAMP(1658) = TMP_JAMP(1507) + TMP_JAMP(1506) ! used 8 times + TMP_JAMP(1657) = TMP_JAMP(1507) - TMP_JAMP(1485) ! used 8 times + TMP_JAMP(1656) = TMP_JAMP(1507) - TMP_JAMP(1501) ! used 8 times + TMP_JAMP(1655) = TMP_JAMP(1506) - AMP(1172) ! used 8 times + TMP_JAMP(1654) = TMP_JAMP(1506) - TMP_JAMP(1504) ! used 8 times + TMP_JAMP(1653) = TMP_JAMP(1506) + TMP_JAMP(1485) ! used 8 times + TMP_JAMP(1652) = TMP_JAMP(1505) + TMP_JAMP(1504) ! used 8 times + TMP_JAMP(1651) = TMP_JAMP(1503) + TMP_JAMP(1501) ! used 8 times + TMP_JAMP(1650) = TMP_JAMP(1502) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1150) ! 
used 8 times + TMP_JAMP(1649) = TMP_JAMP(1502) - TMP_JAMP(1501) ! used 8 times + TMP_JAMP(1648) = TMP_JAMP(1502) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1149) ! used 8 times + TMP_JAMP(1647) = TMP_JAMP(1501) - AMP(1374) ! used 8 times + TMP_JAMP(1646) = TMP_JAMP(1499) + TMP_JAMP(1496) ! used 8 times + TMP_JAMP(1645) = TMP_JAMP(1498) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1225) ! used 8 times + TMP_JAMP(1644) = TMP_JAMP(1498) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1226) ! used 8 times + TMP_JAMP(1643) = TMP_JAMP(1496) + TMP_JAMP(1495) ! used 8 times + TMP_JAMP(1642) = TMP_JAMP(1496) + TMP_JAMP(1494) ! used 8 times + TMP_JAMP(1641) = TMP_JAMP(1496) - TMP_JAMP(1484) ! used 8 times + TMP_JAMP(1640) = TMP_JAMP(1495) + AMP(1094) ! used 8 times + TMP_JAMP(1639) = TMP_JAMP(1495) + TMP_JAMP(1484) ! used 8 times + TMP_JAMP(1638) = TMP_JAMP(1495) + TMP_JAMP(1493) ! used 8 times + TMP_JAMP(1637) = TMP_JAMP(1494) - AMP(1350) ! used 8 times + TMP_JAMP(1636) = TMP_JAMP(1494) + TMP_JAMP(1493) ! used 8 times + TMP_JAMP(1635) = TMP_JAMP(1494) - TMP_JAMP(1483) ! used 8 times + TMP_JAMP(1634) = TMP_JAMP(1494) - TMP_JAMP(1489) ! used 8 times + TMP_JAMP(1633) = TMP_JAMP(1493) - AMP(1094) ! used 8 times + TMP_JAMP(1632) = TMP_JAMP(1493) - TMP_JAMP(1492) ! used 8 times + TMP_JAMP(1631) = TMP_JAMP(1493) + TMP_JAMP(1483) ! used 8 times + TMP_JAMP(1630) = TMP_JAMP(1491) + TMP_JAMP(1489) ! used 8 times + TMP_JAMP(1629) = TMP_JAMP(1490) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1228) ! used 8 times + TMP_JAMP(1628) = TMP_JAMP(1490) - TMP_JAMP(1489) ! used 8 times + TMP_JAMP(1627) = TMP_JAMP(1490) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1227) ! used 8 times + TMP_JAMP(1626) = TMP_JAMP(1489) - AMP(1350) ! used 8 times + TMP_JAMP(1625) = TMP_JAMP(1488) - TMP_JAMP(1486) ! used 8 times + TMP_JAMP(1624) = TMP_JAMP(1488) + TMP_JAMP(1484) ! 
used 8 times + TMP_JAMP(1623) = TMP_JAMP(1487) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1305) ! used 8 times + TMP_JAMP(1622) = TMP_JAMP(1487) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1306) ! used 8 times + TMP_JAMP(1621) = TMP_JAMP(1486) + AMP(1250) ! used 8 times + TMP_JAMP(1620) = TMP_JAMP(1486) + TMP_JAMP(1485) ! used 8 times + TMP_JAMP(1619) = TMP_JAMP(1485) - AMP(1250) ! used 8 times + TMP_JAMP(1618) = TMP_JAMP(1485) - TMP_JAMP(1481) ! used 8 times + TMP_JAMP(1617) = TMP_JAMP(1484) + TMP_JAMP(1483) ! used 8 times + TMP_JAMP(1616) = TMP_JAMP(1483) - TMP_JAMP(1480) ! used 8 times + TMP_JAMP(1615) = TMP_JAMP(1482) - TMP_JAMP(1481) ! used 8 times + TMP_JAMP(1614) = TMP_JAMP(1482) + TMP_JAMP(1480) ! used 8 times + TMP_JAMP(1613) = TMP_JAMP(1482) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1303) ! used 8 times + TMP_JAMP(1612) = TMP_JAMP(1481) + TMP_JAMP(1480) ! used 8 times + TMP_JAMP(1611) = TMP_JAMP(1479) + TMP_JAMP(1476) ! used 8 times + TMP_JAMP(1610) = TMP_JAMP(1479) - TMP_JAMP(1448) ! used 8 times + TMP_JAMP(1609) = TMP_JAMP(1479) - AMP(1374) ! used 8 times + TMP_JAMP(1608) = TMP_JAMP(1476) + AMP(1326) ! used 8 times + TMP_JAMP(1607) = TMP_JAMP(1476) + TMP_JAMP(1448) ! used 8 times + TMP_JAMP(1606) = TMP_JAMP(1448) + AMP(1350) ! used 8 times + TMP_JAMP(1605) = TMP_JAMP(1422) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(948) ! used 8 times + TMP_JAMP(1604) = TMP_JAMP(1422) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(951) ! used 8 times + TMP_JAMP(1603) = TMP_JAMP(1421) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(946) ! used 8 times + TMP_JAMP(1602) = TMP_JAMP(1421) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(949) ! used 8 times + TMP_JAMP(1601) = TMP_JAMP(1420) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(947) ! 
used 8 times + TMP_JAMP(1600) = TMP_JAMP(1420) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(950) ! used 8 times + TMP_JAMP(1599) = AMP(1122) + AMP(1123) ! used 8 times + TMP_JAMP(1598) = AMP(1117) + AMP(1125) ! used 8 times + TMP_JAMP(1597) = AMP(1068) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1380) ! used 8 times + TMP_JAMP(1596) = AMP(1064) - AMP(1066) ! used 8 times + TMP_JAMP(1595) = AMP(1046) - AMP(1048) ! used 8 times + TMP_JAMP(1594) = AMP(1044) + AMP(1050) ! used 8 times + TMP_JAMP(1593) = AMP(1200) + AMP(1201) ! used 8 times + TMP_JAMP(1592) = AMP(1172) + AMP(1376) ! used 8 times + TMP_JAMP(1591) = AMP(1166) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1202) ! used 8 times + TMP_JAMP(1590) = AMP(1166) + AMP(1172) ! used 8 times + TMP_JAMP(1589) = AMP(1212) + AMP(1213) ! used 8 times + TMP_JAMP(1588) = AMP(1207) + AMP(1215) ! used 8 times + TMP_JAMP(1587) = AMP(1195) + AMP(1203) ! used 8 times + TMP_JAMP(1586) = AMP(1088) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1124) ! used 8 times + TMP_JAMP(1585) = AMP(1062) + AMP(1067) ! used 8 times + TMP_JAMP(1584) = AMP(1058) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1355) ! used 8 times + TMP_JAMP(1583) = AMP(1055) - AMP(1057) ! used 8 times + TMP_JAMP(1582) = AMP(1094) + AMP(1352) ! used 8 times + TMP_JAMP(1581) = AMP(1088) + AMP(1094) ! used 8 times + TMP_JAMP(1580) = AMP(1134) + AMP(1135) ! used 8 times + TMP_JAMP(1579) = AMP(1129) + AMP(1137) ! used 8 times + TMP_JAMP(1578) = AMP(1053) + AMP(1059) ! used 8 times + TMP_JAMP(1577) = AMP(1172) + AMP(1250) ! used 8 times + TMP_JAMP(1576) = AMP(1290) + AMP(1291) ! used 8 times + TMP_JAMP(1575) = AMP(1247) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1292) ! used 8 times + TMP_JAMP(1574) = AMP(1094) + AMP(1253) ! used 8 times + TMP_JAMP(1573) = AMP(1299) + AMP(1300) ! 
used 8 times + TMP_JAMP(1572) = AMP(1244) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1301) ! used 8 times + TMP_JAMP(1571) = AMP(1285) + AMP(1293) ! used 8 times + TMP_JAMP(1570) = AMP(1294) + AMP(1302) ! used 8 times + TMP_JAMP(1569) = AMP(1049) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1331) ! used 8 times + TMP_JAMP(1568) = AMP(1169) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1214) ! used 8 times + TMP_JAMP(1567) = AMP(1091) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1136) ! used 8 times + TMP_JAMP(1566) = AMP(1110) + AMP(1111) ! used 8 times + TMP_JAMP(1565) = AMP(1105) + AMP(1113) ! used 8 times + TMP_JAMP(1564) = AMP(1278) + AMP(1279) ! used 8 times + TMP_JAMP(1563) = AMP(1250) - AMP(1376) ! used 8 times + TMP_JAMP(1562) = AMP(1244) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1280) ! used 8 times + TMP_JAMP(1561) = AMP(1244) + AMP(1250) ! used 8 times + TMP_JAMP(1560) = AMP(1273) + AMP(1281) ! used 8 times + TMP_JAMP(1559) = AMP(1091) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1112) ! used 8 times + TMP_JAMP(1558) = AMP(1143) + AMP(1144) ! used 8 times + TMP_JAMP(1557) = AMP(1138) + AMP(1146) ! used 8 times + TMP_JAMP(1556) = AMP(1221) + AMP(1222) ! used 8 times + TMP_JAMP(1555) = AMP(1166) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1223) ! used 8 times + TMP_JAMP(1554) = AMP(1216) + AMP(1224) ! used 8 times + TMP_JAMP(1553) = AMP(1352) + AMP(1355) ! used 8 times + TMP_JAMP(1552) = AMP(1247) + AMP(1253) ! used 8 times + TMP_JAMP(1551) = AMP(1253) - AMP(1352) ! used 8 times + TMP_JAMP(1550) = AMP(1088) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1145) ! used 8 times + TMP_JAMP(1549) = AMP(1413) + AMP(1414) ! used 8 times + TMP_JAMP(1548) = AMP(1374) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1416) ! used 8 times + TMP_JAMP(1547) = AMP(1188) + AMP(1189) ! 
used 8 times + TMP_JAMP(1546) = AMP(1183) + AMP(1191) ! used 8 times + TMP_JAMP(1545) = AMP(1411) - AMP(1415) ! used 8 times + TMP_JAMP(1544) = AMP(1266) + AMP(1267) ! used 8 times + TMP_JAMP(1543) = AMP(1247) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1268) ! used 8 times + TMP_JAMP(1542) = AMP(1261) + AMP(1269) ! used 8 times + TMP_JAMP(1541) = AMP(1169) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1190) ! used 8 times + TMP_JAMP(1540) = AMP(1376) + AMP(1380) ! used 8 times + TMP_JAMP(1539) = AMP(1008) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1380) ! used 8 times + TMP_JAMP(1538) = AMP(1004) - AMP(1006) ! used 8 times + TMP_JAMP(1537) = AMP(986) - AMP(988) ! used 8 times + TMP_JAMP(1536) = AMP(984) + AMP(990) ! used 8 times + TMP_JAMP(1535) = AMP(1002) + AMP(1007) ! used 8 times + TMP_JAMP(1534) = AMP(998) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1355) ! used 8 times + TMP_JAMP(1533) = AMP(995) - AMP(997) ! used 8 times + TMP_JAMP(1532) = AMP(993) + AMP(999) ! used 8 times + TMP_JAMP(1531) = AMP(989) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1331) ! used 8 times + TMP_JAMP(1711) = TMP_JAMP(1698) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1538) ! used 8 times + TMP_JAMP(1710) = TMP_JAMP(1694) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1537) ! used 8 times + TMP_JAMP(1709) = TMP_JAMP(1689) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1533) ! used 8 times + TMP_JAMP(1712) = TMP_JAMP(1691) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1583) ! used 6 times + TMP_JAMP(1714) = TMP_JAMP(1697) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1595) ! used 5 times + TMP_JAMP(1713) = TMP_JAMP(1557) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1516) ! used 5 times + TMP_JAMP(1843) = TMP_JAMP(1711) + TMP_JAMP(1710) ! 
used 4 times + TMP_JAMP(1842) = TMP_JAMP(1711) - TMP_JAMP(1709) ! used 4 times + TMP_JAMP(1841) = TMP_JAMP(1710) + TMP_JAMP(1709) ! used 4 times + TMP_JAMP(1840) = TMP_JAMP(1710) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1531) ! used 4 times + TMP_JAMP(1839) = TMP_JAMP(1709) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1534) ! used 4 times + TMP_JAMP(1838) = TMP_JAMP(1703) - TMP_JAMP(1659) ! used 4 times + TMP_JAMP(1837) = TMP_JAMP(1701) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1596) ! used 4 times + TMP_JAMP(1836) = TMP_JAMP(1685) + TMP_JAMP(1521) ! used 4 times + TMP_JAMP(1835) = TMP_JAMP(1679) - TMP_JAMP(1632) ! used 4 times + TMP_JAMP(1834) = TMP_JAMP(1673) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1598) ! used 4 times + TMP_JAMP(1833) = TMP_JAMP(1671) - AMP(1139) ! used 4 times + TMP_JAMP(1832) = TMP_JAMP(1668) - AMP(1041) ! used 4 times + TMP_JAMP(1831) = TMP_JAMP(1664) + TMP_JAMP(1659) ! used 4 times + TMP_JAMP(1830) = TMP_JAMP(1661) + TMP_JAMP(1654) ! used 4 times + TMP_JAMP(1829) = TMP_JAMP(1651) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1585) ! used 4 times + TMP_JAMP(1828) = TMP_JAMP(1648) + AMP(1151) ! used 4 times + TMP_JAMP(1827) = TMP_JAMP(1648) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1535) ! used 4 times + TMP_JAMP(1826) = TMP_JAMP(1645) + AMP(1184) ! used 4 times + TMP_JAMP(1825) = TMP_JAMP(1642) - TMP_JAMP(1606) ! used 4 times + TMP_JAMP(1824) = TMP_JAMP(1635) + TMP_JAMP(1551) ! used 4 times + TMP_JAMP(1823) = TMP_JAMP(1630) - TMP_JAMP(1606) ! used 4 times + TMP_JAMP(1822) = TMP_JAMP(1629) + AMP(1230) ! used 4 times + TMP_JAMP(1821) = TMP_JAMP(1627) + AMP(1229) ! used 4 times + TMP_JAMP(1820) = TMP_JAMP(1627) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1532) ! used 4 times + TMP_JAMP(1819) = TMP_JAMP(1623) + AMP(1307) ! 
used 4 times + TMP_JAMP(1818) = TMP_JAMP(1623) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1536) ! used 4 times + TMP_JAMP(1817) = TMP_JAMP(1620) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1576) ! used 4 times + TMP_JAMP(1816) = TMP_JAMP(1617) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1573) ! used 4 times + TMP_JAMP(1815) = TMP_JAMP(1615) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1571) ! used 4 times + TMP_JAMP(1814) = TMP_JAMP(1614) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1570) ! used 4 times + TMP_JAMP(1813) = TMP_JAMP(1613) + AMP(1308) ! used 4 times + TMP_JAMP(1812) = TMP_JAMP(1612) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1571) ! used 4 times + TMP_JAMP(1811) = TMP_JAMP(1612) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1570) ! used 4 times + TMP_JAMP(1810) = TMP_JAMP(1607) + AMP(1326) ! used 4 times + TMP_JAMP(1809) = TMP_JAMP(1605) - TMP_JAMP(1603) ! used 4 times + TMP_JAMP(1808) = TMP_JAMP(1605) + TMP_JAMP(1601) ! used 4 times + TMP_JAMP(1807) = TMP_JAMP(1604) - TMP_JAMP(1559) ! used 4 times + TMP_JAMP(1806) = TMP_JAMP(1604) - AMP(1141) ! used 4 times + TMP_JAMP(1805) = TMP_JAMP(1604) + TMP_JAMP(1600) ! used 4 times + TMP_JAMP(1804) = TMP_JAMP(1603) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1569) ! used 4 times + TMP_JAMP(1803) = TMP_JAMP(1603) + TMP_JAMP(1601) ! used 4 times + TMP_JAMP(1802) = TMP_JAMP(1602) - AMP(945) ! used 4 times + TMP_JAMP(1801) = TMP_JAMP(1602) + TMP_JAMP(1600) ! used 4 times + TMP_JAMP(1800) = TMP_JAMP(1602) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1531) ! used 4 times + TMP_JAMP(1799) = TMP_JAMP(1600) - AMP(1219) ! used 4 times + TMP_JAMP(1798) = TMP_JAMP(1600) - TMP_JAMP(1541) ! used 4 times + TMP_JAMP(1797) = TMP_JAMP(1599) - TMP_JAMP(1598) ! used 4 times + TMP_JAMP(1796) = TMP_JAMP(1599) + TMP_JAMP(1593) ! 
used 4 times + TMP_JAMP(1795) = TMP_JAMP(1599) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1120) ! used 4 times + TMP_JAMP(1794) = TMP_JAMP(1598) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1118) ! used 4 times + TMP_JAMP(1793) = TMP_JAMP(1598) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1126) ! used 4 times + TMP_JAMP(1792) = TMP_JAMP(1596) - AMP(1072) ! used 4 times + TMP_JAMP(1791) = TMP_JAMP(1596) - TMP_JAMP(1585) ! used 4 times + TMP_JAMP(1790) = TMP_JAMP(1594) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1041) ! used 4 times + TMP_JAMP(1789) = TMP_JAMP(1594) + TMP_JAMP(1569) ! used 4 times + TMP_JAMP(1788) = TMP_JAMP(1594) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1045) ! used 4 times + TMP_JAMP(1787) = TMP_JAMP(1593) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1198) ! used 4 times + TMP_JAMP(1786) = TMP_JAMP(1589) - TMP_JAMP(1588) ! used 4 times + TMP_JAMP(1785) = TMP_JAMP(1589) + TMP_JAMP(1576) ! used 4 times + TMP_JAMP(1784) = TMP_JAMP(1589) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1568) ! used 4 times + TMP_JAMP(1783) = TMP_JAMP(1588) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1568) ! used 4 times + TMP_JAMP(1782) = TMP_JAMP(1588) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1208) ! used 4 times + TMP_JAMP(1781) = TMP_JAMP(1587) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1204) ! used 4 times + TMP_JAMP(1780) = TMP_JAMP(1587) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1196) ! used 4 times + TMP_JAMP(1779) = TMP_JAMP(1585) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1063) ! used 4 times + TMP_JAMP(1778) = TMP_JAMP(1580) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1132) ! used 4 times + TMP_JAMP(1777) = TMP_JAMP(1580) + TMP_JAMP(1573) ! 
used 4 times + TMP_JAMP(1776) = TMP_JAMP(1580) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1567) ! used 4 times + TMP_JAMP(1775) = TMP_JAMP(1579) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1126) ! used 4 times + TMP_JAMP(1774) = TMP_JAMP(1579) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1567) ! used 4 times + TMP_JAMP(1773) = TMP_JAMP(1579) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1130) ! used 4 times + TMP_JAMP(1772) = TMP_JAMP(1578) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1054) ! used 4 times + TMP_JAMP(1771) = TMP_JAMP(1578) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1032) ! used 4 times + TMP_JAMP(1770) = TMP_JAMP(1576) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1288) ! used 4 times + TMP_JAMP(1769) = TMP_JAMP(1576) - TMP_JAMP(1571) ! used 4 times + TMP_JAMP(1768) = TMP_JAMP(1573) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1297) ! used 4 times + TMP_JAMP(1767) = TMP_JAMP(1573) - TMP_JAMP(1570) ! used 4 times + TMP_JAMP(1766) = TMP_JAMP(1566) + TMP_JAMP(1564) ! used 4 times + TMP_JAMP(1765) = TMP_JAMP(1566) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1559) ! used 4 times + TMP_JAMP(1764) = TMP_JAMP(1566) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1108) ! used 4 times + TMP_JAMP(1763) = TMP_JAMP(1565) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1106) ! used 4 times + TMP_JAMP(1762) = TMP_JAMP(1565) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1559) ! used 4 times + TMP_JAMP(1761) = TMP_JAMP(1565) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1114) ! used 4 times + TMP_JAMP(1760) = TMP_JAMP(1564) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1276) ! used 4 times + TMP_JAMP(1759) = TMP_JAMP(1561) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1280) ! 
used 4 times + TMP_JAMP(1758) = TMP_JAMP(1561) + AMP(1172) ! used 4 times + TMP_JAMP(1757) = TMP_JAMP(1561) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1301) ! used 4 times + TMP_JAMP(1756) = TMP_JAMP(1560) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1282) ! used 4 times + TMP_JAMP(1755) = TMP_JAMP(1560) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1274) ! used 4 times + TMP_JAMP(1754) = TMP_JAMP(1558) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1141) ! used 4 times + TMP_JAMP(1753) = TMP_JAMP(1558) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1550) ! used 4 times + TMP_JAMP(1752) = TMP_JAMP(1556) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1555) ! used 4 times + TMP_JAMP(1751) = TMP_JAMP(1556) - TMP_JAMP(1554) ! used 4 times + TMP_JAMP(1750) = TMP_JAMP(1555) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1554) ! used 4 times + TMP_JAMP(1749) = TMP_JAMP(1554) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1217) ! used 4 times + TMP_JAMP(1748) = TMP_JAMP(1549) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1548) ! used 4 times + TMP_JAMP(1747) = TMP_JAMP(1549) - TMP_JAMP(1544) ! used 4 times + TMP_JAMP(1746) = TMP_JAMP(1549) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1408) ! used 4 times + TMP_JAMP(1745) = TMP_JAMP(1547) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1541) ! used 4 times + TMP_JAMP(1744) = TMP_JAMP(1547) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1186) ! used 4 times + TMP_JAMP(1743) = TMP_JAMP(1546) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1184) ! used 4 times + TMP_JAMP(1742) = TMP_JAMP(1546) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1541) ! used 4 times + TMP_JAMP(1741) = TMP_JAMP(1546) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1192) ! 
used 4 times + TMP_JAMP(1740) = TMP_JAMP(1545) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1023) ! used 4 times + TMP_JAMP(1739) = TMP_JAMP(1545) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1410) ! used 4 times + TMP_JAMP(1738) = TMP_JAMP(1544) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1543) ! used 4 times + TMP_JAMP(1737) = TMP_JAMP(1544) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1264) ! used 4 times + TMP_JAMP(1736) = TMP_JAMP(1542) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1270) ! used 4 times + TMP_JAMP(1735) = TMP_JAMP(1542) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1262) ! used 4 times + TMP_JAMP(1734) = TMP_JAMP(1540) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1068) ! used 4 times + TMP_JAMP(1733) = TMP_JAMP(1539) + TMP_JAMP(1535) ! used 4 times + TMP_JAMP(1732) = TMP_JAMP(1536) + TMP_JAMP(1531) ! used 4 times + TMP_JAMP(1731) = TMP_JAMP(1536) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(981) ! used 4 times + TMP_JAMP(1730) = TMP_JAMP(1535) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(963) ! used 4 times + TMP_JAMP(1729) = TMP_JAMP(1534) + TMP_JAMP(1532) ! used 4 times + TMP_JAMP(1728) = TMP_JAMP(1532) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(972) ! used 4 times + TMP_JAMP(1727) = TMP_JAMP(1512) + AMP(985) ! used 4 times + TMP_JAMP(1726) = TMP_JAMP(1503) + AMP(1003) ! used 4 times + TMP_JAMP(1725) = TMP_JAMP(1491) + AMP(994) ! used 4 times + TMP_JAMP(1724) = AMP(1118) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1147) ! used 4 times + TMP_JAMP(1723) = AMP(1061) + AMP(1069) ! used 4 times + TMP_JAMP(1722) = AMP(1043) - AMP(1071) ! used 4 times + TMP_JAMP(1721) = AMP(1147) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1152) ! used 4 times + TMP_JAMP(1720) = AMP(1052) - AMP(1070) ! 
used 4 times + TMP_JAMP(1719) = AMP(1286) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1304) ! used 4 times + TMP_JAMP(1718) = AMP(1295) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1304) ! used 4 times + TMP_JAMP(1717) = AMP(1001) + AMP(1009) ! used 4 times + TMP_JAMP(1716) = AMP(983) - AMP(1011) ! used 4 times + TMP_JAMP(1715) = AMP(992) - AMP(1010) ! used 4 times + TMP_JAMP(1857) = TMP_JAMP(1833) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1713) ! used 4 times + TMP_JAMP(1856) = TMP_JAMP(1813) + TMP_JAMP(1622) ! used 4 times + TMP_JAMP(1855) = TMP_JAMP(1788) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1622) ! used 4 times + TMP_JAMP(1854) = TMP_JAMP(1782) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1644) ! used 4 times + TMP_JAMP(1853) = TMP_JAMP(1781) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1652) ! used 4 times + TMP_JAMP(1852) = TMP_JAMP(1780) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1645) ! used 4 times + TMP_JAMP(1851) = TMP_JAMP(1779) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1650) ! used 4 times + TMP_JAMP(1850) = TMP_JAMP(1773) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1671) ! used 4 times + TMP_JAMP(1849) = TMP_JAMP(1749) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1644) ! used 4 times + TMP_JAMP(1848) = TMP_JAMP(1741) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1652) ! used 4 times + TMP_JAMP(1847) = TMP_JAMP(1731) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1668) ! used 4 times + TMP_JAMP(1846) = TMP_JAMP(1730) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1651) ! used 4 times + TMP_JAMP(1845) = TMP_JAMP(1721) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1650) ! used 4 times + TMP_JAMP(1844) = TMP_JAMP(1720) - TMP_JAMP(1712) ! 
used 4 times + TMP_JAMP(1862) = TMP_JAMP(1722) - TMP_JAMP(1714) ! used 3 times + TMP_JAMP(1861) = TMP_JAMP(1670) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1557) ! used 3 times + TMP_JAMP(1860) = TMP_JAMP(1631) - TMP_JAMP(1574) ! used 3 times + TMP_JAMP(1859) = TMP_JAMP(1608) + TMP_JAMP(1479) ! used 3 times + TMP_JAMP(1858) = TMP_JAMP(1595) + AMP(1074) ! used 3 times + TMP_JAMP(1863) = TMP_JAMP(1861) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1761) ! used 3 times + TMP_JAMP(2077) = TMP_JAMP(1863) - TMP_JAMP(1807) ! used 2 times + TMP_JAMP(2076) = TMP_JAMP(1862) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1790) ! used 2 times + TMP_JAMP(2075) = TMP_JAMP(1857) + TMP_JAMP(1828) ! used 2 times + TMP_JAMP(2074) = TMP_JAMP(1857) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1558) ! used 2 times + TMP_JAMP(2073) = TMP_JAMP(1856) - TMP_JAMP(1669) ! used 2 times + TMP_JAMP(2072) = TMP_JAMP(1855) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1695) ! used 2 times + TMP_JAMP(2071) = TMP_JAMP(1854) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1568) ! used 2 times + TMP_JAMP(2070) = TMP_JAMP(1853) + TMP_JAMP(1786) ! used 2 times + TMP_JAMP(2069) = TMP_JAMP(1852) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1831) ! used 2 times + TMP_JAMP(2068) = TMP_JAMP(1852) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1591) ! used 2 times + TMP_JAMP(2067) = TMP_JAMP(1850) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1567) ! used 2 times + TMP_JAMP(2066) = TMP_JAMP(1849) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1555) ! used 2 times + TMP_JAMP(2065) = TMP_JAMP(1848) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1750) ! used 2 times + TMP_JAMP(2064) = TMP_JAMP(1846) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1711) ! 
used 2 times + TMP_JAMP(2063) = TMP_JAMP(1845) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1647) ! used 2 times + TMP_JAMP(2062) = TMP_JAMP(1844) - TMP_JAMP(1823) ! used 2 times + TMP_JAMP(2061) = TMP_JAMP(1840) - TMP_JAMP(1716) ! used 2 times + TMP_JAMP(2060) = TMP_JAMP(1839) + TMP_JAMP(1710) ! used 2 times + TMP_JAMP(2059) = TMP_JAMP(1839) - TMP_JAMP(1711) ! used 2 times + TMP_JAMP(2058) = TMP_JAMP(1838) + TMP_JAMP(1521) ! used 2 times + TMP_JAMP(2057) = TMP_JAMP(1837) + TMP_JAMP(1734) ! used 2 times + TMP_JAMP(2056) = TMP_JAMP(1836) + TMP_JAMP(1623) ! used 2 times + TMP_JAMP(2055) = TMP_JAMP(1836) + TMP_JAMP(1622) ! used 2 times + TMP_JAMP(2054) = TMP_JAMP(1835) - TMP_JAMP(1825) ! used 2 times + TMP_JAMP(2053) = TMP_JAMP(1835) + TMP_JAMP(1666) ! used 2 times + TMP_JAMP(2052) = TMP_JAMP(1831) + TMP_JAMP(1810) ! used 2 times + TMP_JAMP(2051) = TMP_JAMP(1831) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1556) ! used 2 times + TMP_JAMP(2050) = TMP_JAMP(1830) + TMP_JAMP(1665) ! used 2 times + TMP_JAMP(2049) = TMP_JAMP(1830) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1743) ! used 2 times + TMP_JAMP(2048) = TMP_JAMP(1830) - TMP_JAMP(1641) ! used 2 times + TMP_JAMP(2047) = TMP_JAMP(1828) - TMP_JAMP(1567) ! used 2 times + TMP_JAMP(2046) = TMP_JAMP(1828) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1733) ! used 2 times + TMP_JAMP(2045) = TMP_JAMP(1827) + TMP_JAMP(1726) ! used 2 times + TMP_JAMP(2044) = TMP_JAMP(1825) - TMP_JAMP(1476) ! used 2 times + TMP_JAMP(2043) = TMP_JAMP(1824) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1760) ! used 2 times + TMP_JAMP(2042) = TMP_JAMP(1824) - AMP(1350) ! used 2 times + TMP_JAMP(2041) = TMP_JAMP(1823) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1728) ! used 2 times + TMP_JAMP(2040) = TMP_JAMP(1822) + TMP_JAMP(1591) ! used 2 times + TMP_JAMP(2039) = TMP_JAMP(1822) - TMP_JAMP(1626) ! 
used 2 times + TMP_JAMP(2038) = TMP_JAMP(1822) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1742) ! used 2 times + TMP_JAMP(2037) = TMP_JAMP(1821) - TMP_JAMP(1626) ! used 2 times + TMP_JAMP(2036) = TMP_JAMP(1821) + TMP_JAMP(1555) ! used 2 times + TMP_JAMP(2035) = TMP_JAMP(1821) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1729) ! used 2 times + TMP_JAMP(2034) = TMP_JAMP(1820) + TMP_JAMP(1725) ! used 2 times + TMP_JAMP(2033) = TMP_JAMP(1819) - TMP_JAMP(1669) ! used 2 times + TMP_JAMP(2032) = TMP_JAMP(1819) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1732) ! used 2 times + TMP_JAMP(2031) = TMP_JAMP(1818) + TMP_JAMP(1727) ! used 2 times + TMP_JAMP(2030) = TMP_JAMP(1815) + TMP_JAMP(1719) ! used 2 times + TMP_JAMP(2029) = TMP_JAMP(1814) - TMP_JAMP(1718) ! used 2 times + TMP_JAMP(2028) = TMP_JAMP(1813) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1569) ! used 2 times + TMP_JAMP(2027) = TMP_JAMP(1812) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1760) ! used 2 times + TMP_JAMP(2026) = TMP_JAMP(1811) + TMP_JAMP(1757) ! used 2 times + TMP_JAMP(2025) = TMP_JAMP(1809) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1795) ! used 2 times + TMP_JAMP(2024) = TMP_JAMP(1808) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1795) ! used 2 times + TMP_JAMP(2023) = TMP_JAMP(1805) - TMP_JAMP(1676) ! used 2 times + TMP_JAMP(2022) = TMP_JAMP(1803) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1787) ! used 2 times + TMP_JAMP(2021) = TMP_JAMP(1800) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(980) ! used 2 times + TMP_JAMP(2020) = TMP_JAMP(1799) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1218) ! used 2 times + TMP_JAMP(2019) = TMP_JAMP(1798) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1751) ! 
used 2 times + TMP_JAMP(2018) = TMP_JAMP(1795) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1688) ! used 2 times + TMP_JAMP(2017) = TMP_JAMP(1794) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1673) ! used 2 times + TMP_JAMP(2016) = TMP_JAMP(1793) - TMP_JAMP(1774) ! used 2 times + TMP_JAMP(2015) = TMP_JAMP(1792) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1699) ! used 2 times + TMP_JAMP(2014) = TMP_JAMP(1789) - AMP(1040) ! used 2 times + TMP_JAMP(2013) = TMP_JAMP(1787) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1677) ! used 2 times + TMP_JAMP(2012) = TMP_JAMP(1787) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1686) ! used 2 times + TMP_JAMP(2011) = TMP_JAMP(1784) + TMP_JAMP(1769) ! used 2 times + TMP_JAMP(2010) = TMP_JAMP(1784) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1658) ! used 2 times + TMP_JAMP(2009) = TMP_JAMP(1783) - AMP(1205) ! used 2 times + TMP_JAMP(2008) = TMP_JAMP(1779) + TMP_JAMP(1597) ! used 2 times + TMP_JAMP(2007) = TMP_JAMP(1778) - TMP_JAMP(1775) ! used 2 times + TMP_JAMP(2006) = TMP_JAMP(1776) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1636) ! used 2 times + TMP_JAMP(2005) = TMP_JAMP(1776) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1518) ! used 2 times + TMP_JAMP(2004) = TMP_JAMP(1772) + TMP_JAMP(1584) ! used 2 times + TMP_JAMP(2003) = TMP_JAMP(1771) + AMP(1030) ! used 2 times + TMP_JAMP(2002) = TMP_JAMP(1769) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1719) ! used 2 times + TMP_JAMP(2001) = TMP_JAMP(1767) + TMP_JAMP(1736) ! used 2 times + TMP_JAMP(2000) = TMP_JAMP(1767) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1718) ! used 2 times + TMP_JAMP(1999) = TMP_JAMP(1765) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1638) ! used 2 times + TMP_JAMP(1998) = TMP_JAMP(1764) + TMP_JAMP(1760) ! 
used 2 times + TMP_JAMP(1997) = TMP_JAMP(1763) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1674) ! used 2 times + TMP_JAMP(1996) = TMP_JAMP(1762) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1672) ! used 2 times + TMP_JAMP(1995) = TMP_JAMP(1758) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1280) ! used 2 times + TMP_JAMP(1994) = TMP_JAMP(1757) + AMP(1172) ! used 2 times + TMP_JAMP(1993) = TMP_JAMP(1756) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1612) ! used 2 times + TMP_JAMP(1992) = TMP_JAMP(1756) + AMP(1283) ! used 2 times + TMP_JAMP(1991) = TMP_JAMP(1755) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1562) ! used 2 times + TMP_JAMP(1990) = TMP_JAMP(1754) - AMP(1116) ! used 2 times + TMP_JAMP(1989) = TMP_JAMP(1753) + AMP(1140) ! used 2 times + TMP_JAMP(1988) = TMP_JAMP(1752) + AMP(1218) ! used 2 times + TMP_JAMP(1987) = TMP_JAMP(1752) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1677) ! used 2 times + TMP_JAMP(1986) = TMP_JAMP(1746) + TMP_JAMP(1744) ! used 2 times + TMP_JAMP(1985) = TMP_JAMP(1746) - TMP_JAMP(1737) ! used 2 times + TMP_JAMP(1984) = TMP_JAMP(1745) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1660) ! used 2 times + TMP_JAMP(1983) = TMP_JAMP(1744) - AMP(1193) ! used 2 times + TMP_JAMP(1982) = TMP_JAMP(1740) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1723) ! used 2 times + TMP_JAMP(1981) = TMP_JAMP(1740) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1734) ! used 2 times + TMP_JAMP(1980) = TMP_JAMP(1739) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1548) ! used 2 times + TMP_JAMP(1979) = TMP_JAMP(1738) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1620) ! used 2 times + TMP_JAMP(1978) = TMP_JAMP(1737) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1653) ! 
used 2 times + TMP_JAMP(1977) = TMP_JAMP(1736) + AMP(1271) ! used 2 times + TMP_JAMP(1976) = TMP_JAMP(1735) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1625) ! used 2 times + TMP_JAMP(1975) = TMP_JAMP(1735) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1543) ! used 2 times + TMP_JAMP(1974) = TMP_JAMP(1734) - TMP_JAMP(1705) ! used 2 times + TMP_JAMP(1973) = TMP_JAMP(1728) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1630) ! used 2 times + TMP_JAMP(1972) = TMP_JAMP(1727) - TMP_JAMP(1529) ! used 2 times + TMP_JAMP(1971) = TMP_JAMP(1727) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(987) ! used 2 times + TMP_JAMP(1970) = TMP_JAMP(1726) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1005) ! used 2 times + TMP_JAMP(1969) = TMP_JAMP(1725) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(996) ! used 2 times + TMP_JAMP(1968) = TMP_JAMP(1723) - TMP_JAMP(1700) ! used 2 times + TMP_JAMP(1967) = TMP_JAMP(1717) - TMP_JAMP(1711) ! used 2 times + TMP_JAMP(1966) = TMP_JAMP(1717) + TMP_JAMP(1716) ! used 2 times + TMP_JAMP(1965) = TMP_JAMP(1716) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(979) ! used 2 times + TMP_JAMP(1964) = TMP_JAMP(1715) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(970) ! used 2 times + TMP_JAMP(1963) = TMP_JAMP(1715) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(991) ! used 2 times + TMP_JAMP(1962) = TMP_JAMP(1712) - TMP_JAMP(1692) ! used 2 times + TMP_JAMP(1961) = TMP_JAMP(1707) + TMP_JAMP(1687) ! used 2 times + TMP_JAMP(1960) = TMP_JAMP(1706) - TMP_JAMP(1479) ! used 2 times + TMP_JAMP(1959) = TMP_JAMP(1705) - TMP_JAMP(1526) ! used 2 times + TMP_JAMP(1958) = TMP_JAMP(1703) + TMP_JAMP(1687) ! used 2 times + TMP_JAMP(1957) = TMP_JAMP(1702) - TMP_JAMP(1476) ! used 2 times + TMP_JAMP(1956) = TMP_JAMP(1701) - TMP_JAMP(1651) ! used 2 times + TMP_JAMP(1955) = TMP_JAMP(1692) - TMP_JAMP(1624) ! 
used 2 times + TMP_JAMP(1954) = TMP_JAMP(1690) + TMP_JAMP(1629) ! used 2 times + TMP_JAMP(1953) = TMP_JAMP(1688) - AMP(1326) ! used 2 times + TMP_JAMP(1952) = TMP_JAMP(1686) - AMP(1326) ! used 2 times + TMP_JAMP(1951) = TMP_JAMP(1681) + TMP_JAMP(1530) ! used 2 times + TMP_JAMP(1950) = TMP_JAMP(1680) + TMP_JAMP(1656) ! used 2 times + TMP_JAMP(1949) = TMP_JAMP(1678) + TMP_JAMP(1520) ! used 2 times + TMP_JAMP(1948) = TMP_JAMP(1678) + TMP_JAMP(1643) ! used 2 times + TMP_JAMP(1947) = TMP_JAMP(1677) + TMP_JAMP(1663) ! used 2 times + TMP_JAMP(1946) = TMP_JAMP(1675) + TMP_JAMP(1624) ! used 2 times + TMP_JAMP(1945) = TMP_JAMP(1670) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(1127) ! used 2 times - TMP_JAMP(2719) = TMP_JAMP(2508) - TMP_JAMP(2506) ! used 2 times - TMP_JAMP(2718) = TMP_JAMP(2502) - AMP(1451) ! used 2 times - TMP_JAMP(2717) = TMP_JAMP(2499) + TMP_JAMP(2495) ! used 2 times - TMP_JAMP(2716) = TMP_JAMP(2487) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1719) ! used 2 times - TMP_JAMP(2715) = TMP_JAMP(2488) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1070) ! used 2 times - TMP_JAMP(2714) = TMP_JAMP(2490) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(2489) ! used 2 times - TMP_JAMP(2713) = TMP_JAMP(2472) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(2467) ! used 2 times - TMP_JAMP(2712) = TMP_JAMP(2475) - TMP_JAMP(2474) ! used 2 times - TMP_JAMP(2711) = TMP_JAMP(2413) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(2412) ! used 2 times - TMP_JAMP(2710) = TMP_JAMP(2395) - AMP(582) ! used 2 times - TMP_JAMP(2709) = TMP_JAMP(2375) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(912) ! used 2 times - TMP_JAMP(2708) = TMP_JAMP(2373) - TMP_JAMP(2371) ! used 2 times - TMP_JAMP(2707) = TMP_JAMP(2353) - AMP(1511) ! used 2 times - TMP_JAMP(2706) = TMP_JAMP(2345) + AMP(1107) ! used 2 times - TMP_JAMP(2705) = TMP_JAMP(2330) + AMP(1275) ! 
used 2 times - TMP_JAMP(2704) = TMP_JAMP(2326) - TMP_JAMP(2325) ! used 2 times - TMP_JAMP(2703) = TMP_JAMP(2329) + TMP_JAMP(2327) ! used 2 times - TMP_JAMP(2702) = TMP_JAMP(2308) + AMP(364) ! used 2 times - TMP_JAMP(2701) = TMP_JAMP(2307) + AMP(1021) ! used 2 times - TMP_JAMP(2700) = TMP_JAMP(2287) - TMP_JAMP(2285) ! used 2 times - TMP_JAMP(2699) = TMP_JAMP(2269) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(46) ! used 2 times - TMP_JAMP(2698) = TMP_JAMP(2261) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(2257) ! used 2 times - TMP_JAMP(2697) = TMP_JAMP(2256) - TMP_JAMP(2255) ! used 2 times - TMP_JAMP(2696) = TMP_JAMP(2244) - TMP_JAMP(1407) ! used 2 times - TMP_JAMP(2695) = TMP_JAMP(2243) - TMP_JAMP(2242) ! used 2 times - TMP_JAMP(2694) = TMP_JAMP(2236) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(2235) ! used 2 times - TMP_JAMP(2693) = TMP_JAMP(2231) - TMP_JAMP(2230) ! used 2 times - TMP_JAMP(2692) = TMP_JAMP(2225) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1140) ! used 2 times - TMP_JAMP(2691) = TMP_JAMP(2227) - TMP_JAMP(2226) ! used 2 times - TMP_JAMP(2690) = TMP_JAMP(2211) - AMP(1185) ! used 2 times - TMP_JAMP(2689) = TMP_JAMP(2213) + TMP_JAMP(2212) ! used 2 times - TMP_JAMP(2688) = TMP_JAMP(2205) - TMP_JAMP(2202) ! used 2 times - TMP_JAMP(2687) = TMP_JAMP(2200) + AMP(615) ! used 2 times - TMP_JAMP(2686) = TMP_JAMP(2192) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(2191) ! used 2 times - TMP_JAMP(2685) = TMP_JAMP(2182) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(2181) ! used 2 times - TMP_JAMP(2684) = TMP_JAMP(2175) + TMP_JAMP(2170) ! used 2 times - TMP_JAMP(2683) = TMP_JAMP(2169) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(2165) ! used 2 times - TMP_JAMP(2682) = TMP_JAMP(2155) + ((-0.000000000000000D+00 + TMP_JAMP(1944) = TMP_JAMP(1668) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1039) ! 
used 2 times + TMP_JAMP(1943) = TMP_JAMP(1663) - TMP_JAMP(1625) ! used 2 times + TMP_JAMP(1942) = TMP_JAMP(1663) + TMP_JAMP(1518) ! used 2 times + TMP_JAMP(1941) = TMP_JAMP(1661) + TMP_JAMP(1526) ! used 2 times + TMP_JAMP(1940) = TMP_JAMP(1661) + AMP(1172) ! used 2 times + TMP_JAMP(1939) = TMP_JAMP(1658) - TMP_JAMP(1592) ! used 2 times + TMP_JAMP(1938) = TMP_JAMP(1657) - AMP(1376) ! used 2 times + TMP_JAMP(1937) = TMP_JAMP(1653) - TMP_JAMP(1577) ! used 2 times + TMP_JAMP(1936) = TMP_JAMP(1649) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1150) ! used 2 times + TMP_JAMP(1935) = TMP_JAMP(1648) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1549) ! used 2 times + TMP_JAMP(1934) = TMP_JAMP(1645) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1547) ! used 2 times + TMP_JAMP(1933) = TMP_JAMP(1643) + TMP_JAMP(1582) ! used 2 times + TMP_JAMP(1932) = TMP_JAMP(1637) + TMP_JAMP(1632) ! used 2 times + TMP_JAMP(1931) = TMP_JAMP(1634) + TMP_JAMP(1627) ! used 2 times + TMP_JAMP(1930) = TMP_JAMP(1630) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1031) ! used 2 times + TMP_JAMP(1929) = TMP_JAMP(1628) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1228) ! used 2 times + TMP_JAMP(1928) = TMP_JAMP(1624) + TMP_JAMP(1616) ! used 2 times + TMP_JAMP(1927) = TMP_JAMP(1620) + TMP_JAMP(1551) ! used 2 times + TMP_JAMP(1926) = TMP_JAMP(1620) + TMP_JAMP(1615) ! used 2 times + TMP_JAMP(1925) = TMP_JAMP(1618) + TMP_JAMP(1613) ! used 2 times + TMP_JAMP(1924) = TMP_JAMP(1617) - AMP(1376) ! used 2 times + TMP_JAMP(1923) = TMP_JAMP(1617) - TMP_JAMP(1614) ! used 2 times + TMP_JAMP(1922) = TMP_JAMP(1613) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1564) ! used 2 times + TMP_JAMP(1921) = TMP_JAMP(1612) + TMP_JAMP(1543) ! used 2 times + TMP_JAMP(1920) = TMP_JAMP(1611) - TMP_JAMP(1548) ! 
used 2 times + TMP_JAMP(1919) = TMP_JAMP(1609) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(961) ! used 2 times + TMP_JAMP(1918) = TMP_JAMP(1609) - TMP_JAMP(1530) ! used 2 times + TMP_JAMP(1917) = TMP_JAMP(1598) - AMP(1128) ! used 2 times + TMP_JAMP(1916) = TMP_JAMP(1597) + AMP(1060) ! used 2 times + TMP_JAMP(1915) = TMP_JAMP(1592) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1539) ! used 2 times + TMP_JAMP(1914) = TMP_JAMP(1591) + TMP_JAMP(1577) ! used 2 times + TMP_JAMP(1913) = TMP_JAMP(1591) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1197) ! used 2 times + TMP_JAMP(1912) = TMP_JAMP(1590) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1202) ! used 2 times + TMP_JAMP(1911) = TMP_JAMP(1586) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1121) ! used 2 times + TMP_JAMP(1910) = TMP_JAMP(1586) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1119) ! used 2 times + TMP_JAMP(1909) = TMP_JAMP(1584) + AMP(1051) ! used 2 times + TMP_JAMP(1908) = TMP_JAMP(1583) + AMP(1073) ! used 2 times + TMP_JAMP(1907) = TMP_JAMP(1577) + TMP_JAMP(1510) ! used 2 times + TMP_JAMP(1906) = TMP_JAMP(1575) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1287) ! used 2 times + TMP_JAMP(1905) = TMP_JAMP(1575) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1289) ! used 2 times + TMP_JAMP(1904) = TMP_JAMP(1574) + TMP_JAMP(1519) ! used 2 times + TMP_JAMP(1903) = TMP_JAMP(1572) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1296) ! used 2 times + TMP_JAMP(1902) = TMP_JAMP(1572) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1298) ! used 2 times + TMP_JAMP(1901) = TMP_JAMP(1569) + AMP(1042) ! used 2 times + TMP_JAMP(1900) = TMP_JAMP(1563) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1539) ! used 2 times + TMP_JAMP(1899) = TMP_JAMP(1562) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1275) ! 
used 2 times + TMP_JAMP(1898) = TMP_JAMP(1559) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1107) ! used 2 times + TMP_JAMP(1897) = TMP_JAMP(1553) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1058) ! used 2 times + TMP_JAMP(1896) = TMP_JAMP(1551) - TMP_JAMP(1529) ! used 2 times + TMP_JAMP(1895) = TMP_JAMP(1550) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1115) ! used 2 times + TMP_JAMP(1894) = TMP_JAMP(1550) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1142) ! used 2 times + TMP_JAMP(1893) = TMP_JAMP(1548) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1021) ! used 2 times + TMP_JAMP(1892) = TMP_JAMP(1548) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1409) ! used 2 times + TMP_JAMP(1891) = TMP_JAMP(1543) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1263) ! used 2 times + TMP_JAMP(1890) = TMP_JAMP(1541) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1185) ! used 2 times + TMP_JAMP(1889) = TMP_JAMP(1539) - AMP(962) ! used 2 times + TMP_JAMP(1888) = TMP_JAMP(1539) + AMP(1000) ! used 2 times + TMP_JAMP(1887) = TMP_JAMP(1534) - AMP(971) ! used 2 times + TMP_JAMP(1886) = TMP_JAMP(1512) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1047) ! used 2 times + TMP_JAMP(1885) = TMP_JAMP(1507) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1022) ! used 2 times + TMP_JAMP(1884) = TMP_JAMP(1505) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1199) ! used 2 times + TMP_JAMP(1883) = TMP_JAMP(1505) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1220) ! used 2 times + TMP_JAMP(1882) = TMP_JAMP(1504) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1211) ! used 2 times + TMP_JAMP(1881) = TMP_JAMP(1504) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1187) ! 
used 2 times + TMP_JAMP(1880) = TMP_JAMP(1503) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1065) ! used 2 times + TMP_JAMP(1879) = TMP_JAMP(1501) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1412) ! used 2 times + TMP_JAMP(1878) = TMP_JAMP(1492) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1133) ! used 2 times + TMP_JAMP(1877) = TMP_JAMP(1491) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1056) ! used 2 times + TMP_JAMP(1876) = TMP_JAMP(1481) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1265) ! used 2 times + TMP_JAMP(1875) = TMP_JAMP(1480) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1277) ! used 2 times + TMP_JAMP(1874) = TMP_JAMP(1476) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(982) ! used 2 times + TMP_JAMP(1873) = AMP(1374) + AMP(1376) ! used 2 times + TMP_JAMP(1872) = AMP(1206) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1210) ! used 2 times + TMP_JAMP(1871) = AMP(1124) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1352) ! used 2 times + TMP_JAMP(1870) = AMP(1284) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1288) ! used 2 times + TMP_JAMP(1869) = AMP(1106) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1109) ! used 2 times + TMP_JAMP(1868) = AMP(1209) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1210) ! used 2 times + TMP_JAMP(1867) = AMP(1094) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1292) ! used 2 times + TMP_JAMP(1866) = AMP(1272) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1297) ! used 2 times + TMP_JAMP(1865) = AMP(1194) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1219) ! used 2 times + TMP_JAMP(1864) = AMP(1131) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1132) ! 
used 2 times + TMP_JAMP(2141) = TMP_JAMP(2077) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1990) ! used 2 times + TMP_JAMP(2140) = TMP_JAMP(2076) - TMP_JAMP(1944) ! used 2 times + TMP_JAMP(2139) = TMP_JAMP(2072) + TMP_JAMP(1858) ! used 2 times + TMP_JAMP(2138) = TMP_JAMP(2071) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1882) ! used 2 times + TMP_JAMP(2137) = TMP_JAMP(2070) - TMP_JAMP(1872) ! used 2 times + TMP_JAMP(2136) = TMP_JAMP(2065) - TMP_JAMP(1983) ! used 2 times + TMP_JAMP(2135) = TMP_JAMP(2062) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(2003) ! used 2 times + TMP_JAMP(2134) = TMP_JAMP(2054) + TMP_JAMP(1960) ! used 2 times + TMP_JAMP(2133) = TMP_JAMP(2053) - TMP_JAMP(1924) ! used 2 times + TMP_JAMP(2132) = TMP_JAMP(2051) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1849) ! used 2 times + TMP_JAMP(2131) = TMP_JAMP(2048) + TMP_JAMP(1927) ! used 2 times + TMP_JAMP(2130) = TMP_JAMP(2046) + TMP_JAMP(1970) ! used 2 times + TMP_JAMP(2129) = TMP_JAMP(2041) - TMP_JAMP(1964) ! used 2 times + TMP_JAMP(2128) = TMP_JAMP(2035) + TMP_JAMP(1969) ! used 2 times + TMP_JAMP(2127) = TMP_JAMP(2032) + TMP_JAMP(1971) ! used 2 times + TMP_JAMP(2126) = TMP_JAMP(2030) - TMP_JAMP(1905) ! used 2 times + TMP_JAMP(2125) = TMP_JAMP(2029) + TMP_JAMP(1902) ! used 2 times + TMP_JAMP(2124) = TMP_JAMP(2024) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(2013) ! used 2 times + TMP_JAMP(2123) = TMP_JAMP(2021) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1847) ! used 2 times + TMP_JAMP(2122) = TMP_JAMP(2017) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1911) ! used 2 times + TMP_JAMP(2121) = TMP_JAMP(2016) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1945) ! used 2 times + TMP_JAMP(2120) = TMP_JAMP(2015) - TMP_JAMP(1851) ! 
used 2 times + TMP_JAMP(2119) = TMP_JAMP(2014) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1832) ! used 2 times + TMP_JAMP(2118) = TMP_JAMP(2009) - TMP_JAMP(1853) ! used 2 times + TMP_JAMP(2117) = TMP_JAMP(2007) + TMP_JAMP(1917) ! used 2 times + TMP_JAMP(2116) = TMP_JAMP(2004) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1877) ! used 2 times + TMP_JAMP(2115) = TMP_JAMP(2002) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1926) ! used 2 times + TMP_JAMP(2114) = TMP_JAMP(2000) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1923) ! used 2 times + TMP_JAMP(2113) = TMP_JAMP(1996) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1869) ! used 2 times + TMP_JAMP(2112) = TMP_JAMP(1993) - TMP_JAMP(1870) ! used 2 times + TMP_JAMP(2111) = TMP_JAMP(1991) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1875) ! used 2 times + TMP_JAMP(2110) = TMP_JAMP(1989) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1806) ! used 2 times + TMP_JAMP(2109) = TMP_JAMP(1982) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1956) ! used 2 times + TMP_JAMP(2108) = TMP_JAMP(1980) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1879) ! used 2 times + TMP_JAMP(2107) = TMP_JAMP(1976) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1925) ! used 2 times + TMP_JAMP(2106) = TMP_JAMP(1975) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1876) ! used 2 times + TMP_JAMP(2105) = TMP_JAMP(1973) + TMP_JAMP(1887) ! used 2 times + TMP_JAMP(2104) = TMP_JAMP(1965) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1847) ! used 2 times + TMP_JAMP(2103) = TMP_JAMP(1955) - TMP_JAMP(1896) ! used 2 times + TMP_JAMP(2102) = TMP_JAMP(1951) + TMP_JAMP(1704) ! used 2 times + TMP_JAMP(2101) = TMP_JAMP(1949) - TMP_JAMP(1859) ! used 2 times + TMP_JAMP(2100) = TMP_JAMP(1946) + TMP_JAMP(1904) ! 
used 2 times + TMP_JAMP(2099) = TMP_JAMP(1943) + TMP_JAMP(1907) ! used 2 times + TMP_JAMP(2098) = TMP_JAMP(1942) + TMP_JAMP(1680) ! used 2 times + TMP_JAMP(2097) = TMP_JAMP(1935) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1739) ! used 2 times + TMP_JAMP(2096) = TMP_JAMP(1931) + TMP_JAMP(1646) ! used 2 times + TMP_JAMP(2095) = TMP_JAMP(1930) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1771) ! used 2 times + TMP_JAMP(2094) = TMP_JAMP(1922) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1755) ! used 2 times + TMP_JAMP(2093) = TMP_JAMP(1921) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1866) ! used 2 times + TMP_JAMP(2092) = TMP_JAMP(1916) + TMP_JAMP(1792) ! used 2 times + TMP_JAMP(2091) = TMP_JAMP(1909) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1844) ! used 2 times + TMP_JAMP(2090) = TMP_JAMP(1908) + TMP_JAMP(1772) ! used 2 times + TMP_JAMP(2089) = TMP_JAMP(1906) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1770) ! used 2 times + TMP_JAMP(2088) = TMP_JAMP(1903) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1768) ! used 2 times + TMP_JAMP(2087) = TMP_JAMP(1893) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1791) ! used 2 times + TMP_JAMP(2086) = TMP_JAMP(1889) + TMP_JAMP(1846) ! used 2 times + TMP_JAMP(2085) = TMP_JAMP(1888) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1717) ! used 2 times + TMP_JAMP(2084) = TMP_JAMP(1886) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1855) ! used 2 times + TMP_JAMP(2083) = TMP_JAMP(1885) - TMP_JAMP(1829) ! used 2 times + TMP_JAMP(2082) = TMP_JAMP(1881) - TMP_JAMP(1826) ! used 2 times + TMP_JAMP(2081) = TMP_JAMP(1880) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1845) ! used 2 times + TMP_JAMP(2080) = TMP_JAMP(1871) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1581) ! 
used 2 times + TMP_JAMP(2079) = TMP_JAMP(1867) + TMP_JAMP(1552) ! used 2 times + TMP_JAMP(2078) = TMP_JAMP(1865) - TMP_JAMP(1848) ! used 2 times + TMP_JAMP(2153) = TMP_JAMP(2109) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(2087) ! used 2 times + TMP_JAMP(2152) = TMP_JAMP(2101) + TMP_JAMP(1961) ! used 2 times + TMP_JAMP(2151) = TMP_JAMP(2097) + TMP_JAMP(1950) ! used 2 times + TMP_JAMP(2150) = TMP_JAMP(2094) - TMP_JAMP(1928) ! used 2 times + TMP_JAMP(2149) = TMP_JAMP(2093) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(2001) ! used 2 times + TMP_JAMP(2148) = TMP_JAMP(2092) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1968) ! used 2 times + TMP_JAMP(2147) = TMP_JAMP(2090) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1954) ! used 2 times + TMP_JAMP(2146) = TMP_JAMP(2084) + TMP_JAMP(2028) ! used 2 times + TMP_JAMP(2145) = TMP_JAMP(2083) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1981) ! used 2 times + TMP_JAMP(2144) = TMP_JAMP(2082) - TMP_JAMP(2038) ! used 2 times + TMP_JAMP(2143) = TMP_JAMP(2081) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(2008) ! used 2 times + TMP_JAMP(2142) = TMP_JAMP(2078) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(2019) ! used 2 times + TMP_JAMP(2225) = AMP(1817) + AMP(1824) ! used 16 times + TMP_JAMP(2224) = AMP(1811) + AMP(1822) ! used 16 times + TMP_JAMP(2223) = AMP(1816) + AMP(1821) ! used 16 times + TMP_JAMP(2222) = AMP(1810) + AMP(1819) ! used 16 times + TMP_JAMP(2221) = AMP(1493) + AMP(1497) ! used 16 times + TMP_JAMP(2220) = AMP(1487) + AMP(1495) ! used 16 times + TMP_JAMP(2219) = AMP(1492) + AMP(1503) ! used 16 times + TMP_JAMP(2218) = AMP(1486) + AMP(1501) ! used 16 times + TMP_JAMP(2217) = AMP(1683) + AMP(1689) ! used 16 times + TMP_JAMP(2216) = AMP(1677) + AMP(1687) ! used 16 times + TMP_JAMP(2215) = AMP(1681) + AMP(1686) ! used 16 times + TMP_JAMP(2214) = AMP(1675) + AMP(1684) ! 
used 16 times + TMP_JAMP(2213) = AMP(1490) + AMP(1496) ! used 16 times + TMP_JAMP(2212) = AMP(1489) + AMP(1502) ! used 16 times + TMP_JAMP(2211) = AMP(1871) + AMP(1878) ! used 16 times + TMP_JAMP(2210) = AMP(1865) + AMP(1876) ! used 16 times + TMP_JAMP(2209) = AMP(1814) + AMP(1823) ! used 16 times + TMP_JAMP(2208) = AMP(1813) + AMP(1820) ! used 16 times + TMP_JAMP(2207) = AMP(1870) + AMP(1875) ! used 16 times + TMP_JAMP(2206) = AMP(1864) + AMP(1873) ! used 16 times + TMP_JAMP(2205) = AMP(1548) + AMP(1554) ! used 16 times + TMP_JAMP(2204) = AMP(1542) + AMP(1552) ! used 16 times + TMP_JAMP(2203) = AMP(1547) + AMP(1551) ! used 16 times + TMP_JAMP(2202) = AMP(1541) + AMP(1549) ! used 16 times + TMP_JAMP(2201) = AMP(1791) + AMP(1797) ! used 16 times + TMP_JAMP(2200) = AMP(1785) + AMP(1795) ! used 16 times + TMP_JAMP(2199) = AMP(1782) - AMP(1790) ! used 16 times + TMP_JAMP(2198) = AMP(1780) - AMP(1784) ! used 16 times + TMP_JAMP(2197) = AMP(1680) + AMP(1688) ! used 16 times + TMP_JAMP(2196) = AMP(1678) + AMP(1685) ! used 16 times + TMP_JAMP(2195) = AMP(1789) + AMP(1794) ! used 16 times + TMP_JAMP(2194) = AMP(1783) + AMP(1792) ! used 16 times + TMP_JAMP(2193) = AMP(1545) + AMP(1553) ! used 16 times + TMP_JAMP(2192) = AMP(1544) + AMP(1550) ! used 16 times + TMP_JAMP(2191) = AMP(1546) + AMP(1557) ! used 16 times + TMP_JAMP(2190) = AMP(1540) + AMP(1555) ! used 16 times + TMP_JAMP(2189) = AMP(1543) + AMP(1556) ! used 16 times + TMP_JAMP(2188) = AMP(1844) + AMP(1851) ! used 16 times + TMP_JAMP(2187) = AMP(1838) + AMP(1849) ! used 16 times + TMP_JAMP(2186) = AMP(1843) + AMP(1848) ! used 16 times + TMP_JAMP(2185) = AMP(1837) + AMP(1846) ! used 16 times + TMP_JAMP(2184) = AMP(1494) + AMP(1500) ! used 16 times + TMP_JAMP(2183) = AMP(1488) + AMP(1498) ! used 16 times + TMP_JAMP(2182) = AMP(1737) + AMP(1743) ! used 16 times + TMP_JAMP(2181) = AMP(1731) + AMP(1741) ! used 16 times + TMP_JAMP(2180) = AMP(1728) - AMP(1736) ! used 16 times + TMP_JAMP(2179) = AMP(1726) - AMP(1730) ! 
used 16 times + TMP_JAMP(2178) = AMP(1735) + AMP(1740) ! used 16 times + TMP_JAMP(2177) = AMP(1729) + AMP(1738) ! used 16 times + TMP_JAMP(2176) = AMP(1491) + AMP(1499) ! used 16 times + TMP_JAMP(2175) = AMP(1868) + AMP(1877) ! used 16 times + TMP_JAMP(2174) = AMP(1674) - AMP(1682) ! used 16 times + TMP_JAMP(2173) = AMP(1672) - AMP(1676) ! used 16 times + TMP_JAMP(2172) = AMP(1867) + AMP(1874) ! used 16 times + TMP_JAMP(2171) = AMP(1673) - AMP(1679) ! used 16 times + TMP_JAMP(2170) = AMP(1841) + AMP(1850) ! used 16 times + TMP_JAMP(2169) = AMP(1840) + AMP(1847) ! used 16 times + TMP_JAMP(2168) = AMP(1602) + AMP(1608) ! used 16 times + TMP_JAMP(2167) = AMP(1596) + AMP(1606) ! used 16 times + TMP_JAMP(2166) = AMP(1600) + AMP(1611) ! used 16 times + TMP_JAMP(2165) = AMP(1594) + AMP(1609) ! used 16 times + TMP_JAMP(2164) = AMP(1601) + AMP(1605) ! used 16 times + TMP_JAMP(2163) = AMP(1595) + AMP(1603) ! used 16 times + TMP_JAMP(2162) = AMP(1788) + AMP(1796) ! used 16 times + TMP_JAMP(2161) = AMP(1781) - AMP(1787) ! used 16 times + TMP_JAMP(2160) = AMP(1786) + AMP(1793) ! used 16 times + TMP_JAMP(2159) = AMP(1734) + AMP(1742) ! used 16 times + TMP_JAMP(2158) = AMP(1727) - AMP(1733) ! used 16 times + TMP_JAMP(2157) = AMP(1732) + AMP(1739) ! used 16 times + TMP_JAMP(2156) = AMP(1599) + AMP(1607) ! used 16 times + TMP_JAMP(2155) = AMP(1597) + AMP(1610) ! used 16 times + TMP_JAMP(2154) = AMP(1598) + AMP(1604) ! used 16 times + TMP_JAMP(2380) = TMP_JAMP(2225) - TMP_JAMP(2224) ! used 8 times + TMP_JAMP(2379) = TMP_JAMP(2225) + TMP_JAMP(2209) ! used 8 times + TMP_JAMP(2378) = TMP_JAMP(2224) + TMP_JAMP(2209) ! used 8 times + TMP_JAMP(2377) = TMP_JAMP(2223) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1827) ! used 8 times + TMP_JAMP(2376) = TMP_JAMP(2223) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1833) ! used 8 times + TMP_JAMP(2375) = TMP_JAMP(2222) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1825) ! 
used 8 times + TMP_JAMP(2374) = TMP_JAMP(2222) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1831) ! used 8 times + TMP_JAMP(2373) = TMP_JAMP(2221) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1518) ! used 8 times + TMP_JAMP(2372) = TMP_JAMP(2221) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1533) ! used 8 times + TMP_JAMP(2371) = TMP_JAMP(2220) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1516) ! used 8 times + TMP_JAMP(2370) = TMP_JAMP(2220) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1531) ! used 8 times + TMP_JAMP(2369) = TMP_JAMP(2219) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1539) ! used 8 times + TMP_JAMP(2368) = TMP_JAMP(2219) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1536) ! used 8 times + TMP_JAMP(2367) = TMP_JAMP(2218) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1537) ! used 8 times + TMP_JAMP(2366) = TMP_JAMP(2218) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1534) ! used 8 times + TMP_JAMP(2365) = TMP_JAMP(2217) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1662) ! used 8 times + TMP_JAMP(2364) = TMP_JAMP(2217) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1653) ! used 8 times + TMP_JAMP(2363) = TMP_JAMP(2216) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1660) ! used 8 times + TMP_JAMP(2362) = TMP_JAMP(2216) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1651) ! used 8 times + TMP_JAMP(2361) = TMP_JAMP(2215) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1692) ! used 8 times + TMP_JAMP(2360) = TMP_JAMP(2215) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1698) ! used 8 times + TMP_JAMP(2359) = TMP_JAMP(2214) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1690) ! used 8 times + TMP_JAMP(2358) = TMP_JAMP(2214) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1696) ! 
used 8 times + TMP_JAMP(2357) = TMP_JAMP(2213) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1517) ! used 8 times + TMP_JAMP(2356) = TMP_JAMP(2213) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1532) ! used 8 times + TMP_JAMP(2355) = TMP_JAMP(2212) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1538) ! used 8 times + TMP_JAMP(2354) = TMP_JAMP(2212) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1535) ! used 8 times + TMP_JAMP(2353) = TMP_JAMP(2211) - TMP_JAMP(2210) ! used 8 times + TMP_JAMP(2352) = TMP_JAMP(2211) + TMP_JAMP(2175) ! used 8 times + TMP_JAMP(2351) = TMP_JAMP(2210) + TMP_JAMP(2175) ! used 8 times + TMP_JAMP(2350) = TMP_JAMP(2208) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1826) ! used 8 times + TMP_JAMP(2349) = TMP_JAMP(2208) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1832) ! used 8 times + TMP_JAMP(2348) = TMP_JAMP(2207) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1887) ! used 8 times + TMP_JAMP(2347) = TMP_JAMP(2207) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1881) ! used 8 times + TMP_JAMP(2346) = TMP_JAMP(2206) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1885) ! used 8 times + TMP_JAMP(2345) = TMP_JAMP(2206) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1879) ! used 8 times + TMP_JAMP(2344) = TMP_JAMP(2205) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1581) ! used 8 times + TMP_JAMP(2343) = TMP_JAMP(2205) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1563) ! used 8 times + TMP_JAMP(2342) = TMP_JAMP(2204) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1579) ! used 8 times + TMP_JAMP(2341) = TMP_JAMP(2204) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1561) ! used 8 times + TMP_JAMP(2340) = TMP_JAMP(2203) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1572) ! 
used 8 times + TMP_JAMP(2339) = TMP_JAMP(2203) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1587) ! used 8 times + TMP_JAMP(2338) = TMP_JAMP(2202) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1570) ! used 8 times + TMP_JAMP(2337) = TMP_JAMP(2202) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1585) ! used 8 times + TMP_JAMP(2336) = TMP_JAMP(2201) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1770) ! used 8 times + TMP_JAMP(2335) = TMP_JAMP(2201) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1761) ! used 8 times + TMP_JAMP(2334) = TMP_JAMP(2200) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1768) ! used 8 times + TMP_JAMP(2333) = TMP_JAMP(2200) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1759) ! used 8 times + TMP_JAMP(2332) = TMP_JAMP(2199) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1776) ! used 8 times + TMP_JAMP(2331) = TMP_JAMP(2199) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1779) ! used 8 times + TMP_JAMP(2330) = TMP_JAMP(2198) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1774) ! used 8 times + TMP_JAMP(2329) = TMP_JAMP(2198) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1777) ! used 8 times + TMP_JAMP(2328) = TMP_JAMP(2197) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1661) ! used 8 times + TMP_JAMP(2327) = TMP_JAMP(2197) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1652) ! used 8 times + TMP_JAMP(2326) = TMP_JAMP(2196) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1691) ! used 8 times + TMP_JAMP(2325) = TMP_JAMP(2196) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1697) ! used 8 times + TMP_JAMP(2324) = TMP_JAMP(2195) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1806) ! used 8 times + TMP_JAMP(2323) = TMP_JAMP(2195) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1800) ! 
used 8 times + TMP_JAMP(2322) = TMP_JAMP(2194) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1804) ! used 8 times + TMP_JAMP(2321) = TMP_JAMP(2194) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1798) ! used 8 times + TMP_JAMP(2320) = TMP_JAMP(2193) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1580) ! used 8 times + TMP_JAMP(2319) = TMP_JAMP(2193) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1562) ! used 8 times + TMP_JAMP(2318) = TMP_JAMP(2192) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1571) ! used 8 times + TMP_JAMP(2317) = TMP_JAMP(2192) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1586) ! used 8 times + TMP_JAMP(2316) = TMP_JAMP(2191) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1593) ! used 8 times + TMP_JAMP(2315) = TMP_JAMP(2191) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1590) ! used 8 times + TMP_JAMP(2314) = TMP_JAMP(2190) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1591) ! used 8 times + TMP_JAMP(2313) = TMP_JAMP(2190) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1588) ! used 8 times + TMP_JAMP(2312) = TMP_JAMP(2189) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1592) ! used 8 times + TMP_JAMP(2311) = TMP_JAMP(2189) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1589) ! used 8 times + TMP_JAMP(2310) = TMP_JAMP(2188) - TMP_JAMP(2187) ! used 8 times + TMP_JAMP(2309) = TMP_JAMP(2188) + TMP_JAMP(2170) ! used 8 times + TMP_JAMP(2308) = TMP_JAMP(2187) + TMP_JAMP(2170) ! used 8 times + TMP_JAMP(2307) = TMP_JAMP(2186) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1860) ! used 8 times + TMP_JAMP(2306) = TMP_JAMP(2186) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1854) ! used 8 times + TMP_JAMP(2305) = TMP_JAMP(2185) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1858) ! 
used 8 times + TMP_JAMP(2304) = TMP_JAMP(2185) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1852) ! used 8 times + TMP_JAMP(2303) = TMP_JAMP(2184) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1527) ! used 8 times + TMP_JAMP(2302) = TMP_JAMP(2184) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1509) ! used 8 times + TMP_JAMP(2301) = TMP_JAMP(2183) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1525) ! used 8 times + TMP_JAMP(2300) = TMP_JAMP(2183) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1507) ! used 8 times + TMP_JAMP(2299) = TMP_JAMP(2182) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1716) ! used 8 times + TMP_JAMP(2298) = TMP_JAMP(2182) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1707) ! used 8 times + TMP_JAMP(2297) = TMP_JAMP(2181) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1714) ! used 8 times + TMP_JAMP(2296) = TMP_JAMP(2181) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1705) ! used 8 times + TMP_JAMP(2295) = TMP_JAMP(2180) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1722) ! used 8 times + TMP_JAMP(2294) = TMP_JAMP(2180) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1725) ! used 8 times + TMP_JAMP(2293) = TMP_JAMP(2179) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1720) ! used 8 times + TMP_JAMP(2292) = TMP_JAMP(2179) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1723) ! used 8 times + TMP_JAMP(2291) = TMP_JAMP(2178) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1752) ! used 8 times + TMP_JAMP(2290) = TMP_JAMP(2178) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1746) ! used 8 times + TMP_JAMP(2289) = TMP_JAMP(2177) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1750) ! used 8 times + TMP_JAMP(2288) = TMP_JAMP(2177) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1744) ! 
used 8 times + TMP_JAMP(2287) = TMP_JAMP(2176) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1526) ! used 8 times + TMP_JAMP(2286) = TMP_JAMP(2176) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1508) ! used 8 times + TMP_JAMP(2285) = TMP_JAMP(2175) + TMP_JAMP(2170) ! used 8 times + TMP_JAMP(2284) = TMP_JAMP(2174) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1671) ! used 8 times + TMP_JAMP(2283) = TMP_JAMP(2174) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1668) ! used 8 times + TMP_JAMP(2282) = TMP_JAMP(2173) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1669) ! used 8 times + TMP_JAMP(2281) = TMP_JAMP(2173) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1666) ! used 8 times + TMP_JAMP(2280) = TMP_JAMP(2172) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1886) ! used 8 times + TMP_JAMP(2279) = TMP_JAMP(2172) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1880) ! used 8 times + TMP_JAMP(2278) = TMP_JAMP(2171) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1670) ! used 8 times + TMP_JAMP(2277) = TMP_JAMP(2171) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1667) ! used 8 times + TMP_JAMP(2276) = TMP_JAMP(2169) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1859) ! used 8 times + TMP_JAMP(2275) = TMP_JAMP(2169) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1853) ! used 8 times + TMP_JAMP(2274) = TMP_JAMP(2168) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1635) ! used 8 times + TMP_JAMP(2273) = TMP_JAMP(2168) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1617) ! used 8 times + TMP_JAMP(2272) = TMP_JAMP(2167) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1633) ! used 8 times + TMP_JAMP(2271) = TMP_JAMP(2167) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1615) ! 
used 8 times + TMP_JAMP(2270) = TMP_JAMP(2166) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1644) ! used 8 times + TMP_JAMP(2269) = TMP_JAMP(2166) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1647) ! used 8 times + TMP_JAMP(2268) = TMP_JAMP(2165) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1642) ! used 8 times + TMP_JAMP(2267) = TMP_JAMP(2165) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1645) ! used 8 times + TMP_JAMP(2266) = TMP_JAMP(2164) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1641) ! used 8 times + TMP_JAMP(2265) = TMP_JAMP(2164) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1626) ! used 8 times + TMP_JAMP(2264) = TMP_JAMP(2163) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1639) ! used 8 times + TMP_JAMP(2263) = TMP_JAMP(2163) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1624) ! used 8 times + TMP_JAMP(2262) = TMP_JAMP(2162) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1769) ! used 8 times + TMP_JAMP(2261) = TMP_JAMP(2162) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1760) ! used 8 times + TMP_JAMP(2260) = TMP_JAMP(2161) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1775) ! used 8 times + TMP_JAMP(2259) = TMP_JAMP(2161) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1778) ! used 8 times + TMP_JAMP(2258) = TMP_JAMP(2160) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1805) ! used 8 times + TMP_JAMP(2257) = TMP_JAMP(2160) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1799) ! used 8 times + TMP_JAMP(2256) = TMP_JAMP(2159) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1715) ! used 8 times + TMP_JAMP(2255) = TMP_JAMP(2159) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1706) ! used 8 times + TMP_JAMP(2254) = TMP_JAMP(2158) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1721) ! 
used 8 times + TMP_JAMP(2253) = TMP_JAMP(2158) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1724) ! used 8 times + TMP_JAMP(2252) = TMP_JAMP(2157) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1751) ! used 8 times + TMP_JAMP(2251) = TMP_JAMP(2157) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1745) ! used 8 times + TMP_JAMP(2250) = TMP_JAMP(2156) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1634) ! used 8 times + TMP_JAMP(2249) = TMP_JAMP(2156) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1616) ! used 8 times + TMP_JAMP(2248) = TMP_JAMP(2155) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1643) ! used 8 times + TMP_JAMP(2247) = TMP_JAMP(2155) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1646) ! used 8 times + TMP_JAMP(2246) = TMP_JAMP(2154) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1640) ! used 8 times + TMP_JAMP(2245) = TMP_JAMP(2154) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1625) ! used 8 times + TMP_JAMP(2244) = AMP(1455) + AMP(1456) ! used 8 times + TMP_JAMP(2243) = AMP(1453) - AMP(1457) ! used 8 times + TMP_JAMP(2242) = AMP(1866) - AMP(1872) ! used 8 times + TMP_JAMP(2241) = AMP(1464) + AMP(1465) ! used 8 times + TMP_JAMP(2240) = AMP(1462) - AMP(1466) ! used 8 times + TMP_JAMP(2239) = AMP(1839) - AMP(1845) ! used 8 times + TMP_JAMP(2238) = AMP(1473) + AMP(1474) ! used 8 times + TMP_JAMP(2237) = AMP(1471) - AMP(1475) ! used 8 times + TMP_JAMP(2236) = AMP(1869) + AMP(1872) ! used 8 times + TMP_JAMP(2235) = AMP(1842) + AMP(1845) ! used 8 times + TMP_JAMP(2234) = AMP(1812) - AMP(1818) ! used 8 times + TMP_JAMP(2233) = AMP(1815) + AMP(1818) ! used 8 times + TMP_JAMP(2232) = AMP(1812) + AMP(1815) ! used 8 times + TMP_JAMP(2231) = AMP(1434) + AMP(1435) ! used 8 times + TMP_JAMP(2230) = AMP(1432) - AMP(1436) ! used 8 times + TMP_JAMP(2229) = AMP(1866) + AMP(1869) ! 
used 8 times + TMP_JAMP(2228) = AMP(1482) + AMP(1483) ! used 8 times + TMP_JAMP(2227) = AMP(1480) - AMP(1484) ! used 8 times + TMP_JAMP(2226) = AMP(1839) + AMP(1842) ! used 8 times + TMP_JAMP(2390) = TMP_JAMP(2375) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(2244) ! used 8 times + TMP_JAMP(2389) = TMP_JAMP(2374) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(2228) ! used 8 times + TMP_JAMP(2388) = TMP_JAMP(2359) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(2243) ! used 8 times + TMP_JAMP(2387) = TMP_JAMP(2358) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(2227) ! used 8 times + TMP_JAMP(2386) = TMP_JAMP(2346) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(2241) ! used 8 times + TMP_JAMP(2385) = TMP_JAMP(2322) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(2240) ! used 8 times + TMP_JAMP(2384) = TMP_JAMP(2305) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(2238) ! used 8 times + TMP_JAMP(2383) = TMP_JAMP(2304) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(2231) ! used 8 times + TMP_JAMP(2382) = TMP_JAMP(2289) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(2237) ! used 8 times + TMP_JAMP(2381) = TMP_JAMP(2288) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(2230) ! used 8 times + TMP_JAMP(2671) = TMP_JAMP(2390) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1458) ! used 4 times + TMP_JAMP(2670) = TMP_JAMP(2390) - TMP_JAMP(2377) ! used 4 times + TMP_JAMP(2669) = TMP_JAMP(2390) - TMP_JAMP(2388) ! used 4 times + TMP_JAMP(2668) = TMP_JAMP(2389) - TMP_JAMP(2376) ! used 4 times + TMP_JAMP(2667) = TMP_JAMP(2389) + TMP_JAMP(2349) ! used 4 times + TMP_JAMP(2666) = TMP_JAMP(2389) - TMP_JAMP(2387) ! used 4 times + TMP_JAMP(2665) = TMP_JAMP(2388) - AMP(1663) ! 
used 4 times + TMP_JAMP(2664) = TMP_JAMP(2388) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1458) ! used 4 times + TMP_JAMP(2663) = TMP_JAMP(2388) - TMP_JAMP(2282) ! used 4 times + TMP_JAMP(2662) = TMP_JAMP(2387) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1485) ! used 4 times + TMP_JAMP(2661) = TMP_JAMP(2387) - TMP_JAMP(2360) ! used 4 times + TMP_JAMP(2660) = TMP_JAMP(2387) + TMP_JAMP(2325) ! used 4 times + TMP_JAMP(2659) = TMP_JAMP(2386) - TMP_JAMP(2348) ! used 4 times + TMP_JAMP(2658) = TMP_JAMP(2386) - TMP_JAMP(2385) ! used 4 times + TMP_JAMP(2657) = TMP_JAMP(2385) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1467) ! used 4 times + TMP_JAMP(2656) = TMP_JAMP(2385) - AMP(1762) ! used 4 times + TMP_JAMP(2655) = TMP_JAMP(2384) - TMP_JAMP(2307) ! used 4 times + TMP_JAMP(2654) = TMP_JAMP(2384) + TMP_JAMP(2276) ! used 4 times + TMP_JAMP(2653) = TMP_JAMP(2384) - TMP_JAMP(2382) ! used 4 times + TMP_JAMP(2652) = TMP_JAMP(2383) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1437) ! used 4 times + TMP_JAMP(2651) = TMP_JAMP(2383) - TMP_JAMP(2306) ! used 4 times + TMP_JAMP(2650) = TMP_JAMP(2383) + TMP_JAMP(2275) ! used 4 times + TMP_JAMP(2649) = TMP_JAMP(2383) - TMP_JAMP(2381) ! used 4 times + TMP_JAMP(2648) = TMP_JAMP(2382) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1476) ! used 4 times + TMP_JAMP(2647) = TMP_JAMP(2382) - AMP(1708) ! used 4 times + TMP_JAMP(2646) = TMP_JAMP(2381) - AMP(1717) ! used 4 times + TMP_JAMP(2645) = TMP_JAMP(2381) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1437) ! used 4 times + TMP_JAMP(2644) = TMP_JAMP(2380) + TMP_JAMP(2309) ! used 4 times + TMP_JAMP(2643) = TMP_JAMP(2380) - TMP_JAMP(2234) ! used 4 times + TMP_JAMP(2642) = TMP_JAMP(2379) + TMP_JAMP(2233) ! used 4 times + TMP_JAMP(2641) = TMP_JAMP(2378) + TMP_JAMP(2352) ! used 4 times + TMP_JAMP(2640) = TMP_JAMP(2378) + TMP_JAMP(2232) ! 
used 4 times + TMP_JAMP(2639) = TMP_JAMP(2377) + TMP_JAMP(2350) ! used 4 times + TMP_JAMP(2638) = TMP_JAMP(2377) - AMP(1830) ! used 4 times + TMP_JAMP(2637) = TMP_JAMP(2376) - AMP(1836) ! used 4 times + TMP_JAMP(2636) = TMP_JAMP(2376) + TMP_JAMP(2349) ! used 4 times + TMP_JAMP(2635) = TMP_JAMP(2373) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1458) ! used 4 times + TMP_JAMP(2634) = TMP_JAMP(2373) - TMP_JAMP(2371) ! used 4 times + TMP_JAMP(2633) = TMP_JAMP(2373) + TMP_JAMP(2357) ! used 4 times + TMP_JAMP(2632) = TMP_JAMP(2373) - AMP(1521) ! used 4 times + TMP_JAMP(2631) = TMP_JAMP(2372) - TMP_JAMP(2370) ! used 4 times + TMP_JAMP(2630) = TMP_JAMP(2372) + TMP_JAMP(2356) ! used 4 times + TMP_JAMP(2629) = TMP_JAMP(2371) + AMP(1513) ! used 4 times + TMP_JAMP(2628) = TMP_JAMP(2371) + TMP_JAMP(2357) ! used 4 times + TMP_JAMP(2627) = TMP_JAMP(2370) + TMP_JAMP(2356) ! used 4 times + TMP_JAMP(2626) = TMP_JAMP(2369) + AMP(1515) ! used 4 times + TMP_JAMP(2625) = TMP_JAMP(2369) + TMP_JAMP(2355) ! used 4 times + TMP_JAMP(2624) = TMP_JAMP(2369) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1467) ! used 4 times + TMP_JAMP(2623) = TMP_JAMP(2369) - AMP(1506) ! used 4 times + TMP_JAMP(2622) = TMP_JAMP(2368) - TMP_JAMP(2366) ! used 4 times + TMP_JAMP(2621) = TMP_JAMP(2368) + AMP(1530) ! used 4 times + TMP_JAMP(2620) = TMP_JAMP(2368) + TMP_JAMP(2354) ! used 4 times + TMP_JAMP(2619) = TMP_JAMP(2367) + AMP(1513) ! used 4 times + TMP_JAMP(2618) = TMP_JAMP(2367) + TMP_JAMP(2355) ! used 4 times + TMP_JAMP(2617) = TMP_JAMP(2367) - TMP_JAMP(2330) ! used 4 times + TMP_JAMP(2616) = TMP_JAMP(2366) + AMP(1528) ! used 4 times + TMP_JAMP(2615) = TMP_JAMP(2366) + TMP_JAMP(2354) ! used 4 times + TMP_JAMP(2614) = TMP_JAMP(2365) + AMP(1665) ! used 4 times + TMP_JAMP(2613) = TMP_JAMP(2365) - TMP_JAMP(2363) ! used 4 times + TMP_JAMP(2612) = TMP_JAMP(2365) + TMP_JAMP(2328) ! used 4 times + TMP_JAMP(2611) = TMP_JAMP(2365) + TMP_JAMP(2299) ! 
used 4 times + TMP_JAMP(2610) = TMP_JAMP(2364) - TMP_JAMP(2362) ! used 4 times + TMP_JAMP(2609) = TMP_JAMP(2364) + TMP_JAMP(2327) ! used 4 times + TMP_JAMP(2608) = TMP_JAMP(2364) + TMP_JAMP(2298) ! used 4 times + TMP_JAMP(2607) = TMP_JAMP(2363) + AMP(1663) ! used 4 times + TMP_JAMP(2606) = TMP_JAMP(2363) + TMP_JAMP(2328) ! used 4 times + TMP_JAMP(2605) = TMP_JAMP(2362) + TMP_JAMP(2327) ! used 4 times + TMP_JAMP(2604) = TMP_JAMP(2362) + AMP(1654) ! used 4 times + TMP_JAMP(2603) = TMP_JAMP(2361) - AMP(1665) ! used 4 times + TMP_JAMP(2602) = TMP_JAMP(2361) + TMP_JAMP(2357) ! used 4 times + TMP_JAMP(2601) = TMP_JAMP(2361) + TMP_JAMP(2326) ! used 4 times + TMP_JAMP(2600) = TMP_JAMP(2361) + AMP(1695) ! used 4 times + TMP_JAMP(2599) = TMP_JAMP(2360) + TMP_JAMP(2356) ! used 4 times + TMP_JAMP(2598) = TMP_JAMP(2360) + TMP_JAMP(2325) ! used 4 times + TMP_JAMP(2597) = TMP_JAMP(2360) - AMP(1656) ! used 4 times + TMP_JAMP(2596) = TMP_JAMP(2355) + AMP(1514) ! used 4 times + TMP_JAMP(2595) = TMP_JAMP(2354) + AMP(1529) ! used 4 times + TMP_JAMP(2594) = TMP_JAMP(2353) - TMP_JAMP(2242) ! used 4 times + TMP_JAMP(2593) = TMP_JAMP(2350) - TMP_JAMP(2338) ! used 4 times + TMP_JAMP(2592) = TMP_JAMP(2350) - AMP(1829) ! used 4 times + TMP_JAMP(2591) = TMP_JAMP(2349) - AMP(1835) ! used 4 times + TMP_JAMP(2590) = TMP_JAMP(2349) - TMP_JAMP(2337) ! used 4 times + TMP_JAMP(2589) = TMP_JAMP(2348) - AMP(1890) ! used 4 times + TMP_JAMP(2588) = TMP_JAMP(2348) + TMP_JAMP(2280) ! used 4 times + TMP_JAMP(2587) = TMP_JAMP(2347) - TMP_JAMP(2345) ! used 4 times + TMP_JAMP(2586) = TMP_JAMP(2347) + TMP_JAMP(2279) ! used 4 times + TMP_JAMP(2585) = TMP_JAMP(2347) - AMP(1884) ! used 4 times + TMP_JAMP(2584) = TMP_JAMP(2345) + TMP_JAMP(2279) ! used 4 times + TMP_JAMP(2583) = TMP_JAMP(2345) - AMP(1882) ! used 4 times + TMP_JAMP(2582) = TMP_JAMP(2344) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1467) ! used 4 times + TMP_JAMP(2581) = TMP_JAMP(2344) - TMP_JAMP(2342) ! 
used 4 times + TMP_JAMP(2580) = TMP_JAMP(2344) + TMP_JAMP(2320) ! used 4 times + TMP_JAMP(2579) = TMP_JAMP(2343) - TMP_JAMP(2341) ! used 4 times + TMP_JAMP(2578) = TMP_JAMP(2343) + TMP_JAMP(2319) ! used 4 times + TMP_JAMP(2577) = TMP_JAMP(2343) - AMP(1566) ! used 4 times + TMP_JAMP(2576) = TMP_JAMP(2342) + TMP_JAMP(2320) ! used 4 times + TMP_JAMP(2575) = TMP_JAMP(2341) + TMP_JAMP(2319) ! used 4 times + TMP_JAMP(2574) = TMP_JAMP(2340) - AMP(1575) ! used 4 times + TMP_JAMP(2573) = TMP_JAMP(2340) - TMP_JAMP(2338) ! used 4 times + TMP_JAMP(2572) = TMP_JAMP(2340) + TMP_JAMP(2318) ! used 4 times + TMP_JAMP(2571) = TMP_JAMP(2340) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1458) ! used 4 times + TMP_JAMP(2570) = TMP_JAMP(2339) - TMP_JAMP(2337) ! used 4 times + TMP_JAMP(2569) = TMP_JAMP(2339) + TMP_JAMP(2317) ! used 4 times + TMP_JAMP(2568) = TMP_JAMP(2339) - AMP(1566) ! used 4 times + TMP_JAMP(2567) = TMP_JAMP(2339) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1485) ! used 4 times + TMP_JAMP(2566) = TMP_JAMP(2338) - AMP(1573) ! used 4 times + TMP_JAMP(2565) = TMP_JAMP(2338) + TMP_JAMP(2318) ! used 4 times + TMP_JAMP(2564) = TMP_JAMP(2337) - AMP(1564) ! used 4 times + TMP_JAMP(2563) = TMP_JAMP(2337) + TMP_JAMP(2317) ! used 4 times + TMP_JAMP(2562) = TMP_JAMP(2336) + TMP_JAMP(2328) ! used 4 times + TMP_JAMP(2561) = TMP_JAMP(2336) - TMP_JAMP(2334) ! used 4 times + TMP_JAMP(2560) = TMP_JAMP(2336) + TMP_JAMP(2262) ! used 4 times + TMP_JAMP(2559) = TMP_JAMP(2336) + AMP(1773) ! used 4 times + TMP_JAMP(2558) = TMP_JAMP(2335) + TMP_JAMP(2327) ! used 4 times + TMP_JAMP(2557) = TMP_JAMP(2335) - TMP_JAMP(2333) ! used 4 times + TMP_JAMP(2556) = TMP_JAMP(2335) + TMP_JAMP(2261) ! used 4 times + TMP_JAMP(2555) = TMP_JAMP(2334) + TMP_JAMP(2262) ! used 4 times + TMP_JAMP(2554) = TMP_JAMP(2333) + AMP(1762) ! used 4 times + TMP_JAMP(2553) = TMP_JAMP(2333) + TMP_JAMP(2261) ! used 4 times + TMP_JAMP(2552) = TMP_JAMP(2332) + AMP(1767) ! 
used 4 times + TMP_JAMP(2551) = TMP_JAMP(2332) - TMP_JAMP(2330) ! used 4 times + TMP_JAMP(2550) = TMP_JAMP(2332) - AMP(1809) ! used 4 times + TMP_JAMP(2549) = TMP_JAMP(2332) + TMP_JAMP(2260) ! used 4 times + TMP_JAMP(2548) = TMP_JAMP(2331) - TMP_JAMP(2329) ! used 4 times + TMP_JAMP(2547) = TMP_JAMP(2331) + TMP_JAMP(2259) ! used 4 times + TMP_JAMP(2546) = TMP_JAMP(2331) + AMP(1758) ! used 4 times + TMP_JAMP(2545) = TMP_JAMP(2330) + AMP(1765) ! used 4 times + TMP_JAMP(2544) = TMP_JAMP(2330) + TMP_JAMP(2260) ! used 4 times + TMP_JAMP(2543) = TMP_JAMP(2329) + TMP_JAMP(2259) ! used 4 times + TMP_JAMP(2542) = TMP_JAMP(2329) + AMP(1756) ! used 4 times + TMP_JAMP(2541) = TMP_JAMP(2328) + AMP(1664) ! used 4 times + TMP_JAMP(2540) = TMP_JAMP(2326) - AMP(1664) ! used 4 times + TMP_JAMP(2539) = TMP_JAMP(2326) + TMP_JAMP(2318) ! used 4 times + TMP_JAMP(2538) = TMP_JAMP(2325) - AMP(1655) ! used 4 times + TMP_JAMP(2537) = TMP_JAMP(2324) + AMP(1809) ! used 4 times + TMP_JAMP(2536) = TMP_JAMP(2324) + TMP_JAMP(2320) ! used 4 times + TMP_JAMP(2535) = TMP_JAMP(2324) - AMP(1764) ! used 4 times + TMP_JAMP(2534) = TMP_JAMP(2323) - AMP(1773) ! used 4 times + TMP_JAMP(2533) = TMP_JAMP(2323) + TMP_JAMP(2319) ! used 4 times + TMP_JAMP(2532) = TMP_JAMP(2323) + AMP(1803) ! used 4 times + TMP_JAMP(2531) = TMP_JAMP(2321) - AMP(1771) ! used 4 times + TMP_JAMP(2530) = TMP_JAMP(2321) + AMP(1801) ! used 4 times + TMP_JAMP(2529) = TMP_JAMP(2318) - AMP(1574) ! used 4 times + TMP_JAMP(2528) = TMP_JAMP(2317) - AMP(1565) ! used 4 times + TMP_JAMP(2527) = TMP_JAMP(2316) + AMP(1569) ! used 4 times + TMP_JAMP(2526) = TMP_JAMP(2316) - AMP(1560) ! used 4 times + TMP_JAMP(2525) = TMP_JAMP(2315) - AMP(1578) ! used 4 times + TMP_JAMP(2524) = TMP_JAMP(2315) + AMP(1584) ! used 4 times + TMP_JAMP(2523) = TMP_JAMP(2314) + AMP(1567) ! used 4 times + TMP_JAMP(2522) = TMP_JAMP(2314) - AMP(1558) ! used 4 times + TMP_JAMP(2521) = TMP_JAMP(2313) - AMP(1576) ! used 4 times + TMP_JAMP(2520) = TMP_JAMP(2313) + AMP(1582) ! 
used 4 times + TMP_JAMP(2519) = TMP_JAMP(2312) + AMP(1568) ! used 4 times + TMP_JAMP(2518) = TMP_JAMP(2312) - AMP(1559) ! used 4 times + TMP_JAMP(2517) = TMP_JAMP(2311) - AMP(1577) ! used 4 times + TMP_JAMP(2516) = TMP_JAMP(2311) + AMP(1583) ! used 4 times + TMP_JAMP(2515) = TMP_JAMP(2310) - TMP_JAMP(2239) ! used 4 times + TMP_JAMP(2514) = TMP_JAMP(2307) - AMP(1863) ! used 4 times + TMP_JAMP(2513) = TMP_JAMP(2307) + TMP_JAMP(2276) ! used 4 times + TMP_JAMP(2512) = TMP_JAMP(2306) + TMP_JAMP(2275) ! used 4 times + TMP_JAMP(2511) = TMP_JAMP(2306) - AMP(1857) ! used 4 times + TMP_JAMP(2510) = TMP_JAMP(2303) - AMP(1521) ! used 4 times + TMP_JAMP(2509) = TMP_JAMP(2303) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1476) ! used 4 times + TMP_JAMP(2508) = TMP_JAMP(2303) + AMP(1524) ! used 4 times + TMP_JAMP(2507) = TMP_JAMP(2302) + AMP(1506) ! used 4 times + TMP_JAMP(2506) = TMP_JAMP(2302) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1437) ! used 4 times + TMP_JAMP(2505) = TMP_JAMP(2302) - AMP(1512) ! used 4 times + TMP_JAMP(2504) = TMP_JAMP(2301) - AMP(1519) ! used 4 times + TMP_JAMP(2503) = TMP_JAMP(2301) + AMP(1522) ! used 4 times + TMP_JAMP(2502) = TMP_JAMP(2300) + AMP(1504) ! used 4 times + TMP_JAMP(2501) = TMP_JAMP(2300) - AMP(1510) ! used 4 times + TMP_JAMP(2500) = TMP_JAMP(2299) - TMP_JAMP(2297) ! used 4 times + TMP_JAMP(2499) = TMP_JAMP(2299) + TMP_JAMP(2256) ! used 4 times + TMP_JAMP(2498) = TMP_JAMP(2298) - TMP_JAMP(2296) ! used 4 times + TMP_JAMP(2497) = TMP_JAMP(2298) + TMP_JAMP(2255) ! used 4 times + TMP_JAMP(2496) = TMP_JAMP(2297) + AMP(1717) ! used 4 times + TMP_JAMP(2495) = TMP_JAMP(2297) + TMP_JAMP(2256) ! used 4 times + TMP_JAMP(2494) = TMP_JAMP(2296) + AMP(1708) ! used 4 times + TMP_JAMP(2493) = TMP_JAMP(2296) + TMP_JAMP(2255) ! used 4 times + TMP_JAMP(2492) = TMP_JAMP(2295) + AMP(1713) ! used 4 times + TMP_JAMP(2491) = TMP_JAMP(2295) - TMP_JAMP(2293) ! used 4 times + TMP_JAMP(2490) = TMP_JAMP(2295) + TMP_JAMP(2254) ! 
used 4 times + TMP_JAMP(2489) = TMP_JAMP(2294) - TMP_JAMP(2292) ! used 4 times + TMP_JAMP(2488) = TMP_JAMP(2294) + TMP_JAMP(2253) ! used 4 times + TMP_JAMP(2487) = TMP_JAMP(2294) + AMP(1704) ! used 4 times + TMP_JAMP(2486) = TMP_JAMP(2293) + AMP(1711) ! used 4 times + TMP_JAMP(2485) = TMP_JAMP(2293) + TMP_JAMP(2254) ! used 4 times + TMP_JAMP(2484) = TMP_JAMP(2292) + TMP_JAMP(2253) ! used 4 times + TMP_JAMP(2483) = TMP_JAMP(2292) + AMP(1702) ! used 4 times + TMP_JAMP(2482) = TMP_JAMP(2291) + AMP(1755) ! used 4 times + TMP_JAMP(2481) = TMP_JAMP(2291) - AMP(1710) ! used 4 times + TMP_JAMP(2480) = TMP_JAMP(2290) - AMP(1719) ! used 4 times + TMP_JAMP(2479) = TMP_JAMP(2290) + AMP(1749) ! used 4 times + TMP_JAMP(2478) = TMP_JAMP(2287) - AMP(1520) ! used 4 times + TMP_JAMP(2477) = TMP_JAMP(2287) + AMP(1523) ! used 4 times + TMP_JAMP(2476) = TMP_JAMP(2286) + AMP(1505) ! used 4 times + TMP_JAMP(2475) = TMP_JAMP(2286) - AMP(1511) ! used 4 times + TMP_JAMP(2474) = TMP_JAMP(2284) - AMP(1695) ! used 4 times + TMP_JAMP(2473) = TMP_JAMP(2284) - TMP_JAMP(2282) ! used 4 times + TMP_JAMP(2472) = TMP_JAMP(2284) + AMP(1650) ! used 4 times + TMP_JAMP(2471) = TMP_JAMP(2283) + AMP(1659) ! used 4 times + TMP_JAMP(2470) = TMP_JAMP(2283) - AMP(1701) ! used 4 times + TMP_JAMP(2469) = TMP_JAMP(2282) + AMP(1648) ! used 4 times + TMP_JAMP(2468) = TMP_JAMP(2281) + AMP(1657) ! used 4 times + TMP_JAMP(2467) = TMP_JAMP(2281) - TMP_JAMP(2267) ! used 4 times + TMP_JAMP(2466) = TMP_JAMP(2280) - AMP(1889) ! used 4 times + TMP_JAMP(2465) = TMP_JAMP(2280) - TMP_JAMP(2272) ! used 4 times + TMP_JAMP(2464) = TMP_JAMP(2279) - AMP(1883) ! used 4 times + TMP_JAMP(2463) = TMP_JAMP(2278) - AMP(1694) ! used 4 times + TMP_JAMP(2462) = TMP_JAMP(2278) + AMP(1649) ! used 4 times + TMP_JAMP(2461) = TMP_JAMP(2277) + AMP(1658) ! used 4 times + TMP_JAMP(2460) = TMP_JAMP(2277) - AMP(1700) ! used 4 times + TMP_JAMP(2459) = TMP_JAMP(2276) - AMP(1862) ! used 4 times + TMP_JAMP(2458) = TMP_JAMP(2275) - AMP(1856) ! 
used 4 times + TMP_JAMP(2457) = TMP_JAMP(2274) - TMP_JAMP(2272) ! used 4 times + TMP_JAMP(2456) = TMP_JAMP(2274) + TMP_JAMP(2250) ! used 4 times + TMP_JAMP(2455) = TMP_JAMP(2274) - AMP(1629) ! used 4 times + TMP_JAMP(2454) = TMP_JAMP(2274) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1467) ! used 4 times + TMP_JAMP(2453) = TMP_JAMP(2273) - TMP_JAMP(2271) ! used 4 times + TMP_JAMP(2452) = TMP_JAMP(2273) + TMP_JAMP(2249) ! used 4 times + TMP_JAMP(2451) = TMP_JAMP(2272) + TMP_JAMP(2250) ! used 4 times + TMP_JAMP(2450) = TMP_JAMP(2271) + TMP_JAMP(2249) ! used 4 times + TMP_JAMP(2449) = TMP_JAMP(2270) - AMP(1632) ! used 4 times + TMP_JAMP(2448) = TMP_JAMP(2270) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1458) ! used 4 times + TMP_JAMP(2447) = TMP_JAMP(2270) - TMP_JAMP(2268) ! used 4 times + TMP_JAMP(2446) = TMP_JAMP(2270) + AMP(1638) ! used 4 times + TMP_JAMP(2445) = TMP_JAMP(2270) + TMP_JAMP(2248) ! used 4 times + TMP_JAMP(2444) = TMP_JAMP(2269) - TMP_JAMP(2267) ! used 4 times + TMP_JAMP(2443) = TMP_JAMP(2269) + TMP_JAMP(2247) ! used 4 times + TMP_JAMP(2442) = TMP_JAMP(2269) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1485) ! used 4 times + TMP_JAMP(2441) = TMP_JAMP(2269) - AMP(1614) ! used 4 times + TMP_JAMP(2440) = TMP_JAMP(2268) - AMP(1630) ! used 4 times + TMP_JAMP(2439) = TMP_JAMP(2268) + TMP_JAMP(2248) ! used 4 times + TMP_JAMP(2438) = TMP_JAMP(2267) + TMP_JAMP(2247) ! used 4 times + TMP_JAMP(2437) = TMP_JAMP(2267) - AMP(1612) ! used 4 times + TMP_JAMP(2436) = TMP_JAMP(2266) + AMP(1638) ! used 4 times + TMP_JAMP(2435) = TMP_JAMP(2266) - AMP(1620) ! used 4 times + TMP_JAMP(2434) = TMP_JAMP(2266) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1476) ! used 4 times + TMP_JAMP(2433) = TMP_JAMP(2265) - AMP(1629) ! used 4 times + TMP_JAMP(2432) = TMP_JAMP(2265) + AMP(1623) ! used 4 times + TMP_JAMP(2431) = TMP_JAMP(2265) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1437) ! 
used 4 times + TMP_JAMP(2430) = TMP_JAMP(2264) + AMP(1636) ! used 4 times + TMP_JAMP(2429) = TMP_JAMP(2264) - AMP(1618) ! used 4 times + TMP_JAMP(2428) = TMP_JAMP(2263) - AMP(1627) ! used 4 times + TMP_JAMP(2427) = TMP_JAMP(2263) + AMP(1621) ! used 4 times + TMP_JAMP(2426) = TMP_JAMP(2262) + TMP_JAMP(2256) ! used 4 times + TMP_JAMP(2425) = TMP_JAMP(2262) + AMP(1772) ! used 4 times + TMP_JAMP(2424) = TMP_JAMP(2261) + TMP_JAMP(2255) ! used 4 times + TMP_JAMP(2423) = TMP_JAMP(2260) + AMP(1766) ! used 4 times + TMP_JAMP(2422) = TMP_JAMP(2260) - AMP(1808) ! used 4 times + TMP_JAMP(2421) = TMP_JAMP(2259) + AMP(1757) ! used 4 times + TMP_JAMP(2420) = TMP_JAMP(2258) + AMP(1808) ! used 4 times + TMP_JAMP(2419) = TMP_JAMP(2258) + TMP_JAMP(2250) ! used 4 times + TMP_JAMP(2418) = TMP_JAMP(2258) - AMP(1763) ! used 4 times + TMP_JAMP(2417) = TMP_JAMP(2257) - AMP(1772) ! used 4 times + TMP_JAMP(2416) = TMP_JAMP(2257) + TMP_JAMP(2249) ! used 4 times + TMP_JAMP(2415) = TMP_JAMP(2257) + AMP(1802) ! used 4 times + TMP_JAMP(2414) = TMP_JAMP(2254) + AMP(1712) ! used 4 times + TMP_JAMP(2413) = TMP_JAMP(2253) + AMP(1703) ! used 4 times + TMP_JAMP(2412) = TMP_JAMP(2252) + AMP(1754) ! used 4 times + TMP_JAMP(2411) = TMP_JAMP(2252) - AMP(1709) ! used 4 times + TMP_JAMP(2410) = TMP_JAMP(2251) - AMP(1718) ! used 4 times + TMP_JAMP(2409) = TMP_JAMP(2251) + AMP(1748) ! used 4 times + TMP_JAMP(2408) = TMP_JAMP(2248) - AMP(1631) ! used 4 times + TMP_JAMP(2407) = TMP_JAMP(2247) - AMP(1613) ! used 4 times + TMP_JAMP(2406) = TMP_JAMP(2246) + AMP(1637) ! used 4 times + TMP_JAMP(2405) = TMP_JAMP(2246) - AMP(1619) ! used 4 times + TMP_JAMP(2404) = TMP_JAMP(2245) - AMP(1628) ! used 4 times + TMP_JAMP(2403) = TMP_JAMP(2245) + AMP(1622) ! used 4 times + TMP_JAMP(2402) = TMP_JAMP(2211) + TMP_JAMP(2187) ! used 4 times + TMP_JAMP(2401) = TMP_JAMP(2210) + TMP_JAMP(2188) ! used 4 times + TMP_JAMP(2400) = AMP(1459) + AMP(1888) ! used 4 times + TMP_JAMP(2399) = AMP(1461) + AMP(1807) ! 
used 4 times + TMP_JAMP(2398) = AMP(1468) + AMP(1861) ! used 4 times + TMP_JAMP(2397) = AMP(1470) + AMP(1753) ! used 4 times + TMP_JAMP(2396) = AMP(1452) + AMP(1693) ! used 4 times + TMP_JAMP(2395) = AMP(1450) + AMP(1828) ! used 4 times + TMP_JAMP(2394) = AMP(1477) + AMP(1834) ! used 4 times + TMP_JAMP(2393) = AMP(1479) + AMP(1699) ! used 4 times + TMP_JAMP(2392) = AMP(1431) + AMP(1747) ! used 4 times + TMP_JAMP(2391) = AMP(1429) + AMP(1855) ! used 4 times + TMP_JAMP(2673) = TMP_JAMP(2402) + TMP_JAMP(2285) ! used 4 times + TMP_JAMP(2672) = TMP_JAMP(2401) + TMP_JAMP(2285) ! used 4 times + TMP_JAMP(2675) = TMP_JAMP(2531) - TMP_JAMP(2334) ! used 3 times + TMP_JAMP(2674) = TMP_JAMP(2393) - TMP_JAMP(2281) ! used 3 times + TMP_JAMP(2881) = TMP_JAMP(2674) + TMP_JAMP(2660) ! used 2 times + TMP_JAMP(2880) = TMP_JAMP(2673) + TMP_JAMP(2236) ! used 2 times + TMP_JAMP(2879) = TMP_JAMP(2672) + TMP_JAMP(2235) ! used 2 times + TMP_JAMP(2878) = TMP_JAMP(2666) + TMP_JAMP(2604) ! used 2 times + TMP_JAMP(2877) = TMP_JAMP(2658) + TMP_JAMP(2554) ! used 2 times + TMP_JAMP(2876) = TMP_JAMP(2658) - TMP_JAMP(2399) ! used 2 times + TMP_JAMP(2875) = TMP_JAMP(2653) + TMP_JAMP(2494) ! used 2 times + TMP_JAMP(2874) = TMP_JAMP(2649) + TMP_JAMP(2496) ! used 2 times + TMP_JAMP(2873) = TMP_JAMP(2643) + TMP_JAMP(2309) ! used 2 times + TMP_JAMP(2872) = TMP_JAMP(2642) + TMP_JAMP(2310) ! used 2 times + TMP_JAMP(2871) = TMP_JAMP(2642) + TMP_JAMP(2353) ! used 2 times + TMP_JAMP(2870) = TMP_JAMP(2641) + TMP_JAMP(2236) ! used 2 times + TMP_JAMP(2869) = TMP_JAMP(2640) + TMP_JAMP(2352) ! used 2 times + TMP_JAMP(2868) = TMP_JAMP(2638) + TMP_JAMP(2234) ! used 2 times + TMP_JAMP(2867) = TMP_JAMP(2638) - TMP_JAMP(2233) ! used 2 times + TMP_JAMP(2866) = TMP_JAMP(2638) - TMP_JAMP(2504) ! used 2 times + TMP_JAMP(2865) = TMP_JAMP(2630) + TMP_JAMP(2621) ! used 2 times + TMP_JAMP(2864) = TMP_JAMP(2627) + TMP_JAMP(2501) ! used 2 times + TMP_JAMP(2863) = TMP_JAMP(2627) + TMP_JAMP(2616) ! 
used 2 times + TMP_JAMP(2862) = TMP_JAMP(2623) - TMP_JAMP(2506) ! used 2 times + TMP_JAMP(2861) = TMP_JAMP(2621) - TMP_JAMP(2616) ! used 2 times + TMP_JAMP(2860) = TMP_JAMP(2621) + TMP_JAMP(2595) ! used 2 times + TMP_JAMP(2859) = TMP_JAMP(2620) - TMP_JAMP(2508) ! used 2 times + TMP_JAMP(2858) = TMP_JAMP(2619) + TMP_JAMP(2596) ! used 2 times + TMP_JAMP(2857) = TMP_JAMP(2618) - TMP_JAMP(2502) ! used 2 times + TMP_JAMP(2856) = TMP_JAMP(2617) + TMP_JAMP(2399) ! used 2 times + TMP_JAMP(2855) = TMP_JAMP(2616) + TMP_JAMP(2595) ! used 2 times + TMP_JAMP(2854) = TMP_JAMP(2615) - TMP_JAMP(2503) ! used 2 times + TMP_JAMP(2853) = TMP_JAMP(2612) + TMP_JAMP(2561) ! used 2 times + TMP_JAMP(2852) = TMP_JAMP(2607) - TMP_JAMP(2388) ! used 2 times + TMP_JAMP(2851) = TMP_JAMP(2604) - TMP_JAMP(2364) ! used 2 times + TMP_JAMP(2850) = TMP_JAMP(2603) - TMP_JAMP(2365) ! used 2 times + TMP_JAMP(2849) = TMP_JAMP(2602) + TMP_JAMP(2596) ! used 2 times + TMP_JAMP(2848) = TMP_JAMP(2600) - TMP_JAMP(2473) ! used 2 times + TMP_JAMP(2847) = TMP_JAMP(2597) - TMP_JAMP(2364) ! used 2 times + TMP_JAMP(2846) = TMP_JAMP(2597) + TMP_JAMP(2538) ! used 2 times + TMP_JAMP(2845) = TMP_JAMP(2597) + TMP_JAMP(2595) ! used 2 times + TMP_JAMP(2844) = TMP_JAMP(2594) + TMP_JAMP(2379) ! used 2 times + TMP_JAMP(2843) = TMP_JAMP(2592) - TMP_JAMP(2233) ! used 2 times + TMP_JAMP(2842) = TMP_JAMP(2592) - TMP_JAMP(2566) ! used 2 times + TMP_JAMP(2841) = TMP_JAMP(2592) - TMP_JAMP(2232) ! used 2 times + TMP_JAMP(2840) = TMP_JAMP(2591) - TMP_JAMP(2233) ! used 2 times + TMP_JAMP(2839) = TMP_JAMP(2591) - TMP_JAMP(2564) ! used 2 times + TMP_JAMP(2838) = TMP_JAMP(2587) + TMP_JAMP(2353) ! used 2 times + TMP_JAMP(2837) = TMP_JAMP(2585) - TMP_JAMP(2564) ! used 2 times + TMP_JAMP(2836) = TMP_JAMP(2583) - TMP_JAMP(2242) ! used 2 times + TMP_JAMP(2835) = TMP_JAMP(2580) - TMP_JAMP(2525) ! used 2 times + TMP_JAMP(2834) = TMP_JAMP(2577) + TMP_JAMP(2567) ! used 2 times + TMP_JAMP(2833) = TMP_JAMP(2576) - TMP_JAMP(2521) ! 
used 2 times + TMP_JAMP(2832) = TMP_JAMP(2575) + TMP_JAMP(2564) ! used 2 times + TMP_JAMP(2831) = TMP_JAMP(2574) - TMP_JAMP(2566) ! used 2 times + TMP_JAMP(2830) = TMP_JAMP(2574) + TMP_JAMP(2529) ! used 2 times + TMP_JAMP(2829) = TMP_JAMP(2567) + TMP_JAMP(2389) ! used 2 times + TMP_JAMP(2828) = TMP_JAMP(2566) + TMP_JAMP(2529) ! used 2 times + TMP_JAMP(2827) = TMP_JAMP(2564) + TMP_JAMP(2528) ! used 2 times + TMP_JAMP(2826) = TMP_JAMP(2559) - TMP_JAMP(2533) ! used 2 times + TMP_JAMP(2825) = TMP_JAMP(2557) + TMP_JAMP(2542) ! used 2 times + TMP_JAMP(2824) = TMP_JAMP(2554) + TMP_JAMP(2535) ! used 2 times + TMP_JAMP(2823) = TMP_JAMP(2552) - TMP_JAMP(2545) ! used 2 times + TMP_JAMP(2822) = TMP_JAMP(2552) + TMP_JAMP(2423) ! used 2 times + TMP_JAMP(2821) = TMP_JAMP(2548) + TMP_JAMP(2530) ! used 2 times + TMP_JAMP(2820) = TMP_JAMP(2547) - TMP_JAMP(2415) ! used 2 times + TMP_JAMP(2819) = TMP_JAMP(2547) - TMP_JAMP(2532) ! used 2 times + TMP_JAMP(2818) = TMP_JAMP(2546) - TMP_JAMP(2542) ! used 2 times + TMP_JAMP(2817) = TMP_JAMP(2546) + TMP_JAMP(2421) ! used 2 times + TMP_JAMP(2816) = TMP_JAMP(2546) + TMP_JAMP(2538) ! used 2 times + TMP_JAMP(2815) = TMP_JAMP(2545) + TMP_JAMP(2423) ! used 2 times + TMP_JAMP(2814) = TMP_JAMP(2543) - TMP_JAMP(2530) ! used 2 times + TMP_JAMP(2813) = TMP_JAMP(2542) + TMP_JAMP(2421) ! used 2 times + TMP_JAMP(2812) = TMP_JAMP(2541) - TMP_JAMP(2539) ! used 2 times + TMP_JAMP(2811) = TMP_JAMP(2538) - TMP_JAMP(2327) ! used 2 times + TMP_JAMP(2810) = TMP_JAMP(2537) + TMP_JAMP(2235) ! used 2 times + TMP_JAMP(2809) = TMP_JAMP(2536) + TMP_JAMP(2529) ! used 2 times + TMP_JAMP(2808) = TMP_JAMP(2532) - TMP_JAMP(2530) ! used 2 times + TMP_JAMP(2807) = TMP_JAMP(2532) + TMP_JAMP(2528) ! used 2 times + TMP_JAMP(2806) = TMP_JAMP(2530) + TMP_JAMP(2415) ! used 2 times + TMP_JAMP(2805) = TMP_JAMP(2528) - TMP_JAMP(2460) ! used 2 times + TMP_JAMP(2804) = TMP_JAMP(2527) - TMP_JAMP(2523) ! used 2 times + TMP_JAMP(2803) = TMP_JAMP(2527) + TMP_JAMP(2519) ! 
used 2 times + TMP_JAMP(2802) = TMP_JAMP(2527) + TMP_JAMP(2509) ! used 2 times + TMP_JAMP(2801) = TMP_JAMP(2526) - TMP_JAMP(2522) ! used 2 times + TMP_JAMP(2800) = TMP_JAMP(2526) + TMP_JAMP(2518) ! used 2 times + TMP_JAMP(2799) = TMP_JAMP(2525) - TMP_JAMP(2521) ! used 2 times + TMP_JAMP(2798) = TMP_JAMP(2525) + TMP_JAMP(2517) ! used 2 times + TMP_JAMP(2797) = TMP_JAMP(2524) - TMP_JAMP(2520) ! used 2 times + TMP_JAMP(2796) = TMP_JAMP(2524) + TMP_JAMP(2516) ! used 2 times + TMP_JAMP(2795) = TMP_JAMP(2523) + TMP_JAMP(2519) ! used 2 times + TMP_JAMP(2794) = TMP_JAMP(2522) + TMP_JAMP(2518) ! used 2 times + TMP_JAMP(2793) = TMP_JAMP(2521) + TMP_JAMP(2517) ! used 2 times + TMP_JAMP(2792) = TMP_JAMP(2520) + TMP_JAMP(2516) ! used 2 times + TMP_JAMP(2791) = TMP_JAMP(2518) + TMP_JAMP(2226) ! used 2 times + TMP_JAMP(2790) = TMP_JAMP(2517) + TMP_JAMP(2226) ! used 2 times + TMP_JAMP(2789) = TMP_JAMP(2517) + TMP_JAMP(2335) ! used 2 times + TMP_JAMP(2788) = TMP_JAMP(2515) + TMP_JAMP(2379) ! used 2 times + TMP_JAMP(2787) = TMP_JAMP(2514) - TMP_JAMP(2504) ! used 2 times + TMP_JAMP(2786) = TMP_JAMP(2511) + TMP_JAMP(2239) ! used 2 times + TMP_JAMP(2785) = TMP_JAMP(2510) + TMP_JAMP(2436) ! used 2 times + TMP_JAMP(2784) = TMP_JAMP(2508) - TMP_JAMP(2503) ! used 2 times + TMP_JAMP(2783) = TMP_JAMP(2508) + TMP_JAMP(2477) ! used 2 times + TMP_JAMP(2782) = TMP_JAMP(2505) - TMP_JAMP(2501) ! used 2 times + TMP_JAMP(2781) = TMP_JAMP(2504) + TMP_JAMP(2478) ! used 2 times + TMP_JAMP(2780) = TMP_JAMP(2501) + TMP_JAMP(2475) ! used 2 times + TMP_JAMP(2779) = TMP_JAMP(2496) + TMP_JAMP(2480) ! used 2 times + TMP_JAMP(2778) = TMP_JAMP(2494) + TMP_JAMP(2481) ! used 2 times + TMP_JAMP(2777) = TMP_JAMP(2492) - TMP_JAMP(2486) ! used 2 times + TMP_JAMP(2776) = TMP_JAMP(2492) + TMP_JAMP(2414) ! used 2 times + TMP_JAMP(2775) = TMP_JAMP(2491) + TMP_JAMP(2397) ! used 2 times + TMP_JAMP(2774) = TMP_JAMP(2490) - TMP_JAMP(2482) ! used 2 times + TMP_JAMP(2773) = TMP_JAMP(2488) - TMP_JAMP(2409) ! 
used 2 times + TMP_JAMP(2772) = TMP_JAMP(2488) - TMP_JAMP(2479) ! used 2 times + TMP_JAMP(2771) = TMP_JAMP(2487) - TMP_JAMP(2483) ! used 2 times + TMP_JAMP(2770) = TMP_JAMP(2487) + TMP_JAMP(2413) ! used 2 times + TMP_JAMP(2769) = TMP_JAMP(2486) + TMP_JAMP(2414) ! used 2 times + TMP_JAMP(2768) = TMP_JAMP(2485) - TMP_JAMP(2397) ! used 2 times + TMP_JAMP(2767) = TMP_JAMP(2483) + TMP_JAMP(2413) ! used 2 times + TMP_JAMP(2766) = TMP_JAMP(2482) + TMP_JAMP(2478) ! used 2 times + TMP_JAMP(2765) = TMP_JAMP(2482) + TMP_JAMP(2412) ! used 2 times + TMP_JAMP(2764) = TMP_JAMP(2482) + TMP_JAMP(2236) ! used 2 times + TMP_JAMP(2763) = TMP_JAMP(2481) + TMP_JAMP(2477) ! used 2 times + TMP_JAMP(2762) = TMP_JAMP(2480) + TMP_JAMP(2476) ! used 2 times + TMP_JAMP(2761) = TMP_JAMP(2479) - TMP_JAMP(2392) ! used 2 times + TMP_JAMP(2760) = TMP_JAMP(2479) + TMP_JAMP(2475) ! used 2 times + TMP_JAMP(2759) = TMP_JAMP(2477) - TMP_JAMP(2229) ! used 2 times + TMP_JAMP(2758) = TMP_JAMP(2477) - TMP_JAMP(2298) ! used 2 times + TMP_JAMP(2757) = TMP_JAMP(2476) - TMP_JAMP(2299) ! used 2 times + TMP_JAMP(2756) = TMP_JAMP(2476) - TMP_JAMP(2229) ! used 2 times + TMP_JAMP(2755) = TMP_JAMP(2475) - TMP_JAMP(2470) ! used 2 times + TMP_JAMP(2754) = TMP_JAMP(2474) + TMP_JAMP(2463) ! used 2 times + TMP_JAMP(2753) = TMP_JAMP(2473) + TMP_JAMP(2396) ! used 2 times + TMP_JAMP(2752) = TMP_JAMP(2472) - TMP_JAMP(2469) ! used 2 times + TMP_JAMP(2751) = TMP_JAMP(2472) + TMP_JAMP(2462) ! used 2 times + TMP_JAMP(2750) = TMP_JAMP(2471) - TMP_JAMP(2468) ! used 2 times + TMP_JAMP(2749) = TMP_JAMP(2471) + TMP_JAMP(2461) ! used 2 times + TMP_JAMP(2748) = TMP_JAMP(2470) + TMP_JAMP(2460) ! used 2 times + TMP_JAMP(2747) = TMP_JAMP(2470) - TMP_JAMP(2236) ! used 2 times + TMP_JAMP(2746) = TMP_JAMP(2469) + TMP_JAMP(2462) ! used 2 times + TMP_JAMP(2745) = TMP_JAMP(2468) + TMP_JAMP(2461) ! used 2 times + TMP_JAMP(2744) = TMP_JAMP(2466) - TMP_JAMP(2236) ! used 2 times + TMP_JAMP(2743) = TMP_JAMP(2465) + TMP_JAMP(2386) ! 
used 2 times + TMP_JAMP(2742) = TMP_JAMP(2464) - TMP_JAMP(2236) ! used 2 times + TMP_JAMP(2741) = TMP_JAMP(2464) - TMP_JAMP(2229) ! used 2 times + TMP_JAMP(2740) = TMP_JAMP(2463) - TMP_JAMP(2235) ! used 2 times + TMP_JAMP(2739) = TMP_JAMP(2463) - TMP_JAMP(2326) ! used 2 times + TMP_JAMP(2738) = TMP_JAMP(2460) - TMP_JAMP(2235) ! used 2 times + TMP_JAMP(2737) = TMP_JAMP(2459) - TMP_JAMP(2235) ! used 2 times + TMP_JAMP(2736) = TMP_JAMP(2458) - TMP_JAMP(2235) ! used 2 times + TMP_JAMP(2735) = TMP_JAMP(2458) - TMP_JAMP(2226) ! used 2 times + TMP_JAMP(2734) = TMP_JAMP(2455) + TMP_JAMP(2431) ! used 2 times + TMP_JAMP(2733) = TMP_JAMP(2454) + TMP_JAMP(2433) ! used 2 times + TMP_JAMP(2732) = TMP_JAMP(2454) - TMP_JAMP(2449) ! used 2 times + TMP_JAMP(2731) = TMP_JAMP(2453) - TMP_JAMP(2429) ! used 2 times + TMP_JAMP(2730) = TMP_JAMP(2451) - TMP_JAMP(2440) ! used 2 times + TMP_JAMP(2729) = TMP_JAMP(2450) + TMP_JAMP(2429) ! used 2 times + TMP_JAMP(2728) = TMP_JAMP(2448) - AMP(1632) ! used 2 times + TMP_JAMP(2727) = TMP_JAMP(2445) + TMP_JAMP(2406) ! used 2 times + TMP_JAMP(2726) = TMP_JAMP(2442) + TMP_JAMP(2432) ! used 2 times + TMP_JAMP(2725) = TMP_JAMP(2441) - TMP_JAMP(2437) ! used 2 times + TMP_JAMP(2724) = TMP_JAMP(2441) + TMP_JAMP(2407) ! used 2 times + TMP_JAMP(2723) = TMP_JAMP(2440) + TMP_JAMP(2408) ! used 2 times + TMP_JAMP(2722) = TMP_JAMP(2439) + TMP_JAMP(2430) ! used 2 times + TMP_JAMP(2721) = TMP_JAMP(2438) + TMP_JAMP(2427) ! used 2 times + TMP_JAMP(2720) = TMP_JAMP(2437) + TMP_JAMP(2407) ! used 2 times + TMP_JAMP(2719) = TMP_JAMP(2434) - AMP(1620) ! used 2 times + TMP_JAMP(2718) = TMP_JAMP(2431) + AMP(1623) ! used 2 times + TMP_JAMP(2717) = TMP_JAMP(2430) + TMP_JAMP(2406) ! used 2 times + TMP_JAMP(2716) = TMP_JAMP(2428) + TMP_JAMP(2272) ! used 2 times + TMP_JAMP(2715) = TMP_JAMP(2427) + TMP_JAMP(2403) ! used 2 times + TMP_JAMP(2714) = TMP_JAMP(2426) + TMP_JAMP(2299) ! used 2 times + TMP_JAMP(2713) = TMP_JAMP(2424) + TMP_JAMP(2298) ! 
used 2 times + TMP_JAMP(2712) = TMP_JAMP(2424) + TMP_JAMP(2335) ! used 2 times + TMP_JAMP(2711) = TMP_JAMP(2423) + TMP_JAMP(2410) ! used 2 times + TMP_JAMP(2710) = TMP_JAMP(2421) + TMP_JAMP(2411) ! used 2 times + TMP_JAMP(2709) = TMP_JAMP(2419) + TMP_JAMP(2404) ! used 2 times + TMP_JAMP(2708) = TMP_JAMP(2418) - TMP_JAMP(2408) ! used 2 times + TMP_JAMP(2707) = TMP_JAMP(2418) + TMP_JAMP(2413) ! used 2 times + TMP_JAMP(2706) = TMP_JAMP(2417) + TMP_JAMP(2414) ! used 2 times + TMP_JAMP(2705) = TMP_JAMP(2416) - TMP_JAMP(2407) ! used 2 times + TMP_JAMP(2704) = TMP_JAMP(2415) + TMP_JAMP(2405) ! used 2 times + TMP_JAMP(2703) = TMP_JAMP(2412) + TMP_JAMP(2397) ! used 2 times + TMP_JAMP(2702) = TMP_JAMP(2412) + TMP_JAMP(2405) ! used 2 times + TMP_JAMP(2701) = TMP_JAMP(2411) + TMP_JAMP(2406) ! used 2 times + TMP_JAMP(2700) = TMP_JAMP(2410) + TMP_JAMP(2403) ! used 2 times + TMP_JAMP(2699) = TMP_JAMP(2409) + TMP_JAMP(2404) ! used 2 times + TMP_JAMP(2698) = TMP_JAMP(2409) + TMP_JAMP(2392) ! used 2 times + TMP_JAMP(2697) = TMP_JAMP(2400) + TMP_JAMP(2242) ! used 2 times + TMP_JAMP(2696) = TMP_JAMP(2400) + TMP_JAMP(2229) ! used 2 times + TMP_JAMP(2695) = TMP_JAMP(2398) + TMP_JAMP(2239) ! used 2 times + TMP_JAMP(2694) = TMP_JAMP(2398) + TMP_JAMP(2226) ! used 2 times + TMP_JAMP(2693) = TMP_JAMP(2396) + TMP_JAMP(2326) ! used 2 times + TMP_JAMP(2692) = TMP_JAMP(2396) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1454) ! used 2 times + TMP_JAMP(2691) = TMP_JAMP(2395) - TMP_JAMP(2390) ! used 2 times + TMP_JAMP(2690) = TMP_JAMP(2395) + TMP_JAMP(2232) ! used 2 times + TMP_JAMP(2689) = TMP_JAMP(2395) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1451) ! used 2 times + TMP_JAMP(2688) = TMP_JAMP(2394) + TMP_JAMP(2234) ! used 2 times + TMP_JAMP(2687) = TMP_JAMP(2394) + TMP_JAMP(2232) ! used 2 times + TMP_JAMP(2686) = TMP_JAMP(2392) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1433) ! 
used 2 times + TMP_JAMP(2685) = TMP_JAMP(2391) + TMP_JAMP(2239) ! used 2 times + TMP_JAMP(2684) = TMP_JAMP(2391) + TMP_JAMP(2226) ! used 2 times + TMP_JAMP(2683) = TMP_JAMP(2391) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1430) ! used 2 times + TMP_JAMP(2682) = TMP_JAMP(2389) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1478) ! used 2 times + TMP_JAMP(2681) = TMP_JAMP(2386) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1460) ! used 2 times + TMP_JAMP(2680) = TMP_JAMP(2385) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1463) ! used 2 times + TMP_JAMP(2679) = TMP_JAMP(2384) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1469) ! used 2 times + TMP_JAMP(2678) = TMP_JAMP(2382) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(1472) ! used 2 times + TMP_JAMP(2677) = TMP_JAMP(2351) + TMP_JAMP(2229) ! used 2 times + TMP_JAMP(2676) = TMP_JAMP(2308) + TMP_JAMP(2226) ! used 2 times + TMP_JAMP(2911) = TMP_JAMP(2861) + TMP_JAMP(2631) ! used 2 times + TMP_JAMP(2910) = TMP_JAMP(2856) + TMP_JAMP(2680) ! used 2 times + TMP_JAMP(2909) = TMP_JAMP(2840) + TMP_JAMP(2637) ! used 2 times + TMP_JAMP(2908) = TMP_JAMP(2831) + TMP_JAMP(2581) ! used 2 times + TMP_JAMP(2907) = TMP_JAMP(2802) + TMP_JAMP(2632) ! used 2 times + TMP_JAMP(2906) = TMP_JAMP(2801) - TMP_JAMP(2579) ! used 2 times + TMP_JAMP(2905) = TMP_JAMP(2799) - TMP_JAMP(2581) ! used 2 times + TMP_JAMP(2904) = TMP_JAMP(2792) + TMP_JAMP(2563) ! used 2 times + TMP_JAMP(2903) = TMP_JAMP(2784) - TMP_JAMP(2622) ! used 2 times + TMP_JAMP(2902) = TMP_JAMP(2782) + TMP_JAMP(2631) ! used 2 times + TMP_JAMP(2901) = TMP_JAMP(2781) + TMP_JAMP(2628) ! used 2 times + TMP_JAMP(2900) = TMP_JAMP(2777) - TMP_JAMP(2500) ! used 2 times + TMP_JAMP(2899) = TMP_JAMP(2771) - TMP_JAMP(2498) ! used 2 times + TMP_JAMP(2898) = TMP_JAMP(2754) - TMP_JAMP(2601) ! used 2 times + TMP_JAMP(2897) = TMP_JAMP(2752) - TMP_JAMP(2610) ! 
used 2 times + TMP_JAMP(2896) = TMP_JAMP(2750) - TMP_JAMP(2613) ! used 2 times + TMP_JAMP(2895) = TMP_JAMP(2748) - TMP_JAMP(2598) ! used 2 times + TMP_JAMP(2894) = TMP_JAMP(2746) - TMP_JAMP(2605) ! used 2 times + TMP_JAMP(2893) = TMP_JAMP(2744) + TMP_JAMP(2589) ! used 2 times + TMP_JAMP(2892) = TMP_JAMP(2742) + TMP_JAMP(2585) ! used 2 times + TMP_JAMP(2891) = TMP_JAMP(2741) + TMP_JAMP(2583) ! used 2 times + TMP_JAMP(2890) = TMP_JAMP(2737) + TMP_JAMP(2514) ! used 2 times + TMP_JAMP(2889) = TMP_JAMP(2736) + TMP_JAMP(2511) ! used 2 times + TMP_JAMP(2888) = TMP_JAMP(2733) + TMP_JAMP(2507) ! used 2 times + TMP_JAMP(2887) = TMP_JAMP(2725) - TMP_JAMP(2453) ! used 2 times + TMP_JAMP(2886) = TMP_JAMP(2709) - TMP_JAMP(2422) ! used 2 times + TMP_JAMP(2885) = TMP_JAMP(2705) - TMP_JAMP(2425) ! used 2 times + TMP_JAMP(2884) = TMP_JAMP(2697) + TMP_JAMP(2589) ! used 2 times + TMP_JAMP(2883) = TMP_JAMP(2694) - TMP_JAMP(2459) ! used 2 times + TMP_JAMP(2882) = TMP_JAMP(2688) + TMP_JAMP(2637) ! used 2 times + TMP_JAMP(2941) = TMP_JAMP(2910) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(2063) ! used 2 times + TMP_JAMP(2940) = TMP_JAMP(2903) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1850) ! used 2 times + TMP_JAMP(2939) = TMP_JAMP(2893) - TMP_JAMP(1653) ! used 2 times + TMP_JAMP(2938) = TMP_JAMP(2891) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1985) ! used 2 times + TMP_JAMP(2937) = TMP_JAMP(2890) - TMP_JAMP(1860) ! used 2 times + TMP_JAMP(2936) = TMP_JAMP(2889) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1998) ! used 2 times + TMP_JAMP(2935) = TMP_JAMP(2878) - TMP_JAMP(2123) ! used 2 times + TMP_JAMP(2934) = TMP_JAMP(2876) + TMP_JAMP(1936) ! used 2 times + TMP_JAMP(2933) = TMP_JAMP(2852) + TMP_JAMP(2140) ! used 2 times + TMP_JAMP(2932) = TMP_JAMP(2716) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1992) ! used 2 times + TMP_JAMP(2931) = TMP_JAMP(2696) - TMP_JAMP(1657) ! 
used 2 times + TMP_JAMP(2930) = TMP_JAMP(2692) - TMP_JAMP(2033) ! used 2 times + TMP_JAMP(2929) = TMP_JAMP(2682) - TMP_JAMP(1802) ! used 2 times + TMP_JAMP(2928) = TMP_JAMP(2681) - TMP_JAMP(1873) ! used 2 times + TMP_JAMP(2927) = TMP_JAMP(2676) + TMP_JAMP(2103) ! used 2 times + TMP_JAMP(2926) = TMP_JAMP(2675) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(2153) ! used 2 times + TMP_JAMP(2925) = TMP_JAMP(2673) - TMP_JAMP(2131) ! used 2 times + TMP_JAMP(2924) = TMP_JAMP(2672) + TMP_JAMP(2133) ! used 2 times + TMP_JAMP(2923) = TMP_JAMP(2653) + TMP_JAMP(1929) ! used 2 times + TMP_JAMP(2922) = TMP_JAMP(2649) + TMP_JAMP(2096) ! used 2 times + TMP_JAMP(2921) = TMP_JAMP(2644) - TMP_JAMP(2100) ! used 2 times + TMP_JAMP(2920) = TMP_JAMP(2643) - TMP_JAMP(2102) ! used 2 times + TMP_JAMP(2919) = TMP_JAMP(2640) + TMP_JAMP(2058) ! used 2 times + TMP_JAMP(2918) = TMP_JAMP(2639) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1796) ! used 2 times + TMP_JAMP(2917) = TMP_JAMP(2428) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(2112) ! used 2 times + TMP_JAMP(2916) = TMP_JAMP(2380) - TMP_JAMP(2152) ! used 2 times + TMP_JAMP(2915) = TMP_JAMP(2378) + TMP_JAMP(1958) ! used 2 times + TMP_JAMP(2914) = TMP_JAMP(2351) + TMP_JAMP(1665) ! used 2 times + TMP_JAMP(2913) = TMP_JAMP(2310) + TMP_JAMP(2134) ! used 2 times + TMP_JAMP(2912) = TMP_JAMP(2073) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(1483) ! used 2 times + TMP_JAMP(3030) = TMP_JAMP(2935) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1044) ! used 2 times + TMP_JAMP(3029) = TMP_JAMP(2934) - TMP_JAMP(329) ! used 2 times + TMP_JAMP(3028) = TMP_JAMP(2926) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(451) ! used 2 times + TMP_JAMP(3027) = TMP_JAMP(2907) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(586) ! 
used 2 times + TMP_JAMP(3026) = TMP_JAMP(2898) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1238) ! used 2 times + TMP_JAMP(3025) = TMP_JAMP(2894) - TMP_JAMP(1130) ! used 2 times + TMP_JAMP(3024) = TMP_JAMP(2888) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(587) ! used 2 times + TMP_JAMP(3023) = TMP_JAMP(2886) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1402) ! used 2 times + TMP_JAMP(3022) = TMP_JAMP(2885) - TMP_JAMP(1095) ! used 2 times + TMP_JAMP(3021) = TMP_JAMP(2862) + TMP_JAMP(580) ! used 2 times + TMP_JAMP(3020) = TMP_JAMP(2850) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1364) ! used 2 times + TMP_JAMP(3019) = TMP_JAMP(2849) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1411) ! used 2 times + TMP_JAMP(3018) = TMP_JAMP(2845) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1398) ! used 2 times + TMP_JAMP(3017) = TMP_JAMP(2834) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(582) ! used 2 times + TMP_JAMP(3016) = TMP_JAMP(2826) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1174) ! used 2 times + TMP_JAMP(3015) = TMP_JAMP(2812) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1360) ! used 2 times + TMP_JAMP(3014) = TMP_JAMP(2809) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1409) ! used 2 times + TMP_JAMP(3013) = TMP_JAMP(2807) + TMP_JAMP(1113) ! used 2 times + TMP_JAMP(3012) = TMP_JAMP(2785) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(588) ! used 2 times + TMP_JAMP(3011) = TMP_JAMP(2760) + TMP_JAMP(1329) ! used 2 times + TMP_JAMP(3010) = TMP_JAMP(2751) - TMP_JAMP(1131) ! used 2 times + TMP_JAMP(3009) = TMP_JAMP(2749) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1366) ! used 2 times + TMP_JAMP(3008) = TMP_JAMP(2734) + TMP_JAMP(593) ! used 2 times + TMP_JAMP(3007) = TMP_JAMP(2732) - TMP_JAMP(358) ! 
used 2 times + TMP_JAMP(3006) = TMP_JAMP(2728) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(590) ! used 2 times + TMP_JAMP(3005) = TMP_JAMP(2726) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(583) ! used 2 times + TMP_JAMP(3004) = TMP_JAMP(2719) + TMP_JAMP(592) ! used 2 times + TMP_JAMP(3003) = TMP_JAMP(2718) + TMP_JAMP(345) ! used 2 times + TMP_JAMP(3002) = TMP_JAMP(2711) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1193) ! used 2 times + TMP_JAMP(3001) = TMP_JAMP(2710) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1145) ! used 2 times + TMP_JAMP(3000) = TMP_JAMP(2708) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1400) ! used 2 times + TMP_JAMP(2999) = TMP_JAMP(2706) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1167) ! used 2 times + TMP_JAMP(2998) = TMP_JAMP(2704) + TMP_JAMP(1088) ! used 2 times + TMP_JAMP(2997) = TMP_JAMP(2702) + TMP_JAMP(1412) ! used 2 times + TMP_JAMP(2996) = TMP_JAMP(2701) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * TMP_JAMP(1390) ! used 2 times - TMP_JAMP(2681) = TMP_JAMP(2156) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1710) ! used 2 times - TMP_JAMP(2680) = TMP_JAMP(2151) + AMP(1702) ! used 2 times - TMP_JAMP(2679) = TMP_JAMP(2152) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(2150) ! used 2 times - TMP_JAMP(2678) = TMP_JAMP(2145) - AMP(1703) ! used 2 times - TMP_JAMP(2677) = TMP_JAMP(2142) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(2141) ! used 2 times - TMP_JAMP(2676) = TMP_JAMP(2130) + TMP_JAMP(2129) ! used 2 times - TMP_JAMP(2675) = TMP_JAMP(2121) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(333) ! used 2 times - TMP_JAMP(2674) = TMP_JAMP(2123) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(2120) ! 
used 2 times - TMP_JAMP(2673) = TMP_JAMP(2112) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(650) ! used 2 times - TMP_JAMP(2672) = TMP_JAMP(2111) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(2109) ! used 2 times - TMP_JAMP(2671) = TMP_JAMP(2107) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(2106) ! used 2 times - TMP_JAMP(2670) = TMP_JAMP(2103) - AMP(982) ! used 2 times - TMP_JAMP(2669) = TMP_JAMP(2104) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(2100) ! used 2 times - TMP_JAMP(2668) = TMP_JAMP(2091) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(2090) ! used 2 times - TMP_JAMP(2667) = TMP_JAMP(2086) + TMP_JAMP(2082) ! used 2 times - TMP_JAMP(2666) = TMP_JAMP(2087) - TMP_JAMP(2085) ! used 2 times - TMP_JAMP(2665) = TMP_JAMP(2081) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1530) ! used 2 times - TMP_JAMP(2664) = TMP_JAMP(2080) + TMP_JAMP(2076) ! used 2 times - TMP_JAMP(2663) = TMP_JAMP(2073) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(240) ! used 2 times - TMP_JAMP(2662) = TMP_JAMP(2068) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(170) ! used 2 times - TMP_JAMP(2661) = TMP_JAMP(2071) + TMP_JAMP(2066) ! used 2 times - TMP_JAMP(2660) = TMP_JAMP(2061) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(249) ! used 2 times - TMP_JAMP(2659) = TMP_JAMP(2063) - TMP_JAMP(2062) ! used 2 times - TMP_JAMP(2658) = TMP_JAMP(2065) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(2064) ! used 2 times - TMP_JAMP(2657) = TMP_JAMP(2060) + TMP_JAMP(2059) ! used 2 times - TMP_JAMP(2656) = TMP_JAMP(2056) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(303) ! used 2 times - TMP_JAMP(2655) = TMP_JAMP(2050) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1774) ! used 2 times - TMP_JAMP(2654) = TMP_JAMP(2051) - AMP(298) ! 
used 2 times - TMP_JAMP(2653) = TMP_JAMP(2054) + TMP_JAMP(2052) ! used 2 times - TMP_JAMP(2652) = TMP_JAMP(2045) - AMP(299) ! used 2 times - TMP_JAMP(2651) = TMP_JAMP(2047) + TMP_JAMP(2046) ! used 2 times - TMP_JAMP(2650) = TMP_JAMP(2042) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(2041) ! used 2 times - TMP_JAMP(2649) = TMP_JAMP(2043) - TMP_JAMP(2040) ! used 2 times - TMP_JAMP(2648) = TMP_JAMP(2031) + TMP_JAMP(2030) ! used 2 times - TMP_JAMP(2647) = TMP_JAMP(2029) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(2028) ! used 2 times - TMP_JAMP(2646) = TMP_JAMP(2027) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(2026) ! used 2 times - TMP_JAMP(2645) = TMP_JAMP(2017) - AMP(532) ! used 2 times - TMP_JAMP(2644) = TMP_JAMP(2013) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(307) ! used 2 times - TMP_JAMP(2643) = TMP_JAMP(1995) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(1994) ! used 2 times - TMP_JAMP(2642) = TMP_JAMP(1993) + TMP_JAMP(1992) ! used 2 times - TMP_JAMP(2768) = TMP_JAMP(2742) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(1890) ! used 2 times - TMP_JAMP(2767) = TMP_JAMP(2737) + TMP_JAMP(2575) ! used 2 times - TMP_JAMP(2766) = TMP_JAMP(2724) + AMP(1573) ! used 2 times - TMP_JAMP(2765) = TMP_JAMP(2720) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(2719) ! used 2 times - TMP_JAMP(2764) = TMP_JAMP(2717) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(2493) ! used 2 times - TMP_JAMP(2763) = TMP_JAMP(2716) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(2714) ! used 2 times - TMP_JAMP(2762) = TMP_JAMP(2713) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(2712) ! used 2 times - TMP_JAMP(2761) = TMP_JAMP(2704) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(2703) ! used 2 times - TMP_JAMP(2760) = TMP_JAMP(2700) - TMP_JAMP(2284) ! 
used 2 times - TMP_JAMP(2759) = TMP_JAMP(2697) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(2252) ! used 2 times - TMP_JAMP(2758) = TMP_JAMP(2695) - TMP_JAMP(2241) ! used 2 times - TMP_JAMP(2757) = TMP_JAMP(2691) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1565) ! used 2 times - TMP_JAMP(2756) = TMP_JAMP(2689) + TMP_JAMP(2210) ! used 2 times - TMP_JAMP(2755) = TMP_JAMP(2685) + TMP_JAMP(2178) ! used 2 times - TMP_JAMP(2754) = TMP_JAMP(2676) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(652) ! used 2 times - TMP_JAMP(2753) = TMP_JAMP(2672) + AMP(1005) ! used 2 times - TMP_JAMP(2752) = TMP_JAMP(2668) - TMP_JAMP(2089) ! used 2 times - TMP_JAMP(2751) = TMP_JAMP(2666) + TMP_JAMP(2083) ! used 2 times - TMP_JAMP(2750) = TMP_JAMP(2659) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(848) ! used 2 times - TMP_JAMP(2749) = TMP_JAMP(2657) - TMP_JAMP(2058) ! used 2 times - TMP_JAMP(2748) = TMP_JAMP(2655) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(2653) ! used 2 times - TMP_JAMP(2747) = TMP_JAMP(2649) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(231) ! used 2 times - TMP_JAMP(2746) = TMP_JAMP(2648) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(211) ! used 2 times - TMP_JAMP(2745) = TMP_JAMP(2646) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(173) ! 
used 2 times - JAMP(1,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(190)+(-1.000000000000000D+00)*AMP(251) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2008) - $ +TMP_JAMP(2642)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2643)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2665) - JAMP(2,1) = (-1.000000000000000D+00)*AMP(242) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(247)+( - $ -1.000000000000000D+00)*TMP_JAMP(841)+(-1.000000000000000D+00) - $ *TMP_JAMP(842)+TMP_JAMP(2000)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2011)+(-1.000000000000000D+00) - $ *TMP_JAMP(2642)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2658)+TMP_JAMP(2667) - JAMP(3,1) = (-1.000000000000000D+00)*AMP(250) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(268) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1504) - $ +(-1.000000000000000D+00)*TMP_JAMP(1997)+(-1.000000000000000D - $ +00)*TMP_JAMP(2088)+((0.000000000000000D+00,1.000000000000000D - $ +00))*TMP_JAMP(2643)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2647) - JAMP(4,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(101)+(-1.000000000000000D+00)*AMP(233)+( - $ -1.000000000000000D+00)*TMP_JAMP(935)+(-1.000000000000000D+00) - $ *TMP_JAMP(1996)+(-1.000000000000000D+00)*TMP_JAMP(1998) - $ +TMP_JAMP(2006)+TMP_JAMP(2075)+(-1.000000000000000D+00) - $ *TMP_JAMP(2093)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2746) - JAMP(5,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(112)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(226)+(-1.000000000000000D+00)*AMP(241)+( - $ -1.000000000000000D+00)*AMP(301)+(-1.000000000000000D+00) - $ *TMP_JAMP(1596)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1681)+(-1.000000000000000D+00)*TMP_JAMP(1999)+( - $ -1.000000000000000D+00)*TMP_JAMP(2001)+TMP_JAMP(2003) - $ 
+TMP_JAMP(2007)+TMP_JAMP(2012)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2021)+TMP_JAMP(2097) - $ +TMP_JAMP(2532) - JAMP(6,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(223)+(-1.000000000000000D+00)*AMP(232) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(238) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(259) - $ +AMP(1630)+(-1.000000000000000D+00)*TMP_JAMP(757) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1688) - $ +(-1.000000000000000D+00)*TMP_JAMP(2002)+TMP_JAMP(2004) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2005) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2473) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2651) - $ +(-1.000000000000000D+00)*TMP_JAMP(2669) - JAMP(7,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(210)+(-1.000000000000000D+00)*AMP(253) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1353) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2008) - $ +(-1.000000000000000D+00)*TMP_JAMP(2009)+(-1.000000000000000D - $ +00)*TMP_JAMP(2015)+((0.000000000000000D+00,1.000000000000000D - $ +00))*TMP_JAMP(2644)+((0.000000000000000D+00,1.000000000000000D - $ +00))*TMP_JAMP(2686) - JAMP(8,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(187)+(-1.000000000000000D+00)*AMP(244)+TMP_JAMP(773) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(775) - $ +(-1.000000000000000D+00)*TMP_JAMP(1347)+(-1.000000000000000D - $ +00)*TMP_JAMP(1350)+((0.000000000000000D+00,-1.000000000000000D - $ +00))*TMP_JAMP(2010)+(-1.000000000000000D+00)*TMP_JAMP(2012) - $ +TMP_JAMP(2019)+TMP_JAMP(2084)+TMP_JAMP(2199)+TMP_JAMP(2543) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2644) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2656) - JAMP(9,1) = (-1.000000000000000D+00)*AMP(252)+( - $ -1.000000000000000D+00)*AMP(530)+AMP(1699)+((0.000000000000000D - $ 
+00,1.000000000000000D+00))*TMP_JAMP(1545)+TMP_JAMP(1552)+( - $ -1.000000000000000D+00)*TMP_JAMP(2014)+(-1.000000000000000D+00) - $ *TMP_JAMP(2016)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2038)+(-1.000000000000000D+00)*TMP_JAMP(2645) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2687) - JAMP(10,1) = (-1.000000000000000D+00)*AMP(161)+( - $ -1.000000000000000D+00)*TMP_JAMP(979)+TMP_JAMP(2204) - $ +TMP_JAMP(2645)+(-1.000000000000000D+00)*TMP_JAMP(2745) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2747) - JAMP(11,1) = (-1.000000000000000D+00)*AMP(243) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(606)+( - $ -1.000000000000000D+00)*TMP_JAMP(592)+(-1.000000000000000D+00) - $ *TMP_JAMP(827)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1941)+(-1.000000000000000D+00)*TMP_JAMP(2018) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2020) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2022) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2023) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2025) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2069) - $ +TMP_JAMP(2207)+TMP_JAMP(2660) + TMP_JAMP(2995) = TMP_JAMP(2700) - TMP_JAMP(1092) ! used 2 times + TMP_JAMP(2994) = TMP_JAMP(2668) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(284) ! used 2 times + TMP_JAMP(2993) = TMP_JAMP(2663) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1237) ! used 2 times + TMP_JAMP(2992) = TMP_JAMP(2626) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(591) ! used 2 times + TMP_JAMP(2991) = TMP_JAMP(2571) + TMP_JAMP(572) ! used 2 times + TMP_JAMP(2990) = TMP_JAMP(2568) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(581) ! used 2 times + TMP_JAMP(2989) = TMP_JAMP(2540) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1407) ! 
used 2 times + TMP_JAMP(2988) = TMP_JAMP(2534) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1341) ! used 2 times + TMP_JAMP(2987) = TMP_JAMP(2484) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1194) ! used 2 times + TMP_JAMP(2986) = TMP_JAMP(2435) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(585) ! used 2 times + TMP_JAMP(2985) = TMP_JAMP(2434) + TMP_JAMP(536) ! used 2 times + TMP_JAMP(2984) = TMP_JAMP(2420) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1403) ! used 2 times + TMP_JAMP(2983) = TMP_JAMP(2151) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(574) ! used 2 times + TMP_JAMP(2982) = TMP_JAMP(2150) - TMP_JAMP(423) ! used 2 times + TMP_JAMP(2981) = TMP_JAMP(2147) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(549) ! used 2 times + TMP_JAMP(2980) = TMP_JAMP(2132) + TMP_JAMP(376) ! used 2 times + TMP_JAMP(2979) = TMP_JAMP(2115) + TMP_JAMP(264) ! used 2 times + TMP_JAMP(2978) = TMP_JAMP(2114) - TMP_JAMP(263) ! used 2 times + TMP_JAMP(2977) = TMP_JAMP(2111) - TMP_JAMP(1197) ! used 2 times + TMP_JAMP(2976) = TMP_JAMP(2110) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1026) ! used 2 times + TMP_JAMP(2975) = TMP_JAMP(2107) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(413) ! used 2 times + TMP_JAMP(2974) = TMP_JAMP(2098) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(492) ! used 2 times + TMP_JAMP(2973) = TMP_JAMP(2080) - TMP_JAMP(1406) ! used 2 times + TMP_JAMP(2972) = TMP_JAMP(2079) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1395) ! used 2 times + TMP_JAMP(2971) = TMP_JAMP(2074) - TMP_JAMP(377) ! used 2 times + TMP_JAMP(2970) = TMP_JAMP(2056) + TMP_JAMP(530) ! used 2 times + TMP_JAMP(2969) = TMP_JAMP(2045) - TMP_JAMP(554) ! used 2 times + TMP_JAMP(2968) = TMP_JAMP(2034) + TMP_JAMP(552) ! 
used 2 times + TMP_JAMP(2967) = TMP_JAMP(1999) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(420) ! used 2 times + TMP_JAMP(2966) = TMP_JAMP(1997) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(425) ! used 2 times + TMP_JAMP(2965) = TMP_JAMP(1995) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1351) ! used 2 times + TMP_JAMP(2964) = TMP_JAMP(1984) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(415) ! used 2 times + TMP_JAMP(2963) = TMP_JAMP(1957) + TMP_JAMP(545) ! used 2 times + TMP_JAMP(2962) = TMP_JAMP(1940) - TMP_JAMP(374) ! used 2 times + TMP_JAMP(2961) = TMP_JAMP(1934) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(363) ! used 2 times + TMP_JAMP(2960) = TMP_JAMP(1918) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(577) ! used 2 times + TMP_JAMP(2959) = TMP_JAMP(1912) - TMP_JAMP(1416) ! used 2 times + TMP_JAMP(2958) = TMP_JAMP(1897) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1408) ! used 2 times + TMP_JAMP(2957) = TMP_JAMP(1878) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(928) ! used 2 times + TMP_JAMP(2956) = TMP_JAMP(1864) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1026) ! used 2 times + TMP_JAMP(2955) = TMP_JAMP(1854) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(396) ! used 2 times + TMP_JAMP(2954) = TMP_JAMP(1843) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(1417) ! used 2 times + TMP_JAMP(2953) = TMP_JAMP(1810) - TMP_JAMP(356) ! used 2 times + TMP_JAMP(2952) = TMP_JAMP(1803) - TMP_JAMP(1381) ! used 2 times + TMP_JAMP(2951) = TMP_JAMP(1800) - TMP_JAMP(409) ! used 2 times + TMP_JAMP(2950) = TMP_JAMP(1759) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1410) ! used 2 times + TMP_JAMP(2949) = TMP_JAMP(1757) - TMP_JAMP(372) ! 
used 2 times + TMP_JAMP(2948) = TMP_JAMP(1753) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(378) ! used 2 times + TMP_JAMP(2947) = TMP_JAMP(1714) + TMP_JAMP(401) ! used 2 times + TMP_JAMP(2946) = TMP_JAMP(1682) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(544) ! used 2 times + TMP_JAMP(2945) = TMP_JAMP(1610) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(578) ! used 2 times + TMP_JAMP(2944) = TMP_JAMP(1586) + TMP_JAMP(1389) ! used 2 times + TMP_JAMP(2943) = TMP_JAMP(1584) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(1320) ! used 2 times + TMP_JAMP(2942) = TMP_JAMP(1575) - TMP_JAMP(1414) ! used 2 times + JAMP(1,1) = (-1.000000000000000D+00)*AMP(251) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(289) + $ +TMP_JAMP(360)+TMP_JAMP(485)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(558)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(576)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*AMP(1489)+(-1.000000000000000D+00) + $ *TMP_JAMP(2911)+(-1.000000000000000D+00)*TMP_JAMP(2916)+( + $ -1.000000000000000D+00)*TMP_JAMP(2971)+TMP_JAMP(2994) + JAMP(2,1) = (-1.000000000000000D+00)*AMP(242)+( + $ -1.000000000000000D+00)*TMP_JAMP(359)+TMP_JAMP(388) + $ +TMP_JAMP(483)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(498)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(557)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(576)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1580)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *AMP(1480)+TMP_JAMP(2655)+(-1.000000000000000D+00) + $ *TMP_JAMP(2913)+(-1.000000000000000D+00)*TMP_JAMP(2940) + JAMP(3,1) = (-1.000000000000000D+00)*AMP(250)+( + $ -1.000000000000000D+00)*TMP_JAMP(484)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(495)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(558)+((0.000000000000000D+00 + $ 
,1.000000000000000D+00))*TMP_JAMP(575)+(-1.000000000000000D+00) + $ *TMP_JAMP(2590)+(-1.000000000000000D+00)*TMP_JAMP(2797)+( + $ -1.000000000000000D+00)*TMP_JAMP(2829)+(-1.000000000000000D+00) + $ *TMP_JAMP(2915)+TMP_JAMP(2953)+TMP_JAMP(2980) + JAMP(4,1) = (-1.000000000000000D+00)*AMP(233)+( + $ -1.000000000000000D+00)*TMP_JAMP(259)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(470)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(497)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(575)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(1589)+TMP_JAMP(1693) + $ +TMP_JAMP(2050)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *AMP(1471)+(-1.000000000000000D+00)*TMP_JAMP(2353) + $ +TMP_JAMP(2659)+TMP_JAMP(2905)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(2955)+TMP_JAMP(2960) + JAMP(5,1) = (-1.000000000000000D+00)*AMP(241) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(271) + $ +TMP_JAMP(386)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(515)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(557)+(-1.000000000000000D+00)*TMP_JAMP(1526)+( + $ -1.000000000000000D+00)*TMP_JAMP(1825)+TMP_JAMP(2268)+( + $ -1.000000000000000D+00)*TMP_JAMP(2308)+TMP_JAMP(2430)+( + $ -1.000000000000000D+00)*TMP_JAMP(2446)+(-1.000000000000000D+00) + $ *TMP_JAMP(2654)+(-1.000000000000000D+00)*TMP_JAMP(2963) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2978) + $ +(-1.000000000000000D+00)*TMP_JAMP(2985) + JAMP(6,1) = (-1.000000000000000D+00)*AMP(232) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(287) + $ +TMP_JAMP(387)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(523)+(-1.000000000000000D+00)*TMP_JAMP(2440)+( + $ -1.000000000000000D+00)*TMP_JAMP(2743)+(-1.000000000000000D+00) + $ *TMP_JAMP(2914)+(-1.000000000000000D+00)*TMP_JAMP(2960) + $ +TMP_JAMP(2963)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ 
*TMP_JAMP(2979)+(-1.000000000000000D+00)*TMP_JAMP(3007) + JAMP(7,1) = (-1.000000000000000D+00)*AMP(253)+( + $ -1.000000000000000D+00)*TMP_JAMP(147)+TMP_JAMP(373) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(573) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1393) + $ +(-1.000000000000000D+00)*TMP_JAMP(1592)+((0.000000000000000D + $ +00,1.000000000000000D+00))*TMP_JAMP(1987)+(-1.000000000000000D + $ +00)*TMP_JAMP(2627)+TMP_JAMP(2636)+TMP_JAMP(2844)+( + $ -1.000000000000000D+00)*TMP_JAMP(2855)+TMP_JAMP(2971)+( + $ -1.000000000000000D+00)*TMP_JAMP(2974) + JAMP(8,1) = (-1.000000000000000D+00)*AMP(244) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(117) + $ +(-1.000000000000000D+00)*TMP_JAMP(388)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(487)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(573)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(1405)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(1777)+((0.000000000000000D+00 + $ ,-1.000000000000000D+00))*TMP_JAMP(1850)+TMP_JAMP(2513)+( + $ -1.000000000000000D+00)*TMP_JAMP(2759)+TMP_JAMP(2854) + $ +TMP_JAMP(2924)+(-1.000000000000000D+00)*TMP_JAMP(2949) + JAMP(9,1) = (-1.000000000000000D+00)*AMP(252) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(272) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(283) + $ +(-1.000000000000000D+00)*TMP_JAMP(371)+(-1.000000000000000D+00) + $ *TMP_JAMP(373)+(-1.000000000000000D+00)*TMP_JAMP(379) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(490) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1397) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1752) + $ +(-1.000000000000000D+00)*TMP_JAMP(2055)+TMP_JAMP(2099)+( + $ -1.000000000000000D+00)*TMP_JAMP(2641)+TMP_JAMP(2661)+( + $ -1.000000000000000D+00)*TMP_JAMP(2667)+TMP_JAMP(2674) + $ +TMP_JAMP(2747) + JAMP(10,1) = (-1.000000000000000D+00)*AMP(161) + $ 
+((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(274) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(289) + $ +TMP_JAMP(371)+(-1.000000000000000D+00)*TMP_JAMP(546) + $ +TMP_JAMP(551)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1247)+(-1.000000000000000D+00)*TMP_JAMP(1385)+( + $ -1.000000000000000D+00)*TMP_JAMP(1621)+(-1.000000000000000D+00) + $ *TMP_JAMP(1702)+TMP_JAMP(1837)+TMP_JAMP(1974) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2139) + $ +TMP_JAMP(2677)+(-1.000000000000000D+00)*TMP_JAMP(2896) + JAMP(11,1) = AMP(213)+(-1.000000000000000D+00)*AMP(243) + $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(249) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(269) + $ +(-1.000000000000000D+00)*TMP_JAMP(391)+((0.000000000000000D+00 + $ ,-1.000000000000000D+00))*TMP_JAMP(488)+(-1.000000000000000D+00) + $ *TMP_JAMP(1415)+TMP_JAMP(1486)+(-1.000000000000000D+00) + $ *TMP_JAMP(1642)+TMP_JAMP(1816)+(-1.000000000000000D+00) + $ *TMP_JAMP(2276)+(-1.000000000000000D+00)*TMP_JAMP(2673)+( + $ -1.000000000000000D+00)*TMP_JAMP(2764)+TMP_JAMP(2775)+( + $ -1.000000000000000D+00)*TMP_JAMP(2923)+TMP_JAMP(2949) + $ +TMP_JAMP(2962) JAMP(12,1) = (-1.000000000000000D+00)*AMP(160) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(201) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(496) - $ +TMP_JAMP(663)+TMP_JAMP(824)+TMP_JAMP(828)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(1151)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*TMP_JAMP(1220) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1223) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2024) - $ +TMP_JAMP(2571)+TMP_JAMP(2662)+TMP_JAMP(2745) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(497) + $ +TMP_JAMP(546)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1057)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ 
*TMP_JAMP(1375)+TMP_JAMP(1959)+(-1.000000000000000D+00) + $ *TMP_JAMP(2057)+TMP_JAMP(2594)+(-1.000000000000000D+00) + $ *TMP_JAMP(2900)+(-1.000000000000000D+00)*TMP_JAMP(2962) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2981) JAMP(13,1) = (-1.000000000000000D+00)*AMP(255) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(775) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2032) - $ +(-1.000000000000000D+00)*TMP_JAMP(2034)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*TMP_JAMP(2036) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2039) - $ +(-1.000000000000000D+00)*TMP_JAMP(2126)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2647)+(-1.000000000000000D - $ +00)*TMP_JAMP(2654) - JAMP(14,1) = (-1.000000000000000D+00)*AMP(235)+AMP(311) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(766) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1207) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2032) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2048) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2319) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2554) - $ +TMP_JAMP(2650)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2746) - JAMP(15,1) = (-1.000000000000000D+00)*AMP(254)+AMP(300) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(774) - $ +TMP_JAMP(2033)+(-1.000000000000000D+00)*TMP_JAMP(2035) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2037) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2038) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2044) - $ +(-1.000000000000000D+00)*TMP_JAMP(2652)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2696) - JAMP(16,1) = (-1.000000000000000D+00)*AMP(163)+AMP(176) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(656) - $ +TMP_JAMP(867)+((0.000000000000000D+00,-1.000000000000000D+00)) - 
$ *TMP_JAMP(1151)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1222)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2044)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2053)+TMP_JAMP(2332)+TMP_JAMP(2650) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2747) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(277) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(491) + $ +TMP_JAMP(548)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1396)+(-1.000000000000000D+00)*TMP_JAMP(1678)+( + $ -1.000000000000000D+00)*TMP_JAMP(1933)+TMP_JAMP(2636) + $ +TMP_JAMP(2788)+(-1.000000000000000D+00)*TMP_JAMP(2904) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2948) + $ +(-1.000000000000000D+00)*TMP_JAMP(2980) + JAMP(14,1) = (-1.000000000000000D+00)*AMP(235) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(118) + $ +(-1.000000000000000D+00)*TMP_JAMP(154)+TMP_JAMP(382) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(488) + $ +(-1.000000000000000D+00)*TMP_JAMP(548)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(1785)+TMP_JAMP(2588) + $ +TMP_JAMP(2790)+(-1.000000000000000D+00)*TMP_JAMP(2833) + $ +TMP_JAMP(2925)+(-1.000000000000000D+00)*TMP_JAMP(2942) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2955) + JAMP(15,1) = (-1.000000000000000D+00)*AMP(254)+( + $ -1.000000000000000D+00)*TMP_JAMP(151)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(265)+TMP_JAMP(380) + $ +TMP_JAMP(1383)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1419)+TMP_JAMP(2055)+TMP_JAMP(2738)+( + $ -1.000000000000000D+00)*TMP_JAMP(2881)+(-1.000000000000000D+00) + $ *TMP_JAMP(2921)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(2948)+TMP_JAMP(2994) + JAMP(16,1) = (-1.000000000000000D+00)*AMP(163) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(266) + $ 
+(-1.000000000000000D+00)*TMP_JAMP(385)+(-1.000000000000000D+00) + $ *TMP_JAMP(400)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(496)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1232)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1419)+(-1.000000000000000D+00)*TMP_JAMP(1712) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2139) + $ +TMP_JAMP(2606)+(-1.000000000000000D+00)*TMP_JAMP(2745) + $ +TMP_JAMP(2927)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(2943) JAMP(17,1) = (-1.000000000000000D+00)*AMP(234) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1945) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2048) - $ +TMP_JAMP(2338)+(-1.000000000000000D+00)*TMP_JAMP(2342) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2651) - $ +(-1.000000000000000D+00)*TMP_JAMP(2652)+TMP_JAMP(2663)+( - $ -1.000000000000000D+00)*TMP_JAMP(2748) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(279) + $ +(-1.000000000000000D+00)*TMP_JAMP(382)+(-1.000000000000000D+00) + $ *TMP_JAMP(383)+(-1.000000000000000D+00)*TMP_JAMP(398) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(489) + $ +TMP_JAMP(667)+(-1.000000000000000D+00)*TMP_JAMP(1520) + $ +TMP_JAMP(1639)+(-1.000000000000000D+00)*TMP_JAMP(1665) + $ +TMP_JAMP(1817)+(-1.000000000000000D+00)*TMP_JAMP(2280) + $ +TMP_JAMP(2551)+(-1.000000000000000D+00)*TMP_JAMP(2672)+( + $ -1.000000000000000D+00)*TMP_JAMP(2810)+TMP_JAMP(2972)+( + $ -1.000000000000000D+00)*TMP_JAMP(3029) JAMP(18,1) = (-1.000000000000000D+00)*AMP(162) $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(174) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(655) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1198) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2049) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2053) - $ 
+((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2070) - $ +TMP_JAMP(2077)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2344)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2612)+(-1.000000000000000D+00)*TMP_JAMP(2654) - $ +TMP_JAMP(2748) - JAMP(19,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(226)+(-1.000000000000000D+00)*AMP(246) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(934) - $ +TMP_JAMP(611)+TMP_JAMP(737)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2055)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2656)+(-1.000000000000000D+00) - $ *TMP_JAMP(2682)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2749)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2750) - JAMP(20,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(223)+(-1.000000000000000D+00)*AMP(237)+AMP(866) - $ +TMP_JAMP(717)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1206)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1230)+TMP_JAMP(1232)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1930)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2057)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(2074)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2160)+TMP_JAMP(2503) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2749) - JAMP(21,1) = (-1.000000000000000D+00)*AMP(245) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(933)+( - $ -1.000000000000000D+00)*TMP_JAMP(723)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(1219)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1223)+TMP_JAMP(1658) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1938) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2658) - $ +(-1.000000000000000D+00)*TMP_JAMP(2660)+((0.000000000000000D - $ 
+00,1.000000000000000D+00))*TMP_JAMP(2661)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*TMP_JAMP(2750) - JAMP(22,1) = (-1.000000000000000D+00)*AMP(165) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(815) - $ +AMP(830)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1217)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1218)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1932)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2067)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2069)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2070)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2078)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2283)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2661)+(-1.000000000000000D+00)*TMP_JAMP(2662) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(282) + $ +TMP_JAMP(383)+TMP_JAMP(385)+(-1.000000000000000D+00) + $ *TMP_JAMP(402)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(498)+(-1.000000000000000D+00)*TMP_JAMP(1640) + $ +TMP_JAMP(1708)+TMP_JAMP(1962)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(2120)+TMP_JAMP(2515) + $ +TMP_JAMP(2561)+(-1.000000000000000D+00)*TMP_JAMP(2823)+( + $ -1.000000000000000D+00)*TMP_JAMP(2958) + JAMP(19,1) = (-1.000000000000000D+00)*AMP(246) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(276) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(327) + $ +(-1.000000000000000D+00)*TMP_JAMP(389)+TMP_JAMP(404) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1399) + $ +(-1.000000000000000D+00)*TMP_JAMP(1638)+TMP_JAMP(1809) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2005) + $ +(-1.000000000000000D+00)*TMP_JAMP(2439)+TMP_JAMP(2513)+( + $ -1.000000000000000D+00)*TMP_JAMP(2717)+TMP_JAMP(2873)+( + $ -1.000000000000000D+00)*TMP_JAMP(2946)+((0.000000000000000D+00 + 
$ ,1.000000000000000D+00))*TMP_JAMP(2978) + JAMP(20,1) = (-1.000000000000000D+00)*AMP(237) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(273) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(328) + $ +TMP_JAMP(395)+(-1.000000000000000D+00)*TMP_JAMP(397) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1401) + $ +(-1.000000000000000D+00)*TMP_JAMP(1508)+(-1.000000000000000D + $ +00)*TMP_JAMP(1660)+((0.000000000000000D+00,1.000000000000000D + $ +00))*TMP_JAMP(1784)+TMP_JAMP(1803)+(-1.000000000000000D+00) + $ *TMP_JAMP(2451)+TMP_JAMP(2588)+TMP_JAMP(2723)+TMP_JAMP(2869) + $ +TMP_JAMP(2946)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(2979) + JAMP(21,1) = (-1.000000000000000D+00)*AMP(245)+( + $ -1.000000000000000D+00)*TMP_JAMP(163)+TMP_JAMP(390) + $ +TMP_JAMP(392)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(493)+(-1.000000000000000D+00)*TMP_JAMP(550) + $ +TMP_JAMP(759)+(-1.000000000000000D+00)*TMP_JAMP(1376) + $ +TMP_JAMP(1636)+TMP_JAMP(1643)+TMP_JAMP(1659)+( + $ -1.000000000000000D+00)*TMP_JAMP(1808)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(2005)+(-1.000000000000000D+00) + $ *TMP_JAMP(2307)+TMP_JAMP(2485)+(-1.000000000000000D+00) + $ *TMP_JAMP(2703)+(-1.000000000000000D+00)*TMP_JAMP(2872) + $ +TMP_JAMP(2923) + JAMP(22,1) = (-1.000000000000000D+00)*AMP(165)+( + $ -1.000000000000000D+00)*TMP_JAMP(164)+TMP_JAMP(394) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(495) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1569) + $ +TMP_JAMP(2495)+(-1.000000000000000D+00)*TMP_JAMP(2769) + $ +TMP_JAMP(2919)+(-1.000000000000000D+00)*TMP_JAMP(2947) + $ +TMP_JAMP(2952)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(2981) JAMP(23,1) = (-1.000000000000000D+00)*AMP(236) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1225) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1229) - $ 
+TMP_JAMP(1233)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1241)+TMP_JAMP(1668)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1940)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2072)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2074)+TMP_JAMP(2075)+( - $ -1.000000000000000D+00)*TMP_JAMP(2663)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2664)+TMP_JAMP(2723) - JAMP(24,1) = (-1.000000000000000D+00)*AMP(164)+TMP_JAMP(737) - $ +TMP_JAMP(739)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(743)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1235)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1237)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1239)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1242)+TMP_JAMP(1245)+TMP_JAMP(1246)+TMP_JAMP(1248)+( - $ -1.000000000000000D+00)*TMP_JAMP(2077)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2078)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(2079)+TMP_JAMP(2523)+( - $ -1.000000000000000D+00)*TMP_JAMP(2528)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2664) - JAMP(25,1) = (-1.000000000000000D+00)*AMP(974) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2665) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2671) - $ +TMP_JAMP(2751)+(-1.000000000000000D+00)*TMP_JAMP(2752) - JAMP(26,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(487)+(-1.000000000000000D+00)*AMP(965)+( - $ -1.000000000000000D+00)*AMP(1010)+TMP_JAMP(812)+TMP_JAMP(815) - $ +TMP_JAMP(1256)+TMP_JAMP(1258)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1303)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1305)+(-1.000000000000000D+00) - $ *TMP_JAMP(2084)+(-1.000000000000000D+00)*TMP_JAMP(2099) - $ +TMP_JAMP(2167)+(-1.000000000000000D+00)*TMP_JAMP(2667)+( - $ 
-1.000000000000000D+00)*TMP_JAMP(2751) - JAMP(27,1) = (-1.000000000000000D+00)*AMP(973)+( - $ -1.000000000000000D+00)*TMP_JAMP(1254)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(1476)+TMP_JAMP(2088)+( - $ -1.000000000000000D+00)*TMP_JAMP(2092)+TMP_JAMP(2125) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2128) - $ +TMP_JAMP(2752) - JAMP(28,1) = AMP(142)+(-1.000000000000000D+00)*AMP(956) - $ +TMP_JAMP(901)+TMP_JAMP(1253)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1320)+TMP_JAMP(1329) - $ +TMP_JAMP(2092)+TMP_JAMP(2093)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2094)+(-1.000000000000000D+00) - $ *TMP_JAMP(2102)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2754) - JAMP(29,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(808)+(-1.000000000000000D+00)*AMP(964)+AMP(1637)+( - $ -1.000000000000000D+00)*AMP(1708)+(-1.000000000000000D+00) - $ *TMP_JAMP(698)+(-1.000000000000000D+00)*TMP_JAMP(1255) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1283) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1307) - $ +TMP_JAMP(1590)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1648)+(-1.000000000000000D+00)*TMP_JAMP(2095)+( - $ -1.000000000000000D+00)*TMP_JAMP(2096)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2098)+TMP_JAMP(2099) - $ +TMP_JAMP(2110)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2670) - JAMP(30,1) = AMP(136)+(-1.000000000000000D+00)*AMP(955)+( - $ -1.000000000000000D+00)*AMP(1762)+TMP_JAMP(702)+TMP_JAMP(704) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(706) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(764) - $ +TMP_JAMP(1260)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1322)+TMP_JAMP(2101)+TMP_JAMP(2105)+TMP_JAMP(2146)+( - $ -1.000000000000000D+00)*TMP_JAMP(2161)+((0.000000000000000D+00 - $ 
,1.000000000000000D+00))*TMP_JAMP(2458)+TMP_JAMP(2669) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2670) - JAMP(31,1) = (-1.000000000000000D+00)*AMP(977) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(1142) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1387) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1936) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2671) - $ +(-1.000000000000000D+00)*TMP_JAMP(2673)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2753) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(280) + $ +(-1.000000000000000D+00)*TMP_JAMP(395)+TMP_JAMP(399)+( + $ -1.000000000000000D+00)*TMP_JAMP(405)+(-1.000000000000000D+00) + $ *TMP_JAMP(1808)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(2010)+(-1.000000000000000D+00)*TMP_JAMP(2348) + $ +TMP_JAMP(2544)+(-1.000000000000000D+00)*TMP_JAMP(2871) + $ +TMP_JAMP(2974)+(-1.000000000000000D+00)*TMP_JAMP(2984) + $ +TMP_JAMP(3029) + JAMP(24,1) = (-1.000000000000000D+00)*AMP(164) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(128) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(281) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(286) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(330) + $ +TMP_JAMP(403)+TMP_JAMP(406)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(1404)+TMP_JAMP(1605)+( + $ -1.000000000000000D+00)*TMP_JAMP(1804)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(2120)+TMP_JAMP(2555)+( + $ -1.000000000000000D+00)*TMP_JAMP(2815)+TMP_JAMP(2920) + $ +TMP_JAMP(2947) + JAMP(25,1) = (-1.000000000000000D+00)*TMP_JAMP(360) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(454) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(517) + $ +(-1.000000000000000D+00)*AMP(976)+(-1.000000000000000D+00) + $ *TMP_JAMP(1843)+TMP_JAMP(1859)+((0.000000000000000D+00 + $ 
,1.000000000000000D+00))*TMP_JAMP(2085)+TMP_JAMP(2104)+( + $ -1.000000000000000D+00)*TMP_JAMP(2662)+TMP_JAMP(2851) + $ +TMP_JAMP(2865)+TMP_JAMP(3018) + JAMP(26,1) = TMP_JAMP(359)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(463)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(518)+(-1.000000000000000D+00) + $ *TMP_JAMP(834)+(-1.000000000000000D+00)*TMP_JAMP(1019) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1374) + $ +(-1.000000000000000D+00)*AMP(967)+(-1.000000000000000D+00) + $ *TMP_JAMP(1479)+TMP_JAMP(1842)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(2085)+(-1.000000000000000D+00) + $ *TMP_JAMP(2129)+(-1.000000000000000D+00)*TMP_JAMP(2648) + $ +TMP_JAMP(2758)+TMP_JAMP(2778)+(-1.000000000000000D+00) + $ *TMP_JAMP(2859) + JAMP(27,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(453)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(513)+(-1.000000000000000D+00)*TMP_JAMP(809)+( + $ -1.000000000000000D+00)*TMP_JAMP(1028)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(1373)+(-1.000000000000000D+00) + $ *AMP(975)+(-1.000000000000000D+00)*TMP_JAMP(1963)+TMP_JAMP(2060) + $ +(-1.000000000000000D+00)*TMP_JAMP(2104)+TMP_JAMP(2317) + $ +TMP_JAMP(2387)+TMP_JAMP(2567)+(-1.000000000000000D+00) + $ *TMP_JAMP(2604)+TMP_JAMP(2796)+TMP_JAMP(2811)+( + $ -1.000000000000000D+00)*TMP_JAMP(2953) + JAMP(28,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(316)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(470)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(514)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(735)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1392)+(-1.000000000000000D+00)*AMP(958)+TMP_JAMP(1448) + $ +(-1.000000000000000D+00)*TMP_JAMP(1839)+((0.000000000000000D + $ +00,1.000000000000000D+00))*TMP_JAMP(1846)+(-1.000000000000000D + $ 
+00)*TMP_JAMP(1919)+TMP_JAMP(1963)+(-1.000000000000000D+00) + $ *TMP_JAMP(1967)+(-1.000000000000000D+00)*TMP_JAMP(2657)+( + $ -1.000000000000000D+00)*TMP_JAMP(2789)+TMP_JAMP(2824) + $ +TMP_JAMP(2835) + JAMP(29,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(314)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(462)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(717)+(-1.000000000000000D+00)*AMP(966)+TMP_JAMP(1709) + $ +(-1.000000000000000D+00)*TMP_JAMP(1874)+TMP_JAMP(2061) + $ +TMP_JAMP(2129)+AMP(1642)+TMP_JAMP(2445)+(-1.000000000000000D + $ +00)*TMP_JAMP(2493)+TMP_JAMP(2647)+TMP_JAMP(2985)+TMP_JAMP(2996) + JAMP(30,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(320)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(520)+(-1.000000000000000D+00)*AMP(957)+( + $ -1.000000000000000D+00)*TMP_JAMP(1840)+TMP_JAMP(1874) + $ +TMP_JAMP(1919)+TMP_JAMP(1966)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(2064)+TMP_JAMP(2250)+( + $ -1.000000000000000D+00)*TMP_JAMP(2553)+TMP_JAMP(2656) + $ +TMP_JAMP(3000)+TMP_JAMP(3007) + JAMP(31,1) = TMP_JAMP(804)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(1391)+(-1.000000000000000D+00) + $ *AMP(979)+TMP_JAMP(1857)+TMP_JAMP(1894)+TMP_JAMP(2130) + $ +TMP_JAMP(2609)+(-1.000000000000000D+00)*TMP_JAMP(2816) + $ +TMP_JAMP(2825)+(-1.000000000000000D+00)*TMP_JAMP(2863)+( + $ -1.000000000000000D+00)*TMP_JAMP(3018) JAMP(32,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(487)+(-1.000000000000000D+00)*AMP(968)+( - $ -1.000000000000000D+00)*TMP_JAMP(709)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1394)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(2108)+(-1.000000000000000D - $ +00)*TMP_JAMP(2110)+TMP_JAMP(2118)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2198)+TMP_JAMP(2223) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2681) - $ 
+((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2753) - JAMP(33,1) = (-1.000000000000000D+00)*AMP(690)+( - $ -1.000000000000000D+00)*AMP(975)+(-1.000000000000000D+00) - $ *AMP(1654)+(-1.000000000000000D+00)*TMP_JAMP(2114) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2116) - $ +TMP_JAMP(2133)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2137)+TMP_JAMP(2673)+TMP_JAMP(2692) - JAMP(34,1) = (-1.000000000000000D+00)*AMP(323) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(327)+( - $ -1.000000000000000D+00)*AMP(834)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2113)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(2115)+(-1.000000000000000D - $ +00)*TMP_JAMP(2140)+(-1.000000000000000D+00)*TMP_JAMP(2229)+( - $ -1.000000000000000D+00)*TMP_JAMP(2675) - JAMP(35,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(809)+(-1.000000000000000D+00)*AMP(966)+TMP_JAMP(1524) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1938) - $ +(-1.000000000000000D+00)*TMP_JAMP(2117)+TMP_JAMP(2119) - $ +TMP_JAMP(2234)+(-1.000000000000000D+00)*TMP_JAMP(2238)+( - $ -1.000000000000000D+00)*TMP_JAMP(2674)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2683) - JAMP(36,1) = (-1.000000000000000D+00)*AMP(321) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(330) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(342)+( - $ -1.000000000000000D+00)*TMP_JAMP(856)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2122)+(-1.000000000000000D+00) - $ *TMP_JAMP(2173)+TMP_JAMP(2586)+TMP_JAMP(2674)+TMP_JAMP(2675) - JAMP(37,1) = (-1.000000000000000D+00)*AMP(978) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(1220)+( - $ -1.000000000000000D+00)*AMP(1704)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(1385)+(-1.000000000000000D+00) - $ *TMP_JAMP(2124)+(-1.000000000000000D+00)*TMP_JAMP(2127) - $ 
+((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2128) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2131) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2135) - $ +TMP_JAMP(2372)+TMP_JAMP(2680) - JAMP(38,1) = (-1.000000000000000D+00)*AMP(959)+AMP(1763) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1297) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2131) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2147) - $ +TMP_JAMP(2357)+(-1.000000000000000D+00)*TMP_JAMP(2366) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2516) - $ +TMP_JAMP(2677)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2754) - JAMP(39,1) = (-1.000000000000000D+00)*AMP(976) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1515) - $ +(-1.000000000000000D+00)*TMP_JAMP(2132)+TMP_JAMP(2134) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2136) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2138) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2139) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2144) - $ +(-1.000000000000000D+00)*TMP_JAMP(2355)+TMP_JAMP(2370)+( - $ -1.000000000000000D+00)*TMP_JAMP(2678) - JAMP(40,1) = (-1.000000000000000D+00)*AMP(324)+TMP_JAMP(2140) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2143) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2144) - $ +TMP_JAMP(2149)+(-1.000000000000000D+00)*TMP_JAMP(2378) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2381) - $ +TMP_JAMP(2677) - JAMP(41,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(631)+(-1.000000000000000D+00)*AMP(957)+TMP_JAMP(963) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1940) - $ +TMP_JAMP(2146)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2147)+TMP_JAMP(2387)+TMP_JAMP(2392)+( - $ 
-1.000000000000000D+00)*TMP_JAMP(2678)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2679)+TMP_JAMP(2755) - JAMP(42,1) = (-1.000000000000000D+00)*AMP(319) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(360)+( - $ -1.000000000000000D+00)*AMP(468)+(-1.000000000000000D+00) - $ *TMP_JAMP(958)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1795)+(-1.000000000000000D+00)*TMP_JAMP(2148) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2153) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2189) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2394) - $ +TMP_JAMP(2630)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2679)+TMP_JAMP(2680) - JAMP(43,1) = AMP(549)+(-1.000000000000000D+00)*AMP(969) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(1298) - $ +AMP(1307)+TMP_JAMP(801)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(1333)+TMP_JAMP(1341) - $ +TMP_JAMP(1582)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2154)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2157)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2158)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2163)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2166)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2681)+(-1.000000000000000D+00)*TMP_JAMP(2682) - JAMP(44,1) = AMP(708)+(-1.000000000000000D+00)*AMP(960) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(1289) - $ +TMP_JAMP(610)+(-1.000000000000000D+00)*TMP_JAMP(780) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1296) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1325) - $ +TMP_JAMP(1327)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1939)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2158)+((0.000000000000000D+00,1.000000000000000D+00)) - $ 
*TMP_JAMP(2159)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2160)+TMP_JAMP(2161)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2162)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(2164)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2179) - JAMP(45,1) = (-1.000000000000000D+00)*AMP(967)+( - $ -1.000000000000000D+00)*TMP_JAMP(579)+(-1.000000000000000D+00) - $ *TMP_JAMP(787)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1308)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1314)+TMP_JAMP(1695)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1941)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(2166)+TMP_JAMP(2168) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2177) - $ +(-1.000000000000000D+00)*TMP_JAMP(2544)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*TMP_JAMP(2683) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2684) + $ *TMP_JAMP(949)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1147)+TMP_JAMP(1280)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(1374)+(-1.000000000000000D+00) + $ *AMP(970)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(2067)+(-1.000000000000000D+00)*TMP_JAMP(2130) + $ +TMP_JAMP(2333)+(-1.000000000000000D+00)*TMP_JAMP(2542) + $ +TMP_JAMP(2713)+(-1.000000000000000D+00)*TMP_JAMP(2763) + $ +TMP_JAMP(2854)+TMP_JAMP(2957)+(-1.000000000000000D+00) + $ *TMP_JAMP(3001) + JAMP(33,1) = (-1.000000000000000D+00)*TMP_JAMP(1102)+( + $ -1.000000000000000D+00)*TMP_JAMP(1256)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(1391)+(-1.000000000000000D+00) + $ *AMP(977)+(-1.000000000000000D+00)*TMP_JAMP(1688)+( + $ -1.000000000000000D+00)*TMP_JAMP(2556)+TMP_JAMP(2811) + $ +TMP_JAMP(2817)+TMP_JAMP(2882)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(2976)+(-1.000000000000000D+00) + $ *TMP_JAMP(3030) + JAMP(34,1) = 
(-1.000000000000000D+00)*AMP(323)+TMP_JAMP(419) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(511) + $ +TMP_JAMP(1102)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1147)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1281)+TMP_JAMP(1604)+TMP_JAMP(2553)+( + $ -1.000000000000000D+00)*TMP_JAMP(2813)+TMP_JAMP(2920)+( + $ -1.000000000000000D+00)*TMP_JAMP(2951)+TMP_JAMP(2954)+( + $ -1.000000000000000D+00)*TMP_JAMP(2969) + JAMP(35,1) = (-1.000000000000000D+00)*TMP_JAMP(1001)+( + $ -1.000000000000000D+00)*TMP_JAMP(1022)+(-1.000000000000000D+00) + $ *TMP_JAMP(1033)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1152)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1155)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1224)+(-1.000000000000000D+00)*AMP(968)+TMP_JAMP(1582) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2006) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2105) + $ +TMP_JAMP(2514)+TMP_JAMP(2546)+TMP_JAMP(2695)+( + $ -1.000000000000000D+00)*TMP_JAMP(2712)+(-1.000000000000000D+00) + $ *TMP_JAMP(2875)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(2956)+TMP_JAMP(3001) + JAMP(36,1) = (-1.000000000000000D+00)*AMP(321)+TMP_JAMP(431) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(504) + $ +TMP_JAMP(553)+TMP_JAMP(814)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(839)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(1152)+((0.000000000000000D+00 + $ ,-1.000000000000000D+00))*TMP_JAMP(1156)+TMP_JAMP(1271) + $ +TMP_JAMP(1520)+TMP_JAMP(1706)+(-1.000000000000000D+00) + $ *TMP_JAMP(1933)+TMP_JAMP(2059)+TMP_JAMP(2515)+TMP_JAMP(2557)+( + $ -1.000000000000000D+00)*TMP_JAMP(2818)+TMP_JAMP(2969) + JAMP(37,1) = TMP_JAMP(786)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(910)+TMP_JAMP(1277) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1346) + $ 
+((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1373) + $ +(-1.000000000000000D+00)*AMP(980)+TMP_JAMP(1883) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2066) + $ +TMP_JAMP(2128)+TMP_JAMP(2609)+(-1.000000000000000D+00) + $ *TMP_JAMP(2846)+(-1.000000000000000D+00)*TMP_JAMP(2899)+( + $ -1.000000000000000D+00)*TMP_JAMP(2904) + JAMP(38,1) = ((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(933)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(965)+TMP_JAMP(1005)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(1143)+((0.000000000000000D+00 + $ ,-1.000000000000000D+00))*TMP_JAMP(1148)+((0.000000000000000D + $ +00,-1.000000000000000D+00))*TMP_JAMP(1392)+( + $ -1.000000000000000D+00)*AMP(961)+(-1.000000000000000D+00) + $ *TMP_JAMP(2128)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(2138)+TMP_JAMP(2296)+(-1.000000000000000D+00) + $ *TMP_JAMP(2483)+(-1.000000000000000D+00)*TMP_JAMP(2535)+( + $ -1.000000000000000D+00)*TMP_JAMP(2576)+(-1.000000000000000D+00) + $ *TMP_JAMP(2707)+TMP_JAMP(2712)+TMP_JAMP(2793) + JAMP(39,1) = (-1.000000000000000D+00)*TMP_JAMP(827)+( + $ -1.000000000000000D+00)*TMP_JAMP(1020)+(-1.000000000000000D+00) + $ *TMP_JAMP(1039)+TMP_JAMP(1100)+(-1.000000000000000D+00) + $ *TMP_JAMP(1255)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1346)+(-1.000000000000000D+00)*AMP(978)+TMP_JAMP(1686) + $ +(-1.000000000000000D+00)*TMP_JAMP(1799)+((0.000000000000000D + $ +00,1.000000000000000D+00))*TMP_JAMP(1988)+(-1.000000000000000D + $ +00)*TMP_JAMP(2497)+TMP_JAMP(2591)+(-1.000000000000000D+00) + $ *TMP_JAMP(2687)+TMP_JAMP(2770)+TMP_JAMP(2847)+TMP_JAMP(3030) + JAMP(40,1) = (-1.000000000000000D+00)*AMP(324) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(240) + $ +TMP_JAMP(418)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(510)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(848)+(-1.000000000000000D+00)*TMP_JAMP(1100) + $ 
+((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1143) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1282) + $ +TMP_JAMP(1600)+(-1.000000000000000D+00)*TMP_JAMP(1841) + $ +TMP_JAMP(2493)+(-1.000000000000000D+00)*TMP_JAMP(2767) + $ +TMP_JAMP(2919)+TMP_JAMP(2951)+(-1.000000000000000D+00) + $ *TMP_JAMP(2968) + JAMP(41,1) = (-1.000000000000000D+00)*TMP_JAMP(771)+( + $ -1.000000000000000D+00)*TMP_JAMP(1002)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(1144)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(1159)+((0.000000000000000D+00 + $ ,-1.000000000000000D+00))*TMP_JAMP(1211)+(-1.000000000000000D + $ +00)*TMP_JAMP(1270)+((0.000000000000000D+00,-1.000000000000000D + $ +00))*TMP_JAMP(1311)+(-1.000000000000000D+00)*AMP(959) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1784) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1868) + $ +(-1.000000000000000D+00)*TMP_JAMP(1939)+((0.000000000000000D + $ +00,-1.000000000000000D+00))*TMP_JAMP(2086)+TMP_JAMP(2487) + $ +TMP_JAMP(2707)+(-1.000000000000000D+00)*TMP_JAMP(2713)+( + $ -1.000000000000000D+00)*TMP_JAMP(2877)+TMP_JAMP(2884) + JAMP(42,1) = (-1.000000000000000D+00)*AMP(319)+TMP_JAMP(187) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(242) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(303) + $ +(-1.000000000000000D+00)*TMP_JAMP(412)+TMP_JAMP(436) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(839) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1150) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1240) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1311) + $ +TMP_JAMP(1705)+(-1.000000000000000D+00)*TMP_JAMP(1842)+( + $ -1.000000000000000D+00)*TMP_JAMP(1915)+(-1.000000000000000D+00) + $ *TMP_JAMP(1941)+TMP_JAMP(2594)+(-1.000000000000000D+00) + $ *TMP_JAMP(2899)+TMP_JAMP(2968) + JAMP(43,1) = TMP_JAMP(678)+((0.000000000000000D+00, + $ 
-1.000000000000000D+00))*TMP_JAMP(688)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(949)+TMP_JAMP(1387)+( + $ -1.000000000000000D+00)*AMP(971)+TMP_JAMP(2125)+TMP_JAMP(2127) + $ +(-1.000000000000000D+00)*TMP_JAMP(2481)+TMP_JAMP(2497)+( + $ -1.000000000000000D+00)*TMP_JAMP(2722)+(-1.000000000000000D+00) + $ *TMP_JAMP(2897)+(-1.000000000000000D+00)*TMP_JAMP(2996) + JAMP(44,1) = TMP_JAMP(1384)+(-1.000000000000000D+00)*AMP(962)+( + $ -1.000000000000000D+00)*TMP_JAMP(2126)+(-1.000000000000000D+00) + $ *TMP_JAMP(2127)+(-1.000000000000000D+00)*TMP_JAMP(2535) + $ +TMP_JAMP(2556)+(-1.000000000000000D+00)*TMP_JAMP(2730)+( + $ -1.000000000000000D+00)*TMP_JAMP(3000)+(-1.000000000000000D+00) + $ *TMP_JAMP(3025) + JAMP(45,1) = ((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(728)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(874)+TMP_JAMP(1382)+(-1.000000000000000D+00) + $ *TMP_JAMP(1387)+(-1.000000000000000D+00)*AMP(969)+TMP_JAMP(1824) + $ +(-1.000000000000000D+00)*TMP_JAMP(2088)+((0.000000000000000D + $ +00,1.000000000000000D+00))*TMP_JAMP(2105)+(-1.000000000000000D + $ +00)*TMP_JAMP(2327)+(-1.000000000000000D+00)*TMP_JAMP(2608) + $ +TMP_JAMP(2653)+TMP_JAMP(2778)+(-1.000000000000000D+00) + $ *TMP_JAMP(2883)+TMP_JAMP(3010) JAMP(46,1) = (-1.000000000000000D+00)*AMP(322) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(380) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1312) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2171) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2172) - $ +(-1.000000000000000D+00)*TMP_JAMP(2174)+TMP_JAMP(2185) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2188) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2297) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2684) - JAMP(47,1) = (-1.000000000000000D+00)*AMP(958) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1318) - $ 
+((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1324) - $ +TMP_JAMP(1328)+TMP_JAMP(1705)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1945)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2176)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(2179)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2180)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2186)+TMP_JAMP(2732)+( - $ -1.000000000000000D+00)*TMP_JAMP(2755) - JAMP(48,1) = (-1.000000000000000D+00)*AMP(320)+( - $ -1.000000000000000D+00)*AMP(466)+(-1.000000000000000D+00) - $ *AMP(467)+TMP_JAMP(801)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(1332)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(1334)+TMP_JAMP(1342) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1710) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2183) - $ +(-1.000000000000000D+00)*TMP_JAMP(2184)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2187)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*TMP_JAMP(2189) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2190) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2479) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2565) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(133) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(292) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(331) + $ +TMP_JAMP(429)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(480)+(-1.000000000000000D+00)*TMP_JAMP(553) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(686) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(848) + $ +(-1.000000000000000D+00)*TMP_JAMP(1382)+(-1.000000000000000D + $ +00)*TMP_JAMP(2031)+(-1.000000000000000D+00)*TMP_JAMP(2060) + $ +TMP_JAMP(2927)+(-1.000000000000000D+00)*TMP_JAMP(3025) + JAMP(47,1) = 
TMP_JAMP(1129)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(1158)+((0.000000000000000D+00 + $ ,-1.000000000000000D+00))*TMP_JAMP(1303)+(-1.000000000000000D + $ +00)*TMP_JAMP(1384)+(-1.000000000000000D+00)*AMP(960) + $ +TMP_JAMP(1563)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(2086)+(-1.000000000000000D+00)*TMP_JAMP(2089)+( + $ -1.000000000000000D+00)*TMP_JAMP(2364)+TMP_JAMP(2466)+( + $ -1.000000000000000D+00)*TMP_JAMP(2558)+TMP_JAMP(2658) + $ +TMP_JAMP(2824)+(-1.000000000000000D+00)*TMP_JAMP(2931) + $ +TMP_JAMP(3010) + JAMP(48,1) = (-1.000000000000000D+00)*AMP(320) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(331) + $ +TMP_JAMP(411)+TMP_JAMP(430)+(-1.000000000000000D+00) + $ *TMP_JAMP(441)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(503)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1065)+(-1.000000000000000D+00)*TMP_JAMP(1129) + $ +TMP_JAMP(1133)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1244)+TMP_JAMP(1625)+(-1.000000000000000D+00) + $ *TMP_JAMP(1705)+TMP_JAMP(1818)+(-1.000000000000000D+00) + $ *TMP_JAMP(1900)+TMP_JAMP(1972)+TMP_JAMP(2677)+( + $ -1.000000000000000D+00)*TMP_JAMP(2897)+TMP_JAMP(2954) JAMP(49,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(58)+(-1.000000000000000D+00)*AMP(530)+(-1.000000000000000D - $ +00)*AMP(1403)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1352)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2194)+TMP_JAMP(2197)+TMP_JAMP(2201)+TMP_JAMP(2216)+( - $ -1.000000000000000D+00)*TMP_JAMP(2218)+TMP_JAMP(2254) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2686) - JAMP(50,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(55)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(1271)+(-1.000000000000000D+00)*AMP(1397) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(1409) - $ +AMP(1618)+(-1.000000000000000D+00)*AMP(1883)+TMP_JAMP(750) - $ 
+TMP_JAMP(1346)+(-1.000000000000000D+00)*TMP_JAMP(1359) - $ +TMP_JAMP(1361)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1437)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1441)+TMP_JAMP(1442)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2193)+(-1.000000000000000D+00) - $ *TMP_JAMP(2195)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2196)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2198)+(-1.000000000000000D+00)*TMP_JAMP(2199) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2531) + $ *TMP_JAMP(1393)+(-1.000000000000000D+00)*AMP(1405) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1746) + $ +TMP_JAMP(1892)+(-1.000000000000000D+00)*TMP_JAMP(1939) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2136) + $ +TMP_JAMP(2579)+TMP_JAMP(2630)+(-1.000000000000000D+00) + $ *TMP_JAMP(2836)+TMP_JAMP(2837)+TMP_JAMP(2860)+TMP_JAMP(2990) + JAMP(50,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1405)+(-1.000000000000000D+00)*AMP(1399)+( + $ -1.000000000000000D+00)*TMP_JAMP(1892)+TMP_JAMP(1938) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1977) + $ +TMP_JAMP(2026)+(-1.000000000000000D+00)*TMP_JAMP(2620) + $ +TMP_JAMP(2731)+TMP_JAMP(2783)+TMP_JAMP(2938)+TMP_JAMP(2986) JAMP(51,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(749)+(-1.000000000000000D+00)*AMP(1402)+AMP(1700) - $ +TMP_JAMP(1354)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1484)+(-1.000000000000000D+00)*TMP_JAMP(1488)+( - $ -1.000000000000000D+00)*TMP_JAMP(2201)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2687)+TMP_JAMP(2688) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2757) + $ *TMP_JAMP(1394)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1397)+(-1.000000000000000D+00)*AMP(1404) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1737) + $ 
+TMP_JAMP(1891)+TMP_JAMP(1937)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(2136)+TMP_JAMP(2575) + $ +TMP_JAMP(2827)+(-1.000000000000000D+00)*TMP_JAMP(2892)+( + $ -1.000000000000000D+00)*TMP_JAMP(2895) JAMP(52,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(497)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(665)+(-1.000000000000000D+00)*AMP(1018)+( - $ -1.000000000000000D+00)*TMP_JAMP(2203)+(-1.000000000000000D+00) - $ *TMP_JAMP(2248)+(-1.000000000000000D+00)*TMP_JAMP(2688)+( - $ -1.000000000000000D+00)*TMP_JAMP(2756) - JAMP(53,1) = (-1.000000000000000D+00)*AMP(514) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(908) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(1271)+( - $ -1.000000000000000D+00)*AMP(1396)+TMP_JAMP(710)+TMP_JAMP(1358) - $ +(-1.000000000000000D+00)*TMP_JAMP(1558)+(-1.000000000000000D - $ +00)*TMP_JAMP(2206)+(-1.000000000000000D+00)*TMP_JAMP(2207) - $ +TMP_JAMP(2208)+(-1.000000000000000D+00)*TMP_JAMP(2277) - $ +TMP_JAMP(2572)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2690)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2694) + $ *TMP_JAMP(1176)+TMP_JAMP(1385)+(-1.000000000000000D+00) + $ *AMP(1020)+(-1.000000000000000D+00)*TMP_JAMP(1619)+( + $ -1.000000000000000D+00)*TMP_JAMP(1891)+TMP_JAMP(2145)+( + $ -1.000000000000000D+00)*TMP_JAMP(2531)+(-1.000000000000000D+00) + $ *TMP_JAMP(2853)+TMP_JAMP(2938)+TMP_JAMP(2988)+TMP_JAMP(3009) + JAMP(53,1) = TMP_JAMP(1415)+(-1.000000000000000D+00)*AMP(1398) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1744) + $ +(-1.000000000000000D+00)*TMP_JAMP(1811)+TMP_JAMP(1890) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1977) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1978) + $ +(-1.000000000000000D+00)*TMP_JAMP(1994)+TMP_JAMP(2729)+( + $ -1.000000000000000D+00)*TMP_JAMP(2774)+(-1.000000000000000D+00) + $ *TMP_JAMP(2892)+TMP_JAMP(2997) JAMP(54,1) = 
((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(496)+(-1.000000000000000D+00)*AMP(1017)+AMP(1712)+( - $ -1.000000000000000D+00)*AMP(1884)+(-1.000000000000000D+00) - $ *TMP_JAMP(716)+(-1.000000000000000D+00)*TMP_JAMP(1363) - $ +TMP_JAMP(1366)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1660)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2209)+(-1.000000000000000D+00)*TMP_JAMP(2281) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2690) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2721) - $ +TMP_JAMP(2756) - JAMP(55,1) = (-1.000000000000000D+00)*AMP(690)+( - $ -1.000000000000000D+00)*AMP(1406)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*AMP(1412)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1372)+TMP_JAMP(1375) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1386) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2214) - $ +(-1.000000000000000D+00)*TMP_JAMP(2215)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2217)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2219)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*TMP_JAMP(2221)+TMP_JAMP(2224)+( - $ -1.000000000000000D+00)*TMP_JAMP(2246) - JAMP(56,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(55)+(-1.000000000000000D+00)*AMP(1400)+AMP(1620)+( - $ -1.000000000000000D+00)*TMP_JAMP(697)+(-1.000000000000000D+00) - $ *TMP_JAMP(843)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(847)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1126)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1376)+(-1.000000000000000D+00)*TMP_JAMP(1378)+( - $ -1.000000000000000D+00)*TMP_JAMP(1380)+TMP_JAMP(1382) - $ +TMP_JAMP(1383)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1392)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1427)+((0.000000000000000D+00,1.000000000000000D+00)) - $ 
*TMP_JAMP(1438)+TMP_JAMP(2220)+(-1.000000000000000D+00) - $ *TMP_JAMP(2222)+(-1.000000000000000D+00)*TMP_JAMP(2223) - $ +TMP_JAMP(2233)+(-1.000000000000000D+00)*TMP_JAMP(2237) + $ *TMP_JAMP(721)+(-1.000000000000000D+00)*TMP_JAMP(1263) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1295) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1375) + $ +(-1.000000000000000D+00)*AMP(1019)+(-1.000000000000000D+00) + $ *TMP_JAMP(1655)+(-1.000000000000000D+00)*TMP_JAMP(1890) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1986) + $ +(-1.000000000000000D+00)*TMP_JAMP(2145)+TMP_JAMP(2492) + $ +TMP_JAMP(2585)+TMP_JAMP(2675)+(-1.000000000000000D+00) + $ *TMP_JAMP(2714)+(-1.000000000000000D+00)*TMP_JAMP(2836) + $ +TMP_JAMP(2999) + JAMP(55,1) = ((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1063)+TMP_JAMP(1141)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(1177)+(-1.000000000000000D+00) + $ *AMP(1408)+(-1.000000000000000D+00)*TMP_JAMP(1894)+( + $ -1.000000000000000D+00)*TMP_JAMP(2075)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(2108)+(-1.000000000000000D+00) + $ *TMP_JAMP(2578)+TMP_JAMP(2821)+(-1.000000000000000D+00) + $ *TMP_JAMP(2911)+(-1.000000000000000D+00)*TMP_JAMP(2990)+( + $ -1.000000000000000D+00)*TMP_JAMP(3013) + JAMP(56,1) = TMP_JAMP(647)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(1168)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(1205)+(-1.000000000000000D+00) + $ *AMP(1402)+TMP_JAMP(2047)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(2108)+(-1.000000000000000D+00) + $ *TMP_JAMP(2452)+TMP_JAMP(2814)+(-1.000000000000000D+00) + $ *TMP_JAMP(2940)+(-1.000000000000000D+00)*TMP_JAMP(2957)+( + $ -1.000000000000000D+00)*TMP_JAMP(2986)+(-1.000000000000000D+00) + $ *TMP_JAMP(2998) JAMP(57,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(1194)+(-1.000000000000000D+00)*AMP(1404)+AMP(1835) - $ 
+TMP_JAMP(1388)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1517)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1521)+(-1.000000000000000D+00)*TMP_JAMP(2224)+( - $ -1.000000000000000D+00)*TMP_JAMP(2692)+TMP_JAMP(2693) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2757) - JAMP(58,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(344)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(366)+(-1.000000000000000D+00)*AMP(395)+TMP_JAMP(2228) - $ +TMP_JAMP(2259)+(-1.000000000000000D+00)*TMP_JAMP(2693) - $ +TMP_JAMP(2758) - JAMP(59,1) = (-1.000000000000000D+00)*AMP(675) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(1131) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(1272)+( - $ -1.000000000000000D+00)*AMP(1398)+TMP_JAMP(771)+TMP_JAMP(1395) - $ +(-1.000000000000000D+00)*TMP_JAMP(1583)+(-1.000000000000000D - $ +00)*TMP_JAMP(2232)+(-1.000000000000000D+00)*TMP_JAMP(2234) - $ +TMP_JAMP(2237)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2239)+(-1.000000000000000D+00)*TMP_JAMP(2289) - $ +TMP_JAMP(2588)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2694) - JAMP(60,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(342)+(-1.000000000000000D+00)*AMP(393)+AMP(458)+( - $ -1.000000000000000D+00)*AMP(1803)+(-1.000000000000000D+00) - $ *TMP_JAMP(779)+(-1.000000000000000D+00)*TMP_JAMP(1398)+( - $ -1.000000000000000D+00)*TMP_JAMP(1402)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1697)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(2239)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*TMP_JAMP(2240)+( - $ -1.000000000000000D+00)*TMP_JAMP(2295)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2730)+(-1.000000000000000D+00) - $ *TMP_JAMP(2758) - JAMP(61,1) = (-1.000000000000000D+00)*AMP(1407) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(1481) - $ 
+AMP(1612)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2245)+(-1.000000000000000D+00)*TMP_JAMP(2246) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2250) - $ +(-1.000000000000000D+00)*TMP_JAMP(2273)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2696)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2759) - JAMP(62,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(665)+(-1.000000000000000D+00)*AMP(1020)+( - $ -1.000000000000000D+00)*TMP_JAMP(2247)+TMP_JAMP(2249) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2250) - $ +TMP_JAMP(2264)+TMP_JAMP(2698)+TMP_JAMP(2711) - JAMP(63,1) = (-1.000000000000000D+00)*AMP(1405) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(1478) - $ +AMP(1613)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1413)+TMP_JAMP(2251)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2253)+TMP_JAMP(2254) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2262) - $ +TMP_JAMP(2266)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2759) - JAMP(64,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(366)+(-1.000000000000000D+00)*AMP(396) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(426) - $ +TMP_JAMP(2258)+TMP_JAMP(2260)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2262)+TMP_JAMP(2271) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2428) - $ +TMP_JAMP(2698) - JAMP(65,1) = (-1.000000000000000D+00)*AMP(1015)+( - $ -1.000000000000000D+00)*TMP_JAMP(2263)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2265)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2267)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(2268)+TMP_JAMP(2304) - $ +TMP_JAMP(2434)+TMP_JAMP(2441)+(-1.000000000000000D+00) - $ *TMP_JAMP(2699)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2701) + $ 
*TMP_JAMP(1172)+TMP_JAMP(1257)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(1301)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(1340)+(-1.000000000000000D+00) + $ *AMP(1406)+TMP_JAMP(1677)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(2142)+(-1.000000000000000D+00) + $ *TMP_JAMP(2820)+TMP_JAMP(2832)+(-1.000000000000000D+00) + $ *TMP_JAMP(2909)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(2976)+TMP_JAMP(3013) + JAMP(58,1) = (-1.000000000000000D+00)*AMP(395)+( + $ -1.000000000000000D+00)*TMP_JAMP(172)+(-1.000000000000000D+00) + $ *TMP_JAMP(419)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(506)+(-1.000000000000000D+00)*TMP_JAMP(994) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1168) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1340) + $ +(-1.000000000000000D+00)*TMP_JAMP(2023)+TMP_JAMP(2543)+( + $ -1.000000000000000D+00)*TMP_JAMP(2642)+(-1.000000000000000D+00) + $ *TMP_JAMP(2806)+(-1.000000000000000D+00)*TMP_JAMP(2838) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2964) + $ +TMP_JAMP(2983) + JAMP(59,1) = (-1.000000000000000D+00)*TMP_JAMP(800) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(893) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1169) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1209) + $ +TMP_JAMP(1377)+(-1.000000000000000D+00)*AMP(1400) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1776) + $ +(-1.000000000000000D+00)*TMP_JAMP(2149)+TMP_JAMP(2729)+( + $ -1.000000000000000D+00)*TMP_JAMP(2819)+(-1.000000000000000D+00) + $ *TMP_JAMP(2937)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(2956)+TMP_JAMP(2998) + JAMP(60,1) = (-1.000000000000000D+00)*AMP(393) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(132) + $ +TMP_JAMP(414)+(-1.000000000000000D+00)*TMP_JAMP(431) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(499) + $ 
+((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1302) + $ +(-1.000000000000000D+00)*TMP_JAMP(1377)+TMP_JAMP(1574) + $ +TMP_JAMP(1639)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1979)+TMP_JAMP(2548)+(-1.000000000000000D+00) + $ *TMP_JAMP(2584)+(-1.000000000000000D+00)*TMP_JAMP(2808)+( + $ -1.000000000000000D+00)*TMP_JAMP(2879)+(-1.000000000000000D+00) + $ *TMP_JAMP(2983) + JAMP(61,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1394)+(-1.000000000000000D+00)*AMP(1409) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2106) + $ +(-1.000000000000000D+00)*TMP_JAMP(2319)+(-1.000000000000000D + $ +00)*TMP_JAMP(2805)+(-1.000000000000000D+00)*TMP_JAMP(2881) + $ +TMP_JAMP(2887)+TMP_JAMP(2912)+(-1.000000000000000D+00) + $ *TMP_JAMP(3017) + JAMP(62,1) = TMP_JAMP(773)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(1231)+((0.000000000000000D+00 + $ ,-1.000000000000000D+00))*TMP_JAMP(1288)+((0.000000000000000D + $ +00,1.000000000000000D+00))*TMP_JAMP(1342)+(-1.000000000000000D + $ +00)*AMP(1022)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(2106)+(-1.000000000000000D+00)*TMP_JAMP(2146)+( + $ -1.000000000000000D+00)*TMP_JAMP(2271)+TMP_JAMP(2363) + $ +TMP_JAMP(2437)+TMP_JAMP(2562)+(-1.000000000000000D+00) + $ *TMP_JAMP(2745)+(-1.000000000000000D+00)*TMP_JAMP(2988)+( + $ -1.000000000000000D+00)*TMP_JAMP(3022) + JAMP(63,1) = (-1.000000000000000D+00)*TMP_JAMP(1380)+( + $ -1.000000000000000D+00)*AMP(1407)+TMP_JAMP(1952) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2142) + $ +(-1.000000000000000D+00)*TMP_JAMP(2341)+TMP_JAMP(2452)+( + $ -1.000000000000000D+00)*TMP_JAMP(2687)+(-1.000000000000000D+00) + $ *TMP_JAMP(2724)+TMP_JAMP(2839)+TMP_JAMP(2929)+TMP_JAMP(3017) + JAMP(64,1) = (-1.000000000000000D+00)*AMP(396) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(300) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(311) + $ 
+(-1.000000000000000D+00)*TMP_JAMP(421)+((0.000000000000000D+00 + $ ,-1.000000000000000D+00))*TMP_JAMP(501)+TMP_JAMP(1380)+( + $ -1.000000000000000D+00)*AMP(947)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(1544)+TMP_JAMP(1683) + $ +TMP_JAMP(1801)+(-1.000000000000000D+00)*TMP_JAMP(2450) + $ +TMP_JAMP(2586)+TMP_JAMP(2720)+TMP_JAMP(2869) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2964) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2975) + JAMP(65,1) = TMP_JAMP(579)+(-1.000000000000000D+00) + $ *TMP_JAMP(1008)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1049)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1218)+(-1.000000000000000D+00)*AMP(1017) + $ +TMP_JAMP(1611)+TMP_JAMP(1862)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(1901)+TMP_JAMP(2273)+( + $ -1.000000000000000D+00)*TMP_JAMP(2441)+TMP_JAMP(3022) + $ +TMP_JAMP(3028) JAMP(66,1) = (-1.000000000000000D+00)*AMP(391) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(397) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(418)+( - $ -1.000000000000000D+00)*AMP(474)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1824)+(-1.000000000000000D+00) - $ *TMP_JAMP(2270)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2272)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2274)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2275)+TMP_JAMP(2312)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2314)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2446)+TMP_JAMP(2699) - JAMP(67,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(908)+(-1.000000000000000D+00)*AMP(1401)+( - $ -1.000000000000000D+00)*TMP_JAMP(699)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(701)+(-1.000000000000000D+00) - $ *TMP_JAMP(889)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ 
*TMP_JAMP(1427)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1466)+TMP_JAMP(1473)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1719)+TMP_JAMP(2276)+( - $ -1.000000000000000D+00)*TMP_JAMP(2278)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2279)+(-1.000000000000000D+00) - $ *TMP_JAMP(2292)+TMP_JAMP(2734)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2760) - JAMP(68,1) = (-1.000000000000000D+00)*AMP(1019) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(1056) - $ +TMP_JAMP(855)+TMP_JAMP(1664)+TMP_JAMP(2280) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2282) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2286) - $ +TMP_JAMP(2291)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2301)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2760) - JAMP(69,1) = (-1.000000000000000D+00)*AMP(705)+( - $ -1.000000000000000D+00)*AMP(1399)+(-1.000000000000000D+00) - $ *AMP(1861)+(-1.000000000000000D+00)*TMP_JAMP(543)+TMP_JAMP(751) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(754) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1439) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1440) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1447) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1734) - $ +TMP_JAMP(2288)+TMP_JAMP(2290)+(-1.000000000000000D+00) - $ *TMP_JAMP(2291)+TMP_JAMP(2292)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2293)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2299)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2302)+(-1.000000000000000D+00) - $ *TMP_JAMP(2581) - JAMP(70,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(384)+(-1.000000000000000D+00)*AMP(394) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(435) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1445) - $ 
+((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1448) - $ +TMP_JAMP(1700)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2294)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2296)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2298)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2299)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2306)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2702) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(130) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(302) + $ +(-1.000000000000000D+00)*TMP_JAMP(417)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(479)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(502)+(-1.000000000000000D+00) + $ *TMP_JAMP(579)+(-1.000000000000000D+00)*TMP_JAMP(1418) + $ +TMP_JAMP(1707)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1747)+(-1.000000000000000D+00)*TMP_JAMP(1920)+( + $ -1.000000000000000D+00)*TMP_JAMP(2584)+TMP_JAMP(2887)+( + $ -1.000000000000000D+00)*TMP_JAMP(2914)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(2975) + JAMP(67,1) = (-1.000000000000000D+00)*AMP(1403)+( + $ -1.000000000000000D+00)*TMP_JAMP(1626)+(-1.000000000000000D+00) + $ *TMP_JAMP(2144)+(-1.000000000000000D+00)*TMP_JAMP(2452)+( + $ -1.000000000000000D+00)*TMP_JAMP(2678)+TMP_JAMP(2768) + $ +TMP_JAMP(2906)+(-1.000000000000000D+00)*TMP_JAMP(2997)+( + $ -1.000000000000000D+00)*TMP_JAMP(3004) + JAMP(68,1) = ((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1055)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1058)+TMP_JAMP(1275)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(1342)+(-1.000000000000000D+00) + $ *AMP(1021)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(2116)+TMP_JAMP(2144)+TMP_JAMP(2297)+( + $ -1.000000000000000D+00)*TMP_JAMP(2341)+TMP_JAMP(2426)+( + $ 
-1.000000000000000D+00)*TMP_JAMP(2486)+TMP_JAMP(2794)+( + $ -1.000000000000000D+00)*TMP_JAMP(2999)+TMP_JAMP(3016) + JAMP(69,1) = (-1.000000000000000D+00)*TMP_JAMP(1413)+( + $ -1.000000000000000D+00)*AMP(1401)+TMP_JAMP(2042)+TMP_JAMP(2149) + $ +TMP_JAMP(2578)+TMP_JAMP(2679)+TMP_JAMP(2731)+( + $ -1.000000000000000D+00)*TMP_JAMP(2800)+(-1.000000000000000D+00) + $ *TMP_JAMP(2883)+TMP_JAMP(3004) + JAMP(70,1) = AMP(370)+(-1.000000000000000D+00)*AMP(394) + $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(435)+( + $ -1.000000000000000D+00)*TMP_JAMP(170)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(290)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(298)+(-1.000000000000000D+00) + $ *TMP_JAMP(414)+(-1.000000000000000D+00)*TMP_JAMP(434) + $ +TMP_JAMP(1413)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1738)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1743)+TMP_JAMP(2522)+(-1.000000000000000D+00) + $ *TMP_JAMP(2575)+TMP_JAMP(2586)+TMP_JAMP(2791)+TMP_JAMP(2925)+( + $ -1.000000000000000D+00)*TMP_JAMP(2961) JAMP(71,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(664)+(-1.000000000000000D+00)*AMP(1016) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(1051) - $ +TMP_JAMP(870)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1455)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2300)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2302)+TMP_JAMP(2303)+TMP_JAMP(2305) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2306) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2316) - $ +TMP_JAMP(2595)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2701) + $ *TMP_JAMP(1176)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1296)+(-1.000000000000000D+00)*AMP(1018) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2091) + $ +TMP_JAMP(2343)+(-1.000000000000000D+00)*TMP_JAMP(2800)+( + $ 
-1.000000000000000D+00)*TMP_JAMP(2945)+(-1.000000000000000D+00) + $ *TMP_JAMP(3016)+(-1.000000000000000D+00)*TMP_JAMP(3028) JAMP(72,1) = (-1.000000000000000D+00)*AMP(392) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(427) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1465) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1757) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2309) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2310) - $ +(-1.000000000000000D+00)*TMP_JAMP(2311)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2313)+TMP_JAMP(2315) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2316) - $ +TMP_JAMP(2444)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2702) - JAMP(73,1) = (-1.000000000000000D+00)*AMP(1424)+( - $ -1.000000000000000D+00)*AMP(1512)+TMP_JAMP(682) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(696) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1476) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1562) - $ +TMP_JAMP(1567)+TMP_JAMP(2317)+TMP_JAMP(2318)+TMP_JAMP(2321)+( - $ -1.000000000000000D+00)*TMP_JAMP(2323)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2352)+(-1.000000000000000D+00) - $ *TMP_JAMP(2761) - JAMP(74,1) = (-1.000000000000000D+00)*AMP(1418) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(1430) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1125) - $ +TMP_JAMP(1496)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2319)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2320)+(-1.000000000000000D+00)*TMP_JAMP(2321) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2322) - $ +TMP_JAMP(2340)+TMP_JAMP(2358)+TMP_JAMP(2361)+TMP_JAMP(2364) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2450) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2471) - JAMP(75,1) = 
(-1.000000000000000D+00)*AMP(1423) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1482) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1958) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2324) - $ +(-1.000000000000000D+00)*TMP_JAMP(2328)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*TMP_JAMP(2403) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2705) - $ +TMP_JAMP(2707)+TMP_JAMP(2761) - JAMP(76,1) = (-1.000000000000000D+00)*AMP(1027)+AMP(1717)+( - $ -1.000000000000000D+00)*TMP_JAMP(825)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(1131)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(1625)+(-1.000000000000000D - $ +00)*TMP_JAMP(2331)+((0.000000000000000D+00,-1.000000000000000D - $ +00))*TMP_JAMP(2333)+((0.000000000000000D+00,1.000000000000000D - $ +00))*TMP_JAMP(2334)+(-1.000000000000000D+00)*TMP_JAMP(2335) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2347) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2396) - $ +TMP_JAMP(2408)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2705) - JAMP(77,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(920)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(1283)+(-1.000000000000000D+00)*AMP(1417)+TMP_JAMP(724) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1577) - $ +(-1.000000000000000D+00)*TMP_JAMP(1583)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2336)+TMP_JAMP(2337)+( - $ -1.000000000000000D+00)*TMP_JAMP(2339)+TMP_JAMP(2341) - $ +TMP_JAMP(2342)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2386)+(-1.000000000000000D+00)*TMP_JAMP(2455) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2706) - JAMP(78,1) = (-1.000000000000000D+00)*AMP(1026)+( - $ -1.000000000000000D+00)*TMP_JAMP(738)+TMP_JAMP(1524) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1530) - $ 
+(-1.000000000000000D+00)*TMP_JAMP(1629)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*TMP_JAMP(2343) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2346) - $ +TMP_JAMP(2348)+(-1.000000000000000D+00)*TMP_JAMP(2349) - $ +TMP_JAMP(2350)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2433)+(-1.000000000000000D+00)*TMP_JAMP(2462)+( - $ -1.000000000000000D+00)*TMP_JAMP(2522)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2706) - JAMP(79,1) = (-1.000000000000000D+00)*AMP(1427)+( - $ -1.000000000000000D+00)*TMP_JAMP(667)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(675)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1502)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(1503)+(-1.000000000000000D - $ +00)*TMP_JAMP(1966)+TMP_JAMP(2351)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2352)+(-1.000000000000000D+00) - $ *TMP_JAMP(2354)+(-1.000000000000000D+00)*TMP_JAMP(2355) - $ +TMP_JAMP(2362)+(-1.000000000000000D+00)*TMP_JAMP(2368)+( - $ -1.000000000000000D+00)*TMP_JAMP(2707)+(-1.000000000000000D+00) - $ *TMP_JAMP(2708) - JAMP(80,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(73)+(-1.000000000000000D+00)*AMP(1421) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(1433) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1124) - $ +TMP_JAMP(1532)+(-1.000000000000000D+00)*TMP_JAMP(2356) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2359) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2360) - $ +(-1.000000000000000D+00)*TMP_JAMP(2362)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2363)+TMP_JAMP(2365) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2367) - $ +(-1.000000000000000D+00)*TMP_JAMP(2389)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2436)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*TMP_JAMP(2457) - JAMP(81,1) = 
(-1.000000000000000D+00)*AMP(1425) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1513) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1514) - $ +(-1.000000000000000D+00)*TMP_JAMP(2369)+TMP_JAMP(2372)+( - $ -1.000000000000000D+00)*TMP_JAMP(2374)+TMP_JAMP(2405)+( - $ -1.000000000000000D+00)*TMP_JAMP(2417)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2419)+TMP_JAMP(2708)+( - $ -1.000000000000000D+00)*TMP_JAMP(2709) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(299) + $ +TMP_JAMP(416)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(452)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(467)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(507)+(-1.000000000000000D+00)*TMP_JAMP(1307) + $ +TMP_JAMP(1665)+(-1.000000000000000D+00)*TMP_JAMP(1706) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1748) + $ +TMP_JAMP(2049)+(-1.000000000000000D+00)*TMP_JAMP(2838) + $ +TMP_JAMP(2906)+TMP_JAMP(2945)+TMP_JAMP(2961) + JAMP(73,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(584)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1396)+TMP_JAMP(1582)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(1713)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(1761)+((0.000000000000000D+00 + $ ,-1.000000000000000D+00))*TMP_JAMP(1764)+TMP_JAMP(1895)+( + $ -1.000000000000000D+00)*TMP_JAMP(1932)+(-1.000000000000000D+00) + $ *AMP(1428)+TMP_JAMP(2569)+(-1.000000000000000D+00) + $ *TMP_JAMP(2652)+TMP_JAMP(2683)+TMP_JAMP(2786)+TMP_JAMP(2796) + $ +TMP_JAMP(2902) + JAMP(74,1) = TMP_JAMP(2027)+TMP_JAMP(2042)+(-1.000000000000000D + $ +00)*AMP(1422)+TMP_JAMP(2383)+TMP_JAMP(2580)+( + $ -1.000000000000000D+00)*TMP_JAMP(2683)+TMP_JAMP(2735)+( + $ -1.000000000000000D+00)*TMP_JAMP(2798)+(-1.000000000000000D+00) + $ *TMP_JAMP(2932)+TMP_JAMP(2942)+TMP_JAMP(3008) + JAMP(75,1) = TMP_JAMP(1015)+((0.000000000000000D+00, + $ 
-1.000000000000000D+00))*TMP_JAMP(1196)+(-1.000000000000000D+00) + $ *TMP_JAMP(1383)+(-1.000000000000000D+00)*TMP_JAMP(1386) + $ +TMP_JAMP(1860)+(-1.000000000000000D+00)*TMP_JAMP(1863)+( + $ -1.000000000000000D+00)*TMP_JAMP(1895)+TMP_JAMP(1899)+( + $ -1.000000000000000D+00)*AMP(1427)+TMP_JAMP(2627)+TMP_JAMP(2780) + $ +(-1.000000000000000D+00)*TMP_JAMP(2895)+(-1.000000000000000D + $ +00)*TMP_JAMP(2936) + JAMP(76,1) = (-1.000000000000000D+00)*TMP_JAMP(1038)+( + $ -1.000000000000000D+00)*TMP_JAMP(1107)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(1185)+((0.000000000000000D+00 + $ ,-1.000000000000000D+00))*TMP_JAMP(1203)+(-1.000000000000000D + $ +00)*AMP(1029)+(-1.000000000000000D+00)*TMP_JAMP(1899) + $ +TMP_JAMP(2043)+(-1.000000000000000D+00)*TMP_JAMP(2095)+( + $ -1.000000000000000D+00)*TMP_JAMP(2328)+TMP_JAMP(2458)+( + $ -1.000000000000000D+00)*TMP_JAMP(2611)+TMP_JAMP(2649)+( + $ -1.000000000000000D+00)*TMP_JAMP(2684)+TMP_JAMP(2779) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2943) + $ +TMP_JAMP(3009) + JAMP(77,1) = (-1.000000000000000D+00)*TMP_JAMP(800) + $ +TMP_JAMP(1631)+(-1.000000000000000D+00)*TMP_JAMP(1812) + $ +TMP_JAMP(1898)+(-1.000000000000000D+00)*AMP(1421)+( + $ -1.000000000000000D+00)*TMP_JAMP(2332)+TMP_JAMP(2537) + $ +TMP_JAMP(2932)+(-1.000000000000000D+00)*TMP_JAMP(2936)+( + $ -1.000000000000000D+00)*TMP_JAMP(2972)+TMP_JAMP(3023) + JAMP(78,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1216)+(-1.000000000000000D+00)*TMP_JAMP(1264)+( + $ -1.000000000000000D+00)*AMP(1028)+(-1.000000000000000D+00) + $ *TMP_JAMP(1494)+(-1.000000000000000D+00)*TMP_JAMP(1633) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1764) + $ +(-1.000000000000000D+00)*TMP_JAMP(1898)+TMP_JAMP(2095)+( + $ -1.000000000000000D+00)*TMP_JAMP(2336)+(-1.000000000000000D+00) + $ *TMP_JAMP(2426)+TMP_JAMP(2511)+TMP_JAMP(2552)+TMP_JAMP(2685)+( + $ -1.000000000000000D+00)*TMP_JAMP(2874)+TMP_JAMP(2958) + $ +TMP_JAMP(3002) + 
JAMP(79,1) = ((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(584)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1187)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1200)+TMP_JAMP(1626)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(1849)+(-1.000000000000000D+00) + $ *TMP_JAMP(1883)+(-1.000000000000000D+00)*TMP_JAMP(2036)+( + $ -1.000000000000000D+00)*AMP(1431)+TMP_JAMP(2489)+( + $ -1.000000000000000D+00)*TMP_JAMP(2505)+(-1.000000000000000D+00) + $ *TMP_JAMP(2570)+(-1.000000000000000D+00)*TMP_JAMP(2630) + $ +TMP_JAMP(2645)+TMP_JAMP(2686)+(-1.000000000000000D+00) + $ *TMP_JAMP(2797)+(-1.000000000000000D+00)*TMP_JAMP(3011) + JAMP(80,1) = TMP_JAMP(643)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(1207)+((0.000000000000000D+00 + $ ,-1.000000000000000D+00))*TMP_JAMP(1291)+TMP_JAMP(2037) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2138) + $ +(-1.000000000000000D+00)*AMP(1425)+(-1.000000000000000D+00) + $ *TMP_JAMP(2250)+(-1.000000000000000D+00)*TMP_JAMP(2381)+( + $ -1.000000000000000D+00)*TMP_JAMP(2686)+(-1.000000000000000D+00) + $ *TMP_JAMP(2699)+TMP_JAMP(2905)+TMP_JAMP(2987)+( + $ -1.000000000000000D+00)*TMP_JAMP(3008) + JAMP(81,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1188)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1201)+TMP_JAMP(1269)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(1349)+((0.000000000000000D+00 + $ ,-1.000000000000000D+00))*TMP_JAMP(1987)+TMP_JAMP(2020)+( + $ -1.000000000000000D+00)*TMP_JAMP(2141)+(-1.000000000000000D+00) + $ *AMP(1429)+(-1.000000000000000D+00)*TMP_JAMP(2773) + $ +TMP_JAMP(2864)+(-1.000000000000000D+00)*TMP_JAMP(2909) + $ +TMP_JAMP(3011) JAMP(82,1) = (-1.000000000000000D+00)*AMP(404) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(408) - $ +AMP(460)+TMP_JAMP(856)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1133)+(-1.000000000000000D+00) - $ 
*TMP_JAMP(1524)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1635)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2376)+TMP_JAMP(2377)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2379)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2380)+(-1.000000000000000D+00) - $ *TMP_JAMP(2382)+TMP_JAMP(2425)+TMP_JAMP(2709) - JAMP(83,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(921)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(1209)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(1284)+(-1.000000000000000D+00)*AMP(1419)+TMP_JAMP(786)+( - $ -1.000000000000000D+00)*TMP_JAMP(1558)+(-1.000000000000000D+00) - $ *TMP_JAMP(2383)+TMP_JAMP(2384)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2385)+TMP_JAMP(2388) - $ +TMP_JAMP(2390)+(-1.000000000000000D+00)*TMP_JAMP(2391)+( - $ -1.000000000000000D+00)*TMP_JAMP(2469)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2710) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(305) + $ +TMP_JAMP(426)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(505)+(-1.000000000000000D+00)*TMP_JAMP(555)+( + $ -1.000000000000000D+00)*TMP_JAMP(992)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(1349)+(-1.000000000000000D+00) + $ *TMP_JAMP(2023)+(-1.000000000000000D+00)*TMP_JAMP(2306)+( + $ -1.000000000000000D+00)*TMP_JAMP(2698)+(-1.000000000000000D+00) + $ *TMP_JAMP(2872)+TMP_JAMP(2922)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(2967)+TMP_JAMP(2987) + JAMP(83,1) = (-1.000000000000000D+00)*TMP_JAMP(784) + $ +TMP_JAMP(1020)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1053)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1195)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1212)+TMP_JAMP(1268)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(1868)+((0.000000000000000D+00 + $ 
,-1.000000000000000D+00))*TMP_JAMP(2011)+(-1.000000000000000D + $ +00)*AMP(1423)+TMP_JAMP(2451)+TMP_JAMP(2699)+( + $ -1.000000000000000D+00)*TMP_JAMP(2772)+TMP_JAMP(2917)+( + $ -1.000000000000000D+00)*TMP_JAMP(2939)+(-1.000000000000000D+00) + $ *TMP_JAMP(2965) JAMP(84,1) = (-1.000000000000000D+00)*AMP(402) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(408) - $ +TMP_JAMP(803)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1535)+(-1.000000000000000D+00)*TMP_JAMP(1644) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2393) - $ +TMP_JAMP(2397)+(-1.000000000000000D+00)*TMP_JAMP(2398)+( - $ -1.000000000000000D+00)*TMP_JAMP(2399)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2400)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(2443)+(-1.000000000000000D - $ +00)*TMP_JAMP(2481)+(-1.000000000000000D+00)*TMP_JAMP(2558) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2710) - JAMP(85,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(77)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(590)+(-1.000000000000000D+00)*AMP(1428) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(1481)+( - $ -1.000000000000000D+00)*AMP(1701)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1541)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(1542)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(1543)+TMP_JAMP(1551) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2401) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2402) - $ +TMP_JAMP(2404)+(-1.000000000000000D+00)*TMP_JAMP(2405) - $ +TMP_JAMP(2410)+(-1.000000000000000D+00)*TMP_JAMP(2416) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2422) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2452) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(133) + $ +(-1.000000000000000D+00)*TMP_JAMP(181)+TMP_JAMP(424)+( + $ 
-1.000000000000000D+00)*TMP_JAMP(436)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(500)+(-1.000000000000000D+00) + $ *TMP_JAMP(556)+TMP_JAMP(658)+(-1.000000000000000D+00) + $ *TMP_JAMP(1013)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1241)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1564)+TMP_JAMP(1617)+TMP_JAMP(1662)+( + $ -1.000000000000000D+00)*TMP_JAMP(2275)+TMP_JAMP(2489)+( + $ -1.000000000000000D+00)*TMP_JAMP(2761)+(-1.000000000000000D+00) + $ *TMP_JAMP(2880)+(-1.000000000000000D+00)*TMP_JAMP(2922) + $ +TMP_JAMP(2965) + JAMP(85,1) = TMP_JAMP(1386)+(-1.000000000000000D+00)*AMP(1432)+( + $ -1.000000000000000D+00)*TMP_JAMP(2372)+TMP_JAMP(2387) + $ +TMP_JAMP(2393)+TMP_JAMP(2427)+(-1.000000000000000D+00) + $ *TMP_JAMP(2467)+(-1.000000000000000D+00)*TMP_JAMP(2505)+( + $ -1.000000000000000D+00)*TMP_JAMP(2599)+(-1.000000000000000D+00) + $ *TMP_JAMP(2755)+(-1.000000000000000D+00)*TMP_JAMP(2912) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2977) + $ +(-1.000000000000000D+00)*TMP_JAMP(3005) JAMP(86,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(826)+(-1.000000000000000D+00)*AMP(1029)+AMP(1718) - $ +TMP_JAMP(637)+TMP_JAMP(829)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(832)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1604)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(2406)+TMP_JAMP(2407)+( - $ -1.000000000000000D+00)*TMP_JAMP(2408)+(-1.000000000000000D+00) - $ *TMP_JAMP(2409)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2411)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2414)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2435)+(-1.000000000000000D+00)*TMP_JAMP(2711) - JAMP(87,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(1116)+(-1.000000000000000D+00)*AMP(1426) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(1478) - $ 
+AMP(1834)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1561)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1563)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1564)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1572)+TMP_JAMP(2415)+TMP_JAMP(2417) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2418) - $ +(-1.000000000000000D+00)*TMP_JAMP(2420)+(-1.000000000000000D - $ +00)*TMP_JAMP(2421)+TMP_JAMP(2423)+(-1.000000000000000D+00) - $ *TMP_JAMP(2427)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2439) - JAMP(88,1) = (-1.000000000000000D+00)*AMP(405)+AMP(461)+( - $ -1.000000000000000D+00)*TMP_JAMP(656)+(-1.000000000000000D+00) - $ *TMP_JAMP(858)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(862)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1576)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1577)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1581)+TMP_JAMP(1582)+TMP_JAMP(1584) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1616) - $ +(-1.000000000000000D+00)*TMP_JAMP(2424)+(-1.000000000000000D - $ +00)*TMP_JAMP(2425)+(-1.000000000000000D+00)*TMP_JAMP(2426) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2428) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2429) - $ +TMP_JAMP(2430)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2451) - JAMP(89,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(826)+(-1.000000000000000D+00)*AMP(1024) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(1042)+( - $ -1.000000000000000D+00)*AMP(1717)+(-1.000000000000000D+00) - $ *TMP_JAMP(870)+TMP_JAMP(1590)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1624)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2431)+(-1.000000000000000D+00) - $ *TMP_JAMP(2432)+((0.000000000000000D+00,1.000000000000000D+00)) - $ 
*TMP_JAMP(2433)+(-1.000000000000000D+00)*TMP_JAMP(2437)+( - $ -1.000000000000000D+00)*TMP_JAMP(2438)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2439)+(-1.000000000000000D+00) - $ *TMP_JAMP(2440)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2448)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2715) - JAMP(90,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(64)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(385)+(-1.000000000000000D+00)*AMP(400) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(406) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(418)+( - $ -1.000000000000000D+00)*AMP(460)+(-1.000000000000000D+00) - $ *TMP_JAMP(1596)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1633)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2442)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2443)+(-1.000000000000000D+00)*TMP_JAMP(2444) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2445) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2447) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2449) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2452) - $ +TMP_JAMP(2453)+(-1.000000000000000D+00)*TMP_JAMP(2599) - JAMP(91,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(920)+AMP(1152)+(-1.000000000000000D+00)*AMP(1422) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(1463)+( - $ -1.000000000000000D+00)*AMP(1807)+(-1.000000000000000D+00) - $ *TMP_JAMP(631)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1767)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2454)+(-1.000000000000000D+00)*TMP_JAMP(2456) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2458) - $ +TMP_JAMP(2460)+(-1.000000000000000D+00)*TMP_JAMP(2465) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2762) - JAMP(92,1) = 
((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(827)+(-1.000000000000000D+00)*AMP(1028)+( - $ -1.000000000000000D+00)*AMP(1765)+(-1.000000000000000D+00) - $ *TMP_JAMP(635)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1603)+(-1.000000000000000D+00)*TMP_JAMP(2459) - $ +TMP_JAMP(2461)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2463)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2464)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2466)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2614)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2763) - JAMP(93,1) = AMP(627)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*AMP(1284)+(-1.000000000000000D+00) - $ *AMP(1420)+TMP_JAMP(645)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2468)+TMP_JAMP(2470) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2473) - $ +TMP_JAMP(2477)+(-1.000000000000000D+00)*TMP_JAMP(2485) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2743) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2762) - JAMP(94,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(387)+(-1.000000000000000D+00)*AMP(403) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(444) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1615) - $ +(-1.000000000000000D+00)*TMP_JAMP(2476)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*TMP_JAMP(2478)+TMP_JAMP(2480) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2482) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2483) - $ +TMP_JAMP(2484)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2486)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2492)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2497)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2500) - JAMP(95,1) = (-1.000000000000000D+00)*AMP(1025)+( - 
$ -1.000000000000000D+00)*AMP(1069)+(-1.000000000000000D+00) - $ *TMP_JAMP(663)+(-1.000000000000000D+00)*TMP_JAMP(665) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1623) - $ +TMP_JAMP(1629)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1809)+TMP_JAMP(2636)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2715)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(2763)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2764) - JAMP(96,1) = (-1.000000000000000D+00)*AMP(401) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(406)+( - $ -1.000000000000000D+00)*AMP(462)+TMP_JAMP(676)+TMP_JAMP(680) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1632) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1634) - $ +TMP_JAMP(1644)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2491)+(-1.000000000000000D+00)*TMP_JAMP(2494) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2496) - $ +TMP_JAMP(2498)+(-1.000000000000000D+00)*TMP_JAMP(2641) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2764) + $ *TMP_JAMP(951)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(983)+TMP_JAMP(1107)+TMP_JAMP(1127) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1204) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1290) + $ +(-1.000000000000000D+00)*AMP(1031)+TMP_JAMP(2146)+( + $ -1.000000000000000D+00)*TMP_JAMP(2480)+TMP_JAMP(2499)+( + $ -1.000000000000000D+00)*TMP_JAMP(2721)+(-1.000000000000000D+00) + $ *TMP_JAMP(2896)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(2977)+(-1.000000000000000D+00)*TMP_JAMP(2995) + JAMP(87,1) = (-1.000000000000000D+00)*TMP_JAMP(1379)+( + $ -1.000000000000000D+00)*TMP_JAMP(1953)+TMP_JAMP(2141)+( + $ -1.000000000000000D+00)*AMP(1430)+TMP_JAMP(2247)+TMP_JAMP(2403) + $ +TMP_JAMP(2882)+TMP_JAMP(2902)+(-1.000000000000000D+00) + $ *TMP_JAMP(2929)+TMP_JAMP(3005) + JAMP(88,1) = 
(-1.000000000000000D+00)*AMP(405)+( + $ -1.000000000000000D+00)*TMP_JAMP(176)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(291)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(294)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(312)+TMP_JAMP(422) + $ +TMP_JAMP(1379)+TMP_JAMP(1604)+(-1.000000000000000D+00) + $ *TMP_JAMP(1684)+(-1.000000000000000D+00)*TMP_JAMP(1802)+( + $ -1.000000000000000D+00)*TMP_JAMP(2438)+TMP_JAMP(2512)+( + $ -1.000000000000000D+00)*TMP_JAMP(2715)+TMP_JAMP(2873) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2967) + $ +TMP_JAMP(2982) + JAMP(89,1) = TMP_JAMP(258)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(318)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(476)+TMP_JAMP(1007) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1052) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1219) + $ +(-1.000000000000000D+00)*AMP(1026)+TMP_JAMP(1696)+( + $ -1.000000000000000D+00)*TMP_JAMP(1722)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(1858)+((0.000000000000000D+00 + $ ,-1.000000000000000D+00))*TMP_JAMP(1901)+(-1.000000000000000D + $ +00)*TMP_JAMP(2135)+TMP_JAMP(2443)+(-1.000000000000000D+00) + $ *TMP_JAMP(2495)+TMP_JAMP(2646)+TMP_JAMP(2995)+TMP_JAMP(3003) + JAMP(90,1) = (-1.000000000000000D+00)*AMP(400)+( + $ -1.000000000000000D+00)*AMP(419)+(-1.000000000000000D+00) + $ *TMP_JAMP(109)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(240)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(295)+(-1.000000000000000D+00)*TMP_JAMP(427) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(466) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(502) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(522) + $ +TMP_JAMP(1138)+(-1.000000000000000D+00)*TMP_JAMP(1703)+( + $ -1.000000000000000D+00)*TMP_JAMP(2044)+(-1.000000000000000D+00) + $ 
*TMP_JAMP(2308)+TMP_JAMP(2427)+(-1.000000000000000D+00) + $ *TMP_JAMP(2444)+(-1.000000000000000D+00)*TMP_JAMP(2650)+( + $ -1.000000000000000D+00)*TMP_JAMP(2982)+(-1.000000000000000D+00) + $ *TMP_JAMP(3003) + JAMP(91,1) = TMP_JAMP(647)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(2113)+(-1.000000000000000D+00) + $ *AMP(1426)+TMP_JAMP(2369)+TMP_JAMP(2502)+(-1.000000000000000D + $ +00)*TMP_JAMP(2941)+(-1.000000000000000D+00)*TMP_JAMP(3023)+( + $ -1.000000000000000D+00)*TMP_JAMP(3024) + JAMP(92,1) = ((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(985)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1204)+TMP_JAMP(1261)+TMP_JAMP(1280) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1350) + $ +(-1.000000000000000D+00)*AMP(1030)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(2113)+(-1.000000000000000D+00) + $ *TMP_JAMP(2143)+TMP_JAMP(2334)+(-1.000000000000000D+00) + $ *TMP_JAMP(2545)+TMP_JAMP(2714)+(-1.000000000000000D+00) + $ *TMP_JAMP(2762)+TMP_JAMP(2857)+(-1.000000000000000D+00) + $ *TMP_JAMP(3002) + JAMP(93,1) = ((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1769)+(-1.000000000000000D+00)*AMP(1424)+( + $ -1.000000000000000D+00)*AMP(1893)+TMP_JAMP(2465)+TMP_JAMP(2476) + $ +(-1.000000000000000D+00)*TMP_JAMP(2625)+(-1.000000000000000D + $ +00)*TMP_JAMP(2917)+TMP_JAMP(2928)+(-1.000000000000000D+00) + $ *TMP_JAMP(2931)+TMP_JAMP(2950)+TMP_JAMP(3024) + JAMP(94,1) = (-1.000000000000000D+00)*AMP(403) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(290) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(307) + $ +(-1.000000000000000D+00)*TMP_JAMP(424)+TMP_JAMP(430) + $ +TMP_JAMP(532)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1766)+TMP_JAMP(2512)+(-1.000000000000000D+00) + $ *TMP_JAMP(2756)+TMP_JAMP(2857)+TMP_JAMP(2924)+( + $ -1.000000000000000D+00)*TMP_JAMP(2950)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(2966) + 
JAMP(95,1) = AMP(150)+((0.000000000000000D+00,1.000000000000000D + $ +00))*TMP_JAMP(237)+(-1.000000000000000D+00)*TMP_JAMP(1043) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1250) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1350) + $ +(-1.000000000000000D+00)*AMP(1027)+TMP_JAMP(2135) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2148) + $ +(-1.000000000000000D+00)*TMP_JAMP(2355)+(-1.000000000000000D + $ +00)*TMP_JAMP(2381)+TMP_JAMP(2757)+TMP_JAMP(2779)+( + $ -1.000000000000000D+00)*TMP_JAMP(3021) + JAMP(96,1) = (-1.000000000000000D+00)*AMP(401)+( + $ -1.000000000000000D+00)*AMP(437)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(222)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(310)+TMP_JAMP(427) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(478) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(504) + $ +(-1.000000000000000D+00)*TMP_JAMP(1307)+((0.000000000000000D + $ +00,1.000000000000000D+00))*TMP_JAMP(1566)+(-1.000000000000000D + $ +00)*TMP_JAMP(2367)+TMP_JAMP(2502)+TMP_JAMP(2651)+( + $ -1.000000000000000D+00)*TMP_JAMP(2913)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(2966)+TMP_JAMP(3021) JAMP(97,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(934)+(-1.000000000000000D+00)*AMP(1445)+AMP(1637) - $ +TMP_JAMP(697)+TMP_JAMP(732)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(1168)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(1647)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(1649)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(1652)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*TMP_JAMP(1655) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1690) - $ +TMP_JAMP(1976)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2718)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2727)+TMP_JAMP(2765) + $ 
*TMP_JAMP(1399)+(-1.000000000000000D+00)*TMP_JAMP(1953)+( + $ -1.000000000000000D+00)*TMP_JAMP(2025)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(2121)+(-1.000000000000000D+00) + $ *AMP(1449)+TMP_JAMP(2234)+TMP_JAMP(2634)+(-1.000000000000000D + $ +00)*TMP_JAMP(2671)+TMP_JAMP(2689)+TMP_JAMP(2727)+TMP_JAMP(2866) + $ +TMP_JAMP(3012) JAMP(98,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(91)+(-1.000000000000000D+00)*AMP(1439)+TMP_JAMP(703)+( - $ -1.000000000000000D+00)*TMP_JAMP(881)+TMP_JAMP(887) - $ +TMP_JAMP(937)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(948)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1127)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1651)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1662)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1666)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2501)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2504)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2533)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2623)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2718)+TMP_JAMP(2766) - JAMP(99,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(933)+(-1.000000000000000D+00)*AMP(1444)+AMP(1754)+AMP(1755) - $ +(-1.000000000000000D+00)*TMP_JAMP(708)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1654)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1657)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(1663)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(1718)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2507)+(-1.000000000000000D - $ +00)*TMP_JAMP(2514)+((0.000000000000000D+00,1.000000000000000D - $ +00))*TMP_JAMP(2722)+TMP_JAMP(2735)+(-1.000000000000000D+00) - $ *TMP_JAMP(2765) - JAMP(100,1) = 
(-1.000000000000000D+00)*AMP(1036)+AMP(1663)+( - $ -1.000000000000000D+00)*TMP_JAMP(796)+(-1.000000000000000D+00) - $ *TMP_JAMP(985)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(990)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1659)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1661)+TMP_JAMP(1669)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1671)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(1802)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*TMP_JAMP(1806)+( - $ -1.000000000000000D+00)*TMP_JAMP(2511)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2512)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(2587)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*TMP_JAMP(2721) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2722) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2726) - $ +TMP_JAMP(2736) - JAMP(101,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(761)+(-1.000000000000000D+00)*AMP(1438)+AMP(1808)+AMP(1830) - $ +(-1.000000000000000D+00)*TMP_JAMP(720)+TMP_JAMP(722) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1665) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1667) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1766) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1903) - $ +(-1.000000000000000D+00)*TMP_JAMP(2515)+TMP_JAMP(2526)+( - $ -1.000000000000000D+00)*TMP_JAMP(2723)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2725)+TMP_JAMP(2728)+( - $ -1.000000000000000D+00)*TMP_JAMP(2766) - JAMP(102,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(668)+(-1.000000000000000D+00)*AMP(1035)+( - $ -1.000000000000000D+00)*AMP(1664)+TMP_JAMP(732)+( - $ -1.000000000000000D+00)*TMP_JAMP(735)+(-1.000000000000000D+00) - $ *TMP_JAMP(916)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ 
*TMP_JAMP(922)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1670)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1672)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1676)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1775)+(-1.000000000000000D+00)*TMP_JAMP(2521)+( - $ -1.000000000000000D+00)*TMP_JAMP(2525)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2527)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(2529)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*TMP_JAMP(2725) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2726) - $ +(-1.000000000000000D+00)*TMP_JAMP(2740) - JAMP(103,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(1298)+(-1.000000000000000D+00)*AMP(1448) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(1454)+( - $ -1.000000000000000D+00)*TMP_JAMP(769)+(-1.000000000000000D+00) - $ *TMP_JAMP(797)+(-1.000000000000000D+00)*TMP_JAMP(798)+( - $ -1.000000000000000D+00)*TMP_JAMP(978)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1137)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(1683)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(1693)+(-1.000000000000000D - $ +00)*TMP_JAMP(1902)+((0.000000000000000D+00,-1.000000000000000D - $ +00))*TMP_JAMP(2531)+TMP_JAMP(2532)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2534)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2539)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2727)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2729) - JAMP(104,1) = (-1.000000000000000D+00)*AMP(1442)+( - $ -1.000000000000000D+00)*TMP_JAMP(755)+TMP_JAMP(756)+( - $ -1.000000000000000D+00)*TMP_JAMP(758)+TMP_JAMP(981) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(999) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1161) - $ 
+((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1687) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1703) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1748) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2533) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2535) - $ +(-1.000000000000000D+00)*TMP_JAMP(2537)+TMP_JAMP(2538)+( - $ -1.000000000000000D+00)*TMP_JAMP(2540)+TMP_JAMP(2552) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2605) - $ +(-1.000000000000000D+00)*TMP_JAMP(2728) - JAMP(105,1) = (-1.000000000000000D+00)*AMP(1446)+AMP(1862) - $ +TMP_JAMP(770)+(-1.000000000000000D+00)*TMP_JAMP(909) - $ +TMP_JAMP(986)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1692)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1694)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1699)+(-1.000000000000000D+00)*TMP_JAMP(2542)+( - $ -1.000000000000000D+00)*TMP_JAMP(2543)+TMP_JAMP(2544)+( - $ -1.000000000000000D+00)*TMP_JAMP(2551)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2729)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2731)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(2738) - JAMP(106,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(351)+(-1.000000000000000D+00)*AMP(413)+AMP(451)+AMP(693)+( - $ -1.000000000000000D+00)*TMP_JAMP(917)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(927)+TMP_JAMP(1005) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1020) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1696) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1698) - $ +TMP_JAMP(1704)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1823)+(-1.000000000000000D+00)*TMP_JAMP(2547) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2548) - $ +TMP_JAMP(2564)+((0.000000000000000D+00,1.000000000000000D+00)) - $ 
*TMP_JAMP(2583)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2730)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2731)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2733) + $ *TMP_JAMP(1401)+TMP_JAMP(1952)+(-1.000000000000000D+00) + $ *TMP_JAMP(2022)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(2118)+(-1.000000000000000D+00)*AMP(1443) + $ +TMP_JAMP(2390)+(-1.000000000000000D+00)*TMP_JAMP(2408) + $ +TMP_JAMP(2456)+(-1.000000000000000D+00)*TMP_JAMP(2689) + $ +TMP_JAMP(2841)+TMP_JAMP(2908)+(-1.000000000000000D+00) + $ *TMP_JAMP(3006) + JAMP(99,1) = TMP_JAMP(821)+(-1.000000000000000D+00) + $ *TMP_JAMP(1018)+TMP_JAMP(1376)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(1378)+TMP_JAMP(1913) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2121) + $ +TMP_JAMP(2124)+(-1.000000000000000D+00)*AMP(1448)+( + $ -1.000000000000000D+00)*TMP_JAMP(2490)+(-1.000000000000000D+00) + $ *TMP_JAMP(2638)+TMP_JAMP(2765)+(-1.000000000000000D+00) + $ *TMP_JAMP(2843)+TMP_JAMP(2901) + JAMP(100,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(746)+(-1.000000000000000D+00)*TMP_JAMP(1278)+( + $ -1.000000000000000D+00)*AMP(1038)+(-1.000000000000000D+00) + $ *TMP_JAMP(1913)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(2012)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(2119)+(-1.000000000000000D+00)*TMP_JAMP(2499) + $ +TMP_JAMP(2592)+TMP_JAMP(2607)+TMP_JAMP(2669)+( + $ -1.000000000000000D+00)*TMP_JAMP(2690)+TMP_JAMP(2776)+( + $ -1.000000000000000D+00)*TMP_JAMP(2952)+TMP_JAMP(3020) + JAMP(101,1) = TMP_JAMP(1910)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(2118)+TMP_JAMP(2124)+( + $ -1.000000000000000D+00)*AMP(1442)+AMP(1813)+TMP_JAMP(2342)+( + $ -1.000000000000000D+00)*TMP_JAMP(2549)+(-1.000000000000000D+00) + $ *TMP_JAMP(2842)+(-1.000000000000000D+00)*TMP_JAMP(2867) + $ +TMP_JAMP(2984)+TMP_JAMP(3014) + JAMP(102,1) = 
(-1.000000000000000D+00)*TMP_JAMP(1030) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1404) + $ +(-1.000000000000000D+00)*AMP(1037)+(-1.000000000000000D+00) + $ *TMP_JAMP(1809)+(-1.000000000000000D+00)*TMP_JAMP(1910) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2018) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2119) + $ +(-1.000000000000000D+00)*TMP_JAMP(2560)+(-1.000000000000000D + $ +00)*TMP_JAMP(2606)+TMP_JAMP(2665)+TMP_JAMP(2691)+TMP_JAMP(2822) + $ +TMP_JAMP(2868)+TMP_JAMP(2989) + JAMP(103,1) = ((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1252)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1388)+(-1.000000000000000D+00)*TMP_JAMP(2125)+( + $ -1.000000000000000D+00)*AMP(1452)+TMP_JAMP(2430)+( + $ -1.000000000000000D+00)*TMP_JAMP(2447)+(-1.000000000000000D+00) + $ *TMP_JAMP(2478)+(-1.000000000000000D+00)*TMP_JAMP(2633) + $ +TMP_JAMP(2664)+(-1.000000000000000D+00)*TMP_JAMP(2848) + $ +TMP_JAMP(2930)+(-1.000000000000000D+00)*TMP_JAMP(3012) + JAMP(104,1) = (-1.000000000000000D+00)*TMP_JAMP(777)+( + $ -1.000000000000000D+00)*TMP_JAMP(798)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(845)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(962)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(1228)+TMP_JAMP(2126)+( + $ -1.000000000000000D+00)*AMP(1446)+(-1.000000000000000D+00) + $ *TMP_JAMP(2440)+(-1.000000000000000D+00)*TMP_JAMP(2457)+( + $ -1.000000000000000D+00)*TMP_JAMP(2580)+TMP_JAMP(2739)+( + $ -1.000000000000000D+00)*TMP_JAMP(2830)+(-1.000000000000000D+00) + $ *TMP_JAMP(2930)+(-1.000000000000000D+00)*TMP_JAMP(2993) + $ +TMP_JAMP(3006) + JAMP(105,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(989)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1388)+(-1.000000000000000D+00)*TMP_JAMP(1670) + $ +TMP_JAMP(2088)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ 
*TMP_JAMP(2117)+(-1.000000000000000D+00)*AMP(1450) + $ +TMP_JAMP(2901)+(-1.000000000000000D+00)*TMP_JAMP(2937)+( + $ -1.000000000000000D+00)*TMP_JAMP(2944)+(-1.000000000000000D+00) + $ *TMP_JAMP(3026) + JAMP(106,1) = (-1.000000000000000D+00)*AMP(413) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(292) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(304) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(333) + $ +(-1.000000000000000D+00)*TMP_JAMP(432)+TMP_JAMP(442) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1599) + $ +TMP_JAMP(2670)+(-1.000000000000000D+00)*TMP_JAMP(2693) + $ +TMP_JAMP(2740)+(-1.000000000000000D+00)*TMP_JAMP(2921) + $ +TMP_JAMP(2944)+TMP_JAMP(2970)+(-1.000000000000000D+00) + $ *TMP_JAMP(2993) JAMP(107,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(762)+(-1.000000000000000D+00)*AMP(1440)+AMP(1889)+( - $ -1.000000000000000D+00)*TMP_JAMP(783)+(-1.000000000000000D+00) - $ *TMP_JAMP(784)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1134)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1153)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1701)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1702)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2549)+(-1.000000000000000D+00)*TMP_JAMP(2550) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2553) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2556) - $ +TMP_JAMP(2562)+(-1.000000000000000D+00)*TMP_JAMP(2732) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2768) - JAMP(108,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(369)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(378)+(-1.000000000000000D+00)*AMP(411) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(417) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(594) - $ 
+((0.000000000000000D+00,1.000000000000000D+00))*AMP(1669) - $ +TMP_JAMP(799)+(-1.000000000000000D+00)*TMP_JAMP(802) - $ +TMP_JAMP(960)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1707)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1709)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1711)+TMP_JAMP(1714)+(-1.000000000000000D+00) - $ *TMP_JAMP(1835)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2556)+(-1.000000000000000D+00)*TMP_JAMP(2557)+( - $ -1.000000000000000D+00)*TMP_JAMP(2561)+(-1.000000000000000D+00) - $ *TMP_JAMP(2563)+TMP_JAMP(2597)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2733) - JAMP(109,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(95)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(1199)+(-1.000000000000000D+00)*AMP(1449)+AMP(1567)+( - $ -1.000000000000000D+00)*TMP_JAMP(813)+(-1.000000000000000D+00) - $ *TMP_JAMP(814)+(-1.000000000000000D+00)*TMP_JAMP(885) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1717) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1720) - $ +TMP_JAMP(1722)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1727)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1754)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1974)+(-1.000000000000000D+00)*TMP_JAMP(2734)+( - $ -1.000000000000000D+00)*TMP_JAMP(2735)+(-1.000000000000000D+00) - $ *TMP_JAMP(2767) - JAMP(110,1) = AMP(527)+(-1.000000000000000D+00)*AMP(670)+( - $ -1.000000000000000D+00)*AMP(1038)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*AMP(1199)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*AMP(1592)+TMP_JAMP(821)+( - $ -1.000000000000000D+00)*TMP_JAMP(823)+TMP_JAMP(826) - $ +TMP_JAMP(873)+TMP_JAMP(915)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1724)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(1726)+((0.000000000000000D - $ 
+00,-1.000000000000000D+00))*TMP_JAMP(1975)+TMP_JAMP(2570) - $ +TMP_JAMP(2572)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2574)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2584)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2613)+(-1.000000000000000D+00)*TMP_JAMP(2736)+( - $ -1.000000000000000D+00)*TMP_JAMP(2739) - JAMP(111,1) = (-1.000000000000000D+00)*AMP(687) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(754)+( - $ -1.000000000000000D+00)*AMP(1447)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*AMP(1469)+AMP(1568)+TMP_JAMP(836)+( - $ -1.000000000000000D+00)*TMP_JAMP(839)+TMP_JAMP(840) - $ +TMP_JAMP(869)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1733)+TMP_JAMP(1738)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(1743)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1749)+TMP_JAMP(1977) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1978) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2580) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2738) - $ +TMP_JAMP(2767) - JAMP(112,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(367)+(-1.000000000000000D+00)*AMP(414) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(754) - $ +AMP(786)+(-1.000000000000000D+00)*TMP_JAMP(852)+( - $ -1.000000000000000D+00)*TMP_JAMP(857)+(-1.000000000000000D+00) - $ *TMP_JAMP(884)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1740)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1742)+TMP_JAMP(1751)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(1759)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(1793)+(-1.000000000000000D - $ +00)*TMP_JAMP(1799)+((0.000000000000000D+00,1.000000000000000D - $ +00))*TMP_JAMP(1980)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2582)+((0.000000000000000D+00 - $ 
,1.000000000000000D+00))*TMP_JAMP(2584)+TMP_JAMP(2585) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2587) - $ +TMP_JAMP(2588)+(-1.000000000000000D+00)*TMP_JAMP(2589) - $ +TMP_JAMP(2590) + $ *TMP_JAMP(844)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(978)+TMP_JAMP(1023)+TMP_JAMP(1034) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1228) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1253) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1304) + $ +(-1.000000000000000D+00)*TMP_JAMP(1914)+TMP_JAMP(2089) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2137) + $ +(-1.000000000000000D+00)*AMP(1444)+TMP_JAMP(2576) + $ +TMP_JAMP(2828)+(-1.000000000000000D+00)*TMP_JAMP(2939)+( + $ -1.000000000000000D+00)*TMP_JAMP(3026) + JAMP(108,1) = (-1.000000000000000D+00)*AMP(411) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(293) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(301) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(334) + $ +(-1.000000000000000D+00)*TMP_JAMP(437)+TMP_JAMP(440) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(596)+( + $ -1.000000000000000D+00)*TMP_JAMP(781)+(-1.000000000000000D+00) + $ *TMP_JAMP(817)+TMP_JAMP(846)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(977)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(980)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(1252)+TMP_JAMP(1591) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1593) + $ +TMP_JAMP(2099)+(-1.000000000000000D+00)*TMP_JAMP(2350)+( + $ -1.000000000000000D+00)*TMP_JAMP(2600)+(-1.000000000000000D+00) + $ *TMP_JAMP(2669)+TMP_JAMP(2753)+(-1.000000000000000D+00) + $ *TMP_JAMP(2870)+(-1.000000000000000D+00)*TMP_JAMP(2970) + JAMP(109,1) = (-1.000000000000000D+00)*TMP_JAMP(1036) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1378) + $ 
+(-1.000000000000000D+00)*TMP_JAMP(1884)+(-1.000000000000000D + $ +00)*TMP_JAMP(2039)+((0.000000000000000D+00,-1.000000000000000D + $ +00))*TMP_JAMP(2068)+(-1.000000000000000D+00)*AMP(1453)+( + $ -1.000000000000000D+00)*TMP_JAMP(2357)+TMP_JAMP(2523)+( + $ -1.000000000000000D+00)*TMP_JAMP(2573)+TMP_JAMP(2678)+( + $ -1.000000000000000D+00)*TMP_JAMP(2766)+TMP_JAMP(2775)+( + $ -1.000000000000000D+00)*TMP_JAMP(3027) + JAMP(110,1) = ((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(990)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1248)+TMP_JAMP(1277)+(-1.000000000000000D+00) + $ *AMP(1040)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(1852)+TMP_JAMP(1884)+TMP_JAMP(2040) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2116) + $ +(-1.000000000000000D+00)*TMP_JAMP(2338)+(-1.000000000000000D + $ +00)*TMP_JAMP(2795)+(-1.000000000000000D+00)*TMP_JAMP(2900) + $ +TMP_JAMP(3015)+(-1.000000000000000D+00)*TMP_JAMP(3020) + JAMP(111,1) = TMP_JAMP(1516)+(-1.000000000000000D+00) + $ *TMP_JAMP(1932)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(2117)+(-1.000000000000000D+00)*AMP(1451)+( + $ -1.000000000000000D+00)*TMP_JAMP(2371)+TMP_JAMP(2519) + $ +TMP_JAMP(2572)+(-1.000000000000000D+00)*TMP_JAMP(2679) + $ +TMP_JAMP(2695)+TMP_JAMP(2787)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(2973)+TMP_JAMP(3027) + JAMP(112,1) = (-1.000000000000000D+00)*AMP(414)+( + $ -1.000000000000000D+00)*TMP_JAMP(189)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(299)+TMP_JAMP(433) + $ +TMP_JAMP(435)+(-1.000000000000000D+00)*TMP_JAMP(439) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(508) + $ +(-1.000000000000000D+00)*TMP_JAMP(1948)+((0.000000000000000D + $ +00,1.000000000000000D+00))*TMP_JAMP(2069)+(-1.000000000000000D + $ +00)*TMP_JAMP(2565)+TMP_JAMP(2788)+(-1.000000000000000D+00) + $ *TMP_JAMP(2795)+TMP_JAMP(2918)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(2973) 
JAMP(113,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(82)+(-1.000000000000000D+00)*AMP(1033) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(1039) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(1051) - $ +TMP_JAMP(866)+(-1.000000000000000D+00)*TMP_JAMP(867) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1135) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1748) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1750) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1807) - $ +(-1.000000000000000D+00)*TMP_JAMP(1817)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2591)+(-1.000000000000000D - $ +00)*TMP_JAMP(2594)+((0.000000000000000D+00,1.000000000000000D - $ +00))*TMP_JAMP(2596)+TMP_JAMP(2739)+(-1.000000000000000D+00) - $ *TMP_JAMP(2740)+TMP_JAMP(2741) + $ *TMP_JAMP(78)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(321)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(739)+(-1.000000000000000D+00)*TMP_JAMP(1272)+( + $ -1.000000000000000D+00)*AMP(1035)+(-1.000000000000000D+00) + $ *TMP_JAMP(1810)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(2091)+TMP_JAMP(2803)+(-1.000000000000000D+00) + $ *TMP_JAMP(2933)+TMP_JAMP(2991)+(-1.000000000000000D+00) + $ *TMP_JAMP(3015) JAMP(114,1) = (-1.000000000000000D+00)*AMP(409) $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(415) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(427)+( - $ -1.000000000000000D+00)*AMP(451)+(-1.000000000000000D+00) - $ *AMP(1569)+(-1.000000000000000D+00)*TMP_JAMP(880)+TMP_JAMP(881) - $ +TMP_JAMP(882)+(-1.000000000000000D+00)*TMP_JAMP(885) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1753) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1758) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1760) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1828) - $ 
+TMP_JAMP(2597)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2598)+(-1.000000000000000D+00)*TMP_JAMP(2599) - $ +TMP_JAMP(2601)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2603)+(-1.000000000000000D+00)*TMP_JAMP(2741) - JAMP(115,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(92)+(-1.000000000000000D+00)*AMP(1443) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(1463) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(1537) - $ +AMP(1574)+(-1.000000000000000D+00)*AMP(1809)+( - $ -1.000000000000000D+00)*TMP_JAMP(897)+(-1.000000000000000D+00) - $ *TMP_JAMP(898)+(-1.000000000000000D+00)*TMP_JAMP(899)+( - $ -1.000000000000000D+00)*TMP_JAMP(900)+TMP_JAMP(1003)+( - $ -1.000000000000000D+00)*TMP_JAMP(1009)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(1014)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1153)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(1764)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*TMP_JAMP(1765) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1768) - $ +TMP_JAMP(1771)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2604)+(-1.000000000000000D+00)*TMP_JAMP(2606) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2607) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2611) - $ +TMP_JAMP(2617)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2620) - JAMP(116,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(508)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(668)+(-1.000000000000000D+00)*AMP(1037) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(1121)+( - $ -1.000000000000000D+00)*AMP(1514)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*AMP(1776)+(-1.000000000000000D+00) - $ *TMP_JAMP(908)+TMP_JAMP(910)+(-1.000000000000000D+00) - $ *TMP_JAMP(911)+TMP_JAMP(912)+TMP_JAMP(918)+TMP_JAMP(982) - $ 
+((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2607) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2608) - $ +(-1.000000000000000D+00)*TMP_JAMP(2609)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2610)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2612)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(2613)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*TMP_JAMP(2615) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2629) - JAMP(117,1) = (-1.000000000000000D+00)*AMP(528) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(595)+( - $ -1.000000000000000D+00)*AMP(1441)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*AMP(1538)+TMP_JAMP(932)+( - $ -1.000000000000000D+00)*TMP_JAMP(933)+TMP_JAMP(934) - $ +TMP_JAMP(936)+TMP_JAMP(956)+(-1.000000000000000D+00) - $ *TMP_JAMP(983)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1136)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1797)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1985)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2616)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2619)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2623)+(-1.000000000000000D+00)*TMP_JAMP(2624)+( - $ -1.000000000000000D+00)*TMP_JAMP(2634)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(2743)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(2768) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(83) + $ +(-1.000000000000000D+00)*TMP_JAMP(190)+((0.000000000000000D+00 + $ ,-1.000000000000000D+00))*TMP_JAMP(236)+((0.000000000000000D+00 + $ ,-1.000000000000000D+00))*TMP_JAMP(311)+TMP_JAMP(439) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(467) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(526) + $ +TMP_JAMP(1138)+((0.000000000000000D+00,1.000000000000000D+00)) + $ 
*TMP_JAMP(1593)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(1852)+TMP_JAMP(2052)+(-1.000000000000000D+00) + $ *TMP_JAMP(2390)+(-1.000000000000000D+00)*TMP_JAMP(2593)+( + $ -1.000000000000000D+00)*TMP_JAMP(2804)+(-1.000000000000000D+00) + $ *TMP_JAMP(2915)+(-1.000000000000000D+00)*TMP_JAMP(2991) + JAMP(115,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(589)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(2122)+(-1.000000000000000D+00)*AMP(1447)+( + $ -1.000000000000000D+00)*TMP_JAMP(2373)+TMP_JAMP(2550)+( + $ -1.000000000000000D+00)*TMP_JAMP(2574)+(-1.000000000000000D+00) + $ *TMP_JAMP(2582)+(-1.000000000000000D+00)*TMP_JAMP(2626) + $ +TMP_JAMP(2629)+TMP_JAMP(2941)+(-1.000000000000000D+00) + $ *TMP_JAMP(3014) + JAMP(116,1) = TMP_JAMP(1279)+(-1.000000000000000D+00)*AMP(1039) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2122) + $ +TMP_JAMP(2143)+AMP(1669)+(-1.000000000000000D+00) + $ *TMP_JAMP(2371)+(-1.000000000000000D+00)*TMP_JAMP(2619)+( + $ -1.000000000000000D+00)*TMP_JAMP(2823)+TMP_JAMP(2853)+( + $ -1.000000000000000D+00)*TMP_JAMP(2989)+(-1.000000000000000D+00) + $ *TMP_JAMP(3019) + JAMP(117,1) = ((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(589)+(-1.000000000000000D+00)*TMP_JAMP(1658) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2137) + $ +(-1.000000000000000D+00)*AMP(1445)+AMP(1519)+TMP_JAMP(2596) + $ +TMP_JAMP(2624)+TMP_JAMP(2633)+TMP_JAMP(2884)+TMP_JAMP(2908)+( + $ -1.000000000000000D+00)*TMP_JAMP(2928)+TMP_JAMP(2959) JAMP(118,1) = ((0.000000000000000D+00,1.000000000000000D+00)) $ *AMP(349)+(-1.000000000000000D+00)*AMP(412) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(595) - $ +AMP(627)+(-1.000000000000000D+00)*TMP_JAMP(952)+( - $ -1.000000000000000D+00)*TMP_JAMP(953)+TMP_JAMP(954) - $ +TMP_JAMP(955)+(-1.000000000000000D+00)*TMP_JAMP(957) - $ +TMP_JAMP(962)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ 
*TMP_JAMP(1794)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2625)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2626)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2627)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2628)+TMP_JAMP(2630)+TMP_JAMP(2631) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2632) - JAMP(119,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(508)+(-1.000000000000000D+00)*AMP(1034) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(1039)+( - $ -1.000000000000000D+00)*AMP(1071)+TMP_JAMP(975)+( - $ -1.000000000000000D+00)*TMP_JAMP(976)+(-1.000000000000000D+00) - $ *TMP_JAMP(977)+TMP_JAMP(978)+(-1.000000000000000D+00) - $ *TMP_JAMP(980)+(-1.000000000000000D+00)*TMP_JAMP(984) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1138) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1805) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1808) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1810) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1811) - $ +TMP_JAMP(1813)+TMP_JAMP(1818)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2633)+(-1.000000000000000D+00) - $ *TMP_JAMP(2634)+TMP_JAMP(2635)+(-1.000000000000000D+00) - $ *TMP_JAMP(2636)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2744) - JAMP(120,1) = AMP(98)+(-1.000000000000000D+00)*AMP(410) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(436)+( - $ -1.000000000000000D+00)*TMP_JAMP(1000)+TMP_JAMP(1001) - $ +TMP_JAMP(1002)+TMP_JAMP(1004)+(-1.000000000000000D+00) - $ *TMP_JAMP(1007)+TMP_JAMP(1008)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(1139)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(1140)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(1819)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(1822)+((0.000000000000000D 
- $ +00,1.000000000000000D+00))*TMP_JAMP(1825)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(1826)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(1829)+TMP_JAMP(1832) - $ +TMP_JAMP(1834)+TMP_JAMP(1835)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(2637)+TMP_JAMP(2638) - $ +TMP_JAMP(2639)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2744) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(308) + $ +TMP_JAMP(438)+(-1.000000000000000D+00)*TMP_JAMP(443) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(506) + $ +(-1.000000000000000D+00)*TMP_JAMP(532)+(-1.000000000000000D+00) + $ *TMP_JAMP(1667)+(-1.000000000000000D+00)*TMP_JAMP(1681)+( + $ -1.000000000000000D+00)*TMP_JAMP(1724)+TMP_JAMP(1834)+( + $ -1.000000000000000D+00)*TMP_JAMP(1947)+(-1.000000000000000D+00) + $ *TMP_JAMP(2628)+TMP_JAMP(2844)+(-1.000000000000000D+00) + $ *TMP_JAMP(2858)+TMP_JAMP(2918)+(-1.000000000000000D+00) + $ *TMP_JAMP(2959) + JAMP(119,1) = (-1.000000000000000D+00)*TMP_JAMP(1041)+( + $ -1.000000000000000D+00)*AMP(1036)+TMP_JAMP(1608) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2148) + $ +(-1.000000000000000D+00)*TMP_JAMP(2614)+TMP_JAMP(2635) + $ +TMP_JAMP(2933)+TMP_JAMP(2992)+TMP_JAMP(3019) + JAMP(120,1) = (-1.000000000000000D+00)*AMP(410) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(415) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(436) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(244) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(309) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(336) + $ +TMP_JAMP(531)+(-1.000000000000000D+00)*TMP_JAMP(1418)+( + $ -1.000000000000000D+00)*TMP_JAMP(1673)+TMP_JAMP(1724) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1797) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(1462) + $ +TMP_JAMP(2619)+(-1.000000000000000D+00)*TMP_JAMP(2634) + $ 
+TMP_JAMP(2670)+(-1.000000000000000D+00)*TMP_JAMP(2916)+( + $ -1.000000000000000D+00)*TMP_JAMP(2992) IF(INIT_MODE)THEN DO I=1, NGRAPHS diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/dummy_fct.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/dummy_fct.f index 076cf29d67..4f7a204b8f 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/dummy_fct.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/dummy_fct.f @@ -32,7 +32,7 @@ logical FUNCTION dummy_cuts(P) LOGICAL IS_A_NU(NEXTERNAL),IS_HEAVY(NEXTERNAL) logical do_cuts(nexternal) COMMON /TO_SPECISA/IS_A_J,IS_A_A,IS_A_L,IS_A_B,IS_A_NU,IS_HEAVY, - . IS_A_ONIUM, do_cuts + & IS_A_ONIUM, do_cuts dummy_cuts=.true. @@ -118,15 +118,16 @@ double precision function user_dynamical_scale(P) C ************************************************************ -C default for the library implementing a dummt bias function +C default for the library implementing a dummy bias function C ************************************************************ subroutine bias_wgt_custom(p, original_weight, bias_weight) - implicit none + implicit none C C Parameters C include 'nexternal.inc' -C + +C C Arguments C double precision p(0:3, nexternal) @@ -161,3 +162,4 @@ subroutine bias_wgt_custom(p, original_weight, bias_weight) return end subroutine bias_wgt_custom + diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py index e9f421ae5f..824815f47b 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py @@ -537,7 +537,7 @@ def charge_card(self, tag): self.param_card = param_card_reader.ParamCard(param_card) return self.param_card elif tag == 'mgruncard': - self.run_card = RunCard(self[tag]) + self.run_card = RunCard(self[tag], unknown_warning=False) return self.run_card elif tag == 'mg5proccard': proc_card = self[tag].split('\n') @@ -2625,6 +2625,7 @@ class RunCard(ConfigFile): default_include_file = 'run_card.inc' 
default_autodef_file = 'run.inc' donewarning = [] + include_as_parameter = [] def plugin_input(self, finput): @@ -2671,18 +2672,40 @@ def __new__(cls, finput=None, **opt): elif isinstance(finput, cls): target_class = finput.__class__ elif isinstance(finput, str): + path = finput if '\n' not in finput: finput = open(finput).read() if 'req_acc_FO' in finput: target_class = RunCardNLO else: target_class = RunCardLO + if MADEVENT and os.path.exists(pjoin(MEDIR, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(MEDIR, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard + elif not MADEVENT and os.path.exists(path.replace('run_card.dat', '../bin/internal/launch_plugin.py')): + misc.sprint('try to use plugin class') + pydir = path.replace('run_card.dat', '../bin/internal/') + with misc.TMP_variable(sys, 'path', sys.path + [pydir]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard + else: return None target_class.fill_post_set_from_blocks() - - return super(RunCard, cls).__new__(target_class, finput, **opt) + out = super(RunCard, cls).__new__(target_class, finput, **opt) + if not isinstance(out, RunCard): #should not happen but in presence of missmatch of library loaded. 
+ out.__init__(finput, **opt) + return out else: return super(RunCard, cls).__new__(cls, finput, **opt) @@ -2710,7 +2733,7 @@ def __init__(self, *args, **opts): self.system_default = {} self.display_block = [] # set some block to be displayed - + self.fct_mod = {} # {param: (fct_pointer, *argument, **opts)} self.cut_class = {} self.warned=False @@ -2747,7 +2770,7 @@ def get_lepton_densities(cls): def add_param(self, name, value, fortran_name=None, include=True, hidden=False, legacy=False, cut=False, system=False, sys_default=None, - autodef=False, + autodef=False, fct_mod=None, **opts): """ add a parameter to the card. value is the default value and defines the type (int/float/bool/str) of the input. @@ -2761,6 +2784,7 @@ def add_param(self, name, value, fortran_name=None, include=True, If a path (Source/PDF/pdf.inc) the definition will be added within that file Default is False (does not add the definition) entry added in the run_card will automatically have this on True. + fct_mod: defines a function to run if the parameter is modify in the include file options of **opts: - allowed: list of valid options. '*' means anything else should be allowed. empty list means anything possible as well. @@ -2785,8 +2809,12 @@ def add_param(self, name, value, fortran_name=None, include=True, if autodef: self.definition_path[autodef].append(name) self.user_set.add(name) + # function to trigger if a value is modified in the include file + # main target is action to force correct recompilation (like for compilation flag/...) 
+ if fct_mod: + self.fct_mod[name] = fct_mod - def read(self, finput, consistency=True): + def read(self, finput, consistency=True, unknown_warning=True): """Read the input file, this can be a path to a file, a file object, a str with the content of the file.""" @@ -2794,6 +2822,7 @@ def read(self, finput, consistency=True): if "\n" in finput: finput = finput.split('\n') elif os.path.isfile(finput): + self.path = finput finput = open(finput) else: raise Exception("No such file %s" % finput) @@ -2808,7 +2837,7 @@ def read(self, finput, consistency=True): name = name.lower().strip() if name not in self: #looks like an entry added by a user -> add it nicely - self.add_unknown_entry(name, value) + self.add_unknown_entry(name, value, unknown_warning) else: self.set( name, value, user=True) # parameter not set in the run_card can be set to compatiblity value @@ -2820,7 +2849,7 @@ def read(self, finput, consistency=True): logger.warning(str(error)) else: raise - def add_unknown_entry(self, name, value): + def add_unknown_entry(self, name, value, unknow_warning): """function to add an entry to the run_card when the associated parameter does not exists. This is based on the guess_entry_fromname for the various syntax providing input. This then call add_param accordingly. @@ -2859,7 +2888,7 @@ def add_unknown_entry(self, name, value): raise Exception("dictionary need to have at least one entry") default['dict']['__type__'] = default[self.guess_type_from_value(default_value[0])] - if name not in RunCard.donewarning: + if name not in RunCard.donewarning and unknow_warning: logger.warning("Found unexpected entry in run_card: \"%s\" with value \"%s\".\n"+\ " The type was assigned to %s. 
\n"+\ " The definition of that variable will %sbe automatically added to fortran file %s\n"+\ @@ -2897,7 +2926,16 @@ def valid_line(self, line, tmp): return False else: return True - + + + def reset_simd(self, old_value, new_value, name, *args, **opts): + raise Exception('pass in reset simd') + + def make_clean(self,old_value, new_value, name, dir): + raise Exception('pass make clean for ', dir) + + def make_Ptouch(self,old_value, new_value, name, reset): + raise Exception('pass Ptouch for ', reset) def write(self, output_file, template=None, python_template=False, write_hidden=False, template_options=None, **opt): @@ -3072,6 +3110,77 @@ def write(self, output_file, template=None, python_template=False, else: output_file.write(text) + def get_last_value_include(self, output_dir): + """For paraeter in self.fct_mod + parse the associate inc file to get the value of the previous run. + We return a dictionary {name: old_value} + if inc file does not exist we will return the current value (i.e. 
set has no change) + """ + + #remember that + # default_include_file is a class variable + # self.includepath is on the form include_path : [list of param ] + out = {} + + # setup inc_to_parse to be like self.includepath (include_path : [list of param ]) + # BUT only containing the parameter that need to be tracked for the fct_mod option + inc_to_parse = {} + for inc_file, params in self.includepath.items(): + if not inc_file: + continue + if any(p in params for p in self.fct_mod): + inc_to_parse[inc_file] = [name for name in self.includepath[inc_file] if name in self.fct_mod] + + # now loop over the files and ask the associate function + for inc_file, params in inc_to_parse.items(): + if inc_file is True: + inc_file = self.default_include_file + out.update(self.get_value_from_include(inc_file, params, output_dir)) + + return out + + def get_value_from_include(self, path, list_of_params, output_dir): + """for a given include file return the current value of the requested parameter + return a dictionary {name: value} + if path does not exists return the current value in self for all parameter""" + + #WARNING DOES NOT HANDLE LIST/DICT so far + + # handle case where file is missing + if not os.path.exists(pjoin(output_dir,path)): + misc.sprint("include file not existing", pjoin(output_dir,path)) + out = {name: self[name] for name in list_of_params} + + with open(pjoin(output_dir,path), 'r') as fsock: + text = fsock.read() + + for name in list_of_params: + misc.sprint(name, name in self.fortran_name) + misc.sprint(self.fortran_name[name] if name in self.fortran_name[name] else name) + to_track = [self.fortran_name[name] if name in self.fortran_name else name for name in list_of_params] + pattern = re.compile(r"\(?(%(names)s)\s?=\s?([^)]*)\)?" 
% {'names':'|'.join(to_track)}, re.I) + out = dict(pattern.findall(text)) + misc.sprint(out) + for name in list_of_params: + if name in self.fortran_name: + value = out[self.fortran_name[name]] + del out[self.fortran_name[name]] + out[name] = value + + for name, value in out.items(): + try: + out[name] = self.format_variable(value, type(self[name])) + except Exception: + continue + + if len(out) != len(list_of_params): + misc.sprint(list_of_params) + misc.sprint(to_track) + misc.sprint(self.fortran_name) + misc.sprint(text) + raise Exception + return out + def get_default(self, name, default=None, log_level=None): """return self[name] if exist otherwise default. log control if we @@ -3362,71 +3471,93 @@ def write_include_file(self, output_dir, output_file=None): #ensusre that system only parameter are correctly set self.update_system_parameter_for_include() + value_in_old_include = self.get_last_value_include(output_dir) + + if output_dir: self.write_autodef(output_dir, output_file=None) # check/fix status of customised functions self.edit_dummy_fct_from_file(self["custom_fcts"], os.path.dirname(output_dir)) for incname in self.includepath: - if incname is True: - pathinc = self.default_include_file - elif incname is False: - continue - else: - pathinc = incname + self.write_one_include_file(output_dir, incname, output_file) + + for name,value in value_in_old_include.items(): + if value != self[name]: + self.fct_mod[name][0](value, self[name], name, *self.fct_mod[name][1],**self.fct_mod[name][2]) - if output_file: - fsock = output_file + def write_one_include_file(self, output_dir, incname, output_file=None): + """write one include file at the time""" + + misc.sprint(incname) + if incname is True: + pathinc = self.default_include_file + elif incname is False: + return + else: + pathinc = incname + + if output_file: + fsock = output_file + else: + fsock = file_writers.FortranWriter(pjoin(output_dir,pathinc+'.tmp')) + + + for key in self.includepath[incname]: + 
#define the fortran name + if key in self.fortran_name: + fortran_name = self.fortran_name[key] else: - fsock = file_writers.FortranWriter(pjoin(output_dir,pathinc+'.tmp')) - for key in self.includepath[incname]: - #define the fortran name - if key in self.fortran_name: - fortran_name = self.fortran_name[key] + fortran_name = key + + if incname in self.include_as_parameter: + fsock.writelines('INTEGER %s\n' % fortran_name) + #get the value with warning if the user didn't set it + value = self.get_default(key) + if hasattr(self, 'mod_inc_%s' % key): + value = getattr(self, 'mod_inc_%s' % key)(value) + # Special treatment for strings containing a list of + # strings. Convert it to a list of strings + if isinstance(value, list): + # in case of a list, add the length of the list as 0th + # element in fortran. Only in case of integer or float + # list (not for bool nor string) + targettype = self.list_parameter[key] + if targettype is bool: + pass + elif targettype is int: + line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(len(value))) + fsock.writelines(line) + elif targettype is float: + line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(float(len(value)))) + fsock.writelines(line) + # output the rest of the list in fortran + for i,v in enumerate(value): + line = '%s(%s) = %s \n' % (fortran_name, i+1, self.f77_formatting(v)) + fsock.writelines(line) + elif isinstance(value, dict): + for fortran_name, onevalue in value.items(): + line = '%s = %s \n' % (fortran_name, self.f77_formatting(onevalue)) + fsock.writelines(line) + elif isinstance(incname,str) and 'compile' in incname: + if incname in self.include_as_parameter: + line = 'PARAMETER (%s=%s)' %( fortran_name, value) else: - fortran_name = key - - #get the value with warning if the user didn't set it - value = self.get_default(key) - if hasattr(self, 'mod_inc_%s' % key): - value = getattr(self, 'mod_inc_%s' % key)(value) - # Special treatment for strings containing a list of - # strings. 
Convert it to a list of strings - if isinstance(value, list): - # in case of a list, add the length of the list as 0th - # element in fortran. Only in case of integer or float - # list (not for bool nor string) - targettype = self.list_parameter[key] - if targettype is bool: - pass - elif targettype is int: - line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(len(value))) - fsock.writelines(line) - elif targettype is float: - line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(float(len(value)))) - fsock.writelines(line) - # output the rest of the list in fortran - for i,v in enumerate(value): - line = '%s(%s) = %s \n' % (fortran_name, i+1, self.f77_formatting(v)) - fsock.writelines(line) - elif isinstance(value, dict): - for fortran_name, onevalue in value.items(): - line = '%s = %s \n' % (fortran_name, self.f77_formatting(onevalue)) - fsock.writelines(line) - elif isinstance(incname,str) and 'compile' in incname: line = '%s = %s \n' % (fortran_name, value) - fsock.write(line) + fsock.write(line) + else: + if incname in self.include_as_parameter: + line = 'PARAMETER (%s=%s)' %( fortran_name, self.f77_formatting(value)) else: line = '%s = %s \n' % (fortran_name, self.f77_formatting(value)) - fsock.writelines(line) - if not output_file: - fsock.close() - path = pjoin(output_dir,pathinc) - if not os.path.exists(path) or not filecmp.cmp(path, path+'.tmp'): - files.mv(path+'.tmp', path) - else: - os.remove(path+'.tmp') - + fsock.writelines(line) + if not output_file: + fsock.close() + path = pjoin(output_dir,pathinc) + if not os.path.exists(path) or not filecmp.cmp(path, path+'.tmp'): + files.mv(path+'.tmp', path) + else: + os.remove(path+'.tmp') def write_autodef(self, output_dir, output_file=None): """ Add the definition of variable to run.inc if the variable is set with autodef. @@ -3765,13 +3896,14 @@ def remove_all_cut(self): %(tmin_for_channel)s = tmin_for_channel ! 
limit the non-singular reach of --some-- channel of integration related to T-channel diagram (value between -1 and 0), -1 is no impact %(survey_splitting)s = survey_splitting ! for loop-induced control how many core are used at survey for the computation of a single iteration. %(survey_nchannel_per_job)s = survey_nchannel_per_job ! control how many Channel are integrated inside a single job on cluster/multicore - %(refine_evt_by_job)s = refine_evt_by_job ! control the maximal number of events for the first iteration of the refine (larger means less jobs) + %(refine_evt_by_job)s = refine_evt_by_job ! control the maximal number of events for the first iteration of the refine (larger means less jobs) #********************************************************************* -# Compilation flag. No automatic re-compilation (need manual "make clean" in Source) +# Compilation flag. #********************************************************************* %(global_flag)s = global_flag ! fortran optimization flag use for the all code. %(aloha_flag)s = aloha_flag ! fortran optimization flag for aloha function. Suggestions: '-ffast-math' %(matrix_flag)s = matrix_flag ! fortran optimization flag for matrix.f function. Suggestions: '-O3' + %(vector_size)s = vector_size ! 
size designed for SIMD/OpenMP/GPU (number of events in lockstep) """ template_off = '# To see advanced option for Phase-Space optimization: type "update psoptim"' @@ -3927,9 +4059,12 @@ class RunCardLO(RunCard): "get_dummy_x1_x2": pjoin("SubProcesses","dummy_fct.f"), "dummy_boostframe": pjoin("SubProcesses","dummy_fct.f"), "user_dynamical_scale": pjoin("SubProcesses","dummy_fct.f"), + "bias_wgt_custom": pjoin("SubProcesses","dummy_fct.f"), "user_": pjoin("SubProcesses","dummy_fct.f") # all function starting by user will be added to that file } + include_as_parameter = ['vector.inc'] + if MG5DIR: default_run_card = pjoin(MG5DIR, "internal", "default_run_card_lo.dat") @@ -4163,10 +4298,15 @@ def default_setup(self): self.add_param('hel_splitamp', True, hidden=True, include=False, comment='decide if amplitude aloha call can be splitted in two or not when doing helicity per helicity optimization.') self.add_param('hel_zeroamp', True, hidden=True, include=False, comment='decide if zero amplitude can be removed from the computation when doing helicity per helicity optimization.') self.add_param('SDE_strategy', 1, allowed=[1,2], fortran_name="sde_strat", comment="decide how Multi-channel should behaves \"1\" means full single diagram enhanced (hep-ph/0208156), \"2\" use the product of the denominator") - self.add_param('global_flag', '-O', include=False, hidden=True, comment='global fortran compilation flag, suggestion -fbound-check') - self.add_param('aloha_flag', '', include=False, hidden=True, comment='global fortran compilation flag, suggestion: -ffast-math') - self.add_param('matrix_flag', '', include=False, hidden=True, comment='fortran compilation flag for the matrix-element files, suggestion -O3') - + self.add_param('global_flag', '-O', include=False, hidden=True, comment='global fortran compilation flag, suggestion -fbound-check', + fct_mod=(self.make_clean, ('Source'),{})) + self.add_param('aloha_flag', '', include=False, hidden=True, comment='global fortran 
compilation flag, suggestion: -ffast-math', + fct_mod=(self.make_clean, ('Source/DHELAS'),{})) + self.add_param('matrix_flag', '', include=False, hidden=True, comment='fortran compilation flag for the matrix-element files, suggestion -O3', + fct_mod=(self.make_Ptouch, ('matrix'),{})) + self.add_param('vector_size', 1, include='vector.inc', hidden=True, comment='lockstep size for parralelism run', + fortran_name='VECSIZE_MEMMAX', fct_mod=(self.reset_simd,(),{})) + # parameter allowing to define simple cut via the pdg # Special syntax are related to those. (can not be edit directly) self.add_param('pt_min_pdg',{'__type__':0.}, include=False, cut=True) @@ -4188,8 +4328,7 @@ def default_setup(self): self.add_param('mxxmin4pdg',[-1.], system=True) self.add_param('mxxpart_antipart', [False], system=True) - # CUDACPP parameters - self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + def check_validity(self): """ """ diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/check_param_card.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/check_param_card.py index fe874a06a4..71089d7480 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/check_param_card.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/check_param_card.py @@ -85,7 +85,7 @@ def load_str(self, text): self.value= ' '.join(data[len(self.lhacode):]) # check that lhacode are the first entry otherwise return invalid param. 
if ' '.join([str(i) for i in self.lhacode]) != ' '.join(data[:len(self.lhacode)]): - raise InvalidParam + raise InvalidParam("line was %s" % str(data)) else: self.value = data[-1] diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/common_run_interface.py index 5d0187e3fa..14c7f310dc 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/common_run_interface.py @@ -749,13 +749,15 @@ def writeRunWeb(me_dir): class RunWebHandling(object): - def __init__(self, me_dir, crashifpresent=True, warnifpresent=True): + def __init__(self, me_dir, crashifpresent=True, warnifpresent=True, force_run=False): """raise error if RunWeb already exists me_dir is the directory where the write RunWeb""" self.remove_run_web = True self.me_dir = me_dir - + if force_run: + self.remove_run_web = False + return if crashifpresent or warnifpresent: if os.path.exists(pjoin(me_dir, 'RunWeb')): pid = open(pjoin(me_dir, 'RunWeb')).read() @@ -6574,7 +6576,7 @@ def reask(self, *args, **opt): fail_due_to_format = 0 #parameter to avoid infinite loop def postcmd(self, stop, line): - if line not in [None, '0', 'done', '']: + if line not in [None, '0', 'done', '',0]: ending_question = cmd.OneLinePathCompletion.postcmd(self,stop,line) else: ending_question = True @@ -7533,7 +7535,8 @@ def open_file(self, answer): else: raise if time.time() - start < .5: - self.mother_interface.ask("Are you really that fast? If you are using an editor that returns directly. Please confirm that you have finised to edit the file", 'y') + self.mother_interface.ask("Are you really that fast? If you are using an editor that returns directly. 
Please confirm that you have finised to edit the file", 'y', + timeout=False) self.reload_card(path) def reload_card(self, path): diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/extended_cmd.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/extended_cmd.py index a6a8609dce..2f37070580 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/extended_cmd.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/extended_cmd.py @@ -1108,9 +1108,12 @@ def ask(self, question, default, choices=[], path_msg=None, if alias: choices += list(alias.keys()) + + question_instance = obj(question, allow_arg=choices, default=default, mother_interface=self, **opt) - + if fct_timeout is None: + fct_timeout = lambda x: question_instance.postcmd(x, default) if x and default else False if first_cmd: if isinstance(first_cmd, str): question_instance.onecmd(first_cmd) @@ -2271,6 +2274,9 @@ def postcmd(self, stop, line): if n: self.default(line) return self.postcmd(stop, line) + elif self.value is None and line: + self.default(line) + return self.postcmd(stop, line) if not self.casesensitive: for ans in self.allow_arg: if ans.lower() == self.value.lower(): diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/gen_ximprove.py index 3b8ec31215..a88d60b282 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/gen_ximprove.py @@ -154,10 +154,15 @@ def get_helicity(self, to_submit=True, clean=True): p = misc.Popen(['./gensym'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=Pdir) #sym_input = "%(points)d %(iterations)d %(accuracy)f \n" % self.opts + (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') - nb_channel = max([math.floor(float(d)) for d in stdout.split()]) - + try: + nb_channel = max([math.floor(float(d)) for d in stdout.split()]) + except Exception as error: + misc.sprint(stdout, 'no channel or error for %s' % Pdir) + continue + 
self.cmd.compile(['madevent_forhel'], cwd=Pdir) if not os.path.exists(pjoin(Pdir, 'madevent_forhel')): raise Exception('Error make madevent_forhel not successful') diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py index f4c9cb6334..c9d1c7706a 100644 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py @@ -1,6 +1,12 @@ +# Copyright (C) 2020-2023 CERN and UCLouvain. +# Licensed under the GNU Lesser General Public License (version 3 or later). +# Created by: O. Mattelaer (Aug 2023) for the MG5aMC CUDACPP plugin. +# Further modified by: O. Mattelaer, A. Valassi (2023) for the MG5aMC CUDACPP plugin. import logging - +import os +import subprocess +pjoin = os.path.join logger = logging.getLogger('cmdprint') # for stdout try: @@ -9,20 +15,23 @@ import internal.madevent_interface as madevent_interface import internal.misc as misc import internal.extended_cmd as extended_cmd + import internal.banner as banner_mod else: import madgraph.interface.madevent_interface as madevent_interface import madgraph.various.misc as misc import madgraph.interface.extended_cmd as extended_cmd + import madgraph.various.banner as banner_mod class CPPMEInterface(madevent_interface.MadEventCmdShell): - def compile(self, *args, **opts): """ """ import multiprocessing if not self.options['nb_core'] or self.options['nb_core'] == 'None': self.options['nb_core'] = multiprocessing.cpu_count() - if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): + import pathlib + import os + pjoin = os.path.join cudacpp_backend = self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) if cudacpp_backend == 'FORTRAN': @@ -36,5 +45,50 @@ def compile(self, *args, **opts): return misc.compile(nb_core=self.options['nb_core'], 
*args, **opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) - + +class CPPRunCard(banner_mod.RunCardLO): + def reset_simd(self, old_value, new_value, name): + if not hasattr(self, 'path'): + logger.warning('WARNING! CPPRunCard instance has no attribute path') + return + ###raise Exception('INTERNAL ERROR! CPPRunCard instance has no attribute path') + if name == "vector_size" and new_value <= int(old_value): + # code can handle the new size -> do not recompile + return + Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source') + subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + def plugin_input(self, finput): + return + + def default_setup(self): + super().default_setup() + self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + + def write_one_include_file(self, output_dir, incname, output_file=None): + """write one include file at the time""" + if incname == "vector.inc" and 'vector_size' not in self.user_set: + return + super().write_one_include_file(output_dir, incname, output_file) + + def check_validity(self): + """ensure that PLUGIN information are consistent""" + super().check_validity() + if self['SDE_strategy'] != 1: + logger.warning('SDE_strategy different of 1 is not supported with SMD/GPU mode') + self['sde_strategy'] = 1 + if self['hel_recycling']: + self['hel_recycling'] = False + +class GPURunCard(CPPRunCard): + def default_setup(self): + super(CPPRunCard, self).default_setup() + self.add_param('cudacpp_backend', 'CUDA', include=False, hidden=False) + +#class CUDACPPRunCard(CPPRunCard): +# def default_setup(self): +# super(CPPRunCard, self).default_setup() +# self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + MEINTERFACE = CPPMEInterface +RunCard = CPPRunCard diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/madevent_interface.py index 
920e07a926..d722702891 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/madevent_interface.py @@ -3796,9 +3796,11 @@ def do_combine_events(self, line): if self.run_card['bias_module'].lower() not in ['dummy', 'none'] and nb_event: self.correct_bias() - + elif self.run_card['custom_fcts']: + self.correct_bias() + logger.info("combine events done in %s", time.time()-start) - + self.to_store.append('event') diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/madevent b/epochX/cudacpp/gg_ttggg.mad/bin/madevent index 10b6a71fa2..dff9711b73 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/madevent +++ b/epochX/cudacpp/gg_ttggg.mad/bin/madevent @@ -173,6 +173,10 @@ if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): launch_interface = launch_plugin.MEINTERFACE +#Source use this executable for compilation always allow it +force_run = False +if (args and args[0] == 'treatcards'): + force_run=True # Call the cmd interface main loop try: @@ -180,7 +184,7 @@ try: launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) - with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): + with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), force_run=force_run): if (args and os.path.isfile(args[0])): # They are an input file input_file = args[0] diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 93a7080330..df96dd2b8c 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005595684051513672  +DEBUG: model prefixing takes 
0.005476713180541992  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.844 s +1 processes with 1240 diagrams generated in 1.864 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.495 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.446 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.349 s +ALOHA: aloha creates 5 routines in 0.342 s VVV1 VVV1 FFV1 @@ -204,9 +204,10 @@ FileWriter for / FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m12.991s -user 0m12.831s -sys 0m0.101s +real 0m12.796s +user 0m12.647s +sys 0m0.093s diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 470e92412b..c8e3576630 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005367279052734375  +DEBUG: model prefixing takes 0.0055692195892333984  INFO: Restrict model sm 
with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.076 s +8 processes with 40 diagrams generated in 0.078 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -197,7 +197,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -206,15 +206,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -223,23 +223,23 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s -Wrote files for 32 helas calls in 0.214 s +Wrote files for 32 helas calls in 0.215 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.144 s +ALOHA: aloha creates 2 routines in 0.145 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.130 s +ALOHA: aloha creates 4 routines in 0.135 s FFV1 FFV1 FFV1 @@ -259,8 +259,46 @@ save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CO INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages 
-DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG: self.in_madevent_mode =  True [output.py at line 207]  +DEBUG! Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +patching file Source/genps.inc +patching file Source/makefile +patching file SubProcesses/makefile +patching file bin/internal/gen_ximprove.py +Hunk #1 succeeded at 385 (offset 5 lines). +patching file bin/internal/madevent_interface.py +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +patching file auto_dsig1.f +Hunk #1 succeeded at 527 (offset 58 lines). +patching file driver.f +patching file matrix1.f +Hunk #1 succeeded at 75 (offset 3 lines). +Hunk #2 succeeded at 162 (offset 19 lines). +Hunk #3 succeeded at 247 (offset 26 lines). +Hunk #4 succeeded at 281 (offset 32 lines). +Hunk #5 succeeded at 326 (offset 32 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +patching file auto_dsig1.f +Hunk #1 succeeded at 527 (offset 58 lines). +patching file driver.f +patching file matrix1.f +Hunk #1 succeeded at 75 (offset 3 lines). +Hunk #2 succeeded at 162 (offset 19 lines). +Hunk #3 succeeded at 247 (offset 26 lines). 
+Hunk #4 succeeded at 281 (offset 32 lines). +Hunk #5 succeeded at 326 (offset 32 lines). +DEBUG: p.returncode =  0 [output.py at line 233]  +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done. +Type "launch" to generate events from this process, or see +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/README +Run "open index.html" to see more information about this process. +quit + +real 0m1.935s +user 0m1.699s +sys 0m0.219s ************************************************************ * * * W E L C O M E to * @@ -288,8 +326,7 @@ Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run -run_card missed argument cudacpp_backend. Takes default: CPP -run_card missed argument cudacpp_backend. Takes default: CPP +WARNING! CPPRunCard instance has no attribute path quit INFO: launch in debug mode @@ -323,41 +360,3 @@ treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common -patching file Source/genps.inc -patching file Source/makefile -patching file SubProcesses/makefile -patching file Source/make_opts -patching file bin/internal/banner.py -Hunk #1 succeeded at 4188 (offset 1 line). -patching file bin/internal/gen_ximprove.py -patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 -patching file auto_dsig1.f -Hunk #1 succeeded at 527 (offset 58 lines). 
-patching file driver.f -patching file matrix1.f -Hunk #1 succeeded at 75 (offset 3 lines). -Hunk #2 succeeded at 162 (offset 19 lines). -Hunk #3 succeeded at 247 (offset 26 lines). -Hunk #4 succeeded at 281 (offset 32 lines). -Hunk #5 succeeded at 326 (offset 32 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 -patching file auto_dsig1.f -Hunk #1 succeeded at 527 (offset 58 lines). -patching file driver.f -patching file matrix1.f -Hunk #1 succeeded at 75 (offset 3 lines). -Hunk #2 succeeded at 162 (offset 19 lines). -Hunk #3 succeeded at 247 (offset 26 lines). -Hunk #4 succeeded at 281 (offset 32 lines). -Hunk #5 succeeded at 326 (offset 32 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done. -Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/README -Run "open index.html" to see more information about this process. 
-quit - -real 0m2.575s -user 0m2.217s -sys 0m0.315s diff --git a/epochX/cudacpp/gq_ttq.mad/Source/make_opts b/epochX/cudacpp/gq_ttq.mad/Source/make_opts index bd3c24228d..57f5f7bb96 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/make_opts +++ b/epochX/cudacpp/gq_ttq.mad/Source/make_opts @@ -1,17 +1,12 @@ -pdlabel1= -pdlabel2= -lhapdf= -PYTHIA8_PATH=NotInstalled -MG5AMC_VERSION=3.5.0_lo_vect GLOBAL_FLAG=-O3 -ffast-math -fbounds-check -ALOHA_FLAG= -MATRIX_FLAG= DEFAULT_CPP_COMPILER=g++ MACFLAG= STDLIB=-lstdc++ STDLIB_FLAG= DEFAULT_F_COMPILER=gfortran DEFAULT_F2PY_COMPILER=f2py3 +MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime +PYTHIA8_PATH=NotInstalled #end_of_make_opts_variables BIASLIBDIR=../../../lib/ diff --git a/epochX/cudacpp/gq_ttq.mad/Source/makefile b/epochX/cudacpp/gq_ttq.mad/Source/makefile index dbe08b846e..00c73099a0 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/makefile +++ b/epochX/cudacpp/gq_ttq.mad/Source/makefile @@ -136,5 +136,7 @@ cleanSource: clean: cleanSource for i in `ls -d ../SubProcesses/P*`; do cd $$i; make clean; cd -; done; -cleanall: cleanSource +cleanavx: + for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done; +cleanall: cleanSource # THIS IS THE ONE for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done; diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/dummy_fct.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/dummy_fct.f index 076cf29d67..4f7a204b8f 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/dummy_fct.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/dummy_fct.f @@ -32,7 +32,7 @@ logical FUNCTION dummy_cuts(P) LOGICAL IS_A_NU(NEXTERNAL),IS_HEAVY(NEXTERNAL) logical do_cuts(nexternal) COMMON /TO_SPECISA/IS_A_J,IS_A_A,IS_A_L,IS_A_B,IS_A_NU,IS_HEAVY, - . IS_A_ONIUM, do_cuts + & IS_A_ONIUM, do_cuts dummy_cuts=.true. 
@@ -118,15 +118,16 @@ double precision function user_dynamical_scale(P) C ************************************************************ -C default for the library implementing a dummt bias function +C default for the library implementing a dummy bias function C ************************************************************ subroutine bias_wgt_custom(p, original_weight, bias_weight) - implicit none + implicit none C C Parameters C include 'nexternal.inc' -C + +C C Arguments C double precision p(0:3, nexternal) @@ -161,3 +162,4 @@ subroutine bias_wgt_custom(p, original_weight, bias_weight) return end subroutine bias_wgt_custom + diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py index e9f421ae5f..824815f47b 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py @@ -537,7 +537,7 @@ def charge_card(self, tag): self.param_card = param_card_reader.ParamCard(param_card) return self.param_card elif tag == 'mgruncard': - self.run_card = RunCard(self[tag]) + self.run_card = RunCard(self[tag], unknown_warning=False) return self.run_card elif tag == 'mg5proccard': proc_card = self[tag].split('\n') @@ -2625,6 +2625,7 @@ class RunCard(ConfigFile): default_include_file = 'run_card.inc' default_autodef_file = 'run.inc' donewarning = [] + include_as_parameter = [] def plugin_input(self, finput): @@ -2671,18 +2672,40 @@ def __new__(cls, finput=None, **opt): elif isinstance(finput, cls): target_class = finput.__class__ elif isinstance(finput, str): + path = finput if '\n' not in finput: finput = open(finput).read() if 'req_acc_FO' in finput: target_class = RunCardNLO else: target_class = RunCardLO + if MADEVENT and os.path.exists(pjoin(MEDIR, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(MEDIR, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import 
launch_plugin + target_class = launch_plugin.RunCard + elif not MADEVENT and os.path.exists(path.replace('run_card.dat', '../bin/internal/launch_plugin.py')): + misc.sprint('try to use plugin class') + pydir = path.replace('run_card.dat', '../bin/internal/') + with misc.TMP_variable(sys, 'path', sys.path + [pydir]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard + else: return None target_class.fill_post_set_from_blocks() - - return super(RunCard, cls).__new__(target_class, finput, **opt) + out = super(RunCard, cls).__new__(target_class, finput, **opt) + if not isinstance(out, RunCard): #should not happen but in presence of missmatch of library loaded. + out.__init__(finput, **opt) + return out else: return super(RunCard, cls).__new__(cls, finput, **opt) @@ -2710,7 +2733,7 @@ def __init__(self, *args, **opts): self.system_default = {} self.display_block = [] # set some block to be displayed - + self.fct_mod = {} # {param: (fct_pointer, *argument, **opts)} self.cut_class = {} self.warned=False @@ -2747,7 +2770,7 @@ def get_lepton_densities(cls): def add_param(self, name, value, fortran_name=None, include=True, hidden=False, legacy=False, cut=False, system=False, sys_default=None, - autodef=False, + autodef=False, fct_mod=None, **opts): """ add a parameter to the card. value is the default value and defines the type (int/float/bool/str) of the input. @@ -2761,6 +2784,7 @@ def add_param(self, name, value, fortran_name=None, include=True, If a path (Source/PDF/pdf.inc) the definition will be added within that file Default is False (does not add the definition) entry added in the run_card will automatically have this on True. + fct_mod: defines a function to run if the parameter is modify in the include file options of **opts: - allowed: list of valid options. '*' means anything else should be allowed. empty list means anything possible as well. 
@@ -2785,8 +2809,12 @@ def add_param(self, name, value, fortran_name=None, include=True, if autodef: self.definition_path[autodef].append(name) self.user_set.add(name) + # function to trigger if a value is modified in the include file + # main target is action to force correct recompilation (like for compilation flag/...) + if fct_mod: + self.fct_mod[name] = fct_mod - def read(self, finput, consistency=True): + def read(self, finput, consistency=True, unknown_warning=True): """Read the input file, this can be a path to a file, a file object, a str with the content of the file.""" @@ -2794,6 +2822,7 @@ def read(self, finput, consistency=True): if "\n" in finput: finput = finput.split('\n') elif os.path.isfile(finput): + self.path = finput finput = open(finput) else: raise Exception("No such file %s" % finput) @@ -2808,7 +2837,7 @@ def read(self, finput, consistency=True): name = name.lower().strip() if name not in self: #looks like an entry added by a user -> add it nicely - self.add_unknown_entry(name, value) + self.add_unknown_entry(name, value, unknown_warning) else: self.set( name, value, user=True) # parameter not set in the run_card can be set to compatiblity value @@ -2820,7 +2849,7 @@ def read(self, finput, consistency=True): logger.warning(str(error)) else: raise - def add_unknown_entry(self, name, value): + def add_unknown_entry(self, name, value, unknow_warning): """function to add an entry to the run_card when the associated parameter does not exists. This is based on the guess_entry_fromname for the various syntax providing input. This then call add_param accordingly. 
@@ -2859,7 +2888,7 @@ def add_unknown_entry(self, name, value): raise Exception("dictionary need to have at least one entry") default['dict']['__type__'] = default[self.guess_type_from_value(default_value[0])] - if name not in RunCard.donewarning: + if name not in RunCard.donewarning and unknow_warning: logger.warning("Found unexpected entry in run_card: \"%s\" with value \"%s\".\n"+\ " The type was assigned to %s. \n"+\ " The definition of that variable will %sbe automatically added to fortran file %s\n"+\ @@ -2897,7 +2926,16 @@ def valid_line(self, line, tmp): return False else: return True - + + + def reset_simd(self, old_value, new_value, name, *args, **opts): + raise Exception('pass in reset simd') + + def make_clean(self,old_value, new_value, name, dir): + raise Exception('pass make clean for ', dir) + + def make_Ptouch(self,old_value, new_value, name, reset): + raise Exception('pass Ptouch for ', reset) def write(self, output_file, template=None, python_template=False, write_hidden=False, template_options=None, **opt): @@ -3072,6 +3110,77 @@ def write(self, output_file, template=None, python_template=False, else: output_file.write(text) + def get_last_value_include(self, output_dir): + """For paraeter in self.fct_mod + parse the associate inc file to get the value of the previous run. + We return a dictionary {name: old_value} + if inc file does not exist we will return the current value (i.e. 
set has no change) + """ + + #remember that + # default_include_file is a class variable + # self.includepath is on the form include_path : [list of param ] + out = {} + + # setup inc_to_parse to be like self.includepath (include_path : [list of param ]) + # BUT only containing the parameter that need to be tracked for the fct_mod option + inc_to_parse = {} + for inc_file, params in self.includepath.items(): + if not inc_file: + continue + if any(p in params for p in self.fct_mod): + inc_to_parse[inc_file] = [name for name in self.includepath[inc_file] if name in self.fct_mod] + + # now loop over the files and ask the associate function + for inc_file, params in inc_to_parse.items(): + if inc_file is True: + inc_file = self.default_include_file + out.update(self.get_value_from_include(inc_file, params, output_dir)) + + return out + + def get_value_from_include(self, path, list_of_params, output_dir): + """for a given include file return the current value of the requested parameter + return a dictionary {name: value} + if path does not exists return the current value in self for all parameter""" + + #WARNING DOES NOT HANDLE LIST/DICT so far + + # handle case where file is missing + if not os.path.exists(pjoin(output_dir,path)): + misc.sprint("include file not existing", pjoin(output_dir,path)) + out = {name: self[name] for name in list_of_params} + + with open(pjoin(output_dir,path), 'r') as fsock: + text = fsock.read() + + for name in list_of_params: + misc.sprint(name, name in self.fortran_name) + misc.sprint(self.fortran_name[name] if name in self.fortran_name[name] else name) + to_track = [self.fortran_name[name] if name in self.fortran_name else name for name in list_of_params] + pattern = re.compile(r"\(?(%(names)s)\s?=\s?([^)]*)\)?" 
% {'names':'|'.join(to_track)}, re.I) + out = dict(pattern.findall(text)) + misc.sprint(out) + for name in list_of_params: + if name in self.fortran_name: + value = out[self.fortran_name[name]] + del out[self.fortran_name[name]] + out[name] = value + + for name, value in out.items(): + try: + out[name] = self.format_variable(value, type(self[name])) + except Exception: + continue + + if len(out) != len(list_of_params): + misc.sprint(list_of_params) + misc.sprint(to_track) + misc.sprint(self.fortran_name) + misc.sprint(text) + raise Exception + return out + def get_default(self, name, default=None, log_level=None): """return self[name] if exist otherwise default. log control if we @@ -3362,71 +3471,93 @@ def write_include_file(self, output_dir, output_file=None): #ensusre that system only parameter are correctly set self.update_system_parameter_for_include() + value_in_old_include = self.get_last_value_include(output_dir) + + if output_dir: self.write_autodef(output_dir, output_file=None) # check/fix status of customised functions self.edit_dummy_fct_from_file(self["custom_fcts"], os.path.dirname(output_dir)) for incname in self.includepath: - if incname is True: - pathinc = self.default_include_file - elif incname is False: - continue - else: - pathinc = incname + self.write_one_include_file(output_dir, incname, output_file) + + for name,value in value_in_old_include.items(): + if value != self[name]: + self.fct_mod[name][0](value, self[name], name, *self.fct_mod[name][1],**self.fct_mod[name][2]) - if output_file: - fsock = output_file + def write_one_include_file(self, output_dir, incname, output_file=None): + """write one include file at the time""" + + misc.sprint(incname) + if incname is True: + pathinc = self.default_include_file + elif incname is False: + return + else: + pathinc = incname + + if output_file: + fsock = output_file + else: + fsock = file_writers.FortranWriter(pjoin(output_dir,pathinc+'.tmp')) + + + for key in self.includepath[incname]: + 
#define the fortran name + if key in self.fortran_name: + fortran_name = self.fortran_name[key] else: - fsock = file_writers.FortranWriter(pjoin(output_dir,pathinc+'.tmp')) - for key in self.includepath[incname]: - #define the fortran name - if key in self.fortran_name: - fortran_name = self.fortran_name[key] + fortran_name = key + + if incname in self.include_as_parameter: + fsock.writelines('INTEGER %s\n' % fortran_name) + #get the value with warning if the user didn't set it + value = self.get_default(key) + if hasattr(self, 'mod_inc_%s' % key): + value = getattr(self, 'mod_inc_%s' % key)(value) + # Special treatment for strings containing a list of + # strings. Convert it to a list of strings + if isinstance(value, list): + # in case of a list, add the length of the list as 0th + # element in fortran. Only in case of integer or float + # list (not for bool nor string) + targettype = self.list_parameter[key] + if targettype is bool: + pass + elif targettype is int: + line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(len(value))) + fsock.writelines(line) + elif targettype is float: + line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(float(len(value)))) + fsock.writelines(line) + # output the rest of the list in fortran + for i,v in enumerate(value): + line = '%s(%s) = %s \n' % (fortran_name, i+1, self.f77_formatting(v)) + fsock.writelines(line) + elif isinstance(value, dict): + for fortran_name, onevalue in value.items(): + line = '%s = %s \n' % (fortran_name, self.f77_formatting(onevalue)) + fsock.writelines(line) + elif isinstance(incname,str) and 'compile' in incname: + if incname in self.include_as_parameter: + line = 'PARAMETER (%s=%s)' %( fortran_name, value) else: - fortran_name = key - - #get the value with warning if the user didn't set it - value = self.get_default(key) - if hasattr(self, 'mod_inc_%s' % key): - value = getattr(self, 'mod_inc_%s' % key)(value) - # Special treatment for strings containing a list of - # strings. 
Convert it to a list of strings - if isinstance(value, list): - # in case of a list, add the length of the list as 0th - # element in fortran. Only in case of integer or float - # list (not for bool nor string) - targettype = self.list_parameter[key] - if targettype is bool: - pass - elif targettype is int: - line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(len(value))) - fsock.writelines(line) - elif targettype is float: - line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(float(len(value)))) - fsock.writelines(line) - # output the rest of the list in fortran - for i,v in enumerate(value): - line = '%s(%s) = %s \n' % (fortran_name, i+1, self.f77_formatting(v)) - fsock.writelines(line) - elif isinstance(value, dict): - for fortran_name, onevalue in value.items(): - line = '%s = %s \n' % (fortran_name, self.f77_formatting(onevalue)) - fsock.writelines(line) - elif isinstance(incname,str) and 'compile' in incname: line = '%s = %s \n' % (fortran_name, value) - fsock.write(line) + fsock.write(line) + else: + if incname in self.include_as_parameter: + line = 'PARAMETER (%s=%s)' %( fortran_name, self.f77_formatting(value)) else: line = '%s = %s \n' % (fortran_name, self.f77_formatting(value)) - fsock.writelines(line) - if not output_file: - fsock.close() - path = pjoin(output_dir,pathinc) - if not os.path.exists(path) or not filecmp.cmp(path, path+'.tmp'): - files.mv(path+'.tmp', path) - else: - os.remove(path+'.tmp') - + fsock.writelines(line) + if not output_file: + fsock.close() + path = pjoin(output_dir,pathinc) + if not os.path.exists(path) or not filecmp.cmp(path, path+'.tmp'): + files.mv(path+'.tmp', path) + else: + os.remove(path+'.tmp') def write_autodef(self, output_dir, output_file=None): """ Add the definition of variable to run.inc if the variable is set with autodef. @@ -3765,13 +3896,14 @@ def remove_all_cut(self): %(tmin_for_channel)s = tmin_for_channel ! 
limit the non-singular reach of --some-- channel of integration related to T-channel diagram (value between -1 and 0), -1 is no impact %(survey_splitting)s = survey_splitting ! for loop-induced control how many core are used at survey for the computation of a single iteration. %(survey_nchannel_per_job)s = survey_nchannel_per_job ! control how many Channel are integrated inside a single job on cluster/multicore - %(refine_evt_by_job)s = refine_evt_by_job ! control the maximal number of events for the first iteration of the refine (larger means less jobs) + %(refine_evt_by_job)s = refine_evt_by_job ! control the maximal number of events for the first iteration of the refine (larger means less jobs) #********************************************************************* -# Compilation flag. No automatic re-compilation (need manual "make clean" in Source) +# Compilation flag. #********************************************************************* %(global_flag)s = global_flag ! fortran optimization flag use for the all code. %(aloha_flag)s = aloha_flag ! fortran optimization flag for aloha function. Suggestions: '-ffast-math' %(matrix_flag)s = matrix_flag ! fortran optimization flag for matrix.f function. Suggestions: '-O3' + %(vector_size)s = vector_size ! 
size designed for SIMD/OpenMP/GPU (number of events in lockstep) """ template_off = '# To see advanced option for Phase-Space optimization: type "update psoptim"' @@ -3927,9 +4059,12 @@ class RunCardLO(RunCard): "get_dummy_x1_x2": pjoin("SubProcesses","dummy_fct.f"), "dummy_boostframe": pjoin("SubProcesses","dummy_fct.f"), "user_dynamical_scale": pjoin("SubProcesses","dummy_fct.f"), + "bias_wgt_custom": pjoin("SubProcesses","dummy_fct.f"), "user_": pjoin("SubProcesses","dummy_fct.f") # all function starting by user will be added to that file } + include_as_parameter = ['vector.inc'] + if MG5DIR: default_run_card = pjoin(MG5DIR, "internal", "default_run_card_lo.dat") @@ -4163,10 +4298,15 @@ def default_setup(self): self.add_param('hel_splitamp', True, hidden=True, include=False, comment='decide if amplitude aloha call can be splitted in two or not when doing helicity per helicity optimization.') self.add_param('hel_zeroamp', True, hidden=True, include=False, comment='decide if zero amplitude can be removed from the computation when doing helicity per helicity optimization.') self.add_param('SDE_strategy', 1, allowed=[1,2], fortran_name="sde_strat", comment="decide how Multi-channel should behaves \"1\" means full single diagram enhanced (hep-ph/0208156), \"2\" use the product of the denominator") - self.add_param('global_flag', '-O', include=False, hidden=True, comment='global fortran compilation flag, suggestion -fbound-check') - self.add_param('aloha_flag', '', include=False, hidden=True, comment='global fortran compilation flag, suggestion: -ffast-math') - self.add_param('matrix_flag', '', include=False, hidden=True, comment='fortran compilation flag for the matrix-element files, suggestion -O3') - + self.add_param('global_flag', '-O', include=False, hidden=True, comment='global fortran compilation flag, suggestion -fbound-check', + fct_mod=(self.make_clean, ('Source'),{})) + self.add_param('aloha_flag', '', include=False, hidden=True, comment='global fortran 
compilation flag, suggestion: -ffast-math', + fct_mod=(self.make_clean, ('Source/DHELAS'),{})) + self.add_param('matrix_flag', '', include=False, hidden=True, comment='fortran compilation flag for the matrix-element files, suggestion -O3', + fct_mod=(self.make_Ptouch, ('matrix'),{})) + self.add_param('vector_size', 1, include='vector.inc', hidden=True, comment='lockstep size for parralelism run', + fortran_name='VECSIZE_MEMMAX', fct_mod=(self.reset_simd,(),{})) + # parameter allowing to define simple cut via the pdg # Special syntax are related to those. (can not be edit directly) self.add_param('pt_min_pdg',{'__type__':0.}, include=False, cut=True) @@ -4188,8 +4328,7 @@ def default_setup(self): self.add_param('mxxmin4pdg',[-1.], system=True) self.add_param('mxxpart_antipart', [False], system=True) - # CUDACPP parameters - self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + def check_validity(self): """ """ diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/check_param_card.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/check_param_card.py index fe874a06a4..71089d7480 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/check_param_card.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/check_param_card.py @@ -85,7 +85,7 @@ def load_str(self, text): self.value= ' '.join(data[len(self.lhacode):]) # check that lhacode are the first entry otherwise return invalid param. 
if ' '.join([str(i) for i in self.lhacode]) != ' '.join(data[:len(self.lhacode)]): - raise InvalidParam + raise InvalidParam("line was %s" % str(data)) else: self.value = data[-1] diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/common_run_interface.py index 5d0187e3fa..14c7f310dc 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/common_run_interface.py @@ -749,13 +749,15 @@ def writeRunWeb(me_dir): class RunWebHandling(object): - def __init__(self, me_dir, crashifpresent=True, warnifpresent=True): + def __init__(self, me_dir, crashifpresent=True, warnifpresent=True, force_run=False): """raise error if RunWeb already exists me_dir is the directory where the write RunWeb""" self.remove_run_web = True self.me_dir = me_dir - + if force_run: + self.remove_run_web = False + return if crashifpresent or warnifpresent: if os.path.exists(pjoin(me_dir, 'RunWeb')): pid = open(pjoin(me_dir, 'RunWeb')).read() @@ -6574,7 +6576,7 @@ def reask(self, *args, **opt): fail_due_to_format = 0 #parameter to avoid infinite loop def postcmd(self, stop, line): - if line not in [None, '0', 'done', '']: + if line not in [None, '0', 'done', '',0]: ending_question = cmd.OneLinePathCompletion.postcmd(self,stop,line) else: ending_question = True @@ -7533,7 +7535,8 @@ def open_file(self, answer): else: raise if time.time() - start < .5: - self.mother_interface.ask("Are you really that fast? If you are using an editor that returns directly. Please confirm that you have finised to edit the file", 'y') + self.mother_interface.ask("Are you really that fast? If you are using an editor that returns directly. 
Please confirm that you have finised to edit the file", 'y', + timeout=False) self.reload_card(path) def reload_card(self, path): diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/extended_cmd.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/extended_cmd.py index a6a8609dce..2f37070580 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/extended_cmd.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/extended_cmd.py @@ -1108,9 +1108,12 @@ def ask(self, question, default, choices=[], path_msg=None, if alias: choices += list(alias.keys()) + + question_instance = obj(question, allow_arg=choices, default=default, mother_interface=self, **opt) - + if fct_timeout is None: + fct_timeout = lambda x: question_instance.postcmd(x, default) if x and default else False if first_cmd: if isinstance(first_cmd, str): question_instance.onecmd(first_cmd) @@ -2271,6 +2274,9 @@ def postcmd(self, stop, line): if n: self.default(line) return self.postcmd(stop, line) + elif self.value is None and line: + self.default(line) + return self.postcmd(stop, line) if not self.casesensitive: for ans in self.allow_arg: if ans.lower() == self.value.lower(): diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/gen_ximprove.py index 3b8ec31215..a88d60b282 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/gen_ximprove.py @@ -154,10 +154,15 @@ def get_helicity(self, to_submit=True, clean=True): p = misc.Popen(['./gensym'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=Pdir) #sym_input = "%(points)d %(iterations)d %(accuracy)f \n" % self.opts + (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') - nb_channel = max([math.floor(float(d)) for d in stdout.split()]) - + try: + nb_channel = max([math.floor(float(d)) for d in stdout.split()]) + except Exception as error: + misc.sprint(stdout, 'no channel or error for %s' % Pdir) + continue + 
self.cmd.compile(['madevent_forhel'], cwd=Pdir) if not os.path.exists(pjoin(Pdir, 'madevent_forhel')): raise Exception('Error make madevent_forhel not successful') diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py index f4c9cb6334..c9d1c7706a 100644 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py @@ -1,6 +1,12 @@ +# Copyright (C) 2020-2023 CERN and UCLouvain. +# Licensed under the GNU Lesser General Public License (version 3 or later). +# Created by: O. Mattelaer (Aug 2023) for the MG5aMC CUDACPP plugin. +# Further modified by: O. Mattelaer, A. Valassi (2023) for the MG5aMC CUDACPP plugin. import logging - +import os +import subprocess +pjoin = os.path.join logger = logging.getLogger('cmdprint') # for stdout try: @@ -9,20 +15,23 @@ import internal.madevent_interface as madevent_interface import internal.misc as misc import internal.extended_cmd as extended_cmd + import internal.banner as banner_mod else: import madgraph.interface.madevent_interface as madevent_interface import madgraph.various.misc as misc import madgraph.interface.extended_cmd as extended_cmd + import madgraph.various.banner as banner_mod class CPPMEInterface(madevent_interface.MadEventCmdShell): - def compile(self, *args, **opts): """ """ import multiprocessing if not self.options['nb_core'] or self.options['nb_core'] == 'None': self.options['nb_core'] = multiprocessing.cpu_count() - if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): + import pathlib + import os + pjoin = os.path.join cudacpp_backend = self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) if cudacpp_backend == 'FORTRAN': @@ -36,5 +45,50 @@ def compile(self, *args, **opts): return misc.compile(nb_core=self.options['nb_core'], *args, 
**opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) - + +class CPPRunCard(banner_mod.RunCardLO): + def reset_simd(self, old_value, new_value, name): + if not hasattr(self, 'path'): + logger.warning('WARNING! CPPRunCard instance has no attribute path') + return + ###raise Exception('INTERNAL ERROR! CPPRunCard instance has no attribute path') + if name == "vector_size" and new_value <= int(old_value): + # code can handle the new size -> do not recompile + return + Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source') + subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + def plugin_input(self, finput): + return + + def default_setup(self): + super().default_setup() + self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + + def write_one_include_file(self, output_dir, incname, output_file=None): + """write one include file at the time""" + if incname == "vector.inc" and 'vector_size' not in self.user_set: + return + super().write_one_include_file(output_dir, incname, output_file) + + def check_validity(self): + """ensure that PLUGIN information are consistent""" + super().check_validity() + if self['SDE_strategy'] != 1: + logger.warning('SDE_strategy different of 1 is not supported with SMD/GPU mode') + self['sde_strategy'] = 1 + if self['hel_recycling']: + self['hel_recycling'] = False + +class GPURunCard(CPPRunCard): + def default_setup(self): + super(CPPRunCard, self).default_setup() + self.add_param('cudacpp_backend', 'CUDA', include=False, hidden=False) + +#class CUDACPPRunCard(CPPRunCard): +# def default_setup(self): +# super(CPPRunCard, self).default_setup() +# self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + MEINTERFACE = CPPMEInterface +RunCard = CPPRunCard diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/madevent_interface.py index 
920e07a926..d722702891 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/madevent_interface.py @@ -3796,9 +3796,11 @@ def do_combine_events(self, line): if self.run_card['bias_module'].lower() not in ['dummy', 'none'] and nb_event: self.correct_bias() - + elif self.run_card['custom_fcts']: + self.correct_bias() + logger.info("combine events done in %s", time.time()-start) - + self.to_store.append('event') diff --git a/epochX/cudacpp/gq_ttq.mad/bin/madevent b/epochX/cudacpp/gq_ttq.mad/bin/madevent index 10b6a71fa2..dff9711b73 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/madevent +++ b/epochX/cudacpp/gq_ttq.mad/bin/madevent @@ -173,6 +173,10 @@ if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): launch_interface = launch_plugin.MEINTERFACE +#Source use this executable for compilation always allow it +force_run = False +if (args and args[0] == 'treatcards'): + force_run=True # Call the cmd interface main loop try: @@ -180,7 +184,7 @@ try: launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) - with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): + with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), force_run=force_run): if (args and os.path.isfile(args[0])): # They are an input file input_file = args[0] diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 455aaad666..9cc1ee5d97 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005272865295410156  +DEBUG: model prefixing takes 0.005384206771850586  INFO: Restrict model sm with 
file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.076 s +8 processes with 40 diagrams generated in 0.077 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -210,7 +210,7 @@ Generated helas calls for 2 subprocesses (10 diagrams) in 0.029 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.140 s FFV1 FFV1 FFV1 @@ -224,9 +224,10 @@ FileWriter for / FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
-DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.652s -user 0m0.588s -sys 0m0.058s +real 0m0.649s +user 0m0.590s +sys 0m0.052s diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index 78ccf0d626..2e1f4941cf 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -151,7 +151,7 @@ Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines -ALOHA: aloha creates 1 routines in 0.060 s +ALOHA: aloha creates 1 routines in 0.061 s VVS3 FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. @@ -161,9 +161,10 @@ FileWriter for / FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./Parameters_heft.cc INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. 
-DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.460s -user 0m0.364s -sys 0m0.058s +real 0m0.552s +user 0m0.380s +sys 0m0.051s diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 1c26065d41..698e17243e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00558018684387207  +DEBUG: model prefixing takes 0.0054972171783447266  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.029 s +5 processes with 7 diagrams generated in 0.031 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. 
@@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.133 s +13 processes with 76 diagrams generated in 0.147 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.797 s +65 processes with 1119 diagrams generated in 1.810 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -497,7 +497,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -506,15 +506,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -523,15 +523,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -540,15 +540,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  2 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -557,15 +557,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  3 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -574,15 +574,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  4 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -591,15 +591,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  5 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -608,15 +608,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  6 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -625,15 +625,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  7 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -642,15 +642,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  8 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -659,15 +659,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  9 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -676,15 +676,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  10 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -693,15 +693,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  11 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -710,15 +710,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  12 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -727,15 +727,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  13 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -744,15 +744,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  14 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -761,15 +761,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  15 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -778,15 +778,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  16 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -795,21 +795,21 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1] [export_cpp.py at line 711]  DEBUG: subproc_number =  17 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.264 s -Wrote files for 810 helas calls in 3.591 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.266 s +Wrote files for 810 helas calls in 3.189 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.330 s +ALOHA: aloha creates 5 routines in 0.329 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -844,78 +844,15 @@ save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CO INFO: Use Fortran compiler gfortran INFO: Use c++ compiler 
g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) -************************************************************ -* * -* W E L C O M E to * -* M A D G R A P H 5 _ a M C @ N L O * -* M A D E V E N T * -* * -* * * * -* * * * * * -* * * * * 5 * * * * * -* * * * * * -* * * * -* * -* VERSION 3.5.1_lo_vect * -* * -* The MadGraph5_aMC@NLO Development Team - Find us at * -* https://server06.fynu.ucl.ac.be/projects/madgraph * -* * -* Type 'help' for in-line help. * -* * -************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt -Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt -treatcards run -run_card missed argument cudacpp_backend. Takes default: CPP -run_card missed argument cudacpp_backend. Takes default: CPP -quit -INFO: -launch in debug mode -************************************************************ -* * -* W E L C O M E to * -* M A D G R A P H 5 _ a M C @ N L O * -* M A D E V E N T * -* * -* * * * -* * * * * * -* * * * * 5 * * * * * -* * * * * * -* * * * -* * -* VERSION 3.5.1_lo_vect * -* * -* The MadGraph5_aMC@NLO Development Team - Find us at * -* https://server06.fynu.ucl.ac.be/projects/madgraph * -* * -* Type 'help' for in-line help. 
* -* * -************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt -Using default text editor "vi". Set another one in ./input/mg5_configuration.txt -Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt -No valid web browser found. Please set in ./input/mg5_configuration.txt -treatcards param -quit -INFO: -launch in debug mode +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG: self.in_madevent_mode =  True [output.py at line 207]  +DEBUG! Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile -patching file Source/make_opts -patching file bin/internal/banner.py -Hunk #1 succeeded at 4188 (offset 1 line). patching file bin/internal/gen_ximprove.py +Hunk #1 succeeded at 385 (offset 5 lines). patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f @@ -1090,12 +1027,74 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). 
Hunk #5 succeeded at 345 (offset 51 lines). +DEBUG: p.returncode =  0 [output.py at line 233]  Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j done. Type "launch" to generate events from this process, or see /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/README Run "open index.html" to see more information about this process. quit -real 0m9.770s -user 0m8.785s -sys 0m0.562s +real 0m8.738s +user 0m8.225s +sys 0m0.476s +************************************************************ +* * +* W E L C O M E to * +* M A D G R A P H 5 _ a M C @ N L O * +* M A D E V E N T * +* * +* * * * +* * * * * * +* * * * * 5 * * * * * +* * * * * * +* * * * +* * +* VERSION 3.5.1_lo_vect * +* * +* The MadGraph5_aMC@NLO Development Team - Find us at * +* https://server06.fynu.ucl.ac.be/projects/madgraph * +* * +* Type 'help' for in-line help. * +* * +************************************************************ +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +Using default text editor "vi". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. Please set in ./input/mg5_configuration.txt +treatcards run +WARNING! 
CPPRunCard instance has no attribute path +quit +INFO: +launch in debug mode +************************************************************ +* * +* W E L C O M E to * +* M A D G R A P H 5 _ a M C @ N L O * +* M A D E V E N T * +* * +* * * * +* * * * * * +* * * * * 5 * * * * * +* * * * * * +* * * * +* * +* VERSION 3.5.1_lo_vect * +* * +* The MadGraph5_aMC@NLO Development Team - Find us at * +* https://server06.fynu.ucl.ac.be/projects/madgraph * +* * +* Type 'help' for in-line help. * +* * +************************************************************ +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +Using default text editor "vi". Set another one in ./input/mg5_configuration.txt +Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt +No valid web browser found. 
Please set in ./input/mg5_configuration.txt +treatcards param +quit +INFO: +launch in debug mode diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/make_opts b/epochX/cudacpp/pp_tt012j.mad/Source/make_opts index bd3c24228d..57f5f7bb96 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/make_opts +++ b/epochX/cudacpp/pp_tt012j.mad/Source/make_opts @@ -1,17 +1,12 @@ -pdlabel1= -pdlabel2= -lhapdf= -PYTHIA8_PATH=NotInstalled -MG5AMC_VERSION=3.5.0_lo_vect GLOBAL_FLAG=-O3 -ffast-math -fbounds-check -ALOHA_FLAG= -MATRIX_FLAG= DEFAULT_CPP_COMPILER=g++ MACFLAG= STDLIB=-lstdc++ STDLIB_FLAG= DEFAULT_F_COMPILER=gfortran DEFAULT_F2PY_COMPILER=f2py3 +MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime +PYTHIA8_PATH=NotInstalled #end_of_make_opts_variables BIASLIBDIR=../../../lib/ diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/makefile b/epochX/cudacpp/pp_tt012j.mad/Source/makefile index dbe08b846e..00c73099a0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/makefile +++ b/epochX/cudacpp/pp_tt012j.mad/Source/makefile @@ -136,5 +136,7 @@ cleanSource: clean: cleanSource for i in `ls -d ../SubProcesses/P*`; do cd $$i; make clean; cd -; done; -cleanall: cleanSource +cleanavx: + for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done; +cleanall: cleanSource # THIS IS THE ONE for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f index fc924825c2..bf665ff6e0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f @@ -359,7 +359,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C LOCAL VARIABLES C INTEGER I,J,M,N - COMPLEX*16 ZTEMP, TMP_JAMP(10) + COMPLEX*16 ZTEMP, TMP_JAMP(9) REAL*8 CF(NCOLOR,NCOLOR) COMPLEX*16 AMP(NGRAPHS), JAMP(NCOLOR,NAMPSO) COMPLEX*16 W(6,NWAVEFUNCS) @@ -508,33 +508,30 @@ REAL*8 FUNCTION 
MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) TMP_JAMP(3) = AMP(15) + AMP(16) ! used 4 times TMP_JAMP(2) = AMP(1) + AMP(18) ! used 4 times TMP_JAMP(1) = AMP(12) - AMP(17) ! used 4 times - TMP_JAMP(10) = TMP_JAMP(3) - TMP_JAMP(2) ! used 2 times - TMP_JAMP(9) = TMP_JAMP(1) + ((-0.000000000000000D+00 + TMP_JAMP(9) = TMP_JAMP(3) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(8) ! used 2 times + TMP_JAMP(8) = TMP_JAMP(3) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(5) ! used 2 times + TMP_JAMP(7) = TMP_JAMP(2) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(2) ! used 2 times + TMP_JAMP(6) = TMP_JAMP(2) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(3) ! used 2 times + TMP_JAMP(5) = TMP_JAMP(1) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(11) ! used 2 times - TMP_JAMP(8) = TMP_JAMP(2) - TMP_JAMP(1) ! used 2 times - TMP_JAMP(7) = TMP_JAMP(1) + ((0.000000000000000D+00, + TMP_JAMP(4) = TMP_JAMP(1) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(10) ! used 2 times - TMP_JAMP(6) = TMP_JAMP(3) - TMP_JAMP(1) ! used 2 times - TMP_JAMP(5) = TMP_JAMP(2) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(3) ! used 2 times - TMP_JAMP(4) = TMP_JAMP(3) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(5) ! 
used 2 times - JAMP(1,1) = (-1.000000000000000D+00)*AMP(6)+TMP_JAMP(4)+( - $ -1.000000000000000D+00)*TMP_JAMP(5) - JAMP(2,1) = (-1.000000000000000D+00)*AMP(4)+(-1.000000000000000D - $ +00)*TMP_JAMP(4)+TMP_JAMP(9) - JAMP(3,1) = (-1.000000000000000D+00)*AMP(13)+TMP_JAMP(5)+( - $ -1.000000000000000D+00)*TMP_JAMP(7) - JAMP(4,1) = (-1.000000000000000D+00)*AMP(7)+((0.000000000000000D - $ +00,1.000000000000000D+00))*AMP(8)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*AMP(10)+(-1.000000000000000D+00) - $ *TMP_JAMP(6) - JAMP(5,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(2)+((0.000000000000000D+00,-1.000000000000000D+00))*AMP(11) - $ +(-1.000000000000000D+00)*AMP(14)+TMP_JAMP(8) - JAMP(6,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(2)+((0.000000000000000D+00,-1.000000000000000D+00))*AMP(8) - $ +(-1.000000000000000D+00)*AMP(9)+TMP_JAMP(10) + JAMP(1,1) = (-1.000000000000000D+00)*AMP(6)+(-1.000000000000000D + $ +00)*TMP_JAMP(6)+TMP_JAMP(8) + JAMP(2,1) = (-1.000000000000000D+00)*AMP(4)+TMP_JAMP(5)+( + $ -1.000000000000000D+00)*TMP_JAMP(8) + JAMP(3,1) = (-1.000000000000000D+00)*AMP(13)+( + $ -1.000000000000000D+00)*TMP_JAMP(4)+TMP_JAMP(6) + JAMP(4,1) = (-1.000000000000000D+00)*AMP(7)+TMP_JAMP(4)+( + $ -1.000000000000000D+00)*TMP_JAMP(9) + JAMP(5,1) = (-1.000000000000000D+00)*AMP(14)+( + $ -1.000000000000000D+00)*TMP_JAMP(5)+TMP_JAMP(7) + JAMP(6,1) = (-1.000000000000000D+00)*AMP(9)+(-1.000000000000000D + $ +00)*TMP_JAMP(7)+TMP_JAMP(9) IF(INIT_MODE)THEN DO I=1, NGRAPHS diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f index 2e8e377de8..c8fbb1cc8b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f @@ -391,7 +391,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C LOCAL VARIABLES C INTEGER I,J,M,N - COMPLEX*16 ZTEMP, TMP_JAMP(163) + 
COMPLEX*16 ZTEMP, TMP_JAMP(155) REAL*8 CF(NCOLOR,NCOLOR) COMPLEX*16 AMP(NGRAPHS), JAMP(NCOLOR,NAMPSO) COMPLEX*16 W(6,NWAVEFUNCS) @@ -1218,362 +1218,318 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) TMP_JAMP(3) = AMP(84) + AMP(86) ! used 8 times TMP_JAMP(2) = AMP(81) - AMP(83) ! used 8 times TMP_JAMP(1) = AMP(82) + AMP(85) ! used 8 times - TMP_JAMP(30) = TMP_JAMP(11) + AMP(121) ! used 8 times - TMP_JAMP(29) = TMP_JAMP(12) - AMP(132) ! used 8 times + TMP_JAMP(30) = TMP_JAMP(15) - AMP(157) ! used 8 times + TMP_JAMP(29) = TMP_JAMP(14) + AMP(159) ! used 8 times TMP_JAMP(28) = TMP_JAMP(13) + AMP(130) ! used 8 times - TMP_JAMP(27) = TMP_JAMP(14) + AMP(159) ! used 8 times - TMP_JAMP(26) = TMP_JAMP(15) - AMP(157) ! used 8 times - TMP_JAMP(25) = TMP_JAMP(8) - AMP(131) ! used 8 times + TMP_JAMP(27) = TMP_JAMP(12) - AMP(132) ! used 8 times + TMP_JAMP(26) = TMP_JAMP(11) + AMP(121) ! used 8 times + TMP_JAMP(25) = TMP_JAMP(10) + AMP(154) ! used 8 times TMP_JAMP(24) = TMP_JAMP(9) - AMP(156) ! used 8 times - TMP_JAMP(23) = TMP_JAMP(10) + AMP(154) ! used 8 times - TMP_JAMP(22) = TMP_JAMP(6) + AMP(114) ! used 8 times - TMP_JAMP(21) = TMP_JAMP(7) + AMP(158) ! used 8 times - TMP_JAMP(20) = TMP_JAMP(4) - AMP(141) ! used 8 times - TMP_JAMP(19) = TMP_JAMP(5) + AMP(139) ! used 8 times - TMP_JAMP(18) = TMP_JAMP(2) + AMP(105) ! used 8 times - TMP_JAMP(17) = TMP_JAMP(3) - AMP(155) ! used 8 times + TMP_JAMP(23) = TMP_JAMP(8) - AMP(131) ! used 8 times + TMP_JAMP(22) = TMP_JAMP(7) + AMP(158) ! used 8 times + TMP_JAMP(21) = TMP_JAMP(6) + AMP(114) ! used 8 times + TMP_JAMP(20) = TMP_JAMP(5) + AMP(139) ! used 8 times + TMP_JAMP(19) = TMP_JAMP(4) - AMP(141) ! used 8 times + TMP_JAMP(18) = TMP_JAMP(3) - AMP(155) ! used 8 times + TMP_JAMP(17) = TMP_JAMP(2) + AMP(105) ! used 8 times TMP_JAMP(16) = TMP_JAMP(1) - AMP(140) ! used 8 times - TMP_JAMP(90) = AMP(108) + AMP(133) ! used 4 times - TMP_JAMP(89) = AMP(51) + AMP(52) ! used 4 times - TMP_JAMP(88) = AMP(40) - AMP(54) ! 
used 4 times - TMP_JAMP(87) = AMP(11) - AMP(135) ! used 4 times - TMP_JAMP(86) = TMP_JAMP(26) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(51) ! used 4 times - TMP_JAMP(85) = TMP_JAMP(28) + TMP_JAMP(27) ! used 4 times - TMP_JAMP(84) = TMP_JAMP(29) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(11) ! used 4 times - TMP_JAMP(83) = TMP_JAMP(30) + TMP_JAMP(29) ! used 4 times - TMP_JAMP(82) = AMP(102) + AMP(151) ! used 4 times - TMP_JAMP(81) = AMP(69) - AMP(134) ! used 4 times - TMP_JAMP(80) = AMP(59) - AMP(153) ! used 4 times - TMP_JAMP(79) = TMP_JAMP(23) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(102) ! used 4 times - TMP_JAMP(78) = TMP_JAMP(24) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(59) ! used 4 times - TMP_JAMP(77) = TMP_JAMP(25) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(69) ! used 4 times - TMP_JAMP(76) = TMP_JAMP(29) + TMP_JAMP(25) ! used 4 times - TMP_JAMP(75) = TMP_JAMP(30) - TMP_JAMP(23) ! used 4 times - TMP_JAMP(74) = AMP(43) - AMP(53) ! used 4 times - TMP_JAMP(73) = TMP_JAMP(21) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(43) ! used 4 times - TMP_JAMP(72) = TMP_JAMP(22) + ((0.000000000000000D+00, + TMP_JAMP(80) = TMP_JAMP(30) + TMP_JAMP(29) ! used 4 times + TMP_JAMP(79) = TMP_JAMP(30) - TMP_JAMP(22) ! used 4 times + TMP_JAMP(78) = TMP_JAMP(29) + TMP_JAMP(22) ! used 4 times + TMP_JAMP(77) = TMP_JAMP(28) + TMP_JAMP(27) ! used 4 times + TMP_JAMP(76) = TMP_JAMP(28) - TMP_JAMP(23) ! used 4 times + TMP_JAMP(75) = TMP_JAMP(27) + TMP_JAMP(23) ! used 4 times + TMP_JAMP(74) = TMP_JAMP(27) + TMP_JAMP(19) ! used 4 times + TMP_JAMP(73) = TMP_JAMP(26) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(18) ! used 4 times + TMP_JAMP(72) = TMP_JAMP(26) - TMP_JAMP(25) ! used 4 times + TMP_JAMP(71) = TMP_JAMP(26) - TMP_JAMP(19) ! used 4 times + TMP_JAMP(70) = TMP_JAMP(26) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(17) ! 
used 4 times + TMP_JAMP(69) = TMP_JAMP(25) + TMP_JAMP(24) ! used 4 times + TMP_JAMP(68) = TMP_JAMP(25) - TMP_JAMP(18) ! used 4 times + TMP_JAMP(67) = TMP_JAMP(24) - TMP_JAMP(23) ! used 4 times + TMP_JAMP(66) = TMP_JAMP(24) + TMP_JAMP(18) ! used 4 times + TMP_JAMP(65) = TMP_JAMP(22) + TMP_JAMP(20) ! used 4 times + TMP_JAMP(64) = TMP_JAMP(21) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(62) ! used 4 times - TMP_JAMP(71) = TMP_JAMP(22) + TMP_JAMP(21) ! used 4 times - TMP_JAMP(70) = TMP_JAMP(27) + TMP_JAMP(21) ! used 4 times - TMP_JAMP(69) = TMP_JAMP(28) - TMP_JAMP(25) ! used 4 times - TMP_JAMP(68) = AMP(119) + AMP(145) ! used 4 times - TMP_JAMP(67) = AMP(14) - AMP(147) ! used 4 times - TMP_JAMP(66) = TMP_JAMP(20) + TMP_JAMP(19) ! used 4 times - TMP_JAMP(65) = TMP_JAMP(22) - TMP_JAMP(19) ! used 4 times - TMP_JAMP(64) = TMP_JAMP(29) + TMP_JAMP(20) ! used 4 times - TMP_JAMP(63) = AMP(77) - AMP(152) ! used 4 times - TMP_JAMP(62) = TMP_JAMP(17) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(77) ! used 4 times - TMP_JAMP(61) = TMP_JAMP(18) + ((-0.000000000000000D+00 + TMP_JAMP(63) = TMP_JAMP(21) - TMP_JAMP(20) ! used 4 times + TMP_JAMP(62) = TMP_JAMP(21) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(61) ! used 4 times + TMP_JAMP(61) = TMP_JAMP(20) + TMP_JAMP(19) ! used 4 times + TMP_JAMP(60) = TMP_JAMP(20) - TMP_JAMP(16) ! used 4 times + TMP_JAMP(59) = TMP_JAMP(19) + TMP_JAMP(16) ! used 4 times + TMP_JAMP(58) = TMP_JAMP(18) - TMP_JAMP(16) ! used 4 times + TMP_JAMP(57) = TMP_JAMP(17) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(79) ! used 4 times - TMP_JAMP(60) = TMP_JAMP(24) + TMP_JAMP(17) ! used 4 times - TMP_JAMP(59) = TMP_JAMP(28) - TMP_JAMP(18) ! used 4 times - TMP_JAMP(58) = AMP(89) - AMP(146) ! used 4 times - TMP_JAMP(57) = TMP_JAMP(20) + TMP_JAMP(16) ! used 4 times - TMP_JAMP(56) = AMP(117) + AMP(142) ! used 4 times - TMP_JAMP(55) = AMP(8) - AMP(144) ! 
used 4 times - TMP_JAMP(54) = TMP_JAMP(19) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(117) ! used 4 times - TMP_JAMP(53) = TMP_JAMP(20) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(8) ! used 4 times - TMP_JAMP(52) = TMP_JAMP(26) - TMP_JAMP(21) ! used 4 times - TMP_JAMP(51) = TMP_JAMP(30) - TMP_JAMP(20) ! used 4 times - TMP_JAMP(50) = AMP(87) - AMP(143) ! used 4 times - TMP_JAMP(49) = TMP_JAMP(16) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(87) ! used 4 times - TMP_JAMP(48) = TMP_JAMP(23) - TMP_JAMP(17) ! used 4 times - TMP_JAMP(47) = TMP_JAMP(18) + ((0.000000000000000D+00, + TMP_JAMP(56) = TMP_JAMP(17) - TMP_JAMP(16) ! used 4 times + TMP_JAMP(55) = TMP_JAMP(17) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(80) ! used 4 times - TMP_JAMP(46) = TMP_JAMP(19) - TMP_JAMP(16) ! used 4 times - TMP_JAMP(45) = TMP_JAMP(27) + TMP_JAMP(18) ! used 4 times - TMP_JAMP(44) = TMP_JAMP(28) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(110) ! used 4 times - TMP_JAMP(43) = TMP_JAMP(29) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(16) ! used 4 times - TMP_JAMP(42) = TMP_JAMP(22) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(61) ! used 4 times - TMP_JAMP(41) = TMP_JAMP(24) - TMP_JAMP(22) ! used 4 times - TMP_JAMP(40) = TMP_JAMP(25) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(71) ! used 4 times - TMP_JAMP(39) = AMP(96) + AMP(148) ! used 4 times - TMP_JAMP(38) = TMP_JAMP(23) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(96) ! used 4 times - TMP_JAMP(37) = TMP_JAMP(24) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(56) ! used 4 times - TMP_JAMP(36) = TMP_JAMP(26) + TMP_JAMP(23) ! used 4 times - TMP_JAMP(35) = TMP_JAMP(17) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(74) ! used 4 times - TMP_JAMP(34) = TMP_JAMP(30) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(17) ! 
used 4 times - TMP_JAMP(33) = TMP_JAMP(26) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(33) ! used 4 times - TMP_JAMP(32) = TMP_JAMP(27) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(22) ! used 4 times - TMP_JAMP(31) = TMP_JAMP(21) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(25) ! used 4 times - TMP_JAMP(98) = TMP_JAMP(43) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(138) ! used 4 times - TMP_JAMP(97) = TMP_JAMP(44) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(136) ! used 4 times - TMP_JAMP(96) = TMP_JAMP(40) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(137) ! used 4 times - TMP_JAMP(95) = TMP_JAMP(37) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(150) ! used 4 times - TMP_JAMP(94) = TMP_JAMP(35) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(149) ! used 4 times - TMP_JAMP(93) = TMP_JAMP(32) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(36) ! used 4 times - TMP_JAMP(92) = TMP_JAMP(33) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(34) ! used 4 times - TMP_JAMP(91) = TMP_JAMP(31) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(35) ! used 4 times - TMP_JAMP(151) = AMP(18) + ((0.000000000000000D+00, + TMP_JAMP(54) = AMP(108) + AMP(133) ! used 4 times + TMP_JAMP(53) = AMP(51) + AMP(52) ! used 4 times + TMP_JAMP(52) = AMP(40) - AMP(54) ! used 4 times + TMP_JAMP(51) = AMP(11) - AMP(135) ! used 4 times + TMP_JAMP(50) = AMP(102) + AMP(151) ! used 4 times + TMP_JAMP(49) = AMP(69) - AMP(134) ! used 4 times + TMP_JAMP(48) = AMP(59) - AMP(153) ! used 4 times + TMP_JAMP(47) = AMP(43) - AMP(53) ! used 4 times + TMP_JAMP(46) = AMP(119) + AMP(145) ! used 4 times + TMP_JAMP(45) = AMP(14) - AMP(147) ! used 4 times + TMP_JAMP(44) = AMP(77) - AMP(152) ! used 4 times + TMP_JAMP(43) = AMP(89) - AMP(146) ! used 4 times + TMP_JAMP(42) = AMP(117) + AMP(142) ! used 4 times + TMP_JAMP(41) = AMP(8) - AMP(144) ! 
used 4 times + TMP_JAMP(40) = AMP(87) - AMP(143) ! used 4 times + TMP_JAMP(39) = AMP(110) + AMP(136) ! used 4 times + TMP_JAMP(38) = AMP(16) - AMP(138) ! used 4 times + TMP_JAMP(37) = AMP(71) - AMP(137) ! used 4 times + TMP_JAMP(36) = AMP(96) + AMP(148) ! used 4 times + TMP_JAMP(35) = AMP(56) - AMP(150) ! used 4 times + TMP_JAMP(34) = AMP(74) - AMP(149) ! used 4 times + TMP_JAMP(33) = AMP(33) + AMP(34) ! used 4 times + TMP_JAMP(32) = AMP(22) - AMP(36) ! used 4 times + TMP_JAMP(31) = AMP(25) - AMP(35) ! used 4 times + TMP_JAMP(142) = TMP_JAMP(80) + TMP_JAMP(77) ! used 2 times + TMP_JAMP(141) = TMP_JAMP(80) + TMP_JAMP(68) ! used 2 times + TMP_JAMP(140) = TMP_JAMP(79) - TMP_JAMP(61) ! used 2 times + TMP_JAMP(139) = TMP_JAMP(79) + TMP_JAMP(69) ! used 2 times + TMP_JAMP(138) = TMP_JAMP(78) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(47) ! used 2 times + TMP_JAMP(137) = TMP_JAMP(77) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(39) ! used 2 times + TMP_JAMP(136) = TMP_JAMP(76) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(54) ! used 2 times + TMP_JAMP(135) = TMP_JAMP(76) + TMP_JAMP(66) ! used 2 times + TMP_JAMP(134) = TMP_JAMP(76) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(39) ! used 2 times + TMP_JAMP(133) = TMP_JAMP(75) + TMP_JAMP(61) ! used 2 times + TMP_JAMP(132) = TMP_JAMP(73) + AMP(50) ! used 2 times + TMP_JAMP(131) = TMP_JAMP(70) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(36) ! used 2 times + TMP_JAMP(130) = TMP_JAMP(67) - TMP_JAMP(27) ! used 2 times + TMP_JAMP(129) = TMP_JAMP(61) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(46) ! used 2 times + TMP_JAMP(128) = TMP_JAMP(61) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(41) ! used 2 times + TMP_JAMP(127) = TMP_JAMP(58) - TMP_JAMP(25) ! used 2 times + TMP_JAMP(126) = TMP_JAMP(58) + TMP_JAMP(24) ! 
used 2 times + TMP_JAMP(125) = TMP_JAMP(58) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(43) ! used 2 times + TMP_JAMP(124) = TMP_JAMP(55) + AMP(111) ! used 2 times + TMP_JAMP(123) = TMP_JAMP(54) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(109) ! used 2 times + TMP_JAMP(122) = TMP_JAMP(53) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(50) ! used 2 times - TMP_JAMP(150) = TMP_JAMP(87) + ((0.000000000000000D+00, + TMP_JAMP(121) = TMP_JAMP(53) - TMP_JAMP(47) ! used 2 times + TMP_JAMP(120) = TMP_JAMP(52) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(42) ! used 2 times + TMP_JAMP(119) = TMP_JAMP(52) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(41) ! used 2 times + TMP_JAMP(118) = TMP_JAMP(51) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(10) ! used 2 times - TMP_JAMP(149) = TMP_JAMP(90) + TMP_JAMP(88) ! used 2 times - TMP_JAMP(148) = TMP_JAMP(82) - AMP(18) ! used 2 times - TMP_JAMP(147) = TMP_JAMP(74) + ((-0.000000000000000D+00 + TMP_JAMP(117) = TMP_JAMP(51) + TMP_JAMP(49) ! used 2 times + TMP_JAMP(116) = TMP_JAMP(51) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(12) ! used 2 times + TMP_JAMP(115) = TMP_JAMP(50) - TMP_JAMP(44) ! used 2 times + TMP_JAMP(114) = TMP_JAMP(49) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(70) ! used 2 times + TMP_JAMP(113) = TMP_JAMP(48) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(60) ! used 2 times + TMP_JAMP(112) = TMP_JAMP(48) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(58) ! used 2 times + TMP_JAMP(111) = TMP_JAMP(47) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(44) ! used 2 times - TMP_JAMP(146) = TMP_JAMP(68) + TMP_JAMP(67) ! used 2 times - TMP_JAMP(145) = TMP_JAMP(77) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(134) ! used 2 times - TMP_JAMP(144) = AMP(79) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(109) ! 
used 2 times - TMP_JAMP(143) = TMP_JAMP(63) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(76) ! used 2 times - TMP_JAMP(142) = TMP_JAMP(90) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(59) ! used 2 times - TMP_JAMP(141) = TMP_JAMP(67) + TMP_JAMP(58) ! used 2 times - TMP_JAMP(140) = AMP(7) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(18) ! used 2 times - TMP_JAMP(139) = TMP_JAMP(54) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(142) ! used 2 times - TMP_JAMP(138) = TMP_JAMP(55) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(51) ! used 2 times - TMP_JAMP(137) = TMP_JAMP(89) - TMP_JAMP(74) ! used 2 times - TMP_JAMP(136) = TMP_JAMP(49) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(143) ! used 2 times - TMP_JAMP(135) = TMP_JAMP(82) - TMP_JAMP(63) ! used 2 times - TMP_JAMP(134) = AMP(41) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(80) ! used 2 times - TMP_JAMP(133) = TMP_JAMP(56) - TMP_JAMP(50) ! used 2 times - TMP_JAMP(132) = TMP_JAMP(88) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(45) ! used 2 times - TMP_JAMP(131) = TMP_JAMP(47) + AMP(111) ! used 2 times - TMP_JAMP(130) = TMP_JAMP(53) - AMP(9) ! used 2 times - TMP_JAMP(129) = TMP_JAMP(98) + TMP_JAMP(97) ! used 2 times - TMP_JAMP(128) = AMP(58) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(61) ! used 2 times - TMP_JAMP(127) = TMP_JAMP(80) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(41) ! used 2 times - TMP_JAMP(126) = TMP_JAMP(42) - AMP(72) ! used 2 times - TMP_JAMP(125) = TMP_JAMP(96) - TMP_JAMP(42) ! used 2 times - TMP_JAMP(124) = TMP_JAMP(98) + TMP_JAMP(96) ! used 2 times - TMP_JAMP(123) = TMP_JAMP(36) - AMP(39) ! used 2 times - TMP_JAMP(122) = TMP_JAMP(89) - TMP_JAMP(39) ! used 2 times - TMP_JAMP(121) = TMP_JAMP(95) - AMP(55) ! 
used 2 times - TMP_JAMP(120) = TMP_JAMP(58) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(46) ! used 2 times - TMP_JAMP(119) = TMP_JAMP(68) - TMP_JAMP(58) ! used 2 times - TMP_JAMP(118) = TMP_JAMP(94) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(58) ! used 2 times - TMP_JAMP(117) = TMP_JAMP(95) + TMP_JAMP(94) ! used 2 times - TMP_JAMP(116) = TMP_JAMP(94) - AMP(73) ! used 2 times - TMP_JAMP(115) = TMP_JAMP(95) + AMP(57) ! used 2 times - TMP_JAMP(114) = TMP_JAMP(96) - TMP_JAMP(95) ! used 2 times - TMP_JAMP(113) = TMP_JAMP(97) - TMP_JAMP(96) ! used 2 times - TMP_JAMP(112) = TMP_JAMP(38) + AMP(95) ! used 2 times - TMP_JAMP(111) = TMP_JAMP(67) + ((-0.000000000000000D+00 + TMP_JAMP(110) = TMP_JAMP(46) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(120) ! used 2 times + TMP_JAMP(109) = TMP_JAMP(46) - TMP_JAMP(43) ! used 2 times + TMP_JAMP(108) = TMP_JAMP(45) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(12) ! used 2 times + TMP_JAMP(107) = TMP_JAMP(45) + TMP_JAMP(43) ! used 2 times + TMP_JAMP(106) = TMP_JAMP(45) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(13) ! used 2 times - TMP_JAMP(110) = TMP_JAMP(67) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(34) ! used 2 times - TMP_JAMP(109) = TMP_JAMP(98) + AMP(15) ! used 2 times - TMP_JAMP(108) = TMP_JAMP(98) + TMP_JAMP(34) ! used 2 times - TMP_JAMP(107) = TMP_JAMP(61) - AMP(23) ! used 2 times - TMP_JAMP(106) = TMP_JAMP(93) + TMP_JAMP(92) ! used 2 times - TMP_JAMP(105) = TMP_JAMP(68) + ((-0.000000000000000D+00 + TMP_JAMP(105) = TMP_JAMP(44) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(76) ! used 2 times + TMP_JAMP(104) = TMP_JAMP(42) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(45) ! used 2 times + TMP_JAMP(103) = TMP_JAMP(42) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(118) ! 
used 2 times + TMP_JAMP(102) = TMP_JAMP(41) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(7) ! used 2 times + TMP_JAMP(101) = TMP_JAMP(40) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(78) ! used 2 times + TMP_JAMP(100) = TMP_JAMP(40) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(88) ! used 2 times + TMP_JAMP(99) = TMP_JAMP(39) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(111) ! used 2 times + TMP_JAMP(98) = TMP_JAMP(39) - TMP_JAMP(37) ! used 2 times + TMP_JAMP(97) = TMP_JAMP(38) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(9) ! used 2 times + TMP_JAMP(96) = TMP_JAMP(38) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(15) ! used 2 times + TMP_JAMP(95) = TMP_JAMP(37) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(72) ! used 2 times + TMP_JAMP(94) = TMP_JAMP(36) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(39) ! used 2 times + TMP_JAMP(93) = TMP_JAMP(35) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(55) ! used 2 times + TMP_JAMP(92) = TMP_JAMP(35) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(57) ! used 2 times + TMP_JAMP(91) = TMP_JAMP(34) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(75) ! used 2 times + TMP_JAMP(90) = TMP_JAMP(34) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(73) ! used 2 times + TMP_JAMP(89) = TMP_JAMP(33) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(21) ! used 2 times + TMP_JAMP(88) = TMP_JAMP(33) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(32) ! used 2 times + TMP_JAMP(87) = TMP_JAMP(32) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(23) ! used 2 times + TMP_JAMP(86) = TMP_JAMP(32) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(24) ! used 2 times + TMP_JAMP(85) = TMP_JAMP(31) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(27) ! 
used 2 times - TMP_JAMP(104) = TMP_JAMP(91) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(68) ! used 2 times - TMP_JAMP(103) = TMP_JAMP(93) + TMP_JAMP(91) ! used 2 times - TMP_JAMP(102) = TMP_JAMP(91) - AMP(26) ! used 2 times - TMP_JAMP(101) = TMP_JAMP(92) - TMP_JAMP(91) ! used 2 times - TMP_JAMP(100) = TMP_JAMP(97) + TMP_JAMP(93) ! used 2 times - TMP_JAMP(99) = TMP_JAMP(92) + TMP_JAMP(34) ! used 2 times - TMP_JAMP(163) = TMP_JAMP(149) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(42) ! used 2 times - TMP_JAMP(162) = TMP_JAMP(144) - TMP_JAMP(142) ! used 2 times - TMP_JAMP(161) = TMP_JAMP(140) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(138) ! used 2 times - TMP_JAMP(160) = TMP_JAMP(135) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(48) ! used 2 times - TMP_JAMP(159) = TMP_JAMP(133) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(46) ! used 2 times - TMP_JAMP(158) = TMP_JAMP(134) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(132) ! used 2 times - TMP_JAMP(157) = TMP_JAMP(130) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(144) ! used 2 times - TMP_JAMP(156) = TMP_JAMP(128) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * TMP_JAMP(127) ! used 2 times - TMP_JAMP(155) = TMP_JAMP(123) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * TMP_JAMP(122) ! used 2 times - TMP_JAMP(154) = TMP_JAMP(112) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(148) ! used 2 times - TMP_JAMP(153) = TMP_JAMP(100) + AMP(24) ! used 2 times - TMP_JAMP(152) = TMP_JAMP(99) + AMP(32) ! used 2 times + TMP_JAMP(84) = TMP_JAMP(31) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(26) ! used 2 times + TMP_JAMP(83) = TMP_JAMP(25) + AMP(95) ! used 2 times + TMP_JAMP(82) = AMP(18) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(101) ! 
used 2 times + TMP_JAMP(81) = AMP(79) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(90) ! used 2 times + TMP_JAMP(155) = TMP_JAMP(131) - TMP_JAMP(83) ! used 2 times + TMP_JAMP(154) = TMP_JAMP(119) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(55) ! used 2 times + TMP_JAMP(153) = TMP_JAMP(114) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(64) ! used 2 times + TMP_JAMP(152) = TMP_JAMP(111) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(64) ! used 2 times + TMP_JAMP(151) = TMP_JAMP(105) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(57) ! used 2 times + TMP_JAMP(150) = TMP_JAMP(103) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(62) ! used 2 times + TMP_JAMP(149) = TMP_JAMP(100) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(55) ! used 2 times + TMP_JAMP(148) = TMP_JAMP(95) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(62) ! used 2 times + TMP_JAMP(147) = TMP_JAMP(94) - TMP_JAMP(53) ! used 2 times + TMP_JAMP(146) = TMP_JAMP(89) - TMP_JAMP(50) ! used 2 times + TMP_JAMP(145) = TMP_JAMP(88) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(70) ! used 2 times + TMP_JAMP(144) = TMP_JAMP(84) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * TMP_JAMP(62) ! used 2 times + TMP_JAMP(143) = TMP_JAMP(81) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * TMP_JAMP(56) ! 
used 2 times JAMP(1,1) = (-1.000000000000000D+00)*AMP(30) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(109) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(152) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(153) - JAMP(2,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(27)+(-1.000000000000000D+00)*AMP(28)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(66)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(104)+TMP_JAMP(111) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(152) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(80) + $ +TMP_JAMP(86)+TMP_JAMP(96)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(137)+TMP_JAMP(145) + JAMP(2,1) = (-1.000000000000000D+00)*AMP(28) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(79) + $ +TMP_JAMP(85)+TMP_JAMP(106)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(129)+(-1.000000000000000D+00) + $ *TMP_JAMP(145) JAMP(3,1) = (-1.000000000000000D+00)*AMP(31) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(72) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(102) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(125) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(153) - JAMP(4,1) = (-1.000000000000000D+00)*AMP(19) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(21) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(26) - $ +AMP(151)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(79)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(101)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(156) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(78) + $ +(-1.000000000000000D+00)*TMP_JAMP(86)+TMP_JAMP(95) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(134) + $ +(-1.000000000000000D+00)*TMP_JAMP(144) + JAMP(4,1) = 
(-1.000000000000000D+00)*AMP(19)+TMP_JAMP(112) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(139) + $ +TMP_JAMP(144)+(-1.000000000000000D+00)*TMP_JAMP(146) JAMP(5,1) = (-1.000000000000000D+00)*AMP(29) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(90) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(103) - $ +(-1.000000000000000D+00)*TMP_JAMP(105)+((0.000000000000000D+00 - $ ,-1.000000000000000D+00))*TMP_JAMP(107)+TMP_JAMP(120) - JAMP(6,1) = (-1.000000000000000D+00)*AMP(20) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(21) - $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(76) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(106) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(107) - $ +(-1.000000000000000D+00)*TMP_JAMP(160) - JAMP(7,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(15)+((0.000000000000000D+00,1.000000000000000D+00))*AMP(57) - $ +(-1.000000000000000D+00)*AMP(93)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(108)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(114)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(154) - JAMP(8,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(13)+((0.000000000000000D+00,1.000000000000000D+00))*AMP(75) - $ +(-1.000000000000000D+00)*AMP(91)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(57)+(-1.000000000000000D+00) - $ *TMP_JAMP(110)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(118)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(154) - JAMP(9,1) = (-1.000000000000000D+00)*AMP(94) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(113) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(115) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(116) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(131) - JAMP(10,1) = (-1.000000000000000D+00)*AMP(38) - $ 
+((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(116) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(155) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(158) - JAMP(11,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(55)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(75)+(-1.000000000000000D+00)*AMP(92)+((0.000000000000000D - $ +00,1.000000000000000D+00))*AMP(120)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(46)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(72)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(117)+(-1.000000000000000D+00) - $ *TMP_JAMP(119) - JAMP(12,1) = (-1.000000000000000D+00)*AMP(37)+( - $ -1.000000000000000D+00)*AMP(62)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*TMP_JAMP(71)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(121)+TMP_JAMP(147) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(29) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(65) + $ +(-1.000000000000000D+00)*TMP_JAMP(85)+(-1.000000000000000D+00) + $ *TMP_JAMP(87)+(-1.000000000000000D+00)*TMP_JAMP(109) + $ +TMP_JAMP(143) + JAMP(6,1) = (-1.000000000000000D+00)*AMP(20)+TMP_JAMP(87) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(141) + $ +TMP_JAMP(146)+TMP_JAMP(151) + JAMP(7,1) = (-1.000000000000000D+00)*AMP(93)+( + $ -1.000000000000000D+00)*TMP_JAMP(37)+TMP_JAMP(92)+( + $ -1.000000000000000D+00)*TMP_JAMP(96)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(130)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(155) + JAMP(8,1) = (-1.000000000000000D+00)*AMP(91) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(19) + $ +TMP_JAMP(91)+(-1.000000000000000D+00)*TMP_JAMP(106) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(125) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(155) - JAMP(13,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ 
*AMP(118)+(-1.000000000000000D+00)*AMP(126) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(124) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(126) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(139) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(157) - JAMP(14,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(78)+(-1.000000000000000D+00)*AMP(98)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*AMP(118)+AMP(152) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(62) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(156) - $ +(-1.000000000000000D+00)*TMP_JAMP(159) - JAMP(15,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(88)+(-1.000000000000000D+00)*AMP(127)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(129)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*TMP_JAMP(131)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(136)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(157) - JAMP(16,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(45)+(-1.000000000000000D+00)*AMP(47)+AMP(53) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(88) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(73) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(158) - $ +(-1.000000000000000D+00)*TMP_JAMP(159) - JAMP(17,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(78)+(-1.000000000000000D+00)*AMP(97)+((0.000000000000000D - $ +00,1.000000000000000D+00))*AMP(101)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*TMP_JAMP(136)+(-1.000000000000000D+00) - $ *TMP_JAMP(160)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(161) - JAMP(18,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(45)+(-1.000000000000000D+00)*AMP(46)+((0.000000000000000D - $ +00,1.000000000000000D+00))*AMP(50)+((0.000000000000000D+00 - $ 
,1.000000000000000D+00))*TMP_JAMP(52)+(-1.000000000000000D+00) - $ *TMP_JAMP(137)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(139)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(161) - JAMP(19,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(12)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(90)+(-1.000000000000000D+00)*AMP(128)+(-1.000000000000000D - $ +00)*AMP(135)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(57)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(84)+(-1.000000000000000D+00)*TMP_JAMP(141)+( - $ -1.000000000000000D+00)*TMP_JAMP(162) - JAMP(20,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(60)+(-1.000000000000000D+00)*AMP(100)+((0.000000000000000D - $ +00,1.000000000000000D+00))*TMP_JAMP(60)+(-1.000000000000000D - $ +00)*TMP_JAMP(80)+(-1.000000000000000D+00)*TMP_JAMP(143) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(145) - $ +TMP_JAMP(162) - JAMP(21,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(12)+(-1.000000000000000D+00)*AMP(62)+((0.000000000000000D - $ +00,1.000000000000000D+00))*AMP(70)+((0.000000000000000D+00, - $ -1.000000000000000D+00))*AMP(120)+(-1.000000000000000D+00) - $ *AMP(129)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(64)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(65)+(-1.000000000000000D+00)*TMP_JAMP(87) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(145) - $ +TMP_JAMP(146) + JAMP(9,1) = (-1.000000000000000D+00)*AMP(94)+( + $ -1.000000000000000D+00)*TMP_JAMP(90)+(-1.000000000000000D+00) + $ *TMP_JAMP(92)+(-1.000000000000000D+00)*TMP_JAMP(98) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(124) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(135) + JAMP(10,1) = (-1.000000000000000D+00)*AMP(38)+TMP_JAMP(90) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(141) + $ 
+(-1.000000000000000D+00)*TMP_JAMP(147)+TMP_JAMP(154) + JAMP(11,1) = AMP(62)+(-1.000000000000000D+00)*AMP(92) + $ +((0.000000000000000D+00,1.000000000000000D+00))*AMP(120) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(63)+( + $ -1.000000000000000D+00)*TMP_JAMP(91)+(-1.000000000000000D+00) + $ *TMP_JAMP(93)+(-1.000000000000000D+00)*TMP_JAMP(109) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(126) + JAMP(12,1) = (-1.000000000000000D+00)*AMP(37)+TMP_JAMP(93) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(139) + $ +TMP_JAMP(147)+TMP_JAMP(152) + JAMP(13,1) = (-1.000000000000000D+00)*AMP(126) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(75) + $ +(-1.000000000000000D+00)*TMP_JAMP(97)+TMP_JAMP(103) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(128) + $ +(-1.000000000000000D+00)*TMP_JAMP(148) + JAMP(14,1) = (-1.000000000000000D+00)*AMP(98) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(20)+( + $ -1.000000000000000D+00)*TMP_JAMP(44)+TMP_JAMP(101)+( + $ -1.000000000000000D+00)*TMP_JAMP(112)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(126)+(-1.000000000000000D+00) + $ *TMP_JAMP(150) + JAMP(15,1) = (-1.000000000000000D+00)*AMP(127)+( + $ -1.000000000000000D+00)*TMP_JAMP(41)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(59)+((0.000000000000000D+00 + $ ,1.000000000000000D+00))*TMP_JAMP(77)+TMP_JAMP(97)+TMP_JAMP(99) + $ +(-1.000000000000000D+00)*TMP_JAMP(149) + JAMP(16,1) = (-1.000000000000000D+00)*AMP(47) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(60)+( + $ -1.000000000000000D+00)*TMP_JAMP(104)+(-1.000000000000000D+00) + $ *TMP_JAMP(119)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(138)+TMP_JAMP(149) + JAMP(17,1) = (-1.000000000000000D+00)*AMP(97) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(71) + $ +TMP_JAMP(82)+(-1.000000000000000D+00)*TMP_JAMP(101)+( + $ 
-1.000000000000000D+00)*TMP_JAMP(102)+(-1.000000000000000D+00) + $ *TMP_JAMP(115)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *TMP_JAMP(127) + JAMP(18,1) = (-1.000000000000000D+00)*AMP(46)+TMP_JAMP(102) + $ +TMP_JAMP(104)+(-1.000000000000000D+00)*TMP_JAMP(121) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(132) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(140) + JAMP(19,1) = (-1.000000000000000D+00)*AMP(128) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(28) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(74) + $ +(-1.000000000000000D+00)*TMP_JAMP(107)+TMP_JAMP(116) + $ +TMP_JAMP(123)+(-1.000000000000000D+00)*TMP_JAMP(143) + JAMP(20,1) = (-1.000000000000000D+00)*AMP(100)+TMP_JAMP(49)+( + $ -1.000000000000000D+00)*TMP_JAMP(113)+(-1.000000000000000D+00) + $ *TMP_JAMP(123)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *TMP_JAMP(135)+(-1.000000000000000D+00)*TMP_JAMP(151) + JAMP(21,1) = (-1.000000000000000D+00)*AMP(129)+( + $ -1.000000000000000D+00)*TMP_JAMP(51)+TMP_JAMP(108)+TMP_JAMP(110) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(133) + $ +(-1.000000000000000D+00)*TMP_JAMP(153) JAMP(22,1) = (-1.000000000000000D+00)*AMP(49) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(70) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(69) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(70) - $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(72) - $ +TMP_JAMP(81)+(-1.000000000000000D+00)*TMP_JAMP(147)+( - $ -1.000000000000000D+00)*TMP_JAMP(163) - JAMP(23,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *AMP(60)+(-1.000000000000000D+00)*AMP(99)+((0.000000000000000D - $ +00,-1.000000000000000D+00))*AMP(101)+(-1.000000000000000D+00) - $ *AMP(153)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(75)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(76)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ 
*TMP_JAMP(78)+(-1.000000000000000D+00)*TMP_JAMP(81) - $ +TMP_JAMP(148)+(-1.000000000000000D+00)*TMP_JAMP(150) - JAMP(24,1) = (-1.000000000000000D+00)*AMP(48)+AMP(52) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(83) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(85) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(86) - $ +TMP_JAMP(150)+TMP_JAMP(151)+TMP_JAMP(163) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(78) + $ +TMP_JAMP(114)+(-1.000000000000000D+00)*TMP_JAMP(120) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(136) + $ +(-1.000000000000000D+00)*TMP_JAMP(152) + JAMP(23,1) = ((0.000000000000000D+00,1.000000000000000D+00)) + $ *AMP(10)+(-1.000000000000000D+00)*AMP(99)+TMP_JAMP(50) + $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(72)+( + $ -1.000000000000000D+00)*TMP_JAMP(82)+TMP_JAMP(113)+( + $ -1.000000000000000D+00)*TMP_JAMP(117)+((0.000000000000000D+00, + $ -1.000000000000000D+00))*TMP_JAMP(130) + JAMP(24,1) = (-1.000000000000000D+00)*AMP(48)+TMP_JAMP(54) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(73) + $ +TMP_JAMP(118)+TMP_JAMP(120)+TMP_JAMP(122)+((0.000000000000000D + $ +00,-1.000000000000000D+00))*TMP_JAMP(142) IF(INIT_MODE)THEN DO I=1, NGRAPHS diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f index 41e5e36e39..4f966fab6d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f @@ -397,7 +397,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C LOCAL VARIABLES C INTEGER I,J,M,N - COMPLEX*16 ZTEMP, TMP_JAMP(16) + COMPLEX*16 ZTEMP, TMP_JAMP(17) REAL*8 CF(NCOLOR,NCOLOR) COMPLEX*16 AMP(NGRAPHS), JAMP(NCOLOR,NAMPSO) COMPLEX*16 W(6,NWAVEFUNCS) @@ -668,17 +668,21 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) TMP_JAMP(3) = AMP(35) + AMP(37) ! 
used 4 times TMP_JAMP(2) = AMP(3) + AMP(34) ! used 4 times TMP_JAMP(1) = AMP(36) + AMP(38) ! used 4 times - TMP_JAMP(16) = AMP(1) + AMP(2) ! used 2 times - TMP_JAMP(15) = AMP(1) + AMP(5) ! used 2 times - TMP_JAMP(14) = TMP_JAMP(3) + TMP_JAMP(2) ! used 2 times - TMP_JAMP(13) = AMP(2) + AMP(4) ! used 2 times - TMP_JAMP(12) = TMP_JAMP(2) - TMP_JAMP(1) ! used 2 times - TMP_JAMP(11) = AMP(4) + AMP(5) ! used 2 times - TMP_JAMP(10) = TMP_JAMP(3) + TMP_JAMP(1) ! used 2 times - TMP_JAMP(9) = TMP_JAMP(1) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(29) ! used 2 times - TMP_JAMP(8) = TMP_JAMP(3) + ((-0.000000000000000D+00, + TMP_JAMP(17) = TMP_JAMP(3) + TMP_JAMP(2) ! used 2 times + TMP_JAMP(16) = TMP_JAMP(3) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(33) ! used 2 times + TMP_JAMP(15) = TMP_JAMP(3) + ((-0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(10) ! used 2 times + TMP_JAMP(14) = TMP_JAMP(1) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(25) ! used 2 times + TMP_JAMP(13) = TMP_JAMP(1) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(20) ! used 2 times + TMP_JAMP(12) = TMP_JAMP(1) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(29) ! used 2 times + TMP_JAMP(11) = AMP(1) + AMP(2) ! used 2 times + TMP_JAMP(10) = AMP(1) + AMP(5) ! used 2 times + TMP_JAMP(9) = AMP(2) + AMP(4) ! used 2 times + TMP_JAMP(8) = AMP(4) + AMP(5) ! used 2 times TMP_JAMP(7) = AMP(2) + ((0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(32) ! 
used 2 times TMP_JAMP(6) = AMP(1) + ((0.000000000000000D+00 @@ -694,12 +698,12 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ +00,5.000000000000000D-01))*AMP(33)+(5.000000000000000D-01) $ *TMP_JAMP(2)+((0.000000000000000D+00,-5.000000000000000D-01)) $ *TMP_JAMP(4)+((0.000000000000000D+00,-5.000000000000000D-01)) - $ *TMP_JAMP(7)+(5.000000000000000D-01)*TMP_JAMP(8) + $ *TMP_JAMP(7)+(5.000000000000000D-01)*TMP_JAMP(15) JAMP(3,1) = (5.000000000000000D-01)*AMP(17)+((0.000000000000000D $ +00,-5.000000000000000D-01))*AMP(20)+(-5.000000000000000D-01) $ *TMP_JAMP(2)+((0.000000000000000D+00,-5.000000000000000D-01)) $ *TMP_JAMP(5)+((0.000000000000000D+00,-5.000000000000000D-01)) - $ *TMP_JAMP(6)+(5.000000000000000D-01)*TMP_JAMP(9) + $ *TMP_JAMP(6)+(5.000000000000000D-01)*TMP_JAMP(12) JAMP(4,1) = (-1.666666666666667D-01)*AMP(18) $ +((0.000000000000000D+00,1.666666666666667D-01))*TMP_JAMP(6) $ +((0.000000000000000D+00,1.666666666666667D-01))*TMP_JAMP(7) @@ -710,36 +714,35 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ -01)*AMP(7)+(5.000000000000000D-01)*AMP(22)+(5.000000000000000D $ -01)*AMP(23)+((0.000000000000000D+00,-5.000000000000000D-01)) $ *AMP(25)+((0.000000000000000D+00,5.000000000000000D-01))*AMP(31) - $ +(-5.000000000000000D-01)*TMP_JAMP(8)+(-5.000000000000000D-01) - $ *TMP_JAMP(9) + $ +(-5.000000000000000D-01)*TMP_JAMP(12)+(-5.000000000000000D-01) + $ *TMP_JAMP(15) JAMP(7,1) = (5.000000000000000D-01)*AMP(11)+(5.000000000000000D $ -01)*AMP(13)+((0.000000000000000D+00,-5.000000000000000D-01)) $ *AMP(15)+(5.000000000000000D-01)*AMP(16)+(5.000000000000000D-01) - $ *AMP(18)+((0.000000000000000D+00,5.000000000000000D-01))*AMP(20) - $ +((0.000000000000000D+00,5.000000000000000D-01))*AMP(27) - $ +((0.000000000000000D+00,-5.000000000000000D-01))*AMP(33)+( - $ -5.000000000000000D-01)*TMP_JAMP(10) + $ *AMP(18)+((0.000000000000000D+00,5.000000000000000D-01))*AMP(27) + $ +(-5.000000000000000D-01)*TMP_JAMP(13)+(-5.000000000000000D-01) + $ 
*TMP_JAMP(16) JAMP(8,1) = (-1.666666666666667D-01)*AMP(16)+( $ -1.666666666666667D-01)*AMP(17)+(-1.666666666666667D-01)*AMP(21) $ +(-1.666666666666667D-01)*AMP(22) JAMP(9,1) = (-1.666666666666667D-01)*AMP(11)+( $ -1.666666666666667D-01)*AMP(14)+(-1.666666666666667D-01)*AMP(26) - $ +((0.000000000000000D+00,-1.666666666666667D-01))*TMP_JAMP(11) + $ +((0.000000000000000D+00,-1.666666666666667D-01))*TMP_JAMP(8) JAMP(10,1) = (5.000000000000000D-01)*AMP(21)+(5.000000000000000D - $ -01)*AMP(24)+((0.000000000000000D+00,5.000000000000000D-01)) - $ *AMP(25)+(5.000000000000000D-01)*AMP(26)+((0.000000000000000D - $ +00,-5.000000000000000D-01))*AMP(27)+(-5.000000000000000D-01) - $ *TMP_JAMP(12)+((0.000000000000000D+00,5.000000000000000D-01)) - $ *TMP_JAMP(13) + $ -01)*AMP(24)+(5.000000000000000D-01)*AMP(26) + $ +((0.000000000000000D+00,-5.000000000000000D-01))*AMP(27)+( + $ -5.000000000000000D-01)*TMP_JAMP(2)+((0.000000000000000D+00 + $ ,5.000000000000000D-01))*TMP_JAMP(9)+(5.000000000000000D-01) + $ *TMP_JAMP(14) JAMP(11,1) = (5.000000000000000D-01)*AMP(12)+(5.000000000000000D $ -01)*AMP(14)+((0.000000000000000D+00,5.000000000000000D-01)) $ *AMP(15)+(5.000000000000000D-01)*AMP(30)+((0.000000000000000D - $ +00,-5.000000000000000D-01))*AMP(31)+(5.000000000000000D-01) - $ *TMP_JAMP(14)+((0.000000000000000D+00,5.000000000000000D-01)) - $ *TMP_JAMP(15) + $ +00,-5.000000000000000D-01))*AMP(31)+((0.000000000000000D+00 + $ ,5.000000000000000D-01))*TMP_JAMP(10)+(5.000000000000000D-01) + $ *TMP_JAMP(17) JAMP(12,1) = (-1.666666666666667D-01)*AMP(23)+( $ -1.666666666666667D-01)*AMP(24)+(-1.666666666666667D-01)*AMP(30) - $ +((0.000000000000000D+00,-1.666666666666667D-01))*TMP_JAMP(16) + $ +((0.000000000000000D+00,-1.666666666666667D-01))*TMP_JAMP(11) IF(INIT_MODE)THEN DO I=1, NGRAPHS diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f index a2b48f860a..c03cebacb0 100644 --- 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f @@ -397,7 +397,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C LOCAL VARIABLES C INTEGER I,J,M,N - COMPLEX*16 ZTEMP, TMP_JAMP(16) + COMPLEX*16 ZTEMP, TMP_JAMP(17) REAL*8 CF(NCOLOR,NCOLOR) COMPLEX*16 AMP(NGRAPHS), JAMP(NCOLOR,NAMPSO) COMPLEX*16 W(6,NWAVEFUNCS) @@ -668,51 +668,55 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) TMP_JAMP(3) = AMP(36) + AMP(38) ! used 4 times TMP_JAMP(2) = AMP(3) + AMP(34) ! used 4 times TMP_JAMP(1) = AMP(35) + AMP(37) ! used 4 times - TMP_JAMP(16) = AMP(4) + AMP(5) ! used 2 times - TMP_JAMP(15) = AMP(2) + AMP(4) ! used 2 times - TMP_JAMP(14) = TMP_JAMP(3) - TMP_JAMP(2) ! used 2 times - TMP_JAMP(13) = AMP(1) + AMP(2) ! used 2 times - TMP_JAMP(12) = AMP(1) + AMP(5) ! used 2 times - TMP_JAMP(11) = TMP_JAMP(2) + TMP_JAMP(1) ! used 2 times - TMP_JAMP(10) = TMP_JAMP(3) + TMP_JAMP(1) ! used 2 times - TMP_JAMP(9) = AMP(5) + ((0.000000000000000D+00 + TMP_JAMP(17) = TMP_JAMP(3) - TMP_JAMP(2) ! used 2 times + TMP_JAMP(16) = TMP_JAMP(3) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(20) ! used 2 times + TMP_JAMP(15) = TMP_JAMP(3) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(29) ! used 2 times + TMP_JAMP(14) = TMP_JAMP(1) + ((-0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(15) ! used 2 times + TMP_JAMP(13) = TMP_JAMP(1) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(33) ! used 2 times + TMP_JAMP(12) = TMP_JAMP(1) + ((-0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(10) ! used 2 times + TMP_JAMP(11) = AMP(4) + AMP(5) ! used 2 times + TMP_JAMP(10) = AMP(2) + AMP(4) ! used 2 times + TMP_JAMP(9) = AMP(1) + AMP(2) ! used 2 times + TMP_JAMP(8) = AMP(1) + AMP(5) ! used 2 times + TMP_JAMP(7) = AMP(5) + ((0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(28) ! 
used 2 times - TMP_JAMP(8) = AMP(1) + ((0.000000000000000D+00 + TMP_JAMP(6) = AMP(1) + ((0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(19) ! used 2 times - TMP_JAMP(7) = TMP_JAMP(3) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(29) ! used 2 times - TMP_JAMP(6) = AMP(2) + ((-0.000000000000000D+00 + TMP_JAMP(5) = AMP(2) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(32) ! used 2 times - TMP_JAMP(5) = AMP(4) + ((-0.000000000000000D+00 + TMP_JAMP(4) = AMP(4) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(9) ! used 2 times - TMP_JAMP(4) = TMP_JAMP(1) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(10) ! used 2 times JAMP(1,1) = (-5.000000000000000D-01)*AMP(6)+(-5.000000000000000D $ -01)*AMP(7)+(-5.000000000000000D-01)*AMP(22)+( $ -5.000000000000000D-01)*AMP(23)+((0.000000000000000D+00 $ ,5.000000000000000D-01))*AMP(25)+((0.000000000000000D+00, $ -5.000000000000000D-01))*AMP(31)+(5.000000000000000D-01) - $ *TMP_JAMP(4)+(5.000000000000000D-01)*TMP_JAMP(7) + $ *TMP_JAMP(12)+(5.000000000000000D-01)*TMP_JAMP(15) JAMP(2,1) = (1.666666666666667D-01)*AMP(7)+(1.666666666666667D $ -01)*AMP(8)+(1.666666666666667D-01)*AMP(12)+(1.666666666666667D $ -01)*AMP(13) JAMP(3,1) = (-5.000000000000000D-01)*AMP(8)+((0.000000000000000D $ +00,-5.000000000000000D-01))*AMP(33)+(-5.000000000000000D-01) - $ *TMP_JAMP(2)+(-5.000000000000000D-01)*TMP_JAMP(4) - $ +((0.000000000000000D+00,5.000000000000000D-01))*TMP_JAMP(5) - $ +((0.000000000000000D+00,5.000000000000000D-01))*TMP_JAMP(6) + $ *TMP_JAMP(2)+((0.000000000000000D+00,5.000000000000000D-01)) + $ *TMP_JAMP(4)+((0.000000000000000D+00,5.000000000000000D-01)) + $ *TMP_JAMP(5)+(-5.000000000000000D-01)*TMP_JAMP(12) JAMP(4,1) = (1.666666666666667D-01)*AMP(6)+((0.000000000000000D - $ +00,-1.666666666666667D-01))*TMP_JAMP(5)+((0.000000000000000D - $ +00,-1.666666666666667D-01))*TMP_JAMP(9) + $ +00,-1.666666666666667D-01))*TMP_JAMP(4)+((0.000000000000000D + $ 
+00,-1.666666666666667D-01))*TMP_JAMP(7) JAMP(5,1) = (1.666666666666667D-01)*AMP(18)+((0.000000000000000D - $ +00,-1.666666666666667D-01))*TMP_JAMP(6)+((0.000000000000000D - $ +00,-1.666666666666667D-01))*TMP_JAMP(8) + $ +00,-1.666666666666667D-01))*TMP_JAMP(5)+((0.000000000000000D + $ +00,-1.666666666666667D-01))*TMP_JAMP(6) JAMP(6,1) = (-5.000000000000000D-01)*AMP(17) $ +((0.000000000000000D+00,5.000000000000000D-01))*AMP(20) - $ +(5.000000000000000D-01)*TMP_JAMP(2)+(-5.000000000000000D-01) - $ *TMP_JAMP(7)+((0.000000000000000D+00,5.000000000000000D-01)) - $ *TMP_JAMP(8)+((0.000000000000000D+00,5.000000000000000D-01)) - $ *TMP_JAMP(9) + $ +(5.000000000000000D-01)*TMP_JAMP(2)+((0.000000000000000D+00 + $ ,5.000000000000000D-01))*TMP_JAMP(6)+((0.000000000000000D+00 + $ ,5.000000000000000D-01))*TMP_JAMP(7)+(-5.000000000000000D-01) + $ *TMP_JAMP(15) JAMP(7,1) = (1.666666666666667D-01)*AMP(16)+(1.666666666666667D $ -01)*AMP(17)+(1.666666666666667D-01)*AMP(21) $ +(1.666666666666667D-01)*AMP(22) @@ -720,28 +724,26 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ -5.000000000000000D-01)*AMP(13)+((0.000000000000000D+00 $ ,5.000000000000000D-01))*AMP(15)+(-5.000000000000000D-01) $ *AMP(16)+(-5.000000000000000D-01)*AMP(18)+((0.000000000000000D - $ +00,-5.000000000000000D-01))*AMP(20)+((0.000000000000000D+00, - $ -5.000000000000000D-01))*AMP(27)+((0.000000000000000D+00 - $ ,5.000000000000000D-01))*AMP(33)+(5.000000000000000D-01) - $ *TMP_JAMP(10) + $ +00,-5.000000000000000D-01))*AMP(27)+(5.000000000000000D-01) + $ *TMP_JAMP(13)+(5.000000000000000D-01)*TMP_JAMP(16) JAMP(9,1) = (-5.000000000000000D-01)*AMP(12)+( - $ -5.000000000000000D-01)*AMP(14)+((0.000000000000000D+00, - $ -5.000000000000000D-01))*AMP(15)+(-5.000000000000000D-01) - $ *AMP(30)+((0.000000000000000D+00,5.000000000000000D-01))*AMP(31) - $ +(-5.000000000000000D-01)*TMP_JAMP(11)+((0.000000000000000D+00, - $ -5.000000000000000D-01))*TMP_JAMP(12) + $ 
-5.000000000000000D-01)*AMP(14)+(-5.000000000000000D-01)*AMP(30) + $ +((0.000000000000000D+00,5.000000000000000D-01))*AMP(31)+( + $ -5.000000000000000D-01)*TMP_JAMP(2)+((0.000000000000000D+00, + $ -5.000000000000000D-01))*TMP_JAMP(8)+(-5.000000000000000D-01) + $ *TMP_JAMP(14) JAMP(10,1) = (1.666666666666667D-01)*AMP(23)+(1.666666666666667D $ -01)*AMP(24)+(1.666666666666667D-01)*AMP(30) - $ +((0.000000000000000D+00,1.666666666666667D-01))*TMP_JAMP(13) + $ +((0.000000000000000D+00,1.666666666666667D-01))*TMP_JAMP(9) JAMP(11,1) = (-5.000000000000000D-01)*AMP(21)+( $ -5.000000000000000D-01)*AMP(24)+((0.000000000000000D+00, $ -5.000000000000000D-01))*AMP(25)+(-5.000000000000000D-01) $ *AMP(26)+((0.000000000000000D+00,5.000000000000000D-01))*AMP(27) - $ +(-5.000000000000000D-01)*TMP_JAMP(14)+((0.000000000000000D+00, - $ -5.000000000000000D-01))*TMP_JAMP(15) + $ +((0.000000000000000D+00,-5.000000000000000D-01))*TMP_JAMP(10) + $ +(-5.000000000000000D-01)*TMP_JAMP(17) JAMP(12,1) = (1.666666666666667D-01)*AMP(11)+(1.666666666666667D $ -01)*AMP(14)+(1.666666666666667D-01)*AMP(26) - $ +((0.000000000000000D+00,1.666666666666667D-01))*TMP_JAMP(16) + $ +((0.000000000000000D+00,1.666666666666667D-01))*TMP_JAMP(11) IF(INIT_MODE)THEN DO I=1, NGRAPHS diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f index 7ce63300ba..39422dc34c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f @@ -397,7 +397,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C LOCAL VARIABLES C INTEGER I,J,M,N - COMPLEX*16 ZTEMP, TMP_JAMP(16) + COMPLEX*16 ZTEMP, TMP_JAMP(17) REAL*8 CF(NCOLOR,NCOLOR) COMPLEX*16 AMP(NGRAPHS), JAMP(NCOLOR,NAMPSO) COMPLEX*16 W(6,NWAVEFUNCS) @@ -666,32 +666,35 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) TMP_JAMP(3) = AMP(35) + AMP(37) ! 
used 4 times TMP_JAMP(2) = AMP(3) + AMP(34) ! used 4 times TMP_JAMP(1) = AMP(36) + AMP(38) ! used 4 times - TMP_JAMP(16) = AMP(1) + AMP(5) ! used 2 times - TMP_JAMP(15) = TMP_JAMP(3) + TMP_JAMP(2) ! used 2 times - TMP_JAMP(14) = AMP(1) + AMP(2) ! used 2 times - TMP_JAMP(13) = AMP(4) + AMP(5) ! used 2 times - TMP_JAMP(12) = AMP(2) + AMP(4) ! used 2 times - TMP_JAMP(11) = TMP_JAMP(2) - TMP_JAMP(1) ! used 2 times - TMP_JAMP(10) = AMP(4) + ((-0.000000000000000D+00 + TMP_JAMP(17) = TMP_JAMP(3) + TMP_JAMP(2) ! used 2 times + TMP_JAMP(16) = TMP_JAMP(3) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(33) ! used 2 times + TMP_JAMP(15) = TMP_JAMP(3) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(10) ! used 2 times + TMP_JAMP(14) = TMP_JAMP(1) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(25) ! used 2 times + TMP_JAMP(13) = TMP_JAMP(1) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(29) ! used 2 times + TMP_JAMP(12) = TMP_JAMP(1) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(20) ! used 2 times + TMP_JAMP(11) = AMP(1) + AMP(5) ! used 2 times + TMP_JAMP(10) = AMP(1) + AMP(2) ! used 2 times + TMP_JAMP(9) = AMP(4) + AMP(5) ! used 2 times + TMP_JAMP(8) = AMP(2) + AMP(4) ! used 2 times + TMP_JAMP(7) = AMP(4) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(9) ! used 2 times - TMP_JAMP(9) = AMP(2) + ((-0.000000000000000D+00 + TMP_JAMP(6) = AMP(2) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(32) ! used 2 times - TMP_JAMP(8) = TMP_JAMP(3) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(10) ! used 2 times - TMP_JAMP(7) = AMP(5) + ((0.000000000000000D+00 + TMP_JAMP(5) = AMP(5) + ((0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(28) ! used 2 times - TMP_JAMP(6) = AMP(1) + ((0.000000000000000D+00 + TMP_JAMP(4) = AMP(1) + ((0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(19) ! 
used 2 times - TMP_JAMP(5) = TMP_JAMP(1) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(20) ! used 2 times - TMP_JAMP(4) = TMP_JAMP(3) + TMP_JAMP(1) ! used 2 times JAMP(1,1) = (5.000000000000000D-01)*AMP(11)+(5.000000000000000D $ -01)*AMP(13)+((0.000000000000000D+00,-5.000000000000000D-01)) $ *AMP(15)+(5.000000000000000D-01)*AMP(16)+(5.000000000000000D-01) - $ *AMP(18)+((0.000000000000000D+00,5.000000000000000D-01))*AMP(20) - $ +((0.000000000000000D+00,5.000000000000000D-01))*AMP(27) - $ +((0.000000000000000D+00,-5.000000000000000D-01))*AMP(33)+( - $ -5.000000000000000D-01)*TMP_JAMP(4) + $ *AMP(18)+((0.000000000000000D+00,5.000000000000000D-01))*AMP(27) + $ +(-5.000000000000000D-01)*TMP_JAMP(12)+(-5.000000000000000D-01) + $ *TMP_JAMP(16) JAMP(2,1) = (-1.666666666666667D-01)*AMP(16)+( $ -1.666666666666667D-01)*AMP(17)+(-1.666666666666667D-01)*AMP(21) $ +(-1.666666666666667D-01)*AMP(22) @@ -699,46 +702,45 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ -01)*AMP(8)+(-1.666666666666667D-01)*AMP(12)+( $ -1.666666666666667D-01)*AMP(13) JAMP(4,1) = (5.000000000000000D-01)*AMP(6)+(5.000000000000000D - $ -01)*AMP(7)+((0.000000000000000D+00,5.000000000000000D-01)) - $ *AMP(10)+(5.000000000000000D-01)*AMP(22)+(5.000000000000000D-01) - $ *AMP(23)+((0.000000000000000D+00,-5.000000000000000D-01)) - $ *AMP(25)+((0.000000000000000D+00,-5.000000000000000D-01)) - $ *AMP(29)+((0.000000000000000D+00,5.000000000000000D-01))*AMP(31) - $ +(-5.000000000000000D-01)*TMP_JAMP(4) + $ -01)*AMP(7)+(5.000000000000000D-01)*AMP(22)+(5.000000000000000D + $ -01)*AMP(23)+((0.000000000000000D+00,-5.000000000000000D-01)) + $ *AMP(25)+((0.000000000000000D+00,5.000000000000000D-01))*AMP(31) + $ +(-5.000000000000000D-01)*TMP_JAMP(13)+(-5.000000000000000D-01) + $ *TMP_JAMP(15) JAMP(5,1) = (5.000000000000000D-01)*AMP(17)+((0.000000000000000D $ +00,5.000000000000000D-01))*AMP(29)+(-5.000000000000000D-01) - $ *TMP_JAMP(2)+(5.000000000000000D-01)*TMP_JAMP(5) - $ 
+((0.000000000000000D+00,-5.000000000000000D-01))*TMP_JAMP(6) - $ +((0.000000000000000D+00,-5.000000000000000D-01))*TMP_JAMP(7) + $ *TMP_JAMP(2)+((0.000000000000000D+00,-5.000000000000000D-01)) + $ *TMP_JAMP(4)+((0.000000000000000D+00,-5.000000000000000D-01)) + $ *TMP_JAMP(5)+(5.000000000000000D-01)*TMP_JAMP(12) JAMP(6,1) = (-1.666666666666667D-01)*AMP(18) + $ +((0.000000000000000D+00,1.666666666666667D-01))*TMP_JAMP(4) $ +((0.000000000000000D+00,1.666666666666667D-01))*TMP_JAMP(6) - $ +((0.000000000000000D+00,1.666666666666667D-01))*TMP_JAMP(9) JAMP(7,1) = (-1.666666666666667D-01)*AMP(6)+((0.000000000000000D - $ +00,1.666666666666667D-01))*TMP_JAMP(7)+((0.000000000000000D+00 - $ ,1.666666666666667D-01))*TMP_JAMP(10) + $ +00,1.666666666666667D-01))*TMP_JAMP(5)+((0.000000000000000D+00 + $ ,1.666666666666667D-01))*TMP_JAMP(7) JAMP(8,1) = (5.000000000000000D-01)*AMP(8)+((0.000000000000000D $ +00,5.000000000000000D-01))*AMP(33)+(5.000000000000000D-01) - $ *TMP_JAMP(2)+(5.000000000000000D-01)*TMP_JAMP(8) - $ +((0.000000000000000D+00,-5.000000000000000D-01))*TMP_JAMP(9) - $ +((0.000000000000000D+00,-5.000000000000000D-01))*TMP_JAMP(10) + $ *TMP_JAMP(2)+((0.000000000000000D+00,-5.000000000000000D-01)) + $ *TMP_JAMP(6)+((0.000000000000000D+00,-5.000000000000000D-01)) + $ *TMP_JAMP(7)+(5.000000000000000D-01)*TMP_JAMP(15) JAMP(9,1) = (5.000000000000000D-01)*AMP(21)+(5.000000000000000D - $ -01)*AMP(24)+((0.000000000000000D+00,5.000000000000000D-01)) - $ *AMP(25)+(5.000000000000000D-01)*AMP(26)+((0.000000000000000D - $ +00,-5.000000000000000D-01))*AMP(27)+(-5.000000000000000D-01) - $ *TMP_JAMP(11)+((0.000000000000000D+00,5.000000000000000D-01)) - $ *TMP_JAMP(12) + $ -01)*AMP(24)+(5.000000000000000D-01)*AMP(26) + $ +((0.000000000000000D+00,-5.000000000000000D-01))*AMP(27)+( + $ -5.000000000000000D-01)*TMP_JAMP(2)+((0.000000000000000D+00 + $ ,5.000000000000000D-01))*TMP_JAMP(8)+(5.000000000000000D-01) + $ *TMP_JAMP(14) JAMP(10,1) = (-1.666666666666667D-01)*AMP(11)+( $ 
-1.666666666666667D-01)*AMP(14)+(-1.666666666666667D-01)*AMP(26) - $ +((0.000000000000000D+00,-1.666666666666667D-01))*TMP_JAMP(13) + $ +((0.000000000000000D+00,-1.666666666666667D-01))*TMP_JAMP(9) JAMP(11,1) = (-1.666666666666667D-01)*AMP(23)+( $ -1.666666666666667D-01)*AMP(24)+(-1.666666666666667D-01)*AMP(30) - $ +((0.000000000000000D+00,-1.666666666666667D-01))*TMP_JAMP(14) + $ +((0.000000000000000D+00,-1.666666666666667D-01))*TMP_JAMP(10) JAMP(12,1) = (5.000000000000000D-01)*AMP(12)+(5.000000000000000D $ -01)*AMP(14)+((0.000000000000000D+00,5.000000000000000D-01)) $ *AMP(15)+(5.000000000000000D-01)*AMP(30)+((0.000000000000000D - $ +00,-5.000000000000000D-01))*AMP(31)+(5.000000000000000D-01) - $ *TMP_JAMP(15)+((0.000000000000000D+00,5.000000000000000D-01)) - $ *TMP_JAMP(16) + $ +00,-5.000000000000000D-01))*AMP(31)+((0.000000000000000D+00 + $ ,5.000000000000000D-01))*TMP_JAMP(11)+(5.000000000000000D-01) + $ *TMP_JAMP(17) IF(INIT_MODE)THEN DO I=1, NGRAPHS diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f index efcaed5bd1..9e27e48c99 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f @@ -523,17 +523,17 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) TMP_JAMP(2) = AMP(1) + AMP(4) ! used 3 times TMP_JAMP(1) = AMP(1) + AMP(2) ! used 3 times TMP_JAMP(8) = TMP_JAMP(5) + TMP_JAMP(4) ! used 2 times - TMP_JAMP(7) = TMP_JAMP(4) + TMP_JAMP(1) ! used 2 times - TMP_JAMP(6) = TMP_JAMP(5) + TMP_JAMP(1) ! used 2 times + TMP_JAMP(7) = TMP_JAMP(5) + TMP_JAMP(1) ! used 2 times + TMP_JAMP(6) = TMP_JAMP(4) + TMP_JAMP(1) ! 
used 2 times JAMP(1,1) = ((0.000000000000000D+00,-2.500000000000000D-01)) $ *AMP(3)+(2.500000000000000D-01)*AMP(7)+(2.500000000000000D-01) $ *TMP_JAMP(2) - JAMP(2,1) = (-8.333333333333333D-02)*TMP_JAMP(6) - JAMP(3,1) = (-8.333333333333333D-02)*TMP_JAMP(7) + JAMP(2,1) = (-8.333333333333333D-02)*TMP_JAMP(7) + JAMP(3,1) = (-8.333333333333333D-02)*TMP_JAMP(6) JAMP(4,1) = ((0.000000000000000D+00,2.500000000000000D-01)) $ *AMP(3)+(2.500000000000000D-01)*AMP(6)+(2.500000000000000D-01) $ *TMP_JAMP(3) - JAMP(5,1) = (2.777777777777778D-02)*TMP_JAMP(4) + JAMP(5,1) = (2.777777777777778D-02)*TMP_JAMP(5) $ +(2.777777777777778D-02)*TMP_JAMP(6) JAMP(6,1) = (-8.333333333333333D-02)*TMP_JAMP(8) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f index 3172975ef4..6bdc5db576 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f @@ -534,19 +534,19 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) TMP_JAMP(3) = AMP(2) + AMP(5) ! used 3 times TMP_JAMP(2) = AMP(1) + AMP(4) ! used 3 times TMP_JAMP(1) = AMP(1) + AMP(2) ! used 3 times - TMP_JAMP(8) = TMP_JAMP(4) + TMP_JAMP(1) ! used 2 times - TMP_JAMP(7) = TMP_JAMP(5) + TMP_JAMP(1) ! used 2 times - TMP_JAMP(6) = TMP_JAMP(5) + TMP_JAMP(4) ! used 2 times - JAMP(1,1) = (8.333333333333333D-02)*TMP_JAMP(6) + TMP_JAMP(8) = TMP_JAMP(5) + TMP_JAMP(1) ! used 2 times + TMP_JAMP(7) = TMP_JAMP(5) + TMP_JAMP(4) ! used 2 times + TMP_JAMP(6) = TMP_JAMP(4) + TMP_JAMP(1) ! 
used 2 times + JAMP(1,1) = (8.333333333333333D-02)*TMP_JAMP(7) JAMP(2,1) = ((0.000000000000000D+00,-2.500000000000000D-01)) $ *AMP(3)+(-2.500000000000000D-01)*AMP(6)+(-2.500000000000000D-01) $ *TMP_JAMP(3) JAMP(3,1) = ((0.000000000000000D+00,2.500000000000000D-01)) $ *AMP(3)+(-2.500000000000000D-01)*AMP(7)+(-2.500000000000000D-01) $ *TMP_JAMP(2) - JAMP(4,1) = (8.333333333333333D-02)*TMP_JAMP(8) - JAMP(5,1) = (8.333333333333333D-02)*TMP_JAMP(7) - JAMP(6,1) = (-2.777777777777778D-02)*TMP_JAMP(1)+( + JAMP(4,1) = (8.333333333333333D-02)*TMP_JAMP(6) + JAMP(5,1) = (8.333333333333333D-02)*TMP_JAMP(8) + JAMP(6,1) = (-2.777777777777778D-02)*TMP_JAMP(5)+( $ -2.777777777777778D-02)*TMP_JAMP(6) IF(INIT_MODE)THEN diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f index 77fe909abc..8b2cf62531 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f @@ -544,30 +544,30 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) TMP_JAMP(3) = AMP(2) + AMP(5) ! used 3 times TMP_JAMP(2) = AMP(1) + AMP(4) ! used 3 times TMP_JAMP(1) = AMP(1) + AMP(2) ! used 3 times - TMP_JAMP(16) = TMP_JAMP(8) + TMP_JAMP(5) ! used 2 times - TMP_JAMP(15) = TMP_JAMP(9) + TMP_JAMP(5) ! used 2 times - TMP_JAMP(14) = TMP_JAMP(9) + TMP_JAMP(8) ! used 2 times - TMP_JAMP(13) = TMP_JAMP(10) + TMP_JAMP(4) ! used 2 times - TMP_JAMP(12) = TMP_JAMP(4) + TMP_JAMP(1) ! used 2 times - TMP_JAMP(11) = TMP_JAMP(10) + TMP_JAMP(1) ! used 2 times + TMP_JAMP(16) = TMP_JAMP(10) + TMP_JAMP(4) ! used 2 times + TMP_JAMP(15) = TMP_JAMP(10) + TMP_JAMP(1) ! used 2 times + TMP_JAMP(14) = TMP_JAMP(9) + TMP_JAMP(5) ! used 2 times + TMP_JAMP(13) = TMP_JAMP(9) + TMP_JAMP(8) ! used 2 times + TMP_JAMP(12) = TMP_JAMP(8) + TMP_JAMP(5) ! used 2 times + TMP_JAMP(11) = TMP_JAMP(4) + TMP_JAMP(1) ! 
used 2 times JAMP(1,1) = ((0.000000000000000D+00,-2.500000000000000D-01)) $ *AMP(3)+(2.500000000000000D-01)*AMP(14)+(2.500000000000000D-01) - $ *TMP_JAMP(2)+(8.333333333333333D-02)*TMP_JAMP(15) + $ *TMP_JAMP(2)+(8.333333333333333D-02)*TMP_JAMP(14) JAMP(2,1) = ((0.000000000000000D+00,2.500000000000000D-01)) $ *AMP(8)+(-2.500000000000000D-01)*AMP(12)+(-2.500000000000000D - $ -01)*TMP_JAMP(6)+(-8.333333333333333D-02)*TMP_JAMP(11) + $ -01)*TMP_JAMP(6)+(-8.333333333333333D-02)*TMP_JAMP(15) JAMP(3,1) = ((0.000000000000000D+00,-2.500000000000000D-01)) $ *AMP(8)+(-2.500000000000000D-01)*AMP(11)+(-2.500000000000000D - $ -01)*TMP_JAMP(7)+(-8.333333333333333D-02)*TMP_JAMP(12) + $ -01)*TMP_JAMP(7)+(-8.333333333333333D-02)*TMP_JAMP(11) JAMP(4,1) = ((0.000000000000000D+00,2.500000000000000D-01)) $ *AMP(3)+(2.500000000000000D-01)*AMP(13)+(2.500000000000000D-01) - $ *TMP_JAMP(3)+(8.333333333333333D-02)*TMP_JAMP(16) - JAMP(5,1) = (2.777777777777778D-02)*TMP_JAMP(4) + $ *TMP_JAMP(3)+(8.333333333333333D-02)*TMP_JAMP(12) + JAMP(5,1) = (2.777777777777778D-02)*TMP_JAMP(10) $ +(2.777777777777778D-02)*TMP_JAMP(11)+(8.333333333333333D-02) - $ *TMP_JAMP(14) - JAMP(6,1) = (-2.777777777777778D-02)*TMP_JAMP(5)+( - $ -8.333333333333333D-02)*TMP_JAMP(13)+(-2.777777777777778D-02) - $ *TMP_JAMP(14) + $ *TMP_JAMP(13) + JAMP(6,1) = (-2.777777777777778D-02)*TMP_JAMP(9)+( + $ -2.777777777777778D-02)*TMP_JAMP(12)+(-8.333333333333333D-02) + $ *TMP_JAMP(16) IF(INIT_MODE)THEN DO I=1, NGRAPHS diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f index 80fb12abe5..a843f4656a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f @@ -397,7 +397,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C LOCAL VARIABLES C INTEGER I,J,M,N - COMPLEX*16 ZTEMP, TMP_JAMP(19) + COMPLEX*16 ZTEMP, TMP_JAMP(17) REAL*8 
CF(NCOLOR,NCOLOR) COMPLEX*16 AMP(NGRAPHS), JAMP(NCOLOR,NAMPSO) COMPLEX*16 W(6,NWAVEFUNCS) @@ -668,29 +668,25 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) TMP_JAMP(3) = AMP(36) + AMP(38) ! used 4 times TMP_JAMP(2) = AMP(35) + AMP(37) ! used 4 times TMP_JAMP(1) = AMP(3) + AMP(34) ! used 4 times - TMP_JAMP(19) = TMP_JAMP(3) + TMP_JAMP(2) ! used 2 times - TMP_JAMP(18) = TMP_JAMP(2) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(33) ! used 2 times - TMP_JAMP(17) = TMP_JAMP(2) + ((0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(15) ! used 2 times + TMP_JAMP(17) = TMP_JAMP(3) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(25) ! used 2 times TMP_JAMP(16) = TMP_JAMP(3) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(27) ! used 2 times - TMP_JAMP(15) = TMP_JAMP(3) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * AMP(20) ! used 2 times - TMP_JAMP(14) = AMP(1) + AMP(2) ! used 2 times - TMP_JAMP(13) = AMP(2) + ((-0.000000000000000D+00 + TMP_JAMP(15) = TMP_JAMP(3) - TMP_JAMP(1) ! used 2 times + TMP_JAMP(14) = TMP_JAMP(2) + ((0.000000000000000D+00, + $ -1.000000000000000D+00)) * AMP(10) ! used 2 times + TMP_JAMP(13) = TMP_JAMP(2) + ((0.000000000000000D+00 + $ ,1.000000000000000D+00)) * AMP(15) ! used 2 times + TMP_JAMP(12) = TMP_JAMP(2) + TMP_JAMP(1) ! used 2 times + TMP_JAMP(11) = AMP(1) + AMP(2) ! used 2 times + TMP_JAMP(10) = AMP(2) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(32) ! used 2 times - TMP_JAMP(12) = AMP(1) + ((-0.000000000000000D+00 + TMP_JAMP(9) = AMP(1) + ((-0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(19) ! used 2 times - TMP_JAMP(11) = AMP(1) + AMP(5) ! used 2 times - TMP_JAMP(10) = TMP_JAMP(2) + TMP_JAMP(1) ! used 2 times - TMP_JAMP(9) = AMP(5) + ((0.000000000000000D+00 + TMP_JAMP(8) = AMP(1) + AMP(5) ! used 2 times + TMP_JAMP(7) = AMP(5) + ((0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(28) ! used 2 times - TMP_JAMP(8) = TMP_JAMP(3) - TMP_JAMP(1) ! 
used 2 times - TMP_JAMP(7) = AMP(4) + ((-0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(26) ! used 2 times - TMP_JAMP(6) = TMP_JAMP(1) + ((0.000000000000000D+00, - $ -1.000000000000000D+00)) * AMP(2) ! used 2 times + TMP_JAMP(6) = AMP(2) + AMP(4) ! used 2 times TMP_JAMP(5) = AMP(4) + ((0.000000000000000D+00 $ ,1.000000000000000D+00)) * AMP(9) ! used 2 times TMP_JAMP(4) = AMP(4) + AMP(5) ! used 2 times @@ -701,33 +697,35 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ -01)*AMP(14)+(1.666666666666667D-01)*AMP(26) $ +((0.000000000000000D+00,1.666666666666667D-01))*TMP_JAMP(4) JAMP(3,1) = (-5.000000000000000D-01)*AMP(8)+((0.000000000000000D - $ +00,5.000000000000000D-01))*AMP(10)+(-5.000000000000000D-01) - $ *AMP(32)+((0.000000000000000D+00,5.000000000000000D-01)) - $ *TMP_JAMP(5)+(-5.000000000000000D-01)*TMP_JAMP(6)+( - $ -5.000000000000000D-01)*TMP_JAMP(18) + $ +00,5.000000000000000D-01))*AMP(10)+((0.000000000000000D+00, + $ -5.000000000000000D-01))*AMP(33)+((0.000000000000000D+00 + $ ,5.000000000000000D-01))*TMP_JAMP(5)+((0.000000000000000D+00 + $ ,5.000000000000000D-01))*TMP_JAMP(10)+(-5.000000000000000D-01) + $ *TMP_JAMP(12) JAMP(4,1) = (-5.000000000000000D-01)*AMP(21)+( $ -5.000000000000000D-01)*AMP(24)+((0.000000000000000D+00, - $ -5.000000000000000D-01))*AMP(25)+(5.000000000000000D-01) - $ *TMP_JAMP(6)+((0.000000000000000D+00,-5.000000000000000D-01)) - $ *TMP_JAMP(7)+(-5.000000000000000D-01)*TMP_JAMP(16) + $ -5.000000000000000D-01))*AMP(25)+(-5.000000000000000D-01) + $ *AMP(26)+((0.000000000000000D+00,5.000000000000000D-01))*AMP(27) + $ +((0.000000000000000D+00,-5.000000000000000D-01))*TMP_JAMP(6)+( + $ -5.000000000000000D-01)*TMP_JAMP(15) JAMP(5,1) = (-5.000000000000000D-01)*AMP(17) $ +((0.000000000000000D+00,5.000000000000000D-01))*AMP(20) - $ +((0.000000000000000D+00,-5.000000000000000D-01))*AMP(29)+( - $ -5.000000000000000D-01)*TMP_JAMP(8)+((0.000000000000000D+00 - $ ,5.000000000000000D-01))*TMP_JAMP(9)+((0.000000000000000D+00 - $ 
,5.000000000000000D-01))*TMP_JAMP(12) + $ +((0.000000000000000D+00,-5.000000000000000D-01))*AMP(29) + $ +((0.000000000000000D+00,5.000000000000000D-01))*TMP_JAMP(7) + $ +((0.000000000000000D+00,5.000000000000000D-01))*TMP_JAMP(9)+( + $ -5.000000000000000D-01)*TMP_JAMP(15) JAMP(6,1) = (-5.000000000000000D-01)*AMP(12)+( $ -5.000000000000000D-01)*AMP(14)+((0.000000000000000D+00, $ -5.000000000000000D-01))*AMP(15)+(-5.000000000000000D-01) $ *AMP(30)+((0.000000000000000D+00,5.000000000000000D-01))*AMP(31) - $ +(-5.000000000000000D-01)*TMP_JAMP(10)+((0.000000000000000D+00, - $ -5.000000000000000D-01))*TMP_JAMP(11) + $ +((0.000000000000000D+00,-5.000000000000000D-01))*TMP_JAMP(8)+( + $ -5.000000000000000D-01)*TMP_JAMP(12) JAMP(7,1) = (1.666666666666667D-01)*AMP(18)+((0.000000000000000D - $ +00,-1.666666666666667D-01))*TMP_JAMP(12)+((0.000000000000000D - $ +00,-1.666666666666667D-01))*TMP_JAMP(13) + $ +00,-1.666666666666667D-01))*TMP_JAMP(9)+((0.000000000000000D + $ +00,-1.666666666666667D-01))*TMP_JAMP(10) JAMP(8,1) = (1.666666666666667D-01)*AMP(23)+(1.666666666666667D $ -01)*AMP(24)+(1.666666666666667D-01)*AMP(30) - $ +((0.000000000000000D+00,1.666666666666667D-01))*TMP_JAMP(14) + $ +((0.000000000000000D+00,1.666666666666667D-01))*TMP_JAMP(11) JAMP(9,1) = (1.666666666666667D-01)*AMP(16)+(1.666666666666667D $ -01)*AMP(17)+(1.666666666666667D-01)*AMP(21) $ +(1.666666666666667D-01)*AMP(22) @@ -736,15 +734,13 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ +(-5.000000000000000D-01)*AMP(18)+((0.000000000000000D+00, $ -5.000000000000000D-01))*AMP(27)+((0.000000000000000D+00 $ ,5.000000000000000D-01))*AMP(33)+(5.000000000000000D-01) - $ *TMP_JAMP(15)+(5.000000000000000D-01)*TMP_JAMP(17) + $ *TMP_JAMP(13)+(5.000000000000000D-01)*TMP_JAMP(16) JAMP(11,1) = (-5.000000000000000D-01)*AMP(6)+( - $ -5.000000000000000D-01)*AMP(7)+((0.000000000000000D+00, - $ -5.000000000000000D-01))*AMP(10)+(-5.000000000000000D-01) - $ 
*AMP(22)+(-5.000000000000000D-01)*AMP(23)+((0.000000000000000D - $ +00,5.000000000000000D-01))*AMP(25)+((0.000000000000000D+00 + $ -5.000000000000000D-01)*AMP(7)+(-5.000000000000000D-01)*AMP(22) + $ +(-5.000000000000000D-01)*AMP(23)+((0.000000000000000D+00 $ ,5.000000000000000D-01))*AMP(29)+((0.000000000000000D+00, $ -5.000000000000000D-01))*AMP(31)+(5.000000000000000D-01) - $ *TMP_JAMP(19) + $ *TMP_JAMP(14)+(5.000000000000000D-01)*TMP_JAMP(17) JAMP(12,1) = (1.666666666666667D-01)*AMP(7)+(1.666666666666667D $ -01)*AMP(8)+(1.666666666666667D-01)*AMP(12)+(1.666666666666667D $ -01)*AMP(13) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f index 3544d80d72..6d8f6b4ed8 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f @@ -544,30 +544,30 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) TMP_JAMP(3) = AMP(1) + AMP(4) ! used 3 times TMP_JAMP(2) = AMP(2) + AMP(5) ! used 3 times TMP_JAMP(1) = AMP(1) + AMP(2) ! used 3 times - TMP_JAMP(16) = TMP_JAMP(8) + TMP_JAMP(5) ! used 2 times - TMP_JAMP(15) = TMP_JAMP(9) + TMP_JAMP(5) ! used 2 times - TMP_JAMP(14) = TMP_JAMP(9) + TMP_JAMP(8) ! used 2 times - TMP_JAMP(13) = TMP_JAMP(10) + TMP_JAMP(4) ! used 2 times - TMP_JAMP(12) = TMP_JAMP(10) + TMP_JAMP(1) ! used 2 times + TMP_JAMP(16) = TMP_JAMP(10) + TMP_JAMP(4) ! used 2 times + TMP_JAMP(15) = TMP_JAMP(10) + TMP_JAMP(1) ! used 2 times + TMP_JAMP(14) = TMP_JAMP(9) + TMP_JAMP(5) ! used 2 times + TMP_JAMP(13) = TMP_JAMP(9) + TMP_JAMP(8) ! used 2 times + TMP_JAMP(12) = TMP_JAMP(8) + TMP_JAMP(5) ! used 2 times TMP_JAMP(11) = TMP_JAMP(4) + TMP_JAMP(1) ! 
used 2 times JAMP(1,1) = (2.777777777777778D-02)*TMP_JAMP(10) $ +(2.777777777777778D-02)*TMP_JAMP(11)+(8.333333333333333D-02) - $ *TMP_JAMP(14) + $ *TMP_JAMP(13) JAMP(2,1) = ((0.000000000000000D+00,-2.500000000000000D-01)) $ *AMP(8)+(-2.500000000000000D-01)*AMP(11)+(-2.500000000000000D $ -01)*TMP_JAMP(7)+(-8.333333333333333D-02)*TMP_JAMP(11) JAMP(3,1) = ((0.000000000000000D+00,2.500000000000000D-01)) $ *AMP(8)+(-2.500000000000000D-01)*AMP(12)+(-2.500000000000000D - $ -01)*TMP_JAMP(6)+(-8.333333333333333D-02)*TMP_JAMP(12) + $ -01)*TMP_JAMP(6)+(-8.333333333333333D-02)*TMP_JAMP(15) JAMP(4,1) = ((0.000000000000000D+00,2.500000000000000D-01)) $ *AMP(3)+(2.500000000000000D-01)*AMP(13)+(2.500000000000000D-01) - $ *TMP_JAMP(2)+(8.333333333333333D-02)*TMP_JAMP(16) + $ *TMP_JAMP(2)+(8.333333333333333D-02)*TMP_JAMP(12) JAMP(5,1) = ((0.000000000000000D+00,-2.500000000000000D-01)) $ *AMP(3)+(2.500000000000000D-01)*AMP(14)+(2.500000000000000D-01) - $ *TMP_JAMP(3)+(8.333333333333333D-02)*TMP_JAMP(15) - JAMP(6,1) = (-2.777777777777778D-02)*TMP_JAMP(5)+( - $ -8.333333333333333D-02)*TMP_JAMP(13)+(-2.777777777777778D-02) - $ *TMP_JAMP(14) + $ *TMP_JAMP(3)+(8.333333333333333D-02)*TMP_JAMP(14) + JAMP(6,1) = (-2.777777777777778D-02)*TMP_JAMP(9)+( + $ -2.777777777777778D-02)*TMP_JAMP(12)+(-8.333333333333333D-02) + $ *TMP_JAMP(16) IF(INIT_MODE)THEN DO I=1, NGRAPHS diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f index 61d4e59741..53f591633e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f @@ -523,14 +523,14 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) TMP_JAMP(2) = AMP(2) + AMP(5) ! used 3 times TMP_JAMP(1) = AMP(1) + AMP(2) ! used 3 times TMP_JAMP(8) = TMP_JAMP(5) + TMP_JAMP(4) ! used 2 times - TMP_JAMP(7) = TMP_JAMP(4) + TMP_JAMP(1) ! 
used 2 times - TMP_JAMP(6) = TMP_JAMP(5) + TMP_JAMP(1) ! used 2 times + TMP_JAMP(7) = TMP_JAMP(5) + TMP_JAMP(1) ! used 2 times + TMP_JAMP(6) = TMP_JAMP(4) + TMP_JAMP(1) ! used 2 times JAMP(1,1) = ((0.000000000000000D+00,2.500000000000000D-01)) $ *AMP(3)+(2.500000000000000D-01)*AMP(6)+(2.500000000000000D-01) $ *TMP_JAMP(2) - JAMP(2,1) = (-8.333333333333333D-02)*TMP_JAMP(6) - JAMP(3,1) = (-8.333333333333333D-02)*TMP_JAMP(7) - JAMP(4,1) = (2.777777777777778D-02)*TMP_JAMP(4) + JAMP(2,1) = (-8.333333333333333D-02)*TMP_JAMP(7) + JAMP(3,1) = (-8.333333333333333D-02)*TMP_JAMP(6) + JAMP(4,1) = (2.777777777777778D-02)*TMP_JAMP(5) $ +(2.777777777777778D-02)*TMP_JAMP(6) JAMP(5,1) = ((0.000000000000000D+00,-2.500000000000000D-01)) $ *AMP(3)+(2.500000000000000D-01)*AMP(7)+(2.500000000000000D-01) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f index 1b50f51264..dce10b9553 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f @@ -544,30 +544,30 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) TMP_JAMP(3) = AMP(1) + AMP(4) ! used 3 times TMP_JAMP(2) = AMP(2) + AMP(5) ! used 3 times TMP_JAMP(1) = AMP(1) + AMP(2) ! used 3 times - TMP_JAMP(16) = TMP_JAMP(8) + TMP_JAMP(5) ! used 2 times - TMP_JAMP(15) = TMP_JAMP(9) + TMP_JAMP(5) ! used 2 times - TMP_JAMP(14) = TMP_JAMP(9) + TMP_JAMP(8) ! used 2 times - TMP_JAMP(13) = TMP_JAMP(10) + TMP_JAMP(4) ! used 2 times - TMP_JAMP(12) = TMP_JAMP(4) + TMP_JAMP(1) ! used 2 times - TMP_JAMP(11) = TMP_JAMP(10) + TMP_JAMP(1) ! used 2 times + TMP_JAMP(16) = TMP_JAMP(10) + TMP_JAMP(4) ! used 2 times + TMP_JAMP(15) = TMP_JAMP(10) + TMP_JAMP(1) ! used 2 times + TMP_JAMP(14) = TMP_JAMP(9) + TMP_JAMP(5) ! used 2 times + TMP_JAMP(13) = TMP_JAMP(9) + TMP_JAMP(8) ! used 2 times + TMP_JAMP(12) = TMP_JAMP(8) + TMP_JAMP(5) ! 
used 2 times + TMP_JAMP(11) = TMP_JAMP(4) + TMP_JAMP(1) ! used 2 times JAMP(1,1) = ((0.000000000000000D+00,2.500000000000000D-01)) $ *AMP(3)+(2.500000000000000D-01)*AMP(13)+(2.500000000000000D-01) - $ *TMP_JAMP(2)+(8.333333333333333D-02)*TMP_JAMP(16) + $ *TMP_JAMP(2)+(8.333333333333333D-02)*TMP_JAMP(12) JAMP(2,1) = ((0.000000000000000D+00,2.500000000000000D-01)) $ *AMP(8)+(-2.500000000000000D-01)*AMP(12)+(-2.500000000000000D - $ -01)*TMP_JAMP(6)+(-8.333333333333333D-02)*TMP_JAMP(11) + $ -01)*TMP_JAMP(6)+(-8.333333333333333D-02)*TMP_JAMP(15) JAMP(3,1) = ((0.000000000000000D+00,-2.500000000000000D-01)) $ *AMP(8)+(-2.500000000000000D-01)*AMP(11)+(-2.500000000000000D - $ -01)*TMP_JAMP(7)+(-8.333333333333333D-02)*TMP_JAMP(12) - JAMP(4,1) = (2.777777777777778D-02)*TMP_JAMP(4) + $ -01)*TMP_JAMP(7)+(-8.333333333333333D-02)*TMP_JAMP(11) + JAMP(4,1) = (2.777777777777778D-02)*TMP_JAMP(10) $ +(2.777777777777778D-02)*TMP_JAMP(11)+(8.333333333333333D-02) - $ *TMP_JAMP(14) + $ *TMP_JAMP(13) JAMP(5,1) = ((0.000000000000000D+00,-2.500000000000000D-01)) $ *AMP(3)+(2.500000000000000D-01)*AMP(14)+(2.500000000000000D-01) - $ *TMP_JAMP(3)+(8.333333333333333D-02)*TMP_JAMP(15) - JAMP(6,1) = (-2.777777777777778D-02)*TMP_JAMP(5)+( - $ -8.333333333333333D-02)*TMP_JAMP(13)+(-2.777777777777778D-02) - $ *TMP_JAMP(14) + $ *TMP_JAMP(3)+(8.333333333333333D-02)*TMP_JAMP(14) + JAMP(6,1) = (-2.777777777777778D-02)*TMP_JAMP(9)+( + $ -2.777777777777778D-02)*TMP_JAMP(12)+(-8.333333333333333D-02) + $ *TMP_JAMP(16) IF(INIT_MODE)THEN DO I=1, NGRAPHS diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/dummy_fct.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/dummy_fct.f index 076cf29d67..4f7a204b8f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/dummy_fct.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/dummy_fct.f @@ -32,7 +32,7 @@ logical FUNCTION dummy_cuts(P) LOGICAL IS_A_NU(NEXTERNAL),IS_HEAVY(NEXTERNAL) logical do_cuts(nexternal) COMMON 
/TO_SPECISA/IS_A_J,IS_A_A,IS_A_L,IS_A_B,IS_A_NU,IS_HEAVY, - . IS_A_ONIUM, do_cuts + & IS_A_ONIUM, do_cuts dummy_cuts=.true. @@ -118,15 +118,16 @@ double precision function user_dynamical_scale(P) C ************************************************************ -C default for the library implementing a dummt bias function +C default for the library implementing a dummy bias function C ************************************************************ subroutine bias_wgt_custom(p, original_weight, bias_weight) - implicit none + implicit none C C Parameters C include 'nexternal.inc' -C + +C C Arguments C double precision p(0:3, nexternal) @@ -161,3 +162,4 @@ subroutine bias_wgt_custom(p, original_weight, bias_weight) return end subroutine bias_wgt_custom + diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py index e9f421ae5f..824815f47b 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py @@ -537,7 +537,7 @@ def charge_card(self, tag): self.param_card = param_card_reader.ParamCard(param_card) return self.param_card elif tag == 'mgruncard': - self.run_card = RunCard(self[tag]) + self.run_card = RunCard(self[tag], unknown_warning=False) return self.run_card elif tag == 'mg5proccard': proc_card = self[tag].split('\n') @@ -2625,6 +2625,7 @@ class RunCard(ConfigFile): default_include_file = 'run_card.inc' default_autodef_file = 'run.inc' donewarning = [] + include_as_parameter = [] def plugin_input(self, finput): @@ -2671,18 +2672,40 @@ def __new__(cls, finput=None, **opt): elif isinstance(finput, cls): target_class = finput.__class__ elif isinstance(finput, str): + path = finput if '\n' not in finput: finput = open(finput).read() if 'req_acc_FO' in finput: target_class = RunCardNLO else: target_class = RunCardLO + if MADEVENT and os.path.exists(pjoin(MEDIR, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + 
[pjoin(MEDIR, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard + elif not MADEVENT and os.path.exists(path.replace('run_card.dat', '../bin/internal/launch_plugin.py')): + misc.sprint('try to use plugin class') + pydir = path.replace('run_card.dat', '../bin/internal/') + with misc.TMP_variable(sys, 'path', sys.path + [pydir]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard + else: return None target_class.fill_post_set_from_blocks() - - return super(RunCard, cls).__new__(target_class, finput, **opt) + out = super(RunCard, cls).__new__(target_class, finput, **opt) + if not isinstance(out, RunCard): #should not happen but in presence of missmatch of library loaded. + out.__init__(finput, **opt) + return out else: return super(RunCard, cls).__new__(cls, finput, **opt) @@ -2710,7 +2733,7 @@ def __init__(self, *args, **opts): self.system_default = {} self.display_block = [] # set some block to be displayed - + self.fct_mod = {} # {param: (fct_pointer, *argument, **opts)} self.cut_class = {} self.warned=False @@ -2747,7 +2770,7 @@ def get_lepton_densities(cls): def add_param(self, name, value, fortran_name=None, include=True, hidden=False, legacy=False, cut=False, system=False, sys_default=None, - autodef=False, + autodef=False, fct_mod=None, **opts): """ add a parameter to the card. value is the default value and defines the type (int/float/bool/str) of the input. @@ -2761,6 +2784,7 @@ def add_param(self, name, value, fortran_name=None, include=True, If a path (Source/PDF/pdf.inc) the definition will be added within that file Default is False (does not add the definition) entry added in the run_card will automatically have this on True. 
+ fct_mod: defines a function to run if the parameter is modify in the include file options of **opts: - allowed: list of valid options. '*' means anything else should be allowed. empty list means anything possible as well. @@ -2785,8 +2809,12 @@ def add_param(self, name, value, fortran_name=None, include=True, if autodef: self.definition_path[autodef].append(name) self.user_set.add(name) + # function to trigger if a value is modified in the include file + # main target is action to force correct recompilation (like for compilation flag/...) + if fct_mod: + self.fct_mod[name] = fct_mod - def read(self, finput, consistency=True): + def read(self, finput, consistency=True, unknown_warning=True): """Read the input file, this can be a path to a file, a file object, a str with the content of the file.""" @@ -2794,6 +2822,7 @@ def read(self, finput, consistency=True): if "\n" in finput: finput = finput.split('\n') elif os.path.isfile(finput): + self.path = finput finput = open(finput) else: raise Exception("No such file %s" % finput) @@ -2808,7 +2837,7 @@ def read(self, finput, consistency=True): name = name.lower().strip() if name not in self: #looks like an entry added by a user -> add it nicely - self.add_unknown_entry(name, value) + self.add_unknown_entry(name, value, unknown_warning) else: self.set( name, value, user=True) # parameter not set in the run_card can be set to compatiblity value @@ -2820,7 +2849,7 @@ def read(self, finput, consistency=True): logger.warning(str(error)) else: raise - def add_unknown_entry(self, name, value): + def add_unknown_entry(self, name, value, unknow_warning): """function to add an entry to the run_card when the associated parameter does not exists. This is based on the guess_entry_fromname for the various syntax providing input. This then call add_param accordingly. 
@@ -2859,7 +2888,7 @@ def add_unknown_entry(self, name, value): raise Exception("dictionary need to have at least one entry") default['dict']['__type__'] = default[self.guess_type_from_value(default_value[0])] - if name not in RunCard.donewarning: + if name not in RunCard.donewarning and unknow_warning: logger.warning("Found unexpected entry in run_card: \"%s\" with value \"%s\".\n"+\ " The type was assigned to %s. \n"+\ " The definition of that variable will %sbe automatically added to fortran file %s\n"+\ @@ -2897,7 +2926,16 @@ def valid_line(self, line, tmp): return False else: return True - + + + def reset_simd(self, old_value, new_value, name, *args, **opts): + raise Exception('pass in reset simd') + + def make_clean(self,old_value, new_value, name, dir): + raise Exception('pass make clean for ', dir) + + def make_Ptouch(self,old_value, new_value, name, reset): + raise Exception('pass Ptouch for ', reset) def write(self, output_file, template=None, python_template=False, write_hidden=False, template_options=None, **opt): @@ -3072,6 +3110,77 @@ def write(self, output_file, template=None, python_template=False, else: output_file.write(text) + def get_last_value_include(self, output_dir): + """For paraeter in self.fct_mod + parse the associate inc file to get the value of the previous run. + We return a dictionary {name: old_value} + if inc file does not exist we will return the current value (i.e. 
set has no change) + """ + + #remember that + # default_include_file is a class variable + # self.includepath is on the form include_path : [list of param ] + out = {} + + # setup inc_to_parse to be like self.includepath (include_path : [list of param ]) + # BUT only containing the parameter that need to be tracked for the fct_mod option + inc_to_parse = {} + for inc_file, params in self.includepath.items(): + if not inc_file: + continue + if any(p in params for p in self.fct_mod): + inc_to_parse[inc_file] = [name for name in self.includepath[inc_file] if name in self.fct_mod] + + # now loop over the files and ask the associate function + for inc_file, params in inc_to_parse.items(): + if inc_file is True: + inc_file = self.default_include_file + out.update(self.get_value_from_include(inc_file, params, output_dir)) + + return out + + def get_value_from_include(self, path, list_of_params, output_dir): + """for a given include file return the current value of the requested parameter + return a dictionary {name: value} + if path does not exists return the current value in self for all parameter""" + + #WARNING DOES NOT HANDLE LIST/DICT so far + + # handle case where file is missing + if not os.path.exists(pjoin(output_dir,path)): + misc.sprint("include file not existing", pjoin(output_dir,path)) + out = {name: self[name] for name in list_of_params} + + with open(pjoin(output_dir,path), 'r') as fsock: + text = fsock.read() + + for name in list_of_params: + misc.sprint(name, name in self.fortran_name) + misc.sprint(self.fortran_name[name] if name in self.fortran_name[name] else name) + to_track = [self.fortran_name[name] if name in self.fortran_name else name for name in list_of_params] + pattern = re.compile(r"\(?(%(names)s)\s?=\s?([^)]*)\)?" 
% {'names':'|'.join(to_track)}, re.I) + out = dict(pattern.findall(text)) + misc.sprint(out) + for name in list_of_params: + if name in self.fortran_name: + value = out[self.fortran_name[name]] + del out[self.fortran_name[name]] + out[name] = value + + for name, value in out.items(): + try: + out[name] = self.format_variable(value, type(self[name])) + except Exception: + continue + + if len(out) != len(list_of_params): + misc.sprint(list_of_params) + misc.sprint(to_track) + misc.sprint(self.fortran_name) + misc.sprint(text) + raise Exception + return out + def get_default(self, name, default=None, log_level=None): """return self[name] if exist otherwise default. log control if we @@ -3362,71 +3471,93 @@ def write_include_file(self, output_dir, output_file=None): #ensusre that system only parameter are correctly set self.update_system_parameter_for_include() + value_in_old_include = self.get_last_value_include(output_dir) + + if output_dir: self.write_autodef(output_dir, output_file=None) # check/fix status of customised functions self.edit_dummy_fct_from_file(self["custom_fcts"], os.path.dirname(output_dir)) for incname in self.includepath: - if incname is True: - pathinc = self.default_include_file - elif incname is False: - continue - else: - pathinc = incname + self.write_one_include_file(output_dir, incname, output_file) + + for name,value in value_in_old_include.items(): + if value != self[name]: + self.fct_mod[name][0](value, self[name], name, *self.fct_mod[name][1],**self.fct_mod[name][2]) - if output_file: - fsock = output_file + def write_one_include_file(self, output_dir, incname, output_file=None): + """write one include file at the time""" + + misc.sprint(incname) + if incname is True: + pathinc = self.default_include_file + elif incname is False: + return + else: + pathinc = incname + + if output_file: + fsock = output_file + else: + fsock = file_writers.FortranWriter(pjoin(output_dir,pathinc+'.tmp')) + + + for key in self.includepath[incname]: + 
#define the fortran name + if key in self.fortran_name: + fortran_name = self.fortran_name[key] else: - fsock = file_writers.FortranWriter(pjoin(output_dir,pathinc+'.tmp')) - for key in self.includepath[incname]: - #define the fortran name - if key in self.fortran_name: - fortran_name = self.fortran_name[key] + fortran_name = key + + if incname in self.include_as_parameter: + fsock.writelines('INTEGER %s\n' % fortran_name) + #get the value with warning if the user didn't set it + value = self.get_default(key) + if hasattr(self, 'mod_inc_%s' % key): + value = getattr(self, 'mod_inc_%s' % key)(value) + # Special treatment for strings containing a list of + # strings. Convert it to a list of strings + if isinstance(value, list): + # in case of a list, add the length of the list as 0th + # element in fortran. Only in case of integer or float + # list (not for bool nor string) + targettype = self.list_parameter[key] + if targettype is bool: + pass + elif targettype is int: + line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(len(value))) + fsock.writelines(line) + elif targettype is float: + line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(float(len(value)))) + fsock.writelines(line) + # output the rest of the list in fortran + for i,v in enumerate(value): + line = '%s(%s) = %s \n' % (fortran_name, i+1, self.f77_formatting(v)) + fsock.writelines(line) + elif isinstance(value, dict): + for fortran_name, onevalue in value.items(): + line = '%s = %s \n' % (fortran_name, self.f77_formatting(onevalue)) + fsock.writelines(line) + elif isinstance(incname,str) and 'compile' in incname: + if incname in self.include_as_parameter: + line = 'PARAMETER (%s=%s)' %( fortran_name, value) else: - fortran_name = key - - #get the value with warning if the user didn't set it - value = self.get_default(key) - if hasattr(self, 'mod_inc_%s' % key): - value = getattr(self, 'mod_inc_%s' % key)(value) - # Special treatment for strings containing a list of - # strings. 
Convert it to a list of strings - if isinstance(value, list): - # in case of a list, add the length of the list as 0th - # element in fortran. Only in case of integer or float - # list (not for bool nor string) - targettype = self.list_parameter[key] - if targettype is bool: - pass - elif targettype is int: - line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(len(value))) - fsock.writelines(line) - elif targettype is float: - line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(float(len(value)))) - fsock.writelines(line) - # output the rest of the list in fortran - for i,v in enumerate(value): - line = '%s(%s) = %s \n' % (fortran_name, i+1, self.f77_formatting(v)) - fsock.writelines(line) - elif isinstance(value, dict): - for fortran_name, onevalue in value.items(): - line = '%s = %s \n' % (fortran_name, self.f77_formatting(onevalue)) - fsock.writelines(line) - elif isinstance(incname,str) and 'compile' in incname: line = '%s = %s \n' % (fortran_name, value) - fsock.write(line) + fsock.write(line) + else: + if incname in self.include_as_parameter: + line = 'PARAMETER (%s=%s)' %( fortran_name, self.f77_formatting(value)) else: line = '%s = %s \n' % (fortran_name, self.f77_formatting(value)) - fsock.writelines(line) - if not output_file: - fsock.close() - path = pjoin(output_dir,pathinc) - if not os.path.exists(path) or not filecmp.cmp(path, path+'.tmp'): - files.mv(path+'.tmp', path) - else: - os.remove(path+'.tmp') - + fsock.writelines(line) + if not output_file: + fsock.close() + path = pjoin(output_dir,pathinc) + if not os.path.exists(path) or not filecmp.cmp(path, path+'.tmp'): + files.mv(path+'.tmp', path) + else: + os.remove(path+'.tmp') def write_autodef(self, output_dir, output_file=None): """ Add the definition of variable to run.inc if the variable is set with autodef. @@ -3765,13 +3896,14 @@ def remove_all_cut(self): %(tmin_for_channel)s = tmin_for_channel ! 
limit the non-singular reach of --some-- channel of integration related to T-channel diagram (value between -1 and 0), -1 is no impact %(survey_splitting)s = survey_splitting ! for loop-induced control how many core are used at survey for the computation of a single iteration. %(survey_nchannel_per_job)s = survey_nchannel_per_job ! control how many Channel are integrated inside a single job on cluster/multicore - %(refine_evt_by_job)s = refine_evt_by_job ! control the maximal number of events for the first iteration of the refine (larger means less jobs) + %(refine_evt_by_job)s = refine_evt_by_job ! control the maximal number of events for the first iteration of the refine (larger means less jobs) #********************************************************************* -# Compilation flag. No automatic re-compilation (need manual "make clean" in Source) +# Compilation flag. #********************************************************************* %(global_flag)s = global_flag ! fortran optimization flag use for the all code. %(aloha_flag)s = aloha_flag ! fortran optimization flag for aloha function. Suggestions: '-ffast-math' %(matrix_flag)s = matrix_flag ! fortran optimization flag for matrix.f function. Suggestions: '-O3' + %(vector_size)s = vector_size ! 
size designed for SIMD/OpenMP/GPU (number of events in lockstep) """ template_off = '# To see advanced option for Phase-Space optimization: type "update psoptim"' @@ -3927,9 +4059,12 @@ class RunCardLO(RunCard): "get_dummy_x1_x2": pjoin("SubProcesses","dummy_fct.f"), "dummy_boostframe": pjoin("SubProcesses","dummy_fct.f"), "user_dynamical_scale": pjoin("SubProcesses","dummy_fct.f"), + "bias_wgt_custom": pjoin("SubProcesses","dummy_fct.f"), "user_": pjoin("SubProcesses","dummy_fct.f") # all function starting by user will be added to that file } + include_as_parameter = ['vector.inc'] + if MG5DIR: default_run_card = pjoin(MG5DIR, "internal", "default_run_card_lo.dat") @@ -4163,10 +4298,15 @@ def default_setup(self): self.add_param('hel_splitamp', True, hidden=True, include=False, comment='decide if amplitude aloha call can be splitted in two or not when doing helicity per helicity optimization.') self.add_param('hel_zeroamp', True, hidden=True, include=False, comment='decide if zero amplitude can be removed from the computation when doing helicity per helicity optimization.') self.add_param('SDE_strategy', 1, allowed=[1,2], fortran_name="sde_strat", comment="decide how Multi-channel should behaves \"1\" means full single diagram enhanced (hep-ph/0208156), \"2\" use the product of the denominator") - self.add_param('global_flag', '-O', include=False, hidden=True, comment='global fortran compilation flag, suggestion -fbound-check') - self.add_param('aloha_flag', '', include=False, hidden=True, comment='global fortran compilation flag, suggestion: -ffast-math') - self.add_param('matrix_flag', '', include=False, hidden=True, comment='fortran compilation flag for the matrix-element files, suggestion -O3') - + self.add_param('global_flag', '-O', include=False, hidden=True, comment='global fortran compilation flag, suggestion -fbound-check', + fct_mod=(self.make_clean, ('Source'),{})) + self.add_param('aloha_flag', '', include=False, hidden=True, comment='global fortran 
compilation flag, suggestion: -ffast-math', + fct_mod=(self.make_clean, ('Source/DHELAS'),{})) + self.add_param('matrix_flag', '', include=False, hidden=True, comment='fortran compilation flag for the matrix-element files, suggestion -O3', + fct_mod=(self.make_Ptouch, ('matrix'),{})) + self.add_param('vector_size', 1, include='vector.inc', hidden=True, comment='lockstep size for parralelism run', + fortran_name='VECSIZE_MEMMAX', fct_mod=(self.reset_simd,(),{})) + # parameter allowing to define simple cut via the pdg # Special syntax are related to those. (can not be edit directly) self.add_param('pt_min_pdg',{'__type__':0.}, include=False, cut=True) @@ -4188,8 +4328,7 @@ def default_setup(self): self.add_param('mxxmin4pdg',[-1.], system=True) self.add_param('mxxpart_antipart', [False], system=True) - # CUDACPP parameters - self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + def check_validity(self): """ """ diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/check_param_card.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/check_param_card.py index fe874a06a4..71089d7480 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/check_param_card.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/check_param_card.py @@ -85,7 +85,7 @@ def load_str(self, text): self.value= ' '.join(data[len(self.lhacode):]) # check that lhacode are the first entry otherwise return invalid param. 
if ' '.join([str(i) for i in self.lhacode]) != ' '.join(data[:len(self.lhacode)]): - raise InvalidParam + raise InvalidParam("line was %s" % str(data)) else: self.value = data[-1] diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/common_run_interface.py index 5d0187e3fa..14c7f310dc 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/common_run_interface.py @@ -749,13 +749,15 @@ def writeRunWeb(me_dir): class RunWebHandling(object): - def __init__(self, me_dir, crashifpresent=True, warnifpresent=True): + def __init__(self, me_dir, crashifpresent=True, warnifpresent=True, force_run=False): """raise error if RunWeb already exists me_dir is the directory where the write RunWeb""" self.remove_run_web = True self.me_dir = me_dir - + if force_run: + self.remove_run_web = False + return if crashifpresent or warnifpresent: if os.path.exists(pjoin(me_dir, 'RunWeb')): pid = open(pjoin(me_dir, 'RunWeb')).read() @@ -6574,7 +6576,7 @@ def reask(self, *args, **opt): fail_due_to_format = 0 #parameter to avoid infinite loop def postcmd(self, stop, line): - if line not in [None, '0', 'done', '']: + if line not in [None, '0', 'done', '',0]: ending_question = cmd.OneLinePathCompletion.postcmd(self,stop,line) else: ending_question = True @@ -7533,7 +7535,8 @@ def open_file(self, answer): else: raise if time.time() - start < .5: - self.mother_interface.ask("Are you really that fast? If you are using an editor that returns directly. Please confirm that you have finised to edit the file", 'y') + self.mother_interface.ask("Are you really that fast? If you are using an editor that returns directly. 
Please confirm that you have finised to edit the file", 'y', + timeout=False) self.reload_card(path) def reload_card(self, path): diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/extended_cmd.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/extended_cmd.py index a6a8609dce..2f37070580 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/extended_cmd.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/extended_cmd.py @@ -1108,9 +1108,12 @@ def ask(self, question, default, choices=[], path_msg=None, if alias: choices += list(alias.keys()) + + question_instance = obj(question, allow_arg=choices, default=default, mother_interface=self, **opt) - + if fct_timeout is None: + fct_timeout = lambda x: question_instance.postcmd(x, default) if x and default else False if first_cmd: if isinstance(first_cmd, str): question_instance.onecmd(first_cmd) @@ -2271,6 +2274,9 @@ def postcmd(self, stop, line): if n: self.default(line) return self.postcmd(stop, line) + elif self.value is None and line: + self.default(line) + return self.postcmd(stop, line) if not self.casesensitive: for ans in self.allow_arg: if ans.lower() == self.value.lower(): diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/gen_ximprove.py index 3b8ec31215..a88d60b282 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/gen_ximprove.py @@ -154,10 +154,15 @@ def get_helicity(self, to_submit=True, clean=True): p = misc.Popen(['./gensym'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=Pdir) #sym_input = "%(points)d %(iterations)d %(accuracy)f \n" % self.opts + (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') - nb_channel = max([math.floor(float(d)) for d in stdout.split()]) - + try: + nb_channel = max([math.floor(float(d)) for d in stdout.split()]) + except Exception as error: + misc.sprint(stdout, 'no channel or error for %s' % Pdir) + 
continue + self.cmd.compile(['madevent_forhel'], cwd=Pdir) if not os.path.exists(pjoin(Pdir, 'madevent_forhel')): raise Exception('Error make madevent_forhel not successful') diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py index f4c9cb6334..c9d1c7706a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py @@ -1,6 +1,12 @@ +# Copyright (C) 2020-2023 CERN and UCLouvain. +# Licensed under the GNU Lesser General Public License (version 3 or later). +# Created by: O. Mattelaer (Aug 2023) for the MG5aMC CUDACPP plugin. +# Further modified by: O. Mattelaer, A. Valassi (2023) for the MG5aMC CUDACPP plugin. import logging - +import os +import subprocess +pjoin = os.path.join logger = logging.getLogger('cmdprint') # for stdout try: @@ -9,20 +15,23 @@ import internal.madevent_interface as madevent_interface import internal.misc as misc import internal.extended_cmd as extended_cmd + import internal.banner as banner_mod else: import madgraph.interface.madevent_interface as madevent_interface import madgraph.various.misc as misc import madgraph.interface.extended_cmd as extended_cmd + import madgraph.various.banner as banner_mod class CPPMEInterface(madevent_interface.MadEventCmdShell): - def compile(self, *args, **opts): """ """ import multiprocessing if not self.options['nb_core'] or self.options['nb_core'] == 'None': self.options['nb_core'] = multiprocessing.cpu_count() - if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): + import pathlib + import os + pjoin = os.path.join cudacpp_backend = self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) if cudacpp_backend == 'FORTRAN': @@ -36,5 +45,50 @@ def compile(self, *args, **opts): return 
misc.compile(nb_core=self.options['nb_core'], *args, **opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) - + +class CPPRunCard(banner_mod.RunCardLO): + def reset_simd(self, old_value, new_value, name): + if not hasattr(self, 'path'): + logger.warning('WARNING! CPPRunCard instance has no attribute path') + return + ###raise Exception('INTERNAL ERROR! CPPRunCard instance has no attribute path') + if name == "vector_size" and new_value <= int(old_value): + # code can handle the new size -> do not recompile + return + Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source') + subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + def plugin_input(self, finput): + return + + def default_setup(self): + super().default_setup() + self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + + def write_one_include_file(self, output_dir, incname, output_file=None): + """write one include file at the time""" + if incname == "vector.inc" and 'vector_size' not in self.user_set: + return + super().write_one_include_file(output_dir, incname, output_file) + + def check_validity(self): + """ensure that PLUGIN information are consistent""" + super().check_validity() + if self['SDE_strategy'] != 1: + logger.warning('SDE_strategy different of 1 is not supported with SMD/GPU mode') + self['sde_strategy'] = 1 + if self['hel_recycling']: + self['hel_recycling'] = False + +class GPURunCard(CPPRunCard): + def default_setup(self): + super(CPPRunCard, self).default_setup() + self.add_param('cudacpp_backend', 'CUDA', include=False, hidden=False) + +#class CUDACPPRunCard(CPPRunCard): +# def default_setup(self): +# super(CPPRunCard, self).default_setup() +# self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + MEINTERFACE = CPPMEInterface +RunCard = CPPRunCard diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/madevent_interface.py 
b/epochX/cudacpp/pp_tt012j.mad/bin/internal/madevent_interface.py index 920e07a926..d722702891 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/madevent_interface.py @@ -3796,9 +3796,11 @@ def do_combine_events(self, line): if self.run_card['bias_module'].lower() not in ['dummy', 'none'] and nb_event: self.correct_bias() - + elif self.run_card['custom_fcts']: + self.correct_bias() + logger.info("combine events done in %s", time.time()-start) - + self.to_store.append('event') diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/madevent b/epochX/cudacpp/pp_tt012j.mad/bin/madevent index 10b6a71fa2..dff9711b73 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/madevent +++ b/epochX/cudacpp/pp_tt012j.mad/bin/madevent @@ -173,6 +173,10 @@ if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): launch_interface = launch_plugin.MEINTERFACE +#Source use this executable for compilation always allow it +force_run = False +if (args and args[0] == 'treatcards'): + force_run=True # Call the cmd interface main loop try: @@ -180,7 +184,7 @@ try: launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) - with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): + with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), force_run=force_run): if (args and os.path.isfile(args[0])): # They are an input file input_file = args[0] From 5ed0e4b015a40e15ce7c51e5d1894dded29129d8 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 22:06:44 +0200 Subject: [PATCH 052/119] [oct23av] in CODEGEN/generateAndCompare.sh, no longer copy or clean up the MG5AMC PLUGIN directory, following PR #766 (NB: I would very much prefer instead to revert this commit and revert the commit in PR #766) --- epochX/cudacpp/CODEGEN/generateAndCompare.sh | 38 ++++++++++---------- 1 file changed, 19 
insertions(+), 19 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/generateAndCompare.sh b/epochX/cudacpp/CODEGEN/generateAndCompare.sh index c1c7886e54..e88b6bd5e6 100755 --- a/epochX/cudacpp/CODEGEN/generateAndCompare.sh +++ b/epochX/cudacpp/CODEGEN/generateAndCompare.sh @@ -372,17 +372,17 @@ function cleanup_MG5AMC_HOME() rm -f ${MG5AMC_HOME}/Template/LO/Source/make_opts rm -f ${MG5AMC_HOME}/input/mg5_configuration.txt rm -f ${MG5AMC_HOME}/models/sm/py3_model.pkl - # Remove any *~ files in MG5AMC_HOME + # Remove any *~ files in MG5AMC_HOME rm -rf $(find ${MG5AMC_HOME} -name '*~') } -function cleanup_MG5AMC_PLUGIN() -{ - # Remove and recreate MG5AMC_HOME/PLUGIN - rm -rf ${MG5AMC_HOME}/PLUGIN - mkdir ${MG5AMC_HOME}/PLUGIN - touch ${MG5AMC_HOME}/PLUGIN/__init__.py -} +#function cleanup_MG5AMC_PLUGIN() +#{ +# # Remove and recreate MG5AMC_HOME/PLUGIN +# rm -rf ${MG5AMC_HOME}/PLUGIN +# mkdir ${MG5AMC_HOME}/PLUGIN +# touch ${MG5AMC_HOME}/PLUGIN/__init__.py +#} #-------------------------------------------------------------------------------------- @@ -535,7 +535,7 @@ cd - > /dev/null # Clean up before code generation cleanup_MG5AMC_HOME -cleanup_MG5AMC_PLUGIN +###cleanup_MG5AMC_PLUGIN # Print differences in MG5AMC with respect to git after copying ad-hoc patches cd ${MG5AMC_HOME} @@ -546,18 +546,18 @@ echo -e "***************** Differences to the current git commit ${commit_patche cd - > /dev/null # Copy the new plugin to MG5AMC_HOME (if the script directory backend is cudacpp or alpaka) -if [ "${SCRBCK}" == "cudacpp" ]; then - if [ "${OUTBCK}" == "no-path-to-this-statement" ]; then - echo -e "\nWARNING! '${OUTBCK}' mode selected: do not copy the cudacpp plugin (workaround for #341)" - else # currently succeeds also for madcpp and madgpu (#341 has been fixed) - echo -e "\nINFO! 
'${OUTBCK}' mode selected: copy the cudacpp plugin\n" - cp -dpr ${SCRDIR}/PLUGIN/${SCRBCK^^}_SA_OUTPUT ${MG5AMC_HOME}/PLUGIN/${SCRBCK^^}_OUTPUT - ls -l ${MG5AMC_HOME}/PLUGIN - fi +#if [ "${SCRBCK}" == "cudacpp" ]; then +# if [ "${OUTBCK}" == "no-path-to-this-statement" ]; then +# echo -e "\nWARNING! '${OUTBCK}' mode selected: do not copy the cudacpp plugin (workaround for #341)" +# else # currently succeeds also for madcpp and madgpu (#341 has been fixed) +# echo -e "\nINFO! '${OUTBCK}' mode selected: copy the cudacpp plugin\n" +# cp -dpr ${SCRDIR}/PLUGIN/${SCRBCK^^}_SA_OUTPUT ${MG5AMC_HOME}/PLUGIN/${SCRBCK^^}_OUTPUT +# ls -l ${MG5AMC_HOME}/PLUGIN +# fi ###elif [ "${SCRBCK}" == "alpaka" ]; then ### cp -dpr ${SCRDIR}/PLUGIN/${SCRBCK^^}_CUDACPP_SA_OUTPUT ${MG5AMC_HOME}/PLUGIN/ ### ls -l ${MG5AMC_HOME}/PLUGIN -fi +#fi # For gridpacks, use separate output directories for MG 29x and MG 3xx ###if [ "${SCRBCK}" == "gridpack" ]; then @@ -575,7 +575,7 @@ codeGenAndDiff $proc "$cmd" # Clean up after code generation cleanup_MG5AMC_HOME -cleanup_MG5AMC_PLUGIN +###cleanup_MG5AMC_PLUGIN # Check formatting in the auto-generated code if [ "${OUTBCK}" == "cudacpp" ]; then From f87854e3b496f503a84979e350940037ef580b76 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 22:13:57 +0200 Subject: [PATCH 053/119] [oct23av] in CODEGEN, fix clang format for 'COUPs[ndcoup+0]' from one commit in PR #776 --- .../cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index 76247bf82f..5300d1cdb9 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -1674,7 +1674,7 @@ def format_coupling(self, call): else: call = call.replace('CD_ACCESS', 'CI_ACCESS') call = 
call.replace('m_pars->%s%s' % (sign, coup), - 'COUPs[ndcoup+%s], %s' % (alias[coup]-len(self.couporderdep), '1.0' if not sign else '-1.0')) + 'COUPs[ndcoup + %s], %s' % (alias[coup]-len(self.couporderdep), '1.0' if not sign else '-1.0')) if newcoup: self.couplings2order = self.couporderdep | self.couporderindep return call From 759e60c6b4ed9f13e6b4ee398af32646ed5f4463 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 22:19:28 +0200 Subject: [PATCH 054/119] [oct23av] regenerate all 8 mad and 7 sa processes after completing the merging and patching of upstream/master in my environment Code changes: auto_dsig1.f in all processes, CPPProcess.cc in eemumu. I checked that eemumu and ggtt tput/tmad tests succeed (logs not kept). --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 78 +++-- .../ee_mumu.mad/Source/PDF/pdfwrap_lhapdf.f | 1 + epochX/cudacpp/ee_mumu.mad/Source/make_opts | 9 +- epochX/cudacpp/ee_mumu.mad/Source/makefile | 4 +- .../SubProcesses/P1_epem_mupmum/CPPProcess.cc | 8 +- .../SubProcesses/P1_epem_mupmum/auto_dsig1.f | 1 + .../ee_mumu.mad/SubProcesses/dummy_fct.f | 10 +- .../ee_mumu.mad/bin/internal/banner.py | 289 +++++++++++++----- .../bin/internal/check_param_card.py | 2 +- .../bin/internal/common_run_interface.py | 11 +- .../ee_mumu.mad/bin/internal/extended_cmd.py | 8 +- .../ee_mumu.mad/bin/internal/gen_ximprove.py | 9 +- .../ee_mumu.mad/bin/internal/launch_plugin.py | 62 +++- .../bin/internal/madevent_interface.py | 6 +- epochX/cudacpp/ee_mumu.mad/bin/madevent | 6 +- .../CODEGEN_cudacpp_ee_mumu_log.txt | 10 +- .../P1_Sigma_sm_epem_mupmum/CPPProcess.cc | 8 +- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 22 +- .../gg_tt.mad/Source/PDF/pdfwrap_lhapdf.f | 1 + .../SubProcesses/P1_gg_ttx/auto_dsig1.f | 1 + .../cudacpp/gg_tt.mad/bin/internal/banner.py | 28 +- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 10 +- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 26 +- .../gg_tt01g.mad/Source/PDF/pdfwrap_lhapdf.f | 1 + 
.../SubProcesses/P1_gg_ttx/auto_dsig1.f | 1 + .../SubProcesses/P2_gg_ttxg/auto_dsig1.f | 1 + .../gg_tt01g.mad/bin/internal/banner.py | 28 +- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 22 +- .../gg_ttg.mad/Source/PDF/pdfwrap_lhapdf.f | 1 + .../SubProcesses/P1_gg_ttxg/auto_dsig1.f | 1 + .../cudacpp/gg_ttg.mad/bin/internal/banner.py | 28 +- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 12 +- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 20 +- .../gg_ttgg.mad/Source/PDF/pdfwrap_lhapdf.f | 1 + .../SubProcesses/P1_gg_ttxgg/auto_dsig1.f | 1 + .../gg_ttgg.mad/bin/internal/banner.py | 28 +- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 14 +- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 26 +- .../gg_ttggg.mad/Source/PDF/pdfwrap_lhapdf.f | 1 + .../SubProcesses/P1_gg_ttxggg/auto_dsig1.f | 1 + .../gg_ttggg.mad/bin/internal/banner.py | 28 +- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 14 +- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 32 +- .../gq_ttq.mad/Source/PDF/pdfwrap_lhapdf.f | 1 + .../SubProcesses/P1_gu_ttxu/auto_dsig1.f | 1 + .../SubProcesses/P1_gux_ttxux/auto_dsig1.f | 1 + .../cudacpp/gq_ttq.mad/bin/internal/banner.py | 28 +- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 12 +- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 8 +- .../CODEGEN_mad_pp_tt012j_log.txt | 132 ++++---- .../pp_tt012j.mad/Source/PDF/pdfwrap_lhapdf.f | 1 + .../SubProcesses/P0_gg_ttx/auto_dsig1.f | 1 + .../SubProcesses/P0_uux_ttx/auto_dsig1.f | 1 + .../SubProcesses/P1_gg_ttxg/auto_dsig1.f | 1 + .../SubProcesses/P1_gu_ttxu/auto_dsig1.f | 1 + .../SubProcesses/P1_gux_ttxux/auto_dsig1.f | 1 + .../SubProcesses/P1_uux_ttxg/auto_dsig1.f | 1 + .../SubProcesses/P2_gg_ttxgg/auto_dsig1.f | 1 + .../SubProcesses/P2_gg_ttxuux/auto_dsig1.f | 1 + .../SubProcesses/P2_gu_ttxgu/auto_dsig1.f | 1 + .../SubProcesses/P2_gux_ttxgux/auto_dsig1.f | 1 + .../SubProcesses/P2_uc_ttxuc/auto_dsig1.f | 1 + .../SubProcesses/P2_ucx_ttxucx/auto_dsig1.f | 1 + .../SubProcesses/P2_uu_ttxuu/auto_dsig1.f | 1 + .../SubProcesses/P2_uux_ttxccx/auto_dsig1.f 
| 1 + .../SubProcesses/P2_uux_ttxgg/auto_dsig1.f | 1 + .../SubProcesses/P2_uux_ttxuux/auto_dsig1.f | 1 + .../SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f | 1 + .../SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f | 1 + .../pp_tt012j.mad/bin/internal/banner.py | 28 +- 70 files changed, 705 insertions(+), 396 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 3476c5c66b..c2f712a9a0 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005334377288818359  +DEBUG: model prefixing takes 0.005261898040771484  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,7 +174,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6199]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -183,27 +183,27 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1836]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  WARNING: vector code for lepton pdf not implemented. We removed the option to run dressed lepton  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1836]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.096 s +Wrote files for 8 helas calls in 0.097 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.199 s +ALOHA: aloha creates 3 routines in 0.196 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.252 s +ALOHA: aloha creates 7 routines in 0.251 s FFV1 
FFV1 FFV2 @@ -226,8 +226,34 @@ save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CO INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG! Switching on tmad mode (MG5AMC_TMADMODE=1) +DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  +DEBUG: self.in_madevent_mode =  True [output.py at line 207]  +DEBUG! Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +patching file Source/genps.inc +patching file Source/makefile +patching file SubProcesses/makefile +patching file bin/internal/gen_ximprove.py +Hunk #1 succeeded at 385 (offset 5 lines). +patching file bin/internal/madevent_interface.py +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +patching file auto_dsig1.f +Hunk #1 succeeded at 496 (offset 27 lines). +patching file driver.f +patching file matrix1.f +Hunk #3 succeeded at 230 (offset 9 lines). +Hunk #4 succeeded at 267 (offset 18 lines). +Hunk #5 succeeded at 312 (offset 18 lines). +DEBUG: p.returncode =  0 [output.py at line 233]  +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done. +Type "launch" to generate events from this process, or see +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README +Run "open index.html" to see more information about this process. 
+quit + +real 0m1.848s +user 0m1.628s +sys 0m0.206s ************************************************************ * * * W E L C O M E to * @@ -255,9 +281,7 @@ Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run -run_card missed argument cudacpp_backend. Takes default: CPP -run_card missed argument cudacpp_backend. Takes default: CPP -run_card missed argument cudacpp_backend. Takes default: CPP +WARNING! CPPRunCard instance has no attribute path quit INFO: launch in debug mode @@ -291,29 +315,3 @@ treatcards param quit INFO: launch in debug mode -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common -patching file Source/genps.inc -patching file Source/makefile -patching file SubProcesses/makefile -patching file Source/make_opts -patching file bin/internal/banner.py -Hunk #1 succeeded at 4188 (offset 1 line). -patching file bin/internal/gen_ximprove.py -patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 -patching file auto_dsig1.f -Hunk #1 succeeded at 495 (offset 26 lines). -patching file driver.f -patching file matrix1.f -Hunk #3 succeeded at 230 (offset 9 lines). -Hunk #4 succeeded at 267 (offset 18 lines). -Hunk #5 succeeded at 312 (offset 18 lines). -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done. 
-Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README -Run "open index.html" to see more information about this process. -quit - -real 0m2.484s -user 0m2.173s -sys 0m0.301s diff --git a/epochX/cudacpp/ee_mumu.mad/Source/PDF/pdfwrap_lhapdf.f b/epochX/cudacpp/ee_mumu.mad/Source/PDF/pdfwrap_lhapdf.f index 0be926e6cd..3f36905346 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/PDF/pdfwrap_lhapdf.f +++ b/epochX/cudacpp/ee_mumu.mad/Source/PDF/pdfwrap_lhapdf.f @@ -5,6 +5,7 @@ SUBROUTINE PDFWRAP C INCLUDE 'pdf.inc' INCLUDE '../alfas.inc' + INCLUDE '../vector.inc' INCLUDE '../coupl.inc' REAL*8 ZMASS DATA ZMASS/91.188D0/ diff --git a/epochX/cudacpp/ee_mumu.mad/Source/make_opts b/epochX/cudacpp/ee_mumu.mad/Source/make_opts index bd3c24228d..57f5f7bb96 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/make_opts +++ b/epochX/cudacpp/ee_mumu.mad/Source/make_opts @@ -1,17 +1,12 @@ -pdlabel1= -pdlabel2= -lhapdf= -PYTHIA8_PATH=NotInstalled -MG5AMC_VERSION=3.5.0_lo_vect GLOBAL_FLAG=-O3 -ffast-math -fbounds-check -ALOHA_FLAG= -MATRIX_FLAG= DEFAULT_CPP_COMPILER=g++ MACFLAG= STDLIB=-lstdc++ STDLIB_FLAG= DEFAULT_F_COMPILER=gfortran DEFAULT_F2PY_COMPILER=f2py3 +MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime +PYTHIA8_PATH=NotInstalled #end_of_make_opts_variables BIASLIBDIR=../../../lib/ diff --git a/epochX/cudacpp/ee_mumu.mad/Source/makefile b/epochX/cudacpp/ee_mumu.mad/Source/makefile index dbe08b846e..00c73099a0 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/makefile +++ b/epochX/cudacpp/ee_mumu.mad/Source/makefile @@ -136,5 +136,7 @@ cleanSource: clean: cleanSource for i in `ls -d ../SubProcesses/P*`; do cd $$i; make clean; cd -; done; -cleanall: cleanSource +cleanavx: + for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done; +cleanall: cleanSource # THIS IS THE ONE for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done; diff --git 
a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc index 8d370a6b34..0af629d3a8 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc @@ -246,10 +246,10 @@ namespace mg5amcCpu oxxxxx( momenta, 0., cHel[ihel][3], +1, w_fp[3], 3 ); - FFV1P0_3( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[4] ); + FFV1P0_3( w_fp[1], w_fp[0], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 1 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); @@ -259,10 +259,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 2 *** // Wavefunction(s) for diagram number 2 - FFV2_4_3( w_fp[1], w_fp[0], COUPs[1], 1.0, COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] ); + FFV2_4_3( w_fp[1], w_fp[0], COUPs[ndcoup + 1], 1.0, COUPs[ndcoup + 2], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 2 - FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[1], 1.0, COUPs[2], 1.0, &_fp[0] ); + FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[ndcoup + 1], 1.0, COUPs[ndcoup + 2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL if( channelId == 2 ) numerators_sv += cxabs2( amp_sv[0] ); if( channelId != 0 ) denominators_sv += cxabs2( amp_sv[0] ); diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f index 31e7790d2d..fcf2e4dec5 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f @@ -272,6 +272,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL 
VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION EP1(VECSIZE_MEMMAX) DOUBLE PRECISION EM2(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/dummy_fct.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/dummy_fct.f index 076cf29d67..4f7a204b8f 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/dummy_fct.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/dummy_fct.f @@ -32,7 +32,7 @@ logical FUNCTION dummy_cuts(P) LOGICAL IS_A_NU(NEXTERNAL),IS_HEAVY(NEXTERNAL) logical do_cuts(nexternal) COMMON /TO_SPECISA/IS_A_J,IS_A_A,IS_A_L,IS_A_B,IS_A_NU,IS_HEAVY, - . IS_A_ONIUM, do_cuts + & IS_A_ONIUM, do_cuts dummy_cuts=.true. @@ -118,15 +118,16 @@ double precision function user_dynamical_scale(P) C ************************************************************ -C default for the library implementing a dummt bias function +C default for the library implementing a dummy bias function C ************************************************************ subroutine bias_wgt_custom(p, original_weight, bias_weight) - implicit none + implicit none C C Parameters C include 'nexternal.inc' -C + +C C Arguments C double precision p(0:3, nexternal) @@ -161,3 +162,4 @@ subroutine bias_wgt_custom(p, original_weight, bias_weight) return end subroutine bias_wgt_custom + diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py index e9f421ae5f..ef1bf58979 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py @@ -537,7 +537,7 @@ def charge_card(self, tag): self.param_card = param_card_reader.ParamCard(param_card) return self.param_card elif tag == 'mgruncard': - self.run_card = RunCard(self[tag]) + self.run_card = RunCard(self[tag], unknown_warning=False) return self.run_card elif tag == 'mg5proccard': proc_card = self[tag].split('\n') @@ -2625,6 +2625,7 @@ class RunCard(ConfigFile): default_include_file = 'run_card.inc' default_autodef_file = 
'run.inc' donewarning = [] + include_as_parameter = [] def plugin_input(self, finput): @@ -2671,18 +2672,47 @@ def __new__(cls, finput=None, **opt): elif isinstance(finput, cls): target_class = finput.__class__ elif isinstance(finput, str): + path = finput if '\n' not in finput: finput = open(finput).read() if 'req_acc_FO' in finput: target_class = RunCardNLO else: target_class = RunCardLO + if MADEVENT and os.path.exists(pjoin(MEDIR, 'bin','internal', 'launch_plugin.py')): + with misc.TMP_variable(sys, 'path', sys.path + [pjoin(MEDIR, 'bin', 'internal')]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard + elif not MADEVENT: + if 'run_card.dat' in path: + launch_plugin_path = path.replace('run_card.dat', '../bin/internal/launch_plugin.py') + elif 'run_card_default.dat' in path: + launch_plugin_path = path.replace('run_card_default.dat', '../bin/internal/launch_plugin.py') + else: + launch_plugin_path = None + if launch_plugin_path and os.path.exists(launch_plugin_path): + misc.sprint('try to use plugin class', path.replace('run_card.dat', '../bin/internal/launch_plugin.py')) + pydir = os.path.dirname(launch_plugin_path) + with misc.TMP_variable(sys, 'path', sys.path + [pydir]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard + else: return None target_class.fill_post_set_from_blocks() - - return super(RunCard, cls).__new__(target_class, finput, **opt) + out = super(RunCard, cls).__new__(target_class, finput, **opt) + if not isinstance(out, RunCard): #should not happen but in presence of missmatch of library loaded. 
+ out.__init__(finput, **opt) + return out else: return super(RunCard, cls).__new__(cls, finput, **opt) @@ -2710,7 +2740,7 @@ def __init__(self, *args, **opts): self.system_default = {} self.display_block = [] # set some block to be displayed - + self.fct_mod = {} # {param: (fct_pointer, *argument, **opts)} self.cut_class = {} self.warned=False @@ -2747,7 +2777,7 @@ def get_lepton_densities(cls): def add_param(self, name, value, fortran_name=None, include=True, hidden=False, legacy=False, cut=False, system=False, sys_default=None, - autodef=False, + autodef=False, fct_mod=None, **opts): """ add a parameter to the card. value is the default value and defines the type (int/float/bool/str) of the input. @@ -2761,6 +2791,7 @@ def add_param(self, name, value, fortran_name=None, include=True, If a path (Source/PDF/pdf.inc) the definition will be added within that file Default is False (does not add the definition) entry added in the run_card will automatically have this on True. + fct_mod: defines a function to run if the parameter is modify in the include file options of **opts: - allowed: list of valid options. '*' means anything else should be allowed. empty list means anything possible as well. @@ -2785,8 +2816,12 @@ def add_param(self, name, value, fortran_name=None, include=True, if autodef: self.definition_path[autodef].append(name) self.user_set.add(name) + # function to trigger if a value is modified in the include file + # main target is action to force correct recompilation (like for compilation flag/...) 
+ if fct_mod: + self.fct_mod[name] = fct_mod - def read(self, finput, consistency=True): + def read(self, finput, consistency=True, unknown_warning=True): """Read the input file, this can be a path to a file, a file object, a str with the content of the file.""" @@ -2794,6 +2829,7 @@ def read(self, finput, consistency=True): if "\n" in finput: finput = finput.split('\n') elif os.path.isfile(finput): + self.path = finput finput = open(finput) else: raise Exception("No such file %s" % finput) @@ -2808,7 +2844,7 @@ def read(self, finput, consistency=True): name = name.lower().strip() if name not in self: #looks like an entry added by a user -> add it nicely - self.add_unknown_entry(name, value) + self.add_unknown_entry(name, value, unknown_warning) else: self.set( name, value, user=True) # parameter not set in the run_card can be set to compatiblity value @@ -2820,7 +2856,7 @@ def read(self, finput, consistency=True): logger.warning(str(error)) else: raise - def add_unknown_entry(self, name, value): + def add_unknown_entry(self, name, value, unknow_warning): """function to add an entry to the run_card when the associated parameter does not exists. This is based on the guess_entry_fromname for the various syntax providing input. This then call add_param accordingly. @@ -2859,7 +2895,7 @@ def add_unknown_entry(self, name, value): raise Exception("dictionary need to have at least one entry") default['dict']['__type__'] = default[self.guess_type_from_value(default_value[0])] - if name not in RunCard.donewarning: + if name not in RunCard.donewarning and unknow_warning: logger.warning("Found unexpected entry in run_card: \"%s\" with value \"%s\".\n"+\ " The type was assigned to %s. 
\n"+\ " The definition of that variable will %sbe automatically added to fortran file %s\n"+\ @@ -2897,7 +2933,17 @@ def valid_line(self, line, tmp): return False else: return True - + + + def reset_simd(self, old_value, new_value, name, *args, **opts): + #return + raise Exception('pass in reset simd') + + def make_clean(self,old_value, new_value, name, dir): + raise Exception('pass make clean for ', dir) + + def make_Ptouch(self,old_value, new_value, name, reset): + raise Exception('pass Ptouch for ', reset) def write(self, output_file, template=None, python_template=False, write_hidden=False, template_options=None, **opt): @@ -3072,6 +3118,77 @@ def write(self, output_file, template=None, python_template=False, else: output_file.write(text) + def get_last_value_include(self, output_dir): + """For paraeter in self.fct_mod + parse the associate inc file to get the value of the previous run. + We return a dictionary {name: old_value} + if inc file does not exist we will return the current value (i.e. 
set has no change) + """ + + #remember that + # default_include_file is a class variable + # self.includepath is on the form include_path : [list of param ] + out = {} + + # setup inc_to_parse to be like self.includepath (include_path : [list of param ]) + # BUT only containing the parameter that need to be tracked for the fct_mod option + inc_to_parse = {} + for inc_file, params in self.includepath.items(): + if not inc_file: + continue + if any(p in params for p in self.fct_mod): + inc_to_parse[inc_file] = [name for name in self.includepath[inc_file] if name in self.fct_mod] + + # now loop over the files and ask the associate function + for inc_file, params in inc_to_parse.items(): + if inc_file is True: + inc_file = self.default_include_file + out.update(self.get_value_from_include(inc_file, params, output_dir)) + + return out + + def get_value_from_include(self, path, list_of_params, output_dir): + """for a given include file return the current value of the requested parameter + return a dictionary {name: value} + if path does not exists return the current value in self for all parameter""" + + #WARNING DOES NOT HANDLE LIST/DICT so far + + # handle case where file is missing + if not os.path.exists(pjoin(output_dir,path)): + misc.sprint("include file not existing", pjoin(output_dir,path)) + out = {name: self[name] for name in list_of_params} + + with open(pjoin(output_dir,path), 'r') as fsock: + text = fsock.read() + + for name in list_of_params: + misc.sprint(name, name in self.fortran_name) + misc.sprint(self.fortran_name[name] if name in self.fortran_name[name] else name) + to_track = [self.fortran_name[name] if name in self.fortran_name else name for name in list_of_params] + pattern = re.compile(r"\(?(%(names)s)\s?=\s?([^)]*)\)?" 
% {'names':'|'.join(to_track)}, re.I) + out = dict(pattern.findall(text)) + misc.sprint(out) + for name in list_of_params: + if name in self.fortran_name: + value = out[self.fortran_name[name]] + del out[self.fortran_name[name]] + out[name] = value + + for name, value in out.items(): + try: + out[name] = self.format_variable(value, type(self[name])) + except Exception: + continue + + if len(out) != len(list_of_params): + misc.sprint(list_of_params) + misc.sprint(to_track) + misc.sprint(self.fortran_name) + misc.sprint(text) + raise Exception + return out + def get_default(self, name, default=None, log_level=None): """return self[name] if exist otherwise default. log control if we @@ -3362,71 +3479,93 @@ def write_include_file(self, output_dir, output_file=None): #ensusre that system only parameter are correctly set self.update_system_parameter_for_include() + value_in_old_include = self.get_last_value_include(output_dir) + + if output_dir: self.write_autodef(output_dir, output_file=None) # check/fix status of customised functions self.edit_dummy_fct_from_file(self["custom_fcts"], os.path.dirname(output_dir)) for incname in self.includepath: - if incname is True: - pathinc = self.default_include_file - elif incname is False: - continue - else: - pathinc = incname + self.write_one_include_file(output_dir, incname, output_file) + + for name,value in value_in_old_include.items(): + if value != self[name]: + self.fct_mod[name][0](value, self[name], name, *self.fct_mod[name][1],**self.fct_mod[name][2]) - if output_file: - fsock = output_file + def write_one_include_file(self, output_dir, incname, output_file=None): + """write one include file at the time""" + + misc.sprint(incname) + if incname is True: + pathinc = self.default_include_file + elif incname is False: + return + else: + pathinc = incname + + if output_file: + fsock = output_file + else: + fsock = file_writers.FortranWriter(pjoin(output_dir,pathinc+'.tmp')) + + + for key in self.includepath[incname]: + 
#define the fortran name + if key in self.fortran_name: + fortran_name = self.fortran_name[key] else: - fsock = file_writers.FortranWriter(pjoin(output_dir,pathinc+'.tmp')) - for key in self.includepath[incname]: - #define the fortran name - if key in self.fortran_name: - fortran_name = self.fortran_name[key] + fortran_name = key + + if incname in self.include_as_parameter: + fsock.writelines('INTEGER %s\n' % fortran_name) + #get the value with warning if the user didn't set it + value = self.get_default(key) + if hasattr(self, 'mod_inc_%s' % key): + value = getattr(self, 'mod_inc_%s' % key)(value) + # Special treatment for strings containing a list of + # strings. Convert it to a list of strings + if isinstance(value, list): + # in case of a list, add the length of the list as 0th + # element in fortran. Only in case of integer or float + # list (not for bool nor string) + targettype = self.list_parameter[key] + if targettype is bool: + pass + elif targettype is int: + line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(len(value))) + fsock.writelines(line) + elif targettype is float: + line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(float(len(value)))) + fsock.writelines(line) + # output the rest of the list in fortran + for i,v in enumerate(value): + line = '%s(%s) = %s \n' % (fortran_name, i+1, self.f77_formatting(v)) + fsock.writelines(line) + elif isinstance(value, dict): + for fortran_name, onevalue in value.items(): + line = '%s = %s \n' % (fortran_name, self.f77_formatting(onevalue)) + fsock.writelines(line) + elif isinstance(incname,str) and 'compile' in incname: + if incname in self.include_as_parameter: + line = 'PARAMETER (%s=%s)' %( fortran_name, value) else: - fortran_name = key - - #get the value with warning if the user didn't set it - value = self.get_default(key) - if hasattr(self, 'mod_inc_%s' % key): - value = getattr(self, 'mod_inc_%s' % key)(value) - # Special treatment for strings containing a list of - # strings. 
Convert it to a list of strings - if isinstance(value, list): - # in case of a list, add the length of the list as 0th - # element in fortran. Only in case of integer or float - # list (not for bool nor string) - targettype = self.list_parameter[key] - if targettype is bool: - pass - elif targettype is int: - line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(len(value))) - fsock.writelines(line) - elif targettype is float: - line = '%s(%s) = %s \n' % (fortran_name, 0, self.f77_formatting(float(len(value)))) - fsock.writelines(line) - # output the rest of the list in fortran - for i,v in enumerate(value): - line = '%s(%s) = %s \n' % (fortran_name, i+1, self.f77_formatting(v)) - fsock.writelines(line) - elif isinstance(value, dict): - for fortran_name, onevalue in value.items(): - line = '%s = %s \n' % (fortran_name, self.f77_formatting(onevalue)) - fsock.writelines(line) - elif isinstance(incname,str) and 'compile' in incname: line = '%s = %s \n' % (fortran_name, value) - fsock.write(line) + fsock.write(line) + else: + if incname in self.include_as_parameter: + line = 'PARAMETER (%s=%s)' %( fortran_name, self.f77_formatting(value)) else: line = '%s = %s \n' % (fortran_name, self.f77_formatting(value)) - fsock.writelines(line) - if not output_file: - fsock.close() - path = pjoin(output_dir,pathinc) - if not os.path.exists(path) or not filecmp.cmp(path, path+'.tmp'): - files.mv(path+'.tmp', path) - else: - os.remove(path+'.tmp') - + fsock.writelines(line) + if not output_file: + fsock.close() + path = pjoin(output_dir,pathinc) + if not os.path.exists(path) or not filecmp.cmp(path, path+'.tmp'): + files.mv(path+'.tmp', path) + else: + os.remove(path+'.tmp') def write_autodef(self, output_dir, output_file=None): """ Add the definition of variable to run.inc if the variable is set with autodef. @@ -3765,13 +3904,14 @@ def remove_all_cut(self): %(tmin_for_channel)s = tmin_for_channel ! 
limit the non-singular reach of --some-- channel of integration related to T-channel diagram (value between -1 and 0), -1 is no impact %(survey_splitting)s = survey_splitting ! for loop-induced control how many core are used at survey for the computation of a single iteration. %(survey_nchannel_per_job)s = survey_nchannel_per_job ! control how many Channel are integrated inside a single job on cluster/multicore - %(refine_evt_by_job)s = refine_evt_by_job ! control the maximal number of events for the first iteration of the refine (larger means less jobs) + %(refine_evt_by_job)s = refine_evt_by_job ! control the maximal number of events for the first iteration of the refine (larger means less jobs) #********************************************************************* -# Compilation flag. No automatic re-compilation (need manual "make clean" in Source) +# Compilation flag. #********************************************************************* %(global_flag)s = global_flag ! fortran optimization flag use for the all code. %(aloha_flag)s = aloha_flag ! fortran optimization flag for aloha function. Suggestions: '-ffast-math' %(matrix_flag)s = matrix_flag ! fortran optimization flag for matrix.f function. Suggestions: '-O3' + %(vector_size)s = vector_size ! 
size designed for SIMD/OpenMP/GPU (number of events in lockstep) """ template_off = '# To see advanced option for Phase-Space optimization: type "update psoptim"' @@ -3927,9 +4067,12 @@ class RunCardLO(RunCard): "get_dummy_x1_x2": pjoin("SubProcesses","dummy_fct.f"), "dummy_boostframe": pjoin("SubProcesses","dummy_fct.f"), "user_dynamical_scale": pjoin("SubProcesses","dummy_fct.f"), + "bias_wgt_custom": pjoin("SubProcesses","dummy_fct.f"), "user_": pjoin("SubProcesses","dummy_fct.f") # all function starting by user will be added to that file } + include_as_parameter = ['vector.inc'] + if MG5DIR: default_run_card = pjoin(MG5DIR, "internal", "default_run_card_lo.dat") @@ -4163,10 +4306,15 @@ def default_setup(self): self.add_param('hel_splitamp', True, hidden=True, include=False, comment='decide if amplitude aloha call can be splitted in two or not when doing helicity per helicity optimization.') self.add_param('hel_zeroamp', True, hidden=True, include=False, comment='decide if zero amplitude can be removed from the computation when doing helicity per helicity optimization.') self.add_param('SDE_strategy', 1, allowed=[1,2], fortran_name="sde_strat", comment="decide how Multi-channel should behaves \"1\" means full single diagram enhanced (hep-ph/0208156), \"2\" use the product of the denominator") - self.add_param('global_flag', '-O', include=False, hidden=True, comment='global fortran compilation flag, suggestion -fbound-check') - self.add_param('aloha_flag', '', include=False, hidden=True, comment='global fortran compilation flag, suggestion: -ffast-math') - self.add_param('matrix_flag', '', include=False, hidden=True, comment='fortran compilation flag for the matrix-element files, suggestion -O3') - + self.add_param('global_flag', '-O', include=False, hidden=True, comment='global fortran compilation flag, suggestion -fbound-check', + fct_mod=(self.make_clean, ('Source'),{})) + self.add_param('aloha_flag', '', include=False, hidden=True, comment='global fortran 
compilation flag, suggestion: -ffast-math', + fct_mod=(self.make_clean, ('Source/DHELAS'),{})) + self.add_param('matrix_flag', '', include=False, hidden=True, comment='fortran compilation flag for the matrix-element files, suggestion -O3', + fct_mod=(self.make_Ptouch, ('matrix'),{})) + self.add_param('vector_size', 1, include='vector.inc', hidden=True, comment='lockstep size for parralelism run', + fortran_name='VECSIZE_MEMMAX', fct_mod=(self.reset_simd,(),{})) + # parameter allowing to define simple cut via the pdg # Special syntax are related to those. (can not be edit directly) self.add_param('pt_min_pdg',{'__type__':0.}, include=False, cut=True) @@ -4188,8 +4336,7 @@ def default_setup(self): self.add_param('mxxmin4pdg',[-1.], system=True) self.add_param('mxxpart_antipart', [False], system=True) - # CUDACPP parameters - self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + def check_validity(self): """ """ diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/check_param_card.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/check_param_card.py index fe874a06a4..71089d7480 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/check_param_card.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/check_param_card.py @@ -85,7 +85,7 @@ def load_str(self, text): self.value= ' '.join(data[len(self.lhacode):]) # check that lhacode are the first entry otherwise return invalid param. 
if ' '.join([str(i) for i in self.lhacode]) != ' '.join(data[:len(self.lhacode)]): - raise InvalidParam + raise InvalidParam("line was %s" % str(data)) else: self.value = data[-1] diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/common_run_interface.py index 5d0187e3fa..14c7f310dc 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/common_run_interface.py @@ -749,13 +749,15 @@ def writeRunWeb(me_dir): class RunWebHandling(object): - def __init__(self, me_dir, crashifpresent=True, warnifpresent=True): + def __init__(self, me_dir, crashifpresent=True, warnifpresent=True, force_run=False): """raise error if RunWeb already exists me_dir is the directory where the write RunWeb""" self.remove_run_web = True self.me_dir = me_dir - + if force_run: + self.remove_run_web = False + return if crashifpresent or warnifpresent: if os.path.exists(pjoin(me_dir, 'RunWeb')): pid = open(pjoin(me_dir, 'RunWeb')).read() @@ -6574,7 +6576,7 @@ def reask(self, *args, **opt): fail_due_to_format = 0 #parameter to avoid infinite loop def postcmd(self, stop, line): - if line not in [None, '0', 'done', '']: + if line not in [None, '0', 'done', '',0]: ending_question = cmd.OneLinePathCompletion.postcmd(self,stop,line) else: ending_question = True @@ -7533,7 +7535,8 @@ def open_file(self, answer): else: raise if time.time() - start < .5: - self.mother_interface.ask("Are you really that fast? If you are using an editor that returns directly. Please confirm that you have finised to edit the file", 'y') + self.mother_interface.ask("Are you really that fast? If you are using an editor that returns directly. 
Please confirm that you have finised to edit the file", 'y', + timeout=False) self.reload_card(path) def reload_card(self, path): diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/extended_cmd.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/extended_cmd.py index a6a8609dce..2f37070580 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/extended_cmd.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/extended_cmd.py @@ -1108,9 +1108,12 @@ def ask(self, question, default, choices=[], path_msg=None, if alias: choices += list(alias.keys()) + + question_instance = obj(question, allow_arg=choices, default=default, mother_interface=self, **opt) - + if fct_timeout is None: + fct_timeout = lambda x: question_instance.postcmd(x, default) if x and default else False if first_cmd: if isinstance(first_cmd, str): question_instance.onecmd(first_cmd) @@ -2271,6 +2274,9 @@ def postcmd(self, stop, line): if n: self.default(line) return self.postcmd(stop, line) + elif self.value is None and line: + self.default(line) + return self.postcmd(stop, line) if not self.casesensitive: for ans in self.allow_arg: if ans.lower() == self.value.lower(): diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/gen_ximprove.py index 3b8ec31215..a88d60b282 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/gen_ximprove.py @@ -154,10 +154,15 @@ def get_helicity(self, to_submit=True, clean=True): p = misc.Popen(['./gensym'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=Pdir) #sym_input = "%(points)d %(iterations)d %(accuracy)f \n" % self.opts + (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') - nb_channel = max([math.floor(float(d)) for d in stdout.split()]) - + try: + nb_channel = max([math.floor(float(d)) for d in stdout.split()]) + except Exception as error: + misc.sprint(stdout, 'no channel or error for %s' % Pdir) + continue + 
self.cmd.compile(['madevent_forhel'], cwd=Pdir) if not os.path.exists(pjoin(Pdir, 'madevent_forhel')): raise Exception('Error make madevent_forhel not successful') diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py index f4c9cb6334..c9d1c7706a 100644 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py @@ -1,6 +1,12 @@ +# Copyright (C) 2020-2023 CERN and UCLouvain. +# Licensed under the GNU Lesser General Public License (version 3 or later). +# Created by: O. Mattelaer (Aug 2023) for the MG5aMC CUDACPP plugin. +# Further modified by: O. Mattelaer, A. Valassi (2023) for the MG5aMC CUDACPP plugin. import logging - +import os +import subprocess +pjoin = os.path.join logger = logging.getLogger('cmdprint') # for stdout try: @@ -9,20 +15,23 @@ import internal.madevent_interface as madevent_interface import internal.misc as misc import internal.extended_cmd as extended_cmd + import internal.banner as banner_mod else: import madgraph.interface.madevent_interface as madevent_interface import madgraph.various.misc as misc import madgraph.interface.extended_cmd as extended_cmd + import madgraph.various.banner as banner_mod class CPPMEInterface(madevent_interface.MadEventCmdShell): - def compile(self, *args, **opts): """ """ import multiprocessing if not self.options['nb_core'] or self.options['nb_core'] == 'None': self.options['nb_core'] = multiprocessing.cpu_count() - if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'): + import pathlib + import os + pjoin = os.path.join cudacpp_backend = self.run_card['cudacpp_backend'].upper() # the default value is defined in banner.py logger.info("Building madevent in madevent_interface.py with '%s' matrix elements"%cudacpp_backend) if cudacpp_backend == 'FORTRAN': @@ -36,5 +45,50 @@ def compile(self, *args, **opts): return misc.compile(nb_core=self.options['nb_core'], 
*args, **opts) else: return misc.compile(nb_core=self.options['nb_core'], *args, **opts) - + +class CPPRunCard(banner_mod.RunCardLO): + def reset_simd(self, old_value, new_value, name): + if not hasattr(self, 'path'): + logger.warning('WARNING! CPPRunCard instance has no attribute path') + return + ###raise Exception('INTERNAL ERROR! CPPRunCard instance has no attribute path') + if name == "vector_size" and new_value <= int(old_value): + # code can handle the new size -> do not recompile + return + Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source') + subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + def plugin_input(self, finput): + return + + def default_setup(self): + super().default_setup() + self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + + def write_one_include_file(self, output_dir, incname, output_file=None): + """write one include file at the time""" + if incname == "vector.inc" and 'vector_size' not in self.user_set: + return + super().write_one_include_file(output_dir, incname, output_file) + + def check_validity(self): + """ensure that PLUGIN information are consistent""" + super().check_validity() + if self['SDE_strategy'] != 1: + logger.warning('SDE_strategy different of 1 is not supported with SMD/GPU mode') + self['sde_strategy'] = 1 + if self['hel_recycling']: + self['hel_recycling'] = False + +class GPURunCard(CPPRunCard): + def default_setup(self): + super(CPPRunCard, self).default_setup() + self.add_param('cudacpp_backend', 'CUDA', include=False, hidden=False) + +#class CUDACPPRunCard(CPPRunCard): +# def default_setup(self): +# super(CPPRunCard, self).default_setup() +# self.add_param('cudacpp_backend', 'CPP', include=False, hidden=False) + MEINTERFACE = CPPMEInterface +RunCard = CPPRunCard diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/madevent_interface.py index 
920e07a926..d722702891 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/madevent_interface.py @@ -3796,9 +3796,11 @@ def do_combine_events(self, line): if self.run_card['bias_module'].lower() not in ['dummy', 'none'] and nb_event: self.correct_bias() - + elif self.run_card['custom_fcts']: + self.correct_bias() + logger.info("combine events done in %s", time.time()-start) - + self.to_store.append('event') diff --git a/epochX/cudacpp/ee_mumu.mad/bin/madevent b/epochX/cudacpp/ee_mumu.mad/bin/madevent index 10b6a71fa2..dff9711b73 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/madevent +++ b/epochX/cudacpp/ee_mumu.mad/bin/madevent @@ -173,6 +173,10 @@ if os.path.exists(pjoin(root_path, 'bin','internal', 'launch_plugin.py')): launch_interface = launch_plugin.MEINTERFACE +#Source use this executable for compilation always allow it +force_run = False +if (args and args[0] == 'treatcards'): + force_run=True # Call the cmd interface main loop try: @@ -180,7 +184,7 @@ try: launch = launch_interface(me_dir=os.path.dirname(root_path), force_run=True) launch.exec_cmd('help generate_events') sys.exit(0) - with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), ): + with cmd_interface.MadEventCmdShell.RunWebHandling(os.path.dirname(root_path), force_run=force_run): if (args and os.path.isfile(args[0])): # They are an input file input_file = args[0] diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 415537ec64..f52c8e4178 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005320072174072266  +DEBUG: model prefixing takes 0.005647897720336914  INFO: 
Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -181,7 +181,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.260 s +ALOHA: aloha creates 4 routines in 0.262 s FFV1 FFV1 FFV2 @@ -202,6 +202,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.650s -user 0m0.591s -sys 0m0.055s +real 0m0.727s +user 0m0.588s +sys 0m0.045s diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc index 11472d834e..c0ab4edb92 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc @@ -246,10 +246,10 @@ namespace mg5amcCpu oxxxxx( momenta, 0., cHel[ihel][3], +1, w_fp[3], 3 ); - FFV1P0_3( w_fp[1], w_fp[0], COUPs[0], 1.0, 0., 0., w_fp[4] ); + FFV1P0_3( w_fp[1], w_fp[0], COUPs[ndcoup + 0], 1.0, 0., 0., w_fp[4] ); // Amplitude(s) for diagram number 1 - FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[0], 1.0, &_fp[0] ); + FFV1_0( w_fp[2], w_fp[3], w_fp[4], COUPs[ndcoup + 0], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif @@ -258,10 +258,10 @@ namespace mg5amcCpu // *** DIAGRAM 2 OF 2 *** // Wavefunction(s) for diagram number 2 - FFV2_4_3( w_fp[1], w_fp[0], COUPs[1], 1.0, COUPs[2], 1.0, cIPD[0], cIPD[1], w_fp[4] ); + FFV2_4_3( w_fp[1], w_fp[0], COUPs[ndcoup + 1], 1.0, COUPs[ndcoup + 2], 1.0, cIPD[0], cIPD[1], w_fp[4] ); // Amplitude(s) for diagram number 2 - FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[1], 
1.0, COUPs[2], 1.0, &_fp[0] ); + FFV2_4_0( w_fp[2], w_fp[3], w_fp[4], COUPs[ndcoup + 1], 1.0, COUPs[ndcoup + 2], 1.0, &_fp[0] ); #ifdef MGONGPU_SUPPORTS_MULTICHANNEL // Here the code base generated with multichannel support updates numerators_sv and denominators_sv (#473) #endif diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 5f2b8c9ba8..15b42e74b3 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005397319793701172  +DEBUG: model prefixing takes 0.005278825759887695  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,16 +191,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.102 s +Wrote files for 10 helas calls in 0.100 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.147 s +ALOHA: aloha creates 2 routines in 0.143 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.131 s +ALOHA: aloha creates 4 routines in 0.130 s VVV1 FFV1 FFV1 @@ -222,16 +222,16 @@ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  DEBUG: self.in_madevent_mode =  True [output.py at line 207]  DEBUG! Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py Hunk #1 succeeded at 385 (offset 5 lines). 
patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). +Hunk #1 succeeded at 484 (offset 15 lines). patching file driver.f patching file matrix1.f DEBUG: p.returncode =  0 [output.py at line 233]  @@ -241,9 +241,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.975s -user 0m1.493s -sys 0m0.208s +real 0m1.685s +user 0m1.469s +sys 0m0.195s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt.mad/Source/PDF/pdfwrap_lhapdf.f b/epochX/cudacpp/gg_tt.mad/Source/PDF/pdfwrap_lhapdf.f index 0be926e6cd..3f36905346 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/PDF/pdfwrap_lhapdf.f +++ b/epochX/cudacpp/gg_tt.mad/Source/PDF/pdfwrap_lhapdf.f @@ -5,6 +5,7 @@ SUBROUTINE PDFWRAP C INCLUDE 'pdf.inc' INCLUDE '../alfas.inc' + INCLUDE '../vector.inc' INCLUDE '../coupl.inc' REAL*8 ZMASS DATA ZMASS/91.188D0/ diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index 0b493ae244..5a3da931f2 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -263,6 +263,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION G1(VECSIZE_MEMMAX) DOUBLE PRECISION 
G2(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py b/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py index 824815f47b..ef1bf58979 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py @@ -2687,16 +2687,23 @@ def __new__(cls, finput=None, **opt): except Exception as error: import launch_plugin target_class = launch_plugin.RunCard - elif not MADEVENT and os.path.exists(path.replace('run_card.dat', '../bin/internal/launch_plugin.py')): - misc.sprint('try to use plugin class') - pydir = path.replace('run_card.dat', '../bin/internal/') - with misc.TMP_variable(sys, 'path', sys.path + [pydir]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - target_class = launch_plugin.RunCard + elif not MADEVENT: + if 'run_card.dat' in path: + launch_plugin_path = path.replace('run_card.dat', '../bin/internal/launch_plugin.py') + elif 'run_card_default.dat' in path: + launch_plugin_path = path.replace('run_card_default.dat', '../bin/internal/launch_plugin.py') + else: + launch_plugin_path = None + if launch_plugin_path and os.path.exists(launch_plugin_path): + misc.sprint('try to use plugin class', path.replace('run_card.dat', '../bin/internal/launch_plugin.py')) + pydir = os.path.dirname(launch_plugin_path) + with misc.TMP_variable(sys, 'path', sys.path + [pydir]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard else: return None @@ -2929,6 +2936,7 @@ def valid_line(self, line, tmp): def reset_simd(self, old_value, new_value, name, *args, **opts): + #return raise Exception('pass in reset simd') def make_clean(self,old_value, new_value, name, dir): diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index f185804aa5..833219aea2 100644 --- 
a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005272865295410156  +DEBUG: model prefixing takes 0.005304813385009766  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -180,7 +180,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.141 s +ALOHA: aloha creates 2 routines in 0.142 s VVV1 FFV1 FFV1 @@ -197,6 +197,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.547s -user 0m0.471s -sys 0m0.060s +real 0m0.594s +user 0m0.459s +sys 0m0.061s diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index e5d91cbc60..53b724aa19 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~; add process g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005311250686645508  +DEBUG: model prefixing takes 0.005379676818847656  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -184,7 +184,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -201,7 +201,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -216,8 +216,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s -Wrote files for 46 helas calls in 0.242 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.043 s +Wrote files for 46 helas calls in 0.240 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines @@ -259,21 +259,21 @@ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  DEBUG: self.in_madevent_mode =  True [output.py at line 207]  DEBUG! 
Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py Hunk #1 succeeded at 385 (offset 5 lines). patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). +Hunk #1 succeeded at 484 (offset 15 lines). patching file driver.f patching file matrix1.f -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). +Hunk #1 succeeded at 484 (offset 15 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 159 (offset 16 lines). 
@@ -287,9 +287,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.268s -user 0m2.030s -sys 0m0.237s +real 0m2.267s +user 0m2.029s +sys 0m0.232s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/PDF/pdfwrap_lhapdf.f b/epochX/cudacpp/gg_tt01g.mad/Source/PDF/pdfwrap_lhapdf.f index 0be926e6cd..3f36905346 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/PDF/pdfwrap_lhapdf.f +++ b/epochX/cudacpp/gg_tt01g.mad/Source/PDF/pdfwrap_lhapdf.f @@ -5,6 +5,7 @@ SUBROUTINE PDFWRAP C INCLUDE 'pdf.inc' INCLUDE '../alfas.inc' + INCLUDE '../vector.inc' INCLUDE '../coupl.inc' REAL*8 ZMASS DATA ZMASS/91.188D0/ diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index 0b493ae244..5a3da931f2 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -263,6 +263,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION G1(VECSIZE_MEMMAX) DOUBLE PRECISION G2(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f index 68e664f70c..6eb0fa0827 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f @@ -263,6 +263,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION G1(VECSIZE_MEMMAX) DOUBLE PRECISION G2(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py index 
824815f47b..ef1bf58979 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py @@ -2687,16 +2687,23 @@ def __new__(cls, finput=None, **opt): except Exception as error: import launch_plugin target_class = launch_plugin.RunCard - elif not MADEVENT and os.path.exists(path.replace('run_card.dat', '../bin/internal/launch_plugin.py')): - misc.sprint('try to use plugin class') - pydir = path.replace('run_card.dat', '../bin/internal/') - with misc.TMP_variable(sys, 'path', sys.path + [pydir]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - target_class = launch_plugin.RunCard + elif not MADEVENT: + if 'run_card.dat' in path: + launch_plugin_path = path.replace('run_card.dat', '../bin/internal/launch_plugin.py') + elif 'run_card_default.dat' in path: + launch_plugin_path = path.replace('run_card_default.dat', '../bin/internal/launch_plugin.py') + else: + launch_plugin_path = None + if launch_plugin_path and os.path.exists(launch_plugin_path): + misc.sprint('try to use plugin class', path.replace('run_card.dat', '../bin/internal/launch_plugin.py')) + pydir = os.path.dirname(launch_plugin_path) + with misc.TMP_variable(sys, 'path', sys.path + [pydir]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard else: return None @@ -2929,6 +2936,7 @@ def valid_line(self, line, tmp): def reset_simd(self, old_value, new_value, name, *args, **opts): + #return raise Exception('pass in reset simd') def make_clean(self,old_value, new_value, name, dir): diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index e6f353626a..b6f681cfd6 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > 
t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0053670406341552734  +DEBUG: model prefixing takes 0.005342960357666016  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,14 +191,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s -Wrote files for 36 helas calls in 0.147 s +Wrote files for 36 helas calls in 0.145 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.336 s +ALOHA: aloha creates 5 routines in 0.319 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha 
creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.308 s +ALOHA: aloha creates 10 routines in 0.306 s VVV1 VVV1 FFV1 @@ -233,16 +233,16 @@ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  DEBUG: self.in_madevent_mode =  True [output.py at line 207]  DEBUG! Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py Hunk #1 succeeded at 385 (offset 5 lines). patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). +Hunk #1 succeeded at 484 (offset 15 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 159 (offset 16 lines). @@ -256,9 +256,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.253s -user 0m1.939s -sys 0m0.240s +real 0m2.134s +user 0m1.911s +sys 0m0.216s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttg.mad/Source/PDF/pdfwrap_lhapdf.f b/epochX/cudacpp/gg_ttg.mad/Source/PDF/pdfwrap_lhapdf.f index 0be926e6cd..3f36905346 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/PDF/pdfwrap_lhapdf.f +++ b/epochX/cudacpp/gg_ttg.mad/Source/PDF/pdfwrap_lhapdf.f @@ -5,6 +5,7 @@ SUBROUTINE PDFWRAP C INCLUDE 'pdf.inc' INCLUDE '../alfas.inc' + INCLUDE '../vector.inc' INCLUDE '../coupl.inc' REAL*8 ZMASS DATA ZMASS/91.188D0/ diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index b8615bc68f..110e204c24 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -263,6 +263,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION G1(VECSIZE_MEMMAX) DOUBLE PRECISION G2(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py index 824815f47b..ef1bf58979 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py @@ -2687,16 +2687,23 @@ def __new__(cls, finput=None, **opt): except Exception as error: import launch_plugin target_class = launch_plugin.RunCard - elif not MADEVENT and os.path.exists(path.replace('run_card.dat', '../bin/internal/launch_plugin.py')): - misc.sprint('try to use plugin class') - pydir = path.replace('run_card.dat', '../bin/internal/') - with misc.TMP_variable(sys, 'path', sys.path + [pydir]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - target_class = launch_plugin.RunCard + elif 
not MADEVENT: + if 'run_card.dat' in path: + launch_plugin_path = path.replace('run_card.dat', '../bin/internal/launch_plugin.py') + elif 'run_card_default.dat' in path: + launch_plugin_path = path.replace('run_card_default.dat', '../bin/internal/launch_plugin.py') + else: + launch_plugin_path = None + if launch_plugin_path and os.path.exists(launch_plugin_path): + misc.sprint('try to use plugin class', path.replace('run_card.dat', '../bin/internal/launch_plugin.py')) + pydir = os.path.dirname(launch_plugin_path) + with misc.TMP_variable(sys, 'path', sys.path + [pydir]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard else: return None @@ -2929,6 +2936,7 @@ def valid_line(self, line, tmp): def reset_simd(self, old_value, new_value, name, *args, **opts): + #return raise Exception('pass in reset simd') def make_clean(self,old_value, new_value, name, dir): diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 72297bf232..a58f4e64b0 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005309343338012695  +DEBUG: model prefixing takes 0.005480527877807617  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -Generated helas calls for 1 subprocesses (16 diagrams) in 0.036 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.322 s +ALOHA: aloha creates 5 routines in 0.327 s VVV1 VVV1 FFV1 @@ -205,6 +205,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/ DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.788s -user 0m0.726s -sys 0m0.046s +real 0m0.808s +user 0m0.714s +sys 0m0.055s diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index b334f66c3b..0d7bd5faa1 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model 
prefixing takes 0.00529932975769043  +DEBUG: model prefixing takes 0.0053293704986572266  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,8 +190,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.419 s -Wrote files for 222 helas calls in 0.679 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.417 s +Wrote files for 222 helas calls in 0.677 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines @@ -236,16 +236,16 @@ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  DEBUG: self.in_madevent_mode =  True [output.py at line 207]  DEBUG! 
Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py Hunk #1 succeeded at 385 (offset 5 lines). patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). +Hunk #1 succeeded at 484 (offset 15 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 191 (offset 48 lines). @@ -259,9 +259,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.229s -user 0m3.016s -sys 0m0.210s +real 0m3.225s +user 0m2.982s +sys 0m0.220s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/PDF/pdfwrap_lhapdf.f b/epochX/cudacpp/gg_ttgg.mad/Source/PDF/pdfwrap_lhapdf.f index 0be926e6cd..3f36905346 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/PDF/pdfwrap_lhapdf.f +++ b/epochX/cudacpp/gg_ttgg.mad/Source/PDF/pdfwrap_lhapdf.f @@ -5,6 +5,7 @@ SUBROUTINE PDFWRAP C INCLUDE 'pdf.inc' INCLUDE '../alfas.inc' + INCLUDE '../vector.inc' INCLUDE '../coupl.inc' REAL*8 ZMASS DATA ZMASS/91.188D0/ diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f index 0fa6436690..043887bde3 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f @@ -263,6 +263,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION G1(VECSIZE_MEMMAX) DOUBLE PRECISION G2(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py index 824815f47b..ef1bf58979 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py @@ -2687,16 +2687,23 @@ def __new__(cls, finput=None, **opt): except Exception as error: import launch_plugin target_class = launch_plugin.RunCard - elif not MADEVENT and os.path.exists(path.replace('run_card.dat', '../bin/internal/launch_plugin.py')): - misc.sprint('try to use plugin class') - pydir = path.replace('run_card.dat', '../bin/internal/') - with misc.TMP_variable(sys, 'path', sys.path + [pydir]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - target_class = 
launch_plugin.RunCard + elif not MADEVENT: + if 'run_card.dat' in path: + launch_plugin_path = path.replace('run_card.dat', '../bin/internal/launch_plugin.py') + elif 'run_card_default.dat' in path: + launch_plugin_path = path.replace('run_card_default.dat', '../bin/internal/launch_plugin.py') + else: + launch_plugin_path = None + if launch_plugin_path and os.path.exists(launch_plugin_path): + misc.sprint('try to use plugin class', path.replace('run_card.dat', '../bin/internal/launch_plugin.py')) + pydir = os.path.dirname(launch_plugin_path) + with misc.TMP_variable(sys, 'path', sys.path + [pydir]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard else: return None @@ -2929,6 +2936,7 @@ def valid_line(self, line, tmp): def reset_simd(self, old_value, new_value, name, *args, **opts): + #return raise Exception('pass in reset simd') def make_clean(self,old_value, new_value, name, dir): diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 06e55809ba..66076ad947 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00528264045715332  +DEBUG: model prefixing takes 0.005255222320556641  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.156 s +1 processes with 123 diagrams generated in 0.158 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.423 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.420 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.328 s +ALOHA: aloha creates 5 routines in 0.312 s VVV1 VVV1 FFV1 @@ -208,6 +208,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m1.515s -user 0m1.393s -sys 0m0.045s +real 0m1.416s +user 0m1.345s +sys 0m0.059s diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index f1893e745c..31d7395875 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt 
+++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005330801010131836  +DEBUG: model prefixing takes 0.0056993961334228516  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.864 s +1 processes with 1240 diagrams generated in 1.844 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -177,7 +177,7 @@ INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1630 term in 7s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -192,15 +192,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.598 s -Wrote files for 2281 helas calls in 18.100 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.525 s +Wrote files for 2281 helas calls in 18.330 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.313 s +ALOHA: aloha creates 5 routines in 0.318 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -208,7 +208,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.306 s +ALOHA: aloha creates 10 routines in 0.312 s VVV1 VVV1 FFV1 @@ -238,16 +238,16 @@ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  DEBUG: self.in_madevent_mode =  True [output.py at line 207]  DEBUG! 
Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py Hunk #1 succeeded at 385 (offset 5 lines). patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). +Hunk #1 succeeded at 484 (offset 15 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 255 (offset 112 lines). @@ -261,9 +261,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m28.759s -user 0m28.272s -sys 0m0.392s +real 0m28.929s +user 0m28.460s +sys 0m0.360s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/PDF/pdfwrap_lhapdf.f b/epochX/cudacpp/gg_ttggg.mad/Source/PDF/pdfwrap_lhapdf.f index 0be926e6cd..3f36905346 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/PDF/pdfwrap_lhapdf.f +++ b/epochX/cudacpp/gg_ttggg.mad/Source/PDF/pdfwrap_lhapdf.f @@ -5,6 +5,7 @@ SUBROUTINE PDFWRAP C INCLUDE 'pdf.inc' INCLUDE '../alfas.inc' + INCLUDE '../vector.inc' INCLUDE '../coupl.inc' REAL*8 ZMASS DATA ZMASS/91.188D0/ diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f index 6828f1c252..51b8d47520 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f @@ -263,6 +263,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION G1(VECSIZE_MEMMAX) DOUBLE PRECISION G2(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py index 824815f47b..ef1bf58979 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py @@ -2687,16 +2687,23 @@ def __new__(cls, finput=None, **opt): except Exception as error: import launch_plugin target_class = launch_plugin.RunCard - elif not MADEVENT and os.path.exists(path.replace('run_card.dat', '../bin/internal/launch_plugin.py')): - misc.sprint('try to use plugin class') - pydir = path.replace('run_card.dat', '../bin/internal/') - with misc.TMP_variable(sys, 'path', sys.path + [pydir]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - 
target_class = launch_plugin.RunCard + elif not MADEVENT: + if 'run_card.dat' in path: + launch_plugin_path = path.replace('run_card.dat', '../bin/internal/launch_plugin.py') + elif 'run_card_default.dat' in path: + launch_plugin_path = path.replace('run_card_default.dat', '../bin/internal/launch_plugin.py') + else: + launch_plugin_path = None + if launch_plugin_path and os.path.exists(launch_plugin_path): + misc.sprint('try to use plugin class', path.replace('run_card.dat', '../bin/internal/launch_plugin.py')) + pydir = os.path.dirname(launch_plugin_path) + with misc.TMP_variable(sys, 'path', sys.path + [pydir]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard else: return None @@ -2929,6 +2936,7 @@ def valid_line(self, line, tmp): def reset_simd(self, old_value, new_value, name, *args, **opts): + #return raise Exception('pass in reset simd') def make_clean(self,old_value, new_value, name, dir): diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index df96dd2b8c..4e6ebcfe4b 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005476713180541992  +DEBUG: model prefixing takes 0.005368709564208984  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.864 s +1 processes with 1240 diagrams generated in 1.851 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.446 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.456 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.342 s +ALOHA: aloha creates 5 routines in 0.339 s VVV1 VVV1 FFV1 @@ -208,6 +208,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m12.796s -user 0m12.647s -sys 0m0.093s +real 0m12.963s +user 0m12.634s +sys 0m0.082s diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index c8e3576630..09ee0f32ca 100644 --- 
a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0055692195892333984  +DEBUG: model prefixing takes 0.0053806304931640625  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.078 s +8 processes with 40 diagrams generated in 0.076 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -197,7 +197,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -214,7 +214,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -229,17 +229,17 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s -Wrote files for 32 helas calls in 0.215 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s +Wrote files for 32 helas calls in 0.214 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.145 s +ALOHA: aloha creates 2 routines in 0.141 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.135 s +ALOHA: aloha creates 4 routines in 0.129 s FFV1 FFV1 FFV1 @@ -262,16 +262,16 @@ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  DEBUG: self.in_madevent_mode =  True [output.py at line 207]  DEBUG! 
Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py Hunk #1 succeeded at 385 (offset 5 lines). patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 527 (offset 58 lines). +Hunk #1 succeeded at 528 (offset 59 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -279,9 +279,9 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 247 (offset 26 lines). Hunk #4 succeeded at 281 (offset 32 lines). Hunk #5 succeeded at 326 (offset 32 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 527 (offset 58 lines). +Hunk #1 succeeded at 528 (offset 59 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -296,9 +296,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.935s -user 0m1.699s -sys 0m0.219s +real 0m1.978s +user 0m1.677s +sys 0m0.213s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gq_ttq.mad/Source/PDF/pdfwrap_lhapdf.f b/epochX/cudacpp/gq_ttq.mad/Source/PDF/pdfwrap_lhapdf.f index 0be926e6cd..3f36905346 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/PDF/pdfwrap_lhapdf.f +++ b/epochX/cudacpp/gq_ttq.mad/Source/PDF/pdfwrap_lhapdf.f @@ -5,6 +5,7 @@ SUBROUTINE PDFWRAP C INCLUDE 'pdf.inc' INCLUDE '../alfas.inc' + INCLUDE '../vector.inc' INCLUDE '../coupl.inc' REAL*8 ZMASS DATA ZMASS/91.188D0/ diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index 81ab70f6d1..ba39cab867 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -281,6 +281,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION G1(VECSIZE_MEMMAX) DOUBLE PRECISION D2(VECSIZE_MEMMAX),U2(VECSIZE_MEMMAX) diff 
--git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index b58c5d70bd..5ec9701b78 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -281,6 +281,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION G1(VECSIZE_MEMMAX) DOUBLE PRECISION CX2(VECSIZE_MEMMAX),SX2(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py index 824815f47b..ef1bf58979 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py @@ -2687,16 +2687,23 @@ def __new__(cls, finput=None, **opt): except Exception as error: import launch_plugin target_class = launch_plugin.RunCard - elif not MADEVENT and os.path.exists(path.replace('run_card.dat', '../bin/internal/launch_plugin.py')): - misc.sprint('try to use plugin class') - pydir = path.replace('run_card.dat', '../bin/internal/') - with misc.TMP_variable(sys, 'path', sys.path + [pydir]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - target_class = launch_plugin.RunCard + elif not MADEVENT: + if 'run_card.dat' in path: + launch_plugin_path = path.replace('run_card.dat', '../bin/internal/launch_plugin.py') + elif 'run_card_default.dat' in path: + launch_plugin_path = path.replace('run_card_default.dat', '../bin/internal/launch_plugin.py') + else: + launch_plugin_path = None + if launch_plugin_path and os.path.exists(launch_plugin_path): + misc.sprint('try to use plugin class', path.replace('run_card.dat', '../bin/internal/launch_plugin.py')) + pydir = os.path.dirname(launch_plugin_path) + with misc.TMP_variable(sys, 'path', sys.path + [pydir]): + from importlib 
import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard else: return None @@ -2929,6 +2936,7 @@ def valid_line(self, line, tmp): def reset_simd(self, old_value, new_value, name, *args, **opts): + #return raise Exception('pass in reset simd') def make_clean(self,old_value, new_value, name, dir): diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 9cc1ee5d97..30aaffd6fa 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005384206771850586  +DEBUG: model prefixing takes 0.005368471145629883  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -205,12 +205,12 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. 
-Generated helas calls for 2 subprocesses (10 diagrams) in 0.029 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.140 s +ALOHA: aloha creates 2 routines in 0.141 s FFV1 FFV1 FFV1 @@ -228,6 +228,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.649s -user 0m0.590s -sys 0m0.052s +real 0m0.651s +user 0m0.582s +sys 0m0.051s diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index 2e1f4941cf..e44a158345 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -151,7 +151,7 @@ Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines -ALOHA: aloha creates 1 routines in 0.061 s +ALOHA: aloha creates 1 routines in 0.060 s VVS3 FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. 
@@ -165,6 +165,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.552s -user 0m0.380s -sys 0m0.051s +real 0m0.420s +user 0m0.362s +sys 0m0.044s diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 698e17243e..6910847468 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0054972171783447266  +DEBUG: model prefixing takes 0.005797863006591797  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.031 s +5 processes with 7 diagrams generated in 0.029 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.147 s +13 processes with 76 diagrams generated in 0.134 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. 
@@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.810 s +65 processes with 1119 diagrams generated in 1.814 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -497,7 +497,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -514,7 +514,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -531,7 +531,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -548,7 +548,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -565,7 +565,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -582,7 +582,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -599,7 +599,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -616,7 +616,7 @@ INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -633,7 +633,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -650,7 +650,7 @@ INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -667,7 +667,7 @@ INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -684,7 +684,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -701,7 +701,7 @@ INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -718,7 +718,7 @@ INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -735,7 +735,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -752,7 +752,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -769,7 +769,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -786,7 +786,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -801,15 +801,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.266 s -Wrote files for 810 helas calls in 3.189 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.282 s +Wrote files for 810 helas calls in 3.212 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.329 s +ALOHA: aloha creates 5 routines in 0.600 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -817,7 +817,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.306 s +ALOHA: aloha creates 10 routines in 0.332 s VVV1 VVV1 FFV1 @@ -847,21 +847,21 @@ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  DEBUG: self.in_madevent_mode =  True [output.py at line 207]  DEBUG! 
Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py Hunk #1 succeeded at 385 (offset 5 lines). patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). +Hunk #1 succeeded at 484 (offset 15 lines). patching file driver.f patching file matrix1.f -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 538 (offset 69 lines). +Hunk #1 succeeded at 539 (offset 70 lines). 
patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -869,18 +869,18 @@ Hunk #2 succeeded at 146 (offset 3 lines). Hunk #3 succeeded at 224 (offset 3 lines). Hunk #4 succeeded at 252 (offset 3 lines). Hunk #5 succeeded at 297 (offset 3 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). +Hunk #1 succeeded at 484 (offset 15 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 159 (offset 16 lines). Hunk #3 succeeded at 237 (offset 16 lines). Hunk #4 succeeded at 265 (offset 16 lines). Hunk #5 succeeded at 310 (offset 16 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 527 (offset 58 lines). +Hunk #1 succeeded at 528 (offset 59 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -888,9 +888,9 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 240 (offset 19 lines). Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 527 (offset 58 lines). +Hunk #1 succeeded at 528 (offset 59 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -898,9 +898,9 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 240 (offset 19 lines). Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 538 (offset 69 lines). +Hunk #1 succeeded at 539 (offset 70 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -908,18 +908,18 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 240 (offset 19 lines). Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 483 (offset 14 lines). +Hunk #1 succeeded at 484 (offset 15 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 191 (offset 48 lines). Hunk #3 succeeded at 269 (offset 48 lines). Hunk #4 succeeded at 297 (offset 48 lines). Hunk #5 succeeded at 342 (offset 48 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 516 (offset 47 lines). +Hunk #1 succeeded at 517 (offset 48 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -927,9 +927,9 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 527 (offset 58 lines). +Hunk #1 succeeded at 528 (offset 59 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -937,9 +937,9 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 527 (offset 58 lines). +Hunk #1 succeeded at 528 (offset 59 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -947,9 +947,9 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 554 (offset 85 lines). +Hunk #1 succeeded at 555 (offset 86 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 77 (offset 5 lines). @@ -957,9 +957,9 @@ Hunk #2 succeeded at 196 (offset 53 lines). Hunk #3 succeeded at 274 (offset 53 lines). Hunk #4 succeeded at 302 (offset 53 lines). Hunk #5 succeeded at 347 (offset 53 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 626 (offset 157 lines). +Hunk #1 succeeded at 627 (offset 158 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 83 (offset 11 lines). @@ -967,9 +967,9 @@ Hunk #2 succeeded at 202 (offset 59 lines). Hunk #3 succeeded at 280 (offset 59 lines). Hunk #4 succeeded at 308 (offset 59 lines). Hunk #5 succeeded at 353 (offset 59 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 538 (offset 69 lines). +Hunk #1 succeeded at 539 (offset 70 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -977,9 +977,9 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 626 (offset 157 lines). +Hunk #1 succeeded at 627 (offset 158 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 83 (offset 11 lines). @@ -987,9 +987,9 @@ Hunk #2 succeeded at 202 (offset 59 lines). Hunk #3 succeeded at 280 (offset 59 lines). Hunk #4 succeeded at 308 (offset 59 lines). Hunk #5 succeeded at 353 (offset 59 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 538 (offset 69 lines). +Hunk #1 succeeded at 539 (offset 70 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -997,9 +997,9 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 538 (offset 69 lines). +Hunk #1 succeeded at 539 (offset 70 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1007,9 +1007,9 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 554 (offset 85 lines). +Hunk #1 succeeded at 555 (offset 86 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 77 (offset 5 lines). @@ -1017,9 +1017,9 @@ Hunk #2 succeeded at 196 (offset 53 lines). Hunk #3 succeeded at 274 (offset 53 lines). Hunk #4 succeeded at 302 (offset 53 lines). Hunk #5 succeeded at 347 (offset 53 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/PLUGIN/CUDACPP_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 538 (offset 69 lines). +Hunk #1 succeeded at 539 (offset 70 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1034,9 +1034,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m8.738s -user 0m8.225s -sys 0m0.476s +real 0m9.319s +user 0m8.334s +sys 0m0.458s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/PDF/pdfwrap_lhapdf.f b/epochX/cudacpp/pp_tt012j.mad/Source/PDF/pdfwrap_lhapdf.f index 0be926e6cd..3f36905346 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/PDF/pdfwrap_lhapdf.f +++ b/epochX/cudacpp/pp_tt012j.mad/Source/PDF/pdfwrap_lhapdf.f @@ -5,6 +5,7 @@ SUBROUTINE PDFWRAP C INCLUDE 'pdf.inc' INCLUDE '../alfas.inc' + INCLUDE '../vector.inc' INCLUDE '../coupl.inc' REAL*8 ZMASS DATA ZMASS/91.188D0/ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f index ee723193db..a48f6997f3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f @@ -263,6 +263,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION G1(VECSIZE_MEMMAX) DOUBLE PRECISION G2(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f index f205954b28..f9147f699e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f @@ -284,6 +284,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION D1(VECSIZE_MEMMAX),U1(VECSIZE_MEMMAX) $ ,S1(VECSIZE_MEMMAX),C1(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index b8615bc68f..110e204c24 100644 --- 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -263,6 +263,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION G1(VECSIZE_MEMMAX) DOUBLE PRECISION G2(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index 81ab70f6d1..ba39cab867 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -281,6 +281,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION G1(VECSIZE_MEMMAX) DOUBLE PRECISION D2(VECSIZE_MEMMAX),U2(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index b58c5d70bd..5ec9701b78 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -281,6 +281,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION G1(VECSIZE_MEMMAX) DOUBLE PRECISION CX2(VECSIZE_MEMMAX),SX2(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f index d85b1143a0..842b1c72d4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f @@ -284,6 +284,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE 
INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION D1(VECSIZE_MEMMAX),U1(VECSIZE_MEMMAX) $ ,S1(VECSIZE_MEMMAX),C1(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f index 4d2e1b4f8c..4e2bfe85ab 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f @@ -263,6 +263,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION G1(VECSIZE_MEMMAX) DOUBLE PRECISION G2(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f index 67adf83921..e5a0390c47 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f @@ -278,6 +278,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION G1(VECSIZE_MEMMAX) DOUBLE PRECISION G2(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f index 83a2a24681..309be94a99 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f @@ -281,6 +281,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION G1(VECSIZE_MEMMAX) DOUBLE PRECISION D2(VECSIZE_MEMMAX),U2(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f index 
8cb3f9af60..23d82657bf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f @@ -281,6 +281,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION G1(VECSIZE_MEMMAX) DOUBLE PRECISION CX2(VECSIZE_MEMMAX),SX2(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f index 3488dfd2e6..4d12dfeade 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f @@ -292,6 +292,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION D1(VECSIZE_MEMMAX),U1(VECSIZE_MEMMAX) $ ,C1(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f index 0b6e873ee4..5bac32b00a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f @@ -324,6 +324,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION D1(VECSIZE_MEMMAX),U1(VECSIZE_MEMMAX) $ ,S1(VECSIZE_MEMMAX),C1(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f index 5ed7bc881f..50c16edaac 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f @@ -284,6 +284,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, 
ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION D1(VECSIZE_MEMMAX),U1(VECSIZE_MEMMAX) $ ,S1(VECSIZE_MEMMAX),C1(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f index a32595dce6..577a8d9c54 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f @@ -324,6 +324,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION D1(VECSIZE_MEMMAX),U1(VECSIZE_MEMMAX) $ ,S1(VECSIZE_MEMMAX),C1(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f index baaee299a2..f4e431c5ce 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f @@ -284,6 +284,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION D1(VECSIZE_MEMMAX),U1(VECSIZE_MEMMAX) $ ,S1(VECSIZE_MEMMAX),C1(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f index c2206e8d5e..123a3ae00e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f @@ -284,6 +284,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION D1(VECSIZE_MEMMAX),U1(VECSIZE_MEMMAX) $ ,S1(VECSIZE_MEMMAX),C1(VECSIZE_MEMMAX) diff --git 
a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f index e92ee65fd7..a4cb748b19 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f @@ -292,6 +292,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION CX1(VECSIZE_MEMMAX),UX1(VECSIZE_MEMMAX) $ ,DX1(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f index cad7f4197d..3a3ed05151 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f @@ -284,6 +284,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C C LOCAL VARIABLES C + DOUBLE PRECISION QSCALE INTEGER I,ITYPE,LP,IPROC DOUBLE PRECISION CX1(VECSIZE_MEMMAX),SX1(VECSIZE_MEMMAX) $ ,UX1(VECSIZE_MEMMAX),DX1(VECSIZE_MEMMAX) diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py index 824815f47b..ef1bf58979 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py @@ -2687,16 +2687,23 @@ def __new__(cls, finput=None, **opt): except Exception as error: import launch_plugin target_class = launch_plugin.RunCard - elif not MADEVENT and os.path.exists(path.replace('run_card.dat', '../bin/internal/launch_plugin.py')): - misc.sprint('try to use plugin class') - pydir = path.replace('run_card.dat', '../bin/internal/') - with misc.TMP_variable(sys, 'path', sys.path + [pydir]): - from importlib import reload - try: - reload('launch_plugin') - except Exception as error: - import launch_plugin - target_class = 
launch_plugin.RunCard + elif not MADEVENT: + if 'run_card.dat' in path: + launch_plugin_path = path.replace('run_card.dat', '../bin/internal/launch_plugin.py') + elif 'run_card_default.dat' in path: + launch_plugin_path = path.replace('run_card_default.dat', '../bin/internal/launch_plugin.py') + else: + launch_plugin_path = None + if launch_plugin_path and os.path.exists(launch_plugin_path): + misc.sprint('try to use plugin class', path.replace('run_card.dat', '../bin/internal/launch_plugin.py')) + pydir = os.path.dirname(launch_plugin_path) + with misc.TMP_variable(sys, 'path', sys.path + [pydir]): + from importlib import reload + try: + reload('launch_plugin') + except Exception as error: + import launch_plugin + target_class = launch_plugin.RunCard else: return None @@ -2929,6 +2936,7 @@ def valid_line(self, line, tmp): def reset_simd(self, old_value, new_value, name, *args, **opts): + #return raise Exception('pass in reset simd') def make_clean(self,old_value, new_value, name, dir): From b3badbf58ef2dd28647aca3c259b66f3f4574f3c Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 27 Oct 2023 22:36:24 +0200 Subject: [PATCH 055/119] [oct23av] regenerate all 8 mad and 7 sa processes after completing the merging of the latest upstream/master I quickly checked that ggtt tput/tmad tests succeed (logs not kept). I will now rerun full scale tests. 
--- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 16 +++--- .../ee_mumu.mad/Source/DHELAS/aloha_file.inc | 2 +- .../ee_mumu.mad/bin/internal/lhe_parser.py | 2 + .../CODEGEN_cudacpp_ee_mumu_log.txt | 10 ++-- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 12 ++-- .../gg_tt.mad/Source/DHELAS/aloha_file.inc | 2 +- .../gg_tt.mad/bin/internal/lhe_parser.py | 2 + .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 10 ++-- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 24 ++++---- .../gg_tt01g.mad/Source/DHELAS/aloha_file.inc | 2 +- .../gg_tt01g.mad/bin/internal/lhe_parser.py | 2 + .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 16 +++--- .../gg_ttg.mad/Source/DHELAS/aloha_file.inc | 2 +- .../gg_ttg.mad/bin/internal/lhe_parser.py | 2 + .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 12 ++-- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 18 +++--- .../gg_ttgg.mad/Source/DHELAS/aloha_file.inc | 2 +- .../gg_ttgg.mad/bin/internal/lhe_parser.py | 2 + .../CODEGEN_cudacpp_gg_ttgg_log.txt | 12 ++-- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 22 ++++---- .../gg_ttggg.mad/Source/DHELAS/aloha_file.inc | 2 +- .../gg_ttggg.mad/bin/internal/lhe_parser.py | 2 + .../CODEGEN_cudacpp_gg_ttggg_log.txt | 12 ++-- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 20 +++---- .../gq_ttq.mad/Source/DHELAS/aloha_file.inc | 2 +- .../gq_ttq.mad/bin/internal/lhe_parser.py | 2 + .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 8 +-- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 8 +-- .../CODEGEN_mad_pp_tt012j_log.txt | 56 +++++++++---------- .../Source/DHELAS/aloha_file.inc | 2 +- .../pp_tt012j.mad/bin/internal/lhe_parser.py | 2 + 31 files changed, 152 insertions(+), 136 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index c2f712a9a0..f9f4abe253 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently 
active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005261898040771484  +DEBUG: model prefixing takes 0.005633115768432617  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,7 +174,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,19 +191,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.097 s +Wrote files for 8 helas calls in 0.096 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.196 s +ALOHA: aloha creates 3 routines in 0.199 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.251 s +ALOHA: aloha creates 7 routines in 0.252 s FFV1 FFV1 FFV2 @@ -251,9 +251,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this 
process. quit -real 0m1.848s -user 0m1.628s -sys 0m0.206s +real 0m1.968s +user 0m1.733s +sys 0m0.228s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_file.inc index 738db319fd..13aaa31c6d 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1_0.o FFV4_3.o FFV1P0_3.o FFV2_0.o FFV4_0.o FFV2_3.o +ALOHARoutine = FFV1P0_3.o FFV1_0.o FFV2_0.o FFV2_3.o FFV4_0.o FFV4_3.o diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/lhe_parser.py index 4c1d5f0d23..a6b8582e1a 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/lhe_parser.py @@ -1075,6 +1075,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): # check special case without PDF for one (or both) beam if init_information["idbmup1"] in [0,9]: event = next(self) + if len(event) == 0: + event = Event(str(event)) init_information["idbmup1"]= event[0].pdg if init_information["idbmup2"] == 0: init_information["idbmup2"]= event[1].pdg diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index f52c8e4178..19633ae89d 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005647897720336914  +DEBUG: model prefixing takes 0.005425214767456055  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,7 +174,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. -Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s +Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines @@ -202,6 +202,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.727s -user 0m0.588s -sys 0m0.045s +real 0m0.691s +user 0m0.582s +sys 0m0.053s diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 15b42e74b3..f25d273fa3 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005278825759887695  +DEBUG: model prefixing takes 0.00533747673034668  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -195,7 +195,7 @@ Wrote files for 10 helas calls in 0.100 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.142 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 @@ -241,9 +241,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.685s -user 0m1.469s -sys 0m0.195s +real 0m1.746s +user 0m1.450s +sys 0m0.214s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gg_tt.mad/Source/DHELAS/aloha_file.inc index 5597c614b0..3a21194b00 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gg_tt.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1_1.o FFV1_0.o FFV1_2.o VVV1P0_1.o +ALOHARoutine = FFV1_0.o FFV1_1.o FFV1_2.o VVV1P0_1.o diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/gg_tt.mad/bin/internal/lhe_parser.py index 4c1d5f0d23..a6b8582e1a 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/lhe_parser.py @@ -1075,6 +1075,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): # check special case without PDF for one (or both) beam if init_information["idbmup1"] in [0,9]: event = next(self) + if len(event) == 0: + event = Event(str(event)) init_information["idbmup1"]= event[0].pdg if init_information["idbmup2"] == 0: init_information["idbmup2"]= event[1].pdg diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 833219aea2..6106a063b1 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005304813385009766  +DEBUG: model prefixing takes 0.0053386688232421875  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -180,7 +180,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.145 s VVV1 FFV1 FFV1 @@ -197,6 +197,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.594s -user 0m0.459s -sys 0m0.061s +real 0m0.562s +user 0m0.474s +sys 0m0.049s diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 53b724aa19..5cfefc86ec 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~; add process g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005379676818847656  +DEBUG: model prefixing takes 0.005840778350830078  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,14 +155,14 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.008 s +1 processes with 3 diagrams generated in 0.009 s Total: 1 processes with 3 diagrams INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.019 s +1 processes with 16 diagrams generated in 0.021 s Total: 2 processes with 19 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -184,7 +184,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -201,7 +201,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -216,15 +216,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 2 subprocesses (19 diagrams) in 0.043 s -Wrote files for 46 helas calls in 0.240 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s +Wrote files for 46 helas calls in 0.239 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.321 s +ALOHA: aloha creates 5 routines in 0.322 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -232,7 +232,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.306 s +ALOHA: aloha creates 10 routines in 0.305 s VVV1 VVV1 FFV1 @@ -287,9 +287,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.267s -user 0m2.029s -sys 0m0.232s +real 0m2.275s +user 0m2.024s +sys 0m0.250s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_file.inc index 50c12b0804..7639734c1c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1_1.o VVVV4P0_1.o FFV1_0.o VVV1_0.o FFV1_2.o VVVV3P0_1.o VVVV1P0_1.o VVV1P0_1.o FFV1P0_3.o +ALOHARoutine = FFV1P0_3.o FFV1_0.o FFV1_1.o FFV1_2.o VVV1P0_1.o VVV1_0.o VVVV1P0_1.o VVVV3P0_1.o VVVV4P0_1.o diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/lhe_parser.py index 4c1d5f0d23..a6b8582e1a 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/lhe_parser.py @@ -1075,6 +1075,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): # check special case without PDF for one (or both) beam if init_information["idbmup1"] in [0,9]: event = next(self) + if len(event) == 0: + event = Event(str(event)) init_information["idbmup1"]= event[0].pdg if init_information["idbmup2"] == 0: init_information["idbmup2"]= event[1].pdg diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index b6f681cfd6..fe344f8cc1 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005342960357666016  +DEBUG: model prefixing takes 0.0052983760833740234  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,14 +191,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s -Wrote files for 36 helas calls in 0.145 s +Wrote files for 36 helas calls in 0.147 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.319 s +ALOHA: aloha creates 5 routines in 0.320 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.306 s +ALOHA: aloha creates 10 routines in 0.305 s VVV1 VVV1 FFV1 @@ -256,9 +256,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about 
this process. quit -real 0m2.134s -user 0m1.911s -sys 0m0.216s +real 0m2.259s +user 0m1.920s +sys 0m0.209s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_file.inc index 50c12b0804..7639734c1c 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1_1.o VVVV4P0_1.o FFV1_0.o VVV1_0.o FFV1_2.o VVVV3P0_1.o VVVV1P0_1.o VVV1P0_1.o FFV1P0_3.o +ALOHARoutine = FFV1P0_3.o FFV1_0.o FFV1_1.o FFV1_2.o VVV1P0_1.o VVV1_0.o VVVV1P0_1.o VVVV3P0_1.o VVVV4P0_1.o diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/lhe_parser.py index 4c1d5f0d23..a6b8582e1a 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/lhe_parser.py @@ -1075,6 +1075,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): # check special case without PDF for one (or both) beam if init_information["idbmup1"] in [0,9]: event = next(self) + if len(event) == 0: + event = Event(str(event)) init_information["idbmup1"]= event[0].pdg if init_information["idbmup2"] == 0: init_information["idbmup2"]= event[1].pdg diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index a58f4e64b0..78da7e26c9 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005480527877807617  +DEBUG: model prefixing takes 0.00540924072265625  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.021 s +1 processes with 16 diagrams generated in 0.022 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.327 s +ALOHA: aloha creates 5 routines in 0.319 s VVV1 VVV1 FFV1 @@ -205,6 +205,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/ DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.808s -user 0m0.714s -sys 0m0.055s +real 0m0.770s +user 0m0.712s +sys 0m0.046s diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 0d7bd5faa1..5b686d942c 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0053293704986572266  +DEBUG: model prefixing takes 0.005361080169677734  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.156 s +1 processes with 123 diagrams generated in 0.155 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,14 +191,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg Generated helas calls for 1 subprocesses (123 diagrams) in 0.417 s -Wrote files for 222 helas calls in 0.677 s +Wrote files for 222 helas calls in 0.676 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.327 s +ALOHA: aloha creates 5 routines in 0.325 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.310 s +ALOHA: aloha creates 10 routines in 0.308 s VVV1 VVV1 FFV1 @@ -259,9 +259,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.225s -user 0m2.982s -sys 0m0.220s +real 0m3.441s +user 0m2.976s +sys 0m0.225s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_file.inc index ec923afd6d..fa0f3d86f5 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1_1.o VVVV4_0.o VVVV4P0_1.o FFV1_0.o VVV1_0.o FFV1_2.o VVVV3_0.o VVVV1_0.o VVVV3P0_1.o VVVV1P0_1.o VVV1P0_1.o FFV1P0_3.o +ALOHARoutine = FFV1P0_3.o FFV1_0.o FFV1_1.o FFV1_2.o VVV1P0_1.o VVV1_0.o VVVV1P0_1.o VVVV1_0.o VVVV3P0_1.o VVVV3_0.o VVVV4P0_1.o VVVV4_0.o diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/lhe_parser.py index 4c1d5f0d23..a6b8582e1a 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/lhe_parser.py @@ -1075,6 +1075,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): # check special case without PDF for one (or both) beam if init_information["idbmup1"] in [0,9]: event = next(self) + if len(event) == 0: + event = Event(str(event)) init_information["idbmup1"]= event[0].pdg if init_information["idbmup2"] == 0: init_information["idbmup2"]= event[1].pdg diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 66076ad947..d9d30196d4 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005255222320556641  +DEBUG: model prefixing takes 0.005450725555419922  INFO: Restrict model sm with file 
models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.158 s +1 processes with 123 diagrams generated in 0.156 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.420 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.418 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.312 s +ALOHA: aloha creates 5 routines in 0.313 s VVV1 VVV1 FFV1 @@ -209,5 +209,5 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg quit real 0m1.416s -user 0m1.345s -sys 0m0.059s +user 0m1.336s +sys 0m0.068s diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 31d7395875..c0051a2221 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0056993961334228516  +DEBUG: model prefixing takes 0.0053594112396240234  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.844 s +1 processes with 1240 diagrams generated in 1.855 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,9 +175,9 @@ INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] -INFO: Color-Flow passed to 1630 term in 7s. Introduce 3030 contraction +INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -192,15 +192,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.525 s -Wrote files for 2281 helas calls in 18.330 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.449 s +Wrote files for 2281 helas calls in 18.329 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.318 s +ALOHA: aloha creates 5 routines in 0.329 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -208,7 +208,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.312 s +ALOHA: aloha creates 10 routines in 0.323 s VVV1 VVV1 FFV1 @@ -261,9 +261,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m28.929s -user 0m28.460s -sys 0m0.360s +real 0m28.868s +user 0m28.342s +sys 0m0.399s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_file.inc index ec923afd6d..fa0f3d86f5 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1_1.o VVVV4_0.o VVVV4P0_1.o FFV1_0.o VVV1_0.o FFV1_2.o VVVV3_0.o VVVV1_0.o VVVV3P0_1.o VVVV1P0_1.o VVV1P0_1.o FFV1P0_3.o +ALOHARoutine = FFV1P0_3.o FFV1_0.o FFV1_1.o FFV1_2.o VVV1P0_1.o VVV1_0.o VVVV1P0_1.o VVVV1_0.o VVVV3P0_1.o VVVV3_0.o VVVV4P0_1.o VVVV4_0.o diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/lhe_parser.py index 4c1d5f0d23..a6b8582e1a 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/lhe_parser.py @@ -1075,6 +1075,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): # check special case without PDF for one (or both) beam if init_information["idbmup1"] in [0,9]: event = next(self) + if len(event) == 0: + event = Event(str(event)) init_information["idbmup1"]= event[0].pdg if init_information["idbmup2"] == 0: init_information["idbmup2"]= event[1].pdg diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 4e6ebcfe4b..dc512f186d 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005368709564208984  +DEBUG: model prefixing takes 0.0054700374603271484  INFO: Restrict model 
sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.851 s +1 processes with 1240 diagrams generated in 1.839 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.456 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.466 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.339 s +ALOHA: aloha creates 5 routines in 0.346 s VVV1 VVV1 FFV1 @@ -208,6 +208,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m12.963s +real 0m12.842s user 0m12.634s -sys 0m0.082s +sys 0m0.111s diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 09ee0f32ca..aeeaa24a46 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0053806304931640625  +DEBUG: model prefixing takes 0.005341529846191406  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -197,7 +197,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -214,7 +214,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -229,17 +229,17 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s -Wrote files for 32 helas calls in 0.214 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s +Wrote files for 32 helas calls in 0.215 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.141 s +ALOHA: aloha creates 2 routines in 0.144 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.129 s +ALOHA: aloha creates 4 routines in 0.130 s FFV1 FFV1 FFV1 @@ -296,9 +296,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.978s -user 0m1.677s -sys 0m0.213s +real 0m1.938s +user 0m1.675s +sys 0m0.219s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_file.inc index 4457933199..dccc9da9d2 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1_1.o FFV1_0.o VVV1_0.o FFV1_2.o FFV1P0_3.o +ALOHARoutine = FFV1P0_3.o FFV1_0.o FFV1_1.o FFV1_2.o VVV1_0.o diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/lhe_parser.py index 4c1d5f0d23..a6b8582e1a 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/lhe_parser.py @@ -1075,6 +1075,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): # check special case without PDF for one (or both) beam if init_information["idbmup1"] in [0,9]: event = next(self) + if len(event) == 0: + event = Event(str(event)) init_information["idbmup1"]= event[0].pdg if init_information["idbmup2"] == 0: init_information["idbmup2"]= event[1].pdg diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 30aaffd6fa..5dd652a8ff 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005368471145629883  +DEBUG: model prefixing takes 0.005471229553222656  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -228,6 +228,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.651s -user 0m0.582s -sys 0m0.051s +real 0m0.636s +user 0m0.581s +sys 0m0.049s diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index e44a158345..caae38eb15 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -151,7 +151,7 @@ Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines -ALOHA: aloha creates 1 routines in 0.060 s +ALOHA: aloha creates 1 routines in 0.062 s VVS3 FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. 
@@ -165,6 +165,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.420s -user 0m0.362s -sys 0m0.044s +real 0m0.426s +user 0m0.355s +sys 0m0.059s diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 6910847468..e3b0d16901 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005797863006591797  +DEBUG: model prefixing takes 0.005556583404541016  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.029 s +5 processes with 7 diagrams generated in 0.028 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.814 s +65 processes with 1119 diagrams generated in 1.812 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -497,7 +497,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -514,7 +514,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -531,7 +531,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -548,7 +548,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -565,7 +565,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -582,7 +582,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -599,7 +599,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -616,7 +616,7 @@ INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -633,7 +633,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -650,7 +650,7 @@ INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -667,7 +667,7 @@ INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -684,7 +684,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -701,7 +701,7 @@ INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -718,7 +718,7 @@ INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -735,7 +735,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -752,7 +752,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -769,7 +769,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -786,7 +786,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -801,15 +801,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.282 s -Wrote files for 810 helas calls in 3.212 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.260 s +Wrote files for 810 helas calls in 3.181 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.600 s +ALOHA: aloha creates 5 routines in 0.335 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -817,7 +817,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.332 s +ALOHA: aloha creates 10 routines in 0.309 s VVV1 VVV1 FFV1 @@ -1034,9 +1034,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m9.319s -user 0m8.334s -sys 0m0.458s +real 0m8.723s +user 0m8.211s +sys 0m0.450s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_file.inc b/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_file.inc index ec923afd6d..fa0f3d86f5 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_file.inc +++ b/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_file.inc @@ -1 +1 @@ -ALOHARoutine = FFV1_1.o VVVV4_0.o VVVV4P0_1.o FFV1_0.o VVV1_0.o FFV1_2.o VVVV3_0.o VVVV1_0.o VVVV3P0_1.o VVVV1P0_1.o VVV1P0_1.o FFV1P0_3.o +ALOHARoutine = FFV1P0_3.o FFV1_0.o FFV1_1.o FFV1_2.o VVV1P0_1.o VVV1_0.o VVVV1P0_1.o VVVV1_0.o VVVV3P0_1.o VVVV3_0.o VVVV4P0_1.o VVVV4_0.o diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/lhe_parser.py index 4c1d5f0d23..a6b8582e1a 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/lhe_parser.py @@ -1075,6 +1075,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): # check special case without PDF for one (or both) beam if init_information["idbmup1"] in [0,9]: event = next(self) + if len(event) == 0: + event = Event(str(event)) init_information["idbmup1"]= event[0].pdg if init_information["idbmup2"] == 0: init_information["idbmup2"]= event[1].pdg From fad8a920dec7d9bcce0073d2cc64ad860d038c82 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sat, 28 Oct 2023 08:55:52 +0200 Subject: [PATCH 056/119] [oct23av] in eemumu mgOnGpuCxtypes.h, add missing function 'cxsmpl operator*( const cxsmpl& a, const double& b )' to fix eemumu float builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ccache /usr/local/cuda-12.0/bin/nvcc -o build.none_f_inl0_hrd0/gcheck.exe build.none_f_inl0_hrd0/gcheck_sa.o -gencode arch=compute_70,code=compute_70 -gencode 
arch=compute_70,code=sm_70 -L../../lib/build.none_f_inl0_hrd0 -lmg5amc_common -Xlinker -rpath,'$ORIGIN/../../../lib/build.none_f_inl0_hrd0' -L../../lib/build.none_f_inl0_hrd0 -lmg5amc_epem_mupmum_cuda build.none_f_inl0_hrd0/gCommonRandomNumberKernel.o build.none_f_inl0_hrd0/gRamboSamplingKernels.o build.none_f_inl0_hrd0/gCurandRandomNumberKernel.o -L/usr/local/cuda-12.0/lib64/ -lcurand ccache g++ -O3 -std=c++17 -I. -I../../src -Wall -Wshadow -Wextra -ffast-math -fopenmp -march=x86-64 -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT -fPIC -c CPPProcess.cc -o build.none_f_inl0_hrd0/CPPProcess.o In file included from CPPProcess.cc:20: ../../src/HelAmps_sm.h: In function ‘void mg5amcCpu::FFV2_4_0(const fptype*, const fptype*, const fptype*, const fptype*, double, const fptype*, double, mgOnGpu::fptype*)’: ../../src/HelAmps_sm.h:1165:101: error: no match for ‘operator*’ (operand types are ‘const cxtype_sv’ {aka ‘const mgOnGpu::cxsmpl’} and ‘const double’) 1165 | ( *vertex ) = ( -one ) * ( Ccoeff2 * COUP2 * ( +cI * TMP1 + ( two * cI ) * TMP3 ) + cI * ( TMP1 * Ccoeff1 * COUP1 ) ); | ~~~~ ^ ~~~~~~~ | | | | | const double | const cxtype_sv {aka const mgOnGpu::cxsmpl} --- epochX/cudacpp/ee_mumu.mad/src/mgOnGpuCxtypes.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuCxtypes.h index b56348bc58..ca9a9f00c0 100644 --- a/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuCxtypes.h @@ -159,6 +159,12 @@ namespace mg5amcCpu return cxsmpl( a, 0 ) * b; } + inline __host__ __device__ constexpr cxsmpl + operator*( const cxsmpl& a, const double& b ) + { + return a * cxsmpl( b, 0 ); + } + template inline __host__ __device__ constexpr cxsmpl operator/( const cxsmpl& a, const cxsmpl& b ) From bfa2f9f20da11b117e969be46470a35166437678 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sat, 28 Oct 2023 08:59:26 +0200 Subject: [PATCH 057/119] [oct23av] in 
CODEGEN mgOnGpuCxtypes.h, add missing function 'cxsmpl operator*( const cxsmpl& a, const double& b )' to fix eemumu float builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ccache /usr/local/cuda-12.0/bin/nvcc -o build.none_f_inl0_hrd0/gcheck.exe build.none_f_inl0_hrd0/gcheck_sa.o -gencode arch=compute_70,code=compute_70 -gencode arch=compute_70,code=sm_70 -L../../lib/build.none_f_inl0_hrd0 -lmg5amc_common -Xlinker -rpath,'$ORIGIN/../../../lib/build.none_f_inl0_hrd0' -L../../lib/build.none_f_inl0_hrd0 -lmg5amc_epem_mupmum_cuda build.none_f_inl0_hrd0/gCommonRandomNumberKernel.o build.none_f_inl0_hrd0/gRamboSamplingKernels.o build.none_f_inl0_hrd0/gCurandRandomNumberKernel.o -L/usr/local/cuda-12.0/lib64/ -lcurand ccache g++ -O3 -std=c++17 -I. -I../../src -Wall -Wshadow -Wextra -ffast-math -fopenmp -march=x86-64 -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT -fPIC -c CPPProcess.cc -o build.none_f_inl0_hrd0/CPPProcess.o In file included from CPPProcess.cc:20: ../../src/HelAmps_sm.h: In function ‘void mg5amcCpu::FFV2_4_0(const fptype*, const fptype*, const fptype*, const fptype*, double, const fptype*, double, mgOnGpu::fptype*)’: ../../src/HelAmps_sm.h:1165:101: error: no match for ‘operator*’ (operand types are ‘const cxtype_sv’ {aka ‘const mgOnGpu::cxsmpl’} and ‘const double’) 1165 | ( *vertex ) = ( -one ) * ( Ccoeff2 * COUP2 * ( +cI * TMP1 + ( two * cI ) * TMP3 ) + cI * ( TMP1 * Ccoeff1 * COUP1 ) ); | ~~~~ ^ ~~~~~~~ | | | | | const double | const cxtype_sv {aka const mgOnGpu::cxsmpl} --- .../madgraph/iolibs/template_files/gpu/mgOnGpuCxtypes.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuCxtypes.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuCxtypes.h index b56348bc58..ca9a9f00c0 100644 ---
a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuCxtypes.h @@ -159,6 +159,12 @@ namespace mg5amcCpu return cxsmpl( a, 0 ) * b; } + inline __host__ __device__ constexpr cxsmpl + operator*( const cxsmpl& a, const double& b ) + { + return a * cxsmpl( b, 0 ); + } + template inline __host__ __device__ constexpr cxsmpl operator/( const cxsmpl& a, const cxsmpl& b ) From a71881e95a282f0c98e41239830f30243b8d5dd7 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sat, 28 Oct 2023 09:00:35 +0200 Subject: [PATCH 058/119] [oct23av] regenerate eemumu.mad after copying mgOnGpuCxtypes.h to CODEGEN, all ok - then copy the updated file to all other 7+7 processes --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 16 ++++++++-------- epochX/cudacpp/ee_mumu.sa/src/mgOnGpuCxtypes.h | 6 ++++++ epochX/cudacpp/gg_tt.mad/src/mgOnGpuCxtypes.h | 6 ++++++ epochX/cudacpp/gg_tt.sa/src/mgOnGpuCxtypes.h | 6 ++++++ epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuCxtypes.h | 6 ++++++ epochX/cudacpp/gg_ttg.mad/src/mgOnGpuCxtypes.h | 6 ++++++ epochX/cudacpp/gg_ttg.sa/src/mgOnGpuCxtypes.h | 6 ++++++ epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuCxtypes.h | 6 ++++++ epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuCxtypes.h | 6 ++++++ epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuCxtypes.h | 6 ++++++ epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuCxtypes.h | 6 ++++++ epochX/cudacpp/gq_ttq.mad/src/mgOnGpuCxtypes.h | 6 ++++++ epochX/cudacpp/gq_ttq.sa/src/mgOnGpuCxtypes.h | 6 ++++++ epochX/cudacpp/heft_gg_h.sa/src/mgOnGpuCxtypes.h | 6 ++++++ .../cudacpp/pp_tt012j.mad/src/mgOnGpuCxtypes.h | 6 ++++++ 15 files changed, 92 insertions(+), 8 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index f9f4abe253..e304c1595b 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ 
b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005633115768432617  +DEBUG: model prefixing takes 0.005795001983642578  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,7 +174,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,19 +191,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.096 s +Wrote files for 8 helas calls in 0.100 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.199 s +ALOHA: aloha creates 3 routines in 0.198 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.252 s +ALOHA: aloha creates 7 routines in 0.261 s FFV1 FFV1 FFV2 @@ -251,9 +251,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.968s -user 0m1.733s -sys 0m0.228s +real 0m2.657s +user 0m1.670s +sys 0m0.206s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuCxtypes.h b/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuCxtypes.h index b56348bc58..ca9a9f00c0 100644 --- a/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuCxtypes.h @@ -159,6 +159,12 @@ namespace mg5amcCpu return cxsmpl( a, 0 ) * b; } + inline __host__ __device__ constexpr cxsmpl + operator*( const cxsmpl& a, const double& b ) + { + return a * cxsmpl( b, 0 ); + } + template inline __host__ __device__ constexpr cxsmpl operator/( const cxsmpl& a, const cxsmpl& b ) diff --git a/epochX/cudacpp/gg_tt.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gg_tt.mad/src/mgOnGpuCxtypes.h index b56348bc58..ca9a9f00c0 100644 --- a/epochX/cudacpp/gg_tt.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gg_tt.mad/src/mgOnGpuCxtypes.h @@ -159,6 +159,12 @@ namespace mg5amcCpu return cxsmpl( a, 0 ) * b; } + inline __host__ __device__ constexpr cxsmpl + operator*( const cxsmpl& a, const double& b ) + { + return a * cxsmpl( b, 0 ); + } + template inline __host__ __device__ constexpr cxsmpl operator/( const cxsmpl& a, const cxsmpl& b ) diff --git a/epochX/cudacpp/gg_tt.sa/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gg_tt.sa/src/mgOnGpuCxtypes.h index b56348bc58..ca9a9f00c0 100644 --- a/epochX/cudacpp/gg_tt.sa/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gg_tt.sa/src/mgOnGpuCxtypes.h @@ -159,6 +159,12 @@ namespace mg5amcCpu return cxsmpl( a, 0 ) * b; } + inline __host__ __device__ constexpr cxsmpl + operator*( const cxsmpl& a, const double& b ) + { + return a * cxsmpl( b, 0 ); + } + template inline __host__ __device__ constexpr cxsmpl operator/( const cxsmpl& a, const cxsmpl& b ) diff --git a/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuCxtypes.h index b56348bc58..ca9a9f00c0 100644 --- 
a/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuCxtypes.h @@ -159,6 +159,12 @@ namespace mg5amcCpu return cxsmpl( a, 0 ) * b; } + inline __host__ __device__ constexpr cxsmpl + operator*( const cxsmpl& a, const double& b ) + { + return a * cxsmpl( b, 0 ); + } + template inline __host__ __device__ constexpr cxsmpl operator/( const cxsmpl& a, const cxsmpl& b ) diff --git a/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuCxtypes.h index b56348bc58..ca9a9f00c0 100644 --- a/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuCxtypes.h @@ -159,6 +159,12 @@ namespace mg5amcCpu return cxsmpl( a, 0 ) * b; } + inline __host__ __device__ constexpr cxsmpl + operator*( const cxsmpl& a, const double& b ) + { + return a * cxsmpl( b, 0 ); + } + template inline __host__ __device__ constexpr cxsmpl operator/( const cxsmpl& a, const cxsmpl& b ) diff --git a/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuCxtypes.h index b56348bc58..ca9a9f00c0 100644 --- a/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuCxtypes.h @@ -159,6 +159,12 @@ namespace mg5amcCpu return cxsmpl( a, 0 ) * b; } + inline __host__ __device__ constexpr cxsmpl + operator*( const cxsmpl& a, const double& b ) + { + return a * cxsmpl( b, 0 ); + } + template inline __host__ __device__ constexpr cxsmpl operator/( const cxsmpl& a, const cxsmpl& b ) diff --git a/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuCxtypes.h index b56348bc58..ca9a9f00c0 100644 --- a/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuCxtypes.h @@ -159,6 +159,12 @@ namespace mg5amcCpu return cxsmpl( a, 0 ) * b; } + inline __host__ __device__ constexpr cxsmpl + operator*( const cxsmpl& a, const double& b ) + { + return a * cxsmpl( b, 0 ); + } + template inline __host__ 
__device__ constexpr cxsmpl operator/( const cxsmpl& a, const cxsmpl& b ) diff --git a/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuCxtypes.h index b56348bc58..ca9a9f00c0 100644 --- a/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuCxtypes.h @@ -159,6 +159,12 @@ namespace mg5amcCpu return cxsmpl( a, 0 ) * b; } + inline __host__ __device__ constexpr cxsmpl + operator*( const cxsmpl& a, const double& b ) + { + return a * cxsmpl( b, 0 ); + } + template inline __host__ __device__ constexpr cxsmpl operator/( const cxsmpl& a, const cxsmpl& b ) diff --git a/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuCxtypes.h index b56348bc58..ca9a9f00c0 100644 --- a/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuCxtypes.h @@ -159,6 +159,12 @@ namespace mg5amcCpu return cxsmpl( a, 0 ) * b; } + inline __host__ __device__ constexpr cxsmpl + operator*( const cxsmpl& a, const double& b ) + { + return a * cxsmpl( b, 0 ); + } + template inline __host__ __device__ constexpr cxsmpl operator/( const cxsmpl& a, const cxsmpl& b ) diff --git a/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuCxtypes.h index b56348bc58..ca9a9f00c0 100644 --- a/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuCxtypes.h @@ -159,6 +159,12 @@ namespace mg5amcCpu return cxsmpl( a, 0 ) * b; } + inline __host__ __device__ constexpr cxsmpl + operator*( const cxsmpl& a, const double& b ) + { + return a * cxsmpl( b, 0 ); + } + template inline __host__ __device__ constexpr cxsmpl operator/( const cxsmpl& a, const cxsmpl& b ) diff --git a/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuCxtypes.h index b56348bc58..ca9a9f00c0 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuCxtypes.h @@ 
-159,6 +159,12 @@ namespace mg5amcCpu return cxsmpl( a, 0 ) * b; } + inline __host__ __device__ constexpr cxsmpl + operator*( const cxsmpl& a, const double& b ) + { + return a * cxsmpl( b, 0 ); + } + template inline __host__ __device__ constexpr cxsmpl operator/( const cxsmpl& a, const cxsmpl& b ) diff --git a/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuCxtypes.h index b56348bc58..ca9a9f00c0 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuCxtypes.h @@ -159,6 +159,12 @@ namespace mg5amcCpu return cxsmpl( a, 0 ) * b; } + inline __host__ __device__ constexpr cxsmpl + operator*( const cxsmpl& a, const double& b ) + { + return a * cxsmpl( b, 0 ); + } + template inline __host__ __device__ constexpr cxsmpl operator/( const cxsmpl& a, const cxsmpl& b ) diff --git a/epochX/cudacpp/heft_gg_h.sa/src/mgOnGpuCxtypes.h b/epochX/cudacpp/heft_gg_h.sa/src/mgOnGpuCxtypes.h index b56348bc58..ca9a9f00c0 100644 --- a/epochX/cudacpp/heft_gg_h.sa/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/heft_gg_h.sa/src/mgOnGpuCxtypes.h @@ -159,6 +159,12 @@ namespace mg5amcCpu return cxsmpl( a, 0 ) * b; } + inline __host__ __device__ constexpr cxsmpl + operator*( const cxsmpl& a, const double& b ) + { + return a * cxsmpl( b, 0 ); + } + template inline __host__ __device__ constexpr cxsmpl operator/( const cxsmpl& a, const cxsmpl& b ) diff --git a/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuCxtypes.h index b56348bc58..ca9a9f00c0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuCxtypes.h @@ -159,6 +159,12 @@ namespace mg5amcCpu return cxsmpl( a, 0 ) * b; } + inline __host__ __device__ constexpr cxsmpl + operator*( const cxsmpl& a, const double& b ) + { + return a * cxsmpl( b, 0 ); + } + template inline __host__ __device__ constexpr cxsmpl operator/( const cxsmpl& a, const cxsmpl& b ) From 
ee83b62c1dcf164795debe5e838cb3561f4b309b Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sat, 28 Oct 2023 15:13:43 +0200 Subject: [PATCH 059/119] [oct23av] rerun 78 tput tests - gqttq runTest failures, NaNs have disappeared, eemumu is x2-4 slower STARTED AT Sat Oct 28 09:09:27 AM CEST 2023 ./tput/teeThroughputX.sh -mix -hrd -makej -eemumu -ggtt -ggttg -ggttgg -gqttq -ggttggg -makeclean ENDED(1) AT Sat Oct 28 12:24:58 PM CEST 2023 [Status=2] ./tput/teeThroughputX.sh -flt -hrd -makej -eemumu -ggtt -ggttgg -inlonly -makeclean ENDED(2) AT Sat Oct 28 12:50:30 PM CEST 2023 [Status=0] ./tput/teeThroughputX.sh -makej -eemumu -ggtt -ggttg -gqttq -ggttgg -ggttggg -flt -bridge -makeclean ENDED(3) AT Sat Oct 28 12:59:27 PM CEST 2023 [Status=2] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -rmbhst ENDED(4) AT Sat Oct 28 01:02:35 PM CEST 2023 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -curhst ENDED(5) AT Sat Oct 28 01:05:39 PM CEST 2023 [Status=0] --- This is a summary of the changes with respect to my previous logs using the August code base Functionality: ** all NaN's have disappeared everywhere! 
-- eemumu, ggtt, ggttg, ggttgg, ggttggg: all ok ** gqttq: some failures in runTest.exe Performance: -- eemumu: cuda ~20% slower (and 20% increase in registers used) ** eemumu: simd a factor 2 (inl0) to a factor 4 (inl1) slower -- ggtt: cuda up to 10% slower -- ggtt: simd ~10-15% faster (inl0), similar (inl1) -- ggttg, ggttgg, ggttggg: generally similar performance, some minor speedups --- .../log_eemumu_mad_d_inl0_hrd0.txt | 98 +++++----- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 98 +++++----- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 98 +++++----- .../log_eemumu_mad_d_inl0_hrd0_curhst.txt | 98 +++++----- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 98 +++++----- .../log_eemumu_mad_d_inl0_hrd1.txt | 98 +++++----- .../log_eemumu_mad_d_inl1_hrd0.txt | 98 +++++----- .../log_eemumu_mad_d_inl1_hrd1.txt | 98 +++++----- .../log_eemumu_mad_f_inl0_hrd0.txt | 146 +++++++------- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 177 +++++++---------- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 142 +++++++------- .../log_eemumu_mad_f_inl0_hrd0_curhst.txt | 146 +++++++------- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 146 +++++++------- .../log_eemumu_mad_f_inl0_hrd1.txt | 146 +++++++------- .../log_eemumu_mad_f_inl1_hrd0.txt | 146 +++++++------- .../log_eemumu_mad_f_inl1_hrd1.txt | 146 +++++++------- .../log_eemumu_mad_m_inl0_hrd0.txt | 98 +++++----- .../log_eemumu_mad_m_inl0_hrd1.txt | 98 +++++----- .../log_ggtt_mad_d_inl0_hrd0.txt | 100 +++++----- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 100 +++++----- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 100 +++++----- .../log_ggtt_mad_d_inl0_hrd0_curhst.txt | 100 +++++----- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 100 +++++----- .../log_ggtt_mad_d_inl0_hrd1.txt | 96 +++++----- .../log_ggtt_mad_d_inl1_hrd0.txt | 86 ++++----- .../log_ggtt_mad_d_inl1_hrd1.txt | 86 ++++----- .../log_ggtt_mad_f_inl0_hrd0.txt | 116 +++++------ .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 116 +++++------ .../log_ggtt_mad_f_inl0_hrd0_common.txt | 116 +++++------ 
.../log_ggtt_mad_f_inl0_hrd0_curhst.txt | 116 +++++------ .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 116 +++++------ .../log_ggtt_mad_f_inl0_hrd1.txt | 116 +++++------ .../log_ggtt_mad_f_inl1_hrd0.txt | 86 ++++----- .../log_ggtt_mad_f_inl1_hrd1.txt | 86 ++++----- .../log_ggtt_mad_m_inl0_hrd0.txt | 96 +++++----- .../log_ggtt_mad_m_inl0_hrd1.txt | 96 +++++----- .../log_ggttg_mad_d_inl0_hrd0.txt | 118 ++++++------ .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 118 ++++++------ .../log_ggttg_mad_d_inl0_hrd1.txt | 118 ++++++------ .../log_ggttg_mad_f_inl0_hrd0.txt | 138 +++++++------- .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 138 +++++++------- .../log_ggttg_mad_f_inl0_hrd1.txt | 138 +++++++------- .../log_ggttg_mad_m_inl0_hrd0.txt | 110 +++++------ .../log_ggttg_mad_m_inl0_hrd1.txt | 110 +++++------ .../log_ggttgg_mad_d_inl0_hrd0.txt | 110 +++++------ .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 110 +++++------ .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 110 +++++------ .../log_ggttgg_mad_d_inl0_hrd0_curhst.txt | 110 +++++------ .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 110 +++++------ .../log_ggttgg_mad_d_inl0_hrd1.txt | 110 +++++------ .../log_ggttgg_mad_d_inl1_hrd0.txt | 100 +++++----- .../log_ggttgg_mad_d_inl1_hrd1.txt | 100 +++++----- .../log_ggttgg_mad_f_inl0_hrd0.txt | 130 ++++++------- .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 130 ++++++------- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 130 ++++++------- .../log_ggttgg_mad_f_inl0_hrd0_curhst.txt | 130 ++++++------- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 130 ++++++------- .../log_ggttgg_mad_f_inl0_hrd1.txt | 130 ++++++------- .../log_ggttgg_mad_f_inl1_hrd0.txt | 100 +++++----- .../log_ggttgg_mad_f_inl1_hrd1.txt | 100 +++++----- .../log_ggttgg_mad_m_inl0_hrd0.txt | 110 +++++------ .../log_ggttgg_mad_m_inl0_hrd1.txt | 108 +++++------ .../log_ggttggg_mad_d_inl0_hrd0.txt | 110 +++++------ .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 110 +++++------ .../log_ggttggg_mad_d_inl0_hrd1.txt | 110 +++++------ 
.../log_ggttggg_mad_f_inl0_hrd0.txt | 130 ++++++------- .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 130 ++++++------- .../log_ggttggg_mad_f_inl0_hrd1.txt | 130 ++++++------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 110 +++++------ .../log_ggttggg_mad_m_inl0_hrd1.txt | 110 +++++------ .../log_gqttq_mad_d_inl0_hrd0.txt | 174 +++-------------- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 178 +++-------------- .../log_gqttq_mad_d_inl0_hrd1.txt | 174 +++-------------- .../log_gqttq_mad_f_inl0_hrd0.txt | 176 +++-------------- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 180 +++--------------- .../log_gqttq_mad_f_inl0_hrd1.txt | 176 +++-------------- .../log_gqttq_mad_m_inl0_hrd0.txt | 174 +++-------------- .../log_gqttq_mad_m_inl0_hrd1.txt | 174 +++-------------- 78 files changed, 4222 insertions(+), 5173 deletions(-) diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index fb3e759147..9d61768255 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:31:49 +DATE: 2023-10-28_12:06:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,16 +44,16 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.992610e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.677560e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.800497e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.436209e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.229794e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.009285e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.651038 sec - 2,636,526,277 cycles # 3.033 GHz - 4,084,504,000 instructions # 1.55 insn per cycle - 0.937514788 seconds time elapsed +TOTAL : 0.744144 sec + 2,828,116,167 cycles # 3.006 GHz + 4,431,343,825 instructions # 1.57 insn per cycle + 1.067121357 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 @@ -69,15 +69,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.223824e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.458038e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.458038e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.139844e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.339077e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.339077e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.535744 sec - 17,174,922,423 cycles # 3.101 GHz - 40,422,775,862 instructions # 2.35 insn per cycle - 5.540757574 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.916739 sec + 18,330,054,823 cycles # 3.096 GHz + 44,036,343,414 instructions # 2.40 insn per cycle + 5.924908084 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -95,15 +95,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.137087e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.061371e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.061371e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.685588e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.206291e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.206291e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.338481 sec - 10,270,021,527 cycles # 3.072 GHz - 24,681,672,230 instructions # 2.40 insn per cycle - 3.343524574 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1284) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.126625 sec + 12,790,163,629 cycles # 3.096 GHz + 31,002,245,035 instructions # 2.42 insn per cycle + 4.142582600 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -121,15 +121,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.319309e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.049977e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.049977e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.100110e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.946697e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.946697e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.283025 sec - 6,897,531,665 cycles # 3.016 GHz - 13,676,914,709 instructions # 1.98 insn per cycle - 2.287967204 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1057) (512y: 0) (512z: 0) +TOTAL : 3.384390 sec + 10,090,434,829 cycles # 2.977 GHz + 19,377,580,740 instructions # 1.92 insn per cycle + 3.399760208 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -147,15 +147,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.461379e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.455419e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.455419e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.153186e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.046388e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.046388e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.201231 sec - 6,644,736,732 cycles # 3.013 GHz - 13,369,268,411 instructions # 2.01 insn per cycle - 2.206080825 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1009) (512y: 0) (512z: 0) +TOTAL : 3.315608 sec + 9,756,600,437 cycles # 2.938 GHz + 18,996,634,425 instructions # 1.95 insn per cycle + 3.330988477 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -173,15 +173,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.225436e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.708340e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.708340e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.848119e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.465628e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.465628e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.342191 sec - 5,886,532,421 cycles # 2.509 GHz - 10,160,262,547 instructions # 1.73 insn per cycle - 2.347341313 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 268) (512y: 0) (512z: 683) +TOTAL : 3.802025 sec + 8,634,120,490 cycles # 2.268 GHz + 15,739,590,432 instructions # 1.82 insn per cycle + 3.813740164 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index ff2ab6ab12..5e5aef5e19 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_19:02:17 +DATE: 2023-10-28_12:53:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,20 +48,20 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.965333e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.255447e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.255447e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.655812e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.764738e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.764738e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.135108 sec - 7,250,256,577 cycles # 3.055 GHz - 13,026,966,701 instructions # 1.80 insn per cycle - 2.429348584 seconds time elapsed +TOTAL : 2.243071 sec + 7,442,574,377 cycles # 2.991 GHz + 13,225,389,066 instructions # 1.78 insn per cycle + 2.549080102 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 @@ -78,15 +78,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.180043e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.393531e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.393531e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.102407e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.289656e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.289656e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.930110 sec - 18,404,007,216 cycles # 3.102 GHz - 40,649,787,986 instructions # 2.21 insn per cycle - 5.936245491 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.301577 sec + 19,602,629,453 cycles # 3.109 GHz + 44,262,843,264 instructions # 2.26 insn per cycle + 6.308282734 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -105,15 +105,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.001560e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.785736e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.785736e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.604137e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.071968e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.071968e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.743997 sec - 11,597,299,620 cycles # 3.094 GHz - 25,525,941,371 instructions # 2.20 insn per cycle - 3.750076018 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1284) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.520826 sec + 14,045,705,180 cycles # 3.104 GHz + 31,843,337,701 instructions # 2.27 insn per cycle + 4.527637329 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -132,15 +132,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.959223e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.997724e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.997724e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.921703e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.616697e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.616697e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.737253 sec - 8,251,537,282 cycles # 3.009 GHz - 15,038,208,979 instructions # 1.82 insn per cycle - 2.743376591 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1057) (512y: 0) (512z: 0) +TOTAL : 3.884222 sec + 11,358,296,431 cycles # 2.920 GHz + 20,737,654,475 instructions # 1.83 insn per cycle + 3.890844402 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -159,15 +159,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.076188e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.294642e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.294642e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.042579e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.818494e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.818494e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.656399 sec - 7,925,615,204 cycles # 2.978 GHz - 14,731,067,513 instructions # 1.86 insn per cycle - 2.662615813 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1009) (512y: 0) (512z: 0) +TOTAL : 3.680139 sec + 11,063,141,020 cycles # 3.001 GHz + 20,365,630,441 instructions # 1.84 insn per cycle + 3.686776925 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -186,15 +186,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.899896e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.732839e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.732839e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.753883e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.296790e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.296790e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.778907 sec - 7,285,067,282 cycles # 2.617 GHz - 11,305,402,811 instructions # 1.55 insn per cycle - 2.785056032 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 268) (512y: 0) (512z: 683) +TOTAL : 4.203410 sec + 10,020,568,248 cycles # 2.381 GHz + 16,883,677,599 instructions # 1.68 insn per cycle + 4.210142720 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index ee209006c3..7f4cdc7cb4 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_19:14:42 +DATE: 2023-10-28_13:05:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,16 +44,16 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.734802e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.548595e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.695477e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.829805e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.626756e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.006288e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.288103 sec - 4,585,947,033 cycles # 3.031 GHz - 7,017,628,406 instructions # 1.53 insn per cycle - 1.570944429 seconds time elapsed +TOTAL : 1.335722 sec + 4,629,481,061 cycles # 2.946 GHz + 7,232,158,826 instructions # 1.56 insn per cycle + 1.628149973 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 @@ -69,15 +69,15 @@ Workflow summary = 
CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.218596e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.449391e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.449391e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.125259e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.321125e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.321125e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.906919 sec - 18,244,110,475 cycles # 3.087 GHz - 40,525,216,964 instructions # 2.22 insn per cycle - 5.912008529 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.343833 sec + 19,420,661,031 cycles # 3.061 GHz + 44,140,174,090 instructions # 2.27 insn per cycle + 6.349019015 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -95,15 +95,15 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.094791e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.999575e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.999575e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.685239e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.203002e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.203002e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.757966 sec - 11,362,621,772 cycles # 3.020 GHz - 24,684,545,006 instructions # 2.17 insn per cycle - 3.762920389 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1284) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.470451 sec + 13,918,658,241 cycles # 3.110 GHz + 31,003,563,450 instructions # 2.23 insn per cycle + 4.475964868 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -121,15 +121,15 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.221039e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.851480e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.851480e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.104044e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.951379e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.951379e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.709730 sec - 7,962,642,894 cycles # 2.935 GHz - 13,579,072,475 instructions # 1.71 insn per cycle - 2.714759310 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1057) (512y: 0) (512z: 0) +TOTAL : 3.725185 sec + 11,191,729,074 cycles # 3.002 GHz + 19,277,666,580 instructions # 1.72 insn per cycle + 3.730254016 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -147,15 +147,15 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.422533e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.356319e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.356319e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.170659e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.090967e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.090967e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.582578 sec - 7,748,421,467 cycles # 2.996 GHz - 13,080,984,196 instructions # 1.69 insn per cycle - 2.587647023 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1009) (512y: 0) (512z: 0) +TOTAL : 3.639431 sec + 10,927,950,837 cycles # 3.000 GHz + 18,707,814,304 instructions # 1.71 insn per cycle + 3.644708893 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -173,15 +173,15 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.234611e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.715248e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.715248e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.887944e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.525023e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.525023e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.694072 sec - 7,028,142,015 cycles # 2.605 GHz - 9,860,263,834 instructions # 1.40 insn per cycle - 2.699027311 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 268) (512y: 0) (512z: 683) +TOTAL : 4.076704 sec + 9,749,882,110 cycles # 2.390 GHz + 15,436,538,196 instructions # 1.58 insn per cycle + 4.081702298 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt index 8ad22bdaab..ce4a04519c 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_19:11:43 +DATE: 2023-10-28_13:02:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,16 +44,16 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.751984e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.562686e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.712190e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.845902e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.649474e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.025258e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.934603 sec - 3,531,370,849 cycles # 3.027 GHz - 7,056,706,138 instructions # 2.00 insn per cycle - 1.223131814 seconds time elapsed +TOTAL : 0.955989 sec + 3,578,163,738 cycles # 3.023 GHz + 7,068,279,319 instructions # 1.98 insn per cycle + 1.241156432 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst -==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 @@ -69,15 +69,15 @@ Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.229006e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.461428e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.461428e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.138227e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.336088e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.336088e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.514347 sec - 17,134,759,816 cycles # 3.105 GHz - 40,421,384,674 instructions # 2.36 insn per cycle - 5.519235575 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.921217 sec + 18,291,774,290 cycles # 3.087 GHz + 44,034,233,337 instructions # 2.41 insn per cycle + 5.926430108 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -95,15 +95,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.136550e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.078809e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.078809e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.674598e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.187066e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.187066e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.338536 sec - 10,358,915,958 cycles # 3.099 GHz - 24,681,209,780 instructions # 2.38 insn per cycle - 3.343472671 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1284) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.152819 sec + 12,778,275,966 cycles # 3.074 GHz + 31,001,281,707 instructions # 2.43 insn per cycle + 4.158063842 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -121,15 +121,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.254346e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.883713e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.883713e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.121816e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.958302e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.958302e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.325759 sec - 6,909,272,329 cycles # 2.965 GHz - 13,676,492,702 instructions # 1.98 insn per cycle - 2.330695390 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1057) (512y: 0) (512z: 0) +TOTAL : 3.351941 sec + 10,044,492,755 cycles # 2.993 GHz + 19,377,203,012 instructions # 1.93 insn per cycle + 3.357252123 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -147,15 +147,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.418862e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.369541e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.369541e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.202934e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.107252e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.107252e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.228021 sec - 6,650,960,186 cycles # 2.980 GHz - 13,380,296,124 instructions # 2.01 insn per cycle - 2.233066593 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1009) (512y: 0) (512z: 0) +TOTAL : 3.242049 sec + 9,710,030,443 cycles # 2.991 GHz + 19,005,255,795 instructions # 1.96 insn per cycle + 3.247405175 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -173,15 +173,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.184006e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.596559e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.596559e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.891534e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.531334e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.531334e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.371480 sec - 5,899,234,125 cycles # 2.483 GHz - 10,159,638,956 instructions # 1.72 insn per cycle - 2.376624580 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 268) (512y: 0) (512z: 683) +TOTAL : 3.720604 sec + 8,629,557,650 cycles # 2.318 GHz + 15,737,846,740 instructions # 1.82 insn per cycle + 3.725901019 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 683b401a34..2870ebbc32 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_19:08:41 +DATE: 2023-10-28_12:59:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -45,17 +45,17 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.450047e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.536531e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.695667e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.299550e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.591007e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.931193e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.791948 sec - 6,196,047,830 cycles # 3.061 GHz - 11,389,938,256 instructions # 1.84 insn per cycle - 2.082150692 seconds time elapsed +TOTAL : 1.820738 sec + 6,295,899,400 cycles # 3.073 GHz + 11,479,210,876 instructions # 1.82 insn per cycle + 2.105126705 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 @@ -71,15 +71,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.225914e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.460719e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.460719e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.127640e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.323593e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.323593e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.523506 sec - 17,169,419,080 cycles # 3.106 GHz - 40,421,624,196 instructions # 2.35 insn per cycle - 5.528429327 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.976156 sec + 18,298,750,649 cycles # 3.060 GHz + 44,033,955,064 instructions # 2.41 insn per cycle + 5.981385208 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -97,15 +97,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.159688e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.087862e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.087862e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.692034e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.217323e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.217323e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.303087 sec - 10,283,099,650 cycles # 3.109 GHz - 24,681,354,623 instructions # 2.40 insn per cycle - 3.308198240 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1284) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.110550 sec + 12,786,408,904 cycles # 3.108 GHz + 31,001,807,900 instructions # 2.42 insn per cycle + 4.116019692 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -123,15 +123,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.299969e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.994161e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.994161e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.022948e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.833059e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.833059e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.296796 sec - 6,872,706,594 cycles # 2.987 GHz - 13,676,412,365 instructions # 1.99 insn per cycle - 2.301612504 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1057) (512y: 0) (512z: 0) +TOTAL : 3.511619 sec + 10,107,987,005 cycles # 2.881 GHz + 19,380,873,481 instructions # 1.92 insn per cycle + 3.517122011 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -149,15 +149,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.398777e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.314475e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.314475e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.183458e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.090327e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.090327e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.238868 sec - 6,648,921,003 cycles # 2.964 GHz - 13,380,372,785 instructions # 2.01 insn per cycle - 2.243705065 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1009) (512y: 0) (512z: 0) +TOTAL : 3.269447 sec + 9,757,920,533 cycles # 2.981 GHz + 19,005,390,431 instructions # 1.95 insn per cycle + 3.274812048 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -175,15 +175,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.257611e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.762023e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.762023e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.882345e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.512559e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.512559e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.324047 sec - 5,897,358,226 cycles # 2.533 GHz - 10,159,451,394 instructions # 1.72 insn per cycle - 2.329040547 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 268) (512y: 0) (512z: 683) +TOTAL : 3.732017 sec + 8,623,800,932 cycles # 2.309 GHz + 15,737,047,561 instructions # 1.82 insn per cycle + 3.737384155 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index 8f12496d4e..5657d881b3 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:32:17 +DATE: 2023-10-28_12:07:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,16 +44,16 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.135499e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.480702e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.088764e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.441096e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.268097e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.113690e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.649915 sec - 2,521,027,262 cycles # 2.870 GHz - 3,932,562,496 instructions # 1.56 insn per cycle - 0.935284453 seconds time elapsed +TOTAL : 0.709809 sec + 2,803,996,602 cycles # 2.965 GHz + 4,290,428,381 instructions # 1.53 insn per cycle + 1.019645233 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 118 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 @@ -69,15 +69,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.221519e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.452240e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.452240e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.200878e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.422800e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.422800e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.545860 sec - 17,128,162,100 cycles # 3.086 GHz - 40,370,576,437 instructions # 2.36 insn per cycle - 5.550872301 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 362) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.634059 sec + 17,440,710,145 cycles # 3.093 GHz + 41,880,329,393 instructions # 2.40 insn per cycle + 5.642218325 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 392) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -95,15 +95,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.147682e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.063332e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.063332e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.736085e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.290780e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.290780e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.321610 sec - 10,270,541,691 cycles # 3.088 GHz - 24,643,021,754 instructions # 2.40 insn per cycle - 3.326647423 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1271) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.019667 sec + 12,477,262,405 cycles # 3.100 GHz + 30,163,431,800 instructions # 2.42 insn per cycle + 4.033775326 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1611) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -121,15 +121,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.302607e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.009317e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.009317e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.125635e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.982968e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.982968e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.292563 sec - 6,895,121,758 cycles # 3.004 GHz - 13,651,253,610 instructions # 1.98 insn per cycle - 2.297509965 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1037) (512y: 0) (512z: 0) +TOTAL : 3.348054 sec + 9,972,951,874 cycles # 2.974 GHz + 19,109,456,596 instructions # 1.92 insn per cycle + 3.361220026 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1930) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -147,15 +147,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.466481e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.436410e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.436410e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.196494e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.114824e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.114824e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.199117 sec - 6,634,520,360 cycles # 3.011 GHz - 13,355,581,160 instructions # 2.01 insn per cycle - 2.204141246 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 989) (512y: 0) (512z: 0) +TOTAL : 3.252567 sec + 9,689,824,724 cycles # 2.974 GHz + 18,765,633,099 instructions # 1.94 insn per cycle + 3.263307852 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1661) (512y: 178) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -173,15 +173,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.383490e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.139882e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.139882e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.908402e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.562129e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.562129e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.249115 sec - 5,748,008,303 cycles # 2.551 GHz - 10,038,931,524 instructions # 1.75 insn per cycle - 2.254226968 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 246) (512y: 0) (512z: 663) +TOTAL : 3.690497 sec + 8,466,142,206 cycles # 2.291 GHz + 15,614,351,262 instructions # 1.84 insn per cycle + 3.704028359 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 886) (512y: 156) (512z: 1239) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index bad17671a9..13070edd4c 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:52:22 +DATE: 2023-10-28_12:43:11 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,16 +44,16 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.866442e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.650091e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.818114e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.494476e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.553934e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.018374e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.673872 sec - 2,565,505,221 cycles # 2.855 GHz - 3,938,465,714 instructions # 1.54 insn per cycle - 0.962945607 seconds time elapsed +TOTAL : 0.673907 sec + 2,748,926,087 cycles # 3.032 GHz + 4,244,261,400 instructions # 1.54 insn per cycle + 0.965294408 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 @@ -69,15 +69,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.788792e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.362239e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.362239e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.713214e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.199457e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.199457e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.649101 sec - 8,210,295,285 cycles # 3.094 GHz - 17,459,406,832 instructions # 2.13 insn per cycle - 2.654141725 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 125) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.064069 sec + 12,678,588,689 cycles # 3.116 GHz + 32,576,476,028 instructions # 2.57 insn per cycle + 4.069761387 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 296) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -95,15 +95,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.776841e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.067159e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.067159e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.163410e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.090108e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.090108e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.060741 sec - 6,367,782,565 cycles # 3.084 GHz - 12,773,139,369 instructions # 2.01 insn per cycle - 2.065923417 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 810) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.302170 sec + 10,269,221,045 cycles # 3.105 GHz + 24,504,851,413 instructions # 2.39 insn per cycle + 3.307906175 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1251) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -121,15 +121,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.730005e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.360397e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.360397e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.375029e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.478948e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.478948e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.721074 sec - 5,169,510,873 cycles # 2.996 GHz - 9,371,577,717 instructions # 1.81 insn per cycle - 1.726231344 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 720) (512y: 0) (512z: 0) +TOTAL : 3.037442 sec + 9,111,077,932 cycles # 2.995 GHz + 16,941,027,954 instructions # 1.86 insn per cycle + 3.043137650 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1631) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -147,15 +147,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.006682e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.519447e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.519447e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.360262e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.485603e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.485603e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.646907 sec - 4,985,951,497 cycles # 3.019 GHz - 9,229,216,123 instructions # 1.85 insn per cycle - 1.652062250 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 641) (512y: 0) (512z: 0) +TOTAL : 3.062735 sec + 8,912,572,641 cycles # 2.907 GHz + 16,357,435,332 instructions # 1.84 insn per cycle + 3.068517018 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1370) (512y: 139) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -173,15 +173,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.264702e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.000270e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.000270e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.019189e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.788585e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.788585e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.871689 sec - 5,009,273,536 cycles # 2.672 GHz - 8,693,527,346 instructions # 1.74 insn per cycle - 1.876855811 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 200) (512y: 0) (512z: 276) +TOTAL : 3.519094 sec + 7,914,322,807 cycles # 2.246 GHz + 14,592,508,916 instructions # 1.84 insn per cycle + 3.524967051 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1015) (512y: 158) (512z: 955) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index e9aad49fe2..5ce6c3294f 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:52:44 +DATE: 2023-10-28_12:43:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,16 +44,16 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.999685e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.416020e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.082814e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.498637e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.592465e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.106967e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.643446 sec - 2,620,726,592 cycles # 3.012 GHz - 4,087,111,468 instructions # 1.56 insn per cycle - 0.931265549 seconds time elapsed +TOTAL : 0.672595 sec + 2,740,210,191 cycles # 3.028 GHz + 4,246,653,984 instructions # 1.55 insn per cycle + 0.964049396 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 118 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 @@ -69,15 +69,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.490278e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.520777e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.520777e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.270288e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.204979e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.204979e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.208507 sec - 6,644,704,561 cycles # 3.003 GHz - 14,230,584,763 instructions # 2.14 insn per cycle - 2.213619219 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 122) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.163536 sec + 9,876,175,945 cycles # 3.117 GHz + 25,456,751,506 instructions # 2.58 insn per cycle + 3.169196174 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 249) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -95,15 +95,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.496209e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.203520e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.203520e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.524894e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.890405e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.890405e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.793256 sec - 5,547,608,796 cycles # 3.086 GHz - 10,773,719,188 instructions # 1.94 insn per cycle - 1.798601488 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 610) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.883411 sec + 8,990,488,928 cycles # 3.113 GHz + 21,514,519,903 instructions # 2.39 insn per cycle + 2.888918635 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1119) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -121,15 +121,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.103748e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.641142e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.641142e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.510849e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.830526e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.830526e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.621275 sec - 4,941,467,155 cycles # 3.041 GHz - 8,728,712,502 instructions # 1.77 insn per cycle - 1.626388139 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 552) (512y: 0) (512z: 0) +TOTAL : 2.893235 sec + 8,686,251,634 cycles # 2.997 GHz + 15,829,692,324 instructions # 1.82 insn per cycle + 2.898833065 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -147,15 +147,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.063835e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.792404e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.792404e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.587762e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.939824e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.939824e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.652560 sec - 4,758,216,156 cycles # 2.885 GHz - 8,734,044,090 instructions # 1.84 insn per cycle - 1.657554337 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 519) (512y: 0) (512z: 0) +TOTAL : 2.817140 sec + 8,420,351,252 cycles # 2.984 GHz + 15,528,245,109 instructions # 1.84 insn per cycle + 2.822805511 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1268) (512y: 139) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -173,15 +173,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.451379e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.103695e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.103695e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.181864e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.103964e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.103964e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.809541 sec - 4,867,548,247 cycles # 2.684 GHz - 8,406,231,727 instructions # 1.73 insn per cycle - 1.814605280 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 168) (512y: 0) (512z: 227) +TOTAL : 3.287080 sec + 7,580,170,366 cycles # 2.303 GHz + 14,294,844,412 instructions # 1.89 insn per cycle + 3.292914044 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1041) (512y: 164) (512z: 874) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index a96360aa15..2bc6d58328 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -36,48 +36,48 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:32:45 +DATE: 2023-10-28_12:07:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=2, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.622469e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.481172e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.826768e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 -TOTAL : 0.551831 sec - 2,329,387,313 cycles # 3.021 GHz - 3,650,775,745 instructions # 1.57 insn per cycle - 0.828584114 seconds time elapsed +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.090543e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.080299e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.281561e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.587204 sec + 2,430,047,127 cycles # 2.990 GHz + 3,806,946,919 instructions # 1.57 insn per cycle + 0.886413121 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112108763889E-002 -Relative difference = 7.180279099086847e-06 +Avg ME (F77/CUDA) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.243845e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.497749e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.497749e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 5.410831 sec - 16,844,648,065 cycles # 3.111 GHz - 40,088,965,912 instructions # 2.38 insn per cycle - 5.415719530 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.166380e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.383006e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.383006e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 5.752162 sec + 17,833,499,336 cycles # 3.098 GHz + 43,613,139,570 instructions # 2.45 insn per cycle + 5.760218770 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -85,25 +85,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039807573077E-002 -Relative difference = 1.500049293219082e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.198454e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.967626e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.967626e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.324919 sec - 7,138,346,939 cycles # 3.065 GHz - 16,729,497,470 instructions # 2.34 insn per cycle - 2.329783883 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.398175e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.666453e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.666453e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 2.981717 sec + 9,251,617,129 cycles # 3.097 GHz + 21,926,067,081 instructions # 2.37 insn per cycle + 2.997914976 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -111,25 +111,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039511077804E-002 -Relative difference = 3.8113554068418534e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.643489e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.234035e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.234035e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 
-TOTAL : 1.702583 sec - 5,155,065,818 cycles # 3.021 GHz - 10,628,955,239 instructions # 2.06 insn per cycle - 1.707446133 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1122) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.574825e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.945979e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.945979e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.791462 sec + 8,314,714,212 cycles # 2.973 GHz + 15,591,186,569 instructions # 1.88 insn per cycle + 2.811182829 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -137,25 +137,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053246266791E-002 -Relative difference = 2.5306003563303186e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.817440e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.344869e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.344869e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.653471 sec - 5,044,234,982 cycles # 3.043 GHz - 10,475,715,128 instructions # 2.08 insn per cycle - 1.658543423 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1074) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.503167e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.857737e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.857737e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.876747 sec + 8,267,197,933 cycles # 2.869 GHz + 15,440,367,783 instructions # 1.87 insn per cycle + 2.891647671 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -163,25 +163,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053246266791E-002 -Relative difference = 2.5306003563303186e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.608343e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.175337e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.175337e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 1.712484 sec - 4,683,701,478 cycles # 2.728 GHz - 8,926,870,251 instructions # 1.91 insn per cycle - 1.717292179 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 343) (512y: 0) (512z: 710) +EvtsPerSec[Rmb+ME] (23) = ( 2.645972e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.083907e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.083907e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.731553 sec + 6,627,921,922 cycles # 2.422 GHz + 12,869,631,967 instructions # 1.94 insn per cycle + 
2.748478826 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -189,8 +189,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052371332888E-002 -Relative difference = 1.8485528876148422e-07 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index efa36acd38..9a142ed19e 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_19:02:51 +DATE: 2023-10-28_12:54:02 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -45,61 +45,48 @@ WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! flagging abnormal ME for ievt=71728 -WARNING! flagging abnormal ME for ievt=152898 -WARNING! flagging abnormal ME for ievt=496545 -WARNING! flagging abnormal ME for ievt=66427 -WARNING! flagging abnormal ME for ievt=465318 -WARNING! flagging abnormal ME for ievt=458848 -WARNING! flagging abnormal ME for ievt=247522 Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=7, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.717117e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.761186e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.761186e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371709e-02 +- 3.270385e-06 ) GeV^0 -TOTAL : 1.602340 sec - 5,595,842,121 cycles # 3.065 GHz - 10,162,817,469 instructions # 1.82 insn per cycle - 1.883301086 seconds time elapsed +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.422048e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.972937e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.972937e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 +TOTAL : 1.649582 sec + 5,711,265,423 cycles # 3.047 GHz + 10,308,364,781 instructions # 1.80 insn per cycle + 1.932930473 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! 
Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112108763889E-002 -Relative difference = 7.180279099086847e-06 +Avg ME (F77/CUDA) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -WARNING! flagging abnormal ME for ievt=53874 -WARNING! flagging abnormal ME for ievt=71728 -WARNING! flagging abnormal ME for ievt=152898 -WARNING! flagging abnormal ME for ievt=66427 -WARNING! flagging abnormal ME for ievt=164749 -WARNING! 
flagging abnormal ME for ievt=247522 Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.194651e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.427134e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.427134e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 5.727507 sec - 17,468,478,153 cycles # 3.048 GHz - 40,238,549,213 instructions # 2.30 insn per cycle - 5.733108996 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.140422e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.349734e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.349734e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 5.977808 sec + 18,533,331,220 cycles # 3.098 GHz + 43,762,290,767 instructions # 2.36 insn per cycle + 5.983909821 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -107,32 +94,26 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039807573077E-002 -Relative difference = 1.500049293219082e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -WARNING! flagging abnormal ME for ievt=53874 -WARNING! flagging abnormal ME for ievt=71728 -WARNING! flagging abnormal ME for ievt=152898 -WARNING! flagging abnormal ME for ievt=66427 -WARNING! flagging abnormal ME for ievt=164749 -WARNING! 
flagging abnormal ME for ievt=247522 Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.015918e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.363005e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.363005e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.562966 sec - 7,929,821,727 cycles # 3.088 GHz - 18,064,430,946 instructions # 2.28 insn per cycle - 2.568572939 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.290195e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.424700e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.424700e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.227553 sec + 10,032,677,675 cycles # 3.103 GHz + 23,260,235,509 instructions # 2.32 insn per cycle + 3.233916613 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -140,30 +121,26 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039511077804E-002 -Relative difference = 3.8113554068418534e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -WARNING! flagging abnormal ME for ievt=53874 -WARNING! flagging abnormal ME for ievt=66427 -WARNING! flagging abnormal ME for ievt=164749 -WARNING! 
flagging abnormal ME for ievt=247522 Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.272473e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.009652e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.009652e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.944574 sec - 5,929,261,497 cycles # 3.042 GHz - 11,749,715,523 instructions # 1.98 insn per cycle - 1.950245306 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1122) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.470452e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.712827e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.712827e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.023079 sec + 9,085,119,885 cycles # 3.000 GHz + 16,711,585,271 instructions # 1.84 insn per cycle + 3.029450378 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -171,30 +148,26 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053246266791E-002 -Relative difference = 2.5306003563303186e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -WARNING! flagging abnormal ME for ievt=53874 -WARNING! flagging abnormal ME for ievt=66427 -WARNING! flagging abnormal ME for ievt=164749 -WARNING! 
flagging abnormal ME for ievt=247522 Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.364659e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.078675e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.078675e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.914043 sec - 5,843,415,161 cycles # 3.045 GHz - 11,595,784,393 instructions # 1.98 insn per cycle - 1.919813375 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1074) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.492995e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.765769e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.765769e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.999610 sec + 9,001,947,397 cycles # 2.996 GHz + 16,553,774,590 instructions # 1.84 insn per cycle + 3.005954313 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -202,30 +175,26 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053246266791E-002 -Relative difference = 2.5306003563303186e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= WARNING! Instantiate host Bridge (nevt=524288) -WARNING! flagging abnormal ME for ievt=53874 -WARNING! flagging abnormal ME for ievt=66427 -WARNING! flagging abnormal ME for ievt=164749 -WARNING! 
flagging abnormal ME for ievt=247522 Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.130829e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.360401e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.360401e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 2.001099 sec - 5,491,473,730 cycles # 2.738 GHz - 10,134,991,267 instructions # 1.85 insn per cycle - 2.006879900 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 343) (512y: 0) (512z: 710) +EvtsPerSec[Rmb+ME] (23) = ( 2.493012e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.755928e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.755928e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.998687 sec + 7,467,033,548 cycles # 2.485 GHz + 14,076,135,702 instructions # 1.89 insn per cycle + 3.005132894 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -233,8 +202,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052371332888E-002 -Relative difference = 1.8485528876148422e-07 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index 3649e05b61..9eef926427 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_19:15:13 +DATE: 2023-10-28_13:06:27 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,40 +44,40 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.566300e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.421516e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.742992e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.387761e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.205793e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.236080e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 -TOTAL : 1.134749 sec - 4,098,993,410 cycles # 3.029 GHz - 6,605,372,981 instructions # 1.61 insn per cycle - 1.409722045 seconds time elapsed +TOTAL : 1.141212 sec + 4,198,750,142 cycles # 3.061 GHz + 6,675,407,407 instructions # 1.59 insn per cycle + 1.428782800 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112108763889E-002 -Relative difference = 7.180279099086847e-06 +Avg ME (F77/CUDA) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.244098e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.493114e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.493114e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 -TOTAL : 5.726200 sec - 17,811,812,788 cycles # 3.109 GHz - 40,270,712,628 instructions # 2.26 insn per cycle - 5.730949174 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.169470e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.388286e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.388286e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 6.050334 sec + 18,816,256,810 cycles # 3.110 GHz + 43,793,552,195 instructions # 2.33 insn per cycle + 6.054801451 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -85,25 +85,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039807573077E-002 -Relative difference = 1.500049293219082e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.215182e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.000060e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.000060e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270265e-06 ) GeV^0 -TOTAL : 2.623227 sec - 8,152,502,492 cycles # 3.103 GHz - 16,810,279,631 instructions # 2.06 insn per cycle - 2.628033716 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.361706e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.619621e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.619621e+06 ) sec^-1 +MeanMatrixElemValue = ( 
1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 3.344671 sec + 10,233,257,715 cycles # 3.056 GHz + 22,006,617,137 instructions # 2.15 insn per cycle + 3.349838788 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -111,25 +111,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039511077804E-002 -Relative difference = 3.8113554068418534e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.607786e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.225348e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.225348e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 -TOTAL : 2.027947 sec - 6,177,720,088 cycles # 3.041 GHz - 
10,540,553,410 instructions # 1.71 insn per cycle - 2.032791206 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1122) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.577991e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.978206e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.978206e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 3.100708 sec + 9,308,390,161 cycles # 2.998 GHz + 15,501,686,385 instructions # 1.67 insn per cycle + 3.105683433 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -137,25 +137,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053246266791E-002 -Relative difference = 2.5306003563303186e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.739337e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.329843e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.329843e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270110e-06 ) GeV^0 -TOTAL : 1.998605 sec - 6,104,707,264 cycles # 3.049 GHz - 10,185,815,617 instructions # 1.67 insn per cycle - 2.003437292 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1074) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.607278e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.054617e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.054617e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 3.081884 sec + 9,288,964,111 cycles # 3.010 GHz + 15,144,087,137 instructions # 1.63 insn per cycle + 3.086857149 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -163,25 +163,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053246266791E-002 -Relative difference = 2.5306003563303186e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.461301e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.134325e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.134325e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371884e-02 +- 3.270111e-06 ) GeV^0 -TOTAL : 2.092643 sec - 5,695,345,035 cycles # 2.717 GHz - 8,637,352,141 instructions # 1.52 insn per cycle - 2.097505728 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 343) (512y: 0) (512z: 710) +EvtsPerSec[Rmb+ME] (23) = ( 2.640251e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.085557e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.085557e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 3.056302 sec + 7,645,821,222 cycles # 2.498 GHz + 12,579,189,915 instructions # 1.65 insn per cycle + 
3.061324840 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -189,8 +189,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052371332888E-002 -Relative difference = 1.8485528876148422e-07 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt index 482f335b2b..1f93ef8de8 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt @@ -36,48 +36,48 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_19:12:11 +DATE: 2023-10-28_13:03:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --curhst OMP= Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=2, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.572199e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.443581e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.789713e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 -TOTAL : 0.852228 sec - 3,102,837,181 cycles # 2.900 GHz - 6,399,466,575 instructions # 2.06 insn per cycle - 1.127081538 seconds time elapsed +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.392219e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.220920e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.264145e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.827944 sec + 3,187,760,196 cycles # 3.049 GHz + 6,425,416,727 instructions # 2.02 insn per cycle + 1.105465709 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst -==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112108763889E-002 -Relative difference = 7.180279099086847e-06 +Avg ME (F77/CUDA) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.242033e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.490396e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.490396e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 5.420944 sec - 16,812,615,332 cycles # 3.099 GHz - 40,088,432,090 instructions # 2.38 insn per cycle - 5.425745402 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.175115e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.393787e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.393787e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 5.708534 sec + 17,816,177,934 cycles # 3.119 GHz + 43,613,110,420 instructions # 2.45 insn per cycle + 5.713529500 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -85,25 +85,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039807573077E-002 -Relative difference = 1.500049293219082e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.220212e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.000588e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.000588e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.308004 sec - 7,155,170,314 cycles # 3.095 GHz - 16,729,446,787 instructions # 2.34 insn per cycle - 2.312771061 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.413368e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.692372e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.692372e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 2.962135 sec + 9,234,609,876 cycles # 3.113 GHz + 21,925,233,672 instructions # 2.37 insn per cycle + 2.967194129 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -111,25 +111,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039511077804E-002 -Relative difference = 3.8113554068418534e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.640154e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.225902e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.225902e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 
) GeV^0 -TOTAL : 1.703655 sec - 5,164,709,474 cycles # 3.025 GHz - 10,629,819,542 instructions # 2.06 insn per cycle - 1.708526413 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1122) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.603845e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.990719e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.990719e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.761968 sec + 8,301,732,177 cycles # 3.001 GHz + 15,590,143,384 instructions # 1.88 insn per cycle + 2.767055994 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -137,25 +137,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053246266791E-002 -Relative difference = 2.5306003563303186e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.770352e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.326396e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.326396e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.669695 sec - 5,072,425,715 cycles # 3.031 GHz - 10,481,476,531 instructions # 2.07 insn per cycle - 1.674496390 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1074) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.632154e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.072123e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.072123e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.737561 sec + 8,237,387,151 cycles # 3.004 GHz + 15,434,315,781 instructions # 1.87 insn per cycle + 2.742848124 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -163,25 +163,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053246266791E-002 -Relative difference = 2.5306003563303186e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.608761e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.188893e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.188893e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 1.713521 sec - 4,682,934,639 cycles # 2.727 GHz - 8,926,936,750 instructions # 1.91 insn per cycle - 1.718311206 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 343) (512y: 0) (512z: 710) +EvtsPerSec[Rmb+ME] (23) = ( 2.664766e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.122063e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.122063e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.713872 sec + 6,633,362,586 cycles # 2.440 GHz + 12,870,274,076 instructions # 1.94 insn per cycle + 
2.719220394 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -189,8 +189,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052371332888E-002 -Relative difference = 1.8485528876148422e-07 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index efc5436b49..885eb346be 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_19:09:10 +DATE: 2023-10-28_13:00:14 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,42 +44,42 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=7, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.218117e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.394390e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.636278e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371709e-02 +- 3.270385e-06 ) GeV^0 -TOTAL : 1.461263 sec - 4,982,734,203 cycles # 2.962 GHz - 9,103,785,620 instructions # 1.83 insn per cycle - 1.744448034 seconds time elapsed +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.416685e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.191792e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.169337e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 +TOTAL : 1.431938 sec + 5,062,471,867 cycles # 3.066 GHz + 9,243,027,622 instructions # 1.83 insn per cycle + 1.708189535 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112108763889E-002 -Relative difference = 7.180279099086847e-06 +Avg ME (F77/CUDA) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.245708e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.494542e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.494542e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 5.404955 sec - 16,819,106,248 cycles # 3.110 GHz - 40,088,802,052 instructions # 2.38 insn per cycle - 5.409792938 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.172128e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.390493e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.390493e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 5.722039 sec + 17,824,581,874 cycles # 3.113 GHz + 43,613,229,473 instructions # 2.45 insn per cycle + 5.727091863 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -87,25 +87,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039807573077E-002 -Relative difference = 1.500049293219082e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.222846e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.010307e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.010307e+06 ) sec^-1 -MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.309298 sec - 7,155,727,700 cycles # 3.093 GHz - 16,729,709,933 instructions # 2.34 insn per cycle - 2.314175580 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1360) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.403732e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.680842e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.680842e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 2.973475 sec + 9,254,405,834 cycles # 3.108 GHz + 21,926,118,421 instructions # 2.37 insn per cycle + 2.978569066 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -113,25 +113,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039511077804E-002 -Relative difference = 3.8113554068418534e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = 
FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.653137e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.236145e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.236145e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.698626 sec - 5,157,602,141 cycles # 3.030 GHz - 10,629,000,968 instructions # 2.06 insn per cycle - 1.703452950 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1122) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.606204e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.002734e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.002734e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.758603 sec + 8,291,129,557 cycles # 3.001 GHz + 15,589,976,650 instructions # 1.88 insn per cycle + 2.763801597 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -139,25 +139,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053246266791E-002 -Relative difference = 2.5306003563303186e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.773001e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.326358e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.326358e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.667901 sec - 5,061,515,411 cycles # 3.028 GHz - 10,480,815,680 instructions # 2.07 insn per cycle - 1.672594775 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1074) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.573252e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.973552e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.973552e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.801833 sec + 8,237,865,652 cycles # 2.936 GHz + 15,433,868,332 instructions # 1.87 insn per cycle + 
2.806881384 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -165,25 +165,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053246266791E-002 -Relative difference = 2.5306003563303186e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.555200e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.166909e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.166909e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 1.731879 sec - 4,689,690,631 cycles # 2.701 GHz - 8,927,107,008 instructions # 1.90 insn per cycle - 1.736672094 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 343) (512y: 0) (512z: 710) 
+EvtsPerSec[Rmb+ME] (23) = ( 2.623316e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.043661e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.043661e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.750244 sec + 6,637,357,682 cycles # 2.410 GHz + 12,869,208,378 instructions # 1.94 insn per cycle + 2.755481179 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -191,8 +191,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052371332888E-002 -Relative difference = 1.8485528876148422e-07 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index 5dbfdd3213..060299a689 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -36,48 +36,48 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:33:09 +DATE: 2023-10-28_12:08:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=2, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.625913e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.503566e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.918133e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 -TOTAL : 0.552329 sec - 2,328,888,712 cycles # 3.020 GHz - 3,648,506,478 instructions # 1.57 insn per cycle - 0.830116688 seconds time elapsed +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.096840e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.090967e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.324848e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.584864 sec + 2,429,123,171 cycles # 3.001 GHz + 3,756,300,873 instructions # 1.55 insn per cycle + 0.884979997 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 80 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112108763889E-002 -Relative difference = 7.180279099086847e-06 +Avg ME (F77/CUDA) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.244101e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.497364e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.497364e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 5.479428 sec - 17,033,310,610 cycles # 3.106 GHz - 40,038,122,508 instructions # 2.35 insn per cycle - 5.484429062 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 347) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.250011e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.503357e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.503357e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 5.392391 sec + 16,749,129,912 cycles # 3.103 GHz + 41,371,534,119 instructions # 2.47 insn per cycle + 5.400279164 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -85,25 +85,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039807573077E-002 -Relative difference = 1.500049293219082e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.023210e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.906828e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.906828e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.441591 sec - 7,417,759,581 cycles # 3.033 GHz - 16,653,923,334 instructions # 2.25 insn per cycle - 2.446386116 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1335) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.470803e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.835685e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.835685e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 2.902349 sec + 9,025,157,457 cycles # 3.104 GHz + 21,229,572,279 instructions # 2.35 insn per cycle + 2.914870377 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1841) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -111,25 +111,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039511077804E-002 -Relative difference = 3.8113554068418534e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.697456e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.249241e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.249241e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 
-TOTAL : 1.685112 sec - 5,138,959,489 cycles # 3.042 GHz - 10,615,393,712 instructions # 2.07 insn per cycle - 1.690001992 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1092) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.612236e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.026002e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.026002e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.758538 sec + 8,264,914,372 cycles # 2.991 GHz + 15,424,883,788 instructions # 1.87 insn per cycle + 2.773271734 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2536) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -137,25 +137,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053246266791E-002 -Relative difference = 2.5306003563303186e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.752164e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.307340e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.307340e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.671544 sec - 5,088,363,074 cycles # 3.037 GHz - 10,468,790,591 instructions # 2.06 insn per cycle - 1.676598026 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1044) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.644470e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.110473e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.110473e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.728316 sec + 8,163,671,191 cycles # 2.986 GHz + 15,244,056,929 instructions # 1.87 insn per cycle + 2.741900405 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2423) (512y: 8) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -163,25 +163,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053246266791E-002 -Relative difference = 2.5306003563303186e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.538235e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.204282e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.204282e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 1.747955 sec - 4,622,637,035 cycles # 2.639 GHz - 8,857,108,339 instructions # 1.92 insn per cycle - 1.752985428 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 312) (512y: 0) (512z: 678) +EvtsPerSec[Rmb+ME] (23) = ( 2.644603e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.099685e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.099685e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.730411 sec + 6,638,319,522 cycles # 2.426 GHz + 12,848,391,758 instructions # 1.94 insn per cycle + 
2.743796244 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1705) (512y: 18) (512z: 1427) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -189,8 +189,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052300392301E-002 -Relative difference = 1.7932517415027764e-07 +Avg ME (F77/C++) = 1.2828052564145764E-002 +Relative difference = 1.9988585667912256e-07 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index 7778235778..c7ef9a3620 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -36,48 +36,48 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:53:05 +DATE: 2023-10-28_12:44:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=2, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.591002e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.452620e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.766126e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 -TOTAL : 0.556490 sec - 2,367,622,596 cycles # 3.019 GHz - 3,659,745,020 instructions # 1.55 insn per cycle - 0.843534071 seconds time elapsed +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.303040e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.188390e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.265011e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.570489 sec + 2,388,301,471 cycles # 3.016 GHz + 3,672,166,062 instructions # 1.54 insn per cycle + 0.851396853 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 96 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112108763889E-002 -Relative difference = 7.180279099086847e-06 +Avg ME (F77/CUDA) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.920065e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.817963e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.817963e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.512043 sec - 7,713,453,992 cycles # 3.066 GHz - 17,403,928,818 instructions # 2.26 insn per cycle - 2.516888756 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 141) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.721134e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.237285e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.237285e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 4.012883 sec + 12,189,009,747 cycles # 3.034 GHz + 32,520,660,992 instructions # 2.67 insn per cycle + 4.018143104 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 312) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -85,25 +85,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039809392067E-002 -Relative difference = 1.4858695011109669e-08 +Avg ME (F77/C++) = 1.2828039840314887E-002 +Relative difference = 1.244813035273009e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.681101e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.456058e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.456058e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 1.700110 sec - 5,232,564,400 cycles # 3.070 GHz - 10,761,247,884 instructions # 2.06 insn per cycle - 1.704947526 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 941) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.844353e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.819924e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 4.819924e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 2.568889 sec + 7,992,345,121 cycles # 3.106 GHz + 18,689,694,318 instructions # 2.34 insn per cycle + 2.574390971 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1554) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -111,25 +111,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039496525889E-002 -Relative difference = 3.924793743706775e-08 +Avg ME (F77/C++) = 1.2828039283704129E-002 +Relative difference = 5.583829420356249e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.136284e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.424889e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.424889e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 
-TOTAL : 1.396345 sec - 4,222,666,465 cycles # 3.047 GHz - 8,344,159,796 instructions # 1.98 insn per cycle - 1.401275041 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 855) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.974716e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.925885e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.925885e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.466000 sec + 7,424,757,976 cycles # 3.006 GHz + 14,253,649,478 instructions # 1.92 insn per cycle + 2.471253254 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2237) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -137,25 +137,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053237171844E-002 -Relative difference = 2.5235104658031306e-07 +Avg ME (F77/C++) = 1.2828053244447801E-002 +Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.252461e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.832504e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.832504e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.356528 sec - 4,156,578,728 cycles # 3.055 GHz - 8,308,294,757 instructions # 2.00 insn per cycle - 1.361324117 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 779) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.037585e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.105236e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.105236e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.421254 sec + 7,301,719,895 cycles # 3.010 GHz + 13,951,418,899 instructions # 1.91 insn per cycle + 2.426824644 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2096) (512y: 3) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -163,25 +163,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053237171844E-002 -Relative difference = 2.5235104658031306e-07 +Avg ME (F77/C++) = 1.2828053244447801E-002 +Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.632818e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.213085e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.213085e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 1.468207 sec - 4,189,515,183 cycles # 2.846 GHz - 8,197,193,406 instructions # 1.96 insn per cycle - 1.473030833 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 280) (512y: 0) (512z: 301) +EvtsPerSec[Rmb+ME] (23) = ( 2.708760e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.220574e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.220574e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.673761 sec + 6,515,348,114 cycles # 2.433 GHz + 13,421,447,299 instructions # 2.06 insn per cycle + 
2.679223686 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2071) (512y: 1) (512z: 1198) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -189,8 +189,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052258555545E-002 -Relative difference = 1.7606382450777122e-07 +Avg ME (F77/C++) = 1.2828052562326775E-002 +Relative difference = 1.997440588685788e-07 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index 0d46a7bcf5..aa4260fb14 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -36,48 +36,48 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:53:26 +DATE: 2023-10-28_12:44:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=2, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.594388e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.488622e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.908806e+09 ) sec^-1 -MeanMatrixElemValue = ( 1.371686e-02 +- 3.270219e-06 ) GeV^0 -TOTAL : 0.555353 sec - 2,369,008,611 cycles # 3.015 GHz - 3,693,428,004 instructions # 1.56 insn per cycle - 0.844152711 seconds time elapsed +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.305875e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.197999e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.303366e+09 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.569902 sec + 2,393,097,307 cycles # 3.023 GHz + 3,728,454,815 instructions # 1.56 insn per cycle + 0.850824445 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 80 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fgcheck.exe 2 64 2 Avg ME (C++/CUDA) = 1.282802e-02 -Avg ME (F77/CUDA) = 1.2828112108763889E-002 -Relative difference = 7.180279099086847e-06 +Avg ME (F77/CUDA) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.824530e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.823115e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.823115e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 2.002512 sec - 6,203,899,879 cycles # 3.095 GHz - 14,161,126,790 instructions # 2.28 insn per cycle - 2.007246903 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 133) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.306212e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.354504e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.354504e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.087486 sec + 9,423,962,266 cycles # 3.049 GHz + 25,307,428,823 instructions # 2.69 insn per cycle + 3.093213487 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 263) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -85,25 +85,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039833038929E-002 -Relative difference = 1.3015322037054697e-08 +Avg ME (F77/C++) = 1.2828039838495897E-002 +Relative difference = 1.2589928273811243e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=6, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.299369e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.234812e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.234812e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270373e-06 ) GeV^0 -TOTAL : 1.542994 sec - 4,763,831,720 cycles # 3.079 GHz - 9,566,058,895 instructions # 2.01 insn per cycle - 1.547857940 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 663) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.212003e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.966983e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 5.966983e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 2.316527 sec + 7,194,121,645 cycles # 3.100 GHz + 16,901,739,382 instructions # 2.35 insn per cycle + 2.321817623 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1359) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -111,25 +111,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039511077804E-002 -Relative difference = 3.8113554068418534e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.326543e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.012502e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.012502e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 
-TOTAL : 1.344736 sec - 4,139,920,255 cycles # 3.069 GHz - 8,120,823,500 instructions # 1.96 insn per cycle - 1.349591931 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 623) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.088605e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.290375e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.290375e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.385856 sec + 7,198,917,112 cycles # 3.012 GHz + 13,619,147,871 instructions # 1.89 insn per cycle + 2.391282642 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2060) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -137,25 +137,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053244447801E-002 -Relative difference = 2.5291823782248813e-07 +Avg ME (F77/C++) = 1.2828053220800939E-002 +Relative difference = 2.5107486628541925e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.344788e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.506915e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.506915e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270338e-06 ) GeV^0 -TOTAL : 1.344145 sec - 4,090,602,024 cycles # 3.034 GHz - 8,121,231,635 instructions # 1.99 insn per cycle - 1.349056597 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 590) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.159700e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.484314e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.484314e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.341781 sec + 7,056,960,923 cycles # 3.008 GHz + 13,430,372,265 instructions # 1.90 insn per cycle + 2.347350712 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1945) (512y: 4) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -163,25 +163,25 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053244447801E-002 -Relative difference = 2.5291823782248813e-07 +Avg ME (F77/C++) = 1.2828053220800939E-002 +Relative difference = 2.5107486628541925e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=4, zero=0) +FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.650060e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.477330e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.477330e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371705e-02 +- 3.270339e-06 ) GeV^0 -TOTAL : 1.474656 sec - 4,125,251,957 cycles # 2.790 GHz - 8,033,155,651 instructions # 1.95 insn per cycle - 1.479796548 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 238) (512y: 0) (512z: 234) +EvtsPerSec[Rmb+ME] (23) = ( 2.759294e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.397998e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.397998e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.637218 sec + 6,320,867,639 cycles # 2.393 GHz + 13,153,069,484 instructions # 2.08 insn per cycle + 
2.642625257 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2029) (512y: 1) (512z: 1083) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -189,8 +189,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052378608845E-002 -Relative difference = 1.8542248000365923e-07 +Avg ME (F77/C++) = 1.2828052536860923E-002 +Relative difference = 1.977588895209662e-07 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 42cb535764..e2d15ec9ec 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:33:34 +DATE: 2023-10-28_12:08:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,16 +44,16 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.989272e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.677595e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.794677e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.432223e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.263514e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.061401e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.639629 sec - 2,624,746,782 cycles # 3.027 GHz - 4,095,202,506 instructions # 1.56 insn per cycle - 0.927049463 seconds time elapsed +TOTAL : 0.713704 sec + 2,810,694,285 cycles # 2.970 GHz + 4,352,973,933 instructions # 1.55 insn per cycle + 1.023439423 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 150 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 @@ -69,15 +69,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP 
precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.199685e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.421947e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.421947e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.113042e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.301497e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.301497e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.639670 sec - 17,406,394,399 cycles # 3.084 GHz - 40,598,366,537 instructions # 2.33 insn per cycle - 5.644674626 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 377) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.048110 sec + 18,728,988,916 cycles # 3.094 GHz + 44,285,855,893 instructions # 2.36 insn per cycle + 6.056235273 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 439) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -95,15 +95,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.181362e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.136383e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.136383e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.725869e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.290079e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.290079e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.277504 sec - 10,153,584,525 cycles # 3.094 GHz - 24,841,830,142 instructions # 2.45 insn per cycle - 3.282502956 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1318) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.046149 sec + 12,390,711,310 cycles # 3.059 GHz + 30,961,468,949 instructions # 2.50 insn per cycle + 4.059152850 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1685) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -121,15 +121,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.329721e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.123732e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.123732e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.078637e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.886882e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.886882e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.277389 sec - 6,843,760,468 cycles # 3.000 GHz - 13,635,441,327 instructions # 1.99 insn per cycle - 2.282659453 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1211) (512y: 0) (512z: 0) +TOTAL : 3.415997 sec + 10,148,421,740 cycles # 2.966 GHz + 19,398,966,702 instructions # 1.91 insn per cycle + 3.430194328 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2146) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -147,15 +147,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.425597e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.470588e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.470588e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.092565e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.940158e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.940158e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.232215 sec - 6,546,202,423 cycles # 2.927 GHz - 13,316,237,781 instructions # 2.03 insn per cycle - 2.237429944 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1141) (512y: 0) (512z: 0) +TOTAL : 3.403217 sec + 9,741,567,918 cycles # 2.858 GHz + 18,981,474,131 instructions # 1.95 insn per cycle + 3.418468893 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1859) (512y: 188) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -173,15 +173,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.286536e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.835267e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.835267e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.901444e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.595212e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.595212e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.304970 sec - 5,866,744,695 cycles # 2.541 GHz - 10,212,406,703 instructions # 1.74 insn per cycle - 2.310030357 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 406) (512y: 0) (512z: 707) +TOTAL : 3.703255 sec + 8,520,387,175 cycles # 2.297 GHz + 15,065,100,246 instructions # 1.77 insn per cycle + 3.716972990 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1023) (512y: 155) (512z: 1316) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index 84b58b8eae..f3310b9cac 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-25_18:34:02 +DATE: 2023-10-28_12:09:11 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,16 +44,16 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.129836e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.454495e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.086141e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.430214e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.253611e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.074009e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.640256 sec - 2,612,587,104 cycles # 3.007 GHz - 4,030,428,105 instructions # 1.54 insn per cycle - 0.928744648 seconds time elapsed +TOTAL : 0.703074 sec + 2,798,414,359 cycles # 2.979 GHz + 4,319,216,534 instructions # 1.54 insn per cycle + 1.005639527 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 118 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 @@ -69,15 +69,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP 
precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.208951e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.433392e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.433392e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.166428e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.374429e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.374429e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.597792 sec - 17,342,645,679 cycles # 3.096 GHz - 40,546,867,973 instructions # 2.34 insn per cycle - 5.602682073 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 364) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.786685 sec + 17,967,323,887 cycles # 3.102 GHz + 42,535,469,208 instructions # 2.37 insn per cycle + 5.794719220 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 421) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -95,15 +95,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.133508e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.059933e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.059933e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.781652e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.369490e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.369490e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.349360 sec - 10,185,466,163 cycles # 3.037 GHz - 24,803,480,189 instructions # 2.44 insn per cycle - 3.354498074 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1305) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.925610 sec + 12,207,226,408 cycles # 3.107 GHz + 30,267,618,799 instructions # 2.48 insn per cycle + 3.940648882 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1692) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -121,15 +121,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.351442e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.169056e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.169056e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.101317e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.934375e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.934375e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.263569 sec - 6,798,954,008 cycles # 2.998 GHz - 13,608,714,241 instructions # 2.00 insn per cycle - 2.268496012 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1191) (512y: 0) (512z: 0) +TOTAL : 3.383740 sec + 10,049,844,381 cycles # 2.965 GHz + 19,281,783,052 instructions # 1.92 insn per cycle + 3.398725006 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2162) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -147,15 +147,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.513470e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.629747e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.629747e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.199554e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.121016e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.121016e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.175520 sec - 6,534,951,743 cycles # 2.998 GHz - 13,313,454,459 instructions # 2.04 insn per cycle - 2.180544895 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1121) (512y: 0) (512z: 0) +TOTAL : 3.247190 sec + 9,675,471,455 cycles # 2.975 GHz + 18,782,612,074 instructions # 1.94 insn per cycle + 3.261499344 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1833) (512y: 191) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -173,15 +173,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.364985e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.081130e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.081130e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.954205e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.650147e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.650147e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.260031 sec - 5,775,308,811 cycles # 2.550 GHz - 10,091,603,442 instructions # 1.75 insn per cycle - 2.265220222 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 386) (512y: 0) (512z: 688) +TOTAL : 3.614239 sec + 8,334,476,383 cycles # 2.303 GHz + 14,988,761,152 instructions # 1.80 insn per cycle + 3.627975521 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1020) (512y: 156) (512z: 1305) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index fc7d3d5581..8336024765 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:34:30 +DATE: 2023-10-28_12:09:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.189330e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.175818e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270057e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.019001e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.136794e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.274389e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.512252 sec - 2,155,173,458 cycles # 2.909 GHz - 3,041,305,881 instructions # 1.41 insn per cycle - 0.798678690 seconds time elapsed +TOTAL : 0.527024 sec + 2,289,159,620 cycles # 3.004 GHz + 3,250,188,454 instructions # 1.42 insn per cycle + 0.831218802 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,15 +69,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.926195e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.975396e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.975396e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 2.197922e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.262269e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.262269e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.545999 sec - 17,154,877,090 cycles # 3.091 GHz - 45,384,595,667 instructions # 2.65 insn per cycle - 5.551026412 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.874413 sec + 15,154,127,415 cycles # 3.106 GHz + 38,435,939,186 instructions # 2.54 insn per cycle + 4.882482659 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -85,8 +85,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515649 -Relative difference = 3.258803992249869e-07 +Avg ME (F77/C++) = 2.0288063388515645 +Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= @@ -95,15 +95,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.351243e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.515324e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.515324e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.700267e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.902876e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.902876e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.231698 sec - 10,007,778,960 cycles # 3.093 GHz - 27,771,257,423 instructions # 2.77 insn per cycle - 3.236933218 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2543) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.938885 sec + 9,102,250,875 cycles # 3.091 GHz + 24,592,027,223 instructions # 2.70 insn per cycle + 2.953737381 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -121,15 +121,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.340358e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.757627e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.757627e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.841670e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.348294e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.348294e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.065747 sec - 6,039,122,425 cycles # 2.917 GHz - 12,507,446,858 instructions # 2.07 insn per cycle - 2.070933576 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 0) (512z: 0) +TOTAL : 1.898439 sec + 5,505,216,065 cycles # 2.892 GHz + 11,266,747,641 instructions # 2.05 insn per cycle + 1.915944724 seconds time elapsed +=Symbols in CPPProcess.o= 
(~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -147,15 +147,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.874280e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.375597e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.375597e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.367341e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.971191e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.971191e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.886900 sec - 5,532,181,520 cycles # 2.925 GHz - 11,883,413,800 instructions # 2.15 insn per cycle - 1.892172826 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2414) (512y: 144) (512z: 0) +TOTAL : 1.753044 sec + 4,965,020,075 cycles # 2.823 GHz + 10,572,471,342 instructions # 2.13 insn per cycle + 1.767016306 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -173,15 +173,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.697439e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.891301e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.891301e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.121729e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.361641e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.361641e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.940719 sec - 5,705,029,690 cycles # 1.938 GHz - 8,291,496,940 instructions # 1.45 insn per cycle - 2.945995320 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1451) (512y: 122) (512z: 1797) +TOTAL : 2.647332 sec + 5,386,531,185 cycles # 2.030 GHz + 7,805,289,561 instructions # 1.45 insn per cycle + 2.664612115 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index bde7cbdb09..89f4885416 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_19:03:18 +DATE: 2023-10-28_12:54:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.773797e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.294173e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.294173e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.628625e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.020132e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.020132e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.785416 sec - 3,063,164,499 cycles # 3.000 GHz - 4,792,639,654 instructions # 1.56 insn per cycle - 1.079850324 seconds time elapsed +TOTAL : 0.797774 sec + 3,189,194,967 cycles # 3.042 GHz + 4,915,184,954 instructions # 1.54 insn per cycle + 1.106368237 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -78,15 +78,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.892855e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.940587e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.940587e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.181893e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.245511e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.245511e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.719758 sec - 17,497,619,767 cycles # 3.056 GHz - 45,446,099,914 instructions # 2.60 insn per cycle - 5.726041885 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.985853 sec + 15,494,769,661 cycles # 3.104 GHz + 38,496,185,507 instructions # 2.48 insn per cycle + 4.992645531 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -94,8 +94,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515649 -Relative difference = 3.258803992249869e-07 +Avg ME (F77/C++) = 2.0288063388515645 +Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= @@ -105,15 +105,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.325585e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.487595e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.487595e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.547838e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.739897e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.739897e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.333037 sec - 10,354,211,298 cycles # 3.101 GHz - 27,955,092,209 instructions # 2.70 insn per cycle - 3.339341303 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2543) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.137914 sec + 9,437,581,770 cycles # 3.002 GHz + 24,774,460,960 instructions # 2.63 insn per cycle + 3.144781618 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -132,15 +132,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.229345e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.633133e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.633133e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.828534e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.328150e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.328150e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.186507 sec - 6,395,865,888 cycles # 2.918 GHz - 12,794,721,791 instructions # 2.00 insn per cycle - 2.192626317 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 0) (512z: 0) +TOTAL : 1.980182 sec + 5,849,819,972 cycles # 2.945 GHz + 11,551,679,172 instructions # 1.97 insn per cycle + 1.986935844 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -159,15 +159,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.718956e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.198053e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.198053e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.535655e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.166989e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.166989e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.015312 sec - 5,880,292,764 cycles # 2.910 GHz - 12,172,549,562 instructions # 2.07 insn per cycle - 2.021489543 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2414) (512y: 144) (512z: 0) +TOTAL : 1.786832 sec + 5,298,722,139 cycles # 2.956 GHz + 10,858,826,865 instructions # 2.05 insn per cycle + 1.793683914 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -186,15 +186,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.795700e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.000887e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.000887e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.984644e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.209702e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.209702e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.946657 sec - 6,072,408,903 cycles # 2.057 GHz - 8,534,252,358 instructions # 1.41 insn per cycle - 2.952768361 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1451) (512y: 122) (512z: 1797) +TOTAL : 2.815659 sec + 5,764,743,331 cycles # 2.043 GHz + 8,049,005,197 instructions # 1.40 insn per cycle + 2.822613600 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index 531c093860..4341cd8cb2 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_19:15:40 +DATE: 2023-10-28_13:06:58 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.085961e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.172637e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270805e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.738260e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.160695e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270128e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 0.603556 sec - 2,510,793,087 cycles # 3.030 GHz - 3,673,074,819 instructions # 1.46 insn per cycle - 0.885869264 seconds time elapsed +TOTAL : 0.615783 sec + 2,512,291,360 cycles # 2.984 GHz + 3,661,843,377 instructions # 1.46 insn per cycle + 0.899734611 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,15 +69,15 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.938086e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.988073e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.988073e+05 ) 
sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.205018e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.270000e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.270000e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 5.572089 sec - 17,339,920,977 cycles # 3.110 GHz - 45,401,031,280 instructions # 2.62 insn per cycle - 5.577099493 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.917523 sec + 15,330,499,616 cycles # 3.115 GHz + 38,452,312,172 instructions # 2.51 insn per cycle + 4.922909300 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -85,8 +85,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515649 -Relative difference = 3.258803992249869e-07 +Avg ME (F77/C++) = 2.0288063388515645 +Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= @@ -95,15 +95,15 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
3.366267e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.533818e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.533818e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.728127e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.933166e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.933166e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.275090 sec - 10,190,622,487 cycles # 3.108 GHz - 27,770,717,333 instructions # 2.73 insn per cycle - 3.280028443 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2543) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.974283 sec + 9,266,289,564 cycles # 3.111 GHz + 24,590,192,562 instructions # 2.65 insn per cycle + 2.979640099 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -121,15 +121,15 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.358680e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.785880e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.785880e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.886292e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.405439e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.405439e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.118513 sec - 6,227,899,517 cycles # 2.934 GHz - 12,490,736,505 instructions # 2.01 insn per cycle - 2.123796775 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 0) (512z: 0) +TOTAL : 1.942789 sec + 5,689,267,748 cycles # 2.924 GHz + 11,248,715,582 instructions # 1.98 insn per cycle + 1.948199837 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -147,15 +147,15 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.791831e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.300158e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.300158e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.670998e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.319718e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.319718e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.975789 sec - 5,740,046,861 cycles # 2.900 GHz - 11,834,515,828 instructions # 2.06 insn per cycle - 1.980847261 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2414) (512y: 144) (512z: 0) +TOTAL : 1.733390 sec + 5,130,314,649 cycles # 2.952 GHz + 10,520,705,525 instructions # 2.05 insn per cycle + 1.738662427 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -173,15 +173,15 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.818538e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.030811e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.030811e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.138982e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.379012e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.379012e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.912399 sec - 5,874,294,388 cycles # 2.014 GHz - 8,239,488,482 instructions # 1.40 insn per cycle - 2.917538602 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1451) (512y: 122) (512z: 1797) +TOTAL : 2.696595 sec + 5,567,230,521 cycles # 2.061 GHz + 7,753,963,703 instructions # 1.39 insn per cycle + 2.701828311 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt index ee80d49776..cb28d279bb 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_19:12:36 +DATE: 2023-10-28_13:03:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.085112e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.169879e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.269257e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.731016e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.163476e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.273757e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.542204 sec - 2,298,212,603 cycles # 3.002 GHz - 3,616,714,256 instructions # 1.57 insn per cycle - 0.823159797 seconds time elapsed +TOTAL : 0.562632 sec + 2,291,225,101 cycles # 2.912 GHz + 3,530,216,365 instructions # 1.54 insn per cycle + 0.849249178 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,15 +69,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.902640e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.950734e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.950734e+05 ) 
sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.205686e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.270616e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.270616e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.613770 sec - 17,167,591,716 cycles # 3.056 GHz - 45,385,422,779 instructions # 2.64 insn per cycle - 5.618850316 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.857452 sec + 15,159,442,405 cycles # 3.118 GHz + 38,437,017,641 instructions # 2.54 insn per cycle + 4.862743230 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -85,8 +85,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515649 -Relative difference = 3.258803992249869e-07 +Avg ME (F77/C++) = 2.0288063388515645 +Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= @@ -95,15 +95,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
3.356859e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.522784e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.522784e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.636753e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.830892e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.830892e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.227055 sec - 10,007,434,150 cycles # 3.097 GHz - 27,771,321,943 instructions # 2.78 insn per cycle - 3.232084319 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2543) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.986089 sec + 9,111,794,887 cycles # 3.047 GHz + 24,590,815,731 instructions # 2.70 insn per cycle + 2.991338372 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -121,15 +121,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.339247e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.757472e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.757472e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.980804e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.504132e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.504132e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.067924 sec - 6,064,030,334 cycles # 2.927 GHz - 12,508,006,764 instructions # 2.06 insn per cycle - 2.072907317 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 0) (512z: 0) +TOTAL : 1.856262 sec + 5,467,904,077 cycles # 2.938 GHz + 11,264,908,254 instructions # 2.06 insn per cycle + 1.861668376 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -147,15 +147,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.874688e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.380385e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.380385e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.665590e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.312077e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.312077e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.887776 sec - 5,540,691,746 cycles # 2.929 GHz - 11,883,645,896 instructions # 2.14 insn per cycle - 1.892908998 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2414) (512y: 144) (512z: 0) +TOTAL : 1.675462 sec + 4,943,209,548 cycles # 2.943 GHz + 10,569,508,421 instructions # 2.14 insn per cycle + 1.680743869 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -173,15 +173,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.817386e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.022798e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.022798e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.115270e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.354259e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.354259e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.850472 sec - 5,706,017,461 cycles # 1.999 GHz - 8,290,142,366 instructions # 1.45 insn per cycle - 2.855563486 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1451) (512y: 122) (512z: 1797) +TOTAL : 2.651242 sec + 5,408,346,690 cycles # 2.037 GHz + 7,804,849,221 instructions # 1.44 insn per cycle + 2.656556634 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index 1b9c9ee7df..b5905b62c5 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_19:09:35 +DATE: 2023-10-28_13:00:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -45,14 +45,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.145651e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.173920e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.273175e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.097525e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.157814e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.266299e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.691895 sec - 2,781,064,477 cycles # 3.018 GHz - 4,402,279,507 instructions # 1.58 insn per cycle - 0.980225226 seconds time elapsed +TOTAL : 0.694752 sec + 2,788,859,822 cycles # 3.033 GHz + 4,381,365,981 instructions # 1.57 insn per cycle + 0.979054926 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -71,15 +71,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.929230e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.978722e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.978722e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.191412e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.257639e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.257639e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.538774 sec - 17,159,821,844 cycles # 3.096 GHz - 45,385,185,100 instructions # 2.64 insn per cycle - 5.543842374 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.890546 sec + 15,147,332,557 cycles # 3.095 GHz + 38,436,064,384 instructions # 2.54 insn per cycle + 4.896190934 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -87,8 +87,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515649 -Relative difference = 3.258803992249869e-07 +Avg ME (F77/C++) = 2.0288063388515645 +Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= @@ -97,15 +97,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.287540e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.449504e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.449504e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.714741e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.919663e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.919663e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.292731 sec - 10,016,590,673 cycles # 3.038 GHz - 27,771,485,458 instructions # 2.77 insn per cycle - 3.297740861 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2543) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.927374 sec + 9,107,310,115 cycles # 3.106 GHz + 24,591,187,308 instructions # 2.70 insn per cycle + 2.932855113 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -123,15 +123,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.325678e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.741826e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.741826e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.796721e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.291663e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.291663e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.072513 sec - 6,058,236,283 cycles # 2.917 GHz - 12,507,380,733 instructions # 2.06 insn per cycle - 2.077678265 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 0) (512z: 0) +TOTAL : 1.913169 sec + 5,482,543,140 cycles # 2.860 GHz + 11,267,157,705 instructions # 2.06 insn per cycle + 1.918674011 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -149,15 +149,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.871793e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.376853e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.376853e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.504015e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.124956e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.124956e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.888593 sec - 5,531,298,122 cycles # 2.922 GHz - 11,883,369,769 instructions # 2.15 insn per cycle - 1.893708699 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2414) (512y: 144) (512z: 0) +TOTAL : 1.714664 sec + 4,954,880,462 cycles # 2.882 GHz + 10,571,641,627 instructions # 2.13 insn per cycle + 1.720307718 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -175,15 +175,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.825270e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.031884e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.031884e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.092262e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.330615e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.330615e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.844855 sec - 5,686,333,748 cycles # 1.997 GHz - 8,290,317,138 instructions # 1.46 insn per cycle - 2.849943056 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1451) (512y: 122) (512z: 1797) +TOTAL : 2.665319 sec + 5,400,661,219 cycles # 2.023 GHz + 7,804,738,015 instructions # 1.45 insn per cycle + 2.670742295 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index 11300e6895..4f7888d47a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:34:57 +DATE: 2023-10-28_12:10:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.181607e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.171006e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.264343e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.010123e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.132541e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.266884e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.507998 sec - 2,247,373,928 cycles # 3.001 GHz - 3,245,400,581 instructions # 1.44 insn per cycle - 0.806174140 seconds time elapsed +TOTAL : 0.526405 sec + 2,294,951,715 cycles # 3.016 GHz + 3,252,932,201 instructions # 1.42 insn per cycle + 0.829193545 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,15 +69,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.976266e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.029293e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.029293e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 2.226915e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.292728e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.292728e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.409225 sec - 16,681,628,869 cycles # 3.082 GHz - 44,378,235,380 instructions # 2.66 insn per cycle - 5.414507921 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 576) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.811357 sec + 15,021,105,693 cycles # 3.119 GHz + 40,162,926,531 instructions # 2.67 insn per cycle + 4.819238763 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -95,15 +95,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.514456e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.696441e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.696441e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.915843e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.141959e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.141959e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.086332 sec - 9,510,708,832 cycles # 3.078 GHz - 26,620,808,250 instructions # 2.80 insn per cycle - 3.091424217 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2339) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.780793 sec + 8,672,891,508 cycles # 3.113 GHz + 23,683,922,536 instructions # 2.73 insn per cycle + 2.797395099 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2069) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -121,15 +121,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.735122e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.074181e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.074181e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.337317e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.751342e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.751342e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.319139 sec - 6,584,764,475 cycles # 2.834 GHz - 14,057,249,658 instructions # 2.13 insn per cycle - 2.324235278 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2753) (512y: 0) (512z: 0) +TOTAL : 2.067044 sec + 6,107,061,861 cycles # 2.946 GHz + 13,074,919,528 instructions # 2.14 insn per cycle + 2.081145351 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2546) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -147,15 +147,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.117258e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.494373e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.494373e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.603808e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.060730e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.060730e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.150419 sec - 6,273,792,377 cycles # 2.911 GHz - 13,574,431,184 instructions # 2.16 insn per cycle - 2.155695099 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2405) (512y: 296) (512z: 0) +TOTAL : 1.974059 sec + 5,798,776,070 cycles # 2.929 GHz + 12,332,933,370 instructions # 2.13 insn per cycle + 1.986801678 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2096) (512y: 294) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -173,15 +173,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.685746e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.878776e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.878776e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.791064e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.992335e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.992335e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.947596 sec - 5,924,739,734 cycles # 2.007 GHz - 10,074,038,054 instructions # 1.70 insn per cycle - 2.952990655 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1324) (512y: 208) (512z: 1980) +TOTAL : 2.869809 sec + 5,830,824,222 cycles # 2.028 GHz + 9,613,378,795 instructions # 1.65 insn per cycle + 2.884107270 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1510) (512y: 209) (512z: 1971) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 72e4f7ff9f..84262108e6 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:53:45 +DATE: 2023-10-28_12:44:58 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.139570e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.179113e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.275835e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.585283e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.160783e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.274213e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.507615 sec - 2,255,394,284 cycles # 3.018 GHz - 3,233,142,981 instructions # 1.43 insn per cycle - 0.804645133 seconds time elapsed +TOTAL : 0.522037 sec + 2,225,847,965 cycles # 2.955 GHz + 3,088,745,897 instructions # 1.39 insn per cycle + 0.812232117 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.504791e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.591296e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.591296e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 2.548055e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.635262e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.635262e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.290630 sec - 13,015,421,868 cycles # 3.031 GHz - 34,406,707,609 instructions # 2.64 insn per cycle - 4.295722046 seconds time elapsed +TOTAL : 4.218746 sec + 13,020,267,103 cycles # 3.083 GHz + 34,406,039,454 instructions # 2.64 insn per cycle + 4.224411668 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 686) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.162383e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.308946e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.308946e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.165854e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.312469e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.312469e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.418889 sec - 10,608,875,216 cycles # 3.099 GHz - 24,023,081,327 instructions # 2.26 insn per cycle - 3.424022271 seconds time elapsed +TOTAL : 3.416319 sec + 10,608,312,227 cycles # 3.101 GHz + 24,022,929,053 instructions # 2.26 insn per cycle + 3.422023981 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest.exe @@ -121,14 +121,14 @@ 
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.707866e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.031974e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.031974e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.873705e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.217277e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.217277e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.329926 sec - 6,692,684,984 cycles # 2.867 GHz - 12,415,083,748 instructions # 1.86 insn per cycle - 2.335033574 seconds time elapsed +TOTAL : 2.254903 sec + 6,588,417,890 cycles # 2.915 GHz + 12,414,319,509 instructions # 1.88 insn per cycle + 2.260593676 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3156) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.126119e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.508462e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.508462e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.199714e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.586430e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.586430e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.148404 sec - 6,244,000,039 cycles # 2.900 GHz - 11,586,646,765 instructions # 1.86 insn per cycle - 2.153653502 seconds time elapsed +TOTAL : 2.119206 sec + 6,252,999,725 cycles # 2.944 GHz + 
11,587,996,172 instructions # 1.85 insn per cycle + 2.124972005 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2692) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.121660e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.365055e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.365055e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.167152e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.411218e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.411218e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.647363 sec - 5,336,886,997 cycles # 2.013 GHz - 9,309,895,095 instructions # 1.74 insn per cycle - 2.652547834 seconds time elapsed +TOTAL : 2.620092 sec + 5,340,755,470 cycles # 2.035 GHz + 9,308,726,759 instructions # 1.74 insn per cycle + 2.625879222 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2116) (512y: 282) (512z: 1958) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index b8d2933568..1da4bf76bb 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:54:12 +DATE: 2023-10-28_12:45:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.136826e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.174839e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270632e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.574793e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.154292e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.268145e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.507009 sec - 2,214,441,092 cycles # 3.000 GHz - 3,174,554,342 instructions # 1.43 insn per cycle - 0.795728101 seconds time elapsed +TOTAL : 0.517329 sec + 2,263,667,377 cycles # 3.020 GHz + 3,227,214,158 instructions # 1.43 insn per cycle + 0.806491983 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.647788e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.745237e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.745237e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 2.666799e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.765140e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.765140e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.064620 sec - 12,384,265,473 cycles # 3.044 GHz - 35,059,405,316 instructions # 2.83 insn per cycle - 4.069669316 seconds time elapsed +TOTAL : 4.037237 sec + 12,381,626,680 cycles # 3.064 GHz + 35,059,471,093 instructions # 2.83 insn per cycle + 4.042830136 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 457) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.138033e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.282936e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.282936e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.075733e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.213422e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.213422e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.444702 sec - 10,688,625,544 cycles # 3.099 GHz - 23,099,820,217 instructions # 2.16 insn per cycle - 3.449755846 seconds time elapsed +TOTAL : 3.513177 sec + 10,690,616,056 cycles # 3.039 GHz + 23,099,681,886 instructions # 2.16 insn per cycle + 3.518988024 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2363) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest.exe @@ -121,14 +121,14 @@ 
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.237103e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.643499e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.643499e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.247097e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.651052e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.651052e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.105153 sec - 6,160,181,225 cycles # 2.920 GHz - 11,969,984,936 instructions # 1.94 insn per cycle - 2.110284671 seconds time elapsed +TOTAL : 2.101774 sec + 6,163,674,317 cycles # 2.926 GHz + 11,969,549,964 instructions # 1.94 insn per cycle + 2.107518313 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2511) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.378177e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.801357e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.801357e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.378968e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.795139e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.795139e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.052827 sec - 6,017,899,320 cycles # 2.925 GHz - 11,142,057,093 instructions # 1.85 insn per cycle - 2.058039153 seconds time elapsed +TOTAL : 2.051593 sec + 6,022,120,306 cycles # 2.928 GHz + 
11,142,964,439 instructions # 1.85 insn per cycle + 2.057280159 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2128) (512y: 174) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.233582e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.488303e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.488303e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.268812e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.526768e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.526768e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.581242 sec - 5,224,244,555 cycles # 2.021 GHz - 9,033,433,625 instructions # 1.73 insn per cycle - 2.586440370 seconds time elapsed +TOTAL : 2.559507 sec + 5,222,984,898 cycles # 2.037 GHz + 9,033,113,449 instructions # 1.73 insn per cycle + 2.565156483 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1651) (512y: 208) (512z: 1567) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 24b477c6c2..443766ae47 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:35:25 +DATE: 2023-10-28_12:10:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.085170e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.712610e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.977210e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.245334e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.581083e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.955429e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.469467 sec - 2,068,301,293 cycles # 3.004 GHz - 3,012,364,622 instructions # 1.46 insn per cycle - 0.747476379 seconds time elapsed +TOTAL : 0.478876 sec + 2,114,450,363 cycles # 3.001 GHz + 2,995,809,201 instructions # 1.42 insn per cycle + 0.774010491 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,15 +69,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.956122e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.010570e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.010570e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 2.347045e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.423439e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.423439e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.445413 sec - 16,510,915,611 cycles # 3.030 GHz - 45,308,404,518 instructions # 2.74 insn per cycle - 5.450456954 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.551732 sec + 14,163,211,343 cycles # 3.109 GHz + 38,393,750,672 instructions # 2.71 insn per cycle + 4.559626496 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -85,8 +85,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198692724109 -Relative difference = 6.443528218283898e-08 +Avg ME (F77/C++) = 2.0288199022179469 +Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= @@ -95,15 +95,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.773825e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 5.129881e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.129881e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.258787e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.694855e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.694855e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.278501 sec - 7,055,633,229 cycles # 3.091 GHz - 17,671,724,757 instructions # 2.50 insn per cycle - 2.283347357 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.077544 sec + 6,482,134,792 cycles # 3.112 GHz + 15,829,813,984 instructions # 2.44 insn per cycle + 2.092266384 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -111,8 +111,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 +Avg ME (F77/C++) = 2.0288193548331037 +Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= @@ -121,15 +121,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 
256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.823101e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.001328e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.001328e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.088230e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.043336e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.043336e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.269969 sec - 3,728,138,097 cycles # 2.926 GHz - 8,250,735,018 instructions # 2.21 insn per cycle - 1.274926428 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 0) (512z: 0) +TOTAL : 1.239442 sec + 3,486,432,426 cycles # 2.809 GHz + 7,609,890,231 instructions # 2.18 insn per cycle + 1.254380313 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -137,8 +137,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= @@ -147,15 +147,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.356043e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.069996e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.069996e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.032911e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.202298e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.202298e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.201889 sec - 3,525,312,934 cycles # 2.923 GHz - 7,861,079,341 instructions # 2.23 insn per cycle - 1.206782783 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3193) (512y: 20) (512z: 0) +TOTAL : 1.098127 sec + 3,254,731,502 cycles # 2.949 GHz + 7,215,829,075 instructions # 2.22 insn per cycle + 1.112790866 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -163,8 +163,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= @@ -173,15 +173,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.081368e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.847086e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.847086e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.290959e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.095325e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.095325e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.564981 sec - 3,252,144,204 cycles # 2.073 GHz - 6,095,772,749 instructions # 1.87 insn per cycle - 1.569858235 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2377) (512y: 24) (512z: 2156) +TOTAL : 1.524399 sec + 3,070,579,488 cycles # 2.007 GHz + 5,846,226,621 instructions # 1.90 insn per cycle + 1.538728141 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) 
(avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -189,8 +189,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 +Avg ME (F77/C++) = 2.0288183349184692 +Relative difference = 1.6508058850146622e-07 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index c228b2c37b..0d00880b5b 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_19:03:47 +DATE: 2023-10-28_12:55:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.513457e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.340252e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.340252e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.271504e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.799925e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.799925e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.650789 sec - 2,641,546,515 cycles # 3.016 GHz - 4,117,903,371 instructions # 1.56 insn per cycle - 0.935043223 seconds time elapsed +TOTAL : 0.661589 sec + 2,684,049,402 cycles # 3.019 GHz + 4,132,808,227 instructions # 1.54 insn per cycle + 0.945716932 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -78,15 +78,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.972248e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.026239e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.026239e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.328083e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.403078e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.403078e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.440996 sec - 16,702,681,594 cycles # 3.067 GHz - 45,351,045,297 instructions # 2.72 insn per cycle - 5.446683603 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.645191 sec + 14,343,709,354 cycles # 3.094 GHz + 38,435,752,660 instructions # 2.68 insn per cycle + 4.651412931 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -94,8 +94,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198692724109 -Relative difference = 6.443528218283898e-08 +Avg ME (F77/C++) = 2.0288199022179469 +Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= @@ -105,15 +105,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.605636e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.935791e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.935791e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.177869e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.599623e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.599623e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.405458 sec - 7,261,686,615 cycles # 3.014 GHz - 17,953,553,750 instructions # 2.47 insn per cycle - 2.411441099 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.154265 sec + 6,685,752,776 cycles # 3.098 GHz + 16,110,356,106 instructions # 2.41 insn per cycle + 2.160882399 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -121,8 +121,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 +Avg ME (F77/C++) = 2.0288193548331037 +Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= @@ -132,15 +132,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.560330e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.721128e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.721128e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.429674e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.082873e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.082873e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.356872 sec - 3,928,188,681 cycles # 2.884 GHz - 8,488,830,304 instructions # 2.16 insn per cycle - 1.362856063 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 0) (512z: 0) +TOTAL : 1.240483 sec + 3,678,582,112 cycles # 2.952 GHz + 7,844,440,175 instructions # 2.13 insn per cycle + 1.247230152 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 
0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -148,8 +148,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= @@ -159,15 +159,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.116761e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.040482e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.040482e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.009134e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.170165e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.170165e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.278534 sec - 3,740,578,395 cycles # 2.919 GHz - 8,100,523,605 instructions # 2.17 insn per cycle - 1.284258782 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3193) (512y: 20) (512z: 0) +TOTAL : 1.166839 sec + 3,458,050,283 cycles # 2.949 GHz + 
7,453,516,502 instructions # 2.16 insn per cycle + 1.173343420 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -175,8 +175,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= @@ -186,15 +186,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.953315e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.671270e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.671270e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.550922e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.388839e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.388839e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.636801 sec - 3,469,634,780 cycles # 2.114 GHz - 6,351,136,410 instructions # 1.83 insn per cycle - 1.642694122 seconds time elapsed -=Symbols 
in CPPProcess.o= (~sse4: 0) (avx2: 2377) (512y: 24) (512z: 2156) +TOTAL : 1.518265 sec + 3,271,259,339 cycles # 2.147 GHz + 6,100,060,320 instructions # 1.86 insn per cycle + 1.524663767 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -202,8 +202,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 +Avg ME (F77/C++) = 2.0288183349184692 +Relative difference = 1.6508058850146622e-07 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index 5dc74dfed7..8e628a3dfb 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_19:16:08 +DATE: 2023-10-28_13:07:24 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.063632e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.693440e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.968571e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.850693e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.663079e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.968425e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 -TOTAL : 0.553622 sec - 2,317,649,288 cycles # 3.011 GHz - 3,439,584,300 instructions # 1.48 insn per cycle - 0.828678237 seconds time elapsed +TOTAL : 0.554614 sec + 2,337,932,283 cycles # 3.030 GHz + 3,419,248,560 instructions # 1.46 insn per cycle + 0.830772253 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,15 +69,15 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.989342e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.044248e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.044248e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 2.359790e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.436644e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.436644e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 5.407392 sec - 16,682,462,365 cycles # 3.083 GHz - 45,337,082,640 instructions # 2.72 insn per cycle - 5.412277054 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.578859 sec + 14,314,881,164 cycles # 3.123 GHz + 38,421,819,073 instructions # 2.68 insn per cycle + 4.583864809 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -85,8 +85,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198692724109 -Relative difference = 6.443528218283898e-08 +Avg ME (F77/C++) = 2.0288199022179469 +Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= @@ -95,15 +95,15 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.799956e+05 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.158589e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.158589e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.282449e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.720847e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.720847e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.320759 sec - 7,223,788,046 cycles # 3.108 GHz - 17,685,035,831 instructions # 2.45 insn per cycle - 2.325560432 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.119646 sec + 6,638,203,667 cycles # 3.125 GHz + 15,842,045,343 instructions # 2.39 insn per cycle + 2.124745744 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -111,8 +111,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 +Avg ME (F77/C++) = 2.0288193548331037 +Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= @@ -121,15 +121,15 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] 
('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.822474e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.004978e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.004978e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.312507e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.067826e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.067826e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.324170 sec - 3,914,841,287 cycles # 2.948 GHz - 8,235,477,108 instructions # 2.10 insn per cycle - 1.328966517 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 0) (512z: 0) +TOTAL : 1.263628 sec + 3,637,578,133 cycles # 2.869 GHz + 7,591,123,457 instructions # 2.09 insn per cycle + 1.268701108 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -137,8 +137,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= @@ -147,15 +147,15 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.391242e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.078618e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.078618e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.030288e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.199567e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.199567e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.253021 sec - 3,707,252,167 cycles # 2.949 GHz - 7,811,825,096 instructions # 2.11 insn per cycle - 1.257855017 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3193) (512y: 20) (512z: 0) +TOTAL : 1.154743 sec + 3,427,351,661 cycles # 2.957 GHz + 7,165,042,622 instructions # 2.09 insn per cycle + 1.159809196 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -163,8 +163,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= @@ -173,15 +173,15 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.106245e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.850378e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.850378e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.696445e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.576864e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.576864e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.611554 sec - 3,420,500,042 cycles # 2.117 GHz - 6,046,541,541 instructions # 1.77 insn per cycle - 1.616370057 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2377) (512y: 24) (512z: 2156) +TOTAL : 1.499358 sec + 3,225,858,049 cycles # 2.146 GHz + 5,796,456,708 instructions # 1.80 insn per cycle + 1.504415012 seconds time elapsed +=Symbols in CPPProcess.o= 
(~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -189,8 +189,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 +Avg ME (F77/C++) = 2.0288183349184692 +Relative difference = 1.6508058850146622e-07 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt index 7b90f03855..3bd9388dc5 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_19:13:04 +DATE: 2023-10-28_13:04:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.065322e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.700283e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.974533e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.731022e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.640015e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.941204e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.498915 sec - 2,192,440,414 cycles # 3.004 GHz - 3,416,060,899 instructions # 1.56 insn per cycle - 0.787716102 seconds time elapsed +TOTAL : 0.502219 sec + 2,186,499,046 cycles # 3.031 GHz + 3,425,706,749 instructions # 1.57 insn per cycle + 0.778799067 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,15 +69,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.966466e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.020648e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.020648e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 2.336409e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.412455e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.412455e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.415648 sec - 16,529,341,853 cycles # 3.050 GHz - 45,309,866,535 instructions # 2.74 insn per cycle - 5.420402206 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.570402 sec + 14,291,029,733 cycles # 3.124 GHz + 38,392,351,818 instructions # 2.69 insn per cycle + 4.575464355 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -85,8 +85,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198692724109 -Relative difference = 6.443528218283898e-08 +Avg ME (F77/C++) = 2.0288199022179469 +Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= @@ -95,15 +95,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.693978e+05 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.035776e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.035776e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.271732e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.704955e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.704955e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.316382 sec - 7,054,105,370 cycles # 3.040 GHz - 17,671,721,016 instructions # 2.51 insn per cycle - 2.321167806 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.072263 sec + 6,475,536,164 cycles # 3.118 GHz + 15,829,377,005 instructions # 2.44 insn per cycle + 2.077475049 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -111,8 +111,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 +Avg ME (F77/C++) = 2.0288193548331037 +Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= @@ -121,15 +121,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] 
('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.830241e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.005280e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.005280e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.640298e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.109649e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.109649e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.269396 sec - 3,738,194,435 cycles # 2.936 GHz - 8,251,074,147 instructions # 2.21 insn per cycle - 1.274249735 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 0) (512z: 0) +TOTAL : 1.169660 sec + 3,471,993,263 cycles # 2.958 GHz + 7,606,901,908 instructions # 2.19 insn per cycle + 1.174764224 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -137,8 +137,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= @@ -147,15 +147,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.392639e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.077705e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.077705e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.030442e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.204624e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.204624e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.198333 sec - 3,534,628,897 cycles # 2.940 GHz - 7,862,127,936 instructions # 2.22 insn per cycle - 1.203142647 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3193) (512y: 20) (512z: 0) +TOTAL : 1.100002 sec + 3,266,709,570 cycles # 2.958 GHz + 7,214,743,597 instructions # 2.21 insn per cycle + 1.105174715 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -163,8 +163,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= @@ -173,15 +173,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.097193e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.836994e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.836994e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.680219e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.568964e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.568964e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.558825 sec - 3,254,164,123 cycles # 2.082 GHz - 6,095,387,295 instructions # 1.87 insn per cycle - 1.563579525 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2377) (512y: 24) (512z: 2156) +TOTAL : 1.447359 sec + 3,071,044,719 cycles # 2.116 GHz + 5,845,849,973 instructions # 1.90 insn per cycle + 1.452398624 seconds time elapsed +=Symbols in CPPProcess.o= 
(~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -189,8 +189,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 +Avg ME (F77/C++) = 2.0288183349184692 +Relative difference = 1.6508058850146622e-07 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index fdd315eb16..d0ec1a38b7 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_19:10:03 +DATE: 2023-10-28_13:01:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -45,14 +45,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.170150e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.660447e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.933538e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.738979e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.639875e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.941887e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.602645 sec - 2,479,148,123 cycles # 2.998 GHz - 3,864,785,421 instructions # 1.56 insn per cycle - 0.885563297 seconds time elapsed +TOTAL : 0.608287 sec + 2,467,062,087 cycles # 2.979 GHz + 3,757,858,393 instructions # 1.52 insn per cycle + 0.885651916 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 @@ -71,15 +71,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.957679e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.011741e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.011741e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.349040e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.426585e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.426585e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.439322 sec - 16,519,636,229 cycles # 3.035 GHz - 45,307,914,586 instructions # 2.74 insn per cycle - 5.444150127 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.547341 sec + 14,160,240,733 cycles # 3.112 GHz + 38,393,288,556 instructions # 2.71 insn per cycle + 4.552620696 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -87,8 +87,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198692724109 -Relative difference = 6.443528218283898e-08 +Avg ME (F77/C++) = 2.0288199022179469 +Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= @@ -97,15 +97,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.773026e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.136685e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.136685e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.243740e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.688643e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.688643e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.280272 sec - 7,068,660,475 cycles # 3.094 GHz - 17,671,452,966 instructions # 2.50 insn per cycle - 2.285202578 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.083146 sec + 6,486,996,800 cycles # 3.107 GHz + 15,829,233,655 instructions # 2.44 insn per cycle + 2.088258908 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -113,8 +113,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 +Avg ME (F77/C++) = 2.0288193548331037 +Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= @@ -123,15 +123,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.805997e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.992625e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.992625e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.559475e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.100525e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.100525e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.272643 sec - 3,731,426,488 cycles # 2.926 GHz - 8,249,195,685 instructions # 2.21 insn per cycle - 1.277241448 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3361) (512y: 0) (512z: 0) +TOTAL : 1.179727 sec + 3,480,472,202 cycles # 2.939 GHz + 7,606,629,154 instructions # 2.19 insn per cycle + 1.185041458 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 
0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -139,8 +139,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= @@ -149,15 +149,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.399444e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.076890e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.076890e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.029601e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.198465e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.198465e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.197731 sec - 3,530,731,182 cycles # 2.938 GHz - 7,860,812,005 instructions # 2.23 insn per cycle - 1.202556944 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3193) (512y: 20) (512z: 0) +TOTAL : 1.101379 sec + 3,263,097,653 cycles # 2.951 GHz + 
7,215,542,606 instructions # 2.21 insn per cycle + 1.106656369 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -165,8 +165,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +Avg ME (F77/C++) = 2.0288181684445590 +Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= @@ -175,15 +175,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.091593e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.835046e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.835046e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.313836e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.156037e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.156037e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.560736 sec - 3,257,981,590 cycles # 2.083 GHz - 6,095,878,647 instructions # 1.87 insn per cycle - 1.565536774 seconds time elapsed -=Symbols 
in CPPProcess.o= (~sse4: 0) (avx2: 2377) (512y: 24) (512z: 2156) +TOTAL : 1.519219 sec + 3,074,840,033 cycles # 2.018 GHz + 5,845,496,594 instructions # 1.90 insn per cycle + 1.524648411 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -191,8 +191,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 +Avg ME (F77/C++) = 2.0288183349184692 +Relative difference = 1.6508058850146622e-07 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index 663a41142c..34bf784874 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:35:49 +DATE: 2023-10-28_12:10:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.096390e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.766631e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.047368e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.265049e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.609501e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.998796e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.467013 sec - 2,056,734,142 cycles # 2.997 GHz - 2,992,962,147 instructions # 1.46 insn per cycle - 0.744434014 seconds time elapsed +TOTAL : 0.480347 sec + 2,111,279,511 cycles # 2.994 GHz + 3,024,090,036 instructions # 1.43 insn per cycle + 0.784163260 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,15 +69,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.031007e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.087778e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.087778e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 2.303169e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.377069e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.377069e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.245108 sec - 16,244,805,475 cycles # 3.095 GHz - 44,484,348,190 instructions # 2.74 insn per cycle - 5.249986656 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 576) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.636900 sec + 14,424,355,368 cycles # 3.108 GHz + 39,885,077,384 instructions # 2.77 insn per cycle + 4.644496566 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 570) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -85,8 +85,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198692724109 -Relative difference = 6.443528218283898e-08 +Avg ME (F77/C++) = 2.0288199028000236 +Relative difference = 4.790961076489297e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= @@ -95,15 +95,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.358927e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 5.815806e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.815806e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.110121e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.708994e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.708994e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.039403 sec - 6,083,169,654 cycles # 2.982 GHz - 16,972,342,736 instructions # 2.79 insn per cycle - 2.044363213 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2881) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.799612 sec + 5,599,106,784 cycles # 3.102 GHz + 15,299,546,445 instructions # 2.73 insn per cycle + 1.816097337 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2473) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -111,8 +111,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 +Avg ME (F77/C++) = 2.0288193548331037 +Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= @@ -121,15 +121,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 
256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.400912e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.009313e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.009313e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.842904e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.546331e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.546331e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.720669 sec - 5,008,260,515 cycles # 2.904 GHz - 10,214,809,232 instructions # 2.04 insn per cycle - 1.725527481 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3904) (512y: 0) (512z: 0) +TOTAL : 1.615345 sec + 4,748,131,788 cycles # 2.929 GHz + 9,747,809,707 instructions # 2.05 insn per cycle + 1.630160731 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3710) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -137,8 +137,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +Avg ME (F77/C++) = 2.0288182108197361 +Relative difference = 1.0391259163456515e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= @@ -147,15 +147,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.537295e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.168989e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.168989e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.000621e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.743607e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.743607e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.684974 sec - 4,937,248,513 cycles # 2.923 GHz - 9,938,060,774 instructions # 2.01 insn per cycle - 1.689989340 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3789) (512y: 2) (512z: 0) +TOTAL : 1.580656 sec + 4,645,306,717 cycles # 2.929 GHz + 9,339,785,375 instructions # 2.01 insn per cycle + 1.597293322 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3497) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -163,8 +163,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +Avg ME (F77/C++) = 2.0288182108197361 +Relative difference = 1.0391259163456515e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= @@ -173,15 +173,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.077645e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.456866e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.456866e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.218809e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.778035e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.778035e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 2.147132 sec - 4,363,010,014 cycles # 2.028 GHz - 8,442,845,303 instructions # 1.94 insn per cycle - 2.152072523 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2882) (512y: 4) (512z: 2751) +TOTAL : 1.768992 sec + 3,667,384,840 cycles # 2.067 GHz + 7,045,660,600 instructions # 1.92 insn per cycle + 1.780720848 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) 
(avx2: 2606) (512y: 12) (512z: 2221) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -189,8 +189,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 +Avg ME (F77/C++) = 2.0288183459779248 +Relative difference = 1.7053177021099307e-07 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index d0aa02b37a..d1f9820c60 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:54:38 +DATE: 2023-10-28_12:45:51 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.072434e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.686668e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.952500e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.395245e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.633232e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.952696e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.467510 sec - 2,069,160,049 cycles # 3.005 GHz - 2,965,842,897 instructions # 1.43 insn per cycle - 0.745986723 seconds time elapsed +TOTAL : 0.475068 sec + 2,125,544,542 cycles # 3.019 GHz + 3,039,422,682 instructions # 1.43 insn per cycle + 0.763100625 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.486168e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.574733e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.574733e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 2.635104e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.731129e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.731129e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.300765 sec - 12,613,573,541 cycles # 2.930 GHz - 34,394,223,521 instructions # 2.73 insn per cycle - 4.305708849 seconds time elapsed +TOTAL : 4.063498 sec + 12,606,497,091 cycles # 3.099 GHz + 34,393,343,234 instructions # 2.73 insn per cycle + 4.068923017 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 696) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.423404e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.902037e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.902037e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.421440e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.912100e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.912100e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.018730 sec - 6,098,231,874 cycles # 3.015 GHz - 14,875,099,697 instructions # 2.44 insn per cycle - 2.023701584 seconds time elapsed +TOTAL : 2.020800 sec + 6,096,062,133 cycles # 3.010 GHz + 14,873,944,377 instructions # 2.44 insn per cycle + 2.026273725 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3009) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow 
summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.288210e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.081005e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.081005e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.606550e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.476550e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.476550e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.521473 sec - 4,271,996,539 cycles # 2.800 GHz - 9,042,309,170 instructions # 2.12 insn per cycle - 1.526427437 seconds time elapsed +TOTAL : 1.461499 sec + 4,282,881,759 cycles # 2.921 GHz + 9,041,483,422 instructions # 2.11 insn per cycle + 1.466874367 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4445) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.762492e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.667202e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.667202e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.796123e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.704814e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.704814e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.433726 sec - 4,213,011,776 cycles # 2.930 GHz - 8,676,320,241 instructions # 2.06 insn per cycle - 1.438715591 seconds time elapsed +TOTAL : 1.427681 sec + 4,207,358,021 cycles # 2.938 GHz + 8,675,881,802 
instructions # 2.06 insn per cycle + 1.433026005 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4244) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.878645e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.382089e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.382089e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.812928e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.296844e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.296844e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.866240 sec - 3,836,736,420 cycles # 2.052 GHz - 7,820,066,058 instructions # 2.04 insn per cycle - 1.871114134 seconds time elapsed +TOTAL : 1.886484 sec + 3,840,034,259 cycles # 2.031 GHz + 7,819,762,172 instructions # 2.04 insn per cycle + 1.892012346 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4420) (512y: 0) (512z: 2556) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index de54279b1b..6d76428629 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:55:01 +DATE: 2023-10-28_12:46:14 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.082452e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.759103e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.038632e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.439177e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.664854e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.005028e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.468671 sec - 2,051,049,137 cycles # 2.975 GHz - 2,933,032,180 instructions # 1.43 insn per cycle - 0.745977930 seconds time elapsed +TOTAL : 0.474639 sec + 2,123,500,718 cycles # 3.023 GHz + 3,021,126,916 instructions # 1.42 insn per cycle + 0.761473128 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.728308e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.832886e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.832886e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 2.805862e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.915686e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.915686e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 3.926359 sec - 11,754,711,596 cycles # 2.991 GHz - 35,130,335,361 instructions # 2.99 insn per cycle - 3.931207276 seconds time elapsed +TOTAL : 3.820220 sec + 11,763,429,045 cycles # 3.076 GHz + 35,129,372,862 instructions # 2.99 insn per cycle + 3.825554200 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 470) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.711585e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.225013e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.225013e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.732850e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.252188e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.252188e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.918624 sec - 5,946,615,728 cycles # 3.093 GHz - 14,483,958,293 instructions # 2.44 insn per cycle - 1.923457186 seconds time elapsed +TOTAL : 1.912996 sec + 5,953,621,834 cycles # 3.105 GHz + 14,483,605,781 instructions # 2.43 insn per cycle + 1.918402844 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest.exe @@ -121,14 +121,14 @@ Workflow 
summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.855810e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.786782e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.786782e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.835313e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.791994e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.791994e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.419134 sec - 4,148,821,411 cycles # 2.915 GHz - 8,888,021,481 instructions # 2.14 insn per cycle - 1.424042048 seconds time elapsed +TOTAL : 1.419591 sec + 4,168,513,241 cycles # 2.927 GHz + 8,887,420,483 instructions # 2.13 insn per cycle + 1.425011758 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3576) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.911621e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.860580e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.860580e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.943761e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.903919e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.903919e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.406850 sec - 4,137,327,451 cycles # 2.932 GHz - 8,424,234,551 instructions # 2.04 insn per cycle - 1.411791633 seconds time elapsed +TOTAL : 1.402150 sec + 4,141,058,901 cycles # 2.943 GHz + 8,423,656,753 
instructions # 2.03 insn per cycle + 1.407495281 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3320) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.947023e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.462476e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.462476e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.010939e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.537087e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.537087e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.845349 sec - 3,784,294,920 cycles # 2.046 GHz - 7,713,085,184 instructions # 2.04 insn per cycle - 1.850240418 seconds time elapsed +TOTAL : 1.827232 sec + 3,785,415,923 cycles # 2.066 GHz + 7,712,477,428 instructions # 2.04 insn per cycle + 1.832679832 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3436) (512y: 0) (512z: 2108) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index e5b5571dad..2f0874ecfc 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:36:14 +DATE: 2023-10-28_12:11:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.194168e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.177440e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.271526e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.015001e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.133918e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.271091e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.510074 sec - 2,249,628,773 cycles # 3.005 GHz - 3,213,054,699 instructions # 1.43 insn per cycle - 0.807578099 seconds time elapsed +TOTAL : 0.542650 sec + 2,324,509,629 cycles # 2.959 GHz + 3,256,545,431 instructions # 1.40 insn per cycle + 0.857692926 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,15 +69,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.906485e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.954717e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.954717e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 2.184366e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.248000e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.248000e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.603351 sec - 17,321,977,891 cycles # 3.090 GHz - 45,555,371,368 instructions # 2.63 insn per cycle - 5.608662187 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 624) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.904088 sec + 15,277,347,047 cycles # 3.112 GHz + 38,637,646,747 instructions # 2.53 insn per cycle + 4.912776955 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 672) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -95,15 +95,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.376355e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.544078e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.544078e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.683087e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.882735e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.882735e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.208637 sec - 9,923,474,484 cycles # 3.089 GHz - 27,529,097,588 instructions # 2.77 insn per cycle - 3.213810203 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2591) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.950252 sec + 8,963,470,963 cycles # 3.033 GHz + 24,239,446,713 instructions # 2.70 insn per cycle + 2.964919841 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -121,15 +121,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.199043e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.595306e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.595306e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.870970e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.393157e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.393157e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.118667 sec - 5,989,500,217 cycles # 2.821 GHz - 12,420,938,473 instructions # 2.07 insn per cycle - 2.123732572 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2753) (512y: 0) (512z: 0) +TOTAL : 1.890520 sec + 5,524,353,941 cycles # 2.913 GHz + 11,287,940,554 instructions # 2.04 insn per cycle + 1.907409042 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2480) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -147,15 +147,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.938752e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.452534e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.452534e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.785531e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.472682e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.472682e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.867638 sec - 5,462,355,916 cycles # 2.919 GHz - 11,803,822,809 instructions # 2.16 insn per cycle - 1.872842798 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2503) (512y: 146) (512z: 0) +TOTAL : 1.648735 sec + 4,852,342,037 cycles # 2.933 GHz + 10,537,728,595 instructions # 2.17 insn per cycle + 1.665451136 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2167) (512y: 148) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -173,15 +173,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.872672e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.090666e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.090666e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.233249e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.487319e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.487319e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.812736 sec - 5,598,441,704 cycles # 1.988 GHz - 8,083,507,451 instructions # 1.44 insn per cycle - 2.817822099 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1660) (512y: 126) (512z: 1854) +TOTAL : 2.582104 sec + 5,221,633,566 cycles # 2.018 GHz + 7,614,459,247 instructions # 1.46 insn per cycle + 2.598287782 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1608) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index be7fa646e4..11aa7f6ec6 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-25_18:36:41 +DATE: 2023-10-28_12:11:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.207178e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.183934e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.279144e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.022201e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.140606e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.278579e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.508835 sec - 2,247,853,776 cycles # 3.008 GHz - 3,199,999,470 instructions # 1.42 insn per cycle - 0.806333964 seconds time elapsed +TOTAL : 0.525470 sec + 2,284,667,887 cycles # 3.001 GHz + 3,270,255,248 instructions # 1.43 insn per cycle + 0.827459749 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,15 +69,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.962576e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.013722e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.013722e+05 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 2.135764e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.196535e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.196535e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.445467 sec - 16,864,922,350 cycles # 3.095 GHz - 44,544,928,625 instructions # 2.64 insn per cycle - 5.450679101 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 574) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.013317 sec + 15,396,741,584 cycles # 3.072 GHz + 40,435,149,300 instructions # 2.63 insn per cycle + 5.021167793 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -95,15 +95,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.463332e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.638084e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.638084e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.949201e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.190131e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.190131e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.129878 sec - 9,472,664,981 cycles # 3.022 GHz - 26,172,690,479 instructions # 2.76 insn per cycle - 3.134859663 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2397) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.758906 sec + 8,507,837,136 cycles # 3.077 GHz + 23,269,627,166 instructions # 2.74 insn per cycle + 2.773638911 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2091) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -121,15 +121,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.769147e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.097427e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.097427e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.906071e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.272590e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.272590e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.300686 sec - 6,708,376,683 cycles # 2.910 GHz - 13,967,973,168 instructions # 2.08 insn per cycle - 2.306085049 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2875) (512y: 0) (512z: 0) +TOTAL : 2.242681 sec + 6,250,535,355 cycles # 2.780 GHz + 12,974,014,205 instructions # 2.08 insn per cycle + 2.256899417 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2669) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -147,15 +147,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.897600e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.248967e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.248967e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.416534e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.841079e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.841079e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.245188 sec - 6,373,380,149 cycles # 2.833 GHz - 13,408,335,115 instructions # 2.10 insn per cycle - 2.250462198 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2517) (512y: 302) (512z: 0) +TOTAL : 2.038525 sec + 5,936,264,395 cycles # 2.904 GHz + 12,249,816,592 instructions # 2.06 insn per cycle + 2.053886656 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2209) (512y: 296) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -173,15 +173,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.921480e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.136739e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.136739e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.902385e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.117912e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.117912e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.776161 sec - 5,570,521,591 cycles # 2.004 GHz - 9,179,596,120 instructions # 1.65 insn per cycle - 2.781332851 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1439) (512y: 212) (512z: 2053) +TOTAL : 2.791286 sec + 5,622,605,195 cycles # 2.011 GHz + 8,753,644,763 instructions # 1.56 insn per cycle + 2.807743774 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1909) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index ebc965cc92..ac3a60d645 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_18:37:09 +DATE: 2023-10-28_12:12:16 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.017326e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.054319e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.066811e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.491851e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.048489e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.065167e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.454634 sec - 1,998,901,545 cycles # 2.995 GHz - 2,904,650,993 instructions # 1.45 insn per cycle - 0.724253457 seconds time elapsed +TOTAL : 0.464403 sec + 2,055,005,532 cycles # 3.003 GHz + 2,945,523,651 instructions # 1.43 insn per cycle + 0.763693915 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.124031e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
1.322289e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.333522e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.041676e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.319252e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.336391e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.587604 sec - 2,471,633,267 cycles # 3.015 GHz - 3,730,138,752 instructions # 1.51 insn per cycle - 0.878825359 seconds time elapsed +TOTAL : 0.631721 sec + 2,560,791,285 cycles # 2.938 GHz + 3,869,794,857 instructions # 1.51 insn per cycle + 0.932528663 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,15 +82,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.582748e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.595260e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.595260e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.590737e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.603375e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.603375e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.365564 sec - 19,741,191,972 cycles # 3.100 GHz - 58,964,992,174 instructions # 2.99 insn per cycle - 6.369485962 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1189) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.346844 sec + 19,691,623,987 cycles # 3.101 GHz + 59,609,931,902 instructions # 3.03 insn per cycle + 6.353105445 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -108,15 +108,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.832276e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.875225e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.875225e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.850627e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.895082e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.895082e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.412509 sec - 10,571,839,132 cycles # 3.095 GHz - 30,995,598,646 instructions # 2.93 insn per cycle - 3.416791050 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5217) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.399637 sec + 10,373,383,384 cycles # 3.048 GHz + 30,679,256,260 instructions # 2.96 insn per cycle + 3.411245943 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -134,15 +134,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.671174e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.843577e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.843577e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.906673e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.008657e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.008657e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.715587 sec - 4,991,139,375 cycles # 2.903 GHz - 11,305,706,976 instructions # 2.27 insn per cycle - 1.719836361 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4552) (512y: 0) (512z: 0) +TOTAL : 1.675867 sec + 4,881,672,927 cycles # 2.905 GHz + 11,022,284,259 instructions # 2.26 insn per cycle + 1.690244837 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -150,8 +150,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 OMP= @@ -160,15 +160,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.100643e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.122719e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.122719e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.106994e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.129605e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.129605e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.509863 sec - 4,400,565,724 cycles # 2.908 GHz - 10,484,557,861 instructions # 2.38 insn per cycle - 1.513887056 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4296) (512y: 91) (512z: 0) +TOTAL : 1.502203 sec + 4,365,694,763 cycles # 2.898 GHz + 10,299,343,112 instructions # 2.36 insn per cycle + 1.520133716 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -176,8 +176,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 OMP= @@ -186,15 +186,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.479826e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.587663e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.587663e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.816900e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.928266e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.928266e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.214647 sec - 4,100,640,054 cycles # 1.849 GHz - 5,907,026,834 instructions # 1.44 insn per cycle - 2.218934371 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1551) (512y: 95) (512z: 3573) +TOTAL : 2.120206 sec + 4,099,203,773 cycles # 1.930 GHz + 5,846,578,086 instructions # 1.43 insn per cycle + 2.134050769 seconds time elapsed +=Symbols in CPPProcess.o= 
(~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index 84eb682463..4842553fb4 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_19:04:11 +DATE: 2023-10-28_12:55:24 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.737533e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.009392e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.009392e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.651766e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.773341e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.773341e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.482468 sec - 2,092,072,357 cycles # 3.017 GHz - 3,191,388,192 instructions # 1.53 insn per cycle - 0.750549951 seconds time elapsed +TOTAL : 0.486524 sec + 2,128,896,563 cycles # 3.007 GHz + 3,188,482,728 instructions # 1.50 insn per cycle + 0.764931683 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,14 +72,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.824576e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.948428e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.948428e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.775476e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.664612e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.664612e+06 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.804158 sec - 3,165,815,286 cycles # 3.022 GHz - 5,087,211,394 instructions # 1.61 insn per cycle - 1.108277579 seconds time elapsed +TOTAL : 0.810802 sec + 3,201,081,444 cycles # 3.028 GHz + 5,073,955,185 instructions # 1.59 insn per cycle + 1.119893797 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -95,15 +95,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.552798e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.565584e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.565584e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.560332e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.572938e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.572938e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.447784 sec - 19,812,659,516 cycles # 3.072 GHz - 58,973,017,270 
instructions # 2.98 insn per cycle - 6.451999180 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1189) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.428951 sec + 19,727,712,174 cycles # 3.067 GHz + 59,617,621,937 instructions # 3.02 insn per cycle + 6.433180767 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -122,15 +122,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.843686e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.887129e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.887129e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.907398e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.953143e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.953143e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.410617 sec - 10,606,495,630 cycles # 3.107 GHz - 31,045,364,778 instructions # 2.93 insn per cycle - 3.414818481 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5217) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.367858 sec + 10,401,729,083 cycles # 3.085 GHz + 30,728,311,698 instructions # 2.95 insn per cycle + 3.372306535 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -149,15 +149,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.635348e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.810089e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.810089e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.870573e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.005545e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.005545e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.728990 sec - 5,025,463,399 cycles # 2.900 GHz - 11,356,936,586 instructions # 2.26 insn per cycle - 1.733253508 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4552) (512y: 0) (512z: 0) +TOTAL : 1.688103 sec + 4,916,085,294 cycles # 2.906 GHz + 11,070,223,626 instructions # 2.25 insn per cycle + 1.692497649 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -165,8 +165,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= @@ -176,15 +176,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.091196e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.113656e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.113656e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.076865e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.099535e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.099535e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.530073 sec - 4,437,395,928 cycles # 2.894 GHz - 10,533,774,197 instructions # 2.37 insn per cycle - 1.534310467 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4296) (512y: 91) (512z: 0) +TOTAL : 1.550953 sec + 4,408,008,369 cycles # 2.835 GHz + 10,347,644,406 instructions # 2.35 insn per cycle + 1.555438151 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -192,8 +192,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= @@ -203,15 +203,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.778593e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.893791e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.893791e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.815543e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.928414e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.928414e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.137109 sec - 4,135,271,327 cycles # 1.933 GHz - 5,946,987,935 instructions # 1.44 insn per cycle - 2.141484524 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1551) (512y: 95) (512z: 3573) +TOTAL : 2.127425 sec + 4,135,455,550 cycles # 1.941 GHz + 5,885,269,641 instructions # 1.42 insn per cycle + 2.131933938 seconds time elapsed +=Symbols in 
CPPProcess.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index 5da42e2dfc..cf30fbcb6b 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_18:37:38 +DATE: 2023-10-28_12:12:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.980838e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.046634e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.059100e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.459634e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.041339e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.057620e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.459445 sec - 2,001,624,062 cycles # 2.971 GHz - 2,897,227,747 instructions # 1.45 insn per cycle - 0.730848040 seconds time elapsed +TOTAL : 0.461274 sec + 2,035,058,176 cycles # 2.994 GHz + 2,910,655,702 instructions # 1.43 insn per cycle + 0.748318644 
seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.119062e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.315389e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.326570e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.036992e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.311164e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.327920e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.584635 sec - 2,502,191,176 cycles # 3.029 GHz - 3,799,894,385 instructions # 1.52 insn per cycle - 0.885312342 seconds time elapsed +TOTAL : 0.636889 sec + 2,559,684,656 cycles # 2.916 GHz + 3,844,382,418 instructions # 1.50 insn per cycle + 0.939027525 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,15 +82,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.578276e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.590963e+04 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.590963e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.623434e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.636310e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.636310e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.377287 sec - 19,776,032,592 cycles # 3.100 GHz - 59,242,647,666 instructions # 3.00 insn per cycle - 6.381344291 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1315) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.267582 sec + 19,473,438,826 cycles # 3.105 GHz + 58,801,451,427 instructions # 3.02 insn per cycle + 6.273946156 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1313) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -108,15 +108,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.838897e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.882635e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.882635e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.962469e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.008667e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.008667e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.407218 sec - 10,428,150,513 cycles # 3.058 GHz - 30,703,821,983 instructions # 2.94 insn per cycle - 3.411368559 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5043) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.324999 sec + 10,252,488,773 cycles # 3.081 GHz + 30,350,737,589 instructions # 2.96 insn per cycle + 3.337113139 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4970) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -134,15 +134,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.472201e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.635749e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.635749e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.439128e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.606330e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.606330e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.750914 sec - 5,109,907,048 cycles # 2.913 GHz - 11,785,108,632 instructions # 2.31 insn per cycle - 1.754997634 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4668) (512y: 0) (512z: 0) +TOTAL : 1.757963 sec + 5,047,493,240 cycles # 2.866 GHz + 11,487,255,325 instructions # 2.28 insn per cycle + 1.853025437 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4591) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -150,8 +150,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check.exe -p 64 256 10 OMP= @@ -160,15 +160,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.023891e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.043074e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.043074e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.719624e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.915479e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.915479e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.621556 sec - 4,691,054,117 cycles # 2.887 GHz - 11,032,599,545 instructions # 2.35 insn per cycle - 1.625732931 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4331) (512y: 245) (512z: 0) +TOTAL : 1.709373 sec + 4,660,423,247 cycles # 2.721 GHz + 10,845,585,186 instructions # 2.33 insn per cycle + 1.724795893 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4183) (512y: 244) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -176,8 +176,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check.exe -p 64 256 10 OMP= @@ -186,15 +186,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.596531e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.705229e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.705229e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.690449e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.805921e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.805921e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.180528 sec - 4,124,129,963 cycles # 1.890 GHz - 6,174,744,538 instructions # 1.50 insn per cycle - 2.184771281 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1461) (512y: 139) (512z: 3675) +TOTAL : 2.154846 sec + 4,120,296,518 cycles # 1.909 GHz + 6,110,489,071 instructions # 1.48 insn per cycle + 2.165568305 seconds time elapsed +=Symbols in CPPProcess.o= 
(~sse4: 0) (avx2: 1457) (512y: 139) (512z: 3568) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 758058b159..0e3d8cfa3e 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_18:38:06 +DATE: 2023-10-28_12:13:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.611847e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.385260e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.471189e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.345370e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.273428e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.386360e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.437775 sec - 1,949,752,289 cycles # 2.999 GHz - 2,775,143,872 instructions # 1.42 insn per cycle - 0.707448913 seconds time elapsed +TOTAL : 0.445126 sec + 1,976,167,592 cycles # 2.999 GHz + 2,793,525,701 instructions # 1.41 insn per cycle + 0.728815394 seconds time 
elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.418789e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.455087e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.521596e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.016231e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.359096e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.456445e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.482861 sec - 2,123,251,884 cycles # 3.013 GHz - 3,090,198,407 instructions # 1.46 insn per cycle - 0.761733855 seconds time elapsed +TOTAL : 0.503583 sec + 2,178,928,712 cycles # 2.984 GHz + 3,140,973,434 instructions # 1.44 insn per cycle + 0.787862044 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,24 +82,24 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.632015e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.645099e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.645099e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.651079e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.664793e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.664793e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.245504 sec - 19,420,521,245 cycles # 3.108 GHz - 59,463,843,270 instructions # 3.06 insn per cycle - 6.249442801 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 961) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.201173 sec + 19,089,624,643 cycles # 3.077 GHz + 58,966,843,094 instructions # 3.09 insn per cycle + 6.209145363 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129949692978414 -Relative difference = 2.1728426918172542e-08 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129858051842916 +Relative difference = 1.3787518662898538e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 OMP= @@ -108,24 +108,24 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 8.406220e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.547669e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.547669e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.775960e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.926769e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.926769e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.968760 sec - 5,998,257,000 cycles # 3.042 GHz - 16,914,468,455 instructions # 2.82 insn per cycle - 1.972914932 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5858) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.887122 sec + 5,852,693,907 cycles # 3.095 GHz + 16,697,748,914 instructions # 2.85 insn per cycle + 1.897838524 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5766) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129954647353316 -Relative difference = 3.2890090308261873e-07 +Avg ME (C++/C++) = 1.412987e+00 +Avg ME (F77/C++) = 1.4129865669244737 +Relative difference = 3.06496469061158e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 OMP= @@ -134,24 +134,24 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.859553e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.925073e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.925073e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.900948 sec - 2,632,220,925 cycles # 2.911 GHz - 6,140,096,248 instructions # 2.33 insn per cycle - 0.904996982 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5019) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.888328e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.956615e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.956615e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 0.888101 sec + 2,589,524,409 cycles # 2.903 GHz + 5,983,645,878 instructions # 2.31 insn per cycle + 0.900109709 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4917) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133161655815059 +Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 OMP= @@ -160,24 +160,24 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.072824e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.155063e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.155063e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.809948 sec - 2,370,894,209 cycles # 2.915 GHz - 5,701,521,318 instructions # 2.40 insn per cycle - 0.814071799 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4804) (512y: 36) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.084640e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.169189e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.169189e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 0.806247 sec + 
2,349,102,230 cycles # 2.899 GHz + 5,606,066,476 instructions # 2.39 insn per cycle + 0.818170477 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4645) (512y: 36) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133161655815059 +Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= @@ -186,15 +186,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.607455e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.657302e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.657302e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.602986e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.653593e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.653593e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.040929 sec - 2,057,132,306 cycles # 1.970 GHz - 3,365,579,683 instructions # 1.64 insn per cycle - 
1.044863677 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2141) (512y: 39) (512z: 3775) +TOTAL : 1.044676 sec + 2,054,220,665 cycles # 1.959 GHz + 3,336,552,151 instructions # 1.62 insn per cycle + 1.055605597 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2131) (512y: 39) (512z: 3668) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index 48beeeb5ad..00167e9c33 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_19:04:39 +DATE: 2023-10-28_12:55:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.864071e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.240535e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.240535e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.962239e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.083211e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.083211e+07 ) sec^-1 MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 -TOTAL : 0.462582 sec - 1,884,746,111 cycles # 2.811 GHz - 2,786,242,007 instructions # 1.48 insn per cycle - 0.730246160 seconds time elapsed +TOTAL : 0.454514 sec + 1,995,648,721 cycles # 3.004 GHz + 2,959,368,603 instructions # 1.48 insn per cycle + 0.721515462 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,14 +72,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.695482e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.755030e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.755030e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.823828e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.651858e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.651858e+07 ) sec^-1 MeanMatrixElemValue = ( 6.737500e+02 +- 4.776370e+02 ) GeV^-2 -TOTAL : 0.635503 sec - 2,447,308,351 cycles # 2.825 GHz - 3,823,535,894 instructions # 1.56 insn per cycle - 0.923575231 seconds time elapsed +TOTAL : 0.625007 sec + 2,587,638,379 cycles # 3.027 GHz + 3,960,735,556 instructions # 1.53 insn per cycle + 0.911552982 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -95,24 +95,24 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.567048e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.579966e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.579966e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.653085e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.666871e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.666871e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.406446 sec - 19,444,692,901 cycles # 3.034 GHz - 59,468,886,107 
instructions # 3.06 insn per cycle - 6.410558637 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 961) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.205408 sec + 19,114,978,719 cycles # 3.081 GHz + 58,970,550,642 instructions # 3.09 insn per cycle + 6.209466695 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129949692978414 -Relative difference = 2.1728426918172542e-08 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129858051842916 +Relative difference = 1.3787518662898538e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= @@ -122,24 +122,24 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.536135e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.681573e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.681573e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.537033e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.687202e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.687202e+04 ) sec^-1 
MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.943530 sec - 6,018,572,710 cycles # 3.091 GHz - 16,962,561,293 instructions # 2.82 insn per cycle - 1.947552922 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5858) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.943360 sec + 5,877,835,029 cycles # 3.024 GHz + 16,747,344,219 instructions # 2.85 insn per cycle + 1.947590837 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5766) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129954647353316 -Relative difference = 3.2890090308261873e-07 +Avg ME (C++/C++) = 1.412987e+00 +Avg ME (F77/C++) = 1.4129865669244737 +Relative difference = 3.06496469061158e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= @@ -149,24 +149,24 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.852963e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.918436e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.918436e+05 ) sec^-1 -MeanMatrixElemValue = ( 
1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.908122 sec - 2,646,262,038 cycles # 2.903 GHz - 6,176,972,836 instructions # 2.33 insn per cycle - 0.912119450 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5019) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.886015e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.954104e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.954104e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 0.892848 sec + 2,604,659,563 cycles # 2.906 GHz + 6,019,606,678 instructions # 2.31 insn per cycle + 0.896969494 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133161655815059 +Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= @@ -176,24 +176,24 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc 
--all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.053883e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.135495e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.135495e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.821672 sec - 2,391,067,663 cycles # 2.897 GHz - 5,738,392,055 instructions # 2.40 insn per cycle - 0.825851117 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4804) (512y: 36) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.092631e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.175440e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.175440e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 0.807239 sec + 2,364,999,004 cycles # 2.917 GHz + 5,642,015,436 instructions # 2.39 insn per cycle + 0.811339455 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4645) (512y: 36) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133161655815059 +Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= @@ -203,15 +203,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.562282e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.609907e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.609907e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.607496e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.656928e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.656928e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.075433 sec - 2,080,452,431 cycles # 1.928 GHz - 3,407,597,282 instructions # 1.64 insn per cycle - 1.079584991 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2141) (512y: 39) (512z: 3775) +TOTAL : 1.045238 sec + 2,072,357,024 cycles # 1.976 GHz + 3,377,232,840 instructions # 1.63 insn per cycle + 1.049434643 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2131) (512y: 39) (512z: 3668) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index 35d51d9f5b..39040526ae 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_18:38:30 +DATE: 2023-10-28_12:13:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.558695e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.304995e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.390429e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.323194e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.236667e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.354926e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.441583 sec - 1,913,307,169 cycles # 2.938 GHz - 2,730,410,416 instructions # 1.43 insn per cycle - 0.710289745 seconds time elapsed +TOTAL : 0.443972 sec + 1,989,313,875 cycles # 2.982 GHz + 2,769,265,611 instructions # 1.39 insn per cycle + 0.743438086 seconds time elapsed runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 248 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.431804e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.480539e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.548305e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.045346e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.393796e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.492274e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.484144 sec - 2,122,361,683 cycles # 3.000 GHz - 3,092,428,798 instructions # 1.46 insn per cycle - 0.764929414 seconds time elapsed +TOTAL : 0.502060 sec + 2,167,795,646 cycles # 2.981 GHz + 3,129,751,096 instructions # 1.44 insn per cycle + 0.786114053 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,24 +82,24 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.622945e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.636379e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.636379e+04 ) 
sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.674630e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.688687e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.688687e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.266974 sec - 19,387,707,588 cycles # 3.092 GHz - 59,211,783,711 instructions # 3.05 insn per cycle - 6.270947254 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1027) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.146503 sec + 19,066,983,066 cycles # 3.100 GHz + 58,708,098,711 instructions # 3.08 insn per cycle + 6.152927808 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1029) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129949692978414 -Relative difference = 2.1728426918172542e-08 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129858051842916 +Relative difference = 1.3787518662898538e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check.exe -p 64 256 10 OMP= @@ -108,24 +108,24 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) 
= ( 8.919855e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.077833e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.077833e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.978466e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.142783e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.142783e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.856181 sec - 5,736,685,730 cycles # 3.085 GHz - 16,708,949,188 instructions # 2.91 insn per cycle - 1.860305013 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5624) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.844883 sec + 5,595,751,190 cycles # 3.027 GHz + 16,514,228,946 instructions # 2.95 insn per cycle + 1.855734095 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5552) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129954647353316 -Relative difference = 3.2890090308261873e-07 +Avg ME (C++/C++) = 1.412987e+00 +Avg ME (F77/C++) = 1.4129865669244737 +Relative difference = 3.06496469061158e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check.exe -p 64 256 10 OMP= @@ -134,24 +134,24 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.619777e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.669337e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.669337e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.031321 sec - 3,001,059,822 cycles # 2.901 GHz - 6,807,446,499 instructions # 2.27 insn per cycle - 1.035316846 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5670) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.638498e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.689068e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.689068e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 1.019971 sec + 2,975,890,134 cycles # 2.906 GHz + 6,637,794,121 instructions # 2.23 insn per cycle + 1.028369498 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5568) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133161655815059 +Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check.exe -p 64 256 10 OMP= @@ -160,24 +160,24 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.758092e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.816843e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.816843e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.951721 sec - 2,767,509,162 cycles # 2.897 GHz - 6,354,591,455 instructions # 2.30 insn per cycle - 0.955744845 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5429) (512y: 22) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.777694e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.837035e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.837035e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 +TOTAL : 0.941610 sec + 
2,755,233,571 cycles # 2.914 GHz + 6,258,993,798 instructions # 2.27 insn per cycle + 0.951640144 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5279) (512y: 25) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133161655815059 +Relative difference = 1.1715816267550621e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= @@ -186,15 +186,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.458284e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.499118e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.499118e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.459473e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.500290e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.500290e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.146088 sec - 2,235,083,857 cycles # 1.946 GHz - 3,731,059,413 instructions # 1.67 insn per cycle - 
1.150018435 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2381) (512y: 29) (512z: 4070) +TOTAL : 1.145362 sec + 2,230,246,069 cycles # 1.940 GHz + 3,701,275,572 instructions # 1.66 insn per cycle + 1.156145462 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2378) (512y: 29) (512z: 3963) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 392905595e..9ab422dbe3 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_18:38:54 +DATE: 2023-10-28_12:14:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.991876e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.048685e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.061466e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.410216e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.039188e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.055496e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 
-TOTAL : 0.460934 sec - 1,966,721,120 cycles # 2.924 GHz - 2,827,577,653 instructions # 1.44 insn per cycle - 0.730736586 seconds time elapsed +TOTAL : 0.463271 sec + 2,069,250,958 cycles # 3.003 GHz + 2,927,480,970 instructions # 1.41 insn per cycle + 0.772082200 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.122528e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.320226e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.331429e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.035124e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.310358e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.327324e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.588908 sec - 2,472,359,151 cycles # 3.012 GHz - 3,773,127,523 instructions # 1.53 insn per cycle - 0.879878807 seconds time elapsed +TOTAL : 0.614913 sec + 2,526,856,265 cycles # 2.922 GHz + 3,709,441,692 instructions # 1.47 insn per cycle + 0.922727767 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,15 +82,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP 
precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.519449e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.531484e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.531484e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.556257e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.568688e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.568688e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.525526 sec - 20,092,492,706 cycles # 3.078 GHz - 60,052,973,297 instructions # 2.99 insn per cycle - 6.529664742 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1224) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.433051 sec + 20,008,557,894 cycles # 3.109 GHz + 60,540,006,479 instructions # 3.03 insn per cycle + 6.439212233 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1399) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -108,15 +108,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.869773e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.913961e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.913961e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.019070e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.065045e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.065045e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.385815 sec - 10,415,517,369 cycles # 3.073 GHz - 30,737,885,914 instructions # 2.95 insn per cycle - 3.390029957 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5351) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.285972 sec + 10,187,016,958 cycles # 3.097 GHz + 30,390,859,032 instructions # 2.98 insn per cycle + 3.301352868 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5280) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -134,15 +134,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.784627e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.958931e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.958931e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.989285e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.017232e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.017232e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.696342 sec - 4,938,080,705 cycles # 2.905 GHz - 11,263,764,405 instructions # 2.28 insn per cycle - 1.700575900 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4683) (512y: 0) (512z: 0) +TOTAL : 1.661873 sec + 4,866,855,961 cycles # 2.922 GHz + 10,982,611,406 instructions # 2.26 insn per cycle + 1.676714535 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4623) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -160,15 +160,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.113465e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.136337e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.136337e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.106943e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.129653e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.129653e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.492907 sec - 4,341,247,814 cycles # 2.902 GHz - 10,434,510,449 instructions # 2.40 insn per cycle - 1.497014311 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4418) (512y: 83) (512z: 0) +TOTAL : 1.502413 sec + 4,275,650,140 cycles # 2.838 GHz + 10,251,987,528 instructions # 2.40 insn per cycle + 1.514545077 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4279) (512y: 82) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -186,15 +186,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.532516e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.636201e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.636201e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.717947e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.824687e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.824687e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.198970 sec - 4,210,314,244 cycles # 1.912 GHz - 6,111,580,609 instructions # 1.45 insn per cycle - 2.203311339 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2065) (512y: 117) (512z: 3649) +TOTAL : 2.146639 sec + 4,201,565,757 cycles # 1.954 GHz + 6,049,789,151 instructions # 1.44 insn per cycle + 2.158797101 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2065) (512y: 117) (512z: 3540) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index 39bb25c947..76b7b412f7 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-25_18:39:22 +DATE: 2023-10-28_12:14:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.944009e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.041807e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.053824e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.390550e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.033270e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.048966e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.457165 sec - 2,013,112,285 cycles # 3.009 GHz - 2,933,107,931 instructions # 1.46 insn per cycle - 0.726273814 seconds time elapsed +TOTAL : 0.460476 sec + 2,081,307,934 cycles # 3.031 GHz + 2,955,584,789 instructions # 1.42 insn per cycle + 0.756578458 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.111421e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
1.304827e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.316338e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.034797e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.302411e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.318796e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.584817 sec - 2,503,239,468 cycles # 3.024 GHz - 3,758,528,305 instructions # 1.50 insn per cycle - 0.886403311 seconds time elapsed +TOTAL : 0.602596 sec + 2,555,613,252 cycles # 3.028 GHz + 3,854,786,899 instructions # 1.51 insn per cycle + 0.903400095 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,15 +82,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.502057e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.514010e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.514010e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.567199e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.579456e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.579456e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.570415 sec - 20,096,701,303 cycles # 3.057 GHz - 60,261,778,784 instructions # 3.00 insn per cycle - 6.574454844 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1271) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.404936 sec + 19,891,783,124 cycles # 3.104 GHz + 59,941,724,942 instructions # 3.01 insn per cycle + 6.411494032 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1276) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -108,15 +108,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.950569e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.996463e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.996463e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.101693e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.150119e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.150119e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.330994 sec - 10,302,022,615 cycles # 3.090 GHz - 30,444,386,178 instructions # 2.96 insn per cycle - 3.335148743 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5149) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.233107 sec + 10,067,635,537 cycles # 3.110 GHz + 30,103,976,009 instructions # 2.99 insn per cycle + 3.244612142 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5082) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -134,15 +134,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.413979e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.578528e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.578528e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.657022e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.831623e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.831623e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.762053 sec - 5,116,204,786 cycles # 2.900 GHz - 11,780,626,112 instructions # 2.30 insn per cycle - 1.766305951 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4795) (512y: 0) (512z: 0) +TOTAL : 1.718382 sec + 5,018,528,066 cycles # 2.914 GHz + 11,487,169,268 instructions # 2.29 insn per cycle + 1.731257916 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4722) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -160,15 +160,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.034226e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.053843e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.053843e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.059601e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.080189e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.080189e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.605857 sec - 4,642,992,473 cycles # 2.885 GHz - 10,992,793,436 instructions # 2.37 insn per cycle - 1.609875653 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4423) (512y: 238) (512z: 0) +TOTAL : 1.567855 sec + 4,593,189,329 cycles # 2.922 GHz + 10,814,244,535 instructions # 2.35 insn per cycle + 1.579062821 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4284) (512y: 234) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -186,15 +186,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.613528e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.720366e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.720366e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.560814e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.667240e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.667240e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.175272 sec - 4,221,455,153 cycles # 1.938 GHz - 6,349,351,796 instructions # 1.50 insn per cycle - 2.179367593 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1959) (512y: 163) (512z: 3727) +TOTAL : 2.191040 sec + 4,225,946,509 cycles # 1.926 GHz + 6,280,173,074 instructions # 1.49 insn per cycle + 2.204926125 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1960) (512y: 163) (512z: 3617) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index bff9233075..b03fb05630 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:39:51 +DATE: 2023-10-28_12:14:58 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.468061e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.492682e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.494686e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.463578e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.497575e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.499993e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.518589 sec - 2,245,805,905 cycles # 3.007 GHz - 3,544,979,174 instructions # 1.58 insn per cycle - 0.807315238 seconds time elapsed +TOTAL : 0.524913 sec + 2,275,162,229 cycles # 2.955 GHz + 3,553,819,604 instructions # 1.56 insn per cycle + 0.840621862 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.126268e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
4.153472e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.154624e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.112401e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.153434e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.155135e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.016484 sec - 10,017,854,023 cycles # 3.069 GHz - 22,587,762,207 instructions # 2.25 insn per cycle - 3.322845777 seconds time elapsed +TOTAL : 3.040153 sec + 10,119,184,300 cycles # 3.067 GHz + 22,331,058,292 instructions # 2.21 insn per cycle + 3.356275754 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,15 +82,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.955514e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.956404e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.956404e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.969742e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.970726e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.970726e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.396540 sec - 25,870,300,414 cycles # 3.080 GHz - 78,705,757,349 instructions # 3.04 insn per cycle - 8.400556749 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4800) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.336811 sec + 25,629,457,447 cycles # 3.073 GHz + 78,942,027,813 instructions # 3.08 insn per cycle + 8.343510031 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -108,15 +108,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.628098e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.631366e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.631366e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.772848e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.776337e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.776337e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.529617 sec - 13,157,831,550 cycles # 2.903 GHz - 39,316,654,466 instructions # 2.99 insn per cycle - 4.533882139 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13159) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.356161 sec + 12,939,207,595 cycles # 2.968 GHz + 39,285,045,357 instructions # 3.04 insn per cycle + 4.369724563 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -134,15 +134,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.489125e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.506110e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.506110e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.598110e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.615220e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.615220e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.941066 sec - 5,640,899,050 cycles # 2.901 GHz - 13,915,027,017 instructions # 2.47 insn per cycle - 1.945275776 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11359) (512y: 0) (512z: 0) +TOTAL : 1.917837 sec + 5,575,445,996 cycles # 2.903 GHz + 13,689,935,380 instructions # 2.46 insn per cycle + 1.930849564 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -160,15 +160,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.632867e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.655023e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.655023e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.808785e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.831336e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.831336e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.711467 sec - 4,969,822,591 cycles # 2.898 GHz - 12,556,829,300 instructions # 2.53 insn per cycle - 1.715640499 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10999) (512y: 88) (512z: 0) +TOTAL : 1.681743 sec + 4,897,861,318 cycles # 2.907 GHz + 12,346,089,227 instructions # 2.52 insn per cycle + 1.694439322 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -186,15 +186,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.688750e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.702937e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.702937e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.719699e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.733791e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.733791e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.142319 sec - 4,116,162,262 cycles # 1.918 GHz - 6,441,474,951 instructions # 1.56 insn per cycle - 2.146523645 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1816) (512y: 102) (512z:10110) +TOTAL : 2.135110 sec + 4,120,647,411 cycles # 1.929 GHz + 6,338,696,859 instructions # 1.54 insn per cycle + 2.148760737 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index 19b0ccbfe1..c9f4681184 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_19:05:36 +DATE: 2023-10-28_12:56:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.145401e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.455796e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.455796e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.159242e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.488413e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.488413e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.511465 sec - 2,211,538,389 cycles # 2.999 GHz - 3,499,461,341 instructions # 1.58 insn per cycle - 0.799495725 seconds time elapsed +TOTAL : 0.510805 sec + 2,221,159,082 cycles # 3.005 GHz + 3,427,089,559 instructions # 1.54 insn per cycle + 0.799153737 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,14 +72,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.639414e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.104964e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.104964e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.639589e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.106238e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.106238e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.289425 sec - 10,927,544,611 cycles # 3.076 GHz - 23,831,419,819 instructions # 2.18 insn per cycle - 3.609486496 seconds time elapsed +TOTAL : 3.292866 sec + 10,669,679,246 cycles # 3.004 GHz + 21,499,927,390 instructions # 2.02 insn per cycle + 3.618753393 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -95,15 +95,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.955661e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.956599e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.956599e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.982344e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.983366e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.983366e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.399286 sec - 25,889,610,376 cycles # 3.081 GHz - 
78,711,674,763 instructions # 3.04 insn per cycle - 8.403464378 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4800) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.287757 sec + 25,648,558,798 cycles # 3.094 GHz + 78,947,940,131 instructions # 3.08 insn per cycle + 8.292307316 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -122,15 +122,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.684905e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.688434e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.688434e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.761661e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.765162e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.765162e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.463396 sec - 13,180,558,134 cycles # 2.951 GHz - 39,329,251,791 instructions # 2.98 insn per cycle - 4.467689901 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13159) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.372920 sec + 12,954,588,590 cycles # 2.961 GHz + 39,299,714,112 instructions # 3.03 insn per cycle + 4.377339776 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -149,15 +149,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.312850e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.329128e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.329128e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.433253e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.451841e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.451841e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.985951 sec - 5,671,057,559 cycles # 2.852 GHz - 13,925,731,418 instructions # 2.46 insn per cycle - 1.990267942 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11359) (512y: 0) (512z: 0) +TOTAL : 1.958034 sec + 5,599,887,733 cycles # 2.856 GHz + 13,700,928,063 instructions # 2.45 insn per cycle + 1.962488309 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -176,15 +176,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.189591e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.210527e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.210527e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.858350e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.882867e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.882867e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.797800 sec - 4,986,486,293 cycles # 2.768 GHz - 12,566,997,052 instructions # 2.52 insn per cycle - 1.802092456 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10999) (512y: 88) (512z: 0) +TOTAL : 1.676908 sec + 4,910,211,168 cycles # 2.922 GHz + 12,355,246,497 instructions # 2.52 insn per cycle + 1.681230393 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -203,15 +203,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.650923e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.665003e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.665003e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.731935e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.747245e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.747245e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.156621 sec - 4,130,305,981 cycles # 1.912 GHz - 6,453,079,741 instructions # 1.56 insn per cycle - 2.160974147 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1816) (512y: 102) (512z:10110) +TOTAL : 2.134622 sec + 4,135,541,831 cycles # 1.934 GHz + 6,347,831,005 instructions # 1.53 insn per cycle + 2.139010667 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index 81203fa77a..d2163869aa 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_19:16:31 +DATE: 2023-10-28_13:07:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.481338e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.505105e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.507123e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.482945e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.509030e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.511080e+05 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.501686 sec - 2,214,909,881 cycles # 3.031 GHz - 3,458,747,276 instructions # 1.56 insn per cycle - 0.800330270 seconds time elapsed +TOTAL : 0.503818 sec + 2,242,046,263 cycles # 3.022 GHz + 3,462,245,216 instructions # 1.54 insn per cycle + 0.810702032 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.151978e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 4.180537e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.181762e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.144510e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.176604e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.177952e+05 ) sec^-1 MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 3.117189 sec - 10,377,272,213 cycles # 3.074 GHz - 22,017,651,367 instructions # 2.12 insn per cycle - 3.433688033 seconds time elapsed +TOTAL : 3.117292 sec + 10,408,915,486 cycles # 3.090 GHz + 23,283,624,708 instructions # 2.24 insn per cycle + 3.424451185 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,15 +82,15 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.951603e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.952527e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.952527e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.975977e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.976973e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.976973e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 8.414289 sec - 25,891,693,281 cycles # 3.076 GHz - 78,705,382,161 instructions # 3.04 insn per cycle - 8.418214136 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4800) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.311280 sec + 25,652,648,741 cycles # 3.086 GHz + 78,942,035,181 instructions # 3.08 insn per cycle + 8.315453587 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) 
(512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -108,15 +108,15 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.708393e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.711838e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.711838e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.766259e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.769789e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.769789e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.432833 sec - 13,155,908,783 cycles # 2.966 GHz - 39,315,348,391 instructions # 2.99 insn per cycle - 4.436757068 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13159) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.364797 sec + 12,929,400,123 cycles # 2.960 GHz + 39,283,753,035 instructions # 3.04 insn per cycle + 4.368806594 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -134,15 +134,15 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.511504e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.528904e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.528904e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.603374e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.622598e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.622598e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.937344 sec - 5,646,349,471 cycles # 2.910 GHz - 13,913,307,123 instructions # 2.46 insn per cycle - 1.941259569 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11359) (512y: 0) (512z: 0) +TOTAL : 1.917346 sec + 5,583,005,212 cycles # 2.907 GHz + 13,688,490,227 instructions # 2.45 insn per cycle + 1.921338026 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -160,15 +160,15 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.572932e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.594635e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.594635e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.863476e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.887220e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.887220e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.724509 sec - 4,978,068,357 cycles # 2.882 GHz - 12,554,500,287 instructions # 2.52 insn per cycle - 1.728498714 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10999) (512y: 88) (512z: 0) +TOTAL : 1.673382 sec + 4,898,137,074 cycles # 2.921 GHz + 12,342,311,827 instructions # 2.52 insn per cycle + 1.677443337 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -186,15 +186,15 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.677554e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.691380e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.691380e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.760383e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.775362e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.775362e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.147049 sec - 4,126,422,075 cycles # 1.919 GHz - 6,439,114,110 instructions # 1.56 insn per cycle - 2.151134180 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1816) (512y: 102) (512z:10110) +TOTAL : 2.124477 sec + 4,119,654,668 cycles # 1.936 GHz + 6,334,865,926 instructions # 1.54 insn per cycle + 2.128551172 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt index 983ed35921..ee4bbf6a10 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_19:13:27 +DATE: 2023-10-28_13:04:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.486758e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.510432e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.512397e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.458933e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.484257e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.486595e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.503841 sec - 2,219,754,850 cycles # 2.994 GHz - 3,493,120,915 instructions # 1.57 insn per cycle - 0.814222419 seconds time elapsed +TOTAL : 0.505513 sec + 2,142,738,680 cycles # 2.872 GHz + 3,291,927,054 instructions # 1.54 insn per cycle + 0.814547560 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.146385e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 4.174926e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.176115e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.135835e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.167914e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.169281e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.060532 sec - 10,143,930,144 cycles # 3.066 GHz - 23,186,884,860 instructions # 2.29 insn per cycle - 3.364498351 seconds time elapsed +TOTAL : 3.071645 sec + 9,484,592,815 cycles # 2.854 GHz + 19,957,614,748 instructions # 2.10 insn per cycle + 3.382302802 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,15 +82,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.960187e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.961109e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.961109e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.977445e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.978379e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.978379e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.392474 sec - 25,877,587,986 cycles # 3.088 GHz - 78,705,423,071 instructions # 3.04 insn per cycle - 8.396357877 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4800) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.302845 sec + 25,643,681,639 cycles # 3.088 GHz + 78,943,468,215 instructions # 3.08 insn per cycle + 8.306861638 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) 
(512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -108,15 +108,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.691020e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.694421e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.694421e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.759581e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.763145e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.763145e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.452252 sec - 13,153,001,214 cycles # 2.952 GHz - 39,316,173,049 instructions # 2.99 insn per cycle - 4.456201629 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13159) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.371292 sec + 12,933,873,362 cycles # 2.957 GHz + 39,285,530,682 instructions # 3.04 insn per cycle + 4.375381944 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -134,15 +134,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.443294e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.459986e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.459986e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.493227e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.509835e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.509835e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.951557 sec - 5,638,519,517 cycles # 2.884 GHz - 13,914,420,326 instructions # 2.47 insn per cycle - 1.955513391 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11359) (512y: 0) (512z: 0) +TOTAL : 1.940350 sec + 5,576,631,784 cycles # 2.870 GHz + 13,689,769,110 instructions # 2.45 insn per cycle + 1.944523071 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -160,15 +160,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.662759e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.685664e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.685664e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.853148e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.876403e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.876403e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.706815 sec - 4,966,762,812 cycles # 2.905 GHz - 12,556,639,833 instructions # 2.53 insn per cycle - 1.710823467 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10999) (512y: 88) (512z: 0) +TOTAL : 1.673469 sec + 4,897,482,597 cycles # 2.921 GHz + 12,344,356,559 instructions # 2.52 insn per cycle + 1.677557270 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -186,15 +186,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.630990e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.645195e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.645195e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.734174e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.748650e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.748650e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.158956 sec - 4,117,585,001 cycles # 1.904 GHz - 6,441,334,233 instructions # 1.56 insn per cycle - 2.163053685 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1816) (512y: 102) (512z:10110) +TOTAL : 2.130291 sec + 4,113,955,714 cycles # 1.929 GHz + 6,336,883,103 instructions # 1.54 insn per cycle + 2.134326924 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index 089d292aa8..b75462246a 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_19:10:27 +DATE: 2023-10-28_13:01:32 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -45,14 +45,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.218258e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.503307e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.505360e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.203179e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.505047e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.507166e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.506835 sec - 2,221,452,108 cycles # 3.022 GHz - 3,523,570,836 instructions # 1.59 insn per cycle - 0.796752404 seconds time elapsed +TOTAL : 0.507425 sec + 2,256,484,054 cycles # 3.018 GHz + 3,532,292,777 instructions # 1.57 insn per cycle + 0.807552942 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -63,14 +63,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.728232e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.175248e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.176437e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.752259e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.179632e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.180998e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.202196 sec - 10,370,823,383 cycles # 3.013 GHz - 22,699,363,327 instructions # 2.19 insn per cycle - 3.506471268 seconds time elapsed +TOTAL : 3.188259 sec + 10,521,010,437 cycles # 3.060 GHz + 22,991,661,537 instructions # 2.19 insn per cycle + 3.496740114 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -85,15 +85,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.964142e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.965067e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.965067e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.974734e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.975712e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.975712e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.359175 sec - 25,872,166,576 cycles # 3.094 GHz - 78,706,432,099 
instructions # 3.04 insn per cycle - 8.363176184 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4800) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.314402 sec + 25,652,011,614 cycles # 3.084 GHz + 78,942,019,789 instructions # 3.08 insn per cycle + 8.318449049 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -111,15 +111,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.675901e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.679176e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.679176e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.762034e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.765542e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.765542e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.470613 sec - 13,168,571,852 cycles # 2.943 GHz - 39,316,143,486 instructions # 2.99 insn per cycle - 4.474685106 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13159) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.368601 sec + 12,923,718,733 cycles # 2.956 GHz + 39,284,266,866 instructions # 3.04 insn per cycle + 4.372599268 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -137,15 +137,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.475559e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.492856e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.492856e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.585138e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.602865e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.602865e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.944338 sec - 5,656,434,910 cycles # 2.905 GHz - 13,914,488,872 instructions # 2.46 insn per cycle - 1.948357306 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11359) (512y: 0) (512z: 0) +TOTAL : 1.919799 sec + 5,575,045,532 cycles # 2.899 GHz + 13,689,091,754 instructions # 2.46 insn per cycle + 1.923833244 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -163,15 +163,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.315923e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.336840e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.336840e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.709286e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.732700e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.732700e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.769971 sec - 4,966,635,750 cycles # 2.801 GHz - 12,556,400,439 instructions # 2.53 insn per cycle - 1.774243834 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10999) (512y: 88) (512z: 0) +TOTAL : 1.698739 sec + 4,895,541,757 cycles # 2.876 GHz + 12,344,329,839 instructions # 2.52 insn per cycle + 1.702807761 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -189,15 +189,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.505641e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.519600e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.519600e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.521763e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.535260e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.535260e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.194843 sec - 4,122,096,598 cycles # 1.876 GHz - 6,442,654,429 instructions # 1.56 insn per cycle - 2.198924835 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1816) (512y: 102) (512z:10110) +TOTAL : 2.201467 sec + 4,123,996,359 cycles # 1.880 GHz + 6,337,725,442 instructions # 1.54 insn per cycle + 2.205598437 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index db28556fed..285526afab 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:40:27 +DATE: 2023-10-28_12:15:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.480350e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.503709e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.505689e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.468074e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.501103e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.503439e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.515106 sec - 2,241,115,881 cycles # 3.010 GHz - 3,491,500,030 instructions # 1.56 insn per cycle - 0.803761340 seconds time elapsed +TOTAL : 0.522423 sec + 2,258,157,328 cycles # 2.996 GHz + 3,505,967,169 instructions # 1.55 insn per cycle + 0.828984887 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.140925e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
4.168327e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.169450e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.133945e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.175029e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.176516e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.010736 sec - 10,017,349,944 cycles # 3.076 GHz - 21,234,719,579 instructions # 2.12 insn per cycle - 3.315687380 seconds time elapsed +TOTAL : 3.028645 sec + 10,079,761,797 cycles # 3.067 GHz + 21,246,273,087 instructions # 2.11 insn per cycle + 3.345878141 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,15 +82,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.950176e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.951182e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.951182e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.994496e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.995483e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.995483e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.419375 sec - 25,820,844,871 cycles # 3.067 GHz - 78,455,782,361 instructions # 3.04 insn per cycle - 8.423501286 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4147) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.232531 sec + 25,587,628,331 cycles # 3.107 GHz + 78,714,235,393 instructions # 3.08 insn per cycle + 8.239130149 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4263) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -108,15 +108,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.695368e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.698841e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.698841e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.767187e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.770492e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.770492e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.446590 sec - 13,096,365,032 cycles # 2.943 GHz - 39,266,931,549 instructions # 3.00 insn per cycle - 4.450776925 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12925) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.363131 sec + 12,895,914,281 cycles # 2.954 GHz + 39,231,307,091 instructions # 3.04 insn per cycle + 4.377380038 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:12949) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -134,15 +134,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.473385e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.490359e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.490359e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.462946e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.479149e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.479149e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.944494 sec - 5,647,694,185 cycles # 2.899 GHz - 14,031,784,985 instructions # 2.48 insn per cycle - 1.948726891 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11428) (512y: 0) (512z: 0) +TOTAL : 1.947278 sec + 5,616,857,616 cycles # 2.878 GHz + 13,804,844,198 instructions # 2.46 insn per cycle + 1.960556077 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11422) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -160,15 +160,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.439321e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.460393e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.460393e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.535336e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.556895e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.556895e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.746193 sec - 5,071,268,913 cycles # 2.898 GHz - 12,684,289,306 instructions # 2.50 insn per cycle - 1.750379728 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10992) (512y: 240) (512z: 0) +TOTAL : 1.729893 sec + 4,961,309,130 cycles # 2.862 GHz + 12,470,429,498 instructions # 2.51 insn per cycle + 1.742792817 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10258) (512y: 240) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -186,15 +186,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.529292e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.543220e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.543220e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.735372e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.749607e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.749607e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.188060 sec - 4,141,433,761 cycles # 1.890 GHz - 6,563,782,413 instructions # 1.58 insn per cycle - 2.192342750 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1640) (512y: 192) (512z:10068) +TOTAL : 2.130756 sec + 4,113,624,864 cycles # 1.928 GHz + 6,461,615,134 instructions # 1.57 insn per cycle + 2.143576859 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1647) (512y: 192) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index 4c6f36c205..3d8001724d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:55:24 +DATE: 2023-10-28_12:46:36 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.222490e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.244878e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.246691e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.232608e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.257163e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.259717e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.528299 sec - 2,270,979,797 cycles # 3.018 GHz - 3,579,093,863 instructions # 1.58 insn per cycle - 0.812313256 seconds time elapsed +TOTAL : 0.537548 sec + 2,181,694,080 cycles # 2.844 GHz + 3,448,903,433 instructions # 1.58 insn per cycle + 0.824462802 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.777355e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
3.800618e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.801578e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.772126e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.800608e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.801737e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.287863 sec - 10,849,521,679 cycles # 3.070 GHz - 24,134,668,326 instructions # 2.22 insn per cycle - 3.593012410 seconds time elapsed +TOTAL : 3.306403 sec + 10,895,937,046 cycles # 3.061 GHz + 25,195,776,290 instructions # 2.31 insn per cycle + 3.618136138 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.444628e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.445098e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.445098e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.475211e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.475697e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.475697e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 36.909027 sec - 113,587,132,048 cycles # 3.078 GHz - 144,964,358,008 instructions # 1.28 insn per cycle - 36.912946290 seconds time elapsed +TOTAL : 36.656742 sec + 113,701,435,580 cycles # 3.102 GHz + 144,964,623,435 instructions # 1.27 insn per cycle + 36.660899030 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:21605) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.256823e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.259432e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.259432e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.281464e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.284057e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.284057e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.044758 sec - 14,717,259,007 cycles # 2.916 GHz - 37,577,668,645 instructions # 2.55 insn per cycle - 5.048857745 seconds time elapsed +TOTAL : 5.007154 sec + 14,757,637,040 cycles # 2.945 GHz + 37,577,566,768 instructions # 2.55 insn per cycle + 5.011445912 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:68118) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.803332e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.817865e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.817865e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.834630e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.849176e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.849176e+03 ) sec^-1 MeanMatrixElemValue = ( 
4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.110842 sec - 6,124,055,435 cycles # 2.897 GHz - 13,063,274,169 instructions # 2.13 insn per cycle - 2.114845473 seconds time elapsed +TOTAL : 2.103000 sec + 6,138,951,255 cycles # 2.915 GHz + 13,063,781,443 instructions # 2.13 insn per cycle + 2.107315848 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:46960) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.453213e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.474776e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.474776e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.518402e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.540775e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.540775e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.744006 sec - 5,055,001,520 cycles # 2.893 GHz - 11,442,027,490 instructions # 2.26 insn per cycle - 1.747990275 seconds time elapsed +TOTAL : 1.732398 sec + 5,064,058,853 cycles # 2.917 GHz + 11,441,974,869 instructions # 2.26 insn per cycle + 1.736638349 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:40434) (512y: 285) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.925155e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.940309e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.940309e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.440975e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.454360e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.454360e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.078899 sec - 3,977,787,711 cycles # 1.911 GHz - 5,943,488,721 instructions # 1.49 insn per cycle - 2.082985085 seconds time elapsed +TOTAL : 2.214373 sec + 3,981,991,377 cycles # 1.796 GHz + 5,944,372,586 instructions # 1.49 insn per cycle + 2.218672213 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2455) (512y: 337) (512z:39411) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index 6ac5000ce8..bc567145ef 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:56:31 +DATE: 2023-10-28_12:47:43 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.239290e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.259875e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.261623e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.272416e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.297947e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.300035e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.524624 sec - 2,256,102,466 cycles # 3.003 GHz - 3,565,937,124 instructions # 1.58 insn per cycle - 0.809821745 seconds time elapsed +TOTAL : 0.528298 sec + 2,302,052,159 cycles # 3.036 GHz + 3,620,830,785 instructions # 1.57 insn per cycle + 0.815937018 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.793727e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
3.817115e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.818113e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.791562e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.820307e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.821476e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.259889 sec - 10,730,925,294 cycles # 3.058 GHz - 24,431,623,702 instructions # 2.28 insn per cycle - 3.565849378 seconds time elapsed +TOTAL : 3.269219 sec + 10,870,262,953 cycles # 3.083 GHz + 25,015,120,646 instructions # 2.30 insn per cycle + 3.582561850 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.412471e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.412949e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.412949e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.439633e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.440117e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.440117e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 37.178028 sec - 114,361,849,248 cycles # 3.076 GHz - 145,560,134,005 instructions # 1.27 insn per cycle - 37.182120025 seconds time elapsed +TOTAL : 36.950462 sec + 114,331,814,225 cycles # 3.094 GHz + 145,554,942,207 instructions # 1.27 insn per cycle + 36.954719062 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:22248) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.195698e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.198225e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.198225e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.203750e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.206253e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.206253e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.140800 sec - 15,162,495,765 cycles # 2.948 GHz - 37,764,610,972 instructions # 2.49 insn per cycle - 5.144813607 seconds time elapsed +TOTAL : 5.128371 sec + 15,156,085,711 cycles # 2.954 GHz + 37,766,724,921 instructions # 2.49 insn per cycle + 5.132696438 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:68446) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.961138e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.976889e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.976889e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.538689e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.553791e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.553791e+03 ) sec^-1 MeanMatrixElemValue = ( 
4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.069144 sec - 6,001,988,087 cycles # 2.896 GHz - 12,897,757,655 instructions # 2.15 insn per cycle - 2.073134028 seconds time elapsed +TOTAL : 2.185324 sec + 6,179,617,797 cycles # 2.824 GHz + 12,898,959,576 instructions # 2.09 insn per cycle + 2.189901544 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:45929) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.425265e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.445952e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.445952e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.387903e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.408655e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.408655e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.748961 sec - 5,100,741,339 cycles # 2.911 GHz - 11,448,531,367 instructions # 2.24 insn per cycle - 1.753002861 seconds time elapsed +TOTAL : 1.756333 sec + 5,103,812,803 cycles # 2.900 GHz + 11,448,412,714 instructions # 2.24 insn per cycle + 1.760591620 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:40123) (512y: 219) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.951658e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.967421e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.967421e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.063003e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.078866e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.078866e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.071513 sec - 3,955,166,061 cycles # 1.907 GHz - 5,898,178,662 instructions # 1.49 insn per cycle - 2.075594399 seconds time elapsed +TOTAL : 2.043864 sec + 3,955,410,863 cycles # 1.932 GHz + 5,897,312,655 instructions # 1.49 insn per cycle + 2.048048607 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1971) (512y: 259) (512z:38937) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 7d5250e643..3fbbac30c0 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:41:03 +DATE: 2023-10-28_12:16:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.344411e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.393693e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.398639e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.297352e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.358144e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.364157e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.476002 sec - 2,055,020,376 cycles # 2.991 GHz - 3,081,758,808 instructions # 1.50 insn per cycle - 0.745881313 seconds time elapsed +TOTAL : 0.478722 sec + 2,084,997,410 cycles # 3.001 GHz + 3,026,333,377 instructions # 1.45 insn per cycle + 0.781511130 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.554616e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
8.614004e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.616642e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.497417e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.585779e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.589540e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.707448 sec - 5,919,573,877 cycles # 3.066 GHz - 11,552,481,792 instructions # 1.95 insn per cycle - 1.990304243 seconds time elapsed +TOTAL : 1.726967 sec + 6,020,957,355 cycles # 3.058 GHz + 11,595,600,563 instructions # 1.93 insn per cycle + 2.028330884 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,15 +82,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.027055e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.028034e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.028034e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.077603e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.078639e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.078639e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.098867 sec - 25,111,702,681 cycles # 3.100 GHz - 78,142,230,902 instructions # 3.11 insn per cycle - 8.102919065 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3558) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 7.903289 sec + 24,637,646,455 cycles # 3.117 GHz + 78,134,944,640 instructions # 3.17 insn per cycle + 7.909956096 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -98,8 +98,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863312764526E-004 -Relative difference = 4.998523613136231e-08 +Avg ME (F77/C++) = 6.6274863266294753E-004 +Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 OMP= @@ -108,15 +108,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.175810e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.188746e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.188746e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.336898e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.349941e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.349941e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.292926 sec - 6,573,476,191 cycles # 2.863 GHz - 20,176,795,660 instructions # 3.07 insn per cycle - 2.297103514 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13749) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.243261 sec + 6,492,799,837 cycles # 2.890 GHz + 20,126,015,053 
instructions # 3.10 insn per cycle + 2.255396532 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -124,8 +124,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861442972011E-004 -Relative difference = 2.1772539563413118e-08 +Avg ME (F77/C++) = 6.6274861460025036E-004 +Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 OMP= @@ -134,15 +134,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.680111e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.687118e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.687118e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.684888e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.692077e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.692077e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.983720 sec - 2,861,168,699 cycles # 2.899 GHz - 7,112,434,592 instructions # 2.49 insn per cycle - 0.987814280 seconds time elapsed -=Symbols 
in CPPProcess.o= (~sse4: 0) (avx2:11880) (512y: 0) (512z: 0) +TOTAL : 0.981200 sec + 2,874,990,715 cycles # 2.917 GHz + 6,992,497,699 instructions # 2.43 insn per cycle + 0.996477722 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -150,8 +150,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 OMP= @@ -160,15 +160,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.901709e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.910562e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.910562e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.951838e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.960952e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.960952e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.870092 
sec - 2,532,384,607 cycles # 2.899 GHz - 6,407,671,698 instructions # 2.53 insn per cycle - 0.874200480 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11552) (512y: 43) (512z: 0) +TOTAL : 0.849043 sec + 2,488,916,995 cycles # 2.921 GHz + 6,299,592,317 instructions # 2.53 insn per cycle + 0.863735210 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -176,8 +176,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 OMP= @@ -186,15 +186,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.544511e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.550364e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.550364e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.569339e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.575404e+04 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.575404e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.069933 sec - 2,059,770,627 cycles # 1.919 GHz - 3,321,177,538 instructions # 1.61 insn per cycle - 1.074034173 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2408) (512y: 46) (512z:10312) +TOTAL : 1.053473 sec + 2,048,152,009 cycles # 1.937 GHz + 3,269,571,231 instructions # 1.60 insn per cycle + 1.068752344 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -202,8 +202,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779718007E-004 -Relative difference = 4.194411063934945e-08 +Avg ME (F77/C++) = 6.6271952779373838E-004 +Relative difference = 4.193891735414155e-08 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index 866fb524ce..eec6c5d5ad 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_19:06:12 +DATE: 2023-10-28_12:57:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.649753e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.350178e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.350178e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.648710e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.349296e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.349296e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.465463 sec - 2,047,479,601 cycles # 2.978 GHz - 3,049,363,259 instructions # 1.49 insn per cycle - 0.744895811 seconds time elapsed +TOTAL : 0.466841 sec + 2,045,819,276 cycles # 3.010 GHz + 3,040,046,648 instructions # 1.49 insn per cycle + 0.737733718 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,14 +72,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.287015e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.501118e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.501118e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.258231e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.472706e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.472706e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.864461 sec - 6,383,949,622 cycles # 3.047 GHz - 13,653,993,577 instructions # 2.14 insn per cycle - 2.154815432 seconds time elapsed +TOTAL : 1.870793 sec + 6,498,753,280 cycles # 3.085 GHz + 12,854,465,282 instructions # 1.98 insn per cycle + 2.162853434 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -95,15 +95,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.020017e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.021013e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.021013e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.065545e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.066590e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.066590e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.129725 sec - 25,159,036,941 cycles # 3.094 GHz - 78,146,432,404 
instructions # 3.11 insn per cycle - 8.133616973 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3558) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 7.950146 sec + 24,648,239,076 cycles # 3.099 GHz + 78,137,109,320 instructions # 3.17 insn per cycle + 7.954221606 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -111,8 +111,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863312764526E-004 -Relative difference = 4.998523613136231e-08 +Avg ME (F77/C++) = 6.6274863266294753E-004 +Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= @@ -122,15 +122,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.180823e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.193936e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.193936e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.517104e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.531205e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.531205e+03 
) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.294060 sec - 6,581,200,311 cycles # 2.865 GHz - 20,186,134,505 instructions # 3.07 insn per cycle - 2.298229949 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13749) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.192162 sec + 6,488,348,501 cycles # 2.956 GHz + 20,134,095,524 instructions # 3.10 insn per cycle + 2.196338550 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -138,8 +138,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861442972011E-004 -Relative difference = 2.1772539563413118e-08 +Avg ME (F77/C++) = 6.6274861460025036E-004 +Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= @@ -149,15 +149,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.675906e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.682979e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.682979e+04 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 1.705707e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.712958e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.712958e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.989013 sec - 2,874,126,906 cycles # 2.896 GHz - 7,122,171,177 instructions # 2.48 insn per cycle - 0.993096654 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11880) (512y: 0) (512z: 0) +TOTAL : 0.971759 sec + 2,847,061,553 cycles # 2.920 GHz + 7,001,791,044 instructions # 2.46 insn per cycle + 0.975993638 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -165,8 +165,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= @@ -176,15 +176,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 1.895474e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.904073e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.904073e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.951285e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.960917e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.960917e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.875592 sec - 2,541,159,330 cycles # 2.891 GHz - 6,417,191,354 instructions # 2.53 insn per cycle - 0.879698021 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11552) (512y: 43) (512z: 0) +TOTAL : 0.850872 sec + 2,498,585,399 cycles # 2.925 GHz + 6,308,774,444 instructions # 2.52 insn per cycle + 0.854972211 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -192,8 +192,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= @@ -203,15 +203,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.548734e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.554330e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.554330e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.565587e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.571426e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.571426e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.069331 sec - 2,068,634,398 cycles # 1.928 GHz - 3,331,804,154 instructions # 1.61 insn per cycle - 1.073485896 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2408) (512y: 46) (512z:10312) +TOTAL : 1.058377 sec + 2,055,976,932 cycles # 1.936 GHz + 3,279,231,714 instructions # 1.59 insn per cycle + 1.062483891 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -219,8 +219,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779718007E-004 -Relative difference = 4.194411063934945e-08 +Avg ME (F77/C++) = 6.6271952779373838E-004 +Relative difference = 4.193891735414155e-08 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index b125b710bd..8ce6fedd4e 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_19:17:07 +DATE: 2023-10-28_13:08:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.332225e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.378563e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.386102e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.330360e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.378634e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.383801e+05 ) sec^-1 MeanMatrixElemValue = ( 4.159397e-01 +- 3.238804e-01 ) GeV^-4 -TOTAL : 0.457977 sec - 2,007,013,185 cycles # 3.005 GHz - 3,016,655,563 instructions # 1.50 insn per cycle - 0.725068773 seconds time elapsed +TOTAL : 0.457712 sec + 2,028,900,157 cycles # 3.042 GHz + 3,005,011,671 instructions # 1.48 insn per cycle + 0.724345206 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.573671e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 8.635105e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.637758e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.546691e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.615539e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.618553e+05 ) sec^-1 MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 -TOTAL : 1.789355 sec - 6,154,390,786 cycles # 3.059 GHz - 11,779,031,447 instructions # 1.91 insn per cycle - 2.068482222 seconds time elapsed +TOTAL : 1.794327 sec + 6,222,688,386 cycles # 3.079 GHz + 13,066,262,618 instructions # 2.10 insn per cycle + 2.078637061 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,15 +82,15 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.992613e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.993617e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.993617e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.073035e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.074088e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.074088e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 8.239039 sec - 25,101,211,563 cycles # 3.046 GHz - 78,141,605,294 instructions # 3.11 insn per cycle - 8.242885554 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3558) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 7.920868 sec + 24,634,444,908 cycles # 3.109 GHz + 78,134,162,346 instructions # 3.17 insn per cycle + 7.924730928 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 
0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -98,8 +98,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863312764526E-004 -Relative difference = 4.998523613136231e-08 +Avg ME (F77/C++) = 6.6274863266294753E-004 +Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= @@ -108,15 +108,15 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.360647e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.374286e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.374286e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.426264e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.439620e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.439620e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 2.237472 sec - 6,571,783,960 cycles # 2.933 GHz - 20,176,847,169 instructions # 3.07 insn per cycle - 2.241174830 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13749) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.217152 sec + 6,477,828,190 cycles # 2.918 GHz + 
20,122,699,568 instructions # 3.11 insn per cycle + 2.221055188 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -124,8 +124,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861442972011E-004 -Relative difference = 2.1772539563413118e-08 +Avg ME (F77/C++) = 6.6274861460025036E-004 +Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= @@ -134,15 +134,15 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.681723e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.688538e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.688538e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.699966e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.706900e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.706900e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.984343 sec - 2,863,785,178 cycles # 2.900 GHz - 7,111,595,374 instructions # 2.48 insn per cycle - 0.988141267 seconds 
time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11880) (512y: 0) (512z: 0) +TOTAL : 0.973586 sec + 2,840,357,768 cycles # 2.908 GHz + 6,988,961,639 instructions # 2.46 insn per cycle + 0.977359190 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -150,8 +150,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= @@ -160,15 +160,15 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.906951e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.915838e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.915838e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.940736e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.949914e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.949914e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 
3.255521e-01 ) GeV^-4 -TOTAL : 0.868571 sec - 2,534,531,591 cycles # 2.907 GHz - 6,404,093,295 instructions # 2.53 insn per cycle - 0.872424795 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11552) (512y: 43) (512z: 0) +TOTAL : 0.853581 sec + 2,490,276,301 cycles # 2.906 GHz + 6,295,146,517 instructions # 2.53 insn per cycle + 0.857457556 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -176,8 +176,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= @@ -186,15 +186,15 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.558486e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.564692e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.564692e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.565463e+04 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.571759e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.571759e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 1.060964 sec - 2,062,134,932 cycles # 1.938 GHz - 3,317,722,223 instructions # 1.61 insn per cycle - 1.064830329 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2408) (512y: 46) (512z:10312) +TOTAL : 1.057170 sec + 2,052,021,691 cycles # 1.935 GHz + 3,266,951,787 instructions # 1.59 insn per cycle + 1.061150766 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -202,8 +202,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779718007E-004 -Relative difference = 4.194411063934945e-08 +Avg ME (F77/C++) = 6.6271952779373838E-004 +Relative difference = 4.193891735414155e-08 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt index 0197c733f9..571b572aaa 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_19:14:03 +DATE: 2023-10-28_13:05:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.335935e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.379594e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.384424e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.349128e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.398453e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.403644e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.458171 sec - 1,997,983,261 cycles # 2.988 GHz - 3,059,794,719 instructions # 1.53 insn per cycle - 0.725811269 seconds time elapsed +TOTAL : 0.459486 sec + 2,027,052,955 cycles # 3.037 GHz + 3,029,950,855 instructions # 1.49 insn per cycle + 0.726440068 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.578157e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 8.639706e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.642441e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.553265e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.622123e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.625219e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.740586 sec - 5,998,214,102 cycles # 3.054 GHz - 12,259,549,434 instructions # 2.04 insn per cycle - 2.021545903 seconds time elapsed +TOTAL : 1.743283 sec + 6,066,573,695 cycles # 3.087 GHz + 12,650,787,477 instructions # 2.09 insn per cycle + 2.025181321 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,15 +82,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.001178e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.002137e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.002137e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.081230e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.082284e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.082284e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.203084 sec - 25,141,172,926 cycles # 3.064 GHz - 78,142,442,354 instructions # 3.11 insn per cycle - 8.206877433 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3558) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 7.888118 sec + 24,646,440,581 cycles # 3.123 GHz + 78,132,566,770 instructions # 3.17 insn per cycle + 7.892033942 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 
0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -98,8 +98,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863312764526E-004 -Relative difference = 4.998523613136231e-08 +Avg ME (F77/C++) = 6.6274863266294753E-004 +Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= @@ -108,15 +108,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.194824e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.207542e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.207542e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.477290e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.491849e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.491849e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.287147 sec - 6,571,682,415 cycles # 2.870 GHz - 20,177,851,750 instructions # 3.07 insn per cycle - 2.290998385 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13749) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.201176 sec + 6,474,823,242 cycles # 2.937 GHz + 
20,124,709,191 instructions # 3.11 insn per cycle + 2.205114496 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -124,8 +124,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861442972011E-004 -Relative difference = 2.1772539563413118e-08 +Avg ME (F77/C++) = 6.6274861460025036E-004 +Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= @@ -134,15 +134,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.671381e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.678184e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.678184e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.708720e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.715921e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.715921e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.988852 sec - 2,877,121,035 cycles # 2.900 GHz - 7,112,414,105 instructions # 2.47 insn per cycle - 0.992772923 seconds 
time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11880) (512y: 0) (512z: 0) +TOTAL : 0.967357 sec + 2,835,463,985 cycles # 2.921 GHz + 6,991,513,771 instructions # 2.47 insn per cycle + 0.971305594 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -150,8 +150,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= @@ -160,15 +160,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.905382e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.914336e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.914336e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.953668e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.963014e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.963014e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 
2.367611e+00 ) GeV^-4 -TOTAL : 0.868290 sec - 2,533,476,931 cycles # 2.907 GHz - 6,407,633,337 instructions # 2.53 insn per cycle - 0.872075865 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11552) (512y: 43) (512z: 0) +TOTAL : 0.846896 sec + 2,487,744,451 cycles # 2.926 GHz + 6,298,688,372 instructions # 2.53 insn per cycle + 0.850803162 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -176,8 +176,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= @@ -186,15 +186,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.547508e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.553260e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.553260e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.569559e+04 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.575559e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.575559e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.067544 sec - 2,060,630,355 cycles # 1.925 GHz - 3,320,987,634 instructions # 1.61 insn per cycle - 1.071418753 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2408) (512y: 46) (512z:10312) +TOTAL : 1.052912 sec + 2,046,453,586 cycles # 1.938 GHz + 3,268,883,695 instructions # 1.60 insn per cycle + 1.056822517 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -202,8 +202,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779718007E-004 -Relative difference = 4.194411063934945e-08 +Avg ME (F77/C++) = 6.6271952779373838E-004 +Relative difference = 4.193891735414155e-08 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index 52987bd60d..84420cb925 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_19:11:03 +DATE: 2023-10-28_13:02:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -45,14 +45,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.805977e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.405107e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.410125e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.735944e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.378490e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.383692e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.460864 sec - 2,006,899,759 cycles # 2.993 GHz - 3,017,362,425 instructions # 1.50 insn per cycle - 0.728187465 seconds time elapsed +TOTAL : 0.461987 sec + 2,019,341,022 cycles # 3.003 GHz + 3,035,678,392 instructions # 1.50 insn per cycle + 0.729326652 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -63,14 +63,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.501502e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.624461e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.627283e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.489474e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.620676e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.623699e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.814750 sec - 6,240,031,804 cycles # 3.062 GHz - 12,062,681,609 instructions # 1.93 insn per cycle - 2.094320858 seconds time elapsed +TOTAL : 1.822380 sec + 6,280,496,809 cycles # 3.067 GHz + 12,717,918,907 instructions # 2.02 insn per cycle + 2.114324946 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -85,15 +85,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.001113e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.002085e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.002085e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.044051e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.045069e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.045069e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.203609 sec - 25,118,317,039 cycles # 3.061 GHz - 78,142,981,648 
instructions # 3.11 insn per cycle - 8.207429402 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3558) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.031121 sec + 24,802,943,888 cycles # 3.087 GHz + 78,133,794,834 instructions # 3.15 insn per cycle + 8.035042084 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -101,8 +101,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863312764526E-004 -Relative difference = 4.998523613136231e-08 +Avg ME (F77/C++) = 6.6274863266294753E-004 +Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= @@ -111,15 +111,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.323036e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.336228e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.336228e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.497485e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.511473e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.511473e+03 
) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.246931 sec - 6,579,301,389 cycles # 2.924 GHz - 20,176,586,022 instructions # 3.07 insn per cycle - 2.250705274 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13749) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.194911 sec + 6,476,795,370 cycles # 2.946 GHz + 20,124,137,981 instructions # 3.11 insn per cycle + 2.198870375 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -127,8 +127,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861442972011E-004 -Relative difference = 2.1772539563413118e-08 +Avg ME (F77/C++) = 6.6274861460025036E-004 +Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= @@ -137,15 +137,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.674657e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.681528e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.681528e+04 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 1.695476e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.702451e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.702451e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.986857 sec - 2,862,922,647 cycles # 2.892 GHz - 7,112,389,781 instructions # 2.48 insn per cycle - 0.990752111 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11880) (512y: 0) (512z: 0) +TOTAL : 0.974748 sec + 2,836,609,217 cycles # 2.900 GHz + 6,991,479,798 instructions # 2.46 insn per cycle + 0.978581955 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -153,8 +153,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= @@ -163,15 +163,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 1.894478e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.903271e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.903271e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.941700e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.951152e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.951152e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.873227 sec - 2,541,312,245 cycles # 2.899 GHz - 6,407,310,369 instructions # 2.52 insn per cycle - 0.877122463 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11552) (512y: 43) (512z: 0) +TOTAL : 0.852448 sec + 2,487,448,718 cycles # 2.907 GHz + 6,298,548,472 instructions # 2.53 insn per cycle + 0.856362339 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -179,8 +179,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 +Avg ME (F77/C++) = 6.6271938174574524E-004 +Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= @@ -189,15 +189,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.549285e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.555166e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.555166e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.564061e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.570228e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.570228e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.066246 sec - 2,058,021,558 cycles # 1.924 GHz - 3,321,051,164 instructions # 1.61 insn per cycle - 1.070183678 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2408) (512y: 46) (512z:10312) +TOTAL : 1.056307 sec + 2,046,735,304 cycles # 1.932 GHz + 3,268,520,300 instructions # 1.60 insn per cycle + 1.060189386 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -205,8 +205,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779718007E-004 -Relative difference = 4.194411063934945e-08 +Avg ME (F77/C++) = 6.6271952779373838E-004 +Relative difference = 4.193891735414155e-08 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index e3d102e7b5..787629448b 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:41:32 +DATE: 2023-10-28_12:16:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.347885e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.397852e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.402767e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.363659e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.425249e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.431303e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.473915 sec - 2,057,636,111 cycles # 3.008 GHz - 3,034,789,542 instructions # 1.47 insn per cycle - 0.743124266 seconds time elapsed +TOTAL : 0.476123 sec + 2,092,062,584 cycles # 3.022 GHz + 3,128,707,671 instructions # 1.50 insn per cycle + 0.781363036 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.510288e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
8.569100e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.571863e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.479223e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.567586e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.570888e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.706491 sec - 5,801,343,951 cycles # 3.003 GHz - 11,478,639,093 instructions # 1.98 insn per cycle - 1.991446406 seconds time elapsed +TOTAL : 1.722233 sec + 6,009,678,227 cycles # 3.078 GHz + 11,794,267,836 instructions # 1.96 insn per cycle + 2.011269808 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,15 +82,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.032219e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.033197e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.033197e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.083066e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.084131e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.084131e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.077935 sec - 25,002,620,407 cycles # 3.094 GHz - 77,880,023,337 instructions # 3.11 insn per cycle - 8.081833653 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3061) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 7.881423 sec + 24,584,058,384 cycles # 3.118 GHz + 77,859,604,324 instructions # 3.17 insn per cycle + 7.887656228 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3113) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -98,8 +98,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274866250177339E-004 -Relative difference = 5.65798569465384e-08 +Avg ME (F77/C++) = 6.6274866268634797E-004 +Relative difference = 5.630135835748959e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check.exe -p 64 256 1 OMP= @@ -108,15 +108,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.437992e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.452045e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.452045e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.586799e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.601341e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.601341e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.212380 sec - 6,525,641,551 cycles # 2.945 GHz - 20,144,168,186 instructions # 3.09 insn per cycle - 2.216462164 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13439) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.169576 sec + 6,419,007,624 cycles # 2.953 GHz + 20,090,056,448 
instructions # 3.13 insn per cycle + 2.180130900 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13452) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -124,8 +124,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861448331612E-004 -Relative difference = 2.1853408865157068e-08 +Avg ME (F77/C++) = 6.6274861465384638E-004 +Relative difference = 2.211071647257023e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check.exe -p 64 256 1 OMP= @@ -134,15 +134,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.631112e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.637585e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.637585e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.648576e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.655314e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.655314e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.012887 sec - 2,950,530,206 cycles # 2.903 GHz - 7,252,358,943 instructions # 2.46 insn per cycle - 1.016940562 seconds time elapsed -=Symbols 
in CPPProcess.o= (~sse4: 0) (avx2:12263) (512y: 0) (512z: 0) +TOTAL : 1.002656 sec + 2,907,339,890 cycles # 2.887 GHz + 7,134,589,491 instructions # 2.45 insn per cycle + 1.018829694 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:12261) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -150,8 +150,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271939668088170E-004 -Relative difference = 5.008331292535666e-09 +Avg ME (F77/C++) = 6.6271939668077068E-004 +Relative difference = 5.008498817890231e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check.exe -p 64 256 1 OMP= @@ -160,15 +160,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.851739e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.860081e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.860081e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.852798e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.860765e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.860765e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.893117 
sec - 2,605,469,056 cycles # 2.906 GHz - 6,549,528,920 instructions # 2.51 insn per cycle - 0.897080094 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11948) (512y: 26) (512z: 0) +TOTAL : 0.894143 sec + 2,595,831,103 cycles # 2.894 GHz + 6,442,852,308 instructions # 2.48 insn per cycle + 0.909262679 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11276) (512y: 27) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -176,8 +176,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271939668088170E-004 -Relative difference = 5.008331292535666e-09 +Avg ME (F77/C++) = 6.6271939668077068E-004 +Relative difference = 5.008498817890231e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check.exe -p 64 256 1 OMP= @@ -186,15 +186,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.502391e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.508119e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.508119e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.497098e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.502652e+04 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.502652e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.099172 sec - 2,123,741,528 cycles # 1.926 GHz - 3,480,482,498 instructions # 1.64 insn per cycle - 1.103291837 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2903) (512y: 22) (512z:10276) +TOTAL : 1.104659 sec + 2,130,443,648 cycles # 1.924 GHz + 3,431,736,168 instructions # 1.61 insn per cycle + 1.116162652 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2912) (512y: 22) (512z: 9647) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -202,8 +202,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952032316561E-004 -Relative difference = 3.066631594207157e-08 +Avg ME (F77/C++) = 6.6271952032322112E-004 +Relative difference = 3.066639970473621e-08 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index 31738cc5a1..e834e4352b 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:57:39 +DATE: 2023-10-28_12:48:51 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.596965e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.633834e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.637880e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.611186e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.652246e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.656683e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.481755 sec - 2,117,394,113 cycles # 2.997 GHz - 3,211,903,752 instructions # 1.52 insn per cycle - 0.763834850 seconds time elapsed +TOTAL : 0.483998 sec + 2,133,361,842 cycles # 3.024 GHz + 3,230,847,730 instructions # 1.51 insn per cycle + 0.765662726 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.702969e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
7.752015e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.754390e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.742805e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.802168e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.804740e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.845325 sec - 6,364,196,440 cycles # 3.044 GHz - 12,697,584,410 instructions # 2.00 insn per cycle - 2.147798753 seconds time elapsed +TOTAL : 1.850637 sec + 6,419,942,603 cycles # 3.075 GHz + 13,456,993,119 instructions # 2.10 insn per cycle + 2.144581389 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.860218e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.861043e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.861043e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.894043e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.894913e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.894913e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 27.993515 sec - 85,967,133,243 cycles # 3.071 GHz - 135,563,627,438 instructions # 1.58 insn per cycle - 27.997422911 seconds time elapsed +TOTAL : 27.832817 sec + 86,243,642,188 cycles # 3.098 GHz + 135,564,087,009 instructions # 1.57 insn per cycle + 27.836819083 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:15486) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.196137e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.208997e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.208997e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.222408e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.235825e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.235825e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.287867 sec - 6,773,769,099 cycles # 2.957 GHz - 19,387,600,160 instructions # 2.86 insn per cycle - 2.291838045 seconds time elapsed +TOTAL : 2.280065 sec + 6,777,455,200 cycles # 2.969 GHz + 19,387,279,978 instructions # 2.86 insn per cycle + 2.284257922 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:69680) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.513484e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.519092e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.519092e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.519389e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.525011e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.525011e+04 ) sec^-1 MeanMatrixElemValue = ( 
4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.091418 sec - 3,173,929,820 cycles # 2.900 GHz - 6,808,660,445 instructions # 2.15 insn per cycle - 1.095213929 seconds time elapsed +TOTAL : 1.087364 sec + 3,177,183,668 cycles # 2.913 GHz + 6,808,689,692 instructions # 2.14 insn per cycle + 1.091503470 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:49077) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.813250e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.821399e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.821399e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.822546e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.830764e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.830764e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.912193 sec - 2,648,785,634 cycles # 2.893 GHz - 5,986,998,268 instructions # 2.26 insn per cycle - 0.916244855 seconds time elapsed +TOTAL : 0.907774 sec + 2,649,067,915 cycles # 2.907 GHz + 5,986,896,271 instructions # 2.26 insn per cycle + 0.911854373 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:42677) (512y: 11) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.539260e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.545021e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.545021e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.530439e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.536016e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.536016e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.073101 sec - 2,071,594,759 cycles # 1.925 GHz - 3,501,390,779 instructions # 1.69 insn per cycle - 1.077005935 seconds time elapsed +TOTAL : 1.080207 sec + 2,073,980,394 cycles # 1.914 GHz + 3,501,334,924 instructions # 1.69 insn per cycle + 1.084229483 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5198) (512y: 3) (512z:44822) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index cddff811bf..20423fd7e0 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:58:30 +DATE: 2023-10-28_12:49:42 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.566704e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.601747e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.605700e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.565977e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.605591e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.609830e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.479762 sec - 2,129,975,460 cycles # 3.025 GHz - 3,262,724,571 instructions # 1.53 insn per cycle - 0.761351418 seconds time elapsed +TOTAL : 0.483247 sec + 2,122,376,478 cycles # 3.016 GHz + 3,184,647,472 instructions # 1.50 insn per cycle + 0.763868633 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.654164e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
7.702666e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.704765e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.641371e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.700550e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.703083e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.853079 sec - 6,372,411,967 cycles # 3.059 GHz - 13,261,029,776 instructions # 2.08 insn per cycle - 2.143146478 seconds time elapsed +TOTAL : 1.858297 sec + 6,447,116,808 cycles # 3.079 GHz + 12,363,047,673 instructions # 1.92 insn per cycle + 2.150450600 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.859455e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.860302e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.860302e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.905776e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.906669e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.906669e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 27.996994 sec - 86,063,307,798 cycles # 3.074 GHz - 135,905,248,930 instructions # 1.58 insn per cycle - 28.000969025 seconds time elapsed +TOTAL : 27.777840 sec + 86,048,956,183 cycles # 3.097 GHz + 135,905,128,444 instructions # 1.58 insn per cycle + 27.781792264 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:15910) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.111595e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.124605e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.124605e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.167806e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.180707e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.180707e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.313738 sec - 6,851,236,852 cycles # 2.957 GHz - 19,439,512,273 instructions # 2.84 insn per cycle - 2.317708744 seconds time elapsed +TOTAL : 2.295484 sec + 6,842,998,319 cycles # 2.977 GHz + 19,439,245,864 instructions # 2.84 insn per cycle + 2.299471036 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:69722) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.540866e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.546584e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.546584e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.556585e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.562569e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.562569e+04 ) sec^-1 MeanMatrixElemValue = ( 
4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.071791 sec - 3,105,453,036 cycles # 2.888 GHz - 6,719,669,630 instructions # 2.16 insn per cycle - 1.075697475 seconds time elapsed +TOTAL : 1.061484 sec + 3,108,451,526 cycles # 2.919 GHz + 6,720,110,112 instructions # 2.16 insn per cycle + 1.065616350 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:47667) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.816139e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.824278e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.824278e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.851774e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.860510e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.860510e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.910723 sec - 2,625,346,963 cycles # 2.872 GHz - 5,970,291,755 instructions # 2.27 insn per cycle - 0.914693861 seconds time elapsed +TOTAL : 0.893334 sec + 2,622,654,556 cycles # 2.924 GHz + 5,970,160,518 instructions # 2.28 insn per cycle + 0.897509853 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:41842) (512y: 13) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.541127e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.546712e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.546712e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.536258e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.541992e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.541992e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.071854 sec - 2,074,441,089 cycles # 1.930 GHz - 3,494,899,079 instructions # 1.68 insn per cycle - 1.075757314 seconds time elapsed +TOTAL : 1.075819 sec + 2,079,698,802 cycles # 1.927 GHz + 3,494,793,411 instructions # 1.68 insn per cycle + 1.079910024 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4162) (512y: 4) (512z:44465) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 7ad6f63659..fc3929dd7c 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:42:00 +DATE: 2023-10-28_12:17:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.490294e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.513614e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.515507e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.477554e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.512240e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.514677e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.518671 sec - 2,226,766,382 cycles # 2.986 GHz - 3,540,179,326 instructions # 1.59 insn per cycle - 0.807219432 seconds time elapsed +TOTAL : 0.520333 sec + 2,274,021,507 cycles # 3.029 GHz + 3,581,381,947 instructions # 1.57 insn per cycle + 0.824092689 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.120867e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
4.148001e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.149117e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.124375e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.158377e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.159762e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.019190 sec - 10,060,652,484 cycles # 3.078 GHz - 22,177,348,496 instructions # 2.20 insn per cycle - 3.325766752 seconds time elapsed +TOTAL : 3.045972 sec + 10,220,318,610 cycles # 3.084 GHz + 23,185,856,250 instructions # 2.27 insn per cycle + 3.370823025 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,15 +82,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.954175e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.955102e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.955102e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.960978e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.961962e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.961962e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.401551 sec - 26,112,965,520 cycles # 3.107 GHz - 79,187,055,919 instructions # 3.03 insn per cycle - 8.405522832 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4746) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.372619 sec + 25,927,701,919 cycles # 3.096 GHz + 79,445,735,322 instructions # 3.06 insn per cycle + 8.378788971 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4857) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -108,15 +108,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.704056e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.707340e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.707340e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.770608e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.774090e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.774090e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.436595 sec - 12,893,512,565 cycles # 2.905 GHz - 38,578,382,892 instructions # 2.99 insn per cycle - 4.440842197 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13136) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.359859 sec + 12,705,941,284 cycles # 2.912 GHz + 38,554,881,381 instructions # 3.03 insn per cycle + 4.371809091 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13161) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -134,15 +134,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.529594e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.548137e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.548137e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.729702e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.747261e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.747261e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.931885 sec - 5,592,758,933 cycles # 2.891 GHz - 13,704,166,637 instructions # 2.45 insn per cycle - 1.936090809 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11245) (512y: 0) (512z: 0) +TOTAL : 1.887926 sec + 5,515,713,882 cycles # 2.915 GHz + 13,486,487,118 instructions # 2.45 insn per cycle + 1.900317335 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11242) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -160,15 +160,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.692246e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.714062e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.714062e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.908364e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.931425e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.931425e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.701537 sec - 4,935,885,889 cycles # 2.895 GHz - 12,346,516,315 instructions # 2.50 insn per cycle - 1.705790521 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10897) (512y: 79) (512z: 0) +TOTAL : 1.665049 sec + 4,873,545,679 cycles # 2.921 GHz + 12,139,621,615 instructions # 2.49 insn per cycle + 1.681410579 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10154) (512y: 79) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -186,15 +186,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.612609e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.626629e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.626629e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.696915e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.711299e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.711299e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.163571 sec - 4,153,048,865 cycles # 1.917 GHz - 6,440,968,926 instructions # 1.55 insn per cycle - 2.167665946 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1803) (512y: 93) (512z:10092) +TOTAL : 2.140292 sec + 4,143,085,794 cycles # 1.932 GHz + 6,339,665,426 instructions # 1.53 insn per cycle + 2.150706545 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1802) (512y: 93) (512z: 9358) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index f056a45974..b6547b5838 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-25_18:42:36 +DATE: 2023-10-28_12:17:43 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.481563e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.505145e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.507791e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.464615e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.498511e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.500865e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.521573 sec - 2,153,426,616 cycles # 2.862 GHz - 3,368,297,517 instructions # 1.56 insn per cycle - 0.812765365 seconds time elapsed +TOTAL : 0.519765 sec + 2,251,399,245 cycles # 2.999 GHz + 3,530,654,299 instructions # 1.57 insn per cycle + 0.823876763 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.140316e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
4.167722e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.168848e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.123316e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.157417e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.158825e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.005546 sec - 10,001,668,357 cycles # 3.073 GHz - 22,545,107,075 instructions # 2.25 insn per cycle - 3.311305896 seconds time elapsed +TOTAL : 3.029825 sec + 10,142,198,468 cycles # 3.086 GHz + 22,026,892,679 instructions # 2.17 insn per cycle + 3.342406718 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,15 +82,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.946429e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.947329e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.947329e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.959558e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.960509e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.960509e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.435831 sec - 26,126,854,101 cycles # 3.097 GHz - 79,204,576,073 instructions # 3.03 insn per cycle - 8.439924281 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4401) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.380125 sec + 25,929,437,882 cycles # 3.094 GHz + 79,456,770,369 instructions # 3.06 insn per cycle + 8.386665351 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4504) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -108,15 +108,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.694235e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.697554e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.697554e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.758484e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.761785e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.761785e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.448208 sec - 12,894,092,255 cycles # 2.897 GHz - 38,538,252,439 instructions # 2.99 insn per cycle - 4.452458550 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12903) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.373624 sec + 12,641,537,268 cycles # 2.889 GHz + 38,525,722,075 instructions # 3.05 insn per cycle + 4.385176363 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:12928) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -134,14 +134,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.136495e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.152364e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.152364e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.515683e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.533176e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.533176e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.024697 sec - 5,646,666,731 cycles # 2.789 GHz - 13,825,634,230 instructions # 2.45 insn per cycle - 2.029023847 seconds time elapsed +TOTAL : 1.935232 sec + 5,559,372,538 cycles # 2.867 GHz + 13,612,500,451 instructions # 2.45 insn per cycle + 1.948093977 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11327) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest.exe @@ -160,15 +160,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.556941e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.579305e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.579305e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.732819e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.754541e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.754541e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.725082 sec - 4,994,411,865 cycles # 2.889 GHz - 12,477,409,386 instructions # 2.50 insn per cycle - 1.729413379 seconds time elapsed -=Symbols in CPPProcess.o= 
(~sse4: 0) (avx2:10888) (512y: 239) (512z: 0) +TOTAL : 1.694082 sec + 4,916,495,574 cycles # 2.895 GHz + 12,276,847,031 instructions # 2.50 insn per cycle + 1.703505035 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10143) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -186,15 +186,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.605322e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.619068e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.619068e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.657791e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.671798e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.671798e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.165836 sec - 4,156,250,470 cycles # 1.916 GHz - 6,542,526,880 instructions # 1.57 insn per cycle - 2.169952357 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1628) (512y: 191) (512z:10036) +TOTAL : 2.151504 sec + 4,144,116,107 cycles # 1.923 GHz + 6,445,788,205 instructions # 1.56 insn per cycle + 2.162091115 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1627) (512y: 191) (512z: 9356) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 55b25786bd..c6205ff247 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_18:44:51 +DATE: 2023-10-28_12:19:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.070749e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.071143e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.071250e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.071870e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.072287e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.072394e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.417941 sec - 8,377,415,337 cycles # 3.062 GHz - 18,838,612,351 instructions # 2.25 insn per cycle - 2.794089225 seconds time elapsed +TOTAL : 2.452254 sec + 8,420,557,360 cycles # 3.080 GHz + 18,911,118,625 instructions # 2.25 insn per cycle + 2.866211248 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": 
sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.235176e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.237005e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.237223e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.214887e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.217088e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.217267e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.993841 sec - 13,055,750,655 cycles # 3.026 GHz - 31,160,662,070 instructions # 2.39 insn per cycle - 4.373705613 seconds time elapsed +TOTAL : 3.992517 sec + 13,332,757,802 cycles # 3.088 GHz + 28,702,527,926 instructions # 2.15 insn per cycle + 4.373029872 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,15 +82,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.897548e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.897775e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.897775e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.576463e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.576710e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.576710e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 
9.825549e-06 ) GeV^-6 -TOTAL : 6.705050 sec - 19,396,394,057 cycles # 2.892 GHz - 54,051,876,234 instructions # 2.79 insn per cycle - 6.708932383 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32354) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.160763 sec + 18,791,392,134 cycles # 3.054 GHz + 53,919,306,390 instructions # 2.87 insn per cycle + 6.167140362 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32447) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -108,15 +108,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.653870e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.653969e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.653969e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.681047e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.681139e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.681139e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.197917 sec - 9,907,124,994 cycles # 3.095 GHz - 27,081,765,597 instructions # 2.73 insn per cycle - 3.202038670 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96405) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.151993 sec + 9,791,713,411 cycles # 3.111 GHz + 27,094,036,390 instructions # 2.77 insn per cycle + 3.167590114 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96441) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -134,15 +134,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.542258e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.542698e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.542698e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.618906e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.619340e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.619340e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.497561 sec - 4,341,680,359 cycles # 2.893 GHz - 9,666,416,740 instructions # 2.23 insn per cycle - 1.501545706 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84384) (512y: 0) (512z: 0) +TOTAL : 1.465759 sec + 4,246,522,704 cycles # 2.894 GHz + 9,563,071,696 instructions # 2.25 insn per cycle + 1.475977407 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -160,15 +160,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.866674e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.867185e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.867185e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.853446e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.853992e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.853992e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.370361 sec - 3,840,509,187 cycles # 2.796 GHz - 8,617,030,376 instructions # 2.24 insn per cycle - 1.374450501 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84025) (512y: 89) (512z: 0) +TOTAL : 1.375453 sec + 3,787,170,163 cycles # 2.751 GHz + 8,487,038,074 instructions # 2.24 insn per cycle + 1.387462037 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -186,15 +186,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.733060e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.733603e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.733603e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.787651e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.788212e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.788212e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.420204 sec - 2,707,945,792 cycles # 1.903 GHz - 4,335,943,514 instructions # 1.60 insn per cycle - 1.424239943 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2300) (512y: 103) (512z:83067) +TOTAL : 1.402148 sec + 2,691,943,688 cycles # 1.919 GHz + 4,274,166,135 instructions # 1.59 insn per cycle + 1.414716458 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 94a23e2f12..8107c36efe 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_19:06:41 +DATE: 2023-10-28_12:57:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.066351e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.067339e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.067339e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.066347e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.067227e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.067227e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.354612 sec - 8,162,423,266 cycles # 3.056 GHz - 18,099,441,211 instructions # 2.22 insn per cycle - 2.730850635 seconds time elapsed +TOTAL : 2.360600 sec + 8,279,327,450 cycles # 3.086 GHz + 16,492,477,688 instructions # 1.99 insn per cycle + 2.742127313 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,14 +72,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.246162e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.277852e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.277852e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.226166e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.258649e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.258649e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.971347 sec - 13,204,436,201 cycles # 3.078 GHz - 30,282,503,163 instructions # 2.29 insn per cycle - 4.348676377 seconds time elapsed +TOTAL : 3.971227 sec + 13,317,272,497 cycles # 3.104 GHz + 28,133,706,677 instructions # 2.11 insn per cycle + 4.349584526 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -95,15 +95,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.244919e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.245139e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.245139e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.644722e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.644991e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.644991e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.408828 sec - 19,448,715,885 cycles # 3.033 GHz - 
54,050,853,106 instructions # 2.78 insn per cycle - 6.412664226 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32354) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.112405 sec + 18,789,653,851 cycles # 3.073 GHz + 53,915,458,578 instructions # 2.87 insn per cycle + 6.116157623 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32447) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -122,15 +122,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.650316e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.650406e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.650406e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.677688e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.677784e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.677784e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.204606 sec - 9,890,944,577 cycles # 3.084 GHz - 27,082,213,615 instructions # 2.74 insn per cycle - 3.208413447 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96405) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.152699 sec + 9,805,914,819 cycles # 3.107 GHz + 27,093,723,279 instructions # 2.76 insn per cycle + 3.156638918 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96441) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -149,15 +149,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.546707e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.547140e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.547140e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.603040e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.603470e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.603470e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.493585 sec - 4,325,320,238 cycles # 2.890 GHz - 9,667,464,688 instructions # 2.24 insn per cycle - 1.497469046 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84384) (512y: 0) (512z: 0) +TOTAL : 1.470078 sec + 4,266,632,570 cycles # 2.896 GHz + 9,562,227,058 instructions # 2.24 insn per cycle + 1.474010313 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -176,15 +176,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.037834e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.038367e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.038367e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.169608e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.170175e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.170175e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.313129 sec - 3,813,971,496 cycles # 2.897 GHz - 8,617,412,652 instructions # 2.26 insn per cycle - 1.316983127 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84025) (512y: 89) (512z: 0) +TOTAL : 1.271323 sec + 3,724,193,335 cycles # 2.922 GHz + 8,486,137,795 instructions # 2.28 insn per cycle + 1.275148729 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -203,15 +203,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.743342e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.743918e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.743918e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.791970e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.792558e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.792558e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.415154 sec - 2,707,174,442 cycles # 1.909 GHz - 4,336,832,605 instructions # 1.60 insn per cycle - 1.419043941 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2300) (512y: 103) (512z:83067) +TOTAL : 1.397787 sec + 2,697,774,472 cycles # 1.926 GHz + 4,274,022,064 instructions # 1.58 insn per cycle + 1.401674381 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index e3241a7638..71518cb1d4 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_18:45:54 +DATE: 2023-10-28_12:20:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.058972e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.059386e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.059482e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.068790e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.069191e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.069278e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.421465 sec - 8,436,981,615 cycles # 3.067 GHz - 19,028,712,837 instructions # 2.26 insn per cycle - 2.810095670 seconds time elapsed +TOTAL : 2.445436 sec + 8,480,438,455 cycles # 3.100 GHz + 18,702,623,557 instructions # 2.21 insn per cycle + 2.842114287 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.255956e+03 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 9.257794e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.257987e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.244121e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.246349e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.246537e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.982903 sec - 13,219,371,069 cycles # 3.063 GHz - 29,416,443,528 instructions # 2.23 insn per cycle - 4.375059399 seconds time elapsed +TOTAL : 3.982941 sec + 13,359,085,274 cycles # 3.097 GHz + 31,355,023,643 instructions # 2.35 insn per cycle + 4.372267861 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,15 +82,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.015099e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.015315e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.015315e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.736509e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.736803e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.736803e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.593815 sec - 19,095,708,850 cycles # 2.895 GHz - 54,047,292,212 instructions # 2.83 insn per cycle - 6.597605539 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:31965) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.052404 sec + 18,740,603,980 cycles # 3.097 GHz + 53,924,964,923 instructions # 2.88 insn per cycle + 6.058366205 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32062) (avx2: 0) (512y: 0) 
(512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -108,15 +108,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.634946e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.635033e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.635033e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.673532e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.673621e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.673621e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.235048 sec - 10,011,189,889 cycles # 3.092 GHz - 27,077,379,591 instructions # 2.70 insn per cycle - 3.239059369 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96257) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.163162 sec + 9,814,413,597 cycles # 3.103 GHz + 27,091,104,327 instructions # 2.76 insn per cycle + 3.176882467 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96284) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -134,15 +134,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.540865e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.541284e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.541284e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.627999e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.628425e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.628425e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.495827 sec - 4,323,882,817 cycles # 2.884 GHz - 9,677,765,192 instructions # 2.24 insn per cycle - 1.499825664 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84456) (512y: 0) (512z: 0) +TOTAL : 1.460351 sec + 4,255,793,582 cycles # 2.906 GHz + 9,562,618,143 instructions # 2.25 insn per cycle + 1.470731981 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84478) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -160,15 +160,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.986806e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.987336e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.987336e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.164521e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.165096e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.165096e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.330112 sec - 3,818,292,084 cycles # 2.864 GHz - 8,626,392,875 instructions # 2.26 insn per cycle - 1.334108022 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83903) (512y: 239) (512z: 0) +TOTAL : 1.274408 sec + 3,702,081,595 cycles # 2.900 GHz + 8,486,823,882 instructions # 2.29 insn per cycle + 1.284572895 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:80014) (512y: 241) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -186,15 +186,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.736329e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.736878e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.736878e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.775460e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.776104e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.776104e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.419053 sec - 2,712,716,906 cycles # 1.907 GHz - 4,344,880,705 instructions # 1.60 insn per cycle - 1.423142145 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2177) (512y: 185) (512z:83030) +TOTAL : 1.408391 sec + 2,699,143,093 cycles # 1.918 GHz + 4,277,591,937 instructions # 1.58 insn per cycle + 1.420662603 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2169) (512y: 187) (512z:79110) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 6327c32a36..9dc310742d 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_18:46:57 +DATE: 2023-10-28_12:21:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.757288e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.758127e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.758502e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.766191e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.767057e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.767366e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.655983 sec - 5,859,016,098 cycles # 3.037 GHz - 12,599,305,189 instructions # 2.15 insn per cycle - 1.988634806 seconds time elapsed +TOTAL : 1.692928 sec + 5,925,290,822 cycles # 3.058 GHz + 11,860,098,541 instructions # 2.00 insn per cycle + 2.048557571 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.346728e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 2.347386e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.347472e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.317262e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.318069e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.318159e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.915477 sec - 6,698,649,731 cycles # 3.061 GHz - 13,457,604,803 instructions # 2.01 insn per cycle - 2.247936467 seconds time elapsed +TOTAL : 1.922569 sec + 6,794,142,356 cycles # 3.085 GHz + 14,404,197,197 instructions # 2.12 insn per cycle + 2.259188183 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,15 +82,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.909786e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.910066e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.910066e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.093620e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.093900e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.093900e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.942364 sec - 18,295,731,836 cycles # 3.078 GHz - 53,640,525,145 instructions # 2.93 insn per cycle - 5.946250751 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20286) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.812755 sec + 17,848,464,955 cycles # 3.071 GHz + 53,589,690,094 instructions # 3.00 insn per cycle + 5.819102168 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 
0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -98,8 +98,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087551509E-003 -Relative difference = 2.119780432912131e-08 +Avg ME (F77/C++) = 9.8479612087541066E-003 +Relative difference = 2.1197698286506752e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe -p 1 256 2 OMP= @@ -108,15 +108,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.560034e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.560473e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.560473e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.603564e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.604045e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.604045e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.488735 sec - 4,616,421,294 cycles # 3.094 GHz - 13,762,957,080 instructions # 2.98 insn per cycle - 1.492690614 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96921) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.474993 sec + 4,564,620,966 cycles # 3.098 GHz + 
13,764,219,630 instructions # 3.02 insn per cycle + 1.487863434 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -124,8 +124,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.847955e-03 -Avg ME (F77/C++) = 9.8479546896367235E-003 -Relative difference = 3.1515505172940424e-08 +Avg ME (F77/C++) = 9.8479546896527003E-003 +Relative difference = 3.151388282563952e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe -p 1 256 2 OMP= @@ -134,15 +134,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.154668e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.156604e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.156604e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.193712e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.195612e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.195612e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.743016 sec - 2,158,936,332 cycles # 2.892 GHz - 4,868,873,872 instructions # 2.26 insn per cycle - 0.746953594 seconds 
time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84898) (512y: 0) (512z: 0) +TOTAL : 0.740890 sec + 2,143,163,497 cycles # 2.884 GHz + 4,818,517,443 instructions # 2.25 insn per cycle + 0.752590497 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -150,8 +150,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091246E-003 -Relative difference = 1.8588029579156084e-08 +Avg ME (F77/C++) = 9.8929728161070551E-003 +Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe -p 1 256 2 OMP= @@ -160,15 +160,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.993469e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.995570e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.995570e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.737224e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.739389e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.739389e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 
9.826767e-06 ) GeV^-6 -TOTAL : 0.665634 sec - 1,930,674,595 cycles # 2.886 GHz - 4,341,032,805 instructions # 2.25 insn per cycle - 0.669411803 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84581) (512y: 44) (512z: 0) +TOTAL : 0.690292 sec + 1,933,646,307 cycles # 2.799 GHz + 4,275,590,953 instructions # 2.21 insn per cycle + 0.703133859 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -176,8 +176,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091246E-003 -Relative difference = 1.8588029579156084e-08 +Avg ME (F77/C++) = 9.8929728161070551E-003 +Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe -p 1 256 2 OMP= @@ -186,15 +186,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.422100e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.424326e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.424326e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.583424e+02 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 7.585692e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.585692e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.716875 sec - 1,362,810,793 cycles # 1.892 GHz - 2,191,758,925 instructions # 1.61 insn per cycle - 0.720813478 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2896) (512y: 47) (512z:83271) +TOTAL : 0.704935 sec + 1,355,788,663 cycles # 1.924 GHz + 2,160,286,717 instructions # 1.59 insn per cycle + 0.718020553 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2878) (512y: 49) (512z:79298) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -202,8 +202,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.892981e-03 -Avg ME (F77/C++) = 9.8929811982676284E-003 -Relative difference = 2.004124217057488e-08 +Avg ME (F77/C++) = 9.8929811982958280E-003 +Relative difference = 2.0044092642523172e-08 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index 3a8d1c9eac..b2fb322b17 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done 
for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_19:07:44 +DATE: 2023-10-28_12:58:43 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.793927e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.795607e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.795607e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.797057e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.798838e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.798838e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187094e-05 +- 9.825664e-06 ) GeV^-6 -TOTAL : 1.593125 sec - 5,711,497,040 cycles # 3.065 GHz - 11,071,875,711 instructions # 1.94 insn per cycle - 1.922969062 seconds time elapsed +TOTAL : 1.588978 sec + 5,768,872,372 cycles # 3.086 GHz + 12,418,111,463 instructions # 2.15 insn per cycle + 1.925967720 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,14 +72,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.332193e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.344979e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.344979e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.292423e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.304760e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.304760e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856441e-04 +- 8.331096e-05 ) GeV^-6 -TOTAL : 1.859000 sec - 6,562,180,386 cycles # 3.077 GHz - 14,027,341,556 instructions # 2.14 insn per cycle - 2.188744808 seconds time elapsed +TOTAL : 1.888734 sec + 6,669,230,038 cycles # 3.083 GHz + 13,356,652,627 instructions # 2.00 insn per cycle + 2.222852759 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -95,15 +95,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.934162e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.934430e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.934430e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.069650e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.069942e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.069942e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.914443 sec - 18,289,747,509 cycles # 3.091 GHz - 
53,640,880,499 instructions # 2.93 insn per cycle - 5.918348520 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20286) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.826523 sec + 17,892,365,561 cycles # 3.071 GHz + 53,591,944,867 instructions # 3.00 insn per cycle + 5.830371536 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -111,8 +111,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087551509E-003 -Relative difference = 2.119780432912131e-08 +Avg ME (F77/C++) = 9.8479612087541066E-003 +Relative difference = 2.1197698286506752e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= @@ -122,15 +122,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.554596e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.555024e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.555024e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.615160e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.615596e+02 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.615596e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.490597 sec - 4,617,566,850 cycles # 3.091 GHz - 13,763,927,839 instructions # 2.98 insn per cycle - 1.494414225 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96921) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.465426 sec + 4,558,917,635 cycles # 3.104 GHz + 13,763,183,900 instructions # 3.02 insn per cycle + 1.469300644 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -138,8 +138,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.847955e-03 -Avg ME (F77/C++) = 9.8479546896367235E-003 -Relative difference = 3.1515505172940424e-08 +Avg ME (F77/C++) = 9.8479546896527003E-003 +Relative difference = 3.151388282563952e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= @@ -149,15 +149,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.178518e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.180233e+02 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 7.180233e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.255421e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.257318e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.257318e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.740375 sec - 2,151,024,422 cycles # 2.893 GHz - 4,869,911,860 instructions # 2.26 insn per cycle - 0.744208061 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84898) (512y: 0) (512z: 0) +TOTAL : 0.732719 sec + 2,140,098,411 cycles # 2.908 GHz + 4,817,587,358 instructions # 2.25 insn per cycle + 0.736572783 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -165,8 +165,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091246E-003 -Relative difference = 1.8588029579156084e-08 +Avg ME (F77/C++) = 9.8929728161070551E-003 +Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= @@ -176,15 +176,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.974786e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.976961e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.976961e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.330916e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.333248e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.333248e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.667174 sec - 1,931,553,706 cycles # 2.882 GHz - 4,342,018,470 instructions # 2.25 insn per cycle - 0.670962833 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84581) (512y: 44) (512z: 0) +TOTAL : 0.639274 sec + 1,869,539,608 cycles # 2.909 GHz + 4,274,774,583 instructions # 2.29 insn per cycle + 0.643187647 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -192,8 +192,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091246E-003 -Relative difference = 1.8588029579156084e-08 +Avg ME (F77/C++) = 9.8929728161070551E-003 +Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= @@ -203,15 +203,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.427390e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.429642e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.429642e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.637115e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.639420e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.639420e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.716682 sec - 1,362,755,127 cycles # 1.894 GHz - 2,192,432,791 instructions # 1.61 insn per cycle - 0.720592858 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2896) (512y: 47) (512z:83271) +TOTAL : 0.697331 sec + 1,353,326,359 cycles # 1.932 GHz + 2,159,426,349 instructions # 1.60 insn per cycle + 0.701134241 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2878) (512y: 49) (512z:79298) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -219,8 +219,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.892981e-03 -Avg ME (F77/C++) = 9.8929811982676284E-003 -Relative difference = 2.004124217057488e-08 +Avg ME (F77/C++) = 9.8929811982958280E-003 +Relative difference = 2.0044092642523172e-08 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index ea39ad8994..6397113aec 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_18:47:44 +DATE: 2023-10-28_12:22:12 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.770785e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.771818e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.772102e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.773318e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.774160e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.774427e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.644101 sec - 5,902,851,310 cycles # 3.071 GHz - 12,347,610,066 instructions # 2.09 insn per cycle - 1.979279139 seconds time elapsed +TOTAL : 1.691164 sec + 5,903,886,418 cycles # 3.060 GHz + 12,160,870,690 instructions # 2.06 insn per cycle + 2.042006435 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.344670e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 2.345330e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.345408e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.324828e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.325611e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.325701e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.896839 sec - 6,669,880,941 cycles # 3.074 GHz - 13,819,367,336 instructions # 2.07 insn per cycle - 2.228526429 seconds time elapsed +TOTAL : 1.915037 sec + 6,720,144,465 cycles # 3.071 GHz + 13,982,732,338 instructions # 2.08 insn per cycle + 2.247579041 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,15 +82,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.942946e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.943229e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.943229e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.040248e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.040519e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.040519e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.908590 sec - 18,230,304,521 cycles # 3.084 GHz - 53,620,524,232 instructions # 2.94 insn per cycle - 5.912442239 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20241) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.845855 sec + 17,904,729,960 cycles # 3.064 GHz + 53,581,464,420 instructions # 2.99 insn per cycle + 5.852779763 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20206) (avx2: 0) (512y: 0) (512z: 
0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -98,8 +98,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087572898E-003 -Relative difference = 2.1198021522715588e-08 +Avg ME (F77/C++) = 9.8479612087582491E-003 +Relative difference = 2.1198118933954545e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check.exe -p 1 256 2 OMP= @@ -108,15 +108,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.576079e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.576533e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.576533e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.592707e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.593153e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.593153e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.481262 sec - 4,588,697,172 cycles # 3.091 GHz - 13,755,977,699 instructions # 3.00 insn per cycle - 1.485242600 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96593) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.477239 sec + 4,555,009,734 cycles # 3.081 GHz + 
13,757,278,633 instructions # 3.02 insn per cycle + 1.489285896 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96606) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -124,8 +124,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.847955e-03 -Avg ME (F77/C++) = 9.8479546896065809E-003 -Relative difference = 3.151856596628469e-08 +Avg ME (F77/C++) = 9.8479546896225560E-003 +Relative difference = 3.151694379513441e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check.exe -p 1 256 2 OMP= @@ -134,15 +134,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.020825e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.022661e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.022661e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.103208e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.104897e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.104897e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.756593 sec - 2,190,032,975 cycles # 2.882 GHz - 4,877,215,136 instructions # 2.23 insn per cycle - 0.760480627 seconds 
time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:85321) (512y: 0) (512z: 0) +TOTAL : 0.750606 sec + 2,150,954,766 cycles # 2.860 GHz + 4,820,576,058 instructions # 2.24 insn per cycle + 0.762790911 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:85359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -150,8 +150,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091923E-003 -Relative difference = 1.85880227405429e-08 +Avg ME (F77/C++) = 9.8929728161070967E-003 +Relative difference = 1.8588234562202478e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check.exe -p 1 256 2 OMP= @@ -160,15 +160,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.993268e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.995423e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.995423e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.251964e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.254359e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.254359e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 
9.826767e-06 ) GeV^-6 -TOTAL : 0.665169 sec - 1,931,098,294 cycles # 2.889 GHz - 4,348,628,190 instructions # 2.25 insn per cycle - 0.669029492 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84987) (512y: 24) (512z: 0) +TOTAL : 0.646435 sec + 1,876,373,828 cycles # 2.895 GHz + 4,277,306,801 instructions # 2.28 insn per cycle + 0.658084791 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:81075) (512y: 26) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -176,8 +176,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091923E-003 -Relative difference = 1.85880227405429e-08 +Avg ME (F77/C++) = 9.8929728161070967E-003 +Relative difference = 1.8588234562202478e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check.exe -p 1 256 2 OMP= @@ -186,15 +186,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.452710e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.455265e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.455265e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.508540e+02 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 7.511102e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.511102e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.713644 sec - 1,367,244,738 cycles # 1.907 GHz - 2,200,694,530 instructions # 1.61 insn per cycle - 0.717609354 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3502) (512y: 32) (512z:83441) +TOTAL : 0.711485 sec + 1,359,019,145 cycles # 1.910 GHz + 2,166,493,278 instructions # 1.59 insn per cycle + 0.722274294 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3475) (512y: 34) (512z:79492) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -202,8 +202,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 9.892981e-03 -Avg ME (F77/C++) = 9.8929811982957326E-003 -Relative difference = 2.0044082998332894e-08 +Avg ME (F77/C++) = 9.8929811982955140E-003 +Relative difference = 2.0044060904369713e-08 OK (relative difference <= 5E-3) ========================================================================= diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 7bedc1f54b..9f2163d6ce 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_18:48:30 +DATE: 2023-10-28_12:22:58 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.689047e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.689716e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.689840e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.691460e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.691972e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.692221e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.165623 sec - 7,623,672,543 cycles # 3.068 GHz - 16,620,823,162 instructions # 2.18 insn per cycle - 2.541779519 seconds time elapsed +TOTAL : 2.212922 sec + 7,535,076,445 cycles # 3.027 GHz + 16,467,490,131 instructions # 2.19 insn per cycle + 2.613325869 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.116092e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 1.116360e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.116394e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.113760e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.114078e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.114109e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.384767 sec - 11,295,486,944 cycles # 3.039 GHz - 26,143,309,789 instructions # 2.31 insn per cycle - 3.772489660 seconds time elapsed +TOTAL : 3.392868 sec + 11,401,569,365 cycles # 3.066 GHz + 26,441,417,037 instructions # 2.32 insn per cycle + 3.775155527 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,15 +82,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.338757e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.339014e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.339014e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.934954e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.935169e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.935169e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.354208 sec - 19,488,655,111 cycles # 3.066 GHz - 54,285,293,279 instructions # 2.79 insn per cycle - 6.358206624 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:31983) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.660710 sec + 19,098,835,520 cycles # 2.868 GHz + 54,153,520,068 instructions # 2.84 insn per cycle + 6.666941649 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32066) (avx2: 0) (512y: 0) 
(512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -108,15 +108,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.580188e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.580272e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.580272e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.618452e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.618538e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.618538e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.350145 sec - 9,541,746,297 cycles # 2.846 GHz - 26,114,002,349 instructions # 2.74 insn per cycle - 3.354075604 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:95979) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.267639 sec + 9,481,668,630 cycles # 2.899 GHz + 26,160,746,738 instructions # 2.76 insn per cycle + 3.280820285 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96005) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -134,15 +134,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.673808e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.674247e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.674247e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.802442e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.802928e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.802928e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.442586 sec - 4,186,759,479 cycles # 2.896 GHz - 9,337,503,071 instructions # 2.23 insn per cycle - 1.446618657 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84147) (512y: 0) (512z: 0) +TOTAL : 1.394161 sec + 4,059,860,002 cycles # 2.904 GHz + 9,228,244,008 instructions # 2.27 insn per cycle + 1.405054842 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84155) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -160,15 +160,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.207665e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.208316e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.208316e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.378380e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.378995e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.378995e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.260207 sec - 3,641,116,614 cycles # 2.881 GHz - 8,312,794,650 instructions # 2.28 insn per cycle - 1.264172407 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83817) (512y: 79) (512z: 0) +TOTAL : 1.212246 sec + 3,518,495,788 cycles # 2.894 GHz + 8,175,753,097 instructions # 2.32 insn per cycle + 1.226324202 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79844) (512y: 79) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -186,15 +186,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.791929e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.792527e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.792527e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.812511e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.813101e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.813101e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.397651 sec - 2,653,173,022 cycles # 1.895 GHz - 4,233,021,275 instructions # 1.60 insn per cycle - 1.401768259 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2292) (512y: 93) (512z:82780) +TOTAL : 1.394533 sec + 2,662,670,072 cycles # 1.910 GHz + 4,156,543,307 instructions # 1.56 insn per cycle + 1.409486843 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2045) (512y: 93) (512z:78760) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index 0525b2e4c1..1ac764e4d9 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-25_18:49:30 +DATE: 2023-10-28_12:23:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.679608e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.680082e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.680202e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.681933e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.682714e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.682894e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.167186 sec - 7,636,339,894 cycles # 3.072 GHz - 15,813,775,134 instructions # 2.07 insn per cycle - 2.542843200 seconds time elapsed +TOTAL : 2.176043 sec + 7,528,195,044 cycles # 3.013 GHz + 15,513,498,355 instructions # 2.06 insn per cycle + 2.554313698 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.107918e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 1.108184e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.108215e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.110886e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.111202e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.111234e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.395689 sec - 11,403,686,722 cycles # 3.066 GHz - 26,502,494,448 instructions # 2.32 insn per cycle - 3.775763409 seconds time elapsed +TOTAL : 3.404045 sec + 11,460,907,535 cycles # 3.069 GHz + 25,656,367,525 instructions # 2.24 insn per cycle + 3.791106013 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,15 +82,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.358962e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.359192e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.359192e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.984019e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.984276e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.984276e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.333189 sec - 19,421,629,325 cycles # 3.065 GHz - 54,272,919,506 instructions # 2.79 insn per cycle - 6.337193550 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32142) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.618927 sec + 19,066,354,137 cycles # 2.879 GHz + 54,153,376,772 instructions # 2.84 insn per cycle + 6.622902376 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32243) (avx2: 0) (512y: 0) 
(512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -108,15 +108,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.573591e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.573676e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.573676e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.628425e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.628513e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.628513e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.359769 sec - 9,491,435,902 cycles # 2.826 GHz - 26,031,969,325 instructions # 2.74 insn per cycle - 3.363778642 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:95858) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.247465 sec + 9,407,247,603 cycles # 2.894 GHz + 26,078,265,911 instructions # 2.77 insn per cycle + 3.251522981 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:95899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -134,15 +134,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.730901e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.731358e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.731358e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.777947e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.778467e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.778467e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.420536 sec - 4,117,938,873 cycles # 2.893 GHz - 9,317,350,688 instructions # 2.26 insn per cycle - 1.424344563 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83787) (512y: 0) (512z: 0) +TOTAL : 1.402798 sec + 4,072,847,386 cycles # 2.896 GHz + 9,213,321,159 instructions # 2.26 insn per cycle + 1.406857687 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83776) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -160,15 +160,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.227741e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.228352e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.228352e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.351083e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.351704e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.351704e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.254191 sec - 3,641,355,182 cycles # 2.896 GHz - 8,309,383,106 instructions # 2.28 insn per cycle - 1.258235043 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83306) (512y: 229) (512z: 0) +TOTAL : 1.217933 sec + 3,539,688,188 cycles # 2.898 GHz + 8,168,083,097 instructions # 2.31 insn per cycle + 1.221962472 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79373) (512y: 229) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -186,15 +186,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.820448e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.821095e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.821095e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.868457e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.869081e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.869081e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.387479 sec - 2,638,179,282 cycles # 1.897 GHz - 4,231,949,116 instructions # 1.60 insn per cycle - 1.391365284 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1731) (512y: 175) (512z:82815) +TOTAL : 1.370226 sec + 2,622,254,330 cycles # 1.910 GHz + 4,153,294,010 instructions # 1.58 insn per cycle + 1.374355989 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1492) (512y: 175) (512z:78776) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 40bad5a7b8..f3f72b8d03 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-26_17:25:04 +DATE: 2023-10-28_12:18:19 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.792703e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.539913e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.920245e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.447647 sec - 1,899,649,593 cycles # 2.843 GHz - 2,640,133,591 instructions # 1.39 insn per cycle - 0.725424703 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.686159e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.340503e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.711310e+07 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.441786 sec + 2,026,016,464 cycles # 3.030 GHz + 2,782,125,849 instructions # 1.37 insn per cycle + 0.739628413 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,20 +60,20 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) 
-EvtsPerSec[Rmb+ME] (23) = ( 3.554122e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.661192e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.088747e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.525959 sec - 2,274,866,242 cycles # 2.999 GHz - 3,244,651,641 instructions # 1.43 insn per cycle - 0.817590808 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.249190e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.094401e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.512491e+07 ) sec^-1 +MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 +TOTAL : 0.527258 sec + 2,304,751,005 cycles # 3.025 GHz + 3,263,793,687 instructions # 1.42 insn per cycle + 0.821160742 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.424749e-01 -Avg ME (F77/CUDA) = 0.14247482467490466 -Relative difference = 5.286902838873106e-07 +Avg ME (C++/CUDA) = 5.622436e-01 +Avg ME (F77/CUDA) = 0.56224343220024076 +Relative difference = 2.984467216677476e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe -p 64 256 10 OMP= @@ -82,129 +82,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.135936e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.160836e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.160836e+05 ) sec^-1 
-MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.464575 sec - 4,532,434,513 cycles # 3.088 GHz - 12,813,606,654 instructions # 2.83 insn per cycle - 1.468861702 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 730) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.105519e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.128509e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.128509e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 1.503749 sec + 4,702,243,850 cycles # 3.120 GHz + 13,466,853,326 instructions # 2.86 insn per cycle + 1.510053207 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 862) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467499475 -Relative difference = 5.286896515331313e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 2.041215e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.121607e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.121607e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.824113 sec - 2,544,894,419 cycles # 3.076 GHz - 7,194,033,887 instructions # 2.83 insn per cycle - 0.828181787 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3149) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467499475 -Relative difference = 5.286896515331313e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.619791e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.876953e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.876953e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.474444 sec - 1,367,981,707 cycles # 2.862 GHz - 
2,963,399,076 instructions # 2.17 insn per cycle - 0.478653969 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3017) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.039550e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.354962e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.354962e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.426894 sec - 1,250,191,222 cycles # 2.906 GHz - 2,816,864,379 instructions # 2.25 insn per cycle - 0.430913373 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2780) (512y: 104) (512z: 0) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.854127e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.012217e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.012217e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.597649 sec - 1,200,960,787 cycles # 1.998 GHz - 1,804,460,217 instructions # 1.50 insn per cycle - 0.601806763 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1380) (512y: 106) (512z: 2270) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + 2 FAILED TESTS diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index b0a19a8e4d..689a8f5648 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-26_17:27:08 +DATE: 2023-10-28_12:56:16 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.649597e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.202624e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.202624e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.468562 sec - 2,056,154,943 cycles # 3.011 GHz - 3,068,292,883 instructions # 1.49 insn per cycle - 0.742344173 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.537647e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.118407e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.118407e+07 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.469387 sec + 2,053,681,362 cycles # 3.017 GHz + 2,981,721,290 instructions # 1.45 insn per cycle + 0.737787439 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,20 +72,20 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.351177e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.446309e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.446309e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.735555 sec - 2,957,630,146 cycles # 3.014 GHz - 4,532,632,907 instructions # 1.53 insn per cycle - 1.038724962 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.334603e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.380887e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.380887e+07 ) sec^-1 +MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 +TOTAL : 0.736122 sec + 2,984,351,272 cycles # 3.046 GHz + 4,572,222,181 instructions # 1.53 insn per cycle + 1.038987324 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.424749e-01 -Avg ME (F77/CUDA) = 0.14247482467490466 -Relative difference = 5.286902838873106e-07 +Avg ME (C++/CUDA) = 5.622436e-01 +Avg ME (F77/CUDA) = 0.56224343220024076 +Relative difference = 2.984467216677476e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= @@ -95,133 +95,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.132503e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.156927e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.156927e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.474566 sec - 4,557,819,252 cycles # 3.083 GHz - 12,820,933,724 instructions # 2.81 insn per cycle - 1.478788241 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 730) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.098820e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.121705e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.121705e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 1.518681 sec + 4,734,800,940 cycles # 3.111 GHz + 13,473,654,290 instructions # 2.85 insn per cycle + 1.522969532 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 862) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467499475 -Relative difference = 5.286896515331313e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.048121e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.128402e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.128402e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.826995 sec - 2,572,087,066 cycles # 3.097 GHz - 7,242,131,193 instructions # 2.82 insn per cycle - 0.831269361 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3149) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467499475 -Relative difference = 5.286896515331313e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.651556e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.911752e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.911752e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.476303 sec - 1,398,837,886 cycles # 2.914 GHz - 3,011,956,162 instructions # 2.15 insn per cycle - 0.480642444 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3017) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.989902e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.299552e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.299552e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.438250 sec - 1,282,387,350 cycles # 2.901 GHz - 2,865,664,755 instructions # 2.23 insn per cycle - 0.442674280 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2780) (512y: 104) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.848300e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.002921e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.002921e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.604375 sec - 1,228,098,241 cycles # 2.021 GHz - 1,842,126,953 instructions # 1.50 insn per cycle - 0.608691916 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1380) (512y: 106) (512z: 2270) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + 2 FAILED TESTS diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index d6b5e0aad5..439a063620 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-26_17:25:20 +DATE: 2023-10-28_12:18:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.773700e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.355869e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.712058e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.446838 sec - 1,946,931,730 cycles # 2.934 GHz - 2,811,438,850 instructions # 1.44 insn per cycle - 0.720668390 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.633006e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.190444e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.531715e+07 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.440720 sec + 1,992,376,776 cycles # 3.029 GHz + 2,800,529,205 instructions # 1.41 insn per cycle + 0.725033993 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,20 +60,20 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) 
-EvtsPerSec[Rmb+ME] (23) = ( 3.498955e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.548357e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.964288e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.520214 sec - 2,265,273,066 cycles # 3.003 GHz - 3,228,567,893 instructions # 1.43 insn per cycle - 0.811558050 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.233221e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.002410e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.417534e+07 ) sec^-1 +MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 +TOTAL : 0.527391 sec + 2,308,177,809 cycles # 3.030 GHz + 3,262,086,295 instructions # 1.41 insn per cycle + 0.821240750 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.424749e-01 -Avg ME (F77/CUDA) = 0.14247482467490466 -Relative difference = 5.286902838873106e-07 +Avg ME (C++/CUDA) = 5.622436e-01 +Avg ME (F77/CUDA) = 0.56224343220024076 +Relative difference = 2.984467216677476e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check.exe -p 64 256 10 OMP= @@ -82,129 +82,15 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.142548e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.167865e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.167865e+05 ) sec^-1 
-MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.455879 sec - 4,479,902,790 cycles # 3.070 GHz - 12,692,171,530 instructions # 2.83 insn per cycle - 1.459907347 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 685) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.047338e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.069084e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.069084e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 1.586610 sec + 4,722,160,621 cycles # 2.970 GHz + 13,461,547,465 instructions # 2.85 insn per cycle + 1.593311787 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 851) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467499475 -Relative difference = 5.286896515331313e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 
-EvtsPerSec[Rmb+ME] (23) = ( 2.092357e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.175485e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.175485e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.803799 sec - 2,492,455,684 cycles # 3.088 GHz - 7,047,759,878 instructions # 2.83 insn per cycle - 0.807789046 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2965) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467499475 -Relative difference = 5.286896515331313e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.451621e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.678035e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.678035e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.496055 sec - 1,458,437,445 cycles # 2.919 GHz - 
3,195,504,881 instructions # 2.19 insn per cycle - 0.500214979 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3078) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.580569e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.825485e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.825485e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.478720 sec - 1,397,857,315 cycles # 2.899 GHz - 3,099,350,808 instructions # 2.22 insn per cycle - 0.482782307 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2785) (512y: 257) (512z: 0) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.737963e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.886358e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.886358e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.621958 sec - 1,247,171,087 cycles # 1.996 GHz - 2,070,041,670 instructions # 1.66 insn per cycle - 0.626017428 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1197) (512y: 194) (512z: 2426) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + 2 FAILED TESTS diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index f5a2639771..fee856b657 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-26_17:25:37 +DATE: 2023-10-28_12:18:39 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,36 +44,36 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.424188e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.234933e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.376824e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.436480 sec - 1,949,958,184 cycles # 3.004 GHz - 2,721,614,771 instructions # 1.40 insn per cycle - 0.705846857 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.351195e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.215965e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.353365e+08 ) sec^-1 +MeanMatrixElemValue = ( 3.402886e+01 +- 1.677500e+01 ) GeV^-2 +TOTAL : 0.435631 sec + 1,962,028,729 cycles # 3.030 GHz + 2,753,559,026 instructions # 1.40 insn per cycle + 0.723824346 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 168 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.521674e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.845752e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.989370e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.473781 sec - 2,121,776,214 cycles # 3.012 GHz - 3,003,472,057 instructions # 1.42 insn per cycle - 0.763259437 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.222770e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.801907e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.948153e+08 ) sec^-1 +MeanMatrixElemValue = ( 4.166198e+02 +- 2.517590e+02 ) GeV^-2 +TOTAL : 0.472246 sec + 2,127,234,196 cycles # 3.022 GHz + 3,027,538,268 instructions # 1.42 insn per cycle + 0.761698590 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.424226e-01 -Avg ME (F77/CUDA) = 0.14247488790947038 -Relative difference = 0.0003671321087409729 +Avg ME (C++/CUDA) = 5.619520e-01 +Avg ME (F77/CUDA) = 0.56225629328027793 +Relative difference = 0.0005414933664760033 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe -p 64 256 10 OMP= @@ -82,129 +82,15 @@ Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.161603e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.188157e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.188157e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.430760 sec - 4,393,282,749 cycles # 3.064 GHz - 12,757,139,178 instructions # 2.90 insn per cycle - 1.434699006 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 690) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.164606e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.190696e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.190696e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.422773e+01 +- 1.683422e+01 ) GeV^-2 +TOTAL : 1.426678 sec + 4,450,099,487 cycles # 3.112 GHz + 13,052,167,444 instructions # 2.93 insn per cycle + 1.433165265 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 748) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246861269162636 -Relative difference = 8.908367430417796e-08 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.243958e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.460072e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.460072e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 0.524329 sec - 1,617,157,255 cycles # 3.064 GHz - 4,232,194,294 instructions # 2.62 insn per cycle - 0.528384284 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3708) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246862328158905 -Relative difference = 1.6341558101589185e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.556556e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.447650e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.447650e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429923e+01 ) GeV^-2 -TOTAL : 0.269642 sec - 793,561,755 cycles # 2.906 GHz - 1,796,372,240 instructions # 2.26 insn per cycle - 0.273627491 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3614) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247490055046619 -Relative difference = 3.863601240637253e-09 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.066042e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.112073e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.112073e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429923e+01 ) GeV^-2 -TOTAL : 0.251284 sec - 744,296,871 cycles # 2.924 GHz - 1,717,805,428 instructions # 2.31 insn per cycle - 0.255106971 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3443) (512y: 22) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247490055046619 -Relative difference = 3.863601240637253e-09 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.420901e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.019958e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.019958e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429923e+01 ) GeV^-2 -TOTAL : 0.324025 sec - 679,485,098 cycles # 2.076 GHz - 1,206,749,407 instructions # 1.78 insn per cycle - 0.327968867 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2007) (512y: 32) (512z: 2493) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247490833793641 -Relative difference = 5.8522142669546946e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + 2 FAILED TESTS diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index dcad28db83..9667edc5e8 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-26_17:27:25 +DATE: 2023-10-28_12:56:27 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,20 +48,20 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.698357e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.094772e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.094772e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.017654e+01 +- 1.429184e+01 ) GeV^-2 -TOTAL : 0.449674 sec - 2,000,719,723 cycles # 3.012 GHz - 2,923,895,808 instructions # 1.46 insn per cycle - 0.723109219 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.654442e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.068857e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.068857e+07 ) sec^-1 +MeanMatrixElemValue = ( 3.419752e+01 +- 1.682900e+01 ) GeV^-2 +TOTAL : 0.452198 sec + 1,989,257,204 cycles # 3.008 GHz + 2,936,823,078 instructions # 1.48 insn per cycle + 0.720594064 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 168 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= @@ -72,20 +72,20 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.069628e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.803608e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.803608e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.609942e+02 +- 2.115590e+02 ) GeV^-2 -TOTAL : 0.616457 sec - 2,529,130,127 cycles # 2.992 GHz - 3,855,862,028 instructions # 1.52 insn per cycle - 0.905495583 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.295691e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.814169e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.814169e+07 ) sec^-1 +MeanMatrixElemValue = ( 4.349385e+02 +- 2.541442e+02 ) GeV^-2 +TOTAL : 0.611989 sec + 2,543,798,787 cycles # 3.034 GHz + 3,860,680,534 instructions # 1.52 insn per cycle + 0.897475876 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.424226e-01 -Avg ME (F77/CUDA) = 0.14247488790947038 -Relative difference = 0.0003671321087409729 +Avg ME (C++/CUDA) = 5.619520e-01 +Avg ME (F77/CUDA) = 0.56225629328027793 +Relative difference = 0.0005414933664760033 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= @@ -95,133 +95,15 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.170511e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.197246e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.197246e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.422269 sec - 4,408,284,699 cycles # 3.092 GHz - 12,761,377,566 instructions # 2.89 insn per cycle - 1.426267924 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 690) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.161565e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.188095e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.188095e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.422773e+01 +- 1.683422e+01 ) GeV^-2 +TOTAL : 1.433271 sec + 4,468,228,264 cycles # 3.110 GHz + 13,056,784,200 instructions # 2.92 insn per cycle + 1.437467283 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 748) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246861269162636 -Relative difference = 8.908367430417796e-08 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.242542e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.458214e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.458214e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 0.528863 sec - 1,637,240,219 cycles # 3.076 GHz - 4,280,743,566 instructions # 2.61 insn per cycle - 0.532859494 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3708) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246862328158905 -Relative difference = 1.6341558101589185e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.405455e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.295574e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.295574e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429923e+01 ) GeV^-2 -TOTAL : 0.280318 sec - 819,157,481 cycles # 2.885 GHz - 1,833,479,182 instructions # 2.24 insn per cycle - 0.284787120 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3614) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247490055046619 -Relative difference = 3.863601240637253e-09 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.993629e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.028908e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.028908e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429923e+01 ) GeV^-2 -TOTAL : 0.257906 sec - 762,645,935 cycles # 2.918 GHz - 1,754,864,061 instructions # 2.30 insn per cycle - 0.261947181 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3443) (512y: 22) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247490055046619 -Relative difference = 3.863601240637253e-09 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.338183e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.942282e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.942282e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429923e+01 ) GeV^-2 -TOTAL : 0.332959 sec - 700,752,007 cycles # 2.083 GHz - 1,248,201,481 instructions # 1.78 insn per cycle - 0.336970237 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2007) (512y: 32) (512z: 2493) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247490833793641 -Relative difference = 5.8522142669546946e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + 2 FAILED TESTS diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index 0b14ac2732..f8dec0d191 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-26_17:25:53 +DATE: 2023-10-28_12:18:50 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,36 +44,36 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.400429e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.233953e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.375182e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.442376 sec - 1,895,015,408 cycles # 2.892 GHz - 2,747,268,890 instructions # 1.45 insn per cycle - 0.712246758 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.250146e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.175847e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.298485e+08 ) sec^-1 +MeanMatrixElemValue = ( 3.402886e+01 +- 1.677500e+01 ) GeV^-2 +TOTAL : 0.434444 sec + 1,955,535,682 cycles # 3.017 GHz + 2,766,629,392 instructions # 1.41 insn per cycle + 0.718000068 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 162 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.493600e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.828411e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.970176e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.474454 sec - 2,110,192,824 cycles # 2.992 GHz - 2,947,596,873 instructions # 1.40 insn per cycle - 0.764332989 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.147977e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.757689e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.893722e+08 ) sec^-1 +MeanMatrixElemValue = ( 4.166198e+02 +- 2.517590e+02 ) GeV^-2 +TOTAL : 0.471488 sec + 2,146,983,188 cycles # 3.028 GHz + 2,931,828,508 instructions # 1.37 insn per cycle + 0.766791721 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.424226e-01 -Avg ME (F77/CUDA) = 0.14247488790947038 -Relative difference = 0.0003671321087409729 +Avg ME (C++/CUDA) = 5.619520e-01 +Avg ME (F77/CUDA) = 0.56225629328027793 +Relative difference = 0.0005414933664760033 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check.exe -p 64 256 10 OMP= @@ -82,129 +82,15 @@ Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.158986e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.186080e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.186080e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.434486 sec - 4,371,663,810 cycles # 3.041 GHz - 12,657,634,893 instructions # 2.90 insn per cycle - 1.438463868 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 641) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.167815e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.194266e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.194266e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.422773e+01 +- 1.683422e+01 ) GeV^-2 +TOTAL : 1.422545 sec + 4,440,503,513 cycles # 3.114 GHz + 13,032,813,526 instructions # 2.93 insn per cycle + 1.428734586 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 730) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246861269162636 -Relative difference = 8.908367430417796e-08 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.446741e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.701875e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.701875e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 0.495234 sec - 1,479,620,343 cycles # 2.968 GHz - 4,121,009,416 instructions # 2.79 insn per cycle - 0.499281460 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3413) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246862328158905 -Relative difference = 1.6341558101589185e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.995783e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.515263e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.515263e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429923e+01 ) GeV^-2 -TOTAL : 0.348418 sec - 1,011,991,272 cycles # 2.876 GHz - 2,124,849,284 instructions # 2.10 insn per cycle - 0.352463895 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4206) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247490055046619 -Relative difference = 3.863601240637253e-09 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.230105e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.786980e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.786980e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429923e+01 ) GeV^-2 -TOTAL : 0.333455 sec - 970,827,927 cycles # 2.882 GHz - 2,043,935,074 instructions # 2.11 insn per cycle - 0.337479099 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4013) (512y: 9) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247490055046619 -Relative difference = 3.863601240637253e-09 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.954154e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.276017e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.276017e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429923e+01 ) GeV^-2 -TOTAL : 0.437007 sec - 857,837,231 cycles # 1.948 GHz - 1,573,448,481 instructions # 1.83 insn per cycle - 0.440984330 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2446) (512y: 16) (512z: 2998) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247490833793641 -Relative difference = 5.8522142669546946e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + 2 FAILED TESTS diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index d8b101c2fc..81446952af 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-26_17:26:09 +DATE: 2023-10-28_12:19:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.458886e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.465906e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.924587e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.444243 sec - 2,002,421,839 cycles # 3.013 GHz - 2,741,217,649 instructions # 1.37 insn per cycle - 0.723275667 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.675976e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.308760e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.678480e+07 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.439579 sec + 1,986,523,688 cycles # 3.018 GHz + 2,793,227,977 instructions # 1.41 insn per cycle + 0.729915034 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,20 +60,20 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 3.489884e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.660066e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.119815e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.519948 sec - 2,276,444,720 cycles # 3.018 GHz - 3,229,105,748 instructions # 1.42 insn per cycle - 0.811078605 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.255163e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.114232e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.536354e+07 ) sec^-1 +MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 +TOTAL : 0.528100 sec + 2,302,091,318 cycles # 3.015 GHz + 3,285,386,185 instructions # 1.43 insn per cycle + 0.822389875 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.424749e-01 -Avg ME (F77/CUDA) = 0.14247482577104625 -Relative difference = 5.209967070245855e-07 +Avg ME (C++/CUDA) = 5.622436e-01 +Avg ME (F77/CUDA) = 0.56224344354681244 +Relative difference = 2.782658397826986e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check.exe -p 64 256 10 OMP= @@ -82,129 +82,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.134733e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.158878e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.158878e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 
+- 1.429474e+01 ) GeV^-2 -TOTAL : 1.465494 sec - 4,537,654,473 cycles # 3.089 GHz - 12,784,835,228 instructions # 2.82 insn per cycle - 1.469495110 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 705) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.064090e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.085676e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.085676e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 1.561728 sec + 4,747,206,285 cycles # 3.039 GHz + 13,471,570,617 instructions # 2.84 insn per cycle + 1.568175619 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482734618697 -Relative difference = 5.099411406595165e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.036919e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.117824e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.117824e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.825266 sec - 2,551,760,245 cycles # 3.080 GHz - 7,116,326,213 instructions # 2.79 insn per cycle - 0.829268114 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3214) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482734618697 -Relative difference = 5.099411406595165e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.663407e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.932756e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.932756e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.469135 sec - 1,350,418,007 cycles # 2.856 GHz - 2,936,810,576 instructions # 2.17 insn per cycle - 
0.473330080 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3174) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.883756e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.195233e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.195233e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.444372 sec - 1,218,686,581 cycles # 2.721 GHz - 2,790,949,615 instructions # 2.29 insn per cycle - 0.448527550 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2938) (512y: 110) (512z: 0) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.750037e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.895766e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.895766e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.619232 sec - 1,238,990,774 cycles # 1.989 GHz - 1,831,327,319 instructions # 1.48 insn per cycle - 0.623295439 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1728) (512y: 114) (512z: 2312) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + 2 FAILED TESTS diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index 3d4a565469..2ed7741ba4 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-26_17:26:26 +DATE: 2023-10-28_12:19:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.459192e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.306531e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.735602e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.444146 sec - 2,013,998,621 cycles # 3.016 GHz - 2,802,542,390 instructions # 1.39 insn per cycle - 0.726529857 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.635071e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.157188e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.509801e+07 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 0.441790 sec + 1,989,499,871 cycles # 3.014 GHz + 2,775,700,133 instructions # 1.40 insn per cycle + 0.726031721 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,20 +60,20 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 3.467057e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.546570e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.958148e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.519273 sec - 2,271,413,411 cycles # 3.015 GHz - 3,255,005,016 instructions # 1.43 insn per cycle - 0.810510365 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.221630e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.948180e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.370520e+07 ) sec^-1 +MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 +TOTAL : 0.530411 sec + 2,299,474,084 cycles # 2.994 GHz + 3,273,781,886 instructions # 1.42 insn per cycle + 0.825233339 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 1.424749e-01 -Avg ME (F77/CUDA) = 0.14247482577104625 -Relative difference = 5.209967070245855e-07 +Avg ME (C++/CUDA) = 5.622436e-01 +Avg ME (F77/CUDA) = 0.56224344354681244 +Relative difference = 2.782658397826986e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check.exe -p 64 256 10 OMP= @@ -82,129 +82,15 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.146828e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.171967e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.171967e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 
+- 1.429474e+01 ) GeV^-2 -TOTAL : 1.450294 sec - 4,498,136,326 cycles # 3.095 GHz - 12,668,545,692 instructions # 2.82 insn per cycle - 1.454272739 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 657) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.097804e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.120541e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.120541e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 +TOTAL : 1.514227 sec + 4,724,087,422 cycles # 3.113 GHz + 13,455,754,805 instructions # 2.85 insn per cycle + 1.520438100 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 829) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482734618697 -Relative difference = 5.099411406595165e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.099313e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.184687e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.184687e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.801124 sec - 2,487,092,001 cycles # 3.092 GHz - 6,905,671,869 instructions # 2.78 insn per cycle - 0.805195084 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3035) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482734618697 -Relative difference = 5.099411406595165e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.381230e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.602049e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.602049e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.505477 sec - 1,479,054,792 cycles # 2.908 GHz - 3,168,233,594 instructions # 2.14 insn per cycle - 
0.509509291 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3284) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.629283e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.881809e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.881809e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.472539 sec - 1,380,679,448 cycles # 2.901 GHz - 3,040,117,670 instructions # 2.20 insn per cycle - 0.476625589 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2936) (512y: 265) (512z: 0) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.708530e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.847600e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.847600e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.628483 sec - 1,253,413,112 cycles # 1.987 GHz - 2,004,133,246 instructions # 1.60 insn per cycle - 0.632568870 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1520) (512y: 202) (512z: 2499) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + 2 FAILED TESTS From 536ad49c0247d238d393ebcd15472405b147a277 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sat, 28 Oct 2023 15:15:27 +0200 Subject: [PATCH 060/119] [oct23av] rerun 18 tmad tests - failures in ggttggg (madevent crashes) and gqttq (xsec differs again #748) STARTED AT Sat Oct 28 01:08:49 PM CEST 2023 ENDED AT Sat Oct 28 01:30:55 PM CEST 2023 Status=0 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt 24 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt --- This is a summary of the changes with respect to my previous logs using the August code base Functionality: - ggttggg: madevent crashes - gqttq: xsec differs again Performance (Fortran overhead): -- all very similar Performance (MEs): -- eemumu: fortran 20% faster, cuda slightly slower, simd a factor 2 to 3 slower -- ggtt: fortran and simd 20% faster, cuda similar -- ggttg: fortran 10% faster, simd and cuda similar -- ggttgg: all very similar +ERROR! ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' failed +d R # 5 > -0.0 -0.0 -0.0 0.4 0.4 +d R # 6 > -0.0 -0.0 -0.0 -0.0 0.4 +s min # 3> 0.0119716.0 29929.0 29929.0 0.0 +s min # 4> 0.0 0.0 29929.0 29929.0 0.0 +s min # 5> 0.0 0.0 0.0 0.0 0.0 +s min # 6> 0.0 0.0 0.0 0.0 0.0 +xqcutij # 3> 0.0 0.0 0.0 0.0 0.0 +xqcutij # 4> 0.0 0.0 0.0 0.0 0.0 +xqcutij # 5> 0.0 0.0 0.0 0.0 0.0 +xqcutij # 6> 0.0 0.0 0.0 0.0 0.0 +ERROR! 
xsec from fortran (0.26050333309703716) and cpp (1.2757941949814184) differ by more than 2E-14 (3.8974198518457603) --- .../log_eemumu_mad_d_inl0_hrd0.txt | 142 ++--- .../log_eemumu_mad_f_inl0_hrd0.txt | 186 +++--- .../log_eemumu_mad_m_inl0_hrd0.txt | 138 ++--- .../log_ggtt_mad_d_inl0_hrd0.txt | 158 ++--- .../log_ggtt_mad_f_inl0_hrd0.txt | 180 +++--- .../log_ggtt_mad_m_inl0_hrd0.txt | 136 ++--- .../log_ggttg_mad_d_inl0_hrd0.txt | 154 ++--- .../log_ggttg_mad_f_inl0_hrd0.txt | 182 +++--- .../log_ggttg_mad_m_inl0_hrd0.txt | 154 ++--- .../log_ggttgg_mad_d_inl0_hrd0.txt | 166 ++--- .../log_ggttgg_mad_f_inl0_hrd0.txt | 182 +++--- .../log_ggttgg_mad_m_inl0_hrd0.txt | 150 ++--- .../log_ggttggg_mad_d_inl0_hrd0.txt | 568 +---------------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 572 +----------------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 570 +---------------- .../log_gqttq_mad_d_inl0_hrd0.txt | 499 +-------------- .../log_gqttq_mad_f_inl0_hrd0.txt | 501 +-------------- .../log_gqttq_mad_m_inl0_hrd0.txt | 501 +-------------- 18 files changed, 1074 insertions(+), 4065 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 1a4d828546..a2fe9cfc4c 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum CUDACPP_BUILDDIR='.' 
+make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:18:26 +DATE: 2023-10-28_13:14:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6275s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6175s - [COUNTERS] Fortran MEs ( 1 ) : 0.0099s for 8192 events => throughput is 8.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6257s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6178s + [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1814s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1709s - [COUNTERS] Fortran MEs ( 1 ) : 0.0105s for 8192 events => throughput is 7.82E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1779s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1701s + [COUNTERS] Fortran MEs ( 1 ) : 0.0078s for 8192 events => throughput is 1.05E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] 
PROGRAM TOTAL : 0.4443s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3343s - [COUNTERS] Fortran MEs ( 1 ) : 0.1099s for 90112 events => throughput is 8.20E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4348s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3498s + [COUNTERS] Fortran MEs ( 1 ) : 0.0850s for 90112 events => throughput is 1.06E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1870s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1812s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 8192 events => throughput is 1.41E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1861s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1799s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 8192 events => throughput is 1.31E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -165,15 +165,15 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4079s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3422s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0657s for 90112 events => throughput is 1.37E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4102s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3401s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0701s for 90112 events => throughput is 1.29E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919904813628E-002) differ by less than 2E-14 (3.3306690738754696e-16) +OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501919904813656E-002) differ by less than 2E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.344246e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.260586e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.355045e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.273890e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1815s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1785s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.72E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1831s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1792s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.11E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 
[UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3697s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3373s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0324s for 90112 events => throughput is 2.78E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3805s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3369s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0437s for 90112 events => throughput is 2.06E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.734240e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.015609e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.880467e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.071474e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1763s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1746s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0016s for 8192 events => throughput is 5.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1786s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1757s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.80E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x 
[XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3545s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3375s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 90112 events => throughput is 5.28E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3706s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3382s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0324s for 90112 events => throughput is 2.78E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.204571e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.649644e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.588414e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.777010e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1798s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1784s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0014s for 8192 events => throughput is 5.76E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1796s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1767s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.82E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** 
@@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3579s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3417s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0162s for 90112 events => throughput is 5.55E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3673s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3358s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0314s for 90112 events => throughput is 2.87E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.597781e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.754124e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.189972e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.855874e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1771s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1755s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0016s for 8192 events => throughput is 5.09E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1823s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1787s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0036s for 8192 events => 
throughput is 2.30E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3582s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3400s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0182s for 90112 events => throughput is 4.95E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3770s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3386s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0384s for 90112 events => throughput is 2.35E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.738607e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.172472e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.243870e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.276219e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.6205s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6200s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.63E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5911s + 
[COUNTERS] Fortran Overhead ( 0 ) : 0.5906s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.61E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7581s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7534s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.93E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7698s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7645s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0053s for 90112 events => throughput is 1.71E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.604143e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.976614e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.463611e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910314e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.281447e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.855305e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.027562e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.402134e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.209260e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.842796e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.077193e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.993414e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.272993e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.822100e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.998178e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.125212e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index f98575860b..067e1ee497 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -4,8 +4,8 @@ CUDACPP_BUILDDIR='.' 
make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:18:43 +DATE: 2023-10-28_13:15:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.8283s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8183s - [COUNTERS] Fortran MEs ( 1 ) : 0.0100s for 8192 events => throughput is 8.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6223s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6145s + [COUNTERS] Fortran MEs ( 1 ) : 0.0078s for 8192 events => throughput is 1.05E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1908s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1801s - [COUNTERS] Fortran MEs ( 1 ) : 0.0107s for 8192 events => throughput is 7.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1784s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1701s + [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 9.94E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] 
PROGRAM TOTAL : 0.4773s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3594s - [COUNTERS] Fortran MEs ( 1 ) : 0.1178s for 90112 events => throughput is 7.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4366s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3463s + [COUNTERS] Fortran MEs ( 1 ) : 0.0903s for 90112 events => throughput is 9.98E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2175 [0.21747166140620297] fbridge_mode=1 + [XSECTION] Cross section = 0.2175 [0.21747166087172673] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1859s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1802s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0057s for 8192 events => throughput is 1.43E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1925s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1863s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 8192 events => throughput is 1.32E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21747169064681776) and cpp (0.21747166140620297) differ by less than 4E-4 (1.344571088912616e-07) +OK! 
xsec from fortran (0.21747169064681776) and cpp (0.21747166087172673) differ by less than 4E-4 (1.369147908381052e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -165,15 +165,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0915 [9.1501907784661565E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0915 [9.1501907796603360E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4026s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3390s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0636s for 90112 events => throughput is 1.42E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4207s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3527s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0680s for 90112 events => throughput is 1.32E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501907784661565E-002) differ by less than 4E-4 (1.324578993155967e-07) +OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501907796603360E-002) differ by less than 4E-4 (1.3232739060065057e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.366198e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.229725e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.396541e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.180987e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2175 [0.21747165549479658] fbridge_mode=1 + [XSECTION] Cross section = 0.2175 [0.21747165570339780] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1780s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1763s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0017s for 8192 events => throughput is 4.75E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1937s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1911s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.08E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21747169064681776) and cpp (0.21747165549479658) differ by less than 4E-4 (1.6163952687442418e-07) +OK! 
xsec from fortran (0.21747169064681776) and cpp (0.21747165570339780) differ by less than 4E-4 (1.6068031594151932e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -241,15 +241,15 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0915 [9.1501905692857932E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0915 [9.1501905322826635E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3798s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3613s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0185s for 90112 events => throughput is 4.88E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3983s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3681s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0302s for 90112 events => throughput is 2.99E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501905692857932E-002) differ by less than 4E-4 (1.5531866148776885e-07) +OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501905322826635E-002) differ by less than 4E-4 (1.5936263453308896e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -258,12 +258,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.893797e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.070468e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.592095e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.258953e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,15 +284,15 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2175 [0.21747165569099927] fbridge_mode=1 + [XSECTION] Cross section = 0.2175 [0.21747165593922979] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1811s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1803s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 9.92E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1991s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1966s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.33E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21747169064681776) and cpp (0.21747165569099927) differ by less than 4E-4 (1.6073732811427988e-07) +OK! 
xsec from fortran (0.21747169064681776) and cpp (0.21747165593922979) differ by less than 4E-4 (1.5959588972602745e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -317,15 +317,15 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0915 [9.1501905658047333E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0915 [9.1501905316084181E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3614s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3514s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0100s for 90112 events => throughput is 8.98E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4015s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3733s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0282s for 90112 events => throughput is 3.19E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501905658047333E-002) differ by less than 4E-4 (1.5569909717072505e-07) +OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501905316084181E-002) differ by less than 4E-4 (1.5943632103443406e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.801869e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.518365e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.185551e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.665283e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -360,15 +360,15 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2175 [0.21747165569099927] fbridge_mode=1 + [XSECTION] Cross section = 0.2175 [0.21747165593922979] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1801s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1793s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 1.05E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.1832s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1810s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.77E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21747169064681776) and cpp (0.21747165569099927) differ by less than 4E-4 (1.6073732811427988e-07) +OK! 
xsec from fortran (0.21747169064681776) and cpp (0.21747165593922979) differ by less than 4E-4 (1.5959588972602745e-07) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,15 +393,15 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0915 [9.1501905658047333E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0915 [9.1501905316084181E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3471s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3384s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 90112 events => throughput is 1.04E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.3863s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3618s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0245s for 90112 events => throughput is 3.68E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501905658047333E-002) differ by less than 4E-4 (1.5569909717072505e-07) +OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501905316084181E-002) differ by less than 4E-4 (1.5943632103443406e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.124905e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.606645e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.311988e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.727018e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -436,15 +436,15 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2175 [0.21747166431914253] fbridge_mode=1 + [XSECTION] Cross section = 0.2175 [0.21747166440400542] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1801s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1791s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 8.83E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1837s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1815s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.68E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21747169064681776) and cpp (0.21747166431914253) differ by less than 4E-4 (1.2106253999277783e-07) +OK! 
xsec from fortran (0.21747169064681776) and cpp (0.21747166440400542) differ by less than 4E-4 (1.20672314918302e-07) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -469,15 +469,15 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0915 [9.1501909358591468E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0915 [9.1501908978565555E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3552s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3451s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0101s for 90112 events => throughput is 8.91E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3669s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3423s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0245s for 90112 events => throughput is 3.67E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501909358591468E-002) differ by less than 4E-4 (1.1525684051605367e-07) +OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501908978565555E-002) differ by less than 4E-4 (1.194100419654731e-07) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.261918e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.553347e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.082919e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.573933e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -512,15 +512,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2175 [0.21747166796068879] fbridge_mode=1 + [XSECTION] Cross section = 0.2175 [0.21747166823487174] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5875s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5870s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.79E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5931s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5926s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.71E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21747169064681776) and cpp (0.21747166796068879) differ by less than 4E-4 (1.043176189874373e-07) +OK! 
xsec from fortran (0.21747169064681776) and cpp (0.21747166823487174) differ by less than 4E-4 (1.0305684361444634e-07) *** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -545,15 +545,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0915 [9.1501910316213061E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0915 [9.1501910542849674E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7572s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7527s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 90112 events => throughput is 2.00E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7539s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7493s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 90112 events => throughput is 1.96E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501910316213061E-002) differ by less than 4E-4 (1.0479125034379422e-07) +OK! xsec from fortran (9.1501919904813656E-002) and cpp (9.1501910542849674E-002) differ by less than 4E-4 (1.0231439939722975e-07) *** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.090855e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.318352e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.271435e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.853560e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.835160e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.080898e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.510333e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.054742e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.894374e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.090578e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.802495e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.167250e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) 
-p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.131306e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.381546e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.434205e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.438630e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 0d49865b9c..7821d8bf4c 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -16,14 +16,14 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:18:59 +DATE: 2023-10-28_13:15:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6278s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6178s - [COUNTERS] Fortran MEs ( 1 ) : 0.0100s for 8192 events => throughput is 8.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6407s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6327s + [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1782s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1684s - [COUNTERS] Fortran MEs ( 1 ) : 0.0099s for 8192 events => throughput is 8.30E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1816s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1735s + [COUNTERS] Fortran MEs ( 1 ) : 0.0081s for 8192 events => throughput is 
1.02E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4565s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3432s - [COUNTERS] Fortran MEs ( 1 ) : 0.1134s for 90112 events => throughput is 7.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4234s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3353s + [COUNTERS] Fortran MEs ( 1 ) : 0.0881s for 90112 events => throughput is 1.02E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2175 [0.21747169074211728] fbridge_mode=1 + [XSECTION] Cross section = 0.2175 [0.21747169074211734] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1872s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1810s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 8192 events => throughput is 1.30E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1919s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1850s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 8192 events => throughput is 1.18E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21747169064681776) and cpp (0.21747169074211728) differ by less than 2E-4 (4.382156859605857e-10) +OK! 
xsec from fortran (0.21747169064681776) and cpp (0.21747169074211734) differ by less than 2E-4 (4.382159080051906e-10) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4079s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0667s for 90112 events => throughput is 1.35E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4141s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3418s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0722s for 90112 events => throughput is 1.25E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.341399e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.218225e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.323934e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.251141e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211728] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1801s - [COUNTERS] Fortran Overhead ( 0 ) : 
0.1772s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.78E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1823s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1785s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.13E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3694s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3374s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0319s for 90112 events => throughput is 2.82E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3796s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3380s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0417s for 90112 events => throughput is 2.16E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.783585e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.041832e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.872248e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.190968e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1786s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1770s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0016s for 8192 events => throughput is 5.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1799s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1769s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.67E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3561s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3389s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 90112 events => throughput is 5.24E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3699s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3368s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0331s for 90112 events => throughput is 2.73E+06 
events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.356083e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.665940e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.847494e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.683509e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1798s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1783s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0014s for 8192 events => throughput is 5.77E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1806s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1778s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.92E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3603s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3443s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0160s for 90112 events => throughput is 5.63E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3689s + [COUNTERS] Fortran Overhead 
( 0 ) : 0.3373s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0316s for 90112 events => throughput is 2.85E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.610629e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.662410e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.797344e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.918061e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1826s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1808s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.43E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1833s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1800s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.48E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3604s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3419s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0185s for 90112 events 
=> throughput is 4.88E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3783s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0371s for 90112 events => throughput is 2.43E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.814394e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.310202e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.589840e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.414668e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169066587257] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5950s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5945s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.67E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5938s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5933s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.64E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919911173610E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7577s - 
[COUNTERS] Fortran Overhead ( 0 ) : 0.7529s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.90E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7588s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7539s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.84E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.583003e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.905469e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.467396e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.913840e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.252357e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.847313e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.030772e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.387824e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.251034e+07 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.834525e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.079891e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.012912e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.286168e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.865488e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.995935e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.125515e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 57c094acdf..7f10f908fb 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,9 +1,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 @@ -15,17 +15,17 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:19:16 +DATE: 2023-10-28_13:15:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.4367s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3846s - [COUNTERS] Fortran MEs ( 1 ) : 0.0521s for 8192 events => throughput is 1.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7268s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6856s + [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3291s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2810s - [COUNTERS] Fortran MEs ( 1 ) : 0.0481s for 8192 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3106s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2704s + [COUNTERS] Fortran MEs ( 1 ) : 0.0402s for 8192 events => throughput is 2.04E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7885s 
- [COUNTERS] Fortran Overhead ( 0 ) : 1.2393s - [COUNTERS] Fortran MEs ( 1 ) : 0.5492s for 90112 events => throughput is 1.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6386s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1973s + [COUNTERS] Fortran MEs ( 1 ) : 0.4413s for 90112 events => throughput is 2.04E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600102] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3572s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3148s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0424s for 8192 events => throughput is 1.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3491s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3126s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0365s for 8192 events => throughput is 2.25E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -165,15 +165,15 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 46.22 [46.223782291775379] fbridge_mode=1 + [XSECTION] Cross section = 46.22 [46.223782291775372] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6843s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2210s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4633s for 90112 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6518s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2491s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4027s for 90112 events => throughput is 2.24E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775379) differ by less than 2E-14 (2.220446049250313e-16) +OK! 
xsec from fortran (46.223782291775365) and cpp (46.223782291775372) differ by less than 2E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.950448e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.219817e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.964951e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.195772e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 + [XSECTION] Cross section = 47.69 [47.690708277600102] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3143s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2905s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0239s for 8192 events => throughput is 3.43E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3108s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2894s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.82E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.690708277600116) and cpp (47.690708277600109) differ by less than 2E-14 (1.1102230246251565e-16) +OK! 
xsec from fortran (47.690708277600116) and cpp (47.690708277600102) differ by less than 2E-14 (3.3306690738754696e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775379] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4654s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2042s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2612s for 90112 events => throughput is 3.45E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4861s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2485s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2376s for 90112 events => throughput is 3.79E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.422695e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.770420e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.385899e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.714739e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2964s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2820s - 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.0144s for 8192 events => throughput is 5.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2956s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2824s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0131s for 8192 events => throughput is 6.25E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -317,15 +317,15 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 46.22 [46.223782291775393] fbridge_mode=1 + [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3564s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1960s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1604s for 90112 events => throughput is 5.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3707s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2291s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1417s for 90112 events => throughput is 6.36E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775393) differ by less than 2E-14 (6.661338147750939e-16) +OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775386) differ by less than 2E-14 (4.440892098500626e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.356829e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.097285e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.465883e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.994538e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2947s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2816s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0130s for 8192 events => throughput is 6.29E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2921s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2806s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0114s for 8192 events => throughput is 7.16E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -393,15 +393,15 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 46.22 [46.223782291775393] fbridge_mode=1 + [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3386s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1916s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1469s for 90112 events => throughput is 6.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3574s + [COUNTERS] Fortran 
Overhead ( 0 ) : 1.2299s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1275s for 90112 events => throughput is 7.07E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775393) differ by less than 2E-14 (6.661338147750939e-16) +OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775386) differ by less than 2E-14 (4.440892098500626e-16) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.955245e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.938371e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.038215e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.877095e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3076s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2868s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0208s for 8192 events => throughput is 3.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3055s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2865s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0190s for 8192 events => throughput is 4.32E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -469,15 +469,15 @@ 
Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 46.22 [46.223782291775393] fbridge_mode=1 + [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4351s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2090s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2261s for 90112 events => throughput is 3.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4449s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2340s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2108s for 90112 events => throughput is 4.27E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775393) differ by less than 2E-14 (6.661338147750939e-16) +OK! xsec from fortran (46.223782291775365) and cpp (46.223782291775386) differ by less than 2E-14 (4.440892098500626e-16) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.616558e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.163799e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.687418e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.045277e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6890s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6885s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.45E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6900s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6895s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.43E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6130s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6068s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 90112 events => throughput is 1.46E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6486s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6423s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.43E+07 events/s *** (3) Compare 
MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.179274e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.002621e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.706047e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.703583e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.310976e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.103340e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.080551e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.079091e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.327429e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.065422e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.152072e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.155854e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.328654e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.093445e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.061951e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.071429e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index ac65217070..f0444104b4 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -4,8 +4,8 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,17 +15,17 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:19:41 +DATE: 2023-10-28_13:16:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3582s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3097s - [COUNTERS] Fortran MEs ( 1 ) : 0.0485s for 8192 events => throughput is 1.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3544s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3138s + [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < 
/tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3161s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2673s - [COUNTERS] Fortran MEs ( 1 ) : 0.0488s for 8192 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3076s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2675s + [COUNTERS] Fortran MEs ( 1 ) : 0.0401s for 8192 events => throughput is 2.04E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7673s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2278s - [COUNTERS] Fortran MEs ( 1 ) : 0.5395s for 90112 events => throughput is 1.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6371s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1961s + [COUNTERS] Fortran MEs ( 1 ) : 0.4410s for 90112 events => throughput is 2.04E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.69 [47.690706211693573] fbridge_mode=1 + [XSECTION] Cross section = 47.69 [47.690706767555099] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3486s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3073s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0413s for 8192 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3351s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3011s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0340s for 8192 events => 
throughput is 2.41E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.690708277600116) and cpp (47.690706211693573) differ by less than 4E-4 (4.331884795316654e-08) +OK! xsec from fortran (47.690708277600116) and cpp (47.690706767555099) differ by less than 4E-4 (3.1663296096162696e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -165,15 +165,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 46.22 [46.223782418787778] fbridge_mode=1 + [XSECTION] Cross section = 46.22 [46.223782605295497] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6743s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2245s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4497s for 90112 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6294s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2500s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3794s for 90112 events => throughput is 2.37E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (46.223782291775365) and cpp (46.223782418787778) differ by less than 4E-4 (2.7477720099255976e-09) +OK! xsec from fortran (46.223782291775365) and cpp (46.223782605295497) differ by less than 4E-4 (6.782658656945273e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.956288e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.407105e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.982512e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.379008e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.69 [47.690702562167019] fbridge_mode=1 + [XSECTION] Cross section = 47.69 [47.690702885183541] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2999s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2833s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0166s for 8192 events => throughput is 4.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3099s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2953s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0146s for 8192 events => throughput is 5.60E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.690708277600116) and cpp (47.690702562167019) differ by less than 4E-4 (1.1984374537199471e-07) +OK! 
xsec from fortran (47.690708277600116) and cpp (47.690702885183541) differ by less than 4E-4 (1.1307059111231865e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -241,15 +241,15 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 46.22 [46.223778631221009] fbridge_mode=1 + [XSECTION] Cross section = 46.22 [46.223778858016772] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3965s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2188s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1777s for 90112 events => throughput is 5.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3905s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2312s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1593s for 90112 events => throughput is 5.66E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (46.223782291775365) and cpp (46.223778631221009) differ by less than 4E-4 (7.919201272521548e-08) +OK! xsec from fortran (46.223782291775365) and cpp (46.223778858016772) differ by less than 4E-4 (7.428553927546488e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -258,12 +258,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.832476e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.339316e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.796478e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.342807e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,15 +284,15 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.69 [47.690694055768034] fbridge_mode=1 + [XSECTION] Cross section = 47.69 [47.690694374060818] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2883s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2798s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2855s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2779s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 8192 events => throughput is 1.07E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.690708277600116) and cpp (47.690694055768034) differ by less than 4E-4 (2.9820970570693106e-07) +OK! 
xsec from fortran (47.690708277600116) and cpp (47.690694374060818) differ by less than 4E-4 (2.9153560099359765e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -317,15 +317,15 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 46.22 [46.223775988760060] fbridge_mode=1 + [XSECTION] Cross section = 46.22 [46.223775951815753] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.2853s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1921s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0933s for 90112 events => throughput is 9.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3028s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2183s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0845s for 90112 events => throughput is 1.07E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (46.223782291775365) and cpp (46.223775988760060) differ by less than 4E-4 (1.3635870954598062e-07) +OK! xsec from fortran (46.223782291775365) and cpp (46.223775951815753) differ by less than 4E-4 (1.3715795843527445e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.545083e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.031930e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.445429e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.050564e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -360,15 +360,15 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.69 [47.690694055768034] fbridge_mode=1 + [XSECTION] Cross section = 47.69 [47.690694374060818] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2874s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2794s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2823s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2751s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0072s for 8192 events => throughput is 1.14E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.690708277600116) and cpp (47.690694055768034) differ by less than 4E-4 (2.9820970570693106e-07) +OK! 
xsec from fortran (47.690708277600116) and cpp (47.690694374060818) differ by less than 4E-4 (2.9153560099359765e-07) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,15 +393,15 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 46.22 [46.223775988760060] fbridge_mode=1 + [XSECTION] Cross section = 46.22 [46.223775951815753] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.2777s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1907s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0870s for 90112 events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3275s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2486s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0789s for 90112 events => throughput is 1.14E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (46.223782291775365) and cpp (46.223775988760060) differ by less than 4E-4 (1.3635870954598062e-07) +OK! xsec from fortran (46.223782291775365) and cpp (46.223775951815753) differ by less than 4E-4 (1.3715795843527445e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.937961e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.082923e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.006840e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.108645e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -436,15 +436,15 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.69 [47.690698865531559] fbridge_mode=1 + [XSECTION] Cross section = 47.69 [47.690698914467276] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2925s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2818s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0107s for 8192 events => throughput is 7.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2897s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2795s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0102s for 8192 events => throughput is 8.07E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.690708277600116) and cpp (47.690698865531559) differ by less than 4E-4 (1.9735644318785717e-07) +OK! 
xsec from fortran (47.690708277600116) and cpp (47.690698914467276) differ by less than 4E-4 (1.9633033720989346e-07) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -469,15 +469,15 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 46.22 [46.223780255562296] fbridge_mode=1 + [XSECTION] Cross section = 46.22 [46.223780273983500] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3226s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2041s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1185s for 90112 events => throughput is 7.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3363s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2264s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1099s for 90112 events => throughput is 8.20E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (46.223782291775365) and cpp (46.223780255562296) differ by less than 4E-4 (4.4051199799000074e-08) +OK! xsec from fortran (46.223782291775365) and cpp (46.223780273983500) differ by less than 4E-4 (4.3652677583772004e-08) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.089065e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.727236e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.293009e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.861479e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690703397697980] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6877s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6872s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.51E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6913s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6907s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.47E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223786763175951] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6161s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6107s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 90112 events => throughput is 1.66E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6475s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6421s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 90112 events => throughput is 1.68E+07 events/s *** (3) Compare 
MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.382566e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.293690e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.934525e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.917959e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.441859e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.783289e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.769075e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.775118e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.403190e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.883043e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.876277e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
2.851465e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.887286e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.383799e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.439919e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.407914e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index c3747a1448..6bc95cf474 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,8 +1,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:20:06 +DATE: 2023-10-28_13:16:27 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3588s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3101s - [COUNTERS] Fortran MEs ( 1 ) : 0.0486s for 8192 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3474s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3070s + [COUNTERS] Fortran MEs ( 1 ) : 0.0404s for 8192 events => throughput is 2.03E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3193s - 
[COUNTERS] Fortran Overhead ( 0 ) : 0.2707s - [COUNTERS] Fortran MEs ( 1 ) : 0.0487s for 8192 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3042s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2642s + [COUNTERS] Fortran MEs ( 1 ) : 0.0400s for 8192 events => throughput is 2.05E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7636s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2247s - [COUNTERS] Fortran MEs ( 1 ) : 0.5388s for 90112 events => throughput is 1.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6707s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2288s + [COUNTERS] Fortran MEs ( 1 ) : 0.4419s for 90112 events => throughput is 2.04E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3537s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3109s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0428s for 8192 events => throughput is 1.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3423s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3051s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0373s for 8192 events => throughput is 2.20E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM 
TOTAL : 1.7009s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2276s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4732s for 90112 events => throughput is 1.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7218s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2976s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4243s for 90112 events => throughput is 2.12E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.932354e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.220322e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.907654e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.204411e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3139s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2905s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0234s for 8192 events => throughput is 3.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3083s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2878s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0206s for 8192 events => throughput is 3.98E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] 
fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4697s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2111s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2585s for 90112 events => throughput is 3.49E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4647s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2357s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2290s for 90112 events => throughput is 3.94E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.342983e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.910371e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.391505e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.841808e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3006s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2861s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0145s for 8192 events => throughput is 5.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2956s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2830s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0126s for 8192 events => throughput is 6.49E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < 
/tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3584s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1999s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1584s for 90112 events => throughput is 5.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3644s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2244s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1400s for 90112 events => throughput is 6.44E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.413133e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.194929e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.510615e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.158421e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2939s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2806s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0133s for 8192 events => throughput is 6.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2944s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2831s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0113s for 8192 events => throughput is 7.23E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN 
xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3412s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1968s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1445s for 90112 events => throughput is 6.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3537s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2281s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1255s for 90112 events => throughput is 7.18E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.210141e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.949915e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.018773e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.076342e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3082s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2878s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0204s for 8192 events => throughput is 4.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3106s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2922s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0184s for 8192 events => throughput 
is 4.45E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.5311s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2846s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2465s for 90112 events => throughput is 3.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4474s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2441s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2033s for 90112 events => throughput is 4.43E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.423825e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.104872e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.530096e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.062333e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708266690699] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6929s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6924s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.49E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6907s + [COUNTERS] Fortran Overhead ( 0 ) : 
0.6902s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.47E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782303744791] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6044s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5982s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 90112 events => throughput is 1.45E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6486s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6423s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.43E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.130110e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.061036e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.628036e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.605008e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.274030e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.107641e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow 
summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.059654e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.064944e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.301020e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.102601e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.126562e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.137506e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.294170e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.083399e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.968784e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.034037e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 925cf1dd8b..3bbbee8e28 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -4,8 +4,8 @@ CUDACPP_BUILDDIR='.' 
make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,17 +15,17 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:20:33 +DATE: 2023-10-28_13:16:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -57,11 +57,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5609s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2258s - [COUNTERS] Fortran MEs ( 1 ) : 0.3351s for 8192 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5361s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2241s + [COUNTERS] Fortran MEs ( 1 ) : 0.3120s for 8192 events => throughput is 2.63E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -82,11 +82,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5566s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2205s - [COUNTERS] Fortran MEs ( 1 ) : 0.3361s for 8192 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5301s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2172s + [COUNTERS] Fortran MEs ( 1 ) : 0.3129s for 8192 events => throughput is 2.62E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** 
-------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.0766s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3919s - [COUNTERS] Fortran MEs ( 1 ) : 3.6847s for 90112 events => throughput is 2.45E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.8733s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3971s + [COUNTERS] Fortran MEs ( 1 ) : 3.4762s for 90112 events => throughput is 2.59E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,13 +134,13 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470791E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8617s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5333s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3285s for 8192 events => throughput is 2.49E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8908s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5691s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3217s for 8192 events => throughput is 2.55E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470791E-002) differ by less than 2E-14 (1.1102230246251565e-16) +OK! 
xsec from fortran (9.7196357922470764E-002) and cpp (9.7196357922470791E-002) differ by less than 2E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.2565s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6821s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.5744s for 90112 events => throughput is 2.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2396s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6791s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5604s for 90112 events => throughput is 2.53E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.585485e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.628768e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.602908e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.616955e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,13 +210,13 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470777E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5594s - [COUNTERS] Fortran Overhead ( 0 
) : 0.3875s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1718s for 8192 events => throughput is 4.77E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5630s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3859s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1771s for 8192 events => throughput is 4.63E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470777E-002) differ by less than 2E-14 (3.3306690738754696e-16) +OK! xsec from fortran (9.7196357922470764E-002) and cpp (9.7196357922470777E-002) differ by less than 2E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.4116s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5278s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8838s for 90112 events => throughput is 4.78E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.4252s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5492s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8760s for 90112 events => throughput is 4.80E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.886187e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.965550e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.863074e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.022472e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,13 +286,13 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3866s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3020s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0845s for 8192 events => throughput is 9.69E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3805s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2972s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0833s for 8192 events => throughput is 9.83E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470750E-002) differ by less than 2E-14 (5.551115123125783e-16) +OK! 
xsec from fortran (9.7196357922470764E-002) and cpp (9.7196357922470750E-002) differ by less than 2E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.3806s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4438s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9368s for 90112 events => throughput is 9.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.3448s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4317s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9132s for 90112 events => throughput is 9.87E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.810175e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.686669e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.814710e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.468054e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,13 +362,13 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3697s - [COUNTERS] Fortran Overhead ( 
0 ) : 0.2950s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0747s for 8192 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3664s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2920s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0744s for 8192 events => throughput is 1.10E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470750E-002) differ by less than 2E-14 (5.551115123125783e-16) +OK! xsec from fortran (9.7196357922470764E-002) and cpp (9.7196357922470750E-002) differ by less than 2E-14 (1.1102230246251565e-16) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.2633s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4344s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8288s for 90112 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2427s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4261s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8166s for 90112 events => throughput is 1.10E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.111782e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.136623e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.107532e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.141647e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,13 +438,13 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4288s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3258s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1030s for 8192 events => throughput is 7.95E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4202s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3185s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1017s for 8192 events => throughput is 8.05E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470750E-002) differ by less than 2E-14 (5.551115123125783e-16) +OK! 
xsec from fortran (9.7196357922470764E-002) and cpp (9.7196357922470750E-002) differ by less than 2E-14 (1.1102230246251565e-16) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.6474s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4850s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1624s for 90112 events => throughput is 7.75E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.5849s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4598s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1252s for 90112 events => throughput is 8.01E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.891678e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.875766e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.153326e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.969717e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,13 +514,13 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6666s - [COUNTERS] Fortran Overhead ( 0 ) 
: 0.6612s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.52E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6499s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6445s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.49E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196357922470764E-002) differ by less than 2E-14 (4.440892098500626e-16) +OK! xsec from fortran (9.7196357922470764E-002) and cpp (9.7196357922470764E-002) differ by less than 2E-14 (0.0) *** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8130s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7902s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0227s for 90112 events => throughput is 3.96E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8029s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7803s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0227s for 90112 events => throughput is 3.98E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.639819e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.614447e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.202614e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.355330e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.989805e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.811765e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.236697e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.239741e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.956986e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.840747e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.247459e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.251858e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** 
Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.990764e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.882435e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.731178e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.741625e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index fa99d034ca..1ddd406cf2 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -2,9 +2,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:21:14 +DATE: 2023-10-28_13:17:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -57,11 +57,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5523s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2203s - [COUNTERS] Fortran MEs ( 1 ) : 0.3320s for 8192 events => throughput is 2.47E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5330s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2187s + [COUNTERS] Fortran MEs ( 1 ) : 0.3143s for 8192 events => throughput is 2.61E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -82,11 +82,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 
[9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5511s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2185s - [COUNTERS] Fortran MEs ( 1 ) : 0.3325s for 8192 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5316s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2183s + [COUNTERS] Fortran MEs ( 1 ) : 0.3133s for 8192 events => throughput is 2.61E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.0773s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3954s - [COUNTERS] Fortran MEs ( 1 ) : 3.6819s for 90112 events => throughput is 2.45E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.8235s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3733s + [COUNTERS] Fortran MEs ( 1 ) : 3.4502s for 90112 events => throughput is 2.61E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196349725192449E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196349765248158E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8861s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5506s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3355s for 8192 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8327s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5203s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3124s for 8192 events => throughput is 2.62E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to 
MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196349725192449E-002) differ by less than 4E-4 (8.433729958845504e-08) +OK! xsec from fortran (9.7196357922470764E-002) and cpp (9.7196349765248158E-002) differ by less than 4E-4 (8.392518791033865e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -165,15 +165,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310860682799649E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310860767768514E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.1805s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6708s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.5097s for 90112 events => throughput is 2.57E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2294s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7666s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.4628s for 90112 events => throughput is 2.60E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310860682799649E-002) differ by less than 4E-4 (1.4013938864909647e-07) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310860767768514E-002) differ by less than 4E-4 (1.3909440088610836e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.644874e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.701385e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.623423e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.714632e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196334032667323E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196334183509370E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4066s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3108s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0959s for 8192 events => throughput is 8.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4009s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3081s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0928s for 8192 events => throughput is 8.83E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196334032667323E-002) differ by less than 4E-4 (2.4578908086603235e-07) +OK! 
xsec from fortran (9.7196357922470764E-002) and cpp (9.7196334183509370E-002) differ by less than 4E-4 (2.4423714939381114e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -241,15 +241,15 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310847525777316E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310847547651041E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.5043s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4504s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0538s for 90112 events => throughput is 8.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.4684s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4441s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0243s for 90112 events => throughput is 8.80E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310847525777316E-002) differ by less than 4E-4 (3.0195074296468505e-07) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310847547651041E-002) differ by less than 4E-4 (3.0168172948652483e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -258,12 +258,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.644259e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.982844e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.595838e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.952179e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,15 +284,15 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196330842071521E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196330801117323E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3044s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2603s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0441s for 8192 events => throughput is 1.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3013s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2585s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0429s for 8192 events => throughput is 1.91E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196330842071521E-002) differ by less than 4E-4 (2.786153705525152e-07) +OK! 
xsec from fortran (9.7196357922470764E-002) and cpp (9.7196330801117323E-002) differ by less than 4E-4 (2.790367255034454e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -317,15 +317,15 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310847485320789E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310847326088065E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.9365s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4416s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4949s for 90112 events => throughput is 1.82E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8658s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3951s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4706s for 90112 events => throughput is 1.91E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310847485320789E-002) differ by less than 4E-4 (3.024482967406428e-07) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310847326088065E-002) differ by less than 4E-4 (3.0440661691333304e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.877444e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.899231e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.866007e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.918892e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -360,15 +360,15 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196330842071521E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196330801117323E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.2968s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2571s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0397s for 8192 events => throughput is 2.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2928s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2543s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0385s for 8192 events => throughput is 2.13E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196330842071521E-002) differ by less than 4E-4 (2.786153705525152e-07) +OK! 
xsec from fortran (9.7196357922470764E-002) and cpp (9.7196330801117323E-002) differ by less than 4E-4 (2.790367255034454e-07) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,15 +393,15 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310847485320789E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310847326088065E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8329s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3969s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4360s for 90112 events => throughput is 2.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8142s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3916s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4226s for 90112 events => throughput is 2.13E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310847485320789E-002) differ by less than 4E-4 (3.024482967406428e-07) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310847326088065E-002) differ by less than 4E-4 (3.0440661691333304e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.064455e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.152263e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.134523e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.147217e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -436,15 +436,15 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196344068381207E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.0972 [9.7196344079460428E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3182s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2680s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0503s for 8192 events => throughput is 1.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3155s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2656s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0499s for 8192 events => throughput is 1.64E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196344068381207E-002) differ by less than 4E-4 (1.42537126879283e-07) +OK! 
xsec from fortran (9.7196357922470764E-002) and cpp (9.7196344079460428E-002) differ by less than 4E-4 (1.424231383939656e-07) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -469,15 +469,15 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.08131 [8.1310857803543385E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.08131 [8.1310857804286998E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.9728s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4203s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5524s for 90112 events => throughput is 1.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9509s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4027s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5481s for 90112 events => throughput is 1.64E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310857803543385E-002) differ by less than 4E-4 (1.755498595379379e-07) +OK! xsec from fortran (8.1310872077655555E-002) and cpp (8.1310857804286998E-002) differ by less than 4E-4 (1.7554071418679484e-07) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.608228e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.632673e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.603946e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.596476e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,13 +514,13 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196349366365994E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6467s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6458s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 9.73E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6420s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6411s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 9.63E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196349366365994E-002) differ by less than 4E-4 (8.802906814597833e-08) +OK! 
xsec from fortran (9.7196357922470764E-002) and cpp (9.7196349366365994E-002) differ by less than 4E-4 (8.802906770188912e-08) *** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310864949473968E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.7893s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7798s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0095s for 90112 events => throughput is 9.53E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8252s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8155s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0097s for 90112 events => throughput is 9.25E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.347402e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.300629e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.856435e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.854879e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.795868e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.663339e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = 
SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.305516e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.466672e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.791470e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.646830e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.491438e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.524923e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.627451e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.510623e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.615609e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.613146e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 8d56c45efe..721eefb7dd 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none - make USEBUILDDIR=1 AVX=sse4 + + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,17 +15,17 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:21:50 +DATE: 2023-10-28_13:18:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -57,11 +57,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5526s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2199s - [COUNTERS] Fortran MEs ( 1 ) : 0.3328s for 8192 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5337s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2185s + [COUNTERS] Fortran MEs ( 1 ) : 0.3152s for 8192 events => throughput is 2.60E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -82,11 +82,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.0972 [9.7196357922470805E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5518s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2183s - [COUNTERS] Fortran MEs ( 1 ) : 0.3335s for 8192 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5318s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2188s + [COUNTERS] Fortran MEs ( 1 ) : 0.3130s for 8192 events => throughput is 2.62E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** 
-------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.0409s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3789s - [COUNTERS] Fortran MEs ( 1 ) : 3.6620s for 90112 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.8408s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3808s + [COUNTERS] Fortran MEs ( 1 ) : 3.4599s for 90112 events => throughput is 2.60E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,13 +134,13 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358763382007E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8757s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5434s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3323s for 8192 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8640s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5392s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3248s for 8192 events => throughput is 2.52E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358763382007E-002) differ by less than 2E-4 (8.651674043846924e-09) +OK! 
xsec from fortran (9.7196357922470764E-002) and cpp (9.7196358763382007E-002) differ by less than 2E-4 (8.651674487936134e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872835011053E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.3915s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7212s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6703s for 90112 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2834s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6997s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5836s for 90112 events => throughput is 2.51E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.532584e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.576974e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.521877e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.583988e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,13 +210,13 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358804670396E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5544s - [COUNTERS] Fortran Overhead ( 0 
) : 0.3845s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1699s for 8192 events => throughput is 4.82E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5380s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3748s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1632s for 8192 events => throughput is 5.02E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358804670396E-002) differ by less than 2E-4 (9.076467577529002e-09) +OK! xsec from fortran (9.7196357922470764E-002) and cpp (9.7196358804670396E-002) differ by less than 2E-4 (9.076468021618211e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872836789727E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.3887s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5301s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8586s for 90112 events => throughput is 4.85E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.3341s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5132s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8209s for 90112 events => throughput is 4.95E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.952919e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.045666e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.914380e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.075727e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,13 +286,13 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3876s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3037s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0839s for 8192 events => throughput is 9.76E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3820s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2999s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0821s for 8192 events => throughput is 9.98E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358586501358E-002) differ by less than 2E-4 (6.831845977828266e-09) +OK! 
xsec from fortran (9.7196357922470764E-002) and cpp (9.7196358586501358E-002) differ by less than 2E-4 (6.831846421917476e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.3852s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4574s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9277s for 90112 events => throughput is 9.71E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.3416s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4347s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9069s for 90112 events => throughput is 9.94E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.889061e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.012431e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.855619e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.019636e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,13 +362,13 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3661s - [COUNTERS] Fortran Overhead ( 0 
) : 0.2915s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0745s for 8192 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3604s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2881s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0723s for 8192 events => throughput is 1.13E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358586501358E-002) differ by less than 2E-4 (6.831845977828266e-09) +OK! xsec from fortran (9.7196357922470764E-002) and cpp (9.7196358586501358E-002) differ by less than 2E-4 (6.831846421917476e-09) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.2630s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4388s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8242s for 90112 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2152s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4183s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7969s for 90112 events => throughput is 1.13E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.127984e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.155859e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.119463e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.142606e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,13 +438,13 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358757578441E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4328s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3253s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1075s for 8192 events => throughput is 7.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4284s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3240s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1044s for 8192 events => throughput is 7.85E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358757578441E-002) differ by less than 2E-4 (8.591964251181139e-09) +OK! 
xsec from fortran (9.7196357922470764E-002) and cpp (9.7196358757578441E-002) differ by less than 2E-4 (8.591964695270349e-09) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872803699391E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.6477s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4725s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1752s for 90112 events => throughput is 7.67E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6056s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4514s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1542s for 90112 events => throughput is 7.81E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.668693e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.743118e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.591243e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.687089e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,13 +514,13 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358102981245E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.7068s - [COUNTERS] Fortran Overhead ( 0 ) : 
0.7013s + [COUNTERS] PROGRAM TOTAL : 0.6479s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6424s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.50E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.7196357922470805E-002) and cpp (9.7196358102981245E-002) differ by less than 2E-4 (1.8571728599425796e-09) +OK! xsec from fortran (9.7196357922470764E-002) and cpp (9.7196358102981245E-002) differ by less than 2E-4 (1.8571733040317895e-09) *** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872068634174E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8933s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8700s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0233s for 90112 events => throughput is 3.87E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8072s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7844s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 90112 events => throughput is 3.95E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.611008e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.622531e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.220129e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.055833e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.993875e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.842336e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.234445e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.236411e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.000644e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.824593e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.243443e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.245346e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** 
Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.965395e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.823254e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.708140e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.716220e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 62d0e45c34..aac392d268 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -4,8 +4,8 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:22:32 +DATE: 2023-10-28_13:18:50 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -57,11 +57,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3806s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2793s - [COUNTERS] Fortran MEs ( 1 ) : 4.1013s for 8192 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3901s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2742s + [COUNTERS] Fortran MEs ( 1 ) : 4.1159s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -82,11 +82,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] 
Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3901s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2745s - [COUNTERS] Fortran MEs ( 1 ) : 4.1156s for 8192 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3341s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2715s + [COUNTERS] Fortran MEs ( 1 ) : 4.0627s for 8192 events => throughput is 2.02E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 47.2170s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8938s - [COUNTERS] Fortran MEs ( 1 ) : 45.3231s for 90112 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 46.8744s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8583s + [COUNTERS] Fortran MEs ( 1 ) : 45.0161s for 90112 events => throughput is 2.00E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,13 +134,13 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.7189s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4329s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.2860s for 8192 events => throughput is 1.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.5573s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3683s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.1891s for 8192 events => throughput is 1.96E+03 events/s *** (2-none) Compare 
MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277311352998E-004) differ by less than 2E-14 (4.440892098500626e-16) +OK! xsec from fortran (3.6277277311352988E-004) and cpp (3.6277277311352998E-004) differ by less than 2E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421161E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 53.2752s - [COUNTERS] Fortran Overhead ( 0 ) : 6.0306s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.2446s for 90112 events => throughput is 1.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 52.8537s + [COUNTERS] Fortran Overhead ( 0 ) : 5.9783s + [COUNTERS] CudaCpp MEs ( 2 ) : 46.8754s for 90112 events => throughput is 1.92E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.975932e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.010493e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.969422e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.010686e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,13 +210,13 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352993E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.7792s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4930s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2862s for 8192 events => throughput is 3.58E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7647s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4538s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3108s for 8192 events => throughput is 3.55E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277311352993E-004) differ by less than 2E-14 (2.220446049250313e-16) +OK! 
xsec from fortran (3.6277277311352988E-004) and cpp (3.6277277311352993E-004) differ by less than 2E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 29.2447s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0623s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.1823s for 90112 events => throughput is 3.58E+03 events/s + [COUNTERS] PROGRAM TOTAL : 28.7512s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1062s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.6450s for 90112 events => throughput is 3.66E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.719602e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.797450e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.704578e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.797670e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,13 +286,13 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.2095s - [COUNTERS] Fortran 
Overhead ( 0 ) : 1.2292s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9803s for 8192 events => throughput is 8.36E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.1814s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2134s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9680s for 8192 events => throughput is 8.46E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277311353009E-004) differ by less than 2E-14 (6.661338147750939e-16) +OK! xsec from fortran (3.6277277311352988E-004) and cpp (3.6277277311353009E-004) differ by less than 2E-14 (6.661338147750939e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -317,15 +317,15 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 13.7366s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8124s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.9242s for 90112 events => throughput is 8.25E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.5022s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8140s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.6882s for 90112 events => throughput is 8.43E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421164E-004) differ by less than 2E-14 (0.0) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421158E-004) differ by less than 2E-14 (3.3306690738754696e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.553162e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.687840e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.569833e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.709461e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,13 +362,13 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.9852s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1172s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8679s for 8192 events => throughput is 9.44E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9534s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1017s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8517s for 8192 events => throughput is 9.62E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277311353009E-004) differ by less than 2E-14 (6.661338147750939e-16) +OK! 
xsec from fortran (3.6277277311352988E-004) and cpp (3.6277277311353009E-004) differ by less than 2E-14 (6.661338147750939e-16) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,15 +393,15 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 12.2612s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6975s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.5637s for 90112 events => throughput is 9.42E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.2472s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7179s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.5293s for 90112 events => throughput is 9.46E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421164E-004) differ by less than 2E-14 (0.0) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421158E-004) differ by less than 2E-14 (3.3306690738754696e-16) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.732365e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.935854e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.738434e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.898733e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,13 +438,13 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.3926s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3264s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0661s for 8192 events => throughput is 7.68E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.3911s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3291s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0620s for 8192 events => throughput is 7.71E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277311353009E-004) differ by less than 2E-14 (6.661338147750939e-16) +OK! 
xsec from fortran (3.6277277311352988E-004) and cpp (3.6277277311353009E-004) differ by less than 2E-14 (6.661338147750939e-16) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -469,15 +469,15 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 14.7899s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9106s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.8793s for 90112 events => throughput is 7.59E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.6332s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9213s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.7119s for 90112 events => throughput is 7.69E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421164E-004) differ by less than 2E-14 (0.0) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803725748421158E-004) differ by less than 2E-14 (3.3306690738754696e-16) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.707688e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.814694e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.746281e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.756632e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,13 +514,13 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.7947s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7633s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0315s for 8192 events => throughput is 2.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8049s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7728s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0320s for 8192 events => throughput is 2.56E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277311352998E-004) differ by less than 2E-14 (4.440892098500626e-16) +OK! 
xsec from fortran (3.6277277311352988E-004) and cpp (3.6277277311352998E-004) differ by less than 2E-14 (2.220446049250313e-16) *** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -547,8 +547,8 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421161E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.6726s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3253s + [COUNTERS] PROGRAM TOTAL : 2.7088s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3616s [COUNTERS] CudaCpp MEs ( 2 ) : 0.3472s for 90112 events => throughput is 2.60E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.298413e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.294446e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.515584e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.526802e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.121587e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.115140e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow 
summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.140818e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.151029e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.116718e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.105278e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.158341e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.148035e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.107036e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.107513e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.429475e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.437634e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index a11d40fa18..eda915608b 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg CUDACPP_BUILDDIR='.' 
+make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,25 +15,25 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2023-10-25_19:26:45 +DATE: 2023-10-28_13:23:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -57,11 +57,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3808s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2757s - [COUNTERS] Fortran MEs ( 1 ) : 4.1052s for 8192 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4392s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2797s + [COUNTERS] Fortran MEs ( 1 ) : 4.1594s for 8192 events => throughput is 1.97E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -82,11 +82,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3737s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2719s - [COUNTERS] Fortran MEs ( 1 ) : 4.1018s for 8192 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3436s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2686s + [COUNTERS] Fortran MEs ( 1 ) : 4.0750s for 8192 events => throughput is 2.01E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 
(create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 47.1736s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9032s - [COUNTERS] Fortran MEs ( 1 ) : 45.2704s for 90112 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 46.6315s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8499s + [COUNTERS] Fortran MEs ( 1 ) : 44.7816s for 90112 events => throughput is 2.01E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0003628 [3.6277396515517582E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0003628 [3.6277396490802749E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.4177s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2835s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1342s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.4026s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2286s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.1741s for 8192 events => throughput is 1.96E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277396515517582E-004) differ by less than 4E-4 (3.285918168005608e-06) +OK! 
xsec from fortran (3.6277277311352988E-004) and cpp (3.6277396490802749E-004) differ by less than 4E-4 (3.2852368918590003e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -165,15 +165,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803774605164224E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803774602344628E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 51.5557s - [COUNTERS] Fortran Overhead ( 0 ) : 5.8491s - [COUNTERS] CudaCpp MEs ( 2 ) : 45.7066s for 90112 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 50.5071s + [COUNTERS] Fortran Overhead ( 0 ) : 5.7965s + [COUNTERS] CudaCpp MEs ( 2 ) : 44.7106s for 90112 events => throughput is 2.02E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803774605164224E-004) differ by less than 4E-4 (3.091469938043545e-06) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803774602344628E-004) differ by less than 4E-4 (3.0912915247593986e-06) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.033185e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.074741e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.041307e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.057737e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0003628 [3.6277389113409186E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0003628 [3.6277389126121586E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.5057s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3763s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1294s for 8192 events => throughput is 7.25E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5511s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4494s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1017s for 8192 events => throughput is 7.44E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277389113409186E-004) differ by less than 4E-4 (3.0818756115991164e-06) +OK! 
xsec from fortran (3.6277277311352988E-004) and cpp (3.6277389126121586E-004) differ by less than 4E-4 (3.0822260348450925e-06) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -241,15 +241,15 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803771885814218E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803771887543366E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 15.4913s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9536s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.5377s for 90112 events => throughput is 7.19E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.1225s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9526s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.1699s for 90112 events => throughput is 7.40E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803771885814218E-004) differ by less than 4E-4 (2.9193997534981975e-06) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803771887543366E-004) differ by less than 4E-4 (2.9195091675315865e-06) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -258,12 +258,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.412053e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.579541e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.406103e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.559073e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,15 +284,15 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0003628 [3.6277390171873933E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0003628 [3.6277390198115864E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.2465s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7541s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4925s for 8192 events => throughput is 1.66E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.2361s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7465s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4896s for 8192 events => throughput is 1.67E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277390171873933E-004) differ by less than 4E-4 (3.1110526841349184e-06) +OK! 
xsec from fortran (3.6277277311352988E-004) and cpp (3.6277390198115864E-004) differ by less than 4E-4 (3.111776055053639e-06) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -317,15 +317,15 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803774410472313E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803774416711566E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.7561s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3072s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4489s for 90112 events => throughput is 1.65E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.7262s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3331s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.3931s for 90112 events => throughput is 1.67E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803774410472313E-004) differ by less than 4E-4 (3.0791505700733524e-06) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803774416711566E-004) differ by less than 4E-4 (3.079545366491132e-06) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.691454e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.717269e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.694442e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.720207e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -360,15 +360,15 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0003628 [3.6277390171873933E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0003628 [3.6277390198115864E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.1341s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6956s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4385s for 8192 events => throughput is 1.87E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.1137s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6861s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4276s for 8192 events => throughput is 1.92E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277390171873933E-004) differ by less than 4E-4 (3.1110526841349184e-06) +OK! 
xsec from fortran (3.6277277311352988E-004) and cpp (3.6277390198115864E-004) differ by less than 4E-4 (3.111776055053639e-06) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,15 +393,15 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803774410472313E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803774416711566E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.1002s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2595s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.8407s for 90112 events => throughput is 1.86E+04 events/s + [COUNTERS] PROGRAM TOTAL : 6.9951s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2754s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.7197s for 90112 events => throughput is 1.91E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803774410472313E-004) differ by less than 4E-4 (3.0791505700733524e-06) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803774416711566E-004) differ by less than 4E-4 (3.079545366491132e-06) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.917142e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.959355e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.912635e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.901163e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -436,15 +436,15 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0003628 [3.6277396414214383E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0003628 [3.6277396394633404E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.3236s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7938s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5298s for 8192 events => throughput is 1.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.3121s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7881s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5240s for 8192 events => throughput is 1.56E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277396414214383E-004) differ by less than 4E-4 (3.2831256981946666e-06) +OK! 
xsec from fortran (3.6277277311352988E-004) and cpp (3.6277396394633404E-004) differ by less than 4E-4 (3.2825859392904277e-06) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -469,15 +469,15 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000158 [1.5803777740743528E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000158 [1.5803777741065333E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 8.3515s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4437s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.9078s for 90112 events => throughput is 1.53E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.1307s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3840s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.7467s for 90112 events => throughput is 1.57E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803777740743528E-004) differ by less than 4E-4 (3.289877538392716e-06) +OK! xsec from fortran (1.5803725748421164E-004) and cpp (1.5803777741065333E-004) differ by less than 4E-4 (3.2898979009932106e-06) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.556068e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.586523e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.554806e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.580672e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,13 +514,13 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277400478491260E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.7609s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7395s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7681s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7468s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0213s for 8192 events => throughput is 3.85E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277400478491260E-004) differ by less than 4E-4 (3.3951593780834344e-06) +OK! 
xsec from fortran (3.6277277311352988E-004) and cpp (3.6277400478491260E-004) differ by less than 4E-4 (3.3951593780834344e-06) *** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -547,8 +547,8 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803779990154892E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.5388s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3037s + [COUNTERS] PROGRAM TOTAL : 2.5688s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3337s [COUNTERS] CudaCpp MEs ( 2 ) : 0.2351s for 90112 events => throughput is 3.83E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.602401e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.589994e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.943641e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.944592e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.505586e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.486310e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow 
summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.637854e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.668788e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.505362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.486395e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.630323e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.665691e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.491202e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.463738e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.522012e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.523197e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 344f040590..fe5b743267 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -3,9 +3,9 @@ CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -16,12 +16,12 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:30:04 +DATE: 2023-10-28_13:26:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -57,11 +57,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3869s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2765s - [COUNTERS] Fortran MEs ( 1 ) : 4.1104s for 8192 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3386s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2704s + [COUNTERS] Fortran MEs ( 1 ) : 4.0682s for 8192 events => throughput is 2.01E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -82,11 +82,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0003628 [3.6277277311352982E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3884s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2753s - [COUNTERS] Fortran MEs ( 1 ) : 4.1132s for 8192 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3271s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2676s + [COUNTERS] Fortran MEs ( 1 ) : 4.0595s for 8192 events => throughput is 2.02E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) 
*** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 47.2155s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9013s - [COUNTERS] Fortran MEs ( 1 ) : 45.3142s for 90112 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 46.7393s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8552s + [COUNTERS] Fortran MEs ( 1 ) : 44.8841s for 90112 events => throughput is 2.01E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,13 +134,13 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277432965013E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.8199s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4948s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3251s for 8192 events => throughput is 1.89E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.6228s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3950s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.2278s for 8192 events => throughput is 1.94E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277432965013E-004) differ by less than 2E-4 (3.352291999547674e-09) +OK! 
xsec from fortran (3.6277277311352988E-004) and cpp (3.6277277432965013E-004) differ by less than 2E-4 (3.352291999547674e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725813026109E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 53.9672s - [COUNTERS] Fortran Overhead ( 0 ) : 6.0490s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.9182s for 90112 events => throughput is 1.88E+03 events/s + [COUNTERS] PROGRAM TOTAL : 52.8005s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0032s + [COUNTERS] CudaCpp MEs ( 2 ) : 46.7973s for 90112 events => throughput is 1.93E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.959802e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.984536e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.958905e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.986255e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,13 +210,13 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277430934464E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.7279s - [COUNTERS] Fortran 
Overhead ( 0 ) : 2.4759s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2519s for 8192 events => throughput is 3.64E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6430s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4316s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2114s for 8192 events => throughput is 3.70E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277430934464E-004) differ by less than 2E-4 (3.296318995538172e-09) +OK! xsec from fortran (3.6277277311352988E-004) and cpp (3.6277277430934464E-004) differ by less than 2E-4 (3.296318995538172e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725816246317E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 29.0177s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0402s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.9775s for 90112 events => throughput is 3.61E+03 events/s + [COUNTERS] PROGRAM TOTAL : 28.3439s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0168s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.3271s for 90112 events => throughput is 3.70E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.730381e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.788272e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.728042e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.779315e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,13 +286,13 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.1830s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2180s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9649s for 8192 events => throughput is 8.49E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.1986s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2363s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9623s for 8192 events => throughput is 8.51E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277419683297E-004) differ by less than 2E-4 (2.9861755290738756e-09) +OK! 
xsec from fortran (3.6277277311352988E-004) and cpp (3.6277277419683297E-004) differ by less than 2E-4 (2.9861753070292707e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 13.5258s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8006s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.7251s for 90112 events => throughput is 8.40E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.4918s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8244s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.6674s for 90112 events => throughput is 8.45E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.577284e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.777106e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.645443e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.780881e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,13 +362,13 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.9705s - [COUNTERS] Fortran 
Overhead ( 0 ) : 1.1082s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8624s for 8192 events => throughput is 9.50E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9468s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0986s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8482s for 8192 events => throughput is 9.66E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277419683297E-004) differ by less than 2E-4 (2.9861755290738756e-09) +OK! xsec from fortran (3.6277277311352988E-004) and cpp (3.6277277419683297E-004) differ by less than 2E-4 (2.9861753070292707e-09) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 12.1916s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6788s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.5128s for 90112 events => throughput is 9.47E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.0437s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6937s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.3500s for 90112 events => throughput is 9.64E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.762571e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.926013e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.791935e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.924664e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,13 +438,13 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.4166s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3396s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0770s for 8192 events => throughput is 7.61E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.3947s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3271s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0675s for 8192 events => throughput is 7.67E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277419683297E-004) differ by less than 2E-4 (2.9861755290738756e-09) +OK! 
xsec from fortran (3.6277277311352988E-004) and cpp (3.6277277419683297E-004) differ by less than 2E-4 (2.9861753070292707e-09) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 14.9279s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9270s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.0009s for 90112 events => throughput is 7.51E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.6834s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9387s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.7447s for 90112 events => throughput is 7.67E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.723112e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.783001e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.593326e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.767596e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,13 +514,13 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277293084707E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.7973s - [COUNTERS] Fortran 
Overhead ( 0 ) : 0.7658s + [COUNTERS] PROGRAM TOTAL : 0.7998s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7683s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0315s for 8192 events => throughput is 2.60E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.6277277311352982E-004) and cpp (3.6277277293084707E-004) differ by less than 2E-4 (5.035735162195465e-10) +OK! xsec from fortran (3.6277277311352988E-004) and cpp (3.6277277293084707E-004) differ by less than 2E-4 (5.03573627241849e-10) *** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725738731039E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.6608s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3213s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3395s for 90112 events => throughput is 2.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.7129s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3677s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3453s for 90112 events => throughput is 2.61E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.295568e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.282969e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.529340e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.538257e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.107713e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.114802e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.153742e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.177837e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.126841e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.115570e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.176580e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.155287e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 
--bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.122237e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.108118e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.438612e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.436982e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 90411e1b5b..909442f839 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_19:34:37 +DATE: 2023-10-28_13:30:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -51,552 +51,14 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 - [UNWEIGHT] Wrote 1 events (found 166 events) - [COUNTERS] PROGRAM TOTAL : 96.8592s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4672s - [COUNTERS] Fortran MEs ( 1 ) : 96.3920s for 8192 events => throughput is 8.50E+01 events/s - -*** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 97.0100s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4404s - [COUNTERS] Fortran MEs ( 1 ) : 96.5697s for 8192 events => throughput is 8.48E+01 events/s - -*** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1358436158813976E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1062.8511s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0497s - [COUNTERS] Fortran MEs ( 1 ) : 1058.8014s for 90112 events => throughput is 8.51E+01 events/s - -*** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1693100945435831E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 221.3790s - [COUNTERS] Fortran Overhead ( 0 ) : 101.7058s - [COUNTERS] CudaCpp MEs ( 2 ) : 119.6732s for 8192 events => throughput is 6.85E+01 events/s - -*** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1693100945435831E-006) differ by less than 2E-14 (1.9984014443252818e-15) - -*** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-none) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1358436158813953E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1450.0547s - [COUNTERS] Fortran Overhead ( 0 ) : 106.7786s - [COUNTERS] CudaCpp MEs ( 2 ) : 1343.2761s for 90112 events => throughput is 6.71E+01 events/s - -*** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813953E-007) differ by less than 2E-14 (1.1102230246251565e-15) - -*** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.310154e+01 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.288200e+01 ) sec^-1 - -*** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1693100945435827E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 107.4772s - [COUNTERS] Fortran Overhead ( 0 ) : 49.8239s - [COUNTERS] CudaCpp MEs ( 2 ) : 57.6534s for 8192 events => throughput is 1.42E+02 events/s - -*** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1693100945435827E-006) differ by less than 2E-14 (1.5543122344752192e-15) - -*** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-sse4) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 689.8975s - [COUNTERS] Fortran Overhead ( 0 ) : 53.4603s - [COUNTERS] CudaCpp MEs ( 2 ) : 636.4372s for 90112 events => throughput is 1.42E+02 events/s - -*** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007) differ by less than 2E-14 (8.881784197001252e-16) - -*** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.666029e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.661964e+02 ) sec^-1 - -*** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1693100945435829E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 51.5576s - [COUNTERS] Fortran Overhead ( 0 ) : 23.6406s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.9170s for 8192 events => throughput is 2.93E+02 events/s - -*** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1693100945435829E-006) differ by less than 2E-14 (1.7763568394002505e-15) - -*** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-avx2) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 332.8698s - [COUNTERS] Fortran Overhead ( 0 ) : 27.2762s - [COUNTERS] CudaCpp MEs ( 2 ) : 305.5936s for 90112 events => throughput is 2.95E+02 events/s - -*** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007) differ by less than 2E-14 (8.881784197001252e-16) - -*** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.564287e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.571126e+02 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1693100945435829E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 45.7634s - [COUNTERS] Fortran Overhead ( 0 ) : 20.8536s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.9098s for 8192 events => throughput is 3.29E+02 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1693100945435829E-006) differ by less than 2E-14 (1.7763568394002505e-15) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 298.7296s - [COUNTERS] Fortran Overhead ( 0 ) : 24.5229s - [COUNTERS] CudaCpp MEs ( 2 ) : 274.2068s for 90112 events => throughput is 3.29E+02 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007) differ by less than 2E-14 (8.881784197001252e-16) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.037302e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.011224e+02 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1693100945435829E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 46.0178s - [COUNTERS] Fortran Overhead ( 0 ) : 22.2114s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.8064s for 8192 events => throughput is 3.44E+02 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1693100945435829E-006) differ by less than 2E-14 (1.7763568394002505e-15) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 285.3431s - [COUNTERS] Fortran Overhead ( 0 ) : 25.8685s - [COUNTERS] CudaCpp MEs ( 2 ) : 259.4745s for 90112 events => throughput is 3.47E+02 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007) differ by less than 2E-14 (8.881784197001252e-16) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.723231e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.765137e+02 ) sec^-1 - -*** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1693100945435838E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 4.1896s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1063s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0833s for 8192 events => throughput is 7.56E+03 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1693100945435838E-006) differ by less than 2E-14 (2.4424906541753444e-15) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 18.5925s - [COUNTERS] Fortran Overhead ( 0 ) : 6.7176s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.8749s for 90112 events => throughput is 7.59E+03 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007) differ by less than 2E-14 (8.881784197001252e-16) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.538676e+03 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.266826e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.276191e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.572409e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.231100e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.464972e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.233366e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = 
SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.245300e+03 ) sec^-1 - -TEST COMPLETED +ERROR! ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' failed +d R # 5 > -0.0 -0.0 -0.0 0.4 0.4 +d R # 6 > -0.0 -0.0 -0.0 -0.0 0.4 +s min # 3> 0.0119716.0 29929.0 29929.0 0.0 +s min # 4> 0.0 0.0 29929.0 29929.0 0.0 +s min # 5> 0.0 0.0 0.0 0.0 0.0 +s min # 6> 0.0 0.0 0.0 0.0 0.0 +xqcutij # 3> 0.0 0.0 0.0 0.0 0.0 +xqcutij # 4> 0.0 0.0 0.0 0.0 0.0 +xqcutij # 5> 0.0 0.0 0.0 0.0 0.0 +xqcutij # 6> 0.0 0.0 0.0 0.0 0.0 diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 93e9694d2a..58ec19bcc2 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -3,8 +3,8 @@ CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,25 +15,25 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2023-10-25_21:02:37 +DATE: 2023-10-28_13:30:50 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -51,552 +51,14 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 - [UNWEIGHT] Wrote 1 events (found 166 events) - [COUNTERS] PROGRAM TOTAL : 97.0230s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4439s - [COUNTERS] Fortran MEs ( 1 ) : 96.5791s for 8192 events => throughput is 8.48E+01 events/s - -*** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 98.3689s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4421s - [COUNTERS] Fortran MEs ( 1 ) : 97.9268s for 8192 events => throughput is 8.37E+01 events/s - -*** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1358436158813976E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1065.5287s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0935s - [COUNTERS] Fortran MEs ( 1 ) : 1061.4352s for 90112 events => throughput is 8.49E+01 events/s - -*** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1694768395202781E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 198.8176s - [COUNTERS] Fortran Overhead ( 0 ) : 92.5000s - [COUNTERS] CudaCpp MEs ( 2 ) : 106.3176s for 8192 events => throughput is 7.71E+01 events/s - -*** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1694768395202781E-006) differ by less than 4E-4 (0.00014260116069753082) - -*** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-none) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1361436140448921E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1266.1327s - [COUNTERS] Fortran Overhead ( 0 ) : 95.7086s - [COUNTERS] CudaCpp MEs ( 2 ) : 1170.4241s for 90112 events => throughput is 7.70E+01 events/s - -*** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361436140448921E-007) differ by less than 4E-4 (0.00014045886190539036) - -*** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.002269e+01 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.967415e+01 ) sec^-1 - -*** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1694765850076731E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 49.7619s - [COUNTERS] Fortran Overhead ( 0 ) : 23.4310s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.3309s for 8192 events => throughput is 3.11E+02 events/s - -*** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1694765850076731E-006) differ by less than 4E-4 (0.0001423835002103413) - -*** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-sse4) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1361430662723898E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 316.9053s - [COUNTERS] Fortran Overhead ( 0 ) : 27.1348s - [COUNTERS] CudaCpp MEs ( 2 ) : 289.7704s for 90112 events => throughput is 3.11E+02 events/s - -*** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361430662723898E-007) differ by less than 4E-4 (0.00014020239532785972) - -*** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.586738e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.580652e+02 ) sec^-1 - -*** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1694764962310603E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 25.7462s - [COUNTERS] Fortran Overhead ( 0 ) : 11.9729s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.7733s for 8192 events => throughput is 5.95E+02 events/s - -*** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1694764962310603E-006) differ by less than 4E-4 (0.00014230757799493787) - -*** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-avx2) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1361430432807771E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 168.1713s - [COUNTERS] Fortran Overhead ( 0 ) : 15.4495s - [COUNTERS] CudaCpp MEs ( 2 ) : 152.7218s for 90112 events => throughput is 5.90E+02 events/s - -*** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361430432807771E-007) differ by less than 4E-4 (0.00014019163067602314) - -*** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.197172e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.226176e+02 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1694764962310603E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 22.7901s - [COUNTERS] Fortran Overhead ( 0 ) : 10.6491s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.1410s for 8192 events => throughput is 6.75E+02 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1694764962310603E-006) differ by less than 4E-4 (0.00014230757799493787) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1361430432807771E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 147.9220s - [COUNTERS] Fortran Overhead ( 0 ) : 14.3147s - [COUNTERS] CudaCpp MEs ( 2 ) : 133.6073s for 90112 events => throughput is 6.74E+02 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361430432807771E-007) differ by less than 4E-4 (0.00014019163067602314) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.019572e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.643340e+02 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1694767969588676E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 23.2769s - [COUNTERS] Fortran Overhead ( 0 ) : 11.3993s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.8776s for 8192 events => throughput is 6.90E+02 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1694767969588676E-006) differ by less than 4E-4 (0.0001425647619606174) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1361435931847224E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 144.2454s - [COUNTERS] Fortran Overhead ( 0 ) : 15.1090s - [COUNTERS] CudaCpp MEs ( 2 ) : 129.1364s for 90112 events => throughput is 6.98E+02 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361435931847224E-007) differ by less than 4E-4 (0.00014044909519328463) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.574328e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.606464e+02 ) sec^-1 - -*** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1694770708195000E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 2.4561s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9552s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5008s for 8192 events => throughput is 1.64E+04 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1694770708195000E-006) differ by less than 4E-4 (0.00014279896898039546) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1361443477565659E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 10.9874s - [COUNTERS] Fortran Overhead ( 0 ) : 5.5585s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4289s for 90112 events => throughput is 1.66E+04 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361443477565659E-007) differ by less than 4E-4 (0.0001408023850304474) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.636533e+04 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.620348e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.363235e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.405136e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.330068e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.392338e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.319690e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = 
SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.432306e+03 ) sec^-1 - -TEST COMPLETED +ERROR! ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' failed +d R # 5 > -0.0 -0.0 -0.0 0.4 0.4 +d R # 6 > -0.0 -0.0 -0.0 -0.0 0.4 +s min # 3> 0.0119716.0 29929.0 29929.0 0.0 +s min # 4> 0.0 0.0 29929.0 29929.0 0.0 +s min # 5> 0.0 0.0 0.0 0.0 0.0 +s min # 6> 0.0 0.0 0.0 0.0 0.0 +xqcutij # 3> 0.0 0.0 0.0 0.0 0.0 +xqcutij # 4> 0.0 0.0 0.0 0.0 0.0 +xqcutij # 5> 0.0 0.0 0.0 0.0 0.0 +xqcutij # 6> 0.0 0.0 0.0 0.0 0.0 diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index fd7d860c5e..f93dee6d06 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-25_22:08:36 +DATE: 2023-10-28_13:30:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -51,552 +51,14 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 - [UNWEIGHT] Wrote 1 events (found 166 events) - [COUNTERS] PROGRAM TOTAL : 97.1556s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4422s - [COUNTERS] Fortran MEs ( 1 ) : 96.7134s for 8192 events => throughput is 8.47E+01 events/s - -*** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1693100945435808E-006] fbridge_mode=0 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 96.9689s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4446s - [COUNTERS] Fortran MEs ( 1 ) : 96.5243s for 8192 events => throughput is 8.49E+01 events/s - -*** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1358436158813976E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1064.4592s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0566s - [COUNTERS] Fortran MEs ( 1 ) : 1060.4026s for 90112 events => throughput is 8.50E+01 events/s - -*** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1693101016896846E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 212.5728s - [COUNTERS] Fortran Overhead ( 0 ) : 98.0572s - [COUNTERS] CudaCpp MEs ( 2 ) : 114.5156s for 8192 events => throughput is 7.15E+01 events/s - -*** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1693101016896846E-006) differ by less than 2E-4 (6.1113847316107694e-09) - -*** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-none) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1358436275882778E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 1418.4218s - [COUNTERS] Fortran Overhead ( 0 ) : 103.9288s - [COUNTERS] CudaCpp MEs ( 2 ) : 1314.4929s for 90112 events => throughput is 6.86E+01 events/s - -*** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436275882778E-007) differ by less than 2E-4 (5.48115042242614e-09) - -*** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.001258e+01 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.939164e+01 ) sec^-1 - -*** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1693101020910778E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 111.7906s - [COUNTERS] Fortran Overhead ( 0 ) : 51.2589s - [COUNTERS] CudaCpp MEs ( 2 ) : 60.5317s for 8192 events => throughput is 1.35E+02 events/s - -*** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1693101020910778E-006) differ by less than 2E-4 (6.4546581413083e-09) - -*** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-sse4) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1358436284111598E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 721.3517s - [COUNTERS] Fortran Overhead ( 0 ) : 54.9246s - [COUNTERS] CudaCpp MEs ( 2 ) : 666.4271s for 90112 events => throughput is 1.35E+02 events/s - -*** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436284111598E-007) differ by less than 2E-4 (5.866422903011426e-09) - -*** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.612539e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.614353e+02 ) sec^-1 - -*** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1693101021831071E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 49.1637s - [COUNTERS] Fortran Overhead ( 0 ) : 22.7432s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.4205s for 8192 events => throughput is 3.10E+02 events/s - -*** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1693101021831071E-006) differ by less than 2E-4 (6.533362073568583e-09) - -*** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-avx2) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1358436281462142E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 315.7719s - [COUNTERS] Fortran Overhead ( 0 ) : 26.2544s - [COUNTERS] CudaCpp MEs ( 2 ) : 289.5175s for 90112 events => throughput is 3.11E+02 events/s - -*** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436281462142E-007) differ by less than 2E-4 (5.742375686068613e-09) - -*** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.644706e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.685868e+02 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1693101021831071E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 44.6193s - [COUNTERS] Fortran Overhead ( 0 ) : 20.5510s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.0684s for 8192 events => throughput is 3.40E+02 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1693101021831071E-006) differ by less than 2E-4 (6.533362073568583e-09) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1358436281462142E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 281.8182s - [COUNTERS] Fortran Overhead ( 0 ) : 23.6088s - [COUNTERS] CudaCpp MEs ( 2 ) : 258.2094s for 90112 events => throughput is 3.49E+02 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436281462142E-007) differ by less than 2E-4 (5.742375686068613e-09) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.253034e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.210071e+02 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1693101021831071E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 45.1323s - [COUNTERS] Fortran Overhead ( 0 ) : 21.8275s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.3047s for 8192 events => throughput is 3.52E+02 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1693101021831071E-006) differ by less than 2E-4 (6.533362073568583e-09) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1358436281462142E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 283.0785s - [COUNTERS] Fortran Overhead ( 0 ) : 25.5465s - [COUNTERS] CudaCpp MEs ( 2 ) : 257.5320s for 90112 events => throughput is 3.50E+02 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436281462142E-007) differ by less than 2E-4 (5.742375686068613e-09) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.872786e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.828458e+02 ) sec^-1 - -*** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. 
use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.169e-06 [1.1693100942770687E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 15 events (found 163 events) - [COUNTERS] PROGRAM TOTAL : 3.5931s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7300s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8631s for 8192 events => throughput is 9.49E+03 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (1.1693100945435808E-006) and cpp (1.1693100942770687E-006) differ by less than 2E-4 (2.2792256970660674e-10) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.136e-07 [2.1358436157495368E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 84 events (found 808 events) - [COUNTERS] PROGRAM TOTAL : 15.7895s - [COUNTERS] Fortran Overhead ( 0 ) : 6.2917s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.4978s for 90112 events => throughput is 9.49E+03 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436157495368E-007) differ by less than 2E-4 (6.173705990875078e-11) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.449720e+03 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.084973e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.109916e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.161304e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.111586e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.116031e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.106540e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = 
SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.648699e+03 ) sec^-1 - -TEST COMPLETED +ERROR! ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' failed +d R # 5 > -0.0 -0.0 -0.0 0.4 0.4 +d R # 6 > -0.0 -0.0 -0.0 -0.0 0.4 +s min # 3> 0.0119716.0 29929.0 29929.0 0.0 +s min # 4> 0.0 0.0 29929.0 29929.0 0.0 +s min # 5> 0.0 0.0 0.0 0.0 0.0 +s min # 6> 0.0 0.0 0.0 0.0 0.0 +xqcutij # 3> 0.0 0.0 0.0 0.0 0.0 +xqcutij # 4> 0.0 0.0 0.0 0.0 0.0 +xqcutij # 5> 0.0 0.0 0.0 0.0 0.0 +xqcutij # 6> 0.0 0.0 0.0 0.0 0.0 diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index ef2a008fd8..e99c539eed 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -2,9 +2,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-26_13:52:18 +DATE: 2023-10-28_13:30:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3116s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2347s - [COUNTERS] Fortran MEs ( 1 ) : 0.0769s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3031s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2342s + [COUNTERS] Fortran MEs ( 1 ) : 0.0688s for 8192 events => throughput is 1.19E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3066s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2302s - [COUNTERS] Fortran MEs ( 1 ) : 0.0764s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2953s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2266s + [COUNTERS] 
Fortran MEs ( 1 ) : 0.0687s for 8192 events => throughput is 1.19E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2632s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4231s - [COUNTERS] Fortran MEs ( 1 ) : 0.8401s for 90112 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1552s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3998s + [COUNTERS] Fortran MEs ( 1 ) : 0.7554s for 90112 events => throughput is 1.19E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,471 +132,12 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=1 - [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3754s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3031s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0723s for 8192 events => throughput is 1.13E+05 events/s + [XSECTION] Cross section = 1.276 [1.2757941949814184] fbridge_mode=1 + [UNWEIGHT] Wrote 105 events (found 652 events) + [COUNTERS] PROGRAM TOTAL : 0.3934s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3192s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0742s for 8192 events => throughput is 1.10E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.26050333309703716) and cpp (0.26050333309703716) differ by less than 2E-14 (0.0) - -*** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-none) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.218 [0.21801182648615872] fbridge_mode=1 - [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2698s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4786s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7912s for 90112 events => throughput is 1.14E+05 events/s - -*** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182648615872) differ by less than 2E-14 (1.1102230246251565e-16) - -*** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.151677e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.155366e+05 ) sec^-1 - -*** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2605 [0.26050333309703727] fbridge_mode=1 - [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3152s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2761s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0391s for 8192 events => throughput is 2.10E+05 events/s - -*** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333309703727) differ by less than 2E-14 (4.440892098500626e-16) - -*** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-sse4) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=1 - [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8777s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4487s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4290s for 90112 events => throughput is 2.10E+05 events/s - -*** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182648615874) differ by less than 2E-14 (0.0) - -*** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.093968e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.072845e+05 ) sec^-1 - -*** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 - [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2755s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2543s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0213s for 8192 events => throughput is 3.85E+05 events/s - -*** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333309703733) differ by less than 2E-14 (6.661338147750939e-16) - -*** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-avx2) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 - [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6671s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4324s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2348s for 90112 events => throughput is 3.84E+05 events/s - -*** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182648615863) differ by less than 2E-14 (5.551115123125783e-16) - -*** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.838452e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.878300e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 - [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2707s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2515s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0192s for 8192 events => throughput is 4.27E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333309703733) differ by less than 2E-14 (6.661338147750939e-16) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 - [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6979s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4773s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2206s for 90112 events => throughput is 4.08E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182648615863) differ by less than 2E-14 (5.551115123125783e-16) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.101159e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.094655e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 - [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2902s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2615s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0287s for 8192 events => throughput is 2.85E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333309703733) differ by less than 2E-14 (6.661338147750939e-16) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 - [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7445s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4413s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3032s for 90112 events => throughput is 2.97E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182648615863) differ by less than 2E-14 (5.551115123125783e-16) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.863782e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.971210e+05 ) sec^-1 - -*** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 - [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6724s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6717s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.22E+07 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333309703733) differ by less than 2E-14 (6.661338147750939e-16) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.218 [0.21801182648615869] fbridge_mode=1 - [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8414s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8339s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 90112 events => throughput is 1.20E+07 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182648615869) differ by less than 2E-14 (2.220446049250313e-16) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.555463e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.293951e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.436968e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.907161e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.448784e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.272589e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.448370e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.882086e+07 ) sec^-1 - -TEST COMPLETED +ERROR! xsec from fortran (0.26050333309703716) and cpp (1.2757941949814184) differ by more than 2E-14 (3.8974198518457603) diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 3031fbe602..ef32365876 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -2,8 +2,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 @@ -15,25 +15,25 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2023-10-26_16:26:08 +DATE: 2023-10-28_13:30:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3132s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2359s - [COUNTERS] Fortran MEs ( 1 ) : 0.0773s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2993s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2303s + [COUNTERS] Fortran MEs ( 1 ) : 0.0690s for 8192 events => throughput is 1.19E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3083s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2305s - [COUNTERS] Fortran MEs ( 1 ) : 0.0778s for 8192 events => 
throughput is 1.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2952s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2264s + [COUNTERS] Fortran MEs ( 1 ) : 0.0688s for 8192 events => throughput is 1.19E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2687s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4262s - [COUNTERS] Fortran MEs ( 1 ) : 0.8425s for 90112 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1487s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3925s + [COUNTERS] Fortran MEs ( 1 ) : 0.7562s for 90112 events => throughput is 1.19E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,471 +132,12 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2605 [0.26050316227723969] fbridge_mode=1 - [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3718s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3018s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0700s for 8192 events => throughput is 1.17E+05 events/s + [XSECTION] Cross section = 1.276 [1.2757939773540909] fbridge_mode=1 + [UNWEIGHT] Wrote 105 events (found 652 events) + [COUNTERS] PROGRAM TOTAL : 0.3844s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3143s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0701s for 8192 events => throughput is 1.17E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050316227723969) differ by less than 4E-4 (6.55729796017468e-07) - -*** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-none) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.218 [0.21801182848184220] fbridge_mode=1 - [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2605s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4911s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7694s for 90112 events => throughput is 1.17E+05 events/s - -*** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182848184220) differ by less than 4E-4 (9.154014657397624e-09) - -*** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.189355e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.185221e+05 ) sec^-1 - -*** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2605 [0.26050312995876956] fbridge_mode=1 - [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2829s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2595s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0234s for 8192 events => throughput is 3.50E+05 events/s - -*** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050312995876956) differ by less than 4E-4 (7.797914336471479e-07) - -*** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-sse4) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.218 [0.21801179112233499] fbridge_mode=1 - [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7023s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4403s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2619s for 90112 events => throughput is 3.44E+05 events/s - -*** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21801182648615874) and cpp (0.21801179112233499) differ by less than 4E-4 (1.6221057508314374e-07) - -*** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.361662e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.377760e+05 ) sec^-1 - -*** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2605 [0.26050312669591458] fbridge_mode=1 - [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2579s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2465s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0114s for 8192 events => throughput is 7.16E+05 events/s - -*** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050312669591458) differ by less than 4E-4 (7.923166284173888e-07) - -*** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-avx2) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.218 [0.21801178977086591] fbridge_mode=1 - [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.5607s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4343s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1264s for 90112 events => throughput is 7.13E+05 events/s - -*** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21801182648615874) and cpp (0.21801178977086591) differ by less than 4E-4 (1.6840963823483435e-07) - -*** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.978568e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.011663e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2605 [0.26050312669591458] fbridge_mode=1 - [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2565s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2457s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0107s for 8192 events => throughput is 7.62E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050312669591458) differ by less than 4E-4 (7.923166284173888e-07) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.218 [0.21801178977086591] fbridge_mode=1 - [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.5367s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4216s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1151s for 90112 events => throughput is 7.83E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21801182648615874) and cpp (0.21801178977086591) differ by less than 4E-4 (1.6840963823483435e-07) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.722830e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.609197e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2605 [0.26050317064675232] fbridge_mode=1 - [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2615s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2475s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.88E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050317064675232) differ by less than 4E-4 (6.23601559723852e-07) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.218 [0.21801181998460883] fbridge_mode=1 - [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.5932s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4384s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1548s for 90112 events => throughput is 5.82E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21801182648615874) and cpp (0.21801181998460883) differ by less than 4E-4 (2.9822005642721194e-08) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.435125e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.574503e+05 ) sec^-1 - -*** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2605 [0.26050319269579369] fbridge_mode=1 - [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6750s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6744s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.52E+07 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050319269579369) differ by less than 4E-4 (5.389614090578476e-07) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.218 [0.21801186042050189] fbridge_mode=1 - [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8445s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8387s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 90112 events => throughput is 1.55E+07 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21801182648615874) and cpp (0.21801186042050189) differ by less than 4E-4 (1.556536803892783e-07) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.784538e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.616857e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.893676e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.761961e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.892834e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.847341e+08 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.399446e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.280733e+07 ) sec^-1 - -TEST COMPLETED +ERROR! xsec from fortran (0.26050333309703716) and cpp (1.2757939773540909) differ by more than 4E-4 (3.8974190164348466) diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 356eeee837..7a64e889e6 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -3,9 +3,9 @@ CUDACPP_BUILDDIR='.' + make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 - make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,25 +15,25 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2023-10-26_16:28:48 +DATE: 2023-10-28_13:30:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3104s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2337s - [COUNTERS] Fortran MEs ( 1 ) : 0.0767s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3003s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2318s + [COUNTERS] Fortran MEs ( 1 ) : 0.0686s for 8192 events => throughput is 1.19E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3269s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2499s - [COUNTERS] Fortran MEs ( 1 ) : 0.0770s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2952s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2266s + [COUNTERS] Fortran MEs ( 1 ) : 0.0686s for 8192 events => throughput is 1.19E+05 
events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2593s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4222s - [COUNTERS] Fortran MEs ( 1 ) : 0.8371s for 90112 events => throughput is 1.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1553s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3975s + [COUNTERS] Fortran MEs ( 1 ) : 0.7578s for 90112 events => throughput is 1.19E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,471 +132,12 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2605 [0.26050333287021976] fbridge_mode=1 - [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3748s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3034s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0714s for 8192 events => throughput is 1.15E+05 events/s + [XSECTION] Cross section = 1.276 [1.2757941964256063] fbridge_mode=1 + [UNWEIGHT] Wrote 105 events (found 652 events) + [COUNTERS] PROGRAM TOTAL : 0.3922s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3179s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0743s for 8192 events => throughput is 1.10E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.26050333309703716) and cpp (0.26050333287021976) differ by less than 2E-4 (8.706890763932051e-10) - -*** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-none) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.218 [0.21801182637309846] fbridge_mode=1 - [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2653s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4786s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7867s for 90112 events => throughput is 1.15E+05 events/s - -*** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182637309846) differ by less than 2E-4 (5.185970541887741e-10) - -*** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.138536e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.159613e+05 ) sec^-1 - -*** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2605 [0.26050333287021976] fbridge_mode=1 - [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3084s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2698s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0386s for 8192 events => throughput is 2.12E+05 events/s - -*** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333287021976) differ by less than 2E-4 (8.706890763932051e-10) - -*** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-sse4) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.218 [0.21801182637309841] fbridge_mode=1 - [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8771s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4553s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4219s for 90112 events => throughput is 2.14E+05 events/s - -*** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182637309841) differ by less than 2E-4 (5.18597276233379e-10) - -*** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.090427e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.088304e+05 ) sec^-1 - -*** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2605 [0.26050333293296080] fbridge_mode=1 - [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2744s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2536s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0208s for 8192 events => throughput is 3.94E+05 events/s - -*** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333293296080) differ by less than 2E-4 (6.29843621702264e-10) - -*** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-avx2) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.218 [0.21801182637602595] fbridge_mode=1 - [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6781s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4467s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2314s for 90112 events => throughput is 3.89E+05 events/s - -*** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182637602595) differ by less than 2E-4 (5.051689067059328e-10) - -*** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.901746e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.831760e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2605 [0.26050333293296080] fbridge_mode=1 - [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2720s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2526s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0193s for 8192 events => throughput is 4.24E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333293296080) differ by less than 2E-4 (6.29843621702264e-10) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.218 [0.21801182637602595] fbridge_mode=1 - [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6633s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4538s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2095s for 90112 events => throughput is 4.30E+05 events/s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182637602595) differ by less than 2E-4 (5.051689067059328e-10) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.283227e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.421519e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2605 [0.26050333293296080] fbridge_mode=1 - [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2903s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2614s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0289s for 8192 events => throughput is 2.84E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333293296080) differ by less than 2E-4 (6.29843621702264e-10) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.218 [0.21801182637602595] fbridge_mode=1 - [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8753s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5307s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3445s for 90112 events => throughput is 2.62E+05 events/s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182637602595) differ by less than 2E-4 (5.051689067059328e-10) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.807854e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.564558e+05 ) sec^-1 - -*** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2605 [0.26050333301029693] fbridge_mode=1 - [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6551s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6544s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.20E+07 events/s - -*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333301029693) differ by less than 2E-4 (3.329716502520341e-10) - -*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.218 [0.21801182637219937] fbridge_mode=1 - [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8418s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8341s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 90112 events => throughput is 1.18E+07 events/s - -*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182637219937) differ by less than 2E-4 (5.227210886360467e-10) - -*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.554480e+07 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.260564e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.452960e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.904295e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.454069e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.238408e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.441519e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.877685e+07 ) sec^-1 - -TEST COMPLETED +ERROR! xsec from fortran (0.26050333309703716) and cpp (1.2757941964256063) differ by more than 2E-4 (3.897419857389597) From 973349d26ae52550fb50eda1fea3fc93efd094e7 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 29 Oct 2023 10:08:18 +0100 Subject: [PATCH 061/119] [oct23av] in ggtt.mad, fix all build errors and warnings for macOS on Olivier's M1 system --- .../cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 10 +++++++--- epochX/cudacpp/gg_tt.mad/SubProcesses/makefile | 17 ++++++++++++++--- epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk | 11 +++++++++-- 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index c6c1826de7..0d604e747c 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -103,6 +103,11 @@ endif # Note: AR, CXX and FC are implicitly defined if not set externally # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler @@ -222,8 +227,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable 
OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +563,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile index 74b19033a8..65369d6101 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile @@ -9,6 +9,12 @@ FFLAGS+= -cpp # Compile counters with -O3 as in the cudacpp makefile (avoid being "unfair" to Fortran #740) CXXFLAGS = -O3 -Wall -Wshadow -Wextra +# Add -std=c++17 explicitly to avoid build errors on macOS +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -std=c++17 -mmacosx-version-min=11.3 +endif + # Enable ccache if USECCACHE=1 ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) override CXX:=ccache $(CXX) @@ -89,7 +95,12 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
+ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +LDFLAGS += -lc++ # avoid 'Undefined symbols' for chrono::steady_clock on macOS (checked with otool -L libmg5amc_gg_ttx_cpp.so) +LDFLAGS += -mmacosx-version-min=11.3 # avoid "ld: warning: object file was built for newer macOS version than being linked" +else +LDFLAGS += -Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (not supported on macOS) +endif all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) @@ -100,8 +111,8 @@ LINKLIBS += -lintlc # undefined reference to `_intel_fast_memcpy' else ifneq ($(shell $(CXX) --version | egrep '^clang'),) override OMPFLAGS = -fopenmp $(CUDACPP_BUILDDIR)/$(PROG)_cpp: LINKLIBS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 -###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###override OMPFLAGS = -fopenmp # OMP is not supported yet by cudacpp for Apple clang +else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +override OMPFLAGS = # OMP is not supported yet by cudacpp for Apple clang else override OMPFLAGS = -fopenmp endif diff --git a/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk b/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk index 554d7a704c..d4cc628aec 100644 --- a/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk @@ -36,6 +36,13 @@ endif # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html ###RANLIB = ranlib +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +LDFLAGS = +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +LDFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler (note: NVCC is already exported including ccache) @@ -259,11 
+266,11 @@ endif ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(cu_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) + $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) $(LDFLAGS) else $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(CXX) -shared -o $@ $(cxx_objects) + $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- From 2065bb98afd3b1c3237ae4cbbcf67bdda10d534f Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 29 Oct 2023 10:12:38 +0100 Subject: [PATCH 062/119] [oct23av] in CODEGEN, backport from gg_tt.mad the fixes for macOS build errors and warnings tested on Olivier's M1 system --- .../madgraph/iolibs/template_files/gpu/cudacpp.mk | 10 +++++++--- .../iolibs/template_files/gpu/cudacpp_src.mk | 15 +++++++++++---- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index 6ad4e44620..2f4474c1d6 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -103,6 +103,11 @@ endif # Note: AR, CXX and FC are implicitly defined if not set externally # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== 
Configure the CUDA compiler @@ -222,8 +227,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +563,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk index dac2e47d1d..e3febf9a0c 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk @@ -36,6 +36,13 @@ endif # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html ###RANLIB = ranlib +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: 
object file was built for newer macOS version than being linked" +LDFLAGS = +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +LDFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler (note: NVCC is already exported including ccache) @@ -250,20 +257,20 @@ $(BUILDDIR)/%%_cu.o : %%.cc *.h $(BUILDDIR)/.build.$(TAG) #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, Parameters_%(model)s.o read_slha.o) +cxx_objects=$(addprefix $(BUILDDIR)/, Parameters_sm.o read_slha.o) ifneq ($(NVCC),) -cu_objects=$(addprefix $(BUILDDIR)/, Parameters_%(model)s_cu.o) +cu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_cu.o) endif # Target (and build rules): common (src) library ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(cu_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) + $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) $(LDFLAGS) else $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(CXX) -shared -o $@ $(cxx_objects) + $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- From 84ddaf866bca8511dccac6da3c7cb681b3147642 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 29 Oct 2023 10:14:57 +0100 Subject: [PATCH 063/119] [oct23av] in CODEGEN, regenerate patch.P1 and patch.common from gg_tt.mad, including the fixes for macOS build errors and warnings tested on Olivier's M1 system ./CODEGEN/generateAndCompare.sh gg_tt --mad --nopatch sed -i 's/DEFAULT_F2PY_COMPILER=f2py3.*/DEFAULT_F2PY_COMPILER=f2py3/' gg_tt.mad/Source/make_opts git diff --no-ext-diff -R gg_tt.mad/Source/makefile gg_tt.mad/Source/dsample.f gg_tt.mad/Source/genps.inc gg_tt.mad/SubProcesses/makefile > CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common git diff --no-ext-diff -R gg_tt.mad/Source/make_opts gg_tt.mad/bin/internal/banner.py gg_tt.mad/bin/internal/gen_ximprove.py gg_tt.mad/bin/internal/madevent_interface.py >> CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common git diff --no-ext-diff -R gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f gg_tt.mad/SubProcesses/P1_gg_ttx/driver.f gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f > CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 git checkout gg_tt.mad --- .../MG5aMC_patches/PROD/patch.P1 | 6 +-- .../MG5aMC_patches/PROD/patch.common | 52 +++++++++++-------- 2 files changed, 34 insertions(+), 24 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 index d7c2c53fa7..fe883a6b25 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 @@ -1,8 +1,8 @@ diff --git 
b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f -index 27ed1439e..3b24a9924 100644 +index 880769442..5a3da931f 100644 --- b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f -@@ -469,23 +469,140 @@ C +@@ -484,23 +484,140 @@ C INTEGER VECSIZE_USED INTEGER IVEC @@ -284,7 +284,7 @@ index 71fbf2b25..0f1d199fc 100644 open(unit=lun,file=tempname,status='old',ERR=20) fopened=.true. diff --git b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f -index 3ac962688..ef18aff22 100644 +index 3ac962688..daea73a6d 100644 --- b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -72,7 +72,10 @@ C diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common index 87315981d5..fd6ca72279 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common @@ -13,7 +13,7 @@ index a59181c70..af7e0efbc 100644 PARAMETER(MAXTRIES=25) C To pass the helicity configuration chosen by the DiscreteSampler to diff --git b/epochX/cudacpp/gg_tt.mad/Source/makefile a/epochX/cudacpp/gg_tt.mad/Source/makefile -index 617f10b93..dbe08b846 100644 +index 617f10b93..00c73099a 100644 --- b/epochX/cudacpp/gg_tt.mad/Source/makefile +++ a/epochX/cudacpp/gg_tt.mad/Source/makefile @@ -120,7 +120,7 @@ $(LIBDIR)libiregi.a: $(IREGIDIR) @@ -37,12 +37,11 @@ index 617f10b93..dbe08b846 100644 + for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done; +cleanall: cleanSource # THIS IS THE ONE + for i in `ls -d ../SubProcesses/P*`; do cd $$i; make cleanavxs; cd -; done; -+ diff --git 
b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile -index 348c283be..74db44d84 100644 +index 348c283be..65369d610 100644 --- b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile +++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile -@@ -1,6 +1,22 @@ +@@ -1,6 +1,28 @@ +SHELL := /bin/bash + include ../../Source/make_opts @@ -54,6 +53,12 @@ index 348c283be..74db44d84 100644 +# Compile counters with -O3 as in the cudacpp makefile (avoid being "unfair" to Fortran #740) +CXXFLAGS = -O3 -Wall -Wshadow -Wextra + ++# Add -std=c++17 explicitly to avoid build errors on macOS ++# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" ++ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) ++CXXFLAGS += -std=c++17 -mmacosx-version-min=11.3 ++endif ++ +# Enable ccache if USECCACHE=1 +ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) + override CXX:=ccache $(CXX) @@ -65,7 +70,7 @@ index 348c283be..74db44d84 100644 # Load additional dependencies of the bias module, if present ifeq (,$(wildcard ../bias_dependencies)) BIASDEPENDENCIES = -@@ -24,7 +40,26 @@ else +@@ -24,7 +46,26 @@ else MADLOOP_LIB = endif @@ -93,7 +98,7 @@ index 348c283be..74db44d84 100644 LIBS = $(LIBDIR)libbias.$(libext) $(LIBDIR)libdhelas.$(libext) $(LIBDIR)libdsample.$(libext) $(LIBDIR)libgeneric.$(libext) $(LIBDIR)libpdf.$(libext) $(LIBDIR)libgammaUPC.$(libext) $(LIBDIR)libmodel.$(libext) $(LIBDIR)libcernlib.$(libext) $(MADLOOP_LIB) $(LOOP_LIBS) -@@ -43,41 +78,112 @@ ifeq ($(strip $(MATRIX_HEL)),) +@@ -43,41 +84,117 @@ ifeq ($(strip $(MATRIX_HEL)),) endif @@ -113,7 +118,12 @@ index 348c283be..74db44d84 100644 -$(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX) - $(FC) -o $(PROG) $(PROCESS) $(MATRIX) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp -+LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
++ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) ++LDFLAGS += -lc++ # avoid 'Undefined symbols' for chrono::steady_clock on macOS (checked with otool -L libmg5amc_gg_ttx_cpp.so) ++LDFLAGS += -mmacosx-version-min=11.3 # avoid "ld: warning: object file was built for newer macOS version than being linked" ++else ++LDFLAGS += -Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (not supported on macOS) ++endif -$(PROG)_forhel: $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX_HEL) - $(FC) -o $(PROG)_forhel $(PROCESS) $(MATRIX_HEL) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp @@ -128,8 +138,8 @@ index 348c283be..74db44d84 100644 +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) +override OMPFLAGS = -fopenmp +$(CUDACPP_BUILDDIR)/$(PROG)_cpp: LINKLIBS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 -+###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -+###override OMPFLAGS = -fopenmp # OMP is not supported yet by cudacpp for Apple clang ++else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) ++override OMPFLAGS = # OMP is not supported yet by cudacpp for Apple clang +else +override OMPFLAGS = -fopenmp +endif @@ -166,24 +176,24 @@ index 348c283be..74db44d84 100644 +madevent_fortran_link: $(PROG)_fortran + rm -f $(PROG) + ln -s $(PROG)_fortran $(PROG) -+ + +-$(LIBDIR)libpdf.$(libext): +- cd ../../Source/PDF; make +madevent_cpp_link: $(CUDACPP_BUILDDIR)/$(PROG)_cpp + rm -f $(PROG) + ln -s $(CUDACPP_BUILDDIR)/$(PROG)_cpp $(PROG) -+ + +-$(LIBDIR)libgammaUPC.$(libext): +- cd ../../Source/PDF/gammaUPC; make +madevent_cuda_link: $(CUDACPP_BUILDDIR)/$(PROG)_cuda + rm -f $(PROG) + ln -s $(CUDACPP_BUILDDIR)/$(PROG)_cuda $(PROG) - --$(LIBDIR)libpdf.$(libext): -- cd ../../Source/PDF; make ++ +# Building $(PROG)_cpp also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (improved patch for cpp-only builds #503) +$(CUDACPP_BUILDDIR)/$(PROG)_cpp: $(PROCESS) $(DSIG_cudacpp) auto_dsig.o $(LIBS) 
$(MATRIX) counters.o ompnumthreads.o $(CUDACPP_BUILDDIR)/.cudacpplibs + $(FC) -o $(CUDACPP_BUILDDIR)/$(PROG)_cpp $(PROCESS) $(DSIG_cudacpp) auto_dsig.o $(MATRIX) $(LINKLIBS) $(BIASDEPENDENCIES) $(OMPFLAGS) counters.o ompnumthreads.o -L$(LIBDIR)/$(CUDACPP_BUILDDIR) -l$(CUDACPP_COMMONLIB) -l$(CUDACPP_CXXLIB) $(LIBFLAGSRPATH) $(LDFLAGS) + if [ -f $(LIBDIR)/$(CUDACPP_BUILDDIR)/lib$(CUDACPP_CULIB).* ]; then $(FC) -o $(CUDACPP_BUILDDIR)/$(PROG)_cuda $(PROCESS) $(DSIG_cudacpp) auto_dsig.o $(MATRIX) $(LINKLIBS) $(BIASDEPENDENCIES) $(OMPFLAGS) counters.o ompnumthreads.o -L$(LIBDIR)/$(CUDACPP_BUILDDIR) -l$(CUDACPP_COMMONLIB) -l$(CUDACPP_CULIB) $(LIBFLAGSRPATH) $(LDFLAGS); fi - --$(LIBDIR)libgammaUPC.$(libext): -- cd ../../Source/PDF/gammaUPC; make ++ +$(CUDACPP_BUILDDIR)/$(PROG)_cuda: $(CUDACPP_BUILDDIR)/$(PROG)_cpp + +counters.o: counters.cc timer.h @@ -222,7 +232,7 @@ index 348c283be..74db44d84 100644 # Dependencies -@@ -97,5 +203,61 @@ unwgt.o: genps.inc nexternal.inc symswap.inc cluster.inc run.inc message.inc \ +@@ -97,5 +214,61 @@ unwgt.o: genps.inc nexternal.inc symswap.inc cluster.inc run.inc message.inc \ run_config.inc initcluster.o: message.inc @@ -287,10 +297,10 @@ index 348c283be..74db44d84 100644 +distclean: cleanall # Clean all fortran and cudacpp builds as well as the googletest installation + $(MAKE) -f $(CUDACPP_MAKEFILE) distclean diff --git b/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py a/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py -index 4dd71db86..3b8ec3121 100755 +index ebbc1ac1d..a88d60b28 100755 --- b/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py +++ a/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py -@@ -380,8 +380,20 @@ class gensym(object): +@@ -385,8 +385,20 @@ class gensym(object): done = True if not done: raise Exception('Parsing error in gensym: %s' % stdout) @@ -314,7 +324,7 @@ index 4dd71db86..3b8ec3121 100755 self.submit_to_cluster(job_list) job_list = {} diff --git 
b/epochX/cudacpp/gg_tt.mad/bin/internal/madevent_interface.py a/epochX/cudacpp/gg_tt.mad/bin/internal/madevent_interface.py -index a056d3861..b70b548e5 100755 +index 389b93ab8..d72270289 100755 --- b/epochX/cudacpp/gg_tt.mad/bin/internal/madevent_interface.py +++ a/epochX/cudacpp/gg_tt.mad/bin/internal/madevent_interface.py @@ -3614,8 +3614,20 @@ Beware that this can be dangerous for local multicore runs.""") From 2902dfbad479b6a3efde2e5df68d8e4bfad5e62a Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 29 Oct 2023 10:16:28 +0100 Subject: [PATCH 064/119] [oct23av] regenerate ggtt.mad after updating CODEGEN with macOS patches, all ok no change --- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index f25d273fa3..1c84d3f0e1 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00533747673034668  +DEBUG: model prefixing takes 0.005392789840698242  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,16 +191,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.100 s +Wrote files for 10 helas calls in 0.099 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.143 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.130 s +ALOHA: aloha creates 4 routines in 0.136 s VVV1 FFV1 FFV1 @@ -227,11 +227,9 @@ patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py -Hunk #1 succeeded at 385 (offset 5 lines). patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 484 (offset 15 lines). patching file driver.f patching file matrix1.f DEBUG: p.returncode =  0 [output.py at line 233]  @@ -241,9 +239,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.746s -user 0m1.450s -sys 0m0.214s +real 0m1.670s +user 0m1.471s +sys 0m0.196s ************************************************************ * * * W E L C O M E to * From 2cbd4aa24bf78e0aa24726a637fa90d61e2a96e7 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 29 Oct 2023 10:19:55 +0100 Subject: [PATCH 065/119] [oct23av] regenerate all other 7 mad and 7 sa processes after updating CODEGEN with macOS patches, all ok no change --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 19 ++-- .../ee_mumu.mad/SubProcesses/cudacpp.mk | 10 ++- .../cudacpp/ee_mumu.mad/SubProcesses/makefile | 17 +++- epochX/cudacpp/ee_mumu.mad/src/cudacpp_src.mk | 11 ++- .../CODEGEN_cudacpp_ee_mumu_log.txt | 8 +- .../ee_mumu.sa/SubProcesses/cudacpp.mk | 10 ++- epochX/cudacpp/ee_mumu.sa/src/cudacpp_src.mk | 11 ++- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 10 +-- .../cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk | 10 ++- epochX/cudacpp/gg_tt.sa/src/cudacpp_src.mk | 11 ++- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 23 +++-- .../gg_tt01g.mad/SubProcesses/cudacpp.mk | 10 ++- .../gg_tt01g.mad/SubProcesses/makefile | 17 +++- .../cudacpp/gg_tt01g.mad/src/cudacpp_src.mk | 11 ++- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 20 ++--- .../gg_ttg.mad/SubProcesses/cudacpp.mk | 10 ++- .../cudacpp/gg_ttg.mad/SubProcesses/makefile | 17 +++- epochX/cudacpp/gg_ttg.mad/src/cudacpp_src.mk | 11 ++- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 10 +-- .../cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk | 10 ++- epochX/cudacpp/gg_ttg.sa/src/cudacpp_src.mk | 11 ++- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 22 +++-- .../gg_ttgg.mad/SubProcesses/cudacpp.mk | 10 ++- .../cudacpp/gg_ttgg.mad/SubProcesses/makefile | 17 +++- epochX/cudacpp/gg_ttgg.mad/src/cudacpp_src.mk | 11 ++- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 14 +-- .../gg_ttgg.sa/SubProcesses/cudacpp.mk | 10 ++- epochX/cudacpp/gg_ttgg.sa/src/cudacpp_src.mk | 11 ++- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 22 +++-- 
.../gg_ttggg.mad/SubProcesses/cudacpp.mk | 10 ++- .../gg_ttggg.mad/SubProcesses/makefile | 17 +++- .../cudacpp/gg_ttggg.mad/src/cudacpp_src.mk | 11 ++- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 14 +-- .../gg_ttggg.sa/SubProcesses/cudacpp.mk | 10 ++- epochX/cudacpp/gg_ttggg.sa/src/cudacpp_src.mk | 11 ++- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 25 +++--- .../gq_ttq.mad/SubProcesses/cudacpp.mk | 10 ++- .../cudacpp/gq_ttq.mad/SubProcesses/makefile | 17 +++- epochX/cudacpp/gq_ttq.mad/src/cudacpp_src.mk | 11 ++- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 10 +-- .../cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk | 10 ++- epochX/cudacpp/gq_ttq.sa/src/cudacpp_src.mk | 11 ++- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 8 +- .../heft_gg_h.sa/SubProcesses/cudacpp.mk | 10 ++- .../cudacpp/heft_gg_h.sa/src/cudacpp_src.mk | 15 +++- .../CODEGEN_mad_pp_tt012j_log.txt | 90 +++++++++---------- .../pp_tt012j.mad/SubProcesses/cudacpp.mk | 10 ++- .../pp_tt012j.mad/SubProcesses/makefile | 17 +++- .../cudacpp/pp_tt012j.mad/src/cudacpp_src.mk | 11 ++- 49 files changed, 464 insertions(+), 248 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index e304c1595b..8b7b112fc5 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005795001983642578  +DEBUG: model prefixing takes 0.005471944808959961  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,7 +174,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,19 +191,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.100 s +Wrote files for 8 helas calls in 0.097 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.198 s +ALOHA: aloha creates 3 routines in 0.195 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.261 s +ALOHA: aloha creates 7 routines in 0.249 s FFV1 FFV1 FFV2 @@ -234,11 +234,10 @@ patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py -Hunk #1 succeeded at 385 (offset 5 lines). 
patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 496 (offset 27 lines). +Hunk #1 succeeded at 496 (offset 12 lines). patching file driver.f patching file matrix1.f Hunk #3 succeeded at 230 (offset 9 lines). @@ -251,9 +250,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.657s -user 0m1.670s -sys 0m0.206s +real 0m1.849s +user 0m1.645s +sys 0m0.186s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk index c6c1826de7..0d604e747c 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk @@ -103,6 +103,11 @@ endif # Note: AR, CXX and FC are implicitly defined if not set externally # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler @@ -222,8 +227,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM 
reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +563,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile b/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile index 74b19033a8..65369d6101 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile @@ -9,6 +9,12 @@ FFLAGS+= -cpp # Compile counters with -O3 as in the cudacpp makefile (avoid being "unfair" to Fortran #740) CXXFLAGS = -O3 -Wall -Wshadow -Wextra +# Add -std=c++17 explicitly to avoid build errors on macOS +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -std=c++17 -mmacosx-version-min=11.3 +endif + # Enable ccache if USECCACHE=1 ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) override CXX:=ccache $(CXX) @@ -89,7 +95,12 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
+ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +LDFLAGS += -lc++ # avoid 'Undefined symbols' for chrono::steady_clock on macOS (checked with otool -L libmg5amc_gg_ttx_cpp.so) +LDFLAGS += -mmacosx-version-min=11.3 # avoid "ld: warning: object file was built for newer macOS version than being linked" +else +LDFLAGS += -Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (not supported on macOS) +endif all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) @@ -100,8 +111,8 @@ LINKLIBS += -lintlc # undefined reference to `_intel_fast_memcpy' else ifneq ($(shell $(CXX) --version | egrep '^clang'),) override OMPFLAGS = -fopenmp $(CUDACPP_BUILDDIR)/$(PROG)_cpp: LINKLIBS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 -###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###override OMPFLAGS = -fopenmp # OMP is not supported yet by cudacpp for Apple clang +else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +override OMPFLAGS = # OMP is not supported yet by cudacpp for Apple clang else override OMPFLAGS = -fopenmp endif diff --git a/epochX/cudacpp/ee_mumu.mad/src/cudacpp_src.mk b/epochX/cudacpp/ee_mumu.mad/src/cudacpp_src.mk index 554d7a704c..d4cc628aec 100644 --- a/epochX/cudacpp/ee_mumu.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/ee_mumu.mad/src/cudacpp_src.mk @@ -36,6 +36,13 @@ endif # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html ###RANLIB = ranlib +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +LDFLAGS = +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +LDFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler (note: NVCC is already exported including ccache) @@ 
-259,11 +266,11 @@ endif ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(cu_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) + $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) $(LDFLAGS) else $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(CXX) -shared -o $@ $(cxx_objects) + $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 19633ae89d..0c2253f725 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005425214767456055  +DEBUG: model prefixing takes 0.005351066589355469  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -202,6 +202,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.691s -user 0m0.582s -sys 0m0.053s +real 0m0.674s +user 0m0.575s +sys 0m0.058s diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk index c6c1826de7..0d604e747c 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk @@ -103,6 +103,11 @@ endif # Note: AR, CXX and FC are implicitly defined if not set externally # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler @@ -222,8 +227,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +563,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/ee_mumu.sa/src/cudacpp_src.mk b/epochX/cudacpp/ee_mumu.sa/src/cudacpp_src.mk index 554d7a704c..d4cc628aec 100644 --- a/epochX/cudacpp/ee_mumu.sa/src/cudacpp_src.mk +++ b/epochX/cudacpp/ee_mumu.sa/src/cudacpp_src.mk @@ -36,6 +36,13 @@ endif # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html ###RANLIB = ranlib +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +LDFLAGS = +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +LDFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler (note: NVCC is already exported including ccache) @@ -259,11 +266,11 @@ endif ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(cu_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) + $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) $(LDFLAGS) else $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(CXX) -shared -o $@ $(cxx_objects) + $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 6106a063b1..c83a08f394 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0053386688232421875  +DEBUG: model prefixing takes 0.005424022674560547  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -180,7 +180,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.145 s +ALOHA: aloha creates 2 routines in 0.143 s VVV1 FFV1 FFV1 @@ -197,6 +197,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.562s -user 0m0.474s -sys 0m0.049s +real 0m0.536s +user 0m0.471s +sys 0m0.048s diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk index c6c1826de7..0d604e747c 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk @@ -103,6 +103,11 @@ endif # Note: AR, CXX and FC are implicitly defined if not set externally # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html +# Add -mmacosx-version-min=11.3 to avoid "ld: 
warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler @@ -222,8 +227,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +563,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_tt.sa/src/cudacpp_src.mk b/epochX/cudacpp/gg_tt.sa/src/cudacpp_src.mk index 554d7a704c..d4cc628aec 100644 --- a/epochX/cudacpp/gg_tt.sa/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_tt.sa/src/cudacpp_src.mk @@ -36,6 +36,13 @@ endif # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html ###RANLIB = ranlib +# Add 
-mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +LDFLAGS = +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +LDFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler (note: NVCC is already exported including ccache) @@ -259,11 +266,11 @@ endif ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(cu_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) + $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) $(LDFLAGS) else $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(CXX) -shared -o $@ $(cxx_objects) + $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 5cfefc86ec..d1d49f4b29 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~; add process g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005840778350830078  +DEBUG: model prefixing takes 0.00554347038269043  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,14 +155,14 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.009 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.021 s +1 processes with 16 diagrams generated in 0.019 s Total: 2 processes with 19 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -184,7 +184,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -201,7 +201,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -224,7 +224,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.322 s +ALOHA: aloha creates 5 routines in 0.319 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -232,7 +232,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.305 s +ALOHA: aloha creates 10 routines in 0.304 s VVV1 VVV1 FFV1 @@ -264,16 +264,13 @@ patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py -Hunk #1 succeeded at 385 (offset 5 lines). patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 484 (offset 15 lines). patching file driver.f patching file matrix1.f DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 484 (offset 15 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 159 (offset 16 lines). 
@@ -287,9 +284,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.275s -user 0m2.024s -sys 0m0.250s +real 0m2.255s +user 0m2.031s +sys 0m0.215s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk index c6c1826de7..0d604e747c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk @@ -103,6 +103,11 @@ endif # Note: AR, CXX and FC are implicitly defined if not set externally # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler @@ -222,8 +227,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +563,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile index 74b19033a8..65369d6101 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile @@ -9,6 +9,12 @@ FFLAGS+= -cpp # Compile counters with -O3 as in the cudacpp makefile (avoid being "unfair" to Fortran #740) CXXFLAGS = -O3 -Wall -Wshadow -Wextra +# Add -std=c++17 explicitly to avoid build errors on macOS +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -std=c++17 -mmacosx-version-min=11.3 +endif + # Enable ccache if USECCACHE=1 ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) override CXX:=ccache $(CXX) @@ -89,7 +95,12 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
+ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +LDFLAGS += -lc++ # avoid 'Undefined symbols' for chrono::steady_clock on macOS (checked with otool -L libmg5amc_gg_ttx_cpp.so) +LDFLAGS += -mmacosx-version-min=11.3 # avoid "ld: warning: object file was built for newer macOS version than being linked" +else +LDFLAGS += -Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (not supported on macOS) +endif all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) @@ -100,8 +111,8 @@ LINKLIBS += -lintlc # undefined reference to `_intel_fast_memcpy' else ifneq ($(shell $(CXX) --version | egrep '^clang'),) override OMPFLAGS = -fopenmp $(CUDACPP_BUILDDIR)/$(PROG)_cpp: LINKLIBS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 -###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###override OMPFLAGS = -fopenmp # OMP is not supported yet by cudacpp for Apple clang +else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +override OMPFLAGS = # OMP is not supported yet by cudacpp for Apple clang else override OMPFLAGS = -fopenmp endif diff --git a/epochX/cudacpp/gg_tt01g.mad/src/cudacpp_src.mk b/epochX/cudacpp/gg_tt01g.mad/src/cudacpp_src.mk index 554d7a704c..d4cc628aec 100644 --- a/epochX/cudacpp/gg_tt01g.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_tt01g.mad/src/cudacpp_src.mk @@ -36,6 +36,13 @@ endif # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html ###RANLIB = ranlib +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +LDFLAGS = +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +LDFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler (note: NVCC is already exported including ccache) @@ 
-259,11 +266,11 @@ endif ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(cu_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) + $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) $(LDFLAGS) else $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(CXX) -shared -o $@ $(cxx_objects) + $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index fe344f8cc1..89918164ec 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0052983760833740234  +DEBUG: model prefixing takes 0.005265474319458008  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,15 +190,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s -Wrote files for 36 helas calls in 0.147 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s +Wrote files for 36 helas calls in 0.149 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.320 s +ALOHA: aloha creates 5 routines in 0.324 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.305 s +ALOHA: aloha creates 10 routines in 0.312 s VVV1 VVV1 FFV1 @@ -238,11 +238,9 @@ patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py -Hunk #1 succeeded at 385 (offset 5 lines). patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 484 (offset 15 lines). 
patching file driver.f patching file matrix1.f Hunk #2 succeeded at 159 (offset 16 lines). @@ -256,9 +254,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.259s -user 0m1.920s -sys 0m0.209s +real 0m2.171s +user 0m1.918s +sys 0m0.246s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk index c6c1826de7..0d604e747c 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk @@ -103,6 +103,11 @@ endif # Note: AR, CXX and FC are implicitly defined if not set externally # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler @@ -222,8 +227,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +563,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile b/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile index 74b19033a8..65369d6101 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile @@ -9,6 +9,12 @@ FFLAGS+= -cpp # Compile counters with -O3 as in the cudacpp makefile (avoid being "unfair" to Fortran #740) CXXFLAGS = -O3 -Wall -Wshadow -Wextra +# Add -std=c++17 explicitly to avoid build errors on macOS +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -std=c++17 -mmacosx-version-min=11.3 +endif + # Enable ccache if USECCACHE=1 ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) override CXX:=ccache $(CXX) @@ -89,7 +95,12 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
+ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +LDFLAGS += -lc++ # avoid 'Undefined symbols' for chrono::steady_clock on macOS (checked with otool -L libmg5amc_gg_ttx_cpp.so) +LDFLAGS += -mmacosx-version-min=11.3 # avoid "ld: warning: object file was built for newer macOS version than being linked" +else +LDFLAGS += -Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (not supported on macOS) +endif all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) @@ -100,8 +111,8 @@ LINKLIBS += -lintlc # undefined reference to `_intel_fast_memcpy' else ifneq ($(shell $(CXX) --version | egrep '^clang'),) override OMPFLAGS = -fopenmp $(CUDACPP_BUILDDIR)/$(PROG)_cpp: LINKLIBS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 -###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###override OMPFLAGS = -fopenmp # OMP is not supported yet by cudacpp for Apple clang +else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +override OMPFLAGS = # OMP is not supported yet by cudacpp for Apple clang else override OMPFLAGS = -fopenmp endif diff --git a/epochX/cudacpp/gg_ttg.mad/src/cudacpp_src.mk b/epochX/cudacpp/gg_ttg.mad/src/cudacpp_src.mk index 554d7a704c..d4cc628aec 100644 --- a/epochX/cudacpp/gg_ttg.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_ttg.mad/src/cudacpp_src.mk @@ -36,6 +36,13 @@ endif # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html ###RANLIB = ranlib +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +LDFLAGS = +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +LDFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler (note: NVCC is already exported including ccache) @@ -259,11 
+266,11 @@ endif ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(cu_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) + $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) $(LDFLAGS) else $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(CXX) -shared -o $@ $(cxx_objects) + $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 78da7e26c9..213796e191 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00540924072265625  +DEBUG: model prefixing takes 0.0052645206451416016  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.022 s +1 processes with 16 diagrams generated in 0.021 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT @@ -205,6 +205,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/ DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.770s -user 0m0.712s -sys 0m0.046s +real 0m0.768s +user 0m0.709s +sys 0m0.048s diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk index c6c1826de7..0d604e747c 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk @@ -103,6 +103,11 @@ endif # Note: AR, CXX and FC are implicitly defined if not set externally # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler @@ -222,8 +227,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = -fopenmp # 
OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +563,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttg.sa/src/cudacpp_src.mk b/epochX/cudacpp/gg_ttg.sa/src/cudacpp_src.mk index 554d7a704c..d4cc628aec 100644 --- a/epochX/cudacpp/gg_ttg.sa/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_ttg.sa/src/cudacpp_src.mk @@ -36,6 +36,13 @@ endif # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html ###RANLIB = ranlib +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +LDFLAGS = +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +LDFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler (note: NVCC is already exported including ccache) @@ -259,11 +266,11 @@ endif ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(cu_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) + $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) $(LDFLAGS) else $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(CXX) -shared -o $@ $(cxx_objects) + $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 5b686d942c..4ad89e68b6 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005361080169677734  +DEBUG: model prefixing takes 0.005406856536865234  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.155 s +1 processes with 123 diagrams generated in 0.157 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,15 +190,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.417 s -Wrote files for 222 helas calls in 0.676 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.424 s +Wrote files for 222 helas calls in 0.687 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.325 s +ALOHA: aloha creates 5 routines in 0.329 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.308 s +ALOHA: aloha creates 10 routines in 0.313 s VVV1 VVV1 FFV1 @@ -241,11 +241,9 @@ patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py -Hunk #1 succeeded at 385 (offset 5 lines). patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 484 (offset 15 lines). 
patching file driver.f patching file matrix1.f Hunk #2 succeeded at 191 (offset 48 lines). @@ -259,9 +257,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.441s -user 0m2.976s -sys 0m0.225s +real 0m3.340s +user 0m2.973s +sys 0m0.264s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk index c6c1826de7..0d604e747c 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk @@ -103,6 +103,11 @@ endif # Note: AR, CXX and FC are implicitly defined if not set externally # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler @@ -222,8 +227,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +563,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile index 74b19033a8..65369d6101 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile @@ -9,6 +9,12 @@ FFLAGS+= -cpp # Compile counters with -O3 as in the cudacpp makefile (avoid being "unfair" to Fortran #740) CXXFLAGS = -O3 -Wall -Wshadow -Wextra +# Add -std=c++17 explicitly to avoid build errors on macOS +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -std=c++17 -mmacosx-version-min=11.3 +endif + # Enable ccache if USECCACHE=1 ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) override CXX:=ccache $(CXX) @@ -89,7 +95,12 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
+ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +LDFLAGS += -lc++ # avoid 'Undefined symbols' for chrono::steady_clock on macOS (checked with otool -L libmg5amc_gg_ttx_cpp.so) +LDFLAGS += -mmacosx-version-min=11.3 # avoid "ld: warning: object file was built for newer macOS version than being linked" +else +LDFLAGS += -Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (not supported on macOS) +endif all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) @@ -100,8 +111,8 @@ LINKLIBS += -lintlc # undefined reference to `_intel_fast_memcpy' else ifneq ($(shell $(CXX) --version | egrep '^clang'),) override OMPFLAGS = -fopenmp $(CUDACPP_BUILDDIR)/$(PROG)_cpp: LINKLIBS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 -###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###override OMPFLAGS = -fopenmp # OMP is not supported yet by cudacpp for Apple clang +else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +override OMPFLAGS = # OMP is not supported yet by cudacpp for Apple clang else override OMPFLAGS = -fopenmp endif diff --git a/epochX/cudacpp/gg_ttgg.mad/src/cudacpp_src.mk b/epochX/cudacpp/gg_ttgg.mad/src/cudacpp_src.mk index 554d7a704c..d4cc628aec 100644 --- a/epochX/cudacpp/gg_ttgg.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_ttgg.mad/src/cudacpp_src.mk @@ -36,6 +36,13 @@ endif # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html ###RANLIB = ranlib +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +LDFLAGS = +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +LDFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler (note: NVCC is already exported including ccache) @@ 
-259,11 +266,11 @@ endif ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(cu_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) + $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) $(LDFLAGS) else $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(CXX) -shared -o $@ $(cxx_objects) + $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index d9d30196d4..aaa9f8c921 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005450725555419922  +DEBUG: model prefixing takes 0.005413532257080078  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.156 s +1 processes with 123 diagrams generated in 0.155 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.418 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.416 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.313 s +ALOHA: aloha creates 5 routines in 0.317 s VVV1 VVV1 FFV1 @@ -208,6 +208,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m1.416s -user 0m1.336s -sys 0m0.068s +real 0m1.418s +user 0m1.353s +sys 0m0.052s diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk index c6c1826de7..0d604e747c 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk +++ 
b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk @@ -103,6 +103,11 @@ endif # Note: AR, CXX and FC are implicitly defined if not set externally # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler @@ -222,8 +227,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +563,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttgg.sa/src/cudacpp_src.mk b/epochX/cudacpp/gg_ttgg.sa/src/cudacpp_src.mk index 554d7a704c..d4cc628aec 100644 --- a/epochX/cudacpp/gg_ttgg.sa/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_ttgg.sa/src/cudacpp_src.mk @@ -36,6 +36,13 @@ endif # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html ###RANLIB = ranlib +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +LDFLAGS = +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +LDFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler (note: NVCC is already exported including ccache) @@ -259,11 +266,11 @@ endif ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(cu_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) + $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) $(LDFLAGS) else $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(CXX) -shared -o $@ $(cxx_objects) + $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index c0051a2221..e6bbcf3798 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0053594112396240234  +DEBUG: model prefixing takes 0.00542140007019043  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.855 s +1 processes with 1240 diagrams generated in 1.838 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -177,7 +177,7 @@ INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -192,15 +192,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.449 s -Wrote files for 2281 helas calls in 18.329 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.443 s +Wrote files for 2281 helas calls in 18.293 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.329 s +ALOHA: aloha creates 5 routines in 0.320 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -208,7 +208,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.323 s +ALOHA: aloha creates 10 routines in 0.310 s VVV1 VVV1 FFV1 @@ -243,11 +243,9 @@ patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py -Hunk #1 succeeded at 385 (offset 5 lines). patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 484 (offset 15 lines). 
patching file driver.f patching file matrix1.f Hunk #2 succeeded at 255 (offset 112 lines). @@ -261,9 +259,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m28.868s -user 0m28.342s -sys 0m0.399s +real 0m28.777s +user 0m28.291s +sys 0m0.395s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk index c6c1826de7..0d604e747c 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk @@ -103,6 +103,11 @@ endif # Note: AR, CXX and FC are implicitly defined if not set externally # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler @@ -222,8 +227,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +563,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile index 74b19033a8..65369d6101 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile @@ -9,6 +9,12 @@ FFLAGS+= -cpp # Compile counters with -O3 as in the cudacpp makefile (avoid being "unfair" to Fortran #740) CXXFLAGS = -O3 -Wall -Wshadow -Wextra +# Add -std=c++17 explicitly to avoid build errors on macOS +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -std=c++17 -mmacosx-version-min=11.3 +endif + # Enable ccache if USECCACHE=1 ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) override CXX:=ccache $(CXX) @@ -89,7 +95,12 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
+ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +LDFLAGS += -lc++ # avoid 'Undefined symbols' for chrono::steady_clock on macOS (checked with otool -L libmg5amc_gg_ttx_cpp.so) +LDFLAGS += -mmacosx-version-min=11.3 # avoid "ld: warning: object file was built for newer macOS version than being linked" +else +LDFLAGS += -Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (not supported on macOS) +endif all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) @@ -100,8 +111,8 @@ LINKLIBS += -lintlc # undefined reference to `_intel_fast_memcpy' else ifneq ($(shell $(CXX) --version | egrep '^clang'),) override OMPFLAGS = -fopenmp $(CUDACPP_BUILDDIR)/$(PROG)_cpp: LINKLIBS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 -###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###override OMPFLAGS = -fopenmp # OMP is not supported yet by cudacpp for Apple clang +else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +override OMPFLAGS = # OMP is not supported yet by cudacpp for Apple clang else override OMPFLAGS = -fopenmp endif diff --git a/epochX/cudacpp/gg_ttggg.mad/src/cudacpp_src.mk b/epochX/cudacpp/gg_ttggg.mad/src/cudacpp_src.mk index 554d7a704c..d4cc628aec 100644 --- a/epochX/cudacpp/gg_ttggg.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_ttggg.mad/src/cudacpp_src.mk @@ -36,6 +36,13 @@ endif # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html ###RANLIB = ranlib +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +LDFLAGS = +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +LDFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler (note: NVCC is already exported including ccache) @@ 
-259,11 +266,11 @@ endif ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(cu_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) + $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) $(LDFLAGS) else $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(CXX) -shared -o $@ $(cxx_objects) + $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index dc512f186d..0a3ebf79ac 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0054700374603271484  +DEBUG: model prefixing takes 0.005333423614501953  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.839 s +1 processes with 1240 diagrams generated in 1.874 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.466 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.505 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.346 s +ALOHA: aloha creates 5 routines in 0.342 s VVV1 VVV1 FFV1 @@ -208,6 +208,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m12.842s -user 0m12.634s -sys 0m0.111s +real 0m13.012s +user 0m12.698s +sys 0m0.117s diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk index c6c1826de7..0d604e747c 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk 
+++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk @@ -103,6 +103,11 @@ endif # Note: AR, CXX and FC are implicitly defined if not set externally # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler @@ -222,8 +227,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +563,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gg_ttggg.sa/src/cudacpp_src.mk b/epochX/cudacpp/gg_ttggg.sa/src/cudacpp_src.mk index 554d7a704c..d4cc628aec 100644 --- a/epochX/cudacpp/gg_ttggg.sa/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_ttggg.sa/src/cudacpp_src.mk @@ -36,6 +36,13 @@ endif # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html ###RANLIB = ranlib +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +LDFLAGS = +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +LDFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler (note: NVCC is already exported including ccache) @@ -259,11 +266,11 @@ endif ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(cu_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) + $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) $(LDFLAGS) else $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(CXX) -shared -o $@ $(cxx_objects) + $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index aeeaa24a46..11d08c9af6 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005341529846191406  +DEBUG: model prefixing takes 0.005434274673461914  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.076 s +8 processes with 40 diagrams generated in 0.078 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -197,7 +197,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -214,7 +214,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -229,12 +229,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s -Wrote files for 32 helas calls in 0.215 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s +Wrote files for 32 helas calls in 0.217 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.144 s +ALOHA: aloha creates 2 routines in 0.142 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines @@ -267,11 +267,10 @@ patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py -Hunk #1 succeeded at 385 (offset 5 lines). 
patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 528 (offset 59 lines). +Hunk #1 succeeded at 528 (offset 44 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -281,7 +280,7 @@ Hunk #4 succeeded at 281 (offset 32 lines). Hunk #5 succeeded at 326 (offset 32 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 528 (offset 59 lines). +Hunk #1 succeeded at 528 (offset 44 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -296,9 +295,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.938s -user 0m1.675s -sys 0m0.219s +real 0m1.902s +user 0m1.688s +sys 0m0.208s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk index c6c1826de7..0d604e747c 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk @@ -103,6 +103,11 @@ endif # Note: AR, CXX and FC are implicitly defined if not set externally # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler @@ -222,8 +227,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +563,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile b/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile index 74b19033a8..65369d6101 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile @@ -9,6 +9,12 @@ FFLAGS+= -cpp # Compile counters with -O3 as in the cudacpp makefile (avoid being "unfair" to Fortran #740) CXXFLAGS = -O3 -Wall -Wshadow -Wextra +# Add -std=c++17 explicitly to avoid build errors on macOS +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -std=c++17 -mmacosx-version-min=11.3 +endif + # Enable ccache if USECCACHE=1 ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) override CXX:=ccache $(CXX) @@ -89,7 +95,12 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
+ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +LDFLAGS += -lc++ # avoid 'Undefined symbols' for chrono::steady_clock on macOS (checked with otool -L libmg5amc_gg_ttx_cpp.so) +LDFLAGS += -mmacosx-version-min=11.3 # avoid "ld: warning: object file was built for newer macOS version than being linked" +else +LDFLAGS += -Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (not supported on macOS) +endif all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) @@ -100,8 +111,8 @@ LINKLIBS += -lintlc # undefined reference to `_intel_fast_memcpy' else ifneq ($(shell $(CXX) --version | egrep '^clang'),) override OMPFLAGS = -fopenmp $(CUDACPP_BUILDDIR)/$(PROG)_cpp: LINKLIBS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 -###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###override OMPFLAGS = -fopenmp # OMP is not supported yet by cudacpp for Apple clang +else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +override OMPFLAGS = # OMP is not supported yet by cudacpp for Apple clang else override OMPFLAGS = -fopenmp endif diff --git a/epochX/cudacpp/gq_ttq.mad/src/cudacpp_src.mk b/epochX/cudacpp/gq_ttq.mad/src/cudacpp_src.mk index 554d7a704c..d4cc628aec 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/gq_ttq.mad/src/cudacpp_src.mk @@ -36,6 +36,13 @@ endif # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html ###RANLIB = ranlib +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +LDFLAGS = +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +LDFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler (note: NVCC is already exported including ccache) @@ -259,11 
+266,11 @@ endif ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(cu_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) + $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) $(LDFLAGS) else $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(CXX) -shared -o $@ $(cxx_objects) + $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 5dd652a8ff..0a81668e34 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005471229553222656  +DEBUG: model prefixing takes 0.005318880081176758  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.077 s +8 processes with 40 diagrams generated in 0.076 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -228,6 +228,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.636s -user 0m0.581s -sys 0m0.049s +real 0m0.639s +user 0m0.590s +sys 0m0.039s diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk index c6c1826de7..0d604e747c 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk @@ -103,6 +103,11 @@ endif # Note: AR, CXX and FC are implicitly defined if not set externally # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler @@ -222,8 +227,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +563,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/gq_ttq.sa/src/cudacpp_src.mk b/epochX/cudacpp/gq_ttq.sa/src/cudacpp_src.mk index 554d7a704c..d4cc628aec 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/cudacpp_src.mk +++ b/epochX/cudacpp/gq_ttq.sa/src/cudacpp_src.mk @@ -36,6 +36,13 @@ endif # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html ###RANLIB = ranlib +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +LDFLAGS = +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +LDFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler (note: NVCC is already exported including ccache) @@ -259,11 +266,11 @@ endif ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(cu_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) + $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) $(LDFLAGS) else $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! 
-d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(CXX) -shared -o $@ $(cxx_objects) + $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index caae38eb15..0df769235d 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -151,7 +151,7 @@ Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines -ALOHA: aloha creates 1 routines in 0.062 s +ALOHA: aloha creates 1 routines in 0.060 s VVS3 FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. 
@@ -165,6 +165,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.426s -user 0m0.355s -sys 0m0.059s +real 0m0.415s +user 0m0.359s +sys 0m0.045s diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk index c6c1826de7..0d604e747c 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk @@ -103,6 +103,11 @@ endif # Note: AR, CXX and FC are implicitly defined if not set externally # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler @@ -222,8 +227,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +563,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/heft_gg_h.sa/src/cudacpp_src.mk b/epochX/cudacpp/heft_gg_h.sa/src/cudacpp_src.mk index 632d2135c5..d4cc628aec 100644 --- a/epochX/cudacpp/heft_gg_h.sa/src/cudacpp_src.mk +++ b/epochX/cudacpp/heft_gg_h.sa/src/cudacpp_src.mk @@ -36,6 +36,13 @@ endif # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html ###RANLIB = ranlib +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +LDFLAGS = +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +LDFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler (note: NVCC is already exported including ccache) @@ -250,20 +257,20 @@ $(BUILDDIR)/%_cu.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, Parameters_heft.o read_slha.o) +cxx_objects=$(addprefix $(BUILDDIR)/, Parameters_sm.o read_slha.o) ifneq ($(NVCC),) -cu_objects=$(addprefix $(BUILDDIR)/, Parameters_heft_cu.o) +cu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_cu.o) endif # Target (and build rules): common (src) 
library ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(cu_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) + $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) $(LDFLAGS) else $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(CXX) -shared -o $@ $(cxx_objects) + $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index e3b0d16901..a478197d0e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005556583404541016  +DEBUG: model prefixing takes 0.005745649337768555  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.028 s +5 processes with 7 diagrams generated in 0.029 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. 
@@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.134 s +13 processes with 76 diagrams generated in 0.135 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.812 s +65 processes with 1119 diagrams generated in 1.803 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -497,7 +497,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -514,7 +514,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -531,7 +531,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -548,7 +548,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -565,7 +565,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -582,7 +582,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -599,7 +599,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -616,7 +616,7 @@ INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -633,7 +633,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -650,7 +650,7 @@ INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -667,7 +667,7 @@ INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -684,7 +684,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -701,7 +701,7 @@ INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -718,7 +718,7 @@ INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -735,7 +735,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -752,7 +752,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -769,7 +769,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -786,7 +786,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -801,15 +801,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.260 s -Wrote files for 810 helas calls in 3.181 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.283 s +Wrote files for 810 helas calls in 3.215 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.335 s +ALOHA: aloha creates 5 routines in 0.332 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -817,7 +817,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.309 s +ALOHA: aloha creates 10 routines in 0.312 s VVV1 VVV1 FFV1 @@ -852,16 +852,14 @@ patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file 
bin/internal/gen_ximprove.py -Hunk #1 succeeded at 385 (offset 5 lines). patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 484 (offset 15 lines). patching file driver.f patching file matrix1.f DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 539 (offset 70 lines). +Hunk #1 succeeded at 539 (offset 55 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -871,7 +869,6 @@ Hunk #4 succeeded at 252 (offset 3 lines). Hunk #5 succeeded at 297 (offset 3 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 484 (offset 15 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 159 (offset 16 lines). @@ -880,7 +877,7 @@ Hunk #4 succeeded at 265 (offset 16 lines). Hunk #5 succeeded at 310 (offset 16 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 528 (offset 59 lines). +Hunk #1 succeeded at 528 (offset 44 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). 
@@ -890,7 +887,7 @@ Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 528 (offset 59 lines). +Hunk #1 succeeded at 528 (offset 44 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -900,7 +897,7 @@ Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 539 (offset 70 lines). +Hunk #1 succeeded at 539 (offset 55 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -910,7 +907,6 @@ Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 484 (offset 15 lines). patching file driver.f patching file matrix1.f Hunk #2 succeeded at 191 (offset 48 lines). @@ -919,7 +915,7 @@ Hunk #4 succeeded at 297 (offset 48 lines). Hunk #5 succeeded at 342 (offset 48 lines). 
DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 517 (offset 48 lines). +Hunk #1 succeeded at 517 (offset 33 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -929,7 +925,7 @@ Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 528 (offset 59 lines). +Hunk #1 succeeded at 528 (offset 44 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -939,7 +935,7 @@ Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 528 (offset 59 lines). +Hunk #1 succeeded at 528 (offset 44 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -949,7 +945,7 @@ Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 555 (offset 86 lines). 
+Hunk #1 succeeded at 555 (offset 71 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 77 (offset 5 lines). @@ -959,7 +955,7 @@ Hunk #4 succeeded at 302 (offset 53 lines). Hunk #5 succeeded at 347 (offset 53 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 627 (offset 158 lines). +Hunk #1 succeeded at 627 (offset 143 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 83 (offset 11 lines). @@ -969,7 +965,7 @@ Hunk #4 succeeded at 308 (offset 59 lines). Hunk #5 succeeded at 353 (offset 59 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 539 (offset 70 lines). +Hunk #1 succeeded at 539 (offset 55 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -979,7 +975,7 @@ Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 627 (offset 158 lines). +Hunk #1 succeeded at 627 (offset 143 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 83 (offset 11 lines). @@ -989,7 +985,7 @@ Hunk #4 succeeded at 308 (offset 59 lines). Hunk #5 succeeded at 353 (offset 59 lines). 
DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 539 (offset 70 lines). +Hunk #1 succeeded at 539 (offset 55 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -999,7 +995,7 @@ Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 539 (offset 70 lines). +Hunk #1 succeeded at 539 (offset 55 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1009,7 +1005,7 @@ Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 555 (offset 86 lines). +Hunk #1 succeeded at 555 (offset 71 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 77 (offset 5 lines). @@ -1019,7 +1015,7 @@ Hunk #4 succeeded at 302 (offset 53 lines). Hunk #5 succeeded at 347 (offset 53 lines). DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f -Hunk #1 succeeded at 539 (offset 70 lines). 
+Hunk #1 succeeded at 539 (offset 55 lines). patching file driver.f patching file matrix1.f Hunk #1 succeeded at 75 (offset 3 lines). @@ -1034,8 +1030,8 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m8.723s -user 0m8.211s +real 0m8.755s +user 0m8.270s sys 0m0.450s ************************************************************ * * diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk index c6c1826de7..0d604e747c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk @@ -103,6 +103,11 @@ endif # Note: AR, CXX and FC are implicitly defined if not set externally # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler @@ -222,8 +227,8 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) -###override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang (TODO: check if builds fail in the CI #578) +override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -558,7 +563,6 @@ $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# ###$(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) $(LIBFLAGS) -fopenmp # AV remove (added by OM) ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile index 74b19033a8..65369d6101 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile @@ -9,6 +9,12 @@ FFLAGS+= -cpp # Compile counters with -O3 as in the cudacpp makefile (avoid being "unfair" to Fortran #740) CXXFLAGS = -O3 -Wall -Wshadow -Wextra +# Add -std=c++17 explicitly to avoid build errors on macOS +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -std=c++17 -mmacosx-version-min=11.3 +endif + # Enable ccache if USECCACHE=1 ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) override CXX:=ccache $(CXX) @@ -89,7 +95,12 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -LDFLAGS+=-Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (OM: flag not universal, skip?) 
+ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +LDFLAGS += -lc++ # avoid 'Undefined symbols' for chrono::steady_clock on macOS (checked with otool -L libmg5amc_gg_ttx_cpp.so) +LDFLAGS += -mmacosx-version-min=11.3 # avoid "ld: warning: object file was built for newer macOS version than being linked" +else +LDFLAGS += -Wl,--no-relax # avoid 'failed to convert GOTPCREL relocation' error #458 (not supported on macOS) +endif all: $(PROG)_fortran $(CUDACPP_BUILDDIR)/$(PROG)_cpp # also builds $(PROG)_cuda if $(CUDACPP_CULIB) exists (#503) @@ -100,8 +111,8 @@ LINKLIBS += -lintlc # undefined reference to `_intel_fast_memcpy' else ifneq ($(shell $(CXX) --version | egrep '^clang'),) override OMPFLAGS = -fopenmp $(CUDACPP_BUILDDIR)/$(PROG)_cpp: LINKLIBS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 -###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###override OMPFLAGS = -fopenmp # OMP is not supported yet by cudacpp for Apple clang +else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +override OMPFLAGS = # OMP is not supported yet by cudacpp for Apple clang else override OMPFLAGS = -fopenmp endif diff --git a/epochX/cudacpp/pp_tt012j.mad/src/cudacpp_src.mk b/epochX/cudacpp/pp_tt012j.mad/src/cudacpp_src.mk index 554d7a704c..d4cc628aec 100644 --- a/epochX/cudacpp/pp_tt012j.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/pp_tt012j.mad/src/cudacpp_src.mk @@ -36,6 +36,13 @@ endif # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html ###RANLIB = ranlib +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +LDFLAGS = +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +LDFLAGS += -mmacosx-version-min=11.3 +endif + #------------------------------------------------------------------------------- #=== Configure the CUDA compiler (note: NVCC is already exported including 
ccache) @@ -259,11 +266,11 @@ endif ifneq ($(NVCC),) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) $(cu_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) + $(NVCC) -shared -o $@ $(cxx_objects) $(cu_objects) $(LDFLAGS) else $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so : $(cxx_objects) @if [ ! -d $(LIBDIR) ]; then echo "mkdir -p $(LIBDIR)"; mkdir -p $(LIBDIR); fi - $(CXX) -shared -o $@ $(cxx_objects) + $(CXX) -shared -o $@ $(cxx_objects) $(LDFLAGS) endif #------------------------------------------------------------------------------- From b52544770665b94fff951f7f9fa7aa5c843dff0f Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 29 Oct 2023 10:26:40 +0100 Subject: [PATCH 066/119] [oct23av] in github workflows, switch CI tests from .sa to .mad directories (we should have done this long ago) --- .github/workflows/c-cpp.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 34cff0be37..7e724f03d0 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - folder: [ epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum , epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum , epoch2/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum ] + folder: [ epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum , epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum , epoch2/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum ] fail-fast: false steps: - uses: actions/checkout@v2 @@ -21,7 +21,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - folder: [ epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum , epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg ] + folder: [ epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum , epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg ] precision: 
[ d , f , m ] fail-fast: false steps: @@ -38,7 +38,7 @@ jobs: FC: gfortran-11 strategy: matrix: - folder: [ epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum, epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg ] + folder: [ epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum, epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg ] precision: [ d , f , m ] fail-fast: false steps: @@ -57,7 +57,7 @@ jobs: REQUIRE_CUDA: 1 strategy: matrix: - folder: [ epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum , epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg ] + folder: [ epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum , epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg ] precision: [ d , f , m ] fail-fast: false steps: From 0605140071192e6d946361a0cbaf156d924d13b5 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 29 Oct 2023 10:40:47 +0100 Subject: [PATCH 067/119] [oct23av] fix issues in my previous patch in the CI configuration --- .github/workflows/c-cpp.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 7e724f03d0..ec671246bd 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -11,11 +11,11 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - folder: [ epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum , epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum , epoch2/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum ] + folder: [ epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum , epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum , epoch2/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum ] fail-fast: false steps: - uses: actions/checkout@v2 - - name: make epoch1 + - name: make debug run: make -C ${{ matrix.folder }} debug CPU: runs-on: ubuntu-latest @@ -27,11 +27,11 @@ jobs: steps: - uses: actions/checkout@v2 - name: make info - run: make FPTYPE=${{ 
matrix.precision }} -C ${{ matrix.folder }} info + run: make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk info - name: make run: make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} - name: make check - run: make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} check + run: make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk check CPU_MAC: runs-on: macos-latest env: @@ -44,11 +44,11 @@ jobs: steps: - uses: actions/checkout@v2 - name: make info - run: make AVX=none OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} info + run: make AVX=none OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk info - name: make run: make AVX=none OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} - name: make check - run: make AVX=none OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} check + run: make AVX=none OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk check GPU: runs-on: self-hosted env: @@ -65,8 +65,8 @@ jobs: - name: path run: echo "PATH=$PATH" - name: make info - run: make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} info + run: make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk info - name: make run: make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} - name: make check - run: make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} check + run: make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk check From 15949149589efde463062e15288a0a410550a0a9 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 29 Oct 2023 10:56:57 +0100 Subject: [PATCH 068/119] [oct23av] regenerate all 8 mad and 7 sa processes after merging Stefan's fix for gqttq in PR #782 Indeed, only gqttq Parameters source code changes. 
--- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 12 ++-- .../CODEGEN_cudacpp_ee_mumu_log.txt | 14 ++--- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 16 +++--- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 10 ++-- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 20 +++---- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 20 +++---- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 10 ++-- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 20 +++---- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 14 ++--- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 20 +++---- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 12 ++-- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 22 ++++---- .../cudacpp/gq_ttq.mad/src/Parameters_sm.cc | 4 +- epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h | 18 +++--- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 10 ++-- epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc | 4 +- epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h | 18 +++--- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 6 +- .../CODEGEN_mad_pp_tt012j_log.txt | 56 +++++++++---------- 19 files changed, 153 insertions(+), 153 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 8b7b112fc5..082b29214b 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005471944808959961  +DEBUG: model prefixing takes 0.005426645278930664  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,7 +174,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -203,7 +203,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.249 s +ALOHA: aloha creates 7 routines in 0.248 s FFV1 FFV1 FFV2 @@ -250,9 +250,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.849s -user 0m1.645s -sys 0m0.186s +real 0m1.886s +user 0m1.610s +sys 0m0.215s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 0c2253f725..4feb407ff8 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005351066589355469  +DEBUG: model prefixing takes 0.005356311798095703  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -154,7 +154,7 @@ INFO: Checking for minimal orders which gives processes. 
INFO: Please specify coupling orders to bypass this step. INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.004 s +1 processes with 2 diagrams generated in 0.005 s Total: 1 processes with 2 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu Load PLUGIN.CUDACPP_OUTPUT @@ -174,14 +174,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. -Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s +Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.262 s +ALOHA: aloha creates 4 routines in 0.265 s FFV1 FFV1 FFV2 @@ -202,6 +202,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.674s -user 0m0.575s -sys 0m0.058s +real 0m0.648s +user 0m0.580s +sys 0m0.063s diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 1c84d3f0e1..3967086980 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ 
generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005392789840698242  +DEBUG: model prefixing takes 0.005898714065551758  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,16 +191,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.099 s +Wrote files for 10 helas calls in 0.101 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.142 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.136 s +ALOHA: aloha creates 4 routines in 0.130 s VVV1 FFV1 FFV1 @@ -239,9 +239,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.670s -user 0m1.471s -sys 0m0.196s +real 0m1.715s +user 0m1.464s +sys 0m0.218s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index c83a08f394..28306d5b90 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005424022674560547  +DEBUG: model prefixing takes 0.005451679229736328  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -180,7 +180,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.142 s VVV1 FFV1 FFV1 @@ -197,6 +197,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.536s -user 0m0.471s -sys 0m0.048s +real 0m0.589s +user 0m0.464s +sys 0m0.056s diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index d1d49f4b29..5e19f38510 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~; add process g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00554347038269043  +DEBUG: model prefixing takes 
0.005849123001098633  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.008 s +1 processes with 3 diagrams generated in 0.009 s Total: 1 processes with 3 diagrams INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. @@ -184,7 +184,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -201,7 +201,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -217,14 +217,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s -Wrote files for 46 helas calls in 0.239 s +Wrote files for 46 helas calls in 0.240 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.319 s +ALOHA: aloha creates 5 routines in 0.322 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -232,7 +232,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.304 s +ALOHA: aloha creates 10 routines in 0.312 s VVV1 VVV1 FFV1 @@ -284,9 +284,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.255s -user 0m2.031s -sys 0m0.215s +real 0m2.335s +user 0m2.111s +sys 0m0.220s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index 89918164ec..de38108943 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005265474319458008  +DEBUG: model prefixing takes 0.005337953567504883  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.021 s +1 processes with 16 diagrams generated in 0.022 s Total: 1 processes with 16 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,15 +190,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s -Wrote files for 36 helas calls in 0.149 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s +Wrote files for 36 helas calls in 0.146 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.324 s +ALOHA: aloha creates 5 routines in 0.321 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.312 s +ALOHA: aloha creates 10 routines in 0.306 s VVV1 VVV1 FFV1 @@ -254,9 +254,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.171s -user 0m1.918s -sys 0m0.246s +real 0m2.143s +user 0m1.908s +sys 0m0.230s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 213796e191..ddd57aa8e0 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0052645206451416016  +DEBUG: model prefixing takes 0.005626201629638672  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.319 s +ALOHA: aloha creates 5 routines in 0.321 s VVV1 VVV1 FFV1 @@ -205,6 +205,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/ DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.768s -user 0m0.709s -sys 0m0.048s +real 0m0.792s +user 0m0.705s +sys 0m0.057s diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 4ad89e68b6..843fa9b46f 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005406856536865234  +DEBUG: model prefixing takes 0.005464076995849609  
INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.157 s +1 processes with 123 diagrams generated in 0.156 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,15 +190,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.424 s -Wrote files for 222 helas calls in 0.687 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.421 s +Wrote files for 222 helas calls in 0.680 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.329 s +ALOHA: aloha creates 5 routines in 0.325 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.313 s +ALOHA: aloha creates 10 routines in 0.314 s VVV1 VVV1 FFV1 @@ -257,9 +257,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.340s -user 0m2.973s -sys 0m0.264s +real 0m3.241s +user 0m2.970s +sys 0m0.249s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index aaa9f8c921..753184fea8 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005413532257080078  +DEBUG: model prefixing takes 0.005289793014526367  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.155 s +1 processes with 123 diagrams generated in 0.159 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.416 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.421 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.317 s +ALOHA: aloha creates 5 routines in 0.313 s VVV1 VVV1 FFV1 @@ -208,6 +208,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m1.418s -user 0m1.353s -sys 0m0.052s +real 0m1.487s +user 0m1.350s +sys 0m0.062s diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index e6bbcf3798..14b1e83a4e 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00542140007019043  +DEBUG: model prefixing takes 0.00543212890625  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.838 s +1 processes with 1240 diagrams generated in 1.854 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -177,7 +177,7 @@ INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -192,15 +192,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.443 s -Wrote files for 2281 helas calls in 18.293 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.461 s +Wrote files for 2281 helas calls in 18.375 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.320 s +ALOHA: aloha creates 5 routines in 0.311 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -208,7 +208,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.310 s +ALOHA: aloha creates 10 routines in 0.306 s VVV1 VVV1 FFV1 @@ -259,9 +259,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m28.777s -user 0m28.291s -sys 0m0.395s +real 0m28.887s +user 0m28.381s +sys 0m0.404s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 0a3ebf79ac..938b434a63 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005333423614501953  +DEBUG: model prefixing takes 0.0052831172943115234  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.874 s +1 processes with 1240 diagrams generated in 1.893 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.505 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.593 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.342 s +ALOHA: aloha creates 5 routines in 0.344 s VVV1 VVV1 FFV1 @@ -208,6 +208,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m13.012s -user 0m12.698s +real 0m13.247s +user 0m12.792s sys 0m0.117s diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 11d08c9af6..30f3409999 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005434274673461914  +DEBUG: model prefixing takes 0.0053250789642333984  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -169,7 +169,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.078 s +8 processes with 40 diagrams generated in 0.077 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -197,7 +197,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -214,7 +214,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -229,17 +229,17 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s -Wrote files for 32 helas calls in 0.217 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s +Wrote files for 32 helas calls in 0.215 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.143 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.130 s +ALOHA: aloha creates 4 routines in 0.131 s FFV1 FFV1 FFV1 @@ -295,9 +295,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.902s -user 0m1.688s -sys 0m0.208s +real 0m2.002s +user 0m1.736s +sys 0m0.228s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.cc b/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.cc index 3452d1e8da..d5eda63ee0 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.cc +++ b/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.cc @@ -111,8 +111,8 @@ Parameters_sm::setDependentParameters() // now computed event-by-event (running void Parameters_sm::setDependentCouplings() // now computed event-by-event (running alphas #373) { - GC_10 = -G; GC_11 = mdl_complexi * G; + GC_10 = -G; } */ @@ -195,7 +195,7 @@ void Parameters_sm::printDependentCouplings() // now computed event-by-event (running alphas #373) { std::cout << "sm model couplings dependent on event kinematics:" << std::endl; - std::cout << std::setw( 20 ) << "GC_10 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_10 << std::endl; std::cout << std::setw( 20 ) << "GC_11 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_11 << std::endl; + std::cout << std::setw( 20 ) << "GC_10 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_10 << std::endl; } */ diff --git a/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h b/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h index 4f6f322ed9..0c77cf58f0 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h +++ b/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h @@ -54,7 +54,7 @@ namespace mg5amcCpu //double mdl_sqrt__aS, G, mdl_G__exp__2; // now computed event-by-event (running alphas #373) // Model couplings dependent on aS - //cxsmpl GC_10, GC_11; // now computed event-by-event (running alphas #373) + //cxsmpl GC_11, GC_10; // now computed event-by-event (running alphas #373) // Set parameters that are unchanged during the run void setIndependentParameters( SLHAReader& slha ); @@ -194,8 +194,8 @@ namespace mg5amcCpu 
//constexpr double mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) // Model couplings dependent on aS - //constexpr cxsmpl GC_10 = -G; // now computed event-by-event (running alphas #373) //constexpr cxsmpl GC_11 = mdl_complexi * G; // now computed event-by-event (running alphas #373) + //constexpr cxsmpl GC_10 = -G; // now computed event-by-event (running alphas #373) // Print parameters that are unchanged during the run void printIndependentParameters(); @@ -226,12 +226,12 @@ namespace mg5amcCpu namespace Parameters_sm_dependentCouplings { constexpr size_t ndcoup = 2; // #couplings that vary event by event because they depend on the running alphas QCD - constexpr size_t idcoup_GC_10 = 0; - constexpr size_t idcoup_GC_11 = 1; + constexpr size_t idcoup_GC_11 = 0; + constexpr size_t idcoup_GC_10 = 1; struct DependentCouplings_sv { - cxtype_sv GC_10; cxtype_sv GC_11; + cxtype_sv GC_10; }; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-variable" // e.g. <> @@ -257,8 +257,8 @@ namespace mg5amcCpu //const fptype_sv G = 2. 
* mdl_sqrt__aS * constexpr_sqrt( M_PI ); const fptype_sv mdl_G__exp__2 = ( ( G ) * ( G ) ); // Model couplings dependent on aS - out.GC_10 = -G; out.GC_11 = cI * G; + out.GC_10 = -G; } // End SM implementation - no special handling of vectors of floats as in EFT (#439) return out; @@ -293,12 +293,12 @@ namespace mg5amcCpu using namespace Parameters_sm_dependentCouplings; const fptype_sv& gs_sv = G_ACCESS::kernelAccessConst( gs ); DependentCouplings_sv couplings_sv = computeDependentCouplings_fromG( gs_sv ); - fptype* GC_10s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_10 ); fptype* GC_11s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_11 ); - cxtype_sv_ref GC_10s_sv = C_ACCESS::kernelAccess( GC_10s ); + fptype* GC_10s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_10 ); cxtype_sv_ref GC_11s_sv = C_ACCESS::kernelAccess( GC_11s ); - GC_10s_sv = couplings_sv.GC_10; + cxtype_sv_ref GC_10s_sv = C_ACCESS::kernelAccess( GC_10s ); GC_11s_sv = couplings_sv.GC_11; + GC_10s_sv = couplings_sv.GC_10; mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 0a81668e34..1894407637 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~; generate g q > t t~ q INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005318880081176758  +DEBUG: model prefixing takes 0.005591869354248047  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -210,7 +210,7 @@ Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.141 s +ALOHA: aloha creates 2 routines in 0.144 s FFV1 FFV1 FFV1 @@ -228,6 +228,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.639s -user 0m0.590s -sys 0m0.039s +real 0m0.998s +user 0m0.583s +sys 0m0.054s diff --git a/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc b/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc index 3452d1e8da..d5eda63ee0 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc +++ b/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc @@ -111,8 +111,8 @@ Parameters_sm::setDependentParameters() // now computed event-by-event (running void Parameters_sm::setDependentCouplings() // now computed event-by-event (running alphas #373) { - GC_10 = -G; GC_11 = mdl_complexi * G; + GC_10 = -G; } */ @@ -195,7 +195,7 @@ void Parameters_sm::printDependentCouplings() // now computed event-by-event (running alphas #373) { std::cout << "sm model couplings dependent on event kinematics:" << std::endl; - std::cout << std::setw( 20 ) << "GC_10 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_10 << std::endl; std::cout << std::setw( 20 ) << "GC_11 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_11 << std::endl; + std::cout << std::setw( 20 ) << "GC_10 = " << std::setiosflags( std::ios::scientific ) << std::setw( 10 ) << GC_10 << std::endl; } */ diff --git a/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h b/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h index 4f6f322ed9..0c77cf58f0 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h +++ 
b/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h @@ -54,7 +54,7 @@ namespace mg5amcCpu //double mdl_sqrt__aS, G, mdl_G__exp__2; // now computed event-by-event (running alphas #373) // Model couplings dependent on aS - //cxsmpl GC_10, GC_11; // now computed event-by-event (running alphas #373) + //cxsmpl GC_11, GC_10; // now computed event-by-event (running alphas #373) // Set parameters that are unchanged during the run void setIndependentParameters( SLHAReader& slha ); @@ -194,8 +194,8 @@ namespace mg5amcCpu //constexpr double mdl_G__exp__2 = ( ( G ) * ( G ) ); // now computed event-by-event (running alphas #373) // Model couplings dependent on aS - //constexpr cxsmpl GC_10 = -G; // now computed event-by-event (running alphas #373) //constexpr cxsmpl GC_11 = mdl_complexi * G; // now computed event-by-event (running alphas #373) + //constexpr cxsmpl GC_10 = -G; // now computed event-by-event (running alphas #373) // Print parameters that are unchanged during the run void printIndependentParameters(); @@ -226,12 +226,12 @@ namespace mg5amcCpu namespace Parameters_sm_dependentCouplings { constexpr size_t ndcoup = 2; // #couplings that vary event by event because they depend on the running alphas QCD - constexpr size_t idcoup_GC_10 = 0; - constexpr size_t idcoup_GC_11 = 1; + constexpr size_t idcoup_GC_11 = 0; + constexpr size_t idcoup_GC_10 = 1; struct DependentCouplings_sv { - cxtype_sv GC_10; cxtype_sv GC_11; + cxtype_sv GC_10; }; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-variable" // e.g. <> @@ -257,8 +257,8 @@ namespace mg5amcCpu //const fptype_sv G = 2. 
* mdl_sqrt__aS * constexpr_sqrt( M_PI ); const fptype_sv mdl_G__exp__2 = ( ( G ) * ( G ) ); // Model couplings dependent on aS - out.GC_10 = -G; out.GC_11 = cI * G; + out.GC_10 = -G; } // End SM implementation - no special handling of vectors of floats as in EFT (#439) return out; @@ -293,12 +293,12 @@ namespace mg5amcCpu using namespace Parameters_sm_dependentCouplings; const fptype_sv& gs_sv = G_ACCESS::kernelAccessConst( gs ); DependentCouplings_sv couplings_sv = computeDependentCouplings_fromG( gs_sv ); - fptype* GC_10s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_10 ); fptype* GC_11s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_11 ); - cxtype_sv_ref GC_10s_sv = C_ACCESS::kernelAccess( GC_10s ); + fptype* GC_10s = C_ACCESS::idcoupAccessBuffer( couplings, idcoup_GC_10 ); cxtype_sv_ref GC_11s_sv = C_ACCESS::kernelAccess( GC_11s ); - GC_10s_sv = couplings_sv.GC_10; + cxtype_sv_ref GC_10s_sv = C_ACCESS::kernelAccess( GC_10s ); GC_11s_sv = couplings_sv.GC_11; + GC_10s_sv = couplings_sv.GC_10; mgDebug( 1, __FUNCTION__ ); return; } diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index 0df769235d..f9fb551ac7 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -165,6 +165,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.415s -user 0m0.359s -sys 0m0.045s +real 0m0.582s +user 0m0.368s +sys 0m0.042s diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index a478197d0e..a1f590687e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load 
particles INFO: load vertices -DEBUG: model prefixing takes 0.005745649337768555  +DEBUG: model prefixing takes 0.005295515060424805  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.135 s +13 processes with 76 diagrams generated in 0.134 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.803 s +65 processes with 1119 diagrams generated in 1.793 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -497,7 +497,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -514,7 +514,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -531,7 +531,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -548,7 +548,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -565,7 +565,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -582,7 +582,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -599,7 +599,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -616,7 +616,7 @@ INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -633,7 +633,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -650,7 +650,7 @@ INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -667,7 +667,7 @@ INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -684,7 +684,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -701,7 +701,7 @@ INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -718,7 +718,7 @@ INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -735,7 +735,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -752,7 +752,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -769,7 +769,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -786,7 +786,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -801,15 +801,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.283 s -Wrote files for 810 helas calls in 3.215 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.263 s +Wrote files for 810 helas calls in 3.193 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.332 s +ALOHA: aloha creates 5 routines in 0.333 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -817,7 +817,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.312 s +ALOHA: aloha creates 10 routines in 0.310 s VVV1 VVV1 FFV1 @@ -1030,9 +1030,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about 
this process. quit -real 0m8.755s -user 0m8.270s -sys 0m0.450s +real 0m8.705s +user 0m8.197s +sys 0m0.465s ************************************************************ * * * W E L C O M E to * From 36080e01dd7ac4562d36166ae2ee4ce33d3c32c2 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 29 Oct 2023 11:15:45 +0100 Subject: [PATCH 069/119] [oct23av] rerun 8 tput tests for gqttq - now all ok again after including Stefan's PR #782 ./tput/teeThroughputX.sh -mix -hrd -makej -gqttq -makeclean ./tput/teeThroughputX.sh -makej -gqttq -flt -bridge -makeclean --- .../log_gqttq_mad_d_inl0_hrd0.txt | 174 ++++++++++++++--- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 178 +++++++++++++++--- .../log_gqttq_mad_d_inl0_hrd1.txt | 174 ++++++++++++++--- .../log_gqttq_mad_f_inl0_hrd0.txt | 174 ++++++++++++++--- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 178 +++++++++++++++--- .../log_gqttq_mad_f_inl0_hrd1.txt | 174 ++++++++++++++--- .../log_gqttq_mad_m_inl0_hrd0.txt | 174 ++++++++++++++--- .../log_gqttq_mad_m_inl0_hrd1.txt | 174 ++++++++++++++--- 8 files changed, 1160 insertions(+), 240 deletions(-) diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index f3f72b8d03..f85b83afc5 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-28_12:18:19 +DATE: 2023-10-29_11:11:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.686159e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.340503e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.711310e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.441786 sec - 2,026,016,464 cycles # 3.030 GHz - 2,782,125,849 instructions # 1.37 insn per cycle - 0.739628413 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.425505e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.236435e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.657072e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.458189 sec + 1,993,657,564 cycles # 3.003 GHz + 2,804,107,618 instructions # 1.41 insn per cycle + 0.733844419 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,20 +60,20 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) 
-EvtsPerSec[Rmb+ME] (23) = ( 3.249190e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.094401e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.512491e+07 ) sec^-1 -MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 -TOTAL : 0.527258 sec - 2,304,751,005 cycles # 3.025 GHz - 3,263,793,687 instructions # 1.42 insn per cycle - 0.821160742 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.249352e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.087153e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.509076e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.529582 sec + 2,292,930,937 cycles # 2.997 GHz + 3,293,249,585 instructions # 1.44 insn per cycle + 0.824142515 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.622436e-01 -Avg ME (F77/CUDA) = 0.56224343220024076 -Relative difference = 2.984467216677476e-07 +Avg ME (C++/CUDA) = 1.424749e-01 +Avg ME (F77/CUDA) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe -p 64 256 10 OMP= @@ -82,15 +82,129 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.105519e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.128509e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.128509e+05 ) sec^-1 
-MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 1.503749 sec - 4,702,243,850 cycles # 3.120 GHz - 13,466,853,326 instructions # 2.86 insn per cycle - 1.510053207 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 862) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.094181e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.117189e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.117189e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.519578 sec + 4,700,803,105 cycles # 3.087 GHz + 13,467,820,238 instructions # 2.87 insn per cycle + 1.523730874 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe - 2 FAILED TESTS +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499484 +Relative difference = 5.286896509487005e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 
/ 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.988328e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.062679e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.062679e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.845077 sec + 2,624,090,994 cycles # 3.092 GHz + 7,555,318,399 instructions # 2.88 insn per cycle + 0.849297883 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3095) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499478 +Relative difference = 5.28689651338321e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.381886e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.602535e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.602535e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.505903 sec + 1,489,612,723 cycles # 2.924 GHz + 
3,122,146,984 instructions # 2.10 insn per cycle + 0.510105162 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.752139e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.021247e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.021247e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.458220 sec + 1,341,657,342 cycles # 2.905 GHz + 2,984,216,057 instructions # 2.22 insn per cycle + 0.462365340 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.580684e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.707250e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.707250e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.658521 sec + 1,326,321,680 cycles # 2.003 GHz + 1,955,661,971 instructions # 1.47 insn per cycle + 0.662818386 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index 689a8f5648..bb40575287 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-28_12:56:16 +DATE: 2023-10-29_11:13:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.537647e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.118407e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.118407e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.469387 sec - 2,053,681,362 cycles # 3.017 GHz - 2,981,721,290 instructions # 1.45 insn per cycle - 0.737787439 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.499145e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.000526e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.000526e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.471660 sec + 2,056,274,121 cycles # 3.004 GHz + 3,032,901,904 instructions # 1.47 insn per cycle + 0.741611497 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,20 +72,20 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.334603e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.380887e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.380887e+07 ) sec^-1 -MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 -TOTAL : 0.736122 sec - 2,984,351,272 cycles # 3.046 GHz - 4,572,222,181 instructions # 1.53 insn per cycle - 1.038987324 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.137166e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.151984e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.151984e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.782426 sec + 3,108,804,978 cycles # 2.985 GHz + 4,639,704,366 instructions # 1.49 insn per cycle + 1.102099164 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.622436e-01 -Avg ME (F77/CUDA) = 0.56224343220024076 -Relative difference = 2.984467216677476e-07 +Avg ME (C++/CUDA) = 1.424749e-01 +Avg ME (F77/CUDA) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= @@ -95,15 +95,133 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.098820e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.121705e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.121705e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 1.518681 sec - 4,734,800,940 cycles # 3.111 GHz - 13,473,654,290 instructions # 2.85 insn per cycle - 1.522969532 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 862) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.096327e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.119247e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.119247e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.521465 sec + 4,727,662,680 cycles # 3.100 GHz + 13,471,889,912 instructions # 2.85 insn per cycle + 1.525740740 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe - 2 FAILED TESTS +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499484 +Relative difference = 5.286896509487005e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.976939e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.052328e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.052328e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.856489 sec + 2,662,267,830 cycles # 3.095 GHz + 7,603,678,621 instructions # 2.86 insn per cycle + 0.860905365 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3095) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499478 +Relative difference = 5.28689651338321e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.387239e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.609264e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.609264e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.512281 sec + 1,513,864,705 cycles # 2.932 GHz + 3,172,887,101 instructions # 2.10 insn per cycle + 0.516813268 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.731800e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.003313e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.003313e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.467919 sec + 1,378,999,909 cycles # 2.923 GHz + 3,034,843,091 instructions # 2.20 insn per cycle + 0.472342300 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.551387e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.681149e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.681149e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.673053 sec + 1,359,798,395 cycles # 2.009 GHz + 1,995,251,436 instructions # 1.47 insn per cycle + 0.677423923 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index 439a063620..0c2864af4d 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-28_12:18:29 +DATE: 2023-10-29_11:12:11 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.633006e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.190444e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.531715e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.440720 sec - 1,992,376,776 cycles # 3.029 GHz - 2,800,529,205 instructions # 1.41 insn per cycle - 0.725033993 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.374698e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.066567e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.458475e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.443172 sec + 1,984,389,740 cycles # 3.008 GHz + 2,813,702,209 instructions # 1.42 insn per cycle + 0.717001254 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,20 +60,20 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) 
-EvtsPerSec[Rmb+ME] (23) = ( 3.233221e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.002410e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.417534e+07 ) sec^-1 -MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 -TOTAL : 0.527391 sec - 2,308,177,809 cycles # 3.030 GHz - 3,262,086,295 instructions # 1.41 insn per cycle - 0.821240750 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.243499e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.022510e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.434945e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.528031 sec + 2,306,782,100 cycles # 3.014 GHz + 3,293,790,173 instructions # 1.43 insn per cycle + 0.823170146 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.622436e-01 -Avg ME (F77/CUDA) = 0.56224343220024076 -Relative difference = 2.984467216677476e-07 +Avg ME (C++/CUDA) = 1.424749e-01 +Avg ME (F77/CUDA) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check.exe -p 64 256 10 OMP= @@ -82,15 +82,129 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.047338e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.069084e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.069084e+05 ) sec^-1 
-MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 1.586610 sec - 4,722,160,621 cycles # 2.970 GHz - 13,461,547,465 instructions # 2.85 insn per cycle - 1.593311787 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 851) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.082757e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.105146e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.105146e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.535031 sec + 4,702,151,242 cycles # 3.056 GHz + 13,460,848,480 instructions # 2.86 insn per cycle + 1.539172370 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 849) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest.exe - 2 FAILED TESTS +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499484 +Relative difference = 5.286896509487005e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 
/ 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.975479e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.051003e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.051003e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.850537 sec + 2,627,273,138 cycles # 3.077 GHz + 7,554,652,083 instructions # 2.88 insn per cycle + 0.854779657 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3088) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499478 +Relative difference = 5.28689651338321e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.403057e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.631167e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.631167e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.503108 sec + 1,479,372,046 cycles # 2.921 GHz + 
3,120,535,969 instructions # 2.11 insn per cycle + 0.507264468 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2900) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.783590e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.056952e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.056952e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.454143 sec + 1,339,187,575 cycles # 2.928 GHz + 2,981,219,136 instructions # 2.23 insn per cycle + 0.458255982 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.579822e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.705158e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.705158e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.658524 sec + 1,326,723,838 cycles # 2.004 GHz + 1,953,998,504 instructions # 1.47 insn per cycle + 0.662655722 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1348) (512y: 106) (512z: 2173) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492595 +Relative difference = 5.286901344678233e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index fee856b657..4efa63c73f 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-28_12:18:39 +DATE: 2023-10-29_11:12:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.351195e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.215965e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.353365e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.402886e+01 +- 1.677500e+01 ) GeV^-2 -TOTAL : 0.435631 sec - 1,962,028,729 cycles # 3.030 GHz - 2,753,559,026 instructions # 1.40 insn per cycle - 0.723824346 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.846056e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.190837e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.345223e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 +TOTAL : 0.441885 sec + 1,964,981,045 cycles # 3.008 GHz + 2,777,977,923 instructions # 1.41 insn per cycle + 0.712518260 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,20 +60,20 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 7.222770e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.801907e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.948153e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.166198e+02 +- 2.517590e+02 ) GeV^-2 -TOTAL : 0.472246 sec - 2,127,234,196 cycles # 3.022 GHz - 3,027,538,268 instructions # 1.42 insn per cycle - 0.761698590 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.228241e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.808928e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.953110e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 +TOTAL : 0.474858 sec + 2,136,444,467 cycles # 3.019 GHz + 3,023,304,032 instructions # 1.42 insn per cycle + 0.764875305 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.619520e-01 -Avg ME (F77/CUDA) = 0.56225629328027793 -Relative difference = 0.0005414933664760033 +Avg ME (C++/CUDA) = 1.424226e-01 +Avg ME (F77/CUDA) = 0.14247488790821983 +Relative difference = 0.00036713209996037764 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe -p 64 256 10 OMP= @@ -82,15 +82,129 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.164606e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.190696e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.190696e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422773e+01 
+- 1.683422e+01 ) GeV^-2 -TOTAL : 1.426678 sec - 4,450,099,487 cycles # 3.112 GHz - 13,052,167,444 instructions # 2.93 insn per cycle - 1.433165265 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 748) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.162214e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.188615e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.188615e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.429811 sec + 4,456,320,748 cycles # 3.110 GHz + 13,051,985,828 instructions # 2.93 insn per cycle + 1.433732093 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe - 2 FAILED TESTS +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246857540270419 +Relative difference = 1.7265064590569047e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 
3.111752e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.309723e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.309723e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 +TOTAL : 0.545843 sec + 1,699,369,970 cycles # 3.094 GHz + 4,514,916,171 instructions # 2.66 insn per cycle + 0.549893471 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3601) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246859631675157 +Relative difference = 2.5853054135974944e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.097094e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.867963e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.867963e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.288960 sec + 851,373,452 cycles # 2.910 GHz + 1,898,371,980 instructions # 2.23 
insn per cycle + 0.293051317 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489318272599 +Relative difference = 4.784894739577799e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.518614e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.412299e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.412299e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.271412 sec + 800,455,858 cycles # 2.911 GHz + 1,821,467,150 instructions # 2.28 insn per cycle + 0.275547796 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489318272599 +Relative difference = 4.784894739577799e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.880135e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.364108e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.364108e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.357728 sec + 738,502,166 cycles # 2.046 GHz + 1,305,590,914 instructions # 1.77 insn per cycle + 0.361826347 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1969) (512y: 32) (512z: 2383) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489383243206 +Relative difference = 4.32888033512879e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index 9667edc5e8..b3d6f455ab 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-28_12:56:27 +DATE: 2023-10-29_11:14:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.654442e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.068857e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.068857e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.419752e+01 +- 1.682900e+01 ) GeV^-2 -TOTAL : 0.452198 sec - 1,989,257,204 cycles # 3.008 GHz - 2,936,823,078 instructions # 1.48 insn per cycle - 0.720594064 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.590965e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.773915e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.773915e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.017654e+01 +- 1.429184e+01 ) GeV^-2 +TOTAL : 0.450702 sec + 1,994,471,727 cycles # 3.019 GHz + 2,920,362,057 instructions # 1.46 insn per cycle + 0.717654721 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,20 +72,20 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.295691e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.814169e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.814169e+07 ) sec^-1 -MeanMatrixElemValue = ( 4.349385e+02 +- 2.541442e+02 ) GeV^-2 -TOTAL : 0.611989 sec - 2,543,798,787 cycles # 3.034 GHz - 3,860,680,534 instructions # 1.52 insn per cycle - 0.897475876 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.807015e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.180092e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.180092e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.609942e+02 +- 2.115590e+02 ) GeV^-2 +TOTAL : 0.639942 sec + 2,501,192,232 cycles # 2.864 GHz + 3,760,146,551 instructions # 1.50 insn per cycle + 0.930803843 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.619520e-01 -Avg ME (F77/CUDA) = 0.56225629328027793 -Relative difference = 0.0005414933664760033 +Avg ME (C++/CUDA) = 1.424226e-01 +Avg ME (F77/CUDA) = 0.14247488790821983 +Relative difference = 0.00036713209996037764 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= @@ -95,15 +95,133 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.161565e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.188095e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.188095e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422773e+01 +- 1.683422e+01 ) GeV^-2 -TOTAL : 1.433271 sec - 4,468,228,264 cycles # 3.110 GHz - 13,056,784,200 instructions # 2.92 insn per cycle - 1.437467283 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 748) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.163010e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.189128e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.189128e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.432348 sec + 4,468,926,407 cycles # 3.113 GHz + 13,056,471,140 instructions # 2.92 insn per cycle + 1.436449099 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe - 2 FAILED TESTS +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246857540270419 +Relative difference = 1.7265064590569047e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.101496e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.298069e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.298069e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 +TOTAL : 0.551945 sec + 1,720,858,195 cycles # 3.099 GHz + 4,563,075,078 instructions # 2.65 insn per cycle + 0.556069064 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3601) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246859631675157 +Relative difference = 2.5853054135974944e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.886684e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.625652e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.625652e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.303879 sec + 878,433,693 cycles # 2.855 GHz + 1,935,192,566 instructions # 2.20 insn per cycle + 0.308372436 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489318272599 +Relative difference = 4.784894739577799e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.388392e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.262834e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.262834e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.280915 sec + 821,362,202 cycles # 2.888 GHz + 1,858,402,043 instructions # 2.26 insn per cycle + 0.285072483 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489318272599 +Relative difference = 4.784894739577799e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.870968e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.352273e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.352273e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.362591 sec + 758,880,170 cycles # 2.073 GHz + 1,347,085,517 instructions # 1.78 insn per cycle + 0.366695100 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1969) (512y: 32) (512z: 2383) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489383243206 +Relative difference = 4.32888033512879e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index f8dec0d191..6315058ab2 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-28_12:18:50 +DATE: 2023-10-29_11:12:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.250146e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.175847e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.298485e+08 ) sec^-1 -MeanMatrixElemValue = ( 3.402886e+01 +- 1.677500e+01 ) GeV^-2 -TOTAL : 0.434444 sec - 1,955,535,682 cycles # 3.017 GHz - 2,766,629,392 instructions # 1.41 insn per cycle - 0.718000068 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.702162e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.156083e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.311905e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 +TOTAL : 0.440240 sec + 1,914,660,193 cycles # 2.927 GHz + 2,640,669,055 instructions # 1.38 insn per cycle + 0.710962903 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,20 +60,20 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 7.147977e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.757689e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.893722e+08 ) sec^-1 -MeanMatrixElemValue = ( 4.166198e+02 +- 2.517590e+02 ) GeV^-2 -TOTAL : 0.471488 sec - 2,146,983,188 cycles # 3.028 GHz - 2,931,828,508 instructions # 1.37 insn per cycle - 0.766791721 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.185142e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.784341e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.927350e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 +TOTAL : 0.478204 sec + 2,109,342,162 cycles # 3.006 GHz + 2,990,715,659 instructions # 1.42 insn per cycle + 0.761391105 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.619520e-01 -Avg ME (F77/CUDA) = 0.56225629328027793 -Relative difference = 0.0005414933664760033 +Avg ME (C++/CUDA) = 1.424226e-01 +Avg ME (F77/CUDA) = 0.14247488790821983 +Relative difference = 0.00036713209996037764 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check.exe -p 64 256 10 OMP= @@ -82,15 +82,129 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.167815e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.194266e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.194266e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.422773e+01 
+- 1.683422e+01 ) GeV^-2 -TOTAL : 1.422545 sec - 4,440,503,513 cycles # 3.114 GHz - 13,032,813,526 instructions # 2.93 insn per cycle - 1.428734586 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 730) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.160687e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.186899e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.186899e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.431367 sec + 4,453,449,929 cycles # 3.105 GHz + 13,033,473,248 instructions # 2.93 insn per cycle + 1.435338813 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest.exe - 2 FAILED TESTS +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246857540270419 +Relative difference = 1.7265064590569047e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 
3.119834e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.322376e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.322376e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 +TOTAL : 0.544693 sec + 1,689,425,653 cycles # 3.083 GHz + 4,511,028,047 instructions # 2.67 insn per cycle + 0.548780764 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246859631675157 +Relative difference = 2.5853054135974944e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.102572e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.869486e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.869486e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.287991 sec + 850,946,422 cycles # 2.921 GHz + 1,895,366,615 instructions # 2.23 
insn per cycle + 0.292004201 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3461) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489318272599 +Relative difference = 4.784894739577799e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.550351e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.453270e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.453270e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.269823 sec + 800,276,038 cycles # 2.930 GHz + 1,817,389,395 instructions # 2.27 insn per cycle + 0.273798599 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3298) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489318272599 +Relative difference = 4.784894739577799e-08 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.882771e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.367359e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.367359e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.357346 sec + 740,492,059 cycles # 2.052 GHz + 1,303,318,805 instructions # 1.76 insn per cycle + 0.361466844 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1932) (512y: 32) (512z: 2383) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247489383243206 +Relative difference = 4.32888033512879e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 81446952af..61f818ffd3 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-28_12:19:00 +DATE: 2023-10-29_11:13:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.675976e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.308760e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.678480e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.439579 sec - 1,986,523,688 cycles # 3.018 GHz - 2,793,227,977 instructions # 1.41 insn per cycle - 0.729915034 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.422535e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.222834e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.641036e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.446535 sec + 1,997,383,761 cycles # 2.995 GHz + 2,799,829,441 instructions # 1.40 insn per cycle + 0.723708714 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,20 +60,20 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 3.255163e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.114232e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.536354e+07 ) sec^-1 -MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 -TOTAL : 0.528100 sec - 2,302,091,318 cycles # 3.015 GHz - 3,285,386,185 instructions # 1.43 insn per cycle - 0.822389875 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.250735e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.126800e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.544299e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.531299 sec + 2,306,027,111 cycles # 3.007 GHz + 3,282,970,610 instructions # 1.42 insn per cycle + 0.825811359 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.622436e-01 -Avg ME (F77/CUDA) = 0.56224344354681244 -Relative difference = 2.782658397826986e-07 +Avg ME (C++/CUDA) = 1.424749e-01 +Avg ME (F77/CUDA) = 0.14247482577104625 +Relative difference = 5.209967070245855e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check.exe -p 64 256 10 OMP= @@ -82,15 +82,129 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.064090e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.085676e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.085676e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 
+- 1.677228e+01 ) GeV^-2 -TOTAL : 1.561728 sec - 4,747,206,285 cycles # 3.039 GHz - 13,471,570,617 instructions # 2.84 insn per cycle - 1.568175619 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 842) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.093896e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.116488e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.116488e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.519670 sec + 4,721,504,023 cycles # 3.100 GHz + 13,469,498,246 instructions # 2.85 insn per cycle + 1.523736357 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 840) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest.exe - 2 FAILED TESTS +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482734618697 +Relative difference = 5.099411406595165e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 
2.013874e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.090091e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.090091e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.834790 sec + 2,597,461,679 cycles # 3.098 GHz + 7,388,399,527 instructions # 2.84 insn per cycle + 0.839225828 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3073) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482734618697 +Relative difference = 5.099411406595165e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.421565e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.647240e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.647240e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.500518 sec + 1,469,864,578 cycles # 2.915 GHz + 3,057,713,704 instructions # 2.08 
insn per cycle + 0.504803616 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3013) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.862725e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.148513e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.148513e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.445813 sec + 1,308,991,772 cycles # 2.913 GHz + 2,932,828,584 instructions # 2.24 insn per cycle + 0.449922923 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2799) (512y: 110) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.488490e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.605792e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.605792e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.682096 sec + 1,364,315,010 cycles # 1.990 GHz + 1,971,638,562 instructions # 1.45 insn per cycle + 0.686218540 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1700) (512y: 114) (512z: 2171) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index 2ed7741ba4..b0f37524f2 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-28_12:19:10 +DATE: 2023-10-29_11:13:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.635071e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.157188e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.509801e+07 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 +- 1.677228e+01 ) GeV^-2 -TOTAL : 0.441790 sec - 1,989,499,871 cycles # 3.014 GHz - 2,775,700,133 instructions # 1.40 insn per cycle - 0.726031721 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.389893e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.052684e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.466443e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.444473 sec + 2,007,835,279 cycles # 3.017 GHz + 2,790,075,561 instructions # 1.39 insn per cycle + 0.723819294 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,20 +60,20 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 3.221630e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.948180e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.370520e+07 ) sec^-1 -MeanMatrixElemValue = ( 4.282445e+02 +- 2.530899e+02 ) GeV^-2 -TOTAL : 0.530411 sec - 2,299,474,084 cycles # 2.994 GHz - 3,273,781,886 instructions # 1.42 insn per cycle - 0.825233339 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.213263e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.924808e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.325840e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.526659 sec + 2,290,049,296 cycles # 3.001 GHz + 3,242,676,439 instructions # 1.42 insn per cycle + 0.821642314 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 -Avg ME (C++/CUDA) = 5.622436e-01 -Avg ME (F77/CUDA) = 0.56224344354681244 -Relative difference = 2.782658397826986e-07 +Avg ME (C++/CUDA) = 1.424749e-01 +Avg ME (F77/CUDA) = 0.14247482577104625 +Relative difference = 5.209967070245855e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check.exe -p 64 256 10 OMP= @@ -82,15 +82,129 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.097804e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.120541e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.120541e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.404831e+01 
+- 1.677228e+01 ) GeV^-2 -TOTAL : 1.514227 sec - 4,724,087,422 cycles # 3.113 GHz - 13,455,754,805 instructions # 2.85 insn per cycle - 1.520438100 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 829) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.095275e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.118185e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.118185e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.517345 sec + 4,725,285,176 cycles # 3.107 GHz + 13,455,381,107 instructions # 2.85 insn per cycle + 1.521506668 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 827) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest.exe - 2 FAILED TESTS +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482734618697 +Relative difference = 5.099411406595165e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 
2.003251e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.080875e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.080875e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.839136 sec + 2,603,801,918 cycles # 3.090 GHz + 7,392,674,875 instructions # 2.84 insn per cycle + 0.843316365 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3062) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482734618697 +Relative difference = 5.099411406595165e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.396132e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.618964e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.618964e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.503966 sec + 1,469,332,997 cycles # 2.895 GHz + 3,057,816,888 instructions # 2.08 
insn per cycle + 0.508181558 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2990) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.883747e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.172497e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.172497e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.442976 sec + 1,307,708,508 cycles # 2.929 GHz + 2,933,584,495 instructions # 2.24 insn per cycle + 0.447189273 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2775) (512y: 110) (512z: 0) +------------------------------------------------------------------------- +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.475241e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.592855e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.592855e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.685645 sec + 1,364,917,576 cycles # 1.981 GHz + 1,971,508,007 instructions # 1.44 insn per cycle + 0.689804896 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1676) (512y: 114) (512z: 2171) +------------------------------------------------------------------------- +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest.exe +[ PASSED ] 6 tests. 
+------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED From ab05eb43f36ef3a25e1a9d1a7d800766597b2bae Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 29 Oct 2023 11:33:41 +0100 Subject: [PATCH 070/119] [oct23av] rerun 3 tmad tests for gqttq - now all ok again after including Stefan's PR #782 ./tmad/teeMadX.sh +10x -gqttq -makeclean ./tmad/teeMadX.sh +10x -gqttq -makeclean -fltonly ./tmad/teeMadX.sh +10x -gqttq -makeclean -mixonly --- .../log_gqttq_mad_d_inl0_hrd0.txt | 499 ++++++++++++++++- .../log_gqttq_mad_f_inl0_hrd0.txt | 497 ++++++++++++++++- .../log_gqttq_mad_m_inl0_hrd0.txt | 503 +++++++++++++++++- 3 files changed, 1438 insertions(+), 61 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index e99c539eed..82d7b93a8b 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -2,8 +2,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-28_13:30:28 +DATE: 2023-10-29_11:19:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3031s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2342s - [COUNTERS] Fortran MEs ( 1 ) : 0.0688s for 8192 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3071s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2370s + [COUNTERS] Fortran MEs ( 1 ) : 0.0701s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2953s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2266s - [COUNTERS] Fortran MEs ( 1 ) : 0.0687s for 8192 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2972s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2278s + [COUNTERS] Fortran MEs ( 1 ) : 0.0693s for 8192 events => throughput is 1.18E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 
2.1552s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3998s - [COUNTERS] Fortran MEs ( 1 ) : 0.7554s for 90112 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1557s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3979s + [COUNTERS] Fortran MEs ( 1 ) : 0.7578s for 90112 events => throughput is 1.19E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,12 +132,471 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.276 [1.2757941949814184] fbridge_mode=1 - [UNWEIGHT] Wrote 105 events (found 652 events) - [COUNTERS] PROGRAM TOTAL : 0.3934s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3192s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0742s for 8192 events => throughput is 1.10E+05 events/s + [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.3888s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3140s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0748s for 8192 events => throughput is 1.10E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -ERROR! xsec from fortran (0.26050333309703716) and cpp (1.2757941949814184) differ by more than 2E-14 (3.8974198518457603) +OK! xsec from fortran (0.26050333309703716) and cpp (0.26050333309703716) differ by less than 2E-14 (0.0) + +*** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-none) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 2.3174s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4958s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8215s for 90112 events => throughput is 1.10E+05 events/s + +*** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182648615863) differ by less than 2E-14 (5.551115123125783e-16) + +*** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.109455e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.112734e+05 ) sec^-1 + +*** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.3139s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2735s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0403s for 8192 events => throughput is 2.03E+05 events/s + +*** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333309703733) differ by less than 2E-14 (6.661338147750939e-16) + +*** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-sse4) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182648615872] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.9098s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4660s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4439s for 90112 events => throughput is 2.03E+05 events/s + +*** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182648615872) differ by less than 2E-14 (1.1102230246251565e-16) + +*** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.026202e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.015864e+05 ) sec^-1 + +*** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.2784s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2549s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0235s for 8192 events => throughput is 3.49E+05 events/s + +*** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333309703733) differ by less than 2E-14 (6.661338147750939e-16) + +*** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-avx2) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.6962s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4433s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2529s for 90112 events => throughput is 3.56E+05 events/s + +*** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182648615863) differ by less than 2E-14 (5.551115123125783e-16) + +*** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.519852e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.514255e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.2755s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2546s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0209s for 8192 events => throughput is 3.92E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333309703733) differ by less than 2E-14 (6.661338147750939e-16) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.6760s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4457s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2302s for 90112 events => throughput is 3.91E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182648615863) differ by less than 2E-14 (5.551115123125783e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.921611e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.966924e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.3085s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2743s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0342s for 8192 events => throughput is 2.40E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333309703733) differ by less than 2E-14 (6.661338147750939e-16) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.8203s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4753s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3450s for 90112 events => throughput is 2.61E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182648615863) differ by less than 2E-14 (5.551115123125783e-16) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.334278e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.347346e+05 ) sec^-1 + +*** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.6584s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6578s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.20E+07 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333309703733) differ by less than 2E-14 (6.661338147750939e-16) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182648615869] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.8641s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8565s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 90112 events => throughput is 1.18E+07 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182648615869) differ by less than 2E-14 (2.220446049250313e-16) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.470058e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.083688e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.104258e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.522760e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.096774e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.800964e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.099625e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.781256e+07 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index ef32365876..be890c5e3f 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -4,8 +4,8 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none - make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-28_13:30:34 +DATE: 2023-10-29_11:22:06 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.2993s + [COUNTERS] PROGRAM TOTAL : 0.2990s [COUNTERS] Fortran Overhead ( 0 ) : 0.2303s - [COUNTERS] Fortran MEs ( 1 ) : 0.0690s for 8192 events => throughput is 1.19E+05 events/s + [COUNTERS] Fortran MEs ( 1 ) : 0.0687s for 8192 events => throughput is 1.19E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2952s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2264s - [COUNTERS] Fortran MEs ( 1 ) : 0.0688s for 8192 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2971s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2281s + [COUNTERS] Fortran MEs ( 1 ) : 0.0691s for 8192 events => throughput is 1.19E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.1487s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3925s 
- [COUNTERS] Fortran MEs ( 1 ) : 0.7562s for 90112 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1544s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3991s + [COUNTERS] Fortran MEs ( 1 ) : 0.7553s for 90112 events => throughput is 1.19E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,12 +132,471 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.276 [1.2757939773540909] fbridge_mode=1 - [UNWEIGHT] Wrote 105 events (found 652 events) - [COUNTERS] PROGRAM TOTAL : 0.3844s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3143s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0701s for 8192 events => throughput is 1.17E+05 events/s + [XSECTION] Cross section = 0.2605 [0.26050316058770007] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.3726s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3023s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0704s for 8192 events => throughput is 1.16E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -ERROR! xsec from fortran (0.26050333309703716) and cpp (1.2757939773540909) differ by more than 4E-4 (3.8974190164348466) +OK! xsec from fortran (0.26050333309703716) and cpp (0.26050316058770007) differ by less than 4E-4 (6.622154696822591e-07) + +*** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-none) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182797520666] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 2.2661s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4911s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7750s for 90112 events => throughput is 1.16E+05 events/s + +*** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182797520666) differ by less than 4E-4 (6.830124466006282e-09) + +*** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.175314e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.172660e+05 ) sec^-1 + +*** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050313133963987] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.2810s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2562s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0248s for 8192 events => throughput is 3.30E+05 events/s + +*** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050313133963987) differ by less than 4E-4 (7.744906558304621e-07) + +*** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-sse4) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801179276862181] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.7264s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4508s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2756s for 90112 events => throughput is 3.27E+05 events/s + +*** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801179276862181) differ by less than 4E-4 (1.5465921032742358e-07) + +*** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.185187e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.174762e+05 ) sec^-1 + +*** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050313344346482] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.2571s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2447s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0123s for 8192 events => throughput is 6.64E+05 events/s + +*** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050313344346482) differ by less than 4E-4 (7.664146557395668e-07) + +*** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-avx2) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801179137376883] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.5675s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4330s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1345s for 90112 events => throughput is 6.70E+05 events/s + +*** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801179137376883) differ by less than 4E-4 (1.6105727140836024e-07) + +*** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.522440e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.430322e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050313344346482] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.2547s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2431s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0116s for 8192 events => throughput is 7.06E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050313344346482) differ by less than 4E-4 (7.664146557395668e-07) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801179137376883] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.5565s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4315s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1251s for 90112 events => throughput is 7.21E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801179137376883) differ by less than 4E-4 (1.6105727140836024e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.888139e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.958387e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050317064561834] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.2628s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2475s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.37E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050317064561834) differ by less than 4E-4 (6.236059127973093e-07) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182143140752] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.6690s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4890s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1801s for 90112 events => throughput is 5.00E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182143140752) differ by less than 4E-4 (2.3185674380421517e-08) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.448447e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.498943e+05 ) sec^-1 + +*** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050319131407651] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.6629s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6624s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.57E+07 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050319131407651) differ by less than 4E-4 (5.442654378295941e-07) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801186038252196] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.8530s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8470s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 90112 events => throughput is 1.51E+07 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801186038252196) differ by less than 4E-4 (1.5547946996541384e-07) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.626121e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.433120e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.189243e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.716903e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.171649e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.785778e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.864395e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.004377e+07 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 7a64e889e6..c29c2a87c5 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' - - make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,25 +15,25 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2023-10-28_13:30:41 +DATE: 2023-10-29_11:24:26 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3003s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2318s - [COUNTERS] Fortran MEs ( 1 ) : 0.0686s for 8192 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2999s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2315s + [COUNTERS] Fortran MEs ( 1 ) : 0.0684s for 8192 events => throughput is 1.20E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2952s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2266s - [COUNTERS] Fortran MEs ( 1 ) : 0.0686s for 8192 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2970s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2284s + [COUNTERS] Fortran MEs ( 1 ) : 0.0687s for 8192 events => throughput is 1.19E+05 
events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.1553s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3975s - [COUNTERS] Fortran MEs ( 1 ) : 0.7578s for 90112 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1561s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4003s + [COUNTERS] Fortran MEs ( 1 ) : 0.7559s for 90112 events => throughput is 1.19E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,12 +132,471 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.276 [1.2757941964256063] fbridge_mode=1 - [UNWEIGHT] Wrote 105 events (found 652 events) - [COUNTERS] PROGRAM TOTAL : 0.3922s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3179s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0743s for 8192 events => throughput is 1.10E+05 events/s + [XSECTION] Cross section = 0.2605 [0.26050333282657206] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.3817s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3068s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -ERROR! xsec from fortran (0.26050333309703716) and cpp (1.2757941964256063) differ by more than 2E-4 (3.897419857389597) +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333282657206) differ by less than 2E-4 (1.0382404935782574e-09) + +*** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-none) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182636608796] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 2.3168s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4948s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8220s for 90112 events => throughput is 1.10E+05 events/s + +*** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182636608796) differ by less than 2E-4 (5.507535538740171e-10) + +*** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.093427e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.109661e+05 ) sec^-1 + +*** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050333282657201] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.3099s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2712s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0387s for 8192 events => throughput is 2.12E+05 events/s + +*** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333282657201) differ by less than 2E-4 (1.0382406046005599e-09) + +*** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-sse4) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182636608810] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.8886s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4607s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4279s for 90112 events => throughput is 2.11E+05 events/s + +*** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182636608810) differ by less than 2E-4 (5.507528877402024e-10) + +*** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.037813e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.067790e+05 ) sec^-1 + +*** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.2773s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2543s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0230s for 8192 events => throughput is 3.56E+05 events/s + +*** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333291481387) differ by less than 2E-4 (6.99504676404672e-10) + +*** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-avx2) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.7024s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4488s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2536s for 90112 events => throughput is 3.55E+05 events/s + +*** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182638680733) differ by less than 2E-4 (4.557156874085422e-10) + +*** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.543078e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.581096e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.2790s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2573s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0217s for 8192 events => throughput is 3.78E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333291481387) differ by less than 2E-4 (6.99504676404672e-10) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.6914s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4643s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2270s for 90112 events => throughput is 3.97E+05 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182638680733) differ by less than 2E-4 (4.557156874085422e-10) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.967089e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.065644e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.3065s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2716s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0349s for 8192 events => throughput is 2.35E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333291481387) differ by less than 2E-4 (6.99504676404672e-10) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.8446s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4881s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3566s for 90112 events => throughput is 2.53E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182638680733) differ by less than 2E-4 (4.557156874085422e-10) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.525942e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.487063e+05 ) sec^-1 + +*** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2605 [0.26050333301029693] fbridge_mode=1 + [UNWEIGHT] Wrote 81 events (found 540 events) + [COUNTERS] PROGRAM TOTAL : 0.6573s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6566s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.20E+07 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.26050333309703716) and cpp (0.26050333301029693) differ by less than 2E-4 (3.329716502520341e-10) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.218 [0.21801182637219935] fbridge_mode=1 + [UNWEIGHT] Wrote 853 events (found 1849 events) + [COUNTERS] PROGRAM TOTAL : 1.8595s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8518s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 90112 events => throughput is 1.18E+07 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21801182648615874) and cpp (0.21801182637219935) differ by less than 2E-4 (5.227211996583492e-10) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.461495e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.077164e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.105487e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.495666e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.099165e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.801794e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.099654e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.781230e+07 ) sec^-1 + +TEST COMPLETED From 5a140d2ad527b4bd0704eb8efd91bdee8b55288a Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 29 Oct 2023 13:41:58 +0100 Subject: [PATCH 071/119] [oct23av] rerun all 78 tput tests - now all of them succed including gqttqq (but ggttggg fails madevent tests) STARTED AT Sun Oct 29 11:58:18 AM CET 2023 ./tput/teeThroughputX.sh -mix -hrd -makej -eemumu -ggtt -ggttg -ggttgg -gqttq -ggttggg -makeclean ENDED(1) AT Sun Oct 29 12:23:07 PM CET 2023 [Status=0] ./tput/teeThroughputX.sh -flt -hrd -makej -eemumu -ggtt -ggttgg -inlonly -makeclean ENDED(2) AT Sun Oct 29 12:32:26 PM CET 2023 [Status=0] ./tput/teeThroughputX.sh -makej -eemumu -ggtt -ggttg -gqttq -ggttgg -ggttggg -flt -bridge -makeclean ENDED(3) AT Sun Oct 29 12:41:38 PM CET 2023 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -rmbhst ENDED(4) AT Sun Oct 29 12:44:47 PM CET 2023 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -curhst ENDED(5) AT Sun Oct 29 12:47:55 PM CET 2023 [Status=0] --- .../log_eemumu_mad_d_inl0_hrd0.txt | 86 +++++++-------- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 86 +++++++-------- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 86 +++++++-------- .../log_eemumu_mad_d_inl0_hrd0_curhst.txt | 86 +++++++-------- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 86 +++++++-------- .../log_eemumu_mad_d_inl0_hrd1.txt | 86 +++++++-------- .../log_eemumu_mad_d_inl1_hrd0.txt | 86 +++++++-------- .../log_eemumu_mad_d_inl1_hrd1.txt | 86 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0.txt | 86 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 86 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 86 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0_curhst.txt | 86 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 86 +++++++-------- 
.../log_eemumu_mad_f_inl0_hrd1.txt | 86 +++++++-------- .../log_eemumu_mad_f_inl1_hrd0.txt | 86 +++++++-------- .../log_eemumu_mad_f_inl1_hrd1.txt | 86 +++++++-------- .../log_eemumu_mad_m_inl0_hrd0.txt | 86 +++++++-------- .../log_eemumu_mad_m_inl0_hrd1.txt | 86 +++++++-------- .../log_ggtt_mad_d_inl0_hrd0.txt | 86 +++++++-------- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 86 +++++++-------- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 86 +++++++-------- .../log_ggtt_mad_d_inl0_hrd0_curhst.txt | 86 +++++++-------- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 86 +++++++-------- .../log_ggtt_mad_d_inl0_hrd1.txt | 86 +++++++-------- .../log_ggtt_mad_d_inl1_hrd0.txt | 86 +++++++-------- .../log_ggtt_mad_d_inl1_hrd1.txt | 86 +++++++-------- .../log_ggtt_mad_f_inl0_hrd0.txt | 86 +++++++-------- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 86 +++++++-------- .../log_ggtt_mad_f_inl0_hrd0_common.txt | 86 +++++++-------- .../log_ggtt_mad_f_inl0_hrd0_curhst.txt | 86 +++++++-------- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 86 +++++++-------- .../log_ggtt_mad_f_inl0_hrd1.txt | 86 +++++++-------- .../log_ggtt_mad_f_inl1_hrd0.txt | 86 +++++++-------- .../log_ggtt_mad_f_inl1_hrd1.txt | 86 +++++++-------- .../log_ggtt_mad_m_inl0_hrd0.txt | 86 +++++++-------- .../log_ggtt_mad_m_inl0_hrd1.txt | 86 +++++++-------- .../log_ggttg_mad_d_inl0_hrd0.txt | 100 +++++++++--------- .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 100 +++++++++--------- .../log_ggttg_mad_d_inl0_hrd1.txt | 100 +++++++++--------- .../log_ggttg_mad_f_inl0_hrd0.txt | 100 +++++++++--------- .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 100 +++++++++--------- .../log_ggttg_mad_f_inl0_hrd1.txt | 100 +++++++++--------- .../log_ggttg_mad_m_inl0_hrd0.txt | 100 +++++++++--------- .../log_ggttg_mad_m_inl0_hrd1.txt | 100 +++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 100 +++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 100 +++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 100 +++++++++--------- 
.../log_ggttgg_mad_d_inl0_hrd0_curhst.txt | 100 +++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 100 +++++++++--------- .../log_ggttgg_mad_d_inl0_hrd1.txt | 100 +++++++++--------- .../log_ggttgg_mad_d_inl1_hrd0.txt | 100 +++++++++--------- .../log_ggttgg_mad_d_inl1_hrd1.txt | 100 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 100 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 100 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 100 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_curhst.txt | 100 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 100 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd1.txt | 100 +++++++++--------- .../log_ggttgg_mad_f_inl1_hrd0.txt | 100 +++++++++--------- .../log_ggttgg_mad_f_inl1_hrd1.txt | 100 +++++++++--------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 100 +++++++++--------- .../log_ggttgg_mad_m_inl0_hrd1.txt | 100 +++++++++--------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 100 +++++++++--------- .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 100 +++++++++--------- .../log_ggttggg_mad_d_inl0_hrd1.txt | 100 +++++++++--------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 100 +++++++++--------- .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 100 +++++++++--------- .../log_ggttggg_mad_f_inl0_hrd1.txt | 100 +++++++++--------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 100 +++++++++--------- .../log_ggttggg_mad_m_inl0_hrd1.txt | 100 +++++++++--------- .../log_gqttq_mad_d_inl0_hrd0.txt | 100 +++++++++--------- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 100 +++++++++--------- .../log_gqttq_mad_d_inl0_hrd1.txt | 100 +++++++++--------- .../log_gqttq_mad_f_inl0_hrd0.txt | 100 +++++++++--------- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 100 +++++++++--------- .../log_gqttq_mad_f_inl0_hrd1.txt | 100 +++++++++--------- .../log_gqttq_mad_m_inl0_hrd0.txt | 100 +++++++++--------- .../log_gqttq_mad_m_inl0_hrd1.txt | 100 +++++++++--------- 78 files changed, 3648 insertions(+), 3648 deletions(-) diff --git 
a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 9d61768255..3b679f61da 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-28_12:06:35 +DATE: 2023-10-29_12:04:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.436209e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.229794e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.009285e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.690864e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.871080e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.072256e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.744144 sec - 2,828,116,167 cycles # 3.006 GHz - 4,431,343,825 instructions # 1.57 insn per cycle - 1.067121357 seconds time elapsed +TOTAL : 0.657274 sec + 2,655,184,309 cycles # 2.998 GHz + 4,121,854,344 instructions # 1.55 insn per cycle + 0.945327418 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": 
sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.139844e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.339077e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.339077e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.147782e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.346714e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.346714e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.916739 sec - 18,330,054,823 cycles # 3.096 GHz - 44,036,343,414 instructions # 2.40 insn per cycle - 5.924908084 seconds time elapsed +TOTAL : 5.875373 sec + 18,262,081,421 cycles # 3.106 GHz + 44,034,454,795 instructions # 2.41 insn per cycle + 5.880557547 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.685588e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.206291e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.206291e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.692372e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.217423e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.217423e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.126625 sec - 12,790,163,629 cycles # 3.096 GHz - 31,002,245,035 instructions # 2.42 insn per cycle - 4.142582600 seconds 
time elapsed +TOTAL : 4.111533 sec + 12,775,111,266 cycles # 3.105 GHz + 31,002,507,522 instructions # 2.43 insn per cycle + 4.116781762 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.100110e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.946697e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.946697e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.108893e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.934033e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.934033e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.384390 sec - 10,090,434,829 cycles # 2.977 GHz - 19,377,580,740 instructions # 1.92 insn per cycle - 3.399760208 seconds time elapsed +TOTAL : 3.372821 sec + 10,022,359,660 cycles # 2.968 GHz + 19,377,807,125 instructions # 1.93 insn per cycle + 3.377931371 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.153186e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.046388e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.046388e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.187659e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.076628e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.076628e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.315608 sec - 9,756,600,437 cycles # 2.938 GHz - 18,996,634,425 instructions # 1.95 insn per cycle - 3.330988477 seconds time elapsed +TOTAL : 3.262058 sec + 9,657,902,742 cycles # 2.957 GHz + 19,007,012,523 instructions # 1.97 insn per cycle + 3.267075123 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.848119e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.465628e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.465628e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.858663e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.471817e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.471817e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.802025 sec - 8,634,120,490 cycles # 2.268 GHz - 15,739,590,432 instructions # 1.82 insn per cycle - 3.813740164 seconds time elapsed +TOTAL : 3.779777 sec + 8,578,041,378 cycles # 2.267 GHz + 15,737,549,950 instructions # 1.83 insn per cycle + 3.784977813 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index 5e5aef5e19..ab94719473 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-28_12:53:25 +DATE: 2023-10-29_12:35:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.655812e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.764738e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.764738e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.839257e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.789527e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.789527e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.243071 sec - 7,442,574,377 cycles # 2.991 GHz - 13,225,389,066 instructions # 1.78 insn per cycle - 2.549080102 seconds time elapsed +TOTAL : 2.172159 sec + 7,387,905,092 cycles # 3.062 GHz + 13,235,801,583 instructions # 1.79 insn per cycle + 2.468887135 seconds time elapsed runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -78,14 +78,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.102407e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.289656e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.289656e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.102401e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.286601e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.286601e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.301577 sec - 19,602,629,453 cycles # 3.109 GHz - 44,262,843,264 instructions # 2.26 insn per cycle - 6.308282734 seconds time elapsed +TOTAL : 6.300116 sec + 19,508,744,656 cycles # 3.094 GHz + 44,261,234,942 instructions # 2.27 insn per cycle + 6.306471740 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -105,14 +105,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.604137e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.071968e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.071968e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.575147e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.034583e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.034583e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.520826 sec - 14,045,705,180 cycles # 3.104 GHz - 31,843,337,701 instructions # 2.27 insn per cycle - 4.527637329 seconds time elapsed +TOTAL : 4.597477 sec + 14,011,492,780 cycles # 3.044 GHz + 31,844,820,754 instructions # 2.27 insn per cycle + 4.603973232 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -132,14 +132,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.921703e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.616697e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.616697e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.960593e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.674342e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.674342e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.884222 sec - 11,358,296,431 cycles # 2.920 GHz - 20,737,654,475 instructions # 1.83 insn per cycle - 3.890844402 seconds time elapsed +TOTAL : 3.816035 sec + 11,316,824,245 cycles # 2.962 GHz + 20,738,410,012 instructions # 1.83 insn per cycle + 3.822348357 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -159,14 +159,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.042579e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.818494e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.818494e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.017635e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.775212e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.775212e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.680139 sec - 11,063,141,020 cycles # 3.001 GHz - 20,365,630,441 instructions # 1.84 insn per cycle - 3.686776925 seconds time elapsed +TOTAL : 3.721718 sec + 10,991,240,129 cycles # 2.950 GHz + 20,368,407,726 instructions # 1.85 insn per cycle + 3.728042967 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.753883e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.296790e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.296790e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.759549e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.300261e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.300261e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.203410 sec - 10,020,568,248 cycles # 2.381 GHz - 16,883,677,599 instructions # 1.68 insn per cycle - 4.210142720 seconds time elapsed +TOTAL : 4.185030 sec + 9,952,016,559 cycles # 2.376 GHz + 16,884,994,577 instructions # 1.70 insn per cycle + 4.191402168 seconds 
time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index 7f4cdc7cb4..1f9b823b55 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-28_13:05:52 +DATE: 2023-10-29_12:48:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.829805e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.626756e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.006288e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.829033e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.630053e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.003352e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.335722 sec - 4,629,481,061 cycles # 2.946 GHz - 7,232,158,826 instructions # 1.56 insn per cycle - 1.628149973 seconds time elapsed +TOTAL : 1.308179 sec + 4,682,134,663 cycles # 3.050 GHz + 
7,265,916,082 instructions # 1.55 insn per cycle + 1.592358280 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.125259e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.321125e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.321125e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.141721e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.338591e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.338591e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 6.343833 sec - 19,420,661,031 cycles # 3.061 GHz - 44,140,174,090 instructions # 2.27 insn per cycle - 6.349019015 seconds time elapsed +TOTAL : 6.257064 sec + 19,363,791,469 cycles # 3.093 GHz + 44,137,580,779 instructions # 2.28 insn per cycle + 6.262074313 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.685239e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.203002e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.203002e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] 
(23) = ( 1.672529e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.182072e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.182072e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.470451 sec - 13,918,658,241 cycles # 3.110 GHz - 31,003,563,450 instructions # 2.23 insn per cycle - 4.475964868 seconds time elapsed +TOTAL : 4.506161 sec + 13,825,876,732 cycles # 3.065 GHz + 31,003,441,377 instructions # 2.24 insn per cycle + 4.511293914 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.104044e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.951379e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.951379e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.095787e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.921433e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.921433e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.725185 sec - 11,191,729,074 cycles # 3.002 GHz - 19,277,666,580 instructions # 1.72 insn per cycle - 3.730254016 seconds time elapsed +TOTAL : 3.739541 sec + 11,127,620,738 cycles # 2.972 GHz + 19,279,101,554 instructions # 1.73 insn per cycle + 3.744717536 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -147,14 +147,14 @@ Workflow 
summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.170659e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.090967e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.090967e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.178706e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.065359e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.065359e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.639431 sec - 10,927,950,837 cycles # 3.000 GHz - 18,707,814,304 instructions # 1.71 insn per cycle - 3.644708893 seconds time elapsed +TOTAL : 3.636476 sec + 10,830,671,879 cycles # 2.975 GHz + 18,707,850,182 instructions # 1.73 insn per cycle + 3.641699159 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.887944e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.525023e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.525023e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.860313e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.476557e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.476557e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.076704 sec - 9,749,882,110 cycles # 2.390 GHz - 15,436,538,196 instructions # 1.58 insn per cycle - 4.081702298 seconds time elapsed +TOTAL : 4.137153 sec + 9,732,284,492 cycles # 2.350 
GHz + 15,439,344,480 instructions # 1.59 insn per cycle + 4.142397756 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt index ce4a04519c..a57c0afd5f 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-28_13:02:47 +DATE: 2023-10-29_12:45:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.845902e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.649474e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.025258e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.857201e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.666270e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.055653e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.955989 sec - 3,578,163,738 cycles # 3.023 GHz - 7,068,279,319 instructions # 1.98 insn per cycle - 1.241156432 seconds time 
elapsed +TOTAL : 0.957338 sec + 3,568,978,348 cycles # 3.013 GHz + 7,097,529,887 instructions # 1.99 insn per cycle + 1.241766786 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.138227e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.336088e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.336088e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.140283e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.338232e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.338232e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.921217 sec - 18,291,774,290 cycles # 3.087 GHz - 44,034,233,337 instructions # 2.41 insn per cycle - 5.926430108 seconds time elapsed +TOTAL : 5.913191 sec + 18,291,555,987 cycles # 3.092 GHz + 44,034,842,699 instructions # 2.41 insn per cycle + 5.918338832 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.674598e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.187066e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.187066e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.652983e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.156618e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.156618e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.152819 sec - 12,778,275,966 cycles # 3.074 GHz - 31,001,281,707 instructions # 2.43 insn per cycle - 4.158063842 seconds time elapsed +TOTAL : 4.207372 sec + 12,748,693,047 cycles # 3.027 GHz + 31,000,802,407 instructions # 2.43 insn per cycle + 4.212630223 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.121816e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.958302e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.958302e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.098198e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.924886e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.924886e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.351941 sec - 10,044,492,755 cycles # 2.993 GHz - 19,377,203,012 instructions # 1.93 insn per cycle - 3.357252123 seconds time elapsed +TOTAL : 3.387559 sec + 10,061,207,969 cycles # 2.966 GHz + 19,378,764,597 instructions # 1.93 insn per cycle + 3.392871997 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.202934e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.107252e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.107252e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.192299e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.089090e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.089090e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.242049 sec - 9,710,030,443 cycles # 2.991 GHz - 19,005,255,795 instructions # 1.96 insn per cycle - 3.247405175 seconds time elapsed +TOTAL : 3.258563 sec + 9,691,496,400 cycles # 2.971 GHz + 18,995,322,883 instructions # 1.96 insn per cycle + 3.263721421 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.891534e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.531334e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.531334e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.880972e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.505823e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.505823e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.720604 sec - 8,629,557,650 
cycles # 2.318 GHz - 15,737,846,740 instructions # 1.82 insn per cycle - 3.725901019 seconds time elapsed +TOTAL : 3.735319 sec + 8,600,046,261 cycles # 2.300 GHz + 15,737,440,798 instructions # 1.83 insn per cycle + 3.740664421 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 2870ebbc32..14e59ac3a7 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-28_12:59:40 +DATE: 2023-10-29_12:41:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -45,14 +45,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.299550e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.591007e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.931193e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.321455e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.580409e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.928218e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.820738 sec - 6,295,899,400 cycles # 3.073 GHz - 11,479,210,876 instructions # 1.82 insn per cycle - 2.105126705 seconds time elapsed +TOTAL : 1.815521 sec + 6,300,479,484 cycles # 3.068 GHz + 11,616,768,393 instructions # 1.84 insn per cycle + 2.110018829 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -71,14 +71,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.127640e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.323593e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.323593e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.135006e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.331115e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.331115e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.976156 sec - 18,298,750,649 cycles # 3.060 GHz - 44,033,955,064 instructions # 2.41 insn per cycle - 5.981385208 seconds time elapsed +TOTAL : 5.939454 sec + 18,283,010,727 cycles # 3.076 GHz + 44,034,450,286 instructions # 2.41 insn per cycle + 5.944452992 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -97,14 +97,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.692034e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.217323e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.217323e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.667343e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.181731e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.181731e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.110550 sec - 12,786,408,904 cycles # 3.108 GHz - 
31,001,807,900 instructions # 2.42 insn per cycle - 4.116019692 seconds time elapsed +TOTAL : 4.173581 sec + 12,755,850,172 cycles # 3.055 GHz + 31,002,739,822 instructions # 2.43 insn per cycle + 4.178760665 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -123,14 +123,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.022948e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.833059e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.833059e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.119818e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.952641e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.952641e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.511619 sec - 10,107,987,005 cycles # 2.881 GHz - 19,380,873,481 instructions # 1.92 insn per cycle - 3.517122011 seconds time elapsed +TOTAL : 3.353719 sec + 10,022,124,116 cycles # 2.985 GHz + 19,377,738,463 instructions # 1.93 insn per cycle + 3.358949031 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -149,14 +149,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.183458e+06 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.090327e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.090327e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.197647e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.094541e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.094541e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.269447 sec - 9,757,920,533 cycles # 2.981 GHz - 19,005,390,431 instructions # 1.95 insn per cycle - 3.274812048 seconds time elapsed +TOTAL : 3.246854 sec + 9,691,440,441 cycles # 2.981 GHz + 19,006,411,877 instructions # 1.96 insn per cycle + 3.252278792 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -175,14 +175,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.882345e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.512559e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.512559e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.892226e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.523289e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.523289e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.732017 sec - 8,623,800,932 cycles # 2.309 GHz - 15,737,047,561 instructions # 1.82 insn per cycle - 3.737384155 seconds time elapsed +TOTAL : 3.715100 sec + 8,592,102,883 cycles # 2.310 GHz + 15,737,316,253 instructions # 1.83 insn per cycle + 3.720383119 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index 5657d881b3..f380f8f77b 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-28_12:07:08 +DATE: 2023-10-29_12:04:39 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.441096e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.268097e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.113690e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.688679e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.878052e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.084038e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.709809 sec - 2,803,996,602 cycles # 2.965 GHz - 4,290,428,381 instructions # 1.53 insn per cycle - 1.019645233 seconds time elapsed +TOTAL : 0.664702 sec + 2,527,358,473 cycles # 2.821 GHz + 3,950,149,218 instructions # 1.56 insn per cycle + 0.956523953 seconds time elapsed runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.200878e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.422800e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.422800e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.176849e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.393454e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.393454e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.634059 sec - 17,440,710,145 cycles # 3.093 GHz - 41,880,329,393 instructions # 2.40 insn per cycle - 5.642218325 seconds time elapsed +TOTAL : 5.749146 sec + 17,418,245,221 cycles # 3.027 GHz + 41,881,264,051 instructions # 2.40 insn per cycle + 5.754425044 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 392) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.736085e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.290780e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.290780e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.727663e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.274892e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.274892e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.019667 sec - 12,477,262,405 cycles # 3.100 GHz - 30,163,431,800 instructions # 2.42 insn per cycle - 4.033775326 seconds time elapsed +TOTAL : 4.036880 sec + 12,443,172,071 cycles # 3.079 GHz + 30,163,295,291 instructions # 2.42 insn per cycle + 4.042069911 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1611) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.125635e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.982968e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.982968e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.119192e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.962960e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.962960e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.348054 sec - 9,972,951,874 cycles # 2.974 GHz - 19,109,456,596 instructions # 1.92 insn per cycle - 3.361220026 seconds time elapsed +TOTAL : 3.362573 sec + 9,914,887,002 cycles # 2.945 GHz + 19,109,568,676 instructions # 1.93 insn per cycle + 3.367772960 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1930) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.196494e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.114824e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.114824e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.148695e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.010722e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.010722e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.252567 sec - 9,689,824,724 cycles # 2.974 GHz - 18,765,633,099 instructions # 1.94 insn per cycle - 3.263307852 seconds time elapsed +TOTAL : 3.319719 sec + 9,606,034,124 cycles # 2.890 GHz + 18,764,443,608 instructions # 1.95 insn per cycle + 3.324776207 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1661) (512y: 178) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.908402e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.562129e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.562129e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.926149e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.586513e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.586513e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.690497 sec - 8,466,142,206 cycles # 2.291 GHz - 15,614,351,262 instructions # 1.84 insn per cycle - 3.704028359 seconds time elapsed +TOTAL : 3.658483 sec + 8,411,821,797 cycles # 2.297 GHz + 15,613,679,149 instructions # 1.86 insn per cycle + 3.663873663 seconds 
time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 886) (512y: 156) (512z: 1239) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index 13070edd4c..1c092f6985 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-28_12:43:11 +DATE: 2023-10-29_12:25:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.494476e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.553934e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.018374e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.824995e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.672902e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.055784e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.673907 sec - 2,748,926,087 cycles # 3.032 GHz - 4,244,261,400 instructions # 1.54 insn per cycle - 0.965294408 seconds time elapsed +TOTAL : 0.668648 sec + 2,691,554,614 cycles # 2.996 GHz + 4,161,203,730 instructions # 1.55 insn 
per cycle + 0.958727124 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.713214e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.199457e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.199457e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.704241e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.185386e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.185386e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.064069 sec - 12,678,588,689 cycles # 3.116 GHz - 32,576,476,028 instructions # 2.57 insn per cycle - 4.069761387 seconds time elapsed +TOTAL : 4.087797 sec + 12,666,124,359 cycles # 3.096 GHz + 32,577,303,385 instructions # 2.57 insn per cycle + 4.092925597 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 296) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.163410e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.090108e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.090108e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.144087e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.061274e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.061274e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.302170 sec - 10,269,221,045 cycles # 3.105 GHz - 24,504,851,413 instructions # 2.39 insn per cycle - 3.307906175 seconds time elapsed +TOTAL : 3.328636 sec + 10,261,573,946 cycles # 3.079 GHz + 24,505,950,861 instructions # 2.39 insn per cycle + 3.334196649 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1251) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.375029e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.478948e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.478948e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.376914e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.481586e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.481586e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.037442 sec - 9,111,077,932 cycles # 2.995 GHz - 16,941,027,954 instructions # 1.86 insn per cycle - 3.043137650 seconds time elapsed +TOTAL : 3.036227 sec + 9,059,680,784 cycles # 2.980 GHz + 16,942,142,440 instructions # 1.87 insn per cycle + 3.041553155 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1631) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.360262e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.485603e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.485603e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.440750e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.596606e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.596606e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.062735 sec - 8,912,572,641 cycles # 2.907 GHz - 16,357,435,332 instructions # 1.84 insn per cycle - 3.068517018 seconds time elapsed +TOTAL : 2.963120 sec + 8,832,825,912 cycles # 2.977 GHz + 16,357,330,244 instructions # 1.85 insn per cycle + 2.968310420 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1370) (512y: 139) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.019189e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.788585e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.788585e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.029944e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.804543e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.804543e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.519094 sec - 7,914,322,807 cycles # 2.246 GHz - 14,592,508,916 instructions # 1.84 insn per cycle - 3.524967051 seconds time elapsed +TOTAL : 3.502018 sec + 7,914,855,069 cycles # 2.258 GHz + 
14,594,378,606 instructions # 1.84 insn per cycle + 3.507229260 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1015) (512y: 158) (512z: 955) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index 5ce6c3294f..8aaee529f3 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-28_12:43:40 +DATE: 2023-10-29_12:25:32 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.498637e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.592465e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.106967e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.832257e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.689820e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.107266e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.672595 sec - 2,740,210,191 cycles # 3.028 GHz - 4,246,653,984 instructions # 1.55 insn per cycle - 0.964049396 seconds time elapsed +TOTAL : 0.660186 sec + 
2,704,353,909 cycles # 3.037 GHz + 4,132,550,954 instructions # 1.53 insn per cycle + 0.950731320 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.270288e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.204979e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.204979e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.255392e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.177534e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.177534e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.163536 sec - 9,876,175,945 cycles # 3.117 GHz - 25,456,751,506 instructions # 2.58 insn per cycle - 3.169196174 seconds time elapsed +TOTAL : 3.185155 sec + 9,847,530,364 cycles # 3.087 GHz + 25,457,133,972 instructions # 2.59 insn per cycle + 3.190284100 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 249) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.524894e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.890405e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.890405e+06 ) 
sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.530932e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.892174e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.892174e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.883411 sec - 8,990,488,928 cycles # 3.113 GHz - 21,514,519,903 instructions # 2.39 insn per cycle - 2.888918635 seconds time elapsed +TOTAL : 2.879384 sec + 8,913,895,098 cycles # 3.091 GHz + 21,514,459,263 instructions # 2.41 insn per cycle + 2.884612658 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1119) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.510849e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.830526e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.830526e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.545937e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.843167e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.843167e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.893235 sec - 8,686,251,634 cycles # 2.997 GHz - 15,829,692,324 instructions # 1.82 insn per cycle - 2.898833065 seconds time elapsed +TOTAL : 2.859280 sec + 8,544,306,826 cycles # 2.985 GHz + 15,827,712,225 instructions # 1.85 insn per cycle + 2.864111025 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest.exe @@ 
-147,14 +147,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.587762e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.939824e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.939824e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.608345e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.959405e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.959405e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.817140 sec - 8,420,351,252 cycles # 2.984 GHz - 15,528,245,109 instructions # 1.84 insn per cycle - 2.822805511 seconds time elapsed +TOTAL : 2.796657 sec + 8,368,015,466 cycles # 2.988 GHz + 15,528,362,084 instructions # 1.86 insn per cycle + 2.801823903 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1268) (512y: 139) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.181864e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.103964e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.103964e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.254251e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.213319e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.213319e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.287080 sec - 7,580,170,366 cycles # 2.303 GHz - 14,294,844,412 instructions # 1.89 insn per cycle - 3.292914044 seconds time elapsed +TOTAL : 3.181418 sec + 
7,557,201,393 cycles # 2.374 GHz + 14,292,258,011 instructions # 1.89 insn per cycle + 3.186495390 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1041) (512y: 164) (512z: 874) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 2bc6d58328..7f46bde5e9 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-28_12:07:40 +DATE: 2023-10-29_12:05:12 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.090543e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.080299e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.281561e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.569808e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.303498e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.281562e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.587204 sec - 2,430,047,127 cycles # 2.990 GHz - 3,806,946,919 instructions # 1.57 insn per cycle - 0.886413121 seconds time 
elapsed +TOTAL : 0.555717 sec + 2,383,859,082 cycles # 3.022 GHz + 3,700,757,768 instructions # 1.55 insn per cycle + 0.846227370 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.166380e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.383006e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.383006e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.170731e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.387851e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.387851e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.752162 sec - 17,833,499,336 cycles # 3.098 GHz - 43,613,139,570 instructions # 2.45 insn per cycle - 5.760218770 seconds time elapsed +TOTAL : 5.728025 sec + 17,798,264,765 cycles # 3.105 GHz + 43,613,321,746 instructions # 2.45 insn per cycle + 5.732968535 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.398175e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.666453e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.666453e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.416371e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.689737e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.689737e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.981717 sec - 9,251,617,129 cycles # 3.097 GHz - 21,926,067,081 instructions # 2.37 insn per cycle - 2.997914976 seconds time elapsed +TOTAL : 2.958460 sec + 9,200,489,884 cycles # 3.106 GHz + 21,925,512,062 instructions # 2.38 insn per cycle + 2.963425630 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.574825e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.945979e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.945979e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.606473e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.987849e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.987849e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.791462 sec - 8,314,714,212 cycles # 2.973 GHz - 15,591,186,569 instructions # 1.88 insn per cycle - 2.811182829 seconds time elapsed +TOTAL : 2.760956 sec + 8,262,496,420 cycles # 2.988 GHz + 15,591,539,878 instructions # 1.89 insn per cycle + 2.766076443 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.503167e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.857737e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.857737e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.607993e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.019342e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.019342e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.876747 sec - 8,267,197,933 cycles # 2.869 GHz - 15,440,367,783 instructions # 1.87 insn per cycle - 2.891647671 seconds time elapsed +TOTAL : 2.765968 sec + 8,198,371,730 cycles # 2.961 GHz + 15,435,025,113 instructions # 1.88 insn per cycle + 2.771149365 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.645972e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.083907e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.083907e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.657990e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.094383e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.094383e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.731553 sec - 6,627,921,922 cycles 
# 2.422 GHz - 12,869,631,967 instructions # 1.94 insn per cycle - 2.748478826 seconds time elapsed +TOTAL : 2.718566 sec + 6,606,742,247 cycles # 2.427 GHz + 12,869,980,452 instructions # 1.95 insn per cycle + 2.723582960 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index 9a142ed19e..0b4db02b86 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-28_12:54:02 +DATE: 2023-10-29_12:35:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.422048e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.972937e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.972937e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.474804e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.962112e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.962112e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.649582 sec - 5,711,265,423 cycles # 3.047 GHz - 10,308,364,781 instructions # 1.80 insn per cycle - 1.932930473 seconds time elapsed +TOTAL : 1.636795 sec + 5,643,077,305 cycles # 3.027 GHz + 10,280,576,945 instructions # 1.82 insn per cycle + 1.921179131 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -78,14 +78,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.140422e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.349734e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.349734e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.130619e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.334867e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.334867e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.977808 sec - 18,533,331,220 cycles # 3.098 GHz - 43,762,290,767 instructions # 2.36 insn per cycle - 5.983909821 seconds time elapsed +TOTAL : 6.026937 sec + 18,463,069,696 cycles # 3.061 GHz + 43,762,559,108 instructions # 2.37 insn per cycle + 6.032712961 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -105,14 +105,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.290195e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.424700e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.424700e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.291047e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.416048e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.416048e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.227553 sec - 10,032,677,675 cycles # 3.103 GHz - 23,260,235,509 instructions # 2.32 insn per cycle - 3.233916613 
seconds time elapsed +TOTAL : 3.224000 sec + 9,974,231,409 cycles # 3.089 GHz + 23,261,628,829 instructions # 2.33 insn per cycle + 3.229980045 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -132,14 +132,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.470452e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.712827e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.712827e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.470068e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.722431e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.722431e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.023079 sec - 9,085,119,885 cycles # 3.000 GHz - 16,711,585,271 instructions # 1.84 insn per cycle - 3.029450378 seconds time elapsed +TOTAL : 3.014309 sec + 9,033,599,745 cycles # 2.992 GHz + 16,710,722,683 instructions # 1.85 insn per cycle + 3.020418073 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -159,14 +159,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.492995e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.765769e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.765769e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.485239e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.752852e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.752852e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.999610 sec - 9,001,947,397 cycles # 2.996 GHz - 16,553,774,590 instructions # 1.84 insn per cycle - 3.005954313 seconds time elapsed +TOTAL : 3.005083 sec + 8,983,180,192 cycles # 2.986 GHz + 16,560,789,697 instructions # 1.84 insn per cycle + 3.010982814 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.493012e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.755928e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.755928e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.502480e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.764480e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.764480e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.998687 sec - 7,467,033,548 cycles # 2.485 GHz - 14,076,135,702 instructions # 1.89 insn per cycle - 3.005132894 seconds time elapsed +TOTAL : 2.985922 sec + 7,387,231,495 cycles # 2.471 GHz + 14,076,334,743 instructions # 1.91 insn per cycle + 2.991991008 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index 9eef926427..2b0f219a41 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-28_13:06:27 +DATE: 2023-10-29_12:48:43 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.387761e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.205793e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.236080e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.387494e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.214508e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.248425e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 -TOTAL : 1.141212 sec - 4,198,750,142 cycles # 3.061 GHz - 6,675,407,407 instructions # 1.59 insn per cycle - 1.428782800 seconds time elapsed +TOTAL : 1.186437 sec + 4,201,334,068 cycles # 2.989 GHz + 6,627,660,432 instructions # 1.58 insn per cycle + 1.463153599 seconds time elapsed runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.169470e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.388286e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.388286e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.165195e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.383351e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.383351e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 6.050334 sec - 18,816,256,810 cycles # 3.110 GHz - 43,793,552,195 instructions # 2.33 insn per cycle - 6.054801451 seconds time elapsed +TOTAL : 6.072926 sec + 18,835,057,538 cycles # 3.100 GHz + 43,796,928,480 instructions # 2.33 insn per cycle + 6.077944722 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.361706e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.619621e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.619621e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.342008e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.584442e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.584442e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 3.344671 sec - 10,233,257,715 cycles # 3.056 GHz - 22,006,617,137 instructions # 2.15 insn per cycle - 3.349838788 seconds time elapsed +TOTAL : 3.369100 sec + 10,266,001,012 cycles # 3.044 GHz + 22,007,821,518 instructions # 2.14 insn per cycle + 3.374095696 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.577991e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.978206e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.978206e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.561455e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.927682e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.927682e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.100708 sec - 9,308,390,161 cycles # 2.998 GHz - 15,501,686,385 instructions # 1.67 insn per cycle - 3.105683433 seconds time elapsed +TOTAL : 3.127417 sec + 9,261,755,413 cycles # 2.958 GHz + 15,502,016,343 instructions # 1.67 insn per cycle + 3.132444295 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.607278e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.054617e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.054617e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.611431e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.034996e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.034996e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.081884 sec - 9,288,964,111 cycles # 3.010 GHz - 15,144,087,137 instructions # 1.63 insn per cycle - 3.086857149 seconds time elapsed +TOTAL : 3.079991 sec + 9,215,198,289 cycles # 2.988 GHz + 15,143,893,450 instructions # 1.64 insn per cycle + 3.084905456 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.640251e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.085557e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.085557e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.640665e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.084466e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.084466e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.056302 sec - 7,645,821,222 cycles # 2.498 GHz - 12,579,189,915 instructions # 1.65 insn per cycle - 3.061324840 seconds time elapsed +TOTAL : 3.056004 sec + 7,613,812,981 cycles # 2.488 GHz + 12,579,209,488 instructions # 1.65 insn per cycle + 3.061105590 seconds time 
elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt index 1f93ef8de8..7c4facf8c1 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-28_13:03:20 +DATE: 2023-10-29_12:45:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.392219e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.220920e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.264145e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.392329e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.226962e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.280172e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.827944 sec - 3,187,760,196 cycles # 3.049 GHz - 6,425,416,727 instructions # 2.02 insn per cycle - 1.105465709 seconds time elapsed +TOTAL : 0.834755 sec + 3,177,702,111 cycles # 3.023 GHz + 6,449,142,343 
instructions # 2.03 insn per cycle + 1.110221614 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.175115e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.393787e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.393787e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.139739e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.351221e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.351221e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.708534 sec - 17,816,177,934 cycles # 3.119 GHz - 43,613,110,420 instructions # 2.45 insn per cycle - 5.713529500 seconds time elapsed +TOTAL : 5.884327 sec + 17,821,810,664 cycles # 3.027 GHz + 43,614,131,030 instructions # 2.45 insn per cycle + 5.889383739 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.413368e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.692372e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.692372e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 
2.398185e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.659097e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.659097e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.962135 sec - 9,234,609,876 cycles # 3.113 GHz - 21,925,233,672 instructions # 2.37 insn per cycle - 2.967194129 seconds time elapsed +TOTAL : 2.980585 sec + 9,234,568,261 cycles # 3.094 GHz + 21,925,493,552 instructions # 2.37 insn per cycle + 2.985551520 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.603845e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.990719e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.990719e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.595452e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.963542e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.963542e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.761968 sec - 8,301,732,177 cycles # 3.001 GHz - 15,590,143,384 instructions # 1.88 insn per cycle - 2.767055994 seconds time elapsed +TOTAL : 2.770209 sec + 8,264,942,329 cycles # 2.979 GHz + 15,590,380,822 instructions # 1.89 insn per cycle + 2.775128570 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.632154e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.072123e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.072123e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.614288e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.036058e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.036058e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.737561 sec - 8,237,387,151 cycles # 3.004 GHz - 15,434,315,781 instructions # 1.87 insn per cycle - 2.742848124 seconds time elapsed +TOTAL : 2.755945 sec + 8,229,430,317 cycles # 2.981 GHz + 15,439,224,332 instructions # 1.88 insn per cycle + 2.761056638 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.664766e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.122063e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.122063e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.566410e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.944778e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.944778e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.713872 sec - 6,633,362,586 cycles # 2.440 GHz - 12,870,274,076 instructions # 1.94 insn per cycle - 2.719220394 seconds time elapsed +TOTAL : 2.814055 sec + 6,615,971,523 cycles # 2.349 GHz + 
12,870,676,545 instructions # 1.95 insn per cycle + 2.819075054 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index 885eb346be..a9154c2748 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-28_13:00:14 +DATE: 2023-10-29_12:42:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -45,14 +45,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.416685e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.191792e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.169337e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.235922e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.189314e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.175908e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.431938 sec - 5,062,471,867 cycles # 3.066 GHz - 9,243,027,622 instructions # 1.83 insn per cycle - 1.708189535 seconds time elapsed +TOTAL : 1.452913 sec + 5,098,838,507 cycles # 3.053 GHz + 9,239,267,347 instructions # 1.81 insn per cycle + 1.728632717 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 @@ -71,14 +71,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.172128e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.390493e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.390493e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.170247e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.386746e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.386746e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.722039 sec - 17,824,581,874 cycles # 3.113 GHz - 43,613,229,473 instructions # 2.45 insn per cycle - 5.727091863 seconds time elapsed +TOTAL : 5.731230 sec + 17,811,008,964 cycles # 3.105 GHz + 43,613,062,162 instructions # 2.45 insn per cycle + 5.736148226 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -97,14 +97,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.403732e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.680842e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.680842e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.338265e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.556723e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.556723e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.973475 sec - 9,254,405,834 cycles # 3.108 GHz - 
21,926,118,421 instructions # 2.37 insn per cycle - 2.978569066 seconds time elapsed +TOTAL : 3.060662 sec + 9,221,238,318 cycles # 3.009 GHz + 21,925,884,562 instructions # 2.38 insn per cycle + 3.065656017 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -123,14 +123,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.606204e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.002734e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.002734e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.575598e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.926277e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.926277e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.758603 sec - 8,291,129,557 cycles # 3.001 GHz - 15,589,976,650 instructions # 1.88 insn per cycle - 2.763801597 seconds time elapsed +TOTAL : 2.795328 sec + 8,274,157,710 cycles # 2.956 GHz + 15,590,638,637 instructions # 1.88 insn per cycle + 2.800362174 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -149,14 +149,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.573252e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.973552e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.973552e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.608597e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.004444e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.004444e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.801833 sec - 8,237,865,652 cycles # 2.936 GHz - 15,433,868,332 instructions # 1.87 insn per cycle - 2.806881384 seconds time elapsed +TOTAL : 2.765892 sec + 8,210,872,152 cycles # 2.964 GHz + 15,434,199,512 instructions # 1.88 insn per cycle + 2.771052894 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -175,14 +175,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.623316e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.043661e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.043661e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.639167e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.056020e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.056020e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.750244 sec - 6,637,357,682 cycles # 2.410 GHz - 12,869,208,378 instructions # 1.94 insn per cycle - 2.755481179 seconds time elapsed +TOTAL : 2.736960 sec + 6,603,283,532 cycles # 2.409 GHz + 12,869,076,714 instructions # 1.95 insn per cycle + 2.741922035 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index 060299a689..57bc44590f 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-28_12:08:10 +DATE: 2023-10-29_12:05:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.096840e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.090967e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.324848e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.573323e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.316951e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.316598e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.584864 sec - 2,429,123,171 cycles # 3.001 GHz - 3,756,300,873 instructions # 1.55 insn per cycle - 0.884979997 seconds time elapsed +TOTAL : 0.555253 sec + 2,382,172,389 cycles # 3.020 GHz + 3,675,857,664 instructions # 1.54 insn per cycle + 0.846167400 seconds time elapsed runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.250011e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.503357e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.503357e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.249737e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.499844e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.499844e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.392391 sec - 16,749,129,912 cycles # 3.103 GHz - 41,371,534,119 instructions # 2.47 insn per cycle - 5.400279164 seconds time elapsed +TOTAL : 5.388471 sec + 16,729,282,863 cycles # 3.102 GHz + 41,371,471,590 instructions # 2.47 insn per cycle + 5.393316082 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.470803e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.835685e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.835685e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.479747e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.842891e+06 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.842891e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.902349 sec - 9,025,157,457 cycles # 3.104 GHz - 21,229,572,279 instructions # 2.35 insn per cycle - 2.914870377 seconds time elapsed +TOTAL : 2.890771 sec + 8,980,628,419 cycles # 3.102 GHz + 21,229,293,902 instructions # 2.36 insn per cycle + 2.895816535 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1841) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.612236e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.026002e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.026002e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.553067e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.911755e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.911755e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.758538 sec - 8,264,914,372 cycles # 2.991 GHz - 15,424,883,788 instructions # 1.87 insn per cycle - 2.773271734 seconds time elapsed +TOTAL : 2.818350 sec + 8,236,815,339 cycles # 2.918 GHz + 15,424,982,194 instructions # 1.87 insn per cycle + 2.823290241 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2536) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.644470e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.110473e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.110473e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.628835e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.060475e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.060475e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.728316 sec - 8,163,671,191 cycles # 2.986 GHz - 15,244,056,929 instructions # 1.87 insn per cycle - 2.741900405 seconds time elapsed +TOTAL : 2.744370 sec + 8,095,406,107 cycles # 2.945 GHz + 15,243,714,977 instructions # 1.88 insn per cycle + 2.749338594 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2423) (512y: 8) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.644603e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.099685e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.099685e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.596823e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.990816e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.990816e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.730411 sec - 6,638,319,522 cycles # 2.426 GHz - 12,848,391,758 instructions # 1.94 insn per cycle - 2.743796244 seconds time elapsed +TOTAL : 2.778794 sec + 6,577,973,479 cycles # 2.364 GHz + 12,848,266,614 instructions # 1.95 insn per cycle + 2.783917309 seconds time elapsed =Symbols 
in CPPProcess.o= (~sse4: 0) (avx2: 1705) (512y: 18) (512z: 1427) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index c7ef9a3620..6be58ed42d 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-28_12:44:07 +DATE: 2023-10-29_12:25:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.303040e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.188390e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.265011e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.381038e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.216517e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.259040e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.570489 sec - 2,388,301,471 cycles # 3.016 GHz - 3,672,166,062 instructions # 1.54 insn per cycle - 0.851396853 seconds time elapsed +TOTAL : 0.564554 sec + 2,368,656,023 cycles # 3.011 GHz + 3,688,760,637 instructions # 1.56 insn per cycle + 
0.843324142 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.721134e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.237285e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.237285e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.741983e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.273463e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.273463e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 4.012883 sec - 12,189,009,747 cycles # 3.034 GHz - 32,520,660,992 instructions # 2.67 insn per cycle - 4.018143104 seconds time elapsed +TOTAL : 3.966724 sec + 12,194,181,082 cycles # 3.075 GHz + 32,520,249,828 instructions # 2.67 insn per cycle + 3.971213420 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 312) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.844353e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.819924e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.819924e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.759372e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = 
( 4.661299e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.661299e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.568889 sec - 7,992,345,121 cycles # 3.106 GHz - 18,689,694,318 instructions # 2.34 insn per cycle - 2.574390971 seconds time elapsed +TOTAL : 2.644286 sec + 7,955,830,690 cycles # 3.004 GHz + 18,690,505,489 instructions # 2.35 insn per cycle + 2.649232282 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1554) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.974716e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.925885e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.925885e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.950641e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.852361e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.852361e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.466000 sec - 7,424,757,976 cycles # 3.006 GHz - 14,253,649,478 instructions # 1.92 insn per cycle - 2.471253254 seconds time elapsed +TOTAL : 2.481745 sec + 7,418,882,379 cycles # 2.984 GHz + 14,254,122,903 instructions # 1.92 insn per cycle + 2.486684765 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2237) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision 
= FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.037585e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.105236e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.105236e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.028679e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.073597e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.073597e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.421254 sec - 7,301,719,895 cycles # 3.010 GHz - 13,951,418,899 instructions # 1.91 insn per cycle - 2.426824644 seconds time elapsed +TOTAL : 2.428935 sec + 7,266,876,500 cycles # 2.991 GHz + 13,949,467,084 instructions # 1.92 insn per cycle + 2.433605636 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2096) (512y: 3) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.708760e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.220574e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.220574e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.730559e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.262408e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.262408e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.673761 sec - 6,515,348,114 cycles # 2.433 GHz - 13,421,447,299 instructions # 2.06 insn per cycle - 2.679223686 seconds time elapsed +TOTAL : 2.656095 sec + 6,484,738,554 cycles # 2.438 GHz + 13,421,284,492 instructions # 2.07 insn per cycle + 2.661161068 
seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2071) (512y: 1) (512z: 1198) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index aa4260fb14..ea1b5c4ce9 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-28_12:44:33 +DATE: 2023-10-29_12:26:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.305875e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.197999e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.303366e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.378042e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.230898e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.303636e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.569902 sec - 2,393,097,307 cycles # 3.023 GHz - 3,728,454,815 instructions # 1.56 insn per cycle - 0.850824445 seconds time elapsed +TOTAL : 0.572348 sec + 2,315,088,058 cycles # 2.918 GHz + 3,606,090,617 instructions # 
1.56 insn per cycle + 0.851746680 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.306212e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.354504e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.354504e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.337523e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.407077e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.407077e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.087486 sec - 9,423,962,266 cycles # 3.049 GHz - 25,307,428,823 instructions # 2.69 insn per cycle - 3.093213487 seconds time elapsed +TOTAL : 3.047145 sec + 9,399,501,328 cycles # 3.081 GHz + 25,306,524,755 instructions # 2.69 insn per cycle + 3.052129026 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 263) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.212003e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.966983e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.966983e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.148384e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 5.855784e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.855784e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.316527 sec - 7,194,121,645 cycles # 3.100 GHz - 16,901,739,382 instructions # 2.35 insn per cycle - 2.321817623 seconds time elapsed +TOTAL : 2.361555 sec + 7,165,256,881 cycles # 3.028 GHz + 16,902,533,123 instructions # 2.36 insn per cycle + 2.366898623 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1359) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.088605e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.290375e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.290375e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.064233e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.200064e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.200064e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.385856 sec - 7,198,917,112 cycles # 3.012 GHz - 13,619,147,871 instructions # 1.89 insn per cycle - 2.391282642 seconds time elapsed +TOTAL : 2.402350 sec + 7,155,498,870 cycles # 2.973 GHz + 13,619,819,517 instructions # 1.90 insn per cycle + 2.407497506 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2060) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.159700e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.484314e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.484314e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.161218e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.465591e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.465591e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.341781 sec - 7,056,960,923 cycles # 3.008 GHz - 13,430,372,265 instructions # 1.90 insn per cycle - 2.347350712 seconds time elapsed +TOTAL : 2.343466 sec + 7,029,083,635 cycles # 2.994 GHz + 13,435,784,739 instructions # 1.91 insn per cycle + 2.348424312 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1945) (512y: 4) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.759294e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.397998e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.397998e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.828979e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.532554e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.532554e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.637218 sec - 6,320,867,639 cycles # 2.393 GHz - 13,153,069,484 instructions # 2.08 insn per cycle - 2.642625257 seconds time elapsed +TOTAL : 2.576228 sec + 6,306,840,953 cycles # 2.444 GHz + 
13,154,129,713 instructions # 2.09 insn per cycle + 2.581443302 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2029) (512y: 1) (512z: 1083) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index e2d15ec9ec..f01a9a57a3 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-28_12:08:38 +DATE: 2023-10-29_12:06:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.432223e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.263514e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.061401e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.681696e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.844881e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.020418e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.713704 sec - 2,810,694,285 cycles # 2.970 GHz - 4,352,973,933 instructions # 1.55 insn per cycle - 1.023439423 seconds time elapsed +TOTAL : 0.657315 sec + 
2,598,665,017 cycles # 2.936 GHz + 4,028,861,679 instructions # 1.55 insn per cycle + 0.945030332 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.113042e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.301497e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.301497e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.100332e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.287076e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.287076e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.048110 sec - 18,728,988,916 cycles # 3.094 GHz - 44,285,855,893 instructions # 2.36 insn per cycle - 6.056235273 seconds time elapsed +TOTAL : 6.113513 sec + 18,734,521,807 cycles # 3.062 GHz + 44,287,146,095 instructions # 2.36 insn per cycle + 6.118759717 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 439) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.725869e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.290079e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.290079e+06 ) 
sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.757716e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.321897e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.321897e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.046149 sec - 12,390,711,310 cycles # 3.059 GHz - 30,961,468,949 instructions # 2.50 insn per cycle - 4.059152850 seconds time elapsed +TOTAL : 3.969796 sec + 12,337,756,331 cycles # 3.105 GHz + 30,959,811,407 instructions # 2.51 insn per cycle + 3.974750304 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1685) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.078637e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.886882e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.886882e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.090570e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.906912e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.906912e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.415997 sec - 10,148,421,740 cycles # 2.966 GHz - 19,398,966,702 instructions # 1.91 insn per cycle - 3.430194328 seconds time elapsed +TOTAL : 3.396973 sec + 10,093,757,140 cycles # 2.968 GHz + 19,398,694,320 instructions # 1.92 insn per cycle + 3.402037195 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2146) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest.exe @@ 
-147,14 +147,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.092565e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.940158e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.940158e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.152608e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.020125e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.020125e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.403217 sec - 9,741,567,918 cycles # 2.858 GHz - 18,981,474,131 instructions # 1.95 insn per cycle - 3.418468893 seconds time elapsed +TOTAL : 3.313893 sec + 9,678,205,730 cycles # 2.917 GHz + 18,980,795,577 instructions # 1.96 insn per cycle + 3.318879598 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1859) (512y: 188) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.901444e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.595212e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.595212e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.849324e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.478638e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.478638e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.703255 sec - 8,520,387,175 cycles # 2.297 GHz - 15,065,100,246 instructions # 1.77 insn per cycle - 3.716972990 seconds time elapsed +TOTAL : 3.806963 sec + 
8,367,265,353 cycles # 2.196 GHz + 15,064,288,991 instructions # 1.80 insn per cycle + 3.812130834 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1023) (512y: 155) (512z: 1316) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index f3310b9cac..9edf8700e8 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-28_12:09:11 +DATE: 2023-10-29_12:06:43 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProces Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.430214e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.253611e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.074009e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.664259e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.857808e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.041274e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.703074 sec - 2,798,414,359 cycles # 2.979 GHz - 4,319,216,534 instructions # 1.54 insn per cycle - 1.005639527 seconds 
time elapsed +TOTAL : 0.667696 sec + 2,606,641,308 cycles # 2.888 GHz + 4,060,337,867 instructions # 1.56 insn per cycle + 0.968428850 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.166428e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.374429e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.374429e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.159840e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.365547e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.365547e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.786685 sec - 17,967,323,887 cycles # 3.102 GHz - 42,535,469,208 instructions # 2.37 insn per cycle - 5.794719220 seconds time elapsed +TOTAL : 5.819162 sec + 17,933,037,739 cycles # 3.080 GHz + 42,536,132,640 instructions # 2.37 insn per cycle + 5.824253591 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 421) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.781652e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.369490e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.369490e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.757614e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.338049e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.338049e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.925610 sec - 12,207,226,408 cycles # 3.107 GHz - 30,267,618,799 instructions # 2.48 insn per cycle - 3.940648882 seconds time elapsed +TOTAL : 3.979221 sec + 12,132,707,817 cycles # 3.048 GHz + 30,269,707,726 instructions # 2.49 insn per cycle + 3.984583996 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1692) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.101317e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.934375e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.934375e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.082667e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.901054e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.901054e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.383740 sec - 10,049,844,381 cycles # 2.965 GHz - 19,281,783,052 instructions # 1.92 insn per cycle - 3.398725006 seconds time elapsed +TOTAL : 3.412907 sec + 9,984,888,667 cycles # 2.922 GHz + 19,281,802,256 instructions # 1.93 insn per cycle + 3.417989674 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2162) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.199554e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.121016e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.121016e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.167358e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.053730e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.053730e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.247190 sec - 9,675,471,455 cycles # 2.975 GHz - 18,782,612,074 instructions # 1.94 insn per cycle - 3.261499344 seconds time elapsed +TOTAL : 3.295521 sec + 9,655,639,183 cycles # 2.926 GHz + 18,781,731,091 instructions # 1.95 insn per cycle + 3.300616608 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1833) (512y: 191) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.954205e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.650147e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.650147e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.985792e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.694413e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.694413e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.614239 sec - 8,334,476,383 cycles 
# 2.303 GHz - 14,988,761,152 instructions # 1.80 insn per cycle - 3.627975521 seconds time elapsed +TOTAL : 3.562394 sec + 8,262,941,640 cycles # 2.317 GHz + 14,988,031,204 instructions # 1.81 insn per cycle + 3.567536273 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1020) (512y: 156) (512z: 1305) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 8336024765..cf07c78786 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-28_12:09:44 +DATE: 2023-10-29_12:07:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.019001e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.136794e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.274389e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.136997e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.176783e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.273393e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.527024 sec - 2,289,159,620 
cycles # 3.004 GHz - 3,250,188,454 instructions # 1.42 insn per cycle - 0.831218802 seconds time elapsed +TOTAL : 0.511809 sec + 2,253,660,474 cycles # 3.022 GHz + 3,223,175,085 instructions # 1.43 insn per cycle + 0.803611548 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.197922e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.262269e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.262269e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.200834e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.264758e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.264758e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.874413 sec - 15,154,127,415 cycles # 3.106 GHz - 38,435,939,186 instructions # 2.54 insn per cycle - 4.882482659 seconds time elapsed +TOTAL : 4.867099 sec + 15,140,743,768 cycles # 3.108 GHz + 38,436,444,633 instructions # 2.54 insn per cycle + 4.872245498 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
3.700267e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.902876e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.902876e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.626756e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.818405e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.818405e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.938885 sec - 9,102,250,875 cycles # 3.091 GHz - 24,592,027,223 instructions # 2.70 insn per cycle - 2.953737381 seconds time elapsed +TOTAL : 2.994726 sec + 9,137,139,873 cycles # 3.047 GHz + 24,590,940,060 instructions # 2.69 insn per cycle + 3.000053290 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.841670e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.348294e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.348294e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.983253e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.504163e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.504163e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.898439 sec - 5,505,216,065 cycles # 2.892 GHz - 11,266,747,641 instructions # 2.05 insn per cycle - 1.915944724 seconds time elapsed +TOTAL : 1.854838 sec + 5,452,843,992 cycles # 2.933 GHz + 11,265,041,070 instructions # 2.07 insn per cycle + 1.860040563 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.367341e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.971191e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.971191e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.659391e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.292943e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.292943e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.753044 sec - 4,965,020,075 cycles # 2.823 GHz - 10,572,471,342 instructions # 2.13 insn per cycle - 1.767016306 seconds time elapsed +TOTAL : 1.676612 sec + 4,925,739,130 cycles # 2.930 GHz + 10,571,474,421 instructions # 2.15 insn per cycle + 1.681870831 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.121729e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.361641e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.361641e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.095664e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.330650e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.330650e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.647332 sec - 5,386,531,185 cycles # 2.030 GHz 
- 7,805,289,561 instructions # 1.45 insn per cycle - 2.664612115 seconds time elapsed +TOTAL : 2.663023 sec + 5,382,784,208 cycles # 2.018 GHz + 7,804,881,730 instructions # 1.45 insn per cycle + 2.668299561 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index 89f4885416..25a1d7368c 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-28_12:54:34 +DATE: 2023-10-29_12:36:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.628625e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.020132e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.020132e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.647828e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.026321e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.026321e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.797774 sec - 3,189,194,967 cycles # 3.042 GHz - 4,915,184,954 instructions # 1.54 insn per cycle - 1.106368237 seconds time elapsed +TOTAL : 0.795675 sec + 3,150,339,357 cycles # 3.032 GHz + 4,902,081,574 instructions # 1.56 insn per cycle + 1.097297675 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -78,14 +78,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.181893e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.245511e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.245511e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.105579e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.164982e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.164982e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.985853 sec - 15,494,769,661 cycles # 3.104 GHz - 38,496,185,507 instructions # 2.48 insn per cycle - 4.992645531 seconds time elapsed +TOTAL : 5.159176 sec + 15,505,788,287 cycles # 3.003 GHz + 38,497,293,687 instructions # 2.48 insn per cycle + 5.165450381 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -105,14 +105,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.547838e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.739897e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.739897e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.663091e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.858706e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.858706e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.137914 sec - 9,437,581,770 cycles # 3.002 GHz - 24,774,460,960 instructions # 2.63 insn per cycle - 3.144781618 
seconds time elapsed +TOTAL : 3.040555 sec + 9,431,019,118 cycles # 3.096 GHz + 24,774,385,658 instructions # 2.63 insn per cycle + 3.046912316 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -132,14 +132,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.828534e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.328150e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.328150e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.863542e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.365394e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.365394e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.980182 sec - 5,849,819,972 cycles # 2.945 GHz - 11,551,679,172 instructions # 1.97 insn per cycle - 1.986935844 seconds time elapsed +TOTAL : 1.967510 sec + 5,786,297,263 cycles # 2.932 GHz + 11,552,071,765 instructions # 2.00 insn per cycle + 1.974044258 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -159,14 +159,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.535655e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.166989e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 7.166989e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.472456e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.085835e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.085835e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.786832 sec - 5,298,722,139 cycles # 2.956 GHz - 10,858,826,865 instructions # 2.05 insn per cycle - 1.793683914 seconds time elapsed +TOTAL : 1.800429 sec + 5,305,491,699 cycles # 2.938 GHz + 10,859,048,691 instructions # 2.05 insn per cycle + 1.806998436 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.984644e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.209702e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.209702e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.032410e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.258848e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.258848e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.815659 sec - 5,764,743,331 cycles # 2.043 GHz - 8,049,005,197 instructions # 1.40 insn per cycle - 2.822613600 seconds time elapsed +TOTAL : 2.781006 sec + 5,749,297,000 cycles # 2.063 GHz + 8,049,410,634 instructions # 1.40 insn per cycle + 2.787426015 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index 4341cd8cb2..cea02f65d0 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-28_13:06:58 +DATE: 2023-10-29_12:49:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.738260e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.160695e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270128e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.728812e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.164066e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.276046e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 0.615783 sec - 2,512,291,360 cycles # 2.984 GHz - 3,661,843,377 instructions # 1.46 insn per cycle - 0.899734611 seconds time elapsed +TOTAL : 0.608013 sec + 2,509,243,315 cycles # 3.014 GHz + 3,608,155,847 instructions # 1.44 insn per cycle + 0.891602544 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 
2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.205018e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.270000e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.270000e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.192667e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.256790e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.256790e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.917523 sec - 15,330,499,616 cycles # 3.115 GHz - 38,452,312,172 instructions # 2.51 insn per cycle - 4.922909300 seconds time elapsed +TOTAL : 4.943798 sec + 15,329,589,495 cycles # 3.098 GHz + 38,452,781,754 instructions # 2.51 insn per cycle + 4.949051931 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.728127e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.933166e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.933166e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.710860e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.912537e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.912537e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.974283 sec - 
9,266,289,564 cycles # 3.111 GHz - 24,590,192,562 instructions # 2.65 insn per cycle - 2.979640099 seconds time elapsed +TOTAL : 2.986722 sec + 9,267,137,535 cycles # 3.098 GHz + 24,590,320,806 instructions # 2.65 insn per cycle + 2.991908911 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.886292e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.405439e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.405439e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.913517e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.427131e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.427131e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.942789 sec - 5,689,267,748 cycles # 2.924 GHz - 11,248,715,582 instructions # 1.98 insn per cycle - 1.948199837 seconds time elapsed +TOTAL : 1.935767 sec + 5,673,483,420 cycles # 2.927 GHz + 11,248,913,442 instructions # 1.98 insn per cycle + 1.941003372 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
6.670998e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.319718e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.319718e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.389406e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.990338e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.990338e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.733390 sec - 5,130,314,649 cycles # 2.952 GHz - 10,520,705,525 instructions # 2.05 insn per cycle - 1.738662427 seconds time elapsed +TOTAL : 1.803078 sec + 5,137,798,097 cycles # 2.848 GHz + 10,521,456,294 instructions # 2.05 insn per cycle + 1.808296325 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.138982e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.379012e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.379012e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.055914e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.287900e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.287900e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.696595 sec - 5,567,230,521 cycles # 2.061 GHz - 7,753,963,703 instructions # 1.39 insn per cycle - 2.701828311 seconds time elapsed +TOTAL : 2.748891 sec + 5,590,724,584 cycles # 2.031 GHz + 7,756,083,386 instructions # 1.39 insn per cycle + 2.754238360 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt index cb28d279bb..32065e8c80 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-28_13:03:49 +DATE: 2023-10-29_12:46:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.731016e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.163476e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.273757e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.752520e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.165198e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.275080e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.562632 sec - 2,291,225,101 cycles # 2.912 GHz - 3,530,216,365 instructions # 1.54 insn per cycle - 0.849249178 seconds time elapsed +TOTAL : 0.550572 sec + 2,362,516,573 cycles # 2.997 GHz + 3,675,267,695 instructions # 1.56 insn per cycle + 0.847304373 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 
2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.205686e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.270616e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.270616e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.189601e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.253029e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.253029e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.857452 sec - 15,159,442,405 cycles # 3.118 GHz - 38,437,017,641 instructions # 2.54 insn per cycle - 4.862743230 seconds time elapsed +TOTAL : 4.892729 sec + 15,145,617,059 cycles # 3.093 GHz + 38,436,286,492 instructions # 2.54 insn per cycle + 4.898049782 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.636753e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.830892e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.830892e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.684613e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.884352e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.884352e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.986089 sec - 
9,111,794,887 cycles # 3.047 GHz - 24,590,815,731 instructions # 2.70 insn per cycle - 2.991338372 seconds time elapsed +TOTAL : 2.949502 sec + 9,111,469,827 cycles # 3.085 GHz + 24,591,911,713 instructions # 2.70 insn per cycle + 2.954800828 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.980804e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.504132e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.504132e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.740095e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.238065e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.238065e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.856262 sec - 5,467,904,077 cycles # 2.938 GHz - 11,264,908,254 instructions # 2.06 insn per cycle - 1.861668376 seconds time elapsed +TOTAL : 1.931930 sec + 5,475,741,469 cycles # 2.828 GHz + 11,265,347,233 instructions # 2.06 insn per cycle + 1.937332643 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
6.665590e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.312077e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.312077e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.605310e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.238263e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.238263e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.675462 sec - 4,943,209,548 cycles # 2.943 GHz - 10,569,508,421 instructions # 2.14 insn per cycle - 1.680743869 seconds time elapsed +TOTAL : 1.690772 sec + 4,944,100,857 cycles # 2.918 GHz + 10,570,436,520 instructions # 2.14 insn per cycle + 1.696028312 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.115270e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.354259e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.354259e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.118850e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.357411e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.357411e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.651242 sec - 5,408,346,690 cycles # 2.037 GHz - 7,804,849,221 instructions # 1.44 insn per cycle - 2.656556634 seconds time elapsed +TOTAL : 2.648858 sec + 5,377,849,755 cycles # 2.027 GHz + 7,805,055,706 instructions # 1.45 insn per cycle + 2.654091921 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index b5905b62c5..246fbcacdc 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-28_13:00:44 +DATE: 2023-10-29_12:42:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -45,14 +45,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.097525e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.157814e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.266299e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.960530e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.160163e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.275575e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.694752 sec - 2,788,859,822 cycles # 3.033 GHz - 4,381,365,981 instructions # 1.57 insn per cycle - 0.979054926 seconds time elapsed +TOTAL : 0.696058 sec + 2,780,887,356 cycles # 3.015 GHz + 4,385,594,335 instructions # 1.58 insn per cycle + 0.979294954 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 
256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -71,14 +71,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.191412e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.257639e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.257639e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.170580e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.234040e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.234040e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.890546 sec - 15,147,332,557 cycles # 3.095 GHz - 38,436,064,384 instructions # 2.54 insn per cycle - 4.896190934 seconds time elapsed +TOTAL : 4.935348 sec + 15,155,957,484 cycles # 3.068 GHz + 38,436,941,468 instructions # 2.54 insn per cycle + 4.940714673 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -97,14 +97,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.714741e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.919663e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.919663e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.699905e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.901140e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.901140e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.927374 sec - 9,107,310,115 
cycles # 3.106 GHz - 24,591,187,308 instructions # 2.70 insn per cycle - 2.932855113 seconds time elapsed +TOTAL : 2.938495 sec + 9,096,664,653 cycles # 3.091 GHz + 24,590,704,048 instructions # 2.70 insn per cycle + 2.943788052 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -123,14 +123,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.796721e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.291663e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.291663e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.920011e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.436855e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.436855e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.913169 sec - 5,482,543,140 cycles # 2.860 GHz - 11,267,157,705 instructions # 2.06 insn per cycle - 1.918674011 seconds time elapsed +TOTAL : 1.873761 sec + 5,479,875,596 cycles # 2.918 GHz + 11,265,025,893 instructions # 2.06 insn per cycle + 1.879007737 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -149,14 +149,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.504015e+05 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.124956e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.124956e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.591031e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.220898e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.220898e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.714664 sec - 4,954,880,462 cycles # 2.882 GHz - 10,571,641,627 instructions # 2.13 insn per cycle - 1.720307718 seconds time elapsed +TOTAL : 1.693862 sec + 4,936,721,786 cycles # 2.906 GHz + 10,571,567,660 instructions # 2.14 insn per cycle + 1.699207961 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -175,14 +175,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.092262e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.330615e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.330615e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.096221e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.331880e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.331880e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.665319 sec - 5,400,661,219 cycles # 2.023 GHz - 7,804,738,015 instructions # 1.45 insn per cycle - 2.670742295 seconds time elapsed +TOTAL : 2.662905 sec + 5,374,981,222 cycles # 2.015 GHz + 7,804,817,945 instructions # 1.45 insn per cycle + 2.668129038 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index 4f7888d47a..63b4155174 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-28_12:10:10 +DATE: 2023-10-29_12:07:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.010123e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.132541e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.266884e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.132498e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.171034e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.266333e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.526405 sec - 2,294,951,715 cycles # 3.016 GHz - 3,252,932,201 instructions # 1.42 insn per cycle - 0.829193545 seconds time elapsed +TOTAL : 0.514455 sec + 2,220,091,958 cycles # 2.993 GHz + 3,194,234,694 instructions # 1.44 insn per cycle + 0.799874670 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 ==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.226915e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.292728e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.292728e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.215738e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.280974e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.280974e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.811357 sec - 15,021,105,693 cycles # 3.119 GHz - 40,162,926,531 instructions # 2.67 insn per cycle - 4.819238763 seconds time elapsed +TOTAL : 4.835319 sec + 15,019,433,345 cycles # 3.103 GHz + 40,163,621,810 instructions # 2.67 insn per cycle + 4.840634277 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.915843e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.141959e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.141959e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.884500e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.107942e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.107942e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.780793 sec - 8,672,891,508 cycles # 3.113 
GHz - 23,683,922,536 instructions # 2.73 insn per cycle - 2.797395099 seconds time elapsed +TOTAL : 2.803037 sec + 8,665,440,053 cycles # 3.087 GHz + 23,685,025,671 instructions # 2.73 insn per cycle + 2.808510683 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2069) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.337317e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.751342e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.751342e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.310538e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.719059e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.719059e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.067044 sec - 6,107,061,861 cycles # 2.946 GHz - 13,074,919,528 instructions # 2.14 insn per cycle - 2.081145351 seconds time elapsed +TOTAL : 2.077453 sec + 6,107,652,411 cycles # 2.934 GHz + 13,074,978,434 instructions # 2.14 insn per cycle + 2.082820176 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2546) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.603808e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 6.060730e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.060730e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.591839e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.032483e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.032483e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.974059 sec - 5,798,776,070 cycles # 2.929 GHz - 12,332,933,370 instructions # 2.13 insn per cycle - 1.986801678 seconds time elapsed +TOTAL : 1.978049 sec + 5,776,625,422 cycles # 2.915 GHz + 12,334,731,018 instructions # 2.14 insn per cycle + 1.983376605 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2096) (512y: 294) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.791064e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.992335e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.992335e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.717467e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.916519e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.916519e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.869809 sec - 5,830,824,222 cycles # 2.028 GHz - 9,613,378,795 instructions # 1.65 insn per cycle - 2.884107270 seconds time elapsed +TOTAL : 2.923702 sec + 5,814,117,462 cycles # 1.986 GHz + 9,613,349,209 instructions # 1.65 insn per cycle + 2.928942693 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1510) (512y: 209) (512z: 1971) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 84262108e6..f681280332 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-28_12:44:58 +DATE: 2023-10-29_12:26:50 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.585283e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.160783e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.274213e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.725703e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.159692e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.268883e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.522037 sec - 2,225,847,965 cycles # 2.955 GHz - 3,088,745,897 instructions # 1.39 insn per cycle - 0.812232117 seconds time elapsed +TOTAL : 0.516069 sec + 2,246,320,767 cycles # 3.015 GHz + 3,236,311,212 instructions # 1.44 insn per cycle + 0.803428844 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 ==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.548055e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.635262e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.635262e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.555106e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.642080e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.642080e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.218746 sec - 13,020,267,103 cycles # 3.083 GHz - 34,406,039,454 instructions # 2.64 insn per cycle - 4.224411668 seconds time elapsed +TOTAL : 4.207931 sec + 13,014,141,427 cycles # 3.090 GHz + 34,406,405,912 instructions # 2.64 insn per cycle + 4.213404340 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 686) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.165854e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.312469e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.312469e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.092906e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.236549e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.236549e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.416319 sec - 10,608,312,227 cycles # 3.101 
GHz - 24,022,929,053 instructions # 2.26 insn per cycle - 3.422023981 seconds time elapsed +TOTAL : 3.496208 sec + 10,596,570,849 cycles # 3.027 GHz + 24,023,369,316 instructions # 2.27 insn per cycle + 3.501419893 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.873705e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.217277e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.217277e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.853734e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.191560e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.191560e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.254903 sec - 6,588,417,890 cycles # 2.915 GHz - 12,414,319,509 instructions # 1.88 insn per cycle - 2.260593676 seconds time elapsed +TOTAL : 2.263215 sec + 6,594,620,627 cycles # 2.908 GHz + 12,414,751,758 instructions # 1.88 insn per cycle + 2.268614948 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3156) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.199714e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 5.586430e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.586430e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.171099e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.557909e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.557909e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.119206 sec - 6,252,999,725 cycles # 2.944 GHz - 11,587,996,172 instructions # 1.85 insn per cycle - 2.124972005 seconds time elapsed +TOTAL : 2.132052 sec + 6,231,185,857 cycles # 2.916 GHz + 11,589,413,017 instructions # 1.86 insn per cycle + 2.137607872 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2692) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.167152e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.411218e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.411218e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.139272e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.380011e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.380011e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.620092 sec - 5,340,755,470 cycles # 2.035 GHz - 9,308,726,759 instructions # 1.74 insn per cycle - 2.625879222 seconds time elapsed +TOTAL : 2.637341 sec + 5,333,432,205 cycles # 2.019 GHz + 9,309,292,233 instructions # 1.75 insn per cycle + 2.642779358 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2116) (512y: 282) (512z: 1958) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index 1da4bf76bb..b6f385dd32 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-28_12:45:25 +DATE: 2023-10-29_12:27:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.574793e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.154292e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.268145e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.723495e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.157942e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.265661e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.517329 sec - 2,263,667,377 cycles # 3.020 GHz - 3,227,214,158 instructions # 1.43 insn per cycle - 0.806491983 seconds time elapsed +TOTAL : 0.515428 sec + 2,250,011,882 cycles # 3.016 GHz + 3,234,085,121 instructions # 1.44 insn per cycle + 0.803968951 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 ==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.666799e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.765140e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.765140e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.702731e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.800396e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.800396e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.037237 sec - 12,381,626,680 cycles # 3.064 GHz - 35,059,471,093 instructions # 2.83 insn per cycle - 4.042830136 seconds time elapsed +TOTAL : 3.982690 sec + 12,369,134,181 cycles # 3.102 GHz + 35,058,846,564 instructions # 2.83 insn per cycle + 3.987863298 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 457) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.075733e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.213422e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.213422e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.061923e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.202369e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.202369e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.513177 sec - 10,690,616,056 cycles # 3.039 
GHz - 23,099,681,886 instructions # 2.16 insn per cycle - 3.518988024 seconds time elapsed +TOTAL : 3.530068 sec + 10,676,553,253 cycles # 3.020 GHz + 23,099,794,307 instructions # 2.16 insn per cycle + 3.535561320 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2363) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.247097e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.651052e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.651052e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.225421e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.617585e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.617585e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.101774 sec - 6,163,674,317 cycles # 2.926 GHz - 11,969,549,964 instructions # 1.94 insn per cycle - 2.107518313 seconds time elapsed +TOTAL : 2.109541 sec + 6,165,675,874 cycles # 2.917 GHz + 11,969,858,026 instructions # 1.94 insn per cycle + 2.114857829 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2511) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.378968e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 5.795139e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.795139e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.370200e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.777020e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.777020e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.051593 sec - 6,022,120,306 cycles # 2.928 GHz - 11,142,964,439 instructions # 1.85 insn per cycle - 2.057280159 seconds time elapsed +TOTAL : 2.055254 sec + 5,996,913,166 cycles # 2.911 GHz + 11,143,393,215 instructions # 1.86 insn per cycle + 2.060568204 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2128) (512y: 174) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.268812e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.526768e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.526768e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.195526e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.443172e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.443172e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.559507 sec - 5,222,984,898 cycles # 2.037 GHz - 9,033,113,449 instructions # 1.73 insn per cycle - 2.565156483 seconds time elapsed +TOTAL : 2.602515 sec + 5,216,067,649 cycles # 2.001 GHz + 9,033,452,178 instructions # 1.73 insn per cycle + 2.607965373 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1651) (512y: 208) (512z: 1567) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 443766ae47..df5aa8ac18 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-28_12:10:37 +DATE: 2023-10-29_12:08:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.245334e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.581083e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.955429e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.068609e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.700374e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.971314e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.478876 sec - 2,114,450,363 cycles # 3.001 GHz - 2,995,809,201 instructions # 1.42 insn per cycle - 0.774010491 seconds time elapsed +TOTAL : 0.476640 sec + 1,995,636,481 cycles # 2.842 GHz + 2,842,978,749 instructions # 1.42 insn per cycle + 0.759763821 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.347045e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.423439e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.423439e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.337599e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.414470e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.414470e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.551732 sec - 14,163,211,343 cycles # 3.109 GHz - 38,393,750,672 instructions # 2.71 insn per cycle - 4.559626496 seconds time elapsed +TOTAL : 4.568864 sec + 14,161,294,667 cycles # 3.097 GHz + 38,393,773,103 instructions # 2.71 insn per cycle + 4.573929482 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.258787e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.694855e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.694855e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.238510e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.663923e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.663923e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.077544 sec - 6,482,134,792 cycles # 3.112 GHz - 
15,829,813,984 instructions # 2.44 insn per cycle - 2.092266384 seconds time elapsed +TOTAL : 2.084946 sec + 6,469,899,480 cycles # 3.097 GHz + 15,829,500,280 instructions # 2.45 insn per cycle + 2.089957611 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.088230e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.043336e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.043336e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.562959e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.098086e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.098086e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.239442 sec - 3,486,432,426 cycles # 2.809 GHz - 7,609,890,231 instructions # 2.18 insn per cycle - 1.254380313 seconds time elapsed +TOTAL : 1.179401 sec + 3,459,426,980 cycles # 2.925 GHz + 7,607,068,208 instructions # 2.20 insn per cycle + 1.184590061 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.032911e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.202298e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.202298e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.027581e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.192505e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.192505e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.098127 sec - 3,254,731,502 cycles # 2.949 GHz - 7,215,829,075 instructions # 2.22 insn per cycle - 1.112790866 seconds time elapsed +TOTAL : 1.102233 sec + 3,244,602,392 cycles # 2.932 GHz + 7,214,608,245 instructions # 2.22 insn per cycle + 1.107242684 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.290959e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.095325e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.095325e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.608210e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.462624e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.462624e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.524399 sec - 3,070,579,488 cycles # 2.007 GHz - 5,846,226,621 instructions # 1.90 insn per cycle - 1.538728141 seconds time elapsed +TOTAL : 1.460661 sec + 3,052,035,122 cycles # 2.083 GHz + 5,845,582,428 instructions # 1.92 insn per cycle + 1.465683285 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index 0d00880b5b..b5be0b4e18 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-28_12:55:01 +DATE: 2023-10-29_12:36:58 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.271504e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.799925e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.799925e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.895050e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.686304e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.686304e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.661589 sec - 2,684,049,402 cycles # 3.019 GHz - 4,132,808,227 instructions # 1.54 insn per cycle - 0.945716932 seconds time elapsed +TOTAL : 0.669131 sec + 2,666,038,605 cycles # 2.984 GHz + 4,131,575,976 instructions # 1.55 insn per cycle + 0.951764894 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 
2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -78,14 +78,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.328083e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.403078e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.403078e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.329231e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.405966e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.405966e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.645191 sec - 14,343,709,354 cycles # 3.094 GHz - 38,435,752,660 instructions # 2.68 insn per cycle - 4.651412931 seconds time elapsed +TOTAL : 4.626382 sec + 14,341,300,654 cycles # 3.097 GHz + 38,435,549,708 instructions # 2.68 insn per cycle + 4.632329822 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -105,14 +105,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.177869e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.599623e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.599623e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.192819e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.611750e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.611750e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.154265 sec - 
6,685,752,776 cycles # 3.098 GHz - 16,110,356,106 instructions # 2.41 insn per cycle - 2.160882399 seconds time elapsed +TOTAL : 2.145322 sec + 6,664,477,255 cycles # 3.099 GHz + 16,109,970,801 instructions # 2.42 insn per cycle + 2.151392542 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -132,14 +132,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.429674e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.082873e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.082873e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.486569e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.087327e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.087327e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.240483 sec - 3,678,582,112 cycles # 2.952 GHz - 7,844,440,175 instructions # 2.13 insn per cycle - 1.247230152 seconds time elapsed +TOTAL : 1.231584 sec + 3,644,407,413 cycles # 2.947 GHz + 7,844,021,819 instructions # 2.15 insn per cycle + 1.237558832 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -159,14 +159,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
1.009134e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.170165e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.170165e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.007607e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.169689e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.169689e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.166839 sec - 3,458,050,283 cycles # 2.949 GHz - 7,453,516,502 instructions # 2.16 insn per cycle - 1.173343420 seconds time elapsed +TOTAL : 1.167898 sec + 3,439,867,530 cycles # 2.932 GHz + 7,453,045,112 instructions # 2.17 insn per cycle + 1.173934717 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.550922e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.388839e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.388839e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.499425e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.333958e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.333958e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.518265 sec - 3,271,259,339 cycles # 2.147 GHz - 6,100,060,320 instructions # 1.86 insn per cycle - 1.524663767 seconds time elapsed +TOTAL : 1.525803 sec + 3,266,944,202 cycles # 2.134 GHz + 6,100,961,311 instructions # 1.87 insn per cycle + 1.531830770 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index 8e628a3dfb..0acb7a9e73 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-28_13:07:24 +DATE: 2023-10-29_12:49:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.850693e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.663079e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.968425e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.844201e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.663361e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.970041e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 -TOTAL : 0.554614 sec - 2,337,932,283 cycles # 3.030 GHz - 3,419,248,560 instructions # 1.46 insn per cycle - 0.830772253 seconds time elapsed +TOTAL : 0.554826 sec + 2,327,069,146 cycles # 3.010 GHz + 3,431,677,365 instructions # 1.47 insn per cycle + 0.830631626 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 
2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.359790e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.436644e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.436644e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.321408e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.398513e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.398513e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 4.578859 sec - 14,314,881,164 cycles # 3.123 GHz - 38,421,819,073 instructions # 2.68 insn per cycle - 4.583864809 seconds time elapsed +TOTAL : 4.656867 sec + 14,315,574,695 cycles # 3.072 GHz + 38,422,268,817 instructions # 2.68 insn per cycle + 4.662075448 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.282449e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.720847e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.720847e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.231554e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.659617e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.659617e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.119646 sec - 
6,638,203,667 cycles # 3.125 GHz - 15,842,045,343 instructions # 2.39 insn per cycle - 2.124745744 seconds time elapsed +TOTAL : 2.139911 sec + 6,633,974,828 cycles # 3.094 GHz + 15,842,107,200 instructions # 2.39 insn per cycle + 2.145051071 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.312507e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.067826e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.067826e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.165587e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.054986e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.054986e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.263628 sec - 3,637,578,133 cycles # 2.869 GHz - 7,591,123,457 instructions # 2.09 insn per cycle - 1.268701108 seconds time elapsed +TOTAL : 1.286221 sec + 3,615,431,093 cycles # 2.801 GHz + 7,591,036,822 instructions # 2.10 insn per cycle + 1.291338242 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
1.030288e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.199567e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.199567e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.026813e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.192367e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.192367e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.154743 sec - 3,427,351,661 cycles # 2.957 GHz - 7,165,042,622 instructions # 2.09 insn per cycle - 1.159809196 seconds time elapsed +TOTAL : 1.160600 sec + 3,412,626,284 cycles # 2.929 GHz + 7,165,387,866 instructions # 2.10 insn per cycle + 1.165674406 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.696445e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.576864e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.576864e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.382760e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.206265e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.206265e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.499358 sec - 3,225,858,049 cycles # 2.146 GHz - 5,796,456,708 instructions # 1.80 insn per cycle - 1.504415012 seconds time elapsed +TOTAL : 1.556726 sec + 3,230,120,120 cycles # 2.069 GHz + 5,796,867,387 instructions # 1.79 insn per cycle + 1.561748387 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt index 3bd9388dc5..a7a3d3f332 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-28_13:04:15 +DATE: 2023-10-29_12:46:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.731022e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.640015e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.941204e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.856308e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.667298e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.972666e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.502219 sec - 2,186,499,046 cycles # 3.031 GHz - 3,425,706,749 instructions # 1.57 insn per cycle - 0.778799067 seconds time elapsed +TOTAL : 0.502947 sec + 2,165,124,054 cycles # 3.003 GHz + 3,377,369,122 instructions # 1.56 insn per cycle + 0.778634889 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 
2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.336409e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.412455e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.412455e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.334502e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.411133e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.411133e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.570402 sec - 14,291,029,733 cycles # 3.124 GHz - 38,392,351,818 instructions # 2.69 insn per cycle - 4.575464355 seconds time elapsed +TOTAL : 4.574512 sec + 14,158,114,653 cycles # 3.092 GHz + 38,394,139,315 instructions # 2.71 insn per cycle + 4.579594412 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.271732e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.704955e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.704955e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.120883e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.529305e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.529305e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.072263 sec - 
6,475,536,164 cycles # 3.118 GHz - 15,829,377,005 instructions # 2.44 insn per cycle - 2.077475049 seconds time elapsed +TOTAL : 2.130821 sec + 6,475,593,553 cycles # 3.033 GHz + 15,829,500,247 instructions # 2.44 insn per cycle + 2.135849031 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.640298e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.109649e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.109649e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.472598e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.087489e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.087489e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.169660 sec - 3,471,993,263 cycles # 2.958 GHz - 7,606,901,908 instructions # 2.19 insn per cycle - 1.174764224 seconds time elapsed +TOTAL : 1.191416 sec + 3,457,290,525 cycles # 2.891 GHz + 7,606,423,962 instructions # 2.20 insn per cycle + 1.196523405 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
1.030442e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.204624e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.204624e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.029636e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.194772e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.194772e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.100002 sec - 3,266,709,570 cycles # 2.958 GHz - 7,214,743,597 instructions # 2.21 insn per cycle - 1.105174715 seconds time elapsed +TOTAL : 1.100935 sec + 3,247,410,753 cycles # 2.938 GHz + 7,216,104,427 instructions # 2.22 insn per cycle + 1.106041400 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.680219e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.568964e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.568964e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.601959e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.469102e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.469102e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.447359 sec - 3,071,044,719 cycles # 2.116 GHz - 5,845,849,973 instructions # 1.90 insn per cycle - 1.452398624 seconds time elapsed +TOTAL : 1.460971 sec + 3,065,417,357 cycles # 2.092 GHz + 5,846,314,702 instructions # 1.91 insn per cycle + 1.466145311 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index d0ec1a38b7..8ab92e024f 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-28_13:01:10 +DATE: 2023-10-29_12:43:22 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -45,14 +45,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.738979e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.639875e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.941887e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.120390e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.647328e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.944985e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.608287 sec - 2,467,062,087 cycles # 2.979 GHz - 3,757,858,393 instructions # 1.52 insn per cycle - 0.885651916 seconds time elapsed +TOTAL : 0.602037 sec + 2,439,189,303 cycles # 2.975 GHz + 3,768,649,868 instructions # 1.55 insn per cycle + 0.877319776 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 
1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 @@ -71,14 +71,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.349040e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.426585e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.426585e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.334080e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.410077e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.410077e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.547341 sec - 14,160,240,733 cycles # 3.112 GHz - 38,393,288,556 instructions # 2.71 insn per cycle - 4.552620696 seconds time elapsed +TOTAL : 4.577301 sec + 14,152,405,326 cycles # 3.089 GHz + 38,392,358,857 instructions # 2.71 insn per cycle + 4.582307407 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -97,14 +97,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.243740e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.688643e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.688643e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.230806e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.656536e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.656536e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.083146 sec - 6,486,996,800 cycles # 
3.107 GHz - 15,829,233,655 instructions # 2.44 insn per cycle - 2.088258908 seconds time elapsed +TOTAL : 2.088193 sec + 6,472,075,371 cycles # 3.093 GHz + 15,830,276,009 instructions # 2.45 insn per cycle + 2.093482433 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -123,14 +123,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.559475e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.100525e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.100525e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.567124e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.097679e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.097679e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.179727 sec - 3,480,472,202 cycles # 2.939 GHz - 7,606,629,154 instructions # 2.19 insn per cycle - 1.185041458 seconds time elapsed +TOTAL : 1.180111 sec + 3,461,207,927 cycles # 2.925 GHz + 7,606,883,443 instructions # 2.20 insn per cycle + 1.185275654 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -149,14 +149,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.029601e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.198465e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.198465e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.023458e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.188704e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.188704e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.101379 sec - 3,263,097,653 cycles # 2.951 GHz - 7,215,542,606 instructions # 2.21 insn per cycle - 1.106656369 seconds time elapsed +TOTAL : 1.108734 sec + 3,249,015,929 cycles # 2.919 GHz + 7,215,561,707 instructions # 2.22 insn per cycle + 1.113795101 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -175,14 +175,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.313836e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.156037e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.156037e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.172083e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.957523e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.957523e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.519219 sec - 3,074,840,033 cycles # 2.018 GHz - 5,845,496,594 instructions # 1.90 insn per cycle - 1.524648411 seconds time elapsed +TOTAL : 1.547659 sec + 3,061,807,613 cycles # 1.974 GHz + 5,846,435,749 instructions # 1.91 insn per cycle + 1.552775097 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index 34bf784874..4a15365f27 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-28_12:10:59 +DATE: 2023-10-29_12:08:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.265049e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.609501e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.998796e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.073100e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.732202e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.024409e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.480347 sec - 2,111,279,511 cycles # 2.994 GHz - 3,024,090,036 instructions # 1.43 insn per cycle - 0.784163260 seconds time elapsed +TOTAL : 0.472590 sec + 2,049,324,950 cycles # 2.965 GHz + 2,988,118,481 instructions # 1.46 insn per cycle + 0.749805878 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 127 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.303169e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.377069e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.377069e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.286965e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.360277e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.360277e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.636900 sec - 14,424,355,368 cycles # 3.108 GHz - 39,885,077,384 instructions # 2.77 insn per cycle - 4.644496566 seconds time elapsed +TOTAL : 4.668841 sec + 14,415,454,850 cycles # 3.085 GHz + 39,884,803,964 instructions # 2.77 insn per cycle + 4.673868552 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 570) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.110121e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.708994e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.708994e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.067369e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.650323e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.650323e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.799612 sec - 5,599,106,784 cycles # 3.102 GHz - 
15,299,546,445 instructions # 2.73 insn per cycle - 1.816097337 seconds time elapsed +TOTAL : 1.811456 sec + 5,581,772,796 cycles # 3.075 GHz + 15,299,386,125 instructions # 2.74 insn per cycle + 1.816518063 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2473) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.842904e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.546331e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.546331e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.765373e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.445236e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.445236e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.615345 sec - 4,748,131,788 cycles # 2.929 GHz - 9,747,809,707 instructions # 2.05 insn per cycle - 1.630160731 seconds time elapsed +TOTAL : 1.632307 sec + 4,737,058,603 cycles # 2.894 GHz + 9,747,529,930 instructions # 2.06 insn per cycle + 1.637455307 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3710) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.000621e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 7.743607e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.743607e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.802562e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.489807e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.489807e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.580656 sec - 4,645,306,717 cycles # 2.929 GHz - 9,339,785,375 instructions # 2.01 insn per cycle - 1.597293322 seconds time elapsed +TOTAL : 1.624786 sec + 4,624,929,511 cycles # 2.839 GHz + 9,339,461,956 instructions # 2.02 insn per cycle + 1.629929018 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3497) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.218809e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.778035e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.778035e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.198562e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.758096e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.758096e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.768992 sec - 3,667,384,840 cycles # 2.067 GHz - 7,045,660,600 instructions # 1.92 insn per cycle - 1.780720848 seconds time elapsed +TOTAL : 1.775153 sec + 3,657,127,414 cycles # 2.055 GHz + 7,046,249,834 instructions # 1.93 insn per cycle + 1.780322386 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2606) (512y: 12) (512z: 2221) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index d1f9820c60..0b94ca6efd 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-28_12:45:51 +DATE: 2023-10-29_12:27:43 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.395245e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.633232e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.952696e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.748019e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.648137e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.948823e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.475068 sec - 2,125,544,542 cycles # 3.019 GHz - 3,039,422,682 instructions # 1.43 insn per cycle - 0.763100625 seconds time elapsed +TOTAL : 0.472128 sec + 2,084,648,086 cycles # 3.008 GHz + 3,003,071,400 instructions # 1.44 insn per cycle + 0.750693235 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.635104e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.731129e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.731129e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.622703e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.718199e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.718199e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.063498 sec - 12,606,497,091 cycles # 3.099 GHz - 34,393,343,234 instructions # 2.73 insn per cycle - 4.068923017 seconds time elapsed +TOTAL : 4.080807 sec + 12,597,466,937 cycles # 3.084 GHz + 34,393,194,192 instructions # 2.73 insn per cycle + 4.085827485 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 696) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.421440e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.912100e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.912100e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.593372e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.080103e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.080103e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.020800 sec - 6,096,062,133 cycles # 3.010 GHz - 
14,873,944,377 instructions # 2.44 insn per cycle - 2.026273725 seconds time elapsed +TOTAL : 1.957639 sec + 6,081,852,390 cycles # 3.100 GHz + 14,874,109,615 instructions # 2.45 insn per cycle + 1.962732822 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3009) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.606550e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.476550e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.476550e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.283773e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.100706e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.100706e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.461499 sec - 4,282,881,759 cycles # 2.921 GHz - 9,041,483,422 instructions # 2.11 insn per cycle - 1.466874367 seconds time elapsed +TOTAL : 1.523796 sec + 4,273,696,688 cycles # 2.804 GHz + 9,044,872,196 instructions # 2.12 insn per cycle + 1.528963268 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4445) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.796123e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 8.704814e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.704814e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.673227e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.553201e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.553201e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.427681 sec - 4,207,358,021 cycles # 2.938 GHz - 8,675,881,802 instructions # 2.06 insn per cycle - 1.433026005 seconds time elapsed +TOTAL : 1.450509 sec + 4,204,067,849 cycles # 2.891 GHz + 8,677,293,067 instructions # 2.06 insn per cycle + 1.455611235 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4244) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.812928e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.296844e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.296844e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.766332e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.255676e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.255676e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.886484 sec - 3,840,034,259 cycles # 2.031 GHz - 7,819,762,172 instructions # 2.04 insn per cycle - 1.892012346 seconds time elapsed +TOTAL : 1.903201 sec + 3,830,827,038 cycles # 2.008 GHz + 7,820,163,115 instructions # 2.04 insn per cycle + 1.908322170 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4420) (512y: 0) (512z: 2556) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index 6d76428629..0c4c8a0ffc 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-28_12:46:14 +DATE: 2023-10-29_12:28:06 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.439177e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.664854e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.005028e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.862319e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.711280e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.025159e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.474639 sec - 2,123,500,718 cycles # 3.023 GHz - 3,021,126,916 instructions # 1.42 insn per cycle - 0.761473128 seconds time elapsed +TOTAL : 0.472511 sec + 2,088,068,567 cycles # 3.006 GHz + 3,003,865,089 instructions # 1.44 insn per cycle + 0.751825355 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 127 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.805862e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.915686e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.915686e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.787349e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.894785e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.894785e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 3.820220 sec - 11,763,429,045 cycles # 3.076 GHz - 35,129,372,862 instructions # 2.99 insn per cycle - 3.825554200 seconds time elapsed +TOTAL : 3.845279 sec + 11,754,758,395 cycles # 3.054 GHz + 35,130,095,118 instructions # 2.99 insn per cycle + 3.850208573 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 470) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.732850e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.252188e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.252188e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.718455e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.230618e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.230618e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.912996 sec - 5,953,621,834 cycles # 3.105 GHz - 
14,483,605,781 instructions # 2.43 insn per cycle - 1.918402844 seconds time elapsed +TOTAL : 1.917322 sec + 5,948,729,930 cycles # 3.096 GHz + 14,483,816,532 instructions # 2.43 insn per cycle + 1.922400974 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.835313e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.791994e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.791994e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.874050e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.799685e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.799685e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.419591 sec - 4,168,513,241 cycles # 2.927 GHz - 8,887,420,483 instructions # 2.13 insn per cycle - 1.425011758 seconds time elapsed +TOTAL : 1.413124 sec + 4,150,858,124 cycles # 2.928 GHz + 8,888,765,602 instructions # 2.14 insn per cycle + 1.418367650 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3576) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.943761e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 8.903919e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.903919e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.916161e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.854206e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.854206e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.402150 sec - 4,141,058,901 cycles # 2.943 GHz - 8,423,656,753 instructions # 2.03 insn per cycle - 1.407495281 seconds time elapsed +TOTAL : 1.406270 sec + 4,132,950,739 cycles # 2.930 GHz + 8,425,066,086 instructions # 2.04 insn per cycle + 1.411315775 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3320) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.010939e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.537087e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.537087e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.981602e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.498052e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.498052e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.827232 sec - 3,785,415,923 cycles # 2.066 GHz - 7,712,477,428 instructions # 2.04 insn per cycle - 1.832679832 seconds time elapsed +TOTAL : 1.835868 sec + 3,793,142,232 cycles # 2.062 GHz + 7,713,295,934 instructions # 2.03 insn per cycle + 1.841064820 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3436) (512y: 0) (512z: 2108) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 2f0874ecfc..3105030e00 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-28_12:11:23 +DATE: 2023-10-29_12:08:54 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.015001e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.133918e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.271091e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.131497e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.176298e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.272818e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.542650 sec - 2,324,509,629 cycles # 2.959 GHz - 3,256,545,431 instructions # 1.40 insn per cycle - 0.857692926 seconds time elapsed +TOTAL : 0.511653 sec + 2,228,669,200 cycles # 3.008 GHz + 3,208,057,648 instructions # 1.44 insn per cycle + 0.799834209 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.184366e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.248000e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.248000e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.156329e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.218093e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.218093e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.904088 sec - 15,277,347,047 cycles # 3.112 GHz - 38,637,646,747 instructions # 2.53 insn per cycle - 4.912776955 seconds time elapsed +TOTAL : 4.966196 sec + 15,259,732,402 cycles # 3.070 GHz + 38,638,245,819 instructions # 2.53 insn per cycle + 4.971450935 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 672) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.683087e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.882735e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.882735e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.635241e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.828042e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.828042e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.950252 sec - 8,963,470,963 cycles # 3.033 GHz - 
24,239,446,713 instructions # 2.70 insn per cycle - 2.964919841 seconds time elapsed +TOTAL : 2.987841 sec + 8,928,273,750 cycles # 2.984 GHz + 24,239,215,531 instructions # 2.71 insn per cycle + 2.993088872 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.870970e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.393157e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.393157e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.602670e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.086544e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.086544e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.890520 sec - 5,524,353,941 cycles # 2.913 GHz - 11,287,940,554 instructions # 2.04 insn per cycle - 1.907409042 seconds time elapsed +TOTAL : 1.978132 sec + 5,419,631,994 cycles # 2.733 GHz + 11,287,865,943 instructions # 2.08 insn per cycle + 1.983532568 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2480) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.785531e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 7.472682e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.472682e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.761038e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.429385e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.429385e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.648735 sec - 4,852,342,037 cycles # 2.933 GHz - 10,537,728,595 instructions # 2.17 insn per cycle - 1.665451136 seconds time elapsed +TOTAL : 1.653810 sec + 4,846,105,915 cycles # 2.924 GHz + 10,538,364,912 instructions # 2.17 insn per cycle + 1.659126356 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2167) (512y: 148) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.233249e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.487319e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.487319e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.255470e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.509653e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.509653e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.582104 sec - 5,221,633,566 cycles # 2.018 GHz - 7,614,459,247 instructions # 1.46 insn per cycle - 2.598287782 seconds time elapsed +TOTAL : 2.566802 sec + 5,210,176,310 cycles # 2.027 GHz + 7,615,240,991 instructions # 1.46 insn per cycle + 2.572256684 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1608) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index 11aa7f6ec6..86fd27a7a6 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-28_12:11:49 +DATE: 2023-10-29_12:09:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesse Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.022201e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.140606e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.278579e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.147140e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.180559e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.277278e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.525470 sec - 2,284,667,887 cycles # 3.001 GHz - 3,270,255,248 instructions # 1.43 insn per cycle - 0.827459749 seconds time elapsed +TOTAL : 0.511444 sec + 2,244,424,459 cycles # 3.012 GHz + 3,230,920,098 instructions # 1.44 insn per cycle + 0.802219968 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 ==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -69,14 +69,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.135764e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.196535e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.196535e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.160319e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.222253e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.222253e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.013317 sec - 15,396,741,584 cycles # 3.072 GHz - 40,435,149,300 instructions # 2.63 insn per cycle - 5.021167793 seconds time elapsed +TOTAL : 4.957043 sec + 15,381,768,580 cycles # 3.101 GHz + 40,434,677,859 instructions # 2.63 insn per cycle + 4.962325085 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest.exe @@ -95,14 +95,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.949201e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.190131e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.190131e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.984906e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.218127e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.218127e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.758906 sec - 8,507,837,136 cycles # 3.077 GHz - 
23,269,627,166 instructions # 2.74 insn per cycle - 2.773638911 seconds time elapsed +TOTAL : 2.734927 sec + 8,488,131,666 cycles # 3.099 GHz + 23,269,666,895 instructions # 2.74 insn per cycle + 2.740313634 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2091) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.906071e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.272590e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.272590e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.169260e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.554990e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.554990e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.242681 sec - 6,250,535,355 cycles # 2.780 GHz - 12,974,014,205 instructions # 2.08 insn per cycle - 2.256899417 seconds time elapsed +TOTAL : 2.131213 sec + 6,244,993,149 cycles # 2.925 GHz + 12,974,275,312 instructions # 2.08 insn per cycle + 2.136479402 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2669) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest.exe @@ -147,14 +147,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.416534e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 5.841079e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.841079e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.404477e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.818375e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.818375e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.038525 sec - 5,936,264,395 cycles # 2.904 GHz - 12,249,816,592 instructions # 2.06 insn per cycle - 2.053886656 seconds time elapsed +TOTAL : 2.042856 sec + 5,910,711,102 cycles # 2.887 GHz + 12,249,599,385 instructions # 2.07 insn per cycle + 2.048130482 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2209) (512y: 296) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest.exe @@ -173,14 +173,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.902385e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.117912e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.117912e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.919702e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.138324e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.138324e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.791286 sec - 5,622,605,195 cycles # 2.011 GHz - 8,753,644,763 instructions # 1.56 insn per cycle - 2.807743774 seconds time elapsed +TOTAL : 2.777954 sec + 5,597,813,424 cycles # 2.012 GHz + 8,753,565,299 instructions # 1.56 insn per cycle + 2.783109376 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1909) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index ac3a60d645..901d2a6b79 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-28_12:12:16 +DATE: 2023-10-29_12:09:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.491851e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.048489e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.065167e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.965049e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.053793e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.067077e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.464403 sec - 2,055,005,532 cycles # 3.003 GHz - 2,945,523,651 instructions # 1.43 insn per cycle - 0.763693915 seconds time elapsed +TOTAL : 0.459845 sec + 2,017,798,845 cycles # 3.006 GHz + 2,871,664,576 instructions # 1.42 insn per cycle + 0.730427502 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 
==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.041676e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.319252e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.336391e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.121331e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.323555e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.335068e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.631721 sec - 2,560,791,285 cycles # 2.938 GHz - 3,869,794,857 instructions # 1.51 insn per cycle - 0.932528663 seconds time elapsed +TOTAL : 0.588290 sec + 2,481,557,979 cycles # 3.021 GHz + 3,747,764,070 instructions # 1.51 insn per cycle + 0.880057164 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.590737e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.603375e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.603375e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.552316e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.564667e+04 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.564667e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.346844 sec - 19,691,623,987 cycles # 3.101 GHz - 59,609,931,902 instructions # 3.03 insn per cycle - 6.353105445 seconds time elapsed +TOTAL : 6.441317 sec + 19,792,438,479 cycles # 3.071 GHz + 59,610,211,281 instructions # 3.01 insn per cycle + 6.445390765 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.850627e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.895082e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.895082e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.806418e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.849994e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.849994e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.399637 sec - 10,373,383,384 cycles # 3.048 GHz - 30,679,256,260 instructions # 2.96 insn per cycle - 3.411245943 seconds time elapsed +TOTAL : 3.430990 sec + 10,359,487,693 cycles # 3.016 GHz + 30,678,330,855 instructions # 2.96 insn per cycle + 3.435079540 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.906673e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.008657e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.008657e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.900243e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.008050e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.008050e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.675867 sec - 4,881,672,927 cycles # 2.905 GHz - 11,022,284,259 instructions # 2.26 insn per cycle - 1.690244837 seconds time elapsed +TOTAL : 1.676763 sec + 4,883,160,122 cycles # 2.907 GHz + 11,021,724,288 instructions # 2.26 insn per cycle + 1.680921849 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.106994e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.129605e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.129605e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.107653e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.130101e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.130101e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.502203 sec - 4,365,694,763 cycles # 2.898 GHz - 10,299,343,112 instructions # 2.36 insn per cycle - 1.520133716 seconds time elapsed +TOTAL : 1.501039 sec + 4,367,829,020 cycles # 2.903 GHz + 10,298,584,119 instructions # 2.36 insn per cycle + 1.505199219 seconds time elapsed =Symbols in 
CPPProcess.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.816900e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.928266e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.928266e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.510567e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.621371e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.621371e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.120206 sec - 4,099,203,773 cycles # 1.930 GHz - 5,846,578,086 instructions # 1.43 insn per cycle - 2.134050769 seconds time elapsed +TOTAL : 2.206299 sec + 4,102,761,181 cycles # 1.857 GHz + 5,845,915,507 instructions # 1.42 insn per cycle + 2.210468411 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index 4842553fb4..cd7df250dc 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-28_12:55:24 +DATE: 2023-10-29_12:37:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.651766e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.773341e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.773341e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.693217e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.926702e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.926702e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.486524 sec - 2,128,896,563 cycles # 3.007 GHz - 3,188,482,728 instructions # 1.50 insn per cycle - 0.764931683 seconds time elapsed +TOTAL : 0.487867 sec + 2,121,769,325 cycles # 3.008 GHz + 3,150,750,186 instructions # 1.48 insn per cycle + 0.763645513 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,14 +72,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.775476e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.664612e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.664612e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.791626e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.637606e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.637606e+06 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.810802 sec - 3,201,081,444 cycles # 3.028 GHz - 5,073,955,185 instructions # 1.59 insn per cycle - 1.119893797 seconds time elapsed +TOTAL : 0.807637 sec + 3,237,204,957 cycles # 3.044 GHz + 5,104,521,537 instructions # 1.58 insn per cycle + 1.126122223 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -95,14 +95,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.560332e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.572938e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.572938e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.554780e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.567292e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.567292e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.428951 sec - 19,727,712,174 cycles # 3.067 GHz - 59,617,621,937 
instructions # 3.02 insn per cycle - 6.433180767 seconds time elapsed +TOTAL : 6.442685 sec + 19,734,537,817 cycles # 3.065 GHz + 59,621,480,724 instructions # 3.02 insn per cycle + 6.446840072 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe @@ -122,14 +122,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.907398e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.953143e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.953143e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.790530e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.835100e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.835100e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.367858 sec - 10,401,729,083 cycles # 3.085 GHz - 30,728,311,698 instructions # 2.95 insn per cycle - 3.372306535 seconds time elapsed +TOTAL : 3.448787 sec + 10,392,677,425 cycles # 3.010 GHz + 30,726,890,032 instructions # 2.96 insn per cycle + 3.453282456 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe @@ -149,14 +149,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.870573e+04 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.005545e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.005545e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.761124e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.945111e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.945111e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.688103 sec - 4,916,085,294 cycles # 2.906 GHz - 11,070,223,626 instructions # 2.25 insn per cycle - 1.692497649 seconds time elapsed +TOTAL : 1.707553 sec + 4,918,690,921 cycles # 2.875 GHz + 11,070,254,110 instructions # 2.25 insn per cycle + 1.711850509 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe @@ -176,14 +176,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.076865e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.099535e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.099535e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.105869e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.128347e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.128347e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.550953 sec - 4,408,008,369 cycles # 2.835 GHz - 10,347,644,406 instructions # 2.35 insn per cycle - 1.555438151 seconds time elapsed +TOTAL : 1.510363 sec + 4,398,214,508 cycles # 2.905 GHz + 10,349,345,987 instructions # 2.35 insn per cycle + 1.514719920 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe @@ -203,14 +203,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.815543e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.928414e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.928414e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.811562e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.924385e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.924385e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.127425 sec - 4,135,455,550 cycles # 1.941 GHz - 5,885,269,641 instructions # 1.42 insn per cycle - 2.131933938 seconds time elapsed +TOTAL : 2.128054 sec + 4,134,017,356 cycles # 1.939 GHz + 5,885,473,567 instructions # 1.42 insn per cycle + 2.132603203 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index cf30fbcb6b..1c0f440c0f 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-28_12:12:45 +DATE: 2023-10-29_12:10:16 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.459634e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.041339e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.057620e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.914584e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.046212e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.059276e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.461274 sec - 2,035,058,176 cycles # 2.994 GHz - 2,910,655,702 instructions # 1.43 insn per cycle - 0.748318644 seconds time elapsed +TOTAL : 0.458950 sec + 2,025,257,806 cycles # 3.014 GHz + 2,838,342,308 instructions # 1.40 insn per cycle + 0.729521236 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.036992e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
1.311164e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.327920e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.114844e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.314618e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.326004e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.636889 sec - 2,559,684,656 cycles # 2.916 GHz - 3,844,382,418 instructions # 1.50 insn per cycle - 0.939027525 seconds time elapsed +TOTAL : 0.588803 sec + 2,478,020,834 cycles # 3.022 GHz + 3,797,746,550 instructions # 1.53 insn per cycle + 0.880923119 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.623434e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.636310e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.636310e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.622100e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.634932e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.634932e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.267582 sec - 19,473,438,826 cycles # 3.105 GHz - 58,801,451,427 instructions # 3.02 insn per cycle - 6.273946156 seconds time elapsed +TOTAL : 6.270409 sec + 19,469,015,028 cycles # 3.103 GHz + 58,801,088,362 instructions # 3.02 insn per cycle + 6.274552233 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1313) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.962469e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.008667e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.008667e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.995468e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.041449e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.041449e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.324999 sec - 10,252,488,773 cycles # 3.081 GHz - 30,350,737,589 instructions # 2.96 insn per cycle - 3.337113139 seconds time elapsed +TOTAL : 3.301245 sec + 10,235,413,277 cycles # 3.097 GHz + 30,349,704,494 instructions # 2.97 insn per cycle + 3.305304820 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4970) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.439128e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.606330e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.606330e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.573009e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.744339e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.744339e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.757963 sec - 5,047,493,240 cycles # 2.866 
GHz - 11,487,255,325 instructions # 2.28 insn per cycle - 1.853025437 seconds time elapsed +TOTAL : 1.733129 sec + 5,040,264,850 cycles # 2.903 GHz + 11,485,470,427 instructions # 2.28 insn per cycle + 1.737235033 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4591) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.719624e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.915479e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.915479e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.040240e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.060102e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.060102e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.709373 sec - 4,660,423,247 cycles # 2.721 GHz - 10,845,585,186 instructions # 2.33 insn per cycle - 1.724795893 seconds time elapsed +TOTAL : 1.596698 sec + 4,645,794,162 cycles # 2.904 GHz + 10,844,658,112 instructions # 2.33 insn per cycle + 1.600851477 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4183) (512y: 244) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.690449e+04 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.805921e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.805921e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.683288e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.795724e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.795724e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.154846 sec - 4,120,296,518 cycles # 1.909 GHz - 6,110,489,071 instructions # 1.48 insn per cycle - 2.165568305 seconds time elapsed +TOTAL : 2.157094 sec + 4,112,611,788 cycles # 1.904 GHz + 6,110,068,673 instructions # 1.49 insn per cycle + 2.161179215 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1457) (512y: 139) (512z: 3568) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 0e3d8cfa3e..17451a57e1 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-28_12:13:13 +DATE: 2023-10-29_12:10:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.345370e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.273428e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.386360e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.540736e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.325871e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.412262e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.445126 sec - 1,976,167,592 cycles # 2.999 GHz - 2,793,525,701 instructions # 1.41 insn per cycle - 0.728815394 seconds time elapsed +TOTAL : 0.439495 sec + 1,960,974,135 cycles # 3.010 GHz + 2,773,234,745 instructions # 1.41 insn per cycle + 0.708896009 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.016231e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
4.359096e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.456445e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.414003e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.468594e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.537406e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.503583 sec - 2,178,928,712 cycles # 2.984 GHz - 3,140,973,434 instructions # 1.44 insn per cycle - 0.787862044 seconds time elapsed +TOTAL : 0.487228 sec + 2,128,818,676 cycles # 3.001 GHz + 3,089,467,042 instructions # 1.45 insn per cycle + 0.767090383 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.651079e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.664793e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.664793e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.655254e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.668863e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.668863e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.201173 sec - 19,089,624,643 cycles # 3.077 GHz - 58,966,843,094 instructions # 3.09 insn per cycle - 6.209145363 seconds time elapsed +TOTAL : 6.191108 sec + 19,080,061,273 cycles # 3.080 GHz + 58,965,657,443 instructions # 3.09 insn per cycle + 6.195129418 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.775960e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.926769e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.926769e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.508212e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.656701e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.656701e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.887122 sec - 5,852,693,907 cycles # 3.095 GHz - 16,697,748,914 instructions # 2.85 insn per cycle - 1.897838524 seconds time elapsed +TOTAL : 1.946211 sec + 5,855,964,511 cycles # 3.004 GHz + 16,697,592,998 instructions # 2.85 insn per cycle + 1.950434492 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5766) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.888328e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.956615e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.956615e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.883140e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.951820e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.951820e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 0.888101 sec - 2,589,524,409 cycles # 2.903 GHz 
- 5,983,645,878 instructions # 2.31 insn per cycle - 0.900109709 seconds time elapsed +TOTAL : 0.890090 sec + 2,589,711,743 cycles # 2.898 GHz + 5,983,428,902 instructions # 2.31 insn per cycle + 0.894597805 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.084640e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.169189e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.169189e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.992770e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.073104e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.073104e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 0.806247 sec - 2,349,102,230 cycles # 2.899 GHz - 5,606,066,476 instructions # 2.39 insn per cycle - 0.818170477 seconds time elapsed +TOTAL : 0.843057 sec + 2,349,625,279 cycles # 2.776 GHz + 5,605,182,274 instructions # 2.39 insn per cycle + 0.847063086 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4645) (512y: 36) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.602986e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.653593e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.653593e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.587481e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.635187e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.635187e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.044676 sec - 2,054,220,665 cycles # 1.959 GHz - 3,336,552,151 instructions # 1.62 insn per cycle - 1.055605597 seconds time elapsed +TOTAL : 1.053998 sec + 2,052,367,530 cycles # 1.941 GHz + 3,335,862,386 instructions # 1.63 insn per cycle + 1.057979764 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2131) (512y: 39) (512z: 3668) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index 00167e9c33..991838f582 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-28_12:55:52 +DATE: 2023-10-29_12:37:50 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.962239e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.083211e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.083211e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.108982e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.182336e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.182336e+07 ) sec^-1 MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 -TOTAL : 0.454514 sec - 1,995,648,721 cycles # 3.004 GHz - 2,959,368,603 instructions # 1.48 insn per cycle - 0.721515462 seconds time elapsed +TOTAL : 0.453117 sec + 1,988,494,564 cycles # 3.009 GHz + 2,951,709,903 instructions # 1.48 insn per cycle + 0.719180627 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,14 +72,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.823828e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.651858e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.651858e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.832102e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.641855e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.641855e+07 ) sec^-1 MeanMatrixElemValue = ( 6.737500e+02 +- 4.776370e+02 ) GeV^-2 -TOTAL : 0.625007 sec - 2,587,638,379 cycles # 3.027 GHz - 3,960,735,556 instructions # 1.53 insn per cycle - 0.911552982 seconds time elapsed +TOTAL : 0.627359 sec + 2,595,598,338 cycles # 3.023 GHz + 3,998,508,208 instructions # 1.54 insn per cycle + 0.916218226 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -95,14 +95,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.653085e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.666871e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.666871e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.663266e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.676995e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.676995e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.205408 sec - 19,114,978,719 cycles # 3.081 GHz - 58,970,550,642 
instructions # 3.09 insn per cycle - 6.209466695 seconds time elapsed +TOTAL : 6.175351 sec + 19,098,383,794 cycles # 3.091 GHz + 58,969,986,603 instructions # 3.09 insn per cycle + 6.179430024 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe @@ -122,14 +122,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.537033e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.687202e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.687202e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.759009e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.910670e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.910670e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.943360 sec - 5,877,835,029 cycles # 3.024 GHz - 16,747,344,219 instructions # 2.85 insn per cycle - 1.947590837 seconds time elapsed +TOTAL : 1.894239 sec + 5,869,520,004 cycles # 3.093 GHz + 16,744,812,460 instructions # 2.85 insn per cycle + 1.898329669 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5766) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe @@ -149,14 +149,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.886015e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.954104e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.954104e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.880282e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.947896e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.947896e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 0.892848 sec - 2,604,659,563 cycles # 2.906 GHz - 6,019,606,678 instructions # 2.31 insn per cycle - 0.896969494 seconds time elapsed +TOTAL : 0.895429 sec + 2,603,817,732 cycles # 2.896 GHz + 6,019,674,166 instructions # 2.31 insn per cycle + 0.899627318 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe @@ -176,14 +176,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.092631e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.175440e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.175440e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.079121e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.162628e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.162628e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 0.807239 sec - 2,364,999,004 cycles # 2.917 GHz - 5,642,015,436 instructions # 2.39 insn per cycle - 0.811339455 seconds time elapsed +TOTAL : 0.812546 sec + 2,366,541,582 cycles # 2.900 GHz + 5,641,622,908 instructions # 2.38 insn per cycle + 0.816726130 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4645) (512y: 36) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest.exe @@ -203,14 +203,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.607496e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.656928e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.656928e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.601201e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.649761e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.649761e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.045238 sec - 2,072,357,024 cycles # 1.976 GHz - 3,377,232,840 instructions # 1.63 insn per cycle - 1.049434643 seconds time elapsed +TOTAL : 1.049478 sec + 2,071,790,293 cycles # 1.968 GHz + 3,377,452,170 instructions # 1.63 insn per cycle + 1.053461167 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2131) (512y: 39) (512z: 3668) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index 39040526ae..4594e108f9 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-28_12:13:37 +DATE: 2023-10-29_12:11:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.323194e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.236667e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.354926e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.564685e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.375281e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.467611e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.443972 sec - 1,989,313,875 cycles # 2.982 GHz - 2,769,265,611 instructions # 1.39 insn per cycle - 0.743438086 seconds time elapsed +TOTAL : 0.441255 sec + 1,955,562,909 cycles # 3.007 GHz + 2,787,251,095 instructions # 1.43 insn per cycle + 0.709632874 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 248 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.045346e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
4.393796e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.492274e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.351322e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.360521e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.430524e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.502060 sec - 2,167,795,646 cycles # 2.981 GHz - 3,129,751,096 instructions # 1.44 insn per cycle - 0.786114053 seconds time elapsed +TOTAL : 0.486324 sec + 2,131,058,818 cycles # 3.002 GHz + 3,095,610,657 instructions # 1.45 insn per cycle + 0.766843952 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.674630e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.688687e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.688687e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.669588e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.683524e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.683524e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.146503 sec - 19,066,983,066 cycles # 3.100 GHz - 58,708,098,711 instructions # 3.08 insn per cycle - 6.152927808 seconds time elapsed +TOTAL : 6.157456 sec + 18,971,452,097 cycles # 3.080 GHz + 58,707,978,021 instructions # 3.09 insn per cycle + 6.161349222 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1029) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.978466e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.142783e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.142783e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.176492e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.344165e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.344165e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.844883 sec - 5,595,751,190 cycles # 3.027 GHz - 16,514,228,946 instructions # 2.95 insn per cycle - 1.855734095 seconds time elapsed +TOTAL : 1.804802 sec + 5,594,484,187 cycles # 3.094 GHz + 16,513,342,463 instructions # 2.95 insn per cycle + 1.808828925 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5552) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.638498e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.689068e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.689068e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.641560e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.692808e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.692808e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 1.019971 sec - 2,975,890,134 cycles # 2.906 GHz 
- 6,637,794,121 instructions # 2.23 insn per cycle - 1.028369498 seconds time elapsed +TOTAL : 1.018009 sec + 2,972,835,880 cycles # 2.910 GHz + 6,636,773,662 instructions # 2.23 insn per cycle + 1.022091022 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5568) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.777694e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.837035e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.837035e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.775150e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.835465e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.835465e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 0.941610 sec - 2,755,233,571 cycles # 2.914 GHz - 6,258,993,798 instructions # 2.27 insn per cycle - 0.951640144 seconds time elapsed +TOTAL : 0.942822 sec + 2,750,756,278 cycles # 2.908 GHz + 6,258,176,694 instructions # 2.28 insn per cycle + 0.946897871 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5279) (512y: 25) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.459473e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.500290e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.500290e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.467022e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.508165e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.508165e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.145362 sec - 2,230,246,069 cycles # 1.940 GHz - 3,701,275,572 instructions # 1.66 insn per cycle - 1.156145462 seconds time elapsed +TOTAL : 1.138853 sec + 2,225,844,994 cycles # 1.949 GHz + 3,700,498,375 instructions # 1.66 insn per cycle + 1.142939136 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2378) (512y: 29) (512z: 3963) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 9ab422dbe3..f54a59d5fe 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-28_12:14:01 +DATE: 2023-10-29_12:11:32 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.410216e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.039188e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.055496e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.905164e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.044247e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.057360e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.463271 sec - 2,069,250,958 cycles # 3.003 GHz - 2,927,480,970 instructions # 1.41 insn per cycle - 0.772082200 seconds time elapsed +TOTAL : 0.462541 sec + 1,972,895,984 cycles # 2.929 GHz + 2,903,856,371 instructions # 1.47 insn per cycle + 0.732236757 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.035124e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
1.310358e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.327324e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.120425e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.322230e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.333819e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.614913 sec - 2,526,856,265 cycles # 2.922 GHz - 3,709,441,692 instructions # 1.47 insn per cycle - 0.922727767 seconds time elapsed +TOTAL : 0.594644 sec + 2,492,989,257 cycles # 3.011 GHz + 3,840,726,093 instructions # 1.54 insn per cycle + 0.886407043 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.556257e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.568688e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.568688e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.530579e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.542965e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.542965e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.433051 sec - 20,008,557,894 cycles # 3.109 GHz - 60,540,006,479 instructions # 3.03 insn per cycle - 6.439212233 seconds time elapsed +TOTAL : 6.497943 sec + 20,014,842,539 cycles # 3.079 GHz + 60,540,154,499 instructions # 3.02 insn per cycle + 6.502158930 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1399) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.019070e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.065045e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.065045e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.010482e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.057204e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.057204e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.285972 sec - 10,187,016,958 cycles # 3.097 GHz - 30,390,859,032 instructions # 2.98 insn per cycle - 3.301352868 seconds time elapsed +TOTAL : 3.291332 sec + 10,183,408,251 cycles # 3.091 GHz + 30,388,862,594 instructions # 2.98 insn per cycle + 3.295386231 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5280) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.989285e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.017232e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.017232e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.889739e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.006782e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.006782e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.661873 sec - 4,866,855,961 cycles # 2.922 
GHz - 10,982,611,406 instructions # 2.26 insn per cycle - 1.676714535 seconds time elapsed +TOTAL : 1.678792 sec + 4,866,504,720 cycles # 2.893 GHz + 10,982,241,057 instructions # 2.26 insn per cycle + 1.682949636 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4623) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.106943e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.129653e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.129653e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.131863e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.155501e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.155501e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.502413 sec - 4,275,650,140 cycles # 2.838 GHz - 10,251,987,528 instructions # 2.40 insn per cycle - 1.514545077 seconds time elapsed +TOTAL : 1.469235 sec + 4,275,763,092 cycles # 2.903 GHz + 10,250,853,336 instructions # 2.40 insn per cycle + 1.473398330 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4279) (512y: 82) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.717947e+04 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 7.824687e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.824687e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.621843e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.728247e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.728247e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.146639 sec - 4,201,565,757 cycles # 1.954 GHz - 6,049,789,151 instructions # 1.44 insn per cycle - 2.158797101 seconds time elapsed +TOTAL : 2.173637 sec + 4,203,419,623 cycles # 1.931 GHz + 6,049,226,551 instructions # 1.44 insn per cycle + 2.177772868 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2065) (512y: 117) (512z: 3540) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index 76b7b412f7..a14cca51af 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-28_12:14:30 +DATE: 2023-10-29_12:12:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.390550e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.033270e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.048966e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.852098e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.037871e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.050490e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.460476 sec - 2,081,307,934 cycles # 3.031 GHz - 2,955,584,789 instructions # 1.42 insn per cycle - 0.756578458 seconds time elapsed +TOTAL : 0.467984 sec + 1,998,838,445 cycles # 2.896 GHz + 2,912,610,966 instructions # 1.46 insn per cycle + 0.748728175 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.034797e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
1.302411e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.318796e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.108040e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.305385e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.316521e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.602596 sec - 2,555,613,252 cycles # 3.028 GHz - 3,854,786,899 instructions # 1.51 insn per cycle - 0.903400095 seconds time elapsed +TOTAL : 0.586972 sec + 2,485,257,138 cycles # 3.003 GHz + 3,752,100,969 instructions # 1.51 insn per cycle + 0.885534164 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.567199e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.579456e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.579456e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.553890e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.566027e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.566027e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.404936 sec - 19,891,783,124 cycles # 3.104 GHz - 59,941,724,942 instructions # 3.01 insn per cycle - 6.411494032 seconds time elapsed +TOTAL : 6.437822 sec + 19,867,702,905 cycles # 3.085 GHz + 59,941,439,802 instructions # 3.02 insn per cycle + 6.441937921 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1276) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.101693e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.150119e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.150119e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.902669e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.948654e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.948654e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.233107 sec - 10,067,635,537 cycles # 3.110 GHz - 30,103,976,009 instructions # 2.99 insn per cycle - 3.244612142 seconds time elapsed +TOTAL : 3.363921 sec + 10,066,901,567 cycles # 2.990 GHz + 30,102,442,654 instructions # 2.99 insn per cycle + 3.368170033 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5082) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.657022e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.831623e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.831623e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.655442e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.828218e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.828218e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.718382 sec - 5,018,528,066 cycles # 2.914 
GHz - 11,487,169,268 instructions # 2.29 insn per cycle - 1.731257916 seconds time elapsed +TOTAL : 1.718351 sec + 5,011,970,294 cycles # 2.911 GHz + 11,485,977,959 instructions # 2.29 insn per cycle + 1.722449030 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4722) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.059601e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.080189e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.080189e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.050454e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.070781e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.070781e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.567855 sec - 4,593,189,329 cycles # 2.922 GHz - 10,814,244,535 instructions # 2.35 insn per cycle - 1.579062821 seconds time elapsed +TOTAL : 1.580973 sec + 4,589,831,248 cycles # 2.897 GHz + 10,813,098,589 instructions # 2.36 insn per cycle + 1.585135297 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4284) (512y: 234) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.560814e+04 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 7.667240e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.667240e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.633290e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.739141e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.739141e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.191040 sec - 4,225,946,509 cycles # 1.926 GHz - 6,280,173,074 instructions # 1.49 insn per cycle - 2.204926125 seconds time elapsed +TOTAL : 2.170052 sec + 4,216,147,141 cycles # 1.940 GHz + 6,278,929,582 instructions # 1.49 insn per cycle + 2.174195697 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1960) (512y: 163) (512z: 3617) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index b03fb05630..11c2c1ab32 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-28_12:14:58 +DATE: 2023-10-29_12:12:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.463578e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.497575e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.499993e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.522753e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.548627e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.550750e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.524913 sec - 2,275,162,229 cycles # 2.955 GHz - 3,553,819,604 instructions # 1.56 insn per cycle - 0.840621862 seconds time elapsed +TOTAL : 0.518045 sec + 2,253,392,327 cycles # 3.016 GHz + 3,541,073,735 instructions # 1.57 insn per cycle + 0.806095149 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.112401e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
4.153434e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.155135e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.133203e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.161116e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.162261e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.040153 sec - 10,119,184,300 cycles # 3.067 GHz - 22,331,058,292 instructions # 2.21 insn per cycle - 3.356275754 seconds time elapsed +TOTAL : 3.022651 sec + 10,067,556,986 cycles # 3.077 GHz + 21,723,155,404 instructions # 2.16 insn per cycle + 3.328454491 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.969742e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.970726e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.970726e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.960753e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.961679e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.961679e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.336811 sec - 25,629,457,447 cycles # 3.073 GHz - 78,942,027,813 instructions # 3.08 insn per cycle - 8.343510031 seconds time elapsed +TOTAL : 8.374018 sec + 25,670,008,434 cycles # 3.066 GHz + 78,945,127,859 instructions # 3.08 insn per cycle + 8.378169528 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.772848e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.776337e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.776337e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.761609e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.765116e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.765116e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.356161 sec - 12,939,207,595 cycles # 2.968 GHz - 39,285,045,357 instructions # 3.04 insn per cycle - 4.369724563 seconds time elapsed +TOTAL : 4.368935 sec + 12,930,681,857 cycles # 2.957 GHz + 39,284,393,094 instructions # 3.04 insn per cycle + 4.373129403 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.598110e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.615220e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.615220e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.588352e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.606225e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.606225e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.917837 sec - 5,575,445,996 
cycles # 2.903 GHz - 13,689,935,380 instructions # 2.46 insn per cycle - 1.930849564 seconds time elapsed +TOTAL : 1.918707 sec + 5,579,192,973 cycles # 2.903 GHz + 13,689,174,313 instructions # 2.45 insn per cycle + 1.922851888 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.808785e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.831336e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.831336e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.788874e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.811382e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.811382e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.681743 sec - 4,897,861,318 cycles # 2.907 GHz - 12,346,089,227 instructions # 2.52 insn per cycle - 1.694439322 seconds time elapsed +TOTAL : 1.684655 sec + 4,896,071,309 cycles # 2.900 GHz + 12,344,332,584 instructions # 2.52 insn per cycle + 1.688799152 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
7.719699e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.733791e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.733791e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.598473e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.612773e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.612773e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.135110 sec - 4,120,647,411 cycles # 1.929 GHz - 6,338,696,859 instructions # 1.54 insn per cycle - 2.148760737 seconds time elapsed +TOTAL : 2.167854 sec + 4,117,500,482 cycles # 1.897 GHz + 6,337,115,934 instructions # 1.54 insn per cycle + 2.172030896 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index c9f4681184..5fa11c735c 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-28_12:56:37 +DATE: 2023-10-29_12:38:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.159242e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.488413e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.488413e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.178531e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.491763e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.491763e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.510805 sec - 2,221,159,082 cycles # 3.005 GHz - 3,427,089,559 instructions # 1.54 insn per cycle - 0.799153737 seconds time elapsed +TOTAL : 0.509461 sec + 2,248,167,105 cycles # 3.025 GHz + 3,581,747,687 instructions # 1.59 insn per cycle + 0.803251034 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,14 +72,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.639589e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.106238e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.106238e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.638113e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.098100e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.098100e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.292866 sec - 10,669,679,246 cycles # 3.004 GHz - 21,499,927,390 instructions # 2.02 insn per cycle - 3.618753393 seconds time elapsed +TOTAL : 3.295657 sec + 10,940,660,237 cycles # 3.073 GHz + 21,897,117,038 instructions # 2.00 insn per cycle + 3.619731570 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -95,14 +95,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.982344e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.983366e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.983366e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.961024e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.962005e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.962005e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.287757 sec - 25,648,558,798 cycles # 3.094 GHz - 
78,947,940,131 instructions # 3.08 insn per cycle - 8.292307316 seconds time elapsed +TOTAL : 8.376264 sec + 25,654,322,049 cycles # 3.062 GHz + 78,950,853,496 instructions # 3.08 insn per cycle + 8.380554018 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -122,14 +122,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.761661e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.765162e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.765162e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.720177e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.723485e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.723485e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.372920 sec - 12,954,588,590 cycles # 2.961 GHz - 39,299,714,112 instructions # 3.03 insn per cycle - 4.377339776 seconds time elapsed +TOTAL : 4.421209 sec + 13,021,231,521 cycles # 2.943 GHz + 39,297,463,556 instructions # 3.02 insn per cycle + 4.425572459 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -149,14 +149,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.433253e+03 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 8.451841e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.451841e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.595418e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.613259e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.613259e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.958034 sec - 5,599,887,733 cycles # 2.856 GHz - 13,700,928,063 instructions # 2.45 insn per cycle - 1.962488309 seconds time elapsed +TOTAL : 1.921008 sec + 5,588,903,977 cycles # 2.904 GHz + 13,699,707,384 instructions # 2.45 insn per cycle + 1.925217698 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -176,14 +176,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.858350e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.882867e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.882867e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.790376e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.814781e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.814781e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.676908 sec - 4,910,211,168 cycles # 2.922 GHz - 12,355,246,497 instructions # 2.52 insn per cycle - 1.681230393 seconds time elapsed +TOTAL : 1.688290 sec + 4,906,672,712 cycles # 2.900 GHz + 12,355,095,289 instructions # 2.52 insn per cycle + 1.692593198 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -203,14 +203,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.731935e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.747245e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.747245e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.665562e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.680550e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.680550e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.134622 sec - 4,135,541,831 cycles # 1.934 GHz - 6,347,831,005 instructions # 1.53 insn per cycle - 2.139010667 seconds time elapsed +TOTAL : 2.152887 sec + 4,135,096,676 cycles # 1.918 GHz + 6,348,239,646 instructions # 1.54 insn per cycle + 2.157236836 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index d2163869aa..6e69eca414 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-28_13:07:47 +DATE: 2023-10-29_12:50:04 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.482945e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.509030e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.511080e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.488953e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.515779e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.518130e+05 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.503818 sec - 2,242,046,263 cycles # 3.022 GHz - 3,462,245,216 instructions # 1.54 insn per cycle - 0.810702032 seconds time elapsed +TOTAL : 0.505230 sec + 2,209,587,629 cycles # 3.008 GHz + 3,455,452,601 instructions # 1.56 insn per cycle + 0.795320871 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.144510e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 4.176604e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.177952e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.142087e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.173823e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.175176e+05 ) sec^-1 MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 3.117292 sec - 10,408,915,486 cycles # 3.090 GHz - 23,283,624,708 instructions # 2.24 insn per cycle - 3.424451185 seconds time elapsed +TOTAL : 3.119931 sec + 10,305,373,783 cycles # 3.060 GHz + 22,236,923,118 instructions # 2.16 insn per cycle + 3.427874463 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.975977e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.976973e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.976973e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.981434e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.982406e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.982406e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 8.311280 sec - 25,652,648,741 cycles # 3.086 GHz - 78,942,035,181 instructions # 3.08 insn per cycle - 8.315453587 seconds time elapsed +TOTAL : 8.288289 sec + 25,654,633,072 cycles # 3.094 GHz + 78,941,515,466 instructions # 3.08 insn per cycle + 8.292332759 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.766259e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.769789e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.769789e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.739349e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.742888e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.742888e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.364797 sec - 12,929,400,123 cycles # 2.960 GHz - 39,283,753,035 instructions # 3.04 insn per cycle - 4.368806594 seconds time elapsed +TOTAL : 4.396528 sec + 12,944,055,846 cycles # 2.943 GHz + 39,285,289,410 instructions # 3.04 insn per cycle + 4.400528334 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.603374e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.622598e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.622598e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.586617e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.603945e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.603945e+03 ) sec^-1 MeanMatrixElemValue = ( 
4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.917346 sec - 5,583,005,212 cycles # 2.907 GHz - 13,688,490,227 instructions # 2.45 insn per cycle - 1.921338026 seconds time elapsed +TOTAL : 1.920517 sec + 5,579,544,952 cycles # 2.900 GHz + 13,688,140,330 instructions # 2.45 insn per cycle + 1.924582456 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.863476e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.887220e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.887220e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.777785e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.801156e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.801156e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.673382 sec - 4,898,137,074 cycles # 2.921 GHz - 12,342,311,827 instructions # 2.52 insn per cycle - 1.677443337 seconds time elapsed +TOTAL : 1.687811 sec + 4,898,373,572 cycles # 2.897 GHz + 12,342,306,196 instructions # 2.52 insn per cycle + 1.691792505 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.760383e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.775362e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.775362e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.662466e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.677203e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.677203e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.124477 sec - 4,119,654,668 cycles # 1.936 GHz - 6,334,865,926 instructions # 1.54 insn per cycle - 2.128551172 seconds time elapsed +TOTAL : 2.151329 sec + 4,122,708,782 cycles # 1.914 GHz + 6,334,974,901 instructions # 1.54 insn per cycle + 2.155459573 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt index ee4bbf6a10..e3c3365666 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-28_13:04:37 +DATE: 2023-10-29_12:46:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.458933e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.484257e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.486595e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.470491e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.496835e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.498894e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.505513 sec - 2,142,738,680 cycles # 2.872 GHz - 3,291,927,054 instructions # 1.54 insn per cycle - 0.814547560 seconds time elapsed +TOTAL : 0.502429 sec + 2,239,802,104 cycles # 3.025 GHz + 3,519,837,635 instructions # 1.57 insn per cycle + 0.810113153 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.135835e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 4.167914e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.169281e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.144423e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.176580e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.177932e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.071645 sec - 9,484,592,815 cycles # 2.854 GHz - 19,957,614,748 instructions # 2.10 insn per cycle - 3.382302802 seconds time elapsed +TOTAL : 3.064197 sec + 10,177,900,222 cycles # 3.071 GHz + 23,744,760,110 instructions # 2.33 insn per cycle + 3.371080698 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.977445e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.978379e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.978379e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.958926e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.959917e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.959917e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.302845 sec - 25,643,681,639 cycles # 3.088 GHz - 78,943,468,215 instructions # 3.08 insn per cycle - 8.306861638 seconds time elapsed +TOTAL : 8.381577 sec + 25,640,723,341 cycles # 3.058 GHz + 78,941,748,743 instructions # 3.08 insn per cycle + 8.385627979 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.759581e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.763145e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.763145e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.732687e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.736090e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.736090e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.371292 sec - 12,933,873,362 cycles # 2.957 GHz - 39,285,530,682 instructions # 3.04 insn per cycle - 4.375381944 seconds time elapsed +TOTAL : 4.402944 sec + 12,923,451,618 cycles # 2.933 GHz + 39,285,462,858 instructions # 3.04 insn per cycle + 4.407052351 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.493227e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.509835e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.509835e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.554505e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.572181e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.572181e+03 ) sec^-1 MeanMatrixElemValue = ( 
4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.940350 sec - 5,576,631,784 cycles # 2.870 GHz - 13,689,769,110 instructions # 2.45 insn per cycle - 1.944523071 seconds time elapsed +TOTAL : 1.926338 sec + 5,575,425,154 cycles # 2.889 GHz + 13,689,061,442 instructions # 2.46 insn per cycle + 1.930424805 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.853148e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.876403e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.876403e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.784330e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.807171e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.807171e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.673469 sec - 4,897,482,597 cycles # 2.921 GHz - 12,344,356,559 instructions # 2.52 insn per cycle - 1.677557270 seconds time elapsed +TOTAL : 1.685516 sec + 4,893,132,186 cycles # 2.897 GHz + 12,344,280,166 instructions # 2.52 insn per cycle + 1.689579681 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.734174e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.748650e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.748650e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.645425e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.659225e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.659225e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.130291 sec - 4,113,955,714 cycles # 1.929 GHz - 6,336,883,103 instructions # 1.54 insn per cycle - 2.134326924 seconds time elapsed +TOTAL : 2.154439 sec + 4,120,191,344 cycles # 1.910 GHz + 6,336,875,208 instructions # 1.54 insn per cycle + 2.158495161 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index b75462246a..fcc45ae054 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-28_13:01:32 +DATE: 2023-10-29_12:43:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -45,14 +45,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.203179e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.505047e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.507166e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.227066e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.516305e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.518459e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.507425 sec - 2,256,484,054 cycles # 3.018 GHz - 3,532,292,777 instructions # 1.57 insn per cycle - 0.807552942 seconds time elapsed +TOTAL : 0.505902 sec + 2,232,711,490 cycles # 3.013 GHz + 3,431,478,383 instructions # 1.54 insn per cycle + 0.805137058 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -63,14 +63,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.752259e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.179632e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.180998e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.745719e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.173586e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.174904e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.188259 sec - 10,521,010,437 cycles # 3.060 GHz - 22,991,661,537 instructions # 2.19 insn per cycle - 3.496740114 seconds time elapsed +TOTAL : 3.183069 sec + 10,549,506,794 cycles # 3.073 GHz + 23,826,364,648 instructions # 2.26 insn per cycle + 3.489321072 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -85,14 +85,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.974734e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.975712e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.975712e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.963957e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.964914e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.964914e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.314402 sec - 25,652,011,614 cycles # 3.084 GHz - 78,942,019,789 
instructions # 3.08 insn per cycle - 8.318449049 seconds time elapsed +TOTAL : 8.359554 sec + 25,640,588,853 cycles # 3.066 GHz + 78,941,935,823 instructions # 3.08 insn per cycle + 8.363595068 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -111,14 +111,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.762034e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.765542e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.765542e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.752315e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.755741e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.755741e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.368601 sec - 12,923,718,733 cycles # 2.956 GHz - 39,284,266,866 instructions # 3.04 insn per cycle - 4.372599268 seconds time elapsed +TOTAL : 4.379894 sec + 12,924,609,236 cycles # 2.949 GHz + 39,284,528,815 instructions # 3.04 insn per cycle + 4.384005600 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -137,14 +137,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.585138e+03 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 8.602865e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.602865e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.534929e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.553394e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.553394e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.919799 sec - 5,575,045,532 cycles # 2.899 GHz - 13,689,091,754 instructions # 2.46 insn per cycle - 1.923833244 seconds time elapsed +TOTAL : 1.930859 sec + 5,574,251,796 cycles # 2.882 GHz + 13,689,065,070 instructions # 2.46 insn per cycle + 1.935072396 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -163,14 +163,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.709286e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.732700e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.732700e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.763710e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.787175e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.787175e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.698739 sec - 4,895,541,757 cycles # 2.876 GHz - 12,344,329,839 instructions # 2.52 insn per cycle - 1.702807761 seconds time elapsed +TOTAL : 1.688908 sec + 4,893,973,419 cycles # 2.892 GHz + 12,344,401,694 instructions # 2.52 insn per cycle + 1.692959328 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -189,14 +189,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.521763e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.535260e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.535260e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.661706e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.675864e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.675864e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.201467 sec - 4,123,996,359 cycles # 1.880 GHz - 6,337,725,442 instructions # 1.54 insn per cycle - 2.205598437 seconds time elapsed +TOTAL : 2.149704 sec + 4,115,580,289 cycles # 1.912 GHz + 6,336,846,442 instructions # 1.54 insn per cycle + 2.153669716 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index 285526afab..a515f7b22d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-28_12:15:34 +DATE: 2023-10-29_12:13:05 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.468074e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.501103e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.503439e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.471472e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.495309e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.497296e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.522423 sec - 2,258,157,328 cycles # 2.996 GHz - 3,505,967,169 instructions # 1.55 insn per cycle - 0.828984887 seconds time elapsed +TOTAL : 0.517888 sec + 2,245,194,212 cycles # 3.014 GHz + 3,545,171,662 instructions # 1.58 insn per cycle + 0.806452485 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.133945e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
4.175029e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.176516e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.140818e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.168858e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.170012e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.028645 sec - 10,079,761,797 cycles # 3.067 GHz - 21,246,273,087 instructions # 2.11 insn per cycle - 3.345878141 seconds time elapsed +TOTAL : 3.011124 sec + 10,016,658,248 cycles # 3.072 GHz + 21,347,400,055 instructions # 2.13 insn per cycle + 3.316522661 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.994496e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.995483e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.995483e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.946476e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.947448e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.947448e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.232531 sec - 25,587,628,331 cycles # 3.107 GHz - 78,714,235,393 instructions # 3.08 insn per cycle - 8.239130149 seconds time elapsed +TOTAL : 8.435258 sec + 25,604,767,793 cycles # 3.036 GHz + 78,717,648,044 instructions # 3.07 insn per cycle + 8.439335103 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4263) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.767187e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.770492e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.770492e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.746101e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.749574e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.749574e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.363131 sec - 12,895,914,281 cycles # 2.954 GHz - 39,231,307,091 instructions # 3.04 insn per cycle - 4.377380038 seconds time elapsed +TOTAL : 4.386760 sec + 12,904,358,144 cycles # 2.940 GHz + 39,232,199,416 instructions # 3.04 insn per cycle + 4.390963958 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:12949) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.462946e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.479149e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.479149e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.524004e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.541409e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.541409e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.947278 sec - 5,616,857,616 
cycles # 2.878 GHz - 13,804,844,198 instructions # 2.46 insn per cycle - 1.960556077 seconds time elapsed +TOTAL : 1.932734 sec + 5,610,108,653 cycles # 2.898 GHz + 13,803,411,847 instructions # 2.46 insn per cycle + 1.936962663 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11422) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.535336e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.556895e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.556895e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.639734e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.661711e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.661711e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.729893 sec - 4,961,309,130 cycles # 2.862 GHz - 12,470,429,498 instructions # 2.51 insn per cycle - 1.742792817 seconds time elapsed +TOTAL : 1.710371 sec + 4,960,089,633 cycles # 2.894 GHz + 12,469,602,432 instructions # 2.51 insn per cycle + 1.714544326 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10258) (512y: 240) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
7.735372e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.749607e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.749607e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.641349e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.655494e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.655494e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.130756 sec - 4,113,624,864 cycles # 1.928 GHz - 6,461,615,134 instructions # 1.57 insn per cycle - 2.143576859 seconds time elapsed +TOTAL : 2.155375 sec + 4,121,563,380 cycles # 1.909 GHz + 6,462,027,636 instructions # 1.57 insn per cycle + 2.159551855 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1647) (512y: 192) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index 3d8001724d..3549ac96e2 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-28_12:46:36 +DATE: 2023-10-29_12:28:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.232608e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.257163e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.259717e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.236458e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.259692e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.261510e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.537548 sec - 2,181,694,080 cycles # 2.844 GHz - 3,448,903,433 instructions # 1.58 insn per cycle - 0.824462802 seconds time elapsed +TOTAL : 0.525667 sec + 2,272,396,380 cycles # 3.025 GHz + 3,551,922,727 instructions # 1.56 insn per cycle + 0.808686600 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.772126e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
3.800608e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.801737e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.778016e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.804924e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.806042e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.306403 sec - 10,895,937,046 cycles # 3.061 GHz - 25,195,776,290 instructions # 2.31 insn per cycle - 3.618136138 seconds time elapsed +TOTAL : 3.281612 sec + 10,819,985,898 cycles # 3.062 GHz + 22,684,839,821 instructions # 2.10 insn per cycle + 3.590287386 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.475211e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.475697e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.475697e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.325840e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.326318e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.326318e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 36.656742 sec - 113,701,435,580 cycles # 3.102 GHz - 144,964,623,435 instructions # 1.27 insn per cycle - 36.660899030 seconds time elapsed +TOTAL : 37.921432 sec + 116,806,691,593 cycles # 3.081 GHz + 144,971,298,456 instructions # 1.24 insn per cycle + 37.925417296 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:21605) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.281464e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.284057e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.284057e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.265060e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.267841e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.267841e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.007154 sec - 14,757,637,040 cycles # 2.945 GHz - 37,577,566,768 instructions # 2.55 insn per cycle - 5.011445912 seconds time elapsed +TOTAL : 5.031972 sec + 14,725,469,626 cycles # 2.924 GHz + 37,577,727,036 instructions # 2.55 insn per cycle + 5.036076669 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:68118) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.834630e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.849176e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.849176e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.815685e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.829648e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.829648e+03 ) sec^-1 MeanMatrixElemValue = ( 
4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.103000 sec - 6,138,951,255 cycles # 2.915 GHz - 13,063,781,443 instructions # 2.13 insn per cycle - 2.107315848 seconds time elapsed +TOTAL : 2.107581 sec + 6,118,001,268 cycles # 2.898 GHz + 13,063,536,139 instructions # 2.14 insn per cycle + 2.111708008 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:46960) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.518402e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.540775e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.540775e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.935782e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.955743e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.955743e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.732398 sec - 5,064,058,853 cycles # 2.917 GHz - 11,441,974,869 instructions # 2.26 insn per cycle - 1.736638349 seconds time elapsed +TOTAL : 1.844918 sec + 5,058,291,283 cycles # 2.737 GHz + 11,442,442,223 instructions # 2.26 insn per cycle + 1.849050677 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:40434) (512y: 285) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.440975e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.454360e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.454360e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.921492e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.936664e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.936664e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.214373 sec - 3,981,991,377 cycles # 1.796 GHz - 5,944,372,586 instructions # 1.49 insn per cycle - 2.218672213 seconds time elapsed +TOTAL : 2.080077 sec + 3,980,326,008 cycles # 1.912 GHz + 5,944,493,220 instructions # 1.49 insn per cycle + 2.084182474 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2455) (512y: 337) (512z:39411) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index bc567145ef..1c09f892cc 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-28_12:47:43 +DATE: 2023-10-29_12:29:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.272416e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.297947e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.300035e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.235988e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.259270e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.261133e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.528298 sec - 2,302,052,159 cycles # 3.036 GHz - 3,620,830,785 instructions # 1.57 insn per cycle - 0.815937018 seconds time elapsed +TOTAL : 0.526496 sec + 2,247,303,462 cycles # 2.985 GHz + 3,499,379,368 instructions # 1.56 insn per cycle + 0.811083253 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.791562e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
3.820307e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.821476e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.792538e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.819690e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.820828e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.269219 sec - 10,870,262,953 cycles # 3.083 GHz - 25,015,120,646 instructions # 2.30 insn per cycle - 3.582561850 seconds time elapsed +TOTAL : 3.267329 sec + 10,790,118,888 cycles # 3.067 GHz + 22,896,159,672 instructions # 2.12 insn per cycle + 3.574848410 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.439633e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.440117e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.440117e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.406964e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.407451e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.407451e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 36.950462 sec - 114,331,814,225 cycles # 3.094 GHz - 145,554,942,207 instructions # 1.27 insn per cycle - 36.954719062 seconds time elapsed +TOTAL : 37.224548 sec + 114,578,349,510 cycles # 3.078 GHz + 145,557,021,090 instructions # 1.27 insn per cycle + 37.228737857 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:22248) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.203750e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.206253e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.206253e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.147318e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.149735e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.149735e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.128371 sec - 15,156,085,711 cycles # 2.954 GHz - 37,766,724,921 instructions # 2.49 insn per cycle - 5.132696438 seconds time elapsed +TOTAL : 5.219632 sec + 15,161,622,795 cycles # 2.903 GHz + 37,764,842,323 instructions # 2.49 insn per cycle + 5.223709363 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:68446) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.538689e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.553791e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.553791e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.909825e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.924704e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.924704e+03 ) sec^-1 MeanMatrixElemValue = ( 
4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.185324 sec - 6,179,617,797 cycles # 2.824 GHz - 12,898,959,576 instructions # 2.09 insn per cycle - 2.189901544 seconds time elapsed +TOTAL : 2.082644 sec + 6,000,819,060 cycles # 2.878 GHz + 12,898,583,718 instructions # 2.15 insn per cycle + 2.086890157 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:45929) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.387903e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.408655e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.408655e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.309651e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.329767e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.329767e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.756333 sec - 5,103,812,803 cycles # 2.900 GHz - 11,448,412,714 instructions # 2.24 insn per cycle - 1.760591620 seconds time elapsed +TOTAL : 1.770941 sec + 5,115,206,711 cycles # 2.883 GHz + 11,449,352,362 instructions # 2.24 insn per cycle + 1.775109224 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:40123) (512y: 219) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.063003e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.078866e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.078866e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.980771e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.996490e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.996490e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.043864 sec - 3,955,410,863 cycles # 1.932 GHz - 5,897,312,655 instructions # 1.49 insn per cycle - 2.048048607 seconds time elapsed +TOTAL : 2.064116 sec + 3,954,182,334 cycles # 1.913 GHz + 5,897,664,853 instructions # 1.49 insn per cycle + 2.068197382 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1971) (512y: 259) (512z:38937) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 3fbbac30c0..660075297a 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-28_12:16:10 +DATE: 2023-10-29_12:13:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.297352e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.358144e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.364157e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.339705e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.395741e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.400838e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.478722 sec - 2,084,997,410 cycles # 3.001 GHz - 3,026,333,377 instructions # 1.45 insn per cycle - 0.781511130 seconds time elapsed +TOTAL : 0.475763 sec + 2,057,672,410 cycles # 2.986 GHz + 3,052,713,366 instructions # 1.48 insn per cycle + 0.746581939 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.497417e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
8.585779e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.589540e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.555274e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.615884e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.618479e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.726967 sec - 6,020,957,355 cycles # 3.058 GHz - 11,595,600,563 instructions # 1.93 insn per cycle - 2.028330884 seconds time elapsed +TOTAL : 1.714580 sec + 5,941,986,620 cycles # 3.043 GHz + 11,613,062,987 instructions # 1.95 insn per cycle + 2.009554137 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.077603e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.078639e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.078639e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.052769e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.053796e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.053796e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.903289 sec - 24,637,646,455 cycles # 3.117 GHz - 78,134,944,640 instructions # 3.17 insn per cycle - 7.909956096 seconds time elapsed +TOTAL : 7.997684 sec + 24,647,826,294 cycles # 3.081 GHz + 78,133,404,833 instructions # 3.17 insn per cycle + 8.001567926 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.336898e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.349941e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.349941e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.444330e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.458278e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.458278e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.243261 sec - 6,492,799,837 cycles # 2.890 GHz - 20,126,015,053 instructions # 3.10 insn per cycle - 2.255396532 seconds time elapsed +TOTAL : 2.210880 sec + 6,475,959,596 cycles # 2.925 GHz + 20,124,226,877 instructions # 3.11 insn per cycle + 2.214850735 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.684888e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.692077e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.692077e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.684620e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.691679e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.691679e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.981200 sec - 2,874,990,715 cycles 
# 2.917 GHz - 6,992,497,699 instructions # 2.43 insn per cycle - 0.996477722 seconds time elapsed +TOTAL : 0.981226 sec + 2,838,391,865 cycles # 2.883 GHz + 6,991,632,737 instructions # 2.46 insn per cycle + 0.985252337 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.951838e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.960952e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.960952e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.936572e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.945981e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.945981e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.849043 sec - 2,488,916,995 cycles # 2.921 GHz - 6,299,592,317 instructions # 2.53 insn per cycle - 0.863735210 seconds time elapsed +TOTAL : 0.854580 sec + 2,487,675,347 cycles # 2.900 GHz + 6,298,671,273 instructions # 2.53 insn per cycle + 0.858538541 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.569339e+04 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.575404e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.575404e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.550707e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.556652e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.556652e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.053473 sec - 2,048,152,009 cycles # 1.937 GHz - 3,269,571,231 instructions # 1.60 insn per cycle - 1.068752344 seconds time elapsed +TOTAL : 1.065338 sec + 2,046,941,871 cycles # 1.915 GHz + 3,268,840,945 instructions # 1.60 insn per cycle + 1.069371555 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index eec6c5d5ad..4c036c99c3 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-28_12:57:13 +DATE: 2023-10-29_12:39:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.648710e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.349296e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.349296e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.664121e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.336972e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.336972e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.466841 sec - 2,045,819,276 cycles # 3.010 GHz - 3,040,046,648 instructions # 1.49 insn per cycle - 0.737733718 seconds time elapsed +TOTAL : 0.466129 sec + 2,040,389,926 cycles # 3.006 GHz + 3,051,497,739 instructions # 1.50 insn per cycle + 0.738056578 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,14 +72,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.258231e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.472706e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.472706e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.282897e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.477379e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.477379e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.870793 sec - 6,498,753,280 cycles # 3.085 GHz - 12,854,465,282 instructions # 1.98 insn per cycle - 2.162853434 seconds time elapsed +TOTAL : 1.865020 sec + 6,414,143,218 cycles # 3.055 GHz + 12,782,815,652 instructions # 1.99 insn per cycle + 2.156968870 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -95,14 +95,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.065545e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.066590e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.066590e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.043137e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.044146e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.044146e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.950146 sec - 24,648,239,076 cycles # 3.099 GHz - 78,137,109,320 
instructions # 3.17 insn per cycle - 7.954221606 seconds time elapsed +TOTAL : 8.037889 sec + 24,669,405,197 cycles # 3.070 GHz + 78,140,910,860 instructions # 3.17 insn per cycle + 8.041937243 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -122,14 +122,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.517104e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.531205e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.531205e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.455019e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.469395e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.469395e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.192162 sec - 6,488,348,501 cycles # 2.956 GHz - 20,134,095,524 instructions # 3.10 insn per cycle - 2.196338550 seconds time elapsed +TOTAL : 2.210219 sec + 6,484,247,623 cycles # 2.929 GHz + 20,133,288,157 instructions # 3.10 insn per cycle + 2.214394731 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -149,14 +149,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.705707e+04 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.712958e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.712958e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.682999e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.690944e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.690944e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.971759 sec - 2,847,061,553 cycles # 2.920 GHz - 7,001,791,044 instructions # 2.46 insn per cycle - 0.975993638 seconds time elapsed +TOTAL : 0.984973 sec + 2,848,560,725 cycles # 2.882 GHz + 7,001,581,075 instructions # 2.46 insn per cycle + 0.989096241 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -176,14 +176,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.951285e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.960917e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.960917e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.943649e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.953114e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.953114e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.850872 sec - 2,498,585,399 cycles # 2.925 GHz - 6,308,774,444 instructions # 2.52 insn per cycle - 0.854972211 seconds time elapsed +TOTAL : 0.854370 sec + 2,496,393,077 cycles # 2.910 GHz + 6,308,615,210 instructions # 2.53 insn per cycle + 0.858619330 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -203,14 +203,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.565587e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.571426e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.571426e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.563693e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.569738e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.569738e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.058377 sec - 2,055,976,932 cycles # 1.936 GHz - 3,279,231,714 instructions # 1.59 insn per cycle - 1.062483891 seconds time elapsed +TOTAL : 1.059279 sec + 2,056,168,365 cycles # 1.935 GHz + 3,279,162,711 instructions # 1.59 insn per cycle + 1.063407139 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index 8ce6fedd4e..c2530f89f8 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-28_13:08:23 +DATE: 2023-10-29_12:50:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.330360e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.378634e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.383801e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.347743e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.395808e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.400916e+05 ) sec^-1 MeanMatrixElemValue = ( 4.159397e-01 +- 3.238804e-01 ) GeV^-4 -TOTAL : 0.457712 sec - 2,028,900,157 cycles # 3.042 GHz - 3,005,011,671 instructions # 1.48 insn per cycle - 0.724345206 seconds time elapsed +TOTAL : 0.462025 sec + 2,013,008,518 cycles # 3.002 GHz + 3,022,893,991 instructions # 1.50 insn per cycle + 0.729380261 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.546691e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 8.615539e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.618553e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.555420e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.624293e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.627291e+05 ) sec^-1 MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 -TOTAL : 1.794327 sec - 6,222,688,386 cycles # 3.079 GHz - 13,066,262,618 instructions # 2.10 insn per cycle - 2.078637061 seconds time elapsed +TOTAL : 1.793765 sec + 6,172,368,596 cycles # 3.052 GHz + 12,517,770,206 instructions # 2.03 insn per cycle + 2.079455660 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.073035e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.074088e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.074088e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.032399e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.033363e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.033363e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 7.920868 sec - 24,634,444,908 cycles # 3.109 GHz - 78,134,162,346 instructions # 3.17 insn per cycle - 7.924730928 seconds time elapsed +TOTAL : 8.079561 sec + 24,788,148,262 cycles # 3.067 GHz + 78,134,537,045 instructions # 3.15 insn per cycle + 8.083480982 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.426264e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.439620e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.439620e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.472033e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.485911e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.485911e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 2.217152 sec - 6,477,828,190 cycles # 2.918 GHz - 20,122,699,568 instructions # 3.11 insn per cycle - 2.221055188 seconds time elapsed +TOTAL : 2.203671 sec + 6,480,606,755 cycles # 2.937 GHz + 20,122,571,094 instructions # 3.11 insn per cycle + 2.207543629 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.699966e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.706900e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.706900e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.687582e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.694663e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.694663e+04 ) sec^-1 MeanMatrixElemValue = ( 
4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.973586 sec - 2,840,357,768 cycles # 2.908 GHz - 6,988,961,639 instructions # 2.46 insn per cycle - 0.977359190 seconds time elapsed +TOTAL : 0.981076 sec + 2,842,016,272 cycles # 2.890 GHz + 6,991,595,681 instructions # 2.46 insn per cycle + 0.985023868 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.940736e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.949914e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.949914e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.927331e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.936367e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.936367e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.853581 sec - 2,490,276,301 cycles # 2.906 GHz - 6,295,146,517 instructions # 2.53 insn per cycle - 0.857457556 seconds time elapsed +TOTAL : 0.859579 sec + 2,494,530,880 cycles # 2.891 GHz + 6,295,460,796 instructions # 2.52 insn per cycle + 0.863521439 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.565463e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.571759e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.571759e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.562110e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.568108e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.568108e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 1.057170 sec - 2,052,021,691 cycles # 1.935 GHz - 3,266,951,787 instructions # 1.59 insn per cycle - 1.061150766 seconds time elapsed +TOTAL : 1.058783 sec + 2,050,259,238 cycles # 1.930 GHz + 3,265,106,043 instructions # 1.59 insn per cycle + 1.062570526 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt index 571b572aaa..bfdbf141be 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-28_13:05:13 +DATE: 2023-10-29_12:47:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.349128e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.398453e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.403644e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.336678e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.385312e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.390845e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.459486 sec - 2,027,052,955 cycles # 3.037 GHz - 3,029,950,855 instructions # 1.49 insn per cycle - 0.726440068 seconds time elapsed +TOTAL : 0.460742 sec + 2,007,672,018 cycles # 2.996 GHz + 3,015,512,037 instructions # 1.50 insn per cycle + 0.727627371 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.553265e+05 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 8.622123e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.625219e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.552495e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.620410e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.623354e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.743283 sec - 6,066,573,695 cycles # 3.087 GHz - 12,650,787,477 instructions # 2.09 insn per cycle - 2.025181321 seconds time elapsed +TOTAL : 1.747301 sec + 6,021,047,961 cycles # 3.053 GHz + 13,020,410,161 instructions # 2.16 insn per cycle + 2.028955284 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.081230e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.082284e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.082284e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.035464e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.036455e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.036455e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.888118 sec - 24,646,440,581 cycles # 3.123 GHz - 78,132,566,770 instructions # 3.17 insn per cycle - 7.892033942 seconds time elapsed +TOTAL : 8.065414 sec + 24,625,631,166 cycles # 3.052 GHz + 78,134,037,282 instructions # 3.17 insn per cycle + 8.069287807 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.477290e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.491849e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.491849e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.478619e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.492208e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.492208e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.201176 sec - 6,474,823,242 cycles # 2.937 GHz - 20,124,709,191 instructions # 3.11 insn per cycle - 2.205114496 seconds time elapsed +TOTAL : 2.201405 sec + 6,476,687,165 cycles # 2.939 GHz + 20,124,779,920 instructions # 3.11 insn per cycle + 2.205319131 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.708720e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.715921e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.715921e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.697170e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.704411e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.704411e+04 ) sec^-1 MeanMatrixElemValue = ( 
4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.967357 sec - 2,835,463,985 cycles # 2.921 GHz - 6,991,513,771 instructions # 2.47 insn per cycle - 0.971305594 seconds time elapsed +TOTAL : 0.974215 sec + 2,837,748,371 cycles # 2.904 GHz + 6,991,679,189 instructions # 2.46 insn per cycle + 0.978049576 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.953668e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.963014e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.963014e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.933335e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.942603e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.942603e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.846896 sec - 2,487,744,451 cycles # 2.926 GHz - 6,298,688,372 instructions # 2.53 insn per cycle - 0.850803162 seconds time elapsed +TOTAL : 0.856002 sec + 2,491,154,256 cycles # 2.899 GHz + 6,299,351,258 instructions # 2.53 insn per cycle + 0.859913657 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.569559e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.575559e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.575559e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.558338e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.564152e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.564152e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.052912 sec - 2,046,453,586 cycles # 1.938 GHz - 3,268,883,695 instructions # 1.60 insn per cycle - 1.056822517 seconds time elapsed +TOTAL : 1.060294 sec + 2,047,251,638 cycles # 1.926 GHz + 3,268,699,792 instructions # 1.60 insn per cycle + 1.064226963 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index 84420cb925..2a5bf9ccb4 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-28_13:02:08 +DATE: 2023-10-29_12:44:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -45,14 +45,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.735944e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.378490e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.383692e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.759670e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.404786e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.409990e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.461987 sec - 2,019,341,022 cycles # 3.003 GHz - 3,035,678,392 instructions # 1.50 insn per cycle - 0.729326652 seconds time elapsed +TOTAL : 0.462005 sec + 2,013,506,806 cycles # 2.996 GHz + 2,990,269,144 instructions # 1.49 insn per cycle + 0.729870478 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -63,14 +63,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.489474e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.620676e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.623699e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.496196e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.623060e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.626145e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.822380 sec - 6,280,496,809 cycles # 3.067 GHz - 12,717,918,907 instructions # 2.02 insn per cycle - 2.114324946 seconds time elapsed +TOTAL : 1.818626 sec + 6,254,482,556 cycles # 3.061 GHz + 13,143,194,522 instructions # 2.10 insn per cycle + 2.109333253 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -85,14 +85,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.044051e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.045069e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.045069e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.047583e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.048599e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.048599e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.031121 sec - 24,802,943,888 cycles # 3.087 GHz - 78,133,794,834 
instructions # 3.15 insn per cycle - 8.035042084 seconds time elapsed +TOTAL : 8.017612 sec + 24,631,242,469 cycles # 3.071 GHz + 78,133,422,190 instructions # 3.17 insn per cycle + 8.021430029 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -111,14 +111,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.497485e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.511473e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.511473e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.494353e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.508499e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.508499e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.194911 sec - 6,476,795,370 cycles # 2.946 GHz - 20,124,137,981 instructions # 3.11 insn per cycle - 2.198870375 seconds time elapsed +TOTAL : 2.196377 sec + 6,482,396,911 cycles # 2.947 GHz + 20,124,098,871 instructions # 3.10 insn per cycle + 2.200446498 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -137,14 +137,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.695476e+04 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.702451e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.702451e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.591759e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.598073e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.598073e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.974748 sec - 2,836,609,217 cycles # 2.900 GHz - 6,991,479,798 instructions # 2.46 insn per cycle - 0.978581955 seconds time elapsed +TOTAL : 1.039253 sec + 2,839,846,615 cycles # 2.725 GHz + 6,991,737,789 instructions # 2.46 insn per cycle + 1.043348940 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -163,14 +163,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.941700e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.951152e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.951152e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.936135e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.945522e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.945522e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.852448 sec - 2,487,448,718 cycles # 2.907 GHz - 6,298,548,472 instructions # 2.53 insn per cycle - 0.856362339 seconds time elapsed +TOTAL : 0.854950 sec + 2,487,610,203 cycles # 2.898 GHz + 6,298,632,003 instructions # 2.53 insn per cycle + 0.858842248 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -189,14 +189,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.564061e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.570228e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.570228e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.551589e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.557558e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.557558e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.056307 sec - 2,046,735,304 cycles # 1.932 GHz - 3,268,520,300 instructions # 1.60 insn per cycle - 1.060189386 seconds time elapsed +TOTAL : 1.064952 sec + 2,046,768,081 cycles # 1.916 GHz + 3,268,598,641 instructions # 1.60 insn per cycle + 1.068818152 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index 787629448b..d385f70da3 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-28_12:16:38 +DATE: 2023-10-29_12:14:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.363659e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.425249e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.431303e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.383891e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.429788e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.434430e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.476123 sec - 2,092,062,584 cycles # 3.022 GHz - 3,128,707,671 instructions # 1.50 insn per cycle - 0.781363036 seconds time elapsed +TOTAL : 0.474349 sec + 2,059,881,761 cycles # 3.002 GHz + 3,024,062,536 instructions # 1.47 insn per cycle + 0.744716236 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.479223e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
8.567586e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.570888e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.504646e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.564522e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.567260e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.722233 sec - 6,009,678,227 cycles # 3.078 GHz - 11,794,267,836 instructions # 1.96 insn per cycle - 2.011269808 seconds time elapsed +TOTAL : 1.716199 sec + 5,829,417,693 cycles # 3.001 GHz + 12,466,869,357 instructions # 2.14 insn per cycle + 2.001872021 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.083066e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.084131e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.084131e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.067051e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.068151e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.068151e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.881423 sec - 24,584,058,384 cycles # 3.118 GHz - 77,859,604,324 instructions # 3.17 insn per cycle - 7.887656228 seconds time elapsed +TOTAL : 7.942537 sec + 24,555,076,029 cycles # 3.091 GHz + 77,859,417,916 instructions # 3.17 insn per cycle + 7.946534296 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3113) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.586799e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.601341e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.601341e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.613412e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.627645e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.627645e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.169576 sec - 6,419,007,624 cycles # 2.953 GHz - 20,090,056,448 instructions # 3.13 insn per cycle - 2.180130900 seconds time elapsed +TOTAL : 2.161575 sec + 6,421,996,654 cycles # 2.967 GHz + 20,089,145,869 instructions # 3.13 insn per cycle + 2.165472003 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13452) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.648576e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.655314e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.655314e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.646851e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.653670e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.653670e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.002656 sec - 2,907,339,890 cycles 
# 2.887 GHz - 7,134,589,491 instructions # 2.45 insn per cycle - 1.018829694 seconds time elapsed +TOTAL : 1.003462 sec + 2,906,333,499 cycles # 2.887 GHz + 7,133,624,953 instructions # 2.45 insn per cycle + 1.007536703 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:12261) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.852798e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.860765e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.860765e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.841959e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.850853e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.850853e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.894143 sec - 2,595,831,103 cycles # 2.894 GHz - 6,442,852,308 instructions # 2.48 insn per cycle - 0.909262679 seconds time elapsed +TOTAL : 0.897962 sec + 2,596,205,021 cycles # 2.881 GHz + 6,442,595,414 instructions # 2.48 insn per cycle + 0.902015019 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11276) (512y: 27) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.497098e+04 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.502652e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.502652e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.487496e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.493089e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.493089e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.104659 sec - 2,130,443,648 cycles # 1.924 GHz - 3,431,736,168 instructions # 1.61 insn per cycle - 1.116162652 seconds time elapsed +TOTAL : 1.110124 sec + 2,119,315,659 cycles # 1.903 GHz + 3,430,495,450 instructions # 1.62 insn per cycle + 1.114069574 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2912) (512y: 22) (512z: 9647) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index e834e4352b..795c20d58d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-28_12:48:51 +DATE: 2023-10-29_12:30:46 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.611186e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.652246e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.656683e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.587240e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.625455e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.629547e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.483998 sec - 2,133,361,842 cycles # 3.024 GHz - 3,230,847,730 instructions # 1.51 insn per cycle - 0.765662726 seconds time elapsed +TOTAL : 0.481885 sec + 2,117,745,718 cycles # 3.003 GHz + 3,191,599,744 instructions # 1.51 insn per cycle + 0.764055287 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.742805e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
7.802168e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.804740e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.697338e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.753855e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.756261e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.850637 sec - 6,419,942,603 cycles # 3.075 GHz - 13,456,993,119 instructions # 2.10 insn per cycle - 2.144581389 seconds time elapsed +TOTAL : 1.849188 sec + 6,395,041,151 cycles # 3.062 GHz + 12,949,203,489 instructions # 2.02 insn per cycle + 2.144785110 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.894043e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.894913e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.894913e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.857461e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.858298e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.858298e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 27.832817 sec - 86,243,642,188 cycles # 3.098 GHz - 135,564,087,009 instructions # 1.57 insn per cycle - 27.836819083 seconds time elapsed +TOTAL : 28.007303 sec + 86,373,801,967 cycles # 3.084 GHz + 135,568,854,976 instructions # 1.57 insn per cycle + 28.011278361 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:15486) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.222408e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.235825e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.235825e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.194621e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.207786e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.207786e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.280065 sec - 6,777,455,200 cycles # 2.969 GHz - 19,387,279,978 instructions # 2.86 insn per cycle - 2.284257922 seconds time elapsed +TOTAL : 2.288323 sec + 6,770,534,097 cycles # 2.955 GHz + 19,387,402,589 instructions # 2.86 insn per cycle + 2.292435220 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:69680) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.519389e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.525011e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.525011e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.505364e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.510997e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.510997e+04 ) sec^-1 MeanMatrixElemValue = ( 
4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.087364 sec - 3,177,183,668 cycles # 2.913 GHz - 6,808,689,692 instructions # 2.14 insn per cycle - 1.091503470 seconds time elapsed +TOTAL : 1.097404 sec + 3,180,086,964 cycles # 2.889 GHz + 6,808,682,218 instructions # 2.14 insn per cycle + 1.101375815 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:49077) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.822546e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.830764e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.830764e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.816170e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.824092e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.824092e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.907774 sec - 2,649,067,915 cycles # 2.907 GHz - 5,986,896,271 instructions # 2.26 insn per cycle - 0.911854373 seconds time elapsed +TOTAL : 0.910721 sec + 2,645,627,902 cycles # 2.895 GHz + 5,987,348,973 instructions # 2.26 insn per cycle + 0.914651942 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:42677) (512y: 11) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.530439e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.536016e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.536016e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.416573e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.421711e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.421711e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.080207 sec - 2,073,980,394 cycles # 1.914 GHz - 3,501,334,924 instructions # 1.69 insn per cycle - 1.084229483 seconds time elapsed +TOTAL : 1.166189 sec + 2,076,570,720 cycles # 1.775 GHz + 3,501,568,805 instructions # 1.69 insn per cycle + 1.170642712 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5198) (512y: 3) (512z:44822) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index 20423fd7e0..f048f77dfa 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-28_12:49:42 +DATE: 2023-10-29_12:31:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.565977e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.605591e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.609830e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.544850e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.582821e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.586998e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.483247 sec - 2,122,376,478 cycles # 3.016 GHz - 3,184,647,472 instructions # 1.50 insn per cycle - 0.763868633 seconds time elapsed +TOTAL : 0.483630 sec + 2,128,300,469 cycles # 3.015 GHz + 3,245,581,722 instructions # 1.52 insn per cycle + 0.765205817 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.641371e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
7.700550e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.703083e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.646108e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.701977e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.704355e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.858297 sec - 6,447,116,808 cycles # 3.079 GHz - 12,363,047,673 instructions # 1.92 insn per cycle - 2.150450600 seconds time elapsed +TOTAL : 1.855036 sec + 6,392,715,962 cycles # 3.063 GHz + 13,400,099,020 instructions # 2.10 insn per cycle + 2.145671488 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.905776e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.906669e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.906669e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.864707e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.865536e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.865536e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 27.777840 sec - 86,048,956,183 cycles # 3.097 GHz - 135,905,128,444 instructions # 1.58 insn per cycle - 27.781792264 seconds time elapsed +TOTAL : 27.971924 sec + 86,103,749,943 cycles # 3.078 GHz + 135,903,748,742 instructions # 1.58 insn per cycle + 27.975875216 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:15910) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.167806e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.180707e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.180707e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.109444e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.121851e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.121851e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.295484 sec - 6,842,998,319 cycles # 2.977 GHz - 19,439,245,864 instructions # 2.84 insn per cycle - 2.299471036 seconds time elapsed +TOTAL : 2.314460 sec + 6,844,368,116 cycles # 2.953 GHz + 19,439,406,048 instructions # 2.84 insn per cycle + 2.318438191 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:69722) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.556585e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.562569e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.562569e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.543754e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.549522e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.549522e+04 ) sec^-1 MeanMatrixElemValue = ( 
4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.061484 sec - 3,108,451,526 cycles # 2.919 GHz - 6,720,110,112 instructions # 2.16 insn per cycle - 1.065616350 seconds time elapsed +TOTAL : 1.070077 sec + 3,106,170,951 cycles # 2.894 GHz + 6,719,782,927 instructions # 2.16 insn per cycle + 1.073993053 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:47667) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.851774e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.860510e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.860510e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.835006e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.843354e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.843354e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.893334 sec - 2,622,654,556 cycles # 2.924 GHz - 5,970,160,518 instructions # 2.28 insn per cycle - 0.897509853 seconds time elapsed +TOTAL : 0.901457 sec + 2,624,433,972 cycles # 2.901 GHz + 5,970,449,410 instructions # 2.27 insn per cycle + 0.905462557 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:41842) (512y: 13) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.536258e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.541992e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.541992e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.532980e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.538727e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.538727e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.075819 sec - 2,079,698,802 cycles # 1.927 GHz - 3,494,793,411 instructions # 1.68 insn per cycle - 1.079910024 seconds time elapsed +TOTAL : 1.077395 sec + 2,079,358,899 cycles # 1.924 GHz + 3,494,735,958 instructions # 1.68 insn per cycle + 1.081309612 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4162) (512y: 4) (512z:44465) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index fc3929dd7c..e38e343007 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-28_12:17:07 +DATE: 2023-10-29_12:14:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.477554e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.512240e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.514677e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.484533e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.509488e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.511369e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.520333 sec - 2,274,021,507 cycles # 3.029 GHz - 3,581,381,947 instructions # 1.57 insn per cycle - 0.824092689 seconds time elapsed +TOTAL : 0.524873 sec + 2,222,686,446 cycles # 2.943 GHz + 3,555,209,130 instructions # 1.60 insn per cycle + 0.817130396 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.124375e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
4.158377e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.159762e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.129234e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.157079e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.158259e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.045972 sec - 10,220,318,610 cycles # 3.084 GHz - 23,185,856,250 instructions # 2.27 insn per cycle - 3.370823025 seconds time elapsed +TOTAL : 3.022745 sec + 9,960,554,491 cycles # 3.045 GHz + 21,965,328,321 instructions # 2.21 insn per cycle + 3.330581683 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.960978e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.961962e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.961962e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.955541e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.956517e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.956517e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.372619 sec - 25,927,701,919 cycles # 3.096 GHz - 79,445,735,322 instructions # 3.06 insn per cycle - 8.378788971 seconds time elapsed +TOTAL : 8.395958 sec + 25,922,435,513 cycles # 3.087 GHz + 79,445,850,764 instructions # 3.06 insn per cycle + 8.400030562 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4857) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.770608e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.774090e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.774090e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.710113e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.713502e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.713502e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.359859 sec - 12,705,941,284 cycles # 2.912 GHz - 38,554,881,381 instructions # 3.03 insn per cycle - 4.371809091 seconds time elapsed +TOTAL : 4.429333 sec + 12,654,536,981 cycles # 2.855 GHz + 38,554,693,632 instructions # 3.05 insn per cycle + 4.433473344 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13161) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.729702e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.747261e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.747261e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.603195e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.621501e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.621501e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.887926 sec - 5,515,713,882 
cycles # 2.915 GHz - 13,486,487,118 instructions # 2.45 insn per cycle - 1.900317335 seconds time elapsed +TOTAL : 1.915640 sec + 5,512,650,774 cycles # 2.872 GHz + 13,483,893,647 instructions # 2.45 insn per cycle + 1.920241775 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11242) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.908364e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.931425e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.931425e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.769210e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.792381e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.792381e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.665049 sec - 4,873,545,679 cycles # 2.921 GHz - 12,139,621,615 instructions # 2.49 insn per cycle - 1.681410579 seconds time elapsed +TOTAL : 1.687921 sec + 4,877,838,805 cycles # 2.884 GHz + 12,140,748,552 instructions # 2.49 insn per cycle + 1.692215253 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10154) (512y: 79) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
7.696915e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.711299e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.711299e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.633090e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.647067e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.647067e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.140292 sec - 4,143,085,794 cycles # 1.932 GHz - 6,339,665,426 instructions # 1.53 insn per cycle - 2.150706545 seconds time elapsed +TOTAL : 2.157889 sec + 4,141,783,103 cycles # 1.917 GHz + 6,338,750,212 instructions # 1.53 insn per cycle + 2.161967975 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1802) (512y: 93) (512z: 9358) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index b6547b5838..d11dc95778 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-28_12:17:43 +DATE: 2023-10-29_12:15:14 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.464615e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.498511e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.500865e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.478559e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.503352e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.505753e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.519765 sec - 2,251,399,245 cycles # 2.999 GHz - 3,530,654,299 instructions # 1.57 insn per cycle - 0.823876763 seconds time elapsed +TOTAL : 0.520804 sec + 2,184,791,372 cycles # 2.905 GHz + 3,376,581,256 instructions # 1.55 insn per cycle + 0.811785754 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.123316e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
4.157417e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.158825e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.149150e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.177326e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.178490e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.029825 sec - 10,142,198,468 cycles # 3.086 GHz - 22,026,892,679 instructions # 2.17 insn per cycle - 3.342406718 seconds time elapsed +TOTAL : 3.012932 sec + 9,968,846,939 cycles # 3.058 GHz + 20,627,018,072 instructions # 2.07 insn per cycle + 3.318612821 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.959558e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.960509e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.960509e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.961292e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.962230e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.962230e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.380125 sec - 25,929,437,882 cycles # 3.094 GHz - 79,456,770,369 instructions # 3.06 insn per cycle - 8.386665351 seconds time elapsed +TOTAL : 8.371207 sec + 25,929,706,357 cycles # 3.097 GHz + 79,453,596,939 instructions # 3.06 insn per cycle + 8.375366432 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4504) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.758484e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.761785e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.761785e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.718438e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.721949e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.721949e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.373624 sec - 12,641,537,268 cycles # 2.889 GHz - 38,525,722,075 instructions # 3.05 insn per cycle - 4.385176363 seconds time elapsed +TOTAL : 4.419301 sec + 12,657,926,561 cycles # 2.863 GHz + 38,526,904,585 instructions # 3.04 insn per cycle + 4.423427459 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:12928) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.515683e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.533176e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.533176e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.548085e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.565730e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.565730e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.935232 sec - 5,559,372,538 
cycles # 2.867 GHz - 13,612,500,451 instructions # 2.45 insn per cycle - 1.948093977 seconds time elapsed +TOTAL : 1.927664 sec + 5,555,221,826 cycles # 2.877 GHz + 13,611,096,098 instructions # 2.45 insn per cycle + 1.931892207 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11327) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.732819e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.754541e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.754541e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.741019e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.763202e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.763202e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.694082 sec - 4,916,495,574 cycles # 2.895 GHz - 12,276,847,031 instructions # 2.50 insn per cycle - 1.703505035 seconds time elapsed +TOTAL : 1.692726 sec + 4,914,547,918 cycles # 2.897 GHz + 12,273,936,309 instructions # 2.50 insn per cycle + 1.696947655 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10143) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
7.657791e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.671798e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.671798e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.519554e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.534279e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.534279e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.151504 sec - 4,144,116,107 cycles # 1.923 GHz - 6,445,788,205 instructions # 1.56 insn per cycle - 2.162091115 seconds time elapsed +TOTAL : 2.190306 sec + 4,149,749,215 cycles # 1.893 GHz + 6,445,825,629 instructions # 1.55 insn per cycle + 2.194442795 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1627) (512y: 191) (512z: 9356) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index c6205ff247..09b6b6cbc2 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-28_12:19:21 +DATE: 2023-10-29_12:17:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.071870e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.072287e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.072394e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.070904e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.071339e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.071439e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.452254 sec - 8,420,557,360 cycles # 3.080 GHz - 18,911,118,625 instructions # 2.25 insn per cycle - 2.866211248 seconds time elapsed +TOTAL : 2.419329 sec + 8,401,022,008 cycles # 3.070 GHz + 18,446,465,487 instructions # 2.20 insn per cycle + 2.795741903 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.214887e+03 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 9.217088e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.217267e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.274265e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.276198e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.276405e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.992517 sec - 13,332,757,802 cycles # 3.088 GHz - 28,702,527,926 instructions # 2.15 insn per cycle - 4.373029872 seconds time elapsed +TOTAL : 3.988228 sec + 13,257,455,915 cycles # 3.077 GHz + 28,856,097,279 instructions # 2.18 insn per cycle + 4.367813074 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.576463e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.576710e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.576710e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.432562e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.432797e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.432797e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.160763 sec - 18,791,392,134 cycles # 3.054 GHz - 53,919,306,390 instructions # 2.87 insn per cycle - 6.167140362 seconds time elapsed +TOTAL : 6.260779 sec + 18,791,566,046 cycles # 3.000 GHz + 53,915,354,209 instructions # 2.87 insn per cycle + 6.264697174 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32447) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.681047e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.681139e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.681139e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.653371e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.653459e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.653459e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.151993 sec - 9,791,713,411 cycles # 3.111 GHz - 27,094,036,390 instructions # 2.77 insn per cycle - 3.167590114 seconds time elapsed +TOTAL : 3.198531 sec + 9,852,246,030 cycles # 3.078 GHz + 27,093,859,659 instructions # 2.75 insn per cycle + 3.202672159 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96441) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.618906e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.619340e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.619340e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.591620e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.592034e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.592034e+02 ) sec^-1 MeanMatrixElemValue = ( 
1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.465759 sec - 4,246,522,704 cycles # 2.894 GHz - 9,563,071,696 instructions # 2.25 insn per cycle - 1.475977407 seconds time elapsed +TOTAL : 1.474879 sec + 4,234,752,993 cycles # 2.865 GHz + 9,561,163,315 instructions # 2.26 insn per cycle + 1.478749190 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.853446e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.853992e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.853992e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.887602e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.888172e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.888172e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.375453 sec - 3,787,170,163 cycles # 2.751 GHz - 8,487,038,074 instructions # 2.24 insn per cycle - 1.387462037 seconds time elapsed +TOTAL : 1.364222 sec + 3,707,284,516 cycles # 2.711 GHz + 8,485,408,067 instructions # 2.29 insn per cycle + 1.368451998 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.787651e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.788212e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.788212e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.751042e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.751588e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.751588e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.402148 sec - 2,691,943,688 cycles # 1.919 GHz - 4,274,166,135 instructions # 1.59 insn per cycle - 1.414716458 seconds time elapsed +TOTAL : 1.413671 sec + 2,691,576,824 cycles # 1.900 GHz + 4,272,725,454 instructions # 1.59 insn per cycle + 1.417549502 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 8107c36efe..4ed3f7613b 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-28_12:57:41 +DATE: 2023-10-29_12:39:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.066347e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.067227e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.067227e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.066172e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.067055e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.067055e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.360600 sec - 8,279,327,450 cycles # 3.086 GHz - 16,492,477,688 instructions # 1.99 insn per cycle - 2.742127313 seconds time elapsed +TOTAL : 2.361362 sec + 8,249,792,379 cycles # 3.082 GHz + 18,714,988,961 instructions # 2.27 insn per cycle + 2.737283147 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,14 +72,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.226166e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.258649e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.258649e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.194613e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.227944e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.227944e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.971227 sec - 13,317,272,497 cycles # 3.104 GHz - 28,133,706,677 instructions # 2.11 insn per cycle - 4.349584526 seconds time elapsed +TOTAL : 3.985972 sec + 13,161,697,114 cycles # 3.051 GHz + 27,638,979,539 instructions # 2.10 insn per cycle + 4.373152230 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -95,14 +95,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.644722e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.644991e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.644991e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.218003e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.218227e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.218227e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.112405 sec - 18,789,653,851 cycles # 3.073 GHz - 
53,915,458,578 instructions # 2.87 insn per cycle - 6.116157623 seconds time elapsed +TOTAL : 6.421574 sec + 18,985,323,768 cycles # 2.956 GHz + 53,918,389,087 instructions # 2.84 insn per cycle + 6.425486302 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32447) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe @@ -122,14 +122,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.677688e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.677784e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.677784e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.664145e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.664234e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.664234e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.152699 sec - 9,805,914,819 cycles # 3.107 GHz - 27,093,723,279 instructions # 2.76 insn per cycle - 3.156638918 seconds time elapsed +TOTAL : 3.177725 sec + 9,870,540,766 cycles # 3.103 GHz + 27,093,857,072 instructions # 2.74 insn per cycle + 3.181670599 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96441) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe @@ -149,14 +149,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.603040e+02 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.603470e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.603470e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.612290e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.612737e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.612737e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.470078 sec - 4,266,632,570 cycles # 2.896 GHz - 9,562,227,058 instructions # 2.24 insn per cycle - 1.474010313 seconds time elapsed +TOTAL : 1.466827 sec + 4,247,445,086 cycles # 2.892 GHz + 9,562,690,361 instructions # 2.25 insn per cycle + 1.470661148 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe @@ -176,14 +176,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.169608e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.170175e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.170175e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.128508e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.129106e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.129106e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.271323 sec - 3,724,193,335 cycles # 2.922 GHz - 8,486,137,795 instructions # 2.28 insn per cycle - 1.275148729 seconds time elapsed +TOTAL : 1.284387 sec + 3,711,655,436 cycles # 2.883 GHz + 8,486,499,784 instructions # 2.29 insn per cycle + 1.288300158 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe @@ -203,14 +203,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.791970e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.792558e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.792558e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.750791e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.751396e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.751396e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.397787 sec - 2,697,774,472 cycles # 1.926 GHz - 4,274,022,064 instructions # 1.58 insn per cycle - 1.401674381 seconds time elapsed +TOTAL : 1.413171 sec + 2,694,525,552 cycles # 1.903 GHz + 4,274,309,180 instructions # 1.59 insn per cycle + 1.417173229 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index 71518cb1d4..3b451f2364 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-28_12:20:23 +DATE: 2023-10-29_12:18:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.068790e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.069191e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.069278e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.064594e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.064974e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.065068e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.445436 sec - 8,480,438,455 cycles # 3.100 GHz - 18,702,623,557 instructions # 2.21 insn per cycle - 2.842114287 seconds time elapsed +TOTAL : 2.421165 sec + 8,377,767,733 cycles # 3.057 GHz + 18,112,400,064 instructions # 2.16 insn per cycle + 2.797669036 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.244121e+03 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 9.246349e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.246537e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.253070e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.254944e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.255199e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.982941 sec - 13,359,085,274 cycles # 3.097 GHz - 31,355,023,643 instructions # 2.35 insn per cycle - 4.372267861 seconds time elapsed +TOTAL : 3.994421 sec + 13,218,747,295 cycles # 3.060 GHz + 30,671,321,018 instructions # 2.32 insn per cycle + 4.375241473 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.736509e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.736803e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.736803e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.699023e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.699265e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.699265e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.052404 sec - 18,740,603,980 cycles # 3.097 GHz - 53,924,964,923 instructions # 2.88 insn per cycle - 6.058366205 seconds time elapsed +TOTAL : 6.076584 sec + 18,738,838,490 cycles # 3.082 GHz + 53,924,142,173 instructions # 2.88 insn per cycle + 6.080511022 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32062) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.673532e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.673621e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.673621e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.662035e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.662125e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.662125e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.163162 sec - 9,814,413,597 cycles # 3.103 GHz - 27,091,104,327 instructions # 2.76 insn per cycle - 3.176882467 seconds time elapsed +TOTAL : 3.182171 sec + 9,798,645,272 cycles # 3.077 GHz + 27,091,008,920 instructions # 2.76 insn per cycle + 3.186200458 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96284) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.627999e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.628425e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.628425e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.595306e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.595723e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.595723e+02 ) sec^-1 MeanMatrixElemValue = ( 
1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.460351 sec - 4,255,793,582 cycles # 2.906 GHz - 9,562,618,143 instructions # 2.25 insn per cycle - 1.470731981 seconds time elapsed +TOTAL : 1.473366 sec + 4,252,117,926 cycles # 2.880 GHz + 9,561,777,933 instructions # 2.25 insn per cycle + 1.477305688 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84478) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.164521e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.165096e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.165096e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.146612e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.147215e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.147215e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.274408 sec - 3,702,081,595 cycles # 2.900 GHz - 8,486,823,882 instructions # 2.29 insn per cycle - 1.284572895 seconds time elapsed +TOTAL : 1.278368 sec + 3,696,184,702 cycles # 2.884 GHz + 8,485,068,150 instructions # 2.30 insn per cycle + 1.282315101 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:80014) (512y: 241) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.775460e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.776104e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.776104e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.746863e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.747439e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.747439e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.408391 sec - 2,699,143,093 cycles # 1.918 GHz - 4,277,591,937 instructions # 1.58 insn per cycle - 1.420662603 seconds time elapsed +TOTAL : 1.414600 sec + 2,691,872,612 cycles # 1.899 GHz + 4,276,070,886 instructions # 1.59 insn per cycle + 1.418491985 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2169) (512y: 187) (512z:79110) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 9dc310742d..8847407ea7 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-28_12:21:25 +DATE: 2023-10-29_12:19:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.766191e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.767057e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.767366e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.748205e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.749064e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.749368e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.692928 sec - 5,925,290,822 cycles # 3.058 GHz - 11,860,098,541 instructions # 2.00 insn per cycle - 2.048557571 seconds time elapsed +TOTAL : 1.657323 sec + 5,883,101,931 cycles # 3.046 GHz + 12,231,013,529 instructions # 2.08 insn per cycle + 1.988512157 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.317262e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 2.318069e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.318159e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.320925e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.321576e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.321673e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.922569 sec - 6,794,142,356 cycles # 3.085 GHz - 14,404,197,197 instructions # 2.12 insn per cycle - 2.259188183 seconds time elapsed +TOTAL : 1.909241 sec + 6,669,861,142 cycles # 3.055 GHz + 14,651,528,210 instructions # 2.20 insn per cycle + 2.242162305 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.093620e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.093900e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.093900e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.053372e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.053641e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.053641e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.812755 sec - 17,848,464,955 cycles # 3.071 GHz - 53,589,690,094 instructions # 3.00 insn per cycle - 5.819102168 seconds time elapsed +TOTAL : 5.836144 sec + 17,844,888,779 cycles # 3.057 GHz + 53,590,137,383 instructions # 3.00 insn per cycle + 5.839998800 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.603564e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.604045e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.604045e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.519009e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.519443e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.519443e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.474993 sec - 4,564,620,966 cycles # 3.098 GHz - 13,764,219,630 instructions # 3.02 insn per cycle - 1.487863434 seconds time elapsed +TOTAL : 1.504216 sec + 4,560,264,533 cycles # 3.025 GHz + 13,762,435,708 instructions # 3.02 insn per cycle + 1.508179950 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.193712e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.195612e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.195612e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.166323e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.168014e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.168014e+02 ) sec^-1 MeanMatrixElemValue = ( 
1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.740890 sec - 2,143,163,497 cycles # 2.884 GHz - 4,818,517,443 instructions # 2.25 insn per cycle - 0.752590497 seconds time elapsed +TOTAL : 0.741671 sec + 2,140,718,629 cycles # 2.873 GHz + 4,816,751,289 instructions # 2.25 insn per cycle + 0.745632802 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.737224e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.739389e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.739389e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.250142e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.252420e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.252420e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.690292 sec - 1,933,646,307 cycles # 2.799 GHz - 4,275,590,953 instructions # 2.21 insn per cycle - 0.703133859 seconds time elapsed +TOTAL : 0.645201 sec + 1,871,116,487 cycles # 2.886 GHz + 4,273,766,880 instructions # 2.28 insn per cycle + 0.649139975 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.583424e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.585692e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.585692e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.519444e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.521804e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.521804e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.704935 sec - 1,355,788,663 cycles # 1.924 GHz - 2,160,286,717 instructions # 1.59 insn per cycle - 0.718020553 seconds time elapsed +TOTAL : 0.707511 sec + 1,353,512,664 cycles # 1.905 GHz + 2,158,758,417 instructions # 1.59 insn per cycle + 0.711344847 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2878) (512y: 49) (512z:79298) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index b2fb322b17..413ba84a91 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-28_12:58:43 +DATE: 2023-10-29_12:40:54 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.797057e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.798838e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.798838e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.801150e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.802938e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.802938e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187094e-05 +- 9.825664e-06 ) GeV^-6 -TOTAL : 1.588978 sec - 5,768,872,372 cycles # 3.086 GHz - 12,418,111,463 instructions # 2.15 insn per cycle - 1.925967720 seconds time elapsed +TOTAL : 1.589274 sec + 5,679,053,908 cycles # 3.038 GHz + 12,232,335,932 instructions # 2.15 insn per cycle + 1.926217189 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,14 +72,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.292423e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.304760e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.304760e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.293744e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.306424e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.306424e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856441e-04 +- 8.331096e-05 ) GeV^-6 -TOTAL : 1.888734 sec - 6,669,230,038 cycles # 3.083 GHz - 13,356,652,627 instructions # 2.00 insn per cycle - 2.222852759 seconds time elapsed +TOTAL : 1.865258 sec + 6,453,622,607 cycles # 3.013 GHz + 13,520,931,367 instructions # 2.10 insn per cycle + 2.202906400 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -95,14 +95,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.069650e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.069942e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.069942e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.142136e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.142409e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.142409e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.826523 sec - 17,892,365,561 cycles # 3.071 GHz - 
53,591,944,867 instructions # 3.00 insn per cycle - 5.830371536 seconds time elapsed +TOTAL : 5.780398 sec + 17,852,413,963 cycles # 3.087 GHz + 53,589,586,977 instructions # 3.00 insn per cycle + 5.784180836 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe @@ -122,14 +122,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.615160e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.615596e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.615596e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.602113e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.602529e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.602529e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.465426 sec - 4,558,917,635 cycles # 3.104 GHz - 13,763,183,900 instructions # 3.02 insn per cycle - 1.469300644 seconds time elapsed +TOTAL : 1.471274 sec + 4,560,991,212 cycles # 3.093 GHz + 13,763,276,208 instructions # 3.02 insn per cycle + 1.475196467 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe @@ -149,14 +149,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.255421e+02 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 7.257318e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.257318e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.236655e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.238396e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.238396e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.732719 sec - 2,140,098,411 cycles # 2.908 GHz - 4,817,587,358 instructions # 2.25 insn per cycle - 0.736572783 seconds time elapsed +TOTAL : 0.735565 sec + 2,141,109,995 cycles # 2.900 GHz + 4,817,785,277 instructions # 2.25 insn per cycle + 0.739384819 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe @@ -176,14 +176,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.330916e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.333248e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.333248e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.288287e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.290822e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.290822e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.639274 sec - 1,869,539,608 cycles # 2.909 GHz - 4,274,774,583 instructions # 2.29 insn per cycle - 0.643187647 seconds time elapsed +TOTAL : 0.641860 sec + 1,869,918,713 cycles # 2.899 GHz + 4,274,756,971 instructions # 2.29 insn per cycle + 0.645746716 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe @@ -203,14 +203,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.637115e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.639420e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.639420e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.520397e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.522901e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.522901e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.697331 sec - 1,353,326,359 cycles # 1.932 GHz - 2,159,426,349 instructions # 1.60 insn per cycle - 0.701134241 seconds time elapsed +TOTAL : 0.708206 sec + 1,353,775,636 cycles # 1.903 GHz + 2,159,770,339 instructions # 1.60 insn per cycle + 0.712072895 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2878) (512y: 49) (512z:79298) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index 6397113aec..ebccc0c834 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-28_12:22:12 +DATE: 2023-10-29_12:20:22 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.773318e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.774160e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.774427e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.753299e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.754100e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.754395e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.691164 sec - 5,903,886,418 cycles # 3.060 GHz - 12,160,870,690 instructions # 2.06 insn per cycle - 2.042006435 seconds time elapsed +TOTAL : 1.655949 sec + 5,893,067,214 cycles # 3.058 GHz + 12,179,862,167 instructions # 2.07 insn per cycle + 1.986764530 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.324828e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 2.325611e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.325701e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.323799e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.324454e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.324538e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.915037 sec - 6,720,144,465 cycles # 3.071 GHz - 13,982,732,338 instructions # 2.08 insn per cycle - 2.247579041 seconds time elapsed +TOTAL : 1.911647 sec + 6,676,941,639 cycles # 3.054 GHz + 14,605,352,455 instructions # 2.19 insn per cycle + 2.243360479 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.040248e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.040519e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.040519e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.994054e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.994324e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.994324e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.845855 sec - 17,904,729,960 cycles # 3.064 GHz - 53,581,464,420 instructions # 2.99 insn per cycle - 5.852779763 seconds time elapsed +TOTAL : 5.872133 sec + 17,902,279,003 cycles # 3.048 GHz + 53,581,437,653 instructions # 2.99 insn per cycle + 5.876005280 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:20206) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.592707e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.593153e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.593153e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.562387e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.562823e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.562823e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.477239 sec - 4,555,009,734 cycles # 3.081 GHz - 13,757,278,633 instructions # 3.02 insn per cycle - 1.489285896 seconds time elapsed +TOTAL : 1.486270 sec + 4,581,432,807 cycles # 3.076 GHz + 13,755,740,802 instructions # 3.00 insn per cycle + 1.490385140 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96606) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.103208e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.104897e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.104897e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.182291e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.184034e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.184034e+02 ) sec^-1 MeanMatrixElemValue = ( 
1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.750606 sec - 2,150,954,766 cycles # 2.860 GHz - 4,820,576,058 instructions # 2.24 insn per cycle - 0.762790911 seconds time elapsed +TOTAL : 0.739624 sec + 2,146,683,166 cycles # 2.889 GHz + 4,819,009,119 instructions # 2.24 insn per cycle + 0.743638234 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:85359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.251964e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.254359e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.254359e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.147676e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.149876e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.149876e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.646435 sec - 1,876,373,828 cycles # 2.895 GHz - 4,277,306,801 instructions # 2.28 insn per cycle - 0.658084791 seconds time elapsed +TOTAL : 0.652733 sec + 1,877,606,148 cycles # 2.862 GHz + 4,275,907,242 instructions # 2.28 insn per cycle + 0.656694870 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:81075) (512y: 26) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.508540e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.511102e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.511102e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.261873e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.264108e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.264108e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.711485 sec - 1,359,019,145 cycles # 1.910 GHz - 2,166,493,278 instructions # 1.59 insn per cycle - 0.722274294 seconds time elapsed +TOTAL : 0.733532 sec + 1,359,235,476 cycles # 1.844 GHz + 2,164,942,486 instructions # 1.59 insn per cycle + 0.737505632 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3475) (512y: 34) (512z:79492) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 9f2163d6ce..e0163c34ba 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-28_12:22:58 +DATE: 2023-10-29_12:21:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.691460e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.691972e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.692221e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.683760e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.684333e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.684498e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.212922 sec - 7,535,076,445 cycles # 3.027 GHz - 16,467,490,131 instructions # 2.19 insn per cycle - 2.613325869 seconds time elapsed +TOTAL : 2.167476 sec + 7,602,719,129 cycles # 3.057 GHz + 15,576,720,436 instructions # 2.05 insn per cycle + 2.543599173 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.113760e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 1.114078e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.114109e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.112366e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.112635e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.112665e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.392868 sec - 11,401,569,365 cycles # 3.066 GHz - 26,441,417,037 instructions # 2.32 insn per cycle - 3.775155527 seconds time elapsed +TOTAL : 3.396701 sec + 11,407,687,365 cycles # 3.067 GHz + 25,837,749,358 instructions # 2.26 insn per cycle + 3.776238208 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.934954e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.935169e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.935169e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.003883e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.004091e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.004091e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.660710 sec - 19,098,835,520 cycles # 2.868 GHz - 54,153,520,068 instructions # 2.84 insn per cycle - 6.666941649 seconds time elapsed +TOTAL : 6.602461 sec + 19,113,434,872 cycles # 2.894 GHz + 54,152,340,955 instructions # 2.83 insn per cycle + 6.606261064 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32066) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.618452e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.618538e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.618538e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.622659e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.622745e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.622745e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.267639 sec - 9,481,668,630 cycles # 2.899 GHz - 26,160,746,738 instructions # 2.76 insn per cycle - 3.280820285 seconds time elapsed +TOTAL : 3.260329 sec + 9,430,303,290 cycles # 2.890 GHz + 26,158,904,223 instructions # 2.77 insn per cycle + 3.264308908 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96005) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.802442e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.802928e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.802928e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.791270e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.791761e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.791761e+02 ) sec^-1 MeanMatrixElemValue = ( 
1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.394161 sec - 4,059,860,002 cycles # 2.904 GHz - 9,228,244,008 instructions # 2.27 insn per cycle - 1.405054842 seconds time elapsed +TOTAL : 1.398894 sec + 4,041,947,049 cycles # 2.883 GHz + 9,227,684,782 instructions # 2.28 insn per cycle + 1.402820243 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84155) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.378380e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.378995e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.378995e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.375691e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.376429e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.376429e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.212246 sec - 3,518,495,788 cycles # 2.894 GHz - 8,175,753,097 instructions # 2.32 insn per cycle - 1.226324202 seconds time elapsed +TOTAL : 1.212124 sec + 3,516,816,252 cycles # 2.893 GHz + 8,175,115,382 instructions # 2.32 insn per cycle + 1.216050239 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:79844) (512y: 79) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.812511e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.813101e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.813101e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.662873e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.663431e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.663431e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.394533 sec - 2,662,670,072 cycles # 1.910 GHz - 4,156,543,307 instructions # 1.56 insn per cycle - 1.409486843 seconds time elapsed +TOTAL : 1.445998 sec + 2,655,437,043 cycles # 1.832 GHz + 4,154,909,564 instructions # 1.56 insn per cycle + 1.450034641 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2045) (512y: 93) (512z:78760) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index 1ac764e4d9..dae5283598 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-28_12:23:59 +DATE: 2023-10-29_12:22:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.681933e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.682714e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.682894e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.683591e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.684087e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.684212e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.176043 sec - 7,528,195,044 cycles # 3.013 GHz - 15,513,498,355 instructions # 2.06 insn per cycle - 2.554313698 seconds time elapsed +TOTAL : 2.168416 sec + 7,615,842,573 cycles # 3.061 GHz + 16,645,936,739 instructions # 2.19 insn per cycle + 2.545194192 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProce Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.110886e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 1.111202e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.111234e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.109020e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.109288e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.109323e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.404045 sec - 11,460,907,535 cycles # 3.069 GHz - 25,656,367,525 instructions # 2.24 insn per cycle - 3.791106013 seconds time elapsed +TOTAL : 3.403324 sec + 11,443,355,704 cycles # 3.063 GHz + 26,118,564,830 instructions # 2.28 insn per cycle + 3.793005998 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.984019e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.984276e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.984276e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.020350e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.020572e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.020572e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.618927 sec - 19,066,354,137 cycles # 2.879 GHz - 54,153,376,772 instructions # 2.84 insn per cycle - 6.622902376 seconds time elapsed +TOTAL : 6.587484 sec + 19,062,995,670 cycles # 2.892 GHz + 54,153,638,640 instructions # 2.84 insn per cycle + 6.591376376 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32243) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.628425e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.628513e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.628513e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.630436e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.630523e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.630523e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.247465 sec - 9,407,247,603 cycles # 2.894 GHz - 26,078,265,911 instructions # 2.77 insn per cycle - 3.251522981 seconds time elapsed +TOTAL : 3.243832 sec + 9,385,283,319 cycles # 2.891 GHz + 26,078,617,665 instructions # 2.78 insn per cycle + 3.247970890 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:95899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.777947e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.778467e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.778467e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.765682e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.766144e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.766144e+02 ) sec^-1 MeanMatrixElemValue = ( 
1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.402798 sec - 4,072,847,386 cycles # 2.896 GHz - 9,213,321,159 instructions # 2.26 insn per cycle - 1.406857687 seconds time elapsed +TOTAL : 1.407556 sec + 4,070,347,952 cycles # 2.885 GHz + 9,213,853,616 instructions # 2.26 insn per cycle + 1.411538882 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:83776) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.351083e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.351704e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.351704e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.329937e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.330543e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.330543e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.217933 sec - 3,539,688,188 cycles # 2.898 GHz - 8,168,083,097 instructions # 2.31 insn per cycle - 1.221962472 seconds time elapsed +TOTAL : 1.224392 sec + 3,539,648,643 cycles # 2.883 GHz + 8,168,030,339 instructions # 2.31 insn per cycle + 1.228230843 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:79373) (512y: 229) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.868457e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.869081e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.869081e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.704586e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.705182e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.705182e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.370226 sec - 2,622,254,330 cycles # 1.910 GHz - 4,153,294,010 instructions # 1.58 insn per cycle - 1.374355989 seconds time elapsed +TOTAL : 1.429754 sec + 2,619,590,928 cycles # 1.828 GHz + 4,153,526,089 instructions # 1.59 insn per cycle + 1.433808648 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1492) (512y: 175) (512z:78776) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index f85b83afc5..0e5cd865f7 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_11:11:52 +DATE: 2023-10-29_12:15:50 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.425505e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.236435e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.657072e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.897731e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.298032e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.609140e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.458189 sec - 1,993,657,564 cycles # 3.003 GHz - 2,804,107,618 instructions # 1.41 insn per cycle - 0.733844419 seconds time elapsed +TOTAL : 0.443157 sec + 1,968,673,062 cycles # 2.985 GHz + 2,764,263,671 instructions # 1.40 insn per cycle + 0.716925102 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.249352e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 7.087153e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.509076e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.626806e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.173467e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.522031e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.529582 sec - 2,292,930,937 cycles # 2.997 GHz - 3,293,249,585 instructions # 1.44 insn per cycle - 0.824142515 seconds time elapsed +TOTAL : 0.519132 sec + 2,252,678,411 cycles # 2.998 GHz + 3,262,908,846 instructions # 1.45 insn per cycle + 0.808982043 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.094181e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.117189e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.117189e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.098247e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.121223e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.121223e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.519578 sec - 4,700,803,105 cycles # 3.087 GHz - 13,467,820,238 instructions # 2.87 insn per cycle - 1.523730874 seconds time elapsed +TOTAL : 1.513208 sec + 4,692,610,893 cycles # 3.095 GHz + 13,466,756,564 instructions # 2.87 insn per cycle + 1.517124584 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.988328e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.062679e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.062679e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.969129e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.042422e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.042422e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.845077 sec - 2,624,090,994 cycles # 3.092 GHz - 7,555,318,399 instructions # 2.88 insn per cycle - 0.849297883 seconds time elapsed +TOTAL : 0.853122 sec + 2,623,104,335 cycles # 3.061 GHz + 7,555,431,139 instructions # 2.88 insn per cycle + 0.857560586 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3095) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.381886e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.602535e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.602535e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.367757e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.586104e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.586104e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.505903 sec - 1,489,612,723 cycles 
# 2.924 GHz - 3,122,146,984 instructions # 2.10 insn per cycle - 0.510105162 seconds time elapsed +TOTAL : 0.507811 sec + 1,480,043,609 cycles # 2.894 GHz + 3,121,534,274 instructions # 2.11 insn per cycle + 0.511995434 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.752139e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.021247e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.021247e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.732684e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.002593e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.002593e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.458220 sec - 1,341,657,342 cycles # 2.905 GHz - 2,984,216,057 instructions # 2.22 insn per cycle - 0.462365340 seconds time elapsed +TOTAL : 0.460313 sec + 1,342,434,609 cycles # 2.895 GHz + 2,983,928,435 instructions # 2.22 insn per cycle + 0.464435892 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
2.580684e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.707250e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.707250e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.310589e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.416870e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.416870e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.658521 sec - 1,326,321,680 cycles # 2.003 GHz - 1,955,661,971 instructions # 1.47 insn per cycle - 0.662818386 seconds time elapsed +TOTAL : 0.733968 sec + 1,333,261,128 cycles # 1.822 GHz + 1,957,621,712 instructions # 1.47 insn per cycle + 0.738175276 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index bb40575287..9471c8d9c9 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_11:13:59 +DATE: 2023-10-29_12:38:14 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.499145e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.000526e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.000526e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.706674e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.257120e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.257120e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.471660 sec - 2,056,274,121 cycles # 3.004 GHz - 3,032,901,904 instructions # 1.47 insn per cycle - 0.741611497 seconds time elapsed +TOTAL : 0.467849 sec + 2,076,528,325 cycles # 3.007 GHz + 3,074,178,340 instructions # 1.48 insn per cycle + 0.747915410 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,14 +72,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.137166e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.151984e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.151984e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.385006e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.385786e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.385786e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.782426 sec - 3,108,804,978 cycles # 2.985 GHz - 4,639,704,366 instructions # 1.49 insn per cycle - 1.102099164 seconds time elapsed +TOTAL : 0.735465 sec + 2,965,430,587 cycles # 3.028 GHz + 4,581,128,506 instructions # 1.54 insn per cycle + 1.038667246 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -95,14 +95,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.096327e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.119247e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.119247e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.095462e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.118569e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.118569e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.521465 sec - 4,727,662,680 cycles # 3.100 GHz - 13,471,889,912 
instructions # 2.85 insn per cycle - 1.525740740 seconds time elapsed +TOTAL : 1.523268 sec + 4,722,842,839 cycles # 3.094 GHz + 13,474,034,086 instructions # 2.85 insn per cycle + 1.527549837 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe @@ -122,14 +122,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.976939e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.052328e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.052328e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.962183e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.036409e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.036409e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.856489 sec - 2,662,267,830 cycles # 3.095 GHz - 7,603,678,621 instructions # 2.86 insn per cycle - 0.860905365 seconds time elapsed +TOTAL : 0.861797 sec + 2,667,534,331 cycles # 3.083 GHz + 7,603,405,381 instructions # 2.85 insn per cycle + 0.866054368 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3095) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe @@ -149,14 +149,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.387239e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.609264e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.609264e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.140932e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.345844e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.345844e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.512281 sec - 1,513,864,705 cycles # 2.932 GHz - 3,172,887,101 instructions # 2.10 insn per cycle - 0.516813268 seconds time elapsed +TOTAL : 0.550990 sec + 1,514,470,383 cycles # 2.730 GHz + 3,172,744,661 instructions # 2.09 insn per cycle + 0.555284561 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe @@ -176,14 +176,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.731800e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.003313e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.003313e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.699108e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.964542e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.964542e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.467919 sec - 1,378,999,909 cycles # 2.923 GHz - 3,034,843,091 instructions # 2.20 insn per cycle - 0.472342300 seconds time elapsed +TOTAL : 0.470442 sec + 1,374,160,602 cycles # 2.900 GHz + 3,033,106,438 instructions # 2.21 insn per cycle + 0.474588436 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe @@ -203,14 +203,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.551387e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.681149e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.681149e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.542150e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.667124e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.667124e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.673053 sec - 1,359,798,395 cycles # 2.009 GHz - 1,995,251,436 instructions # 1.47 insn per cycle - 0.677423923 seconds time elapsed +TOTAL : 0.674855 sec + 1,357,449,990 cycles # 2.000 GHz + 1,995,572,075 instructions # 1.47 insn per cycle + 0.679206405 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index 0c2864af4d..8747c162f1 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_11:12:11 +DATE: 2023-10-29_12:16:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.374698e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.066567e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.458475e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.847927e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.169090e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.488808e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.443172 sec - 1,984,389,740 cycles # 3.008 GHz - 2,813,702,209 instructions # 1.42 insn per cycle - 0.717001254 seconds time elapsed +TOTAL : 0.441061 sec + 1,959,960,145 cycles # 3.006 GHz + 2,776,169,145 instructions # 1.42 insn per cycle + 0.710628939 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.243499e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 7.022510e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.434945e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.599303e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.073860e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.414334e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.528031 sec - 2,306,782,100 cycles # 3.014 GHz - 3,293,790,173 instructions # 1.43 insn per cycle - 0.823170146 seconds time elapsed +TOTAL : 0.517772 sec + 2,256,780,305 cycles # 3.011 GHz + 3,226,937,079 instructions # 1.43 insn per cycle + 0.806382487 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.082757e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.105146e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.105146e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.093753e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.116419e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.116419e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.535031 sec - 4,702,151,242 cycles # 3.056 GHz - 13,460,848,480 instructions # 2.86 insn per cycle - 1.539172370 seconds time elapsed +TOTAL : 1.519650 sec + 4,717,083,625 cycles # 3.097 GHz + 13,460,945,405 instructions # 2.85 insn per cycle + 1.523612017 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 849) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.975479e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.051003e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.051003e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.983108e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.057211e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.057211e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.850537 sec - 2,627,273,138 cycles # 3.077 GHz - 7,554,652,083 instructions # 2.88 insn per cycle - 0.854779657 seconds time elapsed +TOTAL : 0.846574 sec + 2,623,843,957 cycles # 3.087 GHz + 7,554,608,982 instructions # 2.88 insn per cycle + 0.850719743 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3088) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.403057e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.631167e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.631167e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.252830e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.467198e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.467198e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.503108 sec - 1,479,372,046 cycles 
# 2.921 GHz - 3,120,535,969 instructions # 2.11 insn per cycle - 0.507264468 seconds time elapsed +TOTAL : 0.525510 sec + 1,480,793,679 cycles # 2.798 GHz + 3,120,328,557 instructions # 2.11 insn per cycle + 0.529923190 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2900) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.783590e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.056952e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.056952e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.775976e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.054371e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.054371e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.454143 sec - 1,339,187,575 cycles # 2.928 GHz - 2,981,219,136 instructions # 2.23 insn per cycle - 0.458255982 seconds time elapsed +TOTAL : 0.455057 sec + 1,340,948,697 cycles # 2.924 GHz + 2,981,199,299 instructions # 2.22 insn per cycle + 0.459129279 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
2.579822e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.705158e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.705158e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.565163e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.688364e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.688364e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.658524 sec - 1,326,723,838 cycles # 2.004 GHz - 1,953,998,504 instructions # 1.47 insn per cycle - 0.662655722 seconds time elapsed +TOTAL : 0.661815 sec + 1,324,894,233 cycles # 1.992 GHz + 1,953,995,087 instructions # 1.47 insn per cycle + 0.665958752 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1348) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 4efa63c73f..56c392a1e3 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_11:12:28 +DATE: 2023-10-29_12:16:24 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.846056e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.190837e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.345223e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.856448e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.239366e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.361186e+08 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.441885 sec - 1,964,981,045 cycles # 3.008 GHz - 2,777,977,923 instructions # 1.41 insn per cycle - 0.712518260 seconds time elapsed +TOTAL : 0.433879 sec + 1,947,838,564 cycles # 3.019 GHz + 2,721,423,956 instructions # 1.40 insn per cycle + 0.702482263 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.228241e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
1.808928e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.953110e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.083707e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.835194e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.957264e+08 ) sec^-1 MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.474858 sec - 2,136,444,467 cycles # 3.019 GHz - 3,023,304,032 instructions # 1.42 insn per cycle - 0.764875305 seconds time elapsed +TOTAL : 0.468146 sec + 2,081,746,126 cycles # 3.012 GHz + 2,978,581,699 instructions # 1.43 insn per cycle + 0.748245258 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.162214e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.188615e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.188615e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.159896e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.185810e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.185810e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.429811 sec - 4,456,320,748 cycles # 3.110 GHz - 13,051,985,828 instructions # 2.93 insn per cycle - 1.433732093 seconds time elapsed +TOTAL : 1.432261 sec + 4,452,339,242 cycles # 3.102 GHz + 13,052,140,755 instructions # 2.93 insn per cycle + 1.436130562 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.111752e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.309723e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.309723e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.091620e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.287273e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.287273e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.545843 sec - 1,699,369,970 cycles # 3.094 GHz - 4,514,916,171 instructions # 2.66 insn per cycle - 0.549893471 seconds time elapsed +TOTAL : 0.549197 sec + 1,699,183,684 cycles # 3.076 GHz + 4,515,090,875 instructions # 2.66 insn per cycle + 0.553151524 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3601) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.097094e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.867963e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.867963e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.108514e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.877352e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.877352e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.288960 sec - 851,373,452 cycles # 2.910 GHz 
- 1,898,371,980 instructions # 2.23 insn per cycle - 0.293051317 seconds time elapsed +TOTAL : 0.288021 sec + 852,159,000 cycles # 2.923 GHz + 1,898,157,553 instructions # 2.23 insn per cycle + 0.292076560 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.518614e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.412299e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.412299e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.087116e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.904152e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.904152e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.271412 sec - 800,455,858 cycles # 2.911 GHz - 1,821,467,150 instructions # 2.28 insn per cycle - 0.275547796 seconds time elapsed +TOTAL : 0.289840 sec + 799,954,134 cycles # 2.726 GHz + 1,821,377,935 instructions # 2.28 insn per cycle + 0.293914031 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.880135e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 5.364108e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.364108e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.880710e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.371257e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.371257e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.357728 sec - 738,502,166 cycles # 2.046 GHz - 1,305,590,914 instructions # 1.77 insn per cycle - 0.361826347 seconds time elapsed +TOTAL : 0.357485 sec + 738,787,686 cycles # 2.048 GHz + 1,305,534,766 instructions # 1.77 insn per cycle + 0.361510002 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1969) (512y: 32) (512z: 2383) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index b3d6f455ab..414cfe1d51 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_11:14:17 +DATE: 2023-10-29_12:38:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -48,14 +48,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.590965e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.773915e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.773915e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.627054e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.270003e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.270003e+07 ) sec^-1 MeanMatrixElemValue = ( 2.017654e+01 +- 1.429184e+01 ) GeV^-2 -TOTAL : 0.450702 sec - 1,994,471,727 cycles # 3.019 GHz - 2,920,362,057 instructions # 1.46 insn per cycle - 0.717654721 seconds time elapsed +TOTAL : 0.449910 sec + 1,996,839,241 cycles # 3.004 GHz + 2,941,700,759 instructions # 1.47 insn per cycle + 0.722133966 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -72,14 +72,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.807015e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.180092e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.180092e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.307972e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.870443e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.870443e+07 ) sec^-1 MeanMatrixElemValue = ( 2.609942e+02 +- 2.115590e+02 ) GeV^-2 -TOTAL : 0.639942 sec - 2,501,192,232 cycles # 2.864 GHz - 3,760,146,551 instructions # 1.50 insn per cycle - 0.930803843 seconds time elapsed +TOTAL : 0.613187 sec + 2,525,944,425 cycles # 3.007 GHz + 3,862,520,366 instructions # 1.53 insn per cycle + 0.898879565 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -95,14 +95,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.163010e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.189128e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.189128e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.090941e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.116413e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.116413e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.432348 sec - 4,468,926,407 cycles # 3.113 GHz - 13,056,471,140 
instructions # 2.92 insn per cycle - 1.436449099 seconds time elapsed +TOTAL : 1.527051 sec + 4,478,023,823 cycles # 2.926 GHz + 13,056,814,715 instructions # 2.92 insn per cycle + 1.531327534 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe @@ -122,14 +122,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.101496e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.298069e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.298069e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.003695e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.196349e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.196349e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.551945 sec - 1,720,858,195 cycles # 3.099 GHz - 4,563,075,078 instructions # 2.65 insn per cycle - 0.556069064 seconds time elapsed +TOTAL : 0.569949 sec + 1,724,326,509 cycles # 3.007 GHz + 4,563,513,018 instructions # 2.65 insn per cycle + 0.574110319 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3601) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe @@ -149,14 +149,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.886684e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 6.625652e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.625652e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.019587e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.777056e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.777056e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.303879 sec - 878,433,693 cycles # 2.855 GHz - 1,935,192,566 instructions # 2.20 insn per cycle - 0.308372436 seconds time elapsed +TOTAL : 0.296032 sec + 869,572,713 cycles # 2.903 GHz + 1,935,510,451 instructions # 2.23 insn per cycle + 0.300055410 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe @@ -176,14 +176,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.388392e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.262834e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.262834e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.482654e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.361988e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.361988e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.280915 sec - 821,362,202 cycles # 2.888 GHz - 1,858,402,043 instructions # 2.26 insn per cycle - 0.285072483 seconds time elapsed +TOTAL : 0.276219 sec + 818,806,491 cycles # 2.927 GHz + 1,858,442,685 instructions # 2.27 insn per cycle + 0.280314394 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe @@ -203,14 +203,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.870968e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.352273e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.352273e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.841697e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.318140e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.318140e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.362591 sec - 758,880,170 cycles # 2.073 GHz - 1,347,085,517 instructions # 1.78 insn per cycle - 0.366695100 seconds time elapsed +TOTAL : 0.363863 sec + 758,289,883 cycles # 2.064 GHz + 1,347,291,326 instructions # 1.78 insn per cycle + 0.368075455 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1969) (512y: 32) (512z: 2383) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index 6315058ab2..52c7867ee8 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_11:12:44 +DATE: 2023-10-29_12:16:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.702162e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.156083e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.311905e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.794391e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.222395e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.344532e+08 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.440240 sec - 1,914,660,193 cycles # 2.927 GHz - 2,640,669,055 instructions # 1.38 insn per cycle - 0.710962903 seconds time elapsed +TOTAL : 0.436791 sec + 1,938,256,203 cycles # 2.998 GHz + 2,764,073,446 instructions # 1.43 insn per cycle + 0.705965229 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.185142e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
1.784341e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.927350e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.996745e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.794315e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910293e+08 ) sec^-1 MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.478204 sec - 2,109,342,162 cycles # 3.006 GHz - 2,990,715,659 instructions # 1.42 insn per cycle - 0.761391105 seconds time elapsed +TOTAL : 0.470118 sec + 2,065,714,326 cycles # 2.989 GHz + 2,993,944,987 instructions # 1.45 insn per cycle + 0.749384322 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.160687e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.186899e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.186899e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.155710e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.181692e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.181692e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.431367 sec - 4,453,449,929 cycles # 3.105 GHz - 13,033,473,248 instructions # 2.93 insn per cycle - 1.435338813 seconds time elapsed +TOTAL : 1.438479 sec + 4,451,382,506 cycles # 3.089 GHz + 13,033,002,507 instructions # 2.93 insn per cycle + 1.442396317 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.119834e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.322376e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.322376e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.114475e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.315383e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.315383e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.544693 sec - 1,689,425,653 cycles # 3.083 GHz - 4,511,028,047 instructions # 2.67 insn per cycle - 0.548780764 seconds time elapsed +TOTAL : 0.545728 sec + 1,690,815,772 cycles # 3.078 GHz + 4,510,987,679 instructions # 2.67 insn per cycle + 0.549857945 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.102572e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.869486e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.869486e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.071384e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.843278e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.843278e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.287991 sec - 850,946,422 cycles # 2.921 GHz 
- 1,895,366,615 instructions # 2.23 insn per cycle - 0.292004201 seconds time elapsed +TOTAL : 0.289313 sec + 852,077,910 cycles # 2.911 GHz + 1,895,413,833 instructions # 2.22 insn per cycle + 0.293386594 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3461) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.550351e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.453270e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.453270e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.491951e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.379361e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.379361e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.269823 sec - 800,276,038 cycles # 2.930 GHz - 1,817,389,395 instructions # 2.27 insn per cycle - 0.273798599 seconds time elapsed +TOTAL : 0.271830 sec + 799,281,655 cycles # 2.905 GHz + 1,817,541,912 instructions # 2.27 insn per cycle + 0.275768147 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3298) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.882771e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 5.367359e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.367359e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.843043e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.315522e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.315522e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.357346 sec - 740,492,059 cycles # 2.052 GHz - 1,303,318,805 instructions # 1.76 insn per cycle - 0.361466844 seconds time elapsed +TOTAL : 0.359842 sec + 739,487,192 cycles # 2.037 GHz + 1,303,203,838 instructions # 1.76 insn per cycle + 0.363738116 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1932) (512y: 32) (512z: 2383) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 61f818ffd3..055869d4c7 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_11:13:00 +DATE: 2023-10-29_12:16:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.422535e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.222834e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.641036e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.898492e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.300415e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.619193e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.446535 sec - 1,997,383,761 cycles # 2.995 GHz - 2,799,829,441 instructions # 1.40 insn per cycle - 0.723708714 seconds time elapsed +TOTAL : 0.441230 sec + 1,951,511,543 cycles # 2.990 GHz + 2,730,665,273 instructions # 1.40 insn per cycle + 0.711120315 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.250735e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
7.126800e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.544299e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.638754e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.191258e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.545080e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.531299 sec - 2,306,027,111 cycles # 3.007 GHz - 3,282,970,610 instructions # 1.42 insn per cycle - 0.825811359 seconds time elapsed +TOTAL : 0.517201 sec + 2,223,252,021 cycles # 2.965 GHz + 3,157,515,011 instructions # 1.42 insn per cycle + 0.806918695 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.093896e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.116488e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.116488e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.066720e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.089280e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.089280e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.519670 sec - 4,721,504,023 cycles # 3.100 GHz - 13,469,498,246 instructions # 2.85 insn per cycle - 1.523736357 seconds time elapsed +TOTAL : 1.558049 sec + 4,723,939,840 cycles # 3.026 GHz + 13,469,697,365 instructions # 2.85 insn per cycle + 1.562031550 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 840) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.013874e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.090091e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.090091e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.995370e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.071613e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.071613e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.834790 sec - 2,597,461,679 cycles # 3.098 GHz - 7,388,399,527 instructions # 2.84 insn per cycle - 0.839225828 seconds time elapsed +TOTAL : 0.842124 sec + 2,597,009,030 cycles # 3.072 GHz + 7,388,574,977 instructions # 2.85 insn per cycle + 0.846124608 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3073) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.421565e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.647240e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.647240e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.399585e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.625638e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.625638e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.500518 sec - 1,469,864,578 cycles # 2.915 
GHz - 3,057,713,704 instructions # 2.08 insn per cycle - 0.504803616 seconds time elapsed +TOTAL : 0.503848 sec + 1,468,232,729 cycles # 2.894 GHz + 3,057,878,707 instructions # 2.08 insn per cycle + 0.508008971 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3013) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.862725e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.148513e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.148513e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.830445e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.114174e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.114174e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.445813 sec - 1,308,991,772 cycles # 2.913 GHz - 2,932,828,584 instructions # 2.24 insn per cycle - 0.449922923 seconds time elapsed +TOTAL : 0.448877 sec + 1,307,617,793 cycles # 2.891 GHz + 2,932,981,796 instructions # 2.24 insn per cycle + 0.452905180 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2799) (512y: 110) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.488490e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.605792e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.605792e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.478580e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.593098e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.593098e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.682096 sec - 1,364,315,010 cycles # 1.990 GHz - 1,971,638,562 instructions # 1.45 insn per cycle - 0.686218540 seconds time elapsed +TOTAL : 0.684441 sec + 1,364,490,195 cycles # 1.983 GHz + 1,971,660,665 instructions # 1.44 insn per cycle + 0.688521345 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1700) (512y: 114) (512z: 2171) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index b0f37524f2..2e003d77ff 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -36,7 +36,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_11:13:17 +DATE: 2023-10-29_12:17:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -44,14 +44,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.389893e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.052684e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.466443e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.880455e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.241562e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.559273e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.444473 sec - 2,007,835,279 cycles # 3.017 GHz - 2,790,075,561 instructions # 1.39 insn per cycle - 0.723819294 seconds time elapsed +TOTAL : 0.442811 sec + 1,959,336,232 cycles # 2.999 GHz + 2,780,948,557 instructions # 1.42 insn per cycle + 0.712811728 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -60,14 +60,14 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.213263e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
6.924808e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.325840e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.598164e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.048675e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.388139e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.526659 sec - 2,290,049,296 cycles # 3.001 GHz - 3,242,676,439 instructions # 1.42 insn per cycle - 0.821642314 seconds time elapsed +TOTAL : 0.515644 sec + 2,228,032,646 cycles # 2.974 GHz + 3,171,372,319 instructions # 1.42 insn per cycle + 0.806193652 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,14 +82,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.095275e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.118185e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.118185e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.093076e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.116510e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.116510e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.517345 sec - 4,725,285,176 cycles # 3.107 GHz - 13,455,381,107 instructions # 2.85 insn per cycle - 1.521506668 seconds time elapsed +TOTAL : 1.520318 sec + 4,726,169,759 cycles # 3.102 GHz + 13,455,687,409 instructions # 2.85 insn per cycle + 1.524513913 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 827) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest.exe @@ -108,14 +108,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.003251e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.080875e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.080875e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.007168e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.082924e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.082924e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.839136 sec - 2,603,801,918 cycles # 3.090 GHz - 7,392,674,875 instructions # 2.84 insn per cycle - 0.843316365 seconds time elapsed +TOTAL : 0.836696 sec + 2,600,247,624 cycles # 3.095 GHz + 7,392,292,953 instructions # 2.84 insn per cycle + 0.840827824 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3062) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest.exe @@ -134,14 +134,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.396132e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.618964e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.618964e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.414544e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.640892e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.640892e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.503966 sec - 1,469,332,997 cycles # 2.895 
GHz - 3,057,816,888 instructions # 2.08 insn per cycle - 0.508181558 seconds time elapsed +TOTAL : 0.501049 sec + 1,466,869,763 cycles # 2.907 GHz + 3,057,603,398 instructions # 2.08 insn per cycle + 0.505187533 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2990) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.883747e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.172497e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.172497e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.876010e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.167875e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.167875e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.442976 sec - 1,307,708,508 cycles # 2.929 GHz - 2,933,584,495 instructions # 2.24 insn per cycle - 0.447189273 seconds time elapsed +TOTAL : 0.443787 sec + 1,306,217,967 cycles # 2.920 GHz + 2,933,470,615 instructions # 2.25 insn per cycle + 0.447942578 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2775) (512y: 110) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest.exe @@ -186,14 +186,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.475241e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.592855e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.592855e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.482286e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.598708e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.598708e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.685645 sec - 1,364,917,576 cycles # 1.981 GHz - 1,971,508,007 instructions # 1.44 insn per cycle - 0.689804896 seconds time elapsed +TOTAL : 0.683170 sec + 1,366,561,219 cycles # 1.990 GHz + 1,971,363,745 instructions # 1.44 insn per cycle + 0.687291870 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1676) (512y: 114) (512z: 2171) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest.exe From 6ddb1d3b5592b73843820eb4b11b05a1d8a53623 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 29 Oct 2023 13:43:37 +0100 Subject: [PATCH 072/119] [oct23av] rerun 18 tmad tests - still failures in ggttggg (madevent crashes), but gqttq are now ok STARTED AT Sun Oct 29 12:51:07 PM CET 2023 ENDED AT Sun Oct 29 01:09:24 PM CET 2023 Status=0 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt 
24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt --- .../log_eemumu_mad_d_inl0_hrd0.txt | 136 ++++++++--------- .../log_eemumu_mad_f_inl0_hrd0.txt | 136 ++++++++--------- .../log_eemumu_mad_m_inl0_hrd0.txt | 128 ++++++++-------- .../log_ggtt_mad_d_inl0_hrd0.txt | 134 ++++++++--------- .../log_ggtt_mad_f_inl0_hrd0.txt | 132 ++++++++--------- .../log_ggtt_mad_m_inl0_hrd0.txt | 134 ++++++++--------- .../log_ggttg_mad_d_inl0_hrd0.txt | 138 +++++++++--------- .../log_ggttg_mad_f_inl0_hrd0.txt | 138 +++++++++--------- .../log_ggttg_mad_m_inl0_hrd0.txt | 134 ++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 138 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 136 ++++++++--------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 138 +++++++++--------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 10 +- .../log_ggttggg_mad_f_inl0_hrd0.txt | 10 +- .../log_ggttggg_mad_m_inl0_hrd0.txt | 8 +- 
.../log_gqttq_mad_d_inl0_hrd0.txt | 132 ++++++++--------- .../log_gqttq_mad_f_inl0_hrd0.txt | 136 ++++++++--------- .../log_gqttq_mad_m_inl0_hrd0.txt | 134 ++++++++--------- 18 files changed, 1026 insertions(+), 1026 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index a2fe9cfc4c..020b0a566d 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-28_13:14:47 +DATE: 2023-10-29_12:52:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6257s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6178s - [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6502s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6416s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.50E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1779s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1701s - [COUNTERS] Fortran MEs ( 1 ) : 0.0078s for 8192 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1798s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1715s + [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.79E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a 
[XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4348s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3498s - [COUNTERS] Fortran MEs ( 1 ) : 0.0850s for 90112 events => throughput is 1.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4164s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3314s + [COUNTERS] Fortran MEs ( 1 ) : 0.0851s for 90112 events => throughput is 1.06E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1861s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1799s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 8192 events => throughput is 1.31E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1872s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1807s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 8192 events => throughput is 1.27E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4102s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3401s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0701s for 90112 events => throughput is 1.29E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4130s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3422s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0708s for 90112 events => throughput is 1.27E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.260586e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.222593e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.273890e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.262620e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1831s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1792s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.11E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1851s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1812s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.10E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3805s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3369s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0437s for 90112 events => throughput is 2.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3820s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3382s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0438s for 90112 events => throughput is 2.06E+06 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.015609e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.974908e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.071474e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.040258e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,8 +286,8 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1786s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1757s + [COUNTERS] PROGRAM TOTAL : 0.1803s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1774s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.80E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3706s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3382s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0324s for 90112 events => throughput is 2.78E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3741s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3406s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0335s for 90112 events => throughput is 
2.69E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.649644e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.583060e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.777010e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.776569e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1796s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1767s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.82E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1833s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1805s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.93E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3673s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3358s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0314s for 90112 events => throughput is 2.87E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3697s + [COUNTERS] Fortran 
Overhead ( 0 ) : 0.3381s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0316s for 90112 events => throughput is 2.85E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.754124e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.778354e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.855874e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.956858e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -439,8 +439,8 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) [COUNTERS] PROGRAM TOTAL : 0.1823s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1787s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0036s for 8192 events => throughput is 2.30E+06 events/s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1789s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.37E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3770s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3386s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0384s for 90112 events => throughput is 2.35E+06 events/s + [COUNTERS] PROGRAM 
TOTAL : 0.3811s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3420s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0391s for 90112 events => throughput is 2.30E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.172472e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.130515e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.276219e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.239701e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5911s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5906s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.61E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6118s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6113s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.58E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7698s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7645s - [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0053s for 90112 events => throughput is 1.71E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7811s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7762s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.86E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.976614e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.405288e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.910314e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.726558e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.855305e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.047848e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.402134e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.444946e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.842796e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.026204e+07 ) sec^-1 *** 
EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.993414e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.031508e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.822100e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.037515e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.125212e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.129735e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 067e1ee497..128d62e050 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -2,8 +2,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/e CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 @@ -16,11 +16,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -make[1]: Nothing to be done for 'all'. +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' @@ -28,12 +27,13 @@ make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2023-10-28_13:15:03 +DATE: 2023-10-29_12:52:18 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6223s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6145s - [COUNTERS] Fortran MEs ( 1 ) : 0.0078s for 8192 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6346s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6267s + [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1784s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1701s - [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 9.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1785s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1705s + [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.02E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 
events) - [COUNTERS] PROGRAM TOTAL : 0.4366s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3463s - [COUNTERS] Fortran MEs ( 1 ) : 0.0903s for 90112 events => throughput is 9.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4135s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3286s + [COUNTERS] Fortran MEs ( 1 ) : 0.0848s for 90112 events => throughput is 1.06E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166087172673] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1925s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1863s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 8192 events => throughput is 1.32E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1919s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1852s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.24E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501907796603360E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4207s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3527s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0680s for 90112 events => throughput is 1.32E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4095s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0683s for 90112 events => throughput is 1.32E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.229725e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.267380e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.180987e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.237299e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165570339780] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1937s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1911s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.08E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1809s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1783s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.23E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905322826635E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3983s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3681s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0302s for 90112 events => throughput is 2.99E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3755s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3471s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0284s for 90112 events => throughput is 3.17E+06 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.070468e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.220565e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.258953e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.438590e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165593922979] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1991s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1966s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.33E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1847s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1825s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.72E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905316084181E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4015s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3733s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0282s for 90112 events => throughput is 3.19E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3676s + [COUNTERS] Fortran Overhead 
( 0 ) : 0.3431s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0245s for 90112 events => throughput is 3.67E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.518365e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.661910e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.665283e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.749870e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165593922979] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1832s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1810s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.77E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1824s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1802s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.68E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905316084181E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3863s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3618s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0245s for 90112 events 
=> throughput is 3.68E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3660s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3417s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0243s for 90112 events => throughput is 3.70E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.606645e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.497495e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.727018e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.700973e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166440400542] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1837s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1815s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.68E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1855s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1833s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.69E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501908978565555E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3669s - 
[COUNTERS] Fortran Overhead ( 0 ) : 0.3423s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0245s for 90112 events => throughput is 3.67E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3764s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3511s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0252s for 90112 events => throughput is 3.57E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.553347e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.571172e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.573933e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.542081e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166823487174] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5931s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5926s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.71E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5918s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5914s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.74E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501910542849674E-002] 
fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7539s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7493s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 90112 events => throughput is 1.96E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7548s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7502s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 90112 events => throughput is 1.97E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.318352e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.729601e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.853560e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.851176e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.080898e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.581866e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.054742e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.061926e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow 
summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.090578e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.564496e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.167250e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.226059e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.381546e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.843740e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.438630e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.462335e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 7821d8bf4c..9ceddba9f6 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/e CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none - make USEBUILDDIR=1 AVX=sse4 + + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,10 +15,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-28_13:15:20 +DATE: 2023-10-29_12:52:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6407s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6327s - [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6425s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6347s + [COUNTERS] Fortran MEs ( 1 ) : 0.0078s for 8192 events => throughput 
is 1.05E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1816s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1735s - [COUNTERS] Fortran MEs ( 1 ) : 0.0081s for 8192 events => throughput is 1.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1774s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1696s + [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4234s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3353s - [COUNTERS] Fortran MEs ( 1 ) : 0.0881s for 90112 events => throughput is 1.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4143s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3287s + [COUNTERS] Fortran MEs ( 1 ) : 0.0856s for 90112 events => throughput is 1.05E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211734] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1919s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1850s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 8192 events => throughput is 1.18E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1870s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1805s + [COUNTERS] CudaCpp MEs ( 2 ) : 
0.0064s for 8192 events => throughput is 1.27E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4141s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3418s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0722s for 90112 events => throughput is 1.25E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4157s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3429s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0729s for 90112 events => throughput is 1.24E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.218225e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.193150e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.251141e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.208603e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211728] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1823s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1785s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.13E+06 events/s + [COUNTERS] 
PROGRAM TOTAL : 0.1824s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1787s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0037s for 8192 events => throughput is 2.19E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3796s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3380s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0417s for 90112 events => throughput is 2.16E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3792s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3374s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0419s for 90112 events => throughput is 2.15E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.041832e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.005845e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.190968e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.200623e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,8 +286,8 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1799s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1769s + 
[COUNTERS] PROGRAM TOTAL : 0.1816s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1785s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.67E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3699s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3368s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0331s for 90112 events => throughput is 2.73E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3820s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3472s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0348s for 90112 events => throughput is 2.59E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.665940e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.635841e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.683509e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.796266e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,8 +362,8 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1806s - [COUNTERS] Fortran Overhead ( 0 ) : 
0.1778s + [COUNTERS] PROGRAM TOTAL : 0.1816s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1788s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.92E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,8 +395,8 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3689s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3373s + [COUNTERS] PROGRAM TOTAL : 0.3728s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0316s for 90112 events => throughput is 2.85E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.662410e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.771010e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.918061e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.944150e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1833s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1800s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 
2.48E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1840s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1808s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.50E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3783s + [COUNTERS] PROGRAM TOTAL : 0.3786s [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0371s for 90112 events => throughput is 2.43E+06 events/s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0374s for 90112 events => throughput is 2.41E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.310202e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.183955e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.414668e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.303699e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169066587257] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5938s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5933s - [COUNTERS] CudaCpp MEs 
( 2 ) : 0.0005s for 8192 events => throughput is 1.64E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5942s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5937s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.62E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919911173610E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7588s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7539s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.84E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7632s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7583s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 90112 events => throughput is 1.86E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.905469e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.344629e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.913840e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.860043e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.847313e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.047726e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.387824e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.150185e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.834525e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.038230e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.012912e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.951805e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) 
-p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.865488e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.025153e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.125515e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.125382e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 7f10f908fb..35a368fe62 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,17 +15,17 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-28_13:15:37 +DATE: 2023-10-29_12:52:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.7268s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6856s - [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3516s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3110s + [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,8 +84,8 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 
[47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3106s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2704s + [COUNTERS] PROGRAM TOTAL : 0.3066s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2664s [COUNTERS] Fortran MEs ( 1 ) : 0.0402s for 8192 events => throughput is 2.04E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6386s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1973s - [COUNTERS] Fortran MEs ( 1 ) : 0.4413s for 90112 events => throughput is 2.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6432s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2007s + [COUNTERS] Fortran MEs ( 1 ) : 0.4425s for 90112 events => throughput is 2.04E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600102] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3491s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3126s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0365s for 8192 events => throughput is 2.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3442s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3072s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0370s for 8192 events => throughput is 2.21E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775372] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] 
PROGRAM TOTAL : 1.6518s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2491s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4027s for 90112 events => throughput is 2.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6622s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2572s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4050s for 90112 events => throughput is 2.23E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.219817e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.193809e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.195772e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.165136e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600102] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3108s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2894s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.82E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3130s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2919s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.87E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 
[46.223782291775379] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4861s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2485s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2376s for 90112 events => throughput is 3.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4934s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2549s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2385s for 90112 events => throughput is 3.78E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.770420e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.758372e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.714739e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.715730e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2956s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2824s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0131s for 8192 events => throughput is 6.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2942s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2814s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0129s for 8192 events => throughput is 6.36E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < 
/tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3707s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2291s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1417s for 90112 events => throughput is 6.36E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3976s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2530s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1447s for 90112 events => throughput is 6.23E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.097285e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.727935e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.994538e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.905680e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,8 +362,8 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2921s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2806s + [COUNTERS] PROGRAM TOTAL : 0.3000s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2885s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0114s for 8192 events => throughput is 7.16E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < 
/tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3574s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2299s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1275s for 90112 events => throughput is 7.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3615s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2343s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1272s for 90112 events => throughput is 7.08E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.938371e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.782091e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.877095e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.814113e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3055s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2865s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0190s for 8192 events => throughput is 4.32E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3124s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2924s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0201s for 8192 events => throughput is 4.09E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN 
xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4449s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2340s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2108s for 90112 events => throughput is 4.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4534s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2443s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2091s for 90112 events => throughput is 4.31E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.163799e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.020863e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.045277e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.055554e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6900s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6895s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.43E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6929s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6923s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 
1.40E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,8 +547,8 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6486s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6423s + [COUNTERS] PROGRAM TOTAL : 1.6522s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6459s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.43E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.002621e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.175587e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.703583e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.675882e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.103340e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.181638e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.079091e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.081491e+08 ) sec^-1 *** EXECUTE 
GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.065422e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.177436e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.155854e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.159222e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.093445e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.205307e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.071429e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.016071e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index f0444104b4..8b3a04f1ca 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-28_13:16:03 +DATE: 2023-10-29_12:53:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3544s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3138s - [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3483s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3084s + [COUNTERS] Fortran MEs ( 1 ) : 0.0400s for 8192 events => throughput is 2.05E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < 
/tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3076s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2675s - [COUNTERS] Fortran MEs ( 1 ) : 0.0401s for 8192 events => throughput is 2.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3063s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2659s + [COUNTERS] Fortran MEs ( 1 ) : 0.0404s for 8192 events => throughput is 2.03E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6371s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1961s - [COUNTERS] Fortran MEs ( 1 ) : 0.4410s for 90112 events => throughput is 2.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7229s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2550s + [COUNTERS] Fortran MEs ( 1 ) : 0.4679s for 90112 events => throughput is 1.93E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690706767555099] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3351s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3011s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0340s for 8192 events => throughput is 2.41E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3388s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3045s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0343s for 8192 events => throughput is 2.39E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' 
./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782605295497] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6294s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2500s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3794s for 90112 events => throughput is 2.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6352s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2550s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3802s for 90112 events => throughput is 2.37E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.407105e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.379417e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.379008e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.325925e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690702885183541] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3099s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2953s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0146s for 8192 events => throughput is 5.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2991s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2847s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0144s for 8192 events => throughput is 5.68E+05 events/s *** (2-sse4) Compare 
MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223778858016772] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3905s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2312s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1593s for 90112 events => throughput is 5.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3930s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2310s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1619s for 90112 events => throughput is 5.56E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.339316e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.368260e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.342807e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.339718e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690694374060818] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2855s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2779s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 8192 events => throughput is 1.07E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2872s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2794s + [COUNTERS] CudaCpp MEs ( 2 ) 
: 0.0079s for 8192 events => throughput is 1.04E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,8 +319,8 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223775951815753] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3028s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2183s + [COUNTERS] PROGRAM TOTAL : 1.3106s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2261s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0845s for 90112 events => throughput is 1.07E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.031930e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.013919e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.050564e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.049085e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,8 +362,8 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690694374060818] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2823s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2751s + [COUNTERS] PROGRAM TOTAL : 0.2832s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2760s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0072s for 8192 events => throughput is 1.14E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to 
MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223775951815753] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3275s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2486s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0789s for 90112 events => throughput is 1.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3217s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2419s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0798s for 90112 events => throughput is 1.13E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.082923e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.088263e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.108645e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.111517e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690698914467276] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2897s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2795s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0102s for 8192 events => throughput is 8.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2912s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2812s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0100s for 8192 
events => throughput is 8.23E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223780273983500] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3363s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2264s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1099s for 90112 events => throughput is 8.20E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3418s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2327s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1091s for 90112 events => throughput is 8.26E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.727236e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.726753e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.861479e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.654832e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690703397697980] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6913s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6907s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.47E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6926s + [COUNTERS] 
Fortran Overhead ( 0 ) : 0.6921s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.53E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223786763175951] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6475s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6421s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 90112 events => throughput is 1.68E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6479s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6425s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 90112 events => throughput is 1.67E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.293690e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.284196e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.917959e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.946223e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.783289e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.169312e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.775118e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.754775e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.883043e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.163785e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.851465e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.871433e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.383799e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.701749e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.407914e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.427894e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 6bc95cf474..cf7aebd679 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -2,8 +2,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-28_13:16:27 +DATE: 2023-10-29_12:53:42 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3474s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3070s - [COUNTERS] Fortran MEs ( 1 ) : 0.0404s for 8192 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3507s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3105s + [COUNTERS] Fortran MEs ( 1 ) : 0.0402s for 8192 events => throughput is 2.04E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3042s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2642s - [COUNTERS] Fortran MEs ( 1 ) : 0.0400s for 8192 events => throughput is 2.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3057s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2652s + [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6707s 
- [COUNTERS] Fortran Overhead ( 0 ) : 1.2288s - [COUNTERS] Fortran MEs ( 1 ) : 0.4419s for 90112 events => throughput is 2.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6569s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2105s + [COUNTERS] Fortran MEs ( 1 ) : 0.4464s for 90112 events => throughput is 2.02E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,8 +134,8 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3423s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3051s + [COUNTERS] PROGRAM TOTAL : 0.3451s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3078s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0373s for 8192 events => throughput is 2.20E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7218s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2976s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4243s for 90112 events => throughput is 2.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6672s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2591s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4081s for 90112 events => throughput is 2.21E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.220322e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.207949e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.204411e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.193947e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,8 +210,8 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3083s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2878s + [COUNTERS] PROGRAM TOTAL : 0.3124s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2918s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0206s for 8192 events => throughput is 3.98E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4647s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2357s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2290s for 90112 events => throughput is 3.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4769s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2475s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2295s for 90112 events => throughput is 3.93E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.910371e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.715614e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.841808e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.728132e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2956s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2830s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0126s for 8192 events => throughput is 6.49E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3179s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3037s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0142s for 8192 events => throughput is 5.79E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3644s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2244s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1400s for 90112 events => throughput is 6.44E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4180s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2697s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1483s for 90112 events => throughput is 6.08E+05 events/s *** (2-avx2) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.194929e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.078585e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.158421e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.196670e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2944s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2831s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0113s for 8192 events => throughput is 7.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2929s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2814s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0115s for 8192 events => throughput is 7.12E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3537s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2281s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1255s for 90112 events => throughput is 7.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3613s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2356s + [COUNTERS] CudaCpp MEs 
( 2 ) : 0.1258s for 90112 events => throughput is 7.16E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.949915e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.899838e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.076342e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.905439e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3106s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2922s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0184s for 8192 events => throughput is 4.45E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3073s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2886s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0186s for 8192 events => throughput is 4.39E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4474s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2441s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2033s for 90112 events => throughput is 4.43E+05 events/s + [COUNTERS] PROGRAM TOTAL 
: 1.4525s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2498s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2026s for 90112 events => throughput is 4.45E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.104872e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.219505e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.062333e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.133533e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708266690699] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6907s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6902s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.47E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7103s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7097s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.45E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782303744791] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6486s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6423s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 
90112 events => throughput is 1.43E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6622s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6558s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 90112 events => throughput is 1.42E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.061036e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.164053e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.605008e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.591008e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.107641e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.186252e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.064944e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.056436e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.102601e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.190060e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process 
= SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.137506e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.131852e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.083399e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.183442e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.034037e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.000787e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 3bbbee8e28..b1bb43404f 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -2,9 +2,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-28_13:16:52 +DATE: 2023-10-29_12:54:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5361s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2241s - [COUNTERS] Fortran MEs ( 1 ) : 0.3120s for 8192 events => throughput is 2.63E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5360s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2216s + [COUNTERS] Fortran MEs ( 1 ) : 0.3144s for 8192 events => throughput is 2.61E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5301s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2172s - [COUNTERS] Fortran MEs ( 1 ) : 0.3129s for 8192 events => throughput is 2.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5413s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2270s + [COUNTERS] Fortran MEs ( 1 ) : 0.3143s for 8192 events => throughput is 2.61E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM 
TOTAL : 4.8733s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3971s - [COUNTERS] Fortran MEs ( 1 ) : 3.4762s for 90112 events => throughput is 2.59E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.8556s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3842s + [COUNTERS] Fortran MEs ( 1 ) : 3.4714s for 90112 events => throughput is 2.60E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470791E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8908s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5691s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3217s for 8192 events => throughput is 2.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8564s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5335s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3229s for 8192 events => throughput is 2.54E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.2396s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6791s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.5604s for 90112 events => throughput is 2.53E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2388s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6779s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5609s for 90112 events => throughput is 2.53E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.628768e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.602791e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.616955e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.594960e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470777E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5630s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3859s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1771s for 8192 events => throughput is 4.63E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5533s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3858s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1675s for 8192 events => throughput is 4.89E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.4252s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5492s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8760s for 90112 events => throughput is 4.80E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.3897s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5304s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8593s for 90112 events => throughput is 4.85E+04 events/s *** 
(2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.965550e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.966788e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.022472e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.966194e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3805s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2972s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0833s for 8192 events => throughput is 9.83E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3860s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3017s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0843s for 8192 events => throughput is 9.72E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.3448s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4317s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9132s for 90112 events => throughput is 9.87E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.3746s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4553s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.9193s for 90112 events => throughput is 9.80E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.686669e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.932213e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.468054e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.535049e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3664s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2920s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0744s for 8192 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3682s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2931s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0750s for 8192 events => throughput is 1.09E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.2427s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4261s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8166s for 90112 events => throughput is 1.10E+05 
events/s + [COUNTERS] PROGRAM TOTAL : 2.2682s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4425s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8256s for 90112 events => throughput is 1.09E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.136623e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.119921e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.141647e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.108923e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4202s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3185s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1017s for 8192 events => throughput is 8.05E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4273s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3234s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1039s for 8192 events => throughput is 7.89E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.5849s - [COUNTERS] Fortran Overhead ( 0 ) 
: 1.4598s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1252s for 90112 events => throughput is 8.01E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.7039s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5049s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1990s for 90112 events => throughput is 7.52E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.875766e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.919203e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.969717e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.829536e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6499s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6445s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.49E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6537s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6482s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.50E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 
1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8029s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7803s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0227s for 90112 events => throughput is 3.98E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8360s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8133s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 90112 events => throughput is 3.96E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.614447e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.632436e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.355330e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.870914e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.811765e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.879790e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.239741e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.237987e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
-EvtsPerSec[MECalcOnly] (3a) = ( 9.840747e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.865865e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.251858e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.250210e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.882435e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.878270e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.741625e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.743237e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 1ddd406cf2..2a60fb5534 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none - make USEBUILDDIR=1 AVX=sse4 + + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-28_13:17:33 +DATE: 2023-10-29_12:54:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5330s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2187s - [COUNTERS] Fortran MEs ( 1 ) : 0.3143s for 8192 events => throughput is 2.61E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5672s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2310s + [COUNTERS] Fortran MEs ( 1 ) : 0.3361s for 8192 events => throughput is 2.44E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5316s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2183s - [COUNTERS] Fortran MEs ( 1 ) : 0.3133s for 8192 events => throughput is 2.61E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5677s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2310s + [COUNTERS] Fortran MEs ( 1 ) : 0.3367s for 8192 events => throughput is 2.43E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM 
TOTAL : 4.8235s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3733s - [COUNTERS] Fortran MEs ( 1 ) : 3.4502s for 90112 events => throughput is 2.61E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.9155s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4034s + [COUNTERS] Fortran MEs ( 1 ) : 3.5121s for 90112 events => throughput is 2.57E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196349765248158E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8327s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5203s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3124s for 8192 events => throughput is 2.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8523s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5274s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3248s for 8192 events => throughput is 2.52E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310860767768514E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.2294s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7666s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4628s for 90112 events => throughput is 2.60E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1874s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6887s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.4987s for 90112 events => throughput is 2.58E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.701385e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.490289e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.714632e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.615209e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196334183509370E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4009s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3081s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0928s for 8192 events => throughput is 8.83E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4077s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3125s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0953s for 8192 events => throughput is 8.60E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847547651041E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.4684s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4441s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0243s for 90112 events => throughput is 8.80E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.5054s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4660s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0394s for 90112 events => throughput is 8.67E+04 events/s *** 
(2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.982844e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.827172e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.952179e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.766055e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196330801117323E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3013s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2585s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0429s for 8192 events => throughput is 1.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3093s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2656s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0437s for 8192 events => throughput is 1.87E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847326088065E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8658s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3951s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4706s for 90112 events => throughput is 1.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8870s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4129s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.4741s for 90112 events => throughput is 1.90E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.899231e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.933741e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.918892e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.893629e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196330801117323E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.2928s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2543s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0385s for 8192 events => throughput is 2.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2962s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2568s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0393s for 8192 events => throughput is 2.08E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847326088065E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8142s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3916s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4226s for 90112 events => throughput is 2.13E+05 
events/s + [COUNTERS] PROGRAM TOTAL : 1.8774s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4386s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4388s for 90112 events => throughput is 2.05E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.152263e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.027794e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.147217e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.133357e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196344079460428E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3155s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2656s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0499s for 8192 events => throughput is 1.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3211s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2706s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0504s for 8192 events => throughput is 1.62E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310857804286998E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.9509s - [COUNTERS] Fortran Overhead ( 0 ) 
: 1.4027s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5481s for 90112 events => throughput is 1.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9823s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4267s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5557s for 90112 events => throughput is 1.62E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.632673e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.584134e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.596476e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.598776e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196349366365994E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6420s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6411s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 9.63E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6480s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6472s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 9.68E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310864949473968E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 
1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8252s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8155s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0097s for 90112 events => throughput is 9.25E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8069s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7974s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0095s for 90112 events => throughput is 9.51E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.300629e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.335468e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.854879e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.856620e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.663339e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.750769e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.466672e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.282448e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.646830e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.752164e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.524923e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.539062e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.510623e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.591877e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.613146e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.619902e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 721eefb7dd..949ed85b68 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -15,17 +15,17 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' 
CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-28_13:18:09 +DATE: 2023-10-29_12:55:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5337s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2185s - [COUNTERS] Fortran MEs ( 1 ) : 0.3152s for 8192 events => throughput is 2.60E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5366s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2209s + [COUNTERS] Fortran MEs ( 1 ) : 0.3157s for 8192 events => throughput is 2.59E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' 
./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5318s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2188s - [COUNTERS] Fortran MEs ( 1 ) : 0.3130s for 8192 events => throughput is 2.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5345s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2194s + [COUNTERS] Fortran MEs ( 1 ) : 0.3151s for 8192 events => throughput is 2.60E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 4.8408s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3808s - [COUNTERS] Fortran MEs ( 1 ) : 3.4599s for 90112 events => throughput is 2.60E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.8683s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3866s + [COUNTERS] Fortran MEs ( 1 ) : 3.4817s for 90112 events => throughput is 2.59E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358763382007E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8640s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5392s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3248s for 8192 events => throughput is 2.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8697s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5430s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3267s for 8192 events => throughput is 2.51E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** 
@@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872835011053E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.2834s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6997s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.5836s for 90112 events => throughput is 2.51E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3049s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6900s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6148s for 90112 events => throughput is 2.49E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.576974e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.572523e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.583988e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.540550e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358804670396E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5380s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3748s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1632s for 8192 events => throughput is 5.02E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5528s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3888s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1639s for 8192 events => 
throughput is 5.00E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872836789727E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.3341s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5132s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8209s for 90112 events => throughput is 4.95E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.3320s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5232s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8088s for 90112 events => throughput is 4.98E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.045666e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.062468e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.075727e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.052914e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3820s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2999s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0821s for 8192 events => throughput is 9.98E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3881s + [COUNTERS] 
Fortran Overhead ( 0 ) : 0.3045s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0836s for 8192 events => throughput is 9.80E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.3416s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4347s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9069s for 90112 events => throughput is 9.94E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.3724s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4554s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9170s for 90112 events => throughput is 9.83E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.012431e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.001820e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.019636e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.771997e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3604s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2881s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0723s for 
8192 events => throughput is 1.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3874s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3089s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0785s for 8192 events => throughput is 1.04E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.2152s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4183s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7969s for 90112 events => throughput is 1.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2882s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4675s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8207s for 90112 events => throughput is 1.10E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.155859e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.118705e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.142606e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.118791e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358757578441E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 
0.4284s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3240s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1044s for 8192 events => throughput is 7.85E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4737s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3538s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1199s for 8192 events => throughput is 6.83E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872803699391E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.6056s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4514s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1542s for 90112 events => throughput is 7.81E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6718s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4976s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1742s for 90112 events => throughput is 7.67E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.743118e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.724100e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.687089e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.724361e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358102981245E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6479s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6424s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.50E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6480s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6425s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.50E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,8 +547,8 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872068634174E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8072s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7844s + [COUNTERS] PROGRAM TOTAL : 1.8132s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7904s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 90112 events => throughput is 3.95E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.622531e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.620312e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.055833e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.903689e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.842336e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.827852e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.236411e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.234645e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.824593e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.822352e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.245346e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.243970e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** 
Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.823254e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.862310e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.716220e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.715609e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index aac392d268..79de5a8a41 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -3,8 +3,8 @@ CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-28_13:18:50 +DATE: 2023-10-29_12:56:06 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3901s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2742s - [COUNTERS] Fortran MEs ( 1 ) : 4.1159s for 8192 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3648s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2743s + [COUNTERS] Fortran MEs ( 1 ) : 4.0905s for 8192 events => throughput is 2.00E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3341s - [COUNTERS] 
Fortran Overhead ( 0 ) : 0.2715s - [COUNTERS] Fortran MEs ( 1 ) : 4.0627s for 8192 events => throughput is 2.02E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3631s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2705s + [COUNTERS] Fortran MEs ( 1 ) : 4.0926s for 8192 events => throughput is 2.00E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 46.8744s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8583s - [COUNTERS] Fortran MEs ( 1 ) : 45.0161s for 90112 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.1667s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8795s + [COUNTERS] Fortran MEs ( 1 ) : 45.2872s for 90112 events => throughput is 1.99E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.5573s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3683s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1891s for 8192 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.6271s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3999s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.2272s for 8192 events => throughput is 1.94E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421161E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - 
[COUNTERS] PROGRAM TOTAL : 52.8537s - [COUNTERS] Fortran Overhead ( 0 ) : 5.9783s - [COUNTERS] CudaCpp MEs ( 2 ) : 46.8754s for 90112 events => throughput is 1.92E+03 events/s + [COUNTERS] PROGRAM TOTAL : 52.7641s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0183s + [COUNTERS] CudaCpp MEs ( 2 ) : 46.7458s for 90112 events => throughput is 1.93E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.010493e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.987957e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.010686e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.991838e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352993E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.7647s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4538s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3108s for 8192 events => throughput is 3.55E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7379s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4755s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2623s for 8192 events => throughput is 3.62E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross 
section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 28.7512s - [COUNTERS] Fortran Overhead ( 0 ) : 4.1062s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.6450s for 90112 events => throughput is 3.66E+03 events/s + [COUNTERS] PROGRAM TOTAL : 28.9819s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0727s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.9092s for 90112 events => throughput is 3.62E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.797450e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.783292e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.797670e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.725405e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.1814s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2134s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9680s for 8192 events => throughput is 8.46E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.3014s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3232s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9782s for 8192 events => throughput is 8.37E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' 
./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 13.5022s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8140s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.6882s for 90112 events => throughput is 8.43E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.6470s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8357s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.8113s for 90112 events => throughput is 8.33E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.687840e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.598183e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.709461e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.637409e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.9534s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1017s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8517s for 8192 events => throughput is 9.62E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9665s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1096s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8569s for 8192 events => throughput is 9.56E+03 events/s 
*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 12.2472s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7179s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.5293s for 90112 events => throughput is 9.46E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.1418s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7198s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4220s for 90112 events => throughput is 9.56E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.935854e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.845982e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.898733e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.851814e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.3911s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3291s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0620s for 8192 events => throughput is 7.71E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.4078s + [COUNTERS] Fortran Overhead ( 0 ) 
: 1.3366s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0711s for 8192 events => throughput is 7.65E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 14.6332s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9213s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.7119s for 90112 events => throughput is 7.69E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.7364s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9384s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.7980s for 90112 events => throughput is 7.64E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.814694e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.624404e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.756632e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.706462e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.8049s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7728s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0320s for 8192 events => 
throughput is 2.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8040s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7719s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421161E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.7088s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3616s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3472s for 90112 events => throughput is 2.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.7244s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3743s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3501s for 90112 events => throughput is 2.57E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.294446e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.297814e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.526802e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.527310e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.115140e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.102163e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 
16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.151029e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.155936e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.105278e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.100021e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.148035e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.139640e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.107513e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.109701e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.437634e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.439044e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index eda915608b..5857620d56 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,25 +15,25 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2023-10-28_13:23:01 +DATE: 2023-10-29_13:00:19 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.4392s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2797s - [COUNTERS] Fortran MEs ( 1 ) : 4.1594s for 8192 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3795s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2746s + [COUNTERS] Fortran MEs ( 1 ) : 4.1049s for 8192 events => throughput is 2.00E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3436s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2686s - [COUNTERS] Fortran MEs ( 1 ) : 4.0750s for 8192 events => throughput is 2.01E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3663s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2696s + [COUNTERS] Fortran MEs ( 1 ) : 4.0968s for 8192 events => throughput is 2.00E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 
events) - [COUNTERS] PROGRAM TOTAL : 46.6315s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8499s - [COUNTERS] Fortran MEs ( 1 ) : 44.7816s for 90112 events => throughput is 2.01E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.2548s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8877s + [COUNTERS] Fortran MEs ( 1 ) : 45.3671s for 90112 events => throughput is 1.99E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277396490802749E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.4026s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2286s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1741s for 8192 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.3133s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2211s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.0923s for 8192 events => throughput is 2.00E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803774602344628E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 50.5071s - [COUNTERS] Fortran Overhead ( 0 ) : 5.7965s - [COUNTERS] CudaCpp MEs ( 2 ) : 44.7106s for 90112 events => throughput is 2.02E+03 events/s + [COUNTERS] PROGRAM TOTAL : 51.0625s + [COUNTERS] Fortran Overhead ( 0 ) : 5.8484s + [COUNTERS] CudaCpp MEs ( 2 ) : 45.2141s for 90112 events => throughput is 1.99E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.074741e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.077086e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.057737e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.073397e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277389126121586E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.5511s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4494s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1017s for 8192 events => throughput is 7.44E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.4836s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3656s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1180s for 8192 events => throughput is 7.33E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803771887543366E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 15.1225s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9526s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.1699s for 90112 events => throughput is 7.40E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.3066s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9801s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.3265s for 90112 events => throughput is 7.31E+03 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.579541e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.486766e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.559073e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.500170e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277390198115864E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.2361s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7465s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4896s for 8192 events => throughput is 1.67E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.2621s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7667s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4954s for 8192 events => throughput is 1.65E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803774416711566E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.7262s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3331s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.3931s for 90112 events => throughput is 1.67E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.7860s + [COUNTERS] Fortran Overhead 
( 0 ) : 2.3557s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4304s for 90112 events => throughput is 1.66E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.717269e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.672778e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.720207e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.671538e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277390198115864E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.1137s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6861s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4276s for 8192 events => throughput is 1.92E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.1860s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7111s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4749s for 8192 events => throughput is 1.73E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803774416711566E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 6.9951s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2754s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.7197s for 90112 events => 
throughput is 1.91E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.1015s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3009s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.8006s for 90112 events => throughput is 1.88E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.959355e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.953167e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.901163e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.950659e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277396394633404E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.3121s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7881s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5240s for 8192 events => throughput is 1.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.3221s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7946s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5275s for 8192 events => throughput is 1.55E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803777741065333E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 8.1307s - 
[COUNTERS] Fortran Overhead ( 0 ) : 2.3840s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.7467s for 90112 events => throughput is 1.57E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.2283s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4081s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.8202s for 90112 events => throughput is 1.55E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.586523e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.569783e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.580672e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.543872e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,8 +514,8 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277400478491260E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.7681s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7468s + [COUNTERS] PROGRAM TOTAL : 0.7651s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7439s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0213s for 8192 events => throughput is 3.85E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803779990154892E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.5688s - 
[COUNTERS] Fortran Overhead ( 0 ) : 2.3337s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2351s for 90112 events => throughput is 3.83E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5781s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3444s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2337s for 90112 events => throughput is 3.86E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.589994e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.591648e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.944592e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.955455e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.486310e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.490718e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.668788e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.636724e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.486395e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 8.500184e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.665691e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.635071e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.463738e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.481845e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.523197e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.521601e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index fe5b743267..7d59783520 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg CUDACPP_BUILDDIR='.' 
+make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-28_13:26:17 +DATE: 2023-10-29_13:03:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3386s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2704s - [COUNTERS] Fortran MEs ( 1 ) : 4.0682s for 8192 events => throughput is 2.01E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3900s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2790s + [COUNTERS] Fortran MEs ( 1 ) : 4.1110s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3271s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2676s - [COUNTERS] Fortran MEs ( 1 ) : 4.0595s for 8192 events => throughput is 2.02E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3767s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2695s + [COUNTERS] Fortran MEs ( 1 ) : 4.1072s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] 
PROGRAM TOTAL : 46.7393s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8552s - [COUNTERS] Fortran MEs ( 1 ) : 44.8841s for 90112 events => throughput is 2.01E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.1449s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8850s + [COUNTERS] Fortran MEs ( 1 ) : 45.2599s for 90112 events => throughput is 1.99E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277432965013E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.6228s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3950s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.2278s for 8192 events => throughput is 1.94E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.9921s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4402s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.5519s for 8192 events => throughput is 1.80E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725813026109E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 52.8005s - [COUNTERS] Fortran Overhead ( 0 ) : 6.0032s - [COUNTERS] CudaCpp MEs ( 2 ) : 46.7973s for 90112 events => throughput is 1.93E+03 events/s + [COUNTERS] PROGRAM TOTAL : 53.4502s + [COUNTERS] Fortran Overhead ( 0 ) : 6.2745s + [COUNTERS] CudaCpp MEs ( 2 ) : 47.1757s for 90112 events => throughput is 1.91E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.984536e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.948358e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.986255e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.903929e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277430934464E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.6430s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4316s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2114s for 8192 events => throughput is 3.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6624s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4423s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2201s for 8192 events => throughput is 3.69E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725816246317E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 28.3439s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0168s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.3271s for 90112 events => throughput is 3.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 28.7188s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0748s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.6439s for 90112 events => throughput is 3.66E+03 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.788272e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.795677e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.779315e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.792989e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.1986s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2363s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9623s for 8192 events => throughput is 8.51E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.1727s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2124s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9604s for 8192 events => throughput is 8.53E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 13.4918s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8244s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.6674s for 90112 events => throughput is 8.45E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.5075s + [COUNTERS] Fortran 
Overhead ( 0 ) : 2.8197s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.6878s for 90112 events => throughput is 8.43E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.777106e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.555876e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.780881e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.758315e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.9468s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0986s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8482s for 8192 events => throughput is 9.66E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9665s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1095s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8570s for 8192 events => throughput is 9.56E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 12.0437s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6937s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.3500s for 
90112 events => throughput is 9.64E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.0957s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7176s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.3781s for 90112 events => throughput is 9.61E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.926013e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.926904e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.924664e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.926451e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.3947s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3271s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0675s for 8192 events => throughput is 7.67E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.4163s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3391s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0773s for 8192 events => throughput is 7.60E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL 
: 14.6834s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9387s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.7447s for 90112 events => throughput is 7.67E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.9339s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9969s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.9369s for 90112 events => throughput is 7.55E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.783001e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.702011e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.767596e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.700967e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277293084707E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.7998s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7683s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0315s for 8192 events => throughput is 2.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8061s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7739s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 
[1.5803725738731039E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.7129s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3677s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3453s for 90112 events => throughput is 2.61E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.7160s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3691s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3469s for 90112 events => throughput is 2.60E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.282969e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.292904e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.538257e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.534454e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.114802e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.105706e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.177837e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.154616e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.115570e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.118142e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.155287e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.168234e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.108118e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.106654e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.436982e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.434959e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 909442f839..fcea12d341 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -3,8 +3,8 @@ CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-28_13:30:47 +DATE: 2023-10-29_13:09:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 58ec19bcc2..875efe8ee1 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -4,8 +4,8 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none - make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-28_13:30:50 +DATE: 2023-10-29_13:09:18 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index f93dee6d06..4682066081 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -3,8 +3,8 @@ CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-28_13:30:53 +DATE: 2023-10-29_13:09:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 82d7b93a8b..0737ae3641 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_11:19:45 +DATE: 2023-10-29_13:07:50 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3071s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2370s - [COUNTERS] Fortran MEs ( 1 ) : 0.0701s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3009s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2314s + [COUNTERS] Fortran MEs ( 1 ) : 0.0695s for 8192 events => throughput is 1.18E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,8 +84,8 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2972s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2278s + [COUNTERS] PROGRAM TOTAL : 0.2966s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2273s [COUNTERS] Fortran MEs ( 1 ) : 0.0693s for 8192 events => throughput is 1.18E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.1557s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3979s - [COUNTERS] Fortran MEs ( 1 ) : 0.7578s for 90112 events => 
throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1643s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4082s + [COUNTERS] Fortran MEs ( 1 ) : 0.7562s for 90112 events => throughput is 1.19E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3888s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3140s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0748s for 8192 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3843s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3093s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0751s for 8192 events => throughput is 1.09E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.3174s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4958s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8215s for 90112 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3262s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5019s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8242s for 90112 events => throughput is 1.09E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.109455e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.090735e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.112734e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.108359e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,8 +210,8 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3139s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2735s + [COUNTERS] PROGRAM TOTAL : 0.3141s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2739s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0403s for 8192 events => throughput is 2.03E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615872] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.9098s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4660s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4439s for 90112 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9226s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4751s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4475s for 90112 events => throughput is 2.01E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.026202e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.007209e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.015864e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.014295e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2784s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2549s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0235s for 8192 events => throughput is 3.49E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2874s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2625s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0249s for 8192 events => throughput is 3.29E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6962s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4433s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2529s for 90112 events => throughput is 3.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7154s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4582s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2572s for 90112 events => throughput is 3.50E+05 events/s *** (2-avx2) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.519852e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.470014e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.514255e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.550614e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2755s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2546s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0209s for 8192 events => throughput is 3.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2943s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2715s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.59E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6760s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4457s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2302s for 90112 events => throughput is 3.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8061s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5544s + [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.2518s for 90112 events => throughput is 3.58E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.921611e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.837885e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.966924e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.941569e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3085s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2743s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0342s for 8192 events => throughput is 2.40E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2953s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2648s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0305s for 8192 events => throughput is 2.68E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8203s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4753s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3450s for 90112 events => throughput is 2.61E+05 events/s + [COUNTERS] PROGRAM 
TOTAL : 1.8085s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4687s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3398s for 90112 events => throughput is 2.65E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.334278e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.555875e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.347346e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.512531e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6584s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6578s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.20E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6599s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6592s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.23E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615869] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8641s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8565s - [COUNTERS] CudaCpp MEs ( 2 ) : 
0.0076s for 90112 events => throughput is 1.18E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.8566s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8491s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 90112 events => throughput is 1.20E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.470058e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.626429e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.083688e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.164908e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.104258e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.514643e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.522760e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.532707e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.096774e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.528292e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 
128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.800964e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.808574e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.099625e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.524629e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.781256e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.780489e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index be890c5e3f..d29ea05be4 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -4,8 +4,8 @@ CUDACPP_BUILDDIR='.' 
make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,12 +15,12 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_11:22:06 +DATE: 2023-10-29_13:08:18 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.2990s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2303s - [COUNTERS] Fortran MEs ( 1 ) : 0.0687s for 8192 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3065s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2366s + [COUNTERS] Fortran MEs ( 1 ) : 0.0699s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2971s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2281s - [COUNTERS] Fortran MEs ( 1 ) : 0.0691s for 8192 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2993s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2295s + [COUNTERS] Fortran MEs ( 1 ) : 0.0698s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 
[0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.1544s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3991s - [COUNTERS] Fortran MEs ( 1 ) : 0.7553s for 90112 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1612s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4019s + [COUNTERS] Fortran MEs ( 1 ) : 0.7593s for 90112 events => throughput is 1.19E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050316058770007] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3726s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3023s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0704s for 8192 events => throughput is 1.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3756s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3032s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0724s for 8192 events => throughput is 1.13E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182797520666] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2661s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4911s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7750s for 90112 events => throughput is 1.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2740s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4981s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7758s for 90112 events => throughput is 1.16E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.175314e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.177467e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.172660e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.179284e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313133963987] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2810s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2562s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0248s for 8192 events => throughput is 3.30E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2833s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2582s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0251s for 8192 events => throughput is 3.27E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179276862181] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7264s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4508s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2756s for 90112 events => throughput is 3.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7245s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4500s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2745s for 90112 events => throughput is 3.28E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.185187e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.180913e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.174762e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.242069e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313344346482] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2571s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2447s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0123s for 8192 events => throughput is 6.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2596s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2471s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 8192 events => throughput is 6.54E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179137376883] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.5675s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4330s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1345s for 90112 events => throughput is 6.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5740s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4386s + [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.1354s for 90112 events => throughput is 6.65E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.522440e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.359345e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.430322e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.441020e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313344346482] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2547s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2431s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0116s for 8192 events => throughput is 7.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2579s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2465s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0113s for 8192 events => throughput is 7.24E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179137376883] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.5565s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4315s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1251s for 90112 events => throughput is 7.21E+05 events/s + [COUNTERS] PROGRAM 
TOTAL : 1.5633s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4375s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1258s for 90112 events => throughput is 7.16E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.888139e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.843445e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.958387e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.005906e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050317064561834] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2628s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2475s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2653s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2499s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0154s for 8192 events => throughput is 5.33E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182143140752] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6690s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4890s - [COUNTERS] CudaCpp MEs ( 2 ) 
: 0.1801s for 90112 events => throughput is 5.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6127s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4425s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1702s for 90112 events => throughput is 5.30E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.448447e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.123333e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.498943e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.944707e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050319131407651] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6629s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6624s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.57E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6566s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6561s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.60E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801186038252196] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8530s - 
[COUNTERS] Fortran Overhead ( 0 ) : 1.8470s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 90112 events => throughput is 1.51E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.8574s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8516s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 90112 events => throughput is 1.55E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.626121e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.901744e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.433120e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.487017e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.189243e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.083051e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.716903e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.712159e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.171649e+07 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 5.067564e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.785778e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.805074e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.864395e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.606078e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.004377e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.001322e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index c29c2a87c5..8819a1b530 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' 
-CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_11:24:26 +DATE: 2023-10-29_13:08:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.2999s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2315s - [COUNTERS] Fortran MEs ( 1 ) : 0.0684s for 8192 events => throughput is 1.20E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3025s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2324s + [COUNTERS] Fortran MEs ( 1 ) : 0.0701s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 
0.2970s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2284s - [COUNTERS] Fortran MEs ( 1 ) : 0.0687s for 8192 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3004s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2308s + [COUNTERS] Fortran MEs ( 1 ) : 0.0696s for 8192 events => throughput is 1.18E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.1561s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4003s - [COUNTERS] Fortran MEs ( 1 ) : 0.7559s for 90112 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1848s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4220s + [COUNTERS] Fortran MEs ( 1 ) : 0.7628s for 90112 events => throughput is 1.18E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333282657206] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3817s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3068s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3828s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3082s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0746s for 8192 events => throughput is 1.10E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182636608796] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - 
[COUNTERS] PROGRAM TOTAL : 2.3168s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4948s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8220s for 90112 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3233s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5007s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8226s for 90112 events => throughput is 1.10E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.093427e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.095408e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.109661e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.097494e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333282657201] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3099s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2712s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0387s for 8192 events => throughput is 2.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3103s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2713s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0389s for 8192 events => throughput is 2.10E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 
[0.21801182636608810] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8886s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4607s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4279s for 90112 events => throughput is 2.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8953s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4667s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4285s for 90112 events => throughput is 2.10E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.037813e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.018304e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.067790e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.009040e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2773s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2543s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0230s for 8192 events => throughput is 3.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2791s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2562s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0229s for 8192 events => throughput is 3.58E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < 
/tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7024s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4488s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2536s for 90112 events => throughput is 3.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7088s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4549s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2539s for 90112 events => throughput is 3.55E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.543078e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.593623e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.581096e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.486898e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2790s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2573s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0217s for 8192 events => throughput is 3.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2738s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2535s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0203s for 8192 events => throughput is 4.04E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to 
MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6914s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4643s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2270s for 90112 events => throughput is 3.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6819s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4583s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2236s for 90112 events => throughput is 4.03E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.967089e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.970058e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.065644e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.025775e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3065s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2716s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0349s for 8192 events => throughput is 2.35E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2984s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2669s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0315s for 8192 
events => throughput is 2.60E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8446s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4881s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3566s for 90112 events => throughput is 2.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8465s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4832s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3632s for 90112 events => throughput is 2.48E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.525942e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.433476e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.487063e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.502868e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333301029693] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6573s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6566s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.20E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6561s + [COUNTERS] 
Fortran Overhead ( 0 ) : 0.6555s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.22E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182637219935] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8595s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8518s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 90112 events => throughput is 1.18E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.9667s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9586s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 90112 events => throughput is 1.11E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.461495e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.628198e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.077164e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.054061e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.105487e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.525972e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.495666e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.505112e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.099165e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.534479e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.801794e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.802635e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.099654e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.531969e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.781230e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.782148e+07 ) sec^-1 TEST COMPLETED From aea8b17ce795fbde933549d1e6b01af8168d5372 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 29 Oct 2023 14:01:06 +0100 Subject: [PATCH 073/119] [oct23av] add to the repo heft_gg_h.sa/mg5.in which I had forgotten --- epochX/cudacpp/heft_gg_h.sa/mg5.in | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 epochX/cudacpp/heft_gg_h.sa/mg5.in diff --git a/epochX/cudacpp/heft_gg_h.sa/mg5.in b/epochX/cudacpp/heft_gg_h.sa/mg5.in new file mode 100644 index 0000000000..e6b7da69cc --- /dev/null +++ 
b/epochX/cudacpp/heft_gg_h.sa/mg5.in @@ -0,0 +1,6 @@ +set stdout_level DEBUG +set zerowidth_tchannel F +set auto_convert_model T +import model heft +generate g g > h +output standalone_cudacpp heft_gg_h.sa From ce9892dbc5ba643d8a65153bb8868e7b33fcfb8d Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 29 Oct 2023 14:19:04 +0100 Subject: [PATCH 074/119] [oct23av] in CODEGEN fix cudacpp_src.mk for non SM processes (fix bug in my previous commit) --- .../madgraph/iolibs/template_files/gpu/cudacpp_src.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk index e3febf9a0c..25b6f8f7c8 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk @@ -257,9 +257,9 @@ $(BUILDDIR)/%%_cu.o : %%.cc *.h $(BUILDDIR)/.build.$(TAG) #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, Parameters_sm.o read_slha.o) +cxx_objects=$(addprefix $(BUILDDIR)/, Parameters_%(model)s.o read_slha.o) ifneq ($(NVCC),) -cu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_cu.o) +cu_objects=$(addprefix $(BUILDDIR)/, Parameters_%(model)s_cu.o) endif # Target (and build rules): common (src) library From 904d6886da625a98fc24b481cf6ccbe02c6be315 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 29 Oct 2023 21:54:47 +0100 Subject: [PATCH 075/119] [oct23av/nobm] in CODEGEN check_sa.cc, enable FPEs in check_sa.cc to debug #733 (*only if CUDACPP_RUNTIME_ENABLEFPE is set and non empty*) This is a backport from nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu in branch nobm In nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu I now get the following: CUDA_HOME=none HRDCOD=1 make -j 
-f cudacpp.mk CUDACPP_RUNTIME_ENABLEFPE=on ./check.exe -p 1 8 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions (ompnumthreadsNotSetMeansOneThread) DEBUG: OMP_NUM_THREADS is not set: will use only 1 thread (ompnumthreadsNotSetMeansOneThread) omp_get_max_threads() = 1 INFO: The application is built for skylake-avx512 (AVX512VL) and the host supports it INFO: The application is built for skylake-avx512 (AVX512VL) and the host supports it Floating Point Exception (CPU) --- .../iolibs/template_files/gpu/check_sa.cc | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; From 354d511b0614d98a40ffd7035061b6833841f318 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 29 Oct 2023 22:23:27 +0100 Subject: [PATCH 076/119] [oct23av] "regenerate" all processes with the new check_sa.cc that optionally enables FPEs Actually, copy the new file manually rather than regenerating for f in $(git ls-tree --name-only HEAD */SubProcesses/P*/check_sa.cc); do \cp CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc $f; done --- .../SubProcesses/P1_epem_mupmum/check_sa.cc | 31 +++++++++++++++++++ .../P1_Sigma_sm_epem_mupmum/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P1_gg_ttx/check_sa.cc | 31 +++++++++++++++++++ .../P1_Sigma_sm_gg_ttx/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P1_gg_ttx/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P2_gg_ttxg/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P1_gg_ttxg/check_sa.cc | 31 +++++++++++++++++++ .../P1_Sigma_sm_gg_ttxg/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P1_gg_ttxgg/check_sa.cc | 31 +++++++++++++++++++ .../P1_Sigma_sm_gg_ttxgg/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P1_gg_ttxggg/check_sa.cc | 31 +++++++++++++++++++ .../P1_Sigma_sm_gg_ttxggg/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P1_gu_ttxu/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P1_gux_ttxux/check_sa.cc | 31 +++++++++++++++++++ .../P1_Sigma_sm_gu_ttxu/check_sa.cc | 31 +++++++++++++++++++ .../P1_Sigma_sm_gux_ttxux/check_sa.cc | 31 +++++++++++++++++++ .../P1_Sigma_heft_gg_h/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P0_gg_ttx/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P0_uux_ttx/check_sa.cc | 31 +++++++++++++++++++ 
.../SubProcesses/P1_gg_ttxg/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P1_gu_ttxu/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P1_gux_ttxux/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P1_uux_ttxg/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P2_gg_ttxgg/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P2_gg_ttxuux/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P2_gu_ttxgu/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P2_gux_ttxgux/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P2_uc_ttxuc/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P2_ucx_ttxucx/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P2_uu_ttxuu/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P2_uux_ttxccx/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P2_uux_ttxgg/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P2_uux_ttxuux/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P2_uxcx_ttxuxcx/check_sa.cc | 31 +++++++++++++++++++ .../SubProcesses/P2_uxux_ttxuxux/check_sa.cc | 31 +++++++++++++++++++ 35 files changed, 1085 insertions(+) diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/check_sa.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/check_sa.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 
@@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/check_sa.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/check_sa.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/check_sa.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/check_sa.cc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/check_sa.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/check_sa.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/check_sa.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/check_sa.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/check_sa.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/check_sa.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/check_sa.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/check_sa.cc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/check_sa.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/check_sa.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/check_sa.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/check_sa.cc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/check_sa.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/check_sa.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/check_sa.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/check_sa.cc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/check_sa.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/check_sa.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/check_sa.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/check_sa.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/check_sa.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/check_sa.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/check_sa.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/check_sa.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/check_sa.cc b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/check_sa.cc +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/check_sa.cc index f5f08dc64e..d2af908f0d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/check_sa.cc @@ -28,7 +28,9 @@ #include #include +#include // for feenableexcept #include +#include // for signal and SIGFPE #include #include #include @@ -73,6 +75,23 @@ usage( char* argv0, int ret = 1 ) return ret; } +#ifdef __CUDACC__ +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + inline void FPEhandler( int sig ) + { +#ifdef __CUDACC__ + std::cerr << "Floating Point Exception (GPU)" << std::endl; +#else + std::cerr << "Floating Point Exception (CPU)" << std::endl; +#endif + exit( 0 ); + } +} + int main( int argc, char** argv ) { @@ -83,6 +102,18 @@ main( int argc, char** argv ) using namespace mg5amcCpu; #endif + // Enable FPEs (test #701 and #733 - except on MacOS where feenableexcept is not defined #730) +#ifndef __APPLE__ + const char* enableFPEc = getenv( "CUDACPP_RUNTIME_ENABLEFPE" ); + const bool enableFPE = ( enableFPEc != 0 ) && ( std::string( enableFPEc ) != "" ); + if( enableFPE ) + { + std::cout << "WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions" << std::endl; + feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW ); // debug #701 + signal( SIGFPE, FPEhandler ); + } +#endif + // DEFAULTS FOR COMMAND LINE ARGUMENTS bool verbose = false; bool debug = false; From b6cdb0219d298ac655f3a3d00364b19bc464d186 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Sun, 29 Oct 2023 22:29:02 +0100 Subject: [PATCH 077/119] [oct23av] in tput tests, enable FPEs in check.exe by default (unless -nofpe is specified) --- epochX/cudacpp/tput/teeThroughputX.sh | 6 +++++- epochX/cudacpp/tput/throughputX.sh | 15 ++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/epochX/cudacpp/tput/teeThroughputX.sh b/epochX/cudacpp/tput/teeThroughputX.sh index 07a14e6b61..49dd587932 100755 --- a/epochX/cudacpp/tput/teeThroughputX.sh +++ b/epochX/cudacpp/tput/teeThroughputX.sh @@ -9,7 +9,7 @@ cd $scrdir function usage() { - echo "Usage: $0 [-sa] [-noalpaka] [-flt|-fltonly|-mix|-mixonly] [-inl|-inlonly] [-hrd|-hrdonly] [-common|-curhst] [-rmbhst|-bridge] [-makeonly] [-makeclean] [-makej] [-dlp ]" + echo "Usage: $0 [-sa] [-noalpaka] [-flt|-fltonly|-mix|-mixonly] [-inl|-inlonly] [-hrd|-hrdonly] [-common|-curhst] [-rmbhst|-bridge] [-makeonly] [-makeclean] [-makej] [-nofpe] [-dlp ]" exit 1 } @@ -30,6 +30,7 @@ rndgen= rmbsmp= steps="make test" makej= +nofpe= dlp= dlpset=0 @@ -110,6 +111,8 @@ for arg in $*; do fi elif [ "$arg" == "-makej" ]; then makej=-makej + elif [ "$arg" == "-nofpe" ]; then + nofpe=-nofpe else echo "ERROR! 
Invalid option '$arg'"; usage fi @@ -157,6 +160,7 @@ for step in $steps; do args="${args} ${alpaka}" # optionally disable alpaka tests args="${args} ${rndgen}" # optionally use common random numbers or curand on host args="${args} ${rmbsmp}" # optionally use rambo or bridge on host + args="${args} ${nofpe}" # optionally disable FPEs args="${args} -avxall" # avx, fptype, helinl and hrdcod are now supported for all processes if [ "${step}" == "makeclean" ]; then printf "\n%80s\n" |tr " " "*" diff --git a/epochX/cudacpp/tput/throughputX.sh b/epochX/cudacpp/tput/throughputX.sh index bd656c5b93..430a82df5e 100755 --- a/epochX/cudacpp/tput/throughputX.sh +++ b/epochX/cudacpp/tput/throughputX.sh @@ -12,7 +12,7 @@ topdir=$(cd $scrdir; cd ../../..; pwd) function usage() { - echo "Usage: $0 [-nocpp|[-avxall][-nocuda][-noneonly][-sse4only][-avx2only][-512yonly][-512zonly]] [-sa] [-noalpaka] [-flt|-fltonly|-mix|-mixonly] [-inl|-inlonly] [-hrd|-hrdonly] [-common|-curhst] [-rmbhst|-bridge] [-omp] [-makeonly|-makeclean|-makecleanonly|-dryrun] [-makej] [-3a3b] [-div] [-req] [-detailed] [-gtest] [-v] [-dlp ]" + echo "Usage: $0 [-nocpp|[-avxall][-nocuda][-noneonly][-sse4only][-avx2only][-512yonly][-512zonly]] [-sa] [-noalpaka] [-flt|-fltonly|-mix|-mixonly] [-inl|-inlonly] [-hrd|-hrdonly] [-common|-curhst] [-rmbhst|-bridge] [-omp] [-makeonly|-makeclean|-makecleanonly|-dryrun] [-makej] [-3a3b] [-div] [-req] [-detailed] [-gtest] [-nofpe] [-v] [-dlp ]" exit 1 } @@ -51,6 +51,7 @@ div=0 req=0 detailed=0 gtest=0 +nofpe=0 verbose=0 dlp= @@ -219,6 +220,9 @@ while [ "$1" != "" ]; do if [ "${cpp}" == "0" ]; then echo "ERROR! 
Options -gtest and -nocpp are incompatible"; usage; fi gtest=1 shift + elif [ "$1" == "-nofpe" ]; then + nofpe=1 + shift elif [ "$1" == "-v" ]; then verbose=1 shift @@ -239,6 +243,15 @@ if [ "${dlp}" != "" ]; then export DYLD_LIBRARY_PATH=$dlp fi +# Enable FPEs in check.exe by default (see #733) +if [ "${nofpe}" == "0" ]; then + echo "export CUDACPP_RUNTIME_ENABLEFPE=on" + export CUDACPP_RUNTIME_ENABLEFPE=on +else + echo "unset CUDACPP_RUNTIME_ENABLEFPE" + unset CUDACPP_RUNTIME_ENABLEFPE +fi + # Check that at least one process has been selected if [ "${eemumu}" == "0" ] && [ "${ggtt}" == "0" ] && [ "${ggttg}" == "0" ] && [ "${ggttgg}" == "0" ] && [ "${ggttggg}" == "0" ] && [ "${gqttq}" == "0" ] && [ "${heftggh}" == "0" ]; then usage; fi From 2a82105153d4378aa9e627256af6affd54cf7122 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 30 Oct 2023 08:24:11 +0100 Subject: [PATCH 078/119] [oct23av] rerun 78 tput tests, with FPEs enabled in the check executable - some failures in ggttg f/m and gqttq f (#783), no change in performance tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt:Floating Point Exception (CPU) tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt:Floating Point Exception (CPU) tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt:Floating Point Exception (CPU) tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt:Floating Point Exception (CPU) tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt:Floating Point Exception (CPU) tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt:Floating Point Exception (CPU) tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt:Floating Point Exception (CPU) tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt:Floating Point Exception (CPU) STARTED AT Sun Oct 29 10:38:58 PM CET 2023 ./tput/teeThroughputX.sh -mix -hrd -makej -eemumu -ggtt -ggttg -ggttgg -gqttq -ggttggg -makeclean ENDED(1) AT Sun Oct 29 11:10:35 PM CET 2023 [Status=2] ./tput/teeThroughputX.sh -flt -hrd -makej -eemumu -ggtt -ggttgg -inlonly -makeclean ENDED(2) 
AT Sun Oct 29 11:22:39 PM CET 2023 [Status=0] ./tput/teeThroughputX.sh -makej -eemumu -ggtt -ggttg -gqttq -ggttgg -ggttggg -flt -bridge -makeclean ENDED(3) AT Sun Oct 29 11:31:39 PM CET 2023 [Status=2] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -rmbhst ENDED(4) AT Sun Oct 29 11:34:48 PM CET 2023 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -curhst ENDED(5) AT Sun Oct 29 11:37:55 PM CET 2023 [Status=0] --- .../log_eemumu_mad_d_inl0_hrd0.txt | 94 +++++----- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 94 +++++----- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 94 +++++----- .../log_eemumu_mad_d_inl0_hrd0_curhst.txt | 94 +++++----- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 94 +++++----- .../log_eemumu_mad_d_inl0_hrd1.txt | 94 +++++----- .../log_eemumu_mad_d_inl1_hrd0.txt | 94 +++++----- .../log_eemumu_mad_d_inl1_hrd1.txt | 94 +++++----- .../log_eemumu_mad_f_inl0_hrd0.txt | 94 +++++----- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 94 +++++----- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 94 +++++----- .../log_eemumu_mad_f_inl0_hrd0_curhst.txt | 94 +++++----- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 94 +++++----- .../log_eemumu_mad_f_inl0_hrd1.txt | 94 +++++----- .../log_eemumu_mad_f_inl1_hrd0.txt | 94 +++++----- .../log_eemumu_mad_f_inl1_hrd1.txt | 94 +++++----- .../log_eemumu_mad_m_inl0_hrd0.txt | 94 +++++----- .../log_eemumu_mad_m_inl0_hrd1.txt | 94 +++++----- .../log_ggtt_mad_d_inl0_hrd0.txt | 94 +++++----- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 94 +++++----- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 94 +++++----- .../log_ggtt_mad_d_inl0_hrd0_curhst.txt | 94 +++++----- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 94 +++++----- .../log_ggtt_mad_d_inl0_hrd1.txt | 94 +++++----- .../log_ggtt_mad_d_inl1_hrd0.txt | 94 +++++----- .../log_ggtt_mad_d_inl1_hrd1.txt | 94 +++++----- .../log_ggtt_mad_f_inl0_hrd0.txt | 94 +++++----- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 94 +++++----- .../log_ggtt_mad_f_inl0_hrd0_common.txt | 94 +++++----- 
.../log_ggtt_mad_f_inl0_hrd0_curhst.txt | 94 +++++----- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 94 +++++----- .../log_ggtt_mad_f_inl0_hrd1.txt | 94 +++++----- .../log_ggtt_mad_f_inl1_hrd0.txt | 94 +++++----- .../log_ggtt_mad_f_inl1_hrd1.txt | 94 +++++----- .../log_ggtt_mad_m_inl0_hrd0.txt | 94 +++++----- .../log_ggtt_mad_m_inl0_hrd1.txt | 94 +++++----- .../log_ggttg_mad_d_inl0_hrd0.txt | 109 ++++++------ .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 109 ++++++------ .../log_ggttg_mad_d_inl0_hrd1.txt | 109 ++++++------ .../log_ggttg_mad_f_inl0_hrd0.txt | 163 +++-------------- .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 167 +++--------------- .../log_ggttg_mad_f_inl0_hrd1.txt | 163 +++-------------- .../log_ggttg_mad_m_inl0_hrd0.txt | 163 +++-------------- .../log_ggttg_mad_m_inl0_hrd1.txt | 163 +++-------------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 109 ++++++------ .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 109 ++++++------ .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 109 ++++++------ .../log_ggttgg_mad_d_inl0_hrd0_curhst.txt | 109 ++++++------ .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 109 ++++++------ .../log_ggttgg_mad_d_inl0_hrd1.txt | 109 ++++++------ .../log_ggttgg_mad_d_inl1_hrd0.txt | 109 ++++++------ .../log_ggttgg_mad_d_inl1_hrd1.txt | 109 ++++++------ .../log_ggttgg_mad_f_inl0_hrd0.txt | 109 ++++++------ .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 109 ++++++------ .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 109 ++++++------ .../log_ggttgg_mad_f_inl0_hrd0_curhst.txt | 109 ++++++------ .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 109 ++++++------ .../log_ggttgg_mad_f_inl0_hrd1.txt | 109 ++++++------ .../log_ggttgg_mad_f_inl1_hrd0.txt | 109 ++++++------ .../log_ggttgg_mad_f_inl1_hrd1.txt | 109 ++++++------ .../log_ggttgg_mad_m_inl0_hrd0.txt | 109 ++++++------ .../log_ggttgg_mad_m_inl0_hrd1.txt | 109 ++++++------ .../log_ggttggg_mad_d_inl0_hrd0.txt | 109 ++++++------ .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 109 ++++++------ 
.../log_ggttggg_mad_d_inl0_hrd1.txt | 109 ++++++------ .../log_ggttggg_mad_f_inl0_hrd0.txt | 109 ++++++------ .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 109 ++++++------ .../log_ggttggg_mad_f_inl0_hrd1.txt | 109 ++++++------ .../log_ggttggg_mad_m_inl0_hrd0.txt | 109 ++++++------ .../log_ggttggg_mad_m_inl0_hrd1.txt | 109 ++++++------ .../log_gqttq_mad_d_inl0_hrd0.txt | 109 ++++++------ .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 109 ++++++------ .../log_gqttq_mad_d_inl0_hrd1.txt | 109 ++++++------ .../log_gqttq_mad_f_inl0_hrd0.txt | 119 ++++++------- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 119 ++++++------- .../log_gqttq_mad_f_inl0_hrd1.txt | 119 ++++++------- .../log_gqttq_mad_m_inl0_hrd0.txt | 109 ++++++------ .../log_gqttq_mad_m_inl0_hrd1.txt | 109 ++++++------ 78 files changed, 4138 insertions(+), 4128 deletions(-) diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 3b679f61da..fcb1a09690 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_12:04:07 +DATE: 2023-10-29_22:52:42 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.690864e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.871080e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.072256e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.441278e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.808585e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.995356e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.657274 sec - 2,655,184,309 cycles # 2.998 GHz - 4,121,854,344 instructions # 1.55 insn per cycle - 0.945327418 seconds time elapsed +TOTAL : 0.659041 sec + 2,655,944,917 cycles # 2.986 GHz + 4,091,509,662 instructions # 1.54 insn per cycle + 0.949905999 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.147782e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.346714e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.346714e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.133342e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.331411e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.331411e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.875373 sec - 18,262,081,421 cycles # 3.106 GHz - 44,034,454,795 instructions # 2.41 insn per cycle - 5.880557547 seconds time elapsed +TOTAL : 5.947606 sec + 18,307,117,078 cycles # 3.078 GHz + 44,036,437,117 instructions # 2.41 insn per cycle + 5.953000211 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.692372e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.217423e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.217423e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.674713e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.194936e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.194936e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.111533 sec - 12,775,111,266 cycles # 3.105 GHz - 31,002,507,522 instructions # 2.43 insn per cycle - 4.116781762 seconds time elapsed +TOTAL : 4.155089 sec + 12,769,294,041 cycles # 3.071 GHz + 31,001,341,425 instructions # 2.43 insn per cycle + 4.160412827 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.108893e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.934033e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.934033e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.069533e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.891926e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.891926e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.372821 sec - 10,022,359,660 cycles # 2.968 GHz - 19,377,807,125 instructions # 1.93 insn per cycle - 3.377931371 seconds time elapsed +TOTAL : 3.433544 sec + 10,048,236,563 cycles # 2.924 GHz + 19,377,551,834 instructions # 1.93 insn per cycle + 3.439026139 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.187659e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.076628e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.076628e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.117957e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.984406e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.984406e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.262058 sec - 9,657,902,742 cycles # 2.957 GHz - 19,007,012,523 instructions # 1.97 insn per cycle - 3.267075123 seconds time elapsed +TOTAL : 3.366988 sec + 9,747,736,709 cycles # 2.891 GHz + 18,995,639,372 instructions # 1.95 insn per cycle + 3.372542324 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.858663e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.471817e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.471817e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.859802e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.480378e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.480378e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.779777 sec - 8,578,041,378 cycles # 2.267 GHz - 15,737,549,950 instructions # 1.83 insn per cycle - 3.784977813 seconds time elapsed +TOTAL : 3.777564 sec + 8,636,054,672 cycles # 2.284 GHz + 15,738,754,146 instructions # 1.82 insn per cycle + 3.782897527 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index ab94719473..f0a5d61068 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,11 +37,12 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_12:35:23 +DATE: 2023-10-29_23:25:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -48,15 +50,16 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.839257e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.789527e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.789527e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.723996e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.742880e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.742880e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.172159 sec - 7,387,905,092 cycles # 3.062 GHz - 13,235,801,583 instructions # 1.79 insn per cycle - 2.468887135 seconds time elapsed +TOTAL : 2.230974 sec + 7,379,150,488 cycles # 2.984 GHz + 13,205,445,723 instructions # 1.79 insn per cycle + 2.529344428 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -72,20 +75,21 @@ Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.102401e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.286601e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.286601e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.092591e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.273879e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.273879e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.300116 sec - 19,508,744,656 cycles # 3.094 GHz - 44,261,234,942 instructions # 2.27 insn per cycle - 6.306471740 seconds time elapsed +TOTAL : 6.358221 sec + 19,566,096,744 cycles # 3.075 GHz + 44,262,224,036 instructions # 2.26 insn per cycle + 6.364626121 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -99,20 +103,21 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.575147e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.034583e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.034583e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.592498e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.055287e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.055287e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.597477 sec - 14,011,492,780 cycles # 3.044 GHz - 31,844,820,754 instructions # 2.27 insn per cycle - 4.603973232 seconds time elapsed +TOTAL : 4.554785 sec + 14,092,448,650 cycles # 3.091 GHz + 31,845,159,601 instructions # 2.26 insn per cycle + 4.561253879 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -126,20 +131,21 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.960593e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.674342e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.674342e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.943333e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.647219e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.647219e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.816035 sec - 11,316,824,245 cycles # 2.962 GHz - 20,738,410,012 instructions # 1.83 insn per cycle - 3.822348357 seconds time elapsed +TOTAL : 3.850255 sec + 11,400,433,618 cycles # 2.957 GHz + 20,739,426,744 instructions # 1.82 insn per cycle + 3.856612056 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -153,20 +159,21 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.017635e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.775212e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.775212e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.027888e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.786547e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.786547e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.721718 sec - 10,991,240,129 cycles # 2.950 GHz - 20,368,407,726 instructions # 1.85 insn per cycle - 3.728042967 seconds time elapsed +TOTAL : 3.709173 sec + 10,999,574,658 cycles # 2.961 GHz + 20,354,799,189 instructions # 1.85 insn per cycle + 3.715908173 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -180,20 +187,21 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.759549e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.300261e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.300261e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.731760e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.259965e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.259965e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.185030 sec - 9,952,016,559 cycles # 2.376 GHz - 16,884,994,577 instructions # 1.70 insn per cycle - 4.191402168 seconds time elapsed +TOTAL : 4.246654 sec + 9,990,693,259 cycles # 2.349 GHz + 16,883,565,363 instructions # 1.69 insn per cycle + 4.253083527 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index 1f9b823b55..f947eb5e4f 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_12:48:08 +DATE: 2023-10-29_23:38:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --common OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.829033e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.630053e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.003352e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.828756e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.593601e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.985424e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.308179 sec - 4,682,134,663 cycles # 3.050 GHz - 7,265,916,082 instructions # 1.55 insn per cycle - 1.592358280 seconds time elapsed +TOTAL : 1.301764 sec + 4,658,061,955 cycles # 3.045 GHz + 7,310,344,851 instructions # 1.57 insn per cycle + 1.586710861 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.141721e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.338591e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.338591e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.139293e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.337431e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.337431e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 6.257064 sec - 19,363,791,469 cycles # 3.093 GHz - 44,137,580,779 instructions # 2.28 insn per cycle - 6.262074313 seconds time elapsed +TOTAL : 6.266741 sec + 19,392,539,175 cycles # 3.092 GHz + 44,137,489,256 instructions # 2.28 insn per cycle + 6.271951422 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 
1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.672529e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.182072e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.182072e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.679246e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.201189e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.201189e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.506161 sec - 13,825,876,732 cycles # 3.065 GHz - 31,003,441,377 instructions # 2.24 insn per cycle - 4.511293914 seconds time elapsed +TOTAL : 4.486828 sec + 13,867,056,584 cycles # 3.088 GHz + 31,003,527,990 instructions # 2.24 insn per cycle + 4.491941017 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.095787e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.921433e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.921433e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.048775e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.856548e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.856548e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.739541 sec - 11,127,620,738 cycles # 2.972 GHz - 19,279,101,554 instructions # 1.73 insn per cycle - 3.744717536 seconds time elapsed +TOTAL : 3.823093 sec + 11,134,861,193 cycles # 2.909 GHz + 19,279,222,948 instructions # 1.73 insn per cycle + 3.828263919 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.178706e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.065359e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.065359e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.168575e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.075543e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.075543e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.636476 sec - 10,830,671,879 cycles # 2.975 GHz - 18,707,850,182 instructions # 1.73 insn per cycle - 3.641699159 seconds time elapsed +TOTAL : 3.648315 sec + 10,872,837,389 cycles # 2.977 GHz + 18,709,340,835 instructions # 1.72 insn per cycle + 3.653593087 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.860313e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.476557e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.476557e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.850581e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.463998e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.463998e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.137153 sec - 9,732,284,492 cycles # 2.350 GHz - 15,439,344,480 instructions # 1.59 insn per cycle - 4.142397756 seconds time elapsed +TOTAL : 4.155365 sec + 9,750,987,746 cycles # 2.345 GHz + 15,439,079,476 instructions # 1.58 insn per cycle + 4.160593254 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt index a57c0afd5f..1a9ec3322a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_12:45:01 +DATE: 2023-10-29_23:35:02 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --curhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.857201e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.666270e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.055653e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.853721e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.668709e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.056055e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.957338 sec - 3,568,978,348 cycles # 3.013 GHz - 7,097,529,887 instructions # 1.99 insn per cycle - 1.241766786 seconds time elapsed +TOTAL : 0.960035 sec + 3,582,011,015 cycles # 3.020 GHz + 7,167,810,914 instructions # 2.00 insn per cycle + 1.244702782 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.140283e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.338232e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.338232e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.141208e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.339644e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.339644e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.913191 sec - 18,291,555,987 cycles # 3.092 GHz - 44,034,842,699 instructions # 2.41 insn per cycle - 5.918338832 seconds time elapsed +TOTAL : 5.916888 sec + 18,292,936,252 cycles # 3.089 GHz + 44,034,839,530 instructions # 2.41 insn per cycle + 5.922158252 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 
1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.652983e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.156618e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.156618e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.682506e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.200241e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.200241e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.207372 sec - 12,748,693,047 cycles # 3.027 GHz - 31,000,802,407 instructions # 2.43 insn per cycle - 4.212630223 seconds time elapsed +TOTAL : 4.132115 sec + 12,770,466,467 cycles # 3.087 GHz + 31,001,130,857 instructions # 2.43 insn per cycle + 4.137366192 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.098198e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.924886e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.924886e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.092532e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.926894e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.926894e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.387559 sec - 10,061,207,969 cycles # 2.966 GHz - 19,378,764,597 instructions # 1.93 insn per cycle - 3.392871997 seconds time elapsed +TOTAL : 3.397172 sec + 10,064,493,963 cycles # 2.959 GHz + 19,377,092,544 instructions # 1.93 insn per cycle + 3.402411959 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.192299e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.089090e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.089090e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.185793e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.080619e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.080619e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.258563 sec - 9,691,496,400 cycles # 2.971 GHz - 18,995,322,883 instructions # 1.96 insn per cycle - 3.263721421 seconds time elapsed +TOTAL : 3.263660 sec + 9,723,240,197 cycles # 2.975 GHz + 19,005,869,214 instructions # 1.95 insn per cycle + 3.268973832 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.880972e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.505823e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.505823e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.878162e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.510088e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.510088e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.735319 sec - 8,600,046,261 cycles # 2.300 GHz - 15,737,440,798 instructions # 1.83 insn per cycle - 3.740664421 seconds time elapsed +TOTAL : 3.740304 sec + 8,619,487,033 cycles # 2.302 GHz + 15,737,558,338 instructions # 1.83 insn per cycle + 3.745598337 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 14e59ac3a7..aca39aea62 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,24 +37,26 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_12:41:52 +DATE: 2023-10-29_23:31:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 12 --rmbhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.321455e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.580409e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.928218e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.246166e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.586744e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.910156e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.815521 sec - 6,300,479,484 cycles # 3.068 GHz - 11,616,768,393 instructions # 1.84 insn per cycle - 2.110018829 seconds time elapsed +TOTAL : 1.829625 sec + 6,294,255,275 cycles # 3.060 GHz + 11,456,031,669 instructions # 1.82 insn per cycle + 2.113395829 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -66,19 +69,20 @@ Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.135006e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.331115e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.331115e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.129837e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.329251e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.329251e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.939454 sec - 18,283,010,727 cycles # 3.076 GHz - 44,034,450,286 instructions # 2.41 insn per cycle - 5.944452992 seconds time elapsed +TOTAL : 5.963077 sec + 18,329,614,420 cycles # 3.072 GHz + 44,034,602,066 instructions # 2.40 insn per cycle + 5.968302668 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -92,19 +96,20 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.667343e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.181731e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.181731e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.694414e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.218473e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.218473e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.173581 sec - 12,755,850,172 cycles # 3.055 GHz - 31,002,739,822 instructions # 2.43 insn per cycle - 4.178760665 seconds time elapsed +TOTAL : 4.104338 sec + 12,764,499,292 cycles # 3.107 GHz + 31,000,842,012 instructions # 2.43 insn per cycle + 4.109565892 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -118,19 +123,20 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.119818e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.952641e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.952641e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.095276e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.926686e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.926686e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.353719 sec - 10,022,124,116 cycles # 2.985 GHz - 19,377,738,463 instructions # 1.93 insn per cycle - 3.358949031 seconds time elapsed +TOTAL : 3.389760 sec + 10,099,043,785 cycles # 2.976 GHz + 19,377,613,301 instructions # 1.92 insn per cycle + 3.395069920 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -144,19 +150,20 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.197647e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.094541e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.094541e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.181551e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.082266e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.082266e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.246854 sec - 9,691,440,441 cycles # 2.981 GHz - 19,006,411,877 instructions # 1.96 insn per cycle - 3.252278792 seconds time elapsed +TOTAL : 3.268062 sec + 9,751,904,057 cycles # 2.980 GHz + 19,005,554,616 instructions # 1.95 insn per cycle + 3.273237329 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -170,19 +177,20 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.892226e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.523289e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.523289e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.820421e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.422717e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.422717e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.715100 sec - 8,592,102,883 cycles # 2.310 GHz - 15,737,316,253 instructions # 1.83 insn per cycle - 3.720383119 seconds time elapsed +TOTAL : 3.855030 sec + 8,625,575,408 cycles # 2.239 GHz + 15,740,744,925 instructions # 1.82 insn per cycle + 3.860342979 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index f380f8f77b..6298beca69 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_12:04:39 +DATE: 2023-10-29_22:53:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.688679e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.878052e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.084038e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.449613e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.831842e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.043087e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.664702 sec - 2,527,358,473 cycles # 2.821 GHz - 3,950,149,218 instructions # 1.56 insn per cycle - 0.956523953 seconds time elapsed +TOTAL : 0.653604 sec + 2,663,258,576 cycles # 3.018 GHz + 4,119,868,167 instructions # 1.55 insn per cycle + 0.942343744 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 1.027708011645137e-08 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.176849e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.393454e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.393454e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.200280e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.420964e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.420964e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.749146 sec - 17,418,245,221 cycles # 3.027 GHz - 41,881,264,051 instructions # 2.40 insn per cycle - 5.754425044 seconds time elapsed +TOTAL : 5.635099 sec + 17,435,146,635 cycles # 3.092 GHz + 41,881,003,220 instructions # 2.40 insn per cycle + 5.640492301 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 392) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 1.0277102294013186e-08 
OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.727663e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.274892e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.274892e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.732827e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.289974e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.289974e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.036880 sec - 12,443,172,071 cycles # 3.079 GHz - 30,163,295,291 instructions # 2.42 insn per cycle - 4.042069911 seconds time elapsed +TOTAL : 4.025576 sec + 12,500,170,045 cycles # 3.102 GHz + 30,164,396,598 instructions # 2.41 insn per cycle + 4.031081957 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1611) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 1.0277102294013186e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.119192e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.962960e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.962960e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.083721e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.918251e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.918251e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.362573 sec - 9,914,887,002 cycles # 2.945 GHz - 19,109,568,676 instructions # 1.93 insn per cycle - 3.367772960 seconds time elapsed +TOTAL : 3.414417 sec + 9,979,865,065 cycles # 2.922 GHz + 19,111,810,651 instructions # 1.92 insn per cycle + 3.419772488 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1930) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 1.0277088906338675e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.148695e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.010722e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.010722e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.188158e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.092301e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.092301e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.319719 sec - 9,606,034,124 cycles # 2.890 GHz - 18,764,443,608 instructions # 1.95 insn per cycle - 3.324776207 seconds time elapsed +TOTAL : 3.260474 sec + 9,653,421,879 cycles # 2.957 GHz + 18,775,249,287 instructions # 1.94 insn per cycle + 3.265820478 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1661) (512y: 178) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 1.0277088906338675e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.926149e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.586513e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.586513e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.921655e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.594276e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.594276e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.658483 sec - 8,411,821,797 cycles # 2.297 GHz - 15,613,679,149 instructions # 1.86 insn per cycle - 3.663873663 seconds time elapsed +TOTAL : 3.664294 sec + 8,459,612,466 cycles # 2.306 GHz + 15,613,549,898 instructions # 1.85 insn per cycle + 3.669595515 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 886) (512y: 156) (512z: 1239) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index 1c092f6985..e44106da5e 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_12:25:03 +DATE: 2023-10-29_23:15:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.824995e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.672902e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.055784e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.506979e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.575771e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.024770e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.668648 sec - 2,691,554,614 cycles # 2.996 GHz - 4,161,203,730 instructions # 1.55 insn per cycle - 0.958727124 seconds time elapsed +TOTAL : 0.673639 sec + 2,731,565,956 cycles # 3.015 GHz + 4,234,870,315 instructions # 1.55 insn per cycle + 0.968176807 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.704241e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.185386e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.185386e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.651561e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.117234e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.117234e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.087797 sec - 12,666,124,359 cycles # 3.096 GHz - 32,577,303,385 instructions # 2.57 insn per cycle - 4.092925597 seconds time elapsed +TOTAL : 4.215246 sec + 12,681,764,053 cycles # 3.006 GHz + 32,577,102,204 instructions # 2.57 insn per cycle + 4.220638940 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 296) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 1.0277102699700292e-08 
OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.144087e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.061274e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.061274e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.153203e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.080454e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.080454e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.328636 sec - 10,261,573,946 cycles # 3.079 GHz - 24,505,950,861 instructions # 2.39 insn per cycle - 3.334196649 seconds time elapsed +TOTAL : 3.317199 sec + 10,262,912,467 cycles # 3.090 GHz + 24,505,351,557 instructions # 2.39 insn per cycle + 3.322594205 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1251) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.376914e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.481586e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.481586e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.274293e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.309246e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.309246e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.036227 sec - 9,059,680,784 cycles # 2.980 GHz - 16,942,142,440 instructions # 1.87 insn per cycle - 3.041553155 seconds time elapsed +TOTAL : 3.166884 sec + 9,109,096,651 cycles # 2.873 GHz + 16,941,882,854 instructions # 1.86 insn per cycle + 3.172295054 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1631) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.440750e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.596606e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.596606e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.412460e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.566691e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.566691e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.963120 sec - 8,832,825,912 cycles # 2.977 GHz - 16,357,330,244 instructions # 1.85 insn per cycle - 2.968310420 seconds time elapsed +TOTAL : 2.995372 sec + 8,910,476,559 cycles # 2.970 GHz + 16,368,475,958 instructions # 1.84 insn per cycle + 3.000901134 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1370) (512y: 139) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.029944e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.804543e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.804543e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.075952e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.880689e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.880689e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.502018 sec - 7,914,855,069 cycles # 2.258 GHz - 14,594,378,606 instructions # 1.84 insn per cycle - 3.507229260 seconds time elapsed +TOTAL : 3.424948 sec + 7,930,435,697 cycles # 2.313 GHz + 14,592,571,272 instructions # 1.84 insn per cycle + 3.430388365 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1015) (512y: 158) (512z: 955) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index 8aaee529f3..20af8dcdcf 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_12:25:32 +DATE: 2023-10-29_23:15:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.832257e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.689820e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.107266e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.517058e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.581775e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.062501e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.660186 sec - 2,704,353,909 cycles # 3.037 GHz - 4,132,550,954 instructions # 1.53 insn per cycle - 0.950731320 seconds time elapsed +TOTAL : 0.670504 sec + 2,702,422,058 cycles # 2.997 GHz + 4,242,604,368 instructions # 1.57 insn per cycle + 0.961836694 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 1.027708011645137e-08 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.255392e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.177534e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.177534e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.250239e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.177317e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.177317e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.185155 sec - 9,847,530,364 cycles # 3.087 GHz - 25,457,133,972 instructions # 2.59 insn per cycle - 3.190284100 seconds time elapsed +TOTAL : 3.189113 sec + 9,869,010,862 cycles # 3.090 GHz + 25,457,600,406 instructions # 2.58 insn per cycle + 3.194393044 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 249) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 1.0277102294013186e-08 OK 
(relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.530932e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.892174e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.892174e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.513363e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.880454e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.880454e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.879384 sec - 8,913,895,098 cycles # 3.091 GHz - 21,514,459,263 instructions # 2.41 insn per cycle - 2.884612658 seconds time elapsed +TOTAL : 2.899337 sec + 8,953,224,169 cycles # 3.083 GHz + 21,513,951,345 instructions # 2.40 insn per cycle + 2.904812289 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1119) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 1.0277102294013186e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.545937e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.843167e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.843167e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.487189e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.770373e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.770373e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.859280 sec - 8,544,306,826 cycles # 2.985 GHz - 15,827,712,225 instructions # 1.85 insn per cycle - 2.864111025 seconds time elapsed +TOTAL : 2.923137 sec + 8,620,906,170 cycles # 2.945 GHz + 15,830,227,053 instructions # 1.84 insn per cycle + 2.928647932 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 1.0277088906338675e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.608345e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.959405e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.959405e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.556435e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.885768e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.885768e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.796657 sec - 8,368,015,466 cycles # 2.988 GHz - 15,528,362,084 instructions # 1.86 insn per cycle - 2.801823903 seconds time elapsed +TOTAL : 2.850788 sec + 8,407,782,646 cycles # 2.944 GHz + 15,528,675,382 instructions # 1.85 insn per cycle + 2.856241127 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1268) (512y: 139) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 1.0277088906338675e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.254251e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.213319e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.213319e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.204312e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.139539e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.139539e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.181418 sec - 7,557,201,393 cycles # 2.374 GHz - 14,292,258,011 instructions # 1.89 insn per cycle - 3.186495390 seconds time elapsed +TOTAL : 3.251251 sec + 7,600,805,312 cycles # 2.336 GHz + 14,294,099,971 instructions # 1.88 insn per cycle + 3.256637720 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1041) (512y: 164) (512z: 874) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 7f46bde5e9..3d3b3506a7 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_12:05:12 +DATE: 2023-10-29_22:53:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.569808e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.303498e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.281562e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.488398e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.270591e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.269806e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.555717 sec - 2,383,859,082 cycles # 3.022 GHz - 3,700,757,768 instructions # 1.55 insn per cycle - 0.846227370 seconds time elapsed +TOTAL : 0.561353 sec + 2,351,261,799 cycles # 3.014 GHz + 3,609,899,167 instructions # 1.54 insn per cycle + 0.839537640 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.170731e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.387851e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.387851e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.166292e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.382162e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.382162e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.728025 sec - 17,798,264,765 cycles # 3.105 GHz - 43,613,321,746 instructions # 2.45 insn per cycle - 5.732968535 seconds time elapsed +TOTAL : 5.751431 sec + 17,807,322,192 cycles # 3.094 GHz + 43,613,437,781 instructions # 2.45 insn per cycle + 5.756676764 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 1.1313746984080878e-08 
OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.416371e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.689737e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.689737e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.354841e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.597055e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.597055e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.958460 sec - 9,200,489,884 cycles # 3.106 GHz - 21,925,512,062 instructions # 2.38 insn per cycle - 2.963425630 seconds time elapsed +TOTAL : 3.029120 sec + 9,234,524,948 cycles # 3.044 GHz + 21,925,460,951 instructions # 2.37 insn per cycle + 3.034350215 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.606473e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.987849e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.987849e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.442806e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.733746e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.733746e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.760956 sec - 8,262,496,420 cycles # 2.988 GHz - 15,591,539,878 instructions # 1.89 insn per cycle - 2.766076443 seconds time elapsed +TOTAL : 2.936007 sec + 8,334,441,599 cycles # 2.835 GHz + 15,592,659,950 instructions # 1.87 insn per cycle + 2.941196934 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.607993e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.019342e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.019342e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.606054e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.023531e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.023531e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.765968 sec - 8,198,371,730 cycles # 2.961 GHz - 15,435,025,113 instructions # 1.88 insn per cycle - 2.771149365 seconds time elapsed +TOTAL : 2.762889 sec + 8,219,765,977 cycles # 2.971 GHz + 15,435,430,943 instructions # 1.88 insn per cycle + 2.768115820 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.657990e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.094383e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.094383e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.651104e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.096112e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.096112e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.718566 sec - 6,606,742,247 cycles # 2.427 GHz - 12,869,980,452 instructions # 1.95 insn per cycle - 2.723582960 seconds time elapsed +TOTAL : 2.727241 sec + 6,620,001,417 cycles # 2.424 GHz + 12,869,662,338 instructions # 1.94 insn per cycle + 2.732371752 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index 0b4db02b86..ebeb0f01b9 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,11 +37,12 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_12:35:59 +DATE: 2023-10-29_23:26:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -48,15 +50,16 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.474804e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.962112e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.962112e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.502083e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.940730e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.940730e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.636795 sec - 5,643,077,305 cycles # 3.027 GHz - 10,280,576,945 instructions # 1.82 insn per cycle - 1.921179131 seconds time elapsed +TOTAL : 1.637195 sec + 5,696,849,282 cycles # 3.057 GHz + 10,301,658,227 instructions # 1.81 insn per cycle + 1.920689810 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -72,20 +75,21 @@ Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.130619e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.334867e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.334867e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.144224e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.352294e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.352294e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.026937 sec - 18,463,069,696 cycles # 3.061 GHz - 43,762,559,108 instructions # 2.37 insn per cycle - 6.032712961 seconds time elapsed +TOTAL : 5.958755 sec + 18,470,081,002 cycles # 3.097 GHz + 43,762,371,634 instructions # 2.37 insn per cycle + 5.964749827 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -99,20 +103,21 @@ Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.291047e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.416048e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.416048e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.232128e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.341106e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.341106e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.224000 sec - 9,974,231,409 cycles # 3.089 GHz - 23,261,628,829 instructions # 2.33 insn per cycle - 3.229980045 seconds time elapsed +TOTAL : 3.308376 sec + 10,011,937,118 cycles # 3.025 GHz + 23,263,465,661 instructions # 2.32 insn per cycle + 3.314430093 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -126,20 +131,21 @@ Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.470068e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.722431e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.722431e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.453144e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.695130e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.695130e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.014309 sec - 9,033,599,745 cycles # 2.992 GHz - 16,710,722,683 instructions # 1.85 insn per cycle - 3.020418073 seconds time elapsed +TOTAL : 3.039152 sec + 9,088,378,508 cycles # 2.986 GHz + 16,711,868,096 instructions # 1.84 insn per cycle + 3.045311635 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -153,20 +159,21 @@ Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.485239e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.752852e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.752852e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.433792e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.682044e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.682044e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.005083 sec - 8,983,180,192 cycles # 2.986 GHz - 16,560,789,697 instructions # 1.84 insn per cycle - 3.010982814 seconds time elapsed +TOTAL : 3.069362 sec + 9,004,918,207 cycles # 2.929 GHz + 16,555,674,510 instructions # 1.84 insn per cycle + 3.075484578 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -180,20 +187,21 @@ Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.502480e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.764480e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.764480e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.497941e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.758140e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.758140e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.985922 sec - 7,387,231,495 cycles # 2.471 GHz - 14,076,334,743 instructions # 1.91 insn per cycle - 2.991991008 seconds time elapsed +TOTAL : 2.998963 sec + 7,428,209,049 cycles # 2.472 GHz + 14,077,163,296 instructions # 1.90 insn per cycle + 3.005100581 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index 2b0f219a41..9c2267690a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_12:48:43 +DATE: 2023-10-29_23:38:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --common OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.387494e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.214508e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.248425e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.386778e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.211673e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.242393e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 -TOTAL : 1.186437 sec - 4,201,334,068 cycles # 2.989 GHz - 6,627,660,432 instructions # 1.58 insn per cycle - 1.463153599 seconds time elapsed +TOTAL : 1.152217 sec + 4,164,140,679 cycles # 3.038 GHz + 6,703,652,142 instructions # 1.61 insn per cycle + 1.427512326 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.165195e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.383351e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.383351e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.132621e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.342281e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.342281e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 6.072926 sec - 18,835,057,538 cycles # 3.100 GHz - 43,796,928,480 instructions # 2.33 insn per cycle - 6.077944722 seconds time elapsed +TOTAL : 6.243644 sec + 18,819,488,848 cycles # 3.013 GHz + 43,795,864,417 instructions # 2.33 insn per cycle + 6.248647210 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 
1.1313746984080878e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.342008e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.584442e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.584442e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.368858e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.630852e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.630852e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 3.369100 sec - 10,266,001,012 cycles # 3.044 GHz - 22,007,821,518 instructions # 2.14 insn per cycle - 3.374095696 seconds time elapsed +TOTAL : 3.327996 sec + 10,249,694,277 cycles # 3.076 GHz + 22,006,771,964 instructions # 2.15 insn per cycle + 3.333022700 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.561455e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.927682e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.927682e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.521017e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.927680e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.927680e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.127417 sec - 9,261,755,413 cycles # 2.958 GHz - 15,502,016,343 instructions # 1.67 insn per cycle - 3.132444295 seconds time elapsed +TOTAL : 3.166104 sec + 9,360,780,233 cycles # 2.953 GHz + 15,501,778,068 instructions # 1.66 insn per cycle + 3.171157605 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.611431e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.034996e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.034996e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.592580e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.021290e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.021290e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.079991 sec - 9,215,198,289 cycles # 2.988 GHz - 15,143,893,450 instructions # 1.64 insn per cycle - 3.084905456 seconds time elapsed +TOTAL : 3.099079 sec + 9,270,625,553 cycles # 2.987 GHz + 15,143,813,419 instructions # 1.63 insn per cycle + 3.104067295 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.640665e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.084466e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.084466e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.636377e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.091702e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.091702e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.056004 sec - 7,613,812,981 cycles # 2.488 GHz - 12,579,209,488 instructions # 1.65 insn per cycle - 3.061105590 seconds time elapsed +TOTAL : 3.062456 sec + 7,649,798,731 cycles # 2.495 GHz + 12,579,371,966 instructions # 1.64 insn per cycle + 3.067527608 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt index 7c4facf8c1..a1984b2dbd 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_12:45:34 +DATE: 2023-10-29_23:35:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --curhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.392329e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.226962e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.280172e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.392536e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.227239e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.283755e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.834755 sec - 3,177,702,111 cycles # 3.023 GHz - 6,449,142,343 instructions # 2.03 insn per cycle - 1.110221614 seconds time elapsed +TOTAL : 0.834415 sec + 3,191,587,990 cycles # 3.032 GHz + 6,503,299,462 instructions # 2.04 insn per cycle + 1.110001966 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.139739e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.351221e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.351221e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.150249e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.364647e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.364647e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.884327 sec - 17,821,810,664 cycles # 3.027 GHz - 43,614,131,030 instructions # 2.45 insn per cycle - 5.889383739 seconds time elapsed +TOTAL : 5.834169 sec + 17,836,446,946 cycles # 3.058 GHz + 43,616,934,542 instructions # 2.45 insn per cycle + 5.839014033 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 
1.1313746984080878e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.398185e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.659097e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.659097e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.313883e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.536871e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.536871e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.980585 sec - 9,234,568,261 cycles # 3.094 GHz - 21,925,493,552 instructions # 2.37 insn per cycle - 2.985551520 seconds time elapsed +TOTAL : 3.087694 sec + 9,244,349,657 cycles # 2.990 GHz + 21,925,829,555 instructions # 2.37 insn per cycle + 3.092652027 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.595452e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.963542e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.963542e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.593910e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.976295e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.976295e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.770209 sec - 8,264,942,329 cycles # 2.979 GHz - 15,590,380,822 instructions # 1.89 insn per cycle - 2.775128570 seconds time elapsed +TOTAL : 2.773398 sec + 8,292,004,994 cycles # 2.985 GHz + 15,590,392,482 instructions # 1.88 insn per cycle + 2.778369879 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.614288e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.036058e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.036058e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.597463e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.017057e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.017057e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.755945 sec - 8,229,430,317 cycles # 2.981 GHz - 15,439,224,332 instructions # 1.88 insn per cycle - 2.761056638 seconds time elapsed +TOTAL : 2.771894 sec + 8,246,987,403 cycles # 2.971 GHz + 15,434,206,063 instructions # 1.87 insn per cycle + 2.776950739 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.566410e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.944778e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.944778e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.605490e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.021590e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.021590e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.814055 sec - 6,615,971,523 cycles # 2.349 GHz - 12,870,676,545 instructions # 1.95 insn per cycle - 2.819075054 seconds time elapsed +TOTAL : 2.772142 sec + 6,653,271,489 cycles # 2.397 GHz + 12,870,994,579 instructions # 1.93 insn per cycle + 2.777366105 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index a9154c2748..6b52d68a52 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,24 +37,26 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_12:42:25 +DATE: 2023-10-29_23:32:27 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 12 --rmbhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.235922e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.189314e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.175908e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.505082e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.185860e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.164906e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.452913 sec - 5,098,838,507 cycles # 3.053 GHz - 9,239,267,347 instructions # 1.81 insn per cycle - 1.728632717 seconds time elapsed +TOTAL : 1.429658 sec + 5,048,204,041 cycles # 3.061 GHz + 9,247,012,831 instructions # 1.83 insn per cycle + 1.705876120 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -66,19 +69,20 @@ Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.170247e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.386746e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.386746e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.168037e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.384858e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.384858e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.731230 sec - 17,811,008,964 cycles # 3.105 GHz - 43,613,062,162 instructions # 2.45 insn per cycle - 5.736148226 seconds time elapsed +TOTAL : 5.742087 sec + 17,808,913,038 cycles # 3.099 GHz + 43,613,121,215 instructions # 2.45 insn per cycle + 5.747051229 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -92,19 +96,20 @@ Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.338265e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.556723e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.556723e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.388428e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.655638e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.655638e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.060662 sec - 9,221,238,318 cycles # 3.009 GHz - 21,925,884,562 instructions # 2.38 insn per cycle - 3.065656017 seconds time elapsed +TOTAL : 2.992667 sec + 9,228,123,251 cycles # 3.080 GHz + 21,925,408,791 instructions # 2.38 insn per cycle + 2.997686168 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -118,19 +123,20 @@ Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.575598e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.926277e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.926277e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.483923e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.793533e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.793533e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.795328 sec - 8,274,157,710 cycles # 2.956 GHz - 15,590,638,637 instructions # 1.88 insn per cycle - 2.800362174 seconds time elapsed +TOTAL : 2.891836 sec + 8,313,512,459 cycles # 2.871 GHz + 15,590,846,489 instructions # 1.88 insn per cycle + 2.896841741 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -144,19 +150,20 @@ Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.608597e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.004444e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.004444e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.610985e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.040279e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.040279e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.765892 sec - 8,210,872,152 cycles # 2.964 GHz - 15,434,199,512 instructions # 1.88 insn per cycle - 2.771052894 seconds time elapsed +TOTAL : 2.757901 sec + 8,238,182,515 cycles # 2.984 GHz + 15,434,374,890 instructions # 1.87 insn per cycle + 2.763073101 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -170,19 +177,20 @@ Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.639167e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.056020e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.056020e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.588475e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.981440e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.981440e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.736960 sec - 6,603,283,532 cycles # 2.409 GHz - 12,869,076,714 instructions # 1.95 insn per cycle - 2.741922035 seconds time elapsed +TOTAL : 2.788426 sec + 6,630,836,794 cycles # 2.375 GHz + 12,869,285,496 instructions # 1.94 insn per cycle + 2.793399713 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index 57bc44590f..a453d1a288 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_12:05:41 +DATE: 2023-10-29_22:54:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.573323e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.316951e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.316598e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.491668e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.296332e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.301135e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.555253 sec - 2,382,172,389 cycles # 3.020 GHz - 3,675,857,664 instructions # 1.54 insn per cycle - 0.846167400 seconds time elapsed +TOTAL : 0.558982 sec + 2,359,148,885 cycles # 3.021 GHz + 3,677,736,298 instructions # 1.56 insn per cycle + 0.838194831 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.249737e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.499844e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.499844e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.247833e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.499900e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.499900e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.388471 sec - 16,729,282,863 cycles # 3.102 GHz - 41,371,471,590 instructions # 2.47 insn per cycle - 5.393316082 seconds time elapsed +TOTAL : 5.395526 sec + 16,752,728,675 cycles # 3.103 GHz + 41,371,323,147 instructions # 2.47 insn per cycle + 5.400397415 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 1.1313746984080878e-08 
OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.479747e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.842891e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.842891e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.466250e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.827670e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.827670e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.890771 sec - 8,980,628,419 cycles # 3.102 GHz - 21,229,293,902 instructions # 2.36 insn per cycle - 2.895816535 seconds time elapsed +TOTAL : 2.908072 sec + 9,031,894,595 cycles # 3.101 GHz + 21,230,343,156 instructions # 2.35 insn per cycle + 2.913398603 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1841) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.553067e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.911755e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.911755e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.616510e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.025153e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.025153e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.818350 sec - 8,236,815,339 cycles # 2.918 GHz - 15,424,982,194 instructions # 1.87 insn per cycle - 2.823290241 seconds time elapsed +TOTAL : 2.750320 sec + 8,230,475,467 cycles # 2.988 GHz + 15,424,564,830 instructions # 1.87 insn per cycle + 2.755330494 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2536) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.628835e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.060475e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.060475e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.653005e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.130225e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.130225e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.744370 sec - 8,095,406,107 cycles # 2.945 GHz - 15,243,714,977 instructions # 1.88 insn per cycle - 2.749338594 seconds time elapsed +TOTAL : 2.720808 sec + 8,079,864,793 cycles # 2.965 GHz + 15,238,277,190 instructions # 1.89 insn per cycle + 2.725957575 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2423) (512y: 8) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.596823e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.990816e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.990816e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.654013e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.095619e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.095619e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.778794 sec - 6,577,973,479 cycles # 2.364 GHz - 12,848,266,614 instructions # 1.95 insn per cycle - 2.783917309 seconds time elapsed +TOTAL : 2.722630 sec + 6,599,502,119 cycles # 2.420 GHz + 12,848,005,127 instructions # 1.95 insn per cycle + 2.727814362 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1705) (512y: 18) (512z: 1427) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index 6be58ed42d..c38059bc0e 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_12:25:59 +DATE: 2023-10-29_23:16:12 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.381038e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.216517e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.259040e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.302992e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.184554e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.261690e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.564554 sec - 2,368,656,023 cycles # 3.011 GHz - 3,688,760,637 instructions # 1.56 insn per cycle - 0.843324142 seconds time elapsed +TOTAL : 0.571699 sec + 2,381,453,624 cycles # 2.998 GHz + 3,711,037,714 instructions # 1.56 insn per cycle + 0.852798458 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.741983e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.273463e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.273463e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.739818e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.271465e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.271465e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.966724 sec - 12,194,181,082 cycles # 3.075 GHz - 32,520,249,828 instructions # 2.67 insn per cycle - 3.971213420 seconds time elapsed +TOTAL : 3.973603 sec + 12,176,355,502 cycles # 3.061 GHz + 32,521,127,628 instructions # 2.67 insn per cycle + 3.978805450 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 312) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 1.244813035273009e-08 
OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.759372e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.661299e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.661299e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.837673e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.797869e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.797869e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.644286 sec - 7,955,830,690 cycles # 3.004 GHz - 18,690,505,489 instructions # 2.35 insn per cycle - 2.649232282 seconds time elapsed +TOTAL : 2.573600 sec + 7,975,655,707 cycles # 3.094 GHz + 18,690,683,706 instructions # 2.34 insn per cycle + 2.578826897 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1554) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 5.583829420356249e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.950641e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.852361e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.852361e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.881382e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.764080e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.764080e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.481745 sec - 7,418,882,379 cycles # 2.984 GHz - 14,254,122,903 instructions # 1.92 insn per cycle - 2.486684765 seconds time elapsed +TOTAL : 2.543156 sec + 7,446,918,262 cycles # 2.924 GHz + 14,254,158,064 instructions # 1.91 insn per cycle + 2.548346891 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2237) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.028679e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.073597e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.073597e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.960433e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.954394e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.954394e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.428935 sec - 7,266,876,500 cycles # 2.991 GHz - 13,949,467,084 instructions # 1.92 insn per cycle - 2.433605636 seconds time elapsed +TOTAL : 2.480137 sec + 7,295,815,180 cycles # 2.936 GHz + 13,946,321,934 instructions # 1.91 insn per cycle + 2.485375165 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2096) (512y: 3) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.730559e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.262408e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.262408e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.685033e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.203277e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.203277e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.656095 sec - 6,484,738,554 cycles # 2.438 GHz - 13,421,284,492 instructions # 2.07 insn per cycle - 2.661161068 seconds time elapsed +TOTAL : 2.700255 sec + 6,507,297,033 cycles # 2.406 GHz + 13,421,510,099 instructions # 2.06 insn per cycle + 2.705598441 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2071) (512y: 1) (512z: 1198) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index ea1b5c4ce9..358806b93a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_12:26:25 +DATE: 2023-10-29_23:16:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.378042e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.230898e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.303636e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.305534e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.188557e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.268373e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.572348 sec - 2,315,088,058 cycles # 2.918 GHz - 3,606,090,617 instructions # 1.56 insn per cycle - 0.851746680 seconds time elapsed +TOTAL : 0.571817 sec + 2,353,207,749 cycles # 2.961 GHz + 3,723,614,170 instructions # 1.58 insn per cycle + 0.852487976 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.337523e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.407077e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.407077e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.325349e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.401417e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.401417e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.047145 sec - 9,399,501,328 cycles # 3.081 GHz - 25,306,524,755 instructions # 2.69 insn per cycle - 3.052129026 seconds time elapsed +TOTAL : 3.066168 sec + 9,413,178,324 cycles # 3.066 GHz + 25,308,930,169 instructions # 2.69 insn per cycle + 3.071456258 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 263) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 1.2589928273811243e-08 OK 
(relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.148384e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.855784e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.855784e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.159701e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.912786e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.912786e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.361555 sec - 7,165,256,881 cycles # 3.028 GHz - 16,902,533,123 instructions # 2.36 insn per cycle - 2.366898623 seconds time elapsed +TOTAL : 2.348370 sec + 7,237,807,765 cycles # 3.077 GHz + 16,901,642,876 instructions # 2.34 insn per cycle + 2.353566647 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1359) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.064233e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.200064e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.200064e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.100952e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.335032e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.335032e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.402350 sec - 7,155,498,870 cycles # 2.973 GHz - 13,619,819,517 instructions # 1.90 insn per cycle - 2.407497506 seconds time elapsed +TOTAL : 2.377716 sec + 7,114,981,233 cycles # 2.987 GHz + 13,618,980,317 instructions # 1.91 insn per cycle + 2.382769738 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2060) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 2.5107486628541925e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.161218e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.465591e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.465591e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.095662e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.380517e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.380517e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.343466 sec - 7,029,083,635 cycles # 2.994 GHz - 13,435,784,739 instructions # 1.91 insn per cycle - 2.348424312 seconds time elapsed +TOTAL : 2.388573 sec + 7,050,432,929 cycles # 2.946 GHz + 13,435,636,130 instructions # 1.91 insn per cycle + 2.393927733 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1945) (512y: 4) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 2.5107486628541925e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.828979e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.532554e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.532554e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.809551e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.481545e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.481545e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.576228 sec - 6,306,840,953 cycles # 2.444 GHz - 13,154,129,713 instructions # 2.09 insn per cycle - 2.581443302 seconds time elapsed +TOTAL : 2.594484 sec + 6,324,738,625 cycles # 2.435 GHz + 13,153,553,215 instructions # 2.08 insn per cycle + 2.599854412 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2029) (512y: 1) (512z: 1083) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index f01a9a57a3..7b38b05e62 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_12:06:10 +DATE: 2023-10-29_22:54:46 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.681696e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.844881e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.020418e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.447730e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.791443e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.966822e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.657315 sec - 2,598,665,017 cycles # 2.936 GHz - 4,028,861,679 instructions # 1.55 insn per cycle - 0.945030332 seconds time elapsed +TOTAL : 0.654025 sec + 2,666,039,210 cycles # 3.017 GHz + 4,098,761,288 instructions # 1.54 insn per cycle + 0.944531212 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 7.671454200650844e-09 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.100332e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.287076e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.287076e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.109099e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.296066e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.296066e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.113513 sec - 18,734,521,807 cycles # 3.062 GHz - 44,287,146,095 instructions # 2.36 insn per cycle - 6.118759717 seconds time elapsed +TOTAL : 6.070816 sec + 18,741,596,344 cycles # 3.086 GHz + 44,286,641,637 instructions # 2.36 insn per cycle + 6.076086314 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 439) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 3.6990156841838714e-09 
OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.757716e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.321897e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.321897e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.722516e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.273779e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.273779e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.969796 sec - 12,337,756,331 cycles # 3.105 GHz - 30,959,811,407 instructions # 2.51 insn per cycle - 3.974750304 seconds time elapsed +TOTAL : 4.052104 sec + 12,400,528,226 cycles # 3.057 GHz + 30,960,047,988 instructions # 2.50 insn per cycle + 4.057597540 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1685) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.090570e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.906912e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.906912e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.005605e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.791319e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.791319e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.396973 sec - 10,093,757,140 cycles # 2.968 GHz - 19,398,694,320 instructions # 1.92 insn per cycle - 3.402037195 seconds time elapsed +TOTAL : 3.534624 sec + 10,128,109,972 cycles # 2.862 GHz + 19,399,010,623 instructions # 1.92 insn per cycle + 3.540053791 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2146) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.152608e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.020125e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.020125e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.173708e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.076945e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.076945e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.313893 sec - 9,678,205,730 cycles # 2.917 GHz - 18,980,795,577 instructions # 1.96 insn per cycle - 3.318879598 seconds time elapsed +TOTAL : 3.281691 sec + 9,748,054,010 cycles # 2.966 GHz + 18,981,255,153 instructions # 1.95 insn per cycle + 3.287089431 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1859) (512y: 188) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.849324e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.478638e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.478638e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.936624e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.615295e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.615295e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.806963 sec - 8,367,265,353 cycles # 2.196 GHz - 15,064,288,991 instructions # 1.80 insn per cycle - 3.812130834 seconds time elapsed +TOTAL : 3.640905 sec + 8,391,288,862 cycles # 2.302 GHz + 15,064,533,737 instructions # 1.80 insn per cycle + 3.646211880 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1023) (512y: 155) (512z: 1316) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index 9edf8700e8..0eed8e2d69 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_12:06:43 +DATE: 2023-10-29_22:55:19 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.664259e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.857808e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.041274e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.454271e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.832017e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.038340e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.667696 sec - 2,606,641,308 cycles # 2.888 GHz - 4,060,337,867 instructions # 1.56 insn per cycle - 0.968428850 seconds time elapsed +TOTAL : 0.653806 sec + 2,668,469,279 cycles # 3.020 GHz + 4,079,450,696 instructions # 1.53 insn per cycle + 0.943662062 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 7.671454200650844e-09 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.159840e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.365547e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.365547e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.167637e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.374513e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.374513e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.819162 sec - 17,933,037,739 cycles # 3.080 GHz - 42,536,132,640 instructions # 2.37 insn per cycle - 5.824253591 seconds time elapsed +TOTAL : 5.781308 sec + 17,933,067,465 cycles # 3.100 GHz + 42,535,595,028 instructions # 2.37 insn per cycle + 5.786442058 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 421) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 3.6990156841838714e-09 
OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.757614e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.338049e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.338049e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.752061e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.330676e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.330676e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.979221 sec - 12,132,707,817 cycles # 3.048 GHz - 30,269,707,726 instructions # 2.49 insn per cycle - 3.984583996 seconds time elapsed +TOTAL : 3.989523 sec + 12,191,986,873 cycles # 3.053 GHz + 30,267,026,279 instructions # 2.48 insn per cycle + 3.994946276 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1692) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.082667e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.901054e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.901054e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.104170e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.950168e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.950168e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.412907 sec - 9,984,888,667 cycles # 2.922 GHz - 19,281,802,256 instructions # 1.93 insn per cycle - 3.417989674 seconds time elapsed +TOTAL : 3.378896 sec + 10,036,789,390 cycles # 2.966 GHz + 19,281,934,589 instructions # 1.92 insn per cycle + 3.384394357 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2162) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.167358e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.053730e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.053730e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.193547e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.112581e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.112581e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.295521 sec - 9,655,639,183 cycles # 2.926 GHz - 18,781,731,091 instructions # 1.95 insn per cycle - 3.300616608 seconds time elapsed +TOTAL : 3.252743 sec + 9,664,135,628 cycles # 2.967 GHz + 18,781,593,266 instructions # 1.94 insn per cycle + 3.258138009 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1833) (512y: 191) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check.exe -p 2048 256 12 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.985792e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.694413e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.694413e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.981542e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.693938e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.693938e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.562394 sec - 8,262,941,640 cycles # 2.317 GHz - 14,988,031,204 instructions # 1.81 insn per cycle - 3.567536273 seconds time elapsed +TOTAL : 3.566812 sec + 8,278,244,012 cycles # 2.318 GHz + 14,988,373,891 instructions # 1.81 insn per cycle + 3.572200010 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1020) (512y: 156) (512z: 1305) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index cf07c78786..8faf80849d 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_12:07:15 +DATE: 2023-10-29_22:55:51 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.136997e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.176783e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.273393e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.022553e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.169186e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.268682e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.511809 sec - 2,253,660,474 cycles # 3.022 GHz - 3,223,175,085 instructions # 1.43 insn per cycle - 0.803611548 seconds time elapsed +TOTAL : 0.515209 sec + 2,233,987,909 cycles # 3.011 GHz + 3,236,100,958 instructions # 1.45 insn per cycle + 0.801892412 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.200834e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.264758e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.264758e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.182452e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.246743e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.246743e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.867099 sec - 15,140,743,768 cycles # 3.108 GHz - 38,436,444,633 instructions # 2.54 insn per cycle - 4.872245498 seconds time elapsed +TOTAL : 4.907642 sec + 15,152,192,071 cycles # 3.085 GHz + 38,436,494,690 instructions # 2.54 insn per cycle + 4.912915615 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 3.258803994438787e-07 OK (relative 
difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.626756e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.818405e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.818405e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.679759e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.887370e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.887370e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.994726 sec - 9,137,139,873 cycles # 3.047 GHz - 24,590,940,060 instructions # 2.69 insn per cycle - 3.000053290 seconds time elapsed +TOTAL : 2.956512 sec + 9,092,266,094 cycles # 3.071 GHz + 24,590,422,031 instructions # 2.70 insn per cycle + 2.961838870 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.983253e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.504163e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.504163e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.603494e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.103886e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.103886e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.854838 sec - 5,452,843,992 cycles # 2.933 GHz - 11,265,041,070 instructions # 2.07 insn per cycle - 1.860040563 seconds time elapsed +TOTAL : 1.980943 sec + 5,471,545,616 cycles # 2.756 GHz + 11,266,361,130 instructions # 2.06 insn per cycle + 1.986440255 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.659391e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.292943e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.292943e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.611609e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.258293e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.258293e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.676612 sec - 4,925,739,130 cycles # 2.930 GHz - 10,571,474,421 instructions # 2.15 insn per cycle - 1.681870831 seconds time elapsed +TOTAL : 1.688302 sec + 4,962,792,293 cycles # 2.931 GHz + 10,569,518,172 instructions # 2.13 insn per cycle + 1.693732006 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.095664e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.330650e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.330650e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.090511e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.328359e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.328359e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.663023 sec - 5,382,784,208 cycles # 2.018 GHz - 7,804,881,730 instructions # 1.45 insn per cycle - 2.668299561 seconds time elapsed +TOTAL : 2.667965 sec + 5,390,988,174 cycles # 2.017 GHz + 7,804,802,792 instructions # 1.45 insn per cycle + 2.673353935 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index 25a1d7368c..4a146055f5 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,11 +37,12 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_12:36:31 +DATE: 2023-10-29_23:26:46 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -48,15 +50,16 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.647828e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.026321e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.026321e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.640905e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.017146e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.017146e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.795675 sec - 3,150,339,357 cycles # 3.032 GHz - 4,902,081,574 instructions # 1.56 insn per cycle - 1.097297675 seconds time elapsed +TOTAL : 0.795504 sec + 3,127,754,708 cycles # 3.022 GHz + 4,889,520,861 instructions # 1.56 insn per cycle + 1.092841714 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -72,20 +75,21 @@ Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.105579e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.164982e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.164982e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.149858e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.212968e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.212968e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.159176 sec - 15,505,788,287 cycles # 3.003 GHz - 38,497,293,687 instructions # 2.48 insn per cycle - 5.165450381 seconds time elapsed +TOTAL : 5.058597 sec + 15,696,511,300 cycles # 3.100 GHz + 38,498,945,603 instructions # 2.45 insn per cycle + 5.065152610 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe 
@@ -99,20 +103,21 @@ Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.663091e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.858706e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.858706e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.611071e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.805644e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.805644e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.040555 sec - 9,431,019,118 cycles # 3.096 GHz - 24,774,385,658 instructions # 2.63 insn per cycle - 3.046912316 seconds time elapsed +TOTAL : 3.087844 sec + 9,458,665,415 cycles # 3.058 GHz + 24,775,783,056 instructions # 2.62 insn per cycle + 3.094441427 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -126,20 +131,21 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 
256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.863542e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.365394e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.365394e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.772811e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.281889e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.281889e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.967510 sec - 5,786,297,263 cycles # 2.932 GHz - 11,552,071,765 instructions # 2.00 insn per cycle - 1.974044258 seconds time elapsed +TOTAL : 2.004130 sec + 5,804,812,481 cycles # 2.888 GHz + 11,553,800,215 instructions # 1.99 insn per cycle + 2.011016331 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -153,20 +159,21 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.472456e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.085835e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.085835e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.447831e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.064850e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.064850e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.800429 sec - 5,305,491,699 cycles # 2.938 GHz - 10,859,048,691 instructions # 2.05 insn per cycle - 1.806998436 seconds time elapsed +TOTAL : 1.808534 sec + 5,312,663,346 cycles # 2.930 GHz + 10,859,775,036 instructions # 2.04 insn per cycle + 1.815109451 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -180,20 +187,21 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.032410e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.258848e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.258848e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.011582e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.238367e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.238367e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.781006 sec - 5,749,297,000 cycles # 2.063 GHz - 8,049,410,634 instructions # 1.40 insn per cycle - 2.787426015 seconds time elapsed +TOTAL : 2.796689 sec + 5,765,378,482 cycles # 2.057 GHz + 8,049,035,674 instructions # 1.40 insn per cycle + 2.803259503 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index cea02f65d0..852b6969ed 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_12:49:15 +DATE: 2023-10-29_23:39:16 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --common OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.728812e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.164066e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.276046e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.737152e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.159854e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.268362e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 0.608013 sec - 2,509,243,315 cycles # 3.014 GHz - 3,608,155,847 instructions # 1.44 insn per cycle - 0.891602544 seconds time elapsed +TOTAL : 0.607361 sec + 2,522,338,369 cycles # 3.013 GHz + 3,662,390,525 instructions # 1.45 insn per cycle + 0.895938793 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.192667e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.256790e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.256790e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.194281e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.258156e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.258156e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.943798 sec - 15,329,589,495 cycles # 3.098 GHz - 38,452,781,754 instructions # 2.51 insn per cycle - 4.949051931 seconds time elapsed +TOTAL : 4.940662 sec + 15,324,069,943 cycles # 3.099 GHz + 38,452,708,280 instructions # 2.51 insn per cycle + 4.945884999 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 3.258803994438787e-07 OK 
(relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.710860e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.912537e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.912537e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.685629e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.885512e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.885512e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.986722 sec - 9,267,137,535 cycles # 3.098 GHz - 24,590,320,806 instructions # 2.65 insn per cycle - 2.991908911 seconds time elapsed +TOTAL : 3.007465 sec + 9,278,655,226 cycles # 3.081 GHz + 24,590,238,814 instructions # 2.65 insn per cycle + 3.012796450 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.913517e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.427131e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.427131e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.840961e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.350814e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.350814e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.935767 sec - 5,673,483,420 cycles # 2.927 GHz - 11,248,913,442 instructions # 1.98 insn per cycle - 1.941003372 seconds time elapsed +TOTAL : 1.958737 sec + 5,644,158,885 cycles # 2.875 GHz + 11,248,092,256 instructions # 1.99 insn per cycle + 1.963882920 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.389406e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.990338e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.990338e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.249720e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.861953e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.861953e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.803078 sec - 5,137,798,097 cycles # 2.848 GHz - 10,521,456,294 instructions # 2.05 insn per cycle - 1.808296325 seconds time elapsed +TOTAL : 1.847755 sec + 5,135,966,832 cycles # 2.773 GHz + 10,522,578,708 instructions # 2.05 insn per cycle + 1.853336904 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.055914e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.287900e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.287900e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.012886e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.244665e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.244665e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.748891 sec - 5,590,724,584 cycles # 2.031 GHz - 7,756,083,386 instructions # 1.39 insn per cycle - 2.754238360 seconds time elapsed +TOTAL : 2.776977 sec + 5,559,598,432 cycles # 1.999 GHz + 7,754,154,174 instructions # 1.39 insn per cycle + 2.782273535 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt index 32065e8c80..a8b2f7d0ee 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_12:46:03 +DATE: 2023-10-29_23:36:04 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --curhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.752520e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.165198e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.275080e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.742144e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.162516e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.272033e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.550572 sec - 2,362,516,573 cycles # 2.997 GHz - 3,675,267,695 instructions # 1.56 insn per cycle - 0.847304373 seconds time elapsed +TOTAL : 0.550222 sec + 2,352,867,175 cycles # 3.012 GHz + 3,685,140,796 instructions # 1.57 insn per cycle + 0.840259072 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.189601e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.253029e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.253029e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.194117e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.257904e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.257904e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.892729 sec - 15,145,617,059 cycles # 3.093 GHz - 38,436,286,492 instructions # 2.54 insn per cycle - 4.898049782 seconds time elapsed +TOTAL : 4.882304 sec + 15,146,493,935 cycles # 3.100 GHz + 38,436,498,659 instructions # 2.54 insn per cycle + 4.887583335 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 3.258803994438787e-07 OK 
(relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.684613e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.884352e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.884352e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.703201e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.905185e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.905185e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.949502 sec - 9,111,469,827 cycles # 3.085 GHz - 24,591,911,713 instructions # 2.70 insn per cycle - 2.954800828 seconds time elapsed +TOTAL : 2.934975 sec + 9,100,538,588 cycles # 3.096 GHz + 24,590,797,051 instructions # 2.70 insn per cycle + 2.940311227 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.740095e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.238065e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.238065e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.949109e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.463374e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.463374e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.931930 sec - 5,475,741,469 cycles # 2.828 GHz - 11,265,347,233 instructions # 2.06 insn per cycle - 1.937332643 seconds time elapsed +TOTAL : 1.865224 sec + 5,462,840,274 cycles # 2.922 GHz + 11,264,977,168 instructions # 2.06 insn per cycle + 1.870595769 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.605310e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.238263e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.238263e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.642159e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.287062e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.287062e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.690772 sec - 4,944,100,857 cycles # 2.918 GHz - 10,570,436,520 instructions # 2.14 insn per cycle - 1.696028312 seconds time elapsed +TOTAL : 1.680715 sec + 4,938,481,524 cycles # 2.930 GHz + 10,571,461,567 instructions # 2.14 insn per cycle + 1.686022288 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.118850e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.357411e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.357411e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.087400e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.324514e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.324514e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.648858 sec - 5,377,849,755 cycles # 2.027 GHz - 7,805,055,706 instructions # 1.45 insn per cycle - 2.654091921 seconds time elapsed +TOTAL : 2.668501 sec + 5,400,613,890 cycles # 2.020 GHz + 7,804,891,421 instructions # 1.45 insn per cycle + 2.673655683 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index 246fbcacdc..6535b6cfe4 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,24 +37,26 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_12:42:56 +DATE: 2023-10-29_23:32:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 2 --rmbhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.960530e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.160163e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.275575e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.045776e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.160228e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.269064e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.696058 sec - 2,780,887,356 cycles # 3.015 GHz - 4,385,594,335 instructions # 1.58 insn per cycle - 0.979294954 seconds time elapsed +TOTAL : 0.693511 sec + 2,792,166,398 cycles # 3.034 GHz + 4,398,875,647 instructions # 1.58 insn per cycle + 0.979299052 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -66,19 +69,20 @@ Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.170580e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.234040e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.234040e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.191609e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.257007e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.257007e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.935348 sec - 15,155,957,484 cycles # 3.068 GHz - 38,436,941,468 instructions # 2.54 insn per cycle - 4.940714673 seconds time elapsed +TOTAL : 4.889577 sec + 15,150,373,861 cycles # 3.096 GHz + 38,436,083,829 instructions # 2.54 insn per cycle + 4.894924037 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -92,19 +96,20 @@ Relative difference = 3.258803994438787e-07 OK (relative difference <= 5E-3) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.699905e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.901140e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.901140e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.715982e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.923240e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.923240e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.938495 sec - 9,096,664,653 cycles # 3.091 GHz - 24,590,704,048 instructions # 2.70 insn per cycle - 2.943788052 seconds time elapsed +TOTAL : 2.924496 sec + 9,096,467,047 cycles # 3.106 GHz + 24,590,726,023 instructions # 2.70 insn per cycle + 2.929631165 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -118,19 +123,20 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.920011e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.436855e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.436855e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.954558e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.476070e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.476070e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.873761 sec - 5,479,875,596 cycles # 2.918 GHz - 11,265,025,893 instructions # 2.06 insn per cycle - 1.879007737 seconds time elapsed +TOTAL : 1.864371 sec + 5,489,366,756 cycles # 2.937 GHz + 11,265,437,305 instructions # 2.05 insn per cycle + 1.869765983 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -144,19 +150,20 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.591031e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.220898e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.220898e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.498968e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.136400e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.136400e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.693862 sec - 4,936,721,786 cycles # 2.906 GHz - 10,571,567,660 instructions # 2.14 insn per cycle - 1.699207961 seconds time elapsed +TOTAL : 1.717689 sec + 4,945,186,310 cycles # 2.871 GHz + 10,569,892,258 instructions # 2.14 insn per cycle + 1.723024481 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -170,19 +177,20 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.096221e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.331880e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.331880e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.103096e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.341060e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.341060e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.662905 sec - 5,374,981,222 cycles # 2.015 GHz - 7,804,817,945 instructions # 1.45 insn per cycle - 2.668129038 seconds time elapsed +TOTAL : 2.658567 sec + 5,391,246,150 cycles # 2.025 GHz + 7,805,025,014 instructions # 1.45 insn per cycle + 2.663924818 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index 63b4155174..28991481e3 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_12:07:41 +DATE: 2023-10-29_22:56:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.132498e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.171034e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.266333e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.025981e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.169481e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.266361e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.514455 sec - 2,220,091,958 cycles # 2.993 GHz - 3,194,234,694 instructions # 1.44 insn per cycle - 0.799874670 seconds time elapsed +TOTAL : 0.511975 sec + 2,237,170,226 cycles # 3.018 GHz + 3,235,319,101 instructions # 1.45 insn per cycle + 0.797845351 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.215738e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.280974e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.280974e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.215548e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.281173e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.281173e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.835319 sec - 15,019,433,345 cycles # 3.103 GHz - 40,163,621,810 instructions # 2.67 insn per cycle - 4.840634277 seconds time elapsed +TOTAL : 4.836086 sec + 15,032,256,829 cycles # 3.106 GHz + 40,164,623,116 instructions # 2.67 insn per cycle + 4.841265154 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 3.258803992249869e-07 OK (relative 
difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.884500e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.107942e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.107942e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.894720e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.118587e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.118587e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.803037 sec - 8,665,440,053 cycles # 3.087 GHz - 23,685,025,671 instructions # 2.73 insn per cycle - 2.808510683 seconds time elapsed +TOTAL : 2.794710 sec + 8,676,040,838 cycles # 3.099 GHz + 23,684,337,981 instructions # 2.73 insn per cycle + 2.800202741 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2069) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.310538e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.719059e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.719059e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.305864e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.724393e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.724393e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.077453 sec - 6,107,652,411 cycles # 2.934 GHz - 13,074,978,434 instructions # 2.14 insn per cycle - 2.082820176 seconds time elapsed +TOTAL : 2.078632 sec + 6,088,003,456 cycles # 2.923 GHz + 13,074,476,696 instructions # 2.15 insn per cycle + 2.083971169 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2546) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.591839e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.032483e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.032483e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.589272e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.048410e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.048410e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.978049 sec - 5,776,625,422 cycles # 2.915 GHz - 12,334,731,018 instructions # 2.14 insn per cycle - 1.983376605 seconds time elapsed +TOTAL : 1.977912 sec + 5,804,117,930 cycles # 2.928 GHz + 12,332,655,739 instructions # 2.12 insn per cycle + 1.983152296 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2096) (512y: 294) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.717467e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.916519e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.916519e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.700119e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.903509e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.903509e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.923702 sec - 5,814,117,462 cycles # 1.986 GHz - 9,613,349,209 instructions # 1.65 insn per cycle - 2.928942693 seconds time elapsed +TOTAL : 2.941360 sec + 5,814,448,158 cycles # 1.974 GHz + 9,613,559,466 instructions # 1.65 insn per cycle + 2.946710269 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1510) (512y: 209) (512z: 1971) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index f681280332..67ce42c701 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_12:26:50 +DATE: 2023-10-29_23:17:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.725703e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.159692e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.268883e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.570060e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.154967e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.269382e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.516069 sec - 2,246,320,767 cycles # 3.015 GHz - 3,236,311,212 instructions # 1.44 insn per cycle - 0.803428844 seconds time elapsed +TOTAL : 0.522726 sec + 2,265,340,168 cycles # 2.993 GHz + 3,245,639,537 instructions # 1.43 insn per cycle + 0.816182159 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.555106e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.642080e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.642080e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.556809e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.644599e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.644599e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.207931 sec - 13,014,141,427 cycles # 3.090 GHz - 34,406,405,912 instructions # 2.64 insn per cycle - 4.213404340 seconds time elapsed +TOTAL : 4.205292 sec + 13,015,864,783 cycles # 3.092 GHz + 34,405,849,374 instructions # 2.64 insn per cycle + 4.211064725 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 686) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 3.258803992249869e-07 OK (relative 
difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.092906e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.236549e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.236549e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.147270e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.292062e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.292062e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.496208 sec - 10,596,570,849 cycles # 3.027 GHz - 24,023,369,316 instructions # 2.27 insn per cycle - 3.501419893 seconds time elapsed +TOTAL : 3.436163 sec + 10,617,721,779 cycles # 3.086 GHz + 24,023,060,269 instructions # 2.26 insn per cycle + 3.441752058 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.853734e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.191560e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.191560e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.819238e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.163536e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.163536e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.263215 sec - 6,594,620,627 cycles # 2.908 GHz - 12,414,751,758 instructions # 1.88 insn per cycle - 2.268614948 seconds time elapsed +TOTAL : 2.279956 sec + 6,619,549,425 cycles # 2.900 GHz + 12,415,373,269 instructions # 1.88 insn per cycle + 2.285571623 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3156) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 3.2588037208240405e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.171099e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.557909e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.557909e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.082600e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.455787e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.455787e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.132052 sec - 6,231,185,857 cycles # 2.916 GHz - 11,589,413,017 instructions # 1.86 insn per cycle - 2.137607872 seconds time elapsed +TOTAL : 2.166233 sec + 6,311,826,799 cycles # 2.907 GHz + 11,588,393,146 instructions # 1.84 insn per cycle + 2.171681945 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2692) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 3.2588037208240405e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.139272e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.380011e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.380011e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.122487e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.365337e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.365337e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.637341 sec - 5,333,432,205 cycles # 2.019 GHz - 9,309,292,233 instructions # 1.75 insn per cycle - 2.642779358 seconds time elapsed +TOTAL : 2.648103 sec + 5,340,501,776 cycles # 2.014 GHz + 9,309,235,597 instructions # 1.74 insn per cycle + 2.653678227 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2116) (512y: 282) (512z: 1958) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index b6f385dd32..62680c581a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_12:27:17 +DATE: 2023-10-29_23:17:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.723495e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.157942e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.265661e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.564062e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.147826e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.264986e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.515428 sec - 2,250,011,882 cycles # 3.016 GHz - 3,234,085,121 instructions # 1.44 insn per cycle - 0.803968951 seconds time elapsed +TOTAL : 0.519306 sec + 2,255,189,511 cycles # 3.009 GHz + 3,221,417,076 instructions # 1.43 insn per cycle + 0.809166285 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.702731e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.800396e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.800396e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.695140e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.795404e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.795404e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.982690 sec - 12,369,134,181 cycles # 3.102 GHz - 35,058,846,564 instructions # 2.83 insn per cycle - 3.987863298 seconds time elapsed +TOTAL : 3.994496 sec + 12,384,425,334 cycles # 3.097 GHz + 35,058,885,666 instructions # 2.83 insn per cycle + 3.999892654 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 457) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 3.258803992249869e-07 OK (relative 
difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.061923e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.202369e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.202369e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.136077e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.280277e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.280277e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.530068 sec - 10,676,553,253 cycles # 3.020 GHz - 23,099,794,307 instructions # 2.16 insn per cycle - 3.535561320 seconds time elapsed +TOTAL : 3.446583 sec + 10,684,717,975 cycles # 3.096 GHz + 23,099,617,393 instructions # 2.16 insn per cycle + 3.451955165 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2363) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.225421e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.617585e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.617585e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.209600e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.607410e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.607410e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.109541 sec - 6,165,675,874 cycles # 2.917 GHz - 11,969,858,026 instructions # 1.94 insn per cycle - 2.114857829 seconds time elapsed +TOTAL : 2.115939 sec + 6,169,983,144 cycles # 2.909 GHz + 11,969,796,573 instructions # 1.94 insn per cycle + 2.121437158 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2511) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.370200e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.777020e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.777020e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.312624e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.722857e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.722857e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.055254 sec - 5,996,913,166 cycles # 2.911 GHz - 11,143,393,215 instructions # 1.86 insn per cycle - 2.060568204 seconds time elapsed +TOTAL : 2.077114 sec + 6,023,533,872 cycles # 2.893 GHz + 11,143,307,100 instructions # 1.85 insn per cycle + 2.082671772 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2128) (512y: 174) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.195526e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.443172e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.443172e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.192169e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.444249e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.444249e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.602515 sec - 5,216,067,649 cycles # 2.001 GHz - 9,033,452,178 instructions # 1.73 insn per cycle - 2.607965373 seconds time elapsed +TOTAL : 2.605068 sec + 5,223,191,580 cycles # 2.002 GHz + 9,033,300,795 instructions # 1.73 insn per cycle + 2.610459589 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1651) (512y: 208) (512z: 1567) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index df5aa8ac18..aa352fe01e 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_12:08:08 +DATE: 2023-10-29_22:56:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.068609e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.700374e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.971314e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.029207e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.660992e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.952559e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.476640 sec - 1,995,636,481 cycles # 2.842 GHz - 2,842,978,749 instructions # 1.42 insn per cycle - 0.759763821 seconds time elapsed +TOTAL : 0.471116 sec + 2,075,735,853 cycles # 2.996 GHz + 2,991,480,396 instructions # 1.44 insn per cycle + 0.749941019 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.337599e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.414470e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.414470e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.340367e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.416459e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.416459e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.568864 sec - 14,161,294,667 cycles # 3.097 GHz - 38,393,773,103 instructions # 2.71 insn per cycle - 4.573929482 seconds time elapsed +TOTAL : 4.563941 sec + 14,152,524,452 cycles # 3.098 GHz + 38,393,574,090 instructions # 2.71 insn per cycle + 4.568822559 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 4.819651478256564e-08 OK (relative 
difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.238510e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.663923e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.663923e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.252416e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.682693e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.682693e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.084946 sec - 6,469,899,480 cycles # 3.097 GHz - 15,829,500,280 instructions # 2.45 insn per cycle - 2.089957611 seconds time elapsed +TOTAL : 2.078659 sec + 6,471,097,125 cycles # 3.107 GHz + 15,829,624,258 instructions # 2.45 insn per cycle + 2.083554513 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.562959e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.098086e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.098086e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.593020e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.101399e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.101399e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.179401 sec - 3,459,426,980 cycles # 2.925 GHz - 7,607,068,208 instructions # 2.20 insn per cycle - 1.184590061 seconds time elapsed +TOTAL : 1.175125 sec + 3,457,161,071 cycles # 2.931 GHz + 7,606,572,370 instructions # 2.20 insn per cycle + 1.180355415 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.027581e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.192505e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.192505e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.022780e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.188618e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.188618e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.102233 sec - 3,244,602,392 cycles # 2.932 GHz - 7,214,608,245 instructions # 2.22 insn per cycle - 1.107242684 seconds time elapsed +TOTAL : 1.110409 sec + 3,250,510,189 cycles # 2.917 GHz + 7,216,053,398 instructions # 2.22 insn per cycle + 1.115909184 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.608210e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.462624e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.462624e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.406337e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.252882e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.252882e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.460661 sec - 3,052,035,122 cycles # 2.083 GHz - 5,845,582,428 instructions # 1.92 insn per cycle - 1.465683285 seconds time elapsed +TOTAL : 1.500461 sec + 3,058,902,515 cycles # 2.034 GHz + 5,844,747,311 instructions # 1.91 insn per cycle + 1.505609823 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index b5be0b4e18..5ab757a50c 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,11 +37,12 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_12:36:58 +DATE: 2023-10-29_23:27:14 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -48,15 +50,16 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.895050e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.686304e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.686304e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.342154e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.755663e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.755663e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.669131 sec - 2,666,038,605 cycles # 2.984 GHz - 4,131,575,976 instructions # 1.55 insn per cycle - 0.951764894 seconds time elapsed +TOTAL : 0.658303 sec + 2,665,727,908 cycles # 3.020 GHz + 4,113,714,963 instructions # 1.54 insn per cycle + 0.941565209 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -72,20 +75,21 @@ Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.329231e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.405966e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.405966e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.333682e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.410114e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.410114e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.626382 sec - 14,341,300,654 cycles # 3.097 GHz - 38,435,549,708 instructions # 2.68 insn per cycle - 4.632329822 seconds time elapsed +TOTAL : 4.618102 sec + 14,338,054,509 cycles # 3.102 GHz + 38,436,340,206 instructions # 2.68 insn per cycle + 4.624142820 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ 
-99,20 +103,21 @@ Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.192819e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.611750e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.611750e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.183610e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.607484e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.607484e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.145322 sec - 6,664,477,255 cycles # 3.099 GHz - 16,109,970,801 instructions # 2.42 insn per cycle - 2.151392542 seconds time elapsed +TOTAL : 2.151042 sec + 6,679,436,155 cycles # 3.098 GHz + 16,110,006,537 instructions # 2.41 insn per cycle + 2.157129366 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -126,20 +131,21 @@ Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 
--bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.486569e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.087327e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.087327e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.414037e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.079639e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.079639e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.231584 sec - 3,644,407,413 cycles # 2.947 GHz - 7,844,021,819 instructions # 2.15 insn per cycle - 1.237558832 seconds time elapsed +TOTAL : 1.240366 sec + 3,659,881,370 cycles # 2.938 GHz + 7,844,165,630 instructions # 2.14 insn per cycle + 1.246487540 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -153,20 +159,21 @@ Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.007607e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.169689e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.169689e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.010699e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.171922e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.171922e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.167898 sec - 3,439,867,530 cycles # 2.932 GHz - 7,453,045,112 instructions # 2.17 insn per cycle - 1.173934717 seconds time elapsed +TOTAL : 1.164913 sec + 3,451,308,059 cycles # 2.949 GHz + 7,453,101,304 instructions # 2.16 insn per cycle + 1.171040454 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -180,20 +187,21 @@ Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.499425e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.333958e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.333958e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.500212e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.340447e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.340447e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.525803 sec - 3,266,944,202 cycles # 2.134 GHz - 6,100,961,311 instructions # 1.87 insn per cycle - 1.531830770 seconds time elapsed +TOTAL : 1.526899 sec + 3,275,626,557 cycles # 2.138 GHz + 6,100,657,130 instructions # 1.86 insn per cycle + 1.533130362 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index 0acb7a9e73..c3d81bddda 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_12:49:41 +DATE: 2023-10-29_23:39:43 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --common OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.844201e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.663361e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.970041e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.798695e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.634515e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.944433e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 -TOTAL : 0.554826 sec - 2,327,069,146 cycles # 3.010 GHz - 3,431,677,365 instructions # 1.47 insn per cycle - 0.830631626 seconds time elapsed +TOTAL : 0.556545 sec + 2,327,252,717 cycles # 3.008 GHz + 3,421,736,970 instructions # 1.47 insn per cycle + 0.832373460 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.321408e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.398513e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.398513e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.327580e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.402841e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.402841e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 4.656867 sec - 14,315,574,695 cycles # 3.072 GHz - 38,422,268,817 instructions # 2.68 insn per cycle - 4.662075448 seconds time elapsed +TOTAL : 4.642532 sec + 14,315,105,147 cycles # 3.081 GHz + 38,421,802,206 instructions # 2.68 insn per cycle + 4.647613343 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 4.819651478256564e-08 OK (relative 
difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.231554e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.659617e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.659617e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.037912e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.461691e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.461691e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.139911 sec - 6,633,974,828 cycles # 3.094 GHz - 15,842,107,200 instructions # 2.39 insn per cycle - 2.145051071 seconds time elapsed +TOTAL : 2.223517 sec + 6,632,406,040 cycles # 2.977 GHz + 15,841,248,477 instructions # 2.39 insn per cycle + 2.228830767 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.165587e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.054986e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.054986e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.226907e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.061731e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.061731e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.286221 sec - 3,615,431,093 cycles # 2.801 GHz - 7,591,036,822 instructions # 2.10 insn per cycle - 1.291338242 seconds time elapsed +TOTAL : 1.274705 sec + 3,630,232,417 cycles # 2.838 GHz + 7,590,830,281 instructions # 2.09 insn per cycle + 1.279899279 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.026813e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.192367e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.192367e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.647063e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.119085e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.119085e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.160600 sec - 3,412,626,284 cycles # 2.929 GHz - 7,165,387,866 instructions # 2.10 insn per cycle - 1.165674406 seconds time elapsed +TOTAL : 1.228082 sec + 3,431,774,015 cycles # 2.795 GHz + 7,168,267,152 instructions # 2.09 insn per cycle + 1.233468520 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.382760e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.206265e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.206265e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.970825e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.735405e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.735405e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.556726 sec - 3,230,120,120 cycles # 2.069 GHz - 5,796,867,387 instructions # 1.79 insn per cycle - 1.561748387 seconds time elapsed +TOTAL : 1.647433 sec + 3,235,860,034 cycles # 1.959 GHz + 5,797,322,103 instructions # 1.79 insn per cycle + 1.652600396 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt index a7a3d3f332..023fba572a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_12:46:30 +DATE: 2023-10-29_23:36:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --curhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.856308e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.667298e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.972666e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.809222e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.645957e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.947353e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.502947 sec - 2,165,124,054 cycles # 3.003 GHz - 3,377,369,122 instructions # 1.56 insn per cycle - 0.778634889 seconds time elapsed +TOTAL : 0.503446 sec + 2,185,473,363 cycles # 3.028 GHz + 3,416,682,681 instructions # 1.56 insn per cycle + 0.779206316 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.334502e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.411133e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.411133e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.298249e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.373831e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.373831e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.574512 sec - 14,158,114,653 cycles # 3.092 GHz - 38,394,139,315 instructions # 2.71 insn per cycle - 4.579594412 seconds time elapsed +TOTAL : 4.647478 sec + 14,148,019,955 cycles # 3.042 GHz + 38,392,518,660 instructions # 2.71 insn per cycle + 4.652491461 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 4.819651478256564e-08 OK (relative 
difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.120883e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.529305e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.529305e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.232862e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.665849e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.665849e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.130821 sec - 6,475,593,553 cycles # 3.033 GHz - 15,829,500,247 instructions # 2.44 insn per cycle - 2.135849031 seconds time elapsed +TOTAL : 2.086994 sec + 6,478,231,222 cycles # 3.098 GHz + 15,829,954,128 instructions # 2.44 insn per cycle + 2.092206985 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.472598e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.087489e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.087489e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.466868e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.088473e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.088473e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.191416 sec - 3,457,290,525 cycles # 2.891 GHz - 7,606,423,962 instructions # 2.20 insn per cycle - 1.196523405 seconds time elapsed +TOTAL : 1.190025 sec + 3,460,027,857 cycles # 2.897 GHz + 7,606,585,097 instructions # 2.20 insn per cycle + 1.195060645 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.029636e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.194772e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.194772e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.024370e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.190854e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.190854e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.100935 sec - 3,247,410,753 cycles # 2.938 GHz - 7,216,104,427 instructions # 2.22 insn per cycle - 1.106041400 seconds time elapsed +TOTAL : 1.105334 sec + 3,257,431,655 cycles # 2.935 GHz + 7,214,579,277 instructions # 2.21 insn per cycle + 1.110486214 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.601959e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.469102e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.469102e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.208977e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.010764e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.010764e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.460971 sec - 3,065,417,357 cycles # 2.092 GHz - 5,846,314,702 instructions # 1.91 insn per cycle - 1.466145311 seconds time elapsed +TOTAL : 1.539764 sec + 3,065,042,679 cycles # 1.985 GHz + 5,845,805,718 instructions # 1.91 insn per cycle + 1.545069845 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index 8ab92e024f..cf74897fe6 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,24 +37,26 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_12:43:22 +DATE: 2023-10-29_23:33:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 2 --rmbhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.120390e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.647328e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.944985e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.224135e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.649986e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.947822e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.602037 sec - 2,439,189,303 cycles # 2.975 GHz - 3,768,649,868 instructions # 1.55 insn per cycle - 0.877319776 seconds time elapsed +TOTAL : 0.601699 sec + 2,513,463,986 cycles # 3.041 GHz + 3,911,746,126 instructions # 1.56 insn per cycle + 0.884178922 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% @@ -66,19 +69,20 @@ Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.334080e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.410077e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.410077e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.350355e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.426473e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.426473e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.577301 sec - 14,152,405,326 cycles # 3.089 GHz - 38,392,358,857 instructions # 2.71 insn per cycle - 4.582307407 seconds time elapsed +TOTAL : 4.544385 sec + 14,150,131,454 cycles # 3.111 GHz + 38,392,663,057 instructions # 2.71 insn per cycle + 4.549365415 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -92,19 +96,20 @@ Relative difference = 4.819651478256564e-08 OK (relative difference <= 5E-3) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.230806e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.656536e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.656536e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.250294e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.679751e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.679751e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.088193 sec - 6,472,075,371 cycles # 3.093 GHz - 15,830,276,009 instructions # 2.45 insn per cycle - 2.093482433 seconds time elapsed +TOTAL : 2.079738 sec + 6,469,789,265 cycles # 3.104 GHz + 15,829,612,923 instructions # 2.45 insn per cycle + 2.084975976 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -118,19 +123,20 @@ Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.567124e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.097679e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.097679e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.594124e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.103315e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.103315e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.180111 sec - 3,461,207,927 cycles # 2.925 GHz - 7,606,883,443 instructions # 2.20 insn per cycle - 1.185275654 seconds time elapsed +TOTAL : 1.174775 sec + 3,463,679,492 cycles # 2.937 GHz + 7,606,485,307 instructions # 2.20 insn per cycle + 1.179888489 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -144,19 +150,20 @@ Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.023458e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.188704e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.188704e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.017682e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.180616e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.180616e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.108734 sec - 3,249,015,929 cycles # 2.919 GHz - 7,215,561,707 instructions # 2.22 insn per cycle - 1.113795101 seconds time elapsed +TOTAL : 1.112712 sec + 3,249,854,503 cycles # 2.909 GHz + 7,214,706,775 instructions # 2.22 insn per cycle + 1.117999772 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -170,19 +177,20 @@ Relative difference = 8.302595855806234e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.172083e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.957523e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.957523e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.605648e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.487831e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.487831e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.547659 sec - 3,061,807,613 cycles # 1.974 GHz - 5,846,435,749 instructions # 1.91 insn per cycle - 1.552775097 seconds time elapsed +TOTAL : 1.462432 sec + 3,073,167,609 cycles # 2.096 GHz + 5,846,117,857 instructions # 1.90 insn per cycle + 1.467652309 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index 4a15365f27..0a996f834c 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_12:08:30 +DATE: 2023-10-29_22:57:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.073100e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.732202e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.024409e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.037758e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.719252e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.016040e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.472590 sec - 2,049,324,950 cycles # 2.965 GHz - 2,988,118,481 instructions # 1.46 insn per cycle - 0.749805878 seconds time elapsed +TOTAL : 0.470036 sec + 2,072,283,761 cycles # 2.995 GHz + 2,983,284,438 instructions # 1.44 insn per cycle + 0.749471768 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.286965e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.360277e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.360277e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.253114e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.323651e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.323651e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.668841 sec - 14,415,454,850 cycles # 3.085 GHz - 39,884,803,964 instructions # 2.77 insn per cycle - 4.673868552 seconds time elapsed +TOTAL : 4.737408 sec + 14,418,215,974 cycles # 3.042 GHz + 39,883,810,988 instructions # 2.77 insn per cycle + 4.742190490 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 570) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 4.790961076489297e-08 OK (relative 
difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.067369e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.650323e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.650323e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.075422e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.663303e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.663303e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.811456 sec - 5,581,772,796 cycles # 3.075 GHz - 15,299,386,125 instructions # 2.74 insn per cycle - 1.816518063 seconds time elapsed +TOTAL : 1.809220 sec + 5,592,465,189 cycles # 3.084 GHz + 15,299,712,088 instructions # 2.74 insn per cycle + 1.814483780 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2473) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 1.748963824709674e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.765373e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.445236e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.445236e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.834505e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.534184e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.534184e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.632307 sec - 4,737,058,603 cycles # 2.894 GHz - 9,747,529,930 instructions # 2.06 insn per cycle - 1.637455307 seconds time elapsed +TOTAL : 1.616733 sec + 4,735,474,011 cycles # 2.922 GHz + 9,748,366,481 instructions # 2.06 insn per cycle + 1.621819739 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3710) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 1.0391259163456515e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.802562e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.489807e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.489807e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.003908e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.735430e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.735430e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.624786 sec - 4,624,929,511 cycles # 2.839 GHz - 9,339,461,956 instructions # 2.02 insn per cycle - 1.629929018 seconds time elapsed +TOTAL : 1.580130 sec + 4,630,502,921 cycles # 2.923 GHz + 9,338,331,720 instructions # 2.02 insn per cycle + 1.585139169 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3497) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 1.0391259163456515e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.198562e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.758096e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.758096e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.214571e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.784183e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.784183e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.775153 sec - 3,657,127,414 cycles # 2.055 GHz - 7,046,249,834 instructions # 1.93 insn per cycle - 1.780322386 seconds time elapsed +TOTAL : 1.770080 sec + 3,656,314,511 cycles # 2.061 GHz + 7,046,094,328 instructions # 1.93 insn per cycle + 1.775184056 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2606) (512y: 12) (512z: 2221) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index 0b94ca6efd..a467b5d8bb 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_12:27:43 +DATE: 2023-10-29_23:17:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.748019e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.648137e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.948823e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.434987e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.650833e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.976397e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.472128 sec - 2,084,648,086 cycles # 3.008 GHz - 3,003,071,400 instructions # 1.44 insn per cycle - 0.750693235 seconds time elapsed +TOTAL : 0.475936 sec + 2,110,435,140 cycles # 2.995 GHz + 3,008,103,642 instructions # 1.43 insn per cycle + 0.763528431 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.622703e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.718199e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.718199e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.588741e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.682897e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.682897e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.080807 sec - 12,597,466,937 cycles # 3.084 GHz - 34,393,194,192 instructions # 2.73 insn per cycle - 4.085827485 seconds time elapsed +TOTAL : 4.134770 sec + 12,607,914,642 cycles # 3.046 GHz + 34,393,723,738 instructions # 2.73 insn per cycle + 4.139979205 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 696) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 4.463890496342449e-08 OK (relative 
difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.593372e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.080103e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.080103e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.345993e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.816916e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.816916e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.957639 sec - 6,081,852,390 cycles # 3.100 GHz - 14,874,109,615 instructions # 2.45 insn per cycle - 1.962732822 seconds time elapsed +TOTAL : 2.046835 sec + 6,102,010,194 cycles # 2.975 GHz + 14,874,304,720 instructions # 2.44 insn per cycle + 2.052076023 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3009) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 1.8746278463897685e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.283773e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.100706e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.100706e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.619423e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.507763e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.507763e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.523796 sec - 4,273,696,688 cycles # 2.804 GHz - 9,044,872,196 instructions # 2.12 insn per cycle - 1.528963268 seconds time elapsed +TOTAL : 1.459297 sec + 4,276,678,420 cycles # 2.922 GHz + 9,041,560,375 instructions # 2.11 insn per cycle + 1.464491491 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4445) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 9.857617164523888e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.673227e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.553201e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.553201e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.754754e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.682518e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.682518e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.450509 sec - 4,204,067,849 cycles # 2.891 GHz - 8,677,293,067 instructions # 2.06 insn per cycle - 1.455611235 seconds time elapsed +TOTAL : 1.434608 sec + 4,197,492,913 cycles # 2.917 GHz + 8,676,910,228 instructions # 2.07 insn per cycle + 1.439911295 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4244) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 9.857617164523888e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.766332e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.255676e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.255676e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.823540e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.320310e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.320310e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.903201 sec - 3,830,827,038 cycles # 2.008 GHz - 7,820,163,115 instructions # 2.04 insn per cycle - 1.908322170 seconds time elapsed +TOTAL : 1.884749 sec + 3,839,699,644 cycles # 2.033 GHz + 7,820,050,517 instructions # 2.04 insn per cycle + 1.890103108 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4420) (512y: 0) (512z: 2556) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index 0c4c8a0ffc..d6d7f1e6ab 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_12:28:06 +DATE: 2023-10-29_23:18:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.862319e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.711280e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.025159e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.503817e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.701861e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.039467e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.472511 sec - 2,088,068,567 cycles # 3.006 GHz - 3,003,865,089 instructions # 1.44 insn per cycle - 0.751825355 seconds time elapsed +TOTAL : 0.475921 sec + 2,117,417,472 cycles # 2.982 GHz + 2,996,840,505 instructions # 1.42 insn per cycle + 0.767027235 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 1.9210746159747678e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.787349e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.894785e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.894785e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.806204e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.918538e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.918538e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 3.845279 sec - 11,754,758,395 cycles # 3.054 GHz - 35,130,095,118 instructions # 2.99 insn per cycle - 3.850208573 seconds time elapsed +TOTAL : 3.820998 sec + 11,761,780,361 cycles # 3.075 GHz + 35,129,373,494 instructions # 2.99 insn per cycle + 3.826162396 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 470) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 4.463890496342449e-08 OK (relative 
difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.718455e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.230618e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.230618e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.674605e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.182829e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.182829e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.917322 sec - 5,948,729,930 cycles # 3.096 GHz - 14,483,816,532 instructions # 2.43 insn per cycle - 1.922400974 seconds time elapsed +TOTAL : 1.931572 sec + 5,953,219,823 cycles # 3.075 GHz + 14,483,788,778 instructions # 2.43 insn per cycle + 1.936660694 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 1.7661780742548925e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.874050e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.799685e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.799685e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.777554e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.711884e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.711884e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.413124 sec - 4,150,858,124 cycles # 2.928 GHz - 8,888,765,602 instructions # 2.14 insn per cycle - 1.418367650 seconds time elapsed +TOTAL : 1.430796 sec + 4,168,678,841 cycles # 2.904 GHz + 8,888,432,605 instructions # 2.13 insn per cycle + 1.436220799 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3576) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 1.0385521077446488e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.916161e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.854206e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.854206e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.428501e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.293244e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.293244e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.406270 sec - 4,132,950,739 cycles # 2.930 GHz - 8,425,066,086 instructions # 2.04 insn per cycle - 1.411315775 seconds time elapsed +TOTAL : 1.495387 sec + 4,130,818,711 cycles # 2.754 GHz + 8,424,408,773 instructions # 2.04 insn per cycle + 1.500716701 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3320) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 1.0385521077446488e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.981602e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.498052e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.498052e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.895021e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.402173e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.402173e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.835868 sec - 3,793,142,232 cycles # 2.062 GHz - 7,713,295,934 instructions # 2.03 insn per cycle - 1.841064820 seconds time elapsed +TOTAL : 1.862766 sec + 3,790,389,540 cycles # 2.030 GHz + 7,712,878,846 instructions # 2.03 insn per cycle + 1.868015050 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3436) (512y: 0) (512z: 2108) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 3105030e00..a57a0e7d63 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_12:08:54 +DATE: 2023-10-29_22:57:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.131497e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.176298e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.272818e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.024652e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.170014e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.267198e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.511653 sec - 2,228,669,200 cycles # 3.008 GHz - 3,208,057,648 instructions # 1.44 insn per cycle - 0.799834209 seconds time elapsed +TOTAL : 0.514329 sec + 2,230,556,647 cycles # 2.995 GHz + 3,160,835,951 instructions # 1.42 insn per cycle + 0.801821216 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 3.241686432649386e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.156329e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.218093e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.218093e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.169903e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.233066e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.233066e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.966196 sec - 15,259,732,402 cycles # 3.070 GHz - 38,638,245,819 instructions # 2.53 insn per cycle - 4.971450935 seconds time elapsed +TOTAL : 4.935048 sec + 15,258,964,089 cycles # 3.089 GHz + 38,638,103,907 instructions # 2.53 insn per cycle + 4.940346598 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 672) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 3.0048445715164216e-07 OK (relative 
difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.635241e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.828042e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.828042e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.764548e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.972599e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.972599e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.987841 sec - 8,928,273,750 cycles # 2.984 GHz - 24,239,215,531 instructions # 2.71 insn per cycle - 2.993088872 seconds time elapsed +TOTAL : 2.889853 sec + 8,945,301,665 cycles # 3.091 GHz + 24,238,281,209 instructions # 2.71 insn per cycle + 2.895063314 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.602670e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.086544e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.086544e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.830803e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.370680e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.370680e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.978132 sec - 5,419,631,994 cycles # 2.733 GHz - 11,287,865,943 instructions # 2.08 insn per cycle - 1.983532568 seconds time elapsed +TOTAL : 1.902240 sec + 5,478,028,889 cycles # 2.878 GHz + 11,289,175,549 instructions # 2.06 insn per cycle + 1.907742702 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2480) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.761038e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.429385e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.429385e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.746213e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.429778e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.429778e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.653810 sec - 4,846,105,915 cycles # 2.924 GHz - 10,538,364,912 instructions # 2.17 insn per cycle - 1.659126356 seconds time elapsed +TOTAL : 1.656189 sec + 4,847,967,355 cycles # 2.919 GHz + 10,535,656,532 instructions # 2.17 insn per cycle + 1.661647670 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2167) (512y: 148) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.255470e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.509653e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.509653e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.240952e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.495480e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.495480e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.566802 sec - 5,210,176,310 cycles # 2.027 GHz - 7,615,240,991 instructions # 1.46 insn per cycle - 2.572256684 seconds time elapsed +TOTAL : 2.575099 sec + 5,211,387,602 cycles # 2.020 GHz + 7,613,456,375 instructions # 1.46 insn per cycle + 2.580291999 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1608) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index 86fd27a7a6..4678e0a139 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,23 +37,25 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_12:09:20 +DATE: 2023-10-29_22:57:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.147140e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.180559e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.277278e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.037826e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.174975e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.272869e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.511444 sec - 2,244,424,459 cycles # 3.012 GHz - 3,230,920,098 instructions # 1.44 insn per cycle - 0.802219968 seconds time elapsed +TOTAL : 0.513772 sec + 2,233,336,477 cycles # 3.003 GHz + 3,219,059,296 instructions # 1.44 insn per cycle + 0.800421259 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- @@ -64,19 +67,20 @@ Relative difference = 3.241686432649386e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.160319e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.222253e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.222253e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.142916e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.204636e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.204636e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.957043 sec - 15,381,768,580 cycles # 3.101 GHz - 40,434,677,859 instructions # 2.63 insn per cycle - 4.962325085 seconds time elapsed +TOTAL : 4.996995 sec + 15,391,509,592 cycles # 3.079 GHz + 40,434,267,052 instructions # 2.63 insn per cycle + 5.002292693 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest.exe @@ -90,19 +94,20 @@ Relative difference = 3.0048445715164216e-07 OK (relative 
difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.984906e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.218127e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.218127e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.907128e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.140585e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.140585e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.734927 sec - 8,488,131,666 cycles # 3.099 GHz - 23,269,666,895 instructions # 2.74 insn per cycle - 2.740313634 seconds time elapsed +TOTAL : 2.788965 sec + 8,494,141,525 cycles # 3.041 GHz + 23,269,631,247 instructions # 2.74 insn per cycle + 2.794501830 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2091) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest.exe @@ -116,19 +121,20 @@ Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.169260e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.554990e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.554990e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.029207e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.400708e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.400708e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.131213 sec - 6,244,993,149 cycles # 2.925 GHz - 12,974,275,312 instructions # 2.08 insn per cycle - 2.136479402 seconds time elapsed +TOTAL : 2.189330 sec + 6,250,586,683 cycles # 2.854 GHz + 12,975,656,099 instructions # 2.08 insn per cycle + 2.194673870 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2669) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest.exe @@ -142,19 +148,20 @@ Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.404477e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.818375e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.818375e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.390803e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.817317e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.817317e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.042856 sec - 5,910,711,102 cycles # 2.887 GHz - 12,249,599,385 instructions # 2.07 insn per cycle - 2.048130482 seconds time elapsed +TOTAL : 2.047933 sec + 5,931,368,799 cycles # 2.890 GHz + 12,251,527,312 instructions # 2.07 insn per cycle + 2.053330268 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2209) (512y: 296) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest.exe @@ -168,19 +175,20 @@ Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check.exe -p 2048 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.919702e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.138324e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.138324e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.844812e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.055411e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.055411e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.777954 sec - 5,597,813,424 cycles # 2.012 GHz - 8,753,565,299 instructions # 1.56 insn per cycle - 2.783109376 seconds time elapsed +TOTAL : 2.831180 sec + 5,608,290,183 cycles # 1.978 GHz + 8,753,452,498 instructions # 1.56 insn per cycle + 2.836498489 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1909) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 901d2a6b79..b768c889a2 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-29_12:09:47 +DATE: 2023-10-29_22:58:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.965049e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.053793e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.067077e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.909398e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.046859e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.059893e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.459845 sec - 2,017,798,845 cycles # 3.006 GHz - 2,871,664,576 instructions # 1.42 insn per cycle - 0.730427502 seconds time elapsed +TOTAL : 0.459234 sec + 2,023,935,385 cycles # 3.005 GHz + 2,917,136,859 instructions # 1.44 insn per cycle + 0.729830918 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.121331e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.323555e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.335068e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.113454e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.319134e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.331179e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.588290 sec - 2,481,557,979 cycles # 3.021 GHz - 3,747,764,070 instructions # 1.51 insn per cycle - 0.880057164 seconds time elapsed +TOTAL : 0.591882 sec + 2,481,489,165 cycles # 3.009 GHz + 3,788,031,297 instructions # 1.53 insn per cycle + 0.882711359 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.552316e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.564667e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.564667e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.583482e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.595900e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.595900e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.441317 sec - 19,792,438,479 cycles # 3.071 GHz - 59,610,211,281 instructions # 3.01 insn per cycle - 6.445390765 seconds time elapsed +TOTAL : 6.363888 sec + 19,707,598,906 cycles # 3.095 GHz + 59,610,259,834 instructions # 3.02 insn per cycle + 6.367917888 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.806418e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.849994e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.849994e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.862811e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.907560e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.907560e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.430990 sec - 10,359,487,693 cycles # 3.016 GHz - 30,678,330,855 instructions # 2.96 insn per cycle - 3.435079540 seconds time elapsed +TOTAL : 3.391185 sec + 10,358,012,674 cycles # 3.051 GHz + 30,678,593,588 instructions # 2.96 insn per cycle + 3.395348889 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.900243e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.008050e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.008050e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.892098e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.007261e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.007261e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.676763 sec - 4,883,160,122 cycles # 2.907 GHz - 11,021,724,288 instructions # 2.26 insn per cycle - 1.680921849 seconds time elapsed +TOTAL : 1.678228 sec + 4,878,476,340 cycles # 2.901 GHz + 11,021,244,802 instructions # 2.26 insn per cycle + 1.682311815 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.107653e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.130101e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.130101e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.101682e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.123817e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.123817e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.501039 sec - 4,367,829,020 cycles # 2.903 GHz - 10,298,584,119 instructions # 2.36 insn per cycle - 1.505199219 seconds time elapsed +TOTAL : 1.509246 sec + 4,368,027,313 cycles # 2.888 GHz + 10,298,348,380 instructions # 2.36 insn per cycle + 1.513387497 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.510567e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.621371e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.621371e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.817252e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.930114e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.930114e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.206299 sec - 4,102,761,181 cycles # 1.857 GHz - 5,845,915,507 instructions # 1.42 insn per cycle - 2.210468411 seconds time elapsed +TOTAL : 2.119992 sec + 4,099,162,887 cycles # 1.931 GHz + 5,845,650,996 instructions # 1.43 insn per cycle + 2.124059548 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index cd7df250dc..7350cb044e 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,11 +37,12 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-29_12:37:21 +DATE: 2023-10-29_23:27:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -48,15 +50,16 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.693217e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.926702e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.926702e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.596022e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.815455e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.815455e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.487867 sec - 2,121,769,325 cycles # 3.008 GHz - 3,150,750,186 instructions # 1.48 insn per cycle - 0.763645513 seconds time elapsed +TOTAL : 0.490769 sec + 2,050,018,894 cycles # 2.922 GHz + 3,062,877,922 instructions # 1.49 insn per cycle + 0.759712446 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -65,6 +68,7 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -72,14 +76,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.791626e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.637606e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.637606e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.755073e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.665591e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.665591e+06 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.807637 sec - 3,237,204,957 cycles # 3.044 GHz - 5,104,521,537 instructions # 1.58 insn per cycle - 1.126122223 seconds time elapsed +TOTAL : 0.814041 sec + 3,193,964,813 cycles # 3.017 GHz + 4,996,785,588 instructions # 1.56 insn per cycle + 1.121110193 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -89,20 +93,21 @@ Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.554780e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.567292e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.567292e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.581861e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.594640e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.594640e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.442685 sec - 19,734,537,817 cycles # 3.065 GHz - 59,621,480,724 instructions # 3.02 insn per cycle - 6.446840072 seconds time elapsed +TOTAL : 6.374768 sec + 19,744,253,603 cycles # 3.096 GHz + 59,617,039,083 instructions # 3.02 insn per cycle + 6.379015840 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe @@ -116,20 +121,21 @@ Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.790530e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.835100e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.835100e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.947779e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.993430e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.993430e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.448787 sec - 10,392,677,425 cycles # 3.010 GHz - 30,726,890,032 instructions # 2.96 insn per cycle - 3.453282456 seconds time elapsed +TOTAL : 3.340025 sec + 10,392,403,164 cycles # 3.108 GHz + 30,728,140,044 instructions # 2.96 insn per cycle + 3.344437514 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe @@ -143,20 +149,21 @@ Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.761124e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.945111e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.945111e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.893651e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.007560e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.007560e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.707553 sec - 4,918,690,921 cycles # 2.875 GHz - 11,070,254,110 instructions # 2.25 insn per cycle - 1.711850509 seconds time elapsed +TOTAL : 1.685465 sec + 4,920,068,373 cycles # 2.913 GHz + 11,072,615,091 instructions # 2.25 insn per cycle + 1.689979160 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe @@ -170,20 +177,21 @@ Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.105869e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.128347e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.128347e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.103775e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.126572e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.126572e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.510363 sec - 4,398,214,508 cycles # 2.905 GHz - 10,349,345,987 instructions # 2.35 insn per cycle - 1.514719920 seconds time elapsed +TOTAL : 1.513600 sec + 4,404,328,448 cycles # 2.903 GHz + 10,349,844,115 instructions # 2.35 insn per cycle + 1.518016539 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe @@ -197,20 +205,21 @@ Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.811562e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.924385e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.924385e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.735077e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.845863e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.845863e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.128054 sec - 4,134,017,356 cycles # 1.939 GHz - 5,885,473,567 instructions # 1.42 insn per cycle - 2.132603203 seconds time elapsed +TOTAL : 2.148156 sec + 4,131,669,019 cycles # 1.920 GHz + 5,883,675,910 instructions # 1.42 insn per cycle + 2.152458390 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index 1c0f440c0f..7f78fd761b 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-29_12:10:16 +DATE: 2023-10-29_22:58:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.914584e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.046212e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.059276e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.886775e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.044014e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.057508e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.458950 sec - 2,025,257,806 cycles # 3.014 GHz - 2,838,342,308 instructions # 1.40 insn per cycle - 0.729521236 seconds time elapsed +TOTAL : 0.460031 sec + 2,016,314,028 cycles # 3.002 GHz + 2,898,893,774 instructions # 1.44 insn per cycle + 0.730273188 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.114844e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.314618e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.326004e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.109210e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.313076e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.324737e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.588803 sec - 2,478,020,834 cycles # 3.022 GHz - 3,797,746,550 instructions # 1.53 insn per cycle - 0.880923119 seconds time elapsed +TOTAL : 0.586833 sec + 2,478,382,973 cycles # 3.032 GHz + 3,721,282,569 instructions # 1.50 insn per cycle + 0.878275821 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.622100e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.634932e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.634932e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.586514e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.599421e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.599421e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.270409 sec - 19,469,015,028 cycles # 3.103 GHz - 58,801,088,362 instructions # 3.02 insn per cycle - 6.274552233 seconds time elapsed +TOTAL : 6.357316 sec + 19,481,235,659 cycles # 3.063 GHz + 58,801,855,367 instructions # 3.02 insn per cycle + 6.361426900 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1313) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.995468e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.041449e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.041449e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.984548e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.029958e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.029958e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.301245 sec - 10,235,413,277 cycles # 3.097 GHz - 30,349,704,494 instructions # 2.97 insn per cycle - 3.305304820 seconds time elapsed +TOTAL : 3.309163 sec + 10,243,122,552 cycles # 3.093 GHz + 30,350,909,518 instructions # 2.96 insn per cycle + 3.313312968 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4970) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.573009e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.744339e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.744339e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.127917e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.294877e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.294877e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.733129 sec - 5,040,264,850 cycles # 2.903 GHz - 11,485,470,427 instructions # 2.28 insn per cycle - 1.737235033 seconds time elapsed +TOTAL : 1.817988 sec + 5,048,703,510 cycles # 2.771 GHz + 11,485,918,392 instructions # 2.28 insn per cycle + 1.822406507 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4591) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.040240e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.060102e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.060102e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.042469e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.062305e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.062305e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.596698 sec - 4,645,794,162 cycles # 2.904 GHz - 10,844,658,112 instructions # 2.33 insn per cycle - 1.600851477 seconds time elapsed +TOTAL : 1.592971 sec + 4,643,388,624 cycles # 2.909 GHz + 10,844,105,717 instructions # 2.34 insn per cycle + 1.597053361 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4183) (512y: 244) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.683288e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.795724e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.795724e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.817606e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.928535e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.928535e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.157094 sec - 4,112,611,788 cycles # 1.904 GHz - 6,110,068,673 instructions # 1.49 insn per cycle - 2.161179215 seconds time elapsed +TOTAL : 2.119744 sec + 4,110,353,353 cycles # 1.936 GHz + 6,109,536,368 instructions # 1.49 insn per cycle + 2.123912437 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1457) (512y: 139) (512z: 3568) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 17451a57e1..82a23d37c8 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-29_12:10:44 +DATE: 2023-10-29_22:59:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.540736e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.325871e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.412262e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.554213e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.342270e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.443645e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.439495 sec - 1,960,974,135 cycles # 3.010 GHz - 2,773,234,745 instructions # 1.41 insn per cycle - 0.708896009 seconds time elapsed +TOTAL : 0.442810 sec + 1,955,213,568 cycles # 2.996 GHz + 2,785,838,310 instructions # 1.42 insn per cycle + 0.711742691 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.414003e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.468594e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.537406e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.352858e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.428374e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.497564e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.487228 sec - 2,128,818,676 cycles # 3.001 GHz - 3,089,467,042 instructions # 1.45 insn per cycle - 0.767090383 seconds time elapsed +TOTAL : 0.486265 sec + 2,126,515,219 cycles # 3.007 GHz + 3,068,678,928 instructions # 1.44 insn per cycle + 0.765884317 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +81,10 @@ Relative difference = 0.00043425681546129636 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.655254e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.668863e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.668863e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.191108 sec - 19,080,061,273 cycles # 3.080 GHz - 58,965,657,443 instructions # 3.09 insn per cycle - 6.195129418 seconds time elapsed +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions + 31,909,914 cycles # 2.821 GHz + 48,600,152 instructions # 1.52 insn per cycle + 0.011853069 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe @@ -97,114 +92,6 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 +Floating Point Exception (CPU) +Avg ME (C++/C++) = Avg ME (F77/C++) = 1.4129858051842916 -Relative difference = 1.3787518662898538e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = 
VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.508212e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.656701e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.656701e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.946211 sec - 5,855,964,511 cycles # 3.004 GHz - 16,697,592,998 instructions # 2.85 insn per cycle - 1.950434492 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5766) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.412987e+00 -Avg ME (F77/C++) = 1.4129865669244737 -Relative difference = 3.06496469061158e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.883140e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.951820e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.951820e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008855e+02 +- 
5.002467e+01 ) GeV^-2 -TOTAL : 0.890090 sec - 2,589,711,743 cycles # 2.898 GHz - 5,983,428,902 instructions # 2.31 insn per cycle - 0.894597805 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4917) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133161655815059 -Relative difference = 1.1715816267550621e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.992770e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.073104e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.073104e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 0.843057 sec - 2,349,625,279 cycles # 2.776 GHz - 5,605,182,274 instructions # 2.39 insn per cycle - 0.847063086 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4645) (512y: 36) (512z: 0) 
-------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133161655815059 -Relative difference = 1.1715816267550621e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.587481e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.635187e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.635187e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.053998 sec - 2,052,367,530 cycles # 1.941 GHz - 3,335,862,386 instructions # 1.63 insn per cycle - 1.057979764 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2131) (512y: 39) (512z: 3668) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133164033579249 -Relative difference = 2.85398258307829e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index 991838f582..5226a06993 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,11 +37,12 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-29_12:37:50 +DATE: 2023-10-29_23:28:05 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -48,15 +50,16 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.108982e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.182336e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.182336e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.094146e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.120453e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.120453e+07 ) sec^-1 MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 -TOTAL : 0.453117 sec - 1,988,494,564 cycles # 3.009 GHz - 2,951,709,903 instructions # 1.48 insn per cycle - 0.719180627 seconds time elapsed +TOTAL : 0.453599 sec + 1,984,525,540 cycles # 3.003 GHz + 2,948,801,406 instructions # 1.49 insn per cycle + 0.718890176 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -65,6 +68,7 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -72,14 +76,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.832102e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.641855e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.641855e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.830521e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.657375e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.657375e+07 ) sec^-1 MeanMatrixElemValue = ( 6.737500e+02 +- 4.776370e+02 ) GeV^-2 -TOTAL : 0.627359 sec - 2,595,598,338 cycles # 3.023 GHz - 3,998,508,208 instructions # 1.54 insn per cycle - 0.916218226 seconds time elapsed +TOTAL : 0.628524 sec + 2,620,144,582 cycles # 3.014 GHz + 3,959,615,987 instructions # 1.51 insn per cycle + 0.926992755 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -89,20 +93,11 @@ Relative difference = 0.00043425681546129636 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.663266e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.676995e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.676995e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.175351 sec - 19,098,383,794 cycles # 3.091 GHz - 58,969,986,603 instructions # 3.09 insn per cycle - 6.179430024 seconds time elapsed + 38,544,754 cycles # 2.892 GHz + 52,041,513 instructions # 1.35 insn per cycle + 0.013873487 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe @@ -110,118 +105,6 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 +Floating Point Exception (CPU) +Avg ME (C++/C++) = Avg ME (F77/C++) = 1.4129858051842916 -Relative difference = 1.3787518662898538e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= -WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.759009e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.910670e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.910670e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.894239 sec - 5,869,520,004 cycles # 3.093 GHz - 16,744,812,460 instructions # 2.85 insn per cycle - 1.898329669 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5766) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.412987e+00 -Avg ME (F77/C++) = 1.4129865669244737 -Relative difference = 3.06496469061158e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.880282e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.947896e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.947896e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 0.895429 sec - 2,603,817,732 cycles # 2.896 GHz - 6,019,674,166 instructions # 2.31 insn per cycle - 0.899627318 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4917) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133161655815059 -Relative difference = 1.1715816267550621e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.079121e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.162628e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.162628e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 0.812546 sec - 2,366,541,582 cycles # 2.900 GHz - 5,641,622,908 instructions # 2.38 insn per cycle - 0.816726130 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4645) (512y: 36) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133161655815059 -Relative difference = 1.1715816267550621e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.601201e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.649761e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.649761e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.049478 sec - 2,071,790,293 cycles # 1.968 GHz - 3,377,452,170 instructions # 1.63 insn per cycle - 1.053461167 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2131) (512y: 39) (512z: 3668) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133164033579249 -Relative difference = 2.85398258307829e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index 4594e108f9..95a01f73e7 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-29_12:11:08 +DATE: 2023-10-29_22:59:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.564685e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.375281e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.467611e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.572009e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.356666e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.443338e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.441255 sec - 1,955,562,909 cycles # 3.007 GHz - 2,787,251,095 instructions # 1.43 insn per cycle - 0.709632874 seconds time elapsed +TOTAL : 0.440187 sec + 1,957,058,162 cycles # 3.008 GHz + 2,791,592,510 instructions # 1.43 insn per cycle + 0.709336021 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 248 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.351322e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.360521e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.430524e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.352108e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.420935e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.490027e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.486324 sec - 2,131,058,818 cycles # 3.002 GHz - 3,095,610,657 instructions # 1.45 insn per cycle - 0.766843952 seconds time elapsed +TOTAL : 0.486998 sec + 2,128,766,537 cycles # 3.001 GHz + 3,082,933,797 instructions # 1.45 insn per cycle + 0.767019388 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -77,19 +81,10 @@ Relative difference = 0.00043425681546129636 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.669588e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.683524e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.683524e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.157456 sec - 18,971,452,097 cycles # 3.080 GHz - 58,707,978,021 instructions # 3.09 insn per cycle - 6.161349222 seconds time elapsed +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions + 31,187,967 cycles # 2.806 GHz + 47,783,574 instructions # 1.53 insn per cycle + 0.011561032 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1029) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest.exe @@ -97,114 +92,6 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 +Floating Point Exception (CPU) +Avg ME (C++/C++) = Avg ME (F77/C++) = 1.4129858051842916 -Relative difference = 1.3787518662898538e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.176492e+04 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 9.344165e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.344165e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.804802 sec - 5,594,484,187 cycles # 3.094 GHz - 16,513,342,463 instructions # 2.95 insn per cycle - 1.808828925 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5552) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.412987e+00 -Avg ME (F77/C++) = 1.4129865669244737 -Relative difference = 3.06496469061158e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.641560e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.692808e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.692808e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 1.018009 sec - 2,972,835,880 cycles # 2.910 GHz - 6,636,773,662 instructions # 2.23 insn per cycle - 1.022091022 
seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5568) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133161655815059 -Relative difference = 1.1715816267550621e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.775150e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.835465e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.835465e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008855e+02 +- 5.002467e+01 ) GeV^-2 -TOTAL : 0.942822 sec - 2,750,756,278 cycles # 2.908 GHz - 6,258,176,694 instructions # 2.28 insn per cycle - 0.946897871 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5279) (512y: 25) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest.exe 
-[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133161655815059 -Relative difference = 1.1715816267550621e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.467022e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.508165e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.508165e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.138853 sec - 2,225,844,994 cycles # 1.949 GHz - 3,700,498,375 instructions # 1.66 insn per cycle - 1.142939136 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2378) (512y: 29) (512z: 3963) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133164033579249 -Relative difference = 2.85398258307829e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index f54a59d5fe..6475091e5b 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-29_12:11:32 +DATE: 2023-10-29_22:59:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.905164e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.044247e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.057360e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.869757e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.043160e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.056606e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.462541 sec - 1,972,895,984 cycles # 2.929 GHz - 2,903,856,371 instructions # 1.47 insn per cycle - 0.732236757 seconds time elapsed +TOTAL : 0.462395 sec + 1,986,146,263 cycles # 2.946 GHz + 2,899,086,399 instructions # 1.46 insn per cycle + 0.733165110 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.120425e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.322230e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.333819e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.112775e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.315776e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.327405e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.594644 sec - 2,492,989,257 cycles # 3.011 GHz - 3,840,726,093 instructions # 1.54 insn per cycle - 0.886407043 seconds time elapsed +TOTAL : 0.591959 sec + 2,485,887,019 cycles # 3.012 GHz + 3,855,572,801 instructions # 1.55 insn per cycle + 0.884123063 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +81,10 @@ Relative difference = 4.418889885423659e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.530579e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.542965e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.542965e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.497943 sec - 20,014,842,539 cycles # 3.079 GHz - 60,540,154,499 instructions # 3.02 insn per cycle - 6.502158930 seconds time elapsed +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions + 35,128,155 cycles # 2.838 GHz + 49,861,109 instructions # 1.42 insn per cycle + 0.012921109 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1399) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest.exe @@ -97,114 +92,6 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 +Floating Point Exception (CPU) +Avg ME (C++/C++) = Avg ME (F77/C++) = 1.4131213859069593 -Relative difference = 4.345647726386255e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.010482e+04 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 5.057204e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.057204e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.291332 sec - 10,183,408,251 cycles # 3.091 GHz - 30,388,862,594 instructions # 2.98 insn per cycle - 3.295386231 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5280) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213792564823 -Relative difference = 4.392710025734405e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.889739e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.006782e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.006782e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.678792 sec - 4,866,504,720 cycles # 2.893 GHz - 10,982,241,057 instructions # 2.26 insn per cycle - 1.682949636 
seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4623) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213600217192 -Relative difference = 4.5288254008796884e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.131863e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.155501e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.155501e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.469235 sec - 4,275,763,092 cycles # 2.903 GHz - 10,250,853,336 instructions # 2.40 insn per cycle - 1.473398330 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4279) (512y: 82) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest.exe 
-[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213600217192 -Relative difference = 4.5288254008796884e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.621843e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.728247e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.728247e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.173637 sec - 4,203,419,623 cycles # 1.931 GHz - 6,049,226,551 instructions # 1.44 insn per cycle - 2.177772868 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2065) (512y: 117) (512z: 3540) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213786174055 -Relative difference = 4.3972324717191576e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index a14cca51af..843ed33318 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-29_12:12:01 +DATE: 2023-10-29_22:59:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.852098e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.037871e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.050490e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.846988e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.038654e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.051345e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.467984 sec - 1,998,838,445 cycles # 2.896 GHz - 2,912,610,966 instructions # 1.46 insn per cycle - 0.748728175 seconds time elapsed +TOTAL : 0.463837 sec + 1,953,484,101 cycles # 2.884 GHz + 2,839,955,062 instructions # 1.45 insn per cycle + 0.734557378 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.108040e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.305385e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.316521e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.103431e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.304634e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.316508e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.586972 sec - 2,485,257,138 cycles # 3.003 GHz - 3,752,100,969 instructions # 1.51 insn per cycle - 0.885534164 seconds time elapsed +TOTAL : 0.589162 sec + 2,482,997,752 cycles # 3.024 GHz + 3,772,838,208 instructions # 1.52 insn per cycle + 0.882532404 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -77,19 +81,10 @@ Relative difference = 4.418889885423659e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.553890e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
2.566027e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.566027e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.437822 sec - 19,867,702,905 cycles # 3.085 GHz - 59,941,439,802 instructions # 3.02 insn per cycle - 6.441937921 seconds time elapsed +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions + 33,991,965 cycles # 2.796 GHz + 49,234,796 instructions # 1.45 insn per cycle + 0.012707522 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1276) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest.exe @@ -97,114 +92,6 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcess ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 +Floating Point Exception (CPU) +Avg ME (C++/C++) = Avg ME (F77/C++) = 1.4131213859069593 -Relative difference = 4.345647726386255e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.902669e+04 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 4.948654e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.948654e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.363921 sec - 10,066,901,567 cycles # 2.990 GHz - 30,102,442,654 instructions # 2.99 insn per cycle - 3.368170033 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5082) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213792564823 -Relative difference = 4.392710025734405e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.655442e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.828218e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.828218e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.718351 sec - 5,011,970,294 cycles # 2.911 GHz - 11,485,977,959 instructions # 2.29 insn per cycle - 1.722449030 
seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4722) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213600217192 -Relative difference = 4.5288254008796884e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.050454e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.070781e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.070781e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.580973 sec - 4,589,831,248 cycles # 2.897 GHz - 10,813,098,589 instructions # 2.36 insn per cycle - 1.585135297 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4284) (512y: 234) (512z: 0) -------------------------------------------------------------------------- -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213600217192 -Relative difference = 4.5288254008796884e-07 -OK (relative difference <= 5E-3) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.633290e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.739141e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.739141e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.170052 sec - 4,216,147,141 cycles # 1.940 GHz - 6,278,929,582 instructions # 1.49 insn per cycle - 2.174195697 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1960) (512y: 163) (512z: 3617) -------------------------------------------------------------------------- -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest.exe -[ PASSED ] 6 tests. 
-------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213786174055 -Relative difference = 4.3972324717191576e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 11c2c1ab32..b9b691e247 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_12:12:29 +DATE: 2023-10-29_22:59:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.522753e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.548627e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.550750e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.501935e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.526920e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.529281e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.518045 sec - 2,253,392,327 cycles # 3.016 GHz - 3,541,073,735 instructions # 1.57 insn per cycle - 0.806095149 seconds time elapsed +TOTAL : 0.520893 sec + 2,248,497,475 cycles # 2.996 GHz + 3,510,999,754 instructions # 1.56 insn per cycle + 0.810696709 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.133203e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.161116e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.162261e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.136856e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.165550e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.166829e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.022651 sec - 10,067,556,986 cycles # 3.077 GHz - 21,723,155,404 instructions # 2.16 insn per cycle - 3.328454491 seconds time elapsed +TOTAL : 3.027243 sec + 9,828,661,032 cycles # 3.001 GHz + 20,508,968,170 instructions # 2.09 insn per cycle + 3.335037567 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.960753e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.961679e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.961679e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.950736e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.951642e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.951642e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.374018 sec - 25,670,008,434 cycles # 3.066 GHz - 78,945,127,859 instructions # 3.08 insn per cycle - 8.378169528 seconds time elapsed +TOTAL : 8.417605 sec + 25,648,294,457 cycles # 3.046 GHz + 78,942,458,574 instructions # 3.08 insn per cycle + 8.421676976 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.761609e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.765116e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.765116e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.753458e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.756891e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.756891e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.368935 sec - 12,930,681,857 cycles # 2.957 GHz - 39,284,393,094 instructions # 3.04 insn per cycle - 4.373129403 seconds time elapsed +TOTAL : 4.379085 sec + 12,933,361,776 cycles # 2.952 GHz + 39,285,416,086 instructions # 3.04 insn per cycle + 4.383232825 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.588352e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.606225e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.606225e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.595894e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.614051e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.614051e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.918707 sec - 5,579,192,973 cycles # 2.903 GHz - 13,689,174,313 instructions # 2.45 insn per cycle - 1.922851888 seconds time elapsed +TOTAL : 1.917047 sec + 5,576,412,013 cycles # 2.904 GHz + 13,689,574,709 instructions # 2.45 insn per cycle + 1.921242752 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.788874e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.811382e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.811382e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.805413e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.828813e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.828813e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.684655 sec - 4,896,071,309 cycles # 2.900 GHz - 12,344,332,584 instructions # 2.52 insn per cycle - 1.688799152 seconds time elapsed +TOTAL : 1.681874 sec + 4,891,493,480 cycles # 2.903 GHz + 12,344,327,535 instructions # 2.52 insn per cycle + 1.686035582 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.598473e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.612773e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.612773e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.649115e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.663220e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.663220e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.167854 sec - 4,117,500,482 cycles # 1.897 GHz - 6,337,115,934 instructions # 1.54 insn per cycle - 2.172030896 seconds time elapsed +TOTAL : 2.153677 sec + 4,116,714,094 cycles # 1.909 GHz + 6,336,926,711 instructions # 1.54 insn per cycle + 2.157934772 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index 5fa11c735c..2e2c6e625b 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,11 +37,12 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_12:38:47 +DATE: 2023-10-29_23:28:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -48,15 +50,16 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.178531e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.491763e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.491763e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.159927e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.489906e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.489906e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.509461 sec - 2,248,167,105 cycles # 3.025 GHz - 3,581,747,687 instructions # 1.59 insn per cycle - 0.803251034 seconds time elapsed +TOTAL : 0.509818 sec + 2,212,202,628 cycles # 2.996 GHz + 3,518,939,068 instructions # 1.59 insn per cycle + 0.797946076 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -65,6 +68,7 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -72,14 +76,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.638113e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.098100e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.098100e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.645371e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.111843e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.111843e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.295657 sec - 10,940,660,237 cycles # 3.073 GHz - 21,897,117,038 instructions # 2.00 insn per cycle - 3.619731570 seconds time elapsed +TOTAL : 3.298230 sec + 10,943,704,523 cycles # 3.065 GHz + 22,588,127,082 instructions # 2.06 insn per cycle + 3.630609877 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -89,20 +93,21 @@ Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.961024e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.962005e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.962005e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.968844e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.969820e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.969820e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.376264 sec - 25,654,322,049 cycles # 3.062 GHz - 78,950,853,496 instructions # 3.08 insn per cycle - 8.380554018 seconds time elapsed +TOTAL : 8.343532 sec + 25,669,775,275 cycles # 3.076 GHz + 78,950,360,530 instructions # 3.08 insn per cycle + 8.347863671 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -116,20 +121,21 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.720177e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.723485e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.723485e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.740147e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.743760e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.743760e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.421209 sec - 13,021,231,521 cycles # 2.943 GHz - 39,297,463,556 instructions # 3.02 insn per cycle - 4.425572459 seconds time elapsed +TOTAL : 4.397609 sec + 12,952,185,539 cycles # 2.943 GHz + 39,298,996,582 instructions # 3.03 insn per cycle + 4.402071213 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -143,20 +149,21 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.595418e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.613259e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.613259e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.514829e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.532691e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.532691e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.921008 sec - 5,588,903,977 cycles # 2.904 GHz - 13,699,707,384 instructions # 2.45 insn per cycle - 1.925217698 seconds time elapsed +TOTAL : 1.939528 sec + 5,601,303,316 cycles # 2.883 GHz + 13,700,433,238 instructions # 2.45 insn per cycle + 1.944054566 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -170,20 +177,21 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.790376e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.814781e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.814781e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.801046e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.825904e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.825904e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.688290 sec - 4,906,672,712 cycles # 2.900 GHz - 12,355,095,289 instructions # 2.52 insn per cycle - 1.692593198 seconds time elapsed +TOTAL : 1.686556 sec + 4,907,372,738 cycles # 2.904 GHz + 12,355,220,111 instructions # 2.52 insn per cycle + 1.690956063 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -197,20 +205,21 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.665562e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.680550e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.680550e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.677162e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.691349e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.691349e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.152887 sec - 4,135,096,676 cycles # 1.918 GHz - 6,348,239,646 instructions # 1.54 insn per cycle - 2.157236836 seconds time elapsed +TOTAL : 2.149521 sec + 4,132,629,395 cycles # 1.919 GHz + 6,348,639,035 instructions # 1.54 insn per cycle + 2.153997044 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index 6e69eca414..45cfbfeec0 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_12:50:04 +DATE: 2023-10-29_23:40:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --common OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.488953e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.515779e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.518130e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.473243e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.498908e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.501062e+05 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.505230 sec - 2,209,587,629 cycles # 3.008 GHz - 3,455,452,601 instructions # 1.56 insn per cycle - 0.795320871 seconds time elapsed +TOTAL : 0.509563 sec + 2,216,642,937 cycles # 2.956 GHz + 3,422,181,548 instructions # 1.54 insn per cycle + 0.816227831 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --common +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.142087e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.173823e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.175176e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.142930e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.175050e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.176382e+05 ) sec^-1 MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 3.119931 sec - 10,305,373,783 cycles # 3.060 GHz - 22,236,923,118 instructions # 2.16 insn per cycle - 3.427874463 seconds time elapsed +TOTAL : 3.111141 sec + 10,329,507,816 cycles # 3.073 GHz + 22,291,732,023 instructions # 2.16 insn per cycle + 3.417604733 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.981434e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.982406e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.982406e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.969664e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.970586e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.970586e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 8.288289 sec - 25,654,633,072 cycles # 3.094 GHz - 78,941,515,466 instructions # 3.08 insn per cycle - 8.292332759 seconds time elapsed +TOTAL : 8.336836 sec + 25,640,820,337 cycles # 3.077 GHz + 78,945,663,895 instructions # 3.08 insn per cycle + 8.340853167 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.739349e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.742888e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.742888e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.762983e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.766419e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.766419e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.396528 sec - 12,944,055,846 cycles # 2.943 GHz - 39,285,289,410 instructions # 3.04 insn per cycle - 4.400528334 seconds time elapsed +TOTAL : 4.368876 sec + 12,926,628,008 cycles # 2.958 GHz + 39,285,203,910 instructions # 3.04 insn per cycle + 4.372893822 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.586617e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.603945e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.603945e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.601104e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.618518e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.618518e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.920517 sec - 5,579,544,952 cycles # 2.900 GHz - 13,688,140,330 instructions # 2.45 insn per cycle - 1.924582456 seconds time elapsed +TOTAL : 1.917538 sec + 5,581,699,180 cycles # 2.906 GHz + 13,688,893,679 instructions # 2.45 insn per cycle + 1.921672417 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.777785e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.801156e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.801156e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.770281e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.793597e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.793597e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.687811 sec - 4,898,373,572 cycles # 2.897 GHz - 12,342,306,196 instructions # 2.52 insn per cycle - 1.691792505 seconds time elapsed +TOTAL : 1.689348 sec + 4,900,240,520 cycles # 2.895 GHz + 12,343,143,577 instructions # 2.52 insn per cycle + 1.693358610 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.662466e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.677203e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.677203e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.684168e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.698568e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.698568e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.151329 sec - 4,122,708,782 cycles # 1.914 GHz - 6,334,974,901 instructions # 1.54 insn per cycle - 2.155459573 seconds time elapsed +TOTAL : 2.145238 sec + 4,121,500,968 cycles # 1.918 GHz + 6,334,916,294 instructions # 1.54 insn per cycle + 2.149260659 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt index e3c3365666..75f16009d0 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_12:46:52 +DATE: 2023-10-29_23:36:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.470491e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.496835e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.498894e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.502504e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.529294e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.532228e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.502429 sec - 2,239,802,104 cycles # 3.025 GHz - 3,519,837,635 instructions # 1.57 insn per cycle - 0.810113153 seconds time elapsed +TOTAL : 0.500079 sec + 2,195,316,551 cycles # 3.018 GHz + 3,432,745,100 instructions # 1.56 insn per cycle + 0.787073582 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.144423e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.176580e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.177932e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.139755e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.171825e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.173189e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.064197 sec - 10,177,900,222 cycles # 3.071 GHz - 23,744,760,110 instructions # 2.33 insn per cycle - 3.371080698 seconds time elapsed +TOTAL : 3.057644 sec + 10,111,910,098 cycles # 3.059 GHz + 21,575,647,114 instructions # 2.13 insn per cycle + 3.365562161 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.958926e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.959917e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.959917e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.965142e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.966066e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.966066e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.381577 sec - 25,640,723,341 cycles # 3.058 GHz - 78,941,748,743 instructions # 3.08 insn per cycle - 8.385627979 seconds time elapsed +TOTAL : 8.354767 sec + 25,655,459,462 cycles # 3.071 GHz + 78,944,356,715 instructions # 3.08 insn per cycle + 8.358730269 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.732687e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.736090e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.736090e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.747153e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.750615e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.750615e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.402944 sec - 12,923,451,618 cycles # 2.933 GHz - 39,285,462,858 instructions # 3.04 insn per cycle - 4.407052351 seconds time elapsed +TOTAL : 4.385850 sec + 12,919,824,066 cycles # 2.944 GHz + 39,284,399,104 instructions # 3.04 insn per cycle + 4.389897028 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.554505e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.572181e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.572181e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.556610e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.573675e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.573675e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.926338 sec - 5,575,425,154 cycles # 2.889 GHz - 13,689,061,442 instructions # 2.46 insn per cycle - 1.930424805 seconds time elapsed +TOTAL : 1.925765 sec + 5,581,219,157 cycles # 2.893 GHz + 13,690,157,786 instructions # 2.45 insn per cycle + 1.929949148 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.784330e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.807171e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.807171e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.787105e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.810178e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.810178e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.685516 sec - 4,893,132,186 cycles # 2.897 GHz - 12,344,280,166 instructions # 2.52 insn per cycle - 1.689579681 seconds time elapsed +TOTAL : 1.685035 sec + 4,895,907,097 cycles # 2.900 GHz + 12,344,567,987 instructions # 2.52 insn per cycle + 1.689171944 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.645425e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.659225e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.659225e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.689935e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.704356e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.704356e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.154439 sec - 4,120,191,344 cycles # 1.910 GHz - 6,336,875,208 instructions # 1.54 insn per cycle - 2.158495161 seconds time elapsed +TOTAL : 2.142125 sec + 4,119,816,697 cycles # 1.921 GHz + 6,337,302,977 instructions # 1.54 insn per cycle + 2.146222342 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index fcc45ae054..a44127a607 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,41 +37,44 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_12:43:45 +DATE: 2023-10-29_23:33:46 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.227066e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.516305e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.518459e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.203236e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.506741e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.509379e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.505902 sec - 2,232,711,490 cycles # 3.013 GHz - 3,431,478,383 instructions # 1.54 insn per cycle - 0.805137058 seconds time elapsed +TOTAL : 0.513902 sec + 2,108,484,975 cycles # 2.844 GHz + 3,365,051,902 instructions # 1.60 insn per cycle + 0.803659720 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.745719e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.173586e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.174904e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.749252e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.167948e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.169271e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.183069 sec - 10,549,506,794 cycles # 3.073 GHz - 23,826,364,648 instructions # 2.26 insn per cycle - 3.489321072 seconds time elapsed +TOTAL : 3.179282 sec + 10,356,463,103 cycles # 3.021 GHz + 21,875,646,322 instructions # 2.11 insn per cycle + 3.486218185 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -80,19 +84,20 @@ Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) 
========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.963957e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.964914e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.964914e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.983903e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.984833e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.984833e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.359554 sec - 25,640,588,853 cycles # 3.066 GHz - 78,941,935,823 instructions # 3.08 insn per cycle - 8.363595068 seconds time elapsed +TOTAL : 8.275893 sec + 25,633,045,497 cycles # 3.096 GHz + 78,941,513,287 instructions # 3.08 insn per cycle + 8.279863548 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -106,19 +111,20 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.752315e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.755741e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.755741e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.760169e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.763953e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.763953e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.379894 sec - 12,924,609,236 cycles # 2.949 GHz - 39,284,528,815 instructions # 3.04 insn per cycle - 4.384005600 seconds time elapsed +TOTAL : 4.371058 sec + 12,924,406,923 cycles # 2.956 GHz + 39,285,568,868 instructions # 3.04 insn per cycle + 4.375178844 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -132,19 +138,20 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.534929e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.553394e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.553394e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.550074e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.567051e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.567051e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.930859 sec - 5,574,251,796 cycles # 2.882 GHz - 13,689,065,070 instructions # 2.46 insn per cycle - 1.935072396 seconds time elapsed +TOTAL : 1.927137 sec + 5,593,914,621 cycles # 2.898 GHz + 13,688,980,575 instructions # 2.45 insn per cycle + 1.931116092 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -158,19 +165,20 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.763710e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.787175e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.787175e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.762400e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.786054e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.786054e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.688908 sec - 4,893,973,419 cycles # 2.892 GHz - 12,344,401,694 instructions # 2.52 insn per cycle - 1.692959328 seconds time elapsed +TOTAL : 1.689205 sec + 4,892,556,855 cycles # 2.891 GHz + 12,344,299,662 instructions # 2.52 insn per cycle + 1.693311350 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -184,19 +192,20 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.661706e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.675864e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.675864e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.715915e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.730071e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.730071e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.149704 sec - 4,115,580,289 cycles # 1.912 GHz - 6,336,846,442 instructions # 1.54 insn per cycle - 2.153669716 seconds time elapsed +TOTAL : 2.135105 sec + 4,115,673,540 cycles # 1.925 GHz + 6,336,830,329 instructions # 1.54 insn per cycle + 2.139046159 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index a515f7b22d..0c19155fd8 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_12:13:05 +DATE: 2023-10-29_23:00:32 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.471472e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.495309e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.497296e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.466481e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.490614e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.492798e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.517888 sec - 2,245,194,212 cycles # 3.014 GHz - 3,545,171,662 instructions # 1.58 insn per cycle - 0.806452485 seconds time elapsed +TOTAL : 0.522728 sec + 2,248,657,448 cycles # 2.992 GHz + 3,499,106,897 instructions # 1.56 insn per cycle + 0.812353279 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.140818e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.168858e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.170012e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.139007e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.167719e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.168940e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.011124 sec - 10,016,658,248 cycles # 3.072 GHz - 21,347,400,055 instructions # 2.13 insn per cycle - 3.316522661 seconds time elapsed +TOTAL : 3.017562 sec + 10,017,603,348 cycles # 3.065 GHz + 22,790,569,141 instructions # 2.28 insn per cycle + 3.325124866 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.946476e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.947448e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.947448e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.973379e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.974310e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.974310e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.435258 sec - 25,604,767,793 cycles # 3.036 GHz - 78,717,648,044 instructions # 3.07 insn per cycle - 8.439335103 seconds time elapsed +TOTAL : 8.319686 sec + 25,596,530,612 cycles # 3.076 GHz + 78,714,234,691 instructions # 3.08 insn per cycle + 8.323809308 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4263) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.746101e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.749574e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.749574e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.755569e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.759059e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.759059e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.386760 sec - 12,904,358,144 cycles # 2.940 GHz - 39,232,199,416 instructions # 3.04 insn per cycle - 4.390963958 seconds time elapsed +TOTAL : 4.376087 sec + 12,893,815,616 cycles # 2.944 GHz + 39,230,432,926 instructions # 3.04 insn per cycle + 4.380264389 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:12949) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.524004e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.541409e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.541409e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.530044e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.547339e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.547339e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.932734 sec - 5,610,108,653 cycles # 2.898 GHz - 13,803,411,847 instructions # 2.46 insn per cycle - 1.936962663 seconds time elapsed +TOTAL : 1.931884 sec + 5,615,440,019 cycles # 2.902 GHz + 13,803,106,773 instructions # 2.46 insn per cycle + 1.936074443 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11422) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.639734e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.661711e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.661711e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.381888e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.403598e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.403598e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.710371 sec - 4,960,089,633 cycles # 2.894 GHz - 12,469,602,432 instructions # 2.51 insn per cycle - 1.714544326 seconds time elapsed +TOTAL : 1.757912 sec + 4,966,926,503 cycles # 2.820 GHz + 12,470,574,071 instructions # 2.51 insn per cycle + 1.762075007 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10258) (512y: 240) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.641349e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.655494e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.655494e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.629417e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.644208e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.644208e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.155375 sec - 4,121,563,380 cycles # 1.909 GHz - 6,462,027,636 instructions # 1.57 insn per cycle - 2.159551855 seconds time elapsed +TOTAL : 2.159291 sec + 4,114,806,278 cycles # 1.903 GHz + 6,460,944,575 instructions # 1.57 insn per cycle + 2.163526475 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1647) (512y: 192) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index 3549ac96e2..9fad0334f6 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_12:28:29 +DATE: 2023-10-29_23:18:42 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe -p 64 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.236458e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.259692e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.261510e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.230395e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.255737e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.257706e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.525667 sec - 2,272,396,380 cycles # 3.025 GHz - 3,551,922,727 instructions # 1.56 insn per cycle - 0.808686600 seconds time elapsed +TOTAL : 0.532206 sec + 2,283,349,210 cycles # 2.977 GHz + 3,463,784,089 instructions # 1.52 insn per cycle + 0.826165595 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.778016e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.804924e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.806042e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.778542e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.805844e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.806965e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.281612 sec - 10,819,985,898 cycles # 3.062 GHz - 22,684,839,821 instructions # 2.10 insn per cycle - 3.590287386 seconds time elapsed +TOTAL : 3.282940 sec + 10,785,228,501 cycles # 3.048 GHz + 24,078,177,869 instructions # 2.23 insn per cycle + 3.593801795 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 2.837296513854949e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.325840e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.326318e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.326318e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.421380e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.421873e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.421873e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 37.921432 sec - 116,806,691,593 cycles # 3.081 GHz - 144,971,298,456 instructions # 1.24 insn per cycle - 37.925417296 seconds time elapsed +TOTAL : 37.102394 sec + 113,622,568,884 cycles # 3.063 GHz + 144,969,242,443 instructions # 1.28 insn per cycle + 37.106567085 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:21605) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 2.83729918072716e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.265060e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.267841e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.267841e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.174528e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.177033e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.177033e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.031972 sec - 14,725,469,626 cycles # 2.924 GHz - 37,577,727,036 instructions # 2.55 insn per cycle - 5.036076669 seconds time elapsed +TOTAL : 5.175282 sec + 14,731,493,074 cycles # 2.845 GHz + 37,577,936,961 instructions # 2.55 insn per cycle + 5.179533221 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:68118) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.815685e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.829648e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.829648e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.715325e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.729397e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.729397e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.107581 sec - 6,118,001,268 cycles # 2.898 GHz - 13,063,536,139 instructions # 2.14 insn per cycle - 2.111708008 seconds time elapsed +TOTAL : 2.135055 sec + 6,125,540,781 cycles # 2.864 GHz + 13,063,422,144 instructions # 2.13 insn per cycle + 2.139301000 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:46960) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.935782e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.955743e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.955743e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.348039e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.369239e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.369239e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.844918 sec - 5,058,291,283 cycles # 2.737 GHz - 11,442,442,223 instructions # 2.26 insn per cycle - 1.849050677 seconds time elapsed +TOTAL : 1.763762 sec + 5,103,130,887 cycles # 2.888 GHz + 11,442,116,049 instructions # 2.24 insn per cycle + 1.767944894 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:40434) (512y: 285) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.921492e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.936664e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.936664e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.858034e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.873595e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.873595e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.080077 sec - 3,980,326,008 cycles # 1.912 GHz - 5,944,493,220 instructions # 1.49 insn per cycle - 2.084182474 seconds time elapsed +TOTAL : 2.096754 sec + 3,979,885,259 cycles # 1.895 GHz + 5,943,652,624 instructions # 1.49 insn per cycle + 2.100889558 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2455) (512y: 337) (512z:39411) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index 1c09f892cc..7d78741b13 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_12:29:38 +DATE: 2023-10-29_23:19:50 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe -p 64 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.235988e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.259270e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.261133e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.229187e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.254155e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.256147e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.526496 sec - 2,247,303,462 cycles # 2.985 GHz - 3,499,379,368 instructions # 1.56 insn per cycle - 0.811083253 seconds time elapsed +TOTAL : 0.528872 sec + 2,265,218,371 cycles # 3.000 GHz + 3,542,082,296 instructions # 1.56 insn per cycle + 0.812727976 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.792538e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.819690e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.820828e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.790936e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.818767e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.819879e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.267329 sec - 10,790,118,888 cycles # 3.067 GHz - 22,896,159,672 instructions # 2.12 insn per cycle - 3.574848410 seconds time elapsed +TOTAL : 3.267182 sec + 10,779,068,223 cycles # 3.064 GHz + 24,334,052,418 instructions # 2.26 insn per cycle + 3.576518904 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 2.837296513854949e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.406964e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.407451e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.407451e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.372665e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.373139e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.373139e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 37.224548 sec - 114,578,349,510 cycles # 3.078 GHz - 145,557,021,090 instructions # 1.27 insn per cycle - 37.228737857 seconds time elapsed +TOTAL : 37.516758 sec + 114,319,717,939 cycles # 3.047 GHz + 145,558,224,840 instructions # 1.27 insn per cycle + 37.520892679 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:22248) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 2.83729918072716e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.147318e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.149735e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.149735e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.184252e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.186780e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.186780e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.219632 sec - 15,161,622,795 cycles # 2.903 GHz - 37,764,842,323 instructions # 2.49 insn per cycle - 5.223709363 seconds time elapsed +TOTAL : 5.159423 sec + 15,157,677,068 cycles # 2.936 GHz + 37,764,655,454 instructions # 2.49 insn per cycle + 5.163667570 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:68446) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.909825e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.924704e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.924704e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.897428e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.912320e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.912320e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.082644 sec - 6,000,819,060 cycles # 2.878 GHz - 12,898,583,718 instructions # 2.15 insn per cycle - 2.086890157 seconds time elapsed +TOTAL : 2.086125 sec + 6,004,123,487 cycles # 2.873 GHz + 12,897,748,633 instructions # 2.15 insn per cycle + 2.090367475 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:45929) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.309651e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.329767e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.329767e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.272095e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.293177e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.293177e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.770941 sec - 5,115,206,711 cycles # 2.883 GHz - 11,449,352,362 instructions # 2.24 insn per cycle - 1.775109224 seconds time elapsed +TOTAL : 1.778215 sec + 5,106,915,257 cycles # 2.866 GHz + 11,448,534,055 instructions # 2.24 insn per cycle + 1.782424367 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:40123) (512y: 219) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.980771e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.996490e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.996490e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.932893e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.948239e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.948239e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.064116 sec - 3,954,182,334 cycles # 1.913 GHz - 5,897,664,853 instructions # 1.49 insn per cycle - 2.068197382 seconds time elapsed +TOTAL : 2.076887 sec + 3,960,443,665 cycles # 1.904 GHz + 5,897,308,540 instructions # 1.49 insn per cycle + 2.081105689 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1971) (512y: 259) (512z:38937) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 660075297a..2550e0adc4 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_12:13:41 +DATE: 2023-10-29_23:01:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.339705e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.395741e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.400838e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.322174e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.374765e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.380198e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.475763 sec - 2,057,672,410 cycles # 2.986 GHz - 3,052,713,366 instructions # 1.48 insn per cycle - 0.746581939 seconds time elapsed +TOTAL : 0.475861 sec + 2,066,753,206 cycles # 3.001 GHz + 3,079,685,533 instructions # 1.49 insn per cycle + 0.746470599 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.555274e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.615884e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.618479e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.527278e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.590065e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.592727e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.714580 sec - 5,941,986,620 cycles # 3.043 GHz - 11,613,062,987 instructions # 1.95 insn per cycle - 2.009554137 seconds time elapsed +TOTAL : 1.727640 sec + 5,892,330,784 cycles # 3.012 GHz + 11,643,535,001 instructions # 1.98 insn per cycle + 2.013344128 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.052769e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.053796e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.053796e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.064978e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.066007e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.066007e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.997684 sec - 24,647,826,294 cycles # 3.081 GHz - 78,133,404,833 instructions # 3.17 insn per cycle - 8.001567926 seconds time elapsed +TOTAL : 7.950627 sec + 24,639,973,311 cycles # 3.098 GHz + 78,133,272,515 instructions # 3.17 insn per cycle + 7.954643603 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.444330e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.458278e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.458278e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.481948e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.496066e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.496066e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.210880 sec - 6,475,959,596 cycles # 2.925 GHz - 20,124,226,877 instructions # 3.11 insn per cycle - 2.214850735 seconds time elapsed +TOTAL : 2.199609 sec + 6,472,284,957 cycles # 2.938 GHz + 20,124,192,482 instructions # 3.11 insn per cycle + 2.203660429 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.684620e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.691679e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.691679e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.688878e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.695997e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.695997e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.981226 sec - 2,838,391,865 cycles # 2.883 GHz - 6,991,632,737 instructions # 2.46 insn per cycle - 0.985252337 seconds time elapsed +TOTAL : 0.979124 sec + 2,835,302,738 cycles # 2.886 GHz + 6,991,499,113 instructions # 2.47 insn per cycle + 0.983232961 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.936572e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.945981e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.945981e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.934792e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.944433e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.944433e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.854580 sec - 2,487,675,347 cycles # 2.900 GHz - 6,298,671,273 instructions # 2.53 insn per cycle - 0.858538541 seconds time elapsed +TOTAL : 0.855339 sec + 2,488,802,833 cycles # 2.899 GHz + 6,299,019,671 instructions # 2.53 insn per cycle + 0.859314450 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.550707e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.556652e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.556652e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.553309e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.559451e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.559451e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.065338 sec - 2,046,941,871 cycles # 1.915 GHz - 3,268,840,945 instructions # 1.60 insn per cycle - 1.069371555 seconds time elapsed +TOTAL : 1.063776 sec + 2,046,725,428 cycles # 1.918 GHz + 3,268,827,002 instructions # 1.60 insn per cycle + 1.067732468 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index 4c036c99c3..99764dd9c6 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,11 +37,12 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_12:39:23 +DATE: 2023-10-29_23:29:24 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -48,15 +50,16 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.664121e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.336972e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.336972e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.657882e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.324484e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.324484e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.466129 sec - 2,040,389,926 cycles # 3.006 GHz - 3,051,497,739 instructions # 1.50 insn per cycle - 0.738056578 seconds time elapsed +TOTAL : 0.463164 sec + 2,030,384,273 cycles # 3.017 GHz + 3,057,153,374 instructions # 1.51 insn per cycle + 0.730152926 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -65,6 +68,7 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -72,14 +76,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.282897e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.477379e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.477379e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.246179e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.485541e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.485541e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.865020 sec - 6,414,143,218 cycles # 3.055 GHz - 12,782,815,652 instructions # 1.99 insn per cycle - 2.156968870 seconds time elapsed +TOTAL : 1.869143 sec + 6,439,845,410 cycles # 3.054 GHz + 13,291,763,279 instructions # 2.06 insn per cycle + 2.165242433 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -89,20 +93,21 @@ Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.043137e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.044146e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.044146e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.048042e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.049108e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.049108e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.037889 sec - 24,669,405,197 cycles # 3.070 GHz - 78,140,910,860 instructions # 3.17 insn per cycle - 8.041937243 seconds time elapsed +TOTAL : 8.018291 sec + 24,650,165,006 cycles # 3.073 GHz + 78,137,558,802 instructions # 3.17 insn per cycle + 8.022347939 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -116,20 +121,21 @@ Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.455019e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.469395e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.469395e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.184439e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.198380e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.198380e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.210219 sec - 6,484,247,623 cycles # 2.929 GHz - 20,133,288,157 instructions # 3.10 insn per cycle - 2.214394731 seconds time elapsed +TOTAL : 2.293305 sec + 6,489,676,835 cycles # 2.825 GHz + 20,133,677,992 instructions # 3.10 insn per cycle + 2.297604790 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -143,20 +149,21 @@ Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.682999e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.690944e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.690944e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.682303e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.689607e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.689607e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.984973 sec - 2,848,560,725 cycles # 2.882 GHz - 7,001,581,075 instructions # 2.46 insn per cycle - 0.989096241 seconds time elapsed +TOTAL : 0.985266 sec + 2,845,233,923 cycles # 2.877 GHz + 7,002,190,101 instructions # 2.46 insn per cycle + 0.989581277 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -170,20 +177,21 @@ Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.943649e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.953114e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.953114e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.933392e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.942827e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.942827e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.854370 sec - 2,496,393,077 cycles # 2.910 GHz - 6,308,615,210 instructions # 2.53 insn per cycle - 0.858619330 seconds time elapsed +TOTAL : 0.858559 sec + 2,499,743,037 cycles # 2.900 GHz + 6,308,867,727 instructions # 2.52 insn per cycle + 0.862640697 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -197,20 +205,21 @@ Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.563693e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.569738e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.569738e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.556514e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.562471e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.562471e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.059279 sec - 2,056,168,365 cycles # 1.935 GHz - 3,279,162,711 instructions # 1.59 insn per cycle - 1.063407139 seconds time elapsed +TOTAL : 1.063936 sec + 2,054,890,911 cycles # 1.926 GHz + 3,279,276,140 instructions # 1.60 insn per cycle + 1.068102252 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index c2530f89f8..9aadf1e49e 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_12:50:40 +DATE: 2023-10-29_23:40:42 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --common OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.347743e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.395808e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.400916e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.360801e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.409705e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.415226e+05 ) sec^-1 MeanMatrixElemValue = ( 4.159397e-01 +- 3.238804e-01 ) GeV^-4 -TOTAL : 0.462025 sec - 2,013,008,518 cycles # 3.002 GHz - 3,022,893,991 instructions # 1.50 insn per cycle - 0.729380261 seconds time elapsed +TOTAL : 0.458838 sec + 2,014,922,139 cycles # 3.003 GHz + 2,968,563,369 instructions # 1.47 insn per cycle + 0.727778705 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --common +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.555420e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.624293e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.627291e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.551763e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.620574e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.623617e+05 ) sec^-1 MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 -TOTAL : 1.793765 sec - 6,172,368,596 cycles # 3.052 GHz - 12,517,770,206 instructions # 2.03 insn per cycle - 2.079455660 seconds time elapsed +TOTAL : 1.791509 sec + 6,204,146,397 cycles # 3.077 GHz + 12,314,042,171 instructions # 1.98 insn per cycle + 2.073334786 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.032399e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.033363e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.033363e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.033598e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.034600e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.034600e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 8.079561 sec - 24,788,148,262 cycles # 3.067 GHz - 78,134,537,045 instructions # 3.15 insn per cycle - 8.083480982 seconds time elapsed +TOTAL : 8.074494 sec + 24,637,155,665 cycles # 3.051 GHz + 78,134,535,655 instructions # 3.17 insn per cycle + 8.078409262 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.472033e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.485911e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.485911e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.381443e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.395247e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.395247e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 2.203671 sec - 6,480,606,755 cycles # 2.937 GHz - 20,122,571,094 instructions # 3.11 insn per cycle - 2.207543629 seconds time elapsed +TOTAL : 2.231358 sec + 6,485,886,508 cycles # 2.905 GHz + 20,125,397,330 instructions # 3.10 insn per cycle + 2.235670022 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.687582e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.694663e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.694663e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.696486e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.703565e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.703565e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.981076 sec - 2,842,016,272 cycles # 2.890 GHz - 6,991,595,681 instructions # 2.46 insn per cycle - 0.985023868 seconds time elapsed +TOTAL : 0.975961 sec + 2,842,850,006 cycles # 2.903 GHz + 6,990,872,447 instructions # 2.46 insn per cycle + 0.979788752 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.927331e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.936367e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.936367e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.942937e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.952287e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.952287e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.859579 sec - 2,494,530,880 cycles # 2.891 GHz - 6,295,460,796 instructions # 2.52 insn per cycle - 0.863521439 seconds time elapsed +TOTAL : 0.853484 sec + 2,494,278,243 cycles # 2.911 GHz + 6,297,258,751 instructions # 2.52 insn per cycle + 0.857337152 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --common OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.562110e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.568108e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.568108e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.558330e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.564443e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.564443e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 1.058783 sec - 2,050,259,238 cycles # 1.930 GHz - 3,265,106,043 instructions # 1.59 insn per cycle - 1.062570526 seconds time elapsed +TOTAL : 1.061341 sec + 2,049,269,495 cycles # 1.925 GHz + 3,265,041,464 instructions # 1.59 insn per cycle + 1.065312777 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt index bfdbf141be..8ffc8a4b0e 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_12:47:28 +DATE: 2023-10-29_23:37:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.336678e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.385312e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.390845e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.359870e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.408459e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.413919e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.460742 sec - 2,007,672,018 cycles # 2.996 GHz - 3,015,512,037 instructions # 1.50 insn per cycle - 0.727627371 seconds time elapsed +TOTAL : 0.459399 sec + 2,014,984,352 cycles # 3.016 GHz + 3,031,573,414 instructions # 1.50 insn per cycle + 0.727065527 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.552495e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.620410e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.623354e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.554349e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.623648e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.626648e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.747301 sec - 6,021,047,961 cycles # 3.053 GHz - 13,020,410,161 instructions # 2.16 insn per cycle - 2.028955284 seconds time elapsed +TOTAL : 1.743056 sec + 6,019,438,540 cycles # 3.063 GHz + 11,904,365,698 instructions # 1.98 insn per cycle + 2.024061522 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.035464e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.036455e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.036455e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.060194e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.061198e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.061198e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.065414 sec - 24,625,631,166 cycles # 3.052 GHz - 78,134,037,282 instructions # 3.17 insn per cycle - 8.069287807 seconds time elapsed +TOTAL : 7.968397 sec + 24,657,666,535 cycles # 3.093 GHz + 78,133,764,363 instructions # 3.17 insn per cycle + 7.972247869 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.478619e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.492208e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.492208e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.474435e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.488457e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.488457e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.201405 sec - 6,476,687,165 cycles # 2.939 GHz - 20,124,779,920 instructions # 3.11 insn per cycle - 2.205319131 seconds time elapsed +TOTAL : 2.202643 sec + 6,473,343,175 cycles # 2.935 GHz + 20,124,043,111 instructions # 3.11 insn per cycle + 2.206628940 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.697170e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.704411e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.704411e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.668655e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.675400e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.675400e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.974215 sec - 2,837,748,371 cycles # 2.904 GHz - 6,991,679,189 instructions # 2.46 insn per cycle - 0.978049576 seconds time elapsed +TOTAL : 0.990850 sec + 2,839,325,240 cycles # 2.856 GHz + 6,991,575,836 instructions # 2.46 insn per cycle + 0.994896587 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.933335e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.942603e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.942603e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.869187e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.878346e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.878346e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.856002 sec - 2,491,154,256 cycles # 2.899 GHz - 6,299,351,258 instructions # 2.53 insn per cycle - 0.859913657 seconds time elapsed +TOTAL : 0.885448 sec + 2,488,789,244 cycles # 2.800 GHz + 6,298,807,707 instructions # 2.53 insn per cycle + 0.889708860 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --curhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.558338e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.564152e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.564152e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.560892e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.566889e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.566889e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.060294 sec - 2,047,251,638 cycles # 1.926 GHz - 3,268,699,792 instructions # 1.60 insn per cycle - 1.064226963 seconds time elapsed +TOTAL : 1.058353 sec + 2,046,415,009 cycles # 1.928 GHz + 3,268,898,492 instructions # 1.60 insn per cycle + 1.062296484 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index 2a5bf9ccb4..1809c5bbfa 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,41 +37,44 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_12:44:21 +DATE: 2023-10-29_23:34:22 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.759670e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.404786e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.409990e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.769597e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.402152e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.407372e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.462005 sec - 2,013,506,806 cycles # 2.996 GHz - 2,990,269,144 instructions # 1.49 insn per cycle - 0.729870478 seconds time elapsed +TOTAL : 0.460989 sec + 2,020,137,019 cycles # 3.007 GHz + 3,045,447,444 instructions # 1.51 insn per cycle + 0.728139522 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.496196e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.623060e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.626145e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.513625e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.635748e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.638872e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.818626 sec - 6,254,482,556 cycles # 3.061 GHz - 13,143,194,522 instructions # 2.10 insn per cycle - 2.109333253 seconds time elapsed +TOTAL : 1.816261 sec + 6,253,507,004 cycles # 3.064 GHz + 13,207,865,239 instructions # 2.11 insn per cycle + 2.107003696 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -80,19 +84,20 @@ Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) 
========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.047583e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.048599e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.048599e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.058799e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.059847e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.059847e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.017612 sec - 24,631,242,469 cycles # 3.071 GHz - 78,133,422,190 instructions # 3.17 insn per cycle - 8.021430029 seconds time elapsed +TOTAL : 7.974525 sec + 24,651,702,657 cycles # 3.092 GHz + 78,135,595,549 instructions # 3.17 insn per cycle + 7.978414186 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -106,19 +111,20 @@ Relative difference = 4.92840687132121e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.494353e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.508499e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.508499e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.413674e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.427119e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.427119e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.196377 sec - 6,482,396,911 cycles # 2.947 GHz - 20,124,098,871 instructions # 3.10 insn per cycle - 2.200446498 seconds time elapsed +TOTAL : 2.220147 sec + 6,475,013,625 cycles # 2.912 GHz + 20,124,108,640 instructions # 3.11 insn per cycle + 2.224169840 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -132,19 +138,20 @@ Relative difference = 2.2029847170826283e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.591759e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.598073e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.598073e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.699076e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.706343e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.706343e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.039253 sec - 2,839,846,615 cycles # 2.725 GHz - 6,991,737,789 instructions # 2.46 insn per cycle - 1.043348940 seconds time elapsed +TOTAL : 0.972965 sec + 2,833,755,146 cycles # 2.903 GHz + 6,991,226,206 instructions # 2.47 insn per cycle + 0.976886423 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -158,19 +165,20 @@ Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.936135e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.945522e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.945522e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.905927e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.914866e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.914866e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.854950 sec - 2,487,610,203 cycles # 2.898 GHz - 6,298,632,003 instructions # 2.53 insn per cycle - 0.858842248 seconds time elapsed +TOTAL : 0.868406 sec + 2,490,916,734 cycles # 2.858 GHz + 6,299,110,262 instructions # 2.53 insn per cycle + 0.872421287 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -184,19 +192,20 @@ Relative difference = 2.7544470208782633e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.551589e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.557558e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.557558e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.540209e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.545935e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.545935e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.064952 sec - 2,046,768,081 cycles # 1.916 GHz - 3,268,598,641 instructions # 1.60 insn per cycle - 1.068818152 seconds time elapsed +TOTAL : 1.073003 sec + 2,047,997,550 cycles # 1.903 GHz + 3,268,606,033 instructions # 1.60 insn per cycle + 1.076897689 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index d385f70da3..628e905e0e 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_12:14:09 +DATE: 2023-10-29_23:01:36 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.383891e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.429788e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.434430e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.309702e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.360225e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.365503e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.474349 sec - 2,059,881,761 cycles # 3.002 GHz - 3,024,062,536 instructions # 1.47 insn per cycle - 0.744716236 seconds time elapsed +TOTAL : 0.479838 sec + 2,082,146,805 cycles # 3.006 GHz + 3,133,924,846 instructions # 1.51 insn per cycle + 0.750267000 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.504646e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.564522e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.567260e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.575947e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.638817e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.641652e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.716199 sec - 5,829,417,693 cycles # 3.001 GHz - 12,466,869,357 instructions # 2.14 insn per cycle - 2.001872021 seconds time elapsed +TOTAL : 1.717190 sec + 5,940,356,621 cycles # 3.060 GHz + 12,714,538,766 instructions # 2.14 insn per cycle + 2.000578013 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 2.8371612387547027e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.067051e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.068151e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.068151e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.069561e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.070606e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.070606e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.942537 sec - 24,555,076,029 cycles # 3.091 GHz - 77,859,417,916 instructions # 3.17 insn per cycle - 7.946534296 seconds time elapsed +TOTAL : 7.932369 sec + 24,594,739,093 cycles # 3.099 GHz + 77,859,512,765 instructions # 3.17 insn per cycle + 7.936332823 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3113) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 5.630135835748959e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.613412e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.627645e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.627645e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.656788e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.671963e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.671963e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.161575 sec - 6,421,996,654 cycles # 2.967 GHz - 20,089,145,869 instructions # 3.13 insn per cycle - 2.165472003 seconds time elapsed +TOTAL : 2.149647 sec + 6,426,261,682 cycles # 2.985 GHz + 20,089,337,132 instructions # 3.13 insn per cycle + 2.153885954 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13452) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 2.211071647257023e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.646851e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.653670e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.653670e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.658839e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.665670e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.665670e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.003462 sec - 2,906,333,499 cycles # 2.887 GHz - 7,133,624,953 instructions # 2.45 insn per cycle - 1.007536703 seconds time elapsed +TOTAL : 0.996029 sec + 2,903,440,785 cycles # 2.905 GHz + 7,133,263,572 instructions # 2.46 insn per cycle + 1.000043624 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:12261) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 5.008498817890231e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.841959e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.850853e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.850853e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.850795e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.859329e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.859329e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.897962 sec - 2,596,205,021 cycles # 2.881 GHz - 6,442,595,414 instructions # 2.48 insn per cycle - 0.902015019 seconds time elapsed +TOTAL : 0.893558 sec + 2,594,860,728 cycles # 2.894 GHz + 6,442,081,422 instructions # 2.48 insn per cycle + 0.897467277 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11276) (512y: 27) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 5.008498817890231e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.487496e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.493089e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.493089e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.439166e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.444373e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.444373e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.110124 sec - 2,119,315,659 cycles # 1.903 GHz - 3,430,495,450 instructions # 1.62 insn per cycle - 1.114069574 seconds time elapsed +TOTAL : 1.147399 sec + 2,120,827,658 cycles # 1.843 GHz + 3,430,692,895 instructions # 1.62 insn per cycle + 1.151630204 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2912) (512y: 22) (512z: 9647) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index 795c20d58d..2ffec24382 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_12:30:46 +DATE: 2023-10-29_23:20:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe -p 64 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.587240e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.625455e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.629547e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.622301e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.663450e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.667993e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.481885 sec - 2,117,745,718 cycles # 3.003 GHz - 3,191,599,744 instructions # 1.51 insn per cycle - 0.764055287 seconds time elapsed +TOTAL : 0.481979 sec + 2,128,550,563 cycles # 3.010 GHz + 3,224,823,637 instructions # 1.52 insn per cycle + 0.764400517 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.697338e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.753855e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.756261e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.691781e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.751772e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.754417e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.849188 sec - 6,395,041,151 cycles # 3.062 GHz - 12,949,203,489 instructions # 2.02 insn per cycle - 2.144785110 seconds time elapsed +TOTAL : 1.854635 sec + 6,378,019,041 cycles # 3.051 GHz + 13,626,178,470 instructions # 2.14 insn per cycle + 2.147701941 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 2.836238137986709e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.857461e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.858298e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.858298e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.848557e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.849443e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.849443e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 28.007303 sec - 86,373,801,967 cycles # 3.084 GHz - 135,568,854,976 instructions # 1.57 insn per cycle - 28.011278361 seconds time elapsed +TOTAL : 28.049326 sec + 86,454,539,554 cycles # 3.082 GHz + 135,563,506,729 instructions # 1.57 insn per cycle + 28.053340432 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:15486) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 4.195614963669944e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.194621e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.207786e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.207786e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.843609e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.855341e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.855341e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.288323 sec - 6,770,534,097 cycles # 2.955 GHz - 19,387,402,589 instructions # 2.86 insn per cycle - 2.292435220 seconds time elapsed +TOTAL : 2.405505 sec + 6,781,976,354 cycles # 2.816 GHz + 19,387,648,512 instructions # 2.86 insn per cycle + 2.409654435 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:69680) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 4.0849182767952624e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.505364e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.510997e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.510997e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.508642e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.514273e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.514273e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.097404 sec - 3,180,086,964 cycles # 2.889 GHz - 6,808,682,218 instructions # 2.14 insn per cycle - 1.101375815 seconds time elapsed +TOTAL : 1.095108 sec + 3,177,327,867 cycles # 2.893 GHz + 6,809,010,526 instructions # 2.14 insn per cycle + 1.099141144 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:49077) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 2.3520194007978538e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.816170e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.824092e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.824092e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.812604e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.820636e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.820636e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.910721 sec - 2,645,627,902 cycles # 2.895 GHz - 5,987,348,973 instructions # 2.26 insn per cycle - 0.914651942 seconds time elapsed +TOTAL : 0.912693 sec + 2,652,876,144 cycles # 2.896 GHz + 5,986,819,747 instructions # 2.26 insn per cycle + 0.916660495 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:42677) (512y: 11) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 2.3520194007978538e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.416573e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.421711e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.421711e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.529382e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.535256e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.535256e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.166189 sec - 2,076,570,720 cycles # 1.775 GHz - 3,501,568,805 instructions # 1.69 insn per cycle - 1.170642712 seconds time elapsed +TOTAL : 1.080553 sec + 2,075,263,732 cycles # 1.915 GHz + 3,501,293,642 instructions # 1.69 insn per cycle + 1.084542679 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5198) (512y: 3) (512z:44822) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index f048f77dfa..ccddbcb261 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_12:31:37 +DATE: 2023-10-29_23:21:50 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe -p 64 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.544850e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.582821e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.586998e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.542926e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.583869e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.588032e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.483630 sec - 2,128,300,469 cycles # 3.015 GHz - 3,245,581,722 instructions # 1.52 insn per cycle - 0.765205817 seconds time elapsed +TOTAL : 0.483025 sec + 2,121,528,102 cycles # 3.016 GHz + 3,204,060,218 instructions # 1.51 insn per cycle + 0.763343575 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.646108e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.701977e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.704355e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.643855e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.701871e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.704319e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.855036 sec - 6,392,715,962 cycles # 3.063 GHz - 13,400,099,020 instructions # 2.10 insn per cycle - 2.145671488 seconds time elapsed +TOTAL : 1.857658 sec + 6,390,679,996 cycles # 3.057 GHz + 13,951,131,700 instructions # 2.18 insn per cycle + 2.149753987 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 2.836238137986709e-05 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.864707e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.865536e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.865536e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.871054e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.871882e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.871882e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 27.971924 sec - 86,103,749,943 cycles # 3.078 GHz - 135,903,748,742 instructions # 1.58 insn per cycle - 27.975875216 seconds time elapsed +TOTAL : 27.942204 sec + 86,129,095,595 cycles # 3.082 GHz + 135,903,968,419 instructions # 1.58 insn per cycle + 27.946202072 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:15910) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 4.0361421941458736e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.109444e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.121851e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.121851e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.102139e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.115090e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.115090e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.314460 sec - 6,844,368,116 cycles # 2.953 GHz - 19,439,406,048 instructions # 2.84 insn per cycle - 2.318438191 seconds time elapsed +TOTAL : 2.317068 sec + 6,849,367,008 cycles # 2.952 GHz + 19,439,508,900 instructions # 2.84 insn per cycle + 2.321301652 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:69722) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 4.170542995014107e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.543754e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.549522e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.549522e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.550639e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.556581e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.556581e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.070077 sec - 3,106,170,951 cycles # 2.894 GHz - 6,719,782,927 instructions # 2.16 insn per cycle - 1.073993053 seconds time elapsed +TOTAL : 1.065503 sec + 3,104,717,730 cycles # 2.905 GHz + 6,719,677,292 instructions # 2.16 insn per cycle + 1.069490769 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:47667) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 2.4912983202981302e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.835006e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.843354e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.843354e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.830158e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.838511e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.838511e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.901457 sec - 2,624,433,972 cycles # 2.901 GHz - 5,970,449,410 instructions # 2.27 insn per cycle - 0.905462557 seconds time elapsed +TOTAL : 0.903933 sec + 2,625,287,611 cycles # 2.893 GHz + 5,970,265,330 instructions # 2.27 insn per cycle + 0.908100788 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:41842) (512y: 13) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 2.4912983202981302e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.532980e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.538727e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.538727e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.532935e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.538602e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.538602e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.077395 sec - 2,079,358,899 cycles # 1.924 GHz - 3,494,735,958 instructions # 1.68 insn per cycle - 1.081309612 seconds time elapsed +TOTAL : 1.077860 sec + 2,081,643,160 cycles # 1.926 GHz + 3,494,996,935 instructions # 1.68 insn per cycle + 1.081878939 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4162) (512y: 4) (512z:44465) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index e38e343007..18cdd9e817 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_12:14:38 +DATE: 2023-10-29_23:02:05 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.484533e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.509488e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.511369e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.507203e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.532823e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.534908e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.524873 sec - 2,222,686,446 cycles # 2.943 GHz - 3,555,209,130 instructions # 1.60 insn per cycle - 0.817130396 seconds time elapsed +TOTAL : 0.519965 sec + 2,259,116,977 cycles # 3.016 GHz + 3,550,677,907 instructions # 1.57 insn per cycle + 0.808664205 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.129234e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.157079e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.158259e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.129982e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.158539e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.159767e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.022745 sec - 9,960,554,491 cycles # 3.045 GHz - 21,965,328,321 instructions # 2.21 insn per cycle - 3.330581683 seconds time elapsed +TOTAL : 3.020683 sec + 9,742,552,779 cycles # 2.978 GHz + 22,171,425,890 instructions # 2.28 insn per cycle + 3.330856073 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.955541e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.956517e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.956517e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.954603e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.955508e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.955508e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.395958 sec - 25,922,435,513 cycles # 3.087 GHz - 79,445,850,764 instructions # 3.06 insn per cycle - 8.400030562 seconds time elapsed +TOTAL : 8.399933 sec + 25,932,121,674 cycles # 3.087 GHz + 79,444,894,057 instructions # 3.06 insn per cycle + 8.404035787 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4857) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.710113e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.713502e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.713502e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.775508e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.778880e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.778880e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.429333 sec - 12,654,536,981 cycles # 2.855 GHz - 38,554,693,632 instructions # 3.05 insn per cycle - 4.433473344 seconds time elapsed +TOTAL : 4.353141 sec + 12,647,497,564 cycles # 2.903 GHz + 38,553,800,907 instructions # 3.05 insn per cycle + 4.357433600 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13161) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.603195e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.621501e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.621501e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.666348e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.684102e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.684102e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.915640 sec - 5,512,650,774 cycles # 2.872 GHz - 13,483,893,647 instructions # 2.45 insn per cycle - 1.920241775 seconds time elapsed +TOTAL : 1.901675 sec + 5,517,462,587 cycles # 2.896 GHz + 13,483,872,666 instructions # 2.44 insn per cycle + 1.905984462 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11242) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.769210e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.792381e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.792381e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.813799e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.836969e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.836969e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.687921 sec - 4,877,838,805 cycles # 2.884 GHz - 12,140,748,552 instructions # 2.49 insn per cycle - 1.692215253 seconds time elapsed +TOTAL : 1.680364 sec + 4,866,264,721 cycles # 2.890 GHz + 12,140,855,849 instructions # 2.49 insn per cycle + 1.684505888 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10154) (512y: 79) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.633090e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.647067e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.647067e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.626772e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.640752e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.640752e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.157889 sec - 4,141,783,103 cycles # 1.917 GHz - 6,338,750,212 instructions # 1.53 insn per cycle - 2.161967975 seconds time elapsed +TOTAL : 2.159806 sec + 4,141,315,567 cycles # 1.915 GHz + 6,338,871,155 instructions # 1.53 insn per cycle + 2.164035991 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1802) (512y: 93) (512z: 9358) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index d11dc95778..a91e61ac33 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_12:15:14 +DATE: 2023-10-29_23:02:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.478559e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.503352e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.505753e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.484779e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.510802e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.512835e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.520804 sec - 2,184,791,372 cycles # 2.905 GHz - 3,376,581,256 instructions # 1.55 insn per cycle - 0.811785754 seconds time elapsed +TOTAL : 0.520622 sec + 2,247,347,861 cycles # 3.005 GHz + 3,405,515,964 instructions # 1.52 insn per cycle + 0.810033779 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.149150e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.177326e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.178490e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.131849e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.160416e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.161588e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.012932 sec - 9,968,846,939 cycles # 3.058 GHz - 20,627,018,072 instructions # 2.07 insn per cycle - 3.318612821 seconds time elapsed +TOTAL : 3.012933 sec + 10,067,953,075 cycles # 3.072 GHz + 22,554,266,954 instructions # 2.24 insn per cycle + 3.332777337 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.961292e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.962230e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.962230e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.963198e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.964177e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.964177e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.371207 sec - 25,929,706,357 cycles # 3.097 GHz - 79,453,596,939 instructions # 3.06 insn per cycle - 8.375366432 seconds time elapsed +TOTAL : 8.362399 sec + 25,931,586,977 cycles # 3.100 GHz + 79,453,998,538 instructions # 3.06 insn per cycle + 8.366440006 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4504) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.718438e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.721949e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.721949e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.702418e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.705684e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.705684e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.419301 sec - 12,657,926,561 cycles # 2.863 GHz - 38,526,904,585 instructions # 3.04 insn per cycle - 4.423427459 seconds time elapsed +TOTAL : 4.438259 sec + 12,658,801,526 cycles # 2.851 GHz + 38,526,780,330 instructions # 3.04 insn per cycle + 4.442516538 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:12928) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.548085e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.565730e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.565730e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.609178e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.626314e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.626314e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.927664 sec - 5,555,221,826 cycles # 2.877 GHz - 13,611,096,098 instructions # 2.45 insn per cycle - 1.931892207 seconds time elapsed +TOTAL : 1.914007 sec + 5,552,680,604 cycles # 2.896 GHz + 13,609,436,822 instructions # 2.45 insn per cycle + 1.918108421 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11327) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.741019e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.763202e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.763202e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.728302e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.751459e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.751459e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.692726 sec - 4,914,547,918 cycles # 2.897 GHz - 12,273,936,309 instructions # 2.50 insn per cycle - 1.696947655 seconds time elapsed +TOTAL : 1.708232 sec + 4,915,904,141 cycles # 2.894 GHz + 12,276,160,328 instructions # 2.50 insn per cycle + 1.712407371 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10143) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check.exe -p 64 256 1 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.519554e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.534279e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.534279e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.302364e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.315642e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.315642e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.190306 sec - 4,149,749,215 cycles # 1.893 GHz - 6,445,825,629 instructions # 1.55 insn per cycle - 2.194442795 seconds time elapsed +TOTAL : 2.255296 sec + 4,145,511,406 cycles # 1.835 GHz + 6,445,337,128 instructions # 1.55 insn per cycle + 2.259721639 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1627) (512y: 191) (512z: 9356) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 09b6b6cbc2..891912d002 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-29_12:17:30 +DATE: 2023-10-29_23:04:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.070904e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.071339e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.071439e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.067375e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.067765e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.067910e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.419329 sec - 8,401,022,008 cycles # 3.070 GHz - 18,446,465,487 instructions # 2.20 insn per cycle - 2.795741903 seconds time elapsed +TOTAL : 2.420440 sec + 8,330,926,782 cycles # 3.027 GHz + 17,804,207,702 instructions # 2.14 insn per cycle + 2.808869723 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.274265e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.276198e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.276405e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.234125e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.236107e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.236370e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.988228 sec - 13,257,455,915 cycles # 3.077 GHz - 28,856,097,279 instructions # 2.18 insn per cycle - 4.367813074 seconds time elapsed +TOTAL : 3.987841 sec + 13,233,324,917 cycles # 3.068 GHz + 29,085,210,527 instructions # 2.20 insn per cycle + 4.369016357 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 3.5164777671934515e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.432562e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.432797e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.432797e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.361191e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.361428e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.361428e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.260779 sec - 18,791,566,046 cycles # 3.000 GHz - 53,915,354,209 instructions # 2.87 insn per cycle - 6.264697174 seconds time elapsed +TOTAL : 6.328766 sec + 18,835,837,198 cycles # 2.975 GHz + 53,915,472,988 instructions # 2.86 insn per cycle + 6.332862012 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32447) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.653371e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.653459e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.653459e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.665611e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.665699e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.665699e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.198531 sec - 9,852,246,030 cycles # 3.078 GHz - 27,093,859,659 instructions # 2.75 insn per cycle - 3.202672159 seconds time elapsed +TOTAL : 3.175599 sec + 9,816,016,304 cycles # 3.089 GHz + 27,093,946,906 instructions # 2.76 insn per cycle + 3.179704503 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96441) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.591620e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.592034e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.592034e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.574862e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.575309e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.575309e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.474879 sec - 4,234,752,993 cycles # 2.865 GHz - 9,561,163,315 instructions # 2.26 insn per cycle - 1.478749190 seconds time elapsed +TOTAL : 1.484453 sec + 4,283,307,071 cycles # 2.879 GHz + 9,561,322,868 instructions # 2.23 insn per cycle + 1.488611761 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.887602e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.888172e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.888172e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.098539e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.099104e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.099104e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.364222 sec - 3,707,284,516 cycles # 2.711 GHz - 8,485,408,067 instructions # 2.29 insn per cycle - 1.368451998 seconds time elapsed +TOTAL : 1.293453 sec + 3,744,455,330 cycles # 2.888 GHz + 8,485,338,626 instructions # 2.27 insn per cycle + 1.297331235 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.751042e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.751588e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.751588e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.736334e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.737024e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.737024e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.413671 sec - 2,691,576,824 cycles # 1.900 GHz - 4,272,725,454 instructions # 1.59 insn per cycle - 1.417549502 seconds time elapsed +TOTAL : 1.418268 sec + 2,698,588,731 cycles # 1.899 GHz + 4,273,475,075 instructions # 1.58 insn per cycle + 1.422259166 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 4ed3f7613b..779aff9608 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,11 +37,12 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-29_12:39:52 +DATE: 2023-10-29_23:29:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) @@ -48,15 +50,16 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.066172e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.067055e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.067055e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.065298e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.066291e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.066291e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.361362 sec - 8,249,792,379 cycles # 3.082 GHz - 18,714,988,961 instructions # 2.27 insn per cycle - 2.737283147 seconds time elapsed +TOTAL : 2.356632 sec + 8,208,405,954 cycles # 3.067 GHz + 18,386,996,274 instructions # 2.24 insn per cycle + 2.732467891 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) @@ -65,6 +68,7 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -72,14 +76,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.194613e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.227944e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.227944e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.233933e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.265435e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.265435e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.985972 sec - 13,161,697,114 cycles # 3.051 GHz - 27,638,979,539 instructions # 2.10 insn per cycle - 4.373152230 seconds time elapsed +TOTAL : 3.977046 sec + 12,960,052,062 cycles # 3.016 GHz + 26,871,950,594 instructions # 2.07 insn per cycle + 4.356794911 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -89,20 +93,21 @@ Relative difference = 3.5164777671934515e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.218003e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.218227e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.218227e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.131686e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.131907e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.131907e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.421574 sec - 18,985,323,768 cycles # 2.956 GHz - 53,918,389,087 instructions # 2.84 insn per cycle - 6.425486302 seconds time elapsed +TOTAL : 6.503787 sec + 18,827,889,283 cycles # 2.894 GHz + 53,917,168,926 instructions # 2.86 insn per cycle + 6.507727499 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32447) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe @@ -116,20 +121,21 @@ Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.664145e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.664234e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.664234e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.661819e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.661909e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.661909e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.177725 sec - 9,870,540,766 cycles # 3.103 GHz - 27,093,857,072 instructions # 2.74 insn per cycle - 3.181670599 seconds time elapsed +TOTAL : 3.182091 sec + 9,868,729,705 cycles # 3.098 GHz + 27,093,635,927 instructions # 2.75 insn per cycle + 3.186061824 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96441) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe @@ -143,20 +149,21 @@ Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.612290e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.612737e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.612737e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.594572e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.594989e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.594989e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.466827 sec - 4,247,445,086 cycles # 2.892 GHz - 9,562,690,361 instructions # 2.25 insn per cycle - 1.470661148 seconds time elapsed +TOTAL : 1.473792 sec + 4,242,652,846 cycles # 2.872 GHz + 9,562,224,257 instructions # 2.25 insn per cycle + 1.477842985 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe @@ -170,20 +177,21 @@ Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.128508e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.129106e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.129106e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.925609e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.926127e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.926127e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.284387 sec - 3,711,655,436 cycles # 2.883 GHz - 8,486,499,784 instructions # 2.29 insn per cycle - 1.288300158 seconds time elapsed +TOTAL : 1.349109 sec + 3,712,707,805 cycles # 2.746 GHz + 8,486,481,321 instructions # 2.29 insn per cycle + 1.352971004 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe @@ -197,20 +205,21 @@ Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.750791e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.751396e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.751396e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.766105e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.766661e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.766661e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.413171 sec - 2,694,525,552 cycles # 1.903 GHz - 4,274,309,180 instructions # 1.59 insn per cycle - 1.417173229 seconds time elapsed +TOTAL : 1.407314 sec + 2,697,937,870 cycles # 1.913 GHz + 4,273,842,399 instructions # 1.58 insn per cycle + 1.411205772 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index 3b451f2364..0a16c68e9b 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-29_12:18:33 +DATE: 2023-10-29_23:06:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.064594e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.064974e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.065068e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.064560e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.064977e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.065132e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.421165 sec - 8,377,767,733 cycles # 3.057 GHz - 18,112,400,064 instructions # 2.16 insn per cycle - 2.797669036 seconds time elapsed +TOTAL : 2.424959 sec + 8,290,876,355 cycles # 3.020 GHz + 17,883,558,055 instructions # 2.16 insn per cycle + 2.804470986 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe -p 1 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.253070e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.254944e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.255199e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.280396e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.282381e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.282562e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.994421 sec - 13,218,747,295 cycles # 3.060 GHz - 30,671,321,018 instructions # 2.32 insn per cycle - 4.375241473 seconds time elapsed +TOTAL : 3.980459 sec + 13,239,563,134 cycles # 3.078 GHz + 30,624,142,528 instructions # 2.31 insn per cycle + 4.360149338 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 3.5164777671934515e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.699023e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.699265e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.699265e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.553878e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.554116e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.554116e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.076584 sec - 18,738,838,490 cycles # 3.082 GHz - 53,924,142,173 instructions # 2.88 insn per cycle - 6.080511022 seconds time elapsed +TOTAL : 6.194684 sec + 18,952,500,242 cycles # 3.058 GHz + 53,924,708,560 instructions # 2.85 insn per cycle + 6.198735883 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32062) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.662035e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.662125e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.662125e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.665149e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.665236e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.665236e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.182171 sec - 9,798,645,272 cycles # 3.077 GHz - 27,091,008,920 instructions # 2.76 insn per cycle - 3.186200458 seconds time elapsed +TOTAL : 3.175912 sec + 9,816,369,770 cycles # 3.088 GHz + 27,089,777,031 instructions # 2.76 insn per cycle + 3.179926705 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96284) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.595306e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.595723e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.595723e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.605988e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.606417e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.606417e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.473366 sec - 4,252,117,926 cycles # 2.880 GHz - 9,561,777,933 instructions # 2.25 insn per cycle - 1.477305688 seconds time elapsed +TOTAL : 1.469433 sec + 4,254,356,021 cycles # 2.889 GHz + 9,561,341,195 instructions # 2.25 insn per cycle + 1.473424767 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84478) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.146612e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.147215e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.147215e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.146642e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.147225e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.147225e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.278368 sec - 3,696,184,702 cycles # 2.884 GHz - 8,485,068,150 instructions # 2.30 insn per cycle - 1.282315101 seconds time elapsed +TOTAL : 1.278652 sec + 3,698,810,481 cycles # 2.885 GHz + 8,485,246,341 instructions # 2.29 insn per cycle + 1.282579911 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:80014) (512y: 241) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.746863e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.747439e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.747439e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.745293e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.745840e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.745840e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.414600 sec - 2,691,872,612 cycles # 1.899 GHz - 4,276,070,886 instructions # 1.59 insn per cycle - 1.418491985 seconds time elapsed +TOTAL : 1.415634 sec + 2,695,445,463 cycles # 1.900 GHz + 4,276,122,167 instructions # 1.59 insn per cycle + 1.419604673 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2169) (512y: 187) (512z:79110) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 8847407ea7..a61eaa07ac 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-29_12:19:35 +DATE: 2023-10-29_23:07:02 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.748205e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.749064e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.749368e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.746580e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.747476e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.747723e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.657323 sec - 5,883,101,931 cycles # 3.046 GHz - 12,231,013,529 instructions # 2.08 insn per cycle - 1.988512157 seconds time elapsed +TOTAL : 1.661004 sec + 5,893,714,693 cycles # 3.049 GHz + 11,380,357,662 instructions # 1.93 insn per cycle + 1.992297624 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.320925e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.321576e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.321673e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.300856e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.301560e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.301692e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.909241 sec - 6,669,861,142 cycles # 3.055 GHz - 14,651,528,210 instructions # 2.20 insn per cycle - 2.242162305 seconds time elapsed +TOTAL : 1.915327 sec + 6,706,835,190 cycles # 3.061 GHz + 13,746,825,081 instructions # 2.05 insn per cycle + 2.247074611 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 0.0021934350433631634 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.053372e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.053641e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.053641e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.876487e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.876755e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.876755e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.836144 sec - 17,844,888,779 cycles # 3.057 GHz - 53,590,137,383 instructions # 3.00 insn per cycle - 5.839998800 seconds time elapsed +TOTAL : 5.948808 sec + 17,851,291,221 cycles # 3.000 GHz + 53,589,394,022 instructions # 3.00 insn per cycle + 5.952703310 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 2.1197698286506752e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.519009e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.519443e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.519443e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.592563e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.593075e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.593075e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.504216 sec - 4,560,264,533 cycles # 3.025 GHz - 13,762,435,708 instructions # 3.02 insn per cycle - 1.508179950 seconds time elapsed +TOTAL : 1.474526 sec + 4,564,867,274 cycles # 3.089 GHz + 13,762,394,850 instructions # 3.01 insn per cycle + 1.478506345 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 3.151388282563952e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.166323e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.168014e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.168014e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.253655e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.255407e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.255407e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.741671 sec - 2,140,718,629 cycles # 2.873 GHz - 4,816,751,289 instructions # 2.25 insn per cycle - 0.745632802 seconds time elapsed +TOTAL : 0.733937 sec + 2,134,486,559 cycles # 2.895 GHz + 4,816,669,045 instructions # 2.26 insn per cycle + 0.737877141 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.250142e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.252420e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.252420e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.237971e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.240196e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.240196e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.645201 sec - 1,871,116,487 cycles # 2.886 GHz - 4,273,766,880 instructions # 2.28 insn per cycle - 0.649139975 seconds time elapsed +TOTAL : 0.645797 sec + 1,869,947,742 cycles # 2.881 GHz + 4,273,861,309 instructions # 2.29 insn per cycle + 0.649761140 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.519444e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.521804e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.521804e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.544900e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.547370e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.547370e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.707511 sec - 1,353,512,664 cycles # 1.905 GHz - 2,158,758,417 instructions # 1.59 insn per cycle - 0.711344847 seconds time elapsed +TOTAL : 0.706355 sec + 1,353,540,790 cycles # 1.907 GHz + 2,158,526,163 instructions # 1.59 insn per cycle + 0.710313630 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2878) (512y: 49) (512z:79298) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index 413ba84a91..b589ac844d 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,11 +37,12 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-29_12:40:54 +DATE: 2023-10-29_23:30:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) @@ -48,15 +50,16 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.801150e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.802938e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.802938e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.797174e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.798921e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.798921e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187094e-05 +- 9.825664e-06 ) GeV^-6 -TOTAL : 1.589274 sec - 5,679,053,908 cycles # 3.038 GHz - 12,232,335,932 instructions # 2.15 insn per cycle - 1.926217189 seconds time elapsed +TOTAL : 1.593040 sec + 5,715,261,288 cycles # 3.064 GHz + 12,320,056,102 instructions # 2.16 insn per cycle + 1.922596959 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) @@ -65,6 +68,7 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -72,14 +76,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.293744e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.306424e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.306424e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.325074e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.337787e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.337787e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856441e-04 +- 8.331096e-05 ) GeV^-6 -TOTAL : 1.865258 sec - 6,453,622,607 cycles # 3.013 GHz - 13,520,931,367 instructions # 2.10 insn per cycle - 2.202906400 seconds time elapsed +TOTAL : 1.872710 sec + 6,507,354,412 cycles # 3.033 GHz + 12,800,799,336 instructions # 1.97 insn per cycle + 2.202472280 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -89,20 +93,21 @@ Relative difference = 0.0021934350433631634 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.142136e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.142409e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.142409e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.085047e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.085375e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.085375e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.780398 sec - 17,852,413,963 cycles # 3.087 GHz - 53,589,586,977 instructions # 3.00 insn per cycle - 5.784180836 seconds time elapsed +TOTAL : 5.815218 sec + 17,878,109,599 cycles # 3.073 GHz + 53,590,970,232 instructions # 3.00 insn per cycle + 5.819062963 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe @@ -116,20 +121,21 @@ Relative difference = 2.1197698286506752e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.602113e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.602529e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.602529e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.598031e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.598474e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.598474e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.471274 sec - 4,560,991,212 cycles # 3.093 GHz - 13,763,276,208 instructions # 3.02 insn per cycle - 1.475196467 seconds time elapsed +TOTAL : 1.472581 sec + 4,566,761,974 cycles # 3.095 GHz + 13,763,691,081 instructions # 3.01 insn per cycle + 1.476554455 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe @@ -143,20 +149,21 @@ Relative difference = 3.151388282563952e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.236655e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.238396e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.238396e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.210371e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.212129e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.212129e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.735565 sec - 2,141,109,995 cycles # 2.900 GHz - 4,817,785,277 instructions # 2.25 insn per cycle - 0.739384819 seconds time elapsed +TOTAL : 0.737467 sec + 2,136,565,892 cycles # 2.884 GHz + 4,818,072,795 instructions # 2.26 insn per cycle + 0.741455543 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe @@ -170,20 +177,21 @@ Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.288287e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.290822e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.290822e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.265438e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.267751e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.267751e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.641860 sec - 1,869,918,713 cycles # 2.899 GHz - 4,274,756,971 instructions # 2.29 insn per cycle - 0.645746716 seconds time elapsed +TOTAL : 0.644070 sec + 1,873,290,392 cycles # 2.894 GHz + 4,274,881,772 instructions # 2.28 insn per cycle + 0.647956211 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe @@ -197,20 +205,21 @@ Relative difference = 1.858823877057982e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe -p 1 256 2 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.520397e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.522901e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.522901e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.512610e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.515015e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.515015e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.708206 sec - 1,353,775,636 cycles # 1.903 GHz - 2,159,770,339 instructions # 1.60 insn per cycle - 0.712072895 seconds time elapsed +TOTAL : 0.708003 sec + 1,353,240,321 cycles # 1.902 GHz + 2,159,591,869 instructions # 1.60 insn per cycle + 0.711943065 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2878) (512y: 49) (512z:79298) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index ebccc0c834..6a1d91cc81 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-29_12:20:22 +DATE: 2023-10-29_23:07:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.753299e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.754100e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.754395e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.750398e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.751240e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.751507e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.655949 sec - 5,893,067,214 cycles # 3.058 GHz - 12,179,862,167 instructions # 2.07 insn per cycle - 1.986764530 seconds time elapsed +TOTAL : 1.657470 sec + 5,914,573,027 cycles # 3.060 GHz + 12,635,679,749 instructions # 2.14 insn per cycle + 1.989578773 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe -p 1 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.323799e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.324454e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.324538e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.319293e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.319958e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.320039e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.911647 sec - 6,676,941,639 cycles # 3.054 GHz - 14,605,352,455 instructions # 2.19 insn per cycle - 2.243360479 seconds time elapsed +TOTAL : 1.904048 sec + 6,719,558,763 cycles # 3.078 GHz + 14,445,072,539 instructions # 2.15 insn per cycle + 2.238962809 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 0.0021934350433631634 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.994054e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.994324e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.994324e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.960737e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.961007e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.961007e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.872133 sec - 17,902,279,003 cycles # 3.048 GHz - 53,581,437,653 instructions # 2.99 insn per cycle - 5.876005280 seconds time elapsed +TOTAL : 5.894682 sec + 17,919,993,214 cycles # 3.038 GHz + 53,579,919,039 instructions # 2.99 insn per cycle + 5.898545332 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:20206) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 2.1198118933954545e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.562387e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.562823e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.562823e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.600672e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.601097e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.601097e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.486270 sec - 4,581,432,807 cycles # 3.076 GHz - 13,755,740,802 instructions # 3.00 insn per cycle - 1.490385140 seconds time elapsed +TOTAL : 1.471215 sec + 4,553,870,476 cycles # 3.092 GHz + 13,755,590,686 instructions # 3.02 insn per cycle + 1.475236912 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96606) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 3.151694379513441e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.182291e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.184034e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.184034e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.718983e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.720596e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.720596e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.739624 sec - 2,146,683,166 cycles # 2.889 GHz - 4,819,009,119 instructions # 2.24 insn per cycle - 0.743638234 seconds time elapsed +TOTAL : 0.791741 sec + 2,154,835,228 cycles # 2.710 GHz + 4,819,009,366 instructions # 2.24 insn per cycle + 0.795908499 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:85359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 1.8588234562202478e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.147676e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.149876e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.149876e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.270564e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.272850e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.272850e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.652733 sec - 1,877,606,148 cycles # 2.862 GHz - 4,275,907,242 instructions # 2.28 insn per cycle - 0.656694870 seconds time elapsed +TOTAL : 0.643390 sec + 1,877,734,991 cycles # 2.903 GHz + 4,275,647,592 instructions # 2.28 insn per cycle + 0.647331184 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:81075) (512y: 26) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 1.8588234562202478e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.261873e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.264108e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.264108e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.500722e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.502983e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.502983e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.733532 sec - 1,359,235,476 cycles # 1.844 GHz - 2,164,942,486 instructions # 1.59 insn per cycle - 0.737505632 seconds time elapsed +TOTAL : 0.708980 sec + 1,359,249,206 cycles # 1.908 GHz + 2,164,821,480 instructions # 1.59 insn per cycle + 0.712779208 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3475) (512y: 34) (512z:79492) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index e0163c34ba..d35904f255 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-29_12:21:08 +DATE: 2023-10-29_23:08:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe -p 1 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.683760e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.684333e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.684498e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.693455e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.694040e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.694151e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.167476 sec - 7,602,719,129 cycles # 3.057 GHz - 15,576,720,436 instructions # 2.05 insn per cycle - 2.543599173 seconds time elapsed +TOTAL : 2.165996 sec + 7,604,287,841 cycles # 3.058 GHz + 15,250,589,739 instructions # 2.01 insn per cycle + 2.543560163 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe -p 1 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.112366e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.112635e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.112665e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.112978e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.113253e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.113284e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.396701 sec - 11,407,687,365 cycles # 3.067 GHz - 25,837,749,358 instructions # 2.26 insn per cycle - 3.776238208 seconds time elapsed +TOTAL : 3.396804 sec + 11,372,878,062 cycles # 3.056 GHz + 26,091,634,911 instructions # 2.29 insn per cycle + 3.777850660 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 3.1385249252060663e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.003883e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.004091e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.004091e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.056300e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.056513e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.056513e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.602461 sec - 19,113,434,872 cycles # 2.894 GHz - 54,152,340,955 instructions # 2.83 insn per cycle - 6.606261064 seconds time elapsed +TOTAL : 6.562282 sec + 19,137,224,572 cycles # 2.915 GHz + 54,152,596,150 instructions # 2.83 insn per cycle + 6.566118044 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32066) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.622659e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.622745e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.622745e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.614488e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.614584e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.614584e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.260329 sec - 9,430,303,290 cycles # 2.890 GHz - 26,158,904,223 instructions # 2.77 insn per cycle - 3.264308908 seconds time elapsed +TOTAL : 3.275480 sec + 9,428,014,238 cycles # 2.876 GHz + 26,160,822,763 instructions # 2.77 insn per cycle + 3.279507093 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96005) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.791270e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.791761e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.791761e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.792295e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.792790e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.792790e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.398894 sec - 4,041,947,049 cycles # 2.883 GHz - 9,227,684,782 instructions # 2.28 insn per cycle - 1.402820243 seconds time elapsed +TOTAL : 1.397176 sec + 4,040,299,499 cycles # 2.884 GHz + 9,227,715,152 instructions # 2.28 insn per cycle + 1.401325201 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84155) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.375691e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.376429e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.376429e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.363999e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.364611e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.364611e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.212124 sec - 3,516,816,252 cycles # 2.893 GHz - 8,175,115,382 instructions # 2.32 insn per cycle - 1.216050239 seconds time elapsed +TOTAL : 1.216328 sec + 3,520,985,317 cycles # 2.887 GHz + 8,175,010,783 instructions # 2.32 insn per cycle + 1.220282338 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:79844) (512y: 79) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.662873e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.663431e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.663431e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.797607e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.798256e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.798256e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.445998 sec - 2,655,437,043 cycles # 1.832 GHz - 4,154,909,564 instructions # 1.56 insn per cycle - 1.450034641 seconds time elapsed +TOTAL : 1.395271 sec + 2,657,055,348 cycles # 1.900 GHz + 4,154,616,344 instructions # 1.56 insn per cycle + 1.399324691 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2045) (512y: 93) (512z:78760) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index dae5283598..82205091a1 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-29_12:22:09 +DATE: 2023-10-29_23:09:36 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe -p 1 256 2 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.683591e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.684087e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.684212e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.672183e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.672714e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.672860e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.168416 sec - 7,615,842,573 cycles # 3.061 GHz - 16,645,936,739 instructions # 2.19 insn per cycle - 2.545194192 seconds time elapsed +TOTAL : 2.170042 sec + 7,549,016,338 cycles # 3.030 GHz + 16,754,265,205 instructions # 2.22 insn per cycle + 2.548274846 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe -p 1 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.109020e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.109288e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.109323e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.108308e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.108579e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.108609e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.403324 sec - 11,443,355,704 cycles # 3.063 GHz - 26,118,564,830 instructions # 2.28 insn per cycle - 3.793005998 seconds time elapsed +TOTAL : 3.405791 sec + 11,417,750,948 cycles # 3.062 GHz + 24,521,841,592 instructions # 2.15 insn per cycle + 3.785623947 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 3.1385249252060663e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.020350e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.020572e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.020572e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.985475e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.985690e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.985690e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.587484 sec - 19,062,995,670 cycles # 2.892 GHz - 54,153,638,640 instructions # 2.84 insn per cycle - 6.591376376 seconds time elapsed +TOTAL : 6.617999 sec + 19,125,113,032 cycles # 2.889 GHz + 54,154,595,717 instructions # 2.83 insn per cycle + 6.621882562 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32243) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.630436e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.630523e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.630523e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.625621e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.625713e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.625713e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.243832 sec - 9,385,283,319 cycles # 2.891 GHz - 26,078,617,665 instructions # 2.78 insn per cycle - 3.247970890 seconds time elapsed +TOTAL : 3.253373 sec + 9,379,546,230 cycles # 2.880 GHz + 26,078,178,786 instructions # 2.78 insn per cycle + 3.257296595 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:95899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.765682e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.766144e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.766144e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.735058e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.735513e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.735513e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.407556 sec - 4,070,347,952 cycles # 2.885 GHz - 9,213,853,616 instructions # 2.26 insn per cycle - 1.411538882 seconds time elapsed +TOTAL : 1.420184 sec + 4,100,951,658 cycles # 2.883 GHz + 9,213,534,111 instructions # 2.25 insn per cycle + 1.424137554 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:83776) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.329937e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.330543e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.330543e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.337906e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.338510e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.338510e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.224392 sec - 3,539,648,643 cycles # 2.883 GHz - 8,168,030,339 instructions # 2.31 insn per cycle - 1.228230843 seconds time elapsed +TOTAL : 1.222656 sec + 3,534,053,802 cycles # 2.883 GHz + 8,168,175,850 instructions # 2.31 insn per cycle + 1.226575004 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:79373) (512y: 229) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check.exe -p 1 256 2 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.704586e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.705182e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.705182e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.838981e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.839568e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.839568e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.429754 sec - 2,619,590,928 cycles # 1.828 GHz - 4,153,526,089 instructions # 1.59 insn per cycle - 1.433808648 seconds time elapsed +TOTAL : 1.379834 sec + 2,624,626,162 cycles # 1.898 GHz + 4,153,410,244 instructions # 1.58 insn per cycle + 1.383695622 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1492) (512y: 175) (512z:78776) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 0e5cd865f7..fa6a790ead 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_12:15:50 +DATE: 2023-10-29_23:03:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.897731e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.298032e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.609140e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.828674e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.329373e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.682112e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.443157 sec - 1,968,673,062 cycles # 2.985 GHz - 2,764,263,671 instructions # 1.40 insn per cycle - 0.716925102 seconds time elapsed +TOTAL : 0.442044 sec + 1,954,521,689 cycles # 2.982 GHz + 2,755,776,497 instructions # 1.41 insn per cycle + 0.712294869 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.626806e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.173467e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.522031e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.578863e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.153240e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.516574e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.519132 sec - 2,252,678,411 cycles # 2.998 GHz - 3,262,908,846 instructions # 1.45 insn per cycle - 0.808982043 seconds time elapsed +TOTAL : 0.520851 sec + 2,260,462,105 cycles # 3.006 GHz + 3,201,459,405 instructions # 1.42 insn per cycle + 0.811051172 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 5.286902838873106e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.098247e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.121223e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.121223e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.097810e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.120561e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.120561e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.513208 sec - 4,692,610,893 cycles # 3.095 GHz - 13,466,756,564 instructions # 2.87 insn per cycle - 1.517124584 seconds time elapsed +TOTAL : 1.514573 sec + 4,694,462,296 cycles # 3.093 GHz + 13,466,786,719 instructions # 2.87 insn per cycle + 1.518543550 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 5.286896509487005e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.969129e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.042422e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.042422e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.978958e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.053789e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.053789e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.853122 sec - 2,623,104,335 cycles # 3.061 GHz - 7,555,431,139 instructions # 2.88 insn per cycle - 0.857560586 seconds time elapsed +TOTAL : 0.849069 sec + 2,625,204,853 cycles # 3.079 GHz + 7,555,561,159 instructions # 2.88 insn per cycle + 0.853206224 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3095) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 5.28689651338321e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.367757e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.586104e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.586104e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.376965e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.601651e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.601651e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.507811 sec - 1,480,043,609 cycles # 2.894 GHz - 3,121,534,274 instructions # 2.11 insn per cycle - 0.511995434 seconds time elapsed +TOTAL : 0.506898 sec + 1,480,175,540 cycles # 2.900 GHz + 3,122,207,376 instructions # 2.11 insn per cycle + 0.511062678 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.732684e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.002593e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.002593e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.751482e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.019340e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.019340e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.460313 sec - 1,342,434,609 cycles # 2.895 GHz - 2,983,928,435 instructions # 2.22 insn per cycle - 0.464435892 seconds time elapsed +TOTAL : 0.458339 sec + 1,342,741,453 cycles # 2.907 GHz + 2,984,124,260 instructions # 2.22 insn per cycle + 0.462497124 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.310589e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.416870e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.416870e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.286674e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.393099e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.393099e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.733968 sec - 1,333,261,128 cycles # 1.822 GHz - 1,957,621,712 instructions # 1.47 insn per cycle - 0.738175276 seconds time elapsed +TOTAL : 0.741182 sec + 1,331,264,860 cycles # 1.801 GHz + 1,958,218,603 instructions # 1.47 insn per cycle + 0.745390166 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index 9471c8d9c9..03b047ce45 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,11 +37,12 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_12:38:14 +DATE: 2023-10-29_23:28:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -48,15 +50,16 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.706674e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.257120e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.257120e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.704780e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.258212e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.258212e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.467849 sec - 2,076,528,325 cycles # 3.007 GHz - 3,074,178,340 instructions # 1.48 insn per cycle - 0.747915410 seconds time elapsed +TOTAL : 0.466821 sec + 2,068,559,520 cycles # 3.008 GHz + 3,048,703,179 instructions # 1.47 insn per cycle + 0.746274498 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -65,6 +68,7 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -72,14 +76,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.385006e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.385786e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.385786e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.350548e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.384141e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.384141e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.735465 sec - 2,965,430,587 cycles # 3.028 GHz - 4,581,128,506 instructions # 1.54 insn per cycle - 1.038667246 seconds time elapsed +TOTAL : 0.740457 sec + 2,965,577,785 cycles # 3.011 GHz + 4,535,211,847 instructions # 1.53 insn per cycle + 1.044362602 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -89,20 +93,21 @@ Relative difference = 5.286902838873106e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.095462e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.118569e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.118569e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.098304e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.121425e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.121425e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.523268 sec - 4,722,842,839 cycles # 3.094 GHz - 13,474,034,086 instructions # 2.85 insn per cycle - 1.527549837 seconds time elapsed +TOTAL : 1.518611 sec + 4,721,828,193 cycles # 3.102 GHz + 13,472,214,405 instructions # 2.85 insn per cycle + 1.522843368 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe @@ -116,20 +121,21 @@ Relative difference = 5.286896509487005e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.962183e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.036409e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.036409e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.971534e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.046066e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.046066e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.861797 sec - 2,667,534,331 cycles # 3.083 GHz - 7,603,405,381 instructions # 2.85 insn per cycle - 0.866054368 seconds time elapsed +TOTAL : 0.858759 sec + 2,658,811,149 cycles # 3.087 GHz + 7,605,857,267 instructions # 2.86 insn per cycle + 0.863212665 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3095) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe @@ -143,20 +149,21 @@ Relative difference = 5.28689651338321e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.140932e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.345844e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.345844e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.374731e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.600629e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.600629e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.550990 sec - 1,514,470,383 cycles # 2.730 GHz - 3,172,744,661 instructions # 2.09 insn per cycle - 0.555284561 seconds time elapsed +TOTAL : 0.513772 sec + 1,512,102,304 cycles # 2.921 GHz + 3,172,818,782 instructions # 2.10 insn per cycle + 0.518159822 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe @@ -170,20 +177,21 @@ Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.699108e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.964542e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.964542e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.715581e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.982318e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.982318e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.470442 sec - 1,374,160,602 cycles # 2.900 GHz - 3,033,106,438 instructions # 2.21 insn per cycle - 0.474588436 seconds time elapsed +TOTAL : 0.468578 sec + 1,372,931,684 cycles # 2.908 GHz + 3,033,222,325 instructions # 2.21 insn per cycle + 0.472803521 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe @@ -197,20 +205,21 @@ Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.542150e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.667124e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.667124e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.550728e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.673818e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.673818e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.674855 sec - 1,357,449,990 cycles # 2.000 GHz - 1,995,572,075 instructions # 1.47 insn per cycle - 0.679206405 seconds time elapsed +TOTAL : 0.672513 sec + 1,356,905,701 cycles # 2.007 GHz + 1,995,421,313 instructions # 1.47 insn per cycle + 0.676777093 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index 8747c162f1..f4cb87ae3b 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_12:16:07 +DATE: 2023-10-29_23:03:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.847927e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.169090e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.488808e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.841504e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.240938e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.561566e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.441061 sec - 1,959,960,145 cycles # 3.006 GHz - 2,776,169,145 instructions # 1.42 insn per cycle - 0.710628939 seconds time elapsed +TOTAL : 0.442720 sec + 2,000,348,753 cycles # 3.007 GHz + 2,826,931,969 instructions # 1.41 insn per cycle + 0.722757887 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.599303e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.073860e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.414334e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.552475e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.039915e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.385680e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.517772 sec - 2,256,780,305 cycles # 3.011 GHz - 3,226,937,079 instructions # 1.43 insn per cycle - 0.806382487 seconds time elapsed +TOTAL : 0.521737 sec + 2,253,159,816 cycles # 2.989 GHz + 3,247,267,932 instructions # 1.44 insn per cycle + 0.812591785 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 5.286902838873106e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.093753e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.116419e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.116419e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.079531e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.102188e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.102188e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.519650 sec - 4,717,083,625 cycles # 3.097 GHz - 13,460,945,405 instructions # 2.85 insn per cycle - 1.523612017 seconds time elapsed +TOTAL : 1.539492 sec + 4,711,862,622 cycles # 3.054 GHz + 13,460,909,244 instructions # 2.86 insn per cycle + 1.543571266 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 849) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 5.286896509487005e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.983108e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.057211e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.057211e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.964754e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.038833e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.038833e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.846574 sec - 2,623,843,957 cycles # 3.087 GHz - 7,554,608,982 instructions # 2.88 insn per cycle - 0.850719743 seconds time elapsed +TOTAL : 0.854964 sec + 2,629,162,097 cycles # 3.063 GHz + 7,554,735,733 instructions # 2.87 insn per cycle + 0.859083535 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3088) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 5.28689651338321e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.252830e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.467198e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.467198e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.023252e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.218241e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.218241e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.525510 sec - 1,480,793,679 cycles # 2.798 GHz - 3,120,328,557 instructions # 2.11 insn per cycle - 0.529923190 seconds time elapsed +TOTAL : 0.565372 sec + 1,485,590,301 cycles # 2.610 GHz + 3,120,866,984 instructions # 2.10 insn per cycle + 0.569840427 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2900) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.775976e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.054371e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.054371e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.463048e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.709967e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.709967e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.455057 sec - 1,340,948,697 cycles # 2.924 GHz - 2,981,199,299 instructions # 2.22 insn per cycle - 0.459129279 seconds time elapsed +TOTAL : 0.495664 sec + 1,346,960,365 cycles # 2.697 GHz + 2,981,663,042 instructions # 2.21 insn per cycle + 0.500027734 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 5.286901344678233e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.565163e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.688364e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.688364e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.579015e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.706333e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.706333e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.661815 sec - 1,324,894,233 cycles # 1.992 GHz - 1,953,995,087 instructions # 1.47 insn per cycle - 0.665958752 seconds time elapsed +TOTAL : 0.658262 sec + 1,325,238,025 cycles # 2.003 GHz + 1,954,041,030 instructions # 1.47 insn per cycle + 0.662328784 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1348) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 56c392a1e3..73b671c916 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_12:16:24 +DATE: 2023-10-29_23:03:51 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.856448e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.239366e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.361186e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.727976e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.233769e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.357459e+08 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.433879 sec - 1,947,838,564 cycles # 3.019 GHz - 2,721,423,956 instructions # 1.40 insn per cycle - 0.702482263 seconds time elapsed +TOTAL : 0.435999 sec + 1,946,637,043 cycles # 3.006 GHz + 2,763,626,164 instructions # 1.42 insn per cycle + 0.705054657 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.083707e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.835194e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.957264e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.910471e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.824070e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.948085e+08 ) sec^-1 MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.468146 sec - 2,081,746,126 cycles # 3.012 GHz - 2,978,581,699 instructions # 1.43 insn per cycle - 0.748245258 seconds time elapsed +TOTAL : 0.472776 sec + 2,098,689,676 cycles # 3.011 GHz + 2,996,762,466 instructions # 1.43 insn per cycle + 0.756532757 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 0.00036713209996037764 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.159896e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.185810e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.185810e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.155452e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.181319e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.181319e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.432261 sec - 4,452,339,242 cycles # 3.102 GHz - 13,052,140,755 instructions # 2.93 insn per cycle - 1.436130562 seconds time elapsed +TOTAL : 1.438216 sec + 4,456,836,483 cycles # 3.092 GHz + 13,052,733,016 instructions # 2.93 insn per cycle + 1.442173441 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 1.7265064590569047e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.091620e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.287273e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.287273e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.076378e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.271943e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.271943e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.549197 sec - 1,699,183,684 cycles # 3.076 GHz - 4,515,090,875 instructions # 2.66 insn per cycle - 0.553151524 seconds time elapsed +TOTAL : 0.552302 sec + 1,700,637,697 cycles # 3.061 GHz + 4,515,277,775 instructions # 2.66 insn per cycle + 0.556547518 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3601) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 2.5853054135974944e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.108514e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.877352e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.877352e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.074886e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.842784e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.842784e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.288021 sec - 852,159,000 cycles # 2.923 GHz - 1,898,157,553 instructions # 2.23 insn per cycle - 0.292076560 seconds time elapsed +TOTAL : 0.289526 sec + 851,602,274 cycles # 2.906 GHz + 1,898,406,404 instructions # 2.23 insn per cycle + 0.293636457 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.087116e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.904152e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.904152e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.871182e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.650540e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.650540e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.289840 sec - 799,954,134 cycles # 2.726 GHz - 1,821,377,935 instructions # 2.28 insn per cycle - 0.293914031 seconds time elapsed +TOTAL : 0.300648 sec + 801,992,801 cycles # 2.634 GHz + 1,821,710,793 instructions # 2.27 insn per cycle + 0.304975702 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe @@ -181,19 +189,10 @@ Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.880710e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 5.371257e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.371257e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.357485 sec - 738,787,686 cycles # 2.048 GHz - 1,305,534,766 instructions # 1.77 insn per cycle - 0.361510002 seconds time elapsed +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions + 29,796,412 cycles # 2.683 GHz + 41,509,798 instructions # 1.39 insn per cycle + 0.011615045 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1969) (512y: 32) (512z: 2383) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe @@ -201,10 +200,6 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 +Floating Point Exception (CPU) +Avg ME (C++/C++) = Avg ME (F77/C++) = 0.14247489383243206 -Relative difference = 4.32888033512879e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index 414cfe1d51..b6472d3fd2 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test 
CXXNAMESUFFIX= @@ -36,11 +37,12 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_12:38:31 +DATE: 2023-10-29_23:28:32 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 10 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -48,15 +50,16 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.627054e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.270003e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.270003e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.743623e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.310347e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.310347e+07 ) sec^-1 MeanMatrixElemValue = ( 2.017654e+01 +- 1.429184e+01 ) GeV^-2 -TOTAL : 0.449910 sec - 1,996,839,241 cycles # 3.004 GHz - 2,941,700,759 instructions # 1.47 insn per cycle - 0.722133966 seconds time elapsed +TOTAL : 0.448981 sec + 2,004,329,821 cycles # 2.995 GHz + 2,873,609,945 instructions # 1.43 insn per cycle + 0.726675606 seconds time elapsed runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -65,6 +68,7 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -72,14 +76,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.307972e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.870443e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.870443e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.180958e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.827959e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.827959e+07 ) sec^-1 MeanMatrixElemValue = ( 2.609942e+02 +- 2.115590e+02 ) GeV^-2 -TOTAL : 0.613187 sec - 2,525,944,425 cycles # 3.007 GHz - 3,862,520,366 instructions # 1.53 insn per cycle - 0.898879565 seconds time elapsed +TOTAL : 0.612717 sec + 2,517,756,396 cycles # 2.992 GHz + 3,800,816,210 instructions # 1.51 insn per cycle + 0.899069258 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -89,20 +93,21 @@ Relative difference = 0.00036713209996037764 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.090941e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.116413e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.116413e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.150165e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.176057e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.176057e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.527051 sec - 4,478,023,823 cycles # 2.926 GHz - 13,056,814,715 instructions # 2.92 insn per cycle - 1.531327534 seconds time elapsed +TOTAL : 1.448346 sec + 4,473,116,485 cycles # 3.081 GHz + 13,057,070,780 instructions # 2.92 insn per cycle + 1.452474949 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe @@ -116,20 +121,21 @@ Relative difference = 1.7265064590569047e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.003695e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.196349e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.196349e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.862398e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.049929e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.049929e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.569949 sec - 1,724,326,509 cycles # 3.007 GHz - 4,563,513,018 instructions # 2.65 insn per cycle - 0.574110319 seconds time elapsed +TOTAL : 0.598237 sec + 1,729,464,558 cycles # 2.874 GHz + 4,563,458,100 instructions # 2.64 insn per cycle + 0.602509718 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3601) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe @@ -143,20 +149,21 @@ Relative difference = 2.5853054135974944e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.019587e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.777056e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.777056e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.014478e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.759179e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.759179e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.296032 sec - 869,572,713 cycles # 2.903 GHz - 1,935,510,451 instructions # 2.23 insn per cycle - 0.300055410 seconds time elapsed +TOTAL : 0.296266 sec + 870,816,020 cycles # 2.904 GHz + 1,935,237,213 instructions # 2.22 insn per cycle + 0.300456476 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe @@ -170,20 +177,21 @@ Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.482654e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.361988e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.361988e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.445000e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.322163e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.322163e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.276219 sec - 818,806,491 cycles # 2.927 GHz - 1,858,442,685 instructions # 2.27 insn per cycle - 0.280314394 seconds time elapsed +TOTAL : 0.279268 sec + 819,313,174 cycles # 2.910 GHz + 1,858,559,504 instructions # 2.27 insn per cycle + 0.283327146 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe @@ -197,20 +205,11 @@ Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.841697e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.318140e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.318140e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.363863 sec - 758,289,883 cycles # 2.064 GHz - 1,347,291,326 instructions # 1.78 insn per cycle - 0.368075455 seconds time elapsed + 36,884,052 cycles # 2.768 GHz + 50,707,665 instructions # 1.37 insn per cycle + 0.013695975 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1969) (512y: 32) (512z: 2383) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe @@ -218,10 +217,6 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 +Floating Point Exception (CPU) +Avg ME (C++/C++) = Avg ME (F77/C++) = 0.14247489383243206 -Relative difference = 4.32888033512879e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index 52c7867ee8..fe20c20dcc 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_12:16:40 +DATE: 2023-10-29_23:04:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.794391e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.222395e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.344532e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.682003e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.220722e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.350681e+08 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.436791 sec - 1,938,256,203 cycles # 2.998 GHz - 2,764,073,446 instructions # 1.43 insn per cycle - 0.705965229 seconds time elapsed +TOTAL : 0.437751 sec + 1,900,811,899 cycles # 2.921 GHz + 2,678,626,262 instructions # 1.41 insn per cycle + 0.708103168 seconds time elapsed runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.996745e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.794315e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.910293e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.826661e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.778531e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.900966e+08 ) sec^-1 MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.470118 sec - 2,065,714,326 cycles # 2.989 GHz - 2,993,944,987 instructions # 1.45 insn per cycle - 0.749384322 seconds time elapsed +TOTAL : 0.477309 sec + 2,050,802,627 cycles # 2.895 GHz + 2,895,521,209 instructions # 1.41 insn per cycle + 0.766032667 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 0.00036713209996037764 OK (relative difference <= 5E-3) 
========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.155710e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.181692e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.181692e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.067807e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.092193e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.092193e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.438479 sec - 4,451,382,506 cycles # 3.089 GHz - 13,033,002,507 instructions # 2.93 insn per cycle - 1.442396317 seconds time elapsed +TOTAL : 1.556709 sec + 4,458,740,276 cycles # 2.859 GHz + 13,033,433,763 instructions # 2.92 insn per cycle + 1.560929816 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 1.7265064590569047e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.114475e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.315383e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.315383e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.971891e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.161237e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.161237e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.545728 sec - 1,690,815,772 cycles # 3.078 GHz - 4,510,987,679 instructions # 2.67 insn per cycle - 0.549857945 seconds time elapsed +TOTAL : 0.571534 sec + 1,693,291,421 cycles # 2.944 GHz + 4,511,037,766 instructions # 2.66 insn per cycle + 0.575691046 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 2.5853054135974944e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.071384e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.843278e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.843278e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.252716e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.876495e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.876495e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.289313 sec - 852,077,910 cycles # 2.911 GHz - 1,895,413,833 instructions # 2.22 insn per cycle - 0.293386594 seconds time elapsed +TOTAL : 0.333958 sec + 856,921,901 cycles # 2.537 GHz + 1,895,644,728 instructions # 2.21 insn per cycle + 0.338408955 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3461) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.491951e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.379361e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.379361e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.076897e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.901066e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.901066e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.271830 sec - 799,281,655 cycles # 2.905 GHz - 1,817,541,912 instructions # 2.27 insn per cycle - 0.275768147 seconds time elapsed +TOTAL : 0.290472 sec + 802,971,603 cycles # 2.729 GHz + 1,817,512,004 instructions # 2.26 insn per cycle + 0.294883099 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3298) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest.exe @@ -181,19 +189,10 @@ Relative difference = 4.784894739577799e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.843043e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 5.315522e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.315522e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.359842 sec - 739,487,192 cycles # 2.037 GHz - 1,303,203,838 instructions # 1.76 insn per cycle - 0.363738116 seconds time elapsed +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions + 29,041,231 cycles # 2.621 GHz + 40,698,285 instructions # 1.40 insn per cycle + 0.011491609 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1932) (512y: 32) (512z: 2383) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest.exe @@ -201,10 +200,6 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcess ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 +Floating Point Exception (CPU) +Avg ME (C++/C++) = Avg ME (F77/C++) = 0.14247489383243206 -Relative difference = 4.32888033512879e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 055869d4c7..becb5dfcbd 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 
+37,41 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_12:16:56 +DATE: 2023-10-29_23:04:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 10 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.898492e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.300415e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.619193e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.831468e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.322857e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.656480e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.441230 sec - 1,951,511,543 cycles # 2.990 GHz - 2,730,665,273 instructions # 1.40 insn per cycle - 0.711120315 seconds time elapsed +TOTAL : 0.444027 sec + 1,947,288,172 cycles # 2.948 GHz + 2,771,783,460 instructions # 1.42 insn per cycle + 0.718120536 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.638754e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.191258e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.545080e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.574015e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.147143e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.504038e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.517201 sec - 2,223,252,021 cycles # 2.965 GHz - 3,157,515,011 instructions # 1.42 insn per cycle - 0.806918695 seconds time elapsed +TOTAL : 0.523217 sec + 2,233,613,263 cycles # 2.937 GHz + 3,181,755,320 instructions # 1.42 insn per cycle + 0.818164165 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 5.209967070245855e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.066720e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.089280e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.089280e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.062623e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.085021e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.085021e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.558049 sec - 4,723,939,840 cycles # 3.026 GHz - 13,469,697,365 instructions # 2.85 insn per cycle - 1.562031550 seconds time elapsed +TOTAL : 1.564713 sec + 4,730,586,650 cycles # 3.017 GHz + 13,469,637,389 instructions # 2.85 insn per cycle + 1.568788491 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 840) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.995370e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.071613e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.071613e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.958286e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.032593e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.032593e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.842124 sec - 2,597,009,030 cycles # 3.072 GHz - 7,388,574,977 instructions # 2.85 insn per cycle - 0.846124608 seconds time elapsed +TOTAL : 0.858196 sec + 2,600,600,273 cycles # 3.018 GHz + 7,388,563,593 instructions # 2.84 insn per cycle + 0.862457888 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3073) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.399585e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.625638e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.625638e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.359172e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.581590e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.581590e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.503848 sec - 1,468,232,729 cycles # 2.894 GHz - 3,057,878,707 instructions # 2.08 insn per cycle - 0.508008971 seconds time elapsed +TOTAL : 0.510154 sec + 1,469,200,333 cycles # 2.860 GHz + 3,058,240,905 instructions # 2.08 insn per cycle + 0.514384798 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3013) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.830445e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.114174e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.114174e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.858971e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.147479e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.147479e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.448877 sec - 1,307,617,793 cycles # 2.891 GHz - 2,932,981,796 instructions # 2.24 insn per cycle - 0.452905180 seconds time elapsed +TOTAL : 0.445930 sec + 1,306,643,742 cycles # 2.907 GHz + 2,932,728,091 instructions # 2.24 insn per cycle + 0.450107947 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2799) (512y: 110) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.478580e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.593098e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.593098e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.479170e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.595374e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.595374e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.684441 sec - 1,364,490,195 cycles # 1.983 GHz - 1,971,660,665 instructions # 1.44 insn per cycle - 0.688521345 seconds time elapsed +TOTAL : 0.684274 sec + 1,364,891,420 cycles # 1.985 GHz + 1,971,900,750 instructions # 1.44 insn per cycle + 0.688421555 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1700) (512y: 114) (512z: 2171) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index 2e003d77ff..3487666f7b 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -1,3 +1,4 @@ +export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/test CXXNAMESUFFIX= @@ -36,38 +37,41 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_12:17:13 +DATE: 2023-10-29_23:04:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 10 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.880455e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.241562e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.559273e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.767907e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.061775e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.381370e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.442811 sec - 1,959,336,232 cycles # 2.999 GHz - 2,780,948,557 instructions # 1.42 insn per cycle - 0.712811728 seconds time elapsed +TOTAL : 0.451890 sec + 1,869,343,512 cycles # 2.805 GHz + 2,663,696,528 instructions # 1.42 insn per cycle + 0.724191615 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 OMP= +WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.598164e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.048675e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.388139e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.537991e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.991480e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.337355e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.515644 sec - 2,228,032,646 cycles # 2.974 GHz - 3,171,372,319 instructions # 1.42 insn per cycle - 0.806193652 seconds time elapsed +TOTAL : 0.536567 sec + 2,153,208,878 cycles # 2.809 GHz + 3,128,087,959 instructions # 1.45 insn per cycle + 0.830189616 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -77,19 +81,20 @@ Relative difference = 5.209967070245855e-07 OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.093076e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.116510e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.116510e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.082660e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.105229e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.105229e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.520318 sec - 4,726,169,759 cycles # 3.102 GHz - 13,455,687,409 instructions # 2.85 insn per cycle - 1.524513913 seconds time elapsed +TOTAL : 1.534849 sec + 4,727,993,898 cycles # 3.074 GHz + 13,455,770,647 instructions # 2.85 insn per cycle + 1.538976463 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 827) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest.exe @@ -103,19 +108,20 @@ Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.007168e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.082924e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.082924e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.999489e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.074232e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.074232e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.836696 sec - 2,600,247,624 cycles # 3.095 GHz - 7,392,292,953 instructions # 2.84 insn per cycle - 0.840827824 seconds time elapsed +TOTAL : 0.840475 sec + 2,602,065,176 cycles # 3.083 GHz + 7,392,552,725 instructions # 2.84 insn per cycle + 0.844609048 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3062) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest.exe @@ -129,19 +135,20 @@ Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.414544e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.640892e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.640892e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.396942e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.618271e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.618271e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.501049 sec - 1,466,869,763 cycles # 2.907 GHz - 3,057,603,398 instructions # 2.08 insn per cycle - 0.505187533 seconds time elapsed +TOTAL : 0.503588 sec + 1,468,648,729 cycles # 2.896 GHz + 3,058,145,449 instructions # 2.08 insn per cycle + 0.507802923 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2990) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest.exe @@ -155,19 +162,20 @@ Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.876010e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.167875e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.167875e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.819056e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.101366e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.101366e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.443787 sec - 1,306,217,967 cycles # 2.920 GHz - 2,933,470,615 instructions # 2.25 insn per cycle - 0.447942578 seconds time elapsed +TOTAL : 0.450598 sec + 1,309,476,408 cycles # 2.884 GHz + 2,933,279,716 instructions # 2.24 insn per cycle + 0.454801614 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2775) (512y: 110) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest.exe @@ -181,19 +189,20 @@ Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check.exe -p 64 256 10 OMP= +WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.482286e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.598708e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.598708e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.472084e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.586767e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.586767e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.683170 sec - 1,366,561,219 cycles # 1.990 GHz - 1,971,363,745 instructions # 1.44 insn per cycle - 0.687291870 seconds time elapsed +TOTAL : 0.686109 sec + 1,365,054,250 cycles # 1.980 GHz + 1,971,628,186 instructions # 1.44 insn per cycle + 0.690314680 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1676) (512y: 114) (512z: 2171) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest.exe From dd6fefe6a28564054503410948cf9abc2a1eece1 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 30 Oct 2023 08:24:59 +0100 Subject: [PATCH 079/119] [oct23av] rerun 18 tmad tests (while rerunning also tput with FPEs enabled), no change in functionality or performance STARTED AT Sun Oct 29 11:41:09 PM CET 2023 ENDED AT Mon Oct 30 12:00:07 AM CET 2023 Status=0 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt 24 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt --- .../log_eemumu_mad_d_inl0_hrd0.txt | 138 +++++++++--------- .../log_eemumu_mad_f_inl0_hrd0.txt | 134 ++++++++--------- .../log_eemumu_mad_m_inl0_hrd0.txt | 132 ++++++++--------- .../log_ggtt_mad_d_inl0_hrd0.txt | 138 +++++++++--------- .../log_ggtt_mad_f_inl0_hrd0.txt | 136 ++++++++--------- 
.../log_ggtt_mad_m_inl0_hrd0.txt | 130 ++++++++--------- .../log_ggttg_mad_d_inl0_hrd0.txt | 136 ++++++++--------- .../log_ggttg_mad_f_inl0_hrd0.txt | 132 ++++++++--------- .../log_ggttg_mad_m_inl0_hrd0.txt | 136 ++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 136 ++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 136 ++++++++--------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 136 ++++++++--------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 8 +- .../log_ggttggg_mad_f_inl0_hrd0.txt | 6 +- .../log_ggttggg_mad_m_inl0_hrd0.txt | 10 +- .../log_gqttq_mad_d_inl0_hrd0.txt | 138 +++++++++--------- .../log_gqttq_mad_f_inl0_hrd0.txt | 134 ++++++++--------- .../log_gqttq_mad_m_inl0_hrd0.txt | 134 ++++++++--------- 18 files changed, 1025 insertions(+), 1025 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 020b0a566d..4be6aa1159 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,12 +1,12 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum CUDACPP_BUILDDIR='.' 
+make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z @@ -16,12 +16,12 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_12:52:01 +DATE: 2023-10-29_23:42:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6502s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6416s - [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6291s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6211s + [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1798s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1715s - [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1777s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1697s + [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] 
PROGRAM TOTAL : 0.4164s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3314s - [COUNTERS] Fortran MEs ( 1 ) : 0.0851s for 90112 events => throughput is 1.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4192s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3335s + [COUNTERS] Fortran MEs ( 1 ) : 0.0857s for 90112 events => throughput is 1.05E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1872s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1807s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 8192 events => throughput is 1.27E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1880s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1814s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.24E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4130s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3422s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0708s for 90112 events => throughput is 1.27E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4182s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3478s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0704s for 90112 events => throughput is 1.28E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.222593e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.226336e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.262620e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.193204e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1851s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1812s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1845s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1806s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.11E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3820s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3382s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0438s for 90112 events => throughput is 2.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3857s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0444s for 90112 events => throughput is 2.03E+06 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.974908e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.984622e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.040258e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.040449e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1803s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1774s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.80E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1834s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1803s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3741s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3406s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0335s for 90112 events => throughput is 2.69E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3737s + [COUNTERS] Fortran Overhead 
( 0 ) : 0.3407s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 90112 events => throughput is 2.73E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.583060e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.610169e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.776569e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.827839e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1833s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1805s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.93E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1825s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1796s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.81E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3697s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3381s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0316s for 90112 events 
=> throughput is 2.85E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3789s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3463s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0327s for 90112 events => throughput is 2.76E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.778354e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.719646e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.956858e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.850027e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1823s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1789s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.37E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1822s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1786s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0036s for 8192 events => throughput is 2.28E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3811s - 
[COUNTERS] Fortran Overhead ( 0 ) : 0.3420s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0391s for 90112 events => throughput is 2.30E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3856s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3468s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0388s for 90112 events => throughput is 2.33E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.130515e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.213461e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.239701e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.237597e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.6118s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6113s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.58E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5989s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5985s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.69E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] 
fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7811s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7762s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.86E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7645s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7597s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 90112 events => throughput is 1.87E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.405288e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.294220e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.726558e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.981280e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.047848e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.997915e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.444946e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.335110e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow 
summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.026204e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.023994e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.031508e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.958940e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.037515e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.020188e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.129735e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.116154e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 128d62e050..aa9e7c4e40 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,9 +1,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum CUDACPP_BUILDDIR='.' 
+make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 @@ -15,10 +15,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -27,13 +28,12 @@ make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2023-10-29_12:52:18 +DATE: 2023-10-29_23:43:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6346s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6267s - [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6323s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6244s + [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1785s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1705s - [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1788s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1704s + [COUNTERS] Fortran MEs ( 1 ) : 0.0083s for 8192 events => throughput is 9.86E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 
events) - [COUNTERS] PROGRAM TOTAL : 0.4135s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3286s - [COUNTERS] Fortran MEs ( 1 ) : 0.0848s for 90112 events => throughput is 1.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4163s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3312s + [COUNTERS] Fortran MEs ( 1 ) : 0.0852s for 90112 events => throughput is 1.06E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166087172673] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1919s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1852s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.24E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1874s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1813s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0061s for 8192 events => throughput is 1.35E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501907796603360E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4095s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0683s for 90112 events => throughput is 1.32E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4109s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3423s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0686s for 90112 events => throughput is 1.31E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.267380e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.253383e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.237299e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.294219e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165570339780] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1809s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1783s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.23E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1815s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1788s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.11E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905322826635E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3755s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3471s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0284s for 90112 events => throughput is 3.17E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3713s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3439s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0273s for 90112 events => throughput is 3.30E+06 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.220565e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.130086e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.438590e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.376194e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165593922979] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1847s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1825s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.72E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1855s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1832s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.63E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905316084181E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3676s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3431s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0245s for 90112 events => throughput is 3.67E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3667s + [COUNTERS] Fortran Overhead 
( 0 ) : 0.3419s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0248s for 90112 events => throughput is 3.63E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.661910e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.471852e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.749870e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.761957e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165593922979] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1824s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1802s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.68E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1848s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1826s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.66E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905316084181E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3660s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3417s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0243s for 90112 events 
=> throughput is 3.70E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3697s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3450s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 90112 events => throughput is 3.64E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.497495e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.629967e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.700973e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.617716e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166440400542] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1855s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1833s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.69E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1850s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1828s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.70E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501908978565555E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3764s - 
[COUNTERS] Fortran Overhead ( 0 ) : 0.3511s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0252s for 90112 events => throughput is 3.57E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3711s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3462s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0249s for 90112 events => throughput is 3.62E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.571172e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.468172e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.542081e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.642139e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166823487174] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5918s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5914s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.74E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5944s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5939s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.66E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,8 +547,8 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501910542849674E-002] 
fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7548s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7502s + [COUNTERS] PROGRAM TOTAL : 0.7608s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7562s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 90112 events => throughput is 1.97E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.729601e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.669429e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.851176e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.803655e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.581866e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.522788e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.061926e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.045986e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 
7.564496e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.458177e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.226059e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.245797e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.843740e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.767122e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.462335e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.440537e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 9ceddba9f6..9ef9326058 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -3,9 +3,9 @@ CUDACPP_BUILDDIR='.' 
make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_12:52:35 +DATE: 2023-10-29_23:43:19 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6425s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6347s - [COUNTERS] Fortran MEs ( 1 ) : 0.0078s for 8192 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6434s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6356s + [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1774s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1696s - [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1784s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1703s + [COUNTERS] Fortran MEs ( 1 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] 
PROGRAM TOTAL : 0.4143s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3287s - [COUNTERS] Fortran MEs ( 1 ) : 0.0856s for 90112 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4177s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3316s + [COUNTERS] Fortran MEs ( 1 ) : 0.0861s for 90112 events => throughput is 1.05E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211734] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1870s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1805s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 8192 events => throughput is 1.27E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1906s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1836s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 8192 events => throughput is 1.17E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4157s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3429s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0729s for 90112 events => throughput is 1.24E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4162s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3439s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0722s for 90112 events => throughput is 1.25E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.193150e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.172991e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.208603e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.216868e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211728] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1824s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1787s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0037s for 8192 events => throughput is 2.19E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1833s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1796s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0038s for 8192 events => throughput is 2.17E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3792s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3374s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0419s for 90112 events => throughput is 2.15E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3829s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3408s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0421s for 90112 events => throughput is 2.14E+06 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.005845e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.075634e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.200623e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.184462e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1816s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1785s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.67E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1826s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1795s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.60E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3820s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3472s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0348s for 90112 events => throughput is 2.59E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3738s + [COUNTERS] Fortran Overhead 
( 0 ) : 0.3405s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0333s for 90112 events => throughput is 2.71E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.635841e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.669400e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.796266e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.660847e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1816s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1788s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.92E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1809s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1780s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.78E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3728s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0316s for 90112 events 
=> throughput is 2.85E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3766s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3452s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0314s for 90112 events => throughput is 2.87E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.771010e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.736292e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.944150e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.813805e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1840s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1808s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.50E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1843s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1809s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.39E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3786s + 
[COUNTERS] PROGRAM TOTAL : 0.3782s [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0374s for 90112 events => throughput is 2.41E+06 events/s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0370s for 90112 events => throughput is 2.44E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.183955e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.304060e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.303699e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.408915e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169066587257] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5942s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5937s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.62E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5929s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5924s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.67E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919911173610E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 
events) - [COUNTERS] PROGRAM TOTAL : 0.7632s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7583s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 90112 events => throughput is 1.86E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7588s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7540s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 90112 events => throughput is 1.87E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.344629e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.334962e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.860043e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.968237e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.047726e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.011382e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.150185e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.365199e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.038230e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.010775e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.951805e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.944396e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.025153e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.029059e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.125382e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.138841e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 35a368fe62..10c5a40d89 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none - make USEBUILDDIR=1 AVX=sse4 + + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_12:52:52 +DATE: 2023-10-29_23:43:36 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3516s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3110s - [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3509s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3100s + [COUNTERS] Fortran MEs ( 1 ) : 0.0409s for 8192 events => throughput is 2.00E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3066s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2664s - [COUNTERS] Fortran MEs ( 1 ) : 0.0402s for 8192 events => throughput is 2.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3084s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2678s + [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6432s 
- [COUNTERS] Fortran Overhead ( 0 ) : 1.2007s - [COUNTERS] Fortran MEs ( 1 ) : 0.4425s for 90112 events => throughput is 2.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6676s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2160s + [COUNTERS] Fortran MEs ( 1 ) : 0.4516s for 90112 events => throughput is 2.00E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600102] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3442s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3072s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0370s for 8192 events => throughput is 2.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3434s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3065s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0369s for 8192 events => throughput is 2.22E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775372] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6622s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2572s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4050s for 90112 events => throughput is 2.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6734s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2619s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4115s for 90112 events => throughput is 2.19E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.193809e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.212985e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.165136e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.172710e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600102] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3130s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2919s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3136s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2921s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.82E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775379] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4934s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2549s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2385s for 90112 events => throughput is 3.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4819s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2471s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2348s for 90112 events => throughput is 3.84E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.758372e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.637745e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.715730e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.664607e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2942s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2814s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0129s for 8192 events => throughput is 6.36E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2996s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2866s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0130s for 8192 events => throughput is 6.31E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3976s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2530s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1447s for 90112 events => throughput is 6.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3865s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2420s + [COUNTERS] CudaCpp MEs 
( 2 ) : 0.1445s for 90112 events => throughput is 6.24E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.727935e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.044297e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.905680e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.969961e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3000s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2885s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0114s for 8192 events => throughput is 7.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2940s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2824s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0116s for 8192 events => throughput is 7.08E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3615s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2343s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1272s for 90112 events => throughput is 7.08E+05 events/s + [COUNTERS] PROGRAM TOTAL 
: 1.3694s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2402s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1293s for 90112 events => throughput is 6.97E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.782091e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.688301e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.814113e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.666529e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3124s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2924s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0201s for 8192 events => throughput is 4.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3223s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3011s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0213s for 8192 events => throughput is 3.85E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4534s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2443s - [COUNTERS] CudaCpp MEs ( 2 ) : 
0.2091s for 90112 events => throughput is 4.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5097s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2882s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2215s for 90112 events => throughput is 4.07E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.020863e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.040762e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.055554e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.038513e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6929s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6923s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.40E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6947s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6941s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.43E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6522s - 
[COUNTERS] Fortran Overhead ( 0 ) : 1.6459s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.43E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6596s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6533s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.44E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.175587e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.154027e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.675882e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.702234e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.181638e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.205697e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.081491e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.069293e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.177436e+07 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.190602e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.159222e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.154485e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.205307e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.200679e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.016071e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.004061e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 8b3a04f1ca..bccfa25524 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -4,9 +4,9 @@ CUDACPP_BUILDDIR='.' 
make USEBUILDDIR=1 AVX=none - -make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 +make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z @@ -15,11 +15,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_12:53:17 +DATE: 2023-10-29_23:44:02 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3483s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3084s - [COUNTERS] Fortran MEs ( 1 ) : 0.0400s for 8192 events => throughput is 2.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3495s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3091s + [COUNTERS] Fortran MEs ( 1 ) : 0.0404s for 8192 events => throughput is 2.03E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3063s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2659s - [COUNTERS] Fortran MEs ( 1 ) : 0.0404s for 8192 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3062s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2656s + [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7229s 
- [COUNTERS] Fortran Overhead ( 0 ) : 1.2550s - [COUNTERS] Fortran MEs ( 1 ) : 0.4679s for 90112 events => throughput is 1.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6510s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2078s + [COUNTERS] Fortran MEs ( 1 ) : 0.4433s for 90112 events => throughput is 2.03E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690706767555099] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3388s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3045s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0343s for 8192 events => throughput is 2.39E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3555s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3206s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0349s for 8192 events => throughput is 2.35E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782605295497] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6352s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2550s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3802s for 90112 events => throughput is 2.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6345s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2534s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3811s for 90112 events => throughput is 2.36E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.379417e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.371355e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.325925e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.396792e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690702885183541] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2991s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2847s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0144s for 8192 events => throughput is 5.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3003s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2858s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0144s for 8192 events => throughput is 5.67E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223778858016772] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3930s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2310s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1619s for 90112 events => throughput is 5.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3998s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2390s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1608s for 90112 events => throughput is 5.60E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.368260e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.252169e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.339718e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.352354e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690694374060818] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2872s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2794s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2852s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2777s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223775951815753] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3106s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2261s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0845s for 90112 events => throughput is 1.07E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3213s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2364s + [COUNTERS] CudaCpp MEs 
( 2 ) : 0.0849s for 90112 events => throughput is 1.06E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.013919e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.012335e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.049085e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.003051e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690694374060818] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2832s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2760s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0072s for 8192 events => throughput is 1.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2848s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2776s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0072s for 8192 events => throughput is 1.13E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223775951815753] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3217s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2419s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0798s for 90112 events => throughput is 1.13E+06 events/s + [COUNTERS] PROGRAM TOTAL 
: 1.3140s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2344s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0796s for 90112 events => throughput is 1.13E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.088263e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.094330e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.111517e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.091106e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690698914467276] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2912s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2812s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0100s for 8192 events => throughput is 8.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2904s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2804s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0100s for 8192 events => throughput is 8.21E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223780273983500] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3418s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2327s - [COUNTERS] CudaCpp MEs ( 2 ) : 
0.1091s for 90112 events => throughput is 8.26E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3456s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2362s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1094s for 90112 events => throughput is 8.24E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.726753e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.541227e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.654832e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.649452e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690703397697980] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6926s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6921s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.53E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6946s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6940s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.49E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,8 +547,8 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223786763175951] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6479s - 
[COUNTERS] Fortran Overhead ( 0 ) : 1.6425s + [COUNTERS] PROGRAM TOTAL : 1.6540s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6486s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 90112 events => throughput is 1.67E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.284196e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.197490e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.946223e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.947842e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.169312e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.136310e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.754775e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.752811e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.163785e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.162233e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = 
SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.871433e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.852365e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.701749e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.689881e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.427894e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.432722e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index cf7aebd679..277c27f299 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -4,8 +4,8 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none - make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -18,10 +18,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_12:53:42 +DATE: 2023-10-29_23:44:27 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3507s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3105s - [COUNTERS] Fortran MEs ( 1 ) : 0.0402s for 8192 events => throughput is 2.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3519s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3114s + [COUNTERS] Fortran MEs ( 1 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3057s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2652s - [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3076s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2674s + [COUNTERS] Fortran MEs ( 1 ) : 0.0403s for 8192 events => throughput is 2.04E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 
[XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6569s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2105s - [COUNTERS] Fortran MEs ( 1 ) : 0.4464s for 90112 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6509s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2074s + [COUNTERS] Fortran MEs ( 1 ) : 0.4435s for 90112 events => throughput is 2.03E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3451s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3078s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0373s for 8192 events => throughput is 2.20E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3457s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3080s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0377s for 8192 events => throughput is 2.17E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6672s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2591s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4081s for 90112 events => throughput is 2.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6643s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2559s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4084s for 90112 events => throughput is 2.21E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.207949e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.163062e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.193947e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.217486e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3124s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2918s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0206s for 8192 events => throughput is 3.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3126s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2919s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0207s for 8192 events => throughput is 3.96E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4769s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2475s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2295s for 90112 events => throughput is 3.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4786s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2489s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2296s for 90112 events => throughput is 3.92E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.715614e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.804143e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.728132e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.857305e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3179s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3037s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0142s for 8192 events => throughput is 5.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2962s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2833s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0129s for 8192 events => throughput is 6.34E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4180s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2697s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1483s for 90112 events => throughput is 6.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3819s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2394s + [COUNTERS] CudaCpp MEs 
( 2 ) : 0.1425s for 90112 events => throughput is 6.33E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.078585e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.058038e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.196670e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.109676e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2929s + [COUNTERS] PROGRAM TOTAL : 0.2926s [COUNTERS] Fortran Overhead ( 0 ) : 0.2814s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0115s for 8192 events => throughput is 7.12E+05 events/s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0112s for 8192 events => throughput is 7.28E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3613s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2356s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1258s for 90112 events => throughput is 7.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3749s + [COUNTERS] Fortran Overhead ( 0 ) : 
1.2482s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1267s for 90112 events => throughput is 7.11E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.899838e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.024816e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.905439e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.053810e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3073s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2886s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0186s for 8192 events => throughput is 4.39E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3154s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2972s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0182s for 8192 events => throughput is 4.50E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4525s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2498s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2026s for 90112 events => throughput is 4.45E+05 
events/s + [COUNTERS] PROGRAM TOTAL : 1.4615s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2546s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2069s for 90112 events => throughput is 4.36E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.219505e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.205277e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.133533e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.230271e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708266690699] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.7103s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7097s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.45E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6938s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6933s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.47E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782303744791] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6622s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6558s - 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 90112 events => throughput is 1.42E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6553s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6490s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.44E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.164053e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.003541e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.591008e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.600448e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.186252e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.175653e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.056436e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.060114e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.190060e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.194865e+07 ) sec^-1 *** EXECUTE 
GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.131852e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.143139e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.183442e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.181293e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.000787e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.968173e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index b1bb43404f..9730f11338 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -2,8 +2,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_12:54:07 +DATE: 2023-10-29_23:44:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5360s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2216s - [COUNTERS] Fortran MEs ( 1 ) : 0.3144s for 8192 events => throughput is 2.61E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5433s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2287s + [COUNTERS] Fortran MEs ( 1 ) : 0.3146s for 8192 events => throughput is 2.60E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5413s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2270s - [COUNTERS] Fortran MEs ( 1 ) : 0.3143s for 8192 events => throughput is 2.61E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5326s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2185s + [COUNTERS] Fortran MEs ( 1 ) : 0.3141s for 8192 events => throughput is 2.61E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM 
TOTAL : 4.8556s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3842s - [COUNTERS] Fortran MEs ( 1 ) : 3.4714s for 90112 events => throughput is 2.60E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.9522s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4138s + [COUNTERS] Fortran MEs ( 1 ) : 3.5384s for 90112 events => throughput is 2.55E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470791E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8564s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5335s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3229s for 8192 events => throughput is 2.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8782s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5546s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3236s for 8192 events => throughput is 2.53E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.2388s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6779s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.5609s for 90112 events => throughput is 2.53E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3815s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7220s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6595s for 90112 events => throughput is 2.46E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.602791e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.594077e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.594960e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.572076e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470777E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5533s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3858s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1675s for 8192 events => throughput is 4.89E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5574s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3872s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1702s for 8192 events => throughput is 4.81E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.3897s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5304s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8593s for 90112 events => throughput is 4.85E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.4101s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5437s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8665s for 90112 events => throughput is 4.83E+04 events/s *** 
(2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.966788e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.921909e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.966194e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.905097e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3860s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3017s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0843s for 8192 events => throughput is 9.72E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3859s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3022s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0837s for 8192 events => throughput is 9.79E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.3746s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4553s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9193s for 90112 events => throughput is 9.80E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.3897s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4614s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.9283s for 90112 events => throughput is 9.71E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.932213e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.915492e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.535049e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.906893e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3682s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2931s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0750s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3667s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2921s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0746s for 8192 events => throughput is 1.10E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.2682s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4425s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8256s for 90112 events => throughput is 1.09E+05 
events/s + [COUNTERS] PROGRAM TOTAL : 2.2583s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4372s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8210s for 90112 events => throughput is 1.10E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.119921e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.120723e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.108923e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.101513e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4273s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3234s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1039s for 8192 events => throughput is 7.89E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4265s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3236s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1029s for 8192 events => throughput is 7.96E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.7039s - [COUNTERS] Fortran Overhead ( 0 ) 
: 1.5049s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1990s for 90112 events => throughput is 7.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6156s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4731s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1425s for 90112 events => throughput is 7.89E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.919203e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.888561e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.829536e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.896267e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6537s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6482s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.50E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6539s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6485s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 
1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8360s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8133s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 90112 events => throughput is 3.96E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8182s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7954s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0229s for 90112 events => throughput is 3.94E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.632436e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.619063e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.870914e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.241010e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.879790e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.874326e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.237987e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.236994e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
-EvtsPerSec[MECalcOnly] (3a) = ( 9.865865e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.898969e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.250210e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.246371e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.878270e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.839298e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.743237e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.748686e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 2a60fb5534..0d3786c3f4 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg CUDACPP_BUILDDIR='.' 
+ make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 - make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,11 +15,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_12:54:48 +DATE: 2023-10-29_23:45:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5672s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2310s - [COUNTERS] Fortran MEs ( 1 ) : 0.3361s for 8192 events => throughput is 2.44E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5334s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2186s + [COUNTERS] Fortran MEs ( 1 ) : 0.3148s for 8192 events => throughput is 2.60E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5677s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2310s - [COUNTERS] Fortran MEs ( 1 ) : 0.3367s for 8192 events => throughput is 2.43E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5331s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2181s + [COUNTERS] Fortran MEs ( 1 ) : 0.3150s for 8192 events => throughput is 2.60E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM 
TOTAL : 4.9155s + [COUNTERS] PROGRAM TOTAL : 4.9081s [COUNTERS] Fortran Overhead ( 0 ) : 1.4034s - [COUNTERS] Fortran MEs ( 1 ) : 3.5121s for 90112 events => throughput is 2.57E+04 events/s + [COUNTERS] Fortran MEs ( 1 ) : 3.5047s for 90112 events => throughput is 2.57E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196349765248158E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8523s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5274s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3248s for 8192 events => throughput is 2.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8377s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5247s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3130s for 8192 events => throughput is 2.62E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310860767768514E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.1874s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6887s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4987s for 90112 events => throughput is 2.58E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1217s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6768s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.4449s for 90112 events => throughput is 2.62E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.490289e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.703655e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.615209e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.695629e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196334183509370E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4077s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3125s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0953s for 8192 events => throughput is 8.60E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4022s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3090s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0932s for 8192 events => throughput is 8.79E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847547651041E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.5054s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4660s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0394s for 90112 events => throughput is 8.67E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.4816s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4534s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0282s for 90112 events => throughput is 8.76E+04 events/s *** 
(2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.827172e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.865454e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.766055e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.842894e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196330801117323E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3093s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2656s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0437s for 8192 events => throughput is 1.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3053s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2620s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847326088065E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8870s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4129s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4741s for 90112 events => throughput is 1.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8828s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4092s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.4736s for 90112 events => throughput is 1.90E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.933741e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.907252e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.893629e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910135e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196330801117323E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.2962s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2568s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0393s for 8192 events => throughput is 2.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2957s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2572s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0386s for 8192 events => throughput is 2.12E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847326088065E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8774s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4386s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4388s for 90112 events => throughput is 2.05E+05 
events/s + [COUNTERS] PROGRAM TOTAL : 1.8300s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4034s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4266s for 90112 events => throughput is 2.11E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.027794e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.131125e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.133357e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.143236e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196344079460428E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3211s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2706s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0504s for 8192 events => throughput is 1.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3190s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2692s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0498s for 8192 events => throughput is 1.64E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310857804286998E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.9823s - [COUNTERS] Fortran Overhead ( 0 ) 
: 1.4267s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5557s for 90112 events => throughput is 1.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0404s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4584s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5820s for 90112 events => throughput is 1.55E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.584134e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.578722e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.598776e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.494576e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196349366365994E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6480s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6472s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 9.68E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6758s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6750s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 9.72E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310864949473968E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 
1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8069s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7974s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0095s for 90112 events => throughput is 9.51E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8049s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7954s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0095s for 90112 events => throughput is 9.45E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.335468e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.336673e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.856620e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.859827e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.750769e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.723714e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.282448e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.344003e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.752164e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.714107e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.539062e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.460288e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.591877e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.582435e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.619902e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.621813e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 949ed85b68..fdfb4bc804 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -3,8 +3,8 @@ CUDACPP_BUILDDIR='.' 
make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_12:55:25 +DATE: 2023-10-29_23:46:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5366s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2209s - [COUNTERS] Fortran MEs ( 1 ) : 0.3157s for 8192 events => throughput is 2.59E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5514s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2299s + [COUNTERS] Fortran MEs ( 1 ) : 0.3216s for 8192 events => throughput is 2.55E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5345s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2194s - [COUNTERS] Fortran MEs ( 1 ) : 0.3151s for 8192 events => throughput is 2.60E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5357s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2189s + [COUNTERS] Fortran MEs ( 1 ) : 0.3168s for 8192 events => throughput is 2.59E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM 
TOTAL : 4.8683s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3866s - [COUNTERS] Fortran MEs ( 1 ) : 3.4817s for 90112 events => throughput is 2.59E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.8536s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3859s + [COUNTERS] Fortran MEs ( 1 ) : 3.4677s for 90112 events => throughput is 2.60E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358763382007E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8697s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5430s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3267s for 8192 events => throughput is 2.51E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8663s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5405s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3258s for 8192 events => throughput is 2.51E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872835011053E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.3049s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6900s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6148s for 90112 events => throughput is 2.49E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3038s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6919s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6119s for 90112 events => throughput is 2.49E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.572523e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.559078e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.540550e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.559032e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358804670396E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5528s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3888s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1639s for 8192 events => throughput is 5.00E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5492s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3823s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1668s for 8192 events => throughput is 4.91E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872836789727E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.3320s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5232s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8088s for 90112 events => throughput is 4.98E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.3533s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5274s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8259s for 90112 events => throughput is 4.94E+04 events/s *** 
(2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.062468e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.012905e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.052914e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.083717e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3881s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3045s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0836s for 8192 events => throughput is 9.80E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3855s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3015s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0840s for 8192 events => throughput is 9.75E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.3724s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4554s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9170s for 90112 events => throughput is 9.83E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.3727s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4507s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.9220s for 90112 events => throughput is 9.77E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.001820e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.988170e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.771997e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.998635e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3874s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3089s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0785s for 8192 events => throughput is 1.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3690s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2954s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0736s for 8192 events => throughput is 1.11E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.2882s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4675s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8207s for 90112 events => throughput is 1.10E+05 
events/s + [COUNTERS] PROGRAM TOTAL : 2.2518s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4420s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8098s for 90112 events => throughput is 1.11E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.118705e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.136004e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.118791e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.143932e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358757578441E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4737s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3538s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1199s for 8192 events => throughput is 6.83E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4322s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3251s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1070s for 8192 events => throughput is 7.65E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872803699391E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.6718s - [COUNTERS] Fortran Overhead ( 0 ) 
: 1.4976s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1742s for 90112 events => throughput is 7.67E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6667s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4847s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1821s for 90112 events => throughput is 7.62E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.724100e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.705632e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.724361e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.555384e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358102981245E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6480s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6425s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.50E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6572s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6518s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0053s for 8192 events => throughput is 1.53E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872068634174E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 
1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8132s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7904s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 90112 events => throughput is 3.95E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8152s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7925s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0227s for 90112 events => throughput is 3.97E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.620312e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.631448e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.903689e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.958483e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.827852e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.843306e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.234645e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.233362e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
-EvtsPerSec[MECalcOnly] (3a) = ( 9.822352e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.852055e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.243970e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.245148e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.862310e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.845495e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.715609e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.722805e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 79de5a8a41..e657d4ab45 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
- - make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_12:56:06 +DATE: 2023-10-29_23:46:51 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3648s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2743s - [COUNTERS] Fortran MEs ( 1 ) : 4.0905s for 8192 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3573s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2786s + [COUNTERS] Fortran MEs ( 1 ) : 4.0787s for 8192 events => throughput is 2.01E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3631s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2705s - [COUNTERS] Fortran MEs ( 1 ) : 4.0926s for 8192 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4672s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2765s + [COUNTERS] Fortran MEs ( 1 ) : 4.1908s for 8192 events => throughput is 1.95E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] 
PROGRAM TOTAL : 47.1667s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8795s - [COUNTERS] Fortran MEs ( 1 ) : 45.2872s for 90112 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.0635s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8834s + [COUNTERS] Fortran MEs ( 1 ) : 45.1800s for 90112 events => throughput is 1.99E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.6271s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3999s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.2272s for 8192 events => throughput is 1.94E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.6057s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3898s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.2159s for 8192 events => throughput is 1.94E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421161E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 52.7641s - [COUNTERS] Fortran Overhead ( 0 ) : 6.0183s - [COUNTERS] CudaCpp MEs ( 2 ) : 46.7458s for 90112 events => throughput is 1.93E+03 events/s + [COUNTERS] PROGRAM TOTAL : 52.7110s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0688s + [COUNTERS] CudaCpp MEs ( 2 ) : 46.6422s for 90112 events => throughput is 1.93E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.987957e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.993883e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.991838e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.996553e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352993E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.7379s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4755s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2623s for 8192 events => throughput is 3.62E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7879s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4764s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3115s for 8192 events => throughput is 3.54E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 28.9819s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0727s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.9092s for 90112 events => throughput is 3.62E+03 events/s + [COUNTERS] PROGRAM TOTAL : 28.9921s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1750s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.8171s for 90112 events => throughput is 3.63E+03 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.783292e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.776261e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.725405e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.776054e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.3014s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3232s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9782s for 8192 events => throughput is 8.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.2047s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2221s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9826s for 8192 events => throughput is 8.34E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 13.6470s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8357s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.8113s for 90112 events => throughput is 8.33E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.6452s + [COUNTERS] Fortran 
Overhead ( 0 ) : 2.8479s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.7973s for 90112 events => throughput is 8.35E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.598183e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.631774e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.637409e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.588857e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.9665s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1096s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8569s for 8192 events => throughput is 9.56E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9597s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1073s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8524s for 8192 events => throughput is 9.61E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 12.1418s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7198s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.4220s for 
90112 events => throughput is 9.56E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.3300s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7280s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.6020s for 90112 events => throughput is 9.38E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.845982e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.801101e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.851814e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.852804e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.4078s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3366s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0711s for 8192 events => throughput is 7.65E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.4084s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3386s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0698s for 8192 events => throughput is 7.66E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL 
: 14.7364s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9384s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.7980s for 90112 events => throughput is 7.64E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.7032s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9539s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.7493s for 90112 events => throughput is 7.67E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.624404e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.736846e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.706462e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.663907e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,8 +514,8 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.8040s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7719s + [COUNTERS] PROGRAM TOTAL : 0.8047s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7726s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421161E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM 
TOTAL : 2.7244s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3743s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3501s for 90112 events => throughput is 2.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.7121s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3652s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3469s for 90112 events => throughput is 2.60E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.297814e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.286508e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.527310e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.523126e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.102163e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.117684e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.155936e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.164738e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.100021e+05 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.122122e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.139640e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.167581e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.109701e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.109077e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.439044e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.431461e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 5857620d56..99b68d9b86 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -3,8 +3,8 @@ CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,11 +15,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_13:00:19 +DATE: 2023-10-29_23:51:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3795s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2746s - [COUNTERS] Fortran MEs ( 1 ) : 4.1049s for 8192 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4892s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2748s + [COUNTERS] Fortran MEs ( 1 ) : 4.2144s for 8192 events => throughput is 1.94E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3663s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2696s - [COUNTERS] Fortran MEs ( 1 ) : 4.0968s for 8192 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3576s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2692s + [COUNTERS] Fortran MEs ( 1 ) : 4.0884s for 8192 events => throughput is 2.00E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] 
PROGRAM TOTAL : 47.2548s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8877s - [COUNTERS] Fortran MEs ( 1 ) : 45.3671s for 90112 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.0340s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8721s + [COUNTERS] Fortran MEs ( 1 ) : 45.1619s for 90112 events => throughput is 2.00E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277396490802749E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.3133s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2211s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.0923s for 8192 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.4522s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2593s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.1929s for 8192 events => throughput is 1.95E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803774602344628E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 51.0625s - [COUNTERS] Fortran Overhead ( 0 ) : 5.8484s - [COUNTERS] CudaCpp MEs ( 2 ) : 45.2141s for 90112 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 51.1297s + [COUNTERS] Fortran Overhead ( 0 ) : 5.8662s + [COUNTERS] CudaCpp MEs ( 2 ) : 45.2635s for 90112 events => throughput is 1.99E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.077086e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.064759e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.073397e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.025024e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277389126121586E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.4836s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3656s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1180s for 8192 events => throughput is 7.33E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.4799s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3651s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1147s for 8192 events => throughput is 7.35E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803771887543366E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 15.3066s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9801s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.3265s for 90112 events => throughput is 7.31E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.2606s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9688s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.2918s for 90112 events => throughput is 7.33E+03 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.486766e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.498295e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.500170e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.494570e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277390198115864E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.2621s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7667s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4954s for 8192 events => throughput is 1.65E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.2578s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7591s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4987s for 8192 events => throughput is 1.64E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803774416711566E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.7860s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3557s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4304s for 90112 events => throughput is 1.66E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.7693s + [COUNTERS] Fortran Overhead 
( 0 ) : 2.3504s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4189s for 90112 events => throughput is 1.66E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.672778e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.704918e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.671538e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.708973e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277390198115864E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.1860s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7111s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4749s for 8192 events => throughput is 1.73E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.1390s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6947s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4444s for 8192 events => throughput is 1.84E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803774416711566E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.1015s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3009s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.8006s for 90112 events => 
throughput is 1.88E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.0587s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2991s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.7596s for 90112 events => throughput is 1.89E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.953167e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.954262e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.950659e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.953218e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277396394633404E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.3221s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7946s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5275s for 8192 events => throughput is 1.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.3188s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7939s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5250s for 8192 events => throughput is 1.56E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803777741065333E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 8.2283s - 
[COUNTERS] Fortran Overhead ( 0 ) : 2.4081s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.8202s for 90112 events => throughput is 1.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.1837s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3911s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.7926s for 90112 events => throughput is 1.56E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.569783e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.565914e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.543872e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.577747e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277400478491260E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.7651s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7439s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0213s for 8192 events => throughput is 3.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7649s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7436s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0213s for 8192 events => throughput is 3.84E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803779990154892E-004] 
fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.5781s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3444s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2337s for 90112 events => throughput is 3.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6087s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3737s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2351s for 90112 events => throughput is 3.83E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.591648e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.608601e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.955455e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.948328e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.490718e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.498781e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.636724e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.725121e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.500184e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.489018e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.635071e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.670344e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.481845e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.486657e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.521601e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.522733e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 7d59783520..2f24ab2c69 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -16,11 +16,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_13:03:37 +DATE: 2023-10-29_23:54:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3900s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2790s - [COUNTERS] Fortran MEs ( 1 ) : 4.1110s for 8192 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3628s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2741s + [COUNTERS] Fortran MEs ( 1 ) : 4.0887s for 8192 events => throughput is 2.00E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3767s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2695s - [COUNTERS] Fortran MEs ( 1 ) : 4.1072s for 8192 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3578s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2698s + [COUNTERS] Fortran MEs ( 1 ) : 4.0879s for 8192 events => throughput is 2.00E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross 
section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 47.1449s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8850s - [COUNTERS] Fortran MEs ( 1 ) : 45.2599s for 90112 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.1743s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8791s + [COUNTERS] Fortran MEs ( 1 ) : 45.2953s for 90112 events => throughput is 1.99E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277432965013E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.9921s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4402s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.5519s for 8192 events => throughput is 1.80E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.6820s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4279s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.2541s for 8192 events => throughput is 1.93E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725813026109E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 53.4502s - [COUNTERS] Fortran Overhead ( 0 ) : 6.2745s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.1757s for 90112 events => throughput is 1.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 53.1743s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0425s + [COUNTERS] CudaCpp MEs ( 2 ) : 47.1318s for 90112 events => throughput is 1.91E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.948358e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.972012e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.903929e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.980651e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277430934464E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.6624s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4423s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2201s for 8192 events => throughput is 3.69E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6421s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4340s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2081s for 8192 events => throughput is 3.71E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725816246317E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 28.7188s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0748s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.6439s for 90112 events => throughput is 3.66E+03 events/s + [COUNTERS] PROGRAM TOTAL : 28.5147s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0474s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.4673s for 90112 events => throughput is 3.68E+03 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.795677e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.763063e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.792989e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.790148e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.1727s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2124s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9604s for 8192 events => throughput is 8.53E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.1780s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2112s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9667s for 8192 events => throughput is 8.47E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 13.5075s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8197s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.6878s for 90112 events => throughput is 8.43E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.5745s + [COUNTERS] Fortran 
Overhead ( 0 ) : 2.8645s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.7100s for 90112 events => throughput is 8.41E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.555876e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.734001e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.758315e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.720628e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.9665s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1095s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8570s for 8192 events => throughput is 9.56E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9481s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0997s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8483s for 8192 events => throughput is 9.66E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 12.0957s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7176s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.3781s for 
90112 events => throughput is 9.61E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.0943s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7073s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.3870s for 90112 events => throughput is 9.60E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.926904e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.695446e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.926451e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.714293e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.4163s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3391s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0773s for 8192 events => throughput is 7.60E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.4113s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3354s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0759s for 8192 events => throughput is 7.61E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL 
: 14.9339s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9969s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.9369s for 90112 events => throughput is 7.55E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.8070s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9573s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.8497s for 90112 events => throughput is 7.60E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.702011e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.670668e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.700967e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.569813e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277293084707E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.8061s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7739s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8079s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7761s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0318s for 8192 events => throughput is 2.58E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 
[1.5803725738731039E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.7160s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3691s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3469s for 90112 events => throughput is 2.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.7597s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3971s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3626s for 90112 events => throughput is 2.48E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.292904e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.299489e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.534454e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.543169e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.105706e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.113219e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.154616e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.169609e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.118142e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.114662e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.168234e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.168580e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.106654e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.106978e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.434959e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.432506e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index fcea12d341..8c0eb8987a 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -15,17 +15,17 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' 
CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_13:09:15 +DATE: 2023-10-29_23:59:58 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 875efe8ee1..d1b8675317 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -4,8 +4,8 @@ CUDACPP_BUILDDIR='.' 
make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,11 +15,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_13:09:18 +DATE: 2023-10-30_00:00:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 4682066081..054c4600d8 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -3,8 +3,8 @@ CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,11 +15,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -28,12 +27,13 @@ make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2023-10-29_13:09:21 +DATE: 2023-10-30_00:00:05 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 0737ae3641..5741f0dc15 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none - make USEBUILDDIR=1 AVX=sse4 + + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_13:07:50 +DATE: 2023-10-29_23:58:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3009s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2314s - [COUNTERS] Fortran MEs ( 1 ) : 0.0695s for 8192 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3048s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2352s + [COUNTERS] Fortran MEs ( 1 ) : 0.0697s for 8192 events => throughput is 1.18E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2966s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2273s - [COUNTERS] Fortran MEs ( 1 ) : 0.0693s for 8192 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2979s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2279s + [COUNTERS] Fortran MEs ( 1 ) : 0.0701s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 
1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.1643s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4082s - [COUNTERS] Fortran MEs ( 1 ) : 0.7562s for 90112 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1663s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4084s + [COUNTERS] Fortran MEs ( 1 ) : 0.7578s for 90112 events => throughput is 1.19E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3843s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3093s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0751s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3809s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3052s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0757s for 8192 events => throughput is 1.08E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.3262s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5019s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8242s for 90112 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3528s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5177s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8351s for 90112 events => throughput is 1.08E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.090735e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.098466e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.108359e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.074362e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3141s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2739s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0403s for 8192 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3127s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2724s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0404s for 8192 events => throughput is 2.03E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615872] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.9226s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4751s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4475s for 90112 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9207s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4750s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4457s for 90112 events => throughput is 2.02E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.007209e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.973377e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.014295e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.025745e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2874s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2625s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0249s for 8192 events => throughput is 3.29E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2789s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2557s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0232s for 8192 events => throughput is 3.54E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7154s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4582s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2572s for 90112 events => throughput is 3.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7317s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4710s + [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.2607s for 90112 events => throughput is 3.46E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.470014e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.508702e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.550614e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.351363e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2943s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2715s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.59E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2758s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2551s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0207s for 8192 events => throughput is 3.96E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8061s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5544s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2518s for 90112 events => throughput is 3.58E+05 events/s + [COUNTERS] PROGRAM 
TOTAL : 1.6787s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4494s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2292s for 90112 events => throughput is 3.93E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.837885e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.848142e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.941569e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.827447e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2953s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2648s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0305s for 8192 events => throughput is 2.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2976s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2659s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0317s for 8192 events => throughput is 2.58E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8085s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4687s - [COUNTERS] CudaCpp MEs ( 2 ) 
: 0.3398s for 90112 events => throughput is 2.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8054s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4648s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3406s for 90112 events => throughput is 2.65E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.555875e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.498249e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.512531e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.567253e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,8 +514,8 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6599s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6592s + [COUNTERS] PROGRAM TOTAL : 0.6574s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6567s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.23E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615869] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8566s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8491s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 
90112 events => throughput is 1.20E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.8627s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8552s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0074s for 90112 events => throughput is 1.21E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.626429e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.634464e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.164908e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.995571e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.514643e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.534001e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.532707e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.508239e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.528292e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.523244e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** 
Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.808574e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.792185e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.524629e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.530343e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.780489e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.779192e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index d29ea05be4..23ccd83cc9 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -2,9 +2,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,12 +15,12 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_13:08:18 +DATE: 2023-10-29_23:59:02 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3065s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2366s - [COUNTERS] Fortran MEs ( 1 ) : 0.0699s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3006s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2311s + [COUNTERS] Fortran MEs ( 1 ) : 0.0695s for 8192 events => throughput is 1.18E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2993s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2295s - [COUNTERS] Fortran MEs ( 1 ) : 0.0698s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3032s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2341s + [COUNTERS] Fortran MEs ( 1 ) : 0.0691s for 8192 events => throughput is 1.19E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 
[0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.1612s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4019s - [COUNTERS] Fortran MEs ( 1 ) : 0.7593s for 90112 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1794s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4177s + [COUNTERS] Fortran MEs ( 1 ) : 0.7617s for 90112 events => throughput is 1.18E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050316058770007] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3756s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3032s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0724s for 8192 events => throughput is 1.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3737s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3035s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0702s for 8192 events => throughput is 1.17E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182797520666] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2740s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4981s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7758s for 90112 events => throughput is 1.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2755s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4978s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7777s for 90112 events => throughput is 1.16E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.177467e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.169647e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.179284e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.180347e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313133963987] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2833s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2582s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0251s for 8192 events => throughput is 3.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2835s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2578s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0257s for 8192 events => throughput is 3.19E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179276862181] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7245s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4500s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2745s for 90112 events => throughput is 3.28E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7226s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4468s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2758s for 90112 events => throughput is 3.27E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.180913e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.228255e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.242069e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.237646e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313344346482] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2596s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2471s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 8192 events => throughput is 6.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2577s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2454s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0122s for 8192 events => throughput is 6.70E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179137376883] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.5740s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4386s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1354s for 90112 events => throughput is 6.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6613s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5168s + [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.1445s for 90112 events => throughput is 6.24E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.359345e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.559338e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.441020e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.496926e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313344346482] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2579s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2465s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0113s for 8192 events => throughput is 7.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2722s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2605s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0117s for 8192 events => throughput is 7.00E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179137376883] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.5633s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4375s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1258s for 90112 events => throughput is 7.16E+05 events/s + [COUNTERS] PROGRAM 
TOTAL : 1.5592s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4340s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1252s for 90112 events => throughput is 7.20E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.843445e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.989366e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.005906e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.214148e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050317064561834] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2653s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2499s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0154s for 8192 events => throughput is 5.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2631s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2476s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0155s for 8192 events => throughput is 5.29E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182143140752] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6127s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4425s - [COUNTERS] CudaCpp MEs ( 2 ) 
: 0.1702s for 90112 events => throughput is 5.30E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6089s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4384s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1705s for 90112 events => throughput is 5.29E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.123333e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.961081e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.944707e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.937970e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050319131407651] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6566s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6561s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.60E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6617s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6611s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.48E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801186038252196] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8574s - 
[COUNTERS] Fortran Overhead ( 0 ) : 1.8516s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 90112 events => throughput is 1.55E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.8523s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8464s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0059s for 90112 events => throughput is 1.54E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.901744e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.828358e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.487017e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.418352e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.083051e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.091778e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.712159e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.699103e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.067564e+07 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 5.103721e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.805074e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.771665e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.606078e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.574079e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.001322e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.932223e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 8819a1b530..406946ea16 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_13:08:45 +DATE: 2023-10-29_23:59:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3025s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2324s - [COUNTERS] Fortran MEs ( 1 ) : 0.0701s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3015s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2320s + [COUNTERS] Fortran MEs ( 1 ) : 0.0695s for 8192 events => throughput is 1.18E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3004s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2308s - [COUNTERS] Fortran MEs ( 1 ) : 0.0696s for 8192 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2983s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2284s + [COUNTERS] Fortran MEs ( 1 ) : 0.0698s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 
2.1848s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4220s - [COUNTERS] Fortran MEs ( 1 ) : 0.7628s for 90112 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1556s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3996s + [COUNTERS] Fortran MEs ( 1 ) : 0.7560s for 90112 events => throughput is 1.19E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333282657206] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3828s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3082s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0746s for 8192 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3834s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3080s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0753s for 8192 events => throughput is 1.09E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182636608796] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.3233s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5007s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8226s for 90112 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3329s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5099s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8230s for 90112 events => throughput is 1.09E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.095408e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.106713e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.097494e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.115003e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333282657201] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3103s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2713s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0389s for 8192 events => throughput is 2.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3287s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2871s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0416s for 8192 events => throughput is 1.97E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182636608810] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8953s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4667s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4285s for 90112 events => throughput is 2.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9251s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4869s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4382s for 90112 events => throughput is 2.06E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.018304e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.926102e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.009040e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.940203e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2791s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2562s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0229s for 8192 events => throughput is 3.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2861s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2633s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.59E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7088s + [COUNTERS] PROGRAM TOTAL : 1.7068s [COUNTERS] Fortran Overhead ( 0 ) : 1.4549s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2539s for 90112 events => throughput is 3.55E+05 events/s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2519s for 90112 events => 
throughput is 3.58E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.593623e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.487761e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.486898e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.590355e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -363,8 +363,8 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) [COUNTERS] PROGRAM TOTAL : 0.2738s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2535s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0203s for 8192 events => throughput is 4.04E+05 events/s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2540s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0199s for 8192 events => throughput is 4.12E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6819s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4583s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2236s for 90112 events => throughput is 4.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6687s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4454s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2233s 
for 90112 events => throughput is 4.04E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.970058e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.994889e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.025775e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.929775e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2984s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2669s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0315s for 8192 events => throughput is 2.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3021s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2704s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0318s for 8192 events => throughput is 2.58E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8465s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4832s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3632s for 90112 events => throughput is 2.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8127s + 
[COUNTERS] Fortran Overhead ( 0 ) : 1.4617s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3509s for 90112 events => throughput is 2.57E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.433476e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.535867e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.502868e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.478315e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333301029693] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6561s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6555s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.22E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6548s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6541s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.23E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182637219935] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.9667s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9586s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 90112 
events => throughput is 1.11E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.8602s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8527s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 90112 events => throughput is 1.19E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.628198e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.592188e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.054061e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.007808e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.525972e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.525016e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.505112e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.529324e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.534479e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.521994e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process 
= SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.802635e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.809421e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.531969e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.534283e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.782148e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.778088e+07 ) sec^-1 TEST COMPLETED From 8caf257be17947e1609378d339da743bcd1a507a Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 30 Oct 2023 17:47:55 +0100 Subject: [PATCH 080/119] [oct23av] in CODEGEN patchMad.sh, reorder leading make_ops lines before #end_of_make_opts_variables to improve reproducibility --- epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh index a88ac5cb0a..c96b78dbfc 100755 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh @@ -64,7 +64,10 @@ touch ${dir}/Events/.keep # this file should already be present (mg5amcnlo copie if [ "${patchlevel}" == "2" ]; then cd ${dir} if [ "${tmadmode}" != "0" ]; then - sed -i 's/DEFAULT_F2PY_COMPILER=f2py3.*/DEFAULT_F2PY_COMPILER=f2py3/' Source/make_opts + sed -i 's/DEFAULT_F2PY_COMPILER=f2py.*/DEFAULT_F2PY_COMPILER=f2py3/' Source/make_opts + cat Source/make_opts | sed 
'/#end/q' | sort > Source/make_opts.new + cat Source/make_opts | sed -n -e '/#end/,$p' >> Source/make_opts.new + \mv Source/make_opts.new Source/make_opts fi echo "DEBUG: cd ${PWD}; patch -p4 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.common" if ! patch -p4 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.common; then status=1; fi From 134b57f5bd5e80728138b5d8ae2ac30557cddf96 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 30 Oct 2023 18:03:27 +0100 Subject: [PATCH 081/119] [oct23av] in CODEGEN, add a quieter (-q) option to generateAndCompare.sh --- epochX/cudacpp/CODEGEN/generateAndCompare.sh | 64 ++++++++++++-------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/generateAndCompare.sh b/epochX/cudacpp/CODEGEN/generateAndCompare.sh index e88b6bd5e6..84ef801acc 100755 --- a/epochX/cudacpp/CODEGEN/generateAndCompare.sh +++ b/epochX/cudacpp/CODEGEN/generateAndCompare.sh @@ -17,7 +17,7 @@ function codeGenAndDiff() cmdext="$2" if [ "$3" != "" ]; then echo -e "INTERNAL ERROR!\nUsage: ${FUNCNAME[0]} []"; exit 1; fi # Process-dependent hardcoded configuration - echo -e "\n================================================================" + echo -e "================================================================" cmd= case "${proc}" in ee_mumu) @@ -318,13 +318,15 @@ gensym EOF done # Compare the existing generated code to the newly generated code for the specific process - pushd ${OUTDIR} >& /dev/null - echo -e "\n+++ Compare old and new code generation log for $proc\n" - ###if diff -c ${proc}.${autosuffix}.BKP/${outproc}_log.txt ${proc}.${autosuffix}; then echo "Old and new code generation logs are identical"; fi # context diff - if diff ${proc}.${autosuffix}.BKP/$(basename ${outproc})_log.txt ${proc}.${autosuffix}; then echo "Old and new code generation logs are identical"; fi # context diff - echo -e "\n+++ Compare old and new generated code for $proc\n" - if $SCRDIR/diffCode.sh ${BRIEF} -r -c 
${proc}.${autosuffix}.BKP ${proc}.${autosuffix}; then echo "Old and new generated codes are identical"; else echo -e "\nWARNING! Old and new generated codes differ"; fi - popd >& /dev/null + if [ "$QUIET" != "1" ]; then + pushd ${OUTDIR} >& /dev/null + echo -e "\n+++ Compare old and new code generation log for $proc\n" + ###if diff -c ${proc}.${autosuffix}.BKP/${outproc}_log.txt ${proc}.${autosuffix}; then echo "Old and new code generation logs are identical"; fi # context diff + if diff ${proc}.${autosuffix}.BKP/$(basename ${outproc})_log.txt ${proc}.${autosuffix}; then echo "Old and new code generation logs are identical"; fi # context diff + echo -e "\n+++ Compare old and new generated code for $proc\n" + if $SCRDIR/diffCode.sh ${BRIEF} -r -c ${proc}.${autosuffix}.BKP ${proc}.${autosuffix}; then echo "Old and new generated codes are identical"; else echo -e "\nWARNING! Old and new generated codes differ"; fi + popd >& /dev/null + fi # Compare the existing manually developed code to the newly generated code for the specific process if [ "${OUTBCK}" == "cudacpp" ] || [ "${OUTBCK}" == "gridpack" ]; then pushd ${OUTDIR} >& /dev/null @@ -333,10 +335,12 @@ EOF popd >& /dev/null fi # Print a summary of the available code - echo - echo -e "Manually developed code is\n ${OUTDIR}/${proc}" - echo -e "Old generated code moved to\n ${OUTDIR}/${proc}.${autosuffix}.BKP" - echo -e "New generated code moved to\n ${OUTDIR}/${proc}.${autosuffix}" + if [ "$QUIET" != "1" ]; then + echo + echo -e "Manually developed code is\n ${OUTDIR}/${proc}" + echo -e "Old generated code moved to\n ${OUTDIR}/${proc}.${autosuffix}.BKP" + echo -e "New generated code moved to\n ${OUTDIR}/${proc}.${autosuffix}" + fi } #-------------------------------------------------------------------------------------- @@ -354,7 +358,7 @@ function usage() echo "ERROR! 
alpaka mode is no longer supported by this script!"; exit 1 else # NB: all options with $SCRBCK=cudacpp use the 311 branch by default and always disable helicity recycling - echo "Usage: $0 [--nobrief] [--cpp|--gpu|--madnovec|--madonly|--mad|--madcpp*|--madgpu] [--nopatch|--upstream] [-c ''] " + echo "Usage: $0 [-q|--nobrief] [--cpp|--gpu|--madnovec|--madonly|--mad|--madcpp*|--madgpu] [--nopatch|--upstream] [-c ''] " echo " (*Note: the --madcpp option exists but code generation fails for it)" echo " (**Note: will be used as a relative path in ${OUTDIR} and should not contain '/' characters" echo "Example: $0 gg_tt --mad" @@ -398,7 +402,8 @@ SCRBCK=$(basename $OUTDIR) # e.g. cudacpp if $OUTDIR=epochX/cudacpp # Default output backend (in the cudacpp directory this can be changed using commad line options like --cpp, --gpu or --mad) OUTBCK=$SCRBCK -# Default: brief diffs (use --nobrief to use full diffs) +# Default: brief diffs (use --nobrief to use full diffs, use -q to be much quieter) +QUIET= BRIEF=--brief # Default for gridpacks: untar gridpack.tar.gz but do not regenerate it (use --nountaronly to regenerate it) @@ -418,8 +423,10 @@ proc= while [ "$1" != "" ]; do if [ "$1" == "-h" ] || [ "$1" == "--help" ]; then usage - elif [ "$1" == "--nobrief" ]; then + elif [ "$1" == "--nobrief" ] && [ "$QUIET" != "1" ]; then BRIEF= + elif [ "$1" == "-q" ] && [ "$BRIEF" != "" ]; then + QUIET=1 elif [ "$1" == "--nopatch" ] && [ "${PATCHLEVEL}" == "" ]; then PATCHLEVEL=--nopatch elif [ "$1" == "--upstream" ] && [ "${PATCHLEVEL}" == "" ]; then @@ -463,6 +470,7 @@ echo "SCRBCK=${SCRBCK} (uppercase=${SCRBCK^^})" echo "OUTBCK=${OUTBCK}" echo "BRIEF=${BRIEF}" +echo "QUIET=${QUIET}" echo "proc=${proc}" # Make sure that python3 is installed @@ -504,16 +512,20 @@ echo -e "\nDefault MG5AMC_HOME=$MG5AMC_HOME on $(hostname)\n" if ! git --version >& /dev/null; then echo -e "ERROR! 
git is not installed: cannot retrieve git properties of MG5aMC_HOME\n"; exit 1 fi -echo -e "Using $(git --version)" cd ${MG5AMC_HOME} -echo -e "Retrieving git information about MG5AMC_HOME" +if [ "$QUIET" != "1" ]; then + echo -e "Using $(git --version)" + echo -e "Retrieving git information about MG5AMC_HOME" +fi if ! git log -n1 >& /dev/null; then echo -e "ERROR! MG5AMC_HOME is not a git clone\n"; exit 1 fi branch_mg5amc=$(git branch --no-color | \grep ^* | awk '{print $2}') -echo -e "Current git branch of MG5AMC_HOME is '${branch_mg5amc}'" commit_mg5amc=$(git log --oneline -n1 | awk '{print $1}') -echo -e "Current git commit of MG5AMC_HOME is '${commit_mg5amc}'" +if [ "$QUIET" != "1" ]; then + echo -e "Current git branch of MG5AMC_HOME is '${branch_mg5amc}'" + echo -e "Current git commit of MG5AMC_HOME is '${commit_mg5amc}'" +fi cd - > /dev/null # Copy MG5AMC ad-hoc patches if any (unless --upstream is specified) @@ -538,12 +550,14 @@ cleanup_MG5AMC_HOME ###cleanup_MG5AMC_PLUGIN # Print differences in MG5AMC with respect to git after copying ad-hoc patches -cd ${MG5AMC_HOME} -echo -e "\n***************** Differences to the current git commit ${commit_patches} [START]" -###if [ "$(git diff)" == "" ]; then echo -e "[No differences]"; else git diff; fi -if [ "$(git diff)" == "" ]; then echo -e "[No differences]"; else git diff --name-status; fi -echo -e "***************** Differences to the current git commit ${commit_patches} [END]" -cd - > /dev/null +if [ "$QUIET" != "1" ]; then + cd ${MG5AMC_HOME} + echo -e "\n***************** Differences to the current git commit ${commit_patches} [START]" + ###if [ "$(git diff)" == "" ]; then echo -e "[No differences]"; else git diff; fi + if [ "$(git diff)" == "" ]; then echo -e "[No differences]"; else git diff --name-status; fi + echo -e "***************** Differences to the current git commit ${commit_patches} [END]\n" + cd - > /dev/null +fi # Copy the new plugin to MG5AMC_HOME (if the script directory backend is cudacpp 
or alpaka) #if [ "${SCRBCK}" == "cudacpp" ]; then From 87c43d620743e50f17b9c7acb33668f382959090 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 30 Oct 2023 18:17:47 +0100 Subject: [PATCH 082/119] [oct23av] in CODEGEN, minor improvements in generateAndCompare.sh (nicer dump of MG5 commands) --- epochX/cudacpp/CODEGEN/generateAndCompare.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/generateAndCompare.sh b/epochX/cudacpp/CODEGEN/generateAndCompare.sh index 84ef801acc..9d43ada677 100755 --- a/epochX/cudacpp/CODEGEN/generateAndCompare.sh +++ b/epochX/cudacpp/CODEGEN/generateAndCompare.sh @@ -172,7 +172,7 @@ function codeGenAndDiff() fi echo "set stdout_level DEBUG" >> ${outproc}.mg # does not help (log is essentially identical) but add it anyway echo "set zerowidth_tchannel F" >> ${outproc}.mg # workaround for #476: do not use a zero top quark width in fortran (~E-3 effect on physics) - echo "${cmd}" >> ${outproc}.mg + echo "${cmd}" | sed "s/;/\n/g" | sed "s/ *$//" | sed "s/^ *//" >> ${outproc}.mg if [ "${SCRBCK}" == "gridpack" ]; then # $SCRBCK=$OUTBCK=gridpack ###echo "output ${outproc} ${helrecopt}" >> ${outproc}.mg ###echo "launch" >> ${outproc}.mg @@ -266,7 +266,7 @@ function codeGenAndDiff() cp -dpr ${outprocauto} ${OUTDIR}/${proc}.${autosuffix} echo -e "\nOutput source code has been copied to ${OUTDIR}/${proc}.${autosuffix}" # Add file mg5.in as in Stephan's runCodegen.sh script - cat ${MG5AMC_HOME}/${outproc}.mg | sed "s|${outproc}|${proc}.${autosuffix}|" | sed "s/;/\n/g" | sed "s/ *$//" | sed "s/^ *//" > ${OUTDIR}/${proc}.${autosuffix}/mg5.in + cat ${MG5AMC_HOME}/${outproc}.mg | sed "s|${outproc}|${proc}.${autosuffix}|" > ${OUTDIR}/${proc}.${autosuffix}/mg5.in # Fix build errors which arise because the autogenerated directories are not relocatable (see #400) if [ "${OUTBCK}" == "madnovec" ] || [ "${OUTBCK}" == "madonly" ] || [ "${OUTBCK}" == "mad" ] || [ "${OUTBCK}" == "madcpp" ] || [ "${OUTBCK}" == 
"madgpu" ]; then cat ${OUTDIR}/${proc}.${autosuffix}/Cards/me5_configuration.txt | sed 's/mg5_path/#mg5_path/' > ${OUTDIR}/${proc}.${autosuffix}/Cards/me5_configuration.txt.new From 8f96c0a29b7a3cfda524c570a876ac2440d99b04 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 30 Oct 2023 19:38:04 +0100 Subject: [PATCH 083/119] [oct23av] in tput/throughputX.sh, BUG FIX (remove the build of topdir/test which is no longer relevant!) --- epochX/cudacpp/tput/throughputX.sh | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/epochX/cudacpp/tput/throughputX.sh b/epochX/cudacpp/tput/throughputX.sh index 430a82df5e..f11bc2a612 100755 --- a/epochX/cudacpp/tput/throughputX.sh +++ b/epochX/cudacpp/tput/throughputX.sh @@ -407,13 +407,12 @@ if [ "${maketype}" == "-dryrun" ]; then else - pushd $topdir/test >& /dev/null - echo "Building in $(pwd)" - make; echo # avoid issues with googletest in parallel builds - popd >& /dev/null + # (1) Fixme? build googletest once and for all to avoid issues in parallel builds? 
+ # However, $topdir/test is NO LONGER RELEVANT and googletest must be built from one specific process + # In addition, CXXNAMESUFFIX must be set by cudacpp.mk, so googletest must be built from one P1 directory + # (2) Iterate over all directories (the first one will build googletest) for dir in $dirs; do - export USEBUILDDIR=1 pushd $dir >& /dev/null echo "Building in $(pwd)" @@ -443,7 +442,6 @@ else export HRDCOD= export HELINL= export FPTYPE= - done if [ "${maketype}" == "-makecleanonly" ]; then printf "MAKE CLEANALL COMPLETED\n"; exit 0; fi From 7929623ccba0b5b0b436c642647239b96084c547 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 30 Oct 2023 19:52:07 +0100 Subject: [PATCH 084/119] [oct23av] in CODEGEN, add target gtestlibs in cudacpp.mk to allow the build of googletest alone in an easy way --- .../madgraph/iolibs/template_files/gpu/cudacpp.mk | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index 2f4474c1d6..3a75b8063c 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -706,9 +706,15 @@ $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_object $(NVCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif +# Use target gtestlibs to build only googletest +ifneq ($(GTESTLIBS),) +gtestlibs: $(GTESTLIBS) +endif + # Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 $(GTESTLIBS): ifneq ($(shell which flock 2>/dev/null),) + @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else $(MAKE) -C $(TESTDIR) From cc11757b4fba539b5c2204ece5c2cbc2c63f1e4b Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 30 Oct 2023 20:00:14 +0100 Subject: [PATCH 085/119] [oct23av] in CODEGEN finally add allGenerateAndCompare.sh to generate all default 15 processes --- .../cudacpp/CODEGEN/allGenerateAndCompare.sh | 33 +++++++++++++++++++ epochX/cudacpp/CODEGEN/generateAndCompare.sh | 4 +++ 2 files changed, 37 insertions(+) create mode 100755 epochX/cudacpp/CODEGEN/allGenerateAndCompare.sh diff --git a/epochX/cudacpp/CODEGEN/allGenerateAndCompare.sh b/epochX/cudacpp/CODEGEN/allGenerateAndCompare.sh new file mode 100755 index 0000000000..ed73959a27 --- /dev/null +++ b/epochX/cudacpp/CODEGEN/allGenerateAndCompare.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Copyright (C) 2020-2023 CERN and UCLouvain. +# Licensed under the GNU Lesser General Public License (version 3 or later). +# Created by: A. Valassi (Oct 2023) for the MG5aMC CUDACPP plugin. +# Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. + +set -e # fail on error + +cd $(dirname $0)/.. 
+ +./CODEGEN/generateAndCompare.sh -q ee_mumu +./CODEGEN/generateAndCompare.sh -q ee_mumu --mad + +./CODEGEN/generateAndCompare.sh -q gg_tt +./CODEGEN/generateAndCompare.sh -q gg_tt --mad + +./CODEGEN/generateAndCompare.sh -q gg_ttg +./CODEGEN/generateAndCompare.sh -q gg_ttg --mad + +./CODEGEN/generateAndCompare.sh -q gg_ttgg +./CODEGEN/generateAndCompare.sh -q gg_ttgg --mad + +./CODEGEN/generateAndCompare.sh -q gg_ttggg +./CODEGEN/generateAndCompare.sh -q gg_ttggg --mad + +./CODEGEN/generateAndCompare.sh -q gq_ttq +./CODEGEN/generateAndCompare.sh -q gq_ttq --mad + +./CODEGEN/generateAndCompare.sh -q heft_gg_h + +./CODEGEN/generateAndCompare.sh -q gg_tt01g --mad + +./CODEGEN/generateAndCompare.sh -q pp_tt012j --mad diff --git a/epochX/cudacpp/CODEGEN/generateAndCompare.sh b/epochX/cudacpp/CODEGEN/generateAndCompare.sh index 9d43ada677..b68959bc88 100755 --- a/epochX/cudacpp/CODEGEN/generateAndCompare.sh +++ b/epochX/cudacpp/CODEGEN/generateAndCompare.sh @@ -605,3 +605,7 @@ elif [ "${OUTBCK}" == "mad" ]; then exit 1 fi fi + +echo +echo "********************************************************************************" +echo From 3e2af0197886c6886190f90609a572f8e0808c49 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 30 Oct 2023 21:21:36 +0100 Subject: [PATCH 086/119] [oct23av] in tput/throughputX.sh, improve the bug fix to build gtestlibs (use the newly added gtestlibs target in cudacpp.mk) --- epochX/cudacpp/tput/throughputX.sh | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/epochX/cudacpp/tput/throughputX.sh b/epochX/cudacpp/tput/throughputX.sh index f11bc2a612..34aa26c45b 100755 --- a/epochX/cudacpp/tput/throughputX.sh +++ b/epochX/cudacpp/tput/throughputX.sh @@ -407,15 +407,19 @@ if [ "${maketype}" == "-dryrun" ]; then else - # (1) Fixme? build googletest once and for all to avoid issues in parallel builds? 
- # However, $topdir/test is NO LONGER RELEVANT and googletest must be built from one specific process - # In addition, CXXNAMESUFFIX must be set by cudacpp.mk, so googletest must be built from one P1 directory - - # (2) Iterate over all directories (the first one will build googletest) + # Iterate over all directories (the first one will build googletest) + gtestlibs=0 for dir in $dirs; do export USEBUILDDIR=1 pushd $dir >& /dev/null echo "Building in $(pwd)" + if [ "${gtestlibs}" == "0" ]; then + # Build googletest once and for all to avoid issues in parallel builds + # NB1: $topdir/test is NO LONGER RELEVANT and googletest must be built from one specific process + # NB2: CXXNAMESUFFIX must be set by cudacpp.mk, so googletest must be built from one P1 directory + gtestlibs=1 + make -f cudacpp.mk gtestlibs + fi if [ "${maketype}" == "-makeclean" ]; then make cleanall; echo; fi if [ "${maketype}" == "-makecleanonly" ]; then make cleanall; echo; continue; fi for hrdcod in $hrdcods; do From 1ab6b784a69532a370d07815a265e8ea39c83d00 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 30 Oct 2023 22:08:29 +0100 Subject: [PATCH 087/119] [oct23av] regenerate all 8 mad and 7 sa processes with latest CODEGEN (add gtestlibs and an EFT fix in cudacpp.mk, reorder all make_opts, no other code changes) --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 14 ++--- epochX/cudacpp/ee_mumu.mad/Source/make_opts | 11 ++-- .../ee_mumu.mad/SubProcesses/cudacpp.mk | 6 +++ .../CODEGEN_cudacpp_ee_mumu_log.txt | 12 ++--- .../ee_mumu.sa/SubProcesses/cudacpp.mk | 6 +++ .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 12 ++--- epochX/cudacpp/gg_tt.mad/Source/make_opts | 11 ++-- .../cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 6 +++ .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 10 ++-- .../cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk | 6 +++ .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 23 ++++---- epochX/cudacpp/gg_tt01g.mad/Source/make_opts | 11 ++-- .../gg_tt01g.mad/SubProcesses/cudacpp.mk | 6 +++ 
.../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 18 +++---- epochX/cudacpp/gg_ttg.mad/Source/make_opts | 11 ++-- .../gg_ttg.mad/SubProcesses/cudacpp.mk | 6 +++ .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 10 ++-- .../cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk | 6 +++ .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 20 +++---- epochX/cudacpp/gg_ttgg.mad/Source/make_opts | 11 ++-- .../gg_ttgg.mad/SubProcesses/cudacpp.mk | 6 +++ .../CODEGEN_cudacpp_gg_ttgg_log.txt | 14 ++--- .../gg_ttgg.sa/SubProcesses/cudacpp.mk | 6 +++ .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 20 +++---- epochX/cudacpp/gg_ttggg.mad/Source/make_opts | 11 ++-- .../gg_ttggg.mad/SubProcesses/cudacpp.mk | 6 +++ .../CODEGEN_cudacpp_gg_ttggg_log.txt | 14 ++--- .../gg_ttggg.sa/SubProcesses/cudacpp.mk | 6 +++ .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 19 +++---- epochX/cudacpp/gq_ttq.mad/Source/make_opts | 11 ++-- .../gq_ttq.mad/SubProcesses/cudacpp.mk | 6 +++ .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 15 +++--- .../cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk | 6 +++ .../CODEGEN_cudacpp_heft_gg_h_log.txt | 10 ++-- .../heft_gg_h.sa/SubProcesses/cudacpp.mk | 6 +++ .../cudacpp/heft_gg_h.sa/src/cudacpp_src.mk | 4 +- .../CODEGEN_mad_pp_tt012j_log.txt | 52 +++++++++---------- epochX/cudacpp/pp_tt012j.mad/Source/make_opts | 11 ++-- .../pp_tt012j.mad/SubProcesses/cudacpp.mk | 6 +++ 39 files changed, 274 insertions(+), 171 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 082b29214b..7fa45fb028 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005426645278930664  +DEBUG: model prefixing takes 0.005362033843994141  INFO: Restrict model sm with file 
models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,7 +174,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,19 +191,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.097 s +Wrote files for 8 helas calls in 0.098 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.195 s +ALOHA: aloha creates 3 routines in 0.197 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.248 s +ALOHA: aloha creates 7 routines in 0.250 s FFV1 FFV1 FFV2 @@ -250,9 +250,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.886s +real 0m1.902s user 0m1.610s -sys 0m0.215s +sys 0m0.252s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/ee_mumu.mad/Source/make_opts b/epochX/cudacpp/ee_mumu.mad/Source/make_opts index 57f5f7bb96..6409e99a49 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/make_opts +++ b/epochX/cudacpp/ee_mumu.mad/Source/make_opts @@ -1,12 +1,13 @@ -GLOBAL_FLAG=-O3 -ffast-math -fbounds-check DEFAULT_CPP_COMPILER=g++ -MACFLAG= -STDLIB=-lstdc++ -STDLIB_FLAG= -DEFAULT_F_COMPILER=gfortran DEFAULT_F2PY_COMPILER=f2py3 +DEFAULT_F_COMPILER=gfortran +#end_of_make_opts_variables +GLOBAL_FLAG=-O3 -ffast-math -fbounds-check +MACFLAG= MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime PYTHIA8_PATH=NotInstalled +STDLIB_FLAG= +STDLIB=-lstdc++ #end_of_make_opts_variables BIASLIBDIR=../../../lib/ diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk index 0d604e747c..471a7dec51 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk @@ -706,9 +706,15 @@ $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_object $(NVCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif +# Use target gtestlibs to build only googletest +ifneq ($(GTESTLIBS),) +gtestlibs: $(GTESTLIBS) +endif + # Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 $(GTESTLIBS): ifneq ($(shell which flock 2>/dev/null),) + @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else $(MAKE) -C $(TESTDIR) diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 4feb407ff8..a43de6210d 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005356311798095703  +DEBUG: model prefixing takes 0.00529170036315918  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -154,7 +154,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.005 s +1 processes with 2 diagrams generated in 0.004 s Total: 1 processes with 2 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu Load PLUGIN.CUDACPP_OUTPUT @@ -181,7 +181,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.265 s +ALOHA: aloha creates 4 routines in 0.263 s FFV1 FFV1 FFV2 @@ -202,6 +202,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.648s -user 0m0.580s -sys 0m0.063s +real 0m0.767s +user 0m0.591s +sys 0m0.069s diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk index 0d604e747c..471a7dec51 100644 --- 
a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk @@ -706,9 +706,15 @@ $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_object $(NVCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif +# Use target gtestlibs to build only googletest +ifneq ($(GTESTLIBS),) +gtestlibs: $(GTESTLIBS) +endif + # Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 $(GTESTLIBS): ifneq ($(shell which flock 2>/dev/null),) + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else $(MAKE) -C $(TESTDIR) diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 3967086980..9370a7cd0f 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005898714065551758  +DEBUG: model prefixing takes 0.005549192428588867  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -200,7 +200,7 @@ ALOHA: aloha creates 2 routines in 0.142 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.130 s +ALOHA: aloha creates 4 routines in 0.135 s VVV1 FFV1 FFV1 @@ -239,9 +239,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.715s -user 0m1.464s -sys 0m0.218s +real 0m1.863s +user 0m1.505s +sys 0m0.227s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt.mad/Source/make_opts b/epochX/cudacpp/gg_tt.mad/Source/make_opts index 57f5f7bb96..6409e99a49 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/make_opts +++ b/epochX/cudacpp/gg_tt.mad/Source/make_opts @@ -1,12 +1,13 @@ -GLOBAL_FLAG=-O3 -ffast-math -fbounds-check DEFAULT_CPP_COMPILER=g++ -MACFLAG= -STDLIB=-lstdc++ -STDLIB_FLAG= -DEFAULT_F_COMPILER=gfortran DEFAULT_F2PY_COMPILER=f2py3 +DEFAULT_F_COMPILER=gfortran +#end_of_make_opts_variables +GLOBAL_FLAG=-O3 -ffast-math -fbounds-check +MACFLAG= MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime PYTHIA8_PATH=NotInstalled +STDLIB_FLAG= +STDLIB=-lstdc++ #end_of_make_opts_variables BIASLIBDIR=../../../lib/ diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 0d604e747c..471a7dec51 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -706,9 +706,15 @@ $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_object $(NVCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif +# Use target gtestlibs to build only googletest +ifneq ($(GTESTLIBS),) +gtestlibs: $(GTESTLIBS) +endif + # Use flock (Linux only, no Mac) 
to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 $(GTESTLIBS): ifneq ($(shell which flock 2>/dev/null),) + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else $(MAKE) -C $(TESTDIR) diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 28306d5b90..01c5f35334 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005451679229736328  +DEBUG: model prefixing takes 0.005320310592651367  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -180,7 +180,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.148 s VVV1 FFV1 FFV1 @@ -197,6 +197,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.589s -user 0m0.464s -sys 0m0.056s +real 0m0.645s +user 0m0.480s +sys 0m0.053s diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk index 0d604e747c..471a7dec51 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk @@ -706,9 +706,15 @@ $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_object $(NVCC) -o $@ $(cxx_objects_lib) 
$(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif +# Use target gtestlibs to build only googletest +ifneq ($(GTESTLIBS),) +gtestlibs: $(GTESTLIBS) +endif + # Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 $(GTESTLIBS): ifneq ($(shell which flock 2>/dev/null),) + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else $(MAKE) -C $(TESTDIR) diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 5e19f38510..5ce931bf1e 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -58,11 +58,11 @@ The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F -generate g g > t t~; add process g g > t t~ g +generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005849123001098633  +DEBUG: model prefixing takes 0.005847454071044922  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,8 +155,9 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.009 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams +add process g g > t t~ g INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED @@ -184,7 +185,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -201,7 +202,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -217,14 +218,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s -Wrote files for 46 helas calls in 0.240 s +Wrote files for 46 helas calls in 0.241 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.322 s +ALOHA: aloha creates 5 routines in 0.320 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -232,7 +233,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.312 s +ALOHA: aloha creates 10 routines in 0.305 s VVV1 VVV1 FFV1 @@ -284,9 +285,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.335s -user 0m2.111s -sys 0m0.220s +real 0m2.290s +user 0m2.032s +sys 0m0.236s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/make_opts b/epochX/cudacpp/gg_tt01g.mad/Source/make_opts index 57f5f7bb96..6409e99a49 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/make_opts +++ b/epochX/cudacpp/gg_tt01g.mad/Source/make_opts @@ -1,12 +1,13 @@ -GLOBAL_FLAG=-O3 -ffast-math -fbounds-check DEFAULT_CPP_COMPILER=g++ -MACFLAG= -STDLIB=-lstdc++ -STDLIB_FLAG= -DEFAULT_F_COMPILER=gfortran DEFAULT_F2PY_COMPILER=f2py3 +DEFAULT_F_COMPILER=gfortran +#end_of_make_opts_variables +GLOBAL_FLAG=-O3 -ffast-math -fbounds-check +MACFLAG= MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime PYTHIA8_PATH=NotInstalled +STDLIB_FLAG= +STDLIB=-lstdc++ #end_of_make_opts_variables BIASLIBDIR=../../../lib/ diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk index 0d604e747c..471a7dec51 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk @@ -706,9 +706,15 @@ $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_object $(NVCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif +# Use target gtestlibs to build only googletest +ifneq ($(GTESTLIBS),) +gtestlibs: $(GTESTLIBS) +endif + # Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 $(GTESTLIBS): ifneq ($(shell which flock 2>/dev/null),) + @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else $(MAKE) -C $(TESTDIR) diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index de38108943..85ab3645db 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005337953567504883  +DEBUG: model prefixing takes 0.00560450553894043  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.022 s +1 processes with 16 diagrams generated in 0.021 s Total: 1 processes with 16 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,14 +191,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s -Wrote files for 36 helas calls in 0.146 s +Wrote files for 36 helas calls in 0.147 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.321 s +ALOHA: aloha creates 5 routines in 0.325 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.306 s +ALOHA: aloha creates 10 routines in 0.309 s VVV1 VVV1 FFV1 @@ -254,9 +254,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.143s -user 0m1.908s -sys 0m0.230s +real 0m3.233s +user 0m1.929s +sys 0m0.241s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttg.mad/Source/make_opts b/epochX/cudacpp/gg_ttg.mad/Source/make_opts index 57f5f7bb96..6409e99a49 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/make_opts +++ b/epochX/cudacpp/gg_ttg.mad/Source/make_opts @@ -1,12 +1,13 @@ -GLOBAL_FLAG=-O3 -ffast-math -fbounds-check DEFAULT_CPP_COMPILER=g++ -MACFLAG= -STDLIB=-lstdc++ -STDLIB_FLAG= -DEFAULT_F_COMPILER=gfortran DEFAULT_F2PY_COMPILER=f2py3 +DEFAULT_F_COMPILER=gfortran +#end_of_make_opts_variables +GLOBAL_FLAG=-O3 -ffast-math -fbounds-check +MACFLAG= MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime PYTHIA8_PATH=NotInstalled +STDLIB_FLAG= +STDLIB=-lstdc++ #end_of_make_opts_variables BIASLIBDIR=../../../lib/ diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk index 0d604e747c..471a7dec51 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk @@ -706,9 +706,15 @@ $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_object $(NVCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif +# Use target gtestlibs to build only googletest +ifneq ($(GTESTLIBS),) +gtestlibs: $(GTESTLIBS) +endif + # Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 $(GTESTLIBS): ifneq ($(shell which flock 2>/dev/null),) + @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else $(MAKE) -C $(TESTDIR) diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index ddd57aa8e0..579bb1b9cf 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005626201629638672  +DEBUG: model prefixing takes 0.005326509475708008  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.321 s +ALOHA: aloha creates 5 routines in 0.322 s VVV1 VVV1 FFV1 @@ -205,6 +205,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/ DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.792s -user 0m0.705s -sys 0m0.057s +real 0m1.065s +user 0m0.708s +sys 0m0.052s diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk index 0d604e747c..471a7dec51 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk @@ -706,9 +706,15 @@ $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_object $(NVCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif +# Use target gtestlibs to build only googletest +ifneq ($(GTESTLIBS),) 
+gtestlibs: $(GTESTLIBS) +endif + # Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 $(GTESTLIBS): ifneq ($(shell which flock 2>/dev/null),) + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else $(MAKE) -C $(TESTDIR) diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 843fa9b46f..869cf86dfe 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005464076995849609  +DEBUG: model prefixing takes 0.0052950382232666016  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.156 s +1 processes with 123 diagrams generated in 0.155 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,15 +190,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.421 s -Wrote files for 222 helas calls in 0.680 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.419 s +Wrote files for 222 helas calls in 0.682 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.325 s +ALOHA: aloha creates 5 routines in 0.327 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.314 s +ALOHA: aloha creates 10 routines in 0.308 s VVV1 VVV1 FFV1 @@ -257,9 +257,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.241s -user 0m2.970s -sys 0m0.249s +real 0m3.234s +user 0m2.983s +sys 0m0.244s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/make_opts b/epochX/cudacpp/gg_ttgg.mad/Source/make_opts index 57f5f7bb96..6409e99a49 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/make_opts +++ b/epochX/cudacpp/gg_ttgg.mad/Source/make_opts @@ -1,12 +1,13 @@ -GLOBAL_FLAG=-O3 -ffast-math -fbounds-check DEFAULT_CPP_COMPILER=g++ -MACFLAG= -STDLIB=-lstdc++ -STDLIB_FLAG= -DEFAULT_F_COMPILER=gfortran DEFAULT_F2PY_COMPILER=f2py3 +DEFAULT_F_COMPILER=gfortran +#end_of_make_opts_variables +GLOBAL_FLAG=-O3 -ffast-math -fbounds-check +MACFLAG= MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime PYTHIA8_PATH=NotInstalled +STDLIB_FLAG= +STDLIB=-lstdc++ #end_of_make_opts_variables BIASLIBDIR=../../../lib/ diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk index 0d604e747c..471a7dec51 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk @@ -706,9 +706,15 @@ $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_object $(NVCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif +# Use target gtestlibs to build only googletest +ifneq ($(GTESTLIBS),) +gtestlibs: $(GTESTLIBS) +endif + # Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 $(GTESTLIBS): ifneq ($(shell which flock 2>/dev/null),) + @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else $(MAKE) -C $(TESTDIR) diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 753184fea8..de0b790f71 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005289793014526367  +DEBUG: model prefixing takes 0.005513191223144531  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.159 s +1 processes with 123 diagrams generated in 0.156 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.421 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.415 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.313 s +ALOHA: aloha creates 5 routines in 0.318 s VVV1 VVV1 FFV1 @@ -208,6 +208,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m1.487s -user 0m1.350s -sys 0m0.062s +real 0m1.502s +user 0m1.341s +sys 0m0.065s diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk index 0d604e747c..471a7dec51 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk @@ -706,9 +706,15 @@ $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_object $(NVCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif +# Use target gtestlibs to build only googletest +ifneq ($(GTESTLIBS),) +gtestlibs: $(GTESTLIBS) +endif + # Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 $(GTESTLIBS): ifneq ($(shell which flock 2>/dev/null),) + @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else $(MAKE) -C $(TESTDIR) diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 14b1e83a4e..f3a2f68dc9 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00543212890625  +DEBUG: model prefixing takes 0.005324363708496094  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.854 s +1 processes with 1240 diagrams generated in 1.841 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -177,7 +177,7 @@ INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -192,15 +192,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.461 s -Wrote files for 2281 helas calls in 18.375 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.486 s +Wrote files for 2281 helas calls in 18.712 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.311 s +ALOHA: aloha creates 5 routines in 0.335 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -208,7 +208,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.306 s +ALOHA: aloha creates 10 routines in 0.327 s VVV1 VVV1 FFV1 @@ -259,9 +259,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m28.887s -user 0m28.381s -sys 0m0.404s +real 0m29.340s +user 0m28.825s +sys 0m0.401s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/make_opts b/epochX/cudacpp/gg_ttggg.mad/Source/make_opts index 57f5f7bb96..6409e99a49 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/make_opts +++ b/epochX/cudacpp/gg_ttggg.mad/Source/make_opts @@ -1,12 +1,13 @@ -GLOBAL_FLAG=-O3 -ffast-math -fbounds-check DEFAULT_CPP_COMPILER=g++ -MACFLAG= -STDLIB=-lstdc++ -STDLIB_FLAG= -DEFAULT_F_COMPILER=gfortran DEFAULT_F2PY_COMPILER=f2py3 +DEFAULT_F_COMPILER=gfortran +#end_of_make_opts_variables +GLOBAL_FLAG=-O3 -ffast-math -fbounds-check +MACFLAG= MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime PYTHIA8_PATH=NotInstalled +STDLIB_FLAG= +STDLIB=-lstdc++ #end_of_make_opts_variables BIASLIBDIR=../../../lib/ diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk index 0d604e747c..471a7dec51 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk @@ -706,9 +706,15 @@ $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_object $(NVCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif +# Use target gtestlibs to build only googletest +ifneq ($(GTESTLIBS),) +gtestlibs: $(GTESTLIBS) +endif + # Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 $(GTESTLIBS): ifneq ($(shell which flock 2>/dev/null),) + @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else $(MAKE) -C $(TESTDIR) diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 938b434a63..1fad08a270 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0052831172943115234  +DEBUG: model prefixing takes 0.0054934024810791016  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.893 s +1 processes with 1240 diagrams generated in 1.864 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.593 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.563 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.344 s +ALOHA: aloha creates 5 routines in 0.358 s VVV1 VVV1 FFV1 @@ -208,6 +208,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m13.247s -user 0m12.792s -sys 0m0.117s +real 0m13.203s +user 0m12.914s +sys 0m0.127s diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk index 0d604e747c..471a7dec51 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk @@ -706,9 +706,15 @@ $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_object $(NVCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif +# Use target gtestlibs to build only googletest +ifneq ($(GTESTLIBS),) +gtestlibs: $(GTESTLIBS) +endif + # Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 $(GTESTLIBS): ifneq ($(shell which flock 2>/dev/null),) + @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else $(MAKE) -C $(TESTDIR) diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 30f3409999..d1d8f03b76 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -58,10 +58,10 @@ The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F -define q = u c d s u~ c~ d~ s~; generate g q > t t~ q +define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0053250789642333984  +DEBUG: model prefixing takes 0.005591630935668945  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,6 +150,7 @@ Defined multiparticle vl = ve vm vt Defined multiparticle vl~ = ve~ vm~ vt~ Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~ e+ mu+ t b t~ b~ z w+ h w- ta- ta+ Defined multiparticle q = u c d s u~ c~ d~ s~ +generate g q > t t~ q INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED @@ -169,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.077 s +8 processes with 40 diagrams generated in 0.076 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -197,7 +198,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -214,7 +215,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -229,12 +230,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s Wrote files for 32 helas calls in 0.215 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.142 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines @@ -295,8 +296,8 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.002s -user 0m1.736s +real 0m2.149s +user 0m1.674s sys 0m0.228s ************************************************************ * * diff --git a/epochX/cudacpp/gq_ttq.mad/Source/make_opts b/epochX/cudacpp/gq_ttq.mad/Source/make_opts index 57f5f7bb96..6409e99a49 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/make_opts +++ b/epochX/cudacpp/gq_ttq.mad/Source/make_opts @@ -1,12 +1,13 @@ -GLOBAL_FLAG=-O3 -ffast-math -fbounds-check DEFAULT_CPP_COMPILER=g++ -MACFLAG= -STDLIB=-lstdc++ -STDLIB_FLAG= -DEFAULT_F_COMPILER=gfortran DEFAULT_F2PY_COMPILER=f2py3 +DEFAULT_F_COMPILER=gfortran +#end_of_make_opts_variables +GLOBAL_FLAG=-O3 -ffast-math -fbounds-check +MACFLAG= MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime PYTHIA8_PATH=NotInstalled +STDLIB_FLAG= +STDLIB=-lstdc++ #end_of_make_opts_variables BIASLIBDIR=../../../lib/ diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk index 0d604e747c..471a7dec51 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk +++ 
b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk @@ -706,9 +706,15 @@ $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_object $(NVCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif +# Use target gtestlibs to build only googletest +ifneq ($(GTESTLIBS),) +gtestlibs: $(GTESTLIBS) +endif + # Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 $(GTESTLIBS): ifneq ($(shell which flock 2>/dev/null),) + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else $(MAKE) -C $(TESTDIR) diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 1894407637..2cd643e021 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -58,10 +58,10 @@ The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F -define q = u c d s u~ c~ d~ s~; generate g q > t t~ q +define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005591869354248047  +DEBUG: model prefixing takes 0.0052640438079833984  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,6 +150,7 @@ Defined multiparticle vl = ve vm vt Defined multiparticle vl~ = ve~ vm~ vt~ Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~ e+ mu+ t b t~ b~ z w+ h w- ta- ta+ Defined multiparticle q = u c d s u~ c~ d~ s~ +generate g q > t t~ q INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED @@ -169,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.076 s +8 processes with 40 diagrams generated in 0.077 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -210,7 +211,7 @@ Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.144 s +ALOHA: aloha creates 2 routines in 0.140 s FFV1 FFV1 FFV1 @@ -228,6 +229,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.998s -user 0m0.583s -sys 0m0.054s +real 0m0.783s +user 0m0.565s +sys 0m0.062s diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk index 0d604e747c..471a7dec51 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk @@ -706,9 +706,15 @@ $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_object $(NVCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif +# Use target gtestlibs to build only googletest +ifneq ($(GTESTLIBS),) +gtestlibs: $(GTESTLIBS) +endif + # Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 $(GTESTLIBS): ifneq ($(shell which flock 2>/dev/null),) + @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else $(MAKE) -C $(TESTDIR) diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index f9fb551ac7..87bea43991 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -58,9 +58,10 @@ The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F -set auto_convert_model T; import model heft; generate g g > h +set auto_convert_model T save options auto_convert_model save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +import model heft INFO: Restrict model heft with file models/heft/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: s u w+ at order: QED=1  @@ -123,6 +124,7 @@ Defined multiparticle l- = e- mu- Defined multiparticle vl = ve vm vt Defined multiparticle vl~ = ve~ vm~ vt~ Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~ e+ mu+ t b t~ b~ z w+ h h1 w- ta- ta+ +generate g g > h INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying process: g g > h HIG<=1 HIW<=1 WEIGHTED<=2 @1 @@ -165,6 +167,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.582s -user 0m0.368s -sys 0m0.042s +real 0m0.417s +user 0m0.362s +sys 0m0.046s diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk index 0d604e747c..471a7dec51 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk @@ -706,9 +706,15 @@ $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_object $(NVCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif +# Use target gtestlibs to build only googletest +ifneq ($(GTESTLIBS),) +gtestlibs: $(GTESTLIBS) +endif + # Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 $(GTESTLIBS): ifneq ($(shell which flock 2>/dev/null),) + @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else $(MAKE) -C $(TESTDIR) diff --git a/epochX/cudacpp/heft_gg_h.sa/src/cudacpp_src.mk b/epochX/cudacpp/heft_gg_h.sa/src/cudacpp_src.mk index d4cc628aec..0bd815c9b3 100644 --- a/epochX/cudacpp/heft_gg_h.sa/src/cudacpp_src.mk +++ b/epochX/cudacpp/heft_gg_h.sa/src/cudacpp_src.mk @@ -257,9 +257,9 @@ $(BUILDDIR)/%_cu.o : %.cc *.h $(BUILDDIR)/.build.$(TAG) #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, Parameters_sm.o read_slha.o) +cxx_objects=$(addprefix $(BUILDDIR)/, Parameters_heft.o read_slha.o) ifneq ($(NVCC),) -cu_objects=$(addprefix $(BUILDDIR)/, Parameters_sm_cu.o) +cu_objects=$(addprefix $(BUILDDIR)/, Parameters_heft_cu.o) endif # Target (and build rules): common (src) library diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index a1f590687e..f5d02691c0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005295515060424805  +DEBUG: model prefixing takes 0.005496025085449219  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.793 s +65 processes with 1119 diagrams generated in 1.800 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -497,7 +497,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -514,7 +514,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -531,7 +531,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -548,7 +548,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -565,7 +565,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -582,7 +582,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -599,7 +599,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -616,7 +616,7 @@ INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -633,7 +633,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -650,7 +650,7 @@ INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -667,7 +667,7 @@ INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -684,7 +684,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -701,7 +701,7 @@ INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -718,7 +718,7 @@ INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -735,7 +735,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -752,7 +752,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -769,7 +769,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -786,7 +786,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -801,8 +801,8 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.263 s -Wrote files for 810 helas calls in 3.193 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.265 s +Wrote files for 810 helas calls in 3.185 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines @@ -817,7 +817,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.310 s +ALOHA: aloha creates 10 routines in 0.309 s VVV1 VVV1 FFV1 @@ -1030,9 +1030,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m8.705s -user 0m8.197s -sys 0m0.465s +real 0m8.698s +user 0m8.189s +sys 0m0.479s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/make_opts b/epochX/cudacpp/pp_tt012j.mad/Source/make_opts index 57f5f7bb96..6409e99a49 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/make_opts +++ b/epochX/cudacpp/pp_tt012j.mad/Source/make_opts @@ -1,12 +1,13 @@ -GLOBAL_FLAG=-O3 -ffast-math -fbounds-check DEFAULT_CPP_COMPILER=g++ -MACFLAG= -STDLIB=-lstdc++ -STDLIB_FLAG= -DEFAULT_F_COMPILER=gfortran DEFAULT_F2PY_COMPILER=f2py3 +DEFAULT_F_COMPILER=gfortran +#end_of_make_opts_variables +GLOBAL_FLAG=-O3 -ffast-math -fbounds-check +MACFLAG= MG5AMC_VERSION=SpecifiedByMG5aMCAtRunTime PYTHIA8_PATH=NotInstalled +STDLIB_FLAG= +STDLIB=-lstdc++ #end_of_make_opts_variables BIASLIBDIR=../../../lib/ diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk index 
0d604e747c..471a7dec51 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk @@ -706,9 +706,15 @@ $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_object $(NVCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif +# Use target gtestlibs to build only googletest +ifneq ($(GTESTLIBS),) +gtestlibs: $(GTESTLIBS) +endif + # Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 $(GTESTLIBS): ifneq ($(shell which flock 2>/dev/null),) + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else $(MAKE) -C $(TESTDIR) From 6c12785385919d544d1bc26ee4dba8152e4fe490 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 30 Oct 2023 22:17:57 +0100 Subject: [PATCH 088/119] [oct23av] in tput/throughputX.sh, further improve the bug fix to build gtestlibs (skip gtestlibs build during makecleanonly) --- epochX/cudacpp/tput/throughputX.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/tput/throughputX.sh b/epochX/cudacpp/tput/throughputX.sh index 34aa26c45b..8160f7fbb9 100755 --- a/epochX/cudacpp/tput/throughputX.sh +++ b/epochX/cudacpp/tput/throughputX.sh @@ -413,7 +413,7 @@ else export USEBUILDDIR=1 pushd $dir >& /dev/null echo "Building in $(pwd)" - if [ "${gtestlibs}" == "0" ]; then + if [ "${maketype}" != "-makecleanonly" ] && [ "${gtestlibs}" == "0" ]; then # Build googletest once and for all to avoid issues in parallel builds # NB1: $topdir/test is NO LONGER RELEVANT and googletest must be built from one specific process # NB2: CXXNAMESUFFIX must be set by cudacpp.mk, so googletest must be built from one P1 directory From 30fbebb92f62d5b802b9da9a99f92e3778c1ba31 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 31 Oct 2023 
09:39:34 +0100 Subject: [PATCH 089/119] [oct23av] rerun 78 tput tests, with FPEs enabled in the check executable - some failures in ggttg f/m and gqttq f (#783), no change in performance STARTED AT Mon Oct 30 10:32:51 PM CET 2023 ./tput/teeThroughputX.sh -mix -hrd -makej -eemumu -ggtt -ggttg -ggttgg -gqttq -ggttggg -makeclean ENDED(1) AT Mon Oct 30 10:56:58 PM CET 2023 [Status=2] ./tput/teeThroughputX.sh -flt -hrd -makej -eemumu -ggtt -ggttgg -inlonly -makeclean ENDED(2) AT Mon Oct 30 11:06:26 PM CET 2023 [Status=0] ./tput/teeThroughputX.sh -makej -eemumu -ggtt -ggttg -gqttq -ggttgg -ggttggg -flt -bridge -makeclean ENDED(3) AT Mon Oct 30 11:15:35 PM CET 2023 [Status=2] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -rmbhst ENDED(4) AT Mon Oct 30 11:18:48 PM CET 2023 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -curhst ENDED(5) AT Mon Oct 30 11:22:00 PM CET 2023 [Status=0] --- .../log_eemumu_mad_d_inl0_hrd0.txt | 98 +++++++-------- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 98 +++++++-------- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 98 +++++++-------- .../log_eemumu_mad_d_inl0_hrd0_curhst.txt | 98 +++++++-------- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 98 +++++++-------- .../log_eemumu_mad_d_inl0_hrd1.txt | 98 +++++++-------- .../log_eemumu_mad_d_inl1_hrd0.txt | 98 +++++++-------- .../log_eemumu_mad_d_inl1_hrd1.txt | 98 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0.txt | 98 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 98 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 98 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0_curhst.txt | 98 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 98 +++++++-------- .../log_eemumu_mad_f_inl0_hrd1.txt | 98 +++++++-------- .../log_eemumu_mad_f_inl1_hrd0.txt | 98 +++++++-------- .../log_eemumu_mad_f_inl1_hrd1.txt | 98 +++++++-------- .../log_eemumu_mad_m_inl0_hrd0.txt | 98 +++++++-------- .../log_eemumu_mad_m_inl0_hrd1.txt | 98 +++++++-------- 
.../log_ggtt_mad_d_inl0_hrd0.txt | 98 +++++++-------- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 98 +++++++-------- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 98 +++++++-------- .../log_ggtt_mad_d_inl0_hrd0_curhst.txt | 98 +++++++-------- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 98 +++++++-------- .../log_ggtt_mad_d_inl0_hrd1.txt | 98 +++++++-------- .../log_ggtt_mad_d_inl1_hrd0.txt | 98 +++++++-------- .../log_ggtt_mad_d_inl1_hrd1.txt | 98 +++++++-------- .../log_ggtt_mad_f_inl0_hrd0.txt | 98 +++++++-------- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 98 +++++++-------- .../log_ggtt_mad_f_inl0_hrd0_common.txt | 98 +++++++-------- .../log_ggtt_mad_f_inl0_hrd0_curhst.txt | 98 +++++++-------- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 98 +++++++-------- .../log_ggtt_mad_f_inl0_hrd1.txt | 98 +++++++-------- .../log_ggtt_mad_f_inl1_hrd0.txt | 98 +++++++-------- .../log_ggtt_mad_f_inl1_hrd1.txt | 98 +++++++-------- .../log_ggtt_mad_m_inl0_hrd0.txt | 98 +++++++-------- .../log_ggtt_mad_m_inl0_hrd1.txt | 98 +++++++-------- .../log_ggttg_mad_d_inl0_hrd0.txt | 112 +++++++++--------- .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 112 +++++++++--------- .../log_ggttg_mad_d_inl0_hrd1.txt | 112 +++++++++--------- .../log_ggttg_mad_f_inl0_hrd0.txt | 48 ++++---- .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 48 ++++---- .../log_ggttg_mad_f_inl0_hrd1.txt | 48 ++++---- .../log_ggttg_mad_m_inl0_hrd0.txt | 48 ++++---- .../log_ggttg_mad_m_inl0_hrd1.txt | 48 ++++---- .../log_ggttgg_mad_d_inl0_hrd0.txt | 112 +++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 112 +++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 112 +++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_curhst.txt | 112 +++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 112 +++++++++--------- .../log_ggttgg_mad_d_inl0_hrd1.txt | 112 +++++++++--------- .../log_ggttgg_mad_d_inl1_hrd0.txt | 112 +++++++++--------- .../log_ggttgg_mad_d_inl1_hrd1.txt | 112 +++++++++--------- 
.../log_ggttgg_mad_f_inl0_hrd0.txt | 112 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 112 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 112 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_curhst.txt | 112 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 112 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd1.txt | 112 +++++++++--------- .../log_ggttgg_mad_f_inl1_hrd0.txt | 112 +++++++++--------- .../log_ggttgg_mad_f_inl1_hrd1.txt | 112 +++++++++--------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 112 +++++++++--------- .../log_ggttgg_mad_m_inl0_hrd1.txt | 112 +++++++++--------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 112 +++++++++--------- .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 112 +++++++++--------- .../log_ggttggg_mad_d_inl0_hrd1.txt | 112 +++++++++--------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 112 +++++++++--------- .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 112 +++++++++--------- .../log_ggttggg_mad_f_inl0_hrd1.txt | 112 +++++++++--------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 112 +++++++++--------- .../log_ggttggg_mad_m_inl0_hrd1.txt | 112 +++++++++--------- .../log_gqttq_mad_d_inl0_hrd0.txt | 112 +++++++++--------- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 112 +++++++++--------- .../log_gqttq_mad_d_inl0_hrd1.txt | 112 +++++++++--------- .../log_gqttq_mad_f_inl0_hrd0.txt | 104 ++++++++-------- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 104 ++++++++-------- .../log_gqttq_mad_f_inl0_hrd1.txt | 104 ++++++++-------- .../log_gqttq_mad_m_inl0_hrd0.txt | 112 +++++++++--------- .../log_gqttq_mad_m_inl0_hrd1.txt | 112 +++++++++--------- 78 files changed, 4100 insertions(+), 3788 deletions(-) diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index fcb1a09690..d9773fa9e0 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ 
-1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_22:52:42 +DATE: 2023-10-30_22:38:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.441278e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.808585e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.995356e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.999458e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.940961e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.067088e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.659041 sec - 2,655,944,917 cycles # 2.986 GHz - 4,091,509,662 instructions # 1.54 insn per cycle - 0.949905999 seconds time elapsed +TOTAL : 0.786611 sec + 2,626,513,820 cycles # 3.012 GHz + 4,089,953,760 instructions # 1.56 insn per cycle + 1.073481584 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -73,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.133342e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.331411e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.331411e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.125739e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.324458e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.324458e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.947606 sec - 18,307,117,078 cycles # 3.078 GHz - 44,036,437,117 instructions # 2.41 insn per cycle - 5.953000211 seconds time elapsed +TOTAL : 5.985518 sec + 18,327,786,554 cycles # 3.060 GHz + 44,036,033,273 instructions # 2.40 insn per cycle + 5.990684166 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.674713e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.194936e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.194936e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.691045e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.213177e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.213177e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.155089 sec - 12,769,294,041 cycles # 3.071 
GHz - 31,001,341,425 instructions # 2.43 insn per cycle - 4.160412827 seconds time elapsed +TOTAL : 4.113874 sec + 12,767,822,386 cycles # 3.101 GHz + 31,002,879,427 instructions # 2.43 insn per cycle + 4.119112324 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.069533e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.891926e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.891926e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.100650e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.950531e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.950531e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.433544 sec - 10,048,236,563 cycles # 2.924 GHz - 19,377,551,834 instructions # 1.93 insn per cycle - 3.439026139 seconds time elapsed +TOTAL : 3.380885 sec + 10,086,974,506 cycles # 2.980 GHz + 19,377,326,262 instructions # 1.92 insn per cycle + 3.386108586 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
2.117957e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.984406e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.984406e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.189272e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.095103e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.095103e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.366988 sec - 9,747,736,709 cycles # 2.891 GHz - 18,995,639,372 instructions # 1.95 insn per cycle - 3.372542324 seconds time elapsed +TOTAL : 3.258705 sec + 9,718,547,501 cycles # 2.979 GHz + 19,005,874,298 instructions # 1.96 insn per cycle + 3.263850249 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.859802e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.480378e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.480378e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.790268e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.367426e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.367426e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.777564 sec - 8,636,054,672 cycles # 2.284 GHz - 15,738,754,146 instructions # 1.82 insn per cycle - 3.782897527 seconds time elapsed +TOTAL : 3.915927 sec + 8,595,619,480 cycles # 2.193 GHz + 15,738,404,294 instructions # 1.83 insn per cycle + 3.921168735 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index f0a5d61068..6c990677ae 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_23:25:38 +DATE: 2023-10-30_23:09:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.723996e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.742880e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.742880e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.798784e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.770897e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.770897e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.230974 sec - 7,379,150,488 cycles # 2.984 GHz - 13,205,445,723 instructions # 1.79 insn per cycle - 2.529344428 seconds time elapsed +TOTAL : 2.186945 sec + 7,434,601,508 cycles # 3.065 GHz + 13,256,508,970 instructions # 1.78 insn per cycle + 2.483222320 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost @@ -82,14 +86,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.092591e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.273879e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.273879e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.085908e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.266356e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.266356e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.358221 sec - 19,566,096,744 cycles # 3.075 GHz - 44,262,224,036 instructions # 2.26 insn per cycle - 6.364626121 seconds time elapsed +TOTAL : 6.396306 sec + 19,502,356,005 cycles # 3.047 GHz + 44,261,832,646 instructions # 2.27 insn per cycle + 6.402537575 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -110,14 +114,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.592498e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.055287e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.055287e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.605106e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.073231e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.073231e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.554785 sec - 14,092,448,650 cycles # 3.091 GHz - 31,845,159,601 instructions # 2.26 insn per cycle - 4.561253879 
seconds time elapsed +TOTAL : 4.510790 sec + 14,025,769,628 cycles # 3.106 GHz + 31,843,541,012 instructions # 2.27 insn per cycle + 4.517109862 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -138,14 +142,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.943333e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.647219e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.647219e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.966793e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.679363e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.679363e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.850255 sec - 11,400,433,618 cycles # 2.957 GHz - 20,739,426,744 instructions # 1.82 insn per cycle - 3.856612056 seconds time elapsed +TOTAL : 3.795442 sec + 11,317,692,643 cycles # 2.978 GHz + 20,737,726,615 instructions # 1.83 insn per cycle + 3.801846848 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -166,14 +170,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.027888e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.786547e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.786547e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.980473e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.725571e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.725571e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.709173 sec - 10,999,574,658 cycles # 2.961 GHz - 20,354,799,189 instructions # 1.85 insn per cycle - 3.715908173 seconds time elapsed +TOTAL : 3.783151 sec + 10,943,929,913 cycles # 2.889 GHz + 20,366,389,809 instructions # 1.86 insn per cycle + 3.789497899 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -194,14 +198,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.731760e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.259965e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.259965e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.744882e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.277335e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.277335e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.246654 sec - 9,990,693,259 cycles # 2.349 GHz - 16,883,565,363 instructions # 1.69 insn per cycle - 4.253083527 seconds time elapsed +TOTAL : 4.213123 sec + 9,903,146,048 cycles # 2.348 GHz + 16,883,717,328 instructions # 1.70 insn per cycle + 4.219745362 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index f947eb5e4f..6344cc7ba8 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_23:38:09 +DATE: 2023-10-30_23:22:16 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.828756e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.593601e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.985424e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.825460e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.615556e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.978319e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.301764 sec - 4,658,061,955 cycles # 3.045 GHz - 7,310,344,851 instructions # 1.57 insn per cycle - 1.586710861 seconds time elapsed +TOTAL : 1.303776 sec + 4,657,124,266 cycles # 3.040 GHz + 7,186,636,754 instructions # 1.54 insn per cycle + 1.588682023 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -73,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.139293e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.337431e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.337431e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.134096e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.332205e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.332205e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 6.266741 sec - 19,392,539,175 cycles # 3.092 GHz - 44,137,489,256 instructions # 2.28 insn per cycle - 6.271951422 seconds time elapsed +TOTAL : 6.296140 sec + 19,421,926,510 cycles # 3.083 GHz + 44,137,585,352 instructions # 2.27 insn per cycle + 6.301319249 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.679246e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.201189e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.201189e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.651480e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.154634e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.154634e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.486828 sec - 13,867,056,584 cycles # 3.088 
GHz - 31,003,527,990 instructions # 2.24 insn per cycle - 4.491941017 seconds time elapsed +TOTAL : 4.560540 sec + 13,873,540,395 cycles # 3.044 GHz + 31,009,056,044 instructions # 2.24 insn per cycle + 4.565789838 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.048775e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.856548e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.856548e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.097326e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.928723e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.928723e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.823093 sec - 11,134,861,193 cycles # 2.909 GHz - 19,279,222,948 instructions # 1.73 insn per cycle - 3.828263919 seconds time elapsed +TOTAL : 3.742619 sec + 11,169,147,793 cycles # 2.981 GHz + 19,279,405,520 instructions # 1.73 insn per cycle + 3.747767217 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
2.168575e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.075543e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.075543e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.190762e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.098141e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.098141e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.648315 sec - 10,872,837,389 cycles # 2.977 GHz - 18,709,340,835 instructions # 1.72 insn per cycle - 3.653593087 seconds time elapsed +TOTAL : 3.617262 sec + 10,818,196,768 cycles # 2.987 GHz + 18,695,491,265 instructions # 1.73 insn per cycle + 3.622515413 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.850581e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.463998e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.463998e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.865525e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.485419e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.485419e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.155365 sec - 9,750,987,746 cycles # 2.345 GHz - 15,439,079,476 instructions # 1.58 insn per cycle - 4.160593254 seconds time elapsed +TOTAL : 4.125743 sec + 9,708,020,978 cycles # 2.351 GHz + 15,438,456,670 instructions # 1.59 insn per cycle + 4.131079365 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt index 1a9ec3322a..3aefc59966 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_23:35:02 +DATE: 2023-10-30_23:19:04 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.853721e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.668709e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.056055e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.850388e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.635176e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.005200e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.960035 sec - 3,582,011,015 cycles # 3.020 GHz - 7,167,810,914 instructions # 2.00 insn per cycle - 1.244702782 seconds time elapsed +TOTAL : 0.953840 sec + 3,625,690,902 cycles # 3.045 GHz + 7,247,981,304 instructions # 2.00 insn per cycle + 1.247604337 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -73,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.141208e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.339644e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.339644e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.136488e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.334373e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.334373e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.916888 sec - 18,292,936,252 cycles # 3.089 GHz - 44,034,839,530 instructions # 2.41 insn per cycle - 5.922158252 seconds time elapsed +TOTAL : 5.934612 sec + 18,310,624,957 cycles # 3.084 GHz + 44,036,944,062 instructions # 2.40 insn per cycle + 5.939841225 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.682506e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.200241e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.200241e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.688951e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.208841e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.208841e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.132115 sec - 12,770,466,467 cycles # 3.087 
GHz - 31,001,130,857 instructions # 2.43 insn per cycle - 4.137366192 seconds time elapsed +TOTAL : 4.120879 sec + 12,781,805,897 cycles # 3.099 GHz + 31,000,901,178 instructions # 2.43 insn per cycle + 4.125959736 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.092532e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.926894e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.926894e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.109438e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.946978e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.946978e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.397172 sec - 10,064,493,963 cycles # 2.959 GHz - 19,377,092,544 instructions # 1.93 insn per cycle - 3.402411959 seconds time elapsed +TOTAL : 3.371386 sec + 10,049,111,074 cycles # 2.977 GHz + 19,378,382,951 instructions # 1.93 insn per cycle + 3.376527896 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 
2.185793e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.080619e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.080619e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.185993e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.087896e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.087896e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.263660 sec - 9,723,240,197 cycles # 2.975 GHz - 19,005,869,214 instructions # 1.95 insn per cycle - 3.268973832 seconds time elapsed +TOTAL : 3.266619 sec + 9,718,371,404 cycles # 2.971 GHz + 18,994,616,347 instructions # 1.95 insn per cycle + 3.271821523 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.878162e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.510088e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.510088e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.896345e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.528531e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.528531e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.740304 sec - 8,619,487,033 cycles # 2.302 GHz - 15,737,558,338 instructions # 1.83 insn per cycle - 3.745598337 seconds time elapsed +TOTAL : 3.708220 sec + 8,592,027,434 cycles # 2.315 GHz + 15,737,406,155 instructions # 1.83 insn per cycle + 3.713476400 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index aca39aea62..dce8f8cf57 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_23:31:53 +DATE: 2023-10-30_23:15:51 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -47,14 +51,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.246166e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.586744e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.910156e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.295907e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.598257e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.930198e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.829625 sec - 6,294,255,275 cycles # 3.060 GHz - 11,456,031,669 instructions # 1.82 insn per cycle - 2.113395829 seconds time elapsed +TOTAL : 1.822441 sec + 6,267,828,253 cycles # 3.060 GHz + 11,571,964,951 instructions # 1.85 insn per cycle + 2.106182309 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -75,14 +79,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.129837e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.329251e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.329251e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.122306e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.317017e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.317017e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.963077 sec - 18,329,614,420 cycles # 3.072 GHz - 44,034,602,066 instructions # 2.40 insn per cycle - 5.968302668 seconds time elapsed +TOTAL : 6.006713 sec + 18,284,116,238 cycles # 3.042 GHz + 44,035,103,509 instructions # 2.41 insn per cycle + 6.011881366 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -102,14 +106,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.694414e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.218473e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.218473e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.695489e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.215832e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.215832e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.104338 sec - 12,764,499,292 cycles # 3.107 GHz - 31,000,842,012 instructions # 2.43 insn per cycle - 
4.109565892 seconds time elapsed +TOTAL : 4.104459 sec + 12,751,493,356 cycles # 3.104 GHz + 31,001,134,395 instructions # 2.43 insn per cycle + 4.109688724 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -129,14 +133,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.095276e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.926686e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.926686e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.115266e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.947026e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.947026e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.389760 sec - 10,099,043,785 cycles # 2.976 GHz - 19,377,613,301 instructions # 1.92 insn per cycle - 3.395069920 seconds time elapsed +TOTAL : 3.360992 sec + 10,038,778,756 cycles # 2.983 GHz + 19,377,215,963 instructions # 1.93 insn per cycle + 3.366349925 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -156,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.181551e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.082266e+06 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.082266e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.188854e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.095807e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.095807e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.268062 sec - 9,751,904,057 cycles # 2.980 GHz - 19,005,554,616 instructions # 1.95 insn per cycle - 3.273237329 seconds time elapsed +TOTAL : 3.263860 sec + 9,699,497,416 cycles # 2.968 GHz + 18,994,878,614 instructions # 1.96 insn per cycle + 3.269116510 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -183,14 +187,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.820421e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.422717e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.422717e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.887543e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.510355e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.510355e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.855030 sec - 8,625,575,408 cycles # 2.239 GHz - 15,740,744,925 instructions # 1.82 insn per cycle - 3.860342979 seconds time elapsed +TOTAL : 3.724044 sec + 8,601,500,033 cycles # 2.307 GHz + 15,737,301,137 instructions # 1.83 insn per cycle + 3.729408714 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index 6298beca69..34fd48853a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_22:53:15 +DATE: 2023-10-30_22:39:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.449613e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.831842e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.043087e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.002468e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.955424e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.101943e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.653604 sec - 2,663,258,576 cycles # 3.018 GHz - 4,119,868,167 instructions # 1.55 insn per cycle - 0.942343744 seconds time elapsed +TOTAL : 0.642654 sec + 2,632,177,698 cycles # 3.036 GHz + 4,095,145,359 instructions # 1.56 insn per cycle + 0.928848819 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 @@ -73,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.200280e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.420964e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.420964e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.197679e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.417716e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.417716e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.635099 sec - 17,435,146,635 cycles # 3.092 GHz - 41,881,003,220 instructions # 2.40 insn per cycle - 5.640492301 seconds time elapsed +TOTAL : 5.648487 sec + 17,453,071,160 cycles # 3.088 GHz + 41,883,122,891 instructions # 2.40 insn per cycle + 5.653445764 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 392) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.732827e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.289974e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.289974e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.734097e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.283067e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.283067e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.025576 sec - 12,500,170,045 cycles # 3.102 
GHz - 30,164,396,598 instructions # 2.41 insn per cycle - 4.031081957 seconds time elapsed +TOTAL : 4.022159 sec + 12,443,939,665 cycles # 3.091 GHz + 30,163,658,084 instructions # 2.42 insn per cycle + 4.027468023 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1611) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.083721e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.918251e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.918251e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.127395e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.983414e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.983414e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.414417 sec - 9,979,865,065 cycles # 2.922 GHz - 19,111,810,651 instructions # 1.92 insn per cycle - 3.419772488 seconds time elapsed +TOTAL : 3.344989 sec + 9,961,675,461 cycles # 2.974 GHz + 19,109,589,410 instructions # 1.92 insn per cycle + 3.350100783 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1930) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.188158e+06 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.092301e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.092301e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.208451e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.135186e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.135186e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.260474 sec - 9,653,421,879 cycles # 2.957 GHz - 18,775,249,287 instructions # 1.94 insn per cycle - 3.265820478 seconds time elapsed +TOTAL : 3.233377 sec + 9,653,095,855 cycles # 2.981 GHz + 18,775,836,987 instructions # 1.95 insn per cycle + 3.238819698 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1661) (512y: 178) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.921655e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.594276e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.594276e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.882766e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.524894e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.524894e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.664294 sec - 8,459,612,466 cycles # 2.306 GHz - 15,613,549,898 instructions # 1.85 insn per cycle - 3.669595515 seconds time elapsed +TOTAL : 3.739273 sec + 8,432,587,246 cycles # 2.253 GHz + 15,614,997,197 instructions # 1.85 insn per cycle + 3.744502296 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 886) (512y: 156) (512z: 1239) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index e44106da5e..b66677f31e 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl1_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_23:15:15 +DATE: 2023-10-30_22:58:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.506979e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.575771e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.024770e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.821878e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.665537e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.054112e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.673639 sec - 2,731,565,956 cycles # 3.015 GHz - 4,234,870,315 instructions # 1.55 insn per cycle - 0.968176807 seconds time elapsed +TOTAL : 0.673776 sec + 2,711,188,081 cycles # 2.996 GHz + 4,231,389,791 instructions # 1.56 insn per cycle + 0.964580036 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -73,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.651561e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.117234e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.117234e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.701921e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.180269e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.180269e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.215246 sec - 12,681,764,053 cycles # 3.006 GHz - 32,577,102,204 instructions # 2.57 insn per cycle - 4.220638940 seconds time elapsed +TOTAL : 4.094769 sec + 12,660,917,906 cycles # 3.089 GHz + 32,576,964,737 instructions # 2.57 insn per cycle + 4.099999084 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 296) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.153203e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.080454e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.080454e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.158145e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.082236e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.082236e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.317199 sec - 10,262,912,467 cycles # 3.090 
GHz - 24,505,351,557 instructions # 2.39 insn per cycle - 3.322594205 seconds time elapsed +TOTAL : 3.309714 sec + 10,251,966,583 cycles # 3.093 GHz + 24,505,268,154 instructions # 2.39 insn per cycle + 3.315131551 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1251) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.274293e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.309246e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.309246e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.243411e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.297771e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.297771e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.166884 sec - 9,109,096,651 cycles # 2.873 GHz - 16,941,882,854 instructions # 1.86 insn per cycle - 3.172295054 seconds time elapsed +TOTAL : 3.210802 sec + 9,128,872,534 cycles # 2.839 GHz + 16,941,891,489 instructions # 1.86 insn per cycle + 3.216298233 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1631) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.412460e+06 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.566691e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.566691e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.425644e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.597095e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.597095e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.995372 sec - 8,910,476,559 cycles # 2.970 GHz - 16,368,475,958 instructions # 1.84 insn per cycle - 3.000901134 seconds time elapsed +TOTAL : 2.981437 sec + 8,897,923,450 cycles # 2.980 GHz + 16,357,694,772 instructions # 1.84 insn per cycle + 2.986734200 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1370) (512y: 139) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.075952e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.880689e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.880689e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.086825e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.890582e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.890582e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.424948 sec - 7,930,435,697 cycles # 2.313 GHz - 14,592,571,272 instructions # 1.84 insn per cycle - 3.430388365 seconds time elapsed +TOTAL : 3.406662 sec + 7,939,123,317 cycles # 2.328 GHz + 14,593,713,630 instructions # 1.84 insn per cycle + 3.412033100 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1015) (512y: 158) (512z: 955) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index 20af8dcdcf..fd9bfff43c 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl1_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_23:15:44 +DATE: 2023-10-30_22:59:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.517058e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.581775e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.062501e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.828772e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.682181e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.097744e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.670504 sec - 2,702,422,058 cycles # 2.997 GHz - 4,242,604,368 instructions # 1.57 insn per cycle - 0.961836694 seconds time elapsed +TOTAL : 0.664226 sec + 2,733,772,329 cycles # 3.032 GHz + 4,252,663,384 instructions # 1.56 insn per cycle + 0.964052579 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 @@ -73,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.250239e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.177317e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.177317e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.208707e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.120592e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.120592e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.189113 sec - 9,869,010,862 cycles # 3.090 GHz - 25,457,600,406 instructions # 2.58 insn per cycle - 3.194393044 seconds time elapsed +TOTAL : 3.248886 sec + 9,853,414,421 cycles # 3.029 GHz + 25,456,960,363 instructions # 2.58 insn per cycle + 3.254418759 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 249) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.513363e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.880454e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.880454e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.501986e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.851139e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.851139e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.899337 sec - 8,953,224,169 cycles # 3.083 GHz - 
21,513,951,345 instructions # 2.40 insn per cycle - 2.904812289 seconds time elapsed +TOTAL : 2.911448 sec + 8,944,084,931 cycles # 3.067 GHz + 21,514,467,636 instructions # 2.41 insn per cycle + 2.916830419 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1119) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.487189e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.770373e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.770373e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.516582e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.815233e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.815233e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.923137 sec - 8,620,906,170 cycles # 2.945 GHz - 15,830,227,053 instructions # 1.84 insn per cycle - 2.928647932 seconds time elapsed +TOTAL : 2.895060 sec + 8,584,608,205 cycles # 2.961 GHz + 15,829,588,796 instructions # 1.84 insn per cycle + 2.900312973 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.556435e+06 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.885768e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.885768e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.567342e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.937683e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.937683e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.850788 sec - 8,407,782,646 cycles # 2.944 GHz - 15,528,675,382 instructions # 1.85 insn per cycle - 2.856241127 seconds time elapsed +TOTAL : 2.840443 sec + 8,453,151,631 cycles # 2.972 GHz + 15,539,804,379 instructions # 1.84 insn per cycle + 2.845864538 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1268) (512y: 139) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.204312e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.139539e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.139539e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.242996e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.198969e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.198969e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.251251 sec - 7,600,805,312 cycles # 2.336 GHz - 14,294,099,971 instructions # 1.88 insn per cycle - 3.256637720 seconds time elapsed +TOTAL : 3.196952 sec + 7,563,786,645 cycles # 2.363 GHz + 14,293,986,386 instructions # 1.89 insn per cycle + 3.202372288 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1041) (512y: 164) (512z: 874) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 3d3b3506a7..794573167f 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_22:53:48 +DATE: 2023-10-30_22:40:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.488398e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.270591e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.269806e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.637729e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.329324e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.277725e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.561353 sec - 2,351,261,799 cycles # 3.014 GHz - 3,609,899,167 instructions # 1.54 insn per cycle - 0.839537640 seconds time elapsed +TOTAL : 0.554589 sec + 2,330,144,606 cycles # 3.018 GHz + 3,601,132,107 instructions # 1.55 insn per cycle + 0.831243618 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 @@ -73,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.166292e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.382162e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.382162e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.157878e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.372623e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.372623e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.751431 sec - 17,807,322,192 cycles # 3.094 GHz - 43,613,437,781 instructions # 2.45 insn per cycle - 5.756676764 seconds time elapsed +TOTAL : 5.792880 sec + 17,806,778,218 cycles # 3.072 GHz + 43,613,328,536 instructions # 2.45 insn per cycle + 5.797645456 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.354841e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.597055e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.597055e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.410827e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.674479e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.674479e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.029120 sec - 9,234,524,948 cycles # 3.044 GHz - 
21,925,460,951 instructions # 2.37 insn per cycle - 3.034350215 seconds time elapsed +TOTAL : 2.964239 sec + 9,221,580,079 cycles # 3.107 GHz + 21,925,566,713 instructions # 2.38 insn per cycle + 2.969030583 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.442806e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.733746e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.733746e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.607797e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.989703e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.989703e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.936007 sec - 8,334,441,599 cycles # 2.835 GHz - 15,592,659,950 instructions # 1.87 insn per cycle - 2.941196934 seconds time elapsed +TOTAL : 2.761029 sec + 8,275,934,414 cycles # 2.994 GHz + 15,591,456,247 instructions # 1.88 insn per cycle + 2.765970000 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.606054e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 4.023531e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.023531e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.642139e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.064710e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.064710e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.762889 sec - 8,219,765,977 cycles # 2.971 GHz - 15,435,430,943 instructions # 1.88 insn per cycle - 2.768115820 seconds time elapsed +TOTAL : 2.727414 sec + 8,176,411,188 cycles # 2.993 GHz + 15,433,931,357 instructions # 1.89 insn per cycle + 2.732285465 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.651104e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.096112e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.096112e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.615030e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.018431e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.018431e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.727241 sec - 6,620,001,417 cycles # 2.424 GHz - 12,869,662,338 instructions # 1.94 insn per cycle - 2.732371752 seconds time elapsed +TOTAL : 2.763565 sec + 6,616,617,623 cycles # 2.392 GHz + 12,870,037,151 instructions # 1.95 insn per cycle + 2.768592591 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index ebeb0f01b9..840e18a636 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_23:26:15 +DATE: 2023-10-30_23:10:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.502083e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.940730e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.940730e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.550916e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.988783e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.988783e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.637195 sec - 5,696,849,282 cycles # 3.057 GHz - 10,301,658,227 instructions # 1.81 insn per cycle - 1.920689810 seconds time elapsed +TOTAL : 1.629479 sec + 5,634,078,784 cycles # 3.040 GHz + 10,242,090,703 instructions # 1.82 insn per cycle + 1.911926836 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost @@ -82,14 +86,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.144224e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.352294e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.352294e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.148295e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.355569e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.355569e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.958755 sec - 18,470,081,002 cycles # 3.097 GHz - 43,762,371,634 instructions # 2.37 insn per cycle - 5.964749827 seconds time elapsed +TOTAL : 5.938443 sec + 18,473,541,682 cycles # 3.108 GHz + 43,762,296,031 instructions # 2.37 insn per cycle + 5.944353296 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -110,14 +114,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.232128e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.341106e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.341106e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.309795e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.452502e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.452502e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.308376 sec - 10,011,937,118 cycles # 3.025 GHz - 23,263,465,661 instructions # 2.32 insn per cycle - 3.314430093 
seconds time elapsed +TOTAL : 3.198061 sec + 9,971,648,922 cycles # 3.113 GHz + 23,260,266,333 instructions # 2.33 insn per cycle + 3.204013668 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -138,14 +142,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.453144e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.695130e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.695130e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.489278e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.735887e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.735887e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.039152 sec - 9,088,378,508 cycles # 2.986 GHz - 16,711,868,096 instructions # 1.84 insn per cycle - 3.045311635 seconds time elapsed +TOTAL : 2.993393 sec + 9,020,025,084 cycles # 3.008 GHz + 16,710,769,598 instructions # 1.85 insn per cycle + 2.999420475 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -166,14 +170,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.433792e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.682044e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.682044e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.443126e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.672909e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.672909e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.069362 sec - 9,004,918,207 cycles # 2.929 GHz - 16,555,674,510 instructions # 1.84 insn per cycle - 3.075484578 seconds time elapsed +TOTAL : 3.053646 sec + 8,967,225,018 cycles # 2.935 GHz + 16,558,461,611 instructions # 1.85 insn per cycle + 3.059730658 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -194,14 +198,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.497941e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.758140e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.758140e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.522735e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.787130e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.787130e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.998963 sec - 7,428,209,049 cycles # 2.472 GHz - 14,077,163,296 instructions # 1.90 insn per cycle - 3.005100581 seconds time elapsed +TOTAL : 2.967360 sec + 7,377,554,587 cycles # 2.482 GHz + 14,076,363,533 instructions # 1.91 insn per cycle + 2.973262874 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index 9c2267690a..6773ae0a0d 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_23:38:45 +DATE: 2023-10-30_23:22:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.386778e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.211673e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.242393e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.383036e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.202546e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.214819e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 -TOTAL : 1.152217 sec - 4,164,140,679 cycles # 3.038 GHz - 6,703,652,142 instructions # 1.61 insn per cycle - 1.427512326 seconds time elapsed +TOTAL : 1.153231 sec + 4,139,430,436 cycles # 3.020 GHz + 6,625,012,847 instructions # 1.60 insn per cycle + 1.429182628 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 @@ -73,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.132621e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.342281e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.342281e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.154531e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.367995e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.367995e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 6.243644 sec - 18,819,488,848 cycles # 3.013 GHz - 43,795,864,417 instructions # 2.33 insn per cycle - 6.248647210 seconds time elapsed +TOTAL : 6.132854 sec + 18,828,647,542 cycles # 3.068 GHz + 43,796,198,955 instructions # 2.33 insn per cycle + 6.137841998 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.368858e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.630852e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.630852e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.373084e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.623301e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.623301e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 3.327996 sec - 10,249,694,277 cycles # 3.076 GHz 
- 22,006,771,964 instructions # 2.15 insn per cycle - 3.333022700 seconds time elapsed +TOTAL : 3.327964 sec + 10,225,038,004 cycles # 3.069 GHz + 22,007,341,056 instructions # 2.15 insn per cycle + 3.333079465 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.521017e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.927680e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.927680e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.563130e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.939831e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.939831e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.166104 sec - 9,360,780,233 cycles # 2.953 GHz - 15,501,778,068 instructions # 1.66 insn per cycle - 3.171157605 seconds time elapsed +TOTAL : 3.124450 sec + 9,294,040,657 cycles # 2.971 GHz + 15,501,961,660 instructions # 1.67 insn per cycle + 3.129427880 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.592580e+06 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.021290e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.021290e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.589303e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.004535e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.004535e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.099079 sec - 9,270,625,553 cycles # 2.987 GHz - 15,143,813,419 instructions # 1.63 insn per cycle - 3.104067295 seconds time elapsed +TOTAL : 3.111634 sec + 9,245,896,562 cycles # 2.968 GHz + 15,144,115,387 instructions # 1.64 insn per cycle + 3.116587905 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.636377e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.091702e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.091702e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.625191e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.046723e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.046723e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.062456 sec - 7,649,798,731 cycles # 2.495 GHz - 12,579,371,966 instructions # 1.64 insn per cycle - 3.067527608 seconds time elapsed +TOTAL : 3.076968 sec + 7,636,124,568 cycles # 2.478 GHz + 12,579,210,314 instructions # 1.65 insn per cycle + 3.082030809 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt index a1984b2dbd..72ae922772 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_23:35:35 +DATE: 2023-10-30_23:19:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.392536e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.227239e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.283755e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.391937e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.221657e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.272523e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.834415 sec - 3,191,587,990 cycles # 3.032 GHz - 6,503,299,462 instructions # 2.04 insn per cycle - 1.110001966 seconds time elapsed +TOTAL : 0.835423 sec + 3,175,610,956 cycles # 3.015 GHz + 6,448,969,321 instructions # 2.03 insn per cycle + 1.110247901 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 @@ -73,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.150249e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.364647e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.364647e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.166233e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.381715e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.381715e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.834169 sec - 17,836,446,946 cycles # 3.058 GHz - 43,616,934,542 instructions # 2.45 insn per cycle - 5.839014033 seconds time elapsed +TOTAL : 5.750185 sec + 17,810,111,860 cycles # 3.095 GHz + 43,613,391,486 instructions # 2.45 insn per cycle + 5.755174931 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.313883e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.536871e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.536871e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.400092e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.665371e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.665371e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.087694 sec - 9,244,349,657 cycles # 2.990 GHz - 
21,925,829,555 instructions # 2.37 insn per cycle - 3.092652027 seconds time elapsed +TOTAL : 2.978065 sec + 9,233,112,440 cycles # 3.096 GHz + 21,926,388,449 instructions # 2.37 insn per cycle + 2.983291462 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.593910e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.976295e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.976295e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.550721e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.893542e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.893542e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.773398 sec - 8,292,004,994 cycles # 2.985 GHz - 15,590,392,482 instructions # 1.88 insn per cycle - 2.778369879 seconds time elapsed +TOTAL : 2.818534 sec + 8,293,108,403 cycles # 2.938 GHz + 15,590,838,460 instructions # 1.88 insn per cycle + 2.823621763 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.597463e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 4.017057e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.017057e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.601734e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.008891e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.008891e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.771894 sec - 8,246,987,403 cycles # 2.971 GHz - 15,434,206,063 instructions # 1.87 insn per cycle - 2.776950739 seconds time elapsed +TOTAL : 2.769509 sec + 8,228,017,164 cycles # 2.967 GHz + 15,441,173,199 instructions # 1.88 insn per cycle + 2.774410515 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.605490e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.021590e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.021590e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.613335e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.006719e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.006719e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.772142 sec - 6,653,271,489 cycles # 2.397 GHz - 12,870,994,579 instructions # 1.93 insn per cycle - 2.777366105 seconds time elapsed +TOTAL : 2.769013 sec + 6,622,512,740 cycles # 2.389 GHz + 12,869,806,868 instructions # 1.94 insn per cycle + 2.774035451 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index 6b52d68a52..c6afedfd67 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_23:32:27 +DATE: 2023-10-30_23:16:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -47,14 +51,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.505082e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.185860e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.164906e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.417354e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.176792e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.147986e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.429658 sec - 5,048,204,041 cycles # 3.061 GHz - 9,247,012,831 instructions # 1.83 insn per cycle - 1.705876120 seconds time elapsed +TOTAL : 1.434919 sec + 5,054,404,953 cycles # 3.056 GHz + 9,199,574,501 instructions # 1.82 insn per cycle + 1.711405816 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -75,14 +79,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.168037e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.384858e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.384858e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.168343e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.383929e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.383929e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.742087 sec - 17,808,913,038 cycles # 3.099 GHz - 43,613,121,215 instructions # 2.45 insn per cycle - 5.747051229 seconds time elapsed +TOTAL : 5.739834 sec + 17,800,816,655 cycles # 3.099 GHz + 43,614,163,599 instructions # 2.45 insn per cycle + 5.744981592 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -102,14 +106,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.388428e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.655638e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.655638e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.399660e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.661286e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.661286e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.992667 sec - 9,228,123,251 cycles # 3.080 GHz - 21,925,408,791 instructions # 2.38 insn per cycle - 2.997686168 
seconds time elapsed +TOTAL : 2.980617 sec + 9,247,779,186 cycles # 3.098 GHz + 21,925,450,174 instructions # 2.37 insn per cycle + 2.985625850 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -129,14 +133,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.483923e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.793533e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.793533e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.594289e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.975137e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.975137e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.891836 sec - 8,313,512,459 cycles # 2.871 GHz - 15,590,846,489 instructions # 1.88 insn per cycle - 2.896841741 seconds time elapsed +TOTAL : 2.771450 sec + 8,292,170,915 cycles # 2.989 GHz + 15,591,583,098 instructions # 1.88 insn per cycle + 2.776542292 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -156,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.610985e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.040279e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.040279e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.627498e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.045740e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.045740e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.757901 sec - 8,238,182,515 cycles # 2.984 GHz - 15,434,374,890 instructions # 1.87 insn per cycle - 2.763073101 seconds time elapsed +TOTAL : 2.742639 sec + 8,207,084,430 cycles # 2.988 GHz + 15,434,630,659 instructions # 1.88 insn per cycle + 2.747705166 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -183,14 +187,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.588475e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.981440e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.981440e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.561763e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.934099e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.934099e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.788426 sec - 6,630,836,794 cycles # 2.375 GHz - 12,869,285,496 instructions # 1.94 insn per cycle - 2.793399713 seconds time elapsed +TOTAL : 2.816193 sec + 6,657,072,695 cycles # 2.360 GHz + 12,869,581,963 instructions # 1.93 insn per cycle + 2.821211378 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index a453d1a288..1fbb44ee5c 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_22:54:17 +DATE: 2023-10-30_22:40:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.491668e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.296332e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.301135e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.642128e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.346376e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.326204e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.558982 sec - 2,359,148,885 cycles # 3.021 GHz - 3,677,736,298 instructions # 1.56 insn per cycle - 0.838194831 seconds time elapsed +TOTAL : 0.551760 sec + 2,330,967,972 cycles # 3.025 GHz + 3,597,652,139 instructions # 1.54 insn per cycle + 0.827558698 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 @@ -73,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.247833e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.499900e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.499900e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.251222e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.502030e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.502030e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.395526 sec - 16,752,728,675 cycles # 3.103 GHz - 41,371,323,147 instructions # 2.47 insn per cycle - 5.400397415 seconds time elapsed +TOTAL : 5.380932 sec + 16,725,808,463 cycles # 3.106 GHz + 41,372,320,155 instructions # 2.47 insn per cycle + 5.385666902 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.466250e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.827670e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.827670e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.465709e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.828337e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.828337e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.908072 sec - 9,031,894,595 cycles # 3.101 GHz - 
21,230,343,156 instructions # 2.35 insn per cycle - 2.913398603 seconds time elapsed +TOTAL : 2.909540 sec + 8,989,559,108 cycles # 3.085 GHz + 21,229,913,779 instructions # 2.36 insn per cycle + 2.914409561 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1841) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.616510e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.025153e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.025153e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.611896e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.015253e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.015253e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.750320 sec - 8,230,475,467 cycles # 2.988 GHz - 15,424,564,830 instructions # 1.87 insn per cycle - 2.755330494 seconds time elapsed +TOTAL : 2.758608 sec + 8,224,469,886 cycles # 2.977 GHz + 15,425,098,172 instructions # 1.88 insn per cycle + 2.763474022 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2536) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.653005e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 4.130225e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.130225e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.672314e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.144716e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.144716e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.720808 sec - 8,079,864,793 cycles # 2.965 GHz - 15,238,277,190 instructions # 1.89 insn per cycle - 2.725957575 seconds time elapsed +TOTAL : 2.700190 sec + 8,077,137,333 cycles # 2.987 GHz + 15,243,670,128 instructions # 1.89 insn per cycle + 2.705044215 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2423) (512y: 8) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.654013e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.095619e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.095619e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.668548e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.123575e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.123575e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.722630 sec - 6,599,502,119 cycles # 2.420 GHz - 12,848,005,127 instructions # 1.95 insn per cycle - 2.727814362 seconds time elapsed +TOTAL : 2.707559 sec + 6,594,740,119 cycles # 2.432 GHz + 12,847,978,977 instructions # 1.95 insn per cycle + 2.712589393 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1705) (512y: 18) (512z: 1427) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index c38059bc0e..67d4c4703a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl1_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_23:16:12 +DATE: 2023-10-30_22:59:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.302992e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.184554e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.261690e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.378185e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.223465e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.277552e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.571699 sec - 2,381,453,624 cycles # 2.998 GHz - 3,711,037,714 instructions # 1.56 insn per cycle - 0.852798458 seconds time elapsed +TOTAL : 0.564996 sec + 2,374,437,058 cycles # 3.015 GHz + 3,681,105,263 instructions # 1.55 insn per cycle + 0.844506702 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 @@ -73,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.739818e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.271465e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.271465e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.758646e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.303527e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.303527e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.973603 sec - 12,176,355,502 cycles # 3.061 GHz - 32,521,127,628 instructions # 2.67 insn per cycle - 3.978805450 seconds time elapsed +TOTAL : 3.935199 sec + 12,172,098,963 cycles # 3.090 GHz + 32,521,086,001 instructions # 2.67 insn per cycle + 3.940316830 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 312) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.837673e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.797869e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.797869e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.825516e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.767678e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.767678e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.573600 sec - 7,975,655,707 cycles # 3.094 GHz - 
18,690,683,706 instructions # 2.34 insn per cycle - 2.578826897 seconds time elapsed +TOTAL : 2.583311 sec + 7,985,144,128 cycles # 3.086 GHz + 18,690,459,382 instructions # 2.34 insn per cycle + 2.588535770 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1554) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.881382e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.764080e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.764080e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.954275e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.871745e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.871745e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.543156 sec - 7,446,918,262 cycles # 2.924 GHz - 14,254,158,064 instructions # 1.91 insn per cycle - 2.548346891 seconds time elapsed +TOTAL : 2.483708 sec + 7,425,889,374 cycles # 2.985 GHz + 14,254,038,906 instructions # 1.92 insn per cycle + 2.488804683 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2237) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.960433e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 4.954394e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.954394e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.010885e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.042869e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.042869e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.480137 sec - 7,295,815,180 cycles # 2.936 GHz - 13,946,321,934 instructions # 1.91 insn per cycle - 2.485375165 seconds time elapsed +TOTAL : 2.441015 sec + 7,296,345,350 cycles # 2.984 GHz + 13,951,979,933 instructions # 1.91 insn per cycle + 2.446211141 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2096) (512y: 3) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.685033e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.203277e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.203277e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.704736e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.262639e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.262639e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.700255 sec - 6,507,297,033 cycles # 2.406 GHz - 13,421,510,099 instructions # 2.06 insn per cycle - 2.705598441 seconds time elapsed +TOTAL : 2.681048 sec + 6,545,536,751 cycles # 2.438 GHz + 13,421,408,114 instructions # 2.05 insn per cycle + 2.686191227 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2071) (512y: 1) (512z: 1198) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index 358806b93a..19283f01af 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl1_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_23:16:38 +DATE: 2023-10-30_23:00:24 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.305534e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.188557e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.268373e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.382020e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.233872e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.306668e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.571817 sec - 2,353,207,749 cycles # 2.961 GHz - 3,723,614,170 instructions # 1.58 insn per cycle - 0.852487976 seconds time elapsed +TOTAL : 0.567071 sec + 2,382,516,653 cycles # 3.027 GHz + 3,627,793,245 instructions # 1.52 insn per cycle + 0.846725967 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 @@ -73,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.325349e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.401417e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.401417e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.226592e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.215249e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.215249e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.066168 sec - 9,413,178,324 cycles # 3.066 GHz - 25,308,930,169 instructions # 2.69 insn per cycle - 3.071456258 seconds time elapsed +TOTAL : 3.194239 sec + 9,429,587,787 cycles # 2.955 GHz + 25,311,541,209 instructions # 2.68 insn per cycle + 3.199238329 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 263) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.159701e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.912786e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.912786e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.045220e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.672632e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.672632e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.348370 sec - 7,237,807,765 cycles # 3.077 GHz - 
16,901,642,876 instructions # 2.34 insn per cycle - 2.353566647 seconds time elapsed +TOTAL : 2.438450 sec + 7,222,375,461 cycles # 2.957 GHz + 16,901,888,826 instructions # 2.34 insn per cycle + 2.443522059 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1359) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.100952e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.335032e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.335032e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.117489e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.340330e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.340330e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.377716 sec - 7,114,981,233 cycles # 2.987 GHz - 13,618,980,317 instructions # 1.91 insn per cycle - 2.382769738 seconds time elapsed +TOTAL : 2.367857 sec + 7,080,165,072 cycles # 2.985 GHz + 13,619,148,647 instructions # 1.92 insn per cycle + 2.373101167 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2060) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.095662e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 5.380517e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.380517e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.158861e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.457557e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.457557e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.388573 sec - 7,050,432,929 cycles # 2.946 GHz - 13,435,636,130 instructions # 1.91 insn per cycle - 2.393927733 seconds time elapsed +TOTAL : 2.340958 sec + 7,015,657,902 cycles # 2.991 GHz + 13,430,116,599 instructions # 1.91 insn per cycle + 2.346201265 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1945) (512y: 4) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.809551e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.481545e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.481545e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.805452e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.487556e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.487556e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.594484 sec - 6,324,738,625 cycles # 2.435 GHz - 13,153,553,215 instructions # 2.08 insn per cycle - 2.599854412 seconds time elapsed +TOTAL : 2.596913 sec + 6,326,204,144 cycles # 2.432 GHz + 13,152,923,009 instructions # 2.08 insn per cycle + 2.602133383 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2029) (512y: 1) (512z: 1083) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 7b38b05e62..2812ac0b53 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_22:54:46 +DATE: 2023-10-30_22:41:02 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.447730e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.791443e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.966822e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.988074e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.909786e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.002115e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.654025 sec - 2,666,039,210 cycles # 3.017 GHz - 4,098,761,288 instructions # 1.54 insn per cycle - 0.944531212 seconds time elapsed +TOTAL : 0.648456 sec + 2,594,556,028 cycles # 2.963 GHz + 4,040,130,517 instructions # 1.56 insn per cycle + 0.932684475 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -73,14 +77,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.109099e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.296066e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.296066e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.097041e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.280907e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.280907e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.070816 sec - 18,741,596,344 cycles # 3.086 GHz - 44,286,641,637 instructions # 2.36 insn per cycle - 6.076086314 seconds time elapsed +TOTAL : 6.134086 sec + 18,716,855,271 cycles # 3.049 GHz + 44,287,379,187 instructions # 2.37 insn per cycle + 6.139244754 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 439) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.722516e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.273779e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.273779e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.751235e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.312434e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.312434e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.052104 sec - 12,400,528,226 cycles # 3.057 GHz 
- 30,960,047,988 instructions # 2.50 insn per cycle - 4.057597540 seconds time elapsed +TOTAL : 3.986301 sec + 12,329,366,151 cycles # 3.090 GHz + 30,959,681,512 instructions # 2.51 insn per cycle + 3.991578409 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1685) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.005605e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.791319e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.791319e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.078430e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.890762e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.890762e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.534624 sec - 10,128,109,972 cycles # 2.862 GHz - 19,399,010,623 instructions # 1.92 insn per cycle - 3.540053791 seconds time elapsed +TOTAL : 3.414968 sec + 10,142,587,821 cycles # 2.967 GHz + 19,399,691,686 instructions # 1.91 insn per cycle + 3.420034144 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2146) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.173708e+06 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.076945e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.076945e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.176833e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.087447e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.087447e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.281691 sec - 9,748,054,010 cycles # 2.966 GHz - 18,981,255,153 instructions # 1.95 insn per cycle - 3.287089431 seconds time elapsed +TOTAL : 3.276978 sec + 9,769,804,904 cycles # 2.978 GHz + 18,982,166,653 instructions # 1.94 insn per cycle + 3.282084297 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1859) (512y: 188) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.936624e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.615295e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.615295e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.950559e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.628157e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.628157e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.640905 sec - 8,391,288,862 cycles # 2.302 GHz - 15,064,533,737 instructions # 1.80 insn per cycle - 3.646211880 seconds time elapsed +TOTAL : 3.618415 sec + 8,360,946,549 cycles # 2.308 GHz + 15,064,923,745 instructions # 1.80 insn per cycle + 3.623523733 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1023) (512y: 155) (512z: 1316) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index 0eed8e2d69..995b7d3ff6 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_m_inl0_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-29_22:55:19 +DATE: 2023-10-30_22:41:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.454271e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.832017e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.038340e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.003728e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.948322e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.086359e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.653806 sec - 2,668,469,279 cycles # 3.020 GHz - 4,079,450,696 instructions # 1.53 insn per cycle - 0.943662062 seconds time elapsed +TOTAL : 0.644574 sec + 2,626,626,234 cycles # 3.016 GHz + 4,025,842,605 instructions # 1.53 insn per cycle + 0.931663719 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 @@ -73,14 +77,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.167637e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.374513e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.374513e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.152263e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.357614e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.357614e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.781308 sec - 17,933,067,465 cycles # 3.100 GHz - 42,535,595,028 instructions # 2.37 insn per cycle - 5.786442058 seconds time elapsed +TOTAL : 5.857814 sec + 17,950,593,870 cycles # 3.062 GHz + 42,536,443,801 instructions # 2.37 insn per cycle + 5.862908022 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 421) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.752061e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.330676e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.330676e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.781795e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.366816e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.366816e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.989523 sec - 12,191,986,873 cycles # 3.053 GHz 
- 30,267,026,279 instructions # 2.48 insn per cycle - 3.994946276 seconds time elapsed +TOTAL : 3.925776 sec + 12,158,268,283 cycles # 3.094 GHz + 30,266,774,018 instructions # 2.49 insn per cycle + 3.930856861 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1692) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.104170e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.950168e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.950168e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.107093e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.951411e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.951411e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.378896 sec - 10,036,789,390 cycles # 2.966 GHz - 19,281,934,589 instructions # 1.92 insn per cycle - 3.384394357 seconds time elapsed +TOTAL : 3.373013 sec + 10,033,049,034 cycles # 2.971 GHz + 19,281,562,841 instructions # 1.92 insn per cycle + 3.378152041 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2162) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.193547e+06 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.112581e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.112581e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.193939e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.106247e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.106247e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.252743 sec - 9,664,135,628 cycles # 2.967 GHz - 18,781,593,266 instructions # 1.94 insn per cycle - 3.258138009 seconds time elapsed +TOTAL : 3.253124 sec + 9,643,205,186 cycles # 2.961 GHz + 18,781,851,111 instructions # 1.95 insn per cycle + 3.258335142 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1833) (512y: 191) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.981542e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.693938e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.693938e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.953784e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.646715e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.646715e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.566812 sec - 8,278,244,012 cycles # 2.318 GHz - 14,988,373,891 instructions # 1.81 insn per cycle - 3.572200010 seconds time elapsed +TOTAL : 3.613479 sec + 8,269,801,205 cycles # 2.286 GHz + 14,988,618,301 instructions # 1.81 insn per cycle + 3.618555203 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1020) (512y: 156) (512z: 1305) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 8faf80849d..d91837223d 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_22:55:51 +DATE: 2023-10-30_22:42:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.022553e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.169186e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.268682e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.280545e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.182594e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.276893e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.515209 sec - 2,233,987,909 cycles # 3.011 GHz - 3,236,100,958 instructions # 1.45 insn per cycle - 0.801892412 seconds time elapsed +TOTAL : 0.509947 sec + 2,245,406,404 cycles # 3.004 GHz + 3,238,247,605 instructions # 1.44 insn per cycle + 0.806066214 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -73,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.182452e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.246743e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.246743e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.122811e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.185621e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.185621e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.907642 sec - 15,152,192,071 cycles # 3.085 GHz - 38,436,494,690 instructions # 2.54 insn per cycle - 4.912915615 seconds time elapsed +TOTAL : 5.047558 sec + 15,156,486,389 cycles # 3.002 GHz + 38,440,455,348 instructions # 2.54 insn per cycle + 5.052769245 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.679759e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.887370e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.887370e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.702211e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.903740e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.903740e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.956512 sec - 9,092,266,094 cycles # 3.071 GHz - 
24,590,422,031 instructions # 2.70 insn per cycle - 2.961838870 seconds time elapsed +TOTAL : 2.936396 sec + 9,105,668,983 cycles # 3.097 GHz + 24,592,502,332 instructions # 2.70 insn per cycle + 2.941522608 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.603494e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.103886e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.103886e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.960855e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.471891e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.471891e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.980943 sec - 5,471,545,616 cycles # 2.756 GHz - 11,266,361,130 instructions # 2.06 insn per cycle - 1.986440255 seconds time elapsed +TOTAL : 1.862437 sec + 5,450,005,076 cycles # 2.919 GHz + 11,265,361,123 instructions # 2.07 insn per cycle + 1.867565340 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.611609e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 7.258293e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.258293e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.618799e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.273168e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.273168e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.688302 sec - 4,962,792,293 cycles # 2.931 GHz - 10,569,518,172 instructions # 2.13 insn per cycle - 1.693732006 seconds time elapsed +TOTAL : 1.686602 sec + 4,953,592,768 cycles # 2.929 GHz + 10,570,210,152 instructions # 2.13 insn per cycle + 1.691821851 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.090511e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.328359e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.328359e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.081730e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.316605e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.316605e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.667965 sec - 5,390,988,174 cycles # 2.017 GHz - 7,804,802,792 instructions # 1.45 insn per cycle - 2.673353935 seconds time elapsed +TOTAL : 2.673039 sec + 5,387,031,443 cycles # 2.012 GHz + 7,805,348,613 instructions # 1.45 insn per cycle + 2.678234005 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index 4a146055f5..5370f7c935 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_23:26:46 +DATE: 2023-10-30_23:10:39 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.640905e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.017146e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.017146e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.655944e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.023463e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.023463e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.795504 sec - 3,127,754,708 cycles # 3.022 GHz - 4,889,520,861 instructions # 1.56 insn per cycle - 1.092841714 seconds time elapsed +TOTAL : 0.795812 sec + 3,140,135,292 cycles # 3.032 GHz + 4,880,880,783 instructions # 1.55 insn per cycle + 1.093165843 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost @@ -82,14 +86,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.149858e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.212968e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.212968e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.190219e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.253906e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.253906e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.058597 sec - 15,696,511,300 cycles # 3.100 GHz - 38,498,945,603 instructions # 2.45 insn per cycle - 5.065152610 seconds time elapsed +TOTAL : 4.971251 sec + 15,474,705,468 cycles # 3.109 GHz + 38,496,723,540 instructions # 2.49 insn per cycle + 4.977932153 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -110,14 +114,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.611071e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.805644e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.805644e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.690707e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.889062e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.889062e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.087844 sec - 9,458,665,415 cycles # 3.058 GHz - 24,775,783,056 instructions # 2.62 insn per cycle - 3.094441427 seconds 
time elapsed +TOTAL : 3.021057 sec + 9,434,067,456 cycles # 3.117 GHz + 24,774,366,858 instructions # 2.63 insn per cycle + 3.027514850 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -138,14 +142,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.772811e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.281889e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.281889e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.629438e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.092140e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.092140e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.004130 sec - 5,804,812,481 cycles # 2.888 GHz - 11,553,800,215 instructions # 1.99 insn per cycle - 2.011016331 seconds time elapsed +TOTAL : 2.049046 sec + 5,805,137,288 cycles # 2.825 GHz + 11,552,400,768 instructions # 1.99 insn per cycle + 2.055626829 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -166,14 +170,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.447831e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.064850e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) 
= ( 7.064850e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.394764e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.988019e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.988019e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.808534 sec - 5,312,663,346 cycles # 2.930 GHz - 10,859,775,036 instructions # 2.04 insn per cycle - 1.815109451 seconds time elapsed +TOTAL : 1.821677 sec + 5,305,081,925 cycles # 2.904 GHz + 10,857,512,913 instructions # 2.05 insn per cycle + 1.828081824 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -194,14 +198,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.011582e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.238367e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.238367e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.993136e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.216683e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.216683e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.796689 sec - 5,765,378,482 cycles # 2.057 GHz - 8,049,035,674 instructions # 1.40 insn per cycle - 2.803259503 seconds time elapsed +TOTAL : 2.811888 sec + 5,748,853,229 cycles # 2.042 GHz + 8,049,831,971 instructions # 1.40 insn per cycle + 2.818538068 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index 852b6969ed..bb48e05236 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_23:39:16 +DATE: 2023-10-30_23:23:24 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.737152e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.159854e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.268362e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.736795e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.161054e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.269806e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 0.607361 sec - 2,522,338,369 cycles # 3.013 GHz - 3,662,390,525 instructions # 1.45 insn per cycle - 0.895938793 seconds time elapsed +TOTAL : 0.615761 sec + 2,516,032,944 cycles # 2.984 GHz + 3,646,627,135 instructions # 1.45 insn per cycle + 0.900193240 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -73,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.194281e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.258156e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.258156e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.191283e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.255076e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.255076e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.940662 sec - 15,324,069,943 cycles # 3.099 GHz - 38,452,708,280 instructions # 2.51 insn per cycle - 4.945884999 seconds time elapsed +TOTAL : 4.949371 sec + 15,346,914,541 cycles # 3.098 GHz + 38,453,059,865 instructions # 2.51 insn per cycle + 4.954545090 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.685629e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.885512e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.885512e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.699240e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.903594e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.903594e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.007465 sec - 9,278,655,226 cycles # 3.081 GHz - 
24,590,238,814 instructions # 2.65 insn per cycle - 3.012796450 seconds time elapsed +TOTAL : 2.998035 sec + 9,284,549,452 cycles # 3.092 GHz + 24,590,432,125 instructions # 2.65 insn per cycle + 3.003369046 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.840961e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.350814e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.350814e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.897632e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.421703e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.421703e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.958737 sec - 5,644,158,885 cycles # 2.875 GHz - 11,248,092,256 instructions # 1.99 insn per cycle - 1.963882920 seconds time elapsed +TOTAL : 1.941152 sec + 5,665,929,312 cycles # 2.912 GHz + 11,247,969,812 instructions # 1.99 insn per cycle + 1.946452907 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.249720e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 6.861953e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.861953e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.630334e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.280700e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.280700e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.847755 sec - 5,135,966,832 cycles # 2.773 GHz - 10,522,578,708 instructions # 2.05 insn per cycle - 1.853336904 seconds time elapsed +TOTAL : 1.745934 sec + 5,133,074,927 cycles # 2.932 GHz + 10,520,747,164 instructions # 2.05 insn per cycle + 1.751320373 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.012886e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.244665e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.244665e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.918342e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.141024e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.141024e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.776977 sec - 5,559,598,432 cycles # 1.999 GHz - 7,754,154,174 instructions # 1.39 insn per cycle - 2.782273535 seconds time elapsed +TOTAL : 2.843108 sec + 5,573,130,501 cycles # 1.957 GHz + 7,754,567,913 instructions # 1.39 insn per cycle + 2.848428437 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt index a8b2f7d0ee..3fd09fe61e 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_23:36:04 +DATE: 2023-10-30_23:20:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.742144e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.162516e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.272033e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.737517e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.161761e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270287e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.550222 sec - 2,352,867,175 cycles # 3.012 GHz - 3,685,140,796 instructions # 1.57 insn per cycle - 0.840259072 seconds time elapsed +TOTAL : 0.551873 sec + 2,358,028,619 cycles # 2.998 GHz + 3,692,132,213 instructions # 1.57 insn per cycle + 0.846295423 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -73,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.194117e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.257904e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.257904e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.184443e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.248148e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.248148e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.882304 sec - 15,146,493,935 cycles # 3.100 GHz - 38,436,498,659 instructions # 2.54 insn per cycle - 4.887583335 seconds time elapsed +TOTAL : 4.904538 sec + 15,160,595,332 cycles # 3.088 GHz + 38,436,526,776 instructions # 2.54 insn per cycle + 4.910021872 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.703201e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.905185e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.905185e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.693975e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.894111e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.894111e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.934975 sec - 9,100,538,588 cycles # 3.096 GHz - 
24,590,797,051 instructions # 2.70 insn per cycle - 2.940311227 seconds time elapsed +TOTAL : 2.943132 sec + 9,120,478,043 cycles # 3.094 GHz + 24,590,860,131 instructions # 2.70 insn per cycle + 2.948509619 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.949109e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.463374e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.463374e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.916332e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.436269e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.436269e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.865224 sec - 5,462,840,274 cycles # 2.922 GHz - 11,264,977,168 instructions # 2.06 insn per cycle - 1.870595769 seconds time elapsed +TOTAL : 1.877491 sec + 5,483,595,754 cycles # 2.914 GHz + 11,265,750,688 instructions # 2.05 insn per cycle + 1.882860646 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.642159e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 7.287062e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.287062e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.553762e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.202447e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.202447e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.680715 sec - 4,938,481,524 cycles # 2.930 GHz - 10,571,461,567 instructions # 2.14 insn per cycle - 1.686022288 seconds time elapsed +TOTAL : 1.704603 sec + 4,969,259,538 cycles # 2.907 GHz + 10,571,999,117 instructions # 2.13 insn per cycle + 1.710109506 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.087400e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.324514e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.324514e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.915873e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.132745e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.132745e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.668501 sec - 5,400,613,890 cycles # 2.020 GHz - 7,804,891,421 instructions # 1.45 insn per cycle - 2.673655683 seconds time elapsed +TOTAL : 2.784277 sec + 5,389,942,641 cycles # 1.933 GHz + 7,806,239,729 instructions # 1.45 insn per cycle + 2.789657703 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index 6535b6cfe4..d894a954b8 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_23:32:57 +DATE: 2023-10-30_23:16:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -47,14 +51,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.045776e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.160228e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.269064e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.718006e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.158908e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.267607e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.693511 sec - 2,792,166,398 cycles # 3.034 GHz - 4,398,875,647 instructions # 1.58 insn per cycle - 0.979299052 seconds time elapsed +TOTAL : 0.705827 sec + 2,822,729,450 cycles # 3.034 GHz + 4,418,624,178 instructions # 1.57 insn per cycle + 0.990011938 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -75,14 +79,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.191609e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.257007e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.257007e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.186659e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.250039e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.250039e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.889577 sec - 15,150,373,861 cycles # 3.096 GHz - 38,436,083,829 instructions # 2.54 insn per cycle - 4.894924037 seconds time elapsed +TOTAL : 4.899766 sec + 15,151,167,976 cycles # 3.089 GHz + 38,437,118,151 instructions # 2.54 insn per cycle + 4.905336258 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -102,14 +106,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.715982e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.923240e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.923240e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.632977e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.833738e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.833738e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.924496 sec - 9,096,467,047 cycles # 3.106 GHz - 24,590,726,023 instructions # 2.70 insn per cycle - 2.929631165 
seconds time elapsed +TOTAL : 2.990641 sec + 9,104,000,531 cycles # 3.040 GHz + 24,591,172,861 instructions # 2.70 insn per cycle + 2.995857286 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -129,14 +133,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.954558e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.476070e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.476070e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.956023e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.485444e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.485444e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.864371 sec - 5,489,366,756 cycles # 2.937 GHz - 11,265,437,305 instructions # 2.05 insn per cycle - 1.869765983 seconds time elapsed +TOTAL : 1.865029 sec + 5,485,610,630 cycles # 2.934 GHz + 11,265,262,528 instructions # 2.05 insn per cycle + 1.870379155 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -156,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.498968e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.136400e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 7.136400e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.617372e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.266189e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.266189e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.717689 sec - 4,945,186,310 cycles # 2.871 GHz - 10,569,892,258 instructions # 2.14 insn per cycle - 1.723024481 seconds time elapsed +TOTAL : 1.689765 sec + 4,937,700,060 cycles # 2.914 GHz + 10,571,493,395 instructions # 2.14 insn per cycle + 1.695143677 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -183,14 +187,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.103096e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.341060e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.341060e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.099579e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.343259e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.343259e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.658567 sec - 5,391,246,150 cycles # 2.025 GHz - 7,805,025,014 instructions # 1.45 insn per cycle - 2.663924818 seconds time elapsed +TOTAL : 2.663038 sec + 5,397,595,899 cycles # 2.025 GHz + 7,805,657,902 instructions # 1.45 insn per cycle + 2.668426378 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index 28991481e3..d3de8228b3 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_22:56:17 +DATE: 2023-10-30_22:42:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.025981e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.169481e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.266361e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.271764e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.178316e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270685e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.511975 sec - 2,237,170,226 cycles # 3.018 GHz - 3,235,319,101 instructions # 1.45 insn per cycle - 0.797845351 seconds time elapsed +TOTAL : 0.509733 sec + 2,227,538,194 cycles # 3.008 GHz + 3,225,921,656 instructions # 1.45 insn per cycle + 0.797383843 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 @@ -73,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.215548e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.281173e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.281173e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.204961e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.270121e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.270121e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.836086 sec - 15,032,256,829 cycles # 3.106 GHz - 40,164,623,116 instructions # 2.67 insn per cycle - 4.841265154 seconds time elapsed +TOTAL : 4.859233 sec + 15,030,887,746 cycles # 3.091 GHz + 40,165,178,900 instructions # 2.67 insn per cycle + 4.864240792 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.894720e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.118587e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.118587e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.793268e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.013838e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.013838e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.794710 sec - 8,676,040,838 cycles # 3.099 GHz - 
23,684,337,981 instructions # 2.73 insn per cycle - 2.800202741 seconds time elapsed +TOTAL : 2.868683 sec + 8,677,905,475 cycles # 3.025 GHz + 23,687,254,858 instructions # 2.73 insn per cycle + 2.873954741 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2069) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.305864e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.724393e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.724393e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.272133e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.685740e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.685740e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.078632 sec - 6,088,003,456 cycles # 2.923 GHz - 13,074,476,696 instructions # 2.15 insn per cycle - 2.083971169 seconds time elapsed +TOTAL : 2.093033 sec + 6,095,283,841 cycles # 2.906 GHz + 13,076,094,976 instructions # 2.15 insn per cycle + 2.098361435 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2546) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.589272e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 6.048410e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.048410e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.561590e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.013027e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.013027e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.977912 sec - 5,804,117,930 cycles # 2.928 GHz - 12,332,655,739 instructions # 2.12 insn per cycle - 1.983152296 seconds time elapsed +TOTAL : 1.988737 sec + 5,807,094,296 cycles # 2.913 GHz + 12,333,429,397 instructions # 2.12 insn per cycle + 1.994110135 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2096) (512y: 294) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.700119e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.903509e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.903509e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.770161e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.968869e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.968869e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.941360 sec - 5,814,448,158 cycles # 1.974 GHz - 9,613,559,466 instructions # 1.65 insn per cycle - 2.946710269 seconds time elapsed +TOTAL : 2.883990 sec + 5,806,828,747 cycles # 2.010 GHz + 9,613,660,096 instructions # 1.66 insn per cycle + 2.889230202 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1510) (512y: 209) (512z: 1971) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 67ce42c701..30aaca97e3 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl1_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_23:17:03 +DATE: 2023-10-30_23:00:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.570060e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.154967e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.269382e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.730699e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.161168e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.269447e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.522726 sec - 2,265,340,168 cycles # 2.993 GHz - 3,245,639,537 instructions # 1.43 insn per cycle - 0.816182159 seconds time elapsed +TOTAL : 0.516638 sec + 2,238,038,759 cycles # 3.003 GHz + 3,164,086,777 instructions # 1.41 insn per cycle + 0.804338452 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -73,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.556809e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.644599e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.644599e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.561892e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.650324e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.650324e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.205292 sec - 13,015,864,783 cycles # 3.092 GHz - 34,405,849,374 instructions # 2.64 insn per cycle - 4.211064725 seconds time elapsed +TOTAL : 4.199726 sec + 13,022,340,636 cycles # 3.098 GHz + 34,407,560,979 instructions # 2.64 insn per cycle + 4.205140576 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 686) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.147270e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.292062e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.292062e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.072749e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.213016e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.213016e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.436163 sec - 10,617,721,779 cycles # 3.086 GHz - 
24,023,060,269 instructions # 2.26 insn per cycle - 3.441752058 seconds time elapsed +TOTAL : 3.519661 sec + 10,617,291,793 cycles # 3.015 GHz + 24,026,447,750 instructions # 2.26 insn per cycle + 3.525235539 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.819238e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.163536e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.163536e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.849441e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.192796e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.192796e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.279956 sec - 6,619,549,425 cycles # 2.900 GHz - 12,415,373,269 instructions # 1.88 insn per cycle - 2.285571623 seconds time elapsed +TOTAL : 2.266906 sec + 6,599,977,627 cycles # 2.905 GHz + 12,414,641,303 instructions # 1.88 insn per cycle + 2.272301949 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3156) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.082600e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 5.455787e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.455787e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.137878e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.521965e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.521965e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.166233 sec - 6,311,826,799 cycles # 2.907 GHz - 11,588,393,146 instructions # 1.84 insn per cycle - 2.171681945 seconds time elapsed +TOTAL : 2.143814 sec + 6,256,595,870 cycles # 2.912 GHz + 11,588,487,458 instructions # 1.85 insn per cycle + 2.149310332 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2692) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.122487e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.365337e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.365337e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.124446e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.363360e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.363360e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.648103 sec - 5,340,501,776 cycles # 2.014 GHz - 9,309,235,597 instructions # 1.74 insn per cycle - 2.653678227 seconds time elapsed +TOTAL : 2.646562 sec + 5,351,277,730 cycles # 2.019 GHz + 9,309,073,211 instructions # 1.74 insn per cycle + 2.651978158 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2116) (512y: 282) (512z: 1958) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index 62680c581a..ce4e266692 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl1_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_23:17:30 +DATE: 2023-10-30_23:01:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.564062e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.147826e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.264986e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.729487e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.162433e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270183e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.519306 sec - 2,255,189,511 cycles # 3.009 GHz - 3,221,417,076 instructions # 1.43 insn per cycle - 0.809166285 seconds time elapsed +TOTAL : 0.516858 sec + 2,237,295,030 cycles # 2.996 GHz + 3,174,562,297 instructions # 1.42 insn per cycle + 0.805124267 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 @@ -73,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.695140e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.795404e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.795404e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.696103e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.793176e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.793176e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.994496 sec - 12,384,425,334 cycles # 3.097 GHz - 35,058,885,666 instructions # 2.83 insn per cycle - 3.999892654 seconds time elapsed +TOTAL : 3.993532 sec + 12,372,967,111 cycles # 3.095 GHz + 35,058,726,353 instructions # 2.83 insn per cycle + 3.998904701 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 457) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.136077e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.280277e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.280277e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.074608e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.212813e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.212813e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.446583 sec - 10,684,717,975 cycles # 3.096 GHz - 
23,099,617,393 instructions # 2.16 insn per cycle - 3.451955165 seconds time elapsed +TOTAL : 3.515611 sec + 10,695,053,716 cycles # 3.038 GHz + 23,101,257,936 instructions # 2.16 insn per cycle + 3.521049881 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2363) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.209600e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.607410e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.607410e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.247576e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.650304e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.650304e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.115939 sec - 6,169,983,144 cycles # 2.909 GHz - 11,969,796,573 instructions # 1.94 insn per cycle - 2.121437158 seconds time elapsed +TOTAL : 2.101997 sec + 6,159,177,623 cycles # 2.924 GHz + 11,969,990,350 instructions # 1.94 insn per cycle + 2.107463553 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2511) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.312624e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 5.722857e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.722857e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.036588e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.425695e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.425695e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.077114 sec - 6,023,533,872 cycles # 2.893 GHz - 11,143,307,100 instructions # 1.85 insn per cycle - 2.082671772 seconds time elapsed +TOTAL : 2.188313 sec + 6,014,477,005 cycles # 2.742 GHz + 11,143,899,041 instructions # 1.85 insn per cycle + 2.193874742 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2128) (512y: 174) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.192169e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.444249e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.444249e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.156544e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.400305e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.400305e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.605068 sec - 5,223,191,580 cycles # 2.002 GHz - 9,033,300,795 instructions # 1.73 insn per cycle - 2.610459589 seconds time elapsed +TOTAL : 2.627559 sec + 5,253,056,551 cycles # 1.996 GHz + 9,033,435,385 instructions # 1.72 insn per cycle + 2.633049022 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1651) (512y: 208) (512z: 1567) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index aa352fe01e..6da43fd641 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_22:56:44 +DATE: 2023-10-30_22:43:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.029207e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.660992e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.952559e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.106542e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.720684e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.980250e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.471116 sec - 2,075,735,853 cycles # 2.996 GHz - 2,991,480,396 instructions # 1.44 insn per cycle - 0.749941019 seconds time elapsed +TOTAL : 0.468593 sec + 2,057,005,401 cycles # 2.993 GHz + 2,917,040,882 instructions # 1.42 insn per cycle + 0.745160706 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 @@ -73,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.340367e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.416459e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.416459e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.329290e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.404591e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.404591e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.563941 sec - 14,152,524,452 cycles # 3.098 GHz - 38,393,574,090 instructions # 2.71 insn per cycle - 4.568822559 seconds time elapsed +TOTAL : 4.586315 sec + 14,172,393,957 cycles # 3.087 GHz + 38,391,636,412 instructions # 2.71 insn per cycle + 4.591225371 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.252416e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.682693e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.682693e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.089249e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.493979e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.493979e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.078659 sec - 6,471,097,125 cycles # 3.107 GHz - 
15,829,624,258 instructions # 2.45 insn per cycle - 2.083554513 seconds time elapsed +TOTAL : 2.144092 sec + 6,471,307,430 cycles # 3.012 GHz + 15,829,826,382 instructions # 2.45 insn per cycle + 2.149125793 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.593020e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.101399e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.101399e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.905728e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.021084e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.021084e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.175125 sec - 3,457,161,071 cycles # 2.931 GHz - 7,606,572,370 instructions # 2.20 insn per cycle - 1.180355415 seconds time elapsed +TOTAL : 1.264061 sec + 3,460,763,676 cycles # 2.728 GHz + 7,607,180,903 instructions # 2.20 insn per cycle + 1.269495301 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.022780e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.188618e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.188618e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.027805e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.193250e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.193250e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.110409 sec - 3,250,510,189 cycles # 2.917 GHz - 7,216,053,398 instructions # 2.22 insn per cycle - 1.115909184 seconds time elapsed +TOTAL : 1.102476 sec + 3,246,168,576 cycles # 2.933 GHz + 7,215,794,390 instructions # 2.22 insn per cycle + 1.107709326 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.406337e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.252882e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.252882e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.600644e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.456495e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.456495e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.500461 sec - 3,058,902,515 cycles # 2.034 GHz - 5,844,747,311 instructions # 1.91 insn per cycle - 1.505609823 seconds time elapsed +TOTAL : 1.462918 sec + 3,063,851,596 cycles # 2.088 GHz + 5,846,117,154 instructions # 1.91 insn per cycle + 1.467990395 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index 5ab757a50c..7f7e1d606d 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_23:27:14 +DATE: 2023-10-30_23:11:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.342154e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.755663e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.755663e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.366832e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.822977e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.822977e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.658303 sec - 2,665,727,908 cycles # 3.020 GHz - 4,113,714,963 instructions # 1.54 insn per cycle - 0.941565209 seconds time elapsed +TOTAL : 0.653541 sec + 2,694,324,767 cycles # 3.042 GHz + 4,193,830,258 instructions # 1.56 insn per cycle + 0.942698763 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost @@ -82,14 +86,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.333682e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.410114e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.410114e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.342376e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.418135e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.418135e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.618102 sec - 14,338,054,509 cycles # 3.102 GHz - 38,436,340,206 instructions # 2.68 insn per cycle - 4.624142820 seconds time elapsed +TOTAL : 4.602060 sec + 14,334,608,868 cycles # 3.111 GHz + 38,435,670,602 instructions # 2.68 insn per cycle + 4.608235736 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -110,14 +114,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.183610e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.607484e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.607484e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.213893e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.643183e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.643183e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.151042 sec - 6,679,436,155 cycles # 3.098 GHz - 16,110,006,537 instructions # 2.41 insn per cycle - 2.157129366 seconds time 
elapsed +TOTAL : 2.139790 sec + 6,672,789,863 cycles # 3.111 GHz + 16,109,837,908 instructions # 2.41 insn per cycle + 2.145928460 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -138,14 +142,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.414037e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.079639e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.079639e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.467509e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.085520e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.085520e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.240366 sec - 3,659,881,370 cycles # 2.938 GHz - 7,844,165,630 instructions # 2.14 insn per cycle - 1.246487540 seconds time elapsed +TOTAL : 1.235887 sec + 3,669,653,224 cycles # 2.956 GHz + 7,844,588,977 instructions # 2.14 insn per cycle + 1.241993093 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -166,14 +170,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.010699e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.171922e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.171922e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.015093e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.176261e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.176261e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.164913 sec - 3,451,308,059 cycles # 2.949 GHz - 7,453,101,304 instructions # 2.16 insn per cycle - 1.171040454 seconds time elapsed +TOTAL : 1.161149 sec + 3,452,043,722 cycles # 2.960 GHz + 7,453,312,449 instructions # 2.16 insn per cycle + 1.167242704 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -194,14 +198,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.500212e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.340447e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.340447e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.587543e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.430774e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.430774e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.526899 sec - 3,275,626,557 cycles # 2.138 GHz - 6,100,657,130 instructions # 1.86 insn per cycle - 1.533130362 seconds time elapsed +TOTAL : 1.510256 sec + 3,262,706,450 cycles # 2.153 GHz + 6,100,300,493 instructions # 1.87 insn per cycle + 1.516346990 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe diff 
--git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index c3d81bddda..4c350b0716 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_23:39:43 +DATE: 2023-10-30_23:23:51 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.798695e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.634515e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.944433e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.801264e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.639438e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.941307e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 -TOTAL : 0.556545 sec - 2,327,252,717 cycles # 3.008 GHz - 3,421,736,970 instructions # 1.47 insn per cycle - 0.832373460 seconds time elapsed +TOTAL : 0.557664 sec + 2,331,543,515 cycles # 2.993 GHz + 3,413,686,462 instructions # 1.46 insn per cycle + 0.836667519 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 @@ -73,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.327580e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.402841e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.402841e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.279854e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.351747e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.351747e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 4.642532 sec - 14,315,105,147 cycles # 3.081 GHz - 38,421,802,206 instructions # 2.68 insn per cycle - 4.647613343 seconds time elapsed +TOTAL : 4.736031 sec + 14,322,445,358 cycles # 3.022 GHz + 38,422,566,575 instructions # 2.68 insn per cycle + 4.740915131 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.037912e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.461691e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.461691e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.166961e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.588776e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.588776e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.223517 sec - 6,632,406,040 cycles # 2.977 GHz - 
15,841,248,477 instructions # 2.39 insn per cycle - 2.228830767 seconds time elapsed +TOTAL : 2.167189 sec + 6,643,980,938 cycles # 3.061 GHz + 15,842,875,312 instructions # 2.38 insn per cycle + 2.172258926 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.226907e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.061731e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.061731e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.593001e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.102557e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.102557e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.274705 sec - 3,630,232,417 cycles # 2.838 GHz - 7,590,830,281 instructions # 2.09 insn per cycle - 1.279899279 seconds time elapsed +TOTAL : 1.228870 sec + 3,628,474,043 cycles # 2.942 GHz + 7,590,788,545 instructions # 2.09 insn per cycle + 1.233998812 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.647063e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.119085e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.119085e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.019085e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.186087e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.186087e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.228082 sec - 3,431,774,015 cycles # 2.795 GHz - 7,168,267,152 instructions # 2.09 insn per cycle - 1.233468520 seconds time elapsed +TOTAL : 1.166612 sec + 3,422,071,573 cycles # 2.922 GHz + 7,166,231,514 instructions # 2.09 insn per cycle + 1.171724120 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.970825e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.735405e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.735405e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.583272e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.437719e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.437719e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.647433 sec - 3,235,860,034 cycles # 1.959 GHz - 5,797,322,103 instructions # 1.79 insn per cycle - 1.652600396 seconds time elapsed +TOTAL : 1.521831 sec + 3,238,239,834 cycles # 2.122 GHz + 5,797,083,051 instructions # 1.79 insn per cycle + 1.527076370 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt index 023fba572a..c1a2ea0227 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_23:36:30 +DATE: 2023-10-30_23:20:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.809222e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.645957e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.947353e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.862897e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.663387e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.966562e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.503446 sec - 2,185,473,363 cycles # 3.028 GHz - 3,416,682,681 instructions # 1.56 insn per cycle - 0.779206316 seconds time elapsed +TOTAL : 0.501383 sec + 2,181,381,336 cycles # 3.006 GHz + 3,424,313,283 instructions # 1.57 insn per cycle + 0.782844227 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 @@ -73,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.298249e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.373831e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.373831e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.331003e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.407484e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.407484e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.647478 sec - 14,148,019,955 cycles # 3.042 GHz - 38,392,518,660 instructions # 2.71 insn per cycle - 4.652491461 seconds time elapsed +TOTAL : 4.582165 sec + 14,152,562,935 cycles # 3.086 GHz + 38,392,294,188 instructions # 2.71 insn per cycle + 4.587123865 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.232862e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.665849e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.665849e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.203403e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.628400e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.628400e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.086994 sec - 6,478,231,222 cycles # 3.098 GHz - 
15,829,954,128 instructions # 2.44 insn per cycle - 2.092206985 seconds time elapsed +TOTAL : 2.100151 sec + 6,477,299,825 cycles # 3.078 GHz + 15,829,339,843 instructions # 2.44 insn per cycle + 2.105215947 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.466868e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.088473e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.088473e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.592522e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.100397e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.100397e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.190025 sec - 3,460,027,857 cycles # 2.897 GHz - 7,606,585,097 instructions # 2.20 insn per cycle - 1.195060645 seconds time elapsed +TOTAL : 1.176672 sec + 3,456,621,724 cycles # 2.927 GHz + 7,606,538,399 instructions # 2.20 insn per cycle + 1.181720411 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.024370e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.190854e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.190854e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.017882e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.180154e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.180154e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.105334 sec - 3,257,431,655 cycles # 2.935 GHz - 7,214,579,277 instructions # 2.21 insn per cycle - 1.110486214 seconds time elapsed +TOTAL : 1.113532 sec + 3,248,683,790 cycles # 2.906 GHz + 7,215,461,932 instructions # 2.22 insn per cycle + 1.118566030 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.208977e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.010764e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.010764e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.292205e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.091909e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.091909e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.539764 sec - 3,065,042,679 cycles # 1.985 GHz - 5,845,805,718 instructions # 1.91 insn per cycle - 1.545069845 seconds time elapsed +TOTAL : 1.523124 sec + 3,068,382,625 cycles # 2.009 GHz + 5,846,492,637 instructions # 1.91 insn per cycle + 1.528127506 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index cf74897fe6..7f6619cd33 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_23:33:23 +DATE: 2023-10-30_23:17:22 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -47,14 +51,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.224135e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.649986e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.947822e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.174030e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.645464e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.946490e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.601699 sec - 2,513,463,986 cycles # 3.041 GHz - 3,911,746,126 instructions # 1.56 insn per cycle - 0.884178922 seconds time elapsed +TOTAL : 0.601645 sec + 2,504,719,694 cycles # 3.039 GHz + 3,887,630,578 instructions # 1.55 insn per cycle + 0.883248378 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -75,14 +79,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.350355e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.426473e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.426473e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.347093e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.424329e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.424329e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.544385 sec - 14,150,131,454 cycles # 3.111 GHz - 38,392,663,057 instructions # 2.71 insn per cycle - 4.549365415 seconds time elapsed +TOTAL : 4.550575 sec + 14,154,277,861 cycles # 3.108 GHz + 38,392,177,110 instructions # 2.71 insn per cycle + 4.555522154 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -102,14 +106,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.250294e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.679751e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.679751e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.252432e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.682421e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.682421e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.079738 sec - 6,469,789,265 cycles # 3.104 GHz - 15,829,612,923 instructions # 2.45 insn per cycle - 2.084975976 seconds 
time elapsed +TOTAL : 2.079439 sec + 6,469,167,580 cycles # 3.105 GHz + 15,829,428,699 instructions # 2.45 insn per cycle + 2.084347706 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -129,14 +133,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.594124e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.103315e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.103315e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.637369e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.106605e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.106605e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.174775 sec - 3,463,679,492 cycles # 2.937 GHz - 7,606,485,307 instructions # 2.20 insn per cycle - 1.179888489 seconds time elapsed +TOTAL : 1.170723 sec + 3,458,271,351 cycles # 2.943 GHz + 7,606,680,804 instructions # 2.20 insn per cycle + 1.175720703 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -156,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.017682e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.180616e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.180616e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.034498e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.200618e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.200618e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.112712 sec - 3,249,854,503 cycles # 2.909 GHz - 7,214,706,775 instructions # 2.22 insn per cycle - 1.117999772 seconds time elapsed +TOTAL : 1.095956 sec + 3,250,338,121 cycles # 2.954 GHz + 7,214,870,285 instructions # 2.22 insn per cycle + 1.101160397 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -183,14 +187,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.605648e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.487831e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.487831e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.653433e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.514463e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.514463e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.462432 sec - 3,073,167,609 cycles # 2.096 GHz - 5,846,117,857 instructions # 1.90 insn per cycle - 1.467652309 seconds time elapsed +TOTAL : 1.452578 sec + 3,058,889,602 cycles # 2.100 GHz + 5,845,641,266 instructions # 1.91 insn per cycle + 1.457631752 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe diff 
--git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index 0a996f834c..2e156cf238 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_22:57:07 +DATE: 2023-10-30_22:43:24 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.037758e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.719252e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.016040e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.114345e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.750527e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.014575e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.470036 sec - 2,072,283,761 cycles # 2.995 GHz - 2,983,284,438 instructions # 1.44 insn per cycle - 0.749471768 seconds time elapsed +TOTAL : 0.467188 sec + 2,061,300,508 cycles # 3.013 GHz + 2,979,795,053 instructions # 1.45 insn per cycle + 0.742618348 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 @@ -73,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.253114e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.323651e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.323651e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.295508e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.368349e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.368349e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.737408 sec - 14,418,215,974 cycles # 3.042 GHz - 39,883,810,988 instructions # 2.77 insn per cycle - 4.742190490 seconds time elapsed +TOTAL : 4.651284 sec + 14,414,978,676 cycles # 3.097 GHz + 39,885,747,501 instructions # 2.77 insn per cycle + 4.656156481 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 570) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.075422e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.663303e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.663303e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.108362e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.705438e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.705438e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.809220 sec - 5,592,465,189 cycles # 3.084 GHz - 
15,299,712,088 instructions # 2.74 insn per cycle - 1.814483780 seconds time elapsed +TOTAL : 1.800005 sec + 5,593,021,873 cycles # 3.100 GHz + 15,299,807,626 instructions # 2.74 insn per cycle + 1.805212831 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2473) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.834505e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.534184e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.534184e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.787179e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.474335e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.474335e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.616733 sec - 4,735,474,011 cycles # 2.922 GHz - 9,748,366,481 instructions # 2.06 insn per cycle - 1.621819739 seconds time elapsed +TOTAL : 1.627313 sec + 4,741,036,728 cycles # 2.906 GHz + 9,747,765,661 instructions # 2.06 insn per cycle + 1.632312389 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3710) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.003908e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 7.735430e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.735430e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.999958e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.729374e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.729374e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.580130 sec - 4,630,502,921 cycles # 2.923 GHz - 9,338,331,720 instructions # 2.02 insn per cycle - 1.585139169 seconds time elapsed +TOTAL : 1.580177 sec + 4,620,710,026 cycles # 2.916 GHz + 9,339,228,525 instructions # 2.02 insn per cycle + 1.585058401 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3497) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.214571e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.784183e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.784183e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.137462e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.683087e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.683087e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.770080 sec - 3,656,314,511 cycles # 2.061 GHz - 7,046,094,328 instructions # 1.93 insn per cycle - 1.775184056 seconds time elapsed +TOTAL : 1.791618 sec + 3,651,443,842 cycles # 2.033 GHz + 7,045,651,657 instructions # 1.93 insn per cycle + 1.796536804 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2606) (512y: 12) (512z: 2221) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index a467b5d8bb..3e1f931c4f 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl1_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_23:17:56 +DATE: 2023-10-30_23:01:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.434987e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.650833e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.976397e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.802566e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.662871e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.967885e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.475936 sec - 2,110,435,140 cycles # 2.995 GHz - 3,008,103,642 instructions # 1.43 insn per cycle - 0.763528431 seconds time elapsed +TOTAL : 0.474180 sec + 2,101,252,491 cycles # 3.000 GHz + 3,030,117,960 instructions # 1.44 insn per cycle + 0.758250415 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 @@ -73,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.588741e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.682897e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.682897e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.641227e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.737778e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.737778e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.134770 sec - 12,607,914,642 cycles # 3.046 GHz - 34,393,723,738 instructions # 2.73 insn per cycle - 4.139979205 seconds time elapsed +TOTAL : 4.054902 sec + 12,605,264,157 cycles # 3.106 GHz + 34,394,645,325 instructions # 2.73 insn per cycle + 4.060052613 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 696) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.345993e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.816916e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.816916e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.576010e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.067873e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.067873e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.046835 sec - 6,102,010,194 cycles # 2.975 GHz - 
14,874,304,720 instructions # 2.44 insn per cycle - 2.052076023 seconds time elapsed +TOTAL : 1.965839 sec + 6,089,585,063 cycles # 3.091 GHz + 14,875,054,278 instructions # 2.44 insn per cycle + 1.971084739 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3009) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.619423e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.507763e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.507763e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.615830e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.504043e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.504043e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.459297 sec - 4,276,678,420 cycles # 2.922 GHz - 9,041,560,375 instructions # 2.11 insn per cycle - 1.464491491 seconds time elapsed +TOTAL : 1.461335 sec + 4,275,220,530 cycles # 2.917 GHz + 9,042,872,048 instructions # 2.12 insn per cycle + 1.466711798 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4445) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.754754e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 8.682518e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.682518e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.713493e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.603354e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.603354e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.434608 sec - 4,197,492,913 cycles # 2.917 GHz - 8,676,910,228 instructions # 2.07 insn per cycle - 1.439911295 seconds time elapsed +TOTAL : 1.443103 sec + 4,201,605,081 cycles # 2.905 GHz + 8,678,273,624 instructions # 2.07 insn per cycle + 1.448450717 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4244) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.823540e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.320310e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.320310e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.754828e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.235250e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.235250e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.884749 sec - 3,839,699,644 cycles # 2.033 GHz - 7,820,050,517 instructions # 2.04 insn per cycle - 1.890103108 seconds time elapsed +TOTAL : 1.906937 sec + 3,847,864,916 cycles # 2.015 GHz + 7,820,928,515 instructions # 2.03 insn per cycle + 1.912353157 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4420) (512y: 0) (512z: 2556) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index d6d7f1e6ab..8a69e175ae 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl1_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_23:18:20 +DATE: 2023-10-30_23:02:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.503817e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.701861e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.039467e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.853070e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.693882e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.006213e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.475921 sec - 2,117,417,472 cycles # 2.982 GHz - 2,996,840,505 instructions # 1.42 insn per cycle - 0.767027235 seconds time elapsed +TOTAL : 0.470775 sec + 2,095,007,668 cycles # 3.024 GHz + 2,980,363,168 instructions # 1.42 insn per cycle + 0.749660365 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 @@ -73,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.806204e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.918538e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.918538e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.827771e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.938870e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.938870e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 3.820998 sec - 11,761,780,361 cycles # 3.075 GHz - 35,129,373,494 instructions # 2.99 insn per cycle - 3.826162396 seconds time elapsed +TOTAL : 3.792247 sec + 11,758,748,490 cycles # 3.097 GHz + 35,129,457,123 instructions # 2.99 insn per cycle + 3.797289248 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 470) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.674605e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.182829e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.182829e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.738410e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.255110e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.255110e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.931572 sec - 5,953,219,823 cycles # 3.075 GHz - 
14,483,788,778 instructions # 2.43 insn per cycle - 1.936660694 seconds time elapsed +TOTAL : 1.911877 sec + 5,951,196,288 cycles # 3.106 GHz + 14,483,812,665 instructions # 2.43 insn per cycle + 1.917070183 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.777554e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.711884e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.711884e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.892954e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.829064e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.829064e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.430796 sec - 4,168,678,841 cycles # 2.904 GHz - 8,888,432,605 instructions # 2.13 insn per cycle - 1.436220799 seconds time elapsed +TOTAL : 1.410830 sec + 4,155,679,226 cycles # 2.936 GHz + 8,888,215,628 instructions # 2.14 insn per cycle + 1.416036036 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3576) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.428501e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 8.293244e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.293244e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.947895e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.896742e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.896742e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.495387 sec - 4,130,818,711 cycles # 2.754 GHz - 8,424,408,773 instructions # 2.04 insn per cycle - 1.500716701 seconds time elapsed +TOTAL : 1.402197 sec + 4,131,982,697 cycles # 2.937 GHz + 8,425,111,238 instructions # 2.04 insn per cycle + 1.407381420 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3320) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.895021e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.402173e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.402173e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.979339e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.495089e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.495089e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.862766 sec - 3,790,389,540 cycles # 2.030 GHz - 7,712,878,846 instructions # 2.03 insn per cycle - 1.868015050 seconds time elapsed +TOTAL : 1.837942 sec + 3,799,678,092 cycles # 2.063 GHz + 7,713,472,925 instructions # 2.03 insn per cycle + 1.843097802 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3436) (512y: 0) (512z: 2108) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index a57a0e7d63..9c8f35e78d 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_22:57:30 +DATE: 2023-10-30_22:43:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.024652e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.170014e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.267198e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.272066e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.179135e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.271243e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.514329 sec - 2,230,556,647 cycles # 2.995 GHz - 3,160,835,951 instructions # 1.42 insn per cycle - 0.801821216 seconds time elapsed +TOTAL : 0.508054 sec + 2,187,282,267 cycles # 2.978 GHz + 3,169,296,860 instructions # 1.45 insn per cycle + 0.792015861 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -73,14 +77,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.169903e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.233066e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.233066e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.169756e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.233247e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.233247e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.935048 sec - 15,258,964,089 cycles # 3.089 GHz - 38,638,103,907 instructions # 2.53 insn per cycle - 4.940346598 seconds time elapsed +TOTAL : 4.936357 sec + 15,268,315,849 cycles # 3.090 GHz + 38,638,138,492 instructions # 2.53 insn per cycle + 4.941625774 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 672) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.764548e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.972599e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.972599e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.681342e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.882250e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.882250e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.889853 sec - 8,945,301,665 cycles # 3.091 GHz - 
24,238,281,209 instructions # 2.71 insn per cycle - 2.895063314 seconds time elapsed +TOTAL : 2.951744 sec + 8,930,153,014 cycles # 3.021 GHz + 24,239,674,824 instructions # 2.71 insn per cycle + 2.957006740 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.830803e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.370680e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.370680e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.988456e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.517430e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.517430e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.902240 sec - 5,478,028,889 cycles # 2.878 GHz - 11,289,175,549 instructions # 2.06 insn per cycle - 1.907742702 seconds time elapsed +TOTAL : 1.854670 sec + 5,412,053,444 cycles # 2.911 GHz + 11,287,904,185 instructions # 2.09 insn per cycle + 1.859798747 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2480) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.746213e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 7.429778e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.429778e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.705564e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.369171e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.369171e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.656189 sec - 4,847,967,355 cycles # 2.919 GHz - 10,535,656,532 instructions # 2.17 insn per cycle - 1.661647670 seconds time elapsed +TOTAL : 1.666815 sec + 4,863,774,902 cycles # 2.910 GHz + 10,539,053,403 instructions # 2.17 insn per cycle + 1.672233801 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2167) (512y: 148) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.240952e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.495480e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.495480e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.216828e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.465613e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.465613e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.575099 sec - 5,211,387,602 cycles # 2.020 GHz - 7,613,456,375 instructions # 1.46 insn per cycle - 2.580291999 seconds time elapsed +TOTAL : 2.589300 sec + 5,225,981,137 cycles # 2.015 GHz + 7,613,810,266 instructions # 1.46 insn per cycle + 2.594475099 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1608) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index 4678e0a139..e20993a621 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_m_inl0_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-29_22:57:56 +DATE: 2023-10-30_22:44:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.037826e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.174975e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.272869e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.276731e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.178753e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.271013e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.513772 sec - 2,233,336,477 cycles # 3.003 GHz - 3,219,059,296 instructions # 1.44 insn per cycle - 0.800421259 seconds time elapsed +TOTAL : 0.517169 sec + 2,141,581,999 cycles # 2.876 GHz + 3,095,928,504 instructions # 1.45 insn per cycle + 0.802876427 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 @@ -73,14 +77,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.142916e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.204636e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.204636e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.130678e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.193208e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.193208e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.996995 sec - 15,391,509,592 cycles # 3.079 GHz - 40,434,267,052 instructions # 2.63 insn per cycle - 5.002292693 seconds time elapsed +TOTAL : 5.026708 sec + 15,377,164,375 cycles # 3.057 GHz + 40,433,126,230 instructions # 2.63 insn per cycle + 5.031817846 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest.exe @@ -100,14 +104,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.907128e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.140585e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.140585e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.967029e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.198905e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.198905e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.788965 sec - 8,494,141,525 cycles # 3.041 GHz - 
23,269,631,247 instructions # 2.74 insn per cycle - 2.794501830 seconds time elapsed +TOTAL : 2.746345 sec + 8,514,726,291 cycles # 3.096 GHz + 23,270,087,626 instructions # 2.73 insn per cycle + 2.751510263 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2091) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest.exe @@ -127,14 +131,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.029207e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.400708e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.400708e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.149152e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.533872e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.533872e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.189330 sec - 6,250,586,683 cycles # 2.854 GHz - 12,975,656,099 instructions # 2.08 insn per cycle - 2.194673870 seconds time elapsed +TOTAL : 2.139503 sec + 6,253,820,119 cycles # 2.917 GHz + 12,973,626,048 instructions # 2.07 insn per cycle + 2.144732816 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2669) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest.exe @@ -154,14 +158,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.390803e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 5.817317e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.817317e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.443292e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.870409e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.870409e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.047933 sec - 5,931,368,799 cycles # 2.890 GHz - 12,251,527,312 instructions # 2.07 insn per cycle - 2.053330268 seconds time elapsed +TOTAL : 2.028380 sec + 5,926,517,981 cycles # 2.916 GHz + 12,251,272,217 instructions # 2.07 insn per cycle + 2.033497640 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2209) (512y: 296) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest.exe @@ -181,14 +185,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.844812e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.055411e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.055411e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.902539e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.114637e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.114637e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.831180 sec - 5,608,290,183 cycles # 1.978 GHz - 8,753,452,498 instructions # 1.56 insn per cycle - 2.836498489 seconds time elapsed +TOTAL : 2.790099 sec + 5,604,940,647 cycles # 2.006 GHz + 8,753,839,257 instructions # 1.56 insn per cycle + 2.795468352 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1909) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index b768c889a2..2b18dea98b 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-29_22:58:23 +DATE: 2023-10-30_22:44:42 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.909398e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.046859e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.059893e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.047714e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.048356e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.060229e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.459234 sec - 2,023,935,385 cycles # 3.005 GHz - 2,917,136,859 instructions # 1.44 insn per cycle - 0.729830918 seconds time elapsed +TOTAL : 0.458296 sec + 1,980,316,208 cycles # 2.971 GHz + 2,806,704,970 instructions # 1.42 insn per cycle + 0.726080878 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.113454e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.319134e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.331179e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.127623e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.321219e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.332184e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.591882 sec - 2,481,489,165 cycles # 3.009 GHz - 3,788,031,297 instructions # 1.53 insn per cycle - 0.882711359 seconds time elapsed +TOTAL : 0.588967 sec + 2,472,364,873 cycles # 3.018 GHz + 3,811,466,454 instructions # 1.54 insn per cycle + 0.878680525 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.583482e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.595900e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.595900e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.570721e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.583060e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.583060e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.363888 sec - 19,707,598,906 cycles # 3.095 GHz - 59,610,259,834 
instructions # 3.02 insn per cycle - 6.367917888 seconds time elapsed +TOTAL : 6.395142 sec + 19,691,711,207 cycles # 3.078 GHz + 59,609,727,892 instructions # 3.03 insn per cycle + 6.399269206 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.862811e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.907560e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.907560e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.903843e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.950080e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.950080e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.391185 sec - 10,358,012,674 cycles # 3.051 GHz - 30,678,593,588 instructions # 2.96 insn per cycle - 3.395348889 seconds time elapsed +TOTAL : 3.362474 sec + 10,363,244,702 cycles # 3.079 GHz + 30,678,298,427 instructions # 2.96 insn per cycle + 3.366802524 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.892098e+04 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.007261e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.007261e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.772112e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.953675e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.953675e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.678228 sec - 4,878,476,340 cycles # 2.901 GHz - 11,021,244,802 instructions # 2.26 insn per cycle - 1.682311815 seconds time elapsed +TOTAL : 1.699046 sec + 4,888,603,432 cycles # 2.874 GHz + 11,021,968,804 instructions # 2.25 insn per cycle + 1.703340915 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.101682e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.123817e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.123817e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.107137e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.129586e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.129586e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.509246 sec - 4,368,027,313 cycles # 2.888 GHz - 10,298,348,380 instructions # 2.36 insn per cycle - 1.513387497 seconds time elapsed +TOTAL : 1.501377 sec + 4,365,749,377 cycles # 2.901 GHz + 10,298,360,400 instructions # 2.36 insn per cycle + 1.505506942 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.817252e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.930114e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.930114e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.829294e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.941822e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.941822e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.119992 sec - 4,099,162,887 cycles # 1.931 GHz - 5,845,650,996 instructions # 1.43 insn per cycle - 2.124059548 seconds time elapsed +TOTAL : 2.116256 sec + 4,095,240,477 cycles # 1.932 GHz + 5,845,721,781 instructions # 1.43 insn per cycle + 2.120470673 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index 7350cb044e..7f4e11c6ec 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-29_23:27:37 +DATE: 2023-10-30_23:11:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.596022e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.815455e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.815455e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.715097e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.892341e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.892341e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.490769 sec - 2,050,018,894 cycles # 2.922 GHz - 3,062,877,922 instructions # 1.49 insn per cycle - 0.759712446 seconds time elapsed +TOTAL : 0.484083 sec + 2,100,235,497 cycles # 3.030 GHz + 3,147,476,737 instructions # 1.50 insn per cycle + 0.751147595 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -76,14 +80,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.755073e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.665591e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.665591e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.790572e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.688655e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.688655e+06 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.814041 sec - 3,193,964,813 cycles # 3.017 GHz - 4,996,785,588 instructions # 1.56 insn per cycle - 1.121110193 seconds time elapsed +TOTAL : 0.816202 sec + 3,149,587,745 cycles # 2.965 GHz + 5,015,800,272 instructions # 1.59 insn per cycle + 1.123090986 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -100,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.581861e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.594640e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.594640e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.596929e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.609765e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = 
( 2.609765e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.374768 sec - 19,744,253,603 cycles # 3.096 GHz - 59,617,039,083 instructions # 3.02 insn per cycle - 6.379015840 seconds time elapsed +TOTAL : 6.337947 sec + 19,723,644,395 cycles # 3.110 GHz + 59,617,270,726 instructions # 3.02 insn per cycle + 6.342151556 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe @@ -128,14 +132,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.947779e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.993430e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.993430e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.783981e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.829454e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.829454e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.340025 sec - 10,392,403,164 cycles # 3.108 GHz - 30,728,140,044 instructions # 2.96 insn per cycle - 3.344437514 seconds time elapsed +TOTAL : 3.454609 sec + 10,400,623,917 cycles # 3.008 GHz + 30,728,596,829 instructions # 2.95 insn per cycle + 3.458894302 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe @@ -156,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.893651e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.007560e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.007560e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.941214e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.012298e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.012298e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.685465 sec - 4,920,068,373 cycles # 2.913 GHz - 11,072,615,091 instructions # 2.25 insn per cycle - 1.689979160 seconds time elapsed +TOTAL : 1.676940 sec + 4,910,616,642 cycles # 2.922 GHz + 11,072,028,053 instructions # 2.25 insn per cycle + 1.681365069 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe @@ -184,14 +188,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.103775e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.126572e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.126572e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.111710e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.134977e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.134977e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.513600 sec - 4,404,328,448 cycles # 2.903 GHz - 10,349,844,115 instructions # 2.35 insn per cycle - 1.518016539 seconds time elapsed +TOTAL : 1.502610 sec + 4,398,472,196 cycles # 2.920 GHz + 10,349,003,194 instructions # 2.35 insn per cycle + 1.506934340 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) 
(avx2: 4137) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe @@ -212,14 +216,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.735077e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.845863e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.845863e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.894007e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.009716e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.009716e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.148156 sec - 4,131,669,019 cycles # 1.920 GHz - 5,883,675,910 instructions # 1.42 insn per cycle - 2.152458390 seconds time elapsed +TOTAL : 2.105022 sec + 4,124,940,789 cycles # 1.956 GHz + 5,883,373,573 instructions # 1.43 insn per cycle + 2.109305029 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index 7f78fd761b..ab5cc3d006 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-29_22:58:52 +DATE: 2023-10-30_22:45:11 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.886775e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.044014e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.057508e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.004676e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.045395e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.057644e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.460031 sec - 2,016,314,028 cycles # 3.002 GHz - 2,898,893,774 instructions # 1.44 insn per cycle - 0.730273188 seconds time elapsed +TOTAL : 0.456331 sec + 2,013,110,564 cycles # 3.011 GHz + 2,877,607,904 instructions # 1.43 insn per cycle + 0.725772586 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.109210e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.313076e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.324737e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.125680e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.318433e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.329333e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.586833 sec - 2,478,382,973 cycles # 3.032 GHz - 3,721,282,569 instructions # 1.50 insn per cycle - 0.878275821 seconds time elapsed +TOTAL : 0.584479 sec + 2,474,094,451 cycles # 3.008 GHz + 3,746,277,121 instructions # 1.51 insn per cycle + 0.881448510 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.586514e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.599421e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.599421e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.579392e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.592087e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
2.592087e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.357316 sec - 19,481,235,659 cycles # 3.063 GHz - 58,801,855,367 instructions # 3.02 insn per cycle - 6.361426900 seconds time elapsed +TOTAL : 6.374569 sec + 19,486,493,121 cycles # 3.055 GHz + 58,801,850,212 instructions # 3.02 insn per cycle + 6.378723001 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1313) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.984548e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.029958e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.029958e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.903956e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.949132e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.949132e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.309163 sec - 10,243,122,552 cycles # 3.093 GHz - 30,350,909,518 instructions # 2.96 insn per cycle - 3.313312968 seconds time elapsed +TOTAL : 3.362761 sec + 10,234,416,457 cycles # 3.040 GHz + 30,349,712,353 instructions # 2.97 insn per cycle + 3.367008307 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4970) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv 
= VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.127917e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.294877e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.294877e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.551434e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.719575e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.719575e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.817988 sec - 5,048,703,510 cycles # 2.771 GHz - 11,485,918,392 instructions # 2.28 insn per cycle - 1.822406507 seconds time elapsed +TOTAL : 1.736713 sec + 5,047,702,052 cycles # 2.901 GHz + 11,486,114,404 instructions # 2.28 insn per cycle + 1.740911711 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4591) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.042469e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.062305e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.062305e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.040957e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.060570e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.060570e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.592971 sec - 4,643,388,624 cycles # 2.909 GHz - 10,844,105,717 instructions # 2.34 insn per cycle - 1.597053361 seconds time elapsed +TOTAL : 1.595201 sec + 4,643,633,681 cycles # 2.905 GHz + 10,843,683,440 instructions # 2.34 insn per cycle + 1.599371741 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 
4183) (512y: 244) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.817606e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.928535e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.928535e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.774921e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.884495e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.884495e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.119744 sec - 4,110,353,353 cycles # 1.936 GHz - 6,109,536,368 instructions # 1.49 insn per cycle - 2.123912437 seconds time elapsed +TOTAL : 2.131248 sec + 4,124,512,371 cycles # 1.933 GHz + 6,110,551,427 instructions # 1.48 insn per cycle + 2.135412381 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1457) (512y: 139) (512z: 3568) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 82a23d37c8..59717c723e 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-29_22:59:20 +DATE: 2023-10-30_22:45:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.554213e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.342270e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.443645e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.588155e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.368447e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.458711e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.442810 sec - 1,955,213,568 cycles # 2.996 GHz - 2,785,838,310 instructions # 1.42 insn per cycle - 0.711742691 seconds time elapsed +TOTAL : 0.439302 sec + 1,948,206,203 cycles # 2.995 GHz + 2,663,043,056 instructions # 1.37 insn per cycle + 0.707785219 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.352858e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.428374e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.497564e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.434228e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.440816e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.505746e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.486265 sec - 2,126,515,219 cycles # 3.007 GHz - 3,068,678,928 instructions # 1.44 insn per cycle - 0.765884317 seconds time elapsed +TOTAL : 0.484053 sec + 2,112,707,518 cycles # 3.007 GHz + 3,087,457,780 instructions # 1.46 insn per cycle + 0.761071480 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,9 +86,9 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 OMP= WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 31,909,914 cycles # 2.821 GHz - 48,600,152 instructions # 1.52 insn per cycle - 0.011853069 seconds time elapsed + 31,803,585 cycles # 2.876 GHz + 48,606,177 instructions # 1.53 insn per cycle + 0.011601940 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index 5226a06993..c7a6da602a 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-29_23:28:05 +DATE: 2023-10-30_23:11:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.094146e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.120453e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.120453e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.165376e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.169114e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.169114e+07 ) sec^-1 MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 -TOTAL : 0.453599 sec - 1,984,525,540 cycles # 3.003 GHz - 2,948,801,406 instructions # 1.49 insn per cycle - 0.718890176 seconds time elapsed +TOTAL : 0.452789 sec + 1,990,263,936 cycles # 3.017 GHz + 2,962,281,360 instructions # 1.49 insn per cycle + 0.718372655 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -76,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.830521e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.657375e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.657375e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.885886e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.626317e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.626317e+07 ) sec^-1 MeanMatrixElemValue = ( 6.737500e+02 +- 4.776370e+02 ) GeV^-2 -TOTAL : 0.628524 sec - 2,620,144,582 cycles # 3.014 GHz - 3,959,615,987 instructions # 1.51 insn per cycle - 0.926992755 seconds time elapsed +TOTAL : 0.625236 sec + 2,547,368,142 cycles # 2.988 GHz + 3,892,559,884 instructions # 1.53 insn per cycle + 0.911620575 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -95,9 +99,9 @@ OK (relative difference <= 5E-3) runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) - 38,544,754 cycles # 2.892 GHz - 52,041,513 instructions # 1.35 insn per cycle - 0.013873487 seconds time elapsed + 38,496,254 cycles # 2.918 GHz + 52,021,012 instructions # 1.35 insn per cycle + 0.013672287 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index 95a01f73e7..280e1cd39b 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-29_22:59:29 +DATE: 2023-10-30_22:45:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.572009e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.356666e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.443338e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.578827e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.307502e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.398717e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.440187 sec - 1,957,058,162 cycles # 3.008 GHz - 2,791,592,510 instructions # 1.43 insn per cycle - 0.709336021 seconds time elapsed +TOTAL : 0.438268 sec + 1,948,761,926 cycles # 3.007 GHz + 2,775,625,164 instructions # 1.42 insn per cycle + 0.705619986 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 248 @@ -64,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.352108e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.420935e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.490027e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.421305e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.421321e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.485464e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.486998 sec - 2,128,766,537 cycles # 3.001 GHz - 3,082,933,797 instructions # 1.45 insn per cycle - 0.767019388 seconds time elapsed +TOTAL : 0.483714 sec + 2,118,065,382 cycles # 3.006 GHz + 3,095,007,309 instructions # 1.46 insn per cycle + 0.761647794 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,9 +86,9 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check.exe -p 64 256 10 OMP= WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 31,187,967 cycles # 2.806 GHz - 47,783,574 instructions # 1.53 insn per cycle - 0.011561032 seconds time elapsed + 31,592,173 cycles # 2.841 GHz + 47,578,248 instructions # 1.51 insn per cycle + 0.011580777 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1029) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 6475091e5b..c5f0926802 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-29_22:59:38 +DATE: 2023-10-30_22:45:58 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.869757e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.043160e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.056606e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.034866e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.047326e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.059480e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.462395 sec - 1,986,146,263 cycles # 2.946 GHz - 2,899,086,399 instructions # 1.46 insn per cycle - 0.733165110 seconds time elapsed +TOTAL : 0.459825 sec + 1,972,886,422 cycles # 2.942 GHz + 2,894,736,686 instructions # 1.47 insn per cycle + 0.728551974 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.112775e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.315776e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.327405e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.126651e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.319776e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.330754e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.591959 sec - 2,485,887,019 cycles # 3.012 GHz - 3,855,572,801 instructions # 1.55 insn per cycle - 0.884123063 seconds time elapsed +TOTAL : 0.590117 sec + 2,420,412,755 cycles # 2.944 GHz + 3,622,346,173 instructions # 1.50 insn per cycle + 0.881208904 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -82,9 +86,9 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check.exe -p 64 256 10 OMP= WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 35,128,155 cycles # 2.838 GHz - 49,861,109 instructions # 1.42 insn per cycle - 0.012921109 seconds time elapsed + 35,060,457 cycles # 2.849 GHz + 49,936,885 instructions # 1.42 insn per cycle + 0.012855343 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1399) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index 843ed33318..9d2665cd6b 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_m_inl0_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-29_22:59:47 +DATE: 2023-10-30_22:46:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.846988e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.038654e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.051345e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.993809e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.038657e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.050632e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.463837 sec - 1,953,484,101 cycles # 2.884 GHz - 2,839,955,062 instructions # 1.45 insn per cycle - 0.734557378 seconds time elapsed +TOTAL : 0.458870 sec + 2,006,533,182 cycles # 3.002 GHz + 2,878,017,962 instructions # 1.43 insn per cycle + 0.727203702 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.103431e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.304634e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.316508e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.115406e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.304600e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.315286e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.589162 sec - 2,482,997,752 cycles # 3.024 GHz - 3,772,838,208 instructions # 1.52 insn per cycle - 0.882532404 seconds time elapsed +TOTAL : 0.582507 sec + 2,454,240,813 cycles # 3.016 GHz + 3,723,169,840 instructions # 1.52 insn per cycle + 0.871289973 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -82,9 +86,9 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check.exe -p 64 256 10 OMP= WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 33,991,965 cycles # 2.796 GHz - 49,234,796 instructions # 1.45 insn per cycle - 0.012707522 seconds time elapsed + 33,992,309 cycles # 2.807 GHz + 49,173,808 instructions # 1.45 insn per cycle + 0.012476569 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1276) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index b9b691e247..16d6268c40 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_22:59:56 +DATE: 2023-10-30_22:46:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.501935e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.526920e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.529281e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.517624e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.541229e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.543235e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.520893 sec - 2,248,497,475 cycles # 2.996 GHz - 3,510,999,754 instructions # 1.56 insn per cycle - 0.810696709 seconds time elapsed +TOTAL : 0.521844 sec + 2,186,654,995 cycles # 2.913 GHz + 3,461,003,117 instructions # 1.58 insn per cycle + 0.810135449 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.136856e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.165550e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.166829e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.126621e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.153137e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.154229e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.027243 sec - 9,828,661,032 cycles # 3.001 GHz - 20,508,968,170 instructions # 2.09 insn per cycle - 3.335037567 seconds time elapsed +TOTAL : 3.020145 sec + 10,054,555,514 cycles # 3.078 GHz + 21,149,963,871 instructions # 2.10 insn per cycle + 3.325573136 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.950736e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.951642e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.951642e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.989223e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.990154e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.990154e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.417605 sec - 25,648,294,457 cycles # 3.046 GHz - 78,942,458,574 
instructions # 3.08 insn per cycle - 8.421676976 seconds time elapsed +TOTAL : 8.253925 sec + 25,625,598,282 cycles # 3.104 GHz + 78,941,361,841 instructions # 3.08 insn per cycle + 8.257942730 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.753458e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.756891e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.756891e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.733656e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.737015e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.737015e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.379085 sec - 12,933,361,776 cycles # 2.952 GHz - 39,285,416,086 instructions # 3.04 insn per cycle - 4.383232825 seconds time elapsed +TOTAL : 4.401533 sec + 12,932,063,488 cycles # 2.936 GHz + 39,285,534,388 instructions # 3.04 insn per cycle + 4.405671091 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.595894e+03 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 8.614051e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.614051e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.580201e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.597746e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.597746e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.917047 sec - 5,576,412,013 cycles # 2.904 GHz - 13,689,574,709 instructions # 2.45 insn per cycle - 1.921242752 seconds time elapsed +TOTAL : 1.920486 sec + 5,578,833,054 cycles # 2.900 GHz + 13,688,993,603 instructions # 2.45 insn per cycle + 1.924638190 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.805413e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.828813e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.828813e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.806434e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.828989e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.828989e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.681874 sec - 4,891,493,480 cycles # 2.903 GHz - 12,344,327,535 instructions # 2.52 insn per cycle - 1.686035582 seconds time elapsed +TOTAL : 1.681542 sec + 4,894,665,759 cycles # 2.905 GHz + 12,344,293,266 instructions # 2.52 insn per cycle + 1.685694216 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.649115e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.663220e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.663220e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.698382e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.713237e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.713237e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.153677 sec - 4,116,714,094 cycles # 1.909 GHz - 6,336,926,711 instructions # 1.54 insn per cycle - 2.157934772 seconds time elapsed +TOTAL : 2.140663 sec + 4,116,399,810 cycles # 1.920 GHz + 6,337,006,175 instructions # 1.54 insn per cycle + 2.144975590 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index 2e2c6e625b..4ce822b792 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_23:28:48 +DATE: 2023-10-30_23:12:42 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.159927e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.489906e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.489906e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.143732e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.472083e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.472083e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.509818 sec - 2,212,202,628 cycles # 2.996 GHz - 3,518,939,068 instructions # 1.59 insn per cycle - 0.797946076 seconds time elapsed +TOTAL : 0.511660 sec + 2,209,336,126 cycles # 2.997 GHz + 3,509,975,220 instructions # 1.59 insn per cycle + 0.799578324 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -76,14 +80,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.645371e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.111843e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.111843e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.645060e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.109225e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.109225e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.298230 sec - 10,943,704,523 cycles # 3.065 GHz - 22,588,127,082 instructions # 2.06 insn per cycle - 3.630609877 seconds time elapsed +TOTAL : 3.299496 sec + 10,577,204,570 cycles # 2.971 GHz + 22,771,356,342 instructions # 2.15 insn per cycle + 3.626012375 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -100,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.968844e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.969820e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.969820e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.952360e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.953336e+03 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.953336e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.343532 sec - 25,669,775,275 cycles # 3.076 GHz - 78,950,360,530 instructions # 3.08 insn per cycle - 8.347863671 seconds time elapsed +TOTAL : 8.414053 sec + 25,654,559,721 cycles # 3.047 GHz + 78,949,536,483 instructions # 3.08 insn per cycle + 8.422420973 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -128,14 +132,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.740147e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.743760e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.743760e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.695279e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.698962e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.698962e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.397609 sec - 12,952,185,539 cycles # 2.943 GHz - 39,298,996,582 instructions # 3.03 insn per cycle - 4.402071213 seconds time elapsed +TOTAL : 4.451385 sec + 12,941,077,341 cycles # 2.905 GHz + 39,297,696,039 instructions # 3.04 insn per cycle + 4.455839883 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -156,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.514829e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.532691e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.532691e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.533387e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.553090e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.553090e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.939528 sec - 5,601,303,316 cycles # 2.883 GHz - 13,700,433,238 instructions # 2.45 insn per cycle - 1.944054566 seconds time elapsed +TOTAL : 1.936377 sec + 5,585,739,994 cycles # 2.879 GHz + 13,699,929,834 instructions # 2.45 insn per cycle + 1.941109655 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -184,14 +188,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.801046e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.825904e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.825904e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.872929e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.896714e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.896714e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.686556 sec - 4,907,372,738 cycles # 2.904 GHz - 12,355,220,111 instructions # 2.52 insn per cycle - 1.690956063 seconds time elapsed +TOTAL : 1.674197 sec + 4,907,725,132 cycles # 2.925 GHz + 12,355,150,511 instructions # 2.52 insn per cycle + 1.678573718 seconds time 
elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -212,14 +216,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.677162e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.691349e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.691349e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.649611e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.664664e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.664664e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.149521 sec - 4,132,629,395 cycles # 1.919 GHz - 6,348,639,035 instructions # 1.54 insn per cycle - 2.153997044 seconds time elapsed +TOTAL : 2.157363 sec + 4,129,296,848 cycles # 1.911 GHz + 6,347,814,458 instructions # 1.54 insn per cycle + 2.161875950 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index 45cfbfeec0..fbbc78bbe3 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 
'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_23:40:07 +DATE: 2023-10-30_23:24:14 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.473243e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.498908e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.501062e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.483910e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.509921e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.512008e+05 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.509563 sec - 2,216,642,937 cycles # 2.956 GHz - 3,422,181,548 instructions # 1.54 insn per cycle - 0.816227831 seconds time elapsed +TOTAL : 0.506434 sec + 2,214,704,532 cycles # 2.981 GHz + 3,386,390,521 instructions # 1.53 insn per cycle + 0.810038936 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --common WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.142930e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.175050e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.176382e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.140721e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.172434e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.173756e+05 ) sec^-1 MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 3.111141 sec - 10,329,507,816 cycles # 3.073 GHz - 22,291,732,023 instructions # 2.16 insn per cycle - 3.417604733 seconds time elapsed +TOTAL : 3.129864 sec + 10,198,907,692 cycles # 3.020 GHz + 22,581,252,712 instructions # 2.21 insn per cycle + 3.437157552 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.969664e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.970586e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.970586e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.967449e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.968407e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.968407e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 8.336836 sec - 25,640,820,337 cycles # 3.077 GHz - 78,945,663,895 instructions # 3.08 insn per cycle - 8.340853167 seconds time elapsed +TOTAL : 8.346802 sec + 25,643,377,035 cycles # 3.071 GHz + 78,941,953,126 instructions # 3.08 insn per cycle + 8.350720328 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.762983e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.766419e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.766419e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.739832e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.743118e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.743118e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.368876 sec - 12,926,628,008 cycles # 2.958 GHz - 39,285,203,910 instructions # 3.04 insn per cycle - 4.372893822 seconds time elapsed +TOTAL : 4.396405 sec + 12,921,349,789 cycles # 2.937 GHz + 39,283,878,297 instructions # 3.04 insn per cycle + 4.400450369 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.601104e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.618518e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.618518e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.487098e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.504383e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.504383e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.917538 sec - 5,581,699,180 cycles # 2.906 GHz - 13,688,893,679 instructions # 2.45 insn per cycle - 1.921672417 seconds time elapsed +TOTAL : 1.943181 sec + 5,580,010,383 cycles # 2.867 GHz + 13,688,267,157 instructions # 2.45 insn per cycle + 1.947132122 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.770281e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.793597e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.793597e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.702345e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.725432e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.725432e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.689348 sec - 4,900,240,520 cycles # 2.895 GHz - 12,343,143,577 instructions # 2.52 insn per cycle - 1.693358610 seconds time elapsed +TOTAL : 1.701247 sec + 4,897,512,852 cycles # 2.873 GHz + 12,342,531,060 instructions # 2.52 insn per cycle + 1.705369212 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 
0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.684168e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.698568e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.698568e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.589277e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.603475e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.603475e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.145238 sec - 4,121,500,968 cycles # 1.918 GHz - 6,334,916,294 instructions # 1.54 insn per cycle - 2.149260659 seconds time elapsed +TOTAL : 2.172225 sec + 4,123,967,720 cycles # 1.896 GHz + 6,335,720,246 instructions # 1.54 insn per cycle + 2.176249842 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt index 75f16009d0..043c4626ae 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_23:36:53 +DATE: 2023-10-30_23:20:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.502504e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.529294e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.532228e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.471163e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.497248e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.499419e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.500079 sec - 2,195,316,551 cycles # 3.018 GHz - 3,432,745,100 instructions # 1.56 insn per cycle - 0.787073582 seconds time elapsed +TOTAL : 0.505895 sec + 2,223,447,521 cycles # 2.997 GHz + 3,509,751,489 instructions # 1.58 insn per cycle + 0.813952724 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.139755e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.171825e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.173189e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.134766e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.166730e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.168082e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.057644 sec - 10,111,910,098 cycles # 3.059 GHz - 21,575,647,114 instructions # 2.13 insn per cycle - 3.365562161 seconds time elapsed +TOTAL : 3.059208 sec + 10,034,940,988 cycles # 3.032 GHz + 22,819,661,276 instructions # 2.27 insn per cycle + 3.366574625 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.965142e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.966066e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.966066e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.983353e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.984333e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.984333e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.354767 sec - 25,655,459,462 cycles # 3.071 GHz - 78,944,356,715 instructions # 3.08 insn per cycle - 8.358730269 seconds time elapsed +TOTAL : 8.278074 sec + 25,644,473,579 cycles # 3.097 GHz + 78,941,498,794 instructions # 3.08 insn per cycle + 8.282145719 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.747153e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.750615e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.750615e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.757781e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.761243e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.761243e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.385850 sec - 12,919,824,066 cycles # 2.944 GHz - 39,284,399,104 instructions # 3.04 insn per cycle - 4.389897028 seconds time elapsed +TOTAL : 4.373408 sec + 12,920,719,076 cycles # 2.952 GHz + 39,284,359,428 instructions # 3.04 insn per cycle + 4.377582936 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.556610e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.573675e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.573675e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.589418e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.606399e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.606399e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.925765 sec - 5,581,219,157 cycles # 2.893 GHz - 13,690,157,786 instructions # 2.45 insn per cycle - 1.929949148 seconds time elapsed +TOTAL : 1.918676 sec + 5,578,217,582 cycles # 2.903 GHz + 13,689,618,564 instructions # 2.45 insn per cycle + 1.922724613 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.787105e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.810178e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.810178e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.748063e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.770913e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.770913e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.685035 sec - 4,895,907,097 cycles # 2.900 GHz - 12,344,567,987 instructions # 2.52 insn per cycle - 1.689171944 seconds time elapsed +TOTAL : 1.691750 sec + 4,896,349,233 cycles # 2.888 GHz + 12,344,390,321 instructions # 2.52 insn per cycle + 1.695929786 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 
0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.689935e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.704356e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.704356e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.287653e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.300208e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.300208e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.142125 sec - 4,119,816,697 cycles # 1.921 GHz - 6,337,302,977 instructions # 1.54 insn per cycle - 2.146222342 seconds time elapsed +TOTAL : 2.259681 sec + 4,122,896,631 cycles # 1.822 GHz + 6,337,234,987 instructions # 1.54 insn per cycle + 2.263766017 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index a44127a607..b9c5d6cae4 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_23:33:46 +DATE: 2023-10-30_23:17:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -47,14 +51,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.203236e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.506741e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.509379e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.222738e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.511031e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.513181e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.513902 sec - 2,108,484,975 cycles # 2.844 GHz - 3,365,051,902 instructions # 1.60 insn per cycle - 0.803659720 seconds time elapsed +TOTAL : 0.504611 sec + 2,250,444,021 cycles # 3.031 GHz + 3,479,864,190 instructions # 1.55 insn per cycle + 0.810696520 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -67,14 +71,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.749252e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.167948e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.169271e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.753926e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.181085e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.182473e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.179282 sec - 10,356,463,103 cycles # 3.021 GHz - 21,875,646,322 instructions # 2.11 insn per cycle - 3.486218185 seconds time elapsed +TOTAL : 3.184117 sec + 10,464,163,533 cycles # 3.047 GHz + 23,737,906,553 instructions # 2.27 insn per cycle + 3.492903854 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,14 +94,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.983903e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.984833e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.984833e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.974826e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.975768e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 1.975768e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.275893 sec - 25,633,045,497 cycles # 3.096 GHz - 78,941,513,287 instructions # 3.08 insn per cycle - 8.279863548 seconds time elapsed +TOTAL : 8.313623 sec + 25,699,589,266 cycles # 3.090 GHz + 78,941,867,221 instructions # 3.07 insn per cycle + 8.317593698 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -117,14 +121,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.760169e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.763953e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.763953e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.701465e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.704860e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.704860e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.371058 sec - 12,924,406,923 cycles # 2.956 GHz - 39,285,568,868 instructions # 3.04 insn per cycle - 4.375178844 seconds time elapsed +TOTAL : 4.439632 sec + 12,929,764,158 cycles # 2.910 GHz + 39,284,341,231 instructions # 3.04 insn per cycle + 4.443728539 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -144,14 +148,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.550074e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.567051e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.567051e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.596958e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.614431e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.614431e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.927137 sec - 5,593,914,621 cycles # 2.898 GHz - 13,688,980,575 instructions # 2.45 insn per cycle - 1.931116092 seconds time elapsed +TOTAL : 1.916972 sec + 5,577,800,783 cycles # 2.904 GHz + 13,688,986,742 instructions # 2.45 insn per cycle + 1.921087847 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -171,14 +175,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.762400e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.786054e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.786054e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.752841e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.775978e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.775978e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.689205 sec - 4,892,556,855 cycles # 2.891 GHz - 12,344,299,662 instructions # 2.52 insn per cycle - 1.693311350 seconds time elapsed +TOTAL : 1.690810 sec + 4,894,650,981 cycles # 2.889 GHz + 12,344,413,507 instructions # 2.52 insn per cycle + 1.694845631 seconds time elapsed =Symbols in CPPProcess.o= 
(~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -198,14 +202,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.715915e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.730071e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.730071e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.693148e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.707263e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.707263e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.135105 sec - 4,115,673,540 cycles # 1.925 GHz - 6,336,830,329 instructions # 1.54 insn per cycle - 2.139046159 seconds time elapsed +TOTAL : 2.141292 sec + 4,115,064,829 cycles # 1.919 GHz + 6,336,964,394 instructions # 1.54 insn per cycle + 2.145349336 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index 0c19155fd8..0d3326ca56 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_23:00:32 +DATE: 2023-10-30_22:46:54 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.466481e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.490614e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.492798e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.491367e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.514776e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.516713e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.522728 sec - 2,248,657,448 cycles # 2.992 GHz - 3,499,106,897 instructions # 1.56 insn per cycle - 0.812353279 seconds time elapsed +TOTAL : 0.516171 sec + 2,249,295,382 cycles # 3.028 GHz + 3,533,941,890 instructions # 1.57 insn per cycle + 0.803191953 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.139007e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.167719e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.168940e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.139994e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.166707e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.167859e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.017562 sec - 10,017,603,348 cycles # 3.065 GHz - 22,790,569,141 instructions # 2.28 insn per cycle - 3.325124866 seconds time elapsed +TOTAL : 3.011297 sec + 9,979,296,942 cycles # 3.062 GHz + 20,588,122,768 instructions # 2.06 insn per cycle + 3.316869688 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.973379e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.974310e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.974310e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.989568e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.990537e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.990537e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.319686 sec - 25,596,530,612 cycles # 3.076 GHz - 78,714,234,691 instructions # 3.08 insn per cycle - 8.323809308 seconds time elapsed +TOTAL : 8.251964 sec + 25,599,012,720 cycles # 3.101 GHz + 78,714,499,821 instructions # 3.07 insn per cycle + 8.256078525 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4263) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.755569e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.759059e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.759059e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.700215e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.703473e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.703473e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.376087 sec - 12,893,815,616 cycles # 2.944 GHz - 39,230,432,926 instructions # 3.04 insn per cycle - 4.380264389 seconds time elapsed +TOTAL : 4.440870 sec + 12,882,751,008 cycles # 2.899 GHz + 39,230,659,978 instructions # 3.05 insn per cycle + 4.445036011 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:12949) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.530044e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.547339e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.547339e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.515969e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.533415e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.533415e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.931884 sec - 5,615,440,019 cycles # 2.902 GHz - 13,803,106,773 instructions # 2.46 insn per cycle - 1.936074443 seconds time elapsed +TOTAL : 1.934902 sec + 5,614,399,765 cycles # 2.897 GHz + 13,803,271,495 instructions # 2.46 insn per cycle + 1.939082893 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11422) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.381888e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.403598e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.403598e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.429736e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.452404e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.452404e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.757912 sec - 4,966,926,503 cycles # 2.820 GHz - 12,470,574,071 instructions # 2.51 insn per cycle - 1.762075007 seconds time elapsed +TOTAL : 1.748526 sec + 4,960,981,981 cycles # 2.831 GHz + 12,469,682,563 instructions # 2.51 insn per cycle + 1.752792616 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 
0) (avx2:10258) (512y: 240) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.629417e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.644208e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.644208e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.693652e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.707695e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.707695e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.159291 sec - 4,114,806,278 cycles # 1.903 GHz - 6,460,944,575 instructions # 1.57 insn per cycle - 2.163526475 seconds time elapsed +TOTAL : 2.141122 sec + 4,116,386,291 cycles # 1.920 GHz + 6,461,786,548 instructions # 1.57 insn per cycle + 2.145389447 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1647) (512y: 192) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index 9fad0334f6..b0e2abe9e7 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl1_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_23:18:42 +DATE: 2023-10-30_23:02:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.230395e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.255737e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.257706e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.230348e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.253293e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.255230e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.532206 sec - 2,283,349,210 cycles # 2.977 GHz - 3,463,784,089 instructions # 1.52 insn per cycle - 0.826165595 seconds time elapsed +TOTAL : 0.526433 sec + 2,273,262,486 cycles # 3.018 GHz + 3,600,910,991 instructions # 1.58 insn per cycle + 0.810817494 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe -p 64 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.778542e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.805844e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.806965e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.776696e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.803592e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.804745e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.282940 sec - 10,785,228,501 cycles # 3.048 GHz - 24,078,177,869 instructions # 2.23 insn per cycle - 3.593801795 seconds time elapsed +TOTAL : 3.293946 sec + 10,657,689,079 cycles # 3.004 GHz + 22,487,370,214 instructions # 2.11 insn per cycle + 3.605934146 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.421380e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.421873e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.421873e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.456004e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.456479e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
4.456479e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 37.102394 sec - 113,622,568,884 cycles # 3.063 GHz - 144,969,242,443 instructions # 1.28 insn per cycle - 37.106567085 seconds time elapsed +TOTAL : 36.814419 sec + 113,714,815,794 cycles # 3.089 GHz + 144,971,695,251 instructions # 1.27 insn per cycle + 36.818652323 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:21605) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.174528e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.177033e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.177033e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.289352e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.292012e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.292012e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.175282 sec - 14,731,493,074 cycles # 2.845 GHz - 37,577,936,961 instructions # 2.55 insn per cycle - 5.179533221 seconds time elapsed +TOTAL : 4.994765 sec + 14,705,948,574 cycles # 2.942 GHz + 37,577,579,529 instructions # 2.56 insn per cycle + 4.999059436 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:68118) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.715325e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.729397e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.729397e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.838791e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.853570e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.853570e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.135055 sec - 6,125,540,781 cycles # 2.864 GHz - 13,063,422,144 instructions # 2.13 insn per cycle - 2.139301000 seconds time elapsed +TOTAL : 2.101447 sec + 6,122,264,653 cycles # 2.909 GHz + 13,063,480,967 instructions # 2.13 insn per cycle + 2.105553780 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:46960) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.348039e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.369239e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.369239e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.467933e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.491324e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.491324e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.763762 sec - 5,103,130,887 cycles # 2.888 GHz - 11,442,116,049 instructions # 2.24 insn per cycle - 1.767944894 seconds time elapsed +TOTAL : 1.741382 sec + 5,060,449,342 cycles # 2.900 GHz + 11,442,110,060 instructions # 2.26 insn per cycle + 1.745548105 seconds time elapsed =Symbols in CPPProcess.o= 
(~sse4: 0) (avx2:40434) (512y: 285) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.858034e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.873595e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.873595e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.971385e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.987269e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.987269e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.096754 sec - 3,979,885,259 cycles # 1.895 GHz - 5,943,652,624 instructions # 1.49 insn per cycle - 2.100889558 seconds time elapsed +TOTAL : 2.067036 sec + 3,978,997,180 cycles # 1.922 GHz + 5,943,536,872 instructions # 1.49 insn per cycle + 2.071156830 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2455) (512y: 337) (512z:39411) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index 7d78741b13..0d0978bb51 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl1_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_23:19:50 +DATE: 2023-10-30_23:03:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.229187e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.254155e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.256147e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.220052e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.243046e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.245124e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.528872 sec - 2,265,218,371 cycles # 3.000 GHz - 3,542,082,296 instructions # 1.56 insn per cycle - 0.812727976 seconds time elapsed +TOTAL : 0.530319 sec + 2,259,848,260 cycles # 2.956 GHz + 3,491,848,314 instructions # 1.55 insn per cycle + 0.823546608 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe -p 64 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.790936e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.818767e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.819879e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.792897e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.819967e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.821084e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.267182 sec - 10,779,068,223 cycles # 3.064 GHz - 24,334,052,418 instructions # 2.26 insn per cycle - 3.576518904 seconds time elapsed +TOTAL : 3.266999 sec + 10,784,156,266 cycles # 3.066 GHz + 23,481,416,267 instructions # 2.18 insn per cycle + 3.573859817 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.372665e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.373139e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.373139e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.432329e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.432803e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
4.432803e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 37.516758 sec - 114,319,717,939 cycles # 3.047 GHz - 145,558,224,840 instructions # 1.27 insn per cycle - 37.520892679 seconds time elapsed +TOTAL : 37.010471 sec + 114,295,370,649 cycles # 3.088 GHz + 145,556,328,098 instructions # 1.27 insn per cycle + 37.014451848 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:22248) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.184252e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.186780e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.186780e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.199736e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.202207e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.202207e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.159423 sec - 15,157,677,068 cycles # 2.936 GHz - 37,764,655,454 instructions # 2.49 insn per cycle - 5.163667570 seconds time elapsed +TOTAL : 5.134326 sec + 15,166,468,724 cycles # 2.952 GHz + 37,764,533,543 instructions # 2.49 insn per cycle + 5.138511572 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:68446) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.897428e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.912320e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.912320e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.958529e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.973629e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.973629e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.086125 sec - 6,004,123,487 cycles # 2.873 GHz - 12,897,748,633 instructions # 2.15 insn per cycle - 2.090367475 seconds time elapsed +TOTAL : 2.070228 sec + 6,003,805,381 cycles # 2.895 GHz + 12,898,512,881 instructions # 2.15 insn per cycle + 2.074445232 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:45929) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.272095e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.293177e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.293177e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.276328e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.296939e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.296939e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.778215 sec - 5,106,915,257 cycles # 2.866 GHz - 11,448,534,055 instructions # 2.24 insn per cycle - 1.782424367 seconds time elapsed +TOTAL : 1.777498 sec + 5,108,668,456 cycles # 2.868 GHz + 11,448,668,674 instructions # 2.24 insn per cycle + 1.781716101 seconds time elapsed =Symbols in CPPProcess.o= 
(~sse4: 0) (avx2:40123) (512y: 219) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.932893e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.948239e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.948239e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.969395e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.982565e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.982565e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.076887 sec - 3,960,443,665 cycles # 1.904 GHz - 5,897,308,540 instructions # 1.49 insn per cycle - 2.081105689 seconds time elapsed +TOTAL : 2.363302 sec + 3,965,199,500 cycles # 1.675 GHz + 5,898,552,595 instructions # 1.49 insn per cycle + 2.367905311 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1971) (512y: 259) (512z:38937) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 2550e0adc4..b04f88d136 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_23:01:08 +DATE: 2023-10-30_22:47:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.322174e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.374765e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.380198e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.350401e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.395183e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.400130e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.475861 sec - 2,066,753,206 cycles # 3.001 GHz - 3,079,685,533 instructions # 1.49 insn per cycle - 0.746470599 seconds time elapsed +TOTAL : 0.472466 sec + 2,064,287,043 cycles # 3.019 GHz + 3,070,903,047 instructions # 1.49 insn per cycle + 0.740974707 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.527278e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.590065e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.592727e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.525886e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.583348e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.585805e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.727640 sec - 5,892,330,784 cycles # 3.012 GHz - 11,643,535,001 instructions # 1.98 insn per cycle - 2.013344128 seconds time elapsed +TOTAL : 1.710390 sec + 5,933,399,529 cycles # 3.072 GHz + 11,392,988,350 instructions # 1.92 insn per cycle + 1.990611745 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.064978e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.066007e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.066007e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.057262e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.058265e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
2.058265e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.950627 sec - 24,639,973,311 cycles # 3.098 GHz - 78,133,272,515 instructions # 3.17 insn per cycle - 7.954643603 seconds time elapsed +TOTAL : 7.980037 sec + 24,615,607,958 cycles # 3.083 GHz + 78,133,332,813 instructions # 3.17 insn per cycle + 7.984035116 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.481948e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.496066e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.496066e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.492217e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.506280e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.506280e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.199609 sec - 6,472,284,957 cycles # 2.938 GHz - 20,124,192,482 instructions # 3.11 insn per cycle - 2.203660429 seconds time elapsed +TOTAL : 2.196903 sec + 6,474,538,578 cycles # 2.943 GHz + 20,124,136,855 instructions # 3.11 insn per cycle + 2.200972137 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv 
= VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.688878e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.695997e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.695997e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.709474e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.716682e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.716682e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.979124 sec - 2,835,302,738 cycles # 2.886 GHz - 6,991,499,113 instructions # 2.47 insn per cycle - 0.983232961 seconds time elapsed +TOTAL : 0.967644 sec + 2,835,284,176 cycles # 2.920 GHz + 6,991,695,841 instructions # 2.47 insn per cycle + 0.971765057 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.934792e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.944433e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.944433e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.947430e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.956519e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.956519e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.855339 sec - 2,488,802,833 cycles # 2.899 GHz - 6,299,019,671 instructions # 2.53 insn per cycle - 0.859314450 seconds time elapsed +TOTAL : 0.849627 sec + 2,487,648,681 cycles # 2.916 GHz + 6,298,810,862 instructions # 2.53 insn per cycle + 0.853767432 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) 
(512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.553309e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.559451e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.559451e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.556501e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.562383e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.562383e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.063776 sec - 2,046,725,428 cycles # 1.918 GHz - 3,268,827,002 instructions # 1.60 insn per cycle - 1.067732468 seconds time elapsed +TOTAL : 1.061313 sec + 2,046,593,820 cycles # 1.923 GHz + 3,269,352,925 instructions # 1.60 insn per cycle + 1.065402899 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index 99764dd9c6..74c634601d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_23:29:24 +DATE: 2023-10-30_23:13:19 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.657882e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.324484e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.324484e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.671948e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.316331e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.316331e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.463164 sec - 2,030,384,273 cycles # 3.017 GHz - 3,057,153,374 instructions # 1.51 insn per cycle - 0.730152926 seconds time elapsed +TOTAL : 0.462676 sec + 2,024,244,638 cycles # 3.009 GHz + 3,041,248,244 instructions # 1.50 insn per cycle + 0.730268417 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -76,14 +80,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.246179e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.485541e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.485541e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.275155e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.478385e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.478385e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.869143 sec - 6,439,845,410 cycles # 3.054 GHz - 13,291,763,279 instructions # 2.06 insn per cycle - 2.165242433 seconds time elapsed +TOTAL : 1.871289 sec + 6,489,134,544 cycles # 3.072 GHz + 12,828,495,799 instructions # 1.98 insn per cycle + 2.169427134 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -100,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.048042e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.049108e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.049108e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.057674e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.058683e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 2.058683e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.018291 sec - 24,650,165,006 cycles # 3.073 GHz - 78,137,558,802 instructions # 3.17 insn per cycle - 8.022347939 seconds time elapsed +TOTAL : 7.980160 sec + 24,660,723,418 cycles # 3.091 GHz + 78,140,421,281 instructions # 3.17 insn per cycle + 7.984129751 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -128,14 +132,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.184439e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.198380e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.198380e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.532234e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.546273e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.546273e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.293305 sec - 6,489,676,835 cycles # 2.825 GHz - 20,133,677,992 instructions # 3.10 insn per cycle - 2.297604790 seconds time elapsed +TOTAL : 2.187667 sec + 6,485,446,957 cycles # 2.960 GHz + 20,133,607,632 instructions # 3.10 insn per cycle + 2.191951636 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -156,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.682303e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.689607e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.689607e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.687386e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.694462e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.694462e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.985266 sec - 2,845,233,923 cycles # 2.877 GHz - 7,002,190,101 instructions # 2.46 insn per cycle - 0.989581277 seconds time elapsed +TOTAL : 0.982044 sec + 2,845,455,999 cycles # 2.887 GHz + 7,001,460,428 instructions # 2.46 insn per cycle + 0.986146209 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -184,14 +188,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.933392e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.942827e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.942827e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.946050e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.955574e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.955574e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.858559 sec - 2,499,743,037 cycles # 2.900 GHz - 6,308,867,727 instructions # 2.52 insn per cycle - 0.862640697 seconds time elapsed +TOTAL : 0.853102 sec + 2,498,282,365 cycles # 2.916 GHz + 6,308,731,962 instructions # 2.53 insn per cycle + 0.857331584 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) 
(avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -212,14 +216,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.556514e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.562471e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.562471e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.547234e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.552924e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.552924e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.063936 sec - 2,054,890,911 cycles # 1.926 GHz - 3,279,276,140 instructions # 1.60 insn per cycle - 1.068102252 seconds time elapsed +TOTAL : 1.070786 sec + 2,056,266,891 cycles # 1.914 GHz + 3,279,522,920 instructions # 1.59 insn per cycle + 1.074961383 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index 9aadf1e49e..720048ec3c 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_23:40:42 +DATE: 2023-10-30_23:24:51 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.360801e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.409705e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.415226e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.334807e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.383679e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.389054e+05 ) sec^-1 MeanMatrixElemValue = ( 4.159397e-01 +- 3.238804e-01 ) GeV^-4 -TOTAL : 0.458838 sec - 2,014,922,139 cycles # 3.003 GHz - 2,968,563,369 instructions # 1.47 insn per cycle - 0.727778705 seconds time elapsed +TOTAL : 0.464332 sec + 2,004,763,386 cycles # 2.983 GHz + 3,003,176,836 instructions # 1.50 insn per cycle + 0.731505024 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --common WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.551763e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.620574e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.623617e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.548345e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.616460e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.619482e+05 ) sec^-1 MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 -TOTAL : 1.791509 sec - 6,204,146,397 cycles # 3.077 GHz - 12,314,042,171 instructions # 1.98 insn per cycle - 2.073334786 seconds time elapsed +TOTAL : 1.797034 sec + 6,178,315,733 cycles # 3.040 GHz + 13,028,330,035 instructions # 2.11 insn per cycle + 2.088729594 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.033598e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.034600e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.034600e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.026245e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.027281e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
2.027281e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 8.074494 sec - 24,637,155,665 cycles # 3.051 GHz - 78,134,535,655 instructions # 3.17 insn per cycle - 8.078409262 seconds time elapsed +TOTAL : 8.104730 sec + 24,633,844,566 cycles # 3.039 GHz + 78,139,304,517 instructions # 3.17 insn per cycle + 8.108845161 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.381443e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.395247e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.395247e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.385038e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.398761e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.398761e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 2.231358 sec - 6,485,886,508 cycles # 2.905 GHz - 20,125,397,330 instructions # 3.10 insn per cycle - 2.235670022 seconds time elapsed +TOTAL : 2.229439 sec + 6,483,373,401 cycles # 2.904 GHz + 20,123,730,824 instructions # 3.10 insn per cycle + 2.233500667 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv 
= VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.696486e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.703565e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.703565e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.668417e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.675217e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.675217e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.975961 sec - 2,842,850,006 cycles # 2.903 GHz - 6,990,872,447 instructions # 2.46 insn per cycle - 0.979788752 seconds time elapsed +TOTAL : 0.991992 sec + 2,842,155,618 cycles # 2.856 GHz + 6,989,719,162 instructions # 2.46 insn per cycle + 0.996014634 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.942937e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.952287e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.952287e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.918755e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.928461e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.928461e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.853484 sec - 2,494,278,243 cycles # 2.911 GHz - 6,297,258,751 instructions # 2.52 insn per cycle - 0.857337152 seconds time elapsed +TOTAL : 0.863724 sec + 2,490,611,110 cycles # 2.873 GHz + 6,295,213,713 instructions # 2.53 insn per cycle + 0.867676126 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) 
(512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.558330e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.564443e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.564443e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.531685e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.537774e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.537774e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 1.061341 sec - 2,049,269,495 cycles # 1.925 GHz - 3,265,041,464 instructions # 1.59 insn per cycle - 1.065312777 seconds time elapsed +TOTAL : 1.079892 sec + 2,051,094,812 cycles # 1.893 GHz + 3,265,110,091 instructions # 1.59 insn per cycle + 1.083901539 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt index 8ffc8a4b0e..7804f4bf8a 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_23:37:29 +DATE: 2023-10-30_23:21:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.359870e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.408459e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.413919e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.349443e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.397501e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.402622e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.459399 sec - 2,014,984,352 cycles # 3.016 GHz - 3,031,573,414 instructions # 1.50 insn per cycle - 0.727065527 seconds time elapsed +TOTAL : 0.459481 sec + 2,011,901,926 cycles # 3.015 GHz + 3,051,434,225 instructions # 1.52 insn per cycle + 0.726336676 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.554349e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.623648e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.626648e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.555513e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.624649e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.627678e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.743056 sec - 6,019,438,540 cycles # 3.063 GHz - 11,904,365,698 instructions # 1.98 insn per cycle - 2.024061522 seconds time elapsed +TOTAL : 1.740664 sec + 6,016,957,836 cycles # 3.061 GHz + 12,432,593,817 instructions # 2.07 insn per cycle + 2.022269033 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.060194e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.061198e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.061198e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.062365e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.063397e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
2.063397e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.968397 sec - 24,657,666,535 cycles # 3.093 GHz - 78,133,764,363 instructions # 3.17 insn per cycle - 7.972247869 seconds time elapsed +TOTAL : 7.960046 sec + 24,649,032,652 cycles # 3.095 GHz + 78,133,271,650 instructions # 3.17 insn per cycle + 7.964074741 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.474435e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.488457e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.488457e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.451293e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.465184e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.465184e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.202643 sec - 6,473,343,175 cycles # 2.935 GHz - 20,124,043,111 instructions # 3.11 insn per cycle - 2.206628940 seconds time elapsed +TOTAL : 2.208659 sec + 6,503,127,198 cycles # 2.941 GHz + 20,125,558,857 instructions # 3.09 insn per cycle + 2.212485154 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv 
= VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.668655e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.675400e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.675400e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.699821e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.706920e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.706920e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.990850 sec - 2,839,325,240 cycles # 2.856 GHz - 6,991,575,836 instructions # 2.46 insn per cycle - 0.994896587 seconds time elapsed +TOTAL : 0.972443 sec + 2,836,265,238 cycles # 2.907 GHz + 6,991,517,704 instructions # 2.47 insn per cycle + 0.976302090 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.869187e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.878346e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.878346e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.768782e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.777060e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.777060e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.885448 sec - 2,488,789,244 cycles # 2.800 GHz - 6,298,807,707 instructions # 2.53 insn per cycle - 0.889708860 seconds time elapsed +TOTAL : 0.935326 sec + 2,490,998,791 cycles # 2.653 GHz + 6,298,940,736 instructions # 2.53 insn per cycle + 0.939567745 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) 
(512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.560892e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.566889e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.566889e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.536870e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.542938e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.542938e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.058353 sec - 2,046,415,009 cycles # 1.928 GHz - 3,268,898,492 instructions # 1.60 insn per cycle - 1.062296484 seconds time elapsed +TOTAL : 1.074951 sec + 2,049,510,603 cycles # 1.901 GHz + 3,269,159,659 instructions # 1.60 insn per cycle + 1.078898146 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index 1809c5bbfa..312baf5e91 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_23:34:22 +DATE: 2023-10-30_23:18:22 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -47,14 +51,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.769597e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.402152e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.407372e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.799496e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.394968e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.400157e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.460989 sec - 2,020,137,019 cycles # 3.007 GHz - 3,045,447,444 instructions # 1.51 insn per cycle - 0.728139522 seconds time elapsed +TOTAL : 0.462459 sec + 2,017,920,392 cycles # 3.011 GHz + 2,978,442,542 instructions # 1.48 insn per cycle + 0.728973268 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -67,14 +71,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.513625e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.635748e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.638872e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.510554e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.618370e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.621351e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.816261 sec - 6,253,507,004 cycles # 3.064 GHz - 13,207,865,239 instructions # 2.11 insn per cycle - 2.107003696 seconds time elapsed +TOTAL : 1.821247 sec + 6,276,478,242 cycles # 3.066 GHz + 12,339,707,675 instructions # 1.97 insn per cycle + 2.109895530 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,14 +94,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.058799e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.059847e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.059847e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.069803e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.070843e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = 
( 2.070843e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.974525 sec - 24,651,702,657 cycles # 3.092 GHz - 78,135,595,549 instructions # 3.17 insn per cycle - 7.978414186 seconds time elapsed +TOTAL : 7.931969 sec + 24,636,660,510 cycles # 3.105 GHz + 78,133,102,110 instructions # 3.17 insn per cycle + 7.935964631 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -117,14 +121,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.413674e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.427119e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.427119e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.246085e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.259813e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.259813e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.220147 sec - 6,475,013,625 cycles # 2.912 GHz - 20,124,108,640 instructions # 3.11 insn per cycle - 2.224169840 seconds time elapsed +TOTAL : 2.271476 sec + 6,476,732,405 cycles # 2.847 GHz + 20,124,333,621 instructions # 3.11 insn per cycle + 2.275698592 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -144,14 +148,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.699076e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.706343e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.706343e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.697774e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.705196e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.705196e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.972965 sec - 2,833,755,146 cycles # 2.903 GHz - 6,991,226,206 instructions # 2.47 insn per cycle - 0.976886423 seconds time elapsed +TOTAL : 0.974034 sec + 2,841,486,993 cycles # 2.907 GHz + 6,991,546,734 instructions # 2.46 insn per cycle + 0.977976633 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -171,14 +175,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.905927e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.914866e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.914866e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.910241e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.919448e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.919448e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.868406 sec - 2,490,916,734 cycles # 2.858 GHz - 6,299,110,262 instructions # 2.53 insn per cycle - 0.872421287 seconds time elapsed +TOTAL : 0.866300 sec + 2,488,264,122 cycles # 2.862 GHz + 6,298,648,645 instructions # 2.53 insn per cycle + 0.870222768 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) 
(avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -198,14 +202,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.540209e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.545935e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.545935e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.577066e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.582967e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.582967e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.073003 sec - 2,047,997,550 cycles # 1.903 GHz - 3,268,606,033 instructions # 1.60 insn per cycle - 1.076897689 seconds time elapsed +TOTAL : 1.047804 sec + 2,047,024,012 cycles # 1.947 GHz + 3,268,367,464 instructions # 1.60 insn per cycle + 1.051771742 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index 628e905e0e..4efafebf91 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_23:01:36 +DATE: 2023-10-30_22:47:58 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.309702e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.360225e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.365503e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.359603e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.405982e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.411135e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.479838 sec - 2,082,146,805 cycles # 3.006 GHz - 3,133,924,846 instructions # 1.51 insn per cycle - 0.750267000 seconds time elapsed +TOTAL : 0.477129 sec + 2,034,373,832 cycles # 2.961 GHz + 3,044,023,237 instructions # 1.50 insn per cycle + 0.745923336 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.575947e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.638817e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.641652e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.581293e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.639577e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.642053e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.717190 sec - 5,940,356,621 cycles # 3.060 GHz - 12,714,538,766 instructions # 2.14 insn per cycle - 2.000578013 seconds time elapsed +TOTAL : 1.709849 sec + 5,921,893,801 cycles # 3.061 GHz + 11,251,944,201 instructions # 1.90 insn per cycle + 1.991252364 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.069561e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.070606e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.070606e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.061559e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.062588e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
2.062588e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.932369 sec - 24,594,739,093 cycles # 3.099 GHz - 77,859,512,765 instructions # 3.17 insn per cycle - 7.936332823 seconds time elapsed +TOTAL : 7.963268 sec + 24,535,327,018 cycles # 3.080 GHz + 77,859,389,754 instructions # 3.17 insn per cycle + 7.967328124 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3113) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.656788e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.671963e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.671963e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.641093e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.655983e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.655983e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.149647 sec - 6,426,261,682 cycles # 2.985 GHz - 20,089,337,132 instructions # 3.13 insn per cycle - 2.153885954 seconds time elapsed +TOTAL : 2.154162 sec + 6,421,039,489 cycles # 2.976 GHz + 20,089,349,313 instructions # 3.13 insn per cycle + 2.158481887 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13452) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv 
= VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.658839e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.665670e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.665670e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.660885e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.667784e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.667784e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.996029 sec - 2,903,440,785 cycles # 2.905 GHz - 7,133,263,572 instructions # 2.46 insn per cycle - 1.000043624 seconds time elapsed +TOTAL : 0.994893 sec + 2,904,128,875 cycles # 2.909 GHz + 7,133,196,313 instructions # 2.46 insn per cycle + 0.998901924 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:12261) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.850795e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.859329e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.859329e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.791241e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.798983e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.798983e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.893558 sec - 2,594,860,728 cycles # 2.894 GHz - 6,442,081,422 instructions # 2.48 insn per cycle - 0.897467277 seconds time elapsed +TOTAL : 0.923346 sec + 2,599,913,719 cycles # 2.805 GHz + 6,442,215,820 instructions # 2.48 insn per cycle + 0.927419565 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11276) 
(512y: 27) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.439166e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.444373e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.444373e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.509030e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.514644e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.514644e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.147399 sec - 2,120,827,658 cycles # 1.843 GHz - 3,430,692,895 instructions # 1.62 insn per cycle - 1.151630204 seconds time elapsed +TOTAL : 1.094451 sec + 2,118,917,788 cycles # 1.930 GHz + 3,430,381,413 instructions # 1.62 insn per cycle + 1.098405290 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2912) (512y: 22) (512z: 9647) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index 2ffec24382..b668bec12a 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl1_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_23:20:59 +DATE: 2023-10-30_23:04:46 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.622301e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.663450e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.667993e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.574079e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.613132e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.617511e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.481979 sec - 2,128,550,563 cycles # 3.010 GHz - 3,224,823,637 instructions # 1.52 insn per cycle - 0.764400517 seconds time elapsed +TOTAL : 0.482740 sec + 2,126,131,620 cycles # 3.020 GHz + 3,206,350,299 instructions # 1.51 insn per cycle + 0.764160349 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe -p 64 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.691781e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.751772e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.754417e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.745425e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.801933e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.804339e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.854635 sec - 6,378,019,041 cycles # 3.051 GHz - 13,626,178,470 instructions # 2.14 insn per cycle - 2.147701941 seconds time elapsed +TOTAL : 1.847491 sec + 6,452,122,354 cycles # 3.082 GHz + 13,948,621,951 instructions # 2.16 insn per cycle + 2.149801382 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.848557e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.849443e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.849443e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.868653e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.869502e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
5.869502e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 28.049326 sec - 86,454,539,554 cycles # 3.082 GHz - 135,563,506,729 instructions # 1.57 insn per cycle - 28.053340432 seconds time elapsed +TOTAL : 27.953627 sec + 86,115,472,235 cycles # 3.081 GHz + 135,575,076,729 instructions # 1.57 insn per cycle + 27.957670222 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:15486) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.843609e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.855341e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.855341e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.206534e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.219355e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.219355e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.405505 sec - 6,781,976,354 cycles # 2.816 GHz - 19,387,648,512 instructions # 2.86 insn per cycle - 2.409654435 seconds time elapsed +TOTAL : 2.283492 sec + 6,773,099,534 cycles # 2.962 GHz + 19,387,740,254 instructions # 2.86 insn per cycle + 2.287633055 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:69680) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.508642e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.514273e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.514273e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.514138e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.519767e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.519767e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.095108 sec - 3,177,327,867 cycles # 2.893 GHz - 6,809,010,526 instructions # 2.14 insn per cycle - 1.099141144 seconds time elapsed +TOTAL : 1.090954 sec + 3,177,223,779 cycles # 2.904 GHz + 6,808,757,079 instructions # 2.14 insn per cycle + 1.095061938 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:49077) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.812604e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.820636e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.820636e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.811722e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.820106e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.820106e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.912693 sec - 2,652,876,144 cycles # 2.896 GHz - 5,986,819,747 instructions # 2.26 insn per cycle - 0.916660495 seconds time elapsed +TOTAL : 0.913032 sec + 2,651,999,117 cycles # 2.894 GHz + 5,987,075,507 instructions # 2.26 insn per cycle + 0.916949136 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) 
(avx2:42677) (512y: 11) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.529382e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.535256e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.535256e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.538318e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.543956e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.543956e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.080553 sec - 2,075,263,732 cycles # 1.915 GHz - 3,501,293,642 instructions # 1.69 insn per cycle - 1.084542679 seconds time elapsed +TOTAL : 1.073994 sec + 2,072,605,882 cycles # 1.924 GHz + 3,501,780,959 instructions # 1.69 insn per cycle + 1.077935198 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5198) (512y: 3) (512z:44822) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index ccddbcb261..5e744a93e3 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl1_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_23:21:50 +DATE: 2023-10-30_23:05:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.542926e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.583869e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.588032e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.524936e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.563450e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.568376e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.483025 sec - 2,121,528,102 cycles # 3.016 GHz - 3,204,060,218 instructions # 1.51 insn per cycle - 0.763343575 seconds time elapsed +TOTAL : 0.488640 sec + 2,025,619,146 cycles # 2.859 GHz + 3,037,506,653 instructions # 1.50 insn per cycle + 0.770645006 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe -p 64 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.643855e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.701871e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.704319e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.649797e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.705127e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.707537e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.857658 sec - 6,390,679,996 cycles # 3.057 GHz - 13,951,131,700 instructions # 2.18 insn per cycle - 2.149753987 seconds time elapsed +TOTAL : 1.858266 sec + 6,431,342,673 cycles # 3.079 GHz + 13,514,328,149 instructions # 2.10 insn per cycle + 2.148737908 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.871054e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.871882e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.871882e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.901255e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.902089e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
5.902089e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 27.942204 sec - 86,129,095,595 cycles # 3.082 GHz - 135,903,968,419 instructions # 1.58 insn per cycle - 27.946202072 seconds time elapsed +TOTAL : 27.799387 sec + 86,050,210,121 cycles # 3.095 GHz + 135,904,121,070 instructions # 1.58 insn per cycle + 27.803438431 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:15910) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.102139e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.115090e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.115090e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.137168e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.149873e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.149873e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.317068 sec - 6,849,367,008 cycles # 2.952 GHz - 19,439,508,900 instructions # 2.84 insn per cycle - 2.321301652 seconds time elapsed +TOTAL : 2.305713 sec + 6,851,279,837 cycles # 2.969 GHz + 19,440,056,931 instructions # 2.84 insn per cycle + 2.309801766 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:69722) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.550639e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.556581e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.556581e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.547310e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.553162e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.553162e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.065503 sec - 3,104,717,730 cycles # 2.905 GHz - 6,719,677,292 instructions # 2.16 insn per cycle - 1.069490769 seconds time elapsed +TOTAL : 1.067714 sec + 3,107,929,943 cycles # 2.902 GHz + 6,719,698,155 instructions # 2.16 insn per cycle + 1.071722984 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:47667) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.830158e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.838511e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.838511e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.843070e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.851764e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.851764e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.903933 sec - 2,625,287,611 cycles # 2.893 GHz - 5,970,265,330 instructions # 2.27 insn per cycle - 0.908100788 seconds time elapsed +TOTAL : 0.897455 sec + 2,626,652,350 cycles # 2.916 GHz + 5,970,213,844 instructions # 2.27 insn per cycle + 0.901540279 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) 
(avx2:41842) (512y: 13) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.532935e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.538602e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.538602e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.539415e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.545162e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.545162e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.077860 sec - 2,081,643,160 cycles # 1.926 GHz - 3,494,996,935 instructions # 1.68 insn per cycle - 1.081878939 seconds time elapsed +TOTAL : 1.073421 sec + 2,077,996,637 cycles # 1.930 GHz + 3,494,858,382 instructions # 1.68 insn per cycle + 1.077375599 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4162) (512y: 4) (512z:44465) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 18cdd9e817..2f0b8b31cf 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_23:02:05 +DATE: 2023-10-30_22:48:27 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.507203e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.532823e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.534908e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.492406e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.515496e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.517439e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.519965 sec - 2,259,116,977 cycles # 3.016 GHz - 3,550,677,907 instructions # 1.57 insn per cycle - 0.808664205 seconds time elapsed +TOTAL : 0.515757 sec + 2,237,825,305 cycles # 3.009 GHz + 3,541,071,069 instructions # 1.58 insn per cycle + 0.803705171 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.129982e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.158539e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.159767e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.139310e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.165996e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.167100e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.020683 sec - 9,742,552,779 cycles # 2.978 GHz - 22,171,425,890 instructions # 2.28 insn per cycle - 3.330856073 seconds time elapsed +TOTAL : 3.020287 sec + 10,063,525,082 cycles # 3.080 GHz + 22,322,079,468 instructions # 2.22 insn per cycle + 3.324599235 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.954603e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.955508e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.955508e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.971138e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.972060e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.972060e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.399933 sec - 25,932,121,674 cycles # 3.087 GHz - 79,444,894,057 instructions # 3.06 insn per cycle - 8.404035787 seconds time elapsed +TOTAL : 8.329783 sec + 25,914,783,719 cycles # 3.110 GHz + 79,442,464,075 instructions # 3.07 insn per cycle + 8.333954442 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4857) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.775508e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.778880e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.778880e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.644383e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.647633e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.647633e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.353141 sec - 12,647,497,564 cycles # 2.903 GHz - 38,553,800,907 instructions # 3.05 insn per cycle - 4.357433600 seconds time elapsed +TOTAL : 4.509094 sec + 12,959,080,361 cycles # 2.872 GHz + 38,555,729,789 instructions # 2.98 insn per cycle + 4.513261823 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13161) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.666348e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.684102e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.684102e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.730078e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.748568e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.748568e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.901675 sec - 5,517,462,587 cycles # 2.896 GHz - 13,483,872,666 instructions # 2.44 insn per cycle - 1.905984462 seconds time elapsed +TOTAL : 1.887755 sec + 5,512,242,363 cycles # 2.915 GHz + 13,486,515,249 instructions # 2.45 insn per cycle + 1.891845030 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11242) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.813799e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.836969e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.836969e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.879764e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.903641e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.903641e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.680364 sec - 4,866,264,721 cycles # 2.890 GHz - 12,140,855,849 instructions # 2.49 insn per cycle - 1.684505888 seconds time elapsed +TOTAL : 1.669158 sec + 4,872,534,032 cycles # 2.913 GHz + 12,139,368,739 instructions # 2.49 insn per cycle + 1.673328458 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 
0) (avx2:10154) (512y: 79) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.626772e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.640752e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.640752e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.640128e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.654087e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.654087e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.159806 sec - 4,141,315,567 cycles # 1.915 GHz - 6,338,871,155 instructions # 1.53 insn per cycle - 2.164035991 seconds time elapsed +TOTAL : 2.155921 sec + 4,141,154,094 cycles # 1.918 GHz + 6,338,702,815 instructions # 1.53 insn per cycle + 2.160133515 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1802) (512y: 93) (512z: 9358) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index a91e61ac33..ccac2b3fbf 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_m_inl0_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-29_23:02:41 +DATE: 2023-10-30_22:49:04 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.484779e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.510802e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.512835e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.490192e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.514534e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.516400e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.520622 sec - 2,247,347,861 cycles # 3.005 GHz - 3,405,515,964 instructions # 1.52 insn per cycle - 0.810033779 seconds time elapsed +TOTAL : 0.517992 sec + 2,239,749,066 cycles # 3.007 GHz + 3,465,742,535 instructions # 1.55 insn per cycle + 0.806009054 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.131849e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.160416e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.161588e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.150690e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.177532e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.178659e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.012933 sec - 10,067,953,075 cycles # 3.072 GHz - 22,554,266,954 instructions # 2.24 insn per cycle - 3.332777337 seconds time elapsed +TOTAL : 3.007631 sec + 10,038,232,454 cycles # 3.083 GHz + 21,636,576,131 instructions # 2.16 insn per cycle + 3.312819700 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.963198e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.964177e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.964177e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.966819e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.967730e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.967730e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.362399 sec - 25,931,586,977 cycles # 3.100 GHz - 79,453,998,538 instructions # 3.06 insn per cycle - 8.366440006 seconds time elapsed +TOTAL : 8.347331 sec + 25,957,942,010 cycles # 3.109 GHz + 79,453,544,444 instructions # 3.06 insn per cycle + 8.351342416 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4504) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.702418e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.705684e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.705684e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.615181e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.618274e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.618274e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.438259 sec - 12,658,801,526 cycles # 2.851 GHz - 38,526,780,330 instructions # 3.04 insn per cycle - 4.442516538 seconds time elapsed +TOTAL : 4.545254 sec + 12,649,362,853 cycles # 2.781 GHz + 38,527,203,698 instructions # 3.05 insn per cycle + 4.549380271 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:12928) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.609178e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.626314e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.626314e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.633408e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.651576e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.651576e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.914007 sec - 5,552,680,604 cycles # 2.896 GHz - 13,609,436,822 instructions # 2.45 insn per cycle - 1.918108421 seconds time elapsed +TOTAL : 1.908898 sec + 5,566,883,210 cycles # 2.911 GHz + 13,611,086,667 instructions # 2.45 insn per cycle + 1.912985527 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11327) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.728302e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.751459e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.751459e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.821954e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.844810e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.844810e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.708232 sec - 4,915,904,141 cycles # 2.894 GHz - 12,276,160,328 instructions # 2.50 insn per cycle - 1.712407371 seconds time elapsed +TOTAL : 1.678540 sec + 4,911,467,844 cycles # 2.920 GHz + 12,274,224,547 instructions # 2.50 insn per cycle + 1.682639558 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 
0) (avx2:10143) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.302364e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.315642e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.315642e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.669286e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.683741e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.683741e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.255296 sec - 4,145,511,406 cycles # 1.835 GHz - 6,445,337,128 instructions # 1.55 insn per cycle - 2.259721639 seconds time elapsed +TOTAL : 2.147327 sec + 4,147,311,338 cycles # 1.929 GHz + 6,446,476,893 instructions # 1.55 insn per cycle + 2.151443442 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1627) (512y: 191) (512z: 9356) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 891912d002..dd384d9a9e 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-29_23:04:57 +DATE: 2023-10-30_22:51:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.067375e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.067765e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.067910e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.071806e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.072188e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.072290e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.420440 sec - 8,330,926,782 cycles # 3.027 GHz - 17,804,207,702 instructions # 2.14 insn per cycle - 2.808869723 seconds time elapsed +TOTAL : 2.415811 sec + 8,328,884,135 cycles # 3.048 GHz + 17,466,962,527 instructions # 2.10 insn per cycle + 2.791455375 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.234125e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.236107e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.236370e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.218962e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.220743e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.220937e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.987841 sec - 13,233,324,917 cycles # 3.068 GHz - 29,085,210,527 instructions # 2.20 insn per cycle - 4.369016357 seconds time elapsed +TOTAL : 3.995999 sec + 13,328,179,760 cycles # 3.087 GHz + 28,796,282,685 instructions # 2.16 insn per cycle + 4.374280178 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.361191e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.361428e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.361428e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.508851e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.509092e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) 
= ( 8.509092e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.328766 sec - 18,835,837,198 cycles # 2.975 GHz - 53,915,472,988 instructions # 2.86 insn per cycle - 6.332862012 seconds time elapsed +TOTAL : 6.219499 sec + 18,779,011,052 cycles # 3.018 GHz + 53,915,285,892 instructions # 2.87 insn per cycle + 6.223403710 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32447) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.665611e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.665699e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.665699e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.681222e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.681310e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.681310e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.175599 sec - 9,816,016,304 cycles # 3.089 GHz - 27,093,946,906 instructions # 2.76 insn per cycle - 3.179704503 seconds time elapsed +TOTAL : 3.146190 sec + 9,792,919,230 cycles # 3.110 GHz + 27,092,459,718 instructions # 2.77 insn per cycle + 3.150238199 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96441) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.574862e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.575309e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.575309e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.616553e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.616985e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.616985e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.484453 sec - 4,283,307,071 cycles # 2.879 GHz - 9,561,322,868 instructions # 2.23 insn per cycle - 1.488611761 seconds time elapsed +TOTAL : 1.464498 sec + 4,237,325,660 cycles # 2.888 GHz + 9,561,899,697 instructions # 2.26 insn per cycle + 1.468620001 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.098539e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.099104e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.099104e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.153027e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.153572e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.153572e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.293453 sec - 3,744,455,330 cycles # 2.888 GHz - 8,485,338,626 instructions # 2.27 insn per cycle - 1.297331235 seconds time elapsed +TOTAL : 1.277062 sec + 3,703,837,435 cycles # 2.893 GHz + 8,485,212,359 instructions # 2.29 insn per cycle + 1.280922003 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) 
(avx2:79991) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.736334e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.737024e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.737024e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.762009e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.762555e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.762555e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.418268 sec - 2,698,588,731 cycles # 1.899 GHz - 4,273,475,075 instructions # 1.58 insn per cycle - 1.422259166 seconds time elapsed +TOTAL : 1.409289 sec + 2,693,541,093 cycles # 1.907 GHz + 4,272,936,710 instructions # 1.59 insn per cycle + 1.413386706 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 779aff9608..639241e990 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-29_23:29:53 +DATE: 2023-10-30_23:13:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.065298e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.066291e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.066291e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.065386e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.066269e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.066269e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.356632 sec - 8,208,405,954 cycles # 3.067 GHz - 18,386,996,274 instructions # 2.24 insn per cycle - 2.732467891 seconds time elapsed +TOTAL : 2.349881 sec + 8,241,377,872 cycles # 3.087 GHz + 18,573,308,608 instructions # 2.25 insn per cycle + 2.725675058 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -76,14 +80,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.233933e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.265435e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.265435e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.221612e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.252857e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.252857e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.977046 sec - 12,960,052,062 cycles # 3.016 GHz - 26,871,950,594 instructions # 2.07 insn per cycle - 4.356794911 seconds time elapsed +TOTAL : 3.974706 sec + 13,200,930,977 cycles # 3.073 GHz + 30,240,222,320 instructions # 2.29 insn per cycle + 4.351773020 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -100,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.131686e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.131907e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.131907e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.291402e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.291627e+01 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 8.291627e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.503787 sec - 18,827,889,283 cycles # 2.894 GHz - 53,917,168,926 instructions # 2.86 insn per cycle - 6.507727499 seconds time elapsed +TOTAL : 6.376409 sec + 18,809,228,656 cycles # 2.949 GHz + 53,916,582,682 instructions # 2.87 insn per cycle + 6.380275769 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32447) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe @@ -128,14 +132,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.661819e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.661909e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.661909e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.664099e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.664190e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.664190e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.182091 sec - 9,868,729,705 cycles # 3.098 GHz - 27,093,635,927 instructions # 2.75 insn per cycle - 3.186061824 seconds time elapsed +TOTAL : 3.178127 sec + 9,832,061,157 cycles # 3.091 GHz + 27,094,448,733 instructions # 2.76 insn per cycle + 3.182138725 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96441) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe @@ -156,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.594572e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.594989e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.594989e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.637569e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.638054e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.638054e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.473792 sec - 4,242,652,846 cycles # 2.872 GHz - 9,562,224,257 instructions # 2.25 insn per cycle - 1.477842985 seconds time elapsed +TOTAL : 1.457515 sec + 4,240,447,705 cycles # 2.904 GHz + 9,562,677,956 instructions # 2.26 insn per cycle + 1.461476328 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe @@ -184,14 +188,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.925609e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.926127e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.926127e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.148572e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.149184e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.149184e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.349109 sec - 3,712,707,805 cycles # 2.746 GHz - 8,486,481,321 instructions # 2.29 insn per cycle - 1.352971004 seconds time elapsed +TOTAL : 1.277569 sec + 3,716,396,189 cycles # 2.902 GHz + 8,486,430,712 instructions # 2.28 insn per cycle + 1.281428385 seconds time 
elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe @@ -212,14 +216,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.766105e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.766661e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.766661e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.751319e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.751878e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.751878e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.407314 sec - 2,697,937,870 cycles # 1.913 GHz - 4,273,842,399 instructions # 1.58 insn per cycle - 1.411205772 seconds time elapsed +TOTAL : 1.412799 sec + 2,691,848,606 cycles # 1.901 GHz + 4,273,756,865 instructions # 1.59 insn per cycle + 1.416761861 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index 0a16c68e9b..bf7c77a5e4 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-29_23:06:00 +DATE: 2023-10-30_22:52:24 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.064560e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.064977e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.065132e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.062263e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.062685e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.062819e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.424959 sec - 8,290,876,355 cycles # 3.020 GHz - 17,883,558,055 instructions # 2.16 insn per cycle - 2.804470986 seconds time elapsed +TOTAL : 2.429730 sec + 8,238,728,563 cycles # 2.986 GHz + 17,116,797,941 instructions # 2.08 insn per cycle + 2.815386546 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe -p 1 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.280396e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.282381e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.282562e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.262982e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.264788e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.264964e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.980459 sec - 13,239,563,134 cycles # 3.078 GHz - 30,624,142,528 instructions # 2.31 insn per cycle - 4.360149338 seconds time elapsed +TOTAL : 3.989275 sec + 13,288,251,530 cycles # 3.081 GHz + 31,068,913,236 instructions # 2.34 insn per cycle + 4.371305973 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.553878e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.554116e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.554116e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.614917e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.615157e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) 
= ( 8.615157e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.194684 sec - 18,952,500,242 cycles # 3.058 GHz - 53,924,708,560 instructions # 2.85 insn per cycle - 6.198735883 seconds time elapsed +TOTAL : 6.134510 sec + 18,886,125,284 cycles # 3.077 GHz + 53,924,616,535 instructions # 2.86 insn per cycle + 6.138383350 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32062) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.665149e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.665236e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.665236e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.671266e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.671363e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.671363e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.175912 sec - 9,816,369,770 cycles # 3.088 GHz - 27,089,777,031 instructions # 2.76 insn per cycle - 3.179926705 seconds time elapsed +TOTAL : 3.164494 sec + 9,802,153,603 cycles # 3.095 GHz + 27,090,486,636 instructions # 2.76 insn per cycle + 3.168549098 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96284) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.605988e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.606417e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.606417e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.619789e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.620217e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.620217e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.469433 sec - 4,254,356,021 cycles # 2.889 GHz - 9,561,341,195 instructions # 2.25 insn per cycle - 1.473424767 seconds time elapsed +TOTAL : 1.463092 sec + 4,245,007,171 cycles # 2.895 GHz + 9,561,802,040 instructions # 2.25 insn per cycle + 1.467091762 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84478) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.146642e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.147225e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.147225e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.158600e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.159171e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.159171e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.278652 sec - 3,698,810,481 cycles # 2.885 GHz - 8,485,246,341 instructions # 2.29 insn per cycle - 1.282579911 seconds time elapsed +TOTAL : 1.274480 sec + 3,696,559,414 cycles # 2.893 GHz + 8,485,931,929 instructions # 2.30 insn per cycle + 1.278469617 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) 
(avx2:80014) (512y: 241) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.745293e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.745840e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.745840e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.763810e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.764369e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.764369e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.415634 sec - 2,695,445,463 cycles # 1.900 GHz - 4,276,122,167 instructions # 1.59 insn per cycle - 1.419604673 seconds time elapsed +TOTAL : 1.408262 sec + 2,689,637,134 cycles # 1.905 GHz + 4,276,228,536 instructions # 1.59 insn per cycle + 1.412254518 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2169) (512y: 187) (512z:79110) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index a61eaa07ac..a97a68dc6e 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-29_23:07:02 +DATE: 2023-10-30_22:53:26 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.746580e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.747476e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.747723e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.748686e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.749873e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.750128e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.661004 sec - 5,893,714,693 cycles # 3.049 GHz - 11,380,357,662 instructions # 1.93 insn per cycle - 1.992297624 seconds time elapsed +TOTAL : 1.656102 sec + 5,926,019,855 cycles # 3.073 GHz + 12,736,616,385 instructions # 2.15 insn per cycle + 1.985610858 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.300856e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.301560e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.301692e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.339808e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.340454e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.340531e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.915327 sec - 6,706,835,190 cycles # 3.061 GHz - 13,746,825,081 instructions # 2.05 insn per cycle - 2.247074611 seconds time elapsed +TOTAL : 1.885166 sec + 6,654,335,499 cycles # 3.077 GHz + 13,084,980,952 instructions # 1.97 insn per cycle + 2.218919529 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.876487e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.876755e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.876755e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.021345e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.021625e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
9.021625e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.948808 sec - 17,851,291,221 cycles # 3.000 GHz - 53,589,394,022 instructions # 3.00 insn per cycle - 5.952703310 seconds time elapsed +TOTAL : 5.858145 sec + 17,881,884,830 cycles # 3.055 GHz + 53,592,678,992 instructions # 3.00 insn per cycle + 5.862065521 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.592563e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.593075e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.593075e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.600099e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.600523e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.600523e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.474526 sec - 4,564,867,274 cycles # 3.089 GHz - 13,762,394,850 instructions # 3.01 insn per cycle - 1.478506345 seconds time elapsed +TOTAL : 1.472157 sec + 4,568,508,892 cycles # 3.097 GHz + 13,762,283,879 instructions # 3.01 insn per cycle + 1.476097215 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.253655e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.255407e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.255407e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.239872e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.241574e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.241574e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.733937 sec - 2,134,486,559 cycles # 2.895 GHz - 4,816,669,045 instructions # 2.26 insn per cycle - 0.737877141 seconds time elapsed +TOTAL : 0.734175 sec + 2,134,092,407 cycles # 2.894 GHz + 4,816,927,075 instructions # 2.26 insn per cycle + 0.738107512 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.237971e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.240196e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.240196e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.319564e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.321778e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.321778e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.645797 sec - 1,869,947,742 cycles # 2.881 GHz - 4,273,861,309 instructions # 2.29 insn per cycle - 0.649761140 seconds time elapsed +TOTAL : 0.639518 sec + 1,869,432,943 cycles # 2.909 GHz + 4,274,112,596 instructions # 2.29 insn per cycle + 0.643443684 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) 
(avx2:80610) (512y: 46) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.544900e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.547370e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.547370e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.529460e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.531730e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.531730e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.706355 sec - 1,353,540,790 cycles # 1.907 GHz - 2,158,526,163 instructions # 1.59 insn per cycle - 0.710313630 seconds time elapsed +TOTAL : 0.706303 sec + 1,352,907,244 cycles # 1.906 GHz + 2,158,709,001 instructions # 1.60 insn per cycle + 0.710217457 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2878) (512y: 49) (512z:79298) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index b589ac844d..70e303847d 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-29_23:30:55 +DATE: 2023-10-30_23:14:51 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.797174e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.798921e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.798921e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.793897e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.795846e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.795846e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187094e-05 +- 9.825664e-06 ) GeV^-6 -TOTAL : 1.593040 sec - 5,715,261,288 cycles # 3.064 GHz - 12,320,056,102 instructions # 2.16 insn per cycle - 1.922596959 seconds time elapsed +TOTAL : 1.608399 sec + 5,800,830,027 cycles # 3.073 GHz + 12,113,811,498 instructions # 2.09 insn per cycle + 1.943977837 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -76,14 +80,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.325074e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.337787e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.337787e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.322809e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.335242e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.335242e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856441e-04 +- 8.331096e-05 ) GeV^-6 -TOTAL : 1.872710 sec - 6,507,354,412 cycles # 3.033 GHz - 12,800,799,336 instructions # 1.97 insn per cycle - 2.202472280 seconds time elapsed +TOTAL : 1.875097 sec + 6,569,119,147 cycles # 3.057 GHz + 13,956,037,742 instructions # 2.12 insn per cycle + 2.205562052 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -100,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.085047e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.085375e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.085375e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.135151e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.135458e+01 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 9.135458e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.815218 sec - 17,878,109,599 cycles # 3.073 GHz - 53,590,970,232 instructions # 3.00 insn per cycle - 5.819062963 seconds time elapsed +TOTAL : 5.788302 sec + 17,919,872,006 cycles # 3.094 GHz + 53,589,483,238 instructions # 2.99 insn per cycle + 5.792489017 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe @@ -128,14 +132,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.598031e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.598474e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.598474e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.596719e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.597153e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.597153e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.472581 sec - 4,566,761,974 cycles # 3.095 GHz - 13,763,691,081 instructions # 3.01 insn per cycle - 1.476554455 seconds time elapsed +TOTAL : 1.472955 sec + 4,563,554,686 cycles # 3.091 GHz + 13,763,441,312 instructions # 3.02 insn per cycle + 1.476926496 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe @@ -156,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.210371e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.212129e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.212129e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.761763e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.763329e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.763329e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.737467 sec - 2,136,565,892 cycles # 2.884 GHz - 4,818,072,795 instructions # 2.26 insn per cycle - 0.741455543 seconds time elapsed +TOTAL : 0.785466 sec + 2,137,347,048 cycles # 2.710 GHz + 4,817,859,383 instructions # 2.25 insn per cycle + 0.789305461 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe @@ -184,14 +188,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.265438e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.267751e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.267751e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.207081e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.209309e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.209309e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.644070 sec - 1,873,290,392 cycles # 2.894 GHz - 4,274,881,772 instructions # 2.28 insn per cycle - 0.647956211 seconds time elapsed +TOTAL : 0.648749 sec + 1,882,456,319 cycles # 2.887 GHz + 4,274,769,656 instructions # 2.27 insn per cycle + 0.652761200 seconds time 
elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe @@ -212,14 +216,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.512610e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.515015e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.515015e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.539970e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.542209e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.542209e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.708003 sec - 1,353,240,321 cycles # 1.902 GHz - 2,159,591,869 instructions # 1.60 insn per cycle - 0.711943065 seconds time elapsed +TOTAL : 0.705380 sec + 1,353,054,715 cycles # 1.910 GHz + 2,159,449,357 instructions # 1.60 insn per cycle + 0.709198096 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2878) (512y: 49) (512z:79298) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index 6a1d91cc81..e7f54d6d2b 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-29_23:07:48 +DATE: 2023-10-30_22:54:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.750398e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.751240e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.751507e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.770874e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.771789e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.772038e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.657470 sec - 5,914,573,027 cycles # 3.060 GHz - 12,635,679,749 instructions # 2.14 insn per cycle - 1.989578773 seconds time elapsed +TOTAL : 1.647024 sec + 5,887,181,959 cycles # 3.065 GHz + 12,015,496,021 instructions # 2.04 insn per cycle + 1.977473721 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe -p 1 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.319293e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.319958e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.320039e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.320055e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.320688e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.320771e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.904048 sec - 6,719,558,763 cycles # 3.078 GHz - 14,445,072,539 instructions # 2.15 insn per cycle - 2.238962809 seconds time elapsed +TOTAL : 1.902397 sec + 6,695,503,483 cycles # 3.076 GHz + 14,006,039,580 instructions # 2.09 insn per cycle + 2.233097385 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.960737e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.961007e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.961007e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.023182e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.023466e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
9.023466e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.894682 sec - 17,919,993,214 cycles # 3.038 GHz - 53,579,919,039 instructions # 2.99 insn per cycle - 5.898545332 seconds time elapsed +TOTAL : 5.854767 sec + 17,892,204,982 cycles # 3.054 GHz + 53,579,913,182 instructions # 2.99 insn per cycle + 5.858666128 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:20206) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.600672e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.601097e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.601097e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.571699e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.572125e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.572125e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.471215 sec - 4,553,870,476 cycles # 3.092 GHz - 13,755,590,686 instructions # 3.02 insn per cycle - 1.475236912 seconds time elapsed +TOTAL : 1.483000 sec + 4,554,187,119 cycles # 3.064 GHz + 13,755,949,994 instructions # 3.02 insn per cycle + 1.487135379 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96606) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.718983e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.720596e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.720596e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.181275e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.183090e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.183090e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.791741 sec - 2,154,835,228 cycles # 2.710 GHz - 4,819,009,366 instructions # 2.24 insn per cycle - 0.795908499 seconds time elapsed +TOTAL : 0.740225 sec + 2,148,260,465 cycles # 2.889 GHz + 4,819,022,606 instructions # 2.24 insn per cycle + 0.744129170 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:85359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.270564e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.272850e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.272850e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.261554e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.263763e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.263763e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.643390 sec - 1,877,734,991 cycles # 2.903 GHz - 4,275,647,592 instructions # 2.28 insn per cycle - 0.647331184 seconds time elapsed +TOTAL : 0.643969 sec + 1,875,691,757 cycles # 2.898 GHz + 4,276,314,641 instructions # 2.28 insn per cycle + 0.647880426 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) 
(avx2:81075) (512y: 26) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.500722e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.502983e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.502983e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.552883e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.555127e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.555127e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.708980 sec - 1,359,249,206 cycles # 1.908 GHz - 2,164,821,480 instructions # 1.59 insn per cycle - 0.712779208 seconds time elapsed +TOTAL : 0.703972 sec + 1,355,506,444 cycles # 1.916 GHz + 2,165,111,812 instructions # 1.60 insn per cycle + 0.707801876 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3475) (512y: 34) (512z:79492) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index d35904f255..69178ef9b8 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-29_23:08:35 +DATE: 2023-10-30_22:54:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.693455e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.694040e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.694151e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.693924e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.694449e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.694675e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.165996 sec - 7,604,287,841 cycles # 3.058 GHz - 15,250,589,739 instructions # 2.01 insn per cycle - 2.543560163 seconds time elapsed +TOTAL : 2.167248 sec + 7,509,767,168 cycles # 3.023 GHz + 16,389,976,098 instructions # 2.18 insn per cycle + 2.544503385 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe -p 1 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.112978e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.113253e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.113284e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.115466e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.115725e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.115755e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.396804 sec - 11,372,878,062 cycles # 3.056 GHz - 26,091,634,911 instructions # 2.29 insn per cycle - 3.777850660 seconds time elapsed +TOTAL : 3.395163 sec + 11,404,310,500 cycles # 3.070 GHz + 26,402,744,024 instructions # 2.32 insn per cycle + 3.773859216 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.056300e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.056513e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.056513e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.989120e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.989332e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = 
( 7.989332e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.562282 sec - 19,137,224,572 cycles # 2.915 GHz - 54,152,596,150 instructions # 2.83 insn per cycle - 6.566118044 seconds time elapsed +TOTAL : 6.646182 sec + 19,129,776,834 cycles # 2.885 GHz + 54,153,273,486 instructions # 2.83 insn per cycle + 6.650112221 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32066) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.614488e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.614584e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.614584e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.628095e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.628183e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.628183e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.275480 sec - 9,428,014,238 cycles # 2.876 GHz - 26,160,822,763 instructions # 2.77 insn per cycle - 3.279507093 seconds time elapsed +TOTAL : 3.248782 sec + 9,415,772,907 cycles # 2.896 GHz + 26,160,926,400 instructions # 2.78 insn per cycle + 3.252909826 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96005) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.792295e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.792790e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.792790e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.801898e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.802416e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.802416e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.397176 sec - 4,040,299,499 cycles # 2.884 GHz - 9,227,715,152 instructions # 2.28 insn per cycle - 1.401325201 seconds time elapsed +TOTAL : 1.394669 sec + 4,044,411,295 cycles # 2.895 GHz + 9,227,963,992 instructions # 2.28 insn per cycle + 1.398693419 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84155) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.363999e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.364611e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.364611e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.399382e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.400021e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.400021e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.216328 sec - 3,520,985,317 cycles # 2.887 GHz - 8,175,010,783 instructions # 2.32 insn per cycle - 1.220282338 seconds time elapsed +TOTAL : 1.205178 sec + 3,515,102,818 cycles # 2.909 GHz + 8,175,049,106 instructions # 2.33 insn per cycle + 1.209109436 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) 
(avx2:79844) (512y: 79) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.797607e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.798256e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.798256e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.827743e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.828372e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.828372e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.395271 sec - 2,657,055,348 cycles # 1.900 GHz - 4,154,616,344 instructions # 1.56 insn per cycle - 1.399324691 seconds time elapsed +TOTAL : 1.384871 sec + 2,648,995,532 cycles # 1.909 GHz + 4,154,654,547 instructions # 1.57 insn per cycle + 1.388825055 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2045) (512y: 93) (512z:78760) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index 82205091a1..de63defaec 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_m_inl0_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-29_23:09:36 +DATE: 2023-10-30_22:56:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.672183e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.672714e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.672860e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.674499e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.675054e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.675214e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.170042 sec - 7,549,016,338 cycles # 3.030 GHz - 16,754,265,205 instructions # 2.22 insn per cycle - 2.548274846 seconds time elapsed +TOTAL : 2.173682 sec + 7,622,451,046 cycles # 3.058 GHz + 15,269,915,811 instructions # 2.00 insn per cycle + 2.549506521 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe -p 1 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.108308e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.108579e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.108609e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.113208e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.113470e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.113496e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.405791 sec - 11,417,750,948 cycles # 3.062 GHz - 24,521,841,592 instructions # 2.15 insn per cycle - 3.785623947 seconds time elapsed +TOTAL : 3.390779 sec + 11,439,695,311 cycles # 3.082 GHz + 23,466,887,699 instructions # 2.05 insn per cycle + 3.768718600 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.985475e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.985690e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.985690e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.476067e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.476300e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = 
( 8.476300e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.617999 sec - 19,125,113,032 cycles # 2.889 GHz - 54,154,595,717 instructions # 2.83 insn per cycle - 6.621882562 seconds time elapsed +TOTAL : 6.235077 sec + 19,078,518,974 cycles # 3.059 GHz + 54,152,325,581 instructions # 2.84 insn per cycle + 6.238938674 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32243) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.625621e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.625713e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.625713e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.635029e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.635114e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.635114e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.253373 sec - 9,379,546,230 cycles # 2.880 GHz - 26,078,178,786 instructions # 2.78 insn per cycle - 3.257296595 seconds time elapsed +TOTAL : 3.234145 sec + 9,372,109,067 cycles # 2.895 GHz + 26,079,565,451 instructions # 2.78 insn per cycle + 3.238069698 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:95899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.735058e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.735513e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.735513e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.777387e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.777857e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.777857e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.420184 sec - 4,100,951,658 cycles # 2.883 GHz - 9,213,534,111 instructions # 2.25 insn per cycle - 1.424137554 seconds time elapsed +TOTAL : 1.403764 sec + 4,069,542,076 cycles # 2.892 GHz + 9,213,514,277 instructions # 2.26 insn per cycle + 1.407702679 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:83776) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.337906e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.338510e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.338510e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.341206e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.341838e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.341838e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.222656 sec - 3,534,053,802 cycles # 2.883 GHz - 8,168,175,850 instructions # 2.31 insn per cycle - 1.226575004 seconds time elapsed +TOTAL : 1.221671 sec + 3,533,596,743 cycles # 2.884 GHz + 8,168,189,541 instructions # 2.31 insn per cycle + 1.225733749 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) 
(avx2:79373) (512y: 229) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.838981e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.839568e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.839568e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.857360e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.857975e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.857975e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.379834 sec - 2,624,626,162 cycles # 1.898 GHz - 4,153,410,244 instructions # 1.58 insn per cycle - 1.383695622 seconds time elapsed +TOTAL : 1.374279 sec + 2,619,392,465 cycles # 1.902 GHz + 4,154,053,123 instructions # 1.59 insn per cycle + 1.378252659 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1492) (512y: 175) (512z:78776) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index fa6a790ead..13e7273074 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_23:03:17 +DATE: 2023-10-30_22:49:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.828674e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.329373e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.682112e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.973651e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.318745e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.639621e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.442044 sec - 1,954,521,689 cycles # 2.982 GHz - 2,755,776,497 instructions # 1.41 insn per cycle - 0.712294869 seconds time elapsed +TOTAL : 0.439457 sec + 1,962,130,491 cycles # 3.016 GHz + 2,773,909,314 instructions # 1.41 insn per cycle + 0.708454238 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.578863e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.153240e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.516574e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.708042e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.153949e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.486889e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.520851 sec - 2,260,462,105 cycles # 3.006 GHz - 3,201,459,405 instructions # 1.42 insn per cycle - 0.811051172 seconds time elapsed +TOTAL : 0.515792 sec + 2,268,451,135 cycles # 3.042 GHz + 3,263,513,511 instructions # 1.44 insn per cycle + 0.802925124 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.097810e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.120561e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.120561e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.107249e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.130516e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.130516e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.514573 sec - 4,694,462,296 cycles # 3.093 GHz - 13,466,786,719 instructions # 2.87 insn per cycle - 1.518543550 seconds time elapsed +TOTAL : 1.501091 sec + 4,691,660,297 cycles # 3.119 GHz + 13,466,853,090 instructions # 2.87 insn per cycle + 1.505086869 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.978958e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.053789e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.053789e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.999256e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.074586e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.074586e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.849069 sec - 2,625,204,853 cycles # 3.079 GHz - 7,555,561,159 instructions # 2.88 insn per cycle - 0.853206224 seconds time elapsed +TOTAL : 0.840405 sec + 2,622,747,297 cycles # 3.108 GHz + 7,555,461,285 instructions # 2.88 insn per cycle + 0.844567020 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3095) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = 
VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.376965e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.601651e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.601651e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.426098e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.657593e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.657593e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.506898 sec - 1,480,175,540 cycles # 2.900 GHz - 3,122,207,376 instructions # 2.11 insn per cycle - 0.511062678 seconds time elapsed +TOTAL : 0.499544 sec + 1,475,119,745 cycles # 2.933 GHz + 3,121,837,620 instructions # 2.12 insn per cycle + 0.503611366 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.751482e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.019340e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.019340e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.782086e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.058198e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.058198e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.458339 sec - 1,342,741,453 cycles # 2.907 GHz - 2,984,124,260 instructions # 2.22 insn per cycle - 0.462497124 seconds time elapsed +TOTAL : 0.454992 sec + 1,341,924,139 cycles # 2.929 GHz + 2,984,324,634 instructions # 2.22 insn per cycle + 0.459135591 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2694) 
(512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.286674e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.393099e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.393099e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.589079e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.713488e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.713488e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.741182 sec - 1,331,264,860 cycles # 1.801 GHz - 1,958,218,603 instructions # 1.47 insn per cycle - 0.745390166 seconds time elapsed +TOTAL : 0.655941 sec + 1,323,693,549 cycles # 2.008 GHz + 1,955,727,668 instructions # 1.48 insn per cycle + 0.659915394 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index 03b047ce45..f866b77efa 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_23:28:15 +DATE: 2023-10-30_23:12:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.704780e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.258212e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.258212e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.730145e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.272081e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.272081e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.466821 sec - 2,068,559,520 cycles # 3.008 GHz - 3,048,703,179 instructions # 1.47 insn per cycle - 0.746274498 seconds time elapsed +TOTAL : 0.466796 sec + 1,998,477,799 cycles # 2.941 GHz + 2,930,077,505 instructions # 1.47 insn per cycle + 0.737311250 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -76,14 +80,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.350548e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.384141e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.384141e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.395450e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.390061e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.390061e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.740457 sec - 2,965,577,785 cycles # 3.011 GHz - 4,535,211,847 instructions # 1.53 insn per cycle - 1.044362602 seconds time elapsed +TOTAL : 0.731935 sec + 2,979,024,996 cycles # 3.040 GHz + 4,603,881,403 instructions # 1.55 insn per cycle + 1.037777142 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -100,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.098304e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.121425e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.121425e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.102555e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.125269e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 1.125269e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.518611 sec - 4,721,828,193 cycles # 3.102 GHz - 13,472,214,405 instructions # 2.85 insn per cycle - 1.522843368 seconds time elapsed +TOTAL : 1.513100 sec + 4,722,087,835 cycles # 3.114 GHz + 13,474,075,896 instructions # 2.85 insn per cycle + 1.517291651 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe @@ -128,14 +132,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.971534e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.046066e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.046066e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.980738e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.057162e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.057162e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.858759 sec - 2,658,811,149 cycles # 3.087 GHz - 7,605,857,267 instructions # 2.86 insn per cycle - 0.863212665 seconds time elapsed +TOTAL : 0.855659 sec + 2,660,648,265 cycles # 3.096 GHz + 7,605,299,110 instructions # 2.86 insn per cycle + 0.860039908 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3095) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe @@ -156,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.374731e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.600629e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.600629e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.391170e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.611775e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.611775e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.513772 sec - 1,512,102,304 cycles # 2.921 GHz - 3,172,818,782 instructions # 2.10 insn per cycle - 0.518159822 seconds time elapsed +TOTAL : 0.511637 sec + 1,512,122,172 cycles # 2.935 GHz + 3,172,695,983 instructions # 2.10 insn per cycle + 0.515934668 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe @@ -184,14 +188,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.715581e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.982318e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.982318e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.745688e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.016672e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.016672e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.468578 sec - 1,372,931,684 cycles # 2.908 GHz - 3,033,222,325 instructions # 2.21 insn per cycle - 0.472803521 seconds time elapsed +TOTAL : 0.464905 sec + 1,371,264,682 cycles # 2.926 GHz + 3,033,027,472 instructions # 2.21 insn per cycle + 0.469116581 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) 
(avx2: 2694) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe @@ -212,14 +216,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.550728e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.673818e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.673818e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.565004e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.691739e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.691739e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.672513 sec - 1,356,905,701 cycles # 2.007 GHz - 1,995,421,313 instructions # 1.47 insn per cycle - 0.676777093 seconds time elapsed +TOTAL : 0.668760 sec + 1,356,402,541 cycles # 2.017 GHz + 1,995,346,777 instructions # 1.47 insn per cycle + 0.673109753 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index f4cb87ae3b..08d1ff56a8 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_d_inl0_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_23:03:34 +DATE: 2023-10-30_22:49:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.841504e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.240938e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.561566e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.951461e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.258664e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.565594e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.442720 sec - 2,000,348,753 cycles # 3.007 GHz - 2,826,931,969 instructions # 1.41 insn per cycle - 0.722757887 seconds time elapsed +TOTAL : 0.441527 sec + 1,956,451,794 cycles # 2.998 GHz + 2,789,819,750 instructions # 1.43 insn per cycle + 0.709902590 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.552475e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.039915e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.385680e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.677767e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.046823e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.369504e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.521737 sec - 2,253,159,816 cycles # 2.989 GHz - 3,247,267,932 instructions # 1.44 insn per cycle - 0.812591785 seconds time elapsed +TOTAL : 0.517612 sec + 2,231,335,512 cycles # 2.987 GHz + 3,256,861,084 instructions # 1.46 insn per cycle + 0.805480224 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.079531e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.102188e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.102188e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.097921e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.120832e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.120832e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.539492 sec - 4,711,862,622 cycles # 3.054 GHz - 13,460,909,244 instructions # 2.86 insn per cycle - 1.543571266 seconds time elapsed +TOTAL : 1.513492 sec + 4,715,532,801 cycles # 3.109 GHz + 13,460,856,046 instructions # 2.85 insn per cycle + 1.517628249 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 849) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.964754e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.038833e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.038833e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.989508e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.064579e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.064579e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.854964 sec - 2,629,162,097 cycles # 3.063 GHz - 7,554,735,733 instructions # 2.87 insn per cycle - 0.859083535 seconds time elapsed +TOTAL : 0.843927 sec + 2,623,248,024 cycles # 3.096 GHz + 7,554,408,256 instructions # 2.88 insn per cycle + 0.847974886 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3088) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = 
VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.023252e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.218241e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.218241e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.379456e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.600576e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.600576e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.565372 sec - 1,485,590,301 cycles # 2.610 GHz - 3,120,866,984 instructions # 2.10 insn per cycle - 0.569840427 seconds time elapsed +TOTAL : 0.505877 sec + 1,477,736,959 cycles # 2.902 GHz + 3,120,500,586 instructions # 2.11 insn per cycle + 0.510031020 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2900) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.463048e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.709967e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.709967e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.707124e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.978219e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.978219e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.495664 sec - 1,346,960,365 cycles # 2.697 GHz - 2,981,663,042 instructions # 2.21 insn per cycle - 0.500027734 seconds time elapsed +TOTAL : 0.463655 sec + 1,341,577,851 cycles # 2.872 GHz + 2,981,136,774 instructions # 2.22 insn per cycle + 0.467697604 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) 
(512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.579015e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.706333e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.706333e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.548823e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.673906e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.673906e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.658262 sec - 1,325,238,025 cycles # 2.003 GHz - 1,954,041,030 instructions # 1.47 insn per cycle - 0.662328784 seconds time elapsed +TOTAL : 0.666039 sec + 1,325,973,811 cycles # 1.981 GHz + 1,954,047,154 instructions # 1.47 insn per cycle + 0.670126568 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1348) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 73b671c916..02220fa6e1 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_23:03:51 +DATE: 2023-10-30_22:50:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.727976e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.233769e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.357459e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.967331e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.214482e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.337385e+08 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.435999 sec - 1,946,637,043 cycles # 3.006 GHz - 2,763,626,164 instructions # 1.42 insn per cycle - 0.705054657 seconds time elapsed +TOTAL : 0.434980 sec + 1,942,062,183 cycles # 3.007 GHz + 2,746,890,880 instructions # 1.41 insn per cycle + 0.703360888 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.910471e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.824070e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.948085e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.292022e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.833149e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.949301e+08 ) sec^-1 MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.472776 sec - 2,098,689,676 cycles # 3.011 GHz - 2,996,762,466 instructions # 1.43 insn per cycle - 0.756532757 seconds time elapsed +TOTAL : 0.470018 sec + 2,086,659,072 cycles # 3.025 GHz + 2,994,580,206 instructions # 1.44 insn per cycle + 0.748012233 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.155452e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.181319e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.181319e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.161070e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.187807e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.187807e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.438216 sec - 4,456,836,483 cycles # 3.092 GHz - 13,052,733,016 instructions # 2.93 insn per cycle - 1.442173441 seconds time elapsed +TOTAL : 1.430744 sec + 4,454,323,954 cycles # 3.106 GHz + 13,052,174,805 instructions # 2.93 insn per cycle + 1.434671601 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.076378e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.271943e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.271943e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.903986e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.090947e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.090947e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.552302 sec - 1,700,637,697 cycles # 3.061 GHz - 4,515,277,775 instructions # 2.66 insn per cycle - 0.556547518 seconds time elapsed +TOTAL : 0.584766 sec + 1,709,460,469 cycles # 2.906 GHz + 4,515,259,006 instructions # 2.64 insn per cycle + 0.588806620 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3601) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = 
VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.074886e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.842784e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.842784e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.106741e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.879293e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.879293e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.289526 sec - 851,602,274 cycles # 2.906 GHz - 1,898,406,404 instructions # 2.23 insn per cycle - 0.293636457 seconds time elapsed +TOTAL : 0.287960 sec + 851,659,590 cycles # 2.922 GHz + 1,898,593,134 instructions # 2.23 insn per cycle + 0.292038777 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.871182e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.650540e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.650540e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.596776e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.492603e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.492603e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.300648 sec - 801,992,801 cycles # 2.634 GHz - 1,821,710,793 instructions # 2.27 insn per cycle - 0.304975702 seconds time elapsed +TOTAL : 0.268043 sec + 798,185,067 cycles # 2.940 GHz + 1,821,244,750 instructions # 2.28 insn per cycle + 0.272109561 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3335) (512y: 
22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe @@ -190,9 +194,9 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 29,796,412 cycles # 2.683 GHz - 41,509,798 instructions # 1.39 insn per cycle - 0.011615045 seconds time elapsed + 27,701,954 cycles # 2.643 GHz + 41,567,168 instructions # 1.50 insn per cycle + 0.010909835 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1969) (512y: 32) (512z: 2383) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index b6472d3fd2..b48a6fcec3 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. 
CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_23:28:32 +DATE: 2023-10-30_23:12:26 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.743623e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.310347e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.310347e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.414946e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.217784e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.217784e+07 ) sec^-1 MeanMatrixElemValue = ( 2.017654e+01 +- 1.429184e+01 ) GeV^-2 -TOTAL : 0.448981 sec - 2,004,329,821 cycles # 2.995 GHz - 2,873,609,945 instructions # 1.43 insn per cycle - 0.726675606 seconds time elapsed +TOTAL : 0.452341 sec + 1,900,339,500 cycles # 2.864 GHz + 2,799,669,730 instructions # 1.47 insn per cycle + 0.721098960 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -76,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.180958e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.827959e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.827959e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.349106e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.876504e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.876504e+07 ) sec^-1 MeanMatrixElemValue = ( 2.609942e+02 +- 2.115590e+02 ) GeV^-2 -TOTAL : 0.612717 sec - 2,517,756,396 cycles # 2.992 GHz - 3,800,816,210 instructions # 1.51 insn per cycle - 0.899069258 seconds time elapsed +TOTAL : 0.608431 sec + 2,571,734,972 cycles # 3.036 GHz + 3,893,707,345 instructions # 1.51 insn per cycle + 0.906254720 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -100,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.150165e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.176057e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.176057e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.163465e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.189480e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.189480e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.448346 sec - 4,473,116,485 cycles # 3.081 GHz - 13,057,070,780 
instructions # 2.92 insn per cycle - 1.452474949 seconds time elapsed +TOTAL : 1.430810 sec + 4,466,454,752 cycles # 3.114 GHz + 13,056,490,171 instructions # 2.92 insn per cycle + 1.434790628 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe @@ -128,14 +132,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.862398e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.049929e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.049929e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.094784e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.288488e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.288488e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.598237 sec - 1,729,464,558 cycles # 2.874 GHz - 4,563,458,100 instructions # 2.64 insn per cycle - 0.602509718 seconds time elapsed +TOTAL : 0.552738 sec + 1,719,469,732 cycles # 3.091 GHz + 4,563,277,713 instructions # 2.65 insn per cycle + 0.556970147 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3601) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe @@ -156,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.014478e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 6.759179e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.759179e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.070520e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.859545e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.859545e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.296266 sec - 870,816,020 cycles # 2.904 GHz - 1,935,237,213 instructions # 2.22 insn per cycle - 0.300456476 seconds time elapsed +TOTAL : 0.293554 sec + 871,649,791 cycles # 2.936 GHz + 1,935,490,228 instructions # 2.22 insn per cycle + 0.297628852 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe @@ -184,14 +188,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.445000e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.322163e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.322163e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.465694e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.353147e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.353147e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.279268 sec - 819,313,174 cycles # 2.910 GHz - 1,858,559,504 instructions # 2.27 insn per cycle - 0.283327146 seconds time elapsed +TOTAL : 0.277041 sec + 819,154,003 cycles # 2.920 GHz + 1,858,008,427 instructions # 2.27 insn per cycle + 0.281050664 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe @@ -207,9 +211,9 @@ OK (relative difference <= 5E-3) runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Instantiate host Bridge (nevt=16384) - 36,884,052 cycles # 2.768 GHz - 50,707,665 instructions # 1.37 insn per cycle - 0.013695975 seconds time elapsed + 36,661,274 cycles # 2.764 GHz + 50,499,779 instructions # 1.38 insn per cycle + 0.013630840 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1969) (512y: 32) (512z: 2383) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index fe20c20dcc..ee4cf36aec 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_f_inl0_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_23:04:07 +DATE: 2023-10-30_22:50:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.682003e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.220722e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.350681e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.971727e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.217793e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.336731e+08 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.437751 sec - 1,900,811,899 cycles # 2.921 GHz - 2,678,626,262 instructions # 1.41 insn per cycle - 0.708103168 seconds time elapsed +TOTAL : 0.433842 sec + 1,909,081,900 cycles # 2.967 GHz + 2,630,455,270 instructions # 1.38 insn per cycle + 0.700771922 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 @@ -64,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.826661e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.778531e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.900966e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.242783e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.807399e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.919355e+08 ) sec^-1 MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.477309 sec - 2,050,802,627 cycles # 2.895 GHz - 2,895,521,209 instructions # 1.41 insn per cycle - 0.766032667 seconds time elapsed +TOTAL : 0.468249 sec + 2,069,885,834 cycles # 3.011 GHz + 2,962,006,936 instructions # 1.43 insn per cycle + 0.745153246 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.067807e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.092193e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.092193e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.163734e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.189710e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.189710e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.556709 sec - 4,458,740,276 cycles # 2.859 GHz - 13,033,433,763 
instructions # 2.92 insn per cycle - 1.560929816 seconds time elapsed +TOTAL : 1.427125 sec + 4,448,877,888 cycles # 3.110 GHz + 13,033,054,677 instructions # 2.93 insn per cycle + 1.431045061 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.971891e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.161237e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.161237e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.130048e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.328370e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.328370e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.571534 sec - 1,693,291,421 cycles # 2.944 GHz - 4,511,037,766 instructions # 2.66 insn per cycle - 0.575691046 seconds time elapsed +TOTAL : 0.542235 sec + 1,688,384,240 cycles # 3.096 GHz + 4,511,025,762 instructions # 2.67 insn per cycle + 0.546181384 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.252716e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 5.876495e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.876495e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.112726e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.884376e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.884376e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.333958 sec - 856,921,901 cycles # 2.537 GHz - 1,895,644,728 instructions # 2.21 insn per cycle - 0.338408955 seconds time elapsed +TOTAL : 0.287485 sec + 851,316,568 cycles # 2.926 GHz + 1,895,161,178 instructions # 2.23 insn per cycle + 0.291541754 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3461) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.076897e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.901066e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.901066e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.539172e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.428313e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.428313e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.290472 sec - 802,971,603 cycles # 2.729 GHz - 1,817,512,004 instructions # 2.26 insn per cycle - 0.294883099 seconds time elapsed +TOTAL : 0.270331 sec + 799,518,850 cycles # 2.921 GHz + 1,817,399,076 instructions # 2.27 insn per cycle + 0.274476762 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3298) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest.exe @@ -190,9 +194,9 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 29,041,231 cycles # 2.621 GHz - 40,698,285 instructions # 1.40 insn per cycle - 0.011491609 seconds time elapsed + 28,310,705 cycles # 2.739 GHz + 40,898,386 instructions # 1.44 insn per cycle + 0.010855318 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1932) (512y: 32) (512z: 2383) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index becb5dfcbd..1050b324b7 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. - Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_23:04:23 +DATE: 2023-10-30_22:50:46 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.831468e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.322857e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.656480e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.987462e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.350900e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.659598e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.444027 sec - 1,947,288,172 cycles # 2.948 GHz - 2,771,783,460 instructions # 1.42 insn per cycle - 0.718120536 seconds time elapsed +TOTAL : 0.438127 sec + 1,961,591,121 cycles # 3.019 GHz + 2,745,718,346 instructions # 1.40 insn per cycle + 0.707121708 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.574015e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.147143e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.504038e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.710817e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.169634e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.502781e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.523217 sec - 2,233,613,263 cycles # 2.937 GHz - 3,181,755,320 instructions # 1.42 insn per cycle - 0.818164165 seconds time elapsed +TOTAL : 0.513425 sec + 2,258,016,655 cycles # 3.040 GHz + 3,240,568,814 instructions # 1.44 insn per cycle + 0.800127554 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.062623e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.085021e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.085021e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.094539e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.117012e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.117012e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.564713 sec - 4,730,586,650 cycles # 3.017 GHz - 13,469,637,389 
instructions # 2.85 insn per cycle - 1.568788491 seconds time elapsed +TOTAL : 1.518561 sec + 4,720,608,486 cycles # 3.102 GHz + 13,469,279,631 instructions # 2.85 insn per cycle + 1.522489455 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 840) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.958286e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.032593e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.032593e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.026111e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.102690e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.102690e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.858196 sec - 2,600,600,273 cycles # 3.018 GHz - 7,388,563,593 instructions # 2.84 insn per cycle - 0.862457888 seconds time elapsed +TOTAL : 0.829732 sec + 2,596,663,847 cycles # 3.117 GHz + 7,388,668,463 instructions # 2.85 insn per cycle + 0.833756324 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3073) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.359172e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.581590e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.581590e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.448801e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.679453e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.679453e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.510154 sec - 1,469,200,333 cycles # 2.860 GHz - 3,058,240,905 instructions # 2.08 insn per cycle - 0.514384798 seconds time elapsed +TOTAL : 0.495919 sec + 1,465,592,723 cycles # 2.935 GHz + 3,058,112,296 instructions # 2.09 insn per cycle + 0.500045412 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3013) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.858971e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.147479e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.147479e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.879819e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.166647e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.166647e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.445930 sec - 1,306,643,742 cycles # 2.907 GHz - 2,932,728,091 instructions # 2.24 insn per cycle - 0.450107947 seconds time elapsed +TOTAL : 0.443605 sec + 1,306,750,305 cycles # 2.923 GHz + 2,933,003,661 instructions # 2.24 insn per cycle + 0.447663683 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2799) (512y: 110) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.479170e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.595374e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.595374e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.302695e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.408241e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.408241e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.684274 sec - 1,364,891,420 cycles # 1.985 GHz - 1,971,900,750 instructions # 1.44 insn per cycle - 0.688421555 seconds time elapsed +TOTAL : 0.736183 sec + 1,366,860,503 cycles # 1.849 GHz + 1,972,222,648 instructions # 1.44 insn per cycle + 0.740334587 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1700) (512y: 114) (512z: 2171) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index 3487666f7b..f752ce36e3 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -1,10 +1,14 @@ export CUDACPP_RUNTIME_ENABLEFPE=on -Building in /data/avalassi/GPU2023/madgraph4gpuX/test -CXXNAMESUFFIX= -make: Nothing to be done for 'all'. 
- Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +OMPFLAGS=-fopenmp +AVX=512y +FPTYPE=d +HELINL=0 +HRDCOD=0 +RNDGEN=hasCurand +Building in BUILDDIR=build.512y_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is set = 1) +make: Nothing to be done for 'gtestlibs'. CUDACPP_BUILDDIR='build.512y_m_inl0_hrd1' make USEBUILDDIR=1 AVX=none @@ -37,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-29_23:04:40 +DATE: 2023-10-30_22:51:04 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -46,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.767907e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.061775e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.381370e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.941380e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.208820e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.498284e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.451890 sec - 1,869,343,512 cycles # 2.805 GHz - 2,663,696,528 instructions # 1.42 insn per cycle - 0.724191615 seconds time elapsed +TOTAL : 0.438407 sec + 1,956,379,222 cycles # 3.008 GHz + 2,770,878,160 instructions # 1.42 insn per cycle + 0.707058917 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -64,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.537991e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.991480e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.337355e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.668828e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.009198e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.330148e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.536567 sec - 2,153,208,878 cycles # 2.809 GHz - 3,128,087,959 instructions # 1.45 insn per cycle - 0.830189616 seconds time elapsed +TOTAL : 0.513962 sec + 2,253,699,576 cycles # 3.029 GHz + 3,213,129,563 instructions # 1.43 insn per cycle + 0.801248370 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -87,14 +91,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.082660e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.105229e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.105229e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.096779e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.119383e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.119383e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.534849 sec - 4,727,993,898 cycles # 3.074 GHz - 13,455,770,647 instructions # 2.85 insn per cycle - 1.538976463 seconds time elapsed +TOTAL : 1.515324 sec + 4,726,250,058 cycles # 3.112 GHz + 13,455,652,183 instructions # 2.85 insn per cycle + 1.519521309 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 827) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest.exe @@ -114,14 +118,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.999489e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.074232e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.074232e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.999365e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.074695e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.074695e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.840475 sec - 2,602,065,176 cycles # 3.083 GHz - 7,392,552,725 instructions # 2.84 insn per cycle - 0.844609048 seconds time elapsed +TOTAL : 0.839998 sec + 2,599,518,552 cycles # 3.082 GHz + 7,392,404,713 instructions # 2.84 insn per cycle + 0.844137925 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3062) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest.exe @@ -141,14 +145,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = 
VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.396942e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.618271e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.618271e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.371429e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.593127e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.593127e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.503588 sec - 1,468,648,729 cycles # 2.896 GHz - 3,058,145,449 instructions # 2.08 insn per cycle - 0.507802923 seconds time elapsed +TOTAL : 0.507572 sec + 1,469,854,218 cycles # 2.875 GHz + 3,058,124,169 instructions # 2.08 insn per cycle + 0.511713729 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2990) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest.exe @@ -168,14 +172,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.819056e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.101366e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.101366e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.877641e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.168441e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.168441e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.450598 sec - 1,309,476,408 cycles # 2.884 GHz - 2,933,279,716 instructions # 2.24 insn per cycle - 0.454801614 seconds time elapsed +TOTAL : 0.443851 sec + 1,307,337,681 cycles # 2.922 GHz + 2,933,611,821 instructions # 2.24 insn per cycle + 0.448146499 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2775) 
(512y: 110) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest.exe @@ -195,14 +199,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.472084e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.586767e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.586767e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.504769e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.622888e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.622888e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.686109 sec - 1,365,054,250 cycles # 1.980 GHz - 1,971,628,186 instructions # 1.44 insn per cycle - 0.690314680 seconds time elapsed +TOTAL : 0.677398 sec + 1,366,250,754 cycles # 2.007 GHz + 1,971,487,169 instructions # 1.44 insn per cycle + 0.681481143 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1676) (512y: 114) (512z: 2171) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest.exe From 201f8809e31642597fada14a627150d40a62fbc5 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 31 Oct 2023 09:40:16 +0100 Subject: [PATCH 090/119] [oct23av] rerun 18 tmad tests (while rerunning also tput with FPEs enabled), no change in functionality or performance STARTED AT Mon Oct 30 11:25:17 PM CET 2023 ENDED AT Mon Oct 30 11:43:39 PM CET 2023 Status=0 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt 24 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt --- .../log_eemumu_mad_d_inl0_hrd0.txt | 136 ++++++++--------- .../log_eemumu_mad_f_inl0_hrd0.txt | 138 ++++++++--------- .../log_eemumu_mad_m_inl0_hrd0.txt | 130 ++++++++-------- 
.../log_ggtt_mad_d_inl0_hrd0.txt | 136 ++++++++--------- .../log_ggtt_mad_f_inl0_hrd0.txt | 140 +++++++++--------- .../log_ggtt_mad_m_inl0_hrd0.txt | 138 ++++++++--------- .../log_ggttg_mad_d_inl0_hrd0.txt | 136 ++++++++--------- .../log_ggttg_mad_f_inl0_hrd0.txt | 136 ++++++++--------- .../log_ggttg_mad_m_inl0_hrd0.txt | 136 ++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 138 ++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 132 ++++++++--------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 136 ++++++++--------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 8 +- .../log_ggttggg_mad_f_inl0_hrd0.txt | 8 +- .../log_ggttggg_mad_m_inl0_hrd0.txt | 8 +- .../log_gqttq_mad_d_inl0_hrd0.txt | 136 ++++++++--------- .../log_gqttq_mad_f_inl0_hrd0.txt | 134 ++++++++--------- .../log_gqttq_mad_m_inl0_hrd0.txt | 128 ++++++++-------- 18 files changed, 1027 insertions(+), 1027 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 4be6aa1159..525d780972 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,12 +1,12 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum CUDACPP_BUILDDIR='.' 
+ make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 - -make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z @@ -15,11 +15,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_23:42:44 +DATE: 2023-10-30_23:26:14 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6291s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6211s - [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6317s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6235s + [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1777s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1697s - [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1808s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1726s + [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] 
PROGRAM TOTAL : 0.4192s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3335s - [COUNTERS] Fortran MEs ( 1 ) : 0.0857s for 90112 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4200s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3341s + [COUNTERS] Fortran MEs ( 1 ) : 0.0859s for 90112 events => throughput is 1.05E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1880s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1814s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.24E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1910s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1845s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 8192 events => throughput is 1.27E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4182s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3478s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0704s for 90112 events => throughput is 1.28E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4371s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3658s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0713s for 90112 events => throughput is 1.26E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.226336e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.225597e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.193204e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.237094e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1845s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1806s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.11E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1856s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1817s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.10E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3857s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0444s for 90112 events => throughput is 2.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3892s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3447s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0445s for 90112 events => throughput is 2.02E+06 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.984622e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.991464e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.040449e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.992806e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1834s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1803s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1827s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1797s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.74E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3737s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3407s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 90112 events => throughput is 2.73E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3761s + [COUNTERS] Fortran Overhead 
( 0 ) : 0.3425s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0337s for 90112 events => throughput is 2.68E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.610169e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.530051e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.827839e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.718767e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1825s + [COUNTERS] PROGRAM TOTAL : 0.1827s [COUNTERS] Fortran Overhead ( 0 ) : 0.1796s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.81E+06 events/s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.63E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3789s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3463s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0327s for 90112 events => throughput is 2.76E+06 events/s + [COUNTERS] 
PROGRAM TOTAL : 0.3754s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3433s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0321s for 90112 events => throughput is 2.81E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.719646e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.686220e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.850027e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.846999e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1822s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1786s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0036s for 8192 events => throughput is 2.28E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1847s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1811s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0036s for 8192 events => throughput is 2.30E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3856s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3468s - 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.0388s for 90112 events => throughput is 2.33E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3898s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3501s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0397s for 90112 events => throughput is 2.27E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.213461e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.103998e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.237597e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.334399e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5989s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5985s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.69E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5940s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5935s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.63E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 
events) - [COUNTERS] PROGRAM TOTAL : 0.7645s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7597s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 90112 events => throughput is 1.87E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7665s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7617s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.85E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.294220e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.297070e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.981280e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.494836e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.997915e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.153646e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.335110e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.383787e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.023994e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.165062e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.958940e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.969138e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.020188e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.168544e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.116154e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.115108e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index aa9e7c4e40..983675a1b5 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_23:43:01 +DATE: 2023-10-30_23:26:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6323s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6244s - [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6371s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6292s + [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1788s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1704s - [COUNTERS] Fortran MEs ( 1 ) : 0.0083s for 8192 events => throughput is 9.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1797s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1715s + [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] 
PROGRAM TOTAL : 0.4163s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3312s - [COUNTERS] Fortran MEs ( 1 ) : 0.0852s for 90112 events => throughput is 1.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4167s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3307s + [COUNTERS] Fortran MEs ( 1 ) : 0.0860s for 90112 events => throughput is 1.05E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166087172673] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1874s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1813s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0061s for 8192 events => throughput is 1.35E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2068s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2005s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 8192 events => throughput is 1.28E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501907796603360E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4109s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3423s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0686s for 90112 events => throughput is 1.31E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4378s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3647s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0730s for 90112 events => throughput is 1.23E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.253383e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.185853e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.294219e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.278973e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165570339780] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1815s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1788s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.11E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1935s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1907s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.00E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905322826635E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3713s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3439s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0273s for 90112 events => throughput is 3.30E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3655s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3377s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0278s for 90112 events => throughput is 3.24E+06 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.130086e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.212841e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.376194e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.324039e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165593922979] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1855s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1832s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.63E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1837s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1814s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.61E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905316084181E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3667s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3419s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0248s for 90112 events => throughput is 3.63E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3731s + [COUNTERS] Fortran Overhead 
( 0 ) : 0.3482s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0249s for 90112 events => throughput is 3.62E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.471852e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.494857e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.761957e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.578201e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165593922979] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1848s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1826s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.66E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1869s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1847s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.78E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905316084181E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3697s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3450s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 90112 events 
=> throughput is 3.64E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3724s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3478s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0246s for 90112 events => throughput is 3.66E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.629967e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.552484e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.617716e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.686130e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166440400542] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1850s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1828s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.70E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1872s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1850s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.66E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501908978565555E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3711s - 
[COUNTERS] Fortran Overhead ( 0 ) : 0.3462s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0249s for 90112 events => throughput is 3.62E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3730s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3478s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0253s for 90112 events => throughput is 3.57E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.468172e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.427450e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.642139e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.720969e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166823487174] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5944s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5939s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.66E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5945s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5940s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.74E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,8 +547,8 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501910542849674E-002] 
fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7608s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7562s + [COUNTERS] PROGRAM TOTAL : 0.7583s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7538s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 90112 events => throughput is 1.97E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.669429e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.891593e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.803655e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.844741e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.522788e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.841028e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.045986e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.052490e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 
7.458177e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.882792e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.245797e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.115822e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.767122e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.073812e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.440537e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.447499e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 9ef9326058..c323dc9862 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -6,8 +6,8 @@ make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y +make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' @@ -15,10 +15,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_23:43:19 +DATE: 2023-10-30_23:26:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6434s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6356s - [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6306s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6226s + [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL 
: 0.1784s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1703s - [COUNTERS] Fortran MEs ( 1 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1794s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1716s + [COUNTERS] Fortran MEs ( 1 ) : 0.0078s for 8192 events => throughput is 1.05E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4177s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3316s - [COUNTERS] Fortran MEs ( 1 ) : 0.0861s for 90112 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4167s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3309s + [COUNTERS] Fortran MEs ( 1 ) : 0.0858s for 90112 events => throughput is 1.05E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211734] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1906s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1836s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 8192 events => throughput is 1.17E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1888s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1819s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 8192 events => throughput is 1.19E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 
events) - [COUNTERS] PROGRAM TOTAL : 0.4162s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3439s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0722s for 90112 events => throughput is 1.25E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4175s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3443s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0733s for 90112 events => throughput is 1.23E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.172991e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.158891e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.216868e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.176701e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211728] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1833s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1796s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0038s for 8192 events => throughput is 2.17E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1855s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1815s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 2.02E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] 
Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3829s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3408s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0421s for 90112 events => throughput is 2.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3854s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3430s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0424s for 90112 events => throughput is 2.13E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.075634e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.028245e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.184462e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.167584e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1826s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1795s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.60E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1832s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1801s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.65E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' 
./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3738s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3405s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0333s for 90112 events => throughput is 2.71E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3761s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3422s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0339s for 90112 events => throughput is 2.66E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.669400e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.640928e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.660847e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.695641e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1809s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1780s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.78E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1821s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1793s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.94E+06 events/s 
*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3766s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3452s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0314s for 90112 events => throughput is 2.87E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3811s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3487s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0324s for 90112 events => throughput is 2.78E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.736292e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.691960e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.813805e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.911478e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1843s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1809s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1827s + [COUNTERS] Fortran Overhead ( 0 ) : 
0.1794s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.50E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3782s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0370s for 90112 events => throughput is 2.44E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3826s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3449s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0377s for 90112 events => throughput is 2.39E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.304060e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.177892e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.408915e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.311961e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -516,7 +516,7 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [UNWEIGHT] Wrote 1611 events (found 1616 events) [COUNTERS] PROGRAM TOTAL : 0.5929s [COUNTERS] Fortran Overhead ( 0 ) : 0.5924s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.67E+07 events/s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 
1.61E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919911173610E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7588s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7540s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 90112 events => throughput is 1.87E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7616s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7567s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.84E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.334962e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.200888e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.968237e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.927076e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.011382e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.188164e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK 
-EvtsPerSec[MECalcOnly] (3a) = ( 7.365199e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.384660e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.010775e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.185460e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.944396e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.889571e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.029059e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.184598e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.138841e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.123279e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 10c5a40d89..370bafb4b0 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
+ make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 - make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_23:43:36 +DATE: 2023-10-30_23:27:05 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3509s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3100s - [COUNTERS] Fortran MEs ( 1 ) : 0.0409s for 8192 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3542s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3123s + [COUNTERS] Fortran MEs ( 1 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3084s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2678s - [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3103s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2698s + [COUNTERS] Fortran MEs ( 1 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6676s 
- [COUNTERS] Fortran Overhead ( 0 ) : 1.2160s - [COUNTERS] Fortran MEs ( 1 ) : 0.4516s for 90112 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6683s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2204s + [COUNTERS] Fortran MEs ( 1 ) : 0.4479s for 90112 events => throughput is 2.01E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600102] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3434s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3065s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0369s for 8192 events => throughput is 2.22E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3459s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3086s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0373s for 8192 events => throughput is 2.20E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775372] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6734s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2619s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4115s for 90112 events => throughput is 2.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6795s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2710s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4085s for 90112 events => throughput is 2.21E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.212985e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.120628e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.172710e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.186499e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600102] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3136s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2921s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.82E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3149s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2933s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0216s for 8192 events => throughput is 3.79E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775379] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4819s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2471s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2348s for 90112 events => throughput is 3.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5593s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3113s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2479s for 90112 events => throughput is 3.63E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.637745e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.636571e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.664607e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.778780e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2996s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2866s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0130s for 8192 events => throughput is 6.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3002s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2870s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0132s for 8192 events => throughput is 6.21E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3865s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2420s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1445s for 90112 events => throughput is 6.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3995s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2550s + [COUNTERS] CudaCpp MEs 
( 2 ) : 0.1445s for 90112 events => throughput is 6.23E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.044297e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.008439e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.969961e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.106080e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2940s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2824s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0116s for 8192 events => throughput is 7.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2976s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2856s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0119s for 8192 events => throughput is 6.87E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3694s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2402s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1293s for 90112 events => throughput is 6.97E+05 events/s + [COUNTERS] PROGRAM TOTAL 
: 1.3695s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2412s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1283s for 90112 events => throughput is 7.02E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.688301e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.765143e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.666529e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.703708e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3223s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3011s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0213s for 8192 events => throughput is 3.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3108s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2915s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0193s for 8192 events => throughput is 4.24E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.5097s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2882s - [COUNTERS] CudaCpp MEs ( 2 ) : 
0.2215s for 90112 events => throughput is 4.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4685s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2567s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2118s for 90112 events => throughput is 4.25E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.040762e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.089044e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.038513e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.044371e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6947s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6941s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.43E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6953s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6948s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.47E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6596s - 
[COUNTERS] Fortran Overhead ( 0 ) : 1.6533s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.44E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6643s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6581s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 90112 events => throughput is 1.44E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.154027e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.194981e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.702234e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.690509e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.205697e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.290641e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.069293e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.072428e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.190602e+07 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.325122e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.154485e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.154985e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.200679e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.317983e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.004061e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.054471e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index bccfa25524..86b5bafaea 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -2,11 +2,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=avx2 -make USEBUILDDIR=1 AVX=sse4 + +make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z @@ -15,11 +15,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_23:44:02 +DATE: 2023-10-30_23:27:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3495s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3091s - [COUNTERS] Fortran MEs ( 1 ) : 0.0404s for 8192 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3526s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3116s + [COUNTERS] Fortran MEs ( 1 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3062s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2656s - [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3093s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2682s + [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 2.00E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6510s 
- [COUNTERS] Fortran Overhead ( 0 ) : 1.2078s - [COUNTERS] Fortran MEs ( 1 ) : 0.4433s for 90112 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6575s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2112s + [COUNTERS] Fortran MEs ( 1 ) : 0.4463s for 90112 events => throughput is 2.02E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690706767555099] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3555s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3206s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0349s for 8192 events => throughput is 2.35E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3403s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3051s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0352s for 8192 events => throughput is 2.32E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782605295497] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6345s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2534s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3811s for 90112 events => throughput is 2.36E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6425s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2608s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3818s for 90112 events => throughput is 2.36E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.371355e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.382222e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.396792e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.388990e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690702885183541] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3003s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2858s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0144s for 8192 events => throughput is 5.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3058s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2911s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0146s for 8192 events => throughput is 5.60E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223778858016772] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3998s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2390s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1608s for 90112 events => throughput is 5.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4148s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2514s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1634s for 90112 events => throughput is 5.51E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.252169e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.330431e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.352354e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.341045e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690694374060818] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2852s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2777s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2866s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2785s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,8 +319,8 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223775951815753] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3213s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2364s + [COUNTERS] PROGRAM TOTAL : 1.3231s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2381s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0849s for 90112 events => throughput is 1.06E+06 events/s *** (2-avx2) Compare 
MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.012335e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.023298e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.003051e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.030857e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690694374060818] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2848s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2776s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0072s for 8192 events => throughput is 1.13E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2844s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2774s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 8192 events => throughput is 1.17E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223775951815753] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3140s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2344s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0796s for 90112 events => throughput is 1.13E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3185s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2392s + [COUNTERS] CudaCpp MEs ( 2 ) : 
0.0793s for 90112 events => throughput is 1.14E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.094330e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.103573e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.091106e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.110688e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690698914467276] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2904s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2804s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0100s for 8192 events => throughput is 8.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2923s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2824s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0099s for 8192 events => throughput is 8.27E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223780273983500] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3456s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2362s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1094s for 90112 events => throughput is 8.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 
1.3711s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2571s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1140s for 90112 events => throughput is 7.90E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.541227e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.478441e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.649452e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.631045e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690703397697980] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6946s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6940s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.49E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6988s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6983s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.51E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223786763175951] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6540s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6486s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 
90112 events => throughput is 1.67E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6625s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6570s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 90112 events => throughput is 1.64E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.197490e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.480019e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.947842e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.050077e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.136310e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.413949e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.752811e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.754978e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.162233e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.425389e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process 
= SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.852365e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.844977e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.689881e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.891833e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.432722e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.412374e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 277c27f299..318c167090 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 + +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,25 +15,25 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2023-10-29_23:44:27 +DATE: 2023-10-30_23:27:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3519s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3114s - [COUNTERS] Fortran MEs ( 1 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3538s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3130s + [COUNTERS] Fortran MEs ( 1 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3076s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2674s - [COUNTERS] Fortran MEs ( 1 ) : 0.0403s for 8192 events => throughput is 2.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3122s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2712s + [COUNTERS] Fortran MEs ( 1 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM 
TOTAL : 1.6509s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2074s - [COUNTERS] Fortran MEs ( 1 ) : 0.4435s for 90112 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7731s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3014s + [COUNTERS] Fortran MEs ( 1 ) : 0.4717s for 90112 events => throughput is 1.91E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3457s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3080s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0377s for 8192 events => throughput is 2.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3482s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3107s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0375s for 8192 events => throughput is 2.18E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6643s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2559s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4084s for 90112 events => throughput is 2.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7134s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2939s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4195s for 90112 events => throughput is 2.15E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.163062e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.192172e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.217486e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.182032e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3126s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2919s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0207s for 8192 events => throughput is 3.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3138s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2929s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0209s for 8192 events => throughput is 3.91E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4786s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2489s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2296s for 90112 events => throughput is 3.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4780s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2494s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2286s for 90112 events => throughput is 3.94E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.804143e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.824287e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.857305e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.853636e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2962s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2833s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0129s for 8192 events => throughput is 6.34E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2980s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2849s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0131s for 8192 events => throughput is 6.24E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3819s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2394s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1425s for 90112 events => throughput is 6.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3838s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2425s + [COUNTERS] CudaCpp MEs 
( 2 ) : 0.1414s for 90112 events => throughput is 6.37E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.058038e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.319893e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.109676e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.021389e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2926s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2814s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0112s for 8192 events => throughput is 7.28E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2945s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2826s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0119s for 8192 events => throughput is 6.89E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3749s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2482s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1267s for 90112 events => throughput is 7.11E+05 events/s + [COUNTERS] PROGRAM TOTAL 
: 1.3684s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2419s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1265s for 90112 events => throughput is 7.12E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.024816e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.899333e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.053810e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.948599e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3154s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2972s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0182s for 8192 events => throughput is 4.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3082s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2893s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0189s for 8192 events => throughput is 4.34E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4615s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2546s - [COUNTERS] CudaCpp MEs ( 2 ) : 
0.2069s for 90112 events => throughput is 4.36E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4583s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2510s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2072s for 90112 events => throughput is 4.35E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.205277e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.137027e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.230271e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.272346e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708266690699] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6938s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6933s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.47E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6968s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6962s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.45E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782303744791] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6553s - 
[COUNTERS] Fortran Overhead ( 0 ) : 1.6490s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.44E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6666s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6602s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 90112 events => throughput is 1.41E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.003541e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.149612e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.600448e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.592954e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.175653e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.307478e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.060114e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.054254e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.194865e+07 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.298580e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.143139e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.137759e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.181293e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.297095e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.968173e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.015457e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 9730f11338..c309bf7674 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,14 +15,14 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_23:44:52 +DATE: 2023-10-30_23:28:22 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5433s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2287s - [COUNTERS] Fortran MEs ( 1 ) : 0.3146s for 8192 events => throughput is 2.60E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5408s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2234s + [COUNTERS] Fortran MEs ( 1 ) : 0.3174s for 8192 events => throughput is 2.58E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5326s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2185s - [COUNTERS] Fortran MEs ( 1 ) : 0.3141s for 8192 events => throughput is 2.61E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5382s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2207s + [COUNTERS] Fortran MEs ( 1 ) : 0.3175s for 8192 events => throughput is 2.58E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 
0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 4.9522s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4138s - [COUNTERS] Fortran MEs ( 1 ) : 3.5384s for 90112 events => throughput is 2.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.8715s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3912s + [COUNTERS] Fortran MEs ( 1 ) : 3.4803s for 90112 events => throughput is 2.59E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470791E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8782s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5546s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3236s for 8192 events => throughput is 2.53E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8633s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5369s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3263s for 8192 events => throughput is 2.51E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.3815s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7220s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6595s for 90112 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2770s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6914s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5855s for 90112 events => throughput is 2.51E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.594077e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.584657e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.572076e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.596787e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470777E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5574s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3872s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1702s for 8192 events => throughput is 4.81E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5556s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3862s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1694s for 8192 events => throughput is 4.84E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.4101s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5437s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8665s for 90112 events => throughput is 4.83E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.4182s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5481s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8700s for 90112 events => throughput is 4.82E+04 events/s *** 
(2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.921909e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.974443e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.905097e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.961157e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3859s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3022s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0837s for 8192 events => throughput is 9.79E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3880s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3039s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0842s for 8192 events => throughput is 9.73E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.3897s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4614s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9283s for 90112 events => throughput is 9.71E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.3753s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4487s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.9265s for 90112 events => throughput is 9.73E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.915492e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.842938e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.906893e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.942981e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3667s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2921s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0746s for 8192 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3742s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2993s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.2583s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4372s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8210s for 90112 events => throughput is 1.10E+05 
events/s + [COUNTERS] PROGRAM TOTAL : 2.2797s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4533s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8264s for 90112 events => throughput is 1.09E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.120723e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.103246e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.101513e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.110259e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4265s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3236s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1029s for 8192 events => throughput is 7.96E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4287s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3241s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1045s for 8192 events => throughput is 7.84E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.6156s - [COUNTERS] Fortran Overhead ( 0 ) 
: 1.4731s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1425s for 90112 events => throughput is 7.89E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6347s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4831s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1515s for 90112 events => throughput is 7.83E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.888561e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.792470e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.896267e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.840493e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6539s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6485s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6515s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6461s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.49E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 
1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8182s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7954s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0229s for 90112 events => throughput is 3.94E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8274s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8047s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0227s for 90112 events => throughput is 3.96E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.619063e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.632907e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.241010e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.028665e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.874326e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.001329e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.236994e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.238062e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
-EvtsPerSec[MECalcOnly] (3a) = ( 9.898969e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.000560e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.246371e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.246369e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.839298e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.997732e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.748686e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.736542e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 0d3786c3f4..0d53ce5f29 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
+ make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 - make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_23:45:33 +DATE: 2023-10-30_23:29:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5334s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2186s - [COUNTERS] Fortran MEs ( 1 ) : 0.3148s for 8192 events => throughput is 2.60E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5383s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2205s + [COUNTERS] Fortran MEs ( 1 ) : 0.3178s for 8192 events => throughput is 2.58E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5331s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2181s - [COUNTERS] Fortran MEs ( 1 ) : 0.3150s for 8192 events => throughput is 2.60E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5374s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2208s + [COUNTERS] Fortran MEs ( 1 ) : 0.3166s for 8192 events => throughput is 2.59E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM 
TOTAL : 4.9081s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4034s - [COUNTERS] Fortran MEs ( 1 ) : 3.5047s for 90112 events => throughput is 2.57E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.8863s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3948s + [COUNTERS] Fortran MEs ( 1 ) : 3.4914s for 90112 events => throughput is 2.58E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196349765248158E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8377s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5247s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3130s for 8192 events => throughput is 2.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8470s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5289s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3181s for 8192 events => throughput is 2.58E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310860767768514E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.1217s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6768s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4449s for 90112 events => throughput is 2.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1676s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6889s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.4787s for 90112 events => throughput is 2.59E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.703655e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.664891e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.695629e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.678201e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196334183509370E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4022s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3090s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0932s for 8192 events => throughput is 8.79E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4053s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3112s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0941s for 8192 events => throughput is 8.71E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847547651041E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.4816s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4534s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0282s for 90112 events => throughput is 8.76E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.4984s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4624s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0361s for 90112 events => throughput is 8.70E+04 events/s *** 
(2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.865454e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.849596e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.842894e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.839850e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196330801117323E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3053s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2620s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3049s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2618s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0432s for 8192 events => throughput is 1.90E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847326088065E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8828s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4092s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4736s for 90112 events => throughput is 1.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8877s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4102s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.4775s for 90112 events => throughput is 1.89E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.907252e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910833e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.910135e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.922964e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196330801117323E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.2957s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2572s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0386s for 8192 events => throughput is 2.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2974s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2578s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0396s for 8192 events => throughput is 2.07E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847326088065E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8300s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4034s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4266s for 90112 events => throughput is 2.11E+05 
events/s + [COUNTERS] PROGRAM TOTAL : 1.8399s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4111s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4288s for 90112 events => throughput is 2.10E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.131125e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.103227e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.143236e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.100634e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196344079460428E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3190s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2692s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0498s for 8192 events => throughput is 1.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3223s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2717s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0506s for 8192 events => throughput is 1.62E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310857804286998E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.0404s - [COUNTERS] Fortran Overhead ( 0 ) 
: 1.4584s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5820s for 90112 events => throughput is 1.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9829s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4266s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5563s for 90112 events => throughput is 1.62E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.578722e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.585816e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.494576e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.607898e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196349366365994E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6758s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6750s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 9.72E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6463s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6455s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 9.65E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310864949473968E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 
1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8049s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7954s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0095s for 90112 events => throughput is 9.45E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8629s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8534s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0095s for 90112 events => throughput is 9.51E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.336673e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.331511e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.859827e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.854816e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.723714e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.761923e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.344003e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.387832e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.714107e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.746229e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.460288e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.509978e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.582435e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.611958e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.621813e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.615043e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index fdfb4bc804..aa573e43f0 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
+ make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 - make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_23:46:10 +DATE: 2023-10-30_23:29:39 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5514s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2299s - [COUNTERS] Fortran MEs ( 1 ) : 0.3216s for 8192 events => throughput is 2.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5412s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2212s + [COUNTERS] Fortran MEs ( 1 ) : 0.3200s for 8192 events => throughput is 2.56E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5357s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2189s - [COUNTERS] Fortran MEs ( 1 ) : 0.3168s for 8192 events => throughput is 2.59E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5409s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2221s + [COUNTERS] Fortran MEs ( 1 ) : 0.3187s for 8192 events => throughput is 2.57E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM 
TOTAL : 4.8536s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3859s - [COUNTERS] Fortran MEs ( 1 ) : 3.4677s for 90112 events => throughput is 2.60E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.9079s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3978s + [COUNTERS] Fortran MEs ( 1 ) : 3.5102s for 90112 events => throughput is 2.57E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358763382007E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8663s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5405s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3258s for 8192 events => throughput is 2.51E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8743s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5440s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3303s for 8192 events => throughput is 2.48E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872835011053E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.3038s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6919s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6119s for 90112 events => throughput is 2.49E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3386s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7039s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6346s for 90112 events => throughput is 2.48E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.559078e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.525161e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.559032e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.525191e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358804670396E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5492s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3823s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1668s for 8192 events => throughput is 4.91E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5616s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3842s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1775s for 8192 events => throughput is 4.62E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872836789727E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.3533s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5274s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8259s for 90112 events => throughput is 4.94E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.3758s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5413s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8344s for 90112 events => throughput is 4.91E+04 events/s *** 
(2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.012905e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.044459e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.083717e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.000983e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3855s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3015s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0840s for 8192 events => throughput is 9.75E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3897s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3046s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0851s for 8192 events => throughput is 9.63E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.3727s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4507s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9220s for 90112 events => throughput is 9.77E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.4817s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5101s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.9716s for 90112 events => throughput is 9.27E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.988170e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.731456e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.998635e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.036086e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3690s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2954s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0736s for 8192 events => throughput is 1.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3855s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3081s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0774s for 8192 events => throughput is 1.06E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.2518s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4420s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8098s for 90112 events => throughput is 1.11E+05 
events/s + [COUNTERS] PROGRAM TOTAL : 2.2548s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4438s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8110s for 90112 events => throughput is 1.11E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.136004e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.143906e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.143932e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.130138e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358757578441E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4322s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3251s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1070s for 8192 events => throughput is 7.65E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4344s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3275s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1069s for 8192 events => throughput is 7.66E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872803699391E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.6667s - [COUNTERS] Fortran Overhead ( 0 ) 
: 1.4847s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1821s for 90112 events => throughput is 7.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.7432s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5145s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2287s for 90112 events => throughput is 7.33E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.705632e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.663897e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.555384e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.610555e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358102981245E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6572s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6518s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0053s for 8192 events => throughput is 1.53E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6564s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6510s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872068634174E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 
1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8152s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7925s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0227s for 90112 events => throughput is 3.97E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8300s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8072s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 90112 events => throughput is 3.95E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.631448e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.631020e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.958483e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.229081e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.843306e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.964249e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.233362e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.234210e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
-EvtsPerSec[MECalcOnly] (3a) = ( 9.852055e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.975956e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.245148e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.242767e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.845495e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.983181e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.722805e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.714684e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index e657d4ab45..e14c658652 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_23:46:51 +DATE: 2023-10-30_23:30:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3573s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2786s - [COUNTERS] Fortran MEs ( 1 ) : 4.0787s for 8192 events => throughput is 2.01E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3910s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2753s + [COUNTERS] Fortran MEs ( 1 ) : 4.1157s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.4672s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2765s - [COUNTERS] Fortran MEs ( 1 ) : 4.1908s for 8192 events => throughput is 1.95E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3946s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2730s + [COUNTERS] Fortran MEs ( 1 ) : 4.1216s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] 
PROGRAM TOTAL : 47.0635s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8834s - [COUNTERS] Fortran MEs ( 1 ) : 45.1800s for 90112 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.4735s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8980s + [COUNTERS] Fortran MEs ( 1 ) : 45.5755s for 90112 events => throughput is 1.98E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.6057s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3898s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.2159s for 8192 events => throughput is 1.94E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.6632s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4139s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.2493s for 8192 events => throughput is 1.93E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421161E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 52.7110s - [COUNTERS] Fortran Overhead ( 0 ) : 6.0688s - [COUNTERS] CudaCpp MEs ( 2 ) : 46.6422s for 90112 events => throughput is 1.93E+03 events/s + [COUNTERS] PROGRAM TOTAL : 53.0552s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0362s + [COUNTERS] CudaCpp MEs ( 2 ) : 47.0189s for 90112 events => throughput is 1.92E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.993883e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.978140e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.996553e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.981159e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352993E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.7879s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4764s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3115s for 8192 events => throughput is 3.54E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7419s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4794s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2625s for 8192 events => throughput is 3.62E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 28.9921s - [COUNTERS] Fortran Overhead ( 0 ) : 4.1750s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.8171s for 90112 events => throughput is 3.63E+03 events/s + [COUNTERS] PROGRAM TOTAL : 29.4316s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1191s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.3125s for 90112 events => throughput is 3.56E+03 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.776261e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.738679e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.776054e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.620314e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.2047s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2221s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9826s for 8192 events => throughput is 8.34E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.2993s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2837s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0156s for 8192 events => throughput is 8.07E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 13.6452s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8479s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.7973s for 90112 events => throughput is 8.35E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.0894s + [COUNTERS] Fortran 
Overhead ( 0 ) : 2.9408s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.1486s for 90112 events => throughput is 8.08E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.631774e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.633351e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.588857e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.639574e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.9597s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1073s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8524s for 8192 events => throughput is 9.61E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9660s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1071s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8588s for 8192 events => throughput is 9.54E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 12.3300s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7280s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.6020s for 
90112 events => throughput is 9.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.1848s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7263s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4585s for 90112 events => throughput is 9.53E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.801101e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.781280e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.852804e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.795193e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.4084s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3386s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0698s for 8192 events => throughput is 7.66E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.4520s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3493s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1027s for 8192 events => throughput is 7.43E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL 
: 14.7032s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9539s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.7493s for 90112 events => throughput is 7.67E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.8334s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9616s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.8718s for 90112 events => throughput is 7.59E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.736846e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.653125e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.663907e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.171391e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.8047s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7726s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8248s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7916s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0332s for 8192 events => throughput is 2.47E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 
[1.5803725748421161E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.7121s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3652s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3469s for 90112 events => throughput is 2.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.7303s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3763s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3540s for 90112 events => throughput is 2.55E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.286508e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.292253e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.523126e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.518305e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.117684e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.103023e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.164738e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.141573e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.122122e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.119711e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.167581e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.166642e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.109077e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.121489e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.431461e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.435333e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 99b68d9b86..5e7aa12c64 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -3,8 +3,8 @@ CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,11 +15,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_23:51:03 +DATE: 2023-10-30_23:34:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.4892s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2748s - [COUNTERS] Fortran MEs ( 1 ) : 4.2144s for 8192 events => throughput is 1.94E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4205s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2761s + [COUNTERS] Fortran MEs ( 1 ) : 4.1444s for 8192 events => throughput is 1.98E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3576s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2692s - [COUNTERS] Fortran MEs ( 1 ) : 4.0884s for 8192 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3809s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2723s + [COUNTERS] Fortran MEs ( 1 ) : 4.1087s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] 
PROGRAM TOTAL : 47.0340s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8721s - [COUNTERS] Fortran MEs ( 1 ) : 45.1619s for 90112 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.2595s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8860s + [COUNTERS] Fortran MEs ( 1 ) : 45.3735s for 90112 events => throughput is 1.99E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277396490802749E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.4522s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2593s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1929s for 8192 events => throughput is 1.95E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.3601s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2377s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.1224s for 8192 events => throughput is 1.99E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803774602344628E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 51.1297s - [COUNTERS] Fortran Overhead ( 0 ) : 5.8662s - [COUNTERS] CudaCpp MEs ( 2 ) : 45.2635s for 90112 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 51.1932s + [COUNTERS] Fortran Overhead ( 0 ) : 5.8889s + [COUNTERS] CudaCpp MEs ( 2 ) : 45.3043s for 90112 events => throughput is 1.99E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.064759e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.064791e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.025024e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.056153e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277389126121586E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.4799s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3651s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1147s for 8192 events => throughput is 7.35E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.4698s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3636s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1062s for 8192 events => throughput is 7.41E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803771887543366E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 15.2606s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9688s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.2918s for 90112 events => throughput is 7.33E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.4000s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9874s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.4125s for 90112 events => throughput is 7.26E+03 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.498295e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.510038e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.494570e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.463864e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277390198115864E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.2578s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7591s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4987s for 8192 events => throughput is 1.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.2604s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7598s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5006s for 8192 events => throughput is 1.64E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803774416711566E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.7693s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3504s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4189s for 90112 events => throughput is 1.66E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.8736s + [COUNTERS] Fortran Overhead 
( 0 ) : 2.3745s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4990s for 90112 events => throughput is 1.64E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.704918e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.710708e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.708973e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.710081e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277390198115864E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.1390s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6947s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4444s for 8192 events => throughput is 1.84E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.1198s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6914s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4284s for 8192 events => throughput is 1.91E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803774416711566E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.0587s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2991s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.7596s for 90112 events => 
throughput is 1.89E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.0277s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2859s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.7418s for 90112 events => throughput is 1.90E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.954262e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.964173e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.953218e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.952364e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277396394633404E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.3188s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7939s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5250s for 8192 events => throughput is 1.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.3259s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7968s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5290s for 8192 events => throughput is 1.55E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803777741065333E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 8.1837s - 
[COUNTERS] Fortran Overhead ( 0 ) : 2.3911s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.7926s for 90112 events => throughput is 1.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.1714s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3874s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.7840s for 90112 events => throughput is 1.56E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.565914e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.576795e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.577747e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.574102e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,8 +514,8 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277400478491260E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.7649s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7436s + [COUNTERS] PROGRAM TOTAL : 0.7681s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7468s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0213s for 8192 events => throughput is 3.84E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,8 +547,8 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803779990154892E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.6087s - 
[COUNTERS] Fortran Overhead ( 0 ) : 2.3737s + [COUNTERS] PROGRAM TOTAL : 2.5744s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3393s [COUNTERS] CudaCpp MEs ( 2 ) : 0.2351s for 90112 events => throughput is 3.83E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.608601e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.596987e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.948328e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.950725e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.498781e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.494407e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.725121e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.637374e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.489018e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.503985e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** 
Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.670344e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.725521e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.486657e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.484632e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.522733e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.523442e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 2f24ab2c69..b9c1204e09 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -17,13 +17,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_23:54:21 +DATE: 2023-10-30_23:37:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3628s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2741s - [COUNTERS] Fortran MEs ( 1 ) : 4.0887s for 8192 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3521s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2754s + [COUNTERS] Fortran MEs ( 1 ) : 4.0767s for 8192 events => throughput is 2.01E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3578s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2698s - [COUNTERS] Fortran MEs ( 1 ) : 4.0879s for 8192 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3925s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2709s + [COUNTERS] Fortran MEs ( 1 ) : 4.1216s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] 
PROGRAM TOTAL : 47.1743s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8791s - [COUNTERS] Fortran MEs ( 1 ) : 45.2953s for 90112 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 46.9733s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8635s + [COUNTERS] Fortran MEs ( 1 ) : 45.1098s for 90112 events => throughput is 2.00E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277432965013E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.6820s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4279s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.2541s for 8192 events => throughput is 1.93E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.6564s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4133s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.2432s for 8192 events => throughput is 1.93E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725813026109E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 53.1743s - [COUNTERS] Fortran Overhead ( 0 ) : 6.0425s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.1318s for 90112 events => throughput is 1.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 52.8661s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0399s + [COUNTERS] CudaCpp MEs ( 2 ) : 46.8261s for 90112 events => throughput is 1.92E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.972012e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.985030e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.980651e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.976483e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277430934464E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.6421s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4340s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2081s for 8192 events => throughput is 3.71E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6813s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4741s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2072s for 8192 events => throughput is 3.71E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725816246317E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 28.5147s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0474s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.4673s for 90112 events => throughput is 3.68E+03 events/s + [COUNTERS] PROGRAM TOTAL : 28.4509s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0381s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.4128s for 90112 events => throughput is 3.69E+03 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.763063e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.810181e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.790148e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.800338e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.1780s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2112s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9667s for 8192 events => throughput is 8.47E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.1824s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2147s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9677s for 8192 events => throughput is 8.47E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 13.5745s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8645s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.7100s for 90112 events => throughput is 8.41E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.3669s + [COUNTERS] Fortran 
Overhead ( 0 ) : 2.8024s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.5645s for 90112 events => throughput is 8.53E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.734001e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.724129e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.720628e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.790107e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.9481s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0997s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8483s for 8192 events => throughput is 9.66E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9418s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0966s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8452s for 8192 events => throughput is 9.69E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 12.0943s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7073s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.3870s for 
90112 events => throughput is 9.60E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.1278s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7193s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4085s for 90112 events => throughput is 9.58E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.695446e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.923637e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.714293e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.767125e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.4113s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3354s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0759s for 8192 events => throughput is 7.61E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5489s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4546s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0943s for 8192 events => throughput is 7.49E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL 
: 14.8070s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9573s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.8497s for 90112 events => throughput is 7.60E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.7117s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9290s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.7827s for 90112 events => throughput is 7.65E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.670668e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.665232e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.569813e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.734538e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277293084707E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.8079s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7761s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0318s for 8192 events => throughput is 2.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8050s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7728s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0322s for 8192 events => throughput is 2.54E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 
[1.5803725738731039E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.7597s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3971s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3626s for 90112 events => throughput is 2.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.7237s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3786s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3451s for 90112 events => throughput is 2.61E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.299489e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.289069e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.543169e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.526584e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.113219e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.118346e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.169609e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.165672e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.114662e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.110701e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.168580e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.168512e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.106978e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.101237e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.432506e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.440700e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 8c0eb8987a..43f15bcd5b 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -2,9 +2,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
+make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,8 +15,9 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. @@ -25,7 +26,6 @@ make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_23:59:58 +DATE: 2023-10-30_23:43:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index d1b8675317..a18f7f44a8 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -2,9 +2,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-30_00:00:01 +DATE: 2023-10-30_23:43:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 054c4600d8..63fd239456 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -15,25 +15,25 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2023-10-30_00:00:05 +DATE: 2023-10-30_23:43:36 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 5741f0dc15..8b97b424e2 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_23:58:33 +DATE: 2023-10-30_23:42:05 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3048s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2352s - [COUNTERS] Fortran MEs ( 1 ) : 0.0697s for 8192 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3064s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2366s + [COUNTERS] Fortran MEs ( 1 ) : 0.0698s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2979s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2279s - [COUNTERS] Fortran MEs ( 1 ) : 0.0701s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3011s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2317s + [COUNTERS] Fortran MEs ( 1 ) : 0.0694s for 8192 events => throughput is 1.18E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 
2.1663s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4084s - [COUNTERS] Fortran MEs ( 1 ) : 0.7578s for 90112 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2048s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4352s + [COUNTERS] Fortran MEs ( 1 ) : 0.7697s for 90112 events => throughput is 1.17E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3809s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3052s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0757s for 8192 events => throughput is 1.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3893s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3129s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0764s for 8192 events => throughput is 1.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.3528s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5177s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8351s for 90112 events => throughput is 1.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3510s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5225s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8285s for 90112 events => throughput is 1.09E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.098466e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.083219e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.074362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.082834e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3127s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2724s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0404s for 8192 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3183s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2773s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615872] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.9207s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4750s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4457s for 90112 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0676s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5790s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4885s for 90112 events => throughput is 1.84E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.973377e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.011280e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.025745e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.004922e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2789s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2557s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0232s for 8192 events => throughput is 3.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2820s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2578s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0242s for 8192 events => throughput is 3.38E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7317s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4710s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2607s for 90112 events => throughput is 3.46E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7150s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4572s + [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.2578s for 90112 events => throughput is 3.50E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.508702e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.422980e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.351363e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.485503e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2758s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2551s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0207s for 8192 events => throughput is 3.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2770s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2561s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0209s for 8192 events => throughput is 3.92E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6787s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4494s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2292s for 90112 events => throughput is 3.93E+05 events/s + [COUNTERS] PROGRAM 
TOTAL : 1.6961s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4641s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2320s for 90112 events => throughput is 3.88E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.848142e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.764338e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.827447e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.770277e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2976s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2659s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0317s for 8192 events => throughput is 2.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3010s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2692s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0319s for 8192 events => throughput is 2.57E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8054s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4648s - [COUNTERS] CudaCpp MEs ( 2 ) 
: 0.3406s for 90112 events => throughput is 2.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8116s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4693s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3423s for 90112 events => throughput is 2.63E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.498249e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.478678e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.567253e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.631815e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,8 +514,8 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6574s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6567s + [COUNTERS] PROGRAM TOTAL : 0.6626s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6619s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.23E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615869] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8627s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8552s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0074s for 
90112 events => throughput is 1.21E+07 events/s + [COUNTERS] PROGRAM TOTAL : 2.2433s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2357s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 90112 events => throughput is 1.19E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.634464e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.610218e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.995571e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.161418e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.534001e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.629399e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.508239e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.492469e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.523244e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.615993e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** 
Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.792185e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.799999e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.530343e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.604556e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.779192e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.786702e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 23ccd83cc9..be2917d26e 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -15,25 +15,25 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2023-10-29_23:59:02 +DATE: 2023-10-30_23:42:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3006s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2311s - [COUNTERS] Fortran MEs ( 1 ) : 0.0695s for 8192 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3083s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2383s + [COUNTERS] Fortran MEs ( 1 ) : 0.0700s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** 
-------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3032s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2341s - [COUNTERS] Fortran MEs ( 1 ) : 0.0691s for 8192 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3017s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2316s + [COUNTERS] Fortran MEs ( 1 ) : 0.0701s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.1794s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4177s - [COUNTERS] Fortran MEs ( 1 ) : 0.7617s for 90112 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1785s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4139s + [COUNTERS] Fortran MEs ( 1 ) : 0.7646s for 90112 events => throughput is 1.18E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050316058770007] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3737s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3035s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0702s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3802s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3092s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0709s for 8192 events => throughput is 1.15E+05 events/s *** (2-none) Compare MADEVENT_CPP 
x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182797520666] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2755s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4978s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7777s for 90112 events => throughput is 1.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3016s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5173s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7843s for 90112 events => throughput is 1.15E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.169647e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.162040e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.180347e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.151940e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313133963987] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2835s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2578s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0257s for 8192 events => throughput is 3.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2857s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2603s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0254s 
for 8192 events => throughput is 3.23E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179276862181] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7226s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4468s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2758s for 90112 events => throughput is 3.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7398s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4632s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2765s for 90112 events => throughput is 3.26E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.228255e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.205576e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.237646e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.147401e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313344346482] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2577s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2454s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0122s for 8192 events => throughput is 6.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2599s + 
[COUNTERS] Fortran Overhead ( 0 ) : 0.2472s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0127s for 8192 events => throughput is 6.43E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179137376883] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6613s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5168s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1445s for 90112 events => throughput is 6.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6035s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4659s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1376s for 90112 events => throughput is 6.55E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.559338e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.338397e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.496926e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.343589e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313344346482] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2722s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2605s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0117s for 
8192 events => throughput is 7.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2592s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2480s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0113s for 8192 events => throughput is 7.27E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179137376883] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.5592s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4340s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1252s for 90112 events => throughput is 7.20E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5684s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4421s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1263s for 90112 events => throughput is 7.13E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.989366e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.758514e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.214148e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.052098e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050317064561834] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2631s - 
[COUNTERS] Fortran Overhead ( 0 ) : 0.2476s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0155s for 8192 events => throughput is 5.29E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2663s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2510s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0154s for 8192 events => throughput is 5.33E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182143140752] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6089s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4384s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1705s for 90112 events => throughput is 5.29E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6192s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4470s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1722s for 90112 events => throughput is 5.23E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.961081e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.921448e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.937970e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.119546e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050319131407651] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6617s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6611s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.48E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6579s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6574s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.57E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801186038252196] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8523s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8464s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0059s for 90112 events => throughput is 1.54E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.8671s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8612s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0059s for 90112 events => throughput is 1.52E+07 events/s *** (3) Compare 
MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.828358e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.914110e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.418352e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.440910e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.091778e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.334370e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.699103e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.703577e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.103721e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.243059e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.771665e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = 
( 1.770259e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.574079e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.746993e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.932223e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.939439e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 406946ea16..b248c00c80 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -15,10 +15,9 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' @@ -26,6 +25,7 @@ make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-29_23:59:29 +DATE: 2023-10-30_23:43:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3015s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2320s - [COUNTERS] Fortran MEs ( 1 ) : 0.0695s for 8192 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3027s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2334s + [COUNTERS] Fortran MEs ( 1 ) : 0.0693s for 8192 events => throughput is 1.18E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2983s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2284s - [COUNTERS] Fortran MEs ( 1 ) : 0.0698s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 
0.3025s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2324s + [COUNTERS] Fortran MEs ( 1 ) : 0.0701s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.1556s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3996s - [COUNTERS] Fortran MEs ( 1 ) : 0.7560s for 90112 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1797s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4155s + [COUNTERS] Fortran MEs ( 1 ) : 0.7642s for 90112 events => throughput is 1.18E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333282657206] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3834s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3080s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0753s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3865s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3107s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0758s for 8192 events => throughput is 1.08E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182636608796] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.3329s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5099s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8230s for 90112 events => throughput is 1.09E+05 events/s + 
[COUNTERS] PROGRAM TOTAL : 2.3561s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5216s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8345s for 90112 events => throughput is 1.08E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.106713e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.091158e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.115003e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.082266e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333282657201] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3287s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2871s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0416s for 8192 events => throughput is 1.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3137s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2744s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0392s for 8192 events => throughput is 2.09E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182636608810] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.9251s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4869s - 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.4382s for 90112 events => throughput is 2.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9130s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4797s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4333s for 90112 events => throughput is 2.08E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.926102e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.035583e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.940203e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.014225e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2861s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2633s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.59E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2821s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2589s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0232s for 8192 events => throughput is 3.54E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - 
[COUNTERS] PROGRAM TOTAL : 1.7068s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4549s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2519s for 90112 events => throughput is 3.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7607s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4976s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2631s for 90112 events => throughput is 3.42E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.487761e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.500656e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.590355e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.486676e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2738s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2540s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0199s for 8192 events => throughput is 4.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2828s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2617s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.88E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 
[0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6687s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4454s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2233s for 90112 events => throughput is 4.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7154s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4856s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2298s for 90112 events => throughput is 3.92E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.994889e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.920991e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.929775e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.905025e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3021s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2704s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0318s for 8192 events => throughput is 2.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2991s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2672s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0319s for 8192 events => throughput is 2.56E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < 
/tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8127s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4617s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3509s for 90112 events => throughput is 2.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8242s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4689s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3553s for 90112 events => throughput is 2.54E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.535867e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.437291e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.478315e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.499461e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,8 +514,8 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333301029693] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6548s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6541s + [COUNTERS] PROGRAM TOTAL : 0.6574s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6568s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.23E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,8 +547,8 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < 
/tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182637219935] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8602s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8527s + [COUNTERS] PROGRAM TOTAL : 1.8600s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8524s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 90112 events => throughput is 1.19E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.592188e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.578688e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.007808e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.112656e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.525016e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.619097e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.529324e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.484643e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.521994e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.622729e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.809421e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.817708e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.534283e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.610768e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.778088e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.779631e+07 ) sec^-1 TEST COMPLETED From e4768a440a981981a1ec7262dd5c7dffc62216a9 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 1 Nov 2023 16:06:17 +0100 Subject: [PATCH 091/119] [oct23av] in CODEGEN output.py, modify a few comments as suggested in Olivier's review of PR #780 --- .../CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index 20e609eedf..524045919c 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -1,6 +1,6 @@ # Copyright (C) 2020-2023 CERN and UCLouvain. # Licensed under the GNU Lesser General Public License (version 3 or later). -# Created by: O. Mattelaer (Sep 2021) for the MG5aMC CUDACPP plugin. 
+# Created by: A. Valassi (Sep 2021) for the MG5aMC CUDACPP plugin. # Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2021-2023) for the MG5aMC CUDACPP plugin. import os @@ -204,7 +204,6 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag): MG5options are all the options of the main interface outputflags is a list of options provided when doing the output command""" misc.sprint('Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self)) - misc.sprint(self.in_madevent_mode) if self.in_madevent_mode: self.add_input_for_banner() if 'CUDACPP_CODEGEN_PATCHLEVEL' in os.environ: patchlevel = os.environ['CUDACPP_CODEGEN_PATCHLEVEL'] @@ -222,16 +221,16 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag): # logger.info("####### \n stderr is \n %s", stderr) # raise Exception('ERROR! the O/S call to patchMad.sh failed') # NEW implementation (OM PR #764) - # **NB** AV: patchMad.sh may silently fail, for instance because 'madevent treatcards run' may silently fail - # **NB** AV: currently, error checking is done by looking for error strings on the full generation log - # **NB** AV: for this reason, but also because I want to always see the output, I change the Popen call to always dump stdout and stderr + # **NB** AV: change the Popen call to always dump stdout and stderr, because I want to always see the output + # **NB** AV: this also allows error checking by looking for error strings on the generation log if patchMad.sh silently fails + # **NB** AV: (e.g. 
this did happen in the past, when patchMad.sh was calling 'madevent treatcards run', and the latter silently failed) plugin_path = os.path.dirname(os.path.realpath( __file__ )) ###p = subprocess.Popen([pjoin(plugin_path, 'patchMad.sh'), self.dir_path , 'PROD', str(patchlevel)], ### stdout=subprocess.PIPE, stderr=subprocess.PIPE) p = subprocess.Popen([pjoin(plugin_path, 'patchMad.sh'), self.dir_path , 'PROD', str(patchlevel)]) # AV always dump patchMad.sh stdout/stderr stdout, stderr = p.communicate() misc.sprint(p.returncode) - if p.returncode != 0: # AV: WARNING! this may silently fail, for instance because 'madevent treatcards run' may silently fail + if p.returncode != 0: # AV: WARNING! do not fully trust this check! patchMad.sh was observed to silently fail in the past... logger.debug("####### \n stdout is \n %s", stdout) logger.info("####### \n stderr is \n %s", stderr) logger.info("return code is %s\n", p.returncode) From 5bf9589c4e28da1067aa433304f1f381cafe5bf3 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 1 Nov 2023 16:29:19 +0100 Subject: [PATCH 092/119] [oct23av] in CODEGEN output.py, move 'tmadmode' patches from patchMad.sh to generateAndCompare.sh as suggested in Olivier's review of PR #780 --- .../PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh | 32 ------------------- epochX/cudacpp/CODEGEN/generateAndCompare.sh | 28 ++++++++++++++-- 2 files changed, 25 insertions(+), 35 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh index c96b78dbfc..7edafba599 100755 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh @@ -40,51 +40,19 @@ dir_patches=$2 if [ ! -e ${dir} ]; then echo "ERROR! 
Directory $dir does not exist"; exit 1; fi -# AV Recover special 'tmad' mode used by generateAndCompare.sh, after OM's changes that commented this out in patchMad.sh -tmadmode=0 -if [ "${CUDACPP_CODEGEN_TMADMODE}" != "" ]; then - tmadmode=1 - echo "DEBUG! Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=${CUDACPP_CODEGEN_TMADMODE})" -fi - # Exit here for patchlevel 0 (--upstream) if [ "${patchlevel}" == "0" ]; then exit $status; fi -# Add global flag '-O3 -ffast-math -fbounds-check' as in previous gridpacks -if [ "${tmadmode}" != "0" ]; then - echo "GLOBAL_FLAG=-O3 -ffast-math -fbounds-check" > ${dir}/Source/make_opts.new - cat ${dir}/Source/make_opts >> ${dir}/Source/make_opts.new - \mv ${dir}/Source/make_opts.new ${dir}/Source/make_opts -fi - # Patch the default Fortran code to provide the integration with the cudacpp plugin # (1) Process-independent patches touch ${dir}/Events/.keep # this file should already be present (mg5amcnlo copies it from Template/LO/Events/.keep) \cp -pr ${scrdir}/MG5aMC_patches/${dir_patches}/fbridge_common.inc ${dir}/SubProcesses # new file if [ "${patchlevel}" == "2" ]; then cd ${dir} - if [ "${tmadmode}" != "0" ]; then - sed -i 's/DEFAULT_F2PY_COMPILER=f2py.*/DEFAULT_F2PY_COMPILER=f2py3/' Source/make_opts - cat Source/make_opts | sed '/#end/q' | sort > Source/make_opts.new - cat Source/make_opts | sed -n -e '/#end/,$p' >> Source/make_opts.new - \mv Source/make_opts.new Source/make_opts - fi echo "DEBUG: cd ${PWD}; patch -p4 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.common" if ! 
patch -p4 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.common; then status=1; fi \rm -f Source/*.orig \rm -f bin/internal/*.orig - if [ "${tmadmode}" != "0" ]; then - echo " -#********************************************************************* -# Options for the cudacpp plugin -#********************************************************************* - -# Set cudacpp-specific values of non-cudacpp-specific options --O3 -ffast-math -fbounds-check = global_flag ! build flags for Fortran code (for a fair comparison to cudacpp) - -# New cudacpp-specific options (default values are defined in banner.py) -CPP = cudacpp_backend ! valid backends are FORTRAN, CPP, CUDA" >> Cards/run_card.dat - fi cd - > /dev/null fi for p1dir in ${dir}/SubProcesses/P*; do diff --git a/epochX/cudacpp/CODEGEN/generateAndCompare.sh b/epochX/cudacpp/CODEGEN/generateAndCompare.sh index b68959bc88..1f416ac5b9 100755 --- a/epochX/cudacpp/CODEGEN/generateAndCompare.sh +++ b/epochX/cudacpp/CODEGEN/generateAndCompare.sh @@ -6,9 +6,6 @@ set -e # fail on error -# AV Recover special 'tmad' mode used by generateAndCompare.sh, after OM's changes that commented this out in patchMad.sh -export CUDACPP_CODEGEN_TMADMODE=1 - #-------------------------------------------------------------------------------------- function codeGenAndDiff() @@ -224,6 +221,28 @@ function codeGenAndDiff() \rm -rf ${outproc}/bin/internal/ufomodel/py3_model.pkl \rm -rf ${outproc}/bin/internal/ufomodel/__pycache__ touch ${outproc}/HTML/.keep # new file + if [ "${patchlevel}" != "0" ]; then + # Add global flag '-O3 -ffast-math -fbounds-check' as in previous gridpacks + echo "GLOBAL_FLAG=-O3 -ffast-math -fbounds-check" > ${outproc}/Source/make_opts.new + cat ${outproc}/Source/make_opts >> ${outproc}/Source/make_opts.new + \mv ${outproc}/Source/make_opts.new ${outproc}/Source/make_opts + fi + if [ "${patchlevel}" == "2" ]; then + sed -i 's/DEFAULT_F2PY_COMPILER=f2py.*/DEFAULT_F2PY_COMPILER=f2py3/' ${outproc}/Source/make_opts + cat 
${outproc}/Source/make_opts | sed '/#end/q' | sort > ${outproc}/Source/make_opts.new + cat ${outproc}/Source/make_opts | sed -n -e '/#end/,$p' >> ${outproc}/Source/make_opts.new + \mv ${outproc}/Source/make_opts.new ${outproc}/Source/make_opts + echo " +#********************************************************************* +# Options for the cudacpp plugin +#********************************************************************* + +# Set cudacpp-specific values of non-cudacpp-specific options +-O3 -ffast-math -fbounds-check = global_flag ! build flags for Fortran code (for a fair comparison to cudacpp) + +# New cudacpp-specific options (default values are defined in banner.py) +CPP = cudacpp_backend ! valid backends are FORTRAN, CPP, CUDA" >> ${outproc}/Cards/run_card.dat + fi fi # Check the code generation log for errors if [ -d ${outproc} ] && ! grep -q "Please report this bug" ${outproc}_log.txt; then @@ -411,6 +430,7 @@ UNTARONLY=1 # Default: apply all patches in patchMad.sh (--nopatch is ignored unless --mad is also specified) PATCHLEVEL= +patchlevel=2 # [DEFAULT] complete generation of cudacpp .sa/.mad (copy templates and apply patch commands) # Default for gridpacks: use helicity recycling (use --nohelrec to disable it) # (export the value to the untarGridpack.sh script) @@ -429,8 +449,10 @@ while [ "$1" != "" ]; do QUIET=1 elif [ "$1" == "--nopatch" ] && [ "${PATCHLEVEL}" == "" ]; then PATCHLEVEL=--nopatch + patchlevel=1 # [--nopatch] modify upstream MG5AMC but do not apply patch commands (reference to prepare new patches) elif [ "$1" == "--upstream" ] && [ "${PATCHLEVEL}" == "" ]; then PATCHLEVEL=--upstream + patchlevel=0 # [--upstream] out of the box codegen from upstream MG5AMC (do not even copy templates) elif [ "$1" == "--nountaronly" ] && [ "${SCRBCK}" == "gridpack" ]; then ###UNTARONLY=0 echo "ERROR! 
gridpack mode is no longer supported by this script!"; exit 1 From a2149a2b2fb6021ce748b0e7812f7a41407f8db3 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 1 Nov 2023 16:32:43 +0100 Subject: [PATCH 093/119] [oct23av] in CODEGEN output.py, try to reenable exception in CPPRunCard.reset_simd as suggested in Olivier's review of PR #780 This fails code generation for eemumu.mad: run_card missed argument cudacpp_backend. Takes default: CPP run_card missed argument cudacpp_backend. Takes default: CPP run_card missed argument cudacpp_backend. Takes default: CPP Command "treatcards run" interrupted with error: Exception : INTERNAL ERROR! CPPRunCard instance has no attribute path Please report this bug on https://bugs.launchpad.net/mg5amcnlo More information is found in 'ME5_debug'. Please attach this file to your report. --- .../CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py | 6 +++--- epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 11 +++++------ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py index c9d1c7706a..d35d5457db 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py @@ -49,9 +49,9 @@ def compile(self, *args, **opts): class CPPRunCard(banner_mod.RunCardLO): def reset_simd(self, old_value, new_value, name): if not hasattr(self, 'path'): - logger.warning('WARNING! CPPRunCard instance has no attribute path') - return - ###raise Exception('INTERNAL ERROR! CPPRunCard instance has no attribute path') + ###logger.warning('WARNING! CPPRunCard instance has no attribute path') + ###return + raise Exception('INTERNAL ERROR! 
CPPRunCard instance has no attribute path') if name == "vector_size" and new_value <= int(old_value): # code can handle the new size -> do not recompile return diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 01c5f35334..97fcae84d0 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005320310592651367  +DEBUG: model prefixing takes 0.0054814815521240234  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -180,7 +180,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.148 s +ALOHA: aloha creates 2 routines in 0.146 s VVV1 FFV1 FFV1 @@ -194,9 +194,8 @@ FileWriter for / INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  -DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.645s -user 0m0.480s -sys 0m0.053s +real 0m0.539s +user 0m0.468s +sys 0m0.065s From efc1b0cbeb83c405413ae14e4281ff5ea8fb6126 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 1 Nov 2023 16:35:13 +0100 Subject: [PATCH 094/119] [oct23av] in CODEGEN output.py, disable again the exception in CPPRunCard.reset_simd Revert "[oct23av] in CODEGEN output.py, try to reenable exception in CPPRunCard.reset_simd as suggested in Olivier's review of PR #780" This reverts commit a2149a2b2fb6021ce748b0e7812f7a41407f8db3. --- .../CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py | 6 +++--- epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 11 ++++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py index d35d5457db..c9d1c7706a 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py @@ -49,9 +49,9 @@ def compile(self, *args, **opts): class CPPRunCard(banner_mod.RunCardLO): def reset_simd(self, old_value, new_value, name): if not hasattr(self, 'path'): - ###logger.warning('WARNING! CPPRunCard instance has no attribute path') - ###return - raise Exception('INTERNAL ERROR! CPPRunCard instance has no attribute path') + logger.warning('WARNING! CPPRunCard instance has no attribute path') + return + ###raise Exception('INTERNAL ERROR! 
CPPRunCard instance has no attribute path') if name == "vector_size" and new_value <= int(old_value): # code can handle the new size -> do not recompile return diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 97fcae84d0..01c5f35334 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0054814815521240234  +DEBUG: model prefixing takes 0.005320310592651367  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -180,7 +180,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.146 s +ALOHA: aloha creates 2 routines in 0.148 s VVV1 FFV1 FFV1 @@ -194,8 +194,9 @@ FileWriter for / INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  +DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.539s -user 0m0.468s -sys 0m0.065s +real 0m0.645s +user 0m0.480s +sys 0m0.053s From 07b10908f390b63e0fc121f3450b272ec8801de7 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 1 Nov 2023 16:56:20 +0100 Subject: [PATCH 095/119] [oct23av] (complete oct23av?) 
regenerate all 15 processes, no changes except in codegen logs --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 25 ++++---- .../CODEGEN_cudacpp_ee_mumu_log.txt | 13 ++-- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 24 +++---- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 11 ++-- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 28 ++++---- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 26 ++++---- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 15 ++--- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 26 ++++---- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 15 ++--- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 26 ++++---- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 15 ++--- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 28 ++++---- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 15 ++--- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 9 ++- .../CODEGEN_mad_pp_tt012j_log.txt | 64 +++++++++---------- 15 files changed, 167 insertions(+), 173 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 7fa45fb028..5ae99a41fc 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005362033843994141  +DEBUG: model prefixing takes 0.005468845367431641  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -154,7 +154,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.004 s +1 processes with 2 diagrams generated in 0.005 s Total: 1 processes with 2 diagrams output madevent ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -174,7 +174,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,19 +191,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.098 s +Wrote files for 8 helas calls in 0.103 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.197 s +ALOHA: aloha creates 3 routines in 0.209 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.250 s +ALOHA: aloha creates 7 routines in 0.262 s FFV1 FFV1 FFV2 @@ -227,8 +227,6 @@ INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages DEBUG: 
'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG: self.in_madevent_mode =  True [output.py at line 207]  -DEBUG! Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile @@ -243,16 +241,16 @@ patching file matrix1.f Hunk #3 succeeded at 230 (offset 9 lines). Hunk #4 succeeded at 267 (offset 18 lines). Hunk #5 succeeded at 312 (offset 18 lines). -DEBUG: p.returncode =  0 [output.py at line 233]  +DEBUG: p.returncode =  0 [output.py at line 232]  Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done. Type "launch" to generate events from this process, or see /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README Run "open index.html" to see more information about this process. quit -real 0m1.902s -user 0m1.610s -sys 0m0.252s +real 0m2.024s +user 0m1.716s +sys 0m0.214s ************************************************************ * * * W E L C O M E to * @@ -280,6 +278,9 @@ Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run +run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP WARNING! 
CPPRunCard instance has no attribute path quit INFO: diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index a43de6210d..62d2920ca0 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00529170036315918  +DEBUG: model prefixing takes 0.005658149719238281  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,14 +174,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. 
-Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s +Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.263 s +ALOHA: aloha creates 4 routines in 0.282 s FFV1 FFV1 FFV2 @@ -199,9 +199,8 @@ FileWriter for / INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  -DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.767s -user 0m0.591s -sys 0m0.069s +real 0m0.697s +user 0m0.636s +sys 0m0.055s diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 9370a7cd0f..a4d60ee071 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005549192428588867  +DEBUG: model prefixing takes 0.0059032440185546875  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,17 +190,17 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.101 s +Generated helas calls for 1 subprocesses (3 diagrams) in 0.007 s +Wrote files for 10 helas calls in 0.108 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.151 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.135 s +ALOHA: aloha creates 4 routines in 0.136 s VVV1 FFV1 FFV1 @@ -220,8 +220,6 @@ INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG: self.in_madevent_mode =  True 
[output.py at line 207]  -DEBUG! Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile @@ -232,16 +230,16 @@ DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/S patching file auto_dsig1.f patching file driver.f patching file matrix1.f -DEBUG: p.returncode =  0 [output.py at line 233]  +DEBUG: p.returncode =  0 [output.py at line 232]  Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done. Type "launch" to generate events from this process, or see /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README Run "open index.html" to see more information about this process. quit -real 0m1.863s -user 0m1.505s -sys 0m0.227s +real 0m1.900s +user 0m1.531s +sys 0m0.233s ************************************************************ * * * W E L C O M E to * @@ -269,6 +267,8 @@ Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run +run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP WARNING! 
CPPRunCard instance has no attribute path quit INFO: diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 01c5f35334..e6413cd3e0 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005320310592651367  +DEBUG: model prefixing takes 0.005649089813232422  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -180,7 +180,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.148 s +ALOHA: aloha creates 2 routines in 0.150 s VVV1 FFV1 FFV1 @@ -194,9 +194,8 @@ FileWriter for / INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  -DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.645s -user 0m0.480s -sys 0m0.053s +real 0m0.580s +user 0m0.511s +sys 0m0.060s diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 5ce931bf1e..3cd7342a49 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005847454071044922  +DEBUG: model prefixing takes 0.005325794219970703  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -163,7 +163,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.019 s +1 processes with 16 diagrams generated in 0.020 s Total: 2 processes with 19 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -185,7 +185,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -202,7 +202,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -217,15 +217,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s -Wrote files for 46 helas calls in 0.241 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.044 s +Wrote files for 46 helas calls in 0.253 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.320 s +ALOHA: aloha creates 5 routines in 0.340 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -233,7 +233,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.305 s +ALOHA: aloha creates 10 routines in 0.320 s VVV1 
VVV1 FFV1 @@ -258,8 +258,6 @@ INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG: self.in_madevent_mode =  True [output.py at line 207]  -DEBUG! Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile @@ -278,16 +276,16 @@ Hunk #2 succeeded at 159 (offset 16 lines). Hunk #3 succeeded at 237 (offset 16 lines). Hunk #4 succeeded at 265 (offset 16 lines). Hunk #5 succeeded at 310 (offset 16 lines). -DEBUG: p.returncode =  0 [output.py at line 233]  +DEBUG: p.returncode =  0 [output.py at line 232]  Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done. Type "launch" to generate events from this process, or see /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/README Run "open index.html" to see more information about this process. quit -real 0m2.290s -user 0m2.032s -sys 0m0.236s +real 0m2.398s +user 0m2.098s +sys 0m0.286s ************************************************************ * * * W E L C O M E to * @@ -315,6 +313,8 @@ Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run +run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP WARNING! 
CPPRunCard instance has no attribute path quit INFO: diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index 85ab3645db..ff1c677ef2 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00560450553894043  +DEBUG: model prefixing takes 0.005454063415527344  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.021 s +1 processes with 16 diagrams generated in 0.022 s Total: 1 processes with 16 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,15 +190,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s -Wrote files for 36 helas calls in 0.147 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s +Wrote files for 36 helas calls in 0.155 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.325 s +ALOHA: aloha creates 5 routines in 0.337 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.309 s +ALOHA: aloha creates 10 routines in 0.320 s VVV1 VVV1 FFV1 @@ -231,8 +231,6 @@ INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG: self.in_madevent_mode =  True [output.py at line 207]  -DEBUG! 
Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile @@ -247,16 +245,16 @@ Hunk #2 succeeded at 159 (offset 16 lines). Hunk #3 succeeded at 237 (offset 16 lines). Hunk #4 succeeded at 265 (offset 16 lines). Hunk #5 succeeded at 310 (offset 16 lines). -DEBUG: p.returncode =  0 [output.py at line 233]  +DEBUG: p.returncode =  0 [output.py at line 232]  Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done. Type "launch" to generate events from this process, or see /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/README Run "open index.html" to see more information about this process. quit -real 0m3.233s -user 0m1.929s -sys 0m0.241s +real 0m2.259s +user 0m2.015s +sys 0m0.240s ************************************************************ * * * W E L C O M E to * @@ -284,6 +282,8 @@ Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run +run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP WARNING! 
CPPRunCard instance has no attribute path quit INFO: diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 579bb1b9cf..29caa2cb81 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005326509475708008  +DEBUG: model prefixing takes 0.005802631378173828  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.021 s +1 processes with 16 diagrams generated in 0.023 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. 
-Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.322 s +ALOHA: aloha creates 5 routines in 0.341 s VVV1 VVV1 FFV1 @@ -202,9 +202,8 @@ FileWriter for / INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  -DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m1.065s -user 0m0.708s -sys 0m0.052s +real 0m0.840s +user 0m0.764s +sys 0m0.063s diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 869cf86dfe..9a52869de7 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0052950382232666016  +DEBUG: model prefixing takes 0.005549907684326172  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.155 s +1 processes with 123 diagrams generated in 0.161 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,15 +190,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.419 s -Wrote files for 222 helas calls in 0.682 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.436 s +Wrote files for 222 helas calls in 0.713 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.327 s +ALOHA: aloha creates 5 routines in 0.347 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.308 s +ALOHA: aloha creates 10 routines in 0.325 s VVV1 VVV1 FFV1 @@ -234,8 +234,6 @@ INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG: self.in_madevent_mode =  True [output.py at line 207]  -DEBUG! Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile @@ -250,16 +248,16 @@ Hunk #2 succeeded at 191 (offset 48 lines). 
Hunk #3 succeeded at 269 (offset 48 lines). Hunk #4 succeeded at 297 (offset 48 lines). Hunk #5 succeeded at 342 (offset 48 lines). -DEBUG: p.returncode =  0 [output.py at line 233]  +DEBUG: p.returncode =  0 [output.py at line 232]  Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done. Type "launch" to generate events from this process, or see /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/README Run "open index.html" to see more information about this process. quit -real 0m3.234s -user 0m2.983s -sys 0m0.244s +real 0m3.391s +user 0m3.123s +sys 0m0.258s ************************************************************ * * * W E L C O M E to * @@ -287,6 +285,8 @@ Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run +run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP WARNING! CPPRunCard instance has no attribute path quit INFO: diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index de0b790f71..a5d16ee4c0 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005513191223144531  +DEBUG: model prefixing takes 0.005487680435180664  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.156 s +1 processes with 123 diagrams generated in 0.163 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.415 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.437 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.318 s +ALOHA: aloha creates 5 routines in 0.330 s VVV1 VVV1 FFV1 @@ -205,9 +205,8 @@ FileWriter for / INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  -DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m1.502s -user 0m1.341s -sys 0m0.065s +real 0m1.510s +user 0m1.434s +sys 0m0.059s diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index f3a2f68dc9..aae87d4baa 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005324363708496094  +DEBUG: model prefixing takes 0.005763530731201172  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.841 s +1 processes with 1240 diagrams generated in 1.923 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -177,7 +177,7 @@ INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -192,15 +192,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.486 s -Wrote files for 2281 helas calls in 18.712 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.759 s +Wrote files for 2281 helas calls in 19.202 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.335 s +ALOHA: aloha creates 5 routines in 0.334 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -208,7 +208,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.327 s +ALOHA: aloha creates 10 routines in 0.324 s VVV1 VVV1 FFV1 @@ -236,8 +236,6 @@ INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG: self.in_madevent_mode =  True [output.py at line 207]  -DEBUG! 
Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile @@ -252,16 +250,16 @@ Hunk #2 succeeded at 255 (offset 112 lines). Hunk #3 succeeded at 333 (offset 112 lines). Hunk #4 succeeded at 361 (offset 112 lines). Hunk #5 succeeded at 406 (offset 112 lines). -DEBUG: p.returncode =  0 [output.py at line 233]  +DEBUG: p.returncode =  0 [output.py at line 232]  Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done. Type "launch" to generate events from this process, or see /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/README Run "open index.html" to see more information about this process. quit -real 0m29.340s -user 0m28.825s -sys 0m0.401s +real 0m30.289s +user 0m29.746s +sys 0m0.431s ************************************************************ * * * W E L C O M E to * @@ -289,6 +287,8 @@ Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run +run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP WARNING! 
CPPRunCard instance has no attribute path quit INFO: diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 1fad08a270..cea1c6ed0d 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0054934024810791016  +DEBUG: model prefixing takes 0.005706787109375  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.864 s +1 processes with 1240 diagrams generated in 1.898 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.563 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.714 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.358 s +ALOHA: aloha creates 5 routines in 0.372 s VVV1 VVV1 FFV1 @@ -205,9 +205,8 @@ FileWriter for / INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  -DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m13.203s -user 0m12.914s -sys 0m0.127s +real 0m13.424s +user 0m13.250s +sys 0m0.118s diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index d1d8f03b76..1d6763011b 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005591630935668945  +DEBUG: model prefixing takes 0.005635261535644531  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. 
INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.076 s +8 processes with 40 diagrams generated in 0.079 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -198,7 +198,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -215,7 +215,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -230,17 +230,17 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s -Wrote files for 32 helas calls in 0.215 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s +Wrote files for 32 helas calls in 0.227 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.154 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.131 s +ALOHA: aloha creates 4 routines in 0.137 s FFV1 FFV1 FFV1 @@ -261,8 +261,6 @@ INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG: self.in_madevent_mode =  True [output.py at line 207]  -DEBUG! Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile @@ -289,16 +287,16 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 247 (offset 26 lines). Hunk #4 succeeded at 281 (offset 32 lines). Hunk #5 succeeded at 326 (offset 32 lines). 
-DEBUG: p.returncode =  0 [output.py at line 233]  +DEBUG: p.returncode =  0 [output.py at line 232]  Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done. Type "launch" to generate events from this process, or see /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/README Run "open index.html" to see more information about this process. quit -real 0m2.149s -user 0m1.674s -sys 0m0.228s +real 0m2.013s +user 0m1.785s +sys 0m0.222s ************************************************************ * * * W E L C O M E to * @@ -326,6 +324,8 @@ Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run +run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP WARNING! CPPRunCard instance has no attribute path quit INFO: diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 2cd643e021..2364924e4f 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0052640438079833984  +DEBUG: model prefixing takes 0.005400180816650391  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.077 s +8 processes with 40 diagrams generated in 0.081 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -206,12 +206,12 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. -Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.140 s +ALOHA: aloha creates 2 routines in 0.151 s FFV1 FFV1 FFV1 @@ -226,9 +226,8 @@ FileWriter for / INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  -DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.783s -user 0m0.565s -sys 0m0.062s +real 0m0.693s +user 0m0.637s +sys 0m0.046s diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index 87bea43991..adfd30903c 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -153,7 +153,7 @@ Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines -ALOHA: aloha creates 1 routines in 0.060 s +ALOHA: aloha creates 1 routines in 0.062 s VVS3 FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. @@ -164,9 +164,8 @@ FileWriter for / INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. 
DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  -DEBUG: self.in_madevent_mode =  False [output.py at line 207]  quit -real 0m0.417s -user 0m0.362s -sys 0m0.046s +real 0m0.455s +user 0m0.398s +sys 0m0.045s diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index f5d02691c0..88175f266b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005496025085449219  +DEBUG: model prefixing takes 0.005621194839477539  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.029 s +5 processes with 7 diagrams generated in 0.031 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.134 s +13 processes with 76 diagrams generated in 0.139 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. 
@@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.800 s +65 processes with 1119 diagrams generated in 1.868 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -497,7 +497,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -514,7 +514,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -531,7 +531,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -548,7 +548,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -565,7 +565,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -582,7 +582,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -599,7 +599,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -616,7 +616,7 @@ INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -633,7 +633,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -650,7 +650,7 @@ INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -667,7 +667,7 @@ INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -684,7 +684,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -701,7 +701,7 @@ INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -718,7 +718,7 @@ INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -735,7 +735,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -752,7 +752,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -769,7 +769,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -786,7 +786,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -801,15 +801,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.265 s -Wrote files for 810 helas calls in 3.185 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.320 s +Wrote files for 810 helas calls in 3.378 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.333 s +ALOHA: aloha creates 5 routines in 0.355 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -817,7 +817,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.309 s +ALOHA: aloha creates 10 routines in 0.329 s VVV1 VVV1 FFV1 @@ -845,8 +845,6 @@ INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize True [output.py at line 206]  -DEBUG: self.in_madevent_mode =  True [output.py at line 207]  -DEBUG! 
Switching on tmad mode (CUDACPP_CODEGEN_TMADMODE=1) DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile @@ -1023,16 +1021,16 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -DEBUG: p.returncode =  0 [output.py at line 233]  +DEBUG: p.returncode =  0 [output.py at line 232]  Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j done. Type "launch" to generate events from this process, or see /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/README Run "open index.html" to see more information about this process. quit -real 0m8.698s -user 0m8.189s -sys 0m0.479s +real 0m9.160s +user 0m8.595s +sys 0m0.527s ************************************************************ * * * W E L C O M E to * @@ -1060,6 +1058,8 @@ Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt No valid web browser found. Please set in ./input/mg5_configuration.txt treatcards run +run_card missed argument cudacpp_backend. Takes default: CPP +run_card missed argument cudacpp_backend. Takes default: CPP WARNING! CPPRunCard instance has no attribute path quit INFO: From a39aa0bf6cf6382d5dcbfbe606535dd8a112291e Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 1 Nov 2023 19:10:26 +0100 Subject: [PATCH 096/119] [oct23av] (complete oct23av?) 
regenerate all 15 processes including the curand patch --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 16 ++--- .../SubProcesses/P1_epem_mupmum/check_sa.cc | 46 +++++++-------- .../ee_mumu.mad/SubProcesses/cudacpp.mk | 9 ++- .../cudacpp/ee_mumu.mad/src/mgOnGpuConfig.h | 12 ++-- .../CODEGEN_cudacpp_ee_mumu_log.txt | 12 ++-- .../P1_Sigma_sm_epem_mupmum/check_sa.cc | 46 +++++++-------- .../ee_mumu.sa/SubProcesses/cudacpp.mk | 9 ++- epochX/cudacpp/ee_mumu.sa/src/mgOnGpuConfig.h | 12 ++-- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 18 +++--- .../SubProcesses/P1_gg_ttx/check_sa.cc | 46 +++++++-------- .../cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 9 ++- epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h | 12 ++-- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 10 ++-- .../P1_Sigma_sm_gg_ttx/check_sa.cc | 46 +++++++-------- .../cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk | 9 ++- epochX/cudacpp/gg_tt.sa/src/mgOnGpuConfig.h | 12 ++-- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 18 +++--- .../SubProcesses/P1_gg_ttx/check_sa.cc | 46 +++++++-------- .../SubProcesses/P2_gg_ttxg/check_sa.cc | 46 +++++++-------- .../gg_tt01g.mad/SubProcesses/cudacpp.mk | 9 ++- .../cudacpp/gg_tt01g.mad/src/mgOnGpuConfig.h | 12 ++-- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 16 ++--- .../SubProcesses/P1_gg_ttxg/check_sa.cc | 46 +++++++-------- .../gg_ttg.mad/SubProcesses/cudacpp.mk | 9 ++- epochX/cudacpp/gg_ttg.mad/src/mgOnGpuConfig.h | 12 ++-- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 10 ++-- .../P1_Sigma_sm_gg_ttxg/check_sa.cc | 46 +++++++-------- .../cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk | 9 ++- epochX/cudacpp/gg_ttg.sa/src/mgOnGpuConfig.h | 12 ++-- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 20 +++---- .../SubProcesses/P1_gg_ttxgg/check_sa.cc | 46 +++++++-------- .../gg_ttgg.mad/SubProcesses/cudacpp.mk | 9 ++- .../cudacpp/gg_ttgg.mad/src/mgOnGpuConfig.h | 12 ++-- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 14 ++--- .../P1_Sigma_sm_gg_ttxgg/check_sa.cc | 46 +++++++-------- 
.../gg_ttgg.sa/SubProcesses/cudacpp.mk | 9 ++- epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuConfig.h | 12 ++-- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 20 +++---- .../SubProcesses/P1_gg_ttxggg/check_sa.cc | 46 +++++++-------- .../gg_ttggg.mad/SubProcesses/cudacpp.mk | 9 ++- .../cudacpp/gg_ttggg.mad/src/mgOnGpuConfig.h | 12 ++-- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 14 ++--- .../P1_Sigma_sm_gg_ttxggg/check_sa.cc | 46 +++++++-------- .../gg_ttggg.sa/SubProcesses/cudacpp.mk | 9 ++- .../cudacpp/gg_ttggg.sa/src/mgOnGpuConfig.h | 12 ++-- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 22 +++---- .../SubProcesses/P1_gu_ttxu/check_sa.cc | 46 +++++++-------- .../SubProcesses/P1_gux_ttxux/check_sa.cc | 46 +++++++-------- .../gq_ttq.mad/SubProcesses/cudacpp.mk | 9 ++- epochX/cudacpp/gq_ttq.mad/src/mgOnGpuConfig.h | 12 ++-- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 12 ++-- .../P1_Sigma_sm_gu_ttxu/check_sa.cc | 46 +++++++-------- .../P1_Sigma_sm_gux_ttxux/check_sa.cc | 46 +++++++-------- .../cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk | 9 ++- epochX/cudacpp/gq_ttq.sa/src/mgOnGpuConfig.h | 12 ++-- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 8 +-- .../P1_Sigma_heft_gg_h/check_sa.cc | 46 +++++++-------- .../heft_gg_h.sa/SubProcesses/cudacpp.mk | 9 ++- .../cudacpp/heft_gg_h.sa/src/mgOnGpuConfig.h | 12 ++-- .../CODEGEN_mad_pp_tt012j_log.txt | 58 +++++++++---------- .../SubProcesses/P0_gg_ttx/check_sa.cc | 46 +++++++-------- .../SubProcesses/P0_uux_ttx/check_sa.cc | 46 +++++++-------- .../SubProcesses/P1_gg_ttxg/check_sa.cc | 46 +++++++-------- .../SubProcesses/P1_gu_ttxu/check_sa.cc | 46 +++++++-------- .../SubProcesses/P1_gux_ttxux/check_sa.cc | 46 +++++++-------- .../SubProcesses/P1_uux_ttxg/check_sa.cc | 46 +++++++-------- .../SubProcesses/P2_gg_ttxgg/check_sa.cc | 46 +++++++-------- .../SubProcesses/P2_gg_ttxuux/check_sa.cc | 46 +++++++-------- .../SubProcesses/P2_gu_ttxgu/check_sa.cc | 46 +++++++-------- .../SubProcesses/P2_gux_ttxgux/check_sa.cc | 46 +++++++-------- 
.../SubProcesses/P2_uc_ttxuc/check_sa.cc | 46 +++++++-------- .../SubProcesses/P2_ucx_ttxucx/check_sa.cc | 46 +++++++-------- .../SubProcesses/P2_uu_ttxuu/check_sa.cc | 46 +++++++-------- .../SubProcesses/P2_uux_ttxccx/check_sa.cc | 46 +++++++-------- .../SubProcesses/P2_uux_ttxgg/check_sa.cc | 46 +++++++-------- .../SubProcesses/P2_uux_ttxuux/check_sa.cc | 46 +++++++-------- .../SubProcesses/P2_uxcx_ttxuxcx/check_sa.cc | 46 +++++++-------- .../SubProcesses/P2_uxux_ttxuxux/check_sa.cc | 46 +++++++-------- .../pp_tt012j.mad/SubProcesses/cudacpp.mk | 9 ++- .../cudacpp/pp_tt012j.mad/src/mgOnGpuConfig.h | 12 ++-- 80 files changed, 1114 insertions(+), 1079 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 5ae99a41fc..a5710dd2c0 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005468845367431641  +DEBUG: model prefixing takes 0.005411624908447266  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,7 +174,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,19 +191,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.103 s +Wrote files for 8 helas calls in 0.100 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.209 s +ALOHA: aloha creates 3 routines in 0.205 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.262 s +ALOHA: aloha creates 7 routines in 0.258 s FFV1 FFV1 FFV2 @@ -248,9 +248,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.024s -user 0m1.716s -sys 0m0.214s +real 0m1.894s +user 0m1.659s +sys 0m0.227s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/check_sa.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/check_sa.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk index 471a7dec51..741e96058b 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk @@ -141,7 +141,11 @@ ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) comma:=, CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) CUINC = -I$(CUDA_HOME)/include/ - CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! + ifeq ($(RNDGEN),hasNoCurand) + CURANDLIBFLAGS= + else + CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+ endif CUOPTFLAGS = -lineinfo CUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math ###CUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow @@ -507,8 +511,9 @@ $(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) # Apply special build flags only to check_sa and CurandRandomNumberKernel (curand headers, #679) $(BUILDDIR)/check_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(CXXFLAGSCURAND) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(CXXFLAGSCURAND) ifeq ($(RNDGEN),hasCurand) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif diff --git a/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuConfig.h index cacab1031a..80032e528b 100644 --- a/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuConfig.h +++ b/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuConfig.h @@ -14,13 +14,15 @@ // ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) // Choose if curand is supported for generating random numbers -// For C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND -#ifdef __CUDACC__ -#undef MGONGPU_HAS_NO_CURAND -#else +// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND +// (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784) +//#ifdef __CUDACC__ //#undef MGONGPU_HAS_NO_CURAND // default ////#define MGONGPU_HAS_NO_CURAND 1 -#endif +//#else +//#undef MGONGPU_HAS_NO_CURAND // default +////#define MGONGPU_HAS_NO_CURAND 1 +//#endif // Choose floating point precision (for everything but color algebra #537) // If one of these macros has been set from outside with e.g. 
-DMGONGPU_FPTYPE_FLOAT, nothing happens (issue #167) diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 62d2920ca0..0d8bff3853 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005658149719238281  +DEBUG: model prefixing takes 0.00572514533996582  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,14 +174,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. 
-Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s +Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.282 s +ALOHA: aloha creates 4 routines in 0.274 s FFV1 FFV1 FFV2 @@ -201,6 +201,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.697s -user 0m0.636s -sys 0m0.055s +real 0m0.691s +user 0m0.628s +sys 0m0.047s diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/check_sa.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/check_sa.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = 
RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! 
CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk index 471a7dec51..741e96058b 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk @@ -141,7 +141,11 @@ ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) comma:=, CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) CUINC = -I$(CUDA_HOME)/include/ - CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! + ifeq ($(RNDGEN),hasNoCurand) + CURANDLIBFLAGS= + else + CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+ endif CUOPTFLAGS = -lineinfo CUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math ###CUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow @@ -507,8 +511,9 @@ $(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) # Apply special build flags only to check_sa and CurandRandomNumberKernel (curand headers, #679) $(BUILDDIR)/check_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(CXXFLAGSCURAND) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(CXXFLAGSCURAND) ifeq ($(RNDGEN),hasCurand) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif diff --git a/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuConfig.h b/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuConfig.h index c0f067f1d8..b247654dcf 100644 --- a/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuConfig.h +++ b/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuConfig.h @@ -14,13 +14,15 @@ // ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) // Choose if curand is supported for generating random numbers -// For C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND -#ifdef __CUDACC__ -#undef MGONGPU_HAS_NO_CURAND -#else +// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND +// (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784) +//#ifdef __CUDACC__ //#undef MGONGPU_HAS_NO_CURAND // default ////#define MGONGPU_HAS_NO_CURAND 1 -#endif +//#else +//#undef MGONGPU_HAS_NO_CURAND // default +////#define MGONGPU_HAS_NO_CURAND 1 +//#endif // Choose floating point precision (for everything but color algebra #537) // If one of these macros has been set from outside with e.g. 
-DMGONGPU_FPTYPE_FLOAT, nothing happens (issue #167) diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index a4d60ee071..b67119b2e5 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0059032440185546875  +DEBUG: model prefixing takes 0.005866527557373047  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,17 +190,17 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 1 subprocesses (3 diagrams) in 0.007 s -Wrote files for 10 helas calls in 0.108 s +Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s +Wrote files for 10 helas calls in 0.103 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.151 s +ALOHA: aloha creates 2 routines in 0.148 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.136 s +ALOHA: aloha creates 4 routines in 0.134 s VVV1 FFV1 FFV1 @@ -237,9 +237,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.900s -user 0m1.531s -sys 0m0.233s +real 0m1.717s +user 0m1.509s +sys 0m0.204s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 471a7dec51..741e96058b 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -141,7 +141,11 @@ ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) comma:=, CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) CUINC = -I$(CUDA_HOME)/include/ - CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! + ifeq ($(RNDGEN),hasNoCurand) + CURANDLIBFLAGS= + else + CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+ endif CUOPTFLAGS = -lineinfo CUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math ###CUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow @@ -507,8 +511,9 @@ $(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) # Apply special build flags only to check_sa and CurandRandomNumberKernel (curand headers, #679) $(BUILDDIR)/check_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(CXXFLAGSCURAND) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(CXXFLAGSCURAND) ifeq ($(RNDGEN),hasCurand) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif diff --git a/epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h index cacab1031a..80032e528b 100644 --- a/epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h +++ b/epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h @@ -14,13 +14,15 @@ // ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) // Choose if curand is supported for generating random numbers -// For C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND -#ifdef __CUDACC__ -#undef MGONGPU_HAS_NO_CURAND -#else +// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND +// (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784) +//#ifdef __CUDACC__ //#undef MGONGPU_HAS_NO_CURAND // default ////#define MGONGPU_HAS_NO_CURAND 1 -#endif +//#else +//#undef MGONGPU_HAS_NO_CURAND // default +////#define MGONGPU_HAS_NO_CURAND 1 +//#endif // Choose floating point precision (for everything but color algebra #537) // If one of these macros has been set from outside with e.g. 
-DMGONGPU_FPTYPE_FLOAT, nothing happens (issue #167) diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index e6413cd3e0..b5c180943d 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005649089813232422  +DEBUG: model prefixing takes 0.0058231353759765625  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -180,7 +180,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.150 s +ALOHA: aloha creates 2 routines in 0.144 s VVV1 FFV1 FFV1 @@ -196,6 +196,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.580s -user 0m0.511s -sys 0m0.060s +real 0m0.550s +user 0m0.484s +sys 0m0.055s diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/check_sa.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/check_sa.cc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - 
RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk index 471a7dec51..741e96058b 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk @@ -141,7 +141,11 @@ ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) comma:=, CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) CUINC = -I$(CUDA_HOME)/include/ - CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! + ifeq ($(RNDGEN),hasNoCurand) + CURANDLIBFLAGS= + else + CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+ endif CUOPTFLAGS = -lineinfo CUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math ###CUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow @@ -507,8 +511,9 @@ $(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) # Apply special build flags only to check_sa and CurandRandomNumberKernel (curand headers, #679) $(BUILDDIR)/check_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(CXXFLAGSCURAND) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(CXXFLAGSCURAND) ifeq ($(RNDGEN),hasCurand) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif diff --git a/epochX/cudacpp/gg_tt.sa/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_tt.sa/src/mgOnGpuConfig.h index c0f067f1d8..b247654dcf 100644 --- a/epochX/cudacpp/gg_tt.sa/src/mgOnGpuConfig.h +++ b/epochX/cudacpp/gg_tt.sa/src/mgOnGpuConfig.h @@ -14,13 +14,15 @@ // ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) // Choose if curand is supported for generating random numbers -// For C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND -#ifdef __CUDACC__ -#undef MGONGPU_HAS_NO_CURAND -#else +// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND +// (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784) +//#ifdef __CUDACC__ //#undef MGONGPU_HAS_NO_CURAND // default ////#define MGONGPU_HAS_NO_CURAND 1 -#endif +//#else +//#undef MGONGPU_HAS_NO_CURAND // default +////#define MGONGPU_HAS_NO_CURAND 1 +//#endif // Choose floating point precision (for everything but color algebra #537) // If one of these macros has been set from outside with e.g. 
-DMGONGPU_FPTYPE_FLOAT, nothing happens (issue #167) diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 3cd7342a49..745301de6c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005325794219970703  +DEBUG: model prefixing takes 0.005591392517089844  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -185,7 +185,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -202,7 +202,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -217,7 +217,7 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 2 subprocesses (19 diagrams) in 0.044 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.043 s Wrote files for 46 helas calls in 0.253 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -225,7 +225,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.340 s +ALOHA: aloha creates 5 routines in 0.331 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -233,7 +233,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.320 s +ALOHA: aloha creates 10 routines in 0.613 s VVV1 VVV1 FFV1 @@ -283,9 +283,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.398s -user 0m2.098s -sys 0m0.286s +real 0m2.699s +user 0m2.091s +sys 0m0.234s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/check_sa.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/check_sa.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/check_sa.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/check_sa.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk index 471a7dec51..741e96058b 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk @@ -141,7 +141,11 @@ ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) comma:=, CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) CUINC = -I$(CUDA_HOME)/include/ - CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! + ifeq ($(RNDGEN),hasNoCurand) + CURANDLIBFLAGS= + else + CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+ endif CUOPTFLAGS = -lineinfo CUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math ###CUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow @@ -507,8 +511,9 @@ $(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) # Apply special build flags only to check_sa and CurandRandomNumberKernel (curand headers, #679) $(BUILDDIR)/check_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(CXXFLAGSCURAND) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(CXXFLAGSCURAND) ifeq ($(RNDGEN),hasCurand) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif diff --git a/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuConfig.h index cacab1031a..80032e528b 100644 --- a/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuConfig.h +++ b/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuConfig.h @@ -14,13 +14,15 @@ // ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) // Choose if curand is supported for generating random numbers -// For C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND -#ifdef __CUDACC__ -#undef MGONGPU_HAS_NO_CURAND -#else +// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND +// (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784) +//#ifdef __CUDACC__ //#undef MGONGPU_HAS_NO_CURAND // default ////#define MGONGPU_HAS_NO_CURAND 1 -#endif +//#else +//#undef MGONGPU_HAS_NO_CURAND // default +////#define MGONGPU_HAS_NO_CURAND 1 +//#endif // Choose floating point precision (for everything but color algebra #537) // If one of these macros has been set from outside with e.g. 
-DMGONGPU_FPTYPE_FLOAT, nothing happens (issue #167) diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index ff1c677ef2..ebf1f8f3f2 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005454063415527344  +DEBUG: model prefixing takes 0.0056226253509521484  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,14 +191,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s -Wrote files for 36 helas calls in 0.155 s +Wrote files for 36 helas calls in 0.152 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.337 s +ALOHA: aloha creates 5 routines in 0.332 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.320 s +ALOHA: aloha creates 10 routines in 0.316 s VVV1 VVV1 FFV1 @@ -252,9 +252,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.259s -user 0m2.015s -sys 0m0.240s +real 0m2.218s +user 0m1.984s +sys 0m0.221s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/check_sa.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/check_sa.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk index 471a7dec51..741e96058b 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk @@ -141,7 +141,11 @@ ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) comma:=, CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) CUINC = -I$(CUDA_HOME)/include/ - CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! + ifeq ($(RNDGEN),hasNoCurand) + CURANDLIBFLAGS= + else + CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+ endif CUOPTFLAGS = -lineinfo CUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math ###CUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow @@ -507,8 +511,9 @@ $(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) # Apply special build flags only to check_sa and CurandRandomNumberKernel (curand headers, #679) $(BUILDDIR)/check_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(CXXFLAGSCURAND) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(CXXFLAGSCURAND) ifeq ($(RNDGEN),hasCurand) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif diff --git a/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuConfig.h index cacab1031a..80032e528b 100644 --- a/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuConfig.h +++ b/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuConfig.h @@ -14,13 +14,15 @@ // ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) // Choose if curand is supported for generating random numbers -// For C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND -#ifdef __CUDACC__ -#undef MGONGPU_HAS_NO_CURAND -#else +// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND +// (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784) +//#ifdef __CUDACC__ //#undef MGONGPU_HAS_NO_CURAND // default ////#define MGONGPU_HAS_NO_CURAND 1 -#endif +//#else +//#undef MGONGPU_HAS_NO_CURAND // default +////#define MGONGPU_HAS_NO_CURAND 1 +//#endif // Choose floating point precision (for everything but color algebra #537) // If one of these macros has been set from outside with e.g. 
-DMGONGPU_FPTYPE_FLOAT, nothing happens (issue #167) diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 29caa2cb81..0b685b3fc5 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005802631378173828  +DEBUG: model prefixing takes 0.005393505096435547  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.341 s +ALOHA: aloha creates 5 routines in 0.332 s VVV1 VVV1 FFV1 @@ -204,6 +204,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.840s -user 0m0.764s -sys 0m0.063s +real 0m0.798s +user 0m0.734s +sys 0m0.054s diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/check_sa.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/check_sa.cc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined 
MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk index 471a7dec51..741e96058b 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk @@ -141,7 +141,11 @@ ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) comma:=, CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) CUINC = -I$(CUDA_HOME)/include/ - CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! + ifeq ($(RNDGEN),hasNoCurand) + CURANDLIBFLAGS= + else + CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+ endif CUOPTFLAGS = -lineinfo CUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math ###CUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow @@ -507,8 +511,9 @@ $(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) # Apply special build flags only to check_sa and CurandRandomNumberKernel (curand headers, #679) $(BUILDDIR)/check_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(CXXFLAGSCURAND) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(CXXFLAGSCURAND) ifeq ($(RNDGEN),hasCurand) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif diff --git a/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuConfig.h index c0f067f1d8..b247654dcf 100644 --- a/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuConfig.h +++ b/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuConfig.h @@ -14,13 +14,15 @@ // ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) // Choose if curand is supported for generating random numbers -// For C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND -#ifdef __CUDACC__ -#undef MGONGPU_HAS_NO_CURAND -#else +// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND +// (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784) +//#ifdef __CUDACC__ //#undef MGONGPU_HAS_NO_CURAND // default ////#define MGONGPU_HAS_NO_CURAND 1 -#endif +//#else +//#undef MGONGPU_HAS_NO_CURAND // default +////#define MGONGPU_HAS_NO_CURAND 1 +//#endif // Choose floating point precision (for everything but color algebra #537) // If one of these macros has been set from outside with e.g. 
-DMGONGPU_FPTYPE_FLOAT, nothing happens (issue #167) diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 9a52869de7..5e61fa70aa 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005549907684326172  +DEBUG: model prefixing takes 0.005539655685424805  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.161 s +1 processes with 123 diagrams generated in 0.160 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,15 +190,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.436 s -Wrote files for 222 helas calls in 0.713 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.433 s +Wrote files for 222 helas calls in 0.701 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.347 s +ALOHA: aloha creates 5 routines in 0.338 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.325 s +ALOHA: aloha creates 10 routines in 0.316 s VVV1 VVV1 FFV1 @@ -255,9 +255,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.391s -user 0m3.123s -sys 0m0.258s +real 0m3.311s +user 0m3.076s +sys 0m0.229s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/check_sa.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/check_sa.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk index 471a7dec51..741e96058b 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk @@ -141,7 +141,11 @@ ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) comma:=, CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) CUINC = -I$(CUDA_HOME)/include/ - CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! + ifeq ($(RNDGEN),hasNoCurand) + CURANDLIBFLAGS= + else + CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+ endif CUOPTFLAGS = -lineinfo CUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math ###CUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow @@ -507,8 +511,9 @@ $(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) # Apply special build flags only to check_sa and CurandRandomNumberKernel (curand headers, #679) $(BUILDDIR)/check_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(CXXFLAGSCURAND) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(CXXFLAGSCURAND) ifeq ($(RNDGEN),hasCurand) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif diff --git a/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuConfig.h index cacab1031a..80032e528b 100644 --- a/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuConfig.h +++ b/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuConfig.h @@ -14,13 +14,15 @@ // ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) // Choose if curand is supported for generating random numbers -// For C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND -#ifdef __CUDACC__ -#undef MGONGPU_HAS_NO_CURAND -#else +// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND +// (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784) +//#ifdef __CUDACC__ //#undef MGONGPU_HAS_NO_CURAND // default ////#define MGONGPU_HAS_NO_CURAND 1 -#endif +//#else +//#undef MGONGPU_HAS_NO_CURAND // default +////#define MGONGPU_HAS_NO_CURAND 1 +//#endif // Choose floating point precision (for everything but color algebra #537) // If one of these macros has been set from outside with e.g. 
-DMGONGPU_FPTYPE_FLOAT, nothing happens (issue #167) diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index a5d16ee4c0..5fe0f32625 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005487680435180664  +DEBUG: model prefixing takes 0.005811929702758789  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.163 s +1 processes with 123 diagrams generated in 0.159 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.437 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.431 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.330 s +ALOHA: aloha creates 5 routines in 0.323 s VVV1 VVV1 FFV1 @@ -207,6 +207,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m1.510s -user 0m1.434s -sys 0m0.059s +real 0m1.477s +user 0m1.398s +sys 0m0.055s diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/check_sa.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/check_sa.cc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no 
curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! 
CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk index 471a7dec51..741e96058b 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk @@ -141,7 +141,11 @@ ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) comma:=, CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) CUINC = -I$(CUDA_HOME)/include/ - CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! + ifeq ($(RNDGEN),hasNoCurand) + CURANDLIBFLAGS= + else + CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+ endif CUOPTFLAGS = -lineinfo CUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math ###CUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow @@ -507,8 +511,9 @@ $(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) # Apply special build flags only to check_sa and CurandRandomNumberKernel (curand headers, #679) $(BUILDDIR)/check_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(CXXFLAGSCURAND) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(CXXFLAGSCURAND) ifeq ($(RNDGEN),hasCurand) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif diff --git a/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuConfig.h index c0f067f1d8..b247654dcf 100644 --- a/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuConfig.h +++ b/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuConfig.h @@ -14,13 +14,15 @@ // ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) // Choose if curand is supported for generating random numbers -// For C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND -#ifdef __CUDACC__ -#undef MGONGPU_HAS_NO_CURAND -#else +// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND +// (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784) +//#ifdef __CUDACC__ //#undef MGONGPU_HAS_NO_CURAND // default ////#define MGONGPU_HAS_NO_CURAND 1 -#endif +//#else +//#undef MGONGPU_HAS_NO_CURAND // default +////#define MGONGPU_HAS_NO_CURAND 1 +//#endif // Choose floating point precision (for everything but color algebra #537) // If one of these macros has been set from outside with e.g. 
-DMGONGPU_FPTYPE_FLOAT, nothing happens (issue #167) diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index aae87d4baa..e635cb9bc3 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005763530731201172  +DEBUG: model prefixing takes 0.00549006462097168  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.923 s +1 processes with 1240 diagrams generated in 1.904 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -177,7 +177,7 @@ INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -192,15 +192,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.759 s -Wrote files for 2281 helas calls in 19.202 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.744 s +Wrote files for 2281 helas calls in 18.741 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.334 s +ALOHA: aloha creates 5 routines in 0.328 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -208,7 +208,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.324 s +ALOHA: aloha creates 10 routines in 0.317 s VVV1 VVV1 FFV1 @@ -257,9 +257,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m30.289s -user 0m29.746s -sys 0m0.431s +real 0m29.654s +user 0m29.161s +sys 0m0.387s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/check_sa.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/check_sa.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk index 471a7dec51..741e96058b 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk @@ -141,7 +141,11 @@ ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) comma:=, CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) CUINC = -I$(CUDA_HOME)/include/ - CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! + ifeq ($(RNDGEN),hasNoCurand) + CURANDLIBFLAGS= + else + CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+ endif CUOPTFLAGS = -lineinfo CUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math ###CUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow @@ -507,8 +511,9 @@ $(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) # Apply special build flags only to check_sa and CurandRandomNumberKernel (curand headers, #679) $(BUILDDIR)/check_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(CXXFLAGSCURAND) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(CXXFLAGSCURAND) ifeq ($(RNDGEN),hasCurand) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif diff --git a/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuConfig.h index cacab1031a..80032e528b 100644 --- a/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuConfig.h +++ b/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuConfig.h @@ -14,13 +14,15 @@ // ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) // Choose if curand is supported for generating random numbers -// For C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND -#ifdef __CUDACC__ -#undef MGONGPU_HAS_NO_CURAND -#else +// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND +// (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784) +//#ifdef __CUDACC__ //#undef MGONGPU_HAS_NO_CURAND // default ////#define MGONGPU_HAS_NO_CURAND 1 -#endif +//#else +//#undef MGONGPU_HAS_NO_CURAND // default +////#define MGONGPU_HAS_NO_CURAND 1 +//#endif // Choose floating point precision (for everything but color algebra #537) // If one of these macros has been set from outside with e.g. 
-DMGONGPU_FPTYPE_FLOAT, nothing happens (issue #167) diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index cea1c6ed0d..d4301f585b 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005706787109375  +DEBUG: model prefixing takes 0.005515336990356445  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.898 s +1 processes with 1240 diagrams generated in 1.906 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.714 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.668 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.372 s +ALOHA: aloha creates 5 routines in 0.351 s VVV1 VVV1 FFV1 @@ -207,6 +207,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m13.424s -user 0m13.250s -sys 0m0.118s +real 0m13.188s +user 0m13.024s +sys 0m0.114s diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/check_sa.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/check_sa.cc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if 
build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! 
CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk index 471a7dec51..741e96058b 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk @@ -141,7 +141,11 @@ ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) comma:=, CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) CUINC = -I$(CUDA_HOME)/include/ - CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! + ifeq ($(RNDGEN),hasNoCurand) + CURANDLIBFLAGS= + else + CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+ endif CUOPTFLAGS = -lineinfo CUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math ###CUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow @@ -507,8 +511,9 @@ $(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) # Apply special build flags only to check_sa and CurandRandomNumberKernel (curand headers, #679) $(BUILDDIR)/check_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(CXXFLAGSCURAND) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(CXXFLAGSCURAND) ifeq ($(RNDGEN),hasCurand) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif diff --git a/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuConfig.h index c0f067f1d8..b247654dcf 100644 --- a/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuConfig.h +++ b/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuConfig.h @@ -14,13 +14,15 @@ // ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) // Choose if curand is supported for generating random numbers -// For C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND -#ifdef __CUDACC__ -#undef MGONGPU_HAS_NO_CURAND -#else +// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND +// (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784) +//#ifdef __CUDACC__ //#undef MGONGPU_HAS_NO_CURAND // default ////#define MGONGPU_HAS_NO_CURAND 1 -#endif +//#else +//#undef MGONGPU_HAS_NO_CURAND // default +////#define MGONGPU_HAS_NO_CURAND 1 +//#endif // Choose floating point precision (for everything but color algebra #537) // If one of these macros has been set from outside with e.g. 
-DMGONGPU_FPTYPE_FLOAT, nothing happens (issue #167) diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 1d6763011b..b64684ce9d 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005635261535644531  +DEBUG: model prefixing takes 0.005499839782714844  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.079 s +8 processes with 40 diagrams generated in 0.078 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -198,7 +198,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -215,7 +215,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -230,17 +230,17 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s -Wrote files for 32 helas calls in 0.227 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s +Wrote files for 32 helas calls in 0.223 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.154 s +ALOHA: aloha creates 2 routines in 0.148 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.137 s +ALOHA: aloha creates 4 routines in 0.135 s FFV1 FFV1 FFV1 @@ -294,9 +294,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.013s -user 0m1.785s -sys 0m0.222s +real 0m1.953s +user 0m1.723s +sys 0m0.227s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/check_sa.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/check_sa.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/check_sa.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/check_sa.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk index 471a7dec51..741e96058b 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk @@ -141,7 +141,11 @@ ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) comma:=, CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) CUINC = -I$(CUDA_HOME)/include/ - CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! + ifeq ($(RNDGEN),hasNoCurand) + CURANDLIBFLAGS= + else + CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+ endif CUOPTFLAGS = -lineinfo CUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math ###CUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow @@ -507,8 +511,9 @@ $(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) # Apply special build flags only to check_sa and CurandRandomNumberKernel (curand headers, #679) $(BUILDDIR)/check_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(CXXFLAGSCURAND) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(CXXFLAGSCURAND) ifeq ($(RNDGEN),hasCurand) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif diff --git a/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuConfig.h index cacab1031a..80032e528b 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuConfig.h +++ b/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuConfig.h @@ -14,13 +14,15 @@ // ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) // Choose if curand is supported for generating random numbers -// For C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND -#ifdef __CUDACC__ -#undef MGONGPU_HAS_NO_CURAND -#else +// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND +// (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784) +//#ifdef __CUDACC__ //#undef MGONGPU_HAS_NO_CURAND // default ////#define MGONGPU_HAS_NO_CURAND 1 -#endif +//#else +//#undef MGONGPU_HAS_NO_CURAND // default +////#define MGONGPU_HAS_NO_CURAND 1 +//#endif // Choose floating point precision (for everything but color algebra #537) // If one of these macros has been set from outside with e.g. 
-DMGONGPU_FPTYPE_FLOAT, nothing happens (issue #167) diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 2364924e4f..6e9e4386f9 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005400180816650391  +DEBUG: model prefixing takes 0.005594491958618164  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.081 s +8 processes with 40 diagrams generated in 0.082 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -211,7 +211,7 @@ Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.151 s +ALOHA: aloha creates 2 routines in 0.147 s FFV1 FFV1 FFV1 @@ -228,6 +228,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.693s -user 0m0.637s -sys 0m0.046s +real 0m0.666s +user 0m0.604s +sys 0m0.056s diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/check_sa.cc 
b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/check_sa.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/check_sa.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/check_sa.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk index 471a7dec51..741e96058b 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk @@ -141,7 +141,11 @@ ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) comma:=, CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) CUINC = -I$(CUDA_HOME)/include/ - CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! + ifeq ($(RNDGEN),hasNoCurand) + CURANDLIBFLAGS= + else + CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+ endif CUOPTFLAGS = -lineinfo CUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math ###CUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow @@ -507,8 +511,9 @@ $(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) # Apply special build flags only to check_sa and CurandRandomNumberKernel (curand headers, #679) $(BUILDDIR)/check_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(CXXFLAGSCURAND) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(CXXFLAGSCURAND) ifeq ($(RNDGEN),hasCurand) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif diff --git a/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuConfig.h b/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuConfig.h index c0f067f1d8..b247654dcf 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuConfig.h +++ b/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuConfig.h @@ -14,13 +14,15 @@ // ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) // Choose if curand is supported for generating random numbers -// For C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND -#ifdef __CUDACC__ -#undef MGONGPU_HAS_NO_CURAND -#else +// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND +// (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784) +//#ifdef __CUDACC__ //#undef MGONGPU_HAS_NO_CURAND // default ////#define MGONGPU_HAS_NO_CURAND 1 -#endif +//#else +//#undef MGONGPU_HAS_NO_CURAND // default +////#define MGONGPU_HAS_NO_CURAND 1 +//#endif // Choose floating point precision (for everything but color algebra #537) // If one of these macros has been set from outside with e.g. 
-DMGONGPU_FPTYPE_FLOAT, nothing happens (issue #167) diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index adfd30903c..7463a314bf 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -153,7 +153,7 @@ Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines -ALOHA: aloha creates 1 routines in 0.062 s +ALOHA: aloha creates 1 routines in 0.063 s VVS3 FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. @@ -166,6 +166,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.455s -user 0m0.398s -sys 0m0.045s +real 0m0.437s +user 0m0.372s +sys 0m0.054s diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/check_sa.cc b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/check_sa.cc +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand 
+#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef 
MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk index 471a7dec51..741e96058b 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk @@ -141,7 +141,11 @@ ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) comma:=, CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) CUINC = -I$(CUDA_HOME)/include/ - CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! + ifeq ($(RNDGEN),hasNoCurand) + CURANDLIBFLAGS= + else + CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+ endif CUOPTFLAGS = -lineinfo CUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math ###CUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow @@ -507,8 +511,9 @@ $(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) # Apply special build flags only to check_sa and CurandRandomNumberKernel (curand headers, #679) $(BUILDDIR)/check_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(CXXFLAGSCURAND) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(CXXFLAGSCURAND) ifeq ($(RNDGEN),hasCurand) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif diff --git a/epochX/cudacpp/heft_gg_h.sa/src/mgOnGpuConfig.h b/epochX/cudacpp/heft_gg_h.sa/src/mgOnGpuConfig.h index c0f067f1d8..b247654dcf 100644 --- a/epochX/cudacpp/heft_gg_h.sa/src/mgOnGpuConfig.h +++ b/epochX/cudacpp/heft_gg_h.sa/src/mgOnGpuConfig.h @@ -14,13 +14,15 @@ // ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) // Choose if curand is supported for generating random numbers -// For C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND -#ifdef __CUDACC__ -#undef MGONGPU_HAS_NO_CURAND -#else +// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND +// (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784) +//#ifdef __CUDACC__ //#undef MGONGPU_HAS_NO_CURAND // default ////#define MGONGPU_HAS_NO_CURAND 1 -#endif +//#else +//#undef MGONGPU_HAS_NO_CURAND // default +////#define MGONGPU_HAS_NO_CURAND 1 +//#endif // Choose floating point precision (for everything but color algebra #537) // If one of these macros has been set from outside with e.g. 
-DMGONGPU_FPTYPE_FLOAT, nothing happens (issue #167) diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 88175f266b..d80455a6eb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005621194839477539  +DEBUG: model prefixing takes 0.005620479583740234  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.031 s +5 processes with 7 diagrams generated in 0.029 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.139 s +13 processes with 76 diagrams generated in 0.138 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. 
@@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.868 s +65 processes with 1119 diagrams generated in 1.855 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -497,7 +497,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -514,7 +514,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -531,7 +531,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -548,7 +548,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -565,7 +565,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -582,7 +582,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -599,7 +599,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -616,7 +616,7 @@ INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -633,7 +633,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -650,7 +650,7 @@ INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -667,7 +667,7 @@ INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -684,7 +684,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -701,7 +701,7 @@ INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -718,7 +718,7 @@ INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -735,7 +735,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -752,7 +752,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -769,7 +769,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -786,7 +786,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -801,15 +801,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.320 s -Wrote files for 810 helas calls in 3.378 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.314 s +Wrote files for 810 helas calls in 3.312 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.355 s +ALOHA: aloha creates 5 routines in 0.340 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -817,7 +817,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.329 s +ALOHA: aloha creates 10 routines in 0.317 s VVV1 VVV1 FFV1 @@ -1028,9 +1028,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m9.160s -user 0m8.595s -sys 0m0.527s +real 0m8.998s +user 0m8.453s +sys 0m0.504s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/check_sa.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/check_sa.cc index d2af908f0d..3fbf0ffbee 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/check_sa.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/check_sa.cc @@ -133,12 +133,12 @@ main( int argc, char** argv ) CurandHost = 1, CurandDevice = 2 }; -#ifdef __CUDACC__ - RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU -#elif not defined MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#ifdef MGONGPU_HAS_NO_CURAND + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784) +#elif defined __CUDACC__ + RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on GPU if build has curand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has no curand + RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -149,7 +149,7 @@ main( int argc, char** argv ) #ifdef __CUDACC__ RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -176,18 +176,20 @@ main( int argc, char** argv ) } else if( arg == "--curdev" ) { -#ifdef __CUDACC__ - rndgen = RandomNumberMode::CurandDevice; -#else +#ifndef __CUDACC__ throw std::runtime_error( "CurandDevice is not supported on CPUs" ); +#elif defined MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "CurandDevice is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandDevice; #endif } else if( arg == "--curhst" ) { -#ifndef MGONGPU_HAS_NO_CURAND - rndgen = RandomNumberMode::CurandHost; -#else +#ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); +#else + rndgen = RandomNumberMode::CurandHost; #endif } else if( arg == "--common" ) @@ -419,30 +421,26 @@ main( int argc, char** argv ) { prnk.reset( new CommonRandomNumberKernel( hstRndmom ) ); } -#ifndef MGONGPU_HAS_NO_CURAND else if( rndgen == RandomNumberMode::CurandHost ) { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandHost is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#else const bool onDevice = false; prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif } -#ifdef __CUDACC__ else { +#ifdef MGONGPU_HAS_NO_CURAND + throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); - } #else - else - { - throw std::logic_error( "CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) - } + throw std::logic_error( "INTERNAL ERROR! 
CurandDevice is not supported on CPUs" ); // INTERNAL ERROR (no path to this statement) #endif -#else - else - { - throw std::logic_error( "This application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) } -#endif // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk index 471a7dec51..741e96058b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk @@ -141,7 +141,11 @@ ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) comma:=, CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) CUINC = -I$(CUDA_HOME)/include/ - CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! + ifeq ($(RNDGEN),hasNoCurand) + CURANDLIBFLAGS= + else + CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+ endif CUOPTFLAGS = -lineinfo CUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math ###CUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow @@ -507,8 +511,9 @@ $(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) # Apply special build flags only to check_sa and CurandRandomNumberKernel (curand headers, #679) $(BUILDDIR)/check_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(CXXFLAGSCURAND) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CXXFLAGSCURAND) +$(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(CXXFLAGSCURAND) ifeq ($(RNDGEN),hasCurand) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif diff --git a/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuConfig.h index cacab1031a..80032e528b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuConfig.h +++ b/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuConfig.h @@ -14,13 +14,15 @@ // ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) // Choose if curand is supported for generating random numbers -// For C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND -#ifdef __CUDACC__ -#undef MGONGPU_HAS_NO_CURAND -#else +// For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND +// (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784) +//#ifdef __CUDACC__ //#undef MGONGPU_HAS_NO_CURAND // default ////#define MGONGPU_HAS_NO_CURAND 1 -#endif +//#else +//#undef MGONGPU_HAS_NO_CURAND // default +////#define MGONGPU_HAS_NO_CURAND 1 +//#endif // Choose floating point precision (for everything but color algebra #537) // If one of these macros has been set from outside with e.g. 
-DMGONGPU_FPTYPE_FLOAT, nothing happens (issue #167) From 51a2e03b20cdbbefcca7ee9a8cec42a64dd0617d Mon Sep 17 00:00:00 2001 From: Olivier Mattelaer Date: Wed, 1 Nov 2023 22:37:22 +0100 Subject: [PATCH 097/119] remove all monkeypatch method --- MG5aMC/mg5amcnlo | 2 +- epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index 49c93e01b8..8419b703e8 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit 49c93e01b8596cbdb4e65f628601de1e6f08c744 +Subproject commit 8419b703e8fa9cc11d2da82e2c41558324e7109c diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index 524045919c..8961036fb1 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -269,7 +269,6 @@ def add_madevent_plugin_fct(self): all variable that are file related should be called as madgraph.dir.file.variable """ plugin_path = os.path.dirname(os.path.realpath( __file__ )) - ###files.cp(pjoin(plugin_path, 'plugin_interface.py'), pjoin(self.dir_path, 'bin', 'internal')) # AV FIXME (added by OM, but file is missing?) 
files.cp(pjoin(plugin_path, 'launch_plugin.py'), pjoin(self.dir_path, 'bin', 'internal')) files.ln(pjoin(self.dir_path, 'lib'), pjoin(self.dir_path, 'SubProcesses')) From 3a738036b8879a4f61c13e53154efebca870a59e Mon Sep 17 00:00:00 2001 From: Olivier Mattelaer Date: Wed, 1 Nov 2023 23:01:58 +0100 Subject: [PATCH 098/119] force flag also for GCC on mac --- .../PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common index fd6ca72279..e19a7d2054 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common @@ -118,7 +118,7 @@ index 348c283be..65369d610 100644 -$(PROG): $(PROCESS) auto_dsig.o $(LIBS) $(MATRIX) - $(FC) -o $(PROG) $(PROCESS) $(MATRIX) $(LINKLIBS) $(LDFLAGS) $(BIASDEPENDENCIES) -fopenmp -+ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) ++ifeq ($(UNAME),Darwin) +LDFLAGS += -lc++ # avoid 'Undefined symbols' for chrono::steady_clock on macOS (checked with otool -L libmg5amc_gg_ttx_cpp.so) +LDFLAGS += -mmacosx-version-min=11.3 # avoid "ld: warning: object file was built for newer macOS version than being linked" +else From b46724efb7638e94fa68228c7863ab703c4595df Mon Sep 17 00:00:00 2001 From: Olivier Mattelaer Date: Wed, 1 Nov 2023 23:04:58 +0100 Subject: [PATCH 099/119] put back the make_opts as required for consistency with MG5aMC practise for all makefile --- .../madgraph/iolibs/template_files/gpu/cudacpp.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index 6ba8d99d3e..3ec0810703 100644 --- 
a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -27,7 +27,7 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) +include ../../Source/make_opts #put back by OM (crucial for MG5aMC flag consistency/documentation) #------------------------------------------------------------------------------- From 4e1dccb4446f0a63a5654687beb54d5787f8c63f Mon Sep 17 00:00:00 2001 From: Olivier Mattelaer Date: Thu, 2 Nov 2023 00:00:54 +0100 Subject: [PATCH 100/119] forbid openmp on mac --- .../madgraph/iolibs/template_files/gpu/cudacpp.mk | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index 3ec0810703..cb8c09df48 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -233,6 +233,8 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) +else ifneq ($(UNAME), 'Darwin') +override OMPFLAGS = # AV disable OpenMP MT on mac else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -722,7 +724,7 @@ ifneq ($(shell which flock 2>/dev/null),) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else - $(MAKE) -C $(TESTDIR) + if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi endif #------------------------------------------------------------------------------- From d4452f54dc3bc27918d352bcbda3a204cdbea054 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 2 Nov 2023 07:59:08 +0100 Subject: [PATCH 101/119] [oct23av] upgrade back mg5amcnlo to the latest gpucpp (Olivier downgraded that to an older version but I think this was accidental) --- MG5aMC/mg5amcnlo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index 8419b703e8..49c93e01b8 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit 8419b703e8fa9cc11d2da82e2c41558324e7109c +Subproject commit 49c93e01b8596cbdb4e65f628601de1e6f08c744 From d69bd0bc8a9a20fdcf09b27f122d5c1007a622f3 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 2 Nov 2023 14:14:08 +0100 Subject: [PATCH 102/119] [oct23av] in CODEGEN cudacpp.mk, only include Source/make_opts if Source exists (e.g it is missing in .sa directories) #787 --- .../madgraph/iolibs/template_files/gpu/cudacpp.mk | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index cb8c09df48..05ba4c361a 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -27,7 +27,15 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -include ../../Source/make_opts #put back by OM (crucial for MG5aMC flag consistency/documentation) 
+#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: this is crucial for MG5aMC flag consistency/documentation +# AV: temporarely comment this out because it breaks cudacpp builds +ifneq ($(wildcard ../../Source/make_opts),) +include ../../Source/make_opts +endif #------------------------------------------------------------------------------- From 75fbc3325957c2056a2a9ec4498ebee2589b6c39 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 3 Nov 2023 07:36:23 +0100 Subject: [PATCH 103/119] [oct23av] regenerate ggtt.mad including Olivier's patches - 'make cleanall' now fails because make_opts is included #787 (*NB OpenMP is now disabled by default!*) This boils down to make -f cudacpp.mk cleanall OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 HRDCOD=0 RNDGEN=hasCurand Building in BUILDDIR=. for tag=512y_d_inl0_hrd0_hasCurand (USEBUILDDIR is not set) make USEBUILDDIR=0 clean -f ../../Source/make_opts make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: *** No rule to make target 'clean'. Stop. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' --- .../cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 16 ++++++++-------- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 14 ++++++++++++-- epochX/cudacpp/gg_tt.mad/SubProcesses/makefile | 2 +- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index b67119b2e5..7b7464a85f 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005866527557373047  +DEBUG: model prefixing takes 0.005258798599243164  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,16 +191,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.103 s +Wrote files for 10 helas calls in 0.101 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.148 s +ALOHA: aloha creates 2 routines in 0.143 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.134 s +ALOHA: aloha creates 4 routines in 0.130 s VVV1 FFV1 FFV1 @@ -237,9 +237,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.717s -user 0m1.509s -sys 0m0.204s +real 0m1.870s +user 0m1.453s +sys 0m0.225s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 741e96058b..c840ce4643 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -27,7 +27,15 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: this is crucial for MG5aMC flag consistency/documentation +# AV: temporarely comment this out because it breaks cudacpp builds +ifneq ($(wildcard ../../Source/make_opts),) +include ../../Source/make_opts +endif #------------------------------------------------------------------------------- @@ -233,6 +241,8 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) +else ifneq ($(UNAME), 'Darwin') +override OMPFLAGS = # AV disable OpenMP MT on mac else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -722,7 +732,7 @@ ifneq ($(shell which flock 2>/dev/null),) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else - $(MAKE) -C $(TESTDIR) + if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile index 65369d6101..d572486c2e 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/makefile @@ -95,7 +95,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +ifeq ($(UNAME),Darwin) LDFLAGS += -lc++ # avoid 'Undefined symbols' for chrono::steady_clock on macOS (checked with otool -L libmg5amc_gg_ttx_cpp.so) LDFLAGS += -mmacosx-version-min=11.3 # avoid "ld: warning: object file was built for newer macOS version than being linked" else From 707065ef5f1f28e7e69f9d215ec9c8e340c718c2 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 3 Nov 2023 07:46:25 +0100 Subject: [PATCH 104/119] [oct23av] in ggtt.mad cudacpp.mk, add some debug printouts to show how including make_opts causes issues here: the CUDACPP_MAKEFILE variable is corrupted #787 make -f cudacpp.mk cleanall CUDACPP_MAKEFILE='cudacpp.mk' CUDACPP_MAKEFILE='../../Source/make_opts' cudacpp.mk:42: *** exit. Stop. --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index c840ce4643..c2da7528c4 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -7,6 +7,8 @@ #=== NB: different names (e.g. 
cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories CUDACPP_MAKEFILE = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') + CUDACPP_SRC_MAKEFILE = cudacpp_src.mk #------------------------------------------------------------------------------- @@ -36,6 +38,8 @@ UNAME_P := $(shell uname -p) ifneq ($(wildcard ../../Source/make_opts),) include ../../Source/make_opts endif +$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') +$(error exit) #------------------------------------------------------------------------------- From c92ec8e9c1c52198b3ed6d77aa60e3a819ccbced Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 3 Nov 2023 07:48:54 +0100 Subject: [PATCH 105/119] [oct23av] in ggtt.mad cudacpp.mk, try to fix the CUDACPP_MAKEFILE issue with override, this is irrelevant! #787 make -f cudacpp.mk cleanall CUDACPP_MAKEFILE='cudacpp.mk' CUDACPP_MAKEFILE='../../Source/make_opts' cudacpp.mk:42: *** exit. Stop. --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index c2da7528c4..2f40beb739 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -6,7 +6,7 @@ #=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) #=== NB: different names (e.g. 
cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories -CUDACPP_MAKEFILE = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +override CUDACPP_MAKEFILE = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) $(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') CUDACPP_SRC_MAKEFILE = cudacpp_src.mk From 113d332e6eed6cd8cd43db40284fea876b9f0468 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 3 Nov 2023 07:51:52 +0100 Subject: [PATCH 106/119] [oct23av] in ggtt.mad cudacpp.mk, use ':=' to set CUDACPP_MAKEFILE only once at the beginning and avoid corruption when make_opts is included #787 make -f cudacpp.mk cleanall CUDACPP_MAKEFILE='cudacpp.mk' CUDACPP_MAKEFILE='cudacpp.mk' cudacpp.mk:43: *** exit. Stop. However, this is still sensitive to variables coming from outside make -f cudacpp.mk cleanall CUDACPP_MAKEFILE=pippo CUDACPP_MAKEFILE='pippo' CUDACPP_MAKEFILE='pippo' cudacpp.mk:43: *** exit. Stop. --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 2f40beb739..3465ea15ef 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -5,8 +5,9 @@ #=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) #=== NB: different names (e.g. 
cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +#=== NB: use ':=' to ensure that the value is not modified further down after including make_opts -override CUDACPP_MAKEFILE = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) $(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') CUDACPP_SRC_MAKEFILE = cudacpp_src.mk From 784f37cb2b66dc3f4820b29d9a34463124ab3bc8 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 3 Nov 2023 07:55:39 +0100 Subject: [PATCH 107/119] [oct23av] in ggtt.mad cudacpp.mk, add override on top of ':=' to ensure that CUDACPP_MAKEFILE is an internal variable that cannot be set externally #787 make -f cudacpp.mk cleanall CUDACPP_MAKEFILE='cudacpp.mk' CUDACPP_MAKEFILE='cudacpp.mk' cudacpp.mk:43: *** exit. Stop. This is now insensitive to variables coming from outside make -f cudacpp.mk cleanall CUDACPP_MAKEFILE=pippo CUDACPP_MAKEFILE='cudacpp.mk' CUDACPP_MAKEFILE='cudacpp.mk' cudacpp.mk:43: *** exit. Stop. --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 3465ea15ef..62a77c78bb 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -7,7 +7,7 @@ #=== NB: different names (e.g. 
cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories #=== NB: use ':=' to ensure that the value is not modified further down after including make_opts -CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) $(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') CUDACPP_SRC_MAKEFILE = cudacpp_src.mk From 418c8b1e896d90cd37f93f5ba4396c6ff81ca82b Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 3 Nov 2023 08:14:50 +0100 Subject: [PATCH 108/119] [oct23av] in ggtt.mad cudacpp.mk, remove debug printouts - 'make cleanall' now succeeds after including make_opts #787 NB: I also checked that 'make cleanall; make' gives exactly the same output in my environment whether make_opts is included or not --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 62a77c78bb..dd7c8b4172 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -4,13 +4,13 @@ # Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. #=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) -#=== NB: different names (e.g. 
cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories -#=== NB: use ':=' to ensure that the value is not modified further down after including make_opts - +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') -CUDACPP_SRC_MAKEFILE = cudacpp_src.mk +#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk #------------------------------------------------------------------------------- @@ -39,8 +39,6 @@ UNAME_P := $(shell uname -p) ifneq ($(wildcard ../../Source/make_opts),) include ../../Source/make_opts endif -$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') -$(error exit) #------------------------------------------------------------------------------- From b13ae492eb783a98244d492fabc03313e95882fa Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 3 Nov 2023 08:28:30 +0100 Subject: [PATCH 109/119] [oct23av] in CODEGEN, backport the fixes in cudacpp.mk for make_opts from ggtt.mad #787 --- .../madgraph/iolibs/template_files/gpu/cudacpp.mk | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index 05ba4c361a..d8be8e72ce 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -4,10 +4,13 @@ # Further modified by: O. Mattelaer, S. Roiser, A. 
Valassi (2020-2023) for the MG5aMC CUDACPP plugin. #=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) -#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') -CUDACPP_MAKEFILE = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -CUDACPP_SRC_MAKEFILE = cudacpp_src.mk +#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk #------------------------------------------------------------------------------- From 6b39fcb46da2d3e5f59caa582631165fab945908 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 3 Nov 2023 10:05:24 +0100 Subject: [PATCH 110/119] [oct23av] regenerate all processes including the last changes to makefiles for Mac and for make_opts --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 18 +++--- .../ee_mumu.mad/SubProcesses/cudacpp.mk | 23 ++++++-- .../cudacpp/ee_mumu.mad/SubProcesses/makefile | 2 +- .../CODEGEN_cudacpp_ee_mumu_log.txt | 10 ++-- .../ee_mumu.sa/SubProcesses/cudacpp.mk | 23 ++++++-- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 10 ++-- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 10 ++-- .../cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk | 23 ++++++-- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 20 +++---- .../gg_tt01g.mad/SubProcesses/cudacpp.mk | 23 ++++++-- .../gg_tt01g.mad/SubProcesses/makefile | 2 +- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 20 +++---- .../gg_ttg.mad/SubProcesses/cudacpp.mk | 23 ++++++-- .../cudacpp/gg_ttg.mad/SubProcesses/makefile | 2 +- 
.../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 14 ++--- .../cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk | 23 ++++++-- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 20 +++---- .../gg_ttgg.mad/SubProcesses/cudacpp.mk | 23 ++++++-- .../cudacpp/gg_ttgg.mad/SubProcesses/makefile | 2 +- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 14 ++--- .../gg_ttgg.sa/SubProcesses/cudacpp.mk | 23 ++++++-- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 20 +++---- .../gg_ttggg.mad/SubProcesses/cudacpp.mk | 23 ++++++-- .../gg_ttggg.mad/SubProcesses/makefile | 2 +- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 14 ++--- .../gg_ttggg.sa/SubProcesses/cudacpp.mk | 23 ++++++-- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 22 ++++---- .../gq_ttq.mad/SubProcesses/cudacpp.mk | 23 ++++++-- .../cudacpp/gq_ttq.mad/SubProcesses/makefile | 2 +- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 14 ++--- .../cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk | 23 ++++++-- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 8 +-- .../heft_gg_h.sa/SubProcesses/cudacpp.mk | 23 ++++++-- .../CODEGEN_mad_pp_tt012j_log.txt | 56 +++++++++---------- .../pp_tt012j.mad/SubProcesses/cudacpp.mk | 23 ++++++-- .../pp_tt012j.mad/SubProcesses/makefile | 2 +- 36 files changed, 394 insertions(+), 212 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index a5710dd2c0..ae1e7a6e7f 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005411624908447266  +DEBUG: model prefixing takes 0.005455732345581055  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -154,7 +154,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.005 s +1 processes with 2 diagrams generated in 0.004 s Total: 1 processes with 2 diagrams output madevent ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -174,7 +174,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,19 +191,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.100 s +Wrote files for 8 helas calls in 0.098 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.205 s +ALOHA: aloha creates 3 routines in 0.196 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.258 s +ALOHA: aloha creates 7 routines in 0.249 s FFV1 FFV1 FFV2 @@ -248,9 +248,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.894s -user 0m1.659s -sys 0m0.227s +real 0m1.885s +user 0m1.626s +sys 0m0.205s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk index 741e96058b..dd7c8b4172 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk @@ -4,10 +4,13 @@ # Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. #=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) -#=== NB: different names (e.g. 
cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') -CUDACPP_MAKEFILE = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -CUDACPP_SRC_MAKEFILE = cudacpp_src.mk +#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk #------------------------------------------------------------------------------- @@ -27,7 +30,15 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: this is crucial for MG5aMC flag consistency/documentation +# AV: temporarely comment this out because it breaks cudacpp builds +ifneq ($(wildcard ../../Source/make_opts),) +include ../../Source/make_opts +endif #------------------------------------------------------------------------------- @@ -233,6 +244,8 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) +else ifneq ($(UNAME), 'Darwin') +override OMPFLAGS = # AV disable OpenMP MT on mac else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -722,7 +735,7 @@ ifneq ($(shell which flock 2>/dev/null),) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else - $(MAKE) -C $(TESTDIR) + if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile b/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile index 65369d6101..d572486c2e 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/makefile @@ -95,7 +95,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +ifeq ($(UNAME),Darwin) LDFLAGS += -lc++ # avoid 'Undefined symbols' for chrono::steady_clock on macOS (checked with otool -L libmg5amc_gg_ttx_cpp.so) LDFLAGS += -mmacosx-version-min=11.3 # avoid "ld: warning: object file was built for newer macOS version than being linked" else diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 0d8bff3853..e23cc03d5d 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00572514533996582  +DEBUG: model prefixing takes 0.005369663238525391  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -181,7 +181,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.274 s +ALOHA: aloha creates 4 routines in 0.262 s FFV1 FFV1 FFV2 @@ -201,6 +201,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.691s -user 0m0.628s -sys 0m0.047s +real 0m0.711s +user 0m0.578s +sys 0m0.059s diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk index 741e96058b..dd7c8b4172 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk @@ -4,10 +4,13 @@ # Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. #=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) -#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') -CUDACPP_MAKEFILE = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -CUDACPP_SRC_MAKEFILE = cudacpp_src.mk +#=== NB: different names (e.g. 
cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk #------------------------------------------------------------------------------- @@ -27,7 +30,15 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: this is crucial for MG5aMC flag consistency/documentation +# AV: temporarely comment this out because it breaks cudacpp builds +ifneq ($(wildcard ../../Source/make_opts),) +include ../../Source/make_opts +endif #------------------------------------------------------------------------------- @@ -233,6 +244,8 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) +else ifneq ($(UNAME), 'Darwin') +override OMPFLAGS = # AV disable OpenMP MT on mac else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -722,7 +735,7 @@ ifneq ($(shell which flock 2>/dev/null),) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else - $(MAKE) -C $(TESTDIR) + if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 7b7464a85f..7a4d84afcd 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005258798599243164  +DEBUG: model prefixing takes 0.005388021469116211  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -237,9 +237,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.870s -user 0m1.453s -sys 0m0.225s +real 0m2.004s +user 0m1.448s +sys 0m0.220s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index b5c180943d..2b7e24e4bc 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058231353759765625  +DEBUG: model prefixing takes 0.005523204803466797  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -180,7 +180,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.144 s +ALOHA: aloha creates 2 routines in 0.142 s VVV1 FFV1 FFV1 @@ -196,6 +196,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.550s -user 0m0.484s -sys 0m0.055s +real 0m0.546s +user 0m0.463s +sys 0m0.057s diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk index 741e96058b..dd7c8b4172 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk @@ -4,10 +4,13 @@ # Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. 
#=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) -#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') -CUDACPP_MAKEFILE = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -CUDACPP_SRC_MAKEFILE = cudacpp_src.mk +#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk #------------------------------------------------------------------------------- @@ -27,7 +30,15 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: this is crucial for MG5aMC flag consistency/documentation +# AV: temporarely comment this out because it breaks cudacpp builds +ifneq ($(wildcard ../../Source/make_opts),) +include ../../Source/make_opts +endif #------------------------------------------------------------------------------- @@ -233,6 +244,8 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) +else ifneq ($(UNAME), 'Darwin') +override OMPFLAGS = # AV disable OpenMP MT on mac else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -722,7 +735,7 @@ ifneq ($(shell which flock 2>/dev/null),) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else - $(MAKE) -C $(TESTDIR) + if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 745301de6c..314aa7e54f 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005591392517089844  +DEBUG: model prefixing takes 0.005343914031982422  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -163,7 +163,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.020 s +1 processes with 16 diagrams generated in 0.019 s Total: 2 processes with 19 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -185,7 +185,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -202,7 +202,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -218,14 +218,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 2 subprocesses (19 diagrams) in 0.043 s -Wrote files for 46 helas calls in 0.253 s +Wrote files for 46 helas calls in 0.239 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.331 s +ALOHA: aloha creates 5 routines in 0.332 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -233,7 +233,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.613 s +ALOHA: aloha creates 10 routines in 0.640 s VVV1 VVV1 FFV1 @@ -283,9 +283,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.699s -user 0m2.091s -sys 0m0.234s +real 0m2.611s +user 0m2.029s +sys 0m0.243s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk index 741e96058b..dd7c8b4172 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk @@ -4,10 +4,13 @@ # Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. 
#=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) -#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') -CUDACPP_MAKEFILE = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -CUDACPP_SRC_MAKEFILE = cudacpp_src.mk +#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk #------------------------------------------------------------------------------- @@ -27,7 +30,15 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: this is crucial for MG5aMC flag consistency/documentation +# AV: temporarely comment this out because it breaks cudacpp builds +ifneq ($(wildcard ../../Source/make_opts),) +include ../../Source/make_opts +endif #------------------------------------------------------------------------------- @@ -233,6 +244,8 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) +else ifneq ($(UNAME), 'Darwin') +override OMPFLAGS = # AV disable OpenMP MT on mac else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -722,7 +735,7 @@ ifneq ($(shell which flock 2>/dev/null),) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else - $(MAKE) -C $(TESTDIR) + if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile index 65369d6101..d572486c2e 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/makefile @@ -95,7 +95,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +ifeq ($(UNAME),Darwin) LDFLAGS += -lc++ # avoid 'Undefined symbols' for chrono::steady_clock on macOS (checked with otool -L libmg5amc_gg_ttx_cpp.so) LDFLAGS += -mmacosx-version-min=11.3 # avoid "ld: warning: object file was built for newer macOS version than being linked" else diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index ebf1f8f3f2..ef1138c5e9 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0056226253509521484  +DEBUG: model prefixing takes 0.005430459976196289  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.022 s +1 processes with 16 diagrams generated in 0.021 s Total: 1 processes with 16 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,15 +190,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s -Wrote files for 36 helas calls in 0.152 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s +Wrote files for 36 helas calls in 0.146 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.332 s +ALOHA: aloha creates 5 routines in 0.323 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.316 s +ALOHA: aloha creates 10 routines in 0.307 s VVV1 VVV1 FFV1 @@ -252,9 +252,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.218s -user 0m1.984s -sys 0m0.221s +real 0m2.201s +user 0m1.925s +sys 0m0.223s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk index 741e96058b..dd7c8b4172 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk @@ -4,10 +4,13 @@ # Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. #=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) -#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') -CUDACPP_MAKEFILE = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -CUDACPP_SRC_MAKEFILE = cudacpp_src.mk +#=== NB: different names (e.g. 
cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk #------------------------------------------------------------------------------- @@ -27,7 +30,15 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: this is crucial for MG5aMC flag consistency/documentation +# AV: temporarely comment this out because it breaks cudacpp builds +ifneq ($(wildcard ../../Source/make_opts),) +include ../../Source/make_opts +endif #------------------------------------------------------------------------------- @@ -233,6 +244,8 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) +else ifneq ($(UNAME), 'Darwin') +override OMPFLAGS = # AV disable OpenMP MT on mac else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -722,7 +735,7 @@ ifneq ($(shell which flock 2>/dev/null),) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else - $(MAKE) -C $(TESTDIR) + if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile b/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile index 65369d6101..d572486c2e 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/makefile @@ -95,7 +95,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +ifeq ($(UNAME),Darwin) LDFLAGS += -lc++ # avoid 'Undefined symbols' for chrono::steady_clock on macOS (checked with otool -L libmg5amc_gg_ttx_cpp.so) LDFLAGS += -mmacosx-version-min=11.3 # avoid "ld: warning: object file was built for newer macOS version than being linked" else diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 0b685b3fc5..99d894704b 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005393505096435547  +DEBUG: model prefixing takes 0.005426645278930664  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.023 s +1 processes with 16 diagrams generated in 0.022 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.332 s +ALOHA: aloha creates 5 routines in 0.318 s VVV1 VVV1 FFV1 @@ -204,6 +204,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.798s -user 0m0.734s -sys 0m0.054s +real 0m0.769s +user 0m0.709s +sys 0m0.050s diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk 
b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk index 741e96058b..dd7c8b4172 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk @@ -4,10 +4,13 @@ # Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. #=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) -#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') -CUDACPP_MAKEFILE = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -CUDACPP_SRC_MAKEFILE = cudacpp_src.mk +#=== NB: different names (e.g. 
cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk #------------------------------------------------------------------------------- @@ -27,7 +30,15 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: this is crucial for MG5aMC flag consistency/documentation +# AV: temporarely comment this out because it breaks cudacpp builds +ifneq ($(wildcard ../../Source/make_opts),) +include ../../Source/make_opts +endif #------------------------------------------------------------------------------- @@ -233,6 +244,8 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) +else ifneq ($(UNAME), 'Darwin') +override OMPFLAGS = # AV disable OpenMP MT on mac else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -722,7 +735,7 @@ ifneq ($(shell which flock 2>/dev/null),) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else - $(MAKE) -C $(TESTDIR) + if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 5e61fa70aa..4fedc83eed 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005539655685424805  +DEBUG: model prefixing takes 0.005503416061401367  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.160 s +1 processes with 123 diagrams generated in 0.155 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,15 +190,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.433 s -Wrote files for 222 helas calls in 0.701 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.419 s +Wrote files for 222 helas calls in 0.684 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.338 s +ALOHA: aloha creates 5 routines in 0.329 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.316 s +ALOHA: aloha creates 10 routines in 0.309 s VVV1 VVV1 FFV1 @@ -255,9 +255,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.311s -user 0m3.076s -sys 0m0.229s +real 0m5.059s +user 0m2.982s +sys 0m0.232s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk index 741e96058b..dd7c8b4172 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk @@ -4,10 +4,13 @@ # Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. #=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) -#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') -CUDACPP_MAKEFILE = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -CUDACPP_SRC_MAKEFILE = cudacpp_src.mk +#=== NB: different names (e.g. 
cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk #------------------------------------------------------------------------------- @@ -27,7 +30,15 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: this is crucial for MG5aMC flag consistency/documentation +# AV: temporarely comment this out because it breaks cudacpp builds +ifneq ($(wildcard ../../Source/make_opts),) +include ../../Source/make_opts +endif #------------------------------------------------------------------------------- @@ -233,6 +244,8 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) +else ifneq ($(UNAME), 'Darwin') +override OMPFLAGS = # AV disable OpenMP MT on mac else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -722,7 +735,7 @@ ifneq ($(shell which flock 2>/dev/null),) @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else - $(MAKE) -C $(TESTDIR) + if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile index 65369d6101..d572486c2e 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/makefile @@ -95,7 +95,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +ifeq ($(UNAME),Darwin) LDFLAGS += -lc++ # avoid 'Undefined symbols' for chrono::steady_clock on macOS (checked with otool -L libmg5amc_gg_ttx_cpp.so) LDFLAGS += -mmacosx-version-min=11.3 # avoid "ld: warning: object file was built for newer macOS version than being linked" else diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 5fe0f32625..5ce2f9ce66 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005811929702758789  +DEBUG: model prefixing takes 0.005643606185913086  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.159 s +1 processes with 123 diagrams generated in 0.156 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.431 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.419 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.323 s +ALOHA: aloha creates 5 routines in 0.315 s VVV1 VVV1 FFV1 @@ -207,6 +207,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m1.477s -user 0m1.398s -sys 0m0.055s +real 0m1.431s +user 0m1.365s +sys 0m0.053s diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk index 
741e96058b..dd7c8b4172 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk @@ -4,10 +4,13 @@ # Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. #=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) -#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') -CUDACPP_MAKEFILE = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -CUDACPP_SRC_MAKEFILE = cudacpp_src.mk +#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk #------------------------------------------------------------------------------- @@ -27,7 +30,15 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: this is crucial for MG5aMC flag consistency/documentation +# AV: temporarely comment this out because it breaks cudacpp builds +ifneq ($(wildcard ../../Source/make_opts),) +include ../../Source/make_opts +endif #------------------------------------------------------------------------------- @@ -233,6 +244,8 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = 
-fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) +else ifneq ($(UNAME), 'Darwin') +override OMPFLAGS = # AV disable OpenMP MT on mac else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -722,7 +735,7 @@ ifneq ($(shell which flock 2>/dev/null),) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else - $(MAKE) -C $(TESTDIR) + if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index e635cb9bc3..ab52071d35 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00549006462097168  +DEBUG: model prefixing takes 0.005440235137939453  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.904 s +1 processes with 1240 diagrams generated in 1.890 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -177,7 +177,7 @@ INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -192,15 +192,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.744 s -Wrote files for 2281 helas calls in 18.741 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.483 s +Wrote files for 2281 helas calls in 18.335 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.328 s +ALOHA: aloha creates 5 routines in 0.322 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -208,7 +208,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.317 s +ALOHA: aloha creates 10 routines in 0.308 s VVV1 VVV1 FFV1 @@ -257,9 +257,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m29.654s -user 0m29.161s -sys 0m0.387s +real 0m28.928s +user 0m28.452s +sys 0m0.379s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk index 741e96058b..dd7c8b4172 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk @@ -4,10 +4,13 @@ # Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. 
#=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) -#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') -CUDACPP_MAKEFILE = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -CUDACPP_SRC_MAKEFILE = cudacpp_src.mk +#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk #------------------------------------------------------------------------------- @@ -27,7 +30,15 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: this is crucial for MG5aMC flag consistency/documentation +# AV: include this only if the file exists (it was temporarily commented out because it broke cudacpp builds) +ifneq ($(wildcard ../../Source/make_opts),) +include ../../Source/make_opts +endif #------------------------------------------------------------------------------- @@ -233,6 +244,8 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang?
(AV Oct 2023: this still fails in the CI) +else ifneq ($(UNAME), 'Darwin') +override OMPFLAGS = # AV disable OpenMP MT on mac else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -722,7 +735,7 @@ ifneq ($(shell which flock 2>/dev/null),) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else - $(MAKE) -C $(TESTDIR) + if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile index 65369d6101..d572486c2e 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/makefile @@ -95,7 +95,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +ifeq ($(UNAME),Darwin) LDFLAGS += -lc++ # avoid 'Undefined symbols' for chrono::steady_clock on macOS (checked with otool -L libmg5amc_gg_ttx_cpp.so) LDFLAGS += -mmacosx-version-min=11.3 # avoid "ld: warning: object file was built for newer macOS version than being linked" else diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index d4301f585b..a09465533d 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005515336990356445  +DEBUG: model prefixing takes 0.00536036491394043  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.906 s +1 processes with 1240 diagrams generated in 1.842 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.668 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.483 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.351 s +ALOHA: aloha creates 5 routines in 0.344 s VVV1 VVV1 FFV1 @@ -207,6 +207,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m13.188s -user 0m13.024s -sys 0m0.114s +real 0m12.857s +user 0m12.694s +sys 0m0.108s diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk index 741e96058b..dd7c8b4172 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk @@ -4,10 +4,13 @@ # Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. #=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) -#=== NB: different names (e.g. 
cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') -CUDACPP_MAKEFILE = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -CUDACPP_SRC_MAKEFILE = cudacpp_src.mk +#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk #------------------------------------------------------------------------------- @@ -27,7 +30,15 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: this is crucial for MG5aMC flag consistency/documentation +# AV: include this only if the file exists (it was temporarily commented out because it broke cudacpp builds) +ifneq ($(wildcard ../../Source/make_opts),) +include ../../Source/make_opts +endif #------------------------------------------------------------------------------- @@ -233,6 +244,8 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) +else ifeq ($(UNAME_S),Darwin) +override OMPFLAGS = # AV disable OpenMP MT on mac else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -722,7 +735,7 @@ ifneq ($(shell which flock 2>/dev/null),) @if [ !
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else - $(MAKE) -C $(TESTDIR) + if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index b64684ce9d..e9f77c01ba 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005499839782714844  +DEBUG: model prefixing takes 0.005260467529296875  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.078 s +8 processes with 40 diagrams generated in 0.077 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -198,7 +198,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -215,7 +215,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -230,17 +230,17 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s -Wrote files for 32 helas calls in 0.223 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s +Wrote files for 32 helas calls in 0.215 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.148 s +ALOHA: aloha creates 2 routines in 0.144 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.135 s +ALOHA: aloha creates 4 routines in 0.131 s FFV1 FFV1 FFV1 @@ -294,9 +294,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.953s -user 0m1.723s -sys 0m0.227s +real 0m1.904s +user 0m1.684s +sys 0m0.213s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk index 741e96058b..dd7c8b4172 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk @@ -4,10 +4,13 @@ # Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. #=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) -#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') -CUDACPP_MAKEFILE = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -CUDACPP_SRC_MAKEFILE = cudacpp_src.mk +#=== NB: different names (e.g. 
cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk #------------------------------------------------------------------------------- @@ -27,7 +30,15 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: this is crucial for MG5aMC flag consistency/documentation +# AV: include this only if the file exists (it was temporarily commented out because it broke cudacpp builds) +ifneq ($(wildcard ../../Source/make_opts),) +include ../../Source/make_opts +endif #------------------------------------------------------------------------------- @@ -233,6 +244,8 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) +else ifeq ($(UNAME_S),Darwin) +override OMPFLAGS = # AV disable OpenMP MT on mac else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -722,7 +735,7 @@ ifneq ($(shell which flock 2>/dev/null),) @if [ !
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else - $(MAKE) -C $(TESTDIR) + if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile b/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile index 65369d6101..d572486c2e 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/makefile @@ -95,7 +95,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +ifeq ($(UNAME),Darwin) LDFLAGS += -lc++ # avoid 'Undefined symbols' for chrono::steady_clock on macOS (checked with otool -L libmg5amc_gg_ttx_cpp.so) LDFLAGS += -mmacosx-version-min=11.3 # avoid "ld: warning: object file was built for newer macOS version than being linked" else diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 6e9e4386f9..c075764d55 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005594491958618164  +DEBUG: model prefixing takes 0.0053064823150634766  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.082 s +8 processes with 40 diagrams generated in 0.076 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -206,12 +206,12 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. -Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.029 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.147 s +ALOHA: aloha creates 2 routines in 0.141 s FFV1 FFV1 FFV1 @@ -228,6 +228,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.666s -user 0m0.604s -sys 0m0.056s +real 0m0.646s +user 0m0.581s +sys 0m0.052s diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk index 741e96058b..dd7c8b4172 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk @@ -4,10 +4,13 @@ # Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. 
#=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) -#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') -CUDACPP_MAKEFILE = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -CUDACPP_SRC_MAKEFILE = cudacpp_src.mk +#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk #------------------------------------------------------------------------------- @@ -27,7 +30,15 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: this is crucial for MG5aMC flag consistency/documentation +# AV: include this only if the file exists (it was temporarily commented out because it broke cudacpp builds) +ifneq ($(wildcard ../../Source/make_opts),) +include ../../Source/make_opts +endif #------------------------------------------------------------------------------- @@ -233,6 +244,8 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang?
(AV Oct 2023: this still fails in the CI) +else ifneq ($(UNAME), 'Darwin') +override OMPFLAGS = # AV disable OpenMP MT on mac else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -722,7 +735,7 @@ ifneq ($(shell which flock 2>/dev/null),) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else - $(MAKE) -C $(TESTDIR) + if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index 7463a314bf..e02926e728 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -153,7 +153,7 @@ Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines -ALOHA: aloha creates 1 routines in 0.063 s +ALOHA: aloha creates 1 routines in 0.061 s VVS3 FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. 
@@ -166,6 +166,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.437s -user 0m0.372s -sys 0m0.054s +real 0m0.417s +user 0m0.359s +sys 0m0.048s diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk index 741e96058b..dd7c8b4172 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk @@ -4,10 +4,13 @@ # Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. #=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) -#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') -CUDACPP_MAKEFILE = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -CUDACPP_SRC_MAKEFILE = cudacpp_src.mk +#=== NB: different names (e.g. 
cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk #------------------------------------------------------------------------------- @@ -27,7 +30,15 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: this is crucial for MG5aMC flag consistency/documentation +# AV: include this only if the file exists (it was temporarily commented out because it broke cudacpp builds) +ifneq ($(wildcard ../../Source/make_opts),) +include ../../Source/make_opts +endif #------------------------------------------------------------------------------- @@ -233,6 +244,8 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) +else ifeq ($(UNAME_S),Darwin) +override OMPFLAGS = # AV disable OpenMP MT on mac else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -722,7 +735,7 @@ ifneq ($(shell which flock 2>/dev/null),) @if [ !
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else - $(MAKE) -C $(TESTDIR) + if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index d80455a6eb..1d04df7c37 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005620479583740234  +DEBUG: model prefixing takes 0.005662202835083008  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.138 s +13 processes with 76 diagrams generated in 0.134 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.855 s +65 processes with 1119 diagrams generated in 1.799 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -497,7 +497,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -514,7 +514,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -531,7 +531,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -548,7 +548,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -565,7 +565,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -582,7 +582,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -599,7 +599,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -616,7 +616,7 @@ INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -633,7 +633,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -650,7 +650,7 @@ INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -667,7 +667,7 @@ INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -684,7 +684,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -701,7 +701,7 @@ INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -718,7 +718,7 @@ INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -735,7 +735,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -752,7 +752,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -769,7 +769,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -786,7 +786,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -801,15 +801,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.314 s -Wrote files for 810 helas calls in 3.312 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.263 s +Wrote files for 810 helas calls in 3.209 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.340 s +ALOHA: aloha creates 5 routines in 0.329 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -817,7 +817,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.317 s +ALOHA: aloha creates 10 routines in 0.308 s VVV1 VVV1 FFV1 @@ -1028,9 +1028,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m8.998s -user 0m8.453s -sys 0m0.504s +real 0m8.727s +user 0m8.236s +sys 0m0.447s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk index 741e96058b..dd7c8b4172 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk @@ -4,10 +4,13 @@ # Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. 
#=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) -#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') -CUDACPP_MAKEFILE = $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -CUDACPP_SRC_MAKEFILE = cudacpp_src.mk +#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk #------------------------------------------------------------------------------- @@ -27,7 +30,15 @@ UNAME_S := $(shell uname -s) UNAME_P := $(shell uname -p) ###$(info UNAME_P='$(UNAME_P)') -###include ../../Source/make_opts # AV remove (added by OM) +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: this is crucial for MG5aMC flag consistency/documentation +# AV: temporarely comment this out because it breaks cudacpp builds +ifneq ($(wildcard ../../Source/make_opts),) +include ../../Source/make_opts +endif #------------------------------------------------------------------------------- @@ -233,6 +244,8 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) +else ifneq ($(UNAME), 'Darwin') +override OMPFLAGS = # AV disable OpenMP MT on mac else override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT (default before #575) @@ -722,7 +735,7 @@ ifneq ($(shell which flock 2>/dev/null),) @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) else - $(MAKE) -C $(TESTDIR) + if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile index 65369d6101..d572486c2e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/makefile @@ -95,7 +95,7 @@ SYMMETRY = symmetry.o idenparts.o # Binaries -ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +ifeq ($(UNAME),Darwin) LDFLAGS += -lc++ # avoid 'Undefined symbols' for chrono::steady_clock on macOS (checked with otool -L libmg5amc_gg_ttx_cpp.so) LDFLAGS += -mmacosx-version-min=11.3 # avoid "ld: warning: object file was built for newer macOS version than being linked" else From c4d2e9e96a51628a6ceff2a3467df70b0ecbc5e3 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 3 Nov 2023 17:05:50 +0100 Subject: [PATCH 111/119] [oct23av] rerun 78 tput tests, with FPEs enabled in the check executable - usual failures in ggttg f/m and gqttq f (#783), no change in performance (*NB OpenMP is now disabled by default!*) STARTED AT Fri Nov 3 10:06:44 AM CET 2023 ./tput/teeThroughputX.sh -mix -hrd -makej -eemumu -ggtt -ggttg -ggttgg -gqttq -ggttggg -makeclean ENDED(1) AT Fri Nov 3 01:30:11 PM CET 2023 [Status=2] ./tput/teeThroughputX.sh -flt -hrd -makej -eemumu -ggtt -ggttgg -inlonly -makeclean ENDED(2) AT Fri Nov 3 01:55:47 PM CET 2023 [Status=0] ./tput/teeThroughputX.sh -makej -eemumu -ggtt -ggttg 
-gqttq -ggttgg -ggttggg -flt -bridge -makeclean ENDED(3) AT Fri Nov 3 02:05:25 PM CET 2023 [Status=2] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -rmbhst ENDED(4) AT Fri Nov 3 02:08:40 PM CET 2023 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -curhst ENDED(5) AT Fri Nov 3 02:11:53 PM CET 2023 [Status=0] --- .../log_eemumu_mad_d_inl0_hrd0.txt | 103 +++++++-------- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 103 +++++++-------- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 103 +++++++-------- .../log_eemumu_mad_d_inl0_hrd0_curhst.txt | 103 +++++++-------- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 103 +++++++-------- .../log_eemumu_mad_d_inl0_hrd1.txt | 103 +++++++-------- .../log_eemumu_mad_d_inl1_hrd0.txt | 103 +++++++-------- .../log_eemumu_mad_d_inl1_hrd1.txt | 103 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0.txt | 103 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 103 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 103 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0_curhst.txt | 103 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 103 +++++++-------- .../log_eemumu_mad_f_inl0_hrd1.txt | 103 +++++++-------- .../log_eemumu_mad_f_inl1_hrd0.txt | 103 +++++++-------- .../log_eemumu_mad_f_inl1_hrd1.txt | 103 +++++++-------- .../log_eemumu_mad_m_inl0_hrd0.txt | 103 +++++++-------- .../log_eemumu_mad_m_inl0_hrd1.txt | 103 +++++++-------- .../log_ggtt_mad_d_inl0_hrd0.txt | 103 +++++++-------- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 103 +++++++-------- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 103 +++++++-------- .../log_ggtt_mad_d_inl0_hrd0_curhst.txt | 103 +++++++-------- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 103 +++++++-------- .../log_ggtt_mad_d_inl0_hrd1.txt | 103 +++++++-------- .../log_ggtt_mad_d_inl1_hrd0.txt | 103 +++++++-------- .../log_ggtt_mad_d_inl1_hrd1.txt | 103 +++++++-------- .../log_ggtt_mad_f_inl0_hrd0.txt | 103 +++++++-------- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 103 
+++++++-------- .../log_ggtt_mad_f_inl0_hrd0_common.txt | 103 +++++++-------- .../log_ggtt_mad_f_inl0_hrd0_curhst.txt | 103 +++++++-------- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 103 +++++++-------- .../log_ggtt_mad_f_inl0_hrd1.txt | 103 +++++++-------- .../log_ggtt_mad_f_inl1_hrd0.txt | 103 +++++++-------- .../log_ggtt_mad_f_inl1_hrd1.txt | 103 +++++++-------- .../log_ggtt_mad_m_inl0_hrd0.txt | 103 +++++++-------- .../log_ggtt_mad_m_inl0_hrd1.txt | 103 +++++++-------- .../log_ggttg_mad_d_inl0_hrd0.txt | 117 ++++++++--------- .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 117 ++++++++--------- .../log_ggttg_mad_d_inl0_hrd1.txt | 117 ++++++++--------- .../log_ggttg_mad_f_inl0_hrd0.txt | 40 +++--- .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 40 +++--- .../log_ggttg_mad_f_inl0_hrd1.txt | 40 +++--- .../log_ggttg_mad_m_inl0_hrd0.txt | 40 +++--- .../log_ggttg_mad_m_inl0_hrd1.txt | 40 +++--- .../log_ggttgg_mad_d_inl0_hrd0.txt | 117 ++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 117 ++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 117 ++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_curhst.txt | 117 ++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 117 ++++++++--------- .../log_ggttgg_mad_d_inl0_hrd1.txt | 117 ++++++++--------- .../log_ggttgg_mad_d_inl1_hrd0.txt | 117 ++++++++--------- .../log_ggttgg_mad_d_inl1_hrd1.txt | 117 ++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 117 ++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 117 ++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 117 ++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_curhst.txt | 117 ++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 117 ++++++++--------- .../log_ggttgg_mad_f_inl0_hrd1.txt | 117 ++++++++--------- .../log_ggttgg_mad_f_inl1_hrd0.txt | 121 +++++++++--------- .../log_ggttgg_mad_f_inl1_hrd1.txt | 121 +++++++++--------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 117 ++++++++--------- .../log_ggttgg_mad_m_inl0_hrd1.txt | 117 
++++++++--------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 117 ++++++++--------- .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 117 ++++++++--------- .../log_ggttggg_mad_d_inl0_hrd1.txt | 117 ++++++++--------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 117 ++++++++--------- .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 117 ++++++++--------- .../log_ggttggg_mad_f_inl0_hrd1.txt | 117 ++++++++--------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 117 ++++++++--------- .../log_ggttggg_mad_m_inl0_hrd1.txt | 117 ++++++++--------- .../log_gqttq_mad_d_inl0_hrd0.txt | 117 ++++++++--------- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 117 ++++++++--------- .../log_gqttq_mad_d_inl0_hrd1.txt | 117 ++++++++--------- .../log_gqttq_mad_f_inl0_hrd0.txt | 108 ++++++++-------- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 108 ++++++++-------- .../log_gqttq_mad_f_inl0_hrd1.txt | 108 ++++++++-------- .../log_gqttq_mad_m_inl0_hrd0.txt | 117 ++++++++--------- .../log_gqttq_mad_m_inl0_hrd1.txt | 117 ++++++++--------- 78 files changed, 3928 insertions(+), 4290 deletions(-) diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index d9773fa9e0..5abaf48c27 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-30_22:38:57 +DATE: 2023-11-03_13:10:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.999458e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.940961e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.067088e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.425670e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.222247e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.012114e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.786611 sec - 2,626,513,820 cycles # 3.012 GHz - 4,089,953,760 instructions # 1.56 insn per cycle - 1.073481584 seconds time elapsed +TOTAL : 0.857427 sec + 2,861,836,198 cycles # 3.005 GHz + 4,395,542,756 instructions # 1.54 insn per cycle + 1.189695235 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -76,16 +76,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.125739e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.324458e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.324458e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.128658e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.324925e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.324925e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.985518 sec - 18,327,786,554 cycles # 3.060 GHz - 44,036,033,273 instructions # 2.40 insn per cycle - 5.990684166 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.970436 sec + 18,347,671,953 cycles # 3.071 GHz + 43,938,983,717 instructions # 2.39 insn per cycle + 5.978217238 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 420) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.691045e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.213177e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.213177e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.660817e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.184751e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.184751e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.113874 sec - 12,767,822,386 cycles # 3.101 GHz - 31,002,879,427 instructions # 2.43 insn per cycle - 4.119112324 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.192129 sec + 12,809,417,728 cycles # 3.052 GHz + 31,016,432,387 instructions # 2.42 insn per cycle + 4.205962943 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1631) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.100650e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.950531e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.950531e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.074393e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.897997e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.897997e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.380885 sec - 10,086,974,506 cycles # 2.980 GHz - 19,377,326,262 instructions # 1.92 insn per cycle - 3.386108586 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) +TOTAL : 3.424513 sec + 10,077,521,544 cycles # 2.940 GHz + 19,366,070,840 instructions # 1.92 insn per cycle + 3.435971091 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1947) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.189272e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.095103e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.095103e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.171630e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.081425e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.081425e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.258705 sec - 9,718,547,501 cycles # 2.979 GHz - 19,005,874,298 instructions # 1.96 insn per cycle - 3.263850249 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) +TOTAL : 3.285829 sec + 9,782,653,766 cycles # 2.972 GHz + 18,983,356,035 instructions # 1.94 insn per cycle + 3.300405408 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1665) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.790268e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.367426e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.367426e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.870011e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.499133e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.499133e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.915927 sec - 8,595,619,480 cycles # 2.193 GHz - 15,738,404,294 instructions # 1.83 insn per cycle - 3.921168735 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) +TOTAL : 3.753305 sec + 8,627,314,746 cycles # 2.296 GHz + 15,735,767,464 instructions # 1.82 insn per cycle + 3.765985274 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 876) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index 6c990677ae..abcc335062 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-30_23:09:31 +DATE: 2023-11-03_13:58:54 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.798784e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.770897e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.770897e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.684254e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.540316e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.540316e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.186945 sec - 7,434,601,508 cycles # 3.065 GHz - 13,256,508,970 instructions # 1.78 insn per cycle - 2.483222320 seconds time elapsed +TOTAL : 2.224447 sec + 7,545,611,950 cycles # 3.058 GHz + 13,315,711,854 instructions # 1.76 insn per cycle + 2.525188178 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost @@ -85,16 +85,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.085908e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.266356e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.266356e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.088698e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.270495e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270495e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.396306 sec - 19,502,356,005 cycles # 3.047 GHz - 44,261,832,646 instructions # 2.27 insn per cycle - 6.402537575 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.375164 sec + 19,562,608,505 cycles # 3.066 GHz + 44,166,894,222 instructions # 2.26 insn per cycle + 6.381733173 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 420) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -113,16 +112,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.605106e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.073231e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.073231e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.600351e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.064795e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.064795e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.510790 sec - 14,025,769,628 cycles # 3.106 GHz - 31,843,541,012 instructions # 2.27 insn per cycle - 4.517109862 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.536602 sec + 13,986,967,709 cycles # 3.079 GHz + 31,858,097,714 instructions # 2.28 insn per cycle + 4.543267608 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1631) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -141,16 +139,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.966793e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.679363e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.679363e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.974818e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.694900e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.694900e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.795442 sec - 11,317,692,643 cycles # 2.978 GHz - 20,737,726,615 instructions # 1.83 insn per cycle - 3.801846848 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) +TOTAL : 3.785803 sec + 11,319,453,378 cycles # 2.986 GHz + 20,725,242,418 instructions # 1.83 insn per cycle + 3.792230201 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1947) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -169,16 +166,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.980473e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.725571e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.725571e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.024211e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.795837e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.795837e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.783151 sec - 10,943,929,913 cycles # 2.889 GHz - 20,366,389,809 instructions # 1.86 insn per cycle - 3.789497899 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) +TOTAL : 3.711845 sec + 11,025,578,716 cycles # 2.966 GHz + 20,353,975,948 instructions # 1.85 insn per cycle + 3.718442154 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1665) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -197,16 +193,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.744882e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.277335e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.277335e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.769786e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.312748e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.312748e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.213123 sec - 9,903,146,048 cycles # 2.348 GHz - 16,883,717,328 instructions # 1.70 insn per cycle - 4.219745362 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) +TOTAL : 4.154845 sec + 9,930,919,219 cycles # 2.387 GHz + 16,877,948,183 instructions # 1.70 insn per cycle + 4.161452694 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 876) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index 6344cc7ba8..9c171d5aa7 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-30_23:22:16 +DATE: 2023-11-03_14:12:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.825460e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.615556e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.978319e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.518793e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.550788e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.020092e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.303776 sec - 4,657,124,266 cycles # 3.040 GHz - 7,186,636,754 instructions # 1.54 insn per cycle - 1.588682023 seconds time elapsed +TOTAL : 1.322621 sec + 4,714,608,796 cycles # 3.038 GHz + 7,352,175,067 instructions # 1.56 insn per cycle + 1.608967629 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -76,16 +76,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.134096e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.332205e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.332205e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.139632e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.336228e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.336228e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 6.296140 sec - 19,421,926,510 cycles # 3.083 GHz - 44,137,585,352 instructions # 2.27 insn per cycle - 6.301319249 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.266327 sec + 19,426,711,131 cycles # 3.098 GHz + 44,042,281,250 instructions # 2.27 insn per cycle + 6.271573649 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 420) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.651480e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.154634e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.154634e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.682420e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.206424e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.206424e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.560540 sec - 13,873,540,395 cycles # 3.044 GHz - 31,009,056,044 instructions # 2.24 insn per cycle - 4.565789838 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.484074 sec + 13,859,190,016 cycles # 3.088 GHz + 31,018,475,981 instructions # 2.24 insn per cycle + 4.489265899 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1631) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.097326e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.928723e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.928723e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.088883e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.925223e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.925223e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.742619 sec - 11,169,147,793 cycles # 2.981 GHz - 19,279,405,520 instructions # 1.73 insn per cycle - 3.747767217 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) +TOTAL : 3.749912 sec + 11,186,702,264 cycles # 2.980 GHz + 19,267,314,009 instructions # 1.72 insn per cycle + 3.755230236 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1947) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.190762e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.098141e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.098141e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.168230e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.077689e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.077689e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.617262 sec - 10,818,196,768 cycles # 2.987 GHz - 18,695,491,265 instructions # 1.73 insn per cycle - 3.622515413 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) +TOTAL : 3.647713 sec + 10,905,880,835 cycles # 2.986 GHz + 18,695,552,937 instructions # 1.71 insn per cycle + 3.653166666 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1665) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.865525e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.485419e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.485419e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.847756e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.453812e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.453812e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.125743 sec - 9,708,020,978 cycles # 2.351 GHz - 15,438,456,670 instructions # 1.59 insn per cycle - 4.131079365 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) +TOTAL : 4.151667 sec + 9,725,947,647 cycles # 2.341 GHz + 15,433,393,788 instructions # 1.59 insn per cycle + 4.156858401 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 876) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt index 3aefc59966..3e310efa36 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-30_23:19:04 +DATE: 2023-11-03_14:08:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.850388e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.635176e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.005200e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.542546e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.573728e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.039776e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.953840 sec - 3,625,690,902 cycles # 3.045 GHz - 7,247,981,304 instructions # 2.00 insn per cycle - 1.247604337 seconds time elapsed +TOTAL : 0.966199 sec + 3,631,078,095 cycles # 3.042 GHz + 7,243,627,382 instructions # 1.99 insn per cycle + 1.251235752 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -76,16 +76,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.136488e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.334373e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.334373e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.138499e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.335855e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.335855e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.934612 sec - 18,310,624,957 cycles # 3.084 GHz - 44,036,944,062 instructions # 2.40 insn per cycle - 5.939841225 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.919799 sec + 18,319,541,591 cycles # 3.092 GHz + 43,938,665,623 instructions # 2.40 insn per cycle + 5.925332232 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 420) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.688951e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.208841e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.208841e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.666277e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.183174e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.183174e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.120879 sec - 12,781,805,897 cycles # 3.099 GHz - 31,000,901,178 instructions # 2.43 insn per cycle - 4.125959736 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.178085 sec + 12,735,588,219 cycles # 3.045 GHz + 31,015,602,777 instructions # 2.44 insn per cycle + 4.183602760 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1631) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.109438e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.946978e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.946978e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.114881e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.952882e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.952882e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.371386 sec - 10,049,111,074 cycles # 2.977 GHz - 19,378,382,951 instructions # 1.93 insn per cycle - 3.376527896 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) +TOTAL : 3.358551 sec + 10,039,725,787 cycles # 2.985 GHz + 19,365,092,946 instructions # 1.93 insn per cycle + 3.363774859 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1947) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.185993e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.087896e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.087896e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.171976e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.077146e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.077146e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.266619 sec - 9,718,371,404 cycles # 2.971 GHz - 18,994,616,347 instructions # 1.95 insn per cycle - 3.271821523 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) +TOTAL : 3.284609 sec + 9,776,297,677 cycles # 2.974 GHz + 18,994,290,313 instructions # 1.94 insn per cycle + 3.289871294 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1665) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.896345e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.528531e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.528531e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.851683e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.465690e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.465690e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.708220 sec - 8,592,027,434 cycles # 2.315 GHz - 15,737,406,155 instructions # 1.83 insn per cycle - 3.713476400 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) +TOTAL : 3.788845 sec + 8,587,145,882 cycles # 2.268 GHz + 15,735,191,375 instructions # 1.83 insn per cycle + 3.794205028 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 876) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index dce8f8cf57..f8a712ac39 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-30_23:15:51 +DATE: 2023-11-03_14:05:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -51,14 +51,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.295907e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.598257e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.930198e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.196142e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.495900e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.895038e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.822441 sec - 6,267,828,253 cycles # 3.060 GHz - 11,571,964,951 instructions # 1.85 insn per cycle - 2.106182309 seconds time elapsed +TOTAL : 1.844030 sec + 6,338,648,018 cycles # 3.057 GHz + 11,545,833,593 instructions # 1.82 insn per cycle + 2.130455527 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -78,16 +78,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.122306e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.317017e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.317017e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.121742e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.316851e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.316851e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.006713 sec - 18,284,116,238 cycles # 3.042 GHz - 44,035,103,509 instructions # 2.41 insn per cycle - 6.011881366 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.006675 sec + 18,357,748,563 cycles # 3.058 GHz + 43,944,454,893 instructions # 2.39 insn per cycle + 6.011909954 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 420) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -105,16 +104,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.695489e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.215832e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.215832e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.687890e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.212517e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.212517e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.104459 sec - 12,751,493,356 cycles # 3.104 GHz - 31,001,134,395 instructions # 2.43 insn per cycle - 4.109688724 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.124821 sec + 12,751,927,336 cycles # 3.088 GHz + 31,015,330,451 instructions # 2.43 insn per cycle + 4.130146742 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1631) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -132,16 +130,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.115266e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.947026e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.947026e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.082017e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.903162e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.903162e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.360992 sec - 10,038,778,756 cycles # 2.983 GHz - 19,377,215,963 instructions # 1.93 insn per cycle - 3.366349925 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) +TOTAL : 3.411364 sec + 10,065,886,460 cycles # 2.947 GHz + 19,365,047,474 instructions # 1.92 insn per cycle + 3.416626826 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1947) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -159,16 +156,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.188854e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.095807e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.095807e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.134766e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.016852e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.016852e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.263860 sec - 9,699,497,416 cycles # 2.968 GHz - 18,994,878,614 instructions # 1.96 insn per cycle - 3.269116510 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) +TOTAL : 3.341613 sec + 9,750,832,688 cycles # 2.914 GHz + 18,982,576,827 instructions # 1.95 insn per cycle + 3.347220259 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1665) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -186,16 +182,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.887543e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.510355e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.510355e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.885636e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.514573e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.514573e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.724044 sec - 8,601,500,033 cycles # 2.307 GHz - 15,737,301,137 instructions # 1.83 insn per cycle - 3.729408714 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) +TOTAL : 3.720771 sec + 8,585,253,922 cycles # 2.305 GHz + 15,731,896,314 instructions # 1.83 insn per cycle + 3.725980193 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 876) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index 34fd48853a..f9258d2008 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-30_22:39:31 +DATE: 2023-11-03_13:11:14 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.002468e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.955424e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.101943e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.423111e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.263090e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.097043e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.642654 sec - 2,632,177,698 cycles # 3.036 GHz - 4,095,145,359 instructions # 1.56 insn per cycle - 0.928848819 seconds time elapsed +TOTAL : 0.704180 sec + 2,779,160,829 cycles # 2.936 GHz + 4,301,484,509 instructions # 1.55 insn per cycle + 1.022354163 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 @@ -76,16 +76,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.197679e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.417716e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.417716e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.199434e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.422199e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.422199e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.648487 sec - 17,453,071,160 cycles # 3.088 GHz - 41,883,122,891 instructions # 2.40 insn per cycle - 5.653445764 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 392) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.637640 sec + 17,488,217,829 cycles # 3.099 GHz + 41,784,834,407 instructions # 2.39 insn per cycle + 5.645669854 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 379) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.734097e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.283067e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.283067e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.700066e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.246279e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.246279e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.022159 sec - 12,443,939,665 cycles # 3.091 GHz - 30,163,658,084 instructions # 2.42 insn per cycle - 4.027468023 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1611) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.104444 sec + 12,562,195,567 cycles # 3.057 GHz + 30,178,467,292 instructions # 2.40 insn per cycle + 4.117007071 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1599) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.127395e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.983414e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.983414e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.127446e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.985004e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.985004e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.344989 sec - 9,961,675,461 cycles # 2.974 GHz - 19,109,589,410 instructions # 1.92 insn per cycle - 3.350100783 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1930) (512y: 0) (512z: 0) +TOTAL : 3.341762 sec + 9,968,599,535 cycles # 2.978 GHz + 19,097,045,495 instructions # 1.92 insn per cycle + 3.358406085 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.208451e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.135186e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.135186e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.195204e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.119233e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.119233e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.233377 sec - 9,653,095,855 cycles # 2.981 GHz - 18,775,836,987 instructions # 1.95 insn per cycle - 3.238819698 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1661) (512y: 178) (512z: 0) +TOTAL : 3.252318 sec + 9,689,875,846 cycles # 2.975 GHz + 18,763,677,025 instructions # 1.94 insn per cycle + 3.269057716 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1637) (512y: 178) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.882766e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.524894e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.524894e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.918034e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.580927e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.580927e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.739273 sec - 8,432,587,246 cycles # 2.253 GHz - 15,614,997,197 instructions # 1.85 insn per cycle - 3.744502296 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 886) (512y: 156) (512z: 1239) +TOTAL : 3.667847 sec + 8,448,792,836 cycles # 2.300 GHz + 15,609,069,263 instructions # 1.85 insn per cycle + 3.680296602 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 862) (512y: 156) (512z: 1239) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index b66677f31e..72bf3d22f1 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-30_22:58:59 +DATE: 2023-11-03_13:48:16 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.821878e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.665537e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.054112e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.512564e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.589413e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.065843e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.673776 sec - 2,711,188,081 cycles # 2.996 GHz - 4,231,389,791 instructions # 1.56 insn per cycle - 0.964580036 seconds time elapsed +TOTAL : 0.679787 sec + 2,732,583,530 cycles # 2.999 GHz + 4,175,209,336 instructions # 1.53 insn per cycle + 0.973013638 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -76,16 +76,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.701921e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.180269e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.180269e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.707397e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.189358e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.189358e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.094769 sec - 12,660,917,906 cycles # 3.089 GHz - 32,576,964,737 instructions # 2.57 insn per cycle - 4.099999084 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 296) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.074541 sec + 12,658,239,152 cycles # 3.103 GHz + 32,575,305,813 instructions # 2.57 insn per cycle + 4.079991878 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 283) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.158145e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.082236e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.082236e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.156555e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.091845e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.091845e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.309714 sec - 10,251,966,583 cycles # 3.093 GHz - 24,505,268,154 instructions # 2.39 insn per cycle - 3.315131551 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1251) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.314636 sec + 10,278,588,296 cycles # 3.098 GHz + 24,513,614,042 instructions # 2.38 insn per cycle + 3.320201354 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1239) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.243411e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.297771e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.297771e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.349859e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.452580e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.452580e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.210802 sec - 9,128,872,534 cycles # 2.839 GHz - 16,941,891,489 instructions # 1.86 insn per cycle - 3.216298233 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1631) (512y: 0) (512z: 0) +TOTAL : 3.064425 sec + 9,146,466,774 cycles # 2.981 GHz + 16,932,841,121 instructions # 1.85 insn per cycle + 3.070184966 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1613) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.425644e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.597095e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.597095e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.396992e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.538162e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.538162e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.981437 sec - 8,897,923,450 cycles # 2.980 GHz - 16,357,694,772 instructions # 1.84 insn per cycle - 2.986734200 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1370) (512y: 139) (512z: 0) +TOTAL : 3.009688 sec + 8,963,425,840 cycles # 2.974 GHz + 16,348,307,376 instructions # 1.82 insn per cycle + 3.015194945 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1346) (512y: 139) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.086825e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.890582e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.890582e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.061526e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.857162e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.857162e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.406662 sec - 7,939,123,317 cycles # 2.328 GHz - 14,593,713,630 instructions # 1.84 insn per cycle - 3.412033100 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1015) (512y: 158) (512z: 955) +TOTAL : 3.444659 sec + 7,945,690,234 cycles # 2.304 GHz + 14,580,913,598 instructions # 1.84 insn per cycle + 3.450299245 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 991) (512y: 158) (512z: 955) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index fd9bfff43c..0785ba7dac 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-30_22:59:29 +DATE: 2023-11-03_13:48:46 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.828772e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.682181e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.097744e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.514678e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.572894e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.064311e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.664226 sec - 2,733,772,329 cycles # 3.032 GHz - 4,252,663,384 instructions # 1.56 insn per cycle - 0.964052579 seconds time elapsed +TOTAL : 0.670061 sec + 2,738,456,878 cycles # 3.035 GHz + 4,206,945,724 instructions # 1.54 insn per cycle + 0.963542841 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 @@ -76,16 +76,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.208707e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.120592e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.120592e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.236655e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.165602e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.165602e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.248886 sec - 9,853,414,421 cycles # 3.029 GHz - 25,456,960,363 instructions # 2.58 insn per cycle - 3.254418759 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 249) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.207737 sec + 9,869,351,452 cycles # 3.072 GHz + 25,456,720,619 instructions # 2.58 insn per cycle + 3.213513616 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 236) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.501986e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.851139e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.851139e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.512813e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.861138e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.861138e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.911448 sec - 8,944,084,931 cycles # 3.067 GHz - 21,514,467,636 instructions # 2.41 insn per cycle - 2.916830419 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1119) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.900152 sec + 9,028,151,733 cycles # 3.108 GHz + 21,522,265,772 instructions # 2.38 insn per cycle + 2.905750020 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1107) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.516582e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.815233e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.815233e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.524411e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.832678e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.832678e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.895060 sec - 8,584,608,205 cycles # 2.961 GHz - 15,829,588,796 instructions # 1.84 insn per cycle - 2.900312973 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1494) (512y: 0) (512z: 0) +TOTAL : 2.879170 sec + 8,575,957,214 cycles # 2.974 GHz + 15,820,070,406 instructions # 1.84 insn per cycle + 2.884745744 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1476) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.567342e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.937683e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.937683e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.568614e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.924961e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.924961e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.840443 sec - 8,453,151,631 cycles # 2.972 GHz - 15,539,804,379 instructions # 1.84 insn per cycle - 2.845864538 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1268) (512y: 139) (512z: 0) +TOTAL : 2.834564 sec + 8,467,631,862 cycles # 2.982 GHz + 15,519,332,660 instructions # 1.83 insn per cycle + 2.840188470 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1244) (512y: 139) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.242996e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.198969e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.198969e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.228071e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.164518e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.164518e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.196952 sec - 7,563,786,645 cycles # 2.363 GHz - 14,293,986,386 instructions # 1.89 insn per cycle - 3.202372288 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1041) (512y: 164) (512z: 874) +TOTAL : 3.208995 sec + 7,606,383,968 cycles # 2.367 GHz + 14,281,516,951 instructions # 1.88 insn per cycle + 3.214658818 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1017) (512y: 164) (512z: 874) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 794573167f..c1d51d5e2b 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-30_22:40:03 +DATE: 2023-11-03_13:11:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.637729e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.329324e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.277725e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.090897e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.078806e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.260039e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.554589 sec - 2,330,144,606 cycles # 3.018 GHz - 3,601,132,107 instructions # 1.55 insn per cycle - 0.831243618 seconds time elapsed +TOTAL : 0.585228 sec + 2,433,446,774 cycles # 2.999 GHz + 3,808,602,769 instructions # 1.57 insn per cycle + 0.883462810 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 @@ -76,16 +76,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.157878e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.372623e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.372623e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.166692e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.383327e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.383327e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.792880 sec - 17,806,778,218 cycles # 3.072 GHz - 43,613,328,536 instructions # 2.45 insn per cycle - 5.797645456 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.748719 sec + 17,847,326,756 cycles # 3.102 GHz + 43,555,210,437 instructions # 2.44 insn per cycle + 5.756388982 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 418) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.410827e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.674479e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.674479e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.393554e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.657719e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.657719e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.964239 sec - 9,221,580,079 cycles # 3.107 GHz - 21,925,566,713 instructions # 2.38 insn per cycle - 2.969030583 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.992868 sec + 9,247,144,257 cycles # 3.085 GHz + 21,931,320,878 instructions # 2.37 insn per cycle + 3.007580407 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1924) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.607797e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.989703e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.989703e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.534752e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.887537e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.887537e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.761029 sec - 8,275,934,414 cycles # 2.994 GHz - 15,591,456,247 instructions # 1.88 insn per cycle - 2.765970000 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) +TOTAL : 2.842369 sec + 8,296,474,164 cycles # 2.914 GHz + 15,586,874,856 instructions # 1.88 insn per cycle + 2.859206780 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2577) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.642139e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.064710e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.064710e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.604319e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.017823e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.017823e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.727414 sec - 8,176,411,188 cycles # 2.993 GHz - 15,433,931,357 instructions # 1.89 insn per cycle - 2.732285465 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) +TOTAL : 2.762625 sec + 8,242,291,698 cycles # 2.978 GHz + 15,423,882,127 instructions # 1.87 insn per cycle + 2.774867972 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2472) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.615030e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.018431e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.018431e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.657199e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.090947e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.090947e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.763565 sec - 6,616,617,623 cycles # 2.392 GHz - 12,870,037,151 instructions # 1.95 insn per cycle - 2.768592591 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) +TOTAL : 2.717847 sec + 6,617,027,405 cycles # 2.432 GHz + 12,861,770,185 instructions # 1.94 insn per cycle + 2.732059991 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1711) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index 840e18a636..1a3ded7611 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-30_23:10:08 +DATE: 2023-11-03_13:59:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.550916e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.988783e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.988783e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.290971e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.508237e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.508237e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.629479 sec - 5,634,078,784 cycles # 3.040 GHz - 10,242,090,703 instructions # 1.82 insn per cycle - 1.911926836 seconds time elapsed +TOTAL : 1.659289 sec + 5,780,451,529 cycles # 3.064 GHz + 10,396,790,543 instructions # 1.80 insn per cycle + 1.943549281 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost @@ -85,16 +85,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.148295e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.355569e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.355569e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.141808e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.350661e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.350661e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.938443 sec - 18,473,541,682 cycles # 3.108 GHz - 43,762,296,031 instructions # 2.37 insn per cycle - 5.944353296 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.967801 sec + 18,491,989,401 cycles # 3.096 GHz + 43,704,548,705 instructions # 2.36 insn per cycle + 5.973986343 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 418) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -113,16 +112,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.309795e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.452502e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.452502e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.273930e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.393611e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.393611e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.198061 sec - 9,971,648,922 cycles # 3.113 GHz - 23,260,266,333 instructions # 2.33 insn per cycle - 3.204013668 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.250354 sec + 9,995,547,177 cycles # 3.070 GHz + 23,265,528,960 instructions # 2.33 insn per cycle + 3.256463701 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1924) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -141,16 +139,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.489278e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.735887e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.735887e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.480521e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.709421e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.709421e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.993393 sec - 9,020,025,084 cycles # 3.008 GHz - 16,710,769,598 instructions # 1.85 insn per cycle - 2.999420475 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) +TOTAL : 3.007311 sec + 9,024,110,492 cycles # 2.995 GHz + 16,706,406,071 instructions # 1.85 insn per cycle + 3.013559701 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2577) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -169,16 +166,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.443126e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.672909e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.672909e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.505086e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.767088e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.767088e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.053646 sec - 8,967,225,018 cycles # 2.935 GHz - 16,558,461,611 instructions # 1.85 insn per cycle - 3.059730658 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) +TOTAL : 2.977230 sec + 8,950,702,512 cycles # 3.001 GHz + 16,543,195,202 instructions # 1.85 insn per cycle + 2.983414874 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2472) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -197,16 +193,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.522735e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.787130e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.787130e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.494614e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.765296e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.765296e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.967360 sec - 7,377,554,587 cycles # 2.482 GHz - 14,076,363,533 instructions # 1.91 insn per cycle - 2.973262874 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) +TOTAL : 2.993974 sec + 7,449,647,781 cycles # 2.484 GHz + 14,067,779,386 instructions # 1.89 insn per cycle + 3.000162317 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1711) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index 6773ae0a0d..370497cac6 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-30_23:22:52 +DATE: 2023-11-03_14:12:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.383036e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.202546e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.214819e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.314201e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.171400e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.214137e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 -TOTAL : 1.153231 sec - 4,139,430,436 cycles # 3.020 GHz - 6,625,012,847 instructions # 1.60 insn per cycle - 1.429182628 seconds time elapsed +TOTAL : 1.150300 sec + 4,173,208,289 cycles # 3.043 GHz + 6,635,894,357 instructions # 1.59 insn per cycle + 1.428443006 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 @@ -76,16 +76,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.154531e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.367995e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.367995e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.159655e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.375997e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.375997e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 6.132854 sec - 18,828,647,542 cycles # 3.068 GHz - 43,796,198,955 instructions # 2.33 insn per cycle - 6.137841998 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.103053 sec + 18,835,159,555 cycles # 3.084 GHz + 43,737,478,159 instructions # 2.32 insn per cycle + 6.108226259 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 418) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.373084e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.623301e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.623301e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.380206e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.638253e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.638253e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 3.327964 sec - 10,225,038,004 cycles # 3.069 GHz - 22,007,341,056 instructions # 2.15 insn per cycle - 3.333079465 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.321443 sec + 10,224,469,171 cycles # 3.074 GHz + 22,011,805,915 instructions # 2.15 insn per cycle + 3.326535999 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1924) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.563130e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.939831e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.939831e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.567060e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.927154e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.927154e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.124450 sec - 9,294,040,657 cycles # 2.971 GHz - 15,501,961,660 instructions # 1.67 insn per cycle - 3.129427880 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) +TOTAL : 3.130332 sec + 9,276,752,637 cycles # 2.959 GHz + 15,496,982,915 instructions # 1.67 insn per cycle + 3.135534079 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2577) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.589303e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.004535e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.004535e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.606068e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.017624e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.017624e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.111634 sec - 9,245,896,562 cycles # 2.968 GHz - 15,144,115,387 instructions # 1.64 insn per cycle - 3.116587905 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) +TOTAL : 3.086367 sec + 9,231,582,857 cycles # 2.987 GHz + 15,133,340,078 instructions # 1.64 insn per cycle + 3.091367178 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2472) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.625191e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.046723e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.046723e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.528786e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.869868e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.869868e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.076968 sec - 7,636,124,568 cycles # 2.478 GHz - 12,579,210,314 instructions # 1.65 insn per cycle - 3.082030809 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) +TOTAL : 3.188987 sec + 7,617,585,488 cycles # 2.385 GHz + 12,570,876,558 instructions # 1.65 insn per cycle + 3.194434066 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1711) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt index 72ae922772..65103394f2 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-30_23:19:38 +DATE: 2023-11-03_14:09:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.391937e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.221657e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.272523e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.323201e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.195679e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.287097e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.835423 sec - 3,175,610,956 cycles # 3.015 GHz - 6,448,969,321 instructions # 2.03 insn per cycle - 1.110247901 seconds time elapsed +TOTAL : 0.839275 sec + 3,258,064,410 cycles # 3.046 GHz + 6,600,815,561 instructions # 2.03 insn per cycle + 1.128693838 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 @@ -76,16 +76,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.166233e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.381715e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.381715e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.147689e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.360845e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.360845e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.750185 sec - 17,810,111,860 cycles # 3.095 GHz - 43,613,391,486 instructions # 2.45 insn per cycle - 5.755174931 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.843460 sec + 17,842,880,354 cycles # 3.051 GHz + 43,555,724,067 instructions # 2.44 insn per cycle + 5.848538265 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 418) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.400092e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.665371e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.665371e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.313187e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.558279e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.558279e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.978065 sec - 9,233,112,440 cycles # 3.096 GHz - 21,926,388,449 instructions # 2.37 insn per cycle - 2.983291462 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.090860 sec + 9,280,161,539 cycles # 2.999 GHz + 21,931,630,305 instructions # 2.36 insn per cycle + 3.095925319 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1924) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.550721e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.893542e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.893542e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.600147e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.974521e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.974521e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.818534 sec - 8,293,108,403 cycles # 2.938 GHz - 15,590,838,460 instructions # 1.88 insn per cycle - 2.823621763 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) +TOTAL : 2.770030 sec + 8,277,937,688 cycles # 2.984 GHz + 15,586,216,059 instructions # 1.88 insn per cycle + 2.775082359 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2577) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.601734e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.008891e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.008891e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.608908e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.023062e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.023062e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.769509 sec - 8,228,017,164 cycles # 2.967 GHz - 15,441,173,199 instructions # 1.88 insn per cycle - 2.774410515 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) +TOTAL : 2.758732 sec + 8,219,982,403 cycles # 2.975 GHz + 15,429,625,460 instructions # 1.88 insn per cycle + 2.763845157 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2472) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.613335e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.006719e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.006719e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.673753e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.118689e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.118689e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.769013 sec - 6,622,512,740 cycles # 2.389 GHz - 12,869,806,868 instructions # 1.94 insn per cycle - 2.774035451 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) +TOTAL : 2.699227 sec + 6,592,595,838 cycles # 2.439 GHz + 12,860,007,004 instructions # 1.95 insn per cycle + 2.704311392 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1711) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index c6afedfd67..c66b9e94cf 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-30_23:16:25 +DATE: 2023-11-03_14:06:16 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -51,14 +51,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.417354e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.176792e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.147986e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.319519e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.146808e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.143902e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.434919 sec - 5,054,404,953 cycles # 3.056 GHz - 9,199,574,501 instructions # 1.82 insn per cycle - 1.711405816 seconds time elapsed +TOTAL : 1.448649 sec + 5,087,365,027 cycles # 3.052 GHz + 9,310,766,276 instructions # 1.83 insn per cycle + 1.725195666 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -78,16 +78,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.168343e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.383929e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.383929e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.155200e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.369267e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.369267e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.739834 sec - 17,800,816,655 cycles # 3.099 GHz - 43,614,163,599 instructions # 2.45 insn per cycle - 5.744981592 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.802015 sec + 17,844,581,528 cycles # 3.074 GHz + 43,556,997,966 instructions # 2.44 insn per cycle + 5.807155409 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 418) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -105,16 +104,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.399660e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.661286e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.661286e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.407034e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.683536e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.683536e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.980617 sec - 9,247,779,186 cycles # 3.098 GHz - 21,925,450,174 instructions # 2.37 insn per cycle - 2.985625850 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.971545 sec + 9,217,486,095 cycles # 3.097 GHz + 21,930,311,629 instructions # 2.38 insn per cycle + 2.976586238 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1924) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -132,16 +130,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.594289e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.975137e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.975137e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.600726e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.979250e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.979250e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.771450 sec - 8,292,170,915 cycles # 2.989 GHz - 15,591,583,098 instructions # 1.88 insn per cycle - 2.776542292 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) +TOTAL : 2.769304 sec + 8,276,687,518 cycles # 2.984 GHz + 15,585,955,312 instructions # 1.88 insn per cycle + 2.774287197 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2577) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -159,16 +156,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.627498e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.045740e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.045740e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.628217e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.043704e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.043704e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.742639 sec - 8,207,084,430 cycles # 2.988 GHz - 15,434,630,659 instructions # 1.88 insn per cycle - 2.747705166 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) +TOTAL : 2.739955 sec + 8,198,710,916 cycles # 2.988 GHz + 15,428,862,262 instructions # 1.88 insn per cycle + 2.744927346 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2472) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -186,16 +182,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.561763e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.934099e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.934099e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.363138e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.573284e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.573284e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.816193 sec - 6,657,072,695 cycles # 2.360 GHz - 12,869,581,963 instructions # 1.93 insn per cycle - 2.821211378 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) +TOTAL : 3.037682 sec + 6,617,551,728 cycles # 2.175 GHz + 12,861,059,438 instructions # 1.94 insn per cycle + 3.043153629 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1711) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index 1fbb44ee5c..b24db0679a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-30_22:40:33 +DATE: 2023-11-03_13:12:16 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.642128e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.346376e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.326204e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.090597e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.086053e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.295941e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.551760 sec - 2,330,967,972 cycles # 3.025 GHz - 3,597,652,139 instructions # 1.54 insn per cycle - 0.827558698 seconds time elapsed +TOTAL : 0.595642 sec + 2,379,950,695 cycles # 2.907 GHz + 3,760,363,157 instructions # 1.58 insn per cycle + 0.889829124 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 @@ -76,16 +76,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.251222e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.502030e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.502030e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.252100e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.506607e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.506607e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.380932 sec - 16,725,808,463 cycles # 3.106 GHz - 41,372,320,155 instructions # 2.47 insn per cycle - 5.385666902 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.378652 sec + 16,714,293,164 cycles # 3.105 GHz + 41,313,255,981 instructions # 2.47 insn per cycle + 5.386196800 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 362) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.465709e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.828337e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.828337e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.402586e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.722326e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.722326e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.909540 sec - 8,989,559,108 cycles # 3.085 GHz - 21,229,913,779 instructions # 2.36 insn per cycle - 2.914409561 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1841) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.985937 sec + 9,062,093,173 cycles # 3.030 GHz + 21,236,533,356 instructions # 2.34 insn per cycle + 3.002020734 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1829) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.611896e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.015253e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.015253e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.478980e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.815371e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.815371e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.758608 sec - 8,224,469,886 cycles # 2.977 GHz - 15,425,098,172 instructions # 1.88 insn per cycle - 2.763474022 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2536) (512y: 0) (512z: 0) +TOTAL : 2.903147 sec + 8,298,189,603 cycles # 2.854 GHz + 15,421,650,856 instructions # 1.86 insn per cycle + 2.919415247 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2518) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.672314e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.144716e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.144716e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.666988e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.139402e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.139402e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.700190 sec - 8,077,137,333 cycles # 2.987 GHz - 15,243,670,128 instructions # 1.89 insn per cycle - 2.705044215 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2423) (512y: 8) (512z: 0) +TOTAL : 2.706988 sec + 8,127,237,805 cycles # 2.997 GHz + 15,233,409,972 instructions # 1.87 insn per cycle + 2.719726531 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2399) (512y: 8) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.668548e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.123575e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.123575e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.671852e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.132790e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.132790e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.707559 sec - 6,594,740,119 cycles # 2.432 GHz - 12,847,978,977 instructions # 1.95 insn per cycle - 2.712589393 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1705) (512y: 18) (512z: 1427) +TOTAL : 2.707953 sec + 6,591,494,668 cycles # 2.430 GHz + 12,839,585,857 instructions # 1.95 insn per cycle + 2.719283692 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1681) (512y: 18) (512z: 1427) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index 67d4c4703a..89d3cca091 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-30_22:59:57 +DATE: 2023-11-03_13:49:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.378185e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.223465e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.277552e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.311771e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.188006e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.262468e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.564996 sec - 2,374,437,058 cycles # 3.015 GHz - 3,681,105,263 instructions # 1.55 insn per cycle - 0.844506702 seconds time elapsed +TOTAL : 0.576457 sec + 2,389,540,020 cycles # 2.950 GHz + 3,688,400,686 instructions # 1.54 insn per cycle + 0.867059057 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 @@ -76,16 +76,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.758646e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.303527e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.303527e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.741075e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.279072e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.279072e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.935199 sec - 12,172,098,963 cycles # 3.090 GHz - 32,521,086,001 instructions # 2.67 insn per cycle - 3.940316830 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 312) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.966477 sec + 12,234,047,740 cycles # 3.081 GHz + 32,532,108,547 instructions # 2.66 insn per cycle + 3.971800905 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 299) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.825516e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.767678e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.767678e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.848238e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.804103e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.804103e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.583311 sec - 7,985,144,128 cycles # 3.086 GHz - 18,690,459,382 instructions # 2.34 insn per cycle - 2.588535770 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1554) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.569201 sec + 8,000,233,571 cycles # 3.109 GHz + 18,689,180,183 instructions # 2.34 insn per cycle + 2.574590277 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1542) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.954275e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.871745e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.871745e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.959458e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.878325e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.878325e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.483708 sec - 7,425,889,374 cycles # 2.985 GHz - 14,254,038,906 instructions # 1.92 insn per cycle - 2.488804683 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2237) (512y: 0) (512z: 0) +TOTAL : 2.477471 sec + 7,440,495,496 cycles # 2.998 GHz + 14,252,548,520 instructions # 1.92 insn per cycle + 2.482919486 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2219) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.010885e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.042869e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.042869e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.020321e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.073175e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.073175e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.441015 sec - 7,296,345,350 cycles # 2.984 GHz - 13,951,979,933 instructions # 1.91 insn per cycle - 2.446211141 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2096) (512y: 3) (512z: 0) +TOTAL : 2.430130 sec + 7,321,892,700 cycles # 3.008 GHz + 13,944,035,239 instructions # 1.90 insn per cycle + 2.435416551 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2072) (512y: 3) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.704736e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.262639e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.262639e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.722686e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.262580e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.262580e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.681048 sec - 6,545,536,751 cycles # 2.438 GHz - 13,421,408,114 instructions # 2.05 insn per cycle - 2.686191227 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2071) (512y: 1) (512z: 1198) +TOTAL : 2.661980 sec + 6,503,812,696 cycles # 2.440 GHz + 13,425,346,953 instructions # 2.06 insn per cycle + 2.667536363 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2047) (512y: 1) (512z: 1198) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index 19283f01af..15fa1a5fae 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-30_23:00:24 +DATE: 2023-11-03_13:49:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.382020e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.233872e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.306668e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.308009e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.196636e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.296666e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.567071 sec - 2,382,516,653 cycles # 3.027 GHz - 3,627,793,245 instructions # 1.52 insn per cycle - 0.846725967 seconds time elapsed +TOTAL : 0.571463 sec + 2,377,347,881 cycles # 2.993 GHz + 3,694,824,692 instructions # 1.55 insn per cycle + 0.851108222 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 @@ -76,16 +76,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.226592e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.215249e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.215249e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.270957e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.300156e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.300156e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.194239 sec - 9,429,587,787 cycles # 2.955 GHz - 25,311,541,209 instructions # 2.68 insn per cycle - 3.199238329 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 263) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.134664 sec + 9,456,512,355 cycles # 3.013 GHz + 25,299,662,978 instructions # 2.68 insn per cycle + 3.139956692 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 250) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.045220e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.672632e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.672632e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.217435e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.962242e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.962242e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.438450 sec - 7,222,375,461 cycles # 2.957 GHz - 16,901,888,826 instructions # 2.34 insn per cycle - 2.443522059 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1359) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.315359 sec + 7,212,398,641 cycles # 3.109 GHz + 16,900,659,302 instructions # 2.34 insn per cycle + 2.320845440 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1347) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.117489e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.340330e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.340330e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.041323e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.188628e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.188628e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.367857 sec - 7,080,165,072 cycles # 2.985 GHz - 13,619,148,647 instructions # 1.92 insn per cycle - 2.373101167 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2060) (512y: 0) (512z: 0) +TOTAL : 2.427336 sec + 7,137,617,302 cycles # 2.935 GHz + 13,617,817,493 instructions # 1.91 insn per cycle + 2.432974579 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2042) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.158861e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.457557e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.457557e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.162263e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.461570e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.461570e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.340958 sec - 7,015,657,902 cycles # 2.991 GHz - 13,430,116,599 instructions # 1.91 insn per cycle - 2.346201265 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1945) (512y: 4) (512z: 0) +TOTAL : 2.336082 sec + 7,036,323,508 cycles # 3.006 GHz + 13,428,129,683 instructions # 1.91 insn per cycle + 2.341501579 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1921) (512y: 4) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.805452e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.487556e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.487556e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.792413e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.457184e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.457184e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.596913 sec - 6,326,204,144 cycles # 2.432 GHz - 13,152,923,009 instructions # 2.08 insn per cycle - 2.602133383 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2029) (512y: 1) (512z: 1083) +TOTAL : 2.606747 sec + 6,335,932,322 cycles # 2.428 GHz + 13,156,642,126 instructions # 2.08 insn per cycle + 2.612124224 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2005) (512y: 1) (512z: 1083) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 2812ac0b53..c8a1826ac9 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-30_22:41:02 +DATE: 2023-11-03_13:12:46 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.988074e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.909786e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.002115e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.420580e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.225288e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.974352e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.648456 sec - 2,594,556,028 cycles # 2.963 GHz - 4,040,130,517 instructions # 1.56 insn per cycle - 0.932684475 seconds time elapsed +TOTAL : 0.696522 sec + 2,828,368,151 cycles # 3.033 GHz + 4,459,128,504 instructions # 1.58 insn per cycle + 0.999812364 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -76,16 +76,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.097041e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.280907e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.280907e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.118611e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.307197e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.307197e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.134086 sec - 18,716,855,271 cycles # 3.049 GHz - 44,287,379,187 instructions # 2.37 insn per cycle - 6.139244754 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 439) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.013136 sec + 18,776,569,958 cycles # 3.120 GHz + 44,190,473,826 instructions # 2.35 insn per cycle + 6.021440922 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 426) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.751235e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.312434e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.312434e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.736065e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.298893e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.298893e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.986301 sec - 12,329,366,151 cycles # 3.090 GHz - 30,959,681,512 instructions # 2.51 insn per cycle - 3.991578409 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1685) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.023958 sec + 12,426,947,550 cycles # 3.084 GHz + 30,958,210,972 instructions # 2.49 insn per cycle + 4.038824000 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1673) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.078430e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.890762e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.890762e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.009814e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.796099e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.796099e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.414968 sec - 10,142,587,821 cycles # 2.967 GHz - 19,399,691,686 instructions # 1.91 insn per cycle - 3.420034144 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2146) (512y: 0) (512z: 0) +TOTAL : 3.529040 sec + 10,150,198,206 cycles # 2.872 GHz + 19,380,941,090 instructions # 1.91 insn per cycle + 3.542539797 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2128) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.176833e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.087447e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.087447e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.071616e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.920585e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.920585e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.276978 sec - 9,769,804,904 cycles # 2.978 GHz - 18,982,166,653 instructions # 1.94 insn per cycle - 3.282084297 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1859) (512y: 188) (512z: 0) +TOTAL : 3.439842 sec + 9,801,969,382 cycles # 2.845 GHz + 18,964,241,159 instructions # 1.93 insn per cycle + 3.452522056 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1835) (512y: 188) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.950559e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.628157e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.628157e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.835716e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.462614e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.462614e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.618415 sec - 8,360,946,549 cycles # 2.308 GHz - 15,064,923,745 instructions # 1.80 insn per cycle - 3.623523733 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1023) (512y: 155) (512z: 1316) +TOTAL : 3.831470 sec + 8,408,634,839 cycles # 2.192 GHz + 15,052,624,754 instructions # 1.79 insn per cycle + 3.846787066 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 999) (512y: 155) (512z: 1316) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index 995b7d3ff6..b5a3475756 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-10-30_22:41:35 +DATE: 2023-11-03_13:13:19 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.003728e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.948322e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.086359e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.422335e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.236601e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.020207e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.644574 sec - 2,626,626,234 cycles # 3.016 GHz - 4,025,842,605 instructions # 1.53 insn per cycle - 0.931663719 seconds time elapsed +TOTAL : 0.691416 sec + 2,819,877,381 cycles # 3.039 GHz + 4,403,256,288 instructions # 1.56 insn per cycle + 1.000837396 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 @@ -76,16 +76,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.152263e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.357614e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.357614e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.161899e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.369033e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.369033e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.857814 sec - 17,950,593,870 cycles # 3.062 GHz - 42,536,443,801 instructions # 2.37 insn per cycle - 5.862908022 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 421) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.806095 sec + 18,032,240,191 cycles # 3.104 GHz + 42,441,543,294 instructions # 2.35 insn per cycle + 5.813826022 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 408) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.781795e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.366816e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.366816e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.753631e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.336727e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.336727e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.925776 sec - 12,158,268,283 cycles # 3.094 GHz - 30,266,774,018 instructions # 2.49 insn per cycle - 3.930856861 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1692) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.992523 sec + 12,184,645,859 cycles # 3.048 GHz + 30,264,428,187 instructions # 2.48 insn per cycle + 4.007432791 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1680) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.107093e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.951411e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.951411e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.094154e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.931089e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.931089e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.373013 sec - 10,033,049,034 cycles # 2.971 GHz - 19,281,562,841 instructions # 1.92 insn per cycle - 3.378152041 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2162) (512y: 0) (512z: 0) +TOTAL : 3.390247 sec + 10,038,085,304 cycles # 2.956 GHz + 19,265,024,477 instructions # 1.92 insn per cycle + 3.403373491 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2144) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.193939e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.106247e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.106247e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.206540e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.140055e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.140055e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.253124 sec - 9,643,205,186 cycles # 2.961 GHz - 18,781,851,111 instructions # 1.95 insn per cycle - 3.258335142 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1833) (512y: 191) (512z: 0) +TOTAL : 3.238082 sec + 9,649,245,798 cycles # 2.975 GHz + 18,764,779,113 instructions # 1.94 insn per cycle + 3.251590176 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1809) (512y: 191) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.953784e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.646715e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.646715e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.991486e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.705796e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.705796e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.613479 sec - 8,269,801,205 cycles # 2.286 GHz - 14,988,618,301 instructions # 1.81 insn per cycle - 3.618555203 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1020) (512y: 156) (512z: 1305) +TOTAL : 3.545115 sec + 8,268,426,013 cycles # 2.329 GHz + 14,974,356,151 instructions # 1.81 insn per cycle + 3.557224656 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 996) (512y: 156) (512z: 1305) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index d91837223d..86b81b3912 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-30_22:42:07 +DATE: 2023-11-03_13:13:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.280545e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.182594e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.276893e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.014510e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.133907e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.272655e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.509947 sec - 2,245,406,404 cycles # 3.004 GHz - 3,238,247,605 instructions # 1.44 insn per cycle - 0.806066214 seconds time elapsed +TOTAL : 0.524203 sec + 2,315,774,867 cycles # 3.023 GHz + 3,262,855,573 instructions # 1.41 insn per cycle + 0.842689521 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -76,16 +76,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.122811e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.185621e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.185621e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.188691e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.254188e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.254188e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.047558 sec - 15,156,486,389 cycles # 3.002 GHz - 38,440,455,348 instructions # 2.54 insn per cycle - 5.052769245 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.894044 sec + 15,078,170,131 cycles # 3.078 GHz + 38,412,995,351 instructions # 2.55 insn per cycle + 4.902013341 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.702211e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.903740e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.903740e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.733567e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.939331e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.939331e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.936396 sec - 9,105,668,983 cycles # 3.097 GHz - 24,592,502,332 instructions # 2.70 insn per cycle - 2.941522608 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.910573 sec + 9,058,606,298 cycles # 3.107 GHz + 24,587,125,160 instructions # 2.71 insn per cycle + 2.925346801 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.960855e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.471891e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.471891e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.937642e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.460811e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.460811e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.862437 sec - 5,450,005,076 cycles # 2.919 GHz - 11,265,361,123 instructions # 2.07 insn per cycle - 1.867565340 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) +TOTAL : 1.867147 sec + 5,466,849,614 cycles # 2.919 GHz + 11,257,507,450 instructions # 2.06 insn per cycle + 1.885058814 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2358) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.618799e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.273168e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.273168e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.384127e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.004039e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.004039e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.686602 sec - 4,953,592,768 cycles # 2.929 GHz - 10,570,210,152 instructions # 2.13 insn per cycle - 1.691821851 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) +TOTAL : 1.746179 sec + 4,957,706,445 cycles # 2.830 GHz + 10,563,056,082 instructions # 2.13 insn per cycle + 1.758886631 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2053) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.081730e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.316605e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.316605e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.105961e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.350897e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.350897e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.673039 sec - 5,387,031,443 cycles # 2.012 GHz - 7,805,348,613 instructions # 1.45 insn per cycle - 2.678234005 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) +TOTAL : 2.655795 sec + 5,371,604,588 cycles # 2.019 GHz + 7,798,605,112 instructions # 1.45 insn per cycle + 2.670392027 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1422) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index 5370f7c935..4806d829cd 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-30_23:10:39 +DATE: 2023-11-03_14:00:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.655944e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.023463e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.023463e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.553961e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.874598e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.874598e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.795812 sec - 3,140,135,292 cycles # 3.032 GHz - 4,880,880,783 instructions # 1.55 insn per cycle - 1.093165843 seconds time elapsed +TOTAL : 0.804780 sec + 3,169,048,208 cycles # 3.028 GHz + 4,880,834,292 instructions # 1.54 insn per cycle + 1.104220884 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost @@ -85,16 +85,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.190219e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.253906e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.253906e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.184144e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.247953e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.247953e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.971251 sec - 15,474,705,468 cycles # 3.109 GHz - 38,496,723,540 instructions # 2.49 insn per cycle - 4.977932153 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.978186 sec + 15,425,389,553 cycles # 3.095 GHz + 38,473,246,448 instructions # 2.49 insn per cycle + 4.984877201 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -113,16 +112,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.690707e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.889062e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.889062e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.689714e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.892865e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.892865e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.021057 sec - 9,434,067,456 cycles # 3.117 GHz - 24,774,366,858 instructions # 2.63 insn per cycle - 3.027514850 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.021358 sec + 9,403,258,777 cycles # 3.107 GHz + 24,770,539,388 instructions # 2.63 insn per cycle + 3.028074019 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -141,16 +139,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.629438e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.092140e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.092140e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.853118e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.356995e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.356995e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.049046 sec - 5,805,137,288 cycles # 2.825 GHz - 11,552,400,768 instructions # 1.99 insn per cycle - 2.055626829 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) +TOTAL : 1.969874 sec + 5,810,125,038 cycles # 2.941 GHz + 11,543,676,632 instructions # 1.99 insn per cycle + 1.976428599 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2358) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -169,16 +166,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.394764e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.988019e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.988019e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.501103e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.131136e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.131136e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.821677 sec - 5,305,081,925 cycles # 2.904 GHz - 10,857,512,913 instructions # 2.05 insn per cycle - 1.828081824 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) +TOTAL : 1.790886 sec + 5,296,213,039 cycles # 2.947 GHz + 10,850,736,565 instructions # 2.05 insn per cycle + 1.797661712 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2053) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -197,16 +193,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.993136e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.216683e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.216683e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.013399e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.249522e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.249522e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.811888 sec - 5,748,853,229 cycles # 2.042 GHz - 8,049,831,971 instructions # 1.40 insn per cycle - 2.818538068 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) +TOTAL : 2.796200 sec + 5,717,704,345 cycles # 2.041 GHz + 8,043,694,509 instructions # 1.41 insn per cycle + 2.803146814 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1422) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index bb48e05236..3d7587af5d 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-30_23:23:24 +DATE: 2023-11-03_14:13:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.736795e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.161054e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.269806e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.580980e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.155500e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270417e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 0.615761 sec - 2,516,032,944 cycles # 2.984 GHz - 3,646,627,135 instructions # 1.45 insn per cycle - 0.900193240 seconds time elapsed +TOTAL : 0.614016 sec + 2,545,446,825 cycles # 3.021 GHz + 3,704,672,520 instructions # 1.46 insn per cycle + 0.900418326 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -76,16 +76,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.191283e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.255076e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.255076e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.195203e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.261582e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.261582e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.949371 sec - 15,346,914,541 cycles # 3.098 GHz - 38,453,059,865 instructions # 2.51 insn per cycle - 4.954545090 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.938760 sec + 15,225,135,904 cycles # 3.080 GHz + 38,429,365,378 instructions # 2.52 insn per cycle + 4.944153491 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.699240e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.903594e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.903594e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.730675e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.936865e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.936865e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.998035 sec - 9,284,549,452 cycles # 3.092 GHz - 24,590,432,125 instructions # 2.65 insn per cycle - 3.003369046 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.970349 sec + 9,231,773,469 cycles # 3.103 GHz + 24,585,772,995 instructions # 2.66 insn per cycle + 2.975730961 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.897632e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.421703e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.421703e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.941104e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.468310e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.468310e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.941152 sec - 5,665,929,312 cycles # 2.912 GHz - 11,247,969,812 instructions # 1.99 insn per cycle - 1.946452907 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) +TOTAL : 1.923604 sec + 5,670,197,148 cycles # 2.941 GHz + 11,239,356,426 instructions # 1.98 insn per cycle + 1.928877172 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2358) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.630334e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.280700e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.280700e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.637555e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.290130e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.290130e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.745934 sec - 5,133,074,927 cycles # 2.932 GHz - 10,520,747,164 instructions # 2.05 insn per cycle - 1.751320373 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) +TOTAL : 1.739458 sec + 5,136,547,861 cycles # 2.945 GHz + 10,513,142,920 instructions # 2.05 insn per cycle + 1.744858969 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2053) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.918342e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.141024e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.141024e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.031295e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.258517e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.258517e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.843108 sec - 5,573,130,501 cycles # 1.957 GHz - 7,754,567,913 instructions # 1.39 insn per cycle - 2.848428437 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) +TOTAL : 2.760441 sec + 5,552,660,719 cycles # 2.009 GHz + 7,747,208,324 instructions # 1.40 insn per cycle + 2.765757782 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1422) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt index 3fd09fe61e..a96e6be131 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-30_23:20:08 +DATE: 2023-11-03_14:10:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.737517e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.161761e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270287e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.583700e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.153230e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.267951e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.551873 sec - 2,358,028,619 cycles # 2.998 GHz - 3,692,132,213 instructions # 1.57 insn per cycle - 0.846295423 seconds time elapsed +TOTAL : 0.558026 sec + 2,313,505,925 cycles # 2.945 GHz + 3,521,498,010 instructions # 1.52 insn per cycle + 0.845028446 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -76,16 +76,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.184443e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.248148e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.248148e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.178518e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.242243e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.242243e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.904538 sec - 15,160,595,332 cycles # 3.088 GHz - 38,436,526,776 instructions # 2.54 insn per cycle - 4.910021872 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.915746 sec + 15,058,217,872 cycles # 3.061 GHz + 38,413,553,191 instructions # 2.55 insn per cycle + 4.921088360 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.693975e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.894111e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.894111e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.728983e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.933272e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.933272e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.943132 sec - 9,120,478,043 cycles # 3.094 GHz - 24,590,860,131 instructions # 2.70 insn per cycle - 2.948509619 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.913226 sec + 9,056,484,610 cycles # 3.105 GHz + 24,585,808,961 instructions # 2.71 insn per cycle + 2.918420692 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.916332e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.436269e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.436269e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.777415e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.279308e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.279308e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.877491 sec - 5,483,595,754 cycles # 2.914 GHz - 11,265,750,688 instructions # 2.05 insn per cycle - 1.882860646 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) +TOTAL : 1.916389 sec + 5,465,259,492 cycles # 2.845 GHz + 11,256,916,542 instructions # 2.06 insn per cycle + 1.921736653 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2358) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.553762e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.202447e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.202447e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.574998e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.221322e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.221322e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.704603 sec - 4,969,259,538 cycles # 2.907 GHz - 10,571,999,117 instructions # 2.13 insn per cycle - 1.710109506 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) +TOTAL : 1.694918 sec + 4,974,823,544 cycles # 2.928 GHz + 10,564,942,659 instructions # 2.12 insn per cycle + 1.700257525 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2053) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.915873e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.132745e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.132745e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.100546e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.341902e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.341902e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.784277 sec - 5,389,942,641 cycles # 1.933 GHz - 7,806,239,729 instructions # 1.45 insn per cycle - 2.789657703 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) +TOTAL : 2.657429 sec + 5,396,015,009 cycles # 2.027 GHz + 7,797,762,335 instructions # 1.45 insn per cycle + 2.662830126 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1422) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index d894a954b8..854bbd9f01 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-30_23:16:56 +DATE: 2023-11-03_14:06:46 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -51,14 +51,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.718006e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.158908e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.267607e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.828614e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.157784e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.273520e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.705827 sec - 2,822,729,450 cycles # 3.034 GHz - 4,418,624,178 instructions # 1.57 insn per cycle - 0.990011938 seconds time elapsed +TOTAL : 0.705831 sec + 2,807,624,090 cycles # 3.012 GHz + 4,395,187,189 instructions # 1.57 insn per cycle + 0.990782968 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -78,16 +78,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.186659e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.250039e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.250039e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.204483e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.269768e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.269768e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.899766 sec - 15,151,167,976 cycles # 3.089 GHz - 38,437,118,151 instructions # 2.54 insn per cycle - 4.905336258 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.857298 sec + 15,060,047,176 cycles # 3.098 GHz + 38,412,999,521 instructions # 2.55 insn per cycle + 4.862576188 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -105,16 +104,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.632977e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.833738e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.833738e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.719132e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.926557e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.926557e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.990641 sec - 9,104,000,531 cycles # 3.040 GHz - 24,591,172,861 instructions # 2.70 insn per cycle - 2.995857286 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.921866 sec + 9,063,981,939 cycles # 3.098 GHz + 24,586,733,507 instructions # 2.71 insn per cycle + 2.927309733 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -132,16 +130,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.956023e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.485444e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.485444e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.836602e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.339797e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.339797e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.865029 sec - 5,485,610,630 cycles # 2.934 GHz - 11,265,262,528 instructions # 2.05 insn per cycle - 1.870379155 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) +TOTAL : 1.897012 sec + 5,477,510,588 cycles # 2.881 GHz + 11,256,805,143 instructions # 2.06 insn per cycle + 1.902283714 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2358) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -159,16 +156,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.617372e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.266189e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.266189e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.588100e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.236111e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.236111e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.689765 sec - 4,937,700,060 cycles # 2.914 GHz - 10,571,493,395 instructions # 2.14 insn per cycle - 1.695143677 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) +TOTAL : 1.691889 sec + 4,967,386,818 cycles # 2.928 GHz + 10,562,108,549 instructions # 2.13 insn per cycle + 1.697331756 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2053) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -186,16 +182,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.099579e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.343259e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.343259e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.004753e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.234505e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.234505e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.663038 sec - 5,397,595,899 cycles # 2.025 GHz - 7,805,657,902 instructions # 1.45 insn per cycle - 2.668426378 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) +TOTAL : 2.721009 sec + 5,373,397,763 cycles # 1.972 GHz + 7,799,884,708 instructions # 1.45 insn per cycle + 2.726823881 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1422) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index d3de8228b3..097514ff9d 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-30_22:42:34 +DATE: 2023-11-03_13:14:19 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.271764e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.178316e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270685e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.009931e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.129778e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.265439e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.509733 sec - 2,227,538,194 cycles # 3.008 GHz - 3,225,921,656 instructions # 1.45 insn per cycle - 0.797383843 seconds time elapsed +TOTAL : 0.527424 sec + 2,299,489,431 cycles # 3.016 GHz + 3,267,542,464 instructions # 1.42 insn per cycle + 0.837651469 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 @@ -76,16 +76,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.204961e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.270121e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.270121e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.224210e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.289856e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.289856e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.859233 sec - 15,030,887,746 cycles # 3.091 GHz - 40,165,178,900 instructions # 2.67 insn per cycle - 4.864240792 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.815616 sec + 14,985,071,290 cycles # 3.108 GHz + 40,140,432,395 instructions # 2.68 insn per cycle + 4.823429688 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.793268e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.013838e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.013838e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.945464e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.176814e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.176814e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.868683 sec - 8,677,905,475 cycles # 3.025 GHz - 23,687,254,858 instructions # 2.73 insn per cycle - 2.873954741 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2069) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.760471 sec + 8,597,514,748 cycles # 3.108 GHz + 23,680,256,209 instructions # 2.75 insn per cycle + 2.775860119 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2057) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.272133e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.685740e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.685740e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.299934e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.716170e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.716170e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.093033 sec - 6,095,283,841 cycles # 2.906 GHz - 13,076,094,976 instructions # 2.15 insn per cycle - 2.098361435 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2546) (512y: 0) (512z: 0) +TOTAL : 2.079179 sec + 6,095,655,744 cycles # 2.924 GHz + 13,066,539,755 instructions # 2.14 insn per cycle + 2.094091215 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2528) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.561590e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.013027e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.013027e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.576810e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.033591e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.033591e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.988737 sec - 5,807,094,296 cycles # 2.913 GHz - 12,333,429,397 instructions # 2.12 insn per cycle - 1.994110135 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2096) (512y: 294) (512z: 0) +TOTAL : 1.980357 sec + 5,802,077,453 cycles # 2.922 GHz + 12,325,865,262 instructions # 2.12 insn per cycle + 1.992793326 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2072) (512y: 294) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.770161e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.968869e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.968869e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.772958e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.973447e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.973447e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.883990 sec - 5,806,828,747 cycles # 2.010 GHz - 9,613,660,096 instructions # 1.66 insn per cycle - 2.889230202 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1510) (512y: 209) (512z: 1971) +TOTAL : 2.881095 sec + 5,824,338,366 cycles # 2.018 GHz + 9,607,054,312 instructions # 1.65 insn per cycle + 2.894349646 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1486) (512y: 209) (512z: 1971) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 30aaca97e3..3d76a3b1d9 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-30_23:00:49 +DATE: 2023-11-03_13:50:05 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.730699e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.161168e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.269447e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.578660e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.159605e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.274665e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.516638 sec - 2,238,038,759 cycles # 3.003 GHz - 3,164,086,777 instructions # 1.41 insn per cycle - 0.804338452 seconds time elapsed +TOTAL : 0.519206 sec + 2,269,803,995 cycles # 3.023 GHz + 3,192,192,477 instructions # 1.41 insn per cycle + 0.807995850 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -76,16 +76,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.561892e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.650324e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.650324e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.560816e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.649704e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.649704e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.199726 sec - 13,022,340,636 cycles # 3.098 GHz - 34,407,560,979 instructions # 2.64 insn per cycle - 4.205140576 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 686) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.195191 sec + 13,007,382,962 cycles # 3.097 GHz + 34,380,464,155 instructions # 2.64 insn per cycle + 4.200870709 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 673) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.072749e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.213016e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.213016e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.116747e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.258719e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.258719e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.519661 sec - 10,617,291,793 cycles # 3.015 GHz - 24,026,447,750 instructions # 2.26 insn per cycle - 3.525235539 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.466705 sec + 10,603,168,617 cycles # 3.057 GHz + 24,017,012,918 instructions # 2.27 insn per cycle + 3.472435796 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2570) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.849441e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.192796e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.192796e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.867142e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.209093e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.209093e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.266906 sec - 6,599,977,627 cycles # 2.905 GHz - 12,414,641,303 instructions # 1.88 insn per cycle - 2.272301949 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3156) (512y: 0) (512z: 0) +TOTAL : 2.254766 sec + 6,618,154,747 cycles # 2.929 GHz + 12,407,626,824 instructions # 1.87 insn per cycle + 2.260283575 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3138) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.137878e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.521965e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.521965e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.186031e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.579450e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.579450e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.143814 sec - 6,256,595,870 cycles # 2.912 GHz - 11,588,487,458 instructions # 1.85 insn per cycle - 2.149310332 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2692) (512y: 239) (512z: 0) +TOTAL : 2.122627 sec + 6,235,587,637 cycles # 2.931 GHz + 11,579,392,375 instructions # 1.86 insn per cycle + 2.128339591 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2668) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.124446e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.363360e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.363360e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.159380e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.403563e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.403563e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.646562 sec - 5,351,277,730 cycles # 2.019 GHz - 9,309,073,211 instructions # 1.74 insn per cycle - 2.651978158 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2116) (512y: 282) (512z: 1958) +TOTAL : 2.621991 sec + 5,330,386,751 cycles # 2.029 GHz + 9,301,874,338 instructions # 1.75 insn per cycle + 2.627733477 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2092) (512y: 282) (512z: 1958) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index ce4e266692..eda142a51a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-30_23:01:17 +DATE: 2023-11-03_13:50:32 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.729487e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.162433e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270183e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.570782e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.152219e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.266830e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.516858 sec - 2,237,295,030 cycles # 2.996 GHz - 3,174,562,297 instructions # 1.42 insn per cycle - 0.805124267 seconds time elapsed +TOTAL : 0.518066 sec + 2,271,828,197 cycles # 3.023 GHz + 3,244,066,512 instructions # 1.43 insn per cycle + 0.808642452 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 @@ -76,16 +76,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.696103e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.793176e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.793176e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.712334e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.810278e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.810278e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.993532 sec - 12,372,967,111 cycles # 3.095 GHz - 35,058,726,353 instructions # 2.83 insn per cycle - 3.998904701 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 457) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.969060 sec + 12,332,764,240 cycles # 3.103 GHz + 35,033,157,193 instructions # 2.84 insn per cycle + 3.974712010 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 444) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.074608e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.212813e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.212813e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.128417e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.275024e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.275024e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.515611 sec - 10,695,053,716 cycles # 3.038 GHz - 23,101,257,936 instructions # 2.16 insn per cycle - 3.521049881 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2363) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.453917 sec + 10,684,583,355 cycles # 3.089 GHz + 23,090,903,403 instructions # 2.16 insn per cycle + 3.459546938 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2351) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.247576e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.650304e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.650304e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.247993e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.651435e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.651435e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.101997 sec - 6,159,177,623 cycles # 2.924 GHz - 11,969,990,350 instructions # 1.94 insn per cycle - 2.107463553 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2511) (512y: 0) (512z: 0) +TOTAL : 2.098516 sec + 6,166,838,461 cycles # 2.931 GHz + 11,963,065,848 instructions # 1.94 insn per cycle + 2.104346902 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2493) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.036588e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.425695e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.425695e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.231478e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.639448e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.639448e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.188313 sec - 6,014,477,005 cycles # 2.742 GHz - 11,143,899,041 instructions # 1.85 insn per cycle - 2.193874742 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2128) (512y: 174) (512z: 0) +TOTAL : 2.106732 sec + 6,010,784,913 cycles # 2.846 GHz + 11,136,301,440 instructions # 1.85 insn per cycle + 2.112515993 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2104) (512y: 174) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.156544e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.400305e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.400305e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.031094e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.264647e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.264647e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.627559 sec - 5,253,056,551 cycles # 1.996 GHz - 9,033,435,385 instructions # 1.72 insn per cycle - 2.633049022 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1651) (512y: 208) (512z: 1567) +TOTAL : 2.703137 sec + 5,239,430,875 cycles # 1.935 GHz + 9,026,740,696 instructions # 1.72 insn per cycle + 2.708889136 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1627) (512y: 208) (512z: 1567) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 6da43fd641..29c708926a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-30_22:43:01 +DATE: 2023-11-03_13:14:46 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.106542e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.720684e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.980250e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.196933e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.559127e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.926612e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.468593 sec - 2,057,005,401 cycles # 2.993 GHz - 2,917,040,882 instructions # 1.42 insn per cycle - 0.745160706 seconds time elapsed +TOTAL : 0.487537 sec + 2,076,343,494 cycles # 2.909 GHz + 2,953,975,768 instructions # 1.42 insn per cycle + 0.786754642 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 @@ -76,16 +76,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.329290e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.404591e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.404591e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.364053e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.441665e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.441665e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.586315 sec - 14,172,393,957 cycles # 3.087 GHz - 38,391,636,412 instructions # 2.71 insn per cycle - 4.591225371 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.516750 sec + 14,091,556,001 cycles # 3.117 GHz + 38,372,630,771 instructions # 2.72 insn per cycle + 4.524612799 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 574) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.089249e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.493979e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.493979e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.244340e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.673304e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.673304e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.144092 sec - 6,471,307,430 cycles # 3.012 GHz - 15,829,826,382 instructions # 2.45 insn per cycle - 2.149125793 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.081299 sec + 6,470,101,875 cycles # 3.101 GHz + 15,825,349,191 instructions # 2.45 insn per cycle + 2.096048134 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2677) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.905728e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.021084e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.021084e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.485632e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.088212e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.088212e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.264061 sec - 3,460,763,676 cycles # 2.728 GHz - 7,607,180,903 instructions # 2.20 insn per cycle - 1.269495301 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) +TOTAL : 1.189119 sec + 3,458,210,229 cycles # 2.896 GHz + 7,599,552,591 instructions # 2.20 insn per cycle + 1.205083556 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3033) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.027805e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.193250e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.193250e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.016158e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.185590e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.185590e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.102476 sec - 3,246,168,576 cycles # 2.933 GHz - 7,215,794,390 instructions # 2.22 insn per cycle - 1.107709326 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) +TOTAL : 1.114621 sec + 3,253,189,150 cycles # 2.904 GHz + 7,207,637,696 instructions # 2.22 insn per cycle + 1.131215020 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2826) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.600644e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.456495e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.456495e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.475166e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.307198e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.307198e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.462918 sec - 3,063,851,596 cycles # 2.088 GHz - 5,846,117,154 instructions # 1.91 insn per cycle - 1.467990395 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) +TOTAL : 1.483668 sec + 3,072,167,998 cycles # 2.063 GHz + 5,838,885,052 instructions # 1.90 insn per cycle + 1.496320265 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2340) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index 7f7e1d606d..25ff2590ad 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-30_23:11:07 +DATE: 2023-11-03_14:00:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.366832e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.822977e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.822977e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.151426e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.469883e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.469883e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.653541 sec - 2,694,324,767 cycles # 3.042 GHz - 4,193,830,258 instructions # 1.56 insn per cycle - 0.942698763 seconds time elapsed +TOTAL : 0.663313 sec + 2,706,340,617 cycles # 3.037 GHz + 4,165,256,765 instructions # 1.54 insn per cycle + 0.947646628 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost @@ -85,16 +85,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.342376e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.418135e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.418135e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.339221e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.415226e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.415226e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.602060 sec - 14,334,608,868 cycles # 3.111 GHz - 38,435,670,602 instructions # 2.68 insn per cycle - 4.608235736 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.604104 sec + 14,275,638,221 cycles # 3.097 GHz + 38,413,171,238 instructions # 2.69 insn per cycle + 4.610228258 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 574) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -113,16 +112,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.213893e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.643183e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.643183e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.196913e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.615655e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.615655e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.139790 sec - 6,672,789,863 cycles # 3.111 GHz - 16,109,837,908 instructions # 2.41 insn per cycle - 2.145928460 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.144006 sec + 6,663,102,745 cycles # 3.100 GHz + 16,104,440,418 instructions # 2.42 insn per cycle + 2.150194082 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2677) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -141,16 +139,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.467509e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.085520e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.085520e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.436831e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.082724e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.082724e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.235887 sec - 3,669,653,224 cycles # 2.956 GHz - 7,844,588,977 instructions # 2.14 insn per cycle - 1.241993093 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) +TOTAL : 1.235899 sec + 3,661,958,938 cycles # 2.949 GHz + 7,836,314,271 instructions # 2.14 insn per cycle + 1.242239596 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3033) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -169,16 +166,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.015093e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.176261e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.176261e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.010128e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.170283e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.170283e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.161149 sec - 3,452,043,722 cycles # 2.960 GHz - 7,453,312,449 instructions # 2.16 insn per cycle - 1.167242704 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) +TOTAL : 1.161869 sec + 3,443,334,261 cycles # 2.950 GHz + 7,445,199,482 instructions # 2.16 insn per cycle + 1.168103439 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2826) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -197,16 +193,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.587543e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.430774e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.430774e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.508269e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.333771e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.333771e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.510256 sec - 3,262,706,450 cycles # 2.153 GHz - 6,100,300,493 instructions # 1.87 insn per cycle - 1.516346990 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) +TOTAL : 1.523419 sec + 3,270,348,443 cycles # 2.140 GHz + 6,093,572,486 instructions # 1.86 insn per cycle + 1.529595531 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2340) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index 4c350b0716..cb54cefed4 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-30_23:23:51 +DATE: 2023-11-03_14:13:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.801264e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.639438e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.941307e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.434441e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.625386e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.947980e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 -TOTAL : 0.557664 sec - 2,331,543,515 cycles # 2.993 GHz - 3,413,686,462 instructions # 1.46 insn per cycle - 0.836667519 seconds time elapsed +TOTAL : 0.560461 sec + 2,380,757,395 cycles # 3.021 GHz + 3,460,383,265 instructions # 1.45 insn per cycle + 0.845514302 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 @@ -76,16 +76,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.279854e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.351747e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.351747e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.362534e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.440173e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.440173e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 4.736031 sec - 14,322,445,358 cycles # 3.022 GHz - 38,422,566,575 instructions # 2.68 insn per cycle - 4.740915131 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.573205 sec + 14,247,235,365 cycles # 3.112 GHz + 38,399,348,971 instructions # 2.70 insn per cycle + 4.578265833 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 574) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.166961e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.588776e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.588776e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.253010e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.682916e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.682916e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.167189 sec - 6,643,980,938 cycles # 3.061 GHz - 15,842,875,312 instructions # 2.38 insn per cycle - 2.172258926 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.129666 sec + 6,633,670,352 cycles # 3.109 GHz + 15,836,863,697 instructions # 2.39 insn per cycle + 2.134853355 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2677) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.593001e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.102557e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.102557e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.517990e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.095157e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.095157e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.228870 sec - 3,628,474,043 cycles # 2.942 GHz - 7,590,788,545 instructions # 2.09 insn per cycle - 1.233998812 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) +TOTAL : 1.234639 sec + 3,623,239,445 cycles # 2.924 GHz + 7,583,203,150 instructions # 2.09 insn per cycle + 1.239691986 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3033) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.019085e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.186087e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.186087e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.015481e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.177825e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.177825e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.166612 sec - 3,422,071,573 cycles # 2.922 GHz - 7,166,231,514 instructions # 2.09 insn per cycle - 1.171724120 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) +TOTAL : 1.166274 sec + 3,418,214,243 cycles # 2.920 GHz + 7,158,770,979 instructions # 2.09 insn per cycle + 1.171464201 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2826) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.583272e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.437719e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.437719e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.405756e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.247163e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.247163e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.521831 sec - 3,238,239,834 cycles # 2.122 GHz - 5,797,083,051 instructions # 1.79 insn per cycle - 1.527076370 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) +TOTAL : 1.551800 sec + 3,227,998,686 cycles # 2.074 GHz + 5,789,283,627 instructions # 1.79 insn per cycle + 1.557068256 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2340) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt index c1a2ea0227..87845a19d9 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-30_23:20:34 +DATE: 2023-11-03_14:10:26 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.862897e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.663387e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.966562e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.488390e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.644453e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.964646e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.501383 sec - 2,181,381,336 cycles # 3.006 GHz - 3,424,313,283 instructions # 1.57 insn per cycle - 0.782844227 seconds time elapsed +TOTAL : 0.506827 sec + 2,189,987,943 cycles # 3.026 GHz + 3,436,928,296 instructions # 1.57 insn per cycle + 0.782775854 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 @@ -76,16 +76,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.331003e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.407484e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.407484e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.358375e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.435124e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.435124e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.582165 sec - 14,152,562,935 cycles # 3.086 GHz - 38,392,294,188 instructions # 2.71 insn per cycle - 4.587123865 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.526256 sec + 14,080,847,145 cycles # 3.108 GHz + 38,369,987,972 instructions # 2.72 insn per cycle + 4.531177368 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 574) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.203403e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.628400e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.628400e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.256589e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.691470e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.691470e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.100151 sec - 6,477,299,825 cycles # 3.078 GHz - 15,829,339,843 instructions # 2.44 insn per cycle - 2.105215947 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.076070 sec + 6,465,651,360 cycles # 3.110 GHz + 15,824,795,059 instructions # 2.45 insn per cycle + 2.081190945 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2677) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.592522e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.100397e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.100397e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.620688e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.105701e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.105701e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.176672 sec - 3,456,621,724 cycles # 2.927 GHz - 7,606,538,399 instructions # 2.20 insn per cycle - 1.181720411 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) +TOTAL : 1.170267 sec + 3,454,298,635 cycles # 2.941 GHz + 7,599,254,165 instructions # 2.20 insn per cycle + 1.175519026 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3033) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.017882e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.180154e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.180154e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.687626e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.121348e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.121348e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.113532 sec - 3,248,683,790 cycles # 2.906 GHz - 7,215,461,932 instructions # 2.22 insn per cycle - 1.118566030 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) +TOTAL : 1.164248 sec + 3,254,264,032 cycles # 2.784 GHz + 7,207,984,280 instructions # 2.21 insn per cycle + 1.169514366 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2826) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.292205e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.091909e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.091909e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.603116e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.469723e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.469723e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.523124 sec - 3,068,382,625 cycles # 2.009 GHz - 5,846,492,637 instructions # 1.91 insn per cycle - 1.528127506 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) +TOTAL : 1.459450 sec + 3,063,578,056 cycles # 2.093 GHz + 5,838,117,130 instructions # 1.91 insn per cycle + 1.464612785 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2340) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index 7f6619cd33..2fabea2bcd 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-30_23:17:22 +DATE: 2023-11-03_14:07:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -51,14 +51,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.174030e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.645464e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.946490e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.102411e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.633947e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.952715e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.601645 sec - 2,504,719,694 cycles # 3.039 GHz - 3,887,630,578 instructions # 1.55 insn per cycle - 0.883248378 seconds time elapsed +TOTAL : 0.604346 sec + 2,499,145,476 cycles # 3.034 GHz + 3,893,994,904 instructions # 1.56 insn per cycle + 0.881470321 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -78,16 +78,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.347093e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.424329e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.424329e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.356009e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.433242e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.433242e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.550575 sec - 14,154,277,861 cycles # 3.108 GHz - 38,392,177,110 instructions # 2.71 insn per cycle - 4.555522154 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.530402 sec + 14,084,276,645 cycles # 3.106 GHz + 38,370,392,611 instructions # 2.72 insn per cycle + 4.535673091 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 574) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -105,16 +104,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.252432e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.682421e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.682421e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.264298e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.695949e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.695949e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.079439 sec - 6,469,167,580 cycles # 3.105 GHz - 15,829,428,699 instructions # 2.45 insn per cycle - 2.084347706 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.072861 sec + 6,463,221,241 cycles # 3.111 GHz + 15,824,115,222 instructions # 2.45 insn per cycle + 2.077965757 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2677) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -132,16 +130,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.637369e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.106605e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.106605e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.557333e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.094837e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.094837e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.170723 sec - 3,458,271,351 cycles # 2.943 GHz - 7,606,680,804 instructions # 2.20 insn per cycle - 1.175720703 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) +TOTAL : 1.177853 sec + 3,454,695,002 cycles # 2.922 GHz + 7,599,092,976 instructions # 2.20 insn per cycle + 1.182951114 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3033) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -159,16 +156,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.034498e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.200618e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.200618e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.033926e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.201633e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.201633e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.095956 sec - 3,250,338,121 cycles # 2.954 GHz - 7,214,870,285 instructions # 2.22 insn per cycle - 1.101160397 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) +TOTAL : 1.093129 sec + 3,239,321,715 cycles # 2.952 GHz + 7,206,965,422 instructions # 2.22 insn per cycle + 1.098293023 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2826) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -186,16 +182,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.653433e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.514463e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.514463e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.646235e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.502344e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.502344e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.452578 sec - 3,058,889,602 cycles # 2.100 GHz - 5,845,641,266 instructions # 1.91 insn per cycle - 1.457631752 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) +TOTAL : 1.451183 sec + 3,057,295,502 cycles # 2.100 GHz + 5,837,882,102 instructions # 1.91 insn per cycle + 1.456347808 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2340) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index 2e156cf238..570702caf6 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-30_22:43:24 +DATE: 2023-11-03_13:15:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.114345e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.750527e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.014575e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.286668e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.628069e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.019302e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.467188 sec - 2,061,300,508 cycles # 3.013 GHz - 2,979,795,053 instructions # 1.45 insn per cycle - 0.742618348 seconds time elapsed +TOTAL : 0.478643 sec + 2,113,601,376 cycles # 3.005 GHz + 2,975,316,663 instructions # 1.41 insn per cycle + 0.775074903 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 @@ -76,16 +76,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.295508e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.368349e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.368349e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.311263e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.384837e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.384837e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.651284 sec - 14,414,978,676 cycles # 3.097 GHz - 39,885,747,501 instructions # 2.77 insn per cycle - 4.656156481 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 570) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.617186 sec + 14,345,588,057 cycles # 3.104 GHz + 39,862,651,177 instructions # 2.78 insn per cycle + 4.624830306 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 557) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.108362e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.705438e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.705438e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.842103e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.426487e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.426487e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.800005 sec - 5,593,021,873 cycles # 3.100 GHz - 15,299,807,626 instructions # 2.74 insn per cycle - 1.805212831 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2473) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.881027 sec + 5,579,160,795 cycles # 2.958 GHz + 15,294,602,945 instructions # 2.74 insn per cycle + 1.898569017 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2461) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.787179e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.474335e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.474335e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.849193e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.549754e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.549754e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.627313 sec - 4,741,036,728 cycles # 2.906 GHz - 9,747,765,661 instructions # 2.06 insn per cycle - 1.632312389 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3710) (512y: 0) (512z: 0) +TOTAL : 1.611383 sec + 4,737,627,785 cycles # 2.932 GHz + 9,741,051,480 instructions # 2.06 insn per cycle + 1.625476103 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3692) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.999958e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.729374e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.729374e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.036284e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.770599e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.770599e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.580177 sec - 4,620,710,026 cycles # 2.916 GHz - 9,339,228,525 instructions # 2.02 insn per cycle - 1.585058401 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3497) (512y: 0) (512z: 0) +TOTAL : 1.570073 sec + 4,626,472,822 cycles # 2.937 GHz + 9,331,703,433 instructions # 2.02 insn per cycle + 1.582744287 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3473) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.137462e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.683087e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.683087e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.204270e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.761377e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.761377e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.791618 sec - 3,651,443,842 cycles # 2.033 GHz - 7,045,651,657 instructions # 1.93 insn per cycle - 1.796536804 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2606) (512y: 12) (512z: 2221) +TOTAL : 1.770855 sec + 3,652,333,187 cycles # 2.056 GHz + 7,038,738,600 instructions # 1.93 insn per cycle + 1.784681837 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2582) (512y: 12) (512z: 2221) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index 3e1f931c4f..28778525bf 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-30_23:01:44 +DATE: 2023-11-03_13:50:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.802566e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.662871e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.967885e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.408658e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.634217e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.954065e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.474180 sec - 2,101,252,491 cycles # 3.000 GHz - 3,030,117,960 instructions # 1.44 insn per cycle - 0.758250415 seconds time elapsed +TOTAL : 0.475559 sec + 2,115,805,668 cycles # 2.995 GHz + 3,005,784,945 instructions # 1.42 insn per cycle + 0.763544980 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 @@ -76,16 +76,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.641227e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.737778e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.737778e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.640807e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.739837e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.739837e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.054902 sec - 12,605,264,157 cycles # 3.106 GHz - 34,394,645,325 instructions # 2.73 insn per cycle - 4.060052613 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 696) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.051489 sec + 12,602,738,737 cycles # 3.107 GHz + 34,367,646,318 instructions # 2.73 insn per cycle + 4.056865009 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 683) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.576010e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.067873e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.067873e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.598028e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.093458e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.093458e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.965839 sec - 6,089,585,063 cycles # 3.091 GHz - 14,875,054,278 instructions # 2.44 insn per cycle - 1.971084739 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3009) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.955100 sec + 6,075,860,611 cycles # 3.100 GHz + 14,869,348,420 instructions # 2.45 insn per cycle + 1.960626555 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2997) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.615830e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.504043e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.504043e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.638223e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.541140e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.541140e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.461335 sec - 4,275,220,530 cycles # 2.917 GHz - 9,042,872,048 instructions # 2.12 insn per cycle - 1.466711798 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4445) (512y: 0) (512z: 0) +TOTAL : 1.454851 sec + 4,257,285,483 cycles # 2.917 GHz + 9,034,268,038 instructions # 2.12 insn per cycle + 1.460220416 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4427) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.713493e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.603354e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.603354e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.775592e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.684138e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.684138e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.443103 sec - 4,201,605,081 cycles # 2.905 GHz - 8,678,273,624 instructions # 2.07 insn per cycle - 1.448450717 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4244) (512y: 0) (512z: 0) +TOTAL : 1.428152 sec + 4,208,542,546 cycles # 2.938 GHz + 8,668,841,642 instructions # 2.06 insn per cycle + 1.433477910 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4220) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.754828e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.235250e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.235250e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.842720e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.341315e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.341315e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.906937 sec - 3,847,864,916 cycles # 2.015 GHz - 7,820,928,515 instructions # 2.03 insn per cycle - 1.912353157 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4420) (512y: 0) (512z: 2556) +TOTAL : 1.875069 sec + 3,841,952,399 cycles # 2.044 GHz + 7,812,706,346 instructions # 2.03 insn per cycle + 1.880579118 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4396) (512y: 0) (512z: 2556) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index 8a69e175ae..20ffd90a56 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-30_23:02:07 +DATE: 2023-11-03_13:51:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.853070e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.693882e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.006213e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.511161e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.703547e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.038636e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.470775 sec - 2,095,007,668 cycles # 3.024 GHz - 2,980,363,168 instructions # 1.42 insn per cycle - 0.749660365 seconds time elapsed +TOTAL : 0.475749 sec + 2,104,291,018 cycles # 2.988 GHz + 2,955,446,490 instructions # 1.40 insn per cycle + 0.763206295 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 @@ -76,16 +76,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.827771e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.938870e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.938870e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.830330e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.942360e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.942360e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 3.792247 sec - 11,758,748,490 cycles # 3.097 GHz - 35,129,457,123 instructions # 2.99 insn per cycle - 3.797289248 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 470) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.785680 sec + 11,752,006,274 cycles # 3.100 GHz + 35,103,987,323 instructions # 2.99 insn per cycle + 3.791246697 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 457) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.738410e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.255110e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.255110e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.696846e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.211128e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.211128e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.911877 sec - 5,951,196,288 cycles # 3.106 GHz - 14,483,812,665 instructions # 2.43 insn per cycle - 1.917070183 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.922397 sec + 5,958,455,869 cycles # 3.092 GHz + 14,478,500,599 instructions # 2.43 insn per cycle + 1.927940043 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2560) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.892954e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.829064e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.829064e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.847447e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.769654e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.769654e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.410830 sec - 4,155,679,226 cycles # 2.936 GHz - 8,888,215,628 instructions # 2.14 insn per cycle - 1.416036036 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3576) (512y: 0) (512z: 0) +TOTAL : 1.416637 sec + 4,152,850,979 cycles # 2.922 GHz + 8,880,220,301 instructions # 2.14 insn per cycle + 1.422167959 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3558) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.947895e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.896742e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.896742e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.892718e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.835991e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.835991e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.402197 sec - 4,131,982,697 cycles # 2.937 GHz - 8,425,111,238 instructions # 2.04 insn per cycle - 1.407381420 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3320) (512y: 0) (512z: 0) +TOTAL : 1.408167 sec + 4,129,402,905 cycles # 2.922 GHz + 8,417,127,810 instructions # 2.04 insn per cycle + 1.413668763 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3296) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.979339e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.495089e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.495089e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.947803e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.456162e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.456162e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.837942 sec - 3,799,678,092 cycles # 2.063 GHz - 7,713,472,925 instructions # 2.03 insn per cycle - 1.843097802 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3436) (512y: 0) (512z: 2108) +TOTAL : 1.843327 sec + 3,787,877,452 cycles # 2.050 GHz + 7,705,706,460 instructions # 2.03 insn per cycle + 1.848729462 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3412) (512y: 0) (512z: 2108) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 9c8f35e78d..be08b0f2cb 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-30_22:43:49 +DATE: 2023-11-03_13:15:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.272066e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.179135e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.271243e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.274406e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.151337e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.265311e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.508054 sec - 2,187,282,267 cycles # 2.978 GHz - 3,169,296,860 instructions # 1.45 insn per cycle - 0.792015861 seconds time elapsed +TOTAL : 0.519897 sec + 2,283,332,215 cycles # 3.025 GHz + 3,248,780,616 instructions # 1.42 insn per cycle + 0.825340878 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -76,16 +76,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.169756e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.233247e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.233247e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.181311e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.245712e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.245712e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.936357 sec - 15,268,315,849 cycles # 3.090 GHz - 38,638,138,492 instructions # 2.53 insn per cycle - 4.941625774 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 672) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.908499 sec + 15,234,841,173 cycles # 3.100 GHz + 38,614,636,672 instructions # 2.53 insn per cycle + 4.916787929 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 659) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.681342e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.882250e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.882250e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.574916e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.763087e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.763087e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.951744 sec - 8,930,153,014 cycles # 3.021 GHz - 24,239,674,824 instructions # 2.71 insn per cycle - 2.957006740 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.034907 sec + 8,976,593,954 cycles # 2.956 GHz + 24,241,840,556 instructions # 2.70 insn per cycle + 3.050923562 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2176) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.988456e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.517430e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.517430e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.068308e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.608701e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.608701e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.854670 sec - 5,412,053,444 cycles # 2.911 GHz - 11,287,904,185 instructions # 2.09 insn per cycle - 1.859798747 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2480) (512y: 0) (512z: 0) +TOTAL : 1.829434 sec + 5,404,823,528 cycles # 2.945 GHz + 11,280,042,611 instructions # 2.09 insn per cycle + 1.844040899 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2462) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.705564e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.369171e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.369171e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.778878e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.461946e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.461946e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.666815 sec - 4,863,774,902 cycles # 2.910 GHz - 10,539,053,403 instructions # 2.17 insn per cycle - 1.672233801 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2167) (512y: 148) (512z: 0) +TOTAL : 1.647058 sec + 4,869,702,123 cycles # 2.946 GHz + 10,530,154,588 instructions # 2.16 insn per cycle + 1.659743023 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2143) (512y: 148) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.216828e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.465613e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.465613e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.276183e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.533760e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.533760e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.589300 sec - 5,225,981,137 cycles # 2.015 GHz - 7,613,810,266 instructions # 1.46 insn per cycle - 2.594475099 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1608) +TOTAL : 2.552510 sec + 5,223,860,309 cycles # 2.042 GHz + 7,605,402,012 instructions # 1.46 insn per cycle + 2.569365786 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1609) (512y: 126) (512z: 1608) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index e20993a621..fe0c54d84c 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-10-30_22:44:15 +DATE: 2023-11-03_13:16:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.276731e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.178753e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.271013e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.436244e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.158125e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.273882e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.517169 sec - 2,141,581,999 cycles # 2.876 GHz - 3,095,928,504 instructions # 1.45 insn per cycle - 0.802876427 seconds time elapsed +TOTAL : 0.524889 sec + 2,253,700,906 cycles # 2.939 GHz + 3,150,635,419 instructions # 1.40 insn per cycle + 0.833351247 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 @@ -76,16 +76,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.130678e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.193208e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.193208e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.177721e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.241401e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.241401e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.026708 sec - 15,377,164,375 cycles # 3.057 GHz - 40,433,126,230 instructions # 2.63 insn per cycle - 5.031817846 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.916147 sec + 15,282,419,302 cycles # 3.106 GHz + 40,408,905,625 instructions # 2.64 insn per cycle + 4.924110639 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -103,16 +102,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.967029e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.198905e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.198905e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.872219e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.093844e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.093844e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.746345 sec - 8,514,726,291 cycles # 3.096 GHz - 23,270,087,626 instructions # 2.73 insn per cycle - 2.751510263 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2091) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.810196 sec + 8,546,119,161 cycles # 3.035 GHz + 23,267,992,097 instructions # 2.72 insn per cycle + 2.824845793 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2079) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -130,16 +128,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.149152e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.533872e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.533872e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.169853e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.559211e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.559211e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.139503 sec - 6,253,820,119 cycles # 2.917 GHz - 12,973,626,048 instructions # 2.07 insn per cycle - 2.144732816 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2669) (512y: 0) (512z: 0) +TOTAL : 2.128900 sec + 6,252,672,924 cycles # 2.929 GHz + 12,966,364,012 instructions # 2.07 insn per cycle + 2.147780574 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2651) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -157,16 +154,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.443292e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.870409e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.870409e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.487443e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.938078e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.938078e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.028380 sec - 5,926,517,981 cycles # 2.916 GHz - 12,251,272,217 instructions # 2.07 insn per cycle - 2.033497640 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2209) (512y: 296) (512z: 0) +TOTAL : 2.013530 sec + 5,927,064,158 cycles # 2.936 GHz + 12,242,211,971 instructions # 2.07 insn per cycle + 2.024360041 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2185) (512y: 296) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -184,16 +180,15 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.902539e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.114637e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.114637e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.836447e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.050272e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.050272e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.790099 sec - 5,604,940,647 cycles # 2.006 GHz - 8,753,839,257 instructions # 1.56 insn per cycle - 2.795468352 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1909) +TOTAL : 2.837117 sec + 5,601,200,879 cycles # 1.972 GHz + 8,746,209,226 instructions # 1.56 insn per cycle + 2.853250568 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1466) (512y: 183) (512z: 1909) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 2b18dea98b..dbb6a27461 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-30_22:44:42 +DATE: 2023-11-03_13:16:27 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.047714e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.048356e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.060229e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.467182e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.042578e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.058885e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.458296 sec - 1,980,316,208 cycles # 2.971 GHz - 2,806,704,970 instructions # 1.42 insn per cycle - 0.726080878 seconds time elapsed +TOTAL : 0.462450 sec + 2,077,727,447 cycles # 3.022 GHz + 2,939,256,117 instructions # 1.41 insn per cycle + 0.760420017 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.127623e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.321219e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.332184e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.080642e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.317470e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.331219e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.588967 sec - 2,472,364,873 cycles # 3.018 GHz - 3,811,466,454 instructions # 1.54 insn per cycle - 0.878680525 seconds time elapsed +TOTAL : 0.600187 sec + 2,562,305,698 cycles # 3.039 GHz + 3,791,655,393 instructions # 1.48 insn per cycle + 0.901479344 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.570721e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.583060e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.583060e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.582587e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.595230e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.595230e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.395142 sec - 
19,691,711,207 cycles # 3.078 GHz - 59,609,727,892 instructions # 3.03 insn per cycle - 6.399269206 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.363661 sec + 19,741,541,068 cycles # 3.101 GHz + 59,603,136,600 instructions # 3.02 insn per cycle + 6.370242758 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1453) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.903843e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.950080e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.950080e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.959878e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.005566e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.005566e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.362474 sec - 10,363,244,702 cycles # 3.079 GHz - 30,678,298,427 instructions # 2.96 insn per cycle - 3.366802524 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.321995 sec + 10,357,377,147 cycles # 3.114 GHz + 30,672,198,558 instructions # 2.96 insn per cycle + 3.336689734 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.772112e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.953675e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.953675e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.935334e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.011364e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.011364e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.699046 sec - 4,888,603,432 cycles # 2.874 GHz - 11,021,968,804 instructions # 2.25 insn per cycle - 1.703340915 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) +TOTAL : 1.667758 sec + 4,882,235,286 cycles # 2.920 GHz + 11,014,946,826 instructions # 2.26 insn per cycle + 1.681843678 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4449) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.107137e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.129586e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.129586e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.112517e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.135400e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.135400e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.501377 sec - 4,365,749,377 cycles # 2.901 GHz - 10,298,360,400 instructions # 2.36 insn per cycle - 1.505506942 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) +TOTAL : 1.491700 sec + 4,367,434,146 cycles # 2.920 GHz + 10,292,361,014 instructions # 2.36 insn per cycle + 1.505034425 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4113) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.829294e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.941822e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.941822e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.857569e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.970188e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.970188e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.116256 sec - 4,095,240,477 cycles # 1.932 GHz - 5,845,721,781 instructions # 1.43 insn per cycle - 2.120470673 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) +TOTAL : 2.106012 sec + 4,092,305,690 cycles # 1.940 GHz + 5,839,185,657 instructions # 1.43 insn per cycle + 2.116411168 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1516) (512y: 95) (512z: 3466) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index 7f4e11c6ec..9c5ec8bfcc 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-30_23:11:31 +DATE: 2023-11-03_14:00:54 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.715097e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.892341e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.892341e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.678165e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.772137e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.772137e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.484083 sec - 2,100,235,497 cycles # 3.030 GHz - 3,147,476,737 instructions # 1.50 insn per cycle - 0.751147595 seconds time elapsed +TOTAL : 0.487116 sec + 2,132,060,726 cycles # 3.012 GHz + 3,216,268,356 instructions # 1.51 insn per cycle + 0.766469604 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.790572e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.688655e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.688655e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.743648e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.448864e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.448864e+06 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.816202 sec - 3,149,587,745 cycles # 2.965 GHz - 5,015,800,272 instructions # 1.59 insn per cycle - 1.123090986 seconds time elapsed +TOTAL : 0.821472 sec + 3,252,599,355 cycles # 3.032 GHz + 5,078,109,890 instructions # 1.56 insn per cycle + 1.130728802 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -103,16 +103,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.596929e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.609765e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.609765e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.508949e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.521499e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.521499e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.337947 
sec - 19,723,644,395 cycles # 3.110 GHz - 59,617,270,726 instructions # 3.02 insn per cycle - 6.342151556 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.556354 sec + 19,759,987,521 cycles # 3.012 GHz + 59,608,977,045 instructions # 3.02 insn per cycle + 6.560604945 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1453) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -131,16 +130,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.783981e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.829454e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.829454e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.915913e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.961324e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.961324e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.454609 sec - 10,400,623,917 cycles # 3.008 GHz - 30,728,596,829 instructions # 2.95 insn per cycle - 3.458894302 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.358749 sec + 10,404,117,412 cycles # 3.095 GHz + 30,721,241,681 instructions # 2.95 insn per cycle + 3.363116143 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -159,16 +157,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.941214e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.012298e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.012298e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.903809e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.008523e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.008523e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.676940 sec - 4,910,616,642 cycles # 2.922 GHz - 11,072,028,053 instructions # 2.25 insn per cycle - 1.681365069 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) +TOTAL : 1.680116 sec + 4,916,337,320 cycles # 2.920 GHz + 11,065,114,702 instructions # 2.25 insn per cycle + 1.684455523 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4449) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -187,16 +184,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.111710e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.134977e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.134977e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.108956e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.131774e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.131774e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.502610 sec - 4,398,472,196 cycles # 2.920 GHz - 10,349,003,194 instructions # 2.35 insn per cycle - 1.506934340 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) +TOTAL : 1.502158 sec + 4,393,181,404 cycles # 2.918 GHz + 10,340,311,929 instructions # 2.35 insn per cycle + 1.506385000 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4113) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -215,16 +211,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.894007e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.009716e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.009716e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.805812e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.921883e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.921883e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.105022 sec - 4,124,940,789 cycles # 1.956 GHz - 5,883,373,573 instructions # 1.43 insn per cycle - 2.109305029 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) +TOTAL : 2.126801 sec + 4,123,621,038 cycles # 1.936 GHz + 5,877,714,636 instructions # 1.43 insn per cycle + 2.131182255 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1516) (512y: 95) (512z: 3466) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index ab5cc3d006..4b186e3c8d 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-30_22:45:11 +DATE: 2023-11-03_13:16:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.004676e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.045395e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.057644e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.451423e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.041254e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.057491e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.456331 sec - 2,013,110,564 cycles # 3.011 GHz - 2,877,607,904 instructions # 1.43 insn per cycle - 0.725772586 seconds time elapsed +TOTAL : 0.462812 sec + 2,067,313,737 cycles # 3.009 GHz + 2,928,212,077 instructions # 1.42 insn per cycle + 0.755597155 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.125680e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.318433e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.329333e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.075830e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.309337e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.323289e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.584479 sec - 2,474,094,451 cycles # 3.008 GHz - 3,746,277,121 instructions # 1.51 insn per cycle - 0.881448510 seconds time elapsed +TOTAL : 0.598570 sec + 2,505,991,827 cycles # 2.994 GHz + 3,799,994,110 instructions # 1.52 insn per cycle + 0.896665996 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.579392e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.592087e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.592087e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.606375e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.619374e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.619374e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.374569 sec - 
19,486,493,121 cycles # 3.055 GHz - 58,801,850,212 instructions # 3.02 insn per cycle - 6.378723001 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1313) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.305650 sec + 19,550,945,894 cycles # 3.099 GHz + 58,794,622,568 instructions # 3.01 insn per cycle + 6.313075917 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1300) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.903956e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.949132e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.949132e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.925502e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.972274e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.972274e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.362761 sec - 10,234,416,457 cycles # 3.040 GHz - 30,349,712,353 instructions # 2.97 insn per cycle - 3.367008307 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4970) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.345149 sec + 10,219,058,064 cycles # 3.055 GHz + 30,347,001,184 instructions # 2.97 insn per cycle + 3.358128835 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4958) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.551434e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.719575e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.719575e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.545609e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.725495e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.725495e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.736713 sec - 5,047,702,052 cycles # 2.901 GHz - 11,486,114,404 instructions # 2.28 insn per cycle - 1.740911711 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4591) (512y: 0) (512z: 0) +TOTAL : 1.735067 sec + 5,044,703,294 cycles # 2.901 GHz + 11,479,543,399 instructions # 2.28 insn per cycle + 1.749373805 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4573) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.040957e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.060570e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.060570e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.038658e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.058879e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.058879e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.595201 sec - 4,643,633,681 cycles # 2.905 GHz - 10,843,683,440 instructions # 2.34 insn per cycle - 1.599371741 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4183) (512y: 244) (512z: 0) +TOTAL : 1.596287 sec + 4,630,239,457 cycles # 2.893 GHz + 10,837,667,903 instructions # 2.34 insn per cycle + 1.609962505 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4159) (512y: 244) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.774921e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.884495e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.884495e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.617610e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.726680e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.726680e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.131248 sec - 4,124,512,371 cycles # 1.933 GHz - 6,110,551,427 instructions # 1.48 insn per cycle - 2.135412381 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1457) (512y: 139) (512z: 3568) +TOTAL : 2.171936 sec + 4,109,589,812 cycles # 1.889 GHz + 6,103,290,525 instructions # 1.49 insn per cycle + 2.186977177 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1433) (512y: 139) (512z: 3568) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 59717c723e..362655a840 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-30_22:45:40 +DATE: 2023-11-03_13:17:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.588155e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.368447e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.458711e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.489146e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.363200e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.462876e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.439302 sec - 1,948,206,203 cycles # 2.995 GHz - 2,663,043,056 instructions # 1.37 insn per cycle - 0.707785219 seconds time elapsed +TOTAL : 0.442934 sec + 1,970,237,840 cycles # 2.984 GHz + 2,770,826,921 instructions # 1.41 insn per cycle + 0.735603362 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.434228e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.440816e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.505746e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.230130e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.406426e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.489442e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.484053 sec - 2,112,707,518 cycles # 3.007 GHz - 3,087,457,780 instructions # 1.46 insn per cycle - 0.761071480 seconds time elapsed +TOTAL : 0.491833 sec + 2,186,862,195 cycles # 3.015 GHz + 3,142,874,449 instructions # 1.44 insn per cycle + 0.784602556 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -86,10 +86,10 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 OMP= WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 31,803,585 cycles # 2.876 GHz - 48,606,177 instructions # 1.53 insn per cycle - 0.011601940 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) + 32,317,025 cycles # 2.771 GHz + 48,300,089 instructions # 1.49 insn per cycle + 0.014257115 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1021) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index c7a6da602a..a2f6fdb57b 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-30_23:11:59 +DATE: 2023-11-03_14:01:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.165376e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.169114e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.169114e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.074587e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.149885e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.149885e+07 ) sec^-1 MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 -TOTAL : 0.452789 sec - 1,990,263,936 cycles # 3.017 GHz - 2,962,281,360 instructions # 1.49 insn per cycle - 0.718372655 seconds time elapsed +TOTAL : 0.454331 sec + 1,999,329,976 cycles # 3.016 GHz + 2,956,102,779 instructions # 1.48 insn per cycle + 0.720212411 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.885886e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.626317e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.626317e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.788896e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.565984e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.565984e+07 ) sec^-1 MeanMatrixElemValue = ( 6.737500e+02 +- 4.776370e+02 ) GeV^-2 -TOTAL : 0.625236 sec - 2,547,368,142 cycles # 2.988 GHz - 3,892,559,884 instructions # 1.53 insn per cycle - 0.911620575 seconds time elapsed +TOTAL : 0.631218 sec + 2,614,227,757 cycles # 3.038 GHz + 3,889,940,623 instructions # 1.49 insn per cycle + 0.918378142 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -99,10 +99,10 @@ OK (relative difference <= 5E-3) runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) - 38,496,254 cycles # 2.918 GHz - 52,021,012 instructions # 1.35 insn per cycle - 0.013672287 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) + 38,736,994 cycles # 2.890 GHz + 51,577,023 instructions # 1.33 insn per cycle + 0.013897774 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1021) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index 280e1cd39b..589a07cd15 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-30_22:45:49 +DATE: 2023-11-03_13:17:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.578827e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.307502e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.398717e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.430067e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.250178e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.346715e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.438268 sec - 1,948,761,926 cycles # 3.007 GHz - 2,775,625,164 instructions # 1.42 insn per cycle - 0.705619986 seconds time elapsed +TOTAL : 0.442845 sec + 1,978,546,976 cycles # 2.994 GHz + 2,800,934,812 instructions # 1.42 insn per cycle + 0.729688570 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 248 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.421305e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.421321e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.485464e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.183183e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.331943e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.411243e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.483714 sec - 2,118,065,382 cycles # 3.006 GHz - 3,095,007,309 instructions # 1.46 insn per cycle - 0.761647794 seconds time elapsed +TOTAL : 0.494163 sec + 2,190,798,360 cycles # 3.005 GHz + 3,102,998,927 instructions # 1.42 insn per cycle + 0.786222748 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -86,10 +86,10 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check.exe -p 64 256 10 OMP= WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 31,592,173 cycles # 2.841 GHz - 47,578,248 instructions # 1.51 insn per cycle - 0.011580777 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1029) (avx2: 0) (512y: 0) (512z: 0) + 31,477,105 cycles # 2.743 GHz + 47,479,451 instructions # 1.51 insn per cycle + 0.014142910 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1016) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index c5f0926802..71fed6821e 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-30_22:45:58 +DATE: 2023-11-03_13:17:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.034866e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.047326e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.059480e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.662719e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.038516e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.052821e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.459825 sec - 1,972,886,422 cycles # 2.942 GHz - 2,894,736,686 instructions # 1.47 insn per cycle - 0.728551974 seconds time elapsed +TOTAL : 0.462730 sec + 2,064,962,094 cycles # 2.992 GHz + 2,945,094,613 instructions # 1.43 insn per cycle + 0.755827601 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.126651e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.319776e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.330754e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.076527e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.311748e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.325513e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.590117 sec - 2,420,412,755 cycles # 2.944 GHz - 3,622,346,173 instructions # 1.50 insn per cycle - 0.881208904 seconds time elapsed +TOTAL : 0.608648 sec + 2,467,621,809 cycles # 2.915 GHz + 3,601,978,504 instructions # 1.46 insn per cycle + 0.908027164 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -86,10 +86,10 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check.exe -p 64 256 10 OMP= WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 35,060,457 cycles # 2.849 GHz - 49,936,885 instructions # 1.42 insn per cycle - 0.012855343 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1399) (avx2: 0) (512y: 0) (512z: 0) + 35,020,628 cycles # 2.806 GHz + 49,755,438 instructions # 1.42 insn per cycle + 0.015066790 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1386) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index 9d2665cd6b..db6b196dcc 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-10-30_22:46:08 +DATE: 2023-11-03_13:17:54 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.993809e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.038657e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.050632e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.645159e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.033634e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.047567e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.458870 sec - 2,006,533,182 cycles # 3.002 GHz - 2,878,017,962 instructions # 1.43 insn per cycle - 0.727203702 seconds time elapsed +TOTAL : 0.460733 sec + 2,032,593,852 cycles # 3.005 GHz + 2,920,206,606 instructions # 1.44 insn per cycle + 0.742642299 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.115406e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.304600e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.315286e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.069849e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.302002e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.315503e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.582507 sec - 2,454,240,813 cycles # 3.016 GHz - 3,723,169,840 instructions # 1.52 insn per cycle - 0.871289973 seconds time elapsed +TOTAL : 0.595564 sec + 2,513,535,038 cycles # 3.015 GHz + 3,759,582,185 instructions # 1.50 insn per cycle + 0.892869761 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -86,10 +86,10 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check.exe -p 64 256 10 OMP= WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 33,992,309 cycles # 2.807 GHz - 49,173,808 instructions # 1.45 insn per cycle - 0.012476569 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1276) (avx2: 0) (512y: 0) (512z: 0) + 34,337,414 cycles # 2.744 GHz + 48,918,383 instructions # 1.42 insn per cycle + 0.014922909 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1263) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 16d6268c40..2f58b85467 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-30_22:46:17 +DATE: 2023-11-03_13:18:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.517624e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.541229e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.543235e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.475226e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.504026e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.506324e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.521844 sec - 2,186,654,995 cycles # 2.913 GHz - 3,461,003,117 instructions # 1.58 insn per cycle - 0.810135449 seconds time elapsed +TOTAL : 0.521620 sec + 2,278,405,286 cycles # 3.016 GHz + 3,569,695,737 instructions # 1.57 insn per cycle + 0.824142816 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.126621e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.153137e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.154229e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.124925e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.158780e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.160188e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.020145 sec - 10,054,555,514 cycles # 3.078 GHz - 21,149,963,871 instructions # 2.10 insn per cycle - 3.325573136 seconds time elapsed +TOTAL : 3.027641 sec + 10,109,489,302 cycles # 3.067 GHz + 20,958,650,986 instructions # 2.07 insn per cycle + 3.354054406 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.989223e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.990154e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.990154e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.965820e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.966789e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.966789e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
8.253925 sec - 25,625,598,282 cycles # 3.104 GHz - 78,941,361,841 instructions # 3.08 insn per cycle - 8.257942730 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.350379 sec + 25,556,849,433 cycles # 3.061 GHz + 78,937,890,507 instructions # 3.09 insn per cycle + 8.357043052 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4879) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.733656e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.737015e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.737015e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.696796e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.700036e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.700036e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.401533 sec - 12,932,063,488 cycles # 2.936 GHz - 39,285,534,388 instructions # 3.04 insn per cycle - 4.405671091 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.442136 sec + 12,969,425,468 cycles # 2.917 GHz + 39,277,882,939 instructions # 3.03 insn per cycle + 4.456456759 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13170) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.580201e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.597746e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.597746e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.634369e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.652303e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.652303e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.920486 sec - 5,578,833,054 cycles # 2.900 GHz - 13,688,993,603 instructions # 2.45 insn per cycle - 1.924638190 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) +TOTAL : 1.905944 sec + 5,545,959,947 cycles # 2.904 GHz + 13,682,734,640 instructions # 2.47 insn per cycle + 1.920977419 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11339) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.806434e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.828989e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.828989e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.809381e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.832630e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.832630e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.681542 sec - 4,894,665,759 cycles # 2.905 GHz - 12,344,293,266 instructions # 2.52 insn per cycle - 1.685694216 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) +TOTAL : 1.678117 sec + 4,878,586,577 cycles # 2.901 GHz + 12,338,041,344 instructions # 2.53 insn per cycle + 1.689838954 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10242) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.698382e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.713237e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.713237e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.697114e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.711979e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.711979e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.140663 sec - 4,116,399,810 cycles # 1.920 GHz - 6,337,006,175 instructions # 1.54 insn per cycle - 2.144975590 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) +TOTAL : 2.137603 sec + 4,098,498,315 cycles # 1.914 GHz + 6,330,554,099 instructions # 1.54 insn per cycle + 2.154460026 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1797) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index 4ce822b792..a90d88261e 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-30_23:12:42 +DATE: 2023-11-03_14:02:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.143732e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.472083e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.472083e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.160085e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.491555e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.491555e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.511660 sec - 2,209,336,126 cycles # 2.997 GHz - 3,509,975,220 instructions # 1.59 insn per cycle - 0.799578324 seconds time elapsed +TOTAL : 0.508649 sec + 2,243,351,008 cycles # 3.015 GHz + 3,493,355,426 instructions # 1.56 insn per cycle + 0.803773680 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.645060e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.109225e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.109225e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.641718e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.113192e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.113192e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.299496 sec - 10,577,204,570 cycles # 2.971 GHz - 22,771,356,342 instructions # 2.15 insn per cycle - 3.626012375 seconds time elapsed +TOTAL : 3.305477 sec + 10,996,635,061 cycles # 3.077 GHz + 24,704,998,495 instructions # 2.25 insn per cycle + 3.633765024 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -103,16 +103,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.952360e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.953336e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.953336e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.995755e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.996726e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.996726e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
8.414053 sec - 25,654,559,721 cycles # 3.047 GHz - 78,949,536,483 instructions # 3.08 insn per cycle - 8.422420973 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.227691 sec + 25,539,901,235 cycles # 3.103 GHz + 78,941,603,446 instructions # 3.09 insn per cycle + 8.231975969 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4879) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -131,16 +130,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.695279e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.698962e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.698962e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.762236e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.765724e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.765724e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.451385 sec - 12,941,077,341 cycles # 2.905 GHz - 39,297,696,039 instructions # 3.04 insn per cycle - 4.455839883 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.369134 sec + 12,967,293,054 cycles # 2.966 GHz + 39,290,035,969 instructions # 3.03 insn per cycle + 4.373557789 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13170) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -159,16 +157,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.533387e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.553090e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.553090e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.634444e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.652344e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.652344e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.936377 sec - 5,585,739,994 cycles # 2.879 GHz - 13,699,929,834 instructions # 2.45 insn per cycle - 1.941109655 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) +TOTAL : 1.909587 sec + 5,562,218,027 cycles # 2.907 GHz + 13,692,479,641 instructions # 2.46 insn per cycle + 1.913910388 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11339) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -187,16 +184,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.872929e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.896714e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.896714e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.784354e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.808269e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.808269e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.674197 sec - 4,907,725,132 cycles # 2.925 GHz - 12,355,150,511 instructions # 2.52 insn per cycle - 1.678573718 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) +TOTAL : 1.686542 sec + 4,892,905,107 cycles # 2.895 GHz + 12,348,037,792 instructions # 2.52 insn per cycle + 1.691037956 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10242) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -215,16 +211,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.649611e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.664664e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.664664e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.665094e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.680086e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.680086e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.157363 sec - 4,129,296,848 cycles # 1.911 GHz - 6,347,814,458 instructions # 1.54 insn per cycle - 2.161875950 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) +TOTAL : 2.149806 sec + 4,107,351,573 cycles # 1.907 GHz + 6,340,734,228 instructions # 1.54 insn per cycle + 2.154151474 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1797) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index fbbc78bbe3..5254750155 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-30_23:24:14 +DATE: 2023-11-03_14:14:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.483910e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.509921e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.512008e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.510322e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.538031e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.540551e+05 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.506434 sec - 2,214,704,532 cycles # 2.981 GHz - 3,386,390,521 instructions # 1.53 insn per cycle - 0.810038936 seconds time elapsed +TOTAL : 0.503788 sec + 2,252,662,033 cycles # 3.035 GHz + 3,515,557,636 instructions # 1.56 insn per cycle + 0.812476829 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --common WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.140721e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.172434e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.173756e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.141342e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.175250e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.176691e+05 ) sec^-1 MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 3.129864 sec - 10,198,907,692 cycles # 3.020 GHz - 22,581,252,712 instructions # 2.21 insn per cycle - 3.437157552 seconds time elapsed +TOTAL : 3.122387 sec + 10,080,449,234 cycles # 2.987 GHz + 20,662,923,062 instructions # 2.05 insn per cycle + 3.431503994 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.967449e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.968407e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.968407e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.992840e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.993789e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.993789e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
8.346802 sec - 25,643,377,035 cycles # 3.071 GHz - 78,941,953,126 instructions # 3.08 insn per cycle - 8.350720328 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.237469 sec + 25,586,326,392 cycles # 3.105 GHz + 78,935,069,074 instructions # 3.09 insn per cycle + 8.241477506 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4879) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.739832e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.743118e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.743118e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.733845e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.737683e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.737683e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.396405 sec - 12,921,349,789 cycles # 2.937 GHz - 39,283,878,297 instructions # 3.04 insn per cycle - 4.400450369 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.400380 sec + 12,963,141,897 cycles # 2.945 GHz + 39,278,266,161 instructions # 3.03 insn per cycle + 4.404595331 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13170) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.487098e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.504383e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.504383e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.646502e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.664554e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.664554e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.943181 sec - 5,580,010,383 cycles # 2.867 GHz - 13,688,267,157 instructions # 2.45 insn per cycle - 1.947132122 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) +TOTAL : 1.904447 sec + 5,552,738,566 cycles # 2.911 GHz + 13,681,318,294 instructions # 2.46 insn per cycle + 1.908567769 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11339) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.702345e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.725432e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.725432e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.859590e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.883833e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.883833e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.701247 sec - 4,897,512,852 cycles # 2.873 GHz - 12,342,531,060 instructions # 2.52 insn per cycle - 1.705369212 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) +TOTAL : 1.671409 sec + 4,881,305,688 cycles # 2.915 GHz + 12,334,806,032 instructions # 2.53 insn per cycle + 1.675465120 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10242) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.589277e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.603475e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.603475e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.593539e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.608812e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.608812e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.172225 sec - 4,123,967,720 cycles # 1.896 GHz - 6,335,720,246 instructions # 1.54 insn per cycle - 2.176249842 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) +TOTAL : 2.167894 sec + 4,105,387,288 cycles # 1.891 GHz + 6,327,572,823 instructions # 1.54 insn per cycle + 2.171922605 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1797) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt index 043c4626ae..f46288647c 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-30_23:20:57 +DATE: 2023-11-03_14:10:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.471163e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.497248e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.499419e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.509071e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.537185e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.539386e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.505895 sec - 2,223,447,521 cycles # 2.997 GHz - 3,509,751,489 instructions # 1.58 insn per cycle - 0.813952724 seconds time elapsed +TOTAL : 0.502824 sec + 2,250,068,765 cycles # 3.040 GHz + 3,538,075,739 instructions # 1.57 insn per cycle + 0.813609249 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.134766e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.166730e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.168082e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.138969e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.172977e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.174382e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.059208 sec - 10,034,940,988 cycles # 3.032 GHz - 22,819,661,276 instructions # 2.27 insn per cycle - 3.366574625 seconds time elapsed +TOTAL : 3.062614 sec + 10,229,382,145 cycles # 3.088 GHz + 22,201,609,187 instructions # 2.17 insn per cycle + 3.371555969 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.983353e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.984333e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.984333e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.966043e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.967005e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.967005e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
8.278074 sec - 25,644,473,579 cycles # 3.097 GHz - 78,941,498,794 instructions # 3.08 insn per cycle - 8.282145719 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.347953 sec + 25,546,590,557 cycles # 3.059 GHz + 78,935,472,097 instructions # 3.09 insn per cycle + 8.351969958 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4879) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.757781e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.761243e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.761243e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.575391e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.578836e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.578836e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.373408 sec - 12,920,719,076 cycles # 2.952 GHz - 39,284,359,428 instructions # 3.04 insn per cycle - 4.377582936 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.593548 sec + 12,970,107,566 cycles # 2.822 GHz + 39,277,710,202 instructions # 3.03 insn per cycle + 4.597715990 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13170) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.589418e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.606399e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.606399e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.167807e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.186151e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.186151e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.918676 sec - 5,578,217,582 cycles # 2.903 GHz - 13,689,618,564 instructions # 2.45 insn per cycle - 1.922724613 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) +TOTAL : 2.014576 sec + 5,549,063,146 cycles # 2.750 GHz + 13,682,908,062 instructions # 2.47 insn per cycle + 2.019029241 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11339) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.748063e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.770913e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.770913e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.768089e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.790984e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.790984e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.691750 sec - 4,896,349,233 cycles # 2.888 GHz - 12,344,390,321 instructions # 2.52 insn per cycle - 1.695929786 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) +TOTAL : 1.685005 sec + 4,884,942,305 cycles # 2.893 GHz + 12,336,928,668 instructions # 2.53 insn per cycle + 1.689105526 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10242) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.287653e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.300208e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.300208e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.722454e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.737226e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.737226e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.259681 sec - 4,122,896,631 cycles # 1.822 GHz - 6,337,234,987 instructions # 1.54 insn per cycle - 2.263766017 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) +TOTAL : 2.129994 sec + 4,094,793,737 cycles # 1.919 GHz + 6,329,508,466 instructions # 1.55 insn per cycle + 2.134175822 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1797) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index b9c5d6cae4..fce3d66688 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-30_23:17:45 +DATE: 2023-11-03_14:07:36 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -51,14 +51,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.222738e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.511031e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.513181e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.234011e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.528551e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.530858e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.504611 sec - 2,250,444,021 cycles # 3.031 GHz - 3,479,864,190 instructions # 1.55 insn per cycle - 0.810696520 seconds time elapsed +TOTAL : 0.505287 sec + 2,240,713,082 cycles # 3.016 GHz + 3,438,870,934 instructions # 1.53 insn per cycle + 0.809116913 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -71,14 +71,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.753926e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.181085e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.182473e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.743375e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.168214e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.169628e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.184117 sec - 10,464,163,533 cycles # 3.047 GHz - 23,737,906,553 instructions # 2.27 insn per cycle - 3.492903854 seconds time elapsed +TOTAL : 3.205050 sec + 10,597,294,848 cycles # 3.065 GHz + 22,938,358,256 instructions # 2.16 insn per cycle + 3.514076840 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -93,16 +93,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.974826e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.975768e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.975768e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.976812e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.977739e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.977739e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.313623 
sec - 25,699,589,266 cycles # 3.090 GHz - 78,941,867,221 instructions # 3.07 insn per cycle - 8.317593698 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.301904 sec + 25,599,810,977 cycles # 3.083 GHz + 78,935,431,820 instructions # 3.08 insn per cycle + 8.305916327 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4879) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -120,16 +119,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.701465e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.704860e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.704860e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.765431e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.768870e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.768870e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.439632 sec - 12,929,764,158 cycles # 2.910 GHz - 39,284,341,231 instructions # 3.04 insn per cycle - 4.443728539 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.361408 sec + 12,972,983,204 cycles # 2.972 GHz + 39,276,794,676 instructions # 3.03 insn per cycle + 4.365530738 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13170) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -147,16 +145,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.596958e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.614431e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.614431e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.469632e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.486221e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.486221e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.916972 sec - 5,577,800,783 cycles # 2.904 GHz - 13,688,986,742 instructions # 2.45 insn per cycle - 1.921087847 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) +TOTAL : 1.942639 sec + 5,545,744,386 cycles # 2.850 GHz + 13,681,716,126 instructions # 2.47 insn per cycle + 1.946774708 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11339) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -174,16 +171,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.752841e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.775978e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.775978e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.805750e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.828814e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.828814e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.690810 sec - 4,894,650,981 cycles # 2.889 GHz - 12,344,413,507 instructions # 2.52 insn per cycle - 1.694845631 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) +TOTAL : 1.678563 sec + 4,882,802,128 cycles # 2.903 GHz + 12,336,959,860 instructions # 2.53 insn per cycle + 1.682626619 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10242) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -201,16 +197,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.693148e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.707263e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.707263e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.219836e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.232827e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.232827e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.141292 sec - 4,115,064,829 cycles # 1.919 GHz - 6,336,964,394 instructions # 1.54 insn per cycle - 2.145349336 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) +TOTAL : 2.278049 sec + 4,096,280,217 cycles # 1.795 GHz + 6,330,028,283 instructions # 1.55 insn per cycle + 2.282228085 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1797) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index 0d3326ca56..c1e0a2680d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-30_22:46:54 +DATE: 2023-11-03_13:18:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.491367e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.514776e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.516713e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.476416e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.504069e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.506381e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.516171 sec - 2,249,295,382 cycles # 3.028 GHz - 3,533,941,890 instructions # 1.57 insn per cycle - 0.803191953 seconds time elapsed +TOTAL : 0.521804 sec + 2,271,222,187 cycles # 3.016 GHz + 3,545,540,197 instructions # 1.56 insn per cycle + 0.828184124 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.139994e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.166707e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.167859e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.137838e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.172044e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.173476e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.011297 sec - 9,979,296,942 cycles # 3.062 GHz - 20,588,122,768 instructions # 2.06 insn per cycle - 3.316869688 seconds time elapsed +TOTAL : 3.023677 sec + 10,060,817,524 cycles # 3.068 GHz + 22,099,559,419 instructions # 2.20 insn per cycle + 3.336415577 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.989568e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.990537e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.990537e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.983012e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.984028e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.984028e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
8.251964 sec - 25,599,012,720 cycles # 3.101 GHz - 78,714,499,821 instructions # 3.07 insn per cycle - 8.256078525 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4263) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.276490 sec + 25,550,391,420 cycles # 3.086 GHz + 78,708,125,179 instructions # 3.08 insn per cycle + 8.282892300 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4250) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.700215e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.703473e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.703473e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.723060e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.726384e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.726384e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.440870 sec - 12,882,751,008 cycles # 2.899 GHz - 39,230,659,978 instructions # 3.05 insn per cycle - 4.445036011 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12949) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.410833 sec + 12,906,494,469 cycles # 2.925 GHz + 39,224,246,908 instructions # 3.04 insn per cycle + 4.423264335 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:12937) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.515969e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.533415e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.533415e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.514705e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.532795e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.532795e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.934902 sec - 5,614,399,765 cycles # 2.897 GHz - 13,803,271,495 instructions # 2.46 insn per cycle - 1.939082893 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11422) (512y: 0) (512z: 0) +TOTAL : 1.931872 sec + 5,619,495,933 cycles # 2.903 GHz + 13,796,922,511 instructions # 2.46 insn per cycle + 1.942226870 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11404) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.429736e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.452404e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.452404e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.697078e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.719929e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.719929e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.748526 sec - 4,960,981,981 cycles # 2.831 GHz - 12,469,682,563 instructions # 2.51 insn per cycle - 1.752792616 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10258) (512y: 240) (512z: 0) +TOTAL : 1.697070 sec + 4,931,596,573 cycles # 2.899 GHz + 12,462,949,801 instructions # 2.53 insn per cycle + 1.712960774 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10234) (512y: 240) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.693652e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.707695e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.707695e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.701118e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.715314e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.715314e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.141122 sec - 4,116,386,291 cycles # 1.920 GHz - 6,461,786,548 instructions # 1.57 insn per cycle - 2.145389447 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1647) (512y: 192) (512z: 9375) +TOTAL : 2.135519 sec + 4,115,648,082 cycles # 1.924 GHz + 6,454,623,775 instructions # 1.57 insn per cycle + 2.148173862 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1623) (512y: 192) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index b0e2abe9e7..8c991ffb74 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-30_23:02:30 +DATE: 2023-11-03_13:51:46 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.230348e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.253293e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.255230e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.232347e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.256643e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.258773e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.526433 sec - 2,273,262,486 cycles # 3.018 GHz - 3,600,910,991 instructions # 1.58 insn per cycle - 0.810817494 seconds time elapsed +TOTAL : 0.528671 sec + 2,311,688,773 cycles # 3.025 GHz + 3,507,601,681 instructions # 1.52 insn per cycle + 0.823229672 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.776696e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.803592e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.804745e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.775534e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.804008e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.805181e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.293946 sec - 10,657,689,079 cycles # 3.004 GHz - 22,487,370,214 instructions # 2.11 insn per cycle - 3.605934146 seconds time elapsed +TOTAL : 3.299047 sec + 10,881,401,060 cycles # 3.063 GHz + 24,364,708,075 instructions # 2.24 insn per cycle + 3.609156427 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.456004e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.456479e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.456479e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.469923e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.470422e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.470422e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
36.814419 sec - 113,714,815,794 cycles # 3.089 GHz - 144,971,695,251 instructions # 1.27 insn per cycle - 36.818652323 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:21605) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 36.695736 sec + 113,474,484,475 cycles # 3.093 GHz + 144,738,639,997 instructions # 1.28 insn per cycle + 36.699887629 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:21213) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.289352e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.292012e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.292012e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.274593e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.277295e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.277295e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.994765 sec - 14,705,948,574 cycles # 2.942 GHz - 37,577,579,529 instructions # 2.56 insn per cycle - 4.999059436 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:68118) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.014179 sec + 14,723,510,016 cycles # 2.934 GHz + 37,570,420,448 instructions # 2.55 insn per cycle + 5.018361008 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:68106) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest.exe [ PASSED ] 6 
tests. @@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.838791e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.853570e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.853570e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.796690e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.811800e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.811800e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.101447 sec - 6,122,264,653 cycles # 2.909 GHz - 13,063,480,967 instructions # 2.13 insn per cycle - 2.105553780 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:46960) (512y: 0) (512z: 0) +TOTAL : 2.110000 sec + 6,128,596,243 cycles # 2.900 GHz + 13,056,352,580 instructions # 2.13 insn per cycle + 2.114203765 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:46942) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.467933e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.491324e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.491324e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.517332e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.539370e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.539370e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.741382 sec - 5,060,449,342 cycles # 2.900 GHz - 11,442,110,060 instructions # 2.26 insn per cycle - 1.745548105 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40434) (512y: 285) (512z: 0) +TOTAL : 1.729322 sec + 5,053,484,949 cycles # 2.916 GHz + 11,434,594,331 instructions # 2.26 insn per cycle + 1.733514150 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40410) (512y: 285) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.971385e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.987269e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.987269e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.002850e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.019204e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.019204e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.067036 sec - 3,978,997,180 cycles # 1.922 GHz - 5,943,536,872 instructions # 1.49 insn per cycle - 2.071156830 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2455) (512y: 337) (512z:39411) +TOTAL : 2.055975 sec + 3,972,385,481 cycles # 1.929 GHz + 5,936,216,747 instructions # 1.49 insn per cycle + 2.060152331 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2431) (512y: 337) (512z:39411) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index 0d0978bb51..b4ebeb041b 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-30_23:03:38 +DATE: 2023-11-03_13:52:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.220052e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.243046e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.245124e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.234950e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.260216e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.262156e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.530319 sec - 2,259,848,260 cycles # 2.956 GHz - 3,491,848,314 instructions # 1.55 insn per cycle - 0.823546608 seconds time elapsed +TOTAL : 0.527572 sec + 2,272,502,168 cycles # 3.021 GHz + 3,459,483,926 instructions # 1.52 insn per cycle + 0.810794500 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.792897e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.819967e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.821084e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.792778e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.821467e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.822686e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.266999 sec - 10,784,156,266 cycles # 3.066 GHz - 23,481,416,267 instructions # 2.18 insn per cycle - 3.573859817 seconds time elapsed +TOTAL : 3.271697 sec + 10,757,913,675 cycles # 3.051 GHz + 23,608,820,167 instructions # 2.19 insn per cycle + 3.619466436 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.432329e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.432803e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.432803e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.417584e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.418082e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.418082e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
37.010471 sec - 114,295,370,649 cycles # 3.088 GHz - 145,556,328,098 instructions # 1.27 insn per cycle - 37.014451848 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:22248) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 37.130623 sec + 114,676,551,402 cycles # 3.089 GHz + 145,541,996,003 instructions # 1.27 insn per cycle + 37.134802859 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:22059) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.199736e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.202207e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.202207e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.167800e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.170194e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.170194e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.134326 sec - 15,166,468,724 cycles # 2.952 GHz - 37,764,533,543 instructions # 2.49 insn per cycle - 5.138511572 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:68446) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.183057 sec + 15,156,859,358 cycles # 2.922 GHz + 37,757,579,871 instructions # 2.49 insn per cycle + 5.187317533 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:68434) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest.exe [ PASSED ] 6 
tests. @@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.958529e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.973629e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.973629e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.924832e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.941496e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.941496e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.070228 sec - 6,003,805,381 cycles # 2.895 GHz - 12,898,512,881 instructions # 2.15 insn per cycle - 2.074445232 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:45929) (512y: 0) (512z: 0) +TOTAL : 2.075554 sec + 6,039,745,875 cycles # 2.905 GHz + 12,890,674,982 instructions # 2.13 insn per cycle + 2.079791859 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:45911) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.276328e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.296939e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.296939e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.194707e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.215465e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.215465e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.777498 sec - 5,108,668,456 cycles # 2.868 GHz - 11,448,668,674 instructions # 2.24 insn per cycle - 1.781716101 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40123) (512y: 219) (512z: 0) +TOTAL : 1.790127 sec + 5,076,722,329 cycles # 2.830 GHz + 11,441,369,586 instructions # 2.25 insn per cycle + 1.794407354 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40099) (512y: 219) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.969395e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.982565e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.982565e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.900388e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.916089e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.916089e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.363302 sec - 3,965,199,500 cycles # 1.675 GHz - 5,898,552,595 instructions # 1.49 insn per cycle - 2.367905311 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1971) (512y: 259) (512z:38937) +TOTAL : 2.082110 sec + 3,946,998,182 cycles # 1.893 GHz + 5,890,184,055 instructions # 1.49 insn per cycle + 2.086376956 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1947) (512y: 259) (512z:38937) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index b04f88d136..486ad40ba6 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-30_22:47:30 +DATE: 2023-11-03_13:19:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.350401e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.395183e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.400130e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.332332e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.384779e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.390167e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.472466 sec - 2,064,287,043 cycles # 3.019 GHz - 3,070,903,047 instructions # 1.49 insn per cycle - 0.740974707 seconds time elapsed +TOTAL : 0.473849 sec + 2,085,448,281 cycles # 3.026 GHz + 3,102,431,333 instructions # 1.49 insn per cycle + 0.772206568 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.525886e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.583348e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.585805e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.542899e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.615381e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.618630e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.710390 sec - 5,933,399,529 cycles # 3.072 GHz - 11,392,988,350 instructions # 1.92 insn per cycle - 1.990611745 seconds time elapsed +TOTAL : 1.718344 sec + 6,002,889,204 cycles # 3.082 GHz + 11,545,937,782 instructions # 1.92 insn per cycle + 2.005254718 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.057262e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.058265e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.058265e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.033731e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.034770e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.034770e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.980037 
sec - 24,615,607,958 cycles # 3.083 GHz - 78,133,332,813 instructions # 3.17 insn per cycle - 7.984035116 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.070533 sec + 24,603,062,150 cycles # 3.048 GHz + 78,128,920,788 instructions # 3.18 insn per cycle + 8.076588204 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.492217e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.506280e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.506280e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.488751e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.503314e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.503314e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.196903 sec - 6,474,538,578 cycles # 2.943 GHz - 20,124,136,855 instructions # 3.11 insn per cycle - 2.200972137 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.195060 sec + 6,447,897,055 cycles # 2.933 GHz + 20,117,767,281 instructions # 3.12 insn per cycle + 2.208245409 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13751) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.709474e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.716682e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.716682e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.702639e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.709538e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.709538e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.967644 sec - 2,835,284,176 cycles # 2.920 GHz - 6,991,695,841 instructions # 2.47 insn per cycle - 0.971765057 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) +TOTAL : 0.968946 sec + 2,810,112,225 cycles # 2.891 GHz + 6,985,318,771 instructions # 2.49 insn per cycle + 0.984902833 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11856) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.947430e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.956519e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.956519e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.941806e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.950768e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.950768e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.849627 sec - 2,487,648,681 cycles # 2.916 GHz - 6,298,810,862 instructions # 2.53 insn per cycle - 0.853767432 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) +TOTAL : 0.850482 sec + 2,478,670,305 cycles # 2.904 GHz + 6,292,505,198 instructions # 2.54 insn per cycle + 0.862853898 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10798) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.556501e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.562383e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.562383e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.565064e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.570988e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.570988e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.061313 sec - 2,046,593,820 cycles # 1.923 GHz - 3,269,352,925 instructions # 1.60 insn per cycle - 1.065402899 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) +TOTAL : 1.054240 sec + 2,040,135,052 cycles # 1.930 GHz + 3,262,448,266 instructions # 1.60 insn per cycle + 1.068023563 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2391) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index 74c634601d..f5dfa59d7b 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-30_23:13:19 +DATE: 2023-11-03_14:02:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.671948e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.316331e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.316331e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.689615e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.338105e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.338105e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.462676 sec - 2,024,244,638 cycles # 3.009 GHz - 3,041,248,244 instructions # 1.50 insn per cycle - 0.730268417 seconds time elapsed +TOTAL : 0.466275 sec + 2,049,312,797 cycles # 3.023 GHz + 3,074,986,142 instructions # 1.50 insn per cycle + 0.737052092 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.275155e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.478385e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.478385e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.258963e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.479077e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.479077e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.871289 sec - 6,489,134,544 cycles # 3.072 GHz - 12,828,495,799 instructions # 1.98 insn per cycle - 2.169427134 seconds time elapsed +TOTAL : 1.896442 sec + 6,547,114,416 cycles # 3.071 GHz + 13,674,100,847 instructions # 2.09 insn per cycle + 2.189382080 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -103,16 +103,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.057674e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.058683e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.058683e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.057706e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.058777e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.058777e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 
7.980160 sec - 24,660,723,418 cycles # 3.091 GHz - 78,140,421,281 instructions # 3.17 insn per cycle - 7.984129751 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 7.977330 sec + 24,624,931,190 cycles # 3.086 GHz + 78,131,048,964 instructions # 3.17 insn per cycle + 7.981338163 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -131,16 +130,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.532234e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.546273e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.546273e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.229659e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.242643e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.242643e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.187667 sec - 6,485,446,957 cycles # 2.960 GHz - 20,133,607,632 instructions # 3.10 insn per cycle - 2.191951636 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.275774 sec + 6,458,705,726 cycles # 2.834 GHz + 20,126,381,677 instructions # 3.12 insn per cycle + 2.280068981 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13751) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -159,16 +157,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.687386e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.694462e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.694462e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.700287e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.707686e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.707686e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.982044 sec - 2,845,455,999 cycles # 2.887 GHz - 7,001,460,428 instructions # 2.46 insn per cycle - 0.986146209 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) +TOTAL : 0.972132 sec + 2,821,480,493 cycles # 2.892 GHz + 6,993,944,246 instructions # 2.48 insn per cycle + 0.976185959 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11856) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -187,16 +184,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.946050e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.955574e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.955574e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.938068e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.947761e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.947761e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.853102 sec - 2,498,282,365 cycles # 2.916 GHz - 6,308,731,962 instructions # 2.53 insn per cycle - 0.857331584 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) +TOTAL : 0.853437 sec + 2,487,354,942 cycles # 2.903 GHz + 6,301,326,408 instructions # 2.53 insn per cycle + 0.857469948 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10798) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -215,16 +211,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.547234e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.552924e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.552924e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.548564e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.554396e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.554396e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.070786 sec - 2,056,266,891 cycles # 1.914 GHz - 3,279,522,920 instructions # 1.59 insn per cycle - 1.074961383 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) +TOTAL : 1.066846 sec + 2,050,944,515 cycles # 1.917 GHz + 3,272,430,014 instructions # 1.60 insn per cycle + 1.071113593 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2391) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index 720048ec3c..173c8076ae 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-30_23:24:51 +DATE: 2023-11-03_14:14:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.334807e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.383679e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.389054e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.347024e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.398089e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.403736e+05 ) sec^-1 MeanMatrixElemValue = ( 4.159397e-01 +- 3.238804e-01 ) GeV^-4 -TOTAL : 0.464332 sec - 2,004,763,386 cycles # 2.983 GHz - 3,003,176,836 instructions # 1.50 insn per cycle - 0.731505024 seconds time elapsed +TOTAL : 0.461628 sec + 2,029,123,236 cycles # 3.024 GHz + 3,049,839,564 instructions # 1.50 insn per cycle + 0.728608078 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --common WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.548345e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.616460e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.619482e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.545451e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.618382e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.621615e+05 ) sec^-1 MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 -TOTAL : 1.797034 sec - 6,178,315,733 cycles # 3.040 GHz - 13,028,330,035 instructions # 2.11 insn per cycle - 2.088729594 seconds time elapsed +TOTAL : 1.793331 sec + 6,217,113,732 cycles # 3.073 GHz + 12,961,195,860 instructions # 2.08 insn per cycle + 2.079989875 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.026245e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.027281e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.027281e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.060313e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.061332e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.061332e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 8.104730 
sec - 24,633,844,566 cycles # 3.039 GHz - 78,139,304,517 instructions # 3.17 insn per cycle - 8.108845161 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 7.966547 sec + 24,594,743,209 cycles # 3.086 GHz + 78,127,435,867 instructions # 3.18 insn per cycle + 7.970399932 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.385038e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.398761e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.398761e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.344667e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.358670e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.358670e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 2.229439 sec - 6,483,373,401 cycles # 2.904 GHz - 20,123,730,824 instructions # 3.10 insn per cycle - 2.233500667 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.239093 sec + 6,454,079,659 cycles # 2.879 GHz + 20,117,446,936 instructions # 3.12 insn per cycle + 2.242924767 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13751) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.668417e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.675217e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.675217e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.705593e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.712747e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.712747e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.991992 sec - 2,842,155,618 cycles # 2.856 GHz - 6,989,719,162 instructions # 2.46 insn per cycle - 0.996014634 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) +TOTAL : 0.967614 sec + 2,815,668,718 cycles # 2.901 GHz + 6,983,504,963 instructions # 2.48 insn per cycle + 0.971467670 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11856) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.918755e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.928461e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.928461e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.931679e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.941065e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.941065e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.863724 sec - 2,490,611,110 cycles # 2.873 GHz - 6,295,213,713 instructions # 2.53 insn per cycle - 0.867676126 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) +TOTAL : 0.854674 sec + 2,481,889,358 cycles # 2.893 GHz + 6,287,664,762 instructions # 2.53 insn per cycle + 0.858561765 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10798) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.531685e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.537774e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.537774e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.547581e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.553526e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.553526e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 1.079892 sec - 2,051,094,812 cycles # 1.893 GHz - 3,265,110,091 instructions # 1.59 insn per cycle - 1.083901539 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) +TOTAL : 1.065766 sec + 2,044,093,407 cycles # 1.912 GHz + 3,257,736,990 instructions # 1.59 insn per cycle + 1.069778995 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2391) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt index 7804f4bf8a..9651ad3989 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-30_23:21:33 +DATE: 2023-11-03_14:11:26 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.349443e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.397501e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.402622e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.350971e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.403437e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.408842e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.459481 sec - 2,011,901,926 cycles # 3.015 GHz - 3,051,434,225 instructions # 1.52 insn per cycle - 0.726336676 seconds time elapsed +TOTAL : 0.461626 sec + 2,013,045,963 cycles # 3.001 GHz + 3,008,078,375 instructions # 1.49 insn per cycle + 0.728719546 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.555513e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.624649e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.627678e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.545870e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.618926e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.622082e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.740664 sec - 6,016,957,836 cycles # 3.061 GHz - 12,432,593,817 instructions # 2.07 insn per cycle - 2.022269033 seconds time elapsed +TOTAL : 1.752196 sec + 6,080,811,268 cycles # 3.076 GHz + 12,476,749,992 instructions # 2.05 insn per cycle + 2.034079474 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.062365e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.063397e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.063397e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.054774e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.055772e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.055772e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.960046 
sec - 24,649,032,652 cycles # 3.095 GHz - 78,133,271,650 instructions # 3.17 insn per cycle - 7.964074741 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 7.986309 sec + 24,581,427,262 cycles # 3.077 GHz + 78,126,819,050 instructions # 3.18 insn per cycle + 7.990240161 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.451293e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.465184e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.465184e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.485156e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.498721e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.498721e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.208659 sec - 6,503,127,198 cycles # 2.941 GHz - 20,125,558,857 instructions # 3.09 insn per cycle - 2.212485154 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.195463 sec + 6,450,261,189 cycles # 2.934 GHz + 20,116,674,747 instructions # 3.12 insn per cycle + 2.199312699 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13751) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.699821e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.706920e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.706920e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.712568e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.719872e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.719872e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.972443 sec - 2,836,265,238 cycles # 2.907 GHz - 6,991,517,704 instructions # 2.47 insn per cycle - 0.976302090 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) +TOTAL : 0.962474 sec + 2,809,844,250 cycles # 2.910 GHz + 6,984,023,413 instructions # 2.49 insn per cycle + 0.966406204 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11856) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.768782e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.777060e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.777060e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.936291e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.945525e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.945525e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.935326 sec - 2,490,998,791 cycles # 2.653 GHz - 6,298,940,736 instructions # 2.53 insn per cycle - 0.939567745 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) +TOTAL : 0.851856 sec + 2,477,933,241 cycles # 2.898 GHz + 6,291,229,332 instructions # 2.54 insn per cycle + 0.855761566 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10798) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.536870e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.542938e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.542938e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.558361e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.564161e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.564161e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.074951 sec - 2,049,510,603 cycles # 1.901 GHz - 3,269,159,659 instructions # 1.60 insn per cycle - 1.078898146 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) +TOTAL : 1.057206 sec + 2,039,611,088 cycles # 1.924 GHz + 3,261,377,179 instructions # 1.60 insn per cycle + 1.061196190 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2391) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index 312baf5e91..a1f4be7db1 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-30_23:18:22 +DATE: 2023-11-03_14:08:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -51,14 +51,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.799496e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.394968e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.400157e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.773311e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.394019e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.402051e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.462459 sec - 2,017,920,392 cycles # 3.011 GHz - 2,978,442,542 instructions # 1.48 insn per cycle - 0.728973268 seconds time elapsed +TOTAL : 0.462737 sec + 2,025,500,525 cycles # 3.009 GHz + 3,064,784,530 instructions # 1.51 insn per cycle + 0.729843858 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -71,14 +71,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.510554e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.618370e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.621351e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.500648e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.626485e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.629727e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.821247 sec - 6,276,478,242 cycles # 3.066 GHz - 12,339,707,675 instructions # 1.97 insn per cycle - 2.109895530 seconds time elapsed +TOTAL : 1.819392 sec + 6,275,186,955 cycles # 3.067 GHz + 12,611,714,187 instructions # 2.01 insn per cycle + 2.103369529 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -93,16 +93,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.069803e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.070843e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.070843e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.072408e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.073443e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.073443e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.931969 sec 
- 24,636,660,510 cycles # 3.105 GHz - 78,133,102,110 instructions # 3.17 insn per cycle - 7.935964631 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 7.918207 sec + 24,614,651,634 cycles # 3.108 GHz + 78,126,892,887 instructions # 3.17 insn per cycle + 7.922085250 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -120,16 +119,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.246085e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.259813e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.259813e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.512189e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.526482e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.526482e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.271476 sec - 6,476,732,405 cycles # 2.847 GHz - 20,124,333,621 instructions # 3.11 insn per cycle - 2.275698592 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.187884 sec + 6,446,988,066 cycles # 2.942 GHz + 20,116,530,852 instructions # 3.12 insn per cycle + 2.191848676 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13751) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -147,16 +145,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.697774e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.705196e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.705196e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.704770e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.712060e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.712060e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.974034 sec - 2,841,486,993 cycles # 2.907 GHz - 6,991,546,734 instructions # 2.46 insn per cycle - 0.977976633 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) +TOTAL : 0.966908 sec + 2,811,435,876 cycles # 2.898 GHz + 6,984,450,105 instructions # 2.48 insn per cycle + 0.970845648 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11856) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -174,16 +171,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.910241e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.919448e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.919448e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.943965e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.953637e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.953637e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.866300 sec - 2,488,264,122 cycles # 2.862 GHz - 6,298,648,645 instructions # 2.53 insn per cycle - 0.870222768 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) +TOTAL : 0.848496 sec + 2,478,042,485 cycles # 2.910 GHz + 6,291,817,509 instructions # 2.54 insn per cycle + 0.852404179 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10798) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -201,16 +197,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.577066e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.582967e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.582967e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.552310e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.558116e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.558116e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.047804 sec - 2,047,024,012 cycles # 1.947 GHz - 3,268,367,464 instructions # 1.60 insn per cycle - 1.051771742 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) +TOTAL : 1.061241 sec + 2,039,380,020 cycles # 1.915 GHz + 3,261,243,710 instructions # 1.60 insn per cycle + 1.065343977 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2391) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index 4efafebf91..4784426610 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-30_22:47:58 +DATE: 2023-11-03_13:19:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.359603e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.405982e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.411135e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.349532e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.401557e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.407213e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.477129 sec - 2,034,373,832 cycles # 2.961 GHz - 3,044,023,237 instructions # 1.50 insn per cycle - 0.745923336 seconds time elapsed +TOTAL : 0.477873 sec + 2,080,326,704 cycles # 2.999 GHz + 3,084,289,779 instructions # 1.48 insn per cycle + 0.773403482 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.581293e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.639577e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.642053e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.564463e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.638632e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.641780e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.709849 sec - 5,921,893,801 cycles # 3.061 GHz - 11,251,944,201 instructions # 1.90 insn per cycle - 1.991252364 seconds time elapsed +TOTAL : 1.725193 sec + 6,008,100,480 cycles # 3.075 GHz + 12,589,981,173 instructions # 2.10 insn per cycle + 2.013160084 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.061559e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.062588e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.062588e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.060333e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.061405e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.061405e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.963268 
sec - 24,535,327,018 cycles # 3.080 GHz - 77,859,389,754 instructions # 3.17 insn per cycle - 7.967328124 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3113) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 7.964531 sec + 24,559,153,248 cycles # 3.082 GHz + 77,854,490,785 instructions # 3.17 insn per cycle + 7.970382825 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3100) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.641093e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.655983e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.655983e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.658932e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.673162e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.673162e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.154162 sec - 6,421,039,489 cycles # 2.976 GHz - 20,089,349,313 instructions # 3.13 insn per cycle - 2.158481887 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13452) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.146568 sec + 6,409,796,903 cycles # 2.981 GHz + 20,082,551,685 instructions # 3.13 insn per cycle + 2.161769898 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13440) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.660885e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.667784e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.667784e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.654966e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.661573e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.661573e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.994893 sec - 2,904,128,875 cycles # 2.909 GHz - 7,133,196,313 instructions # 2.46 insn per cycle - 0.998901924 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:12261) (512y: 0) (512z: 0) +TOTAL : 0.996366 sec + 2,912,304,991 cycles # 2.914 GHz + 7,126,891,821 instructions # 2.45 insn per cycle + 1.009169071 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:12243) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.791241e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.798983e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.798983e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.856846e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.865562e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.865562e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.923346 sec - 2,599,913,719 cycles # 2.805 GHz - 6,442,215,820 instructions # 2.48 insn per cycle - 0.927419565 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11276) (512y: 27) (512z: 0) +TOTAL : 0.888266 sec + 2,589,026,411 cycles # 2.902 GHz + 6,435,733,406 instructions # 2.49 insn per cycle + 0.899709352 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11252) (512y: 27) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.509030e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.514644e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.514644e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.491972e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.497264e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.497264e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.094451 sec - 2,118,917,788 cycles # 1.930 GHz - 3,430,381,413 instructions # 1.62 insn per cycle - 1.098405290 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2912) (512y: 22) (512z: 9647) +TOTAL : 1.105471 sec + 2,110,425,003 cycles # 1.905 GHz + 3,424,239,517 instructions # 1.62 insn per cycle + 1.116915330 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2888) (512y: 22) (512z: 9647) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index b668bec12a..814bd11723 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-30_23:04:46 +DATE: 2023-11-03_13:54:05 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.574079e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.613132e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.617511e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.591643e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.632313e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.639138e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.482740 sec - 2,126,131,620 cycles # 3.020 GHz - 3,206,350,299 instructions # 1.51 insn per cycle - 0.764160349 seconds time elapsed +TOTAL : 0.486615 sec + 2,119,343,458 cycles # 2.997 GHz + 3,147,759,889 instructions # 1.49 insn per cycle + 0.768780543 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.745425e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.801933e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.804339e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.713328e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.773264e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.775863e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.847491 sec - 6,452,122,354 cycles # 3.082 GHz - 13,948,621,951 instructions # 2.16 insn per cycle - 2.149801382 seconds time elapsed +TOTAL : 1.856016 sec + 6,397,119,813 cycles # 3.061 GHz + 12,981,241,131 instructions # 2.03 insn per cycle + 2.149194154 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.868653e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.869502e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.869502e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.908089e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.908939e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.908939e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 27.953627 
sec - 86,115,472,235 cycles # 3.081 GHz - 135,575,076,729 instructions # 1.57 insn per cycle - 27.957670222 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:15486) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 27.763472 sec + 85,788,771,362 cycles # 3.090 GHz + 135,558,353,260 instructions # 1.58 insn per cycle + 27.767517079 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:15498) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -107,8 +106,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627534e-04 -Avg ME (F77/C++) = 6.6275340278065809E-004 -Relative difference = 4.195614963669944e-09 +Avg ME (F77/C++) = 6.6275342482202682E-004 +Relative difference = 3.74528849077935e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check.exe -p 64 256 1 OMP= @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.206534e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.219355e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.219355e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 
7.164817e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.177659e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.177659e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.283492 sec - 6,773,099,534 cycles # 2.962 GHz - 19,387,740,254 instructions # 2.86 insn per cycle - 2.287633055 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:69680) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.294031 sec + 6,780,048,690 cycles # 2.951 GHz + 19,379,838,021 instructions # 2.86 insn per cycle + 2.298104631 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:69668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.514138e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.519767e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.519767e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.501462e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.506925e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.506925e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.090954 sec - 3,177,223,779 cycles # 2.904 GHz - 6,808,757,079 instructions # 2.14 insn per cycle - 1.095061938 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:49077) (512y: 0) (512z: 0) +TOTAL : 1.096967 sec + 3,164,197,145 cycles # 2.875 GHz + 6,801,481,175 instructions # 2.15 insn per cycle + 1.101068017 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:49059) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.811722e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.820106e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.820106e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.832862e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.841270e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.841270e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.913032 sec - 2,651,999,117 cycles # 2.894 GHz - 5,987,075,507 instructions # 2.26 insn per cycle - 0.916949136 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:42677) (512y: 11) (512z: 0) +TOTAL : 0.899695 sec + 2,626,761,775 cycles # 2.909 GHz + 5,979,745,483 instructions # 2.28 insn per cycle + 0.903765130 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:42653) (512y: 11) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.538318e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.543956e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.543956e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.539328e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.545357e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.545357e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.073994 sec - 2,072,605,882 cycles # 1.924 GHz - 3,501,780,959 instructions # 1.69 insn per cycle - 1.077935198 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5198) (512y: 3) (512z:44822) +TOTAL : 1.070480 sec + 2,068,281,241 cycles # 1.927 GHz + 3,494,063,429 instructions # 1.69 insn per cycle + 1.074606344 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5174) (512y: 3) (512z:44822) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index 5e744a93e3..20cfe4f0bf 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-30_23:05:38 +DATE: 2023-11-03_13:54:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.524936e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.563450e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.568376e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.574561e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.615562e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.620086e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.488640 sec - 2,025,619,146 cycles # 2.859 GHz - 3,037,506,653 instructions # 1.50 insn per cycle - 0.770645006 seconds time elapsed +TOTAL : 0.484423 sec + 2,095,804,311 cycles # 2.968 GHz + 3,116,236,953 instructions # 1.49 insn per cycle + 0.766353398 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.649797e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.705127e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.707537e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.686607e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.746725e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.749340e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.858266 sec - 6,431,342,673 cycles # 3.079 GHz - 13,514,328,149 instructions # 2.10 insn per cycle - 2.148737908 seconds time elapsed +TOTAL : 1.856488 sec + 6,359,088,314 cycles # 3.041 GHz + 12,734,066,872 instructions # 2.00 insn per cycle + 2.148830509 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.901255e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.902089e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.902089e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.814578e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.815423e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.815423e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 27.799387 
sec - 86,050,210,121 cycles # 3.095 GHz - 135,904,121,070 instructions # 1.58 insn per cycle - 27.803438431 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:15910) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 28.210026 sec + 86,898,226,581 cycles # 3.080 GHz + 136,168,004,846 instructions # 1.57 insn per cycle + 28.213953775 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:15836) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -107,8 +106,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275352674967369E-004 -Relative difference = 4.0361421941458736e-08 +Avg ME (F77/C++) = 6.6275350387951654E-004 +Relative difference = 5.853634118441163e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check.exe -p 64 256 1 OMP= @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.137168e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.149873e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.149873e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 
7.146591e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.159401e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.159401e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.305713 sec - 6,851,279,837 cycles # 2.969 GHz - 19,440,056,931 instructions # 2.84 insn per cycle - 2.309801766 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:69722) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.299520 sec + 6,843,640,652 cycles # 2.972 GHz + 19,433,215,212 instructions # 2.84 insn per cycle + 2.303591381 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:69710) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.547310e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.553162e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.553162e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.552959e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.559002e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.559002e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.067714 sec - 3,107,929,943 cycles # 2.902 GHz - 6,719,698,155 instructions # 2.16 insn per cycle - 1.071722984 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:47667) (512y: 0) (512z: 0) +TOTAL : 1.060874 sec + 3,099,042,655 cycles # 2.912 GHz + 6,712,227,285 instructions # 2.17 insn per cycle + 1.064787447 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:47649) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.843070e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.851764e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.851764e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.840652e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.849063e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.849063e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.897455 sec - 2,626,652,350 cycles # 2.916 GHz - 5,970,213,844 instructions # 2.27 insn per cycle - 0.901540279 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:41842) (512y: 13) (512z: 0) +TOTAL : 0.895700 sec + 2,623,288,445 cycles # 2.918 GHz + 5,963,021,182 instructions # 2.27 insn per cycle + 0.899657048 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:41818) (512y: 13) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.539415e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.545162e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.545162e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.539209e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.545158e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.545158e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.073421 sec - 2,077,996,637 cycles # 1.930 GHz - 3,494,858,382 instructions # 1.68 insn per cycle - 1.077375599 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4162) (512y: 4) (512z:44465) +TOTAL : 1.070536 sec + 2,071,173,952 cycles # 1.929 GHz + 3,487,510,778 instructions # 1.68 insn per cycle + 1.074519012 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4138) (512y: 4) (512z:44465) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 2f0b8b31cf..694fd5e359 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-30_22:48:27 +DATE: 2023-11-03_13:20:16 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.492406e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.515496e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.517439e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.458705e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.486462e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.488653e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.515757 sec - 2,237,825,305 cycles # 3.009 GHz - 3,541,071,069 instructions # 1.58 insn per cycle - 0.803705171 seconds time elapsed +TOTAL : 0.523722 sec + 2,255,939,501 cycles # 2.995 GHz + 3,509,263,775 instructions # 1.56 insn per cycle + 0.830704766 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.139310e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.165996e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.167100e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.121227e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.155141e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.156519e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.020287 sec - 10,063,525,082 cycles # 3.080 GHz - 22,322,079,468 instructions # 2.22 insn per cycle - 3.324599235 seconds time elapsed +TOTAL : 3.036705 sec + 10,115,435,049 cycles # 3.073 GHz + 22,061,768,809 instructions # 2.18 insn per cycle + 3.348414492 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.971138e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.972060e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.972060e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.961925e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.962832e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.962832e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.329783 
sec - 25,914,783,719 cycles # 3.110 GHz - 79,442,464,075 instructions # 3.07 insn per cycle - 8.333954442 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4857) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.366261 sec + 25,827,115,509 cycles # 3.086 GHz + 79,439,243,960 instructions # 3.08 insn per cycle + 8.372513432 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4844) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.644383e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.647633e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.647633e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.707062e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.710365e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.710365e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.509094 sec - 12,959,080,361 cycles # 2.872 GHz - 38,555,729,789 instructions # 2.98 insn per cycle - 4.513261823 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13161) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.430067 sec + 12,714,024,789 cycles # 2.868 GHz + 38,548,672,066 instructions # 3.03 insn per cycle + 4.444871259 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13149) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.730078e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.748568e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.748568e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.776736e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.794746e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.794746e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.887755 sec - 5,512,242,363 cycles # 2.915 GHz - 13,486,515,249 instructions # 2.45 insn per cycle - 1.891845030 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11242) (512y: 0) (512z: 0) +TOTAL : 1.874756 sec + 5,484,082,227 cycles # 2.919 GHz + 13,477,398,777 instructions # 2.46 insn per cycle + 1.889668068 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11224) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.879764e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.903641e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.903641e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.828008e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.851423e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.851423e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.669158 sec - 4,872,534,032 cycles # 2.913 GHz - 12,139,368,739 instructions # 2.49 insn per cycle - 1.673328458 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10154) (512y: 79) (512z: 0) +TOTAL : 1.675570 sec + 4,865,852,507 cycles # 2.899 GHz + 12,134,094,396 instructions # 2.49 insn per cycle + 1.690145290 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10130) (512y: 79) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.640128e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.654087e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.654087e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.679831e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.694420e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.694420e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.155921 sec - 4,141,154,094 cycles # 1.918 GHz - 6,338,702,815 instructions # 1.53 insn per cycle - 2.160133515 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1802) (512y: 93) (512z: 9358) +TOTAL : 2.141625 sec + 4,133,118,102 cycles # 1.926 GHz + 6,332,211,076 instructions # 1.53 insn per cycle + 2.156472073 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1778) (512y: 93) (512z: 9358) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index ccac2b3fbf..4d1d31ffb1 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-10-30_22:49:04 +DATE: 2023-11-03_13:20:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.490192e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.514534e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.516400e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.475287e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.502927e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.505387e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.517992 sec - 2,239,749,066 cycles # 3.007 GHz - 3,465,742,535 instructions # 1.55 insn per cycle - 0.806009054 seconds time elapsed +TOTAL : 0.518126 sec + 2,276,729,172 cycles # 3.041 GHz + 3,488,397,186 instructions # 1.53 insn per cycle + 0.821168706 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.150690e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.177532e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.178659e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.127315e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.161363e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.162789e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.007631 sec - 10,038,232,454 cycles # 3.083 GHz - 21,636,576,131 instructions # 2.16 insn per cycle - 3.312819700 seconds time elapsed +TOTAL : 3.029051 sec + 10,093,082,805 cycles # 3.065 GHz + 22,762,372,486 instructions # 2.26 insn per cycle + 3.352739234 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.966819e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.967730e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.967730e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.946525e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.947456e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.947456e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.347331 
sec - 25,957,942,010 cycles # 3.109 GHz - 79,453,544,444 instructions # 3.06 insn per cycle - 8.351342416 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4504) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.432240 sec + 25,964,717,090 cycles # 3.078 GHz + 79,447,901,186 instructions # 3.06 insn per cycle + 8.438341900 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4491) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.615181e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.618274e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.618274e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.776995e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.780282e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.780282e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.545254 sec - 12,649,362,853 cycles # 2.781 GHz - 38,527,203,698 instructions # 3.05 insn per cycle - 4.549380271 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12928) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.348375 sec + 12,634,604,343 cycles # 2.903 GHz + 38,518,123,106 instructions # 3.05 insn per cycle + 4.360783870 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:12916) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.633408e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.651576e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.651576e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.631631e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.648449e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.648449e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.908898 sec - 5,566,883,210 cycles # 2.911 GHz - 13,611,086,667 instructions # 2.45 insn per cycle - 1.912985527 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11327) (512y: 0) (512z: 0) +TOTAL : 1.906965 sec + 5,551,258,215 cycles # 2.907 GHz + 13,604,309,042 instructions # 2.45 insn per cycle + 1.921266748 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11309) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.821954e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.844810e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.844810e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.805918e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.828218e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.828218e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.678540 sec - 4,911,467,844 cycles # 2.920 GHz - 12,274,224,547 instructions # 2.50 insn per cycle - 1.682639558 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10143) (512y: 239) (512z: 0) +TOTAL : 1.678299 sec + 4,900,060,132 cycles # 2.913 GHz + 12,267,575,637 instructions # 2.50 insn per cycle + 1.690581680 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10119) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.669286e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.683741e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.683741e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.542959e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.556798e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.556798e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.147327 sec - 4,147,311,338 cycles # 1.929 GHz - 6,446,476,893 instructions # 1.55 insn per cycle - 2.151443442 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1627) (512y: 191) (512z: 9356) +TOTAL : 2.180419 sec + 4,152,247,379 cycles # 1.902 GHz + 6,439,813,764 instructions # 1.55 insn per cycle + 2.195336855 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1603) (512y: 191) (512z: 9356) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index dd384d9a9e..e2653e70e0 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-30_22:51:21 +DATE: 2023-11-03_13:23:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.071806e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.072188e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.072290e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.066555e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.066940e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.067040e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.415811 sec - 8,328,884,135 cycles # 3.048 GHz - 17,466,962,527 instructions # 2.10 insn per cycle - 2.791455375 seconds time elapsed +TOTAL : 2.448849 sec + 8,442,927,493 cycles # 3.084 GHz + 18,742,679,289 instructions # 2.22 insn per cycle + 2.849862735 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.218962e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.220743e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.220937e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.254183e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.256391e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.256597e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.995999 sec - 13,328,179,760 cycles # 3.087 GHz - 28,796,282,685 instructions # 2.16 insn per cycle - 4.374280178 seconds time elapsed +TOTAL : 3.999010 sec + 13,287,409,997 cycles # 3.074 GHz + 27,420,293,358 instructions # 2.06 insn per cycle + 4.379911015 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.508851e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.509092e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.509092e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.063391e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.063614e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.063614e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 
6.219499 sec - 18,779,011,052 cycles # 3.018 GHz - 53,915,285,892 instructions # 2.87 insn per cycle - 6.223403710 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32447) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.548783 sec + 18,780,752,615 cycles # 2.869 GHz + 53,910,020,226 instructions # 2.87 insn per cycle + 6.554617751 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32434) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.681222e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.681310e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.681310e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.680442e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.680533e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.680533e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.146190 sec - 9,792,919,230 cycles # 3.110 GHz - 27,092,459,718 instructions # 2.77 insn per cycle - 3.150238199 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96441) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.146585 sec + 9,799,648,783 cycles # 3.113 GHz + 27,088,050,418 instructions # 2.76 insn per cycle + 3.159402614 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96429) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.616553e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.616985e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.616985e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.639391e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.639978e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.639978e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.464498 sec - 4,237,325,660 cycles # 2.888 GHz - 9,561,899,697 instructions # 2.26 insn per cycle - 1.468620001 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) +TOTAL : 1.456416 sec + 4,224,739,188 cycles # 2.899 GHz + 9,555,401,933 instructions # 2.26 insn per cycle + 1.469570493 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84372) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.153027e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.153572e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.153572e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.135680e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.136237e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.136237e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.277062 sec - 3,703,837,435 cycles # 2.893 GHz - 8,485,212,359 instructions # 2.29 insn per cycle - 1.280922003 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) +TOTAL : 1.282040 sec + 3,715,294,067 cycles # 2.898 GHz + 8,479,241,206 instructions # 2.28 insn per cycle + 1.291522613 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79967) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.762009e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.762555e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.762555e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.777236e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.777800e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.777800e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.409289 sec - 2,693,541,093 cycles # 1.907 GHz - 4,272,936,710 instructions # 1.59 insn per cycle - 1.413386706 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) +TOTAL : 1.404458 sec + 2,686,476,600 cycles # 1.914 GHz + 4,267,694,416 instructions # 1.59 insn per cycle + 1.415354808 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2260) (512y: 105) (512z:79105) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 639241e990..025b178cf6 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-30_23:13:48 +DATE: 2023-11-03_14:03:14 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.065386e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.066269e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.066269e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.065666e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.066634e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.066634e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.349881 sec - 8,241,377,872 cycles # 3.087 GHz - 18,573,308,608 instructions # 2.25 insn per cycle - 2.725675058 seconds time elapsed +TOTAL : 2.361861 sec + 8,209,630,411 cycles # 3.065 GHz + 18,786,762,328 instructions # 2.29 insn per cycle + 2.737224579 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.221612e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.252857e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.252857e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.225131e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.256858e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.256858e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.974706 sec - 13,200,930,977 cycles # 3.073 GHz - 30,240,222,320 instructions # 2.29 insn per cycle - 4.351773020 seconds time elapsed +TOTAL : 3.981554 sec + 13,248,550,244 cycles # 3.081 GHz + 30,399,077,328 instructions # 2.29 insn per cycle + 4.359768682 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -103,16 +103,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.291402e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.291627e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.291627e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.471249e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.471484e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.471484e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 
-TOTAL : 6.376409 sec - 18,809,228,656 cycles # 2.949 GHz - 53,916,582,682 instructions # 2.87 insn per cycle - 6.380275769 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32447) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.236715 sec + 18,875,634,034 cycles # 3.025 GHz + 53,908,664,810 instructions # 2.86 insn per cycle + 6.240512438 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32434) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -131,16 +130,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.664099e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.664190e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.664190e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.670534e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.670629e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.670629e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.178127 sec - 9,832,061,157 cycles # 3.091 GHz - 27,094,448,733 instructions # 2.76 insn per cycle - 3.182138725 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96441) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.163057 sec + 9,804,224,210 cycles # 3.097 GHz + 27,086,274,445 instructions # 2.76 insn per cycle + 3.166923044 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96429) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 
6 tests. @@ -159,16 +157,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.637569e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.638054e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.638054e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.592879e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.593315e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.593315e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.457515 sec - 4,240,447,705 cycles # 2.904 GHz - 9,562,677,956 instructions # 2.26 insn per cycle - 1.461476328 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) +TOTAL : 1.471724 sec + 4,258,563,758 cycles # 2.887 GHz + 9,555,052,696 instructions # 2.24 insn per cycle + 1.475635999 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84372) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -187,16 +184,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.148572e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.149184e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.149184e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.109707e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.110252e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.110252e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.277569 sec - 3,716,396,189 cycles # 2.902 GHz - 8,486,430,712 instructions # 2.28 insn per cycle - 1.281428385 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) +TOTAL : 1.286979 sec + 3,729,066,198 cycles # 2.891 GHz + 8,478,886,169 instructions # 2.27 insn per cycle + 1.290608704 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79967) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -215,16 +211,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.751319e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.751878e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.751878e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.737669e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.738260e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.738260e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.412799 sec - 2,691,848,606 cycles # 1.901 GHz - 4,273,756,865 instructions # 1.59 insn per cycle - 1.416761861 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) +TOTAL : 1.415289 sec + 2,702,491,212 cycles # 1.905 GHz + 4,267,005,207 instructions # 1.58 insn per cycle + 1.419327297 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2260) (512y: 105) (512z:79105) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index bf7c77a5e4..d91b7a964f 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-30_22:52:24 +DATE: 2023-11-03_13:24:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.062263e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.062685e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.062819e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.064830e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.065205e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.065307e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.429730 sec - 8,238,728,563 cycles # 2.986 GHz - 17,116,797,941 instructions # 2.08 insn per cycle - 2.815386546 seconds time elapsed +TOTAL : 2.445677 sec + 8,430,603,165 cycles # 3.077 GHz + 17,728,518,966 instructions # 2.10 insn per cycle + 2.842706377 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe -p 1 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.262982e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.264788e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.264964e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.276158e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.278341e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.278585e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.989275 sec - 13,288,251,530 cycles # 3.081 GHz - 31,068,913,236 instructions # 2.34 insn per cycle - 4.371305973 seconds time elapsed +TOTAL : 3.987255 sec + 13,257,486,465 cycles # 3.075 GHz + 30,585,814,740 instructions # 2.31 insn per cycle + 4.367674825 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.614917e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.615157e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.615157e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.272709e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.272950e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.272950e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 
6.134510 sec - 18,886,125,284 cycles # 3.077 GHz - 53,924,616,535 instructions # 2.86 insn per cycle - 6.138383350 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32062) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.386470 sec + 18,959,780,309 cycles # 2.970 GHz + 53,918,897,900 instructions # 2.84 insn per cycle + 6.392426466 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32049) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.671266e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.671363e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.671363e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.689998e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.690092e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.690092e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.164494 sec - 9,802,153,603 cycles # 3.095 GHz - 27,090,486,636 instructions # 2.76 insn per cycle - 3.168549098 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96284) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.132253 sec + 9,720,938,501 cycles # 3.102 GHz + 27,083,782,808 instructions # 2.79 insn per cycle + 3.144238123 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96272) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.619789e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.620217e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.620217e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.616659e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.617157e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.617157e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.463092 sec - 4,245,007,171 cycles # 2.895 GHz - 9,561,802,040 instructions # 2.25 insn per cycle - 1.467091762 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84478) (512y: 0) (512z: 0) +TOTAL : 1.465435 sec + 4,209,972,441 cycles # 2.873 GHz + 9,555,330,418 instructions # 2.27 insn per cycle + 1.479902065 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84460) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.158600e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.159171e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.159171e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.138076e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.138707e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.138707e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.274480 sec - 3,696,559,414 cycles # 2.893 GHz - 8,485,931,929 instructions # 2.30 insn per cycle - 1.278469617 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:80014) (512y: 241) (512z: 0) +TOTAL : 1.279971 sec + 3,719,590,213 cycles # 2.903 GHz + 8,479,382,445 instructions # 2.28 insn per cycle + 1.294846019 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79990) (512y: 241) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.763810e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.764369e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.764369e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.717045e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.717595e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.717595e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.408262 sec - 2,689,637,134 cycles # 1.905 GHz - 4,276,228,536 instructions # 1.59 insn per cycle - 1.412254518 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2169) (512y: 187) (512z:79110) +TOTAL : 1.424665 sec + 2,692,562,922 cycles # 1.889 GHz + 4,270,528,609 instructions # 1.59 insn per cycle + 1.435537142 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2145) (512y: 187) (512z:79110) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index a97a68dc6e..15538f78ca 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-30_22:53:26 +DATE: 2023-11-03_13:25:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.748686e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.749873e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.750128e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.767911e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.768744e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.768980e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.656102 sec - 5,926,019,855 cycles # 3.073 GHz - 12,736,616,385 instructions # 2.15 insn per cycle - 1.985610858 seconds time elapsed +TOTAL : 1.688375 sec + 5,951,848,603 cycles # 3.071 GHz + 11,790,908,447 instructions # 1.98 insn per cycle + 2.046537894 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.339808e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.340454e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.340531e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.379848e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.380728e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.380818e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.885166 sec - 6,654,335,499 cycles # 3.077 GHz - 13,084,980,952 instructions # 1.97 insn per cycle - 2.218919529 seconds time elapsed +TOTAL : 1.906118 sec + 6,749,188,338 cycles # 3.084 GHz + 13,265,647,719 instructions # 1.97 insn per cycle + 2.247715707 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.021345e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.021625e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.021625e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.985771e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.986041e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.986041e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 
5.858145 sec - 17,881,884,830 cycles # 3.055 GHz - 53,592,678,992 instructions # 3.00 insn per cycle - 5.862065521 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.877729 sec + 17,890,232,313 cycles # 3.044 GHz + 53,582,796,261 instructions # 3.00 insn per cycle + 5.884006985 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20194) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.600099e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.600523e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.600523e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.576937e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.577363e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.577363e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.472157 sec - 4,568,508,892 cycles # 3.097 GHz - 13,762,283,879 instructions # 3.01 insn per cycle - 1.476097215 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.480853 sec + 4,574,156,529 cycles # 3.087 GHz + 13,756,406,040 instructions # 3.01 insn per cycle + 1.495727074 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96974) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.239872e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.241574e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.241574e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.282438e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.284225e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.284225e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.734175 sec - 2,134,092,407 cycles # 2.894 GHz - 4,816,927,075 instructions # 2.26 insn per cycle - 0.738107512 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) +TOTAL : 0.728895 sec + 2,121,768,847 cycles # 2.905 GHz + 4,810,848,757 instructions # 2.27 insn per cycle + 0.742146050 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84886) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.319564e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.321778e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.321778e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.310636e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.312960e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.312960e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.639518 sec - 1,869,432,943 cycles # 2.909 GHz - 4,274,112,596 instructions # 2.29 insn per cycle - 0.643443684 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) +TOTAL : 0.640831 sec + 1,856,094,515 cycles # 2.896 GHz + 4,267,918,984 instructions # 2.30 insn per cycle + 0.656015079 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:80586) (512y: 46) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.529460e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.531730e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.531730e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.495650e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.497949e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.497949e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.706303 sec - 1,352,907,244 cycles # 1.906 GHz - 2,158,709,001 instructions # 1.60 insn per cycle - 0.710217457 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2878) (512y: 49) (512z:79298) +TOTAL : 0.709649 sec + 1,349,486,163 cycles # 1.899 GHz + 2,152,592,889 instructions # 1.60 insn per cycle + 0.722089483 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2854) (512y: 49) (512z:79298) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index 70e303847d..7a872855a6 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-30_23:14:51 +DATE: 2023-11-03_14:04:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.793897e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.795846e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.795846e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.809501e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.811290e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.811290e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187094e-05 +- 9.825664e-06 ) GeV^-6 -TOTAL : 1.608399 sec - 5,800,830,027 cycles # 3.073 GHz - 12,113,811,498 instructions # 2.09 insn per cycle - 1.943977837 seconds time elapsed +TOTAL : 1.592651 sec + 5,720,568,208 cycles # 3.068 GHz + 12,197,897,724 instructions # 2.13 insn per cycle + 1.923051082 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.322809e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.335242e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.335242e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.321289e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.334233e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.334233e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856441e-04 +- 8.331096e-05 ) GeV^-6 -TOTAL : 1.875097 sec - 6,569,119,147 cycles # 3.057 GHz - 13,956,037,742 instructions # 2.12 insn per cycle - 2.205562052 seconds time elapsed +TOTAL : 1.869715 sec + 6,568,388,726 cycles # 3.060 GHz + 14,401,992,310 instructions # 2.19 insn per cycle + 2.203701933 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -103,16 +103,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.135151e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.135458e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.135458e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.727810e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.728089e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.728089e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 
5.788302 sec - 17,919,872,006 cycles # 3.094 GHz - 53,589,483,238 instructions # 2.99 insn per cycle - 5.792489017 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.057945 sec + 17,910,983,389 cycles # 2.955 GHz + 53,583,204,369 instructions # 2.99 insn per cycle + 6.061887031 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20194) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -131,16 +130,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.596719e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.597153e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.597153e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.402700e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.403121e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.403121e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.472955 sec - 4,563,554,686 cycles # 3.091 GHz - 13,763,441,312 instructions # 3.02 insn per cycle - 1.476926496 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.554122 sec + 4,576,472,109 cycles # 2.938 GHz + 13,756,199,358 instructions # 3.01 insn per cycle + 1.558315371 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96974) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -159,16 +157,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.761763e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.763329e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.763329e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.189371e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.191147e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.191147e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.785466 sec - 2,137,347,048 cycles # 2.710 GHz - 4,817,859,383 instructions # 2.25 insn per cycle - 0.789305461 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) +TOTAL : 0.736358 sec + 2,124,251,465 cycles # 2.873 GHz + 4,810,297,342 instructions # 2.26 insn per cycle + 0.740176225 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84886) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -187,16 +184,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.207081e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.209309e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.209309e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.312133e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.314635e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.314635e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.648749 sec - 1,882,456,319 cycles # 2.887 GHz - 4,274,769,656 instructions # 2.27 insn per cycle - 0.652761200 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) +TOTAL : 0.637308 sec + 1,855,922,863 cycles # 2.897 GHz + 4,267,475,480 instructions # 2.30 insn per cycle + 0.641166139 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:80586) (512y: 46) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -215,16 +211,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.539970e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.542209e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.542209e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.529576e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.531852e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.531852e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.705380 sec - 1,353,054,715 cycles # 1.910 GHz - 2,159,449,357 instructions # 1.60 insn per cycle - 0.709198096 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2878) (512y: 49) (512z:79298) +TOTAL : 0.703798 sec + 1,344,909,722 cycles # 1.902 GHz + 2,152,113,875 instructions # 1.60 insn per cycle + 0.707777505 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2854) (512y: 49) (512z:79298) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index e7f54d6d2b..4090b5123c 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-30_22:54:13 +DATE: 2023-11-03_13:26:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.770874e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.771789e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.772038e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.762192e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.763017e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.763276e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.647024 sec - 5,887,181,959 cycles # 3.065 GHz - 12,015,496,021 instructions # 2.04 insn per cycle - 1.977473721 seconds time elapsed +TOTAL : 1.686323 sec + 5,942,489,542 cycles # 3.065 GHz + 11,726,530,507 instructions # 1.97 insn per cycle + 2.047701806 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe -p 1 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.320055e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.320688e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.320771e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.338330e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.339105e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.339234e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.902397 sec - 6,695,503,483 cycles # 3.076 GHz - 14,006,039,580 instructions # 2.09 insn per cycle - 2.233097385 seconds time elapsed +TOTAL : 1.912323 sec + 6,739,286,561 cycles # 3.079 GHz + 13,579,863,067 instructions # 2.02 insn per cycle + 2.245726170 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.023182e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.023466e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.023466e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.113073e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.113355e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.113355e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 
5.854767 sec - 17,892,204,982 cycles # 3.054 GHz - 53,579,913,182 instructions # 2.99 insn per cycle - 5.858666128 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20206) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.799115 sec + 17,812,397,434 cycles # 3.072 GHz + 53,573,245,900 instructions # 3.01 insn per cycle + 5.805588787 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20193) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.571699e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.572125e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.572125e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.587545e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.588072e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.588072e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.483000 sec - 4,554,187,119 cycles # 3.064 GHz - 13,755,949,994 instructions # 3.02 insn per cycle - 1.487135379 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96606) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.475125 sec + 4,554,309,272 cycles # 3.084 GHz + 13,749,743,854 instructions # 3.02 insn per cycle + 1.487334628 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96594) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.181275e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.183090e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.183090e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.245882e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.247655e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.247655e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.740225 sec - 2,148,260,465 cycles # 2.889 GHz - 4,819,022,606 instructions # 2.24 insn per cycle - 0.744129170 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:85359) (512y: 0) (512z: 0) +TOTAL : 0.734261 sec + 2,124,964,575 cycles # 2.896 GHz + 4,812,728,309 instructions # 2.26 insn per cycle + 0.749171337 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:85341) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.261554e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.263763e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.263763e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.153742e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.155987e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.155987e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.643969 sec - 1,875,691,757 cycles # 2.898 GHz - 4,276,314,641 instructions # 2.28 insn per cycle - 0.647880426 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:81075) (512y: 26) (512z: 0) +TOTAL : 0.653340 sec + 1,875,053,062 cycles # 2.865 GHz + 4,269,884,463 instructions # 2.28 insn per cycle + 0.663528756 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:81051) (512y: 26) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.552883e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.555127e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.555127e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.574090e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.576360e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.576360e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.703972 sec - 1,355,506,444 cycles # 1.916 GHz - 2,165,111,812 instructions # 1.60 insn per cycle - 0.707801876 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3475) (512y: 34) (512z:79492) +TOTAL : 0.703514 sec + 1,352,429,724 cycles # 1.923 GHz + 2,159,038,912 instructions # 1.60 insn per cycle + 0.718100362 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3451) (512y: 34) (512z:79492) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 69178ef9b8..b8deba13b8 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-30_22:54:59 +DATE: 2023-11-03_13:27:42 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.693924e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.694449e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.694675e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.692075e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.692589e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.692738e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.167248 sec - 7,509,767,168 cycles # 3.023 GHz - 16,389,976,098 instructions # 2.18 insn per cycle - 2.544503385 seconds time elapsed +TOTAL : 2.169362 sec + 7,653,818,833 cycles # 3.074 GHz + 16,944,539,564 instructions # 2.21 insn per cycle + 2.552070102 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe -p 1 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.115466e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.115725e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.115755e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.115093e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.115408e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.115436e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.395163 sec - 11,404,310,500 cycles # 3.070 GHz - 26,402,744,024 instructions # 2.32 insn per cycle - 3.773859216 seconds time elapsed +TOTAL : 3.394126 sec + 11,511,876,452 cycles # 3.097 GHz + 26,536,909,196 instructions # 2.31 insn per cycle + 3.774250196 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.989120e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.989332e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.989332e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.974923e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.975145e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.975145e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 
6.646182 sec - 19,129,776,834 cycles # 2.885 GHz - 54,153,273,486 instructions # 2.83 insn per cycle - 6.650112221 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32066) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.621112 sec + 19,074,935,811 cycles # 2.880 GHz + 54,145,373,257 instructions # 2.84 insn per cycle + 6.624983367 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32053) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.628095e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.628183e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.628183e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.658232e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.658328e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.658328e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.248782 sec - 9,415,772,907 cycles # 2.896 GHz - 26,160,926,400 instructions # 2.78 insn per cycle - 3.252909826 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96005) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.185966 sec + 9,298,086,311 cycles # 2.915 GHz + 26,151,974,165 instructions # 2.81 insn per cycle + 3.193226772 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:95993) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.801898e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.802416e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.802416e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.761899e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.762352e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.762352e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.394669 sec - 4,044,411,295 cycles # 2.895 GHz - 9,227,963,992 instructions # 2.28 insn per cycle - 1.398693419 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84155) (512y: 0) (512z: 0) +TOTAL : 1.405505 sec + 4,083,897,105 cycles # 2.898 GHz + 9,220,939,721 instructions # 2.26 insn per cycle + 1.413149403 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84137) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.399382e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.400021e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.400021e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.349536e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.350160e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.350160e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.205178 sec - 3,515,102,818 cycles # 2.909 GHz - 8,175,049,106 instructions # 2.33 insn per cycle - 1.209109436 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79844) (512y: 79) (512z: 0) +TOTAL : 1.215980 sec + 3,534,568,745 cycles # 2.898 GHz + 8,168,436,140 instructions # 2.31 insn per cycle + 1.225628391 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79820) (512y: 79) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.827743e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.828372e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.828372e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.882490e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.883154e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.883154e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.384871 sec - 2,648,995,532 cycles # 1.909 GHz - 4,154,654,547 instructions # 1.57 insn per cycle - 1.388825055 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2045) (512y: 93) (512z:78760) +TOTAL : 1.362278 sec + 2,610,656,560 cycles # 1.911 GHz + 4,147,890,063 instructions # 1.59 insn per cycle + 1.372539515 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2021) (512y: 93) (512z:78760) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index de63defaec..60f387590d 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-10-30_22:56:00 +DATE: 2023-11-03_13:28:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.674499e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.675054e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.675214e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.683617e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.684135e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.684388e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.173682 sec - 7,622,451,046 cycles # 3.058 GHz - 15,269,915,811 instructions # 2.00 insn per cycle - 2.549506521 seconds time elapsed +TOTAL : 2.172775 sec + 7,425,569,558 cycles # 2.989 GHz + 15,063,628,728 instructions # 2.03 insn per cycle + 2.549783467 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe -p 1 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.113208e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.113470e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.113496e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.112713e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.113034e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.113072e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.390779 sec - 11,439,695,311 cycles # 3.082 GHz - 23,466,887,699 instructions # 2.05 insn per cycle - 3.768718600 seconds time elapsed +TOTAL : 3.397376 sec + 11,478,639,054 cycles # 3.085 GHz + 26,660,080,251 instructions # 2.32 insn per cycle + 3.777534329 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.476067e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.476300e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.476300e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.976554e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.976766e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.976766e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 
6.235077 sec - 19,078,518,974 cycles # 3.059 GHz - 54,152,325,581 instructions # 2.84 insn per cycle - 6.238938674 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32243) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.619732 sec + 19,255,909,015 cycles # 2.908 GHz + 54,146,217,870 instructions # 2.81 insn per cycle + 6.623630619 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32230) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.635029e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.635114e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.635114e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.632685e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.632772e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.632772e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.234145 sec - 9,372,109,067 cycles # 2.895 GHz - 26,079,565,451 instructions # 2.78 insn per cycle - 3.238069698 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:95899) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.236276 sec + 9,400,746,844 cycles # 2.902 GHz + 26,071,648,675 instructions # 2.77 insn per cycle + 3.240211773 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:95887) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.777387e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.777857e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.777857e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.805141e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.805613e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.805613e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.403764 sec - 4,069,542,076 cycles # 2.892 GHz - 9,213,514,277 instructions # 2.26 insn per cycle - 1.407702679 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83776) (512y: 0) (512z: 0) +TOTAL : 1.389557 sec + 4,052,419,327 cycles # 2.909 GHz + 9,205,965,348 instructions # 2.27 insn per cycle + 1.393548917 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83758) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.341206e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.341838e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.341838e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.398621e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.399244e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.399244e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.221671 sec - 3,533,596,743 cycles # 2.884 GHz - 8,168,189,541 instructions # 2.31 insn per cycle - 1.225733749 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79373) (512y: 229) (512z: 0) +TOTAL : 1.202525 sec + 3,498,191,184 cycles # 2.901 GHz + 8,160,684,568 instructions # 2.33 insn per cycle + 1.206410282 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79349) (512y: 229) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.857360e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.857975e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.857975e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.825276e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.825930e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.825930e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.374279 sec - 2,619,392,465 cycles # 1.902 GHz - 4,154,053,123 instructions # 1.59 insn per cycle - 1.378252659 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1492) (512y: 175) (512z:78776) +TOTAL : 1.382476 sec + 2,614,942,717 cycles # 1.887 GHz + 4,146,069,189 instructions # 1.59 insn per cycle + 1.386689646 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1468) (512y: 175) (512z:78776) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 13e7273074..a2058aa09b 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-30_22:49:40 +DATE: 2023-11-03_13:21:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.973651e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.318745e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.639621e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.677221e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.325089e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.707683e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.439457 sec - 1,962,130,491 cycles # 3.016 GHz - 2,773,909,314 instructions # 1.41 insn per cycle - 0.708454238 seconds time elapsed +TOTAL : 0.444363 sec + 2,018,559,464 cycles # 3.017 GHz + 2,824,765,054 instructions # 1.40 insn per cycle + 0.739050843 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.708042e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.153949e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.486889e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.257167e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.087055e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.516826e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.515792 sec - 2,268,451,135 cycles # 3.042 GHz - 3,263,513,511 instructions # 1.44 insn per cycle - 0.802925124 seconds time elapsed +TOTAL : 0.526934 sec + 2,296,852,180 cycles # 3.002 GHz + 3,272,393,278 instructions # 1.42 insn per cycle + 0.822447233 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.107249e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.130516e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.130516e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.098598e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.121368e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.121368e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.501091 
sec - 4,691,660,297 cycles # 3.119 GHz - 13,466,853,090 instructions # 2.87 insn per cycle - 1.505086869 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.509656 sec + 4,704,275,482 cycles # 3.109 GHz + 13,460,137,213 instructions # 2.86 insn per cycle + 1.516168697 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 847) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.999256e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.074586e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.074586e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.982331e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.057802e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.057802e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.840405 sec - 2,622,747,297 cycles # 3.108 GHz - 7,555,461,285 instructions # 2.88 insn per cycle - 0.844567020 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3095) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 0.844686 sec + 2,623,720,298 cycles # 3.091 GHz + 7,549,133,073 instructions # 2.88 insn per cycle + 0.858775496 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3083) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.426098e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.657593e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.657593e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.403395e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.625866e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.625866e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.499544 sec - 1,475,119,745 cycles # 2.933 GHz - 3,121,837,620 instructions # 2.12 insn per cycle - 0.503611366 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) +TOTAL : 0.499871 sec + 1,470,509,842 cycles # 2.919 GHz + 3,115,984,743 instructions # 2.12 insn per cycle + 0.512575147 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2899) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.782086e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.058198e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.058198e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.518653e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.770602e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.770602e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.454992 sec - 1,341,924,139 cycles # 2.929 GHz - 2,984,324,634 instructions # 2.22 insn per cycle - 0.459135591 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) +TOTAL : 0.484813 sec + 1,332,633,375 cycles # 2.727 GHz + 2,977,844,779 instructions # 2.23 insn per cycle + 0.499587088 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.589079e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.713488e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.713488e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.570932e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.696607e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.696607e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.655941 sec - 1,323,693,549 cycles # 2.008 GHz - 1,955,727,668 instructions # 1.48 insn per cycle - 0.659915394 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) +TOTAL : 0.657932 sec + 1,319,186,002 cycles # 1.993 GHz + 1,949,358,587 instructions # 1.48 insn per cycle + 0.672712115 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1348) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index f866b77efa..910b708052 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-30_23:12:09 +DATE: 2023-11-03_14:01:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.730145e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.272081e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.272081e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.664243e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.145947e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.145947e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.466796 sec - 1,998,477,799 cycles # 2.941 GHz - 2,930,077,505 instructions # 1.47 insn per cycle - 0.737311250 seconds time elapsed +TOTAL : 0.469985 sec + 2,040,349,232 cycles # 3.003 GHz + 3,004,500,181 instructions # 1.47 insn per cycle + 0.738746623 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.395450e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.390061e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.390061e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.301893e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.269698e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.269698e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.731935 sec - 2,979,024,996 cycles # 3.040 GHz - 4,603,881,403 instructions # 1.55 insn per cycle - 1.037777142 seconds time elapsed +TOTAL : 0.746650 sec + 3,012,108,544 cycles # 3.028 GHz + 4,567,300,950 instructions # 1.52 insn per cycle + 1.052502726 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -103,16 +103,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.102555e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.125269e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.125269e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.084702e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.107837e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.107837e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 
1.513100 sec - 4,722,087,835 cycles # 3.114 GHz - 13,474,075,896 instructions # 2.85 insn per cycle - 1.517291651 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.535237 sec + 4,735,135,324 cycles # 3.077 GHz + 13,467,171,096 instructions # 2.84 insn per cycle + 1.539513895 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 847) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -131,16 +130,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.980738e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.057162e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.057162e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.980959e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.055590e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.055590e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.855659 sec - 2,660,648,265 cycles # 3.096 GHz - 7,605,299,110 instructions # 2.86 insn per cycle - 0.860039908 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3095) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 0.851887 sec + 2,650,539,837 cycles # 3.099 GHz + 7,597,895,175 instructions # 2.87 insn per cycle + 0.856130482 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3083) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -159,16 +157,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.391170e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.611775e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.611775e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.369342e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.589036e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.589036e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.511637 sec - 1,512,122,172 cycles # 2.935 GHz - 3,172,695,983 instructions # 2.10 insn per cycle - 0.515934668 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) +TOTAL : 0.510869 sec + 1,501,262,258 cycles # 2.918 GHz + 3,163,821,980 instructions # 2.11 insn per cycle + 0.515082973 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2899) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -187,16 +184,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.745688e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.016672e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.016672e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.718302e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.984568e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.984568e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.464905 sec - 1,371,264,682 cycles # 2.926 GHz - 3,033,027,472 instructions # 2.21 insn per cycle - 0.469116581 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) +TOTAL : 0.466300 sec + 1,367,345,452 cycles # 2.909 GHz + 3,028,055,122 instructions # 2.21 insn per cycle + 0.470678204 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -215,16 +211,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.565004e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.691739e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.691739e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.563950e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.686880e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.686880e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.668760 sec - 1,356,402,541 cycles # 2.017 GHz - 1,995,346,777 instructions # 1.47 insn per cycle - 0.673109753 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) +TOTAL : 0.666406 sec + 1,352,029,392 cycles # 2.018 GHz + 1,988,030,302 instructions # 1.47 insn per cycle + 0.670887553 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1348) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index 08d1ff56a8..1d4e53af3a 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-30_22:49:57 +DATE: 2023-11-03_13:21:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.951461e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.258664e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.565594e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.650966e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.233304e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.593275e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.441527 sec - 1,956,451,794 cycles # 2.998 GHz - 2,789,819,750 instructions # 1.43 insn per cycle - 0.709902590 seconds time elapsed +TOTAL : 0.442305 sec + 1,990,347,518 cycles # 3.016 GHz + 2,813,021,989 instructions # 1.41 insn per cycle + 0.727558891 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.677767e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.046823e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.369504e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.224080e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.963239e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.366729e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.517612 sec - 2,231,335,512 cycles # 2.987 GHz - 3,256,861,084 instructions # 1.46 insn per cycle - 0.805480224 seconds time elapsed +TOTAL : 0.525687 sec + 2,309,475,640 cycles # 3.027 GHz + 3,304,899,924 instructions # 1.43 insn per cycle + 0.820449609 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.097921e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.120832e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.120832e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.099687e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.122691e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.122691e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.513492 
sec - 4,715,532,801 cycles # 3.109 GHz - 13,460,856,046 instructions # 2.85 insn per cycle - 1.517628249 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 849) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.508019 sec + 4,696,287,050 cycles # 3.107 GHz + 13,454,191,338 instructions # 2.86 insn per cycle + 1.514176499 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 836) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.989508e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.064579e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.064579e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.005583e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.080670e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.080670e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.843927 sec - 2,623,248,024 cycles # 3.096 GHz - 7,554,408,256 instructions # 2.88 insn per cycle - 0.847974886 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3088) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 0.835044 sec + 2,617,103,039 cycles # 3.120 GHz + 7,548,121,019 instructions # 2.88 insn per cycle + 0.847366392 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3076) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.379456e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.600576e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.600576e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.376685e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.599716e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.599716e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.505877 sec - 1,477,736,959 cycles # 2.902 GHz - 3,120,500,586 instructions # 2.11 insn per cycle - 0.510031020 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2900) (512y: 0) (512z: 0) +TOTAL : 0.503836 sec + 1,474,536,973 cycles # 2.904 GHz + 3,114,257,750 instructions # 2.11 insn per cycle + 0.515034850 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2882) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.707124e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.978219e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.978219e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.729039e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.996518e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.996518e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.463655 sec - 1,341,577,851 cycles # 2.872 GHz - 2,981,136,774 instructions # 2.22 insn per cycle - 0.467697604 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 104) (512z: 0) +TOTAL : 0.457525 sec + 1,334,713,858 cycles # 2.891 GHz + 2,975,109,033 instructions # 2.23 insn per cycle + 0.470624542 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2646) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.548823e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.673906e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.673906e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.554823e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.679178e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.679178e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.666039 sec - 1,325,973,811 cycles # 1.981 GHz - 1,954,047,154 instructions # 1.47 insn per cycle - 0.670126568 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1348) (512y: 106) (512z: 2173) +TOTAL : 0.661512 sec + 1,318,722,950 cycles # 1.981 GHz + 1,947,473,030 instructions # 1.48 insn per cycle + 0.674217858 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1324) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 02220fa6e1..18fde06e89 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-30_22:50:15 +DATE: 2023-11-03_13:22:05 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.967331e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.214482e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.337385e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.368545e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.195532e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.338037e+08 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.434980 sec - 1,942,062,183 cycles # 3.007 GHz - 2,746,890,880 instructions # 1.41 insn per cycle - 0.703360888 seconds time elapsed +TOTAL : 0.439653 sec + 1,963,073,365 cycles # 3.010 GHz + 2,782,162,803 instructions # 1.42 insn per cycle + 0.724656523 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.292022e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.833149e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.949301e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.233506e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.807053e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.953262e+08 ) sec^-1 MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.470018 sec - 2,086,659,072 cycles # 3.025 GHz - 2,994,580,206 instructions # 1.44 insn per cycle - 0.748012233 seconds time elapsed +TOTAL : 0.474831 sec + 2,104,200,925 cycles # 3.006 GHz + 2,984,209,799 instructions # 1.42 insn per cycle + 0.758654985 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.161070e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.187807e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.187807e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.156001e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.181933e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.181933e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.430744 sec 
- 4,454,323,954 cycles # 3.106 GHz - 13,052,174,805 instructions # 2.93 insn per cycle - 1.434671601 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.434457 sec + 4,448,159,929 cycles # 3.093 GHz + 13,045,617,942 instructions # 2.93 insn per cycle + 1.440492141 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 732) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.903986e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.090947e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.090947e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.114863e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.313630e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.313630e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.584766 sec - 1,709,460,469 cycles # 2.906 GHz - 4,515,259,006 instructions # 2.64 insn per cycle - 0.588806620 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3601) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 0.542348 sec + 1,695,415,281 cycles # 3.104 GHz + 4,508,974,040 instructions # 2.66 insn per cycle + 0.553837720 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.106741e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.879293e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.879293e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.108059e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.866586e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.866586e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.287960 sec - 851,659,590 cycles # 2.922 GHz - 1,898,593,134 instructions # 2.23 insn per cycle - 0.292038777 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) +TOTAL : 0.285252 sec + 845,650,618 cycles # 2.925 GHz + 1,891,849,456 instructions # 2.24 insn per cycle + 0.297356991 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3473) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.596776e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.492603e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.492603e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.553055e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.452732e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.452732e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.268043 sec - 798,185,067 cycles # 2.940 GHz - 1,821,244,750 instructions # 2.28 insn per cycle - 0.272109561 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) +TOTAL : 0.266707 sec + 792,995,173 cycles # 2.930 GHz + 1,814,974,440 instructions # 2.29 insn per cycle + 0.278822798 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3311) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -194,10 +190,10 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 27,701,954 cycles # 2.643 GHz - 41,567,168 instructions # 1.50 insn per cycle - 0.010909835 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1969) (512y: 32) (512z: 2383) + 29,780,103 cycles # 2.640 GHz + 42,180,426 instructions # 1.42 insn per cycle + 0.018151168 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1944) (512y: 32) (512z: 2383) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index b48a6fcec3..ca70adfc3f 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-30_23:12:26 +DATE: 2023-11-03_14:01:50 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.414946e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.217784e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.217784e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.690473e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.051445e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.051445e+07 ) sec^-1 MeanMatrixElemValue = ( 2.017654e+01 +- 1.429184e+01 ) GeV^-2 -TOTAL : 0.452341 sec - 1,900,339,500 cycles # 2.864 GHz - 2,799,669,730 instructions # 1.47 insn per cycle - 0.721098960 seconds time elapsed +TOTAL : 0.447544 sec + 1,995,094,462 cycles # 3.028 GHz + 2,944,796,314 instructions # 1.48 insn per cycle + 0.716219472 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.349106e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.876504e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.876504e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.203183e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.581207e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.581207e+07 ) sec^-1 MeanMatrixElemValue = ( 2.609942e+02 +- 2.115590e+02 ) GeV^-2 -TOTAL : 0.608431 sec - 2,571,734,972 cycles # 3.036 GHz - 3,893,707,345 instructions # 1.51 insn per cycle - 0.906254720 seconds time elapsed +TOTAL : 0.615303 sec + 2,561,439,318 cycles # 3.029 GHz + 3,834,138,920 instructions # 1.50 insn per cycle + 0.903022085 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -103,16 +103,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.163465e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.189480e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.189480e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.159807e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.185784e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.185784e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 
1.430810 sec - 4,466,454,752 cycles # 3.114 GHz - 13,056,490,171 instructions # 2.92 insn per cycle - 1.434790628 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.432391 sec + 4,462,709,360 cycles # 3.108 GHz + 13,049,905,565 instructions # 2.92 insn per cycle + 1.436417435 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 732) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -131,16 +130,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.094784e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.288488e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.288488e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.079770e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.276056e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.276056e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.552738 sec - 1,719,469,732 cycles # 3.091 GHz - 4,563,277,713 instructions # 2.65 insn per cycle - 0.556970147 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3601) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 0.552836 sec + 1,714,551,062 cycles # 3.082 GHz + 4,556,234,091 instructions # 2.66 insn per cycle + 0.556965782 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -159,16 +157,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.070520e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.859545e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.859545e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.072468e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.823460e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.823460e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.293554 sec - 871,649,791 cycles # 2.936 GHz - 1,935,490,228 instructions # 2.22 insn per cycle - 0.297628852 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) +TOTAL : 0.290519 sec + 863,549,988 cycles # 2.936 GHz + 1,927,930,259 instructions # 2.23 insn per cycle + 0.294678952 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3473) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -187,16 +184,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.465694e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.353147e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.353147e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.461286e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.336369e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.336369e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.277041 sec - 819,154,003 cycles # 2.920 GHz - 1,858,008,427 instructions # 2.27 insn per cycle - 0.281050664 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) +TOTAL : 0.274464 sec + 811,968,530 cycles # 2.921 GHz + 1,851,193,986 instructions # 2.28 insn per cycle + 0.278710334 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3311) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -211,10 +207,10 @@ OK (relative difference <= 5E-3) runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) - 36,661,274 cycles # 2.764 GHz - 50,499,779 instructions # 1.38 insn per cycle - 0.013630840 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1969) (512y: 32) (512z: 2383) + 37,821,224 cycles # 2.670 GHz + 49,845,872 instructions # 1.32 insn per cycle + 0.014789496 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1944) (512y: 32) (512z: 2383) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index ee4cf36aec..acc7a9c5e0 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-30_22:50:31 +DATE: 2023-11-03_13:22:22 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.971727e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.217793e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.336731e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.321585e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.194004e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.323765e+08 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.433842 sec - 1,909,081,900 cycles # 2.967 GHz - 2,630,455,270 instructions # 1.38 insn per cycle - 0.700771922 seconds time elapsed +TOTAL : 0.436518 sec + 1,946,354,316 cycles # 2.996 GHz + 2,660,085,025 instructions # 1.37 insn per cycle + 0.724399364 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.242783e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.807399e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.919355e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.186399e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.779449e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.920598e+08 ) sec^-1 MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.468249 sec - 2,069,885,834 cycles # 3.011 GHz - 2,962,006,936 instructions # 1.43 insn per cycle - 0.745153246 seconds time elapsed +TOTAL : 0.472348 sec + 2,134,571,311 cycles # 3.027 GHz + 2,989,315,923 instructions # 1.40 insn per cycle + 0.762610695 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.163734e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.189710e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.189710e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.141093e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.167011e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.167011e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.427125 sec 
- 4,448,877,888 cycles # 3.110 GHz - 13,033,054,677 instructions # 2.93 insn per cycle - 1.431045061 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.453735 sec + 4,446,221,332 cycles # 3.051 GHz + 13,027,008,274 instructions # 2.93 insn per cycle + 1.460116009 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 714) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.130048e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.328370e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.328370e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.141538e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.343407e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.343407e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.542235 sec - 1,688,384,240 cycles # 3.096 GHz - 4,511,025,762 instructions # 2.67 insn per cycle - 0.546181384 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 0.537937 sec + 1,685,741,136 cycles # 3.112 GHz + 4,505,064,132 instructions # 2.67 insn per cycle + 0.549338887 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3577) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.112726e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.884376e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.884376e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.112669e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.888447e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.888447e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.287485 sec - 851,316,568 cycles # 2.926 GHz - 1,895,161,178 instructions # 2.23 insn per cycle - 0.291541754 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3461) (512y: 0) (512z: 0) +TOTAL : 0.284460 sec + 844,730,834 cycles # 2.930 GHz + 1,888,937,622 instructions # 2.24 insn per cycle + 0.297127714 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3443) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.539172e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.428313e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.428313e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.532166e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.412841e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.412841e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.270331 sec - 799,518,850 cycles # 2.921 GHz - 1,817,399,076 instructions # 2.27 insn per cycle - 0.274476762 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3298) (512y: 22) (512z: 0) +TOTAL : 0.267501 sec + 794,418,594 cycles # 2.927 GHz + 1,810,991,128 instructions # 2.28 insn per cycle + 0.281613083 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3274) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -194,10 +190,10 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 28,310,705 cycles # 2.739 GHz - 40,898,386 instructions # 1.44 insn per cycle - 0.010855318 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1932) (512y: 32) (512z: 2383) + 29,224,854 cycles # 2.706 GHz + 41,299,770 instructions # 1.41 insn per cycle + 0.022176168 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1907) (512y: 32) (512z: 2383) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 1050b324b7..90cb55044b 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-30_22:50:46 +DATE: 2023-11-03_13:22:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.987462e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.350900e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.659598e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.665308e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.271925e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.627429e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.438127 sec - 1,961,591,121 cycles # 3.019 GHz - 2,745,718,346 instructions # 1.40 insn per cycle - 0.707121708 seconds time elapsed +TOTAL : 0.447989 sec + 1,974,780,319 cycles # 2.966 GHz + 2,819,670,600 instructions # 1.43 insn per cycle + 0.744502455 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.710817e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.169634e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.502781e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.254771e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.104270e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.522137e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.513425 sec - 2,258,016,655 cycles # 3.040 GHz - 3,240,568,814 instructions # 1.44 insn per cycle - 0.800127554 seconds time elapsed +TOTAL : 0.528549 sec + 2,299,640,622 cycles # 3.005 GHz + 3,256,375,279 instructions # 1.42 insn per cycle + 0.822763207 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.094539e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.117012e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.117012e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.084952e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.107624e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.107624e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.518561 sec 
- 4,720,608,486 cycles # 3.102 GHz - 13,469,279,631 instructions # 2.85 insn per cycle - 1.522489455 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 840) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.528964 sec + 4,726,820,201 cycles # 3.085 GHz + 13,462,870,049 instructions # 2.85 insn per cycle + 1.535237667 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 827) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.026111e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.102690e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.102690e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.000087e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.074643e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.074643e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.829732 sec - 2,596,663,847 cycles # 3.117 GHz - 7,388,668,463 instructions # 2.85 insn per cycle - 0.833756324 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3073) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 0.837218 sec + 2,595,361,513 cycles # 3.086 GHz + 7,381,862,026 instructions # 2.84 insn per cycle + 0.850044679 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3061) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.448801e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.679453e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.679453e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.425089e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.650468e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.650468e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.495919 sec - 1,465,592,723 cycles # 2.935 GHz - 3,058,112,296 instructions # 2.09 insn per cycle - 0.500045412 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3013) (512y: 0) (512z: 0) +TOTAL : 0.496959 sec + 1,461,826,401 cycles # 2.918 GHz + 3,051,511,691 instructions # 2.09 insn per cycle + 0.512707584 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2995) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.879819e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.166647e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.166647e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.878630e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.164221e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.164221e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.443605 sec - 1,306,750,305 cycles # 2.923 GHz - 2,933,003,661 instructions # 2.24 insn per cycle - 0.447663683 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2799) (512y: 110) (512z: 0) +TOTAL : 0.440640 sec + 1,298,008,305 cycles # 2.919 GHz + 2,925,673,041 instructions # 2.25 insn per cycle + 0.455740476 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2775) (512y: 110) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.302695e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.408241e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.408241e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.305194e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.410658e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.410658e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.736183 sec - 1,366,860,503 cycles # 1.849 GHz - 1,972,222,648 instructions # 1.44 insn per cycle - 0.740334587 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1700) (512y: 114) (512z: 2171) +TOTAL : 0.732088 sec + 1,359,034,153 cycles # 1.846 GHz + 1,965,275,198 instructions # 1.45 insn per cycle + 0.747587285 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1676) (512y: 114) (512z: 2171) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index f752ce36e3..dd8caa2559 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -OMPFLAGS=-fopenmp +OMPFLAGS= AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-10-30_22:51:04 +DATE: 2023-11-03_13:22:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.941380e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.208820e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.498284e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.626602e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.144887e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.496577e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.438407 sec - 1,956,379,222 cycles # 3.008 GHz - 2,770,878,160 instructions # 1.42 insn per cycle - 0.707058917 seconds time elapsed +TOTAL : 0.446727 sec + 1,947,084,355 cycles # 2.897 GHz + 2,696,453,178 instructions # 1.38 insn per cycle + 0.736802652 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.668828e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.009198e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.330148e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.216372e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.927003e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.335966e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.513962 sec - 2,253,699,576 cycles # 3.029 GHz - 3,213,129,563 instructions # 1.43 insn per cycle - 0.801248370 seconds time elapsed +TOTAL : 0.528575 sec + 2,276,047,250 cycles # 2.969 GHz + 3,176,726,798 instructions # 1.40 insn per cycle + 0.824470891 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -90,16 +90,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.096779e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.119383e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.119383e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.073436e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.095874e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.095874e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.515324 sec 
- 4,726,250,058 cycles # 3.112 GHz - 13,455,652,183 instructions # 2.85 insn per cycle - 1.519521309 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 827) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.544866 sec + 4,724,528,983 cycles # 3.051 GHz + 13,449,130,633 instructions # 2.85 insn per cycle + 1.551244322 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 814) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -117,16 +116,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.999365e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.074695e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.074695e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.997011e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.072543e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.072543e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.839998 sec - 2,599,518,552 cycles # 3.082 GHz - 7,392,404,713 instructions # 2.84 insn per cycle - 0.844137925 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3062) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 0.838344 sec + 2,598,952,948 cycles # 3.085 GHz + 7,385,835,720 instructions # 2.84 insn per cycle + 0.849552154 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3050) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -144,16 +142,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.371429e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.593127e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.593127e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.391429e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.612881e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.612881e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.507572 sec - 1,469,854,218 cycles # 2.875 GHz - 3,058,124,169 instructions # 2.08 insn per cycle - 0.511713729 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2990) (512y: 0) (512z: 0) +TOTAL : 0.501350 sec + 1,458,751,807 cycles # 2.887 GHz + 3,051,487,989 instructions # 2.09 insn per cycle + 0.512435660 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2972) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -171,16 +168,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.877641e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.168441e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.168441e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.868802e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.154760e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.154760e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.443851 sec - 1,307,337,681 cycles # 2.922 GHz - 2,933,611,821 instructions # 2.24 insn per cycle - 0.448146499 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2775) (512y: 110) (512z: 0) +TOTAL : 0.442043 sec + 1,299,923,563 cycles # 2.915 GHz + 2,926,182,502 instructions # 2.25 insn per cycle + 0.452442721 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2751) (512y: 110) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -198,16 +194,15 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.504769e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.622888e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.622888e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.457990e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.573459e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.573459e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.677398 sec - 1,366,250,754 cycles # 2.007 GHz - 1,971,487,169 instructions # 1.44 insn per cycle - 0.681481143 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1676) (512y: 114) (512z: 2171) +TOTAL : 0.687239 sec + 1,356,674,081 cycles # 1.962 GHz + 1,964,861,606 instructions # 1.45 insn per cycle + 0.698495126 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1652) (512y: 114) (512z: 2171) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
From 5d28956caa93d59e84a04cefe4f0b4523b2c1683 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 3 Nov 2023 17:16:58 +0100 Subject: [PATCH 112/119] [oct23av] rerun 18 tmad tests (while rerunning also tput with FPEs enabled), no change in functionality or performance (*NB OpenMP is now disabled by default!*) STARTED AT Fri Nov 3 02:15:11 PM CET 2023 ENDED AT Fri Nov 3 02:38:38 PM CET 2023 Status=0 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt 0 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt 24 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt --- .../log_eemumu_mad_d_inl0_hrd0.txt | 136 ++++++++--------- .../log_eemumu_mad_f_inl0_hrd0.txt | 132 ++++++++--------- .../log_eemumu_mad_m_inl0_hrd0.txt | 138 ++++++++--------- .../log_ggtt_mad_d_inl0_hrd0.txt | 138 ++++++++--------- .../log_ggtt_mad_f_inl0_hrd0.txt | 136 ++++++++--------- .../log_ggtt_mad_m_inl0_hrd0.txt | 132 ++++++++--------- .../log_ggttg_mad_d_inl0_hrd0.txt | 140 +++++++++--------- .../log_ggttg_mad_f_inl0_hrd0.txt | 138 ++++++++--------- .../log_ggttg_mad_m_inl0_hrd0.txt | 136 ++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 138 ++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 128 ++++++++-------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 138 ++++++++--------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 12 +- .../log_ggttggg_mad_f_inl0_hrd0.txt | 8 +- .../log_ggttggg_mad_m_inl0_hrd0.txt | 10 +- .../log_gqttq_mad_d_inl0_hrd0.txt | 136 ++++++++--------- .../log_gqttq_mad_f_inl0_hrd0.txt | 140 +++++++++--------- .../log_gqttq_mad_m_inl0_hrd0.txt | 134 ++++++++--------- 18 files changed, 1035 insertions(+), 1035 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 525d780972..77fe2ed306 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/e CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,10 +15,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-30_23:26:14 +DATE: 2023-11-03_14:21:18 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6317s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6235s - [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6322s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6243s + [COUNTERS] Fortran 
MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1808s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1726s - [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1903s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1817s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.53E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4200s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3341s - [COUNTERS] Fortran MEs ( 1 ) : 0.0859s for 90112 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4441s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3520s + [COUNTERS] Fortran MEs ( 1 ) : 0.0921s for 90112 events => throughput is 9.79E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1910s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1845s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 8192 events => throughput is 1.27E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2004s + [COUNTERS] Fortran Overhead 
( 0 ) : 0.1937s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 8192 events => throughput is 1.22E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4371s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3658s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0713s for 90112 events => throughput is 1.26E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4331s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3605s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0726s for 90112 events => throughput is 1.24E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.225597e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.235602e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.237094e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.269361e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1856s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1817s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events 
=> throughput is 2.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1840s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1800s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0040s for 8192 events => throughput is 2.06E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3892s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3447s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0445s for 90112 events => throughput is 2.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3815s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3383s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0433s for 90112 events => throughput is 2.08E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.991464e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.980623e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.992806e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.041045e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1827s - 
[COUNTERS] Fortran Overhead ( 0 ) : 0.1797s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.74E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1815s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1785s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.72E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3761s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3425s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0337s for 90112 events => throughput is 2.68E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3737s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3402s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0335s for 90112 events => throughput is 2.69E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.530051e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.654499e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.718767e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.790648e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1827s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1796s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.63E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1809s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1781s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.85E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3754s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3433s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0321s for 90112 events => throughput is 2.81E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3678s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3365s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0314s for 90112 events => throughput is 2.87E+06 
events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.686220e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.813865e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.846999e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.819295e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1847s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1811s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0036s for 8192 events => throughput is 2.30E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1827s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1792s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.38E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3898s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3501s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0397s for 90112 events => throughput is 2.27E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3813s + [COUNTERS] Fortran Overhead 
( 0 ) : 0.3425s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0388s for 90112 events => throughput is 2.32E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.103998e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.182323e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.334399e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.275304e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5940s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5935s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.63E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5938s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5933s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.67E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7665s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7617s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => 
throughput is 1.85E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7577s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7528s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 90112 events => throughput is 1.86E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.297070e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.222938e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.494836e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.926125e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.153646e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.694324e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.383787e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.376536e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.165062e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.707496e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 
*** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.969138e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.981585e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.168544e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.719109e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.115108e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.125260e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 983675a1b5..2e7a5e8f3d 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-30_23:26:31 +DATE: 2023-11-03_14:21:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6371s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6292s - [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6333s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6253s + [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.02E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] 
Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1797s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1715s - [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1819s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1741s + [COUNTERS] Fortran MEs ( 1 ) : 0.0078s for 8192 events => throughput is 1.05E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -110,8 +110,8 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) [COUNTERS] PROGRAM TOTAL : 0.4167s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3307s - [COUNTERS] Fortran MEs ( 1 ) : 0.0860s for 90112 events => throughput is 1.05E+06 events/s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3314s + [COUNTERS] Fortran MEs ( 1 ) : 0.0854s for 90112 events => throughput is 1.06E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166087172673] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.2068s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2005s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 8192 events => throughput is 1.28E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1877s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1816s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0061s for 8192 events => throughput is 1.35E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501907796603360E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 
events) - [COUNTERS] PROGRAM TOTAL : 0.4378s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3647s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0730s for 90112 events => throughput is 1.23E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4143s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3447s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0696s for 90112 events => throughput is 1.29E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.185853e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.253827e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.278973e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.302410e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165570339780] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1935s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1907s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.00E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1802s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1775s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.12E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] 
Cross section = 0.0915 [9.1501905322826635E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3655s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3377s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0278s for 90112 events => throughput is 3.24E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3780s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3501s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0280s for 90112 events => throughput is 3.22E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.212841e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.148616e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.324039e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.259827e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165593922979] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1837s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1814s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.61E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1855s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1832s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.52E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,8 +319,8 @@ Executing ' 
./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905316084181E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3731s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3482s + [COUNTERS] PROGRAM TOTAL : 0.3667s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3419s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0249s for 90112 events => throughput is 3.62E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.494857e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.440927e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.578201e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.573217e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165593922979] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1869s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1847s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.78E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1842s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1820s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.77E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ 
Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905316084181E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3724s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3478s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0246s for 90112 events => throughput is 3.66E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3681s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3435s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0245s for 90112 events => throughput is 3.67E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.552484e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.600067e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.686130e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.754576e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166440400542] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1872s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1850s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.66E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1873s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1851s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.69E+06 
events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501908978565555E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3730s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3478s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0253s for 90112 events => throughput is 3.57E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3804s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3549s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0255s for 90112 events => throughput is 3.53E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.427450e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.483435e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.720969e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.661856e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166823487174] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5945s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5940s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.74E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5910s + [COUNTERS] Fortran Overhead ( 
0 ) : 0.5905s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.69E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501910542849674E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7583s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7538s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 90112 events => throughput is 1.97E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7804s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7758s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.94E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.891593e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.351128e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.844741e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.874991e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.841028e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.920407e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.052490e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.035596e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.882792e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.750747e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.115822e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.223793e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.073812e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.397928e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.447499e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.418216e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index c323dc9862..988708e401 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -2,12 +2,12 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/e CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none - +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=512y + make USEBUILDDIR=1 AVX=avx2 +make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-30_23:26:48 +DATE: 2023-11-03_14:21:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6306s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6226s - [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6250s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6171s + [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1794s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1716s - [COUNTERS] Fortran MEs ( 1 ) : 0.0078s for 8192 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1782s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1702s + [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.02E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] 
PROGRAM TOTAL : 0.4167s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3309s - [COUNTERS] Fortran MEs ( 1 ) : 0.0858s for 90112 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4157s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3308s + [COUNTERS] Fortran MEs ( 1 ) : 0.0849s for 90112 events => throughput is 1.06E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211734] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1888s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1819s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 8192 events => throughput is 1.19E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1873s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1808s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.25E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4175s + [COUNTERS] PROGRAM TOTAL : 0.4169s [COUNTERS] Fortran Overhead ( 0 ) : 0.3443s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0733s for 90112 events => throughput is 1.23E+06 events/s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0727s for 90112 events => throughput is 1.24E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.158891e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.191633e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.176701e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.168097e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211728] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1855s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1815s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 2.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1824s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1786s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.12E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3854s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3430s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0424s for 90112 events => throughput is 2.13E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3831s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3402s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0429s for 90112 events => throughput is 2.10E+06 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.028245e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.058337e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.167584e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.169456e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1832s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1801s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.65E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1811s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1780s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.66E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3761s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3422s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0339s for 90112 events => throughput is 2.66E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3726s + [COUNTERS] Fortran Overhead 
( 0 ) : 0.3393s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0334s for 90112 events => throughput is 2.70E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.640928e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.575244e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.695641e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.799275e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1821s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1793s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.94E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1819s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1790s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.90E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3811s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3487s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0324s for 90112 events 
=> throughput is 2.78E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3743s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3425s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0318s for 90112 events => throughput is 2.83E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.691960e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.740478e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.911478e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.970797e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1827s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1794s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.50E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1830s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1795s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.35E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3826s - 
[COUNTERS] Fortran Overhead ( 0 ) : 0.3449s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0377s for 90112 events => throughput is 2.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3836s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3461s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0375s for 90112 events => throughput is 2.40E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.177892e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.298518e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.311961e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.467182e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,8 +514,8 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169066587257] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5929s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5924s + [COUNTERS] PROGRAM TOTAL : 0.5999s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5993s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.61E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919911173610E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7616s - 
[COUNTERS] Fortran Overhead ( 0 ) : 0.7567s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.84E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7600s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7552s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.85E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.200888e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.215424e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.927076e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.900873e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.188164e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.713004e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.384660e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.350320e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.185460e+07 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.721122e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.889571e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.891982e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.184598e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.744274e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.123279e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.113133e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 370bafb4b0..1c17450a40 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -4,8 +4,8 @@ CUDACPP_BUILDDIR='.' 
make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-30_23:27:05 +DATE: 2023-11-03_14:22:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3542s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3123s - [COUNTERS] Fortran MEs ( 1 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3552s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3147s + [COUNTERS] Fortran MEs ( 1 ) : 0.0404s for 8192 events => throughput is 2.03E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3103s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2698s - [COUNTERS] Fortran MEs ( 1 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3067s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2663s + [COUNTERS] Fortran MEs ( 1 ) : 0.0403s for 8192 events => throughput is 2.03E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6683s 
- [COUNTERS] Fortran Overhead ( 0 ) : 1.2204s - [COUNTERS] Fortran MEs ( 1 ) : 0.4479s for 90112 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6567s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2097s + [COUNTERS] Fortran MEs ( 1 ) : 0.4470s for 90112 events => throughput is 2.02E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600102] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3459s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3086s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0373s for 8192 events => throughput is 2.20E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3421s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3052s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0370s for 8192 events => throughput is 2.22E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775372] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6795s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2710s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4085s for 90112 events => throughput is 2.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6711s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2663s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4048s for 90112 events => throughput is 2.23E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.120628e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.240136e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.186499e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.246301e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600102] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3149s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2933s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0216s for 8192 events => throughput is 3.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3123s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2910s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0213s for 8192 events => throughput is 3.84E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775379] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.5593s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3113s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2479s for 90112 events => throughput is 3.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4758s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2401s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2357s for 90112 events => throughput is 3.82E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.636571e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.814458e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.778780e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.799058e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3002s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2870s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0132s for 8192 events => throughput is 6.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2946s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2817s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0129s for 8192 events => throughput is 6.36E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3995s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2550s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1445s for 90112 events => throughput is 6.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3939s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2474s + [COUNTERS] CudaCpp MEs 
( 2 ) : 0.1465s for 90112 events => throughput is 6.15E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.008439e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.003903e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.106080e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.235696e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2976s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2856s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0119s for 8192 events => throughput is 6.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2986s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2870s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0116s for 8192 events => throughput is 7.08E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3695s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2412s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1283s for 90112 events => throughput is 7.02E+05 events/s + [COUNTERS] PROGRAM TOTAL 
: 1.4648s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3245s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1403s for 90112 events => throughput is 6.42E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.765143e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.813758e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.703708e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.710973e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3108s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2915s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0193s for 8192 events => throughput is 4.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3143s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2953s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0189s for 8192 events => throughput is 4.33E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4685s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2567s - [COUNTERS] CudaCpp MEs ( 2 ) : 
0.2118s for 90112 events => throughput is 4.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5031s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2812s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2219s for 90112 events => throughput is 4.06E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.089044e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.088501e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.044371e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.031349e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6953s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6948s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.47E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6955s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6949s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.42E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6643s - 
[COUNTERS] Fortran Overhead ( 0 ) : 1.6581s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 90112 events => throughput is 1.44E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6545s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6481s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.42E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.194981e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.084897e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.690509e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.690035e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.290641e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.998858e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.072428e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.074784e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.325122e+07 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.994007e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.154985e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.152245e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.317983e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.996363e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.054471e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.002988e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 86b5bafaea..58819f13cf 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -2,8 +2,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 @@ -15,11 +15,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -28,12 +27,13 @@ make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2023-10-30_23:27:31 +DATE: 2023-11-03_14:22:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3526s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3116s - [COUNTERS] Fortran MEs ( 1 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3596s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3190s + [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3093s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2682s - [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3065s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2661s + [COUNTERS] Fortran MEs ( 1 ) : 0.0404s for 8192 events => throughput is 2.03E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM 
TOTAL : 1.6575s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2112s - [COUNTERS] Fortran MEs ( 1 ) : 0.4463s for 90112 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6511s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2036s + [COUNTERS] Fortran MEs ( 1 ) : 0.4475s for 90112 events => throughput is 2.01E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690706767555099] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3403s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3051s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0352s for 8192 events => throughput is 2.32E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3370s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3023s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0346s for 8192 events => throughput is 2.37E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782605295497] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6425s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2608s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3818s for 90112 events => throughput is 2.36E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6412s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2618s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3794s for 90112 events => throughput is 2.37E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.382222e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.365750e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.388990e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.346162e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690702885183541] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3058s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2911s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0146s for 8192 events => throughput is 5.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3004s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2856s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0148s for 8192 events => throughput is 5.54E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223778858016772] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4148s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2514s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1634s for 90112 events => throughput is 5.51E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3997s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2392s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1605s for 90112 events => throughput is 5.61E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.330431e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.378211e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.341045e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.483452e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690694374060818] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2866s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2785s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2833s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2757s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 8192 events => throughput is 1.08E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223775951815753] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3231s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2381s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0849s for 90112 events => throughput is 1.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3185s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2326s + [COUNTERS] CudaCpp MEs 
( 2 ) : 0.0859s for 90112 events => throughput is 1.05E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.023298e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.035352e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.030857e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.030223e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690694374060818] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2844s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2774s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 8192 events => throughput is 1.17E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2832s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2762s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 8192 events => throughput is 1.16E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,8 +395,8 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223775951815753] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3185s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2392s + [COUNTERS] PROGRAM TOTAL : 1.3092s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2299s [COUNTERS] CudaCpp MEs ( 2 ) : 
0.0793s for 90112 events => throughput is 1.14E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.103573e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.114954e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.110688e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.111064e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690698914467276] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2923s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2824s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0099s for 8192 events => throughput is 8.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2891s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2791s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0100s for 8192 events => throughput is 8.23E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223780273983500] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3711s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2571s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1140s for 90112 events => throughput is 7.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 
1.3496s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2394s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1103s for 90112 events => throughput is 8.17E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.478441e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.588175e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.631045e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.594492e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690703397697980] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6988s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6983s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.51E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6957s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6951s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.34E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223786763175951] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6625s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6570s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 
90112 events => throughput is 1.64E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6552s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6498s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 90112 events => throughput is 1.65E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.480019e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.278872e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.050077e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.958730e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.413949e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.818332e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.754978e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.760656e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.425389e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.821736e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process 
= SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.844977e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.859017e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.891833e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.371288e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.412374e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.429668e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 318c167090..cb957582e7 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -15,10 +15,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 
'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -27,13 +28,12 @@ make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2023-10-30_23:27:56 +DATE: 2023-11-03_14:23:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3538s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3130s - [COUNTERS] Fortran MEs ( 1 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3676s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3246s + [COUNTERS] Fortran MEs ( 1 ) : 0.0430s for 8192 events => throughput is 1.91E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3122s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2712s - [COUNTERS] Fortran MEs ( 1 ) : 0.0410s for 
8192 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3058s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2654s + [COUNTERS] Fortran MEs ( 1 ) : 0.0404s for 8192 events => throughput is 2.03E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7731s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3014s - [COUNTERS] Fortran MEs ( 1 ) : 0.4717s for 90112 events => throughput is 1.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6499s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2045s + [COUNTERS] Fortran MEs ( 1 ) : 0.4454s for 90112 events => throughput is 2.02E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3482s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3107s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0375s for 8192 events => throughput is 2.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3442s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3072s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0370s for 8192 events => throughput is 2.21E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7134s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2939s - [COUNTERS] CudaCpp MEs ( 
2 ) : 0.4195s for 90112 events => throughput is 2.15E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6618s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2548s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4070s for 90112 events => throughput is 2.21E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.192172e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.187012e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.182032e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.190577e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3138s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2929s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0209s for 8192 events => throughput is 3.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3104s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2894s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0210s for 8192 events => throughput is 3.91E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 
1.4780s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2494s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2286s for 90112 events => throughput is 3.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4727s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2421s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2305s for 90112 events => throughput is 3.91E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.824287e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.829884e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.853636e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.857284e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2980s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2849s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0131s for 8192 events => throughput is 6.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2928s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2801s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0127s for 8192 events => throughput is 6.44E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] 
fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3838s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2425s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1414s for 90112 events => throughput is 6.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3718s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2318s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1400s for 90112 events => throughput is 6.44E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.319893e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.247774e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.021389e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.307806e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2945s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2826s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0119s for 8192 events => throughput is 6.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2916s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2804s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0112s for 8192 events => throughput is 7.31E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,8 +395,8 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < 
/tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3684s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2419s + [COUNTERS] PROGRAM TOTAL : 1.3666s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2401s [COUNTERS] CudaCpp MEs ( 2 ) : 0.1265s for 90112 events => throughput is 7.12E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.899333e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.866736e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.948599e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.056278e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3082s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2893s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0189s for 8192 events => throughput is 4.34E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3059s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2875s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0184s for 8192 events => throughput is 4.45E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < 
/tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4583s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2510s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2072s for 90112 events => throughput is 4.35E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4513s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2472s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2041s for 90112 events => throughput is 4.42E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.137027e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.146707e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.272346e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.375036e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708266690699] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6968s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6962s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.45E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6924s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6918s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.47E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec 
*** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782303744791] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6666s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6602s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 90112 events => throughput is 1.41E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6484s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6421s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.43E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.149612e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.080978e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.592954e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.652267e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.307478e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.998863e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.054254e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.056186e+08 ) sec^-1 
*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.298580e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.990687e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.137759e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.134241e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.297095e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.001170e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.015457e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.023565e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index c309bf7674..5fb683acd0 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-30_23:28:22 +DATE: 2023-11-03_14:23:26 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5408s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2234s - [COUNTERS] Fortran MEs ( 1 ) : 0.3174s for 8192 events => throughput is 2.58E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5401s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2255s + [COUNTERS] Fortran MEs ( 1 ) : 0.3146s for 8192 events => throughput is 2.60E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5382s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2207s - [COUNTERS] Fortran MEs ( 1 ) : 0.3175s for 8192 events => throughput is 2.58E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5343s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2195s + [COUNTERS] Fortran MEs ( 1 ) : 0.3148s for 8192 events => throughput is 2.60E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM 
TOTAL : 4.8715s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3912s - [COUNTERS] Fortran MEs ( 1 ) : 3.4803s for 90112 events => throughput is 2.59E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.8199s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3740s + [COUNTERS] Fortran MEs ( 1 ) : 3.4459s for 90112 events => throughput is 2.62E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470791E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8633s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5369s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3263s for 8192 events => throughput is 2.51E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8536s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5320s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3216s for 8192 events => throughput is 2.55E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.2770s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6914s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.5855s for 90112 events => throughput is 2.51E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2956s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6974s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5983s for 90112 events => throughput is 2.50E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.584657e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.604218e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.596787e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.603357e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470777E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5556s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3862s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1694s for 8192 events => throughput is 4.84E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5511s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3836s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1674s for 8192 events => throughput is 4.89E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.4182s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5481s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8700s for 90112 events => throughput is 4.82E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.3574s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5195s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8379s for 90112 events => throughput is 4.90E+04 events/s *** 
(2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.974443e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.971222e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.961157e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.969548e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3880s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3039s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0842s for 8192 events => throughput is 9.73E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3843s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3003s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0840s for 8192 events => throughput is 9.76E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.3753s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4487s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9265s for 90112 events => throughput is 9.73E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.3465s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4370s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.9096s for 90112 events => throughput is 9.91E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.842938e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.000702e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.942981e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.988605e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3742s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2993s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3645s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2903s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0743s for 8192 events => throughput is 1.10E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.2797s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4533s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8264s for 90112 events => throughput is 1.09E+05 
events/s + [COUNTERS] PROGRAM TOTAL : 2.2510s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4337s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8173s for 90112 events => throughput is 1.10E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.103246e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.115226e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.110259e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.100593e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4287s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3241s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1045s for 8192 events => throughput is 7.84E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4242s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3213s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1029s for 8192 events => throughput is 7.96E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.6347s - [COUNTERS] Fortran Overhead ( 0 ) 
: 1.4831s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1515s for 90112 events => throughput is 7.83E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6010s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4694s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1317s for 90112 events => throughput is 7.96E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.792470e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.959592e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.840493e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.012539e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6515s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6461s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.49E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6558s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6503s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 
1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8274s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8047s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0227s for 90112 events => throughput is 3.96E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8107s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7878s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0229s for 90112 events => throughput is 3.94E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.632907e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.612339e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.028665e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.865541e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.001329e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.627230e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.238062e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.237387e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.000560e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.658424e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.246369e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.247853e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.997732e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.609214e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.736542e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.745287e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 0d53ce5f29..6fbc1b8a0f 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -2,9 +2,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-30_23:29:03 +DATE: 2023-11-03_14:24:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5383s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2205s - [COUNTERS] Fortran MEs ( 1 ) : 0.3178s for 8192 events => throughput is 2.58E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5354s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2217s + [COUNTERS] Fortran MEs ( 1 ) : 0.3138s for 8192 events => throughput is 2.61E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5374s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2208s - [COUNTERS] Fortran MEs ( 1 ) : 0.3166s for 8192 events => throughput is 2.59E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5335s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2184s + [COUNTERS] Fortran MEs ( 1 ) : 0.3151s for 8192 events => throughput is 2.60E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM 
TOTAL : 4.8863s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3948s - [COUNTERS] Fortran MEs ( 1 ) : 3.4914s for 90112 events => throughput is 2.58E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.9254s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4370s + [COUNTERS] Fortran MEs ( 1 ) : 3.4884s for 90112 events => throughput is 2.58E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196349765248158E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8470s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5289s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3181s for 8192 events => throughput is 2.58E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8356s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5223s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3132s for 8192 events => throughput is 2.62E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310860767768514E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.1676s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6889s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4787s for 90112 events => throughput is 2.59E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1026s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6641s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.4385s for 90112 events => throughput is 2.62E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.664891e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.697383e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.678201e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.698783e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196334183509370E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4053s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3112s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0941s for 8192 events => throughput is 8.71E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4030s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3093s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0937s for 8192 events => throughput is 8.74E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847547651041E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.4984s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4624s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0361s for 90112 events => throughput is 8.70E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.5866s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5178s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0689s for 90112 events => throughput is 8.43E+04 events/s *** 
(2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.849596e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.859401e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.839850e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.866780e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196330801117323E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3049s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2618s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0432s for 8192 events => throughput is 1.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3050s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2617s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0434s for 8192 events => throughput is 1.89E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847326088065E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8877s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4102s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4775s for 90112 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8700s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3958s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.4742s for 90112 events => throughput is 1.90E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.910833e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.846803e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.922964e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.911347e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196330801117323E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.2974s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2578s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0396s for 8192 events => throughput is 2.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2945s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2557s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0388s for 8192 events => throughput is 2.11E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847326088065E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8399s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4111s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4288s for 90112 events => throughput is 2.10E+05 
events/s + [COUNTERS] PROGRAM TOTAL : 1.8253s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3994s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4259s for 90112 events => throughput is 2.12E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.103227e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.115953e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.100634e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.160440e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196344079460428E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3223s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2717s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0506s for 8192 events => throughput is 1.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3177s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2677s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0500s for 8192 events => throughput is 1.64E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310857804286998E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.9829s - [COUNTERS] Fortran Overhead ( 0 ) 
: 1.4266s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5563s for 90112 events => throughput is 1.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9549s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4085s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5464s for 90112 events => throughput is 1.65E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.585816e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.606971e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.607898e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.622152e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196349366365994E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6463s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6455s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 9.65E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6413s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6404s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 9.77E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310864949473968E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 
1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8629s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8534s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0095s for 90112 events => throughput is 9.51E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.7943s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7849s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0094s for 90112 events => throughput is 9.55E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.331511e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.320932e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.854816e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.861129e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.761923e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.664882e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.387832e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.397726e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.746229e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.622996e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.509978e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.534593e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.611958e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.497628e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.615043e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.618938e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index aa573e43f0..49eb2706cd 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -4,8 +4,8 @@ CUDACPP_BUILDDIR='.' 
make USEBUILDDIR=1 AVX=none - make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-30_23:29:39 +DATE: 2023-11-03_14:24:43 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5412s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2212s - [COUNTERS] Fortran MEs ( 1 ) : 0.3200s for 8192 events => throughput is 2.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5358s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2227s + [COUNTERS] Fortran MEs ( 1 ) : 0.3131s for 8192 events => throughput is 2.62E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5409s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2221s - [COUNTERS] Fortran MEs ( 1 ) : 0.3187s for 8192 events => throughput is 2.57E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5387s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2263s + [COUNTERS] Fortran MEs ( 1 ) : 0.3124s for 8192 events => throughput is 2.62E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM 
TOTAL : 4.9079s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3978s - [COUNTERS] Fortran MEs ( 1 ) : 3.5102s for 90112 events => throughput is 2.57E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.8187s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3784s + [COUNTERS] Fortran MEs ( 1 ) : 3.4403s for 90112 events => throughput is 2.62E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358763382007E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8743s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5440s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3303s for 8192 events => throughput is 2.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8638s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5380s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3258s for 8192 events => throughput is 2.51E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872835011053E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.3386s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7039s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6346s for 90112 events => throughput is 2.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2839s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6818s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6021s for 90112 events => throughput is 2.50E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.525161e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.559173e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.525191e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.573125e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358804670396E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5616s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3842s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1775s for 8192 events => throughput is 4.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5484s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3821s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1663s for 8192 events => throughput is 4.93E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872836789727E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.3758s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5413s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8344s for 90112 events => throughput is 4.91E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.3769s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5431s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8338s for 90112 events => throughput is 4.91E+04 events/s *** 
(2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.044459e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.754175e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.000983e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.937414e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3897s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3046s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0851s for 8192 events => throughput is 9.63E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3824s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2993s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0831s for 8192 events => throughput is 9.86E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.4817s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5101s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9716s for 90112 events => throughput is 9.27E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.3687s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4491s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.9197s for 90112 events => throughput is 9.80E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.731456e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.003131e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.036086e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.002381e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3855s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3081s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0774s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3754s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2998s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0756s for 8192 events => throughput is 1.08E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.2548s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4438s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8110s for 90112 events => throughput is 1.11E+05 
events/s + [COUNTERS] PROGRAM TOTAL : 2.2394s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4337s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8056s for 90112 events => throughput is 1.12E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.143906e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.143249e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.130138e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.145832e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358757578441E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4344s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3275s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1069s for 8192 events => throughput is 7.66E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4319s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3251s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1068s for 8192 events => throughput is 7.67E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872803699391E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.7432s - [COUNTERS] Fortran Overhead ( 0 ) 
: 1.5145s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2287s for 90112 events => throughput is 7.33E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6523s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4789s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1734s for 90112 events => throughput is 7.68E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.663897e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.668189e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.610555e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.655247e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,8 +514,8 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358102981245E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6564s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6510s + [COUNTERS] PROGRAM TOTAL : 0.6518s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6464s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872068634174E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8300s - [COUNTERS] Fortran Overhead ( 0 ) : 
1.8072s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 90112 events => throughput is 3.95E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8128s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7900s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0227s for 90112 events => throughput is 3.96E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.631020e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.620967e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.229081e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.820585e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.964249e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.642288e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.234210e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.233489e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.975956e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.645321e+06 ) 
sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.242767e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.248124e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.983181e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.649140e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.714684e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.718456e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index e14c658652..f9d3c4043e 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg CUDACPP_BUILDDIR='.' 
- - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-30_23:30:21 +DATE: 2023-11-03_14:25:24 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3910s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2753s - [COUNTERS] Fortran MEs ( 1 ) : 4.1157s for 8192 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3440s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2774s + [COUNTERS] Fortran MEs ( 1 ) : 4.0666s for 8192 events => throughput is 2.01E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3946s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2730s - [COUNTERS] Fortran MEs ( 1 ) : 4.1216s for 8192 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3345s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2700s + [COUNTERS] Fortran MEs ( 1 ) : 4.0645s for 8192 events => throughput is 2.02E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] 
PROGRAM TOTAL : 47.4735s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8980s - [COUNTERS] Fortran MEs ( 1 ) : 45.5755s for 90112 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.0034s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8824s + [COUNTERS] Fortran MEs ( 1 ) : 45.1210s for 90112 events => throughput is 2.00E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.6632s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4139s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.2493s for 8192 events => throughput is 1.93E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.6907s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5143s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.1763s for 8192 events => throughput is 1.96E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421161E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 53.0552s - [COUNTERS] Fortran Overhead ( 0 ) : 6.0362s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.0189s for 90112 events => throughput is 1.92E+03 events/s + [COUNTERS] PROGRAM TOTAL : 52.0839s + [COUNTERS] Fortran Overhead ( 0 ) : 5.9376s + [COUNTERS] CudaCpp MEs ( 2 ) : 46.1463s for 90112 events => throughput is 1.95E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.978140e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.987220e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.981159e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.007452e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352993E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.7419s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4794s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2625s for 8192 events => throughput is 3.62E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6638s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4426s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2211s for 8192 events => throughput is 3.69E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 29.4316s - [COUNTERS] Fortran Overhead ( 0 ) : 4.1191s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.3125s for 90112 events => throughput is 3.56E+03 events/s + [COUNTERS] PROGRAM TOTAL : 28.7170s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0525s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.6644s for 90112 events => throughput is 3.65E+03 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.738679e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.799047e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.620314e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.786733e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.2993s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2837s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0156s for 8192 events => throughput is 8.07E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.1946s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2212s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9733s for 8192 events => throughput is 8.42E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 14.0894s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9408s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.1486s for 90112 events => throughput is 8.08E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.5555s + [COUNTERS] Fortran 
Overhead ( 0 ) : 2.8339s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.7216s for 90112 events => throughput is 8.40E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.633351e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.126786e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.639574e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.340374e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.9660s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1071s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8588s for 8192 events => throughput is 9.54E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9555s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1027s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8529s for 8192 events => throughput is 9.61E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 12.1848s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7263s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.4585s for 
90112 events => throughput is 9.53E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.0489s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6894s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.3594s for 90112 events => throughput is 9.63E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.781280e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.899347e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.795193e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.909621e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.4520s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3493s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1027s for 8192 events => throughput is 7.43E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.3829s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3187s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0641s for 8192 events => throughput is 7.70E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL 
: 14.8334s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9616s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.8718s for 90112 events => throughput is 7.59E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.5364s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9130s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.6234s for 90112 events => throughput is 7.75E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.653125e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.702464e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.171391e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.627010e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.8248s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7916s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0332s for 8192 events => throughput is 2.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8074s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7751s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0323s for 8192 events => throughput is 2.54E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 
[1.5803725748421161E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.7303s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3763s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3540s for 90112 events => throughput is 2.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.7137s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3633s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3505s for 90112 events => throughput is 2.57E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.292253e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.295020e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.518305e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.520040e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.103023e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.113125e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.141573e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.159949e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.119711e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.117693e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.166642e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.156944e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.121489e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.118922e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.435333e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.436022e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 5e7aa12c64..23b30f1d97 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -15,11 +15,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' 
CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-30_23:34:35 +DATE: 2023-11-03_14:29:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.4205s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2761s - [COUNTERS] Fortran MEs ( 1 ) : 4.1444s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3585s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2729s + [COUNTERS] Fortran MEs ( 1 ) : 4.0855s for 8192 events => throughput is 2.01E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3809s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2723s - [COUNTERS] Fortran MEs ( 1 ) : 4.1087s for 8192 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3604s + [COUNTERS] Fortran Overhead ( 
0 ) : 0.2691s + [COUNTERS] Fortran MEs ( 1 ) : 4.0913s for 8192 events => throughput is 2.00E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 47.2595s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8860s - [COUNTERS] Fortran MEs ( 1 ) : 45.3735s for 90112 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 46.9679s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8721s + [COUNTERS] Fortran MEs ( 1 ) : 45.0957s for 90112 events => throughput is 2.00E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277396490802749E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.3601s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2377s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1224s for 8192 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.2959s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2064s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.0894s for 8192 events => throughput is 2.00E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803774602344628E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 51.1932s - [COUNTERS] Fortran Overhead ( 0 ) : 5.8889s - [COUNTERS] CudaCpp MEs ( 2 ) : 45.3043s for 90112 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM 
TOTAL : 50.7315s + [COUNTERS] Fortran Overhead ( 0 ) : 5.8205s + [COUNTERS] CudaCpp MEs ( 2 ) : 44.9111s for 90112 events => throughput is 2.01E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.064791e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.086896e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.056153e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.085135e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277389126121586E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.4698s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3636s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1062s for 8192 events => throughput is 7.41E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.4679s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3644s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1035s for 8192 events => throughput is 7.42E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803771887543366E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 15.4000s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9874s - 
[COUNTERS] CudaCpp MEs ( 2 ) : 12.4125s for 90112 events => throughput is 7.26E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.1623s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9574s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.2048s for 90112 events => throughput is 7.38E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.510038e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.521286e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.463864e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.529131e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277390198115864E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.2604s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7598s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5006s for 8192 events => throughput is 1.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.3322s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7991s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5331s for 8192 events => throughput is 1.54E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803774416711566E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events 
(found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.8736s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3745s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4990s for 90112 events => throughput is 1.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.7860s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3594s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4267s for 90112 events => throughput is 1.66E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.710708e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.733277e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.710081e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.713538e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277390198115864E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.1198s + [COUNTERS] PROGRAM TOTAL : 1.1199s [COUNTERS] Fortran Overhead ( 0 ) : 0.6914s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4284s for 8192 events => throughput is 1.91E+04 events/s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4285s for 8192 events => throughput is 1.91E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 
[1.5803774416711566E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.0277s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2859s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.7418s for 90112 events => throughput is 1.90E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.0143s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2786s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.7356s for 90112 events => throughput is 1.90E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.964173e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.943790e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.952364e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.957674e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277396394633404E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.3259s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7968s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5290s for 8192 events => throughput is 1.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.3342s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8067s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5275s for 8192 events => throughput is 1.55E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' 
./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803777741065333E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 8.1714s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3874s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.7840s for 90112 events => throughput is 1.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.1909s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3908s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.8001s for 90112 events => throughput is 1.55E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.576795e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.490025e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.574102e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.444637e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277400478491260E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.7681s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7468s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0213s for 8192 events => throughput is 3.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7835s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7620s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.82E+05 events/s *** (3) 
Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -548,8 +548,8 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] Cross section = 0.000158 [1.5803779990154892E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) [COUNTERS] PROGRAM TOTAL : 2.5744s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3393s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2351s for 90112 events => throughput is 3.83E+05 events/s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3391s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2353s for 90112 events => throughput is 3.83E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.596987e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.593797e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.950725e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.939181e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.494407e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.492205e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.637374e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.665669e+05 ) 
sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.503985e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.495594e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.725521e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.659964e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.484632e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.476367e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.523442e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.521263e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index b9c1204e09..099daaf875 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,12 +1,12 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg CUDACPP_BUILDDIR='.' 
+make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-30_23:37:53 +DATE: 2023-11-03_14:32:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3521s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2754s - [COUNTERS] Fortran MEs ( 1 ) : 4.0767s for 8192 events => throughput is 2.01E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3416s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2711s + [COUNTERS] Fortran MEs ( 1 ) : 4.0705s for 8192 events => throughput is 2.01E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3925s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2709s - [COUNTERS] Fortran MEs ( 1 ) : 4.1216s for 8192 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3881s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2745s + [COUNTERS] Fortran MEs ( 1 ) : 4.1137s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] 
PROGRAM TOTAL : 46.9733s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8635s - [COUNTERS] Fortran MEs ( 1 ) : 45.1098s for 90112 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 46.9881s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8639s + [COUNTERS] Fortran MEs ( 1 ) : 45.1242s for 90112 events => throughput is 2.00E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277432965013E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.6564s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4133s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.2432s for 8192 events => throughput is 1.93E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.6258s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3913s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.2346s for 8192 events => throughput is 1.93E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725813026109E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 52.8661s - [COUNTERS] Fortran Overhead ( 0 ) : 6.0399s - [COUNTERS] CudaCpp MEs ( 2 ) : 46.8261s for 90112 events => throughput is 1.92E+03 events/s + [COUNTERS] PROGRAM TOTAL : 52.8410s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0507s + [COUNTERS] CudaCpp MEs ( 2 ) : 46.7903s for 90112 events => throughput is 1.93E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.985030e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.986406e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.976483e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.984071e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277430934464E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.6813s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4741s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2072s for 8192 events => throughput is 3.71E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7478s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4757s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2721s for 8192 events => throughput is 3.61E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725816246317E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 28.4509s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0381s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.4128s for 90112 events => throughput is 3.69E+03 events/s + [COUNTERS] PROGRAM TOTAL : 28.8117s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0740s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.7377s for 90112 events => throughput is 3.64E+03 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.810181e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.744041e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.800338e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.783870e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.1824s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2147s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9677s for 8192 events => throughput is 8.47E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.1797s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2161s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9636s for 8192 events => throughput is 8.50E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 13.3669s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8024s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.5645s for 90112 events => throughput is 8.53E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.3593s + [COUNTERS] Fortran 
Overhead ( 0 ) : 2.8150s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.5443s for 90112 events => throughput is 8.55E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.724129e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.773755e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.790107e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.783302e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.9418s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0966s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8452s for 8192 events => throughput is 9.69E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9449s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0988s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8461s for 8192 events => throughput is 9.68E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 12.1278s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7193s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.4085s for 
90112 events => throughput is 9.58E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.1426s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7153s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4273s for 90112 events => throughput is 9.56E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.923637e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.895193e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.767125e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.946582e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.5489s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4546s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0943s for 8192 events => throughput is 7.49E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.3960s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3284s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0677s for 8192 events => throughput is 7.67E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL 
: 14.7117s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9290s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.7827s for 90112 events => throughput is 7.65E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.7488s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9489s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.7999s for 90112 events => throughput is 7.64E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.665232e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.775707e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.734538e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.783797e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277293084707E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.8050s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7728s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0322s for 8192 events => throughput is 2.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8018s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7699s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0318s for 8192 events => throughput is 2.57E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 
[1.5803725738731039E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.7237s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3786s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3451s for 90112 events => throughput is 2.61E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.7679s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4051s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3627s for 90112 events => throughput is 2.48E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.289069e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.302892e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.526584e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.535657e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.118346e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.112952e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.165672e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.154600e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.110701e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.122390e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.168512e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.169657e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.101237e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.108240e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.440700e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.438063e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 43f15bcd5b..52d8de89eb 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,10 +1,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-30_23:43:30 +DATE: 2023-11-03_14:38:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index a18f7f44a8..d2dfd1e943 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,10 +1,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -17,23 +17,23 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2023-10-30_23:43:33 +DATE: 2023-11-03_14:38:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 63fd239456..5eb0659f4b 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -3,8 +3,8 @@ CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-30_23:43:36 +DATE: 2023-11-03_14:38:36 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 8b97b424e2..405f8e65cf 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -3,8 +3,8 @@ CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,11 +15,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-30_23:42:05 +DATE: 2023-11-03_14:37:04 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3064s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2366s - [COUNTERS] Fortran MEs ( 1 ) : 0.0698s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3063s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2373s + [COUNTERS] Fortran MEs ( 1 ) : 0.0690s for 8192 events => throughput is 1.19E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3011s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2317s - [COUNTERS] Fortran MEs ( 1 ) : 0.0694s for 8192 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2958s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2270s + [COUNTERS] Fortran MEs ( 1 ) : 0.0688s for 8192 events => throughput is 1.19E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 
2.2048s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4352s - [COUNTERS] Fortran MEs ( 1 ) : 0.7697s for 90112 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1688s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4067s + [COUNTERS] Fortran MEs ( 1 ) : 0.7620s for 90112 events => throughput is 1.18E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3893s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3129s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0764s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3839s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3089s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0750s for 8192 events => throughput is 1.09E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.3510s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5225s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8285s for 90112 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3142s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4904s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8238s for 90112 events => throughput is 1.09E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.083219e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.103427e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.082834e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.106392e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3183s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2773s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3109s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2710s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0400s for 8192 events => throughput is 2.05E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615872] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.0676s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5790s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4885s for 90112 events => throughput is 1.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9020s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4591s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4430s for 90112 events => throughput is 2.03E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.011280e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.011564e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.004922e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.005399e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2820s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2578s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0242s for 8192 events => throughput is 3.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2791s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2559s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0232s for 8192 events => throughput is 3.53E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7150s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4572s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2578s for 90112 events => throughput is 3.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7026s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4480s + [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.2545s for 90112 events => throughput is 3.54E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.422980e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.416682e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.485503e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.463573e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2770s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2561s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0209s for 8192 events => throughput is 3.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2738s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2532s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0206s for 8192 events => throughput is 3.98E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6961s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4641s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2320s for 90112 events => throughput is 3.88E+05 events/s + [COUNTERS] PROGRAM 
TOTAL : 1.6787s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4482s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2305s for 90112 events => throughput is 3.91E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.764338e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.889416e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.770277e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.892184e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3010s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2692s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0319s for 8192 events => throughput is 2.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2969s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2660s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0310s for 8192 events => throughput is 2.65E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8116s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4693s - [COUNTERS] CudaCpp MEs ( 2 ) 
: 0.3423s for 90112 events => throughput is 2.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8024s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4622s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3402s for 90112 events => throughput is 2.65E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.478678e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.634842e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.631815e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.658963e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6626s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6619s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.23E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6579s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6572s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.16E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615869] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2433s - 
[COUNTERS] Fortran Overhead ( 0 ) : 2.2357s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 90112 events => throughput is 1.19E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.8560s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8484s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 90112 events => throughput is 1.18E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.610218e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.570932e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.161418e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.991191e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.629399e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.399212e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.492469e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.526039e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.615993e+07 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.388680e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.799999e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.791775e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.604556e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.378274e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.786702e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.781671e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index be2917d26e..f862c0da30 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none - +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,25 +15,25 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2023-10-30_23:42:34 +DATE: 2023-11-03_14:37:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3083s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2383s - [COUNTERS] Fortran MEs ( 1 ) : 0.0700s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3209s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2467s + [COUNTERS] Fortran MEs ( 1 ) : 0.0742s for 8192 events => throughput is 1.10E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3017s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2316s - [COUNTERS] Fortran MEs ( 1 ) : 0.0701s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3213s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2463s + [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] 
PROGRAM TOTAL : 2.1785s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4139s - [COUNTERS] Fortran MEs ( 1 ) : 0.7646s for 90112 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1588s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4011s + [COUNTERS] Fortran MEs ( 1 ) : 0.7577s for 90112 events => throughput is 1.19E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050316058770007] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3802s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3092s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0709s for 8192 events => throughput is 1.15E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3719s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3016s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0703s for 8192 events => throughput is 1.16E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182797520666] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.3016s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5173s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7843s for 90112 events => throughput is 1.15E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2795s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4986s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7809s for 90112 events => throughput is 1.15E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.162040e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.167125e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.151940e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.163234e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313133963987] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2857s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2603s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0254s for 8192 events => throughput is 3.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2819s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2572s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 8192 events => throughput is 3.32E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179276862181] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7398s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4632s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2765s for 90112 events => throughput is 3.26E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7204s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4460s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2744s for 90112 events => throughput is 3.28E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.205576e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.221816e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.147401e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.192435e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313344346482] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2599s + [COUNTERS] PROGRAM TOTAL : 0.2604s [COUNTERS] Fortran Overhead ( 0 ) : 0.2472s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0127s for 8192 events => throughput is 6.43E+05 events/s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0131s for 8192 events => throughput is 6.24E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179137376883] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6035s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4659s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1376s for 90112 events => throughput is 6.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5832s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4492s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1340s for 90112 events => 
throughput is 6.72E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.338397e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.254203e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.343589e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.371108e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313344346482] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2592s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2480s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0113s for 8192 events => throughput is 7.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2557s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2444s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0112s for 8192 events => throughput is 7.29E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179137376883] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.5684s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4421s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1263s for 90112 events => throughput is 7.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5487s + [COUNTERS] Fortran 
Overhead ( 0 ) : 1.4231s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1256s for 90112 events => throughput is 7.18E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.758514e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.893256e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.052098e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.000656e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050317064561834] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2663s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2510s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0154s for 8192 events => throughput is 5.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2612s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2460s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.38E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182143140752] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6192s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4470s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1722s for 90112 events => 
throughput is 5.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6093s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4384s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1709s for 90112 events => throughput is 5.27E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.921448e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.858026e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.119546e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.045424e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050319131407651] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6579s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6574s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.57E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6539s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6534s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.59E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801186038252196] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8671s - [COUNTERS] Fortran Overhead ( 
0 ) : 1.8612s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0059s for 90112 events => throughput is 1.52E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.8472s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8414s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 90112 events => throughput is 1.54E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.914110e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.787997e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.440910e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.403452e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.334370e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.801683e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.703577e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.703537e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.243059e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.843279e+07 
) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.770259e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.791364e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.746993e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.382815e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.939439e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.946674e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index b248c00c80..7a35cd56f1 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -3,8 +3,8 @@ CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,12 +15,12 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-10-30_23:43:01 +DATE: 2023-11-03_14:38:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3027s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2334s - [COUNTERS] Fortran MEs ( 1 ) : 0.0693s for 8192 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3006s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2316s + [COUNTERS] Fortran MEs ( 1 ) : 0.0690s for 8192 events => throughput is 1.19E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3025s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2324s - [COUNTERS] Fortran MEs ( 1 ) : 0.0701s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3152s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2421s + [COUNTERS] Fortran MEs ( 1 ) : 0.0730s for 8192 events => throughput is 1.12E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 
2.1797s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4155s - [COUNTERS] Fortran MEs ( 1 ) : 0.7642s for 90112 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1574s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4001s + [COUNTERS] Fortran MEs ( 1 ) : 0.7572s for 90112 events => throughput is 1.19E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,8 +134,8 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333282657206] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3865s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3107s + [COUNTERS] PROGRAM TOTAL : 0.3817s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3059s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0758s for 8192 events => throughput is 1.08E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182636608796] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.3561s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5216s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8345s for 90112 events => throughput is 1.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3116s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4934s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8182s for 90112 events => throughput is 1.10E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.091158e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.052255e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.082266e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.056069e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333282657201] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3137s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2744s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0392s for 8192 events => throughput is 2.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3280s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2866s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0414s for 8192 events => throughput is 1.98E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182636608810] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.9130s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4797s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4333s for 90112 events => throughput is 2.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8866s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4572s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4293s for 90112 events => throughput is 2.10E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.035583e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.074202e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.014225e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.071823e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2821s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2589s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0232s for 8192 events => throughput is 3.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2788s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2560s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.59E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7607s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4976s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2631s for 90112 events => throughput is 3.42E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6909s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4404s + [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.2504s for 90112 events => throughput is 3.60E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.500656e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.556578e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.486676e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.556099e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2828s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2617s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2718s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2516s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0202s for 8192 events => throughput is 4.06E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7154s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4856s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2298s for 90112 events => throughput is 3.92E+05 events/s + [COUNTERS] PROGRAM 
TOTAL : 1.6599s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4355s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2245s for 90112 events => throughput is 4.01E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.920991e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.927162e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.905025e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.001830e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2991s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2672s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0319s for 8192 events => throughput is 2.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2975s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2656s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0319s for 8192 events => throughput is 2.57E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8242s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4689s - [COUNTERS] CudaCpp MEs ( 2 ) 
: 0.3553s for 90112 events => throughput is 2.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8011s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4557s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3454s for 90112 events => throughput is 2.61E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.437291e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.547378e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.499461e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.523060e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333301029693] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6574s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6568s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.23E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6577s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6570s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.19E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182637219935] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8600s - 
[COUNTERS] Fortran Overhead ( 0 ) : 1.8524s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 90112 events => throughput is 1.19E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.8570s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8495s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 90112 events => throughput is 1.19E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.578688e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.565902e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.112656e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.977935e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.619097e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.387906e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.484643e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.500001e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.622729e+07 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.390935e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.817708e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.764099e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.610768e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.392060e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.779631e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.772015e+07 ) sec^-1 TEST COMPLETED From c492e2c2efee9362090f27ee8b9a1418d7add8c7 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 3 Nov 2023 17:43:28 +0100 Subject: [PATCH 113/119] [oct23av] in CODEGEN, fix BUG in Olivier's 4e1dccb44 for OpenMP on Mac: the intent was to disable OpenMP on Mac, but it disabled it also on Linux, now fixed --- .../madgraph/iolibs/template_files/gpu/cudacpp.mk | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index d8be8e72ce..941abaabbe 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -241,14 +241,13 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) 
override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) -else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) -else ifneq ($(UNAME), 'Darwin') -override OMPFLAGS = # AV disable OpenMP MT on mac else -override OMPFLAGS = -fopenmp -###override OMPFLAGS = # disable OpenMP MT (default before #575) +override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms +###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) endif # Set the default AVX (vectorization) choice From b647330b2f1816c5382cc2bf8ad4085fb2a5658a Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 3 Nov 2023 17:47:07 +0100 Subject: [PATCH 114/119] [oct23av] regenerate all processes including the last changes to makefiles to disable OpenMP on Mac but not on Linux --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 16 ++--- .../ee_mumu.mad/SubProcesses/cudacpp.mk | 9 ++- .../CODEGEN_cudacpp_ee_mumu_log.txt | 12 ++-- .../ee_mumu.sa/SubProcesses/cudacpp.mk | 9 ++- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 16 ++--- .../cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 9 ++- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 10 ++-- .../cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk | 9 ++- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 20 +++---- .../gg_tt01g.mad/SubProcesses/cudacpp.mk | 9 ++- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 20 +++---- .../gg_ttg.mad/SubProcesses/cudacpp.mk | 9 ++- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 12 ++-- .../cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk | 9 ++- 
.../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 20 +++---- .../gg_ttgg.mad/SubProcesses/cudacpp.mk | 9 ++- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 12 ++-- .../gg_ttgg.sa/SubProcesses/cudacpp.mk | 9 ++- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 20 +++---- .../gg_ttggg.mad/SubProcesses/cudacpp.mk | 9 ++- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 14 ++--- .../gg_ttggg.sa/SubProcesses/cudacpp.mk | 9 ++- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 22 +++---- .../gq_ttq.mad/SubProcesses/cudacpp.mk | 9 ++- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 14 ++--- .../cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk | 9 ++- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 8 +-- .../heft_gg_h.sa/SubProcesses/cudacpp.mk | 9 ++- .../CODEGEN_mad_pp_tt012j_log.txt | 58 +++++++++---------- .../pp_tt012j.mad/SubProcesses/cudacpp.mk | 9 ++- 30 files changed, 197 insertions(+), 212 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index ae1e7a6e7f..8444deb94b 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005455732345581055  +DEBUG: model prefixing takes 0.005800008773803711  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,7 +174,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,19 +191,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.098 s +Wrote files for 8 helas calls in 0.102 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.196 s +ALOHA: aloha creates 3 routines in 0.216 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.249 s +ALOHA: aloha creates 7 routines in 0.261 s FFV1 FFV1 FFV2 @@ -248,9 +248,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.885s -user 0m1.626s -sys 0m0.205s +real 0m3.166s +user 0m1.697s +sys 0m0.232s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk index dd7c8b4172..fc383b7c5c 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk @@ -241,14 +241,13 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) -else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) -else ifneq ($(UNAME), 'Darwin') -override OMPFLAGS = # AV disable OpenMP MT on mac else -override OMPFLAGS = -fopenmp -###override OMPFLAGS = # disable OpenMP MT (default before #575) +override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms +###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) endif # Set the default AVX (vectorization) choice diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index e23cc03d5d..68c9fa1408 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005369663238525391  +DEBUG: model prefixing takes 0.005563497543334961  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,14 +174,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. 
-Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s +Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.262 s +ALOHA: aloha creates 4 routines in 0.276 s FFV1 FFV1 FFV2 @@ -201,6 +201,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.711s -user 0m0.578s -sys 0m0.059s +real 0m0.696s +user 0m0.627s +sys 0m0.061s diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk index dd7c8b4172..fc383b7c5c 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk @@ -241,14 +241,13 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) -else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) -else ifneq ($(UNAME), 'Darwin') -override OMPFLAGS = # AV disable OpenMP MT on mac else -override OMPFLAGS = -fopenmp -###override OMPFLAGS = # disable OpenMP MT (default before #575) +override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms +###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) endif # Set the default AVX (vectorization) choice diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 7a4d84afcd..eb7804a07a 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005388021469116211  +DEBUG: model prefixing takes 0.005347251892089844  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,16 +191,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.101 s +Wrote files for 10 helas calls in 0.106 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.152 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.130 s +ALOHA: aloha creates 4 routines in 0.138 s VVV1 FFV1 FFV1 @@ -237,9 +237,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.004s -user 0m1.448s -sys 0m0.220s +real 0m1.960s +user 0m1.543s +sys 0m0.235s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index dd7c8b4172..fc383b7c5c 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -241,14 +241,13 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) -else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) -else ifneq ($(UNAME), 'Darwin') -override OMPFLAGS = # AV disable OpenMP MT on mac else -override OMPFLAGS = -fopenmp -###override OMPFLAGS = # disable OpenMP MT (default before #575) +override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms +###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) endif # Set the default AVX (vectorization) choice diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 2b7e24e4bc..38b2b6b8e0 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005523204803466797  +DEBUG: model prefixing takes 0.0056209564208984375  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.008 s +1 processes with 3 diagrams generated in 0.009 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -180,7 +180,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.193 s VVV1 FFV1 FFV1 @@ -196,6 +196,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.546s -user 0m0.463s +real 0m2.589s +user 0m0.521s sys 0m0.057s diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk index dd7c8b4172..fc383b7c5c 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk @@ -241,14 +241,13 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) -else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) -else ifneq ($(UNAME), 'Darwin') -override OMPFLAGS = # AV disable OpenMP MT on mac else -override OMPFLAGS = -fopenmp -###override OMPFLAGS = # disable OpenMP MT (default before #575) +override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms +###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) endif # Set the default AVX (vectorization) choice diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 314aa7e54f..d6539c1821 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005343914031982422  +DEBUG: model prefixing takes 0.005568742752075195  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -185,7 +185,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -202,7 +202,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -217,15 +217,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 2 subprocesses (19 diagrams) in 0.043 s -Wrote files for 46 helas calls in 0.239 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.044 s +Wrote files for 46 helas calls in 0.251 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.332 s +ALOHA: aloha creates 5 routines in 0.334 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -233,7 +233,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.640 s +ALOHA: aloha creates 10 routines in 0.317 s VVV1 
VVV1 FFV1 @@ -283,9 +283,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.611s -user 0m2.029s -sys 0m0.243s +real 0m2.393s +user 0m2.106s +sys 0m0.255s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk index dd7c8b4172..fc383b7c5c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk @@ -241,14 +241,13 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) -else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) -else ifneq ($(UNAME), 'Darwin') -override OMPFLAGS = # AV disable OpenMP MT on mac else -override OMPFLAGS = -fopenmp -###override OMPFLAGS = # disable OpenMP MT (default before #575) +override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms +###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) endif # Set the default AVX (vectorization) choice diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index ef1138c5e9..031d16a1dc 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005430459976196289  +DEBUG: model prefixing takes 0.005894660949707031  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.021 s +1 processes with 16 diagrams generated in 0.022 s Total: 1 processes with 16 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,15 +190,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s -Wrote files for 36 helas calls in 0.146 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s +Wrote files for 36 helas calls in 0.154 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.323 s +ALOHA: aloha creates 5 routines in 0.336 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes 
ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.307 s +ALOHA: aloha creates 10 routines in 0.317 s VVV1 VVV1 FFV1 @@ -252,9 +252,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.201s -user 0m1.925s -sys 0m0.223s +real 0m2.249s +user 0m1.997s +sys 0m0.247s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk index dd7c8b4172..fc383b7c5c 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk @@ -241,14 +241,13 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) -else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) -else ifneq ($(UNAME), 'Darwin') -override OMPFLAGS = # AV disable OpenMP MT on mac else -override OMPFLAGS = -fopenmp -###override OMPFLAGS = # disable OpenMP MT (default before #575) +override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms +###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) endif # Set the default AVX (vectorization) choice diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 99d894704b..a5b7537758 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005426645278930664  +DEBUG: model prefixing takes 0.005454301834106445  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. 
-Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.318 s +ALOHA: aloha creates 5 routines in 0.333 s VVV1 VVV1 FFV1 @@ -204,6 +204,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.769s -user 0m0.709s -sys 0m0.050s +real 0m0.820s +user 0m0.748s +sys 0m0.059s diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk index dd7c8b4172..fc383b7c5c 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk @@ -241,14 +241,13 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) -else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) -else ifneq ($(UNAME), 'Darwin') -override OMPFLAGS = # AV disable OpenMP MT on mac else -override OMPFLAGS = -fopenmp -###override OMPFLAGS = # disable OpenMP MT (default before #575) +override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms +###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) endif # Set the default AVX (vectorization) choice diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 4fedc83eed..78bdf2d4a4 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005503416061401367  +DEBUG: model prefixing takes 0.005633831024169922  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.155 s +1 processes with 123 diagrams generated in 0.160 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,15 +190,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.419 s -Wrote files for 222 helas calls in 0.684 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.432 s +Wrote files for 222 helas calls in 0.711 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.329 s +ALOHA: aloha creates 5 routines in 0.343 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.309 s +ALOHA: aloha creates 10 routines in 0.319 s VVV1 VVV1 FFV1 @@ -255,9 +255,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m5.059s -user 0m2.982s -sys 0m0.232s +real 0m7.630s +user 0m3.094s +sys 0m0.252s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk index dd7c8b4172..fc383b7c5c 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk @@ -241,14 +241,13 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) -else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) -else ifneq ($(UNAME), 'Darwin') -override OMPFLAGS = # AV disable OpenMP MT on mac else -override OMPFLAGS = -fopenmp -###override OMPFLAGS = # disable OpenMP MT (default before #575) +override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms +###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) endif # Set the default AVX (vectorization) choice diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 5ce2f9ce66..dc10ba97a6 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005643606185913086  +DEBUG: model prefixing takes 0.005874156951904297  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.156 s +1 processes with 123 diagrams generated in 0.162 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.419 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.432 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.315 s +ALOHA: aloha creates 5 routines in 0.324 s VVV1 VVV1 FFV1 @@ -207,6 +207,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m1.431s -user 0m1.365s +real 0m1.504s +user 0m1.431s sys 0m0.053s diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk index dd7c8b4172..fc383b7c5c 
100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk @@ -241,14 +241,13 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) -else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) -else ifneq ($(UNAME), 'Darwin') -override OMPFLAGS = # AV disable OpenMP MT on mac else -override OMPFLAGS = -fopenmp -###override OMPFLAGS = # disable OpenMP MT (default before #575) +override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms +###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) endif # Set the default AVX (vectorization) choice diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index ab52071d35..9e9d8f7f10 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005440235137939453  +DEBUG: model prefixing takes 0.005738019943237305  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.890 s +1 processes with 1240 diagrams generated in 1.909 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -177,7 +177,7 @@ INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -192,15 +192,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.483 s -Wrote files for 2281 helas calls in 18.335 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.657 s +Wrote files for 2281 helas calls in 18.994 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.322 s +ALOHA: aloha creates 5 routines in 0.338 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -208,7 +208,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.308 s +ALOHA: aloha creates 10 routines in 0.317 s VVV1 VVV1 FFV1 @@ -257,9 +257,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m28.928s -user 0m28.452s -sys 0m0.379s +real 0m29.945s +user 0m29.405s +sys 0m0.427s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk index dd7c8b4172..fc383b7c5c 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk @@ -241,14 +241,13 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) -else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) -else ifneq ($(UNAME), 'Darwin') -override OMPFLAGS = # AV disable OpenMP MT on mac else -override OMPFLAGS = -fopenmp -###override OMPFLAGS = # disable OpenMP MT (default before #575) +override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms +###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) endif # Set the default AVX (vectorization) choice diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index a09465533d..5cdaa9dd1f 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00536036491394043  +DEBUG: model prefixing takes 0.005713462829589844  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.842 s +1 processes with 1240 diagrams generated in 1.918 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.483 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.717 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.344 s +ALOHA: aloha creates 5 routines in 0.371 s VVV1 VVV1 FFV1 @@ -207,6 +207,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m12.857s -user 0m12.694s -sys 0m0.108s +real 0m13.400s +user 0m13.234s +sys 0m0.105s diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk 
b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk index dd7c8b4172..fc383b7c5c 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk @@ -241,14 +241,13 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) -else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) -else ifneq ($(UNAME), 'Darwin') -override OMPFLAGS = # AV disable OpenMP MT on mac else -override OMPFLAGS = -fopenmp -###override OMPFLAGS = # disable OpenMP MT (default before #575) +override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms +###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) endif # Set the default AVX (vectorization) choice diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index e9f77c01ba..4fa198f80f 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005260467529296875  +DEBUG: model prefixing takes 0.005542755126953125  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.077 s +8 processes with 40 diagrams generated in 0.080 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -198,7 +198,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -215,7 +215,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -230,17 +230,17 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s -Wrote files for 32 helas calls in 0.215 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s +Wrote files for 32 helas calls in 0.226 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.144 s +ALOHA: aloha creates 2 routines in 0.153 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.131 s +ALOHA: aloha creates 4 routines in 0.135 s FFV1 FFV1 FFV1 @@ -294,9 +294,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.904s -user 0m1.684s -sys 0m0.213s +real 0m2.111s +user 0m1.747s +sys 0m0.252s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk index dd7c8b4172..fc383b7c5c 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk @@ -241,14 +241,13 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) -else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) -else ifneq ($(UNAME), 'Darwin') -override OMPFLAGS = # AV disable OpenMP MT on mac else -override OMPFLAGS = -fopenmp -###override OMPFLAGS = # disable OpenMP MT (default before #575) +override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms +###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) endif # Set the default AVX (vectorization) choice diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index c075764d55..f5681268a4 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0053064823150634766  +DEBUG: model prefixing takes 0.005438327789306641  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.076 s +8 processes with 40 diagrams generated in 0.079 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -206,12 +206,12 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. -Generated helas calls for 2 subprocesses (10 diagrams) in 0.029 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.141 s +ALOHA: aloha creates 2 routines in 0.158 s FFV1 FFV1 FFV1 @@ -228,6 +228,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.646s -user 0m0.581s -sys 0m0.052s +real 0m0.697s +user 0m0.622s +sys 0m0.067s diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk index dd7c8b4172..fc383b7c5c 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk @@ -241,14 +241,13 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override 
OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) -else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) -else ifneq ($(UNAME), 'Darwin') -override OMPFLAGS = # AV disable OpenMP MT on mac else -override OMPFLAGS = -fopenmp -###override OMPFLAGS = # disable OpenMP MT (default before #575) +override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms +###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) endif # Set the default AVX (vectorization) choice diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index e02926e728..78de4aa8bf 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -153,7 +153,7 @@ Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines -ALOHA: aloha creates 1 routines in 0.061 s +ALOHA: aloha creates 1 routines in 0.063 s VVS3 FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. 
@@ -166,6 +166,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.417s -user 0m0.359s -sys 0m0.048s +real 0m0.462s +user 0m0.394s +sys 0m0.051s diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk index dd7c8b4172..fc383b7c5c 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk @@ -241,14 +241,13 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) -else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) -else ifneq ($(UNAME), 'Darwin') -override OMPFLAGS = # AV disable OpenMP MT on mac else -override OMPFLAGS = -fopenmp -###override OMPFLAGS = # disable OpenMP MT (default before #575) +override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms +###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) endif # Set the default AVX (vectorization) choice diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 1d04df7c37..3f2aa2319f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005662202835083008  +DEBUG: model prefixing takes 0.005484104156494141  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.029 s +5 processes with 7 diagrams generated in 0.030 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. 
@@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.134 s +13 processes with 76 diagrams generated in 0.136 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.799 s +65 processes with 1119 diagrams generated in 1.833 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -497,7 +497,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -514,7 +514,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -531,7 +531,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -548,7 +548,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -565,7 +565,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -582,7 +582,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -599,7 +599,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -616,7 +616,7 @@ INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -633,7 +633,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -650,7 +650,7 @@ INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -667,7 +667,7 @@ INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -684,7 +684,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -701,7 +701,7 @@ INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -718,7 +718,7 @@ INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -735,7 +735,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -752,7 +752,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -769,7 +769,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -786,7 +786,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -801,15 +801,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.263 s -Wrote files for 810 helas calls in 3.209 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.294 s +Wrote files for 810 helas calls in 3.801 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.329 s +ALOHA: aloha creates 5 routines in 0.351 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -817,7 +817,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.308 s +ALOHA: aloha creates 10 routines in 0.324 s VVV1 VVV1 FFV1 @@ -1028,9 +1028,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about 
this process. quit -real 0m8.727s -user 0m8.236s -sys 0m0.447s +real 0m9.622s +user 0m8.492s +sys 0m0.501s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk index dd7c8b4172..fc383b7c5c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk @@ -241,14 +241,13 @@ override OMPFLAGS = -fopenmp else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) -else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) -else ifneq ($(UNAME), 'Darwin') -override OMPFLAGS = # AV disable OpenMP MT on mac else -override OMPFLAGS = -fopenmp -###override OMPFLAGS = # disable OpenMP MT (default before #575) +override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms +###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) endif # Set the default AVX (vectorization) choice From 5418ed5c2684f5f77fccc024d5defa370f54bea0 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 3 Nov 2023 18:47:38 +0100 Subject: [PATCH 115/119] [oct23av] in ggttmad cudacpp.mk, fix one third error for openmp on Mac - testing this on Olivier's desktop Mac, looks ok now --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index fc383b7c5c..509307506b 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -242,7 +242,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) ###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) -else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) else From 1b3ea526cdb64c892bfc68f5ebd6a1a7c6df7822 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 3 Nov 2023 18:49:50 +0100 Subject: [PATCH 116/119] [oct23av] in CODEGEN, backport the third bug fix in openmp for mac from ggttmad --- .../madgraph/iolibs/template_files/gpu/cudacpp.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index 941abaabbe..b399eb36b0 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -242,7 +242,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) ###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) -else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) else From 08a7a7d55eab43bc54c5dcdd8033adf86aca40b9 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 3 Nov 2023 18:53:00 +0100 Subject: [PATCH 117/119] [oct23av] regenerate all processes including the third bug fix in makefiles to disable OpenMP on Mac but not on Linux --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 14 ++--- .../ee_mumu.mad/SubProcesses/cudacpp.mk | 2 +- .../CODEGEN_cudacpp_ee_mumu_log.txt | 10 ++-- .../ee_mumu.sa/SubProcesses/cudacpp.mk | 2 +- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 18 +++---- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 12 ++--- .../cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk | 2 +- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 20 +++---- .../gg_tt01g.mad/SubProcesses/cudacpp.mk | 2 +- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 18 +++---- .../gg_ttg.mad/SubProcesses/cudacpp.mk | 2 +- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 14 ++--- .../cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk | 2 +- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 20 +++---- .../gg_ttgg.mad/SubProcesses/cudacpp.mk | 2 +- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 14 ++--- .../gg_ttgg.sa/SubProcesses/cudacpp.mk | 2 +- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 20 +++---- .../gg_ttggg.mad/SubProcesses/cudacpp.mk | 2 +- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 14 ++--- .../gg_ttggg.sa/SubProcesses/cudacpp.mk | 2 +- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 20 +++---- .../gq_ttq.mad/SubProcesses/cudacpp.mk | 2 +- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 14 ++--- .../cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk | 2 +- .../CODEGEN_cudacpp_heft_gg_h_log.txt | 8 +-- .../heft_gg_h.sa/SubProcesses/cudacpp.mk | 2 +- .../CODEGEN_mad_pp_tt012j_log.txt | 54 +++++++++---------- .../pp_tt012j.mad/SubProcesses/cudacpp.mk | 2 +- 29 files changed, 149 insertions(+), 149 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 
8444deb94b..c75f0f44d7 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005800008773803711  +DEBUG: model prefixing takes 0.0054340362548828125  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,7 +174,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,12 +191,12 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.102 s +Wrote files for 8 helas calls in 0.099 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.216 s +ALOHA: aloha creates 3 routines in 0.204 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines @@ -248,9 +248,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.166s -user 0m1.697s -sys 0m0.232s +real 0m1.910s +user 0m1.676s +sys 0m0.221s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk index fc383b7c5c..509307506b 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk @@ -242,7 +242,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) ###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) -else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) else diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 68c9fa1408..4db63d797a 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005563497543334961  +DEBUG: model prefixing takes 0.0056912899017333984  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -181,7 +181,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.276 s +ALOHA: aloha creates 4 routines in 0.269 s FFV1 FFV1 FFV2 @@ -201,6 +201,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.696s -user 0m0.627s -sys 0m0.061s +real 0m0.672s +user 0m0.594s +sys 0m0.064s diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk index fc383b7c5c..509307506b 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk @@ -242,7 +242,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) ###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) -else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any 
compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) else diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index eb7804a07a..9f574144e7 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005347251892089844  +DEBUG: model prefixing takes 0.005654096603393555  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.008 s +1 processes with 3 diagrams generated in 0.009 s Total: 1 processes with 3 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,16 +191,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.106 s +Wrote files for 10 helas calls in 0.109 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.152 s +ALOHA: aloha creates 2 routines in 0.155 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.138 s +ALOHA: aloha creates 4 routines in 0.141 s VVV1 FFV1 FFV1 @@ -237,9 +237,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.960s -user 0m1.543s -sys 0m0.235s +real 0m1.956s +user 0m1.564s +sys 0m0.225s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 38b2b6b8e0..78e924730d 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0056209564208984375  +DEBUG: model prefixing takes 0.005621910095214844  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.009 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -180,7 +180,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.193 s +ALOHA: aloha creates 2 routines in 0.146 s VVV1 FFV1 FFV1 @@ -196,6 +196,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m2.589s -user 0m0.521s -sys 0m0.057s +real 0m0.573s +user 0m0.475s +sys 0m0.060s diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk index fc383b7c5c..509307506b 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk @@ -242,7 +242,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) ###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) -else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) 
###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) else diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index d6539c1821..a80c9750fe 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005568742752075195  +DEBUG: model prefixing takes 0.00545811653137207  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -163,7 +163,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.019 s +1 processes with 16 diagrams generated in 0.020 s Total: 2 processes with 19 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -185,7 +185,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -202,7 +202,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -218,14 +218,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 2 subprocesses (19 diagrams) in 0.044 s -Wrote files for 46 helas calls in 0.251 s +Wrote files for 46 helas calls in 0.246 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.334 s +ALOHA: aloha creates 5 routines in 0.335 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -233,7 +233,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.317 s +ALOHA: aloha creates 10 routines in 0.314 s VVV1 VVV1 FFV1 @@ -283,9 +283,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.393s -user 0m2.106s -sys 0m0.255s +real 0m2.326s +user 0m2.071s +sys 0m0.244s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk index fc383b7c5c..509307506b 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk @@ -242,7 +242,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) ###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) -else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) else diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index 031d16a1dc..373045f436 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005894660949707031  +DEBUG: model prefixing takes 0.005476474761962891  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,15 +190,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s -Wrote files for 36 helas calls in 0.154 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s +Wrote files for 36 helas calls in 0.150 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.336 s +ALOHA: aloha creates 5 routines in 0.327 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.317 s +ALOHA: aloha 
creates 10 routines in 0.312 s VVV1 VVV1 FFV1 @@ -252,9 +252,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.249s -user 0m1.997s -sys 0m0.247s +real 0m2.288s +user 0m1.960s +sys 0m0.232s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk index fc383b7c5c..509307506b 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk @@ -242,7 +242,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) ###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) -else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) else diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index a5b7537758..d4636e5b42 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005454301834106445  +DEBUG: model prefixing takes 0.005591392517089844  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.022 s +1 processes with 16 diagrams generated in 0.021 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. 
-Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.333 s +ALOHA: aloha creates 5 routines in 0.328 s VVV1 VVV1 FFV1 @@ -204,6 +204,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.820s -user 0m0.748s -sys 0m0.059s +real 0m0.867s +user 0m0.725s +sys 0m0.054s diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk index fc383b7c5c..509307506b 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk @@ -242,7 +242,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) ###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) -else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) else diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 78bdf2d4a4..028ea12c38 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005633831024169922  +DEBUG: model prefixing takes 0.005753517150878906  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.160 s +1 processes with 123 diagrams generated in 0.162 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -190,15 +190,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.432 s -Wrote files for 222 helas calls in 0.711 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.437 s +Wrote files for 222 helas calls in 0.698 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.343 s +ALOHA: aloha creates 5 routines in 0.336 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.319 s +ALOHA: aloha creates 10 routines in 0.316 s VVV1 VVV1 FFV1 @@ -255,9 +255,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m7.630s -user 0m3.094s -sys 0m0.252s +real 0m3.685s +user 0m3.044s +sys 0m0.262s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk index fc383b7c5c..509307506b 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk @@ -242,7 +242,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) ###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) -else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) else diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index dc10ba97a6..51ad921d68 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005874156951904297  +DEBUG: model prefixing takes 0.00551152229309082  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.162 s +1 processes with 123 diagrams generated in 0.158 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.432 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.434 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.324 s +ALOHA: aloha creates 5 routines in 0.337 s VVV1 VVV1 FFV1 @@ -207,6 +207,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m1.504s -user 0m1.431s -sys 0m0.053s +real 0m1.742s +user 0m1.392s +sys 0m0.072s diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk index 
fc383b7c5c..509307506b 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk @@ -242,7 +242,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) ###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) -else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) else diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 9e9d8f7f10..6147b7b154 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005738019943237305  +DEBUG: model prefixing takes 0.00539708137512207  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.909 s +1 processes with 1240 diagrams generated in 1.903 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -177,7 +177,7 @@ INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -192,15 +192,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.657 s -Wrote files for 2281 helas calls in 18.994 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.648 s +Wrote files for 2281 helas calls in 18.679 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.338 s +ALOHA: aloha creates 5 routines in 0.324 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -208,7 +208,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.317 s +ALOHA: aloha creates 10 routines in 0.315 s VVV1 VVV1 FFV1 @@ -257,9 +257,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m29.945s -user 0m29.405s -sys 0m0.427s +real 0m29.484s +user 0m28.971s +sys 0m0.416s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk index fc383b7c5c..509307506b 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk @@ -242,7 +242,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) ###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) -else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) else diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 5cdaa9dd1f..96575fb544 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005713462829589844  +DEBUG: model prefixing takes 0.005684852600097656  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.918 s +1 processes with 1240 diagrams generated in 1.909 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.717 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.626 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.371 s +ALOHA: aloha creates 5 routines in 0.354 s VVV1 VVV1 FFV1 @@ -207,6 +207,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m13.400s -user 0m13.234s -sys 0m0.105s +real 0m13.393s +user 0m12.965s +sys 0m0.120s diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk 
b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk index fc383b7c5c..509307506b 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk @@ -242,7 +242,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) ###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) -else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) else diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 4fa198f80f..96187049e8 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005542755126953125  +DEBUG: model prefixing takes 0.005629301071166992  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.080 s +8 processes with 40 diagrams generated in 0.078 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -198,7 +198,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -215,7 +215,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -231,16 +231,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s -Wrote files for 32 helas calls in 0.226 s +Wrote files for 32 helas calls in 0.222 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.153 s +ALOHA: aloha creates 2 routines in 0.146 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.135 s +ALOHA: aloha creates 4 routines in 0.133 s FFV1 FFV1 FFV1 @@ -294,9 +294,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.111s -user 0m1.747s -sys 0m0.252s +real 0m1.938s +user 0m1.724s +sys 0m0.209s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk index fc383b7c5c..509307506b 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk @@ -242,7 +242,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) ###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) -else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) else diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index f5681268a4..df17ded021 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005438327789306641  +DEBUG: model prefixing takes 0.0056269168853759766  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.079 s +8 processes with 40 diagrams generated in 0.078 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -206,12 +206,12 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. 
-Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.158 s +ALOHA: aloha creates 2 routines in 0.148 s FFV1 FFV1 FFV1 @@ -228,6 +228,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.697s -user 0m0.622s -sys 0m0.067s +real 0m0.655s +user 0m0.590s +sys 0m0.056s diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk index fc383b7c5c..509307506b 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk @@ -242,7 +242,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) ###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) -else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) else diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index 78de4aa8bf..1cbaa67772 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -153,7 +153,7 @@ Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines -ALOHA: aloha creates 1 routines in 0.063 s +ALOHA: aloha creates 1 routines in 0.062 s VVS3 FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. @@ -166,6 +166,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.462s -user 0m0.394s -sys 0m0.051s +real 0m0.530s +user 0m0.366s +sys 0m0.058s diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk index fc383b7c5c..509307506b 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/cudacpp.mk @@ -242,7 +242,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) ###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) -else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) +else ifeq ($(UNAME_S),Darwin) # OM for 
Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) else diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 3f2aa2319f..4b66e3dcbb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005484104156494141  +DEBUG: model prefixing takes 0.0055620670318603516  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.030 s +5 processes with 7 diagrams generated in 0.029 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.833 s +65 processes with 1119 diagrams generated in 1.823 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -497,7 +497,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -514,7 +514,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -531,7 +531,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -548,7 +548,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -565,7 +565,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -582,7 +582,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -599,7 +599,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -616,7 +616,7 @@ INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -633,7 +633,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -650,7 +650,7 @@ INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -667,7 +667,7 @@ INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -684,7 +684,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -701,7 +701,7 @@ INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -718,7 +718,7 @@ INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -735,7 +735,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -752,7 +752,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -769,7 +769,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -786,7 +786,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -802,14 +802,14 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx Generated helas calls for 18 subprocesses (372 diagrams) in 1.294 s -Wrote files for 810 helas calls in 3.801 s +Wrote files for 810 helas calls in 3.272 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.351 s +ALOHA: aloha creates 5 routines in 0.337 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -817,7 +817,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.324 s +ALOHA: aloha creates 10 routines in 0.313 s VVV1 VVV1 FFV1 @@ -1028,9 +1028,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m9.622s -user 0m8.492s -sys 0m0.501s +real 0m8.875s +user 0m8.390s +sys 0m0.460s ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk index fc383b7c5c..509307506b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk @@ -242,7 +242,7 @@ else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) override OMPFLAGS = -fopenmp ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) ###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) -else ifeq ($(UNAME_S), 'Darwin') # OM for Mac (any compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? (AV Oct 2023: this still fails in the CI) else From 4351daa27751eab2237af7f679cd1755d912a274 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 3 Nov 2023 20:54:01 +0100 Subject: [PATCH 118/119] [oct23av] rerun 78 tput tests, with FPEs enabled in the check executable - usual failures in ggttg f/m and gqttq f (#783), no change in performance (*NB OpenMP is now again enabled by default*) (or maybe ~1-2% slower on average? 
anyway, keep OpenMP on as in the past) --- .../log_eemumu_mad_d_inl0_hrd0.txt | 103 ++++++++------- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 103 ++++++++------- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 103 ++++++++------- .../log_eemumu_mad_d_inl0_hrd0_curhst.txt | 103 ++++++++------- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 103 ++++++++------- .../log_eemumu_mad_d_inl0_hrd1.txt | 103 ++++++++------- .../log_eemumu_mad_d_inl1_hrd0.txt | 103 ++++++++------- .../log_eemumu_mad_d_inl1_hrd1.txt | 103 ++++++++------- .../log_eemumu_mad_f_inl0_hrd0.txt | 103 ++++++++------- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 103 ++++++++------- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 103 ++++++++------- .../log_eemumu_mad_f_inl0_hrd0_curhst.txt | 103 ++++++++------- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 103 ++++++++------- .../log_eemumu_mad_f_inl0_hrd1.txt | 103 ++++++++------- .../log_eemumu_mad_f_inl1_hrd0.txt | 103 ++++++++------- .../log_eemumu_mad_f_inl1_hrd1.txt | 103 ++++++++------- .../log_eemumu_mad_m_inl0_hrd0.txt | 103 ++++++++------- .../log_eemumu_mad_m_inl0_hrd1.txt | 103 ++++++++------- .../log_ggtt_mad_d_inl0_hrd0.txt | 103 ++++++++------- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 103 ++++++++------- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 103 ++++++++------- .../log_ggtt_mad_d_inl0_hrd0_curhst.txt | 103 ++++++++------- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 103 ++++++++------- .../log_ggtt_mad_d_inl0_hrd1.txt | 103 ++++++++------- .../log_ggtt_mad_d_inl1_hrd0.txt | 103 ++++++++------- .../log_ggtt_mad_d_inl1_hrd1.txt | 103 ++++++++------- .../log_ggtt_mad_f_inl0_hrd0.txt | 103 ++++++++------- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 103 ++++++++------- .../log_ggtt_mad_f_inl0_hrd0_common.txt | 103 ++++++++------- .../log_ggtt_mad_f_inl0_hrd0_curhst.txt | 103 ++++++++------- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 103 ++++++++------- .../log_ggtt_mad_f_inl0_hrd1.txt | 103 ++++++++------- .../log_ggtt_mad_f_inl1_hrd0.txt | 103 
++++++++------- .../log_ggtt_mad_f_inl1_hrd1.txt | 103 ++++++++------- .../log_ggtt_mad_m_inl0_hrd0.txt | 103 ++++++++------- .../log_ggtt_mad_m_inl0_hrd1.txt | 103 ++++++++------- .../log_ggttg_mad_d_inl0_hrd0.txt | 117 +++++++++-------- .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 117 +++++++++-------- .../log_ggttg_mad_d_inl0_hrd1.txt | 117 +++++++++-------- .../log_ggttg_mad_f_inl0_hrd0.txt | 40 +++--- .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 40 +++--- .../log_ggttg_mad_f_inl0_hrd1.txt | 40 +++--- .../log_ggttg_mad_m_inl0_hrd0.txt | 40 +++--- .../log_ggttg_mad_m_inl0_hrd1.txt | 40 +++--- .../log_ggttgg_mad_d_inl0_hrd0.txt | 117 +++++++++-------- .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 117 +++++++++-------- .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 117 +++++++++-------- .../log_ggttgg_mad_d_inl0_hrd0_curhst.txt | 117 +++++++++-------- .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 117 +++++++++-------- .../log_ggttgg_mad_d_inl0_hrd1.txt | 117 +++++++++-------- .../log_ggttgg_mad_d_inl1_hrd0.txt | 117 +++++++++-------- .../log_ggttgg_mad_d_inl1_hrd1.txt | 117 +++++++++-------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 117 +++++++++-------- .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 117 +++++++++-------- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 117 +++++++++-------- .../log_ggttgg_mad_f_inl0_hrd0_curhst.txt | 117 +++++++++-------- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 117 +++++++++-------- .../log_ggttgg_mad_f_inl0_hrd1.txt | 117 +++++++++-------- .../log_ggttgg_mad_f_inl1_hrd0.txt | 121 +++++++++--------- .../log_ggttgg_mad_f_inl1_hrd1.txt | 121 +++++++++--------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 117 +++++++++-------- .../log_ggttgg_mad_m_inl0_hrd1.txt | 117 +++++++++-------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 117 +++++++++-------- .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 117 +++++++++-------- .../log_ggttggg_mad_d_inl0_hrd1.txt | 117 +++++++++-------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 117 +++++++++-------- 
.../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 117 +++++++++-------- .../log_ggttggg_mad_f_inl0_hrd1.txt | 117 +++++++++-------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 117 +++++++++-------- .../log_ggttggg_mad_m_inl0_hrd1.txt | 117 +++++++++-------- .../log_gqttq_mad_d_inl0_hrd0.txt | 117 +++++++++-------- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 117 +++++++++-------- .../log_gqttq_mad_d_inl0_hrd1.txt | 117 +++++++++-------- .../log_gqttq_mad_f_inl0_hrd0.txt | 108 ++++++++-------- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 108 ++++++++-------- .../log_gqttq_mad_f_inl0_hrd1.txt | 108 ++++++++-------- .../log_gqttq_mad_m_inl0_hrd0.txt | 117 +++++++++-------- .../log_gqttq_mad_m_inl0_hrd1.txt | 117 +++++++++-------- 78 files changed, 4290 insertions(+), 3928 deletions(-) diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 5abaf48c27..4e0cc4f360 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_13:10:40 +DATE: 2023-11-03_19:00:16 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.425670e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.222247e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.012114e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.995135e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.942022e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.073010e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.857427 sec - 2,861,836,198 cycles # 3.005 GHz - 4,395,542,756 instructions # 1.54 insn per cycle - 1.189695235 seconds time elapsed +TOTAL : 0.649523 sec + 2,606,897,569 cycles # 2.955 GHz + 4,039,165,920 instructions # 1.55 insn per cycle + 0.938736477 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -76,15 +76,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.128658e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.324925e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.324925e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.116390e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.309346e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.309346e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.970436 sec - 18,347,671,953 cycles # 3.071 GHz - 43,938,983,717 instructions # 2.39 insn per cycle - 5.978217238 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 420) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.039128 sec + 18,293,625,810 cycles # 3.027 GHz + 44,037,997,118 instructions # 2.41 insn per cycle + 6.044375342 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.660817e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.184751e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.184751e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.650519e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.159299e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.159299e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.192129 sec - 12,809,417,728 cycles # 3.052 GHz - 31,016,432,387 instructions # 2.42 insn per cycle - 4.205962943 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1631) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.212186 sec + 12,761,177,625 cycles # 3.027 GHz + 31,004,602,670 instructions # 2.43 insn per cycle + 4.217391637 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.074393e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.897997e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.897997e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.065360e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.886676e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.886676e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.424513 sec - 10,077,521,544 cycles # 2.940 GHz - 19,366,070,840 instructions # 1.92 insn per cycle - 3.435971091 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1947) (512y: 0) (512z: 0) +TOTAL : 3.440327 sec + 10,045,086,881 cycles # 2.916 GHz + 19,380,193,658 instructions # 1.93 insn per cycle + 3.445672409 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.171630e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.081425e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.081425e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.092180e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.955480e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.955480e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.285829 sec - 9,782,653,766 cycles # 2.972 GHz - 18,983,356,035 instructions # 1.94 insn per cycle - 3.300405408 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1665) (512y: 181) (512z: 0) +TOTAL : 3.409304 sec + 9,718,965,428 cycles # 2.848 GHz + 18,998,332,681 instructions # 1.95 insn per cycle + 3.414677998 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.870011e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.499133e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.499133e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.821062e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.417007e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.417007e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.753305 sec - 8,627,314,746 cycles # 2.296 GHz - 15,735,767,464 instructions # 1.82 insn per cycle - 3.765985274 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 876) (512y: 154) (512z: 1258) +TOTAL : 3.852694 sec + 8,598,148,642 cycles # 2.229 GHz + 15,740,848,417 instructions # 1.83 insn per cycle + 3.858015954 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index abcc335062..a2a2220e0b 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_13:58:54 +DATE: 2023-11-03_19:34:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.684254e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.540316e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.540316e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.616160e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.542311e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.542311e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.224447 sec - 7,545,611,950 cycles # 3.058 GHz - 13,315,711,854 instructions # 1.76 insn per cycle - 2.525188178 seconds time elapsed +TOTAL : 2.257075 sec + 7,500,299,564 cycles # 3.000 GHz + 13,128,281,558 instructions # 1.75 insn per cycle + 2.557069801 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost @@ -85,15 +85,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.088698e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.270495e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270495e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.074156e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.251964e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.251964e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.375164 sec - 19,562,608,505 cycles # 3.066 GHz - 44,166,894,222 instructions # 2.26 insn per cycle - 6.381733173 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 420) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.457469 sec + 19,613,725,947 cycles # 3.035 GHz + 44,260,538,354 instructions # 2.26 insn per cycle + 6.464068851 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -112,15 +113,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.600351e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.064795e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.064795e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.537992e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.980628e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.980628e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.536602 sec - 13,986,967,709 cycles # 3.079 GHz - 31,858,097,714 instructions # 2.28 insn per cycle - 4.543267608 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1631) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.703362 sec + 14,014,545,412 cycles # 2.976 GHz + 31,843,317,256 instructions # 2.27 insn per cycle + 4.710044451 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -139,15 +141,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.974818e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.694900e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.694900e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.930954e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.630364e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.630364e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.785803 sec - 11,319,453,378 cycles # 2.986 GHz - 20,725,242,418 instructions # 1.83 insn per cycle - 3.792230201 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1947) (512y: 0) (512z: 0) +TOTAL : 3.870178 sec + 11,351,058,249 cycles # 2.929 GHz + 20,737,271,008 instructions # 1.83 insn per cycle + 3.876822605 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -166,15 +169,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.024211e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.795837e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.795837e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.936889e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.651989e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.651989e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.711845 sec - 11,025,578,716 cycles # 2.966 GHz - 20,353,975,948 instructions # 1.85 insn per cycle - 3.718442154 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1665) (512y: 181) (512z: 0) +TOTAL : 3.871998 sec + 11,000,759,855 cycles # 2.837 GHz + 20,365,657,381 instructions # 1.85 insn per cycle + 3.879015734 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -193,15 +197,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.769786e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.312748e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.312748e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.694377e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.207135e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.207135e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.154845 sec - 9,930,919,219 cycles # 2.387 GHz - 16,877,948,183 instructions # 1.70 insn per cycle - 4.161452694 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 876) (512y: 154) (512z: 1258) +TOTAL : 4.335020 sec + 9,935,731,633 cycles # 2.289 GHz + 16,882,918,411 instructions # 1.70 insn per cycle + 4.341683669 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index 9c171d5aa7..dedce3e2ef 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_14:12:09 +DATE: 2023-11-03_19:47:12 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.518793e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.550788e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.020092e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.493472e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.526211e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.980085e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.322621 sec - 4,714,608,796 cycles # 3.038 GHz - 7,352,175,067 instructions # 1.56 insn per cycle - 1.608967629 seconds time elapsed +TOTAL : 1.335531 sec + 4,653,241,552 cycles # 2.971 GHz + 7,232,975,239 instructions # 1.55 insn per cycle + 1.623039981 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -76,15 +76,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.139632e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.336228e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.336228e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.100587e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.292616e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.292616e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 6.266327 sec - 19,426,711,131 cycles # 3.098 GHz - 44,042,281,250 instructions # 2.27 insn per cycle - 6.271573649 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 420) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.487751 sec + 19,390,492,430 cycles # 2.987 GHz + 44,137,957,280 instructions # 2.28 insn per cycle + 6.493082825 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.682420e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.206424e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.206424e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.649039e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.157189e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.157189e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.484074 sec - 13,859,190,016 cycles # 3.088 GHz - 31,018,475,981 instructions # 2.24 insn per cycle - 4.489265899 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1631) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.573606 sec + 13,864,290,699 cycles # 3.029 GHz + 31,004,021,041 instructions # 2.24 insn per cycle + 4.579072706 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.088883e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.925223e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.925223e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.050077e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.865714e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.865714e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.749912 sec - 11,186,702,264 cycles # 2.980 GHz - 19,267,314,009 instructions # 1.72 insn per cycle - 3.755230236 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1947) (512y: 0) (512z: 0) +TOTAL : 3.825144 sec + 11,151,950,602 cycles # 2.912 GHz + 19,279,192,444 instructions # 1.73 insn per cycle + 3.830421553 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.168230e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.077689e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.077689e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.125943e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.996151e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.996151e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.647713 sec - 10,905,880,835 cycles # 2.986 GHz - 18,695,552,937 instructions # 1.71 insn per cycle - 3.653166666 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1665) (512y: 181) (512z: 0) +TOTAL : 3.721741 sec + 10,820,749,101 cycles # 2.904 GHz + 18,706,645,976 instructions # 1.73 insn per cycle + 3.727088912 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.847756e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.453812e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.453812e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.802766e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.399092e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.399092e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.151667 sec - 9,725,947,647 cycles # 2.341 GHz - 15,433,393,788 instructions # 1.59 insn per cycle - 4.156858401 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 876) (512y: 154) (512z: 1258) +TOTAL : 4.260983 sec + 9,758,383,682 cycles # 2.288 GHz + 15,439,422,037 instructions # 1.58 insn per cycle + 4.266311634 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt index 3e310efa36..753c8feb62 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_14:08:56 +DATE: 2023-11-03_19:43:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.542546e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.573728e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.039776e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.492551e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.537742e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.994776e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.966199 sec - 3,631,078,095 cycles # 3.042 GHz - 7,243,627,382 instructions # 1.99 insn per cycle - 1.251235752 seconds time elapsed +TOTAL : 0.978991 sec + 3,581,699,122 cycles # 2.964 GHz + 7,061,755,742 instructions # 1.97 insn per cycle + 1.265379690 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -76,15 +76,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.138499e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.335855e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.335855e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.108457e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.301315e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.301315e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.919799 sec - 18,319,541,591 cycles # 3.092 GHz - 43,938,665,623 instructions # 2.40 insn per cycle - 5.925332232 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 420) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.081290 sec + 18,339,334,415 cycles # 3.014 GHz + 44,033,842,254 instructions # 2.40 insn per cycle + 6.086519540 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.666277e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.183174e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.183174e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.647910e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.158230e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.158230e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.178085 sec - 12,735,588,219 cycles # 3.045 GHz - 31,015,602,777 instructions # 2.44 insn per cycle - 4.183602760 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1631) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.219825 sec + 12,790,482,904 cycles # 3.028 GHz + 31,000,190,511 instructions # 2.42 insn per cycle + 4.225042583 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.114881e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.952882e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.952882e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.046562e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.846964e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.846964e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.358551 sec - 10,039,725,787 cycles # 2.985 GHz - 19,365,092,946 instructions # 1.93 insn per cycle - 3.363774859 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1947) (512y: 0) (512z: 0) +TOTAL : 3.470466 sec + 10,075,062,185 cycles # 2.899 GHz + 19,376,808,574 instructions # 1.92 insn per cycle + 3.475725491 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.171976e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.077146e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.077146e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.091991e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.948349e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.948349e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.284609 sec - 9,776,297,677 cycles # 2.974 GHz - 18,994,290,313 instructions # 1.94 insn per cycle - 3.289871294 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1665) (512y: 181) (512z: 0) +TOTAL : 3.411832 sec + 9,706,821,336 cycles # 2.841 GHz + 18,993,945,887 instructions # 1.96 insn per cycle + 3.417093831 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.851683e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.465690e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.465690e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.817313e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.417390e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.417390e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.788845 sec - 8,587,145,882 cycles # 2.268 GHz - 15,735,191,375 instructions # 1.83 insn per cycle - 3.794205028 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 876) (512y: 154) (512z: 1258) +TOTAL : 3.864825 sec + 8,629,354,000 cycles # 2.231 GHz + 15,737,585,107 instructions # 1.82 insn per cycle + 3.870285071 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index f8a712ac39..8472c31bea 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_14:05:41 +DATE: 2023-11-03_19:40:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -51,14 +51,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.196142e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.495900e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.895038e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.065913e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.488032e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.905997e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.844030 sec - 6,338,648,018 cycles # 3.057 GHz - 11,545,833,593 instructions # 1.82 insn per cycle - 2.130455527 seconds time elapsed +TOTAL : 1.876732 sec + 6,299,612,348 cycles # 2.989 GHz + 11,571,253,190 instructions # 1.84 insn per cycle + 2.164294467 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -78,15 +78,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.121742e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.316851e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.316851e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.111600e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.304742e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.304742e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.006675 sec - 18,357,748,563 cycles # 3.058 GHz - 43,944,454,893 instructions # 2.39 insn per cycle - 6.011909954 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 420) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.064598 sec + 18,297,128,822 cycles # 3.015 GHz + 44,033,779,580 instructions # 2.41 insn per cycle + 6.069938342 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -104,15 +105,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.687890e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.212517e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.212517e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.622403e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.120612e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.120612e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.124821 sec - 12,751,927,336 cycles # 3.088 GHz - 31,015,330,451 instructions # 2.43 insn per cycle - 4.130146742 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1631) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.285433 sec + 12,790,120,071 cycles # 2.982 GHz + 31,000,688,554 instructions # 2.42 insn per cycle + 4.290779048 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,15 +132,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.082017e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.903162e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.903162e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.044295e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.854365e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.854365e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.411364 sec - 10,065,886,460 cycles # 2.947 GHz - 19,365,047,474 instructions # 1.92 insn per cycle - 3.416626826 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1947) (512y: 0) (512z: 0) +TOTAL : 3.476131 sec + 10,066,944,453 cycles # 2.893 GHz + 19,377,002,166 instructions # 1.92 insn per cycle + 3.481530813 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -156,15 +159,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.134766e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.016852e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.016852e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.095206e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.953285e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.953285e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.341613 sec - 9,750,832,688 cycles # 2.914 GHz - 18,982,576,827 instructions # 1.95 insn per cycle - 3.347220259 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1665) (512y: 181) (512z: 0) +TOTAL : 3.401536 sec + 9,758,102,764 cycles # 2.865 GHz + 18,996,151,120 instructions # 1.95 insn per cycle + 3.406936941 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -182,15 +186,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.885636e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.514573e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.514573e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.814025e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.410019e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.410019e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.720771 sec - 8,585,253,922 cycles # 2.305 GHz - 15,731,896,314 instructions # 1.83 insn per cycle - 3.725980193 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 876) (512y: 154) (512z: 1258) +TOTAL : 3.870433 sec + 8,615,604,376 cycles # 2.224 GHz + 15,736,922,136 instructions # 1.83 insn per cycle + 3.875834680 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index f9258d2008..b542059ad1 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_13:11:14 +DATE: 2023-11-03_19:00:50 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.423111e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.263090e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.097043e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.000398e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.960570e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.110004e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.704180 sec - 2,779,160,829 cycles # 2.936 GHz - 4,301,484,509 instructions # 1.55 insn per cycle - 1.022354163 seconds time elapsed +TOTAL : 0.647549 sec + 2,611,748,045 cycles # 2.979 GHz + 4,046,502,501 instructions # 1.55 insn per cycle + 0.933750268 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 @@ -76,15 +76,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.199434e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.422199e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.422199e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.159227e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.372064e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.372064e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.637640 sec - 17,488,217,829 cycles # 3.099 GHz - 41,784,834,407 instructions # 2.39 insn per cycle - 5.645669854 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 379) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.833108 sec + 17,445,226,847 cycles # 2.989 GHz + 41,885,202,351 instructions # 2.40 insn per cycle + 5.838346819 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 392) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.700066e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.246279e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.246279e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.682893e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.222491e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.222491e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.104444 sec - 12,562,195,567 cycles # 3.057 GHz - 30,178,467,292 instructions # 2.40 insn per cycle - 4.117007071 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1599) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.142121 sec + 12,470,632,862 cycles # 3.008 GHz + 30,166,171,065 instructions # 2.42 insn per cycle + 4.147564686 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1611) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.127446e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.985004e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.985004e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.069225e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.895121e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.895121e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.341762 sec - 9,968,599,535 cycles # 2.978 GHz - 19,097,045,495 instructions # 1.92 insn per cycle - 3.358406085 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) +TOTAL : 3.437470 sec + 9,952,077,094 cycles # 2.891 GHz + 19,112,450,451 instructions # 1.92 insn per cycle + 3.442739539 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1930) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.195204e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.119233e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.119233e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.130212e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.018241e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.018241e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.252318 sec - 9,689,875,846 cycles # 2.975 GHz - 18,763,677,025 instructions # 1.94 insn per cycle - 3.269057716 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1637) (512y: 178) (512z: 0) +TOTAL : 3.352335 sec + 9,644,260,853 cycles # 2.874 GHz + 18,779,667,176 instructions # 1.95 insn per cycle + 3.357742942 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1661) (512y: 178) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.918034e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.580927e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.580927e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.865497e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.495990e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.495990e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.667847 sec - 8,448,792,836 cycles # 2.300 GHz - 15,609,069,263 instructions # 1.85 insn per cycle - 3.680296602 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 862) (512y: 156) (512z: 1239) +TOTAL : 3.772482 sec + 8,452,356,069 cycles # 2.238 GHz + 15,617,271,494 instructions # 1.85 insn per cycle + 3.777813091 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 886) (512y: 156) (512z: 1239) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index 72bf3d22f1..9fba89aff3 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_13:48:16 +DATE: 2023-11-03_19:23:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.512564e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.589413e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.065843e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.483432e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.567049e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.058193e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.679787 sec - 2,732,583,530 cycles # 2.999 GHz - 4,175,209,336 instructions # 1.53 insn per cycle - 0.973013638 seconds time elapsed +TOTAL : 0.676370 sec + 2,703,741,341 cycles # 2.971 GHz + 4,197,515,180 instructions # 1.55 insn per cycle + 0.967825669 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -76,15 +76,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.707397e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.189358e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.189358e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.672486e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.141310e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.141310e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.074541 sec - 12,658,239,152 cycles # 3.103 GHz - 32,575,305,813 instructions # 2.57 insn per cycle - 4.079991878 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 283) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.163173 sec + 12,692,329,334 cycles # 3.045 GHz + 32,576,040,648 instructions # 2.57 insn per cycle + 4.168672183 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 296) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.156555e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.091845e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.091845e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.116856e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.025219e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.025219e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.314636 sec - 10,278,588,296 cycles # 3.098 GHz - 24,513,614,042 instructions # 2.38 insn per cycle - 3.320201354 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1239) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.372207 sec + 10,267,724,267 cycles # 3.041 GHz + 24,505,197,015 instructions # 2.39 insn per cycle + 3.377809241 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1251) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.349859e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.452580e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.452580e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.304978e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.380785e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.380785e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.064425 sec - 9,146,466,774 cycles # 2.981 GHz - 16,932,841,121 instructions # 1.85 insn per cycle - 3.070184966 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1613) (512y: 0) (512z: 0) +TOTAL : 3.125688 sec + 9,128,103,141 cycles # 2.916 GHz + 16,940,836,203 instructions # 1.86 insn per cycle + 3.131242434 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1631) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.396992e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.538162e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.538162e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.298021e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.382509e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.382509e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.009688 sec - 8,963,425,840 cycles # 2.974 GHz - 16,348,307,376 instructions # 1.82 insn per cycle - 3.015194945 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1346) (512y: 139) (512z: 0) +TOTAL : 3.144282 sec + 8,899,696,508 cycles # 2.834 GHz + 16,372,313,838 instructions # 1.84 insn per cycle + 3.149838418 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1370) (512y: 139) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.061526e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.857162e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.857162e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.053092e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.845549e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.845549e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.444659 sec - 7,945,690,234 cycles # 2.304 GHz - 14,580,913,598 instructions # 1.84 insn per cycle - 3.450299245 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 991) (512y: 158) (512z: 955) +TOTAL : 3.465226 sec + 7,910,184,141 cycles # 2.280 GHz + 14,591,740,895 instructions # 1.84 insn per cycle + 3.470686114 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1015) (512y: 158) (512z: 955) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index 0785ba7dac..9b85799057 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_13:48:46 +DATE: 2023-11-03_19:23:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.514678e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.572894e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.064311e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.480686e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.569964e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.063993e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.670061 sec - 2,738,456,878 cycles # 3.035 GHz - 4,206,945,724 instructions # 1.54 insn per cycle - 0.963542841 seconds time elapsed +TOTAL : 0.677772 sec + 2,691,282,086 cycles # 2.960 GHz + 4,219,338,579 instructions # 1.57 insn per cycle + 0.971577356 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 @@ -76,15 +76,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.236655e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.165602e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.165602e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.182406e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.087943e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.087943e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.207737 sec - 9,869,351,452 cycles # 3.072 GHz - 25,456,720,619 instructions # 2.58 insn per cycle - 3.213513616 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 236) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.286151 sec + 9,910,806,255 cycles # 3.012 GHz + 25,456,031,111 instructions # 2.57 insn per cycle + 3.291763573 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 249) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.512813e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.861138e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.861138e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.467752e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.800434e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.800434e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.900152 sec - 9,028,151,733 cycles # 3.108 GHz - 21,522,265,772 instructions # 2.38 insn per cycle - 2.905750020 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1107) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.950518 sec + 8,946,482,743 cycles # 3.027 GHz + 21,514,123,834 instructions # 2.40 insn per cycle + 2.956056552 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1119) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.524411e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.832678e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.832678e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.464134e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.723435e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.723435e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.879170 sec - 8,575,957,214 cycles # 2.974 GHz - 15,820,070,406 instructions # 1.84 insn per cycle - 2.884745744 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1476) (512y: 0) (512z: 0) +TOTAL : 2.952533 sec + 8,633,003,733 cycles # 2.920 GHz + 15,829,431,121 instructions # 1.83 insn per cycle + 2.958100358 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.568614e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.924961e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.924961e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.533505e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.859681e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.859681e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.834564 sec - 8,467,631,862 cycles # 2.982 GHz - 15,519,332,660 instructions # 1.83 insn per cycle - 2.840188470 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1244) (512y: 139) (512z: 0) +TOTAL : 2.876122 sec + 8,428,640,196 cycles # 2.926 GHz + 15,527,735,744 instructions # 1.84 insn per cycle + 2.881608685 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1268) (512y: 139) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.228071e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.164518e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.164518e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.128966e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.008830e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.008830e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.208995 sec - 7,606,383,968 cycles # 2.367 GHz - 14,281,516,951 instructions # 1.88 insn per cycle - 3.214658818 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1017) (512y: 164) (512z: 874) +TOTAL : 3.361119 sec + 7,560,312,259 cycles # 2.246 GHz + 14,293,668,051 instructions # 1.89 insn per cycle + 3.366622669 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1041) (512y: 164) (512z: 874) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index c1d51d5e2b..46e803358f 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_13:11:47 +DATE: 2023-11-03_19:01:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.090897e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.078806e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.260039e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.626199e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.328475e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.281681e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.585228 sec - 2,433,446,774 cycles # 2.999 GHz - 3,808,602,769 instructions # 1.57 insn per cycle - 0.883462810 seconds time elapsed +TOTAL : 0.560646 sec + 2,313,886,918 cycles # 2.957 GHz + 3,567,705,327 instructions # 1.54 insn per cycle + 0.840116151 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 @@ -76,15 +76,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.166692e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.383327e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.383327e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.146010e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.358105e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.358105e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.748719 sec - 17,847,326,756 cycles # 3.102 GHz - 43,555,210,437 instructions # 2.44 insn per cycle - 5.756388982 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 418) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.851033 sec + 17,813,996,987 cycles # 3.043 GHz + 43,616,814,202 instructions # 2.45 insn per cycle + 5.856069183 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.393554e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.657719e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.657719e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.343466e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.599751e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.599751e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.992868 sec - 9,247,144,257 cycles # 3.085 GHz - 21,931,320,878 instructions # 2.37 insn per cycle - 3.007580407 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1924) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.048613 sec + 9,276,606,540 cycles # 3.040 GHz + 21,930,294,042 instructions # 2.36 insn per cycle + 3.053688884 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.534752e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.887537e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.887537e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.523694e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.872956e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.872956e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.842369 sec - 8,296,474,164 cycles # 2.914 GHz - 15,586,874,856 instructions # 1.88 insn per cycle - 2.859206780 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2577) (512y: 0) (512z: 0) +TOTAL : 2.845518 sec + 8,308,772,789 cycles # 2.916 GHz + 15,593,301,532 instructions # 1.88 insn per cycle + 2.850623438 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.604319e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.017823e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.017823e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.489948e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.840461e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.840461e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.762625 sec - 8,242,291,698 cycles # 2.978 GHz - 15,423,882,127 instructions # 1.87 insn per cycle - 2.774867972 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2472) (512y: 9) (512z: 0) +TOTAL : 2.887357 sec + 8,231,785,355 cycles # 2.847 GHz + 15,437,944,905 instructions # 1.88 insn per cycle + 2.892363682 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.657199e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.090947e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.090947e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.580760e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.973673e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.973673e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.717847 sec - 6,617,027,405 cycles # 2.432 GHz - 12,861,770,185 instructions # 1.94 insn per cycle - 2.732059991 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1711) (512y: 17) (512z: 1439) +TOTAL : 2.796324 sec + 6,629,287,981 cycles # 2.367 GHz + 12,873,018,117 instructions # 1.94 insn per cycle + 2.801456274 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index 1a3ded7611..a12ca3b41d 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_13:59:31 +DATE: 2023-11-03_19:34:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.290971e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.508237e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.508237e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.243102e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.475352e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.475352e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.659289 sec - 5,780,451,529 cycles # 3.064 GHz - 10,396,790,543 instructions # 1.80 insn per cycle - 1.943549281 seconds time elapsed +TOTAL : 1.676327 sec + 5,681,132,328 cycles # 2.981 GHz + 10,328,752,116 instructions # 1.82 insn per cycle + 1.962251346 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost @@ -85,15 +85,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.141808e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.350661e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.350661e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.117341e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.320071e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.320071e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.967801 sec - 18,491,989,401 cycles # 3.096 GHz - 43,704,548,705 instructions # 2.36 insn per cycle - 5.973986343 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 418) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.103747 sec + 18,503,457,384 cycles # 3.029 GHz + 43,763,268,873 instructions # 2.37 insn per cycle + 6.109986471 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -112,15 +113,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.273930e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.393611e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.393611e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.169781e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.246790e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.246790e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.250354 sec - 9,995,547,177 cycles # 3.070 GHz - 23,265,528,960 instructions # 2.33 insn per cycle - 3.256463701 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1924) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.406148 sec + 10,026,239,155 cycles # 2.945 GHz + 23,264,915,776 instructions # 2.32 insn per cycle + 3.412744895 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -139,15 +141,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.480521e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.709421e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.709421e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.376931e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.582524e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.582524e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.007311 sec - 9,024,110,492 cycles # 2.995 GHz - 16,706,406,071 instructions # 1.85 insn per cycle - 3.013559701 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2577) (512y: 0) (512z: 0) +TOTAL : 3.133404 sec + 9,115,108,969 cycles # 2.904 GHz + 16,712,850,458 instructions # 1.83 insn per cycle + 3.139765331 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -166,15 +169,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.505086e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.767088e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.767088e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.412136e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.649634e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.649634e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.977230 sec - 8,950,702,512 cycles # 3.001 GHz - 16,543,195,202 instructions # 1.85 insn per cycle - 2.983414874 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2472) (512y: 9) (512z: 0) +TOTAL : 3.093398 sec + 9,015,171,302 cycles # 2.909 GHz + 16,559,247,945 instructions # 1.84 insn per cycle + 3.099791137 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -193,15 +197,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.494614e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.765296e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.765296e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.406219e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.608241e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.608241e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.993974 sec - 7,449,647,781 cycles # 2.484 GHz - 14,067,779,386 instructions # 1.89 insn per cycle - 3.000162317 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1711) (512y: 17) (512z: 1439) +TOTAL : 3.106272 sec + 7,475,444,541 cycles # 2.404 GHz + 14,076,958,110 instructions # 1.88 insn per cycle + 3.112522018 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index 370497cac6..e12a7cff38 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_14:12:45 +DATE: 2023-11-03_19:47:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.314201e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.171400e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.214137e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.309547e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.164321e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.211559e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 -TOTAL : 1.150300 sec - 4,173,208,289 cycles # 3.043 GHz - 6,635,894,357 instructions # 1.59 insn per cycle - 1.428443006 seconds time elapsed +TOTAL : 1.178788 sec + 4,175,363,575 cycles # 2.986 GHz + 6,687,157,832 instructions # 1.60 insn per cycle + 1.455561692 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 @@ -76,15 +76,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.159655e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.375997e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.375997e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.139229e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.352216e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.352216e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 6.103053 sec - 18,835,159,555 cycles # 3.084 GHz - 43,737,478,159 instructions # 2.32 insn per cycle - 6.108226259 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 418) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.211284 sec + 18,855,190,279 cycles # 3.034 GHz + 43,795,517,542 instructions # 2.32 insn per cycle + 6.216374296 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.380206e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.638253e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.638253e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.318674e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.546898e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.546898e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 3.321443 sec - 10,224,469,171 cycles # 3.074 GHz - 22,011,805,915 instructions # 2.15 insn per cycle - 3.326535999 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1924) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.402195 sec + 10,237,833,782 cycles # 3.006 GHz + 22,007,212,368 instructions # 2.15 insn per cycle + 3.407333694 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.567060e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.927154e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.927154e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.476676e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.816143e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.816143e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.130332 sec - 9,276,752,637 cycles # 2.959 GHz - 15,496,982,915 instructions # 1.67 insn per cycle - 3.135534079 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2577) (512y: 0) (512z: 0) +TOTAL : 3.234448 sec + 9,334,268,427 cycles # 2.883 GHz + 15,503,242,414 instructions # 1.66 insn per cycle + 3.239539945 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.606068e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.017624e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.017624e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.532354e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.931778e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.931778e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.086367 sec - 9,231,582,857 cycles # 2.987 GHz - 15,133,340,078 instructions # 1.64 insn per cycle - 3.091367178 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2472) (512y: 9) (512z: 0) +TOTAL : 3.179353 sec + 9,298,076,707 cycles # 2.921 GHz + 15,144,691,612 instructions # 1.63 insn per cycle + 3.184641880 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.528786e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.869868e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.869868e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.550309e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.928739e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.928739e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.188987 sec - 7,617,585,488 cycles # 2.385 GHz - 12,570,876,558 instructions # 1.65 insn per cycle - 3.194434066 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1711) (512y: 17) (512z: 1439) +TOTAL : 3.163394 sec + 7,678,426,346 cycles # 2.424 GHz + 12,579,409,911 instructions # 1.64 insn per cycle + 3.168501704 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt index 65103394f2..ed97b2f8ed 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_14:09:30 +DATE: 2023-11-03_19:44:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.323201e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.195679e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.287097e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.311918e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.184761e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.263047e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.839275 sec - 3,258,064,410 cycles # 3.046 GHz - 6,600,815,561 instructions # 2.03 insn per cycle - 1.128693838 seconds time elapsed +TOTAL : 0.849658 sec + 3,163,783,620 cycles # 2.955 GHz + 6,425,624,965 instructions # 2.03 insn per cycle + 1.127772989 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 @@ -76,15 +76,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.147689e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.360845e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.360845e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.132012e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.344208e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.344208e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.843460 sec - 17,842,880,354 cycles # 3.051 GHz - 43,555,724,067 instructions # 2.44 insn per cycle - 5.848538265 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 418) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.993383 sec + 18,094,070,839 cycles # 3.017 GHz + 43,613,404,695 instructions # 2.41 insn per cycle + 5.998406050 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.313187e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.558279e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.558279e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.281067e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.486158e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.486158e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.090860 sec - 9,280,161,539 cycles # 2.999 GHz - 21,931,630,305 instructions # 2.36 insn per cycle - 3.095925319 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1924) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.130477 sec + 9,257,197,715 cycles # 2.953 GHz + 21,925,291,921 instructions # 2.37 insn per cycle + 3.135663717 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.600147e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.974521e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.974521e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.526300e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.881905e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.881905e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.770030 sec - 8,277,937,688 cycles # 2.984 GHz - 15,586,216,059 instructions # 1.88 insn per cycle - 2.775082359 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2577) (512y: 0) (512z: 0) +TOTAL : 2.846007 sec + 8,323,404,187 cycles # 2.920 GHz + 15,589,367,643 instructions # 1.87 insn per cycle + 2.851124263 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.608908e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.023062e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.023062e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.559394e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.951403e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.951403e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.758732 sec - 8,219,982,403 cycles # 2.975 GHz - 15,429,625,460 instructions # 1.88 insn per cycle - 2.763845157 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2472) (512y: 9) (512z: 0) +TOTAL : 2.815665 sec + 8,248,875,592 cycles # 2.925 GHz + 15,439,478,624 instructions # 1.87 insn per cycle + 2.820889860 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.673753e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.118689e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.118689e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.553964e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.948928e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.948928e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.699227 sec - 6,592,595,838 cycles # 2.439 GHz - 12,860,007,004 instructions # 1.95 insn per cycle - 2.704311392 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1711) (512y: 17) (512z: 1439) +TOTAL : 2.827281 sec + 6,687,814,053 cycles # 2.363 GHz + 12,869,763,437 instructions # 1.92 insn per cycle + 2.832592565 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index c66b9e94cf..c7d745ef4d 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_14:06:16 +DATE: 2023-11-03_19:41:12 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -51,14 +51,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.319519e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.146808e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.143902e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.077097e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.138341e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.120075e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.448649 sec - 5,087,365,027 cycles # 3.052 GHz - 9,310,766,276 instructions # 1.83 insn per cycle - 1.725195666 seconds time elapsed +TOTAL : 1.480161 sec + 5,077,584,264 cycles # 2.967 GHz + 9,258,149,444 instructions # 1.82 insn per cycle + 1.768271684 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -78,15 +78,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.155200e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.369267e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.369267e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.142005e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.354012e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.354012e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.802015 sec - 17,844,581,528 cycles # 3.074 GHz - 43,556,997,966 instructions # 2.44 insn per cycle - 5.807155409 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 418) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.874856 sec + 17,835,700,462 cycles # 3.034 GHz + 43,613,540,806 instructions # 2.45 insn per cycle + 5.879931479 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -104,15 +105,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.407034e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.683536e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.683536e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.282759e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.491220e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.491220e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.971545 sec - 9,217,486,095 cycles # 3.097 GHz - 21,930,311,629 instructions # 2.38 insn per cycle - 2.976586238 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1924) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.129132 sec + 9,269,728,355 cycles # 2.963 GHz + 21,928,484,188 instructions # 2.37 insn per cycle + 3.134244707 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,15 +132,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.600726e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.979250e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.979250e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.516560e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.868004e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.868004e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.769304 sec - 8,276,687,518 cycles # 2.984 GHz - 15,585,955,312 instructions # 1.88 insn per cycle - 2.774287197 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2577) (512y: 0) (512z: 0) +TOTAL : 2.857533 sec + 8,336,241,805 cycles # 2.913 GHz + 15,589,958,795 instructions # 1.87 insn per cycle + 2.862709487 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -156,15 +159,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.628217e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.043704e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.043704e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.536616e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.924197e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.924197e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.739955 sec - 8,198,710,916 cycles # 2.988 GHz - 15,428,862,262 instructions # 1.88 insn per cycle - 2.744927346 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2472) (512y: 9) (512z: 0) +TOTAL : 2.838427 sec + 8,267,692,084 cycles # 2.908 GHz + 15,438,877,256 instructions # 1.87 insn per cycle + 2.843475918 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -182,15 +186,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.363138e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.573284e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.573284e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.539393e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.905150e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.905150e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 3.037682 sec - 6,617,551,728 cycles # 2.175 GHz - 12,861,059,438 instructions # 1.94 insn per cycle - 3.043153629 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1711) (512y: 17) (512z: 1439) +TOTAL : 2.843291 sec + 6,667,785,493 cycles # 2.342 GHz + 12,868,798,226 instructions # 1.93 insn per cycle + 2.848396098 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index b24db0679a..2a5177092e 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_13:12:16 +DATE: 2023-11-03_19:01:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.090597e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.086053e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.295941e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.628396e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.344836e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.322116e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.595642 sec - 2,379,950,695 cycles # 2.907 GHz - 3,760,363,157 instructions # 1.58 insn per cycle - 0.889829124 seconds time elapsed +TOTAL : 0.558495 sec + 2,344,289,295 cycles # 2.966 GHz + 3,579,154,611 instructions # 1.53 insn per cycle + 0.847997464 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 @@ -76,15 +76,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.252100e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.506607e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.506607e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.195436e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.435503e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.435503e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.378652 sec - 16,714,293,164 cycles # 3.105 GHz - 41,313,255,981 instructions # 2.47 insn per cycle - 5.386196800 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 362) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.634613 sec + 16,757,667,455 cycles # 2.972 GHz + 41,375,848,460 instructions # 2.47 insn per cycle + 5.639688103 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.402586e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.722326e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.722326e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.409189e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.740073e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.740073e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.985937 sec - 9,062,093,173 cycles # 3.030 GHz - 21,236,533,356 instructions # 2.34 insn per cycle - 3.002020734 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1829) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.974456 sec + 9,031,167,153 cycles # 3.032 GHz + 21,234,204,961 instructions # 2.35 insn per cycle + 2.979655809 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1841) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.478980e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.815371e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.815371e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.541260e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.926631e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.926631e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.903147 sec - 8,298,189,603 cycles # 2.854 GHz - 15,421,650,856 instructions # 1.86 insn per cycle - 2.919415247 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2518) (512y: 0) (512z: 0) +TOTAL : 2.832126 sec + 8,284,857,543 cycles # 2.922 GHz + 15,430,300,133 instructions # 1.86 insn per cycle + 2.837298063 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2536) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.666988e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.139402e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.139402e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.592912e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.031163e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.031163e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.706988 sec - 8,127,237,805 cycles # 2.997 GHz - 15,233,409,972 instructions # 1.87 insn per cycle - 2.719726531 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2399) (512y: 8) (512z: 0) +TOTAL : 2.778473 sec + 8,124,076,124 cycles # 2.921 GHz + 15,242,043,085 instructions # 1.88 insn per cycle + 2.783650122 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2423) (512y: 8) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.671852e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.132790e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.132790e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.583024e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.982786e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.982786e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.707953 sec - 6,591,494,668 cycles # 2.430 GHz - 12,839,585,857 instructions # 1.95 insn per cycle - 2.719283692 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1681) (512y: 18) (512z: 1427) +TOTAL : 2.793855 sec + 6,612,725,918 cycles # 2.363 GHz + 12,851,623,569 instructions # 1.94 insn per cycle + 2.799020549 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1705) (512y: 18) (512z: 1427) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index 89d3cca091..b5507320b6 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_13:49:13 +DATE: 2023-11-03_19:24:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.311771e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.188006e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.262468e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.295762e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.181123e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.251991e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.576457 sec - 2,389,540,020 cycles # 2.950 GHz - 3,688,400,686 instructions # 1.54 insn per cycle - 0.867059057 seconds time elapsed +TOTAL : 0.577748 sec + 2,371,472,909 cycles # 2.938 GHz + 3,662,215,838 instructions # 1.54 insn per cycle + 0.866645313 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 @@ -76,15 +76,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.741075e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.279072e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.279072e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.709669e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.230063e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.230063e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.966477 sec - 12,234,047,740 cycles # 3.081 GHz - 32,532,108,547 instructions # 2.66 insn per cycle - 3.971800905 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 299) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.043238 sec + 12,201,253,013 cycles # 3.016 GHz + 32,520,928,331 instructions # 2.67 insn per cycle + 4.048480591 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 312) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.848238e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.804103e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.804103e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.776736e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.688717e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.688717e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.569201 sec - 8,000,233,571 cycles # 3.109 GHz - 18,689,180,183 instructions # 2.34 insn per cycle - 2.574590277 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1542) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.629894 sec + 8,006,523,859 cycles # 3.039 GHz + 18,689,561,969 instructions # 2.33 insn per cycle + 2.635155805 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1554) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.959458e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.878325e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.878325e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.876319e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.776118e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.776118e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.477471 sec - 7,440,495,496 cycles # 2.998 GHz - 14,252,548,520 instructions # 1.92 insn per cycle - 2.482919486 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2219) (512y: 0) (512z: 0) +TOTAL : 2.544972 sec + 7,483,863,921 cycles # 2.935 GHz + 14,252,784,118 instructions # 1.90 insn per cycle + 2.550249205 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2237) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.020321e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.073175e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.073175e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.940665e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.960644e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.960644e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.430130 sec - 7,321,892,700 cycles # 3.008 GHz - 13,944,035,239 instructions # 1.90 insn per cycle - 2.435416551 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2072) (512y: 3) (512z: 0) +TOTAL : 2.495422 sec + 7,326,781,172 cycles # 2.931 GHz + 13,945,833,508 instructions # 1.90 insn per cycle + 2.500698244 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2096) (512y: 3) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.722686e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.262580e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.262580e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.636740e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.108198e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.108198e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.661980 sec - 6,503,812,696 cycles # 2.440 GHz - 13,425,346,953 instructions # 2.06 insn per cycle - 2.667536363 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2047) (512y: 1) (512z: 1198) +TOTAL : 2.746264 sec + 6,527,138,912 cycles # 2.373 GHz + 13,421,028,013 instructions # 2.06 insn per cycle + 2.751679406 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2071) (512y: 1) (512z: 1198) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index 15fa1a5fae..b6c42e0895 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_13:49:40 +DATE: 2023-11-03_19:24:50 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.308009e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.196636e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.296666e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.300995e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.194789e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.295764e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.571463 sec - 2,377,347,881 cycles # 2.993 GHz - 3,694,824,692 instructions # 1.55 insn per cycle - 0.851108222 seconds time elapsed +TOTAL : 0.573687 sec + 2,396,122,888 cycles # 2.957 GHz + 3,709,386,643 instructions # 1.55 insn per cycle + 0.867525381 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 @@ -76,15 +76,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.270957e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.300156e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.300156e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.274435e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.306451e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.306451e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.134664 sec - 9,456,512,355 cycles # 3.013 GHz - 25,299,662,978 instructions # 2.68 insn per cycle - 3.139956692 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 250) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.128769 sec + 9,423,056,878 cycles # 3.008 GHz + 25,306,341,141 instructions # 2.69 insn per cycle + 3.134038482 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 263) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.217435e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.962242e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.962242e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.099658e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.759584e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.759584e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.315359 sec - 7,212,398,641 cycles # 3.109 GHz - 16,900,659,302 instructions # 2.34 insn per cycle - 2.320845440 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1347) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.397339 sec + 7,201,211,606 cycles # 2.998 GHz + 16,901,413,977 instructions # 2.35 insn per cycle + 2.402789017 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1359) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.041323e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.188628e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.188628e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.019910e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.199492e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.199492e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.427336 sec - 7,137,617,302 cycles # 2.935 GHz - 13,617,817,493 instructions # 1.91 insn per cycle - 2.432974579 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2042) (512y: 0) (512z: 0) +TOTAL : 2.443323 sec + 7,147,435,963 cycles # 2.920 GHz + 13,619,110,670 instructions # 1.91 insn per cycle + 2.448969091 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2060) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.162263e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.461570e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.461570e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.050148e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.307582e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.307582e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.336082 sec - 7,036,323,508 cycles # 3.006 GHz - 13,428,129,683 instructions # 1.91 insn per cycle - 2.341501579 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1921) (512y: 4) (512z: 0) +TOTAL : 2.423418 sec + 7,082,396,314 cycles # 2.918 GHz + 13,431,226,521 instructions # 1.90 insn per cycle + 2.429141482 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1945) (512y: 4) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1 Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.792413e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.457184e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.457184e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.725279e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.338904e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.338904e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.606747 sec - 6,335,932,322 cycles # 2.428 GHz - 13,156,642,126 instructions # 2.08 insn per cycle - 2.612124224 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2005) (512y: 1) (512z: 1083) +TOTAL : 2.669392 sec + 6,366,623,257 cycles # 2.381 GHz + 13,153,230,984 instructions # 2.07 insn per cycle + 2.674848562 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2029) (512y: 1) (512z: 1083) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index c8a1826ac9..40be1e0fe4 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_13:12:46 +DATE: 2023-11-03_19:02:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.420580e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.225288e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.974352e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.986561e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.920506e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.026737e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.696522 sec - 2,828,368,151 cycles # 3.033 GHz - 4,459,128,504 instructions # 1.58 insn per cycle - 0.999812364 seconds time elapsed +TOTAL : 0.651585 sec + 2,613,210,290 cycles # 2.977 GHz + 4,026,633,947 instructions # 1.54 insn per cycle + 0.940304085 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -76,15 +76,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.118611e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.307197e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.307197e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.098312e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.283308e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.283308e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.013136 sec - 18,776,569,958 cycles # 3.120 GHz - 44,190,473,826 instructions # 2.35 insn per cycle - 6.021440922 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 426) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.126587 sec + 18,732,621,094 cycles # 3.056 GHz + 44,288,636,649 instructions # 2.36 insn per cycle + 6.131702524 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 439) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.736065e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.298893e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.298893e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.724748e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.279623e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.279623e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.023958 sec - 12,426,947,550 cycles # 3.084 GHz - 30,958,210,972 instructions # 2.49 insn per cycle - 4.038824000 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1673) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.052368 sec + 12,345,078,225 cycles # 3.044 GHz + 30,962,385,061 instructions # 2.51 insn per cycle + 4.057665704 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1685) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.009814e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.796099e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.796099e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.012805e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.801799e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.801799e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.529040 sec - 10,150,198,206 cycles # 2.872 GHz - 19,380,941,090 instructions # 1.91 insn per cycle - 3.542539797 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2128) (512y: 0) (512z: 0) +TOTAL : 3.527503 sec + 10,105,777,222 cycles # 2.861 GHz + 19,402,091,411 instructions # 1.92 insn per cycle + 3.532885933 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2146) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.071616e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.920585e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.920585e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.136223e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.011490e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.011490e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.439842 sec - 9,801,969,382 cycles # 2.845 GHz - 18,964,241,159 instructions # 1.93 insn per cycle - 3.452522056 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1835) (512y: 188) (512z: 0) +TOTAL : 3.337554 sec + 9,780,270,182 cycles # 2.927 GHz + 18,984,447,401 instructions # 1.94 insn per cycle + 3.342834380 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1859) (512y: 188) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.835716e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.462614e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.462614e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.916274e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.582982e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.582982e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.831470 sec - 8,408,634,839 cycles # 2.192 GHz - 15,052,624,754 instructions # 1.79 insn per cycle - 3.846787066 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 999) (512y: 155) (512z: 1316) +TOTAL : 3.678279 sec + 8,374,553,290 cycles # 2.274 GHz + 15,066,979,076 instructions # 1.80 insn per cycle + 3.683518796 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1023) (512y: 155) (512z: 1316) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index b5a3475756..d0448f95d2 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_13:13:19 +DATE: 2023-11-03_19:02:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.422335e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.236601e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.020207e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.995389e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.942657e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.069355e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.691416 sec - 2,819,877,381 cycles # 3.039 GHz - 4,403,256,288 instructions # 1.56 insn per cycle - 1.000837396 seconds time elapsed +TOTAL : 0.648218 sec + 2,577,449,374 cycles # 2.937 GHz + 3,930,119,139 instructions # 1.52 insn per cycle + 0.934838617 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 @@ -76,15 +76,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.161899e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.369033e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.369033e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.138539e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.340756e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.340756e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.806095 sec - 18,032,240,191 cycles # 3.104 GHz - 42,441,543,294 instructions # 2.35 insn per cycle - 5.813826022 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 408) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.930169 sec + 17,940,598,550 cycles # 3.023 GHz + 42,539,439,563 instructions # 2.37 insn per cycle + 5.935391018 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 421) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.753631e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.336727e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.336727e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.737380e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.320541e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.320541e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.992523 sec - 12,184,645,859 cycles # 3.048 GHz - 30,264,428,187 instructions # 2.48 insn per cycle - 4.007432791 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1680) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.022351 sec + 12,179,829,023 cycles # 3.025 GHz + 30,269,422,152 instructions # 2.49 insn per cycle + 4.027705928 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1692) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.094154e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.931089e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.931089e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.003006e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.791277e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.791277e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.390247 sec - 10,038,085,304 cycles # 2.956 GHz - 19,265,024,477 instructions # 1.92 insn per cycle - 3.403373491 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2144) (512y: 0) (512z: 0) +TOTAL : 3.544763 sec + 10,086,483,930 cycles # 2.843 GHz + 19,285,075,836 instructions # 1.91 insn per cycle + 3.550049339 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2162) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.206540e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.140055e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.140055e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.153713e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.048947e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.048947e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.238082 sec - 9,649,245,798 cycles # 2.975 GHz - 18,764,779,113 instructions # 1.94 insn per cycle - 3.251590176 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1809) (512y: 191) (512z: 0) +TOTAL : 3.313722 sec + 9,652,564,948 cycles # 2.909 GHz + 18,773,850,855 instructions # 1.94 insn per cycle + 3.319022077 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1833) (512y: 191) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.991486e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.705796e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.705796e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.911178e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.576380e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.576380e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.545115 sec - 8,268,426,013 cycles # 2.329 GHz - 14,974,356,151 instructions # 1.81 insn per cycle - 3.557224656 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 996) (512y: 156) (512z: 1305) +TOTAL : 3.691490 sec + 8,274,258,282 cycles # 2.239 GHz + 14,991,882,108 instructions # 1.81 insn per cycle + 3.696773496 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1020) (512y: 156) (512z: 1305) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 86b81b3912..ecfe1f9032 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_13:13:52 +DATE: 2023-11-03_19:03:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.014510e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.133907e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.272655e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.269149e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.178306e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270483e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.524203 sec - 2,315,774,867 cycles # 3.023 GHz - 3,262,855,573 instructions # 1.41 insn per cycle - 0.842689521 seconds time elapsed +TOTAL : 0.515028 sec + 2,190,362,135 cycles # 2.945 GHz + 3,134,430,746 instructions # 1.43 insn per cycle + 0.801320986 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -76,15 +76,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.188691e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.254188e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.254188e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.141790e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.204663e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.204663e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.894044 sec - 15,078,170,131 cycles # 3.078 GHz - 38,412,995,351 instructions # 2.55 insn per cycle - 4.902013341 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.001947 sec + 15,160,921,453 cycles # 3.029 GHz + 38,440,320,018 instructions # 2.54 insn per cycle + 5.007262329 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.733567e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.939331e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.939331e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.537912e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.729582e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.729582e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.910573 sec - 9,058,606,298 cycles # 3.107 GHz - 24,587,125,160 instructions # 2.71 insn per cycle - 2.925346801 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2144) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.070180 sec + 9,135,564,109 cycles # 2.971 GHz + 24,595,068,911 instructions # 2.69 insn per cycle + 3.075510770 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.937642e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.460811e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.460811e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.794659e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.298456e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.298456e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.867147 sec - 5,466,849,614 cycles # 2.919 GHz - 11,257,507,450 instructions # 2.06 insn per cycle - 1.885058814 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2358) (512y: 0) (512z: 0) +TOTAL : 1.915155 sec + 5,488,800,341 cycles # 2.860 GHz + 11,269,289,809 instructions # 2.05 insn per cycle + 1.920562747 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.384127e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.004039e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.004039e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.465243e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.099655e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.099655e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.746179 sec - 4,957,706,445 cycles # 2.830 GHz - 10,563,056,082 instructions # 2.13 insn per cycle - 1.758886631 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2053) (512y: 144) (512z: 0) +TOTAL : 1.726047 sec + 4,948,464,581 cycles # 2.859 GHz + 10,575,268,094 instructions # 2.14 insn per cycle + 1.731560491 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.105961e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.350897e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.350897e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.977744e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.204839e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.204839e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.655795 sec - 5,371,604,588 cycles # 2.019 GHz - 7,798,605,112 instructions # 1.45 insn per cycle - 2.670392027 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1422) (512y: 122) (512z: 1542) +TOTAL : 2.740172 sec + 5,379,659,738 cycles # 1.960 GHz + 7,808,789,832 instructions # 1.45 insn per cycle + 2.745493260 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index 4806d829cd..dd2f256477 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_14:00:03 +DATE: 2023-11-03_19:35:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.553961e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.874598e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.874598e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.496633e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.880527e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.880527e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.804780 sec - 3,169,048,208 cycles # 3.028 GHz - 4,880,834,292 instructions # 1.54 insn per cycle - 1.104220884 seconds time elapsed +TOTAL : 0.808083 sec + 3,120,895,454 cycles # 2.971 GHz + 4,726,889,577 instructions # 1.51 insn per cycle + 1.107972527 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost @@ -85,15 +85,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.184144e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.247953e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.247953e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.117962e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.179706e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.179706e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.978186 sec - 15,425,389,553 cycles # 3.095 GHz - 38,473,246,448 instructions # 2.49 insn per cycle - 4.984877201 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.135527 sec + 15,504,544,823 cycles # 3.016 GHz + 38,497,224,440 instructions # 2.48 insn per cycle + 5.142229259 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -112,15 +113,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.689714e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.892865e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.892865e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.595756e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.790745e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.790745e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.021358 sec - 9,403,258,777 cycles # 3.107 GHz - 24,770,539,388 instructions # 2.63 insn per cycle - 3.028074019 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2144) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.098715 sec + 9,432,801,004 cycles # 3.038 GHz + 24,773,895,780 instructions # 2.63 insn per cycle + 3.105439323 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -139,15 +141,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.853118e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.356995e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.356995e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.527781e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.981315e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.981315e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.969874 sec - 5,810,125,038 cycles # 2.941 GHz - 11,543,676,632 instructions # 1.99 insn per cycle - 1.976428599 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2358) (512y: 0) (512z: 0) +TOTAL : 2.098555 sec + 5,826,323,105 cycles # 2.789 GHz + 11,554,423,664 instructions # 1.98 insn per cycle + 2.105206679 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -166,15 +169,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.501103e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.131136e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.131136e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.300396e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.893264e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.893264e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.790886 sec - 5,296,213,039 cycles # 2.947 GHz - 10,850,736,565 instructions # 2.05 insn per cycle - 1.797661712 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2053) (512y: 144) (512z: 0) +TOTAL : 1.849117 sec + 5,294,307,248 cycles # 2.854 GHz + 10,856,382,305 instructions # 2.05 insn per cycle + 1.855861110 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -193,15 +197,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.013399e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.249522e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.249522e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.891057e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.111611e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.111611e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.796200 sec - 5,717,704,345 cycles # 2.041 GHz - 8,043,694,509 instructions # 1.41 insn per cycle - 2.803146814 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1422) (512y: 122) (512z: 1542) +TOTAL : 2.882235 sec + 5,742,873,090 cycles # 1.988 GHz + 8,048,787,968 instructions # 1.40 insn per cycle + 2.889049440 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index 3d7587af5d..70c42f96ca 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_14:13:17 +DATE: 2023-11-03_19:48:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.580980e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.155500e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270417e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.579966e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.154296e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270387e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 0.614016 sec - 2,545,446,825 cycles # 3.021 GHz - 3,704,672,520 instructions # 1.46 insn per cycle - 0.900418326 seconds time elapsed +TOTAL : 0.619804 sec + 2,500,171,473 cycles # 2.947 GHz + 3,610,462,854 instructions # 1.44 insn per cycle + 0.906022247 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -76,15 +76,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.195203e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.261582e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.261582e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.141469e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.204103e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.204103e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.938760 sec - 15,225,135,904 cycles # 3.080 GHz - 38,429,365,378 instructions # 2.52 insn per cycle - 4.944153491 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.061672 sec + 15,345,417,554 cycles # 3.029 GHz + 38,452,483,858 instructions # 2.51 insn per cycle + 5.067127392 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.730675e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.936865e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.936865e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.594441e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.787517e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.787517e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.970349 sec - 9,231,773,469 cycles # 3.103 GHz - 24,585,772,995 instructions # 2.66 insn per cycle - 2.975730961 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2144) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.081938 sec + 9,306,122,505 cycles # 3.015 GHz + 24,590,602,612 instructions # 2.64 insn per cycle + 3.087467598 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.941104e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.468310e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.468310e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.780444e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.284766e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.284766e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.923604 sec - 5,670,197,148 cycles # 2.941 GHz - 11,239,356,426 instructions # 1.98 insn per cycle - 1.928877172 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2358) (512y: 0) (512z: 0) +TOTAL : 1.978919 sec + 5,659,108,727 cycles # 2.853 GHz + 11,248,307,846 instructions # 1.99 insn per cycle + 1.984493875 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.637555e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.290130e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.290130e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.409554e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.043503e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.043503e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.739458 sec - 5,136,547,861 cycles # 2.945 GHz - 10,513,142,920 instructions # 2.05 insn per cycle - 1.744858969 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2053) (512y: 144) (512z: 0) +TOTAL : 1.801971 sec + 5,131,678,035 cycles # 2.841 GHz + 10,518,217,961 instructions # 2.05 insn per cycle + 1.807387516 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.031295e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.258517e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.258517e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.952294e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.178919e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.178919e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.760441 sec - 5,552,660,719 cycles # 2.009 GHz - 7,747,208,324 instructions # 1.40 insn per cycle - 2.765757782 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1422) (512y: 122) (512z: 1542) +TOTAL : 2.820832 sec + 5,565,619,645 cycles # 1.970 GHz + 7,754,617,723 instructions # 1.39 insn per cycle + 2.826352548 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt index a96e6be131..4837b41444 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_14:10:00 +DATE: 2023-11-03_19:45:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.583700e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.153230e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.267951e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.583777e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.154968e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.271096e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.558026 sec - 2,313,505,925 cycles # 2.945 GHz - 3,521,498,010 instructions # 1.52 insn per cycle - 0.845028446 seconds time elapsed +TOTAL : 0.557101 sec + 2,322,977,037 cycles # 2.953 GHz + 3,599,423,025 instructions # 1.55 insn per cycle + 0.843882316 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -76,15 +76,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.178518e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.242243e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.242243e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.134010e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.196717e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.196717e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.915746 sec - 15,058,217,872 cycles # 3.061 GHz - 38,413,553,191 instructions # 2.55 insn per cycle - 4.921088360 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.022340 sec + 15,161,844,495 cycles # 3.017 GHz + 38,436,020,868 instructions # 2.54 insn per cycle + 5.028057319 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.728983e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.933272e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.933272e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.611425e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.807723e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.807723e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.913226 sec - 9,056,484,610 cycles # 3.105 GHz - 24,585,808,961 instructions # 2.71 insn per cycle - 2.918420692 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2144) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.009043 sec + 9,092,248,013 cycles # 3.018 GHz + 24,590,993,356 instructions # 2.70 insn per cycle + 3.014816078 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.777415e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.279308e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.279308e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.765157e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.263695e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.263695e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.916389 sec - 5,465,259,492 cycles # 2.845 GHz - 11,256,916,542 instructions # 2.06 insn per cycle - 1.921736653 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2358) (512y: 0) (512z: 0) +TOTAL : 1.924911 sec + 5,492,799,049 cycles # 2.847 GHz + 11,264,994,094 instructions # 2.05 insn per cycle + 1.930399853 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.574998e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.221322e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.221322e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.461458e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.086226e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.086226e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.694918 sec - 4,974,823,544 cycles # 2.928 GHz - 10,564,942,659 instructions # 2.12 insn per cycle - 1.700257525 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2053) (512y: 144) (512z: 0) +TOTAL : 1.728063 sec + 4,951,669,022 cycles # 2.858 GHz + 10,569,075,843 instructions # 2.13 insn per cycle + 1.733593807 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.100546e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.341902e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.341902e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.938989e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.163796e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.163796e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.657429 sec - 5,396,015,009 cycles # 2.027 GHz - 7,797,762,335 instructions # 1.45 insn per cycle - 2.662830126 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1422) (512y: 122) (512z: 1542) +TOTAL : 2.768049 sec + 5,404,539,268 cycles # 1.950 GHz + 7,804,733,779 instructions # 1.44 insn per cycle + 2.773480694 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index 854bbd9f01..04f32ac3bc 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_14:06:46 +DATE: 2023-11-03_19:41:43 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -51,14 +51,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.828614e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.157784e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.273520e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.845624e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.154000e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.267501e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.705831 sec - 2,807,624,090 cycles # 3.012 GHz - 4,395,187,189 instructions # 1.57 insn per cycle - 0.990782968 seconds time elapsed +TOTAL : 0.705622 sec + 2,764,377,825 cycles # 2.955 GHz + 4,322,445,800 instructions # 1.56 insn per cycle + 0.992638570 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -78,15 +78,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.204483e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.269768e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.269768e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.118266e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.179189e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.179189e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.857298 sec - 15,060,047,176 cycles # 3.098 GHz - 38,412,999,521 instructions # 2.55 insn per cycle - 4.862576188 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.055003 sec + 15,355,352,228 cycles # 3.035 GHz + 38,436,037,499 instructions # 2.50 insn per cycle + 5.060369145 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -104,15 +105,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.719132e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.926557e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.926557e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.619308e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.814626e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.814626e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.921866 sec - 9,063,981,939 cycles # 3.098 GHz - 24,586,733,507 instructions # 2.71 insn per cycle - 2.927309733 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2144) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.002993 sec + 9,098,824,080 cycles # 3.025 GHz + 24,590,228,698 instructions # 2.70 insn per cycle + 3.008485414 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,15 +132,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.836602e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.339797e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.339797e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.738465e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.252767e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.252767e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.897012 sec - 5,477,510,588 cycles # 2.881 GHz - 11,256,805,143 instructions # 2.06 insn per cycle - 1.902283714 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2358) (512y: 0) (512z: 0) +TOTAL : 1.934521 sec + 5,491,674,204 cycles # 2.833 GHz + 11,265,170,941 instructions # 2.05 insn per cycle + 1.939950087 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -156,15 +159,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.588100e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.236111e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.236111e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.341479e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.957193e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.957193e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.691889 sec - 4,967,386,818 cycles # 2.928 GHz - 10,562,108,549 instructions # 2.13 insn per cycle - 1.697331756 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2053) (512y: 144) (512z: 0) +TOTAL : 1.759206 sec + 4,958,873,003 cycles # 2.811 GHz + 10,570,272,367 instructions # 2.13 insn per cycle + 1.764825335 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -182,15 +186,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.004753e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.234505e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.234505e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.934828e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.158501e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.158501e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.721009 sec - 5,373,397,763 cycles # 1.972 GHz - 7,799,884,708 instructions # 1.45 insn per cycle - 2.726823881 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1422) (512y: 122) (512z: 1542) +TOTAL : 2.772888 sec + 5,409,288,056 cycles # 1.948 GHz + 7,806,084,388 instructions # 1.44 insn per cycle + 2.778257755 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index 097514ff9d..4e3b221e19 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_13:14:19 +DATE: 2023-11-03_19:03:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.009931e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.129778e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.265439e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.258167e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.174363e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.266024e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.527424 sec - 2,299,489,431 cycles # 3.016 GHz - 3,267,542,464 instructions # 1.42 insn per cycle - 0.837651469 seconds time elapsed +TOTAL : 0.515882 sec + 2,147,525,845 cycles # 2.877 GHz + 3,086,933,024 instructions # 1.44 insn per cycle + 0.803849250 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 @@ -76,15 +76,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.224210e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.289856e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.289856e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.170531e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.234097e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.234097e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.815616 sec - 14,985,071,290 cycles # 3.108 GHz - 40,140,432,395 instructions # 2.68 insn per cycle - 4.823429688 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.935479 sec + 15,016,135,362 cycles # 3.040 GHz + 40,166,123,209 instructions # 2.67 insn per cycle + 4.940913654 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.945464e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.176814e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.176814e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.815308e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.035943e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.035943e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.760471 sec - 8,597,514,748 cycles # 3.108 GHz - 23,680,256,209 instructions # 2.75 insn per cycle - 2.775860119 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2057) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.853658 sec + 8,679,305,567 cycles # 3.037 GHz + 23,688,803,932 instructions # 2.73 insn per cycle + 2.859362026 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2069) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.299934e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.716170e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.716170e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.201194e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.599502e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.599502e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.079179 sec - 6,095,655,744 cycles # 2.924 GHz - 13,066,539,755 instructions # 2.14 insn per cycle - 2.094091215 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2528) (512y: 0) (512z: 0) +TOTAL : 2.119971 sec + 6,076,924,812 cycles # 2.860 GHz + 13,078,281,182 instructions # 2.15 insn per cycle + 2.125352086 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2546) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.576810e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.033591e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.033591e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.478450e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.920522e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.920522e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.980357 sec - 5,802,077,453 cycles # 2.922 GHz - 12,325,865,262 instructions # 2.12 insn per cycle - 1.992793326 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2072) (512y: 294) (512z: 0) +TOTAL : 2.017570 sec + 5,787,274,892 cycles # 2.862 GHz + 12,336,105,279 instructions # 2.13 insn per cycle + 2.023012261 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2096) (512y: 294) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.772958e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.973447e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.973447e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.519779e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.701184e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.701184e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.881095 sec - 5,824,338,366 cycles # 2.018 GHz - 9,607,054,312 instructions # 1.65 insn per cycle - 2.894349646 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1486) (512y: 209) (512z: 1971) +TOTAL : 3.086221 sec + 5,817,765,621 cycles # 1.888 GHz + 9,621,068,231 instructions # 1.65 insn per cycle + 3.091564620 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1510) (512y: 209) (512z: 1971) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 3d76a3b1d9..3337c01ad4 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_13:50:05 +DATE: 2023-11-03_19:25:16 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.578660e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.159605e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.274665e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.554755e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.155174e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.268743e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.519206 sec - 2,269,803,995 cycles # 3.023 GHz - 3,192,192,477 instructions # 1.41 insn per cycle - 0.807995850 seconds time elapsed +TOTAL : 0.526687 sec + 2,250,994,801 cycles # 2.926 GHz + 3,097,737,524 instructions # 1.38 insn per cycle + 0.826717654 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -76,15 +76,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.560816e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.649704e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.649704e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.473532e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.556761e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.556761e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.195191 sec - 13,007,382,962 cycles # 3.097 GHz - 34,380,464,155 instructions # 2.64 insn per cycle - 4.200870709 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 673) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.344783 sec + 13,019,193,404 cycles # 2.993 GHz + 34,405,663,599 instructions # 2.64 insn per cycle + 4.350365607 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 686) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.116747e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.258719e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.258719e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.104680e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.249620e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.249620e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.466705 sec - 10,603,168,617 cycles # 3.057 GHz - 24,017,012,918 instructions # 2.27 insn per cycle - 3.472435796 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2570) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.483866 sec + 10,607,531,951 cycles # 3.041 GHz + 24,022,392,993 instructions # 2.26 insn per cycle + 3.489298956 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.867142e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.209093e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.209093e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.787875e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.125865e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.125865e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.254766 sec - 6,618,154,747 cycles # 2.929 GHz - 12,407,626,824 instructions # 1.87 insn per cycle - 2.260283575 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3138) (512y: 0) (512z: 0) +TOTAL : 2.295291 sec + 6,588,895,934 cycles # 2.865 GHz + 12,413,954,044 instructions # 1.88 insn per cycle + 2.300926049 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3156) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.186031e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.579450e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.579450e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.072251e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.445053e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.445053e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.122627 sec - 6,235,587,637 cycles # 2.931 GHz - 11,579,392,375 instructions # 1.86 insn per cycle - 2.128339591 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2668) (512y: 239) (512z: 0) +TOTAL : 2.171777 sec + 6,238,931,665 cycles # 2.866 GHz + 11,585,660,605 instructions # 1.86 insn per cycle + 2.177410338 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2692) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.159380e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.403563e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.403563e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.998110e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.229600e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.229600e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.621991 sec - 5,330,386,751 cycles # 2.029 GHz - 9,301,874,338 instructions # 1.75 insn per cycle - 2.627733477 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2092) (512y: 282) (512z: 1958) +TOTAL : 2.727363 sec + 5,337,713,756 cycles # 1.954 GHz + 9,308,309,205 instructions # 1.74 insn per cycle + 2.732896997 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2116) (512y: 282) (512z: 1958) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index eda142a51a..64e33308d5 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_13:50:32 +DATE: 2023-11-03_19:25:43 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.570782e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.152219e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.266830e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.571117e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.157677e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270835e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.518066 sec - 2,271,828,197 cycles # 3.023 GHz - 3,244,066,512 instructions # 1.43 insn per cycle - 0.808642452 seconds time elapsed +TOTAL : 0.523342 sec + 2,241,527,426 cycles # 2.944 GHz + 3,209,964,665 instructions # 1.43 insn per cycle + 0.819917937 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 @@ -76,15 +76,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.712334e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.810278e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.810278e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.658099e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.754988e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.754988e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.969060 sec - 12,332,764,240 cycles # 3.103 GHz - 35,033,157,193 instructions # 2.84 insn per cycle - 3.974712010 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 444) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.049112 sec + 12,374,606,485 cycles # 3.053 GHz + 35,058,016,337 instructions # 2.83 insn per cycle + 4.054549094 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 457) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.128417e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.275024e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.275024e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.088523e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.231607e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.231607e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.453917 sec - 10,684,583,355 cycles # 3.089 GHz - 23,090,903,403 instructions # 2.16 insn per cycle - 3.459546938 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2351) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.500477 sec + 10,694,410,777 cycles # 3.051 GHz + 23,099,336,289 instructions # 2.16 insn per cycle + 3.506159729 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2363) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.247993e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.651435e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.651435e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.105721e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.492220e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.492220e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.098516 sec - 6,166,838,461 cycles # 2.931 GHz - 11,963,065,848 instructions # 1.94 insn per cycle - 2.104346902 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2493) (512y: 0) (512z: 0) +TOTAL : 2.158641 sec + 6,163,495,994 cycles # 2.849 GHz + 11,969,488,967 instructions # 1.94 insn per cycle + 2.164367762 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2511) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.231478e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.639448e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.639448e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.169198e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.571659e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.571659e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.106732 sec - 6,010,784,913 cycles # 2.846 GHz - 11,136,301,440 instructions # 1.85 insn per cycle - 2.112515993 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2104) (512y: 174) (512z: 0) +TOTAL : 2.133549 sec + 6,039,094,179 cycles # 2.824 GHz + 11,144,077,781 instructions # 1.85 insn per cycle + 2.139096234 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2128) (512y: 174) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.031094e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.264647e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.264647e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.003701e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.233597e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.233597e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.703137 sec - 5,239,430,875 cycles # 1.935 GHz - 9,026,740,696 instructions # 1.72 insn per cycle - 2.708889136 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1627) (512y: 208) (512z: 1567) +TOTAL : 2.726476 sec + 5,224,063,612 cycles # 1.913 GHz + 9,034,702,359 instructions # 1.73 insn per cycle + 2.732050023 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1651) (512y: 208) (512z: 1567) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 29c708926a..8d92c550fe 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_13:14:46 +DATE: 2023-11-03_19:04:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.196933e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.559127e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.926612e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.099342e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.699387e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.953526e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.487537 sec - 2,076,343,494 cycles # 2.909 GHz - 2,953,975,768 instructions # 1.42 insn per cycle - 0.786754642 seconds time elapsed +TOTAL : 0.471293 sec + 2,042,101,644 cycles # 2.948 GHz + 2,946,816,826 instructions # 1.44 insn per cycle + 0.749881107 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 @@ -76,15 +76,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.364053e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.441665e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.441665e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.296642e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.371475e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.371475e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.516750 sec - 14,091,556,001 cycles # 3.117 GHz - 38,372,630,771 instructions # 2.72 insn per cycle - 4.524612799 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 574) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.650028 sec + 14,160,157,406 cycles # 3.043 GHz + 38,398,040,352 instructions # 2.71 insn per cycle + 4.655270250 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.244340e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.673304e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.673304e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.139917e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.562152e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.562152e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.081299 sec - 6,470,101,875 cycles # 3.101 GHz - 15,825,349,191 instructions # 2.45 insn per cycle - 2.096048134 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2677) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.124632 sec + 6,476,959,128 cycles # 3.042 GHz + 15,834,256,517 instructions # 2.44 insn per cycle + 2.129768462 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.485632e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.088212e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.088212e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.088663e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.043198e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.043198e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.189119 sec - 3,458,210,229 cycles # 2.896 GHz - 7,599,552,591 instructions # 2.20 insn per cycle - 1.205083556 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3033) (512y: 0) (512z: 0) +TOTAL : 1.237397 sec + 3,465,504,689 cycles # 2.794 GHz + 7,611,207,779 instructions # 2.20 insn per cycle + 1.242588855 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.016158e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.185590e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.185590e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.457008e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.096549e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.096549e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.114621 sec - 3,253,189,150 cycles # 2.904 GHz - 7,207,637,696 instructions # 2.22 insn per cycle - 1.131215020 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2826) (512y: 23) (512z: 0) +TOTAL : 1.196326 sec + 3,247,822,045 cycles # 2.704 GHz + 7,220,309,293 instructions # 2.22 insn per cycle + 1.201704693 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.475166e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.307198e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.307198e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.679715e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.389169e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.389169e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.483668 sec - 3,072,167,998 cycles # 2.063 GHz - 5,838,885,052 instructions # 1.90 insn per cycle - 1.496320265 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2340) (512y: 24) (512z: 1889) +TOTAL : 1.658315 sec + 3,062,288,257 cycles # 1.842 GHz + 5,850,668,317 instructions # 1.91 insn per cycle + 1.663822965 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index 25ff2590ad..a1ebef89d2 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_14:00:31 +DATE: 2023-11-03_19:35:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.151426e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.469883e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.469883e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.064201e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.498245e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.498245e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.663313 sec - 2,706,340,617 cycles # 3.037 GHz - 4,165,256,765 instructions # 1.54 insn per cycle - 0.947646628 seconds time elapsed +TOTAL : 0.670260 sec + 2,637,877,021 cycles # 2.942 GHz + 4,088,256,570 instructions # 1.55 insn per cycle + 0.955124097 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost @@ -85,15 +85,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.339221e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.415226e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.415226e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.270912e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.344925e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.344925e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.604104 sec - 14,275,638,221 cycles # 3.097 GHz - 38,413,171,238 instructions # 2.69 insn per cycle - 4.610228258 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 574) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.744982 sec + 14,378,860,027 cycles # 3.027 GHz + 38,435,472,086 instructions # 2.67 insn per cycle + 4.751370421 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -112,15 +113,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.196913e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.615655e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.615655e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.017460e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.422989e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.422989e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.144006 sec - 6,663,102,745 cycles # 3.100 GHz - 16,104,440,418 instructions # 2.42 insn per cycle - 2.150194082 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2677) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.222354 sec + 6,685,137,863 cycles # 3.001 GHz + 16,109,819,565 instructions # 2.41 insn per cycle + 2.228696460 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -139,15 +141,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.436831e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.082724e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.082724e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.204872e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.057185e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.057185e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.235899 sec - 3,661,958,938 cycles # 2.949 GHz - 7,836,314,271 instructions # 2.14 insn per cycle - 1.242239596 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3033) (512y: 0) (512z: 0) +TOTAL : 1.267912 sec + 3,665,496,802 cycles # 2.878 GHz + 7,843,464,752 instructions # 2.14 insn per cycle + 1.274414413 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -166,15 +169,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.010128e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.170283e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.170283e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.639653e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.116975e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.116975e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.161869 sec - 3,443,334,261 cycles # 2.950 GHz - 7,445,199,482 instructions # 2.16 insn per cycle - 1.168103439 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2826) (512y: 23) (512z: 0) +TOTAL : 1.220373 sec + 3,444,640,052 cycles # 2.810 GHz + 7,451,522,975 instructions # 2.16 insn per cycle + 1.226715796 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -193,15 +197,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.508269e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.333771e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.333771e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.178040e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.972638e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.972638e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.523419 sec - 3,270,348,443 cycles # 2.140 GHz - 6,093,572,486 instructions # 1.86 insn per cycle - 1.529595531 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2340) (512y: 24) (512z: 1889) +TOTAL : 1.593853 sec + 3,283,201,976 cycles # 2.053 GHz + 6,099,788,393 instructions # 1.86 insn per cycle + 1.600161746 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index cb54cefed4..b7fb0d6959 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_14:13:44 +DATE: 2023-11-03_19:48:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.434441e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.625386e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.947980e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.431152e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.624289e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.946132e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 -TOTAL : 0.560461 sec - 2,380,757,395 cycles # 3.021 GHz - 3,460,383,265 instructions # 1.45 insn per cycle - 0.845514302 seconds time elapsed +TOTAL : 0.564134 sec + 2,302,613,621 cycles # 2.942 GHz + 3,377,451,746 instructions # 1.47 insn per cycle + 0.841499880 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 @@ -76,15 +76,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.362534e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.440173e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.440173e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.289992e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.364715e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.364715e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 4.573205 sec - 14,247,235,365 cycles # 3.112 GHz - 38,399,348,971 instructions # 2.70 insn per cycle - 4.578265833 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 574) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.718949 sec + 14,318,249,819 cycles # 3.032 GHz + 38,421,429,911 instructions # 2.68 insn per cycle + 4.724102129 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.253010e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.682916e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.682916e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.077786e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.487595e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.487595e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.129666 sec - 6,633,670,352 cycles # 3.109 GHz - 15,836,863,697 instructions # 2.39 insn per cycle - 2.134853355 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2677) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.204441 sec + 6,639,814,735 cycles # 3.006 GHz + 15,841,902,427 instructions # 2.39 insn per cycle + 2.209539727 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.517990e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.095157e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.095157e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.307822e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.070999e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.070999e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.234639 sec - 3,623,239,445 cycles # 2.924 GHz - 7,583,203,150 instructions # 2.09 insn per cycle - 1.239691986 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3033) (512y: 0) (512z: 0) +TOTAL : 1.265035 sec + 3,649,285,785 cycles # 2.875 GHz + 7,591,137,573 instructions # 2.08 insn per cycle + 1.270319196 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.015481e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.177825e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.177825e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.974832e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.160037e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.160037e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.166274 sec - 3,418,214,243 cycles # 2.920 GHz - 7,158,770,979 instructions # 2.09 insn per cycle - 1.171464201 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2826) (512y: 23) (512z: 0) +TOTAL : 1.191816 sec + 3,426,519,284 cycles # 2.864 GHz + 7,166,067,248 instructions # 2.09 insn per cycle + 1.197132868 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.405756e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.247163e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.247163e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.265683e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.068951e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.068951e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.551800 sec - 3,227,998,686 cycles # 2.074 GHz - 5,789,283,627 instructions # 1.79 insn per cycle - 1.557068256 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2340) (512y: 24) (512z: 1889) +TOTAL : 1.584018 sec + 3,241,188,093 cycles # 2.041 GHz + 5,795,628,367 instructions # 1.79 insn per cycle + 1.589192883 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt index 87845a19d9..30f4fadf92 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_14:10:26 +DATE: 2023-11-03_19:45:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.488390e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.644453e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.964646e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.447666e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.634082e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.951326e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.506827 sec - 2,189,987,943 cycles # 3.026 GHz - 3,436,928,296 instructions # 1.57 insn per cycle - 0.782775854 seconds time elapsed +TOTAL : 0.513708 sec + 2,149,338,807 cycles # 2.936 GHz + 3,363,855,189 instructions # 1.57 insn per cycle + 0.790810409 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 @@ -76,15 +76,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.358375e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.435124e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.435124e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.247364e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.319306e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.319306e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.526256 sec - 14,080,847,145 cycles # 3.108 GHz - 38,369,987,972 instructions # 2.72 insn per cycle - 4.531177368 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 574) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.751713 sec + 14,161,394,696 cycles # 2.978 GHz + 38,393,782,229 instructions # 2.71 insn per cycle + 4.756965371 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.256589e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.691470e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.691470e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.102956e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.519127e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.519127e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.076070 sec - 6,465,651,360 cycles # 3.110 GHz - 15,824,795,059 instructions # 2.45 insn per cycle - 2.081190945 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2677) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.140784 sec + 6,476,072,518 cycles # 3.019 GHz + 15,828,662,766 instructions # 2.44 insn per cycle + 2.146087935 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.620688e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.105701e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.105701e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.357298e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.077430e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.077430e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.170267 sec - 3,454,298,635 cycles # 2.941 GHz - 7,599,254,165 instructions # 2.20 insn per cycle - 1.175519026 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3033) (512y: 0) (512z: 0) +TOTAL : 1.205006 sec + 3,468,184,099 cycles # 2.868 GHz + 7,606,030,531 instructions # 2.19 insn per cycle + 1.210138102 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.687626e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.121348e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.121348e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.559739e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.106426e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.106426e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.164248 sec - 3,254,264,032 cycles # 2.784 GHz - 7,207,984,280 instructions # 2.21 insn per cycle - 1.169514366 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2826) (512y: 23) (512z: 0) +TOTAL : 1.182909 sec + 3,252,386,286 cycles # 2.739 GHz + 7,215,128,616 instructions # 2.22 insn per cycle + 1.188234183 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.603116e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.469723e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.469723e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.332938e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.163555e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.163555e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.459450 sec - 3,063,578,056 cycles # 2.093 GHz - 5,838,117,130 instructions # 1.91 insn per cycle - 1.464612785 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2340) (512y: 24) (512z: 1889) +TOTAL : 1.514986 sec + 3,076,222,583 cycles # 2.024 GHz + 5,845,646,643 instructions # 1.90 insn per cycle + 1.520503790 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index 2fabea2bcd..65eed836f1 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_14:07:13 +DATE: 2023-11-03_19:42:11 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -51,14 +51,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.102411e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.633947e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.952715e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.910755e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.623741e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.938668e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.604346 sec - 2,499,145,476 cycles # 3.034 GHz - 3,893,994,904 instructions # 1.56 insn per cycle - 0.881470321 seconds time elapsed +TOTAL : 0.613295 sec + 2,456,965,302 cycles # 2.952 GHz + 3,803,211,416 instructions # 1.55 insn per cycle + 0.890835389 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -78,15 +78,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.356009e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.433242e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.433242e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.291712e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.365790e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.365790e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.530402 sec - 14,084,276,645 cycles # 3.106 GHz - 38,370,392,611 instructions # 2.72 insn per cycle - 4.535673091 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 574) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.660750 sec + 14,151,818,953 cycles # 3.034 GHz + 38,392,284,342 instructions # 2.71 insn per cycle + 4.665929439 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -104,15 +105,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.264298e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.695949e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.695949e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.100691e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.531126e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.531126e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.072861 sec - 6,463,221,241 cycles # 3.111 GHz - 15,824,115,222 instructions # 2.45 insn per cycle - 2.077965757 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2677) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.141262 sec + 6,484,613,456 cycles # 3.022 GHz + 15,829,197,800 instructions # 2.44 insn per cycle + 2.146554392 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -130,15 +132,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.557333e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.094837e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.094837e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.341999e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.073892e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.073892e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.177853 sec - 3,454,695,002 cycles # 2.922 GHz - 7,599,092,976 instructions # 2.20 insn per cycle - 1.182951114 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3033) (512y: 0) (512z: 0) +TOTAL : 1.207094 sec + 3,469,517,910 cycles # 2.864 GHz + 7,605,958,162 instructions # 2.19 insn per cycle + 1.212334488 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -156,15 +159,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.033926e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.201633e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.201633e+06 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.000164e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.163047e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.163047e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.093129 sec - 3,239,321,715 cycles # 2.952 GHz - 7,206,965,422 instructions # 2.22 insn per cycle - 1.098293023 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2826) (512y: 23) (512z: 0) +TOTAL : 1.132933 sec + 3,264,238,503 cycles # 2.869 GHz + 7,214,964,009 instructions # 2.21 insn per cycle + 1.138315941 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -182,15 +186,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.646235e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.502344e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.502344e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.339791e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.166023e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.166023e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.451183 sec - 3,057,295,502 cycles # 2.100 GHz - 5,837,882,102 instructions # 1.91 insn per cycle - 1.456347808 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2340) (512y: 24) (512z: 1889) +TOTAL : 1.514355 sec + 3,071,490,694 cycles # 2.022 GHz + 5,845,279,944 instructions # 1.90 insn per cycle + 1.519539150 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index 570702caf6..06d8f7d09d 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_13:15:09 +DATE: 2023-11-03_19:04:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.286668e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.628069e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.019302e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.108032e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.751852e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.017010e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.478643 sec - 2,113,601,376 cycles # 3.005 GHz - 2,975,316,663 instructions # 1.41 insn per cycle - 0.775074903 seconds time elapsed +TOTAL : 0.473084 sec + 2,025,626,323 cycles # 2.920 GHz + 2,923,341,053 instructions # 1.44 insn per cycle + 0.752440698 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 @@ -76,15 +76,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.311263e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.384837e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.384837e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.226197e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.296658e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.296658e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.617186 sec - 14,345,588,057 cycles # 3.104 GHz - 39,862,651,177 instructions # 2.78 insn per cycle - 4.624830306 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 557) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.795639 sec + 14,422,319,778 cycles # 3.005 GHz + 39,889,404,210 instructions # 2.77 insn per cycle + 4.800761254 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 570) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.842103e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.426487e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.426487e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.840353e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.410043e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.410043e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.881027 sec - 5,579,160,795 cycles # 2.958 GHz - 15,294,602,945 instructions # 2.74 insn per cycle - 1.898569017 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2461) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.880181 sec + 5,610,891,745 cycles # 2.978 GHz + 15,305,908,167 instructions # 2.73 insn per cycle + 1.885354787 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2473) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.849193e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.549754e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.549754e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.584020e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.270908e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.270908e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.611383 sec - 4,737,627,785 cycles # 2.932 GHz - 9,741,051,480 instructions # 2.06 insn per cycle - 1.625476103 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3692) (512y: 0) (512z: 0) +TOTAL : 1.679496 sec + 4,739,407,479 cycles # 2.814 GHz + 9,752,382,085 instructions # 2.06 insn per cycle + 1.685063058 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3710) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.036284e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.770599e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.770599e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.785300e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.495008e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.495008e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.570073 sec - 4,626,472,822 cycles # 2.937 GHz - 9,331,703,433 instructions # 2.02 insn per cycle - 1.582744287 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3473) (512y: 0) (512z: 0) +TOTAL : 1.630325 sec + 4,628,420,386 cycles # 2.831 GHz + 9,343,264,044 instructions # 2.02 insn per cycle + 1.635531127 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3497) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.204270e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.761377e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.761377e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.035393e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.577354e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.577354e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.770855 sec - 3,652,333,187 cycles # 2.056 GHz - 7,038,738,600 instructions # 1.93 insn per cycle - 1.784681837 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2582) (512y: 12) (512z: 2221) +TOTAL : 1.821625 sec + 3,652,061,133 cycles # 2.000 GHz + 7,049,331,376 instructions # 1.93 insn per cycle + 1.826875192 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2606) (512y: 12) (512z: 2221) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index 28778525bf..430bbd2c8e 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_13:50:59 +DATE: 2023-11-03_19:26:11 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.408658e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.634217e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.954065e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.386931e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.620878e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.939459e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.475559 sec - 2,115,805,668 cycles # 2.995 GHz - 3,005,784,945 instructions # 1.42 insn per cycle - 0.763544980 seconds time elapsed +TOTAL : 0.478570 sec + 2,066,322,031 cycles # 2.937 GHz + 2,939,169,205 instructions # 1.42 insn per cycle + 0.760998289 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 @@ -76,15 +76,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.640807e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.739837e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.739837e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.585659e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.679951e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.679951e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.051489 sec - 12,602,738,737 cycles # 3.107 GHz - 34,367,646,318 instructions # 2.73 insn per cycle - 4.056865009 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 683) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.141107 sec + 12,606,870,018 cycles # 3.041 GHz + 34,392,677,682 instructions # 2.73 insn per cycle + 4.146310630 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 696) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.598028e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.093458e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.093458e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.476247e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.957210e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.957210e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.955100 sec - 6,075,860,611 cycles # 3.100 GHz - 14,869,348,420 instructions # 2.45 insn per cycle - 1.960626555 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2997) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.000613 sec + 6,098,731,252 cycles # 3.041 GHz + 14,873,462,613 instructions # 2.44 insn per cycle + 2.006051106 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3009) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.638223e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.541140e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.541140e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.182448e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.992665e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.992665e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.454851 sec - 4,257,285,483 cycles # 2.917 GHz - 9,034,268,038 instructions # 2.12 insn per cycle - 1.460220416 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4427) (512y: 0) (512z: 0) +TOTAL : 1.544245 sec + 4,326,302,580 cycles # 2.793 GHz + 9,041,454,033 instructions # 2.09 insn per cycle + 1.549495391 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4445) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.775592e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.684138e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.684138e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.602793e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.504278e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.504278e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.428152 sec - 4,208,542,546 cycles # 2.938 GHz - 8,668,841,642 instructions # 2.06 insn per cycle - 1.433477910 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4220) (512y: 0) (512z: 0) +TOTAL : 1.462983 sec + 4,209,847,303 cycles # 2.868 GHz + 8,675,528,842 instructions # 2.06 insn per cycle + 1.468300337 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4244) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.842720e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.341315e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.341315e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.697162e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.177263e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.177263e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.875069 sec - 3,841,952,399 cycles # 2.044 GHz - 7,812,706,346 instructions # 2.03 insn per cycle - 1.880579118 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4396) (512y: 0) (512z: 2556) +TOTAL : 1.925379 sec + 3,842,178,645 cycles # 1.991 GHz + 7,819,452,293 instructions # 2.04 insn per cycle + 1.930845155 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4420) (512y: 0) (512z: 2556) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index 20ffd90a56..c32244c33c 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_13:51:23 +DATE: 2023-11-03_19:26:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.511161e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.703547e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.038636e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.460575e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.684792e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.012555e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.475749 sec - 2,104,291,018 cycles # 2.988 GHz - 2,955,446,490 instructions # 1.40 insn per cycle - 0.763206295 seconds time elapsed +TOTAL : 0.478960 sec + 2,073,686,428 cycles # 2.952 GHz + 2,982,309,893 instructions # 1.44 insn per cycle + 0.760465246 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 @@ -76,15 +76,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.830330e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.942360e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.942360e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.768420e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.879887e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.879887e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 3.785680 sec - 11,752,006,274 cycles # 3.100 GHz - 35,103,987,323 instructions # 2.99 insn per cycle - 3.791246697 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 457) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.874951 sec + 11,759,850,982 cycles # 3.031 GHz + 35,129,174,459 instructions # 2.99 insn per cycle + 3.880406297 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 470) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.696846e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.211128e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.211128e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.548911e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.058975e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.058975e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.922397 sec - 5,958,455,869 cycles # 3.092 GHz - 14,478,500,599 instructions # 2.43 insn per cycle - 1.927940043 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2560) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.977553 sec + 5,960,287,184 cycles # 3.008 GHz + 14,484,169,544 instructions # 2.43 insn per cycle + 1.983134337 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.847447e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.769654e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.769654e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.662372e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.600563e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.600563e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.416637 sec - 4,152,850,979 cycles # 2.922 GHz - 8,880,220,301 instructions # 2.14 insn per cycle - 1.422167959 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3558) (512y: 0) (512z: 0) +TOTAL : 1.451994 sec + 4,186,509,528 cycles # 2.874 GHz + 8,887,826,504 instructions # 2.12 insn per cycle + 1.457581768 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3576) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.892718e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.835991e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.835991e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.782199e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.721549e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.721549e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.408167 sec - 4,129,402,905 cycles # 2.922 GHz - 8,417,127,810 instructions # 2.04 insn per cycle - 1.413668763 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3296) (512y: 0) (512z: 0) +TOTAL : 1.432127 sec + 4,128,776,992 cycles # 2.874 GHz + 8,424,271,434 instructions # 2.04 insn per cycle + 1.437420732 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3320) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [ha Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.947803e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.456162e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.456162e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.779314e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.273574e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.273574e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.843327 sec - 3,787,877,452 cycles # 2.050 GHz - 7,705,706,460 instructions # 2.03 insn per cycle - 1.848729462 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3412) (512y: 0) (512z: 2108) +TOTAL : 1.899022 sec + 3,798,792,191 cycles # 1.996 GHz + 7,712,429,012 instructions # 2.03 insn per cycle + 1.904382082 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3436) (512y: 0) (512z: 2108) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index be08b0f2cb..4284e04c80 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_13:15:33 +DATE: 2023-11-03_19:05:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.274406e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.151337e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.265311e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.262595e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.173145e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.266137e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.519897 sec - 2,283,332,215 cycles # 3.025 GHz - 3,248,780,616 instructions # 1.42 insn per cycle - 0.825340878 seconds time elapsed +TOTAL : 0.516288 sec + 2,170,206,194 cycles # 2.914 GHz + 3,121,753,700 instructions # 1.44 insn per cycle + 0.802206987 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -76,15 +76,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.181311e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.245712e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.245712e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.129811e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.193121e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.193121e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.908499 sec - 15,234,841,173 cycles # 3.100 GHz - 38,614,636,672 instructions # 2.53 insn per cycle - 4.916787929 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 659) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.027744 sec + 15,293,663,581 cycles # 3.040 GHz + 38,642,438,156 instructions # 2.53 insn per cycle + 5.032856601 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 672) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.574916e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.763087e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.763087e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.666972e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.869148e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.869148e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.034907 sec - 8,976,593,954 cycles # 2.956 GHz - 24,241,840,556 instructions # 2.70 insn per cycle - 3.050923562 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2176) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.964411 sec + 8,933,093,188 cycles # 3.009 GHz + 24,243,353,502 instructions # 2.71 insn per cycle + 2.969821465 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.068308e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.608701e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.608701e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.660709e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.167400e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.167400e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.829434 sec - 5,404,823,528 cycles # 2.945 GHz - 11,280,042,611 instructions # 2.09 insn per cycle - 1.844040899 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2462) (512y: 0) (512z: 0) +TOTAL : 1.961588 sec + 5,410,079,541 cycles # 2.752 GHz + 11,291,080,205 instructions # 2.09 insn per cycle + 1.966921243 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2480) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.778878e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.461946e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.461946e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.588007e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.231756e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.231756e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.647058 sec - 4,869,702,123 cycles # 2.946 GHz - 10,530,154,588 instructions # 2.16 insn per cycle - 1.659743023 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2143) (512y: 148) (512z: 0) +TOTAL : 1.695283 sec + 4,860,759,917 cycles # 2.859 GHz + 10,541,284,808 instructions # 2.17 insn per cycle + 1.700590360 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2167) (512y: 148) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.276183e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.533760e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.533760e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.107588e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.350535e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.350535e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.552510 sec - 5,223,860,309 cycles # 2.042 GHz - 7,605,402,012 instructions # 1.46 insn per cycle - 2.569365786 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1609) (512y: 126) (512z: 1608) +TOTAL : 2.656629 sec + 5,204,386,075 cycles # 1.956 GHz + 7,617,502,706 instructions # 1.46 insn per cycle + 2.661905103 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1608) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index fe0c54d84c..58d2d743b0 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_13:16:00 +DATE: 2023-11-03_19:05:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.436244e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.158125e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.273882e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.265506e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.176728e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270375e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.524889 sec - 2,253,700,906 cycles # 2.939 GHz - 3,150,635,419 instructions # 1.40 insn per cycle - 0.833351247 seconds time elapsed +TOTAL : 0.513169 sec + 2,175,922,923 cycles # 2.936 GHz + 3,154,957,492 instructions # 1.45 insn per cycle + 0.799013980 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 @@ -76,15 +76,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.177721e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.241401e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.241401e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.110999e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.171227e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.171227e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.916147 sec - 15,282,419,302 cycles # 3.106 GHz - 40,408,905,625 instructions # 2.64 insn per cycle - 4.924110639 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.072155 sec + 15,377,556,110 cycles # 3.029 GHz + 40,435,905,161 instructions # 2.63 insn per cycle + 5.077406066 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -102,15 +103,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.872219e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.093844e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.093844e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.761885e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.974310e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.974310e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.810196 sec - 8,546,119,161 cycles # 3.035 GHz - 23,267,992,097 instructions # 2.72 insn per cycle - 2.824845793 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 2079) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.891901 sec + 8,516,736,770 cycles # 2.941 GHz + 23,273,421,536 instructions # 2.73 insn per cycle + 2.897134410 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2091) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -128,15 +130,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.169853e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.559211e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.559211e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.041812e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.416387e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.416387e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.128900 sec - 6,252,672,924 cycles # 2.929 GHz - 12,966,364,012 instructions # 2.07 insn per cycle - 2.147780574 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2651) (512y: 0) (512z: 0) +TOTAL : 2.184891 sec + 6,239,964,038 cycles # 2.850 GHz + 12,976,938,369 instructions # 2.08 insn per cycle + 2.190210603 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2669) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -154,15 +157,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.487443e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.938078e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.938078e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.262419e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.673980e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.673980e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.013530 sec - 5,927,064,158 cycles # 2.936 GHz - 12,242,211,971 instructions # 2.07 insn per cycle - 2.024360041 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2185) (512y: 296) (512z: 0) +TOTAL : 2.097286 sec + 5,931,604,060 cycles # 2.822 GHz + 12,254,844,972 instructions # 2.07 insn per cycle + 2.102596228 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2209) (512y: 296) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -180,15 +184,16 @@ Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [ha Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.836447e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.050272e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.050272e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.636806e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.830983e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.830983e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.837117 sec - 5,601,200,879 cycles # 1.972 GHz - 8,746,209,226 instructions # 1.56 insn per cycle - 2.853250568 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1466) (512y: 183) (512z: 1909) +TOTAL : 2.989274 sec + 5,599,763,733 cycles # 1.871 GHz + 8,758,209,944 instructions # 1.56 insn per cycle + 2.994808333 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1909) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index dbb6a27461..c973ded005 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-11-03_13:16:27 +DATE: 2023-11-03_19:06:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.467182e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.042578e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.058885e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.987778e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.047089e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.059978e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.462450 sec - 2,077,727,447 cycles # 3.022 GHz - 2,939,256,117 instructions # 1.41 insn per cycle - 0.760420017 seconds time elapsed +TOTAL : 0.462314 sec + 1,969,733,176 cycles # 2.915 GHz + 2,854,417,454 instructions # 1.45 insn per cycle + 0.732902295 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.080642e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.317470e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.331219e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.125374e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.318187e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.329149e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.600187 sec - 2,562,305,698 cycles # 3.039 GHz - 3,791,655,393 instructions # 1.48 insn per cycle - 0.901479344 seconds time elapsed +TOTAL : 0.595579 sec + 2,446,683,532 cycles # 2.952 GHz + 3,726,903,800 instructions # 1.52 insn per cycle + 0.888429467 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.582587e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.595230e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.595230e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.543975e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.556543e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.556543e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.363661 sec - 
19,741,541,068 cycles # 3.101 GHz - 59,603,136,600 instructions # 3.02 insn per cycle - 6.370242758 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1453) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.463148 sec + 19,697,684,289 cycles # 3.046 GHz + 59,611,728,869 instructions # 3.03 insn per cycle + 6.467313414 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.959878e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.005566e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.005566e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.806236e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.850408e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.850408e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.321995 sec - 10,357,377,147 cycles # 3.114 GHz - 30,672,198,558 instructions # 2.96 insn per cycle - 3.336689734 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5141) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.430883 sec + 10,361,092,942 cycles # 3.017 GHz + 30,679,655,225 instructions # 2.96 insn per cycle + 3.435128458 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.935334e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.011364e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.011364e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.723128e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.902993e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.902993e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.667758 sec - 4,882,235,286 cycles # 2.920 GHz - 11,014,946,826 instructions # 2.26 insn per cycle - 1.681843678 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4449) (512y: 0) (512z: 0) +TOTAL : 1.707466 sec + 4,879,146,362 cycles # 2.851 GHz + 11,021,709,924 instructions # 2.26 insn per cycle + 1.711937944 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.112517e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.135400e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.135400e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.083664e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.105516e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.105516e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.491700 sec - 4,367,434,146 cycles # 2.920 GHz - 10,292,361,014 instructions # 2.36 insn per cycle - 1.505034425 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4113) (512y: 91) (512z: 0) +TOTAL : 1.533989 sec + 4,371,523,225 cycles # 2.843 GHz + 10,299,869,041 instructions # 2.36 insn per cycle + 1.538284203 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.857569e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.970188e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.970188e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.583252e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.691167e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.691167e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.106012 sec - 4,092,305,690 cycles # 1.940 GHz - 5,839,185,657 instructions # 1.43 insn per cycle - 2.116411168 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1516) (512y: 95) (512z: 3466) +TOTAL : 2.184881 sec + 4,101,268,943 cycles # 1.874 GHz + 5,846,549,953 instructions # 1.43 insn per cycle + 2.189162148 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index 9c5ec8bfcc..cc88ce6db1 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-11-03_14:00:54 +DATE: 2023-11-03_19:36:12 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.678165e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.772137e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.772137e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.617150e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.773641e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.773641e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.487116 sec - 2,132,060,726 cycles # 3.012 GHz - 3,216,268,356 instructions # 1.51 insn per cycle - 0.766469604 seconds time elapsed +TOTAL : 0.490872 sec + 2,070,161,118 cycles # 2.946 GHz + 3,152,579,676 instructions # 1.52 insn per cycle + 0.759960652 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.743648e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.448864e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.448864e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.687018e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.487518e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.487518e+06 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.821472 sec - 3,252,599,355 cycles # 3.032 GHz - 5,078,109,890 instructions # 1.56 insn per cycle - 1.130728802 seconds time elapsed +TOTAL : 0.832612 sec + 3,193,307,533 cycles # 2.947 GHz + 4,978,788,975 instructions # 1.56 insn per cycle + 1.143205796 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -103,15 +103,16 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.508949e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.521499e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.521499e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.529162e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.541866e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.541866e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.556354 
sec - 19,759,987,521 cycles # 3.012 GHz - 59,608,977,045 instructions # 3.02 insn per cycle - 6.560604945 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1453) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.507705 sec + 19,736,202,639 cycles # 3.031 GHz + 59,616,040,959 instructions # 3.02 insn per cycle + 6.512416242 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -130,15 +131,16 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.915913e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.961324e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.961324e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.815393e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.861165e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.861165e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.358749 sec - 10,404,117,412 cycles # 3.095 GHz - 30,721,241,681 instructions # 2.95 insn per cycle - 3.363116143 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 5141) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.431600 sec + 10,398,990,181 cycles # 3.027 GHz + 30,726,516,620 instructions # 2.95 insn per cycle + 3.436080496 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,15 +159,16 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.903809e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.008523e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.008523e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.253880e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.426870e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.426870e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.680116 sec - 4,916,337,320 cycles # 2.920 GHz - 11,065,114,702 instructions # 2.25 insn per cycle - 1.684455523 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4449) (512y: 0) (512z: 0) +TOTAL : 1.802152 sec + 4,928,997,803 cycles # 2.730 GHz + 11,072,368,065 instructions # 2.25 insn per cycle + 1.806633331 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,15 +187,16 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.108956e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.131774e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.131774e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.076136e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.098656e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.098656e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.502158 sec - 4,393,181,404 cycles # 2.918 GHz - 10,340,311,929 instructions # 2.35 insn per cycle - 1.506385000 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4113) (512y: 91) (512z: 0) +TOTAL : 1.553423 sec + 4,411,400,335 cycles # 2.833 GHz + 10,349,798,385 instructions # 2.35 insn per cycle + 1.557941492 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -211,15 +215,16 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.805812e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.921883e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.921883e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.266833e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.375233e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.375233e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.126801 sec - 4,123,621,038 cycles # 1.936 GHz - 5,877,714,636 instructions # 1.43 insn per cycle - 2.131182255 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1516) (512y: 95) (512z: 3466) +TOTAL : 2.287929 sec + 4,148,582,308 cycles # 1.811 GHz + 5,885,924,420 instructions # 1.42 insn per cycle + 2.292472050 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index 4b186e3c8d..890a9e444f 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-11-03_13:16:56 +DATE: 2023-11-03_19:06:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.451423e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.041254e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.057491e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.934806e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.040123e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.052620e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.462812 sec - 2,067,313,737 cycles # 3.009 GHz - 2,928,212,077 instructions # 1.42 insn per cycle - 0.755597155 seconds time elapsed +TOTAL : 0.460430 sec + 1,973,324,046 cycles # 2.928 GHz + 2,840,856,751 instructions # 1.44 insn per cycle + 0.731489352 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.075830e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.309337e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.323289e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.120884e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.312101e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.322916e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.598570 sec - 2,505,991,827 cycles # 2.994 GHz - 3,799,994,110 instructions # 1.52 insn per cycle - 0.896665996 seconds time elapsed +TOTAL : 0.593653 sec + 2,438,307,110 cycles # 2.956 GHz + 3,770,815,852 instructions # 1.55 insn per cycle + 0.884294118 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.606375e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.619374e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.619374e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.568377e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.581048e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.581048e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.305650 sec - 
19,550,945,894 cycles # 3.099 GHz - 58,794,622,568 instructions # 3.01 insn per cycle - 6.313075917 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1300) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.401933 sec + 19,482,758,220 cycles # 3.042 GHz + 58,802,978,389 instructions # 3.02 insn per cycle + 6.406140471 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1313) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.925502e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.972274e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.972274e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.917983e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.963815e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.963815e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.345149 sec - 10,219,058,064 cycles # 3.055 GHz - 30,347,001,184 instructions # 2.97 insn per cycle - 3.358128835 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4958) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.353380 sec + 10,239,214,469 cycles # 3.050 GHz + 30,351,045,797 instructions # 2.96 insn per cycle + 3.357673213 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4970) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.545609e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.725495e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.725495e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.402320e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.570383e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.570383e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.735067 sec - 5,044,703,294 cycles # 2.901 GHz - 11,479,543,399 instructions # 2.28 insn per cycle - 1.749373805 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4573) (512y: 0) (512z: 0) +TOTAL : 1.764710 sec + 5,042,998,580 cycles # 2.852 GHz + 11,486,615,235 instructions # 2.28 insn per cycle + 1.768978894 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4591) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.038658e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.058879e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.058879e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.003860e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.023445e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.023445e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.596287 sec - 4,630,239,457 cycles # 2.893 GHz - 10,837,667,903 instructions # 2.34 insn per cycle - 1.609962505 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4159) (512y: 244) (512z: 0) +TOTAL : 1.654433 sec + 4,647,317,234 cycles # 2.803 GHz + 10,844,918,785 instructions # 2.33 insn per cycle + 1.658681615 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4183) (512y: 244) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [h Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.617610e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.726680e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.726680e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.419133e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.526721e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.526721e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.171936 sec - 4,109,589,812 cycles # 1.889 GHz - 6,103,290,525 instructions # 1.49 insn per cycle - 2.186977177 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1433) (512y: 139) (512z: 3568) +TOTAL : 2.233568 sec + 4,119,227,015 cycles # 1.842 GHz + 6,111,995,104 instructions # 1.48 insn per cycle + 2.238507475 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1457) (512y: 139) (512z: 3568) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 362655a840..906002ccef 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-11-03_13:17:25 +DATE: 2023-11-03_19:07:06 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.489146e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.363200e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.462876e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.570718e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.332431e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.423909e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.442934 sec - 1,970,237,840 cycles # 2.984 GHz - 2,770,826,921 instructions # 1.41 insn per cycle - 0.735603362 seconds time elapsed +TOTAL : 0.445719 sec + 1,977,839,409 cycles # 2.946 GHz + 2,766,831,818 instructions # 1.40 insn per cycle + 0.728762524 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.230130e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.406426e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.489442e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.444258e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.461256e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.527187e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.491833 sec - 2,186,862,195 cycles # 3.015 GHz - 3,142,874,449 instructions # 1.44 insn per cycle - 0.784602556 seconds time elapsed +TOTAL : 0.490311 sec + 2,098,277,441 cycles # 2.940 GHz + 3,050,395,563 instructions # 1.45 insn per cycle + 0.771282830 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -86,10 +86,10 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 OMP= WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 32,317,025 cycles # 2.771 GHz - 48,300,089 instructions # 1.49 insn per cycle - 0.014257115 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1021) (avx2: 0) (512y: 0) (512z: 0) + 32,139,063 cycles # 2.763 GHz + 49,369,582 instructions # 1.54 insn per cycle + 0.012019390 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index a2f6fdb57b..afa8c22c25 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-11-03_14:01:23 +DATE: 2023-11-03_19:36:42 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.074587e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.149885e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.149885e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.935100e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.139273e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.139273e+07 ) sec^-1 MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 -TOTAL : 0.454331 sec - 1,999,329,976 cycles # 3.016 GHz - 2,956,102,779 instructions # 1.48 insn per cycle - 0.720212411 seconds time elapsed +TOTAL : 0.458037 sec + 1,958,659,698 cycles # 2.936 GHz + 2,907,533,469 instructions # 1.48 insn per cycle + 0.726231579 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.788896e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.565984e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.565984e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.639472e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.576828e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.576828e+07 ) sec^-1 MeanMatrixElemValue = ( 6.737500e+02 +- 4.776370e+02 ) GeV^-2 -TOTAL : 0.631218 sec - 2,614,227,757 cycles # 3.038 GHz - 3,889,940,623 instructions # 1.49 insn per cycle - 0.918378142 seconds time elapsed +TOTAL : 0.638235 sec + 2,567,083,186 cycles # 2.951 GHz + 3,965,073,751 instructions # 1.54 insn per cycle + 0.927254467 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -99,10 +99,10 @@ OK (relative difference <= 5E-3) runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) - 38,736,994 cycles # 2.890 GHz - 51,577,023 instructions # 1.33 insn per cycle - 0.013897774 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1021) (avx2: 0) (512y: 0) (512z: 0) + 38,813,158 cycles # 2.791 GHz + 52,008,055 instructions # 1.34 insn per cycle + 0.014463641 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index 589a07cd15..e0c37ae81b 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-11-03_13:17:35 +DATE: 2023-11-03_19:07:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.430067e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.250178e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.346715e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.552711e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.312060e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.409477e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.442845 sec - 1,978,546,976 cycles # 2.994 GHz - 2,800,934,812 instructions # 1.42 insn per cycle - 0.729688570 seconds time elapsed +TOTAL : 0.443645 sec + 1,939,887,285 cycles # 2.958 GHz + 2,753,223,301 instructions # 1.42 insn per cycle + 0.713433638 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 248 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.183183e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.331943e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.411243e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.420862e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.422248e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.487501e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.494163 sec - 2,190,798,360 cycles # 3.005 GHz - 3,102,998,927 instructions # 1.42 insn per cycle - 0.786222748 seconds time elapsed +TOTAL : 0.489840 sec + 2,095,642,051 cycles # 2.944 GHz + 3,058,032,700 instructions # 1.46 insn per cycle + 0.771189239 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -86,10 +86,10 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check.exe -p 64 256 10 OMP= WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 31,477,105 cycles # 2.743 GHz - 47,479,451 instructions # 1.51 insn per cycle - 0.014142910 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1016) (avx2: 0) (512y: 0) (512z: 0) + 31,454,006 cycles # 2.782 GHz + 48,514,001 instructions # 1.54 insn per cycle + 0.011695448 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1029) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 71fed6821e..9bd85e98d0 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-11-03_13:17:44 +DATE: 2023-11-03_19:07:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.662719e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.038516e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.052821e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.981637e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.050998e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.064107e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.462730 sec - 2,064,962,094 cycles # 2.992 GHz - 2,945,094,613 instructions # 1.43 insn per cycle - 0.755827601 seconds time elapsed +TOTAL : 0.460239 sec + 1,991,164,692 cycles # 2.956 GHz + 2,861,513,835 instructions # 1.44 insn per cycle + 0.731121053 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.076527e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.311748e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.325513e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.125939e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.318916e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.329956e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.608648 sec - 2,467,621,809 cycles # 2.915 GHz - 3,601,978,504 instructions # 1.46 insn per cycle - 0.908027164 seconds time elapsed +TOTAL : 0.595711 sec + 2,444,157,832 cycles # 2.957 GHz + 3,696,457,333 instructions # 1.51 insn per cycle + 0.888026518 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -86,10 +86,10 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check.exe -p 64 256 10 OMP= WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 35,020,628 cycles # 2.806 GHz - 49,755,438 instructions # 1.42 insn per cycle - 0.015066790 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1386) (avx2: 0) (512y: 0) (512z: 0) + 35,021,922 cycles # 2.756 GHz + 50,809,631 instructions # 1.45 insn per cycle + 0.013111359 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1399) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index db6b196dcc..659836495f 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-11-03_13:17:54 +DATE: 2023-11-03_19:07:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.645159e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.033634e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.047567e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.948465e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.041856e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.054410e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.460733 sec - 2,032,593,852 cycles # 3.005 GHz - 2,920,206,606 instructions # 1.44 insn per cycle - 0.742642299 seconds time elapsed +TOTAL : 0.460434 sec + 1,981,925,545 cycles # 2.941 GHz + 2,855,578,890 instructions # 1.44 insn per cycle + 0.731466835 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.069849e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.302002e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.315503e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.114794e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.303596e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.314294e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.595564 sec - 2,513,535,038 cycles # 3.015 GHz - 3,759,582,185 instructions # 1.50 insn per cycle - 0.892869761 seconds time elapsed +TOTAL : 0.592739 sec + 2,423,209,817 cycles # 2.940 GHz + 3,698,114,761 instructions # 1.53 insn per cycle + 0.885260737 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -86,10 +86,10 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check.exe -p 64 256 10 OMP= WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 34,337,414 cycles # 2.744 GHz - 48,918,383 instructions # 1.42 insn per cycle - 0.014922909 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 1263) (avx2: 0) (512y: 0) (512z: 0) + 34,542,827 cycles # 2.778 GHz + 50,097,141 instructions # 1.45 insn per cycle + 0.012808089 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1276) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 2f58b85467..a9f9e7f9b0 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_13:18:03 +DATE: 2023-11-03_19:07:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.475226e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.504026e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.506324e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.471280e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.495513e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.497667e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.521620 sec - 2,278,405,286 cycles # 3.016 GHz - 3,569,695,737 instructions # 1.57 insn per cycle - 0.824142816 seconds time elapsed +TOTAL : 0.521778 sec + 2,221,753,731 cycles # 2.953 GHz + 3,509,979,793 instructions # 1.58 insn per cycle + 0.811888374 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.124925e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.158780e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.160188e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.130694e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.157314e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.158457e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.027641 sec - 10,109,489,302 cycles # 3.067 GHz - 20,958,650,986 instructions # 2.07 insn per cycle - 3.354054406 seconds time elapsed +TOTAL : 3.024926 sec + 9,877,023,451 cycles # 3.016 GHz + 20,938,621,148 instructions # 2.12 insn per cycle + 3.332222792 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.965820e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.966789e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.966789e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.942881e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.943811e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.943811e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
8.350379 sec - 25,556,849,433 cycles # 3.061 GHz - 78,937,890,507 instructions # 3.09 insn per cycle - 8.357043052 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4879) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.450914 sec + 25,661,004,969 cycles # 3.035 GHz + 78,943,064,293 instructions # 3.08 insn per cycle + 8.455241133 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.696796e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.700036e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.700036e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.566286e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.569647e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.569647e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.442136 sec - 12,969,425,468 cycles # 2.917 GHz - 39,277,882,939 instructions # 3.03 insn per cycle - 4.456456759 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13170) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.607952 sec + 12,925,846,736 cycles # 2.803 GHz + 39,287,875,718 instructions # 3.04 insn per cycle + 4.612260028 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.634369e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.652303e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.652303e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.376392e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.393376e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.393376e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.905944 sec - 5,545,959,947 cycles # 2.904 GHz - 13,682,734,640 instructions # 2.47 insn per cycle - 1.920977419 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11339) (512y: 0) (512z: 0) +TOTAL : 1.967322 sec + 5,576,808,906 cycles # 2.829 GHz + 13,690,679,702 instructions # 2.45 insn per cycle + 1.971661788 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.809381e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.832630e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.832630e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.568825e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.591271e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.591271e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.678117 sec - 4,878,586,577 cycles # 2.901 GHz - 12,338,041,344 instructions # 2.53 insn per cycle - 1.689838954 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10242) (512y: 88) (512z: 0) +TOTAL : 1.723570 sec + 4,897,962,779 cycles # 2.836 GHz + 12,345,795,320 instructions # 2.52 insn per cycle + 1.727906957 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.697114e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.711979e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.711979e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.463403e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.476893e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.476893e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.137603 sec - 4,098,498,315 cycles # 1.914 GHz - 6,330,554,099 instructions # 1.54 insn per cycle - 2.154460026 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1797) (512y: 102) (512z: 9375) +TOTAL : 2.207008 sec + 4,113,706,051 cycles # 1.861 GHz + 6,338,446,257 instructions # 1.54 insn per cycle + 2.211395304 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index a90d88261e..05b9b7b471 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_14:02:07 +DATE: 2023-11-03_19:37:26 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.160085e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.491555e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.491555e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.138586e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.475297e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.475297e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.508649 sec - 2,243,351,008 cycles # 3.015 GHz - 3,493,355,426 instructions # 1.56 insn per cycle - 0.803773680 seconds time elapsed +TOTAL : 0.514369 sec + 2,174,774,169 cycles # 2.935 GHz + 3,408,753,270 instructions # 1.57 insn per cycle + 0.802511668 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.641718e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.113192e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.113192e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.635405e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.119639e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.119639e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.305477 sec - 10,996,635,061 cycles # 3.077 GHz - 24,704,998,495 instructions # 2.25 insn per cycle - 3.633765024 seconds time elapsed +TOTAL : 3.311178 sec + 10,730,531,324 cycles # 2.994 GHz + 24,179,707,994 instructions # 2.25 insn per cycle + 3.640277810 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -103,15 +103,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.995755e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.996726e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.996726e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.906612e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.907549e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.907549e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
8.227691 sec - 25,539,901,235 cycles # 3.103 GHz - 78,941,603,446 instructions # 3.09 insn per cycle - 8.231975969 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4879) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.615680 sec + 25,666,310,685 cycles # 2.978 GHz + 78,949,148,944 instructions # 3.08 insn per cycle + 8.620265583 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -130,15 +131,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.762236e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.765724e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.765724e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.685334e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.688850e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.688850e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.369134 sec - 12,967,293,054 cycles # 2.966 GHz - 39,290,035,969 instructions # 3.03 insn per cycle - 4.373557789 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13170) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.463406 sec + 12,942,626,026 cycles # 2.897 GHz + 39,297,696,719 instructions # 3.04 insn per cycle + 4.468216686 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,15 +159,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.634444e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.652344e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.652344e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.403877e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.422097e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.422097e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.909587 sec - 5,562,218,027 cycles # 2.907 GHz - 13,692,479,641 instructions # 2.46 insn per cycle - 1.913910388 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11339) (512y: 0) (512z: 0) +TOTAL : 1.965161 sec + 5,597,716,321 cycles # 2.843 GHz + 13,700,115,311 instructions # 2.45 insn per cycle + 1.969720229 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,15 +187,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.784354e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.808269e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.808269e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.573549e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.596918e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.596918e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.686542 sec - 4,892,905,107 cycles # 2.895 GHz - 12,348,037,792 instructions # 2.52 insn per cycle - 1.691037956 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10242) (512y: 88) (512z: 0) +TOTAL : 1.726627 sec + 4,910,197,742 cycles # 2.838 GHz + 12,354,930,161 instructions # 2.52 insn per cycle + 1.731069519 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -211,15 +215,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.665094e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.680086e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.680086e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.408369e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.421923e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.421923e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.149806 sec - 4,107,351,573 cycles # 1.907 GHz - 6,340,734,228 instructions # 1.54 insn per cycle - 2.154151474 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1797) (512y: 102) (512z: 9375) +TOTAL : 2.227463 sec + 4,132,274,023 cycles # 1.852 GHz + 6,348,232,709 instructions # 1.54 insn per cycle + 2.231941444 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index 5254750155..d4a13c45dc 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_14:14:07 +DATE: 2023-11-03_19:49:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.510322e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.538031e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.540551e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.490628e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.519771e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.522013e+05 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.503788 sec - 2,252,662,033 cycles # 3.035 GHz - 3,515,557,636 instructions # 1.56 insn per cycle - 0.812476829 seconds time elapsed +TOTAL : 0.506263 sec + 2,193,209,541 cycles # 2.934 GHz + 3,448,112,270 instructions # 1.57 insn per cycle + 0.811794626 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --common WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.141342e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.175250e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.176691e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.140777e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.174961e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.176419e+05 ) sec^-1 MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 3.122387 sec - 10,080,449,234 cycles # 2.987 GHz - 20,662,923,062 instructions # 2.05 insn per cycle - 3.431503994 seconds time elapsed +TOTAL : 3.133332 sec + 10,144,803,574 cycles # 2.992 GHz + 22,979,164,856 instructions # 2.27 insn per cycle + 3.446699997 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.992840e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.993789e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.993789e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.934897e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.935823e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.935823e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 
8.237469 sec - 25,586,326,392 cycles # 3.105 GHz - 78,935,069,074 instructions # 3.09 insn per cycle - 8.241477506 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4879) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.487397 sec + 25,642,144,633 cycles # 3.020 GHz + 78,942,503,354 instructions # 3.08 insn per cycle + 8.491509185 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.733845e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.737683e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.737683e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.604711e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.608085e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.608085e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.400380 sec - 12,963,141,897 cycles # 2.945 GHz - 39,278,266,161 instructions # 3.03 insn per cycle - 4.404595331 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13170) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.560510 sec + 12,949,935,406 cycles # 2.841 GHz + 39,287,959,625 instructions # 3.03 insn per cycle + 4.564590789 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.646502e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.664554e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.664554e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.331820e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.349574e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.349574e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.904447 sec - 5,552,738,566 cycles # 2.911 GHz - 13,681,318,294 instructions # 2.46 insn per cycle - 1.908567769 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11339) (512y: 0) (512z: 0) +TOTAL : 1.979581 sec + 5,585,242,942 cycles # 2.817 GHz + 13,688,645,923 instructions # 2.45 insn per cycle + 1.983846301 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.859590e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.883833e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.883833e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.501909e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.523734e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.523734e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.671409 sec - 4,881,305,688 cycles # 2.915 GHz - 12,334,806,032 instructions # 2.53 insn per cycle - 1.675465120 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10242) (512y: 88) (512z: 0) +TOTAL : 1.737131 sec + 4,904,473,574 cycles # 2.818 GHz + 12,343,066,066 instructions # 2.52 insn per cycle + 1.741373569 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.593539e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.608812e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.608812e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.326865e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.339889e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.339889e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.167894 sec - 4,105,387,288 cycles # 1.891 GHz - 6,327,572,823 instructions # 1.54 insn per cycle - 2.171922605 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1797) (512y: 102) (512z: 9375) +TOTAL : 2.249568 sec + 4,122,823,033 cycles # 1.830 GHz + 6,335,244,526 instructions # 1.54 insn per cycle + 2.253741280 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt index f46288647c..8a019b9732 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_14:10:49 +DATE: 2023-11-03_19:45:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.509071e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.537185e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.539386e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.497991e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.525524e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.527678e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.502824 sec - 2,250,068,765 cycles # 3.040 GHz - 3,538,075,739 instructions # 1.57 insn per cycle - 0.813609249 seconds time elapsed +TOTAL : 0.505150 sec + 2,198,803,568 cycles # 2.954 GHz + 3,469,496,289 instructions # 1.58 insn per cycle + 0.812740673 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.138969e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.172977e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.174382e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.149366e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.183697e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.185208e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.062614 sec - 10,229,382,145 cycles # 3.088 GHz - 22,201,609,187 instructions # 2.17 insn per cycle - 3.371555969 seconds time elapsed +TOTAL : 3.069067 sec + 9,961,450,693 cycles # 3.001 GHz + 22,775,488,914 instructions # 2.29 insn per cycle + 3.378594275 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.966043e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.967005e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.967005e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.919154e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.920062e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.920062e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
8.347953 sec - 25,546,590,557 cycles # 3.059 GHz - 78,935,472,097 instructions # 3.09 insn per cycle - 8.351969958 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4879) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.555088 sec + 25,630,164,257 cycles # 2.995 GHz + 78,942,698,347 instructions # 3.08 insn per cycle + 8.559388166 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.575391e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.578836e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.578836e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.673575e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.677034e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.677034e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.593548 sec - 12,970,107,566 cycles # 2.822 GHz - 39,277,710,202 instructions # 3.03 insn per cycle - 4.597715990 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13170) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.474572 sec + 12,938,774,287 cycles # 2.890 GHz + 39,284,863,862 instructions # 3.04 insn per cycle + 4.478882140 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.167807e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.186151e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.186151e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.365364e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.382422e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.382422e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.014576 sec - 5,549,063,146 cycles # 2.750 GHz - 13,682,908,062 instructions # 2.47 insn per cycle - 2.019029241 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11339) (512y: 0) (512z: 0) +TOTAL : 1.970072 sec + 5,585,160,191 cycles # 2.830 GHz + 13,689,327,859 instructions # 2.45 insn per cycle + 1.974279626 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.768089e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.790984e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.790984e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.573694e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.596726e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.596726e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.685005 sec - 4,884,942,305 cycles # 2.893 GHz - 12,336,928,668 instructions # 2.53 insn per cycle - 1.689105526 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10242) (512y: 88) (512z: 0) +TOTAL : 1.722482 sec + 4,895,075,879 cycles # 2.836 GHz + 12,344,411,096 instructions # 2.52 insn per cycle + 1.726704102 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.722454e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.737226e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.737226e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.342892e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.356180e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.356180e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.129994 sec - 4,094,793,737 cycles # 1.919 GHz - 6,329,508,466 instructions # 1.55 insn per cycle - 2.134175822 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1797) (512y: 102) (512z: 9375) +TOTAL : 2.243467 sec + 4,145,301,834 cycles # 1.845 GHz + 6,337,134,423 instructions # 1.53 insn per cycle + 2.247770943 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index fce3d66688..0761c0d014 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_14:07:36 +DATE: 2023-11-03_19:42:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -51,14 +51,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.234011e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.528551e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.530858e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.224877e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.534029e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.536870e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.505287 sec - 2,240,713,082 cycles # 3.016 GHz - 3,438,870,934 instructions # 1.53 insn per cycle - 0.809116913 seconds time elapsed +TOTAL : 0.509685 sec + 2,194,677,974 cycles # 2.952 GHz + 3,468,699,947 instructions # 1.58 insn per cycle + 0.805522488 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -71,14 +71,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.743375e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.168214e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.169628e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.741528e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.176834e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.178277e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.205050 sec - 10,597,294,848 cycles # 3.065 GHz - 22,938,358,256 instructions # 2.16 insn per cycle - 3.514076840 seconds time elapsed +TOTAL : 3.196085 sec + 10,332,938,289 cycles # 2.993 GHz + 23,233,171,839 instructions # 2.25 insn per cycle + 3.511259911 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -93,15 +93,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.976812e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.977739e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.977739e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.927835e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.928807e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.928807e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.301904 
sec - 25,599,810,977 cycles # 3.083 GHz - 78,935,431,820 instructions # 3.08 insn per cycle - 8.305916327 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4879) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.516575 sec + 25,626,746,874 cycles # 3.008 GHz + 78,942,783,638 instructions # 3.08 insn per cycle + 8.520860421 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -119,15 +120,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.765431e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.768870e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.768870e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.674456e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.677849e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.677849e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.361408 sec - 12,972,983,204 cycles # 2.972 GHz - 39,276,794,676 instructions # 3.03 insn per cycle - 4.365530738 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13170) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.472834 sec + 12,938,647,402 cycles # 2.891 GHz + 39,285,558,550 instructions # 3.04 insn per cycle + 4.477166946 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -145,15 +147,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.469632e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.486221e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.486221e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.290335e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.307469e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.307469e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.942639 sec - 5,545,744,386 cycles # 2.850 GHz - 13,681,716,126 instructions # 2.47 insn per cycle - 1.946774708 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11339) (512y: 0) (512z: 0) +TOTAL : 1.987857 sec + 5,582,015,296 cycles # 2.804 GHz + 13,690,066,849 instructions # 2.45 insn per cycle + 1.992149312 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -171,15 +174,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.805750e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.828814e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.828814e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.537627e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.561759e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.561759e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.678563 sec - 4,882,802,128 cycles # 2.903 GHz - 12,336,959,860 instructions # 2.53 insn per cycle - 1.682626619 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10242) (512y: 88) (512z: 0) +TOTAL : 1.729438 sec + 4,899,116,746 cycles # 2.827 GHz + 12,344,356,410 instructions # 2.52 insn per cycle + 1.733854664 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -197,15 +201,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.219836e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.232827e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.232827e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.331605e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.345774e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.345774e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.278049 sec - 4,096,280,217 cycles # 1.795 GHz - 6,330,028,283 instructions # 1.55 insn per cycle - 2.282228085 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1797) (512y: 102) (512z: 9375) +TOTAL : 2.247519 sec + 4,126,377,191 cycles # 1.833 GHz + 6,337,288,668 instructions # 1.54 insn per cycle + 2.251874954 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index c1e0a2680d..d519ec18af 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_13:18:40 +DATE: 2023-11-03_19:08:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.476416e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.504069e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.506381e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.482135e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.509267e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.511176e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.521804 sec - 2,271,222,187 cycles # 3.016 GHz - 3,545,540,197 instructions # 1.56 insn per cycle - 0.828184124 seconds time elapsed +TOTAL : 0.519206 sec + 2,212,325,201 cycles # 2.954 GHz + 3,433,704,735 instructions # 1.55 insn per cycle + 0.807580904 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.137838e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.172044e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.173476e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.159162e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.186085e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.187240e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.023677 sec - 10,060,817,524 cycles # 3.068 GHz - 22,099,559,419 instructions # 2.20 insn per cycle - 3.336415577 seconds time elapsed +TOTAL : 3.004869 sec + 9,812,463,662 cycles # 3.013 GHz + 21,581,231,713 instructions # 2.20 insn per cycle + 3.312573877 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.983012e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.984028e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.984028e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.947345e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.948277e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.948277e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
8.276490 sec - 25,550,391,420 cycles # 3.086 GHz - 78,708,125,179 instructions # 3.08 insn per cycle - 8.282892300 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4250) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.431137 sec + 25,590,035,480 cycles # 3.034 GHz + 78,715,048,416 instructions # 3.08 insn per cycle + 8.435307792 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4263) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.723060e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.726384e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.726384e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.620452e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.623805e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.623805e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.410833 sec - 12,906,494,469 cycles # 2.925 GHz - 39,224,246,908 instructions # 3.04 insn per cycle - 4.423264335 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12937) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.539871 sec + 12,909,848,042 cycles # 2.843 GHz + 39,233,023,972 instructions # 3.04 insn per cycle + 4.544176080 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:12949) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.514705e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.532795e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.532795e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.331174e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.348654e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.348654e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.931872 sec - 5,619,495,933 cycles # 2.903 GHz - 13,796,922,511 instructions # 2.46 insn per cycle - 1.942226870 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11404) (512y: 0) (512z: 0) +TOTAL : 1.977747 sec + 5,618,064,764 cycles # 2.836 GHz + 13,804,762,963 instructions # 2.46 insn per cycle + 1.981982814 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11422) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.697078e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.719929e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.719929e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.463129e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.484771e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.484771e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.697070 sec - 4,931,596,573 cycles # 2.899 GHz - 12,462,949,801 instructions # 2.53 insn per cycle - 1.712960774 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10234) (512y: 240) (512z: 0) +TOTAL : 1.742192 sec + 4,960,747,667 cycles # 2.842 GHz + 12,470,817,922 instructions # 2.51 insn per cycle + 1.746604551 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10258) (512y: 240) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.701118e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.715314e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.715314e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.427183e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.440655e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.440655e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.135519 sec - 4,115,648,082 cycles # 1.924 GHz - 6,454,623,775 instructions # 1.57 insn per cycle - 2.148173862 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1623) (512y: 192) (512z: 9375) +TOTAL : 2.217977 sec + 4,119,292,054 cycles # 1.855 GHz + 6,462,314,928 instructions # 1.57 insn per cycle + 2.222289185 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1647) (512y: 192) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index 8c991ffb74..0e734b6c9d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_13:51:46 +DATE: 2023-11-03_19:26:58 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.232347e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.256643e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.258773e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.237666e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.262462e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.264647e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.528671 sec - 2,311,688,773 cycles # 3.025 GHz - 3,507,601,681 instructions # 1.52 insn per cycle - 0.823229672 seconds time elapsed +TOTAL : 0.533653 sec + 2,219,666,724 cycles # 2.910 GHz + 3,445,153,040 instructions # 1.55 insn per cycle + 0.821091738 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.775534e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.804008e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.805181e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.775197e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.803191e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.804422e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.299047 sec - 10,881,401,060 cycles # 3.063 GHz - 24,364,708,075 instructions # 2.24 insn per cycle - 3.609156427 seconds time elapsed +TOTAL : 3.300230 sec + 10,634,484,052 cycles # 2.991 GHz + 23,844,861,281 instructions # 2.24 insn per cycle + 3.611693691 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.469923e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.470422e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.470422e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.361422e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.361903e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.361903e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
36.695736 sec - 113,474,484,475 cycles # 3.093 GHz - 144,738,639,997 instructions # 1.28 insn per cycle - 36.699887629 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:21213) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 37.613350 sec + 113,653,626,732 cycles # 3.022 GHz + 144,966,182,806 instructions # 1.28 insn per cycle + 37.617592948 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:21605) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.274593e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.277295e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.277295e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.197160e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.199710e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.199710e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.014179 sec - 14,723,510,016 cycles # 2.934 GHz - 37,570,420,448 instructions # 2.55 insn per cycle - 5.018361008 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:68106) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.138561 sec + 14,751,525,638 cycles # 2.870 GHz + 37,578,516,323 instructions # 2.55 insn per cycle + 5.143061031 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:68118) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest.exe [ PASSED ] 6 
tests. @@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.796690e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.811800e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.811800e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.662015e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.676566e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.676566e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.110000 sec - 6,128,596,243 cycles # 2.900 GHz - 13,056,352,580 instructions # 2.13 insn per cycle - 2.114203765 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:46942) (512y: 0) (512z: 0) +TOTAL : 2.150367 sec + 6,125,090,080 cycles # 2.844 GHz + 13,063,740,704 instructions # 2.13 insn per cycle + 2.154679772 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:46960) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.517332e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.539370e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.539370e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.263953e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.285040e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.285040e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.729322 sec - 5,053,484,949 cycles # 2.916 GHz - 11,434,594,331 instructions # 2.26 insn per cycle - 1.733514150 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40410) (512y: 285) (512z: 0) +TOTAL : 1.780016 sec + 5,060,160,878 cycles # 2.837 GHz + 11,442,229,361 instructions # 2.26 insn per cycle + 1.784487029 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40434) (512y: 285) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.002850e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.019204e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.019204e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.515689e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.530167e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.530167e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.055975 sec - 3,972,385,481 cycles # 1.929 GHz - 5,936,216,747 instructions # 1.49 insn per cycle - 2.060152331 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2431) (512y: 337) (512z:39411) +TOTAL : 2.192230 sec + 3,982,582,654 cycles # 1.814 GHz + 5,943,874,364 instructions # 1.49 insn per cycle + 2.196624515 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2455) (512y: 337) (512z:39411) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index b4ebeb041b..a431669edb 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_13:52:55 +DATE: 2023-11-03_19:28:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.234950e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.260216e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.262156e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.227099e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.252215e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.254306e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.527572 sec - 2,272,502,168 cycles # 3.021 GHz - 3,459,483,926 instructions # 1.52 insn per cycle - 0.810794500 seconds time elapsed +TOTAL : 0.530677 sec + 2,254,800,400 cycles # 2.956 GHz + 3,541,881,168 instructions # 1.57 insn per cycle + 0.819833622 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.792778e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.821467e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.822686e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.792463e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.821318e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.822521e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.271697 sec - 10,757,913,675 cycles # 3.051 GHz - 23,608,820,167 instructions # 2.19 insn per cycle - 3.619466436 seconds time elapsed +TOTAL : 3.276536 sec + 10,598,798,874 cycles # 3.001 GHz + 22,505,546,793 instructions # 2.12 insn per cycle + 3.590880872 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.417584e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.418082e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.418082e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.316847e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.317310e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.317310e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 
37.130623 sec - 114,676,551,402 cycles # 3.089 GHz - 145,541,996,003 instructions # 1.27 insn per cycle - 37.134802859 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:22059) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 38.002712 sec + 114,613,209,494 cycles # 3.016 GHz + 145,560,103,749 instructions # 1.27 insn per cycle + 38.007069023 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:22248) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.167800e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.170194e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.170194e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.101440e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.103871e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.103871e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.183057 sec - 15,156,859,358 cycles # 2.922 GHz - 37,757,579,871 instructions # 2.49 insn per cycle - 5.187317533 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:68434) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.297737 sec + 15,180,958,119 cycles # 2.864 GHz + 37,765,704,407 instructions # 2.49 insn per cycle + 5.302092232 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:68446) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest.exe [ PASSED ] 6 
tests. @@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.924832e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.941496e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.941496e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.750289e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.764988e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.764988e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.075554 sec - 6,039,745,875 cycles # 2.905 GHz - 12,890,674,982 instructions # 2.13 insn per cycle - 2.079791859 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:45911) (512y: 0) (512z: 0) +TOTAL : 2.125646 sec + 6,006,519,083 cycles # 2.821 GHz + 12,897,926,690 instructions # 2.15 insn per cycle + 2.130039886 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:45929) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.194707e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.215465e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.215465e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.134516e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.155464e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.155464e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.790127 sec - 5,076,722,329 cycles # 2.830 GHz - 11,441,369,586 instructions # 2.25 insn per cycle - 1.794407354 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40099) (512y: 219) (512z: 0) +TOTAL : 1.805195 sec + 5,111,264,978 cycles # 2.826 GHz + 11,448,660,091 instructions # 2.24 insn per cycle + 1.809562076 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:40123) (512y: 219) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.900388e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.916089e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.916089e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.713307e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.727980e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.727980e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.082110 sec - 3,946,998,182 cycles # 1.893 GHz - 5,890,184,055 instructions # 1.49 insn per cycle - 2.086376956 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1947) (512y: 259) (512z:38937) +TOTAL : 2.136153 sec + 3,956,606,945 cycles # 1.850 GHz + 5,898,384,643 instructions # 1.49 insn per cycle + 2.140545061 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1971) (512y: 259) (512z:38937) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 486ad40ba6..389fe370ef 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_13:19:17 +DATE: 2023-11-03_19:08:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.332332e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.384779e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.390167e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.330449e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.375316e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.385679e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.473849 sec - 2,085,448,281 cycles # 3.026 GHz - 3,102,431,333 instructions # 1.49 insn per cycle - 0.772206568 seconds time elapsed +TOTAL : 0.478801 sec + 2,034,971,060 cycles # 2.940 GHz + 3,054,212,240 instructions # 1.50 insn per cycle + 0.749375620 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.542899e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.615381e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.618630e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.529589e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.587136e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.589764e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.718344 sec - 6,002,889,204 cycles # 3.082 GHz - 11,545,937,782 instructions # 1.92 insn per cycle - 2.005254718 seconds time elapsed +TOTAL : 1.723184 sec + 5,782,983,871 cycles # 2.964 GHz + 12,066,403,823 instructions # 2.09 insn per cycle + 2.008243733 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.033731e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.034770e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.034770e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.003677e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.004662e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.004662e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.070533 
sec - 24,603,062,150 cycles # 3.048 GHz - 78,128,920,788 instructions # 3.18 insn per cycle - 8.076588204 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.193664 sec + 24,655,416,435 cycles # 3.008 GHz + 78,134,412,275 instructions # 3.17 insn per cycle + 8.197717930 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.488751e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.503314e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.503314e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.270897e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.285143e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.285143e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.195060 sec - 6,447,897,055 cycles # 2.933 GHz - 20,117,767,281 instructions # 3.12 insn per cycle - 2.208245409 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13751) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.263632 sec + 6,475,526,341 cycles # 2.856 GHz + 20,124,982,632 instructions # 3.11 insn per cycle + 2.267936828 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.702639e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.709538e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.709538e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.655891e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.662862e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.662862e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.968946 sec - 2,810,112,225 cycles # 2.891 GHz - 6,985,318,771 instructions # 2.49 insn per cycle - 0.984902833 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11856) (512y: 0) (512z: 0) +TOTAL : 0.998679 sec + 2,840,454,971 cycles # 2.834 GHz + 6,992,590,525 instructions # 2.46 insn per cycle + 1.002898964 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.941806e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.950768e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.950768e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.904708e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.914180e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.914180e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.850482 sec - 2,478,670,305 cycles # 2.904 GHz - 6,292,505,198 instructions # 2.54 insn per cycle - 0.862853898 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10798) (512y: 43) (512z: 0) +TOTAL : 0.868982 sec + 2,491,374,231 cycles # 2.855 GHz + 6,299,681,276 instructions # 2.53 insn per cycle + 0.873227215 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.565064e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.570988e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.570988e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.509691e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.515612e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.515612e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.054240 sec - 2,040,135,052 cycles # 1.930 GHz - 3,262,448,266 instructions # 1.60 insn per cycle - 1.068023563 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2391) (512y: 46) (512z: 9571) +TOTAL : 1.094413 sec + 2,048,957,877 cycles # 1.866 GHz + 3,269,073,408 instructions # 1.60 insn per cycle + 1.098654820 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index f5dfa59d7b..5a5ccf0962 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_14:02:44 +DATE: 2023-11-03_19:38:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.689615e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.338105e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.338105e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.621379e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.322960e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.322960e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.466275 sec - 2,049,312,797 cycles # 3.023 GHz - 3,074,986,142 instructions # 1.50 insn per cycle - 0.737052092 seconds time elapsed +TOTAL : 0.467719 sec + 2,022,012,389 cycles # 2.930 GHz + 3,029,595,627 instructions # 1.50 insn per cycle + 0.748028952 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.258963e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.479077e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.479077e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.232227e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.472561e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.472561e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.896442 sec - 6,547,114,416 cycles # 3.071 GHz - 13,674,100,847 instructions # 2.09 insn per cycle - 2.189382080 seconds time elapsed +TOTAL : 1.900347 sec + 6,375,786,665 cycles # 2.982 GHz + 13,373,135,596 instructions # 2.10 insn per cycle + 2.195039568 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -103,15 +103,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.057706e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.058777e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.058777e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.008350e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.009347e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.009347e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 
7.977330 sec - 24,624,931,190 cycles # 3.086 GHz - 78,131,048,964 instructions # 3.17 insn per cycle - 7.981338163 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.176665 sec + 24,649,325,474 cycles # 3.013 GHz + 78,138,045,806 instructions # 3.17 insn per cycle + 8.180908705 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -130,15 +131,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.229659e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.242643e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.242643e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.326247e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.339746e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.339746e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.275774 sec - 6,458,705,726 cycles # 2.834 GHz - 20,126,381,677 instructions # 3.12 insn per cycle - 2.280068981 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13751) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.249404 sec + 6,483,421,678 cycles # 2.878 GHz + 20,133,640,820 instructions # 3.11 insn per cycle + 2.253658931 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,15 +159,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.700287e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.707686e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.707686e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.657895e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.664866e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.664866e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.972132 sec - 2,821,480,493 cycles # 2.892 GHz - 6,993,944,246 instructions # 2.48 insn per cycle - 0.976185959 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11856) (512y: 0) (512z: 0) +TOTAL : 0.999874 sec + 2,846,897,865 cycles # 2.837 GHz + 7,001,448,108 instructions # 2.46 insn per cycle + 1.004235579 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,15 +187,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.938068e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.947761e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.947761e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.899947e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.909346e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.909346e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.853437 sec - 2,487,354,942 cycles # 2.903 GHz - 6,301,326,408 instructions # 2.53 insn per cycle - 0.857469948 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10798) (512y: 43) (512z: 0) +TOTAL : 0.873710 sec + 2,498,501,131 cycles # 2.848 GHz + 6,308,536,459 instructions # 2.52 insn per cycle + 0.877964105 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -211,15 +215,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.548564e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.554396e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.554396e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.494285e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.499863e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.499863e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.066846 sec - 2,050,944,515 cycles # 1.917 GHz - 3,272,430,014 instructions # 1.60 insn per cycle - 1.071113593 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2391) (512y: 46) (512z: 9571) +TOTAL : 1.108704 sec + 2,059,473,334 cycles # 1.852 GHz + 3,279,338,884 instructions # 1.59 insn per cycle + 1.113120539 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index 173c8076ae..12ad22d5a3 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_14:14:44 +DATE: 2023-11-03_19:49:50 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.347024e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.398089e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.403736e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.340393e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.392051e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.397944e+05 ) sec^-1 MeanMatrixElemValue = ( 4.159397e-01 +- 3.238804e-01 ) GeV^-4 -TOTAL : 0.461628 sec - 2,029,123,236 cycles # 3.024 GHz - 3,049,839,564 instructions # 1.50 insn per cycle - 0.728608078 seconds time elapsed +TOTAL : 0.462195 sec + 1,986,930,742 cycles # 2.947 GHz + 3,005,964,493 instructions # 1.51 insn per cycle + 0.730831332 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --common WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.545451e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.618382e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.621615e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.547500e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.620827e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.624055e+05 ) sec^-1 MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 -TOTAL : 1.793331 sec - 6,217,113,732 cycles # 3.073 GHz - 12,961,195,860 instructions # 2.08 insn per cycle - 2.079989875 seconds time elapsed +TOTAL : 1.798053 sec + 6,062,916,752 cycles # 2.993 GHz + 11,569,516,184 instructions # 1.91 insn per cycle + 2.082278895 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.060313e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.061332e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.061332e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.005661e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.006690e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.006690e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 7.966547 
sec - 24,594,743,209 cycles # 3.086 GHz - 78,127,435,867 instructions # 3.18 insn per cycle - 7.970399932 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.186459 sec + 24,671,953,454 cycles # 3.013 GHz + 78,137,621,710 instructions # 3.17 insn per cycle + 8.190517160 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.344667e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.358670e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.358670e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.107458e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.120841e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.120841e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 2.239093 sec - 6,454,079,659 cycles # 2.879 GHz - 20,117,446,936 instructions # 3.12 insn per cycle - 2.242924767 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13751) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.317106 sec + 6,488,771,451 cycles # 2.796 GHz + 20,124,539,496 instructions # 3.10 insn per cycle + 2.321142527 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.705593e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.712747e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.712747e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.647793e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.654673e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.654673e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.967614 sec - 2,815,668,718 cycles # 2.901 GHz - 6,983,504,963 instructions # 2.48 insn per cycle - 0.971467670 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11856) (512y: 0) (512z: 0) +TOTAL : 1.005506 sec + 2,843,966,049 cycles # 2.818 GHz + 6,991,496,346 instructions # 2.46 insn per cycle + 1.009548479 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.931679e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.941065e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.941065e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.895349e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.904605e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.904605e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.854674 sec - 2,481,889,358 cycles # 2.893 GHz - 6,287,664,762 instructions # 2.53 insn per cycle - 0.858561765 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10798) (512y: 43) (512z: 0) +TOTAL : 0.875040 sec + 2,495,845,822 cycles # 2.841 GHz + 6,297,369,404 instructions # 2.52 insn per cycle + 0.879134455 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.547581e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.553526e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.553526e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.504042e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.510113e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.510113e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 1.065766 sec - 2,044,093,407 cycles # 1.912 GHz - 3,257,736,990 instructions # 1.59 insn per cycle - 1.069778995 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2391) (512y: 46) (512z: 9571) +TOTAL : 1.099941 sec + 2,050,409,457 cycles # 1.858 GHz + 3,265,015,309 instructions # 1.59 insn per cycle + 1.104007255 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt index 9651ad3989..5b13ff9774 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_14:11:26 +DATE: 2023-11-03_19:46:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.350971e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.403437e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.408842e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.339869e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.391844e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.397472e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.461626 sec - 2,013,045,963 cycles # 3.001 GHz - 3,008,078,375 instructions # 1.49 insn per cycle - 0.728719546 seconds time elapsed +TOTAL : 0.461224 sec + 1,973,907,563 cycles # 2.940 GHz + 2,969,869,707 instructions # 1.50 insn per cycle + 0.729741448 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.545870e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.618926e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.622082e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.563612e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.637504e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.640751e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.752196 sec - 6,080,811,268 cycles # 3.076 GHz - 12,476,749,992 instructions # 2.05 insn per cycle - 2.034079474 seconds time elapsed +TOTAL : 1.749654 sec + 5,928,680,592 cycles # 2.999 GHz + 12,893,930,524 instructions # 2.17 insn per cycle + 2.033490620 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.054774e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.055772e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.055772e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.014770e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.015759e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.015759e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.986309 
sec - 24,581,427,262 cycles # 3.077 GHz - 78,126,819,050 instructions # 3.18 insn per cycle - 7.990240161 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.147957 sec + 24,635,567,678 cycles # 3.022 GHz + 78,133,891,626 instructions # 3.17 insn per cycle + 8.152140443 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.485156e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.498721e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.498721e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.062428e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.074909e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.074909e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.195463 sec - 6,450,261,189 cycles # 2.934 GHz - 20,116,674,747 instructions # 3.12 insn per cycle - 2.199312699 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13751) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.330015 sec + 6,475,827,642 cycles # 2.775 GHz + 20,124,634,132 instructions # 3.11 insn per cycle + 2.334037311 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.712568e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.719872e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.719872e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.595519e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.602006e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.602006e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.962474 sec - 2,809,844,250 cycles # 2.910 GHz - 6,984,023,413 instructions # 2.49 insn per cycle - 0.966406204 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11856) (512y: 0) (512z: 0) +TOTAL : 1.036160 sec + 2,838,919,957 cycles # 2.730 GHz + 6,991,694,320 instructions # 2.46 insn per cycle + 1.040335460 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.936291e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.945525e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.945525e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.893954e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.903085e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.903085e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.851856 sec - 2,477,933,241 cycles # 2.898 GHz - 6,291,229,332 instructions # 2.54 insn per cycle - 0.855761566 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10798) (512y: 43) (512z: 0) +TOTAL : 0.873924 sec + 2,489,283,092 cycles # 2.837 GHz + 6,298,948,511 instructions # 2.53 insn per cycle + 0.878050091 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.558361e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.564161e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.564161e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.497242e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.502884e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.502884e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.057206 sec - 2,039,611,088 cycles # 1.924 GHz - 3,261,377,179 instructions # 1.60 insn per cycle - 1.061196190 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2391) (512y: 46) (512z: 9571) +TOTAL : 1.103482 sec + 2,049,248,209 cycles # 1.852 GHz + 3,268,952,113 instructions # 1.60 insn per cycle + 1.107551558 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index a1f4be7db1..cdb252ac3a 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_14:08:13 +DATE: 2023-11-03_19:43:11 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -51,14 +51,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.773311e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.394019e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.402051e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.764175e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.406414e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.411755e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.462737 sec - 2,025,500,525 cycles # 3.009 GHz - 3,064,784,530 instructions # 1.51 insn per cycle - 0.729843858 seconds time elapsed +TOTAL : 0.466281 sec + 1,989,064,547 cycles # 2.930 GHz + 3,017,212,928 instructions # 1.52 insn per cycle + 0.737783039 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -71,14 +71,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.500648e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.626485e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.629727e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.472408e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.626435e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.629621e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.819392 sec - 6,275,186,955 cycles # 3.067 GHz - 12,611,714,187 instructions # 2.01 insn per cycle - 2.103369529 seconds time elapsed +TOTAL : 1.825252 sec + 6,129,357,136 cycles # 2.985 GHz + 13,024,512,874 instructions # 2.12 insn per cycle + 2.110041533 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -93,15 +93,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.072408e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.073443e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.073443e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.017146e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.018188e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.018188e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.918207 sec 
- 24,614,651,634 cycles # 3.108 GHz - 78,126,892,887 instructions # 3.17 insn per cycle - 7.922085250 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.138680 sec + 24,636,857,889 cycles # 3.027 GHz + 78,136,646,989 instructions # 3.17 insn per cycle + 8.142807331 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -119,15 +120,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.512189e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.526482e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.526482e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.266088e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.280126e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.280126e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.187884 sec - 6,446,988,066 cycles # 2.942 GHz - 20,116,530,852 instructions # 3.12 insn per cycle - 2.191848676 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13751) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.265043 sec + 6,477,387,096 cycles # 2.855 GHz + 20,124,193,083 instructions # 3.11 insn per cycle + 2.269259910 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -145,15 +147,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.704770e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.712060e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.712060e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.644884e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.651718e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.651718e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.966908 sec - 2,811,435,876 cycles # 2.898 GHz - 6,984,450,105 instructions # 2.48 insn per cycle - 0.970845648 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11856) (512y: 0) (512z: 0) +TOTAL : 1.005131 sec + 2,839,448,871 cycles # 2.816 GHz + 6,991,884,623 instructions # 2.46 insn per cycle + 1.009345557 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -171,15 +174,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.943965e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.953637e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.953637e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.866159e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.874920e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.874920e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.848496 sec - 2,478,042,485 cycles # 2.910 GHz - 6,291,817,509 instructions # 2.54 insn per cycle - 0.852404179 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10798) (512y: 43) (512z: 0) +TOTAL : 0.887083 sec + 2,489,977,422 cycles # 2.796 GHz + 6,298,695,060 instructions # 2.53 insn per cycle + 0.891225776 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -197,15 +201,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.552310e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.558116e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.558116e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.498745e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.504407e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.504407e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.061241 sec - 2,039,380,020 cycles # 1.915 GHz - 3,261,243,710 instructions # 1.60 insn per cycle - 1.065343977 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2391) (512y: 46) (512z: 9571) +TOTAL : 1.102380 sec + 2,046,697,565 cycles # 1.851 GHz + 3,268,682,926 instructions # 1.60 insn per cycle + 1.106464577 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index 4784426610..9fe77f3bb4 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_13:19:47 +DATE: 2023-11-03_19:09:27 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.349532e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.401557e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.407213e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.327293e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.373619e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.378917e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.477873 sec - 2,080,326,704 cycles # 2.999 GHz - 3,084,289,779 instructions # 1.48 insn per cycle - 0.773403482 seconds time elapsed +TOTAL : 0.480093 sec + 2,046,992,398 cycles # 2.957 GHz + 3,008,261,627 instructions # 1.47 insn per cycle + 0.750577809 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.564463e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.638632e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.641780e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.515177e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.572348e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.574911e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.725193 sec - 6,008,100,480 cycles # 3.075 GHz - 12,589,981,173 instructions # 2.10 insn per cycle - 2.013160084 seconds time elapsed +TOTAL : 1.715757 sec + 5,871,373,370 cycles # 3.006 GHz + 12,204,738,560 instructions # 2.08 insn per cycle + 2.009775672 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.060333e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.061405e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.061405e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.026797e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.027818e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.027818e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 7.964531 
sec - 24,559,153,248 cycles # 3.082 GHz - 77,854,490,785 instructions # 3.17 insn per cycle - 7.970382825 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3100) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.100053 sec + 24,563,227,881 cycles # 3.031 GHz + 77,860,200,084 instructions # 3.17 insn per cycle + 8.104232064 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3113) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.658932e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.673162e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.673162e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.430084e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.444359e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.444359e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.146568 sec - 6,409,796,903 cycles # 2.981 GHz - 20,082,551,685 instructions # 3.13 insn per cycle - 2.161769898 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13440) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.215968 sec + 6,421,588,621 cycles # 2.894 GHz + 20,090,220,099 instructions # 3.13 insn per cycle + 2.220335001 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13452) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.654966e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.661573e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.661573e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.625861e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.632520e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.632520e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.996366 sec - 2,912,304,991 cycles # 2.914 GHz - 7,126,891,821 instructions # 2.45 insn per cycle - 1.009169071 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:12243) (512y: 0) (512z: 0) +TOTAL : 1.016598 sec + 2,906,571,537 cycles # 2.849 GHz + 7,134,546,428 instructions # 2.45 insn per cycle + 1.020819368 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:12261) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.856846e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.865562e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.865562e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.810175e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.818358e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.818358e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.888266 sec - 2,589,026,411 cycles # 2.902 GHz - 6,435,733,406 instructions # 2.49 insn per cycle - 0.899709352 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11252) (512y: 27) (512z: 0) +TOTAL : 0.914087 sec + 2,595,791,217 cycles # 2.828 GHz + 6,442,852,611 instructions # 2.48 insn per cycle + 0.918452804 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11276) (512y: 27) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.491972e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.497264e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.497264e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.453251e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.458727e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.458727e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.105471 sec - 2,110,425,003 cycles # 1.905 GHz - 3,424,239,517 instructions # 1.62 insn per cycle - 1.116915330 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2888) (512y: 22) (512z: 9647) +TOTAL : 1.136514 sec + 2,124,554,510 cycles # 1.864 GHz + 3,431,456,558 instructions # 1.62 insn per cycle + 1.140688320 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2912) (512y: 22) (512z: 9647) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index 814bd11723..6d22eac4d2 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_13:54:05 +DATE: 2023-11-03_19:29:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.591643e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.632313e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.639138e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.584275e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.627587e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.631963e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.486615 sec - 2,119,343,458 cycles # 2.997 GHz - 3,147,759,889 instructions # 1.49 insn per cycle - 0.768780543 seconds time elapsed +TOTAL : 0.489018 sec + 2,081,067,674 cycles # 2.934 GHz + 3,133,776,802 instructions # 1.51 insn per cycle + 0.771988427 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.713328e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.773264e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.775863e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.747350e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.808169e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.810857e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.856016 sec - 6,397,119,813 cycles # 3.061 GHz - 12,981,241,131 instructions # 2.03 insn per cycle - 2.149194154 seconds time elapsed +TOTAL : 1.853996 sec + 6,275,481,753 cycles # 3.001 GHz + 12,514,155,894 instructions # 1.99 insn per cycle + 2.147936222 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.908089e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.908939e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.908939e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.644036e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.644860e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.644860e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 27.763472 
sec - 85,788,771,362 cycles # 3.090 GHz - 135,558,353,260 instructions # 1.58 insn per cycle - 27.767517079 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:15498) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 29.065327 sec + 87,424,924,787 cycles # 3.008 GHz + 135,567,300,472 instructions # 1.55 insn per cycle + 29.069446346 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:15486) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -106,8 +107,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627534e-04 -Avg ME (F77/C++) = 6.6275342482202682E-004 -Relative difference = 3.74528849077935e-08 +Avg ME (F77/C++) = 6.6275340278065809E-004 +Relative difference = 4.195614963669944e-09 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check.exe -p 64 256 1 OMP= @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.164817e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.177659e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.177659e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 
7.026233e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.038857e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.038857e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.294031 sec - 6,780,048,690 cycles # 2.951 GHz - 19,379,838,021 instructions # 2.86 insn per cycle - 2.298104631 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:69668) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.342565 sec + 6,786,587,363 cycles # 2.893 GHz + 19,387,387,931 instructions # 2.86 insn per cycle + 2.346831164 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:69680) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.501462e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.506925e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.506925e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.459444e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.464900e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.464900e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.096967 sec - 3,164,197,145 cycles # 2.875 GHz - 6,801,481,175 instructions # 2.15 insn per cycle - 1.101068017 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:49059) (512y: 0) (512z: 0) +TOTAL : 1.132478 sec + 3,179,013,562 cycles # 2.798 GHz + 6,809,043,401 instructions # 2.14 insn per cycle + 1.136902959 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:49077) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.832862e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.841270e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.841270e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.738168e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.745907e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.745907e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.899695 sec - 2,626,761,775 cycles # 2.909 GHz - 5,979,745,483 instructions # 2.28 insn per cycle - 0.903765130 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:42653) (512y: 11) (512z: 0) +TOTAL : 0.952016 sec + 2,651,392,730 cycles # 2.774 GHz + 5,987,188,755 instructions # 2.26 insn per cycle + 0.956397839 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:42677) (512y: 11) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.539328e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.545357e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.545357e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.472802e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.478184e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.478184e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.070480 sec - 2,068,281,241 cycles # 1.927 GHz - 3,494,063,429 instructions # 1.69 insn per cycle - 1.074606344 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5174) (512y: 3) (512z:44822) +TOTAL : 1.121995 sec + 2,073,738,270 cycles # 1.843 GHz + 3,501,511,021 instructions # 1.69 insn per cycle + 1.126283052 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5198) (512y: 3) (512z:44822) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index 20cfe4f0bf..5c9ad24a46 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_13:54:57 +DATE: 2023-11-03_19:30:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.574561e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.615562e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.620086e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.558233e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.598421e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.603327e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.484423 sec - 2,095,804,311 cycles # 2.968 GHz - 3,116,236,953 instructions # 1.49 insn per cycle - 0.766353398 seconds time elapsed +TOTAL : 0.487345 sec + 2,076,063,570 cycles # 2.928 GHz + 3,124,474,063 instructions # 1.50 insn per cycle + 0.769324674 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.686607e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.746725e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.749340e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.647182e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.706650e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.709351e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.856488 sec - 6,359,088,314 cycles # 3.041 GHz - 12,734,066,872 instructions # 2.00 insn per cycle - 2.148830509 seconds time elapsed +TOTAL : 1.865548 sec + 6,314,327,402 cycles # 2.992 GHz + 13,540,816,282 instructions # 2.14 insn per cycle + 2.170188129 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.814578e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.815423e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.815423e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.736423e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.737265e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.737265e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 28.210026 
sec - 86,898,226,581 cycles # 3.080 GHz - 136,168,004,846 instructions # 1.57 insn per cycle - 28.213953775 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:15836) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 28.597065 sec + 86,035,998,776 cycles # 3.009 GHz + 135,911,265,736 instructions # 1.58 insn per cycle + 28.601145029 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:15910) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -106,8 +107,8 @@ runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProces cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275350387951654E-004 -Relative difference = 5.853634118441163e-09 +Avg ME (F77/C++) = 6.6275352674967369E-004 +Relative difference = 4.0361421941458736e-08 OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check.exe -p 64 256 1 OMP= @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.146591e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.159401e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.159401e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 
6.976771e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.989628e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.989628e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.299520 sec - 6,843,640,652 cycles # 2.972 GHz - 19,433,215,212 instructions # 2.84 insn per cycle - 2.303591381 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:69710) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.358802 sec + 6,848,676,061 cycles # 2.899 GHz + 19,439,456,701 instructions # 2.84 insn per cycle + 2.362995374 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:69722) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.552959e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.559002e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.559002e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.510619e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.516450e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.516450e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.060874 sec - 3,099,042,655 cycles # 2.912 GHz - 6,712,227,285 instructions # 2.17 insn per cycle - 1.064787447 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:47649) (512y: 0) (512z: 0) +TOTAL : 1.093889 sec + 3,110,977,160 cycles # 2.835 GHz + 6,719,869,092 instructions # 2.16 insn per cycle + 1.098127483 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:47667) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.840652e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.849063e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.849063e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.794946e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.802956e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.802956e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.895700 sec - 2,623,288,445 cycles # 2.918 GHz - 5,963,021,182 instructions # 2.27 insn per cycle - 0.899657048 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:41818) (512y: 13) (512z: 0) +TOTAL : 0.922821 sec + 2,627,235,427 cycles # 2.838 GHz + 5,970,250,488 instructions # 2.27 insn per cycle + 0.926978795 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:41842) (512y: 13) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.539209e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.545158e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.545158e+04 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.483560e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.489106e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.489106e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.070536 sec - 2,071,173,952 cycles # 1.929 GHz - 3,487,510,778 instructions # 1.68 insn per cycle - 1.074519012 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4138) (512y: 4) (512z:44465) +TOTAL : 1.114177 sec + 2,080,137,201 cycles # 1.861 GHz + 3,494,948,543 instructions # 1.68 insn per cycle + 1.118521627 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4162) (512y: 4) (512z:44465) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 694fd5e359..b38c13fcd9 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_13:20:16 +DATE: 2023-11-03_19:09:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.458705e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.486462e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.488653e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.468828e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.491770e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.493892e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.523722 sec - 2,255,939,501 cycles # 2.995 GHz - 3,509,263,775 instructions # 1.56 insn per cycle - 0.830704766 seconds time elapsed +TOTAL : 0.519876 sec + 2,215,127,737 cycles # 2.957 GHz + 3,487,212,374 instructions # 1.57 insn per cycle + 0.807913712 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.121227e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.155141e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.156519e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.135164e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.161799e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.162966e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.036705 sec - 10,115,435,049 cycles # 3.073 GHz - 22,061,768,809 instructions # 2.18 insn per cycle - 3.348414492 seconds time elapsed +TOTAL : 3.028303 sec + 9,769,796,186 cycles # 2.979 GHz + 22,335,132,843 instructions # 2.29 insn per cycle + 3.336784998 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.961925e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.962832e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.962832e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.912244e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.913140e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.913140e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.366261 
sec - 25,827,115,509 cycles # 3.086 GHz - 79,439,243,960 instructions # 3.08 insn per cycle - 8.372513432 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4844) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.586099 sec + 25,914,180,302 cycles # 3.017 GHz + 79,445,505,152 instructions # 3.07 insn per cycle + 8.590406292 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4857) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.707062e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.710365e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.710365e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.695684e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.699049e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.699049e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.430067 sec - 12,714,024,789 cycles # 2.868 GHz - 38,548,672,066 instructions # 3.03 insn per cycle - 4.444871259 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:13149) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.447189 sec + 12,656,450,439 cycles # 2.844 GHz + 38,554,825,829 instructions # 3.05 insn per cycle + 4.451478069 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:13161) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.776736e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.794746e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.794746e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.537952e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.556620e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.556620e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.874756 sec - 5,484,082,227 cycles # 2.919 GHz - 13,477,398,777 instructions # 2.46 insn per cycle - 1.889668068 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11224) (512y: 0) (512z: 0) +TOTAL : 1.930375 sec + 5,512,214,802 cycles # 2.850 GHz + 13,486,265,307 instructions # 2.45 insn per cycle + 1.934770358 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11242) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.828008e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.851423e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.851423e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.638550e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.660856e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.660856e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.675570 sec - 4,865,852,507 cycles # 2.899 GHz - 12,134,094,396 instructions # 2.49 insn per cycle - 1.690145290 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10130) (512y: 79) (512z: 0) +TOTAL : 1.711054 sec + 4,872,445,248 cycles # 2.842 GHz + 12,141,983,198 instructions # 2.49 insn per cycle + 1.715434660 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10154) (512y: 79) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.679831e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.694420e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.694420e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.406789e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.420159e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.420159e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.141625 sec - 4,133,118,102 cycles # 1.926 GHz - 6,332,211,076 instructions # 1.53 insn per cycle - 2.156472073 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1778) (512y: 93) (512z: 9358) +TOTAL : 2.223975 sec + 4,144,217,356 cycles # 1.862 GHz + 6,340,578,545 instructions # 1.53 insn per cycle + 2.228285470 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1802) (512y: 93) (512z: 9358) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index 4d1d31ffb1..46f37c0a90 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_13:20:53 +DATE: 2023-11-03_19:10:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.475287e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.502927e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.505387e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.484364e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.507714e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.509764e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.518126 sec - 2,276,729,172 cycles # 3.041 GHz - 3,488,397,186 instructions # 1.53 insn per cycle - 0.821168706 seconds time elapsed +TOTAL : 0.519968 sec + 2,216,873,411 cycles # 2.952 GHz + 3,459,675,597 instructions # 1.56 insn per cycle + 0.809738739 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.127315e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.161363e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.162789e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.134555e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.161246e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.162402e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.029051 sec - 10,093,082,805 cycles # 3.065 GHz - 22,762,372,486 instructions # 2.26 insn per cycle - 3.352739234 seconds time elapsed +TOTAL : 3.016486 sec + 9,822,814,204 cycles # 3.004 GHz + 22,339,986,571 instructions # 2.27 insn per cycle + 3.325238208 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.946525e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.947456e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.947456e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.909809e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.910727e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910727e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.432240 
sec - 25,964,717,090 cycles # 3.078 GHz - 79,447,901,186 instructions # 3.06 insn per cycle - 8.438341900 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4491) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.597381 sec + 25,939,435,501 cycles # 3.017 GHz + 79,457,351,519 instructions # 3.06 insn per cycle + 8.601657625 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 4504) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.776995e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.780282e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.780282e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.664829e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.668218e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.668218e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.348375 sec - 12,634,604,343 cycles # 2.903 GHz - 38,518,123,106 instructions # 3.05 insn per cycle - 4.360783870 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:12916) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.484461 sec + 12,651,418,370 cycles # 2.819 GHz + 38,525,727,884 instructions # 3.05 insn per cycle + 4.488762135 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:12928) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.631631e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.648449e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.648449e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.385701e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.404187e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.404187e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.906965 sec - 5,551,258,215 cycles # 2.907 GHz - 13,604,309,042 instructions # 2.45 insn per cycle - 1.921266748 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11309) (512y: 0) (512z: 0) +TOTAL : 1.965077 sec + 5,557,225,506 cycles # 2.823 GHz + 13,610,780,927 instructions # 2.45 insn per cycle + 1.969439061 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:11327) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.805918e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.828218e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.828218e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 9.328216e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.349743e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.349743e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.678299 sec - 4,900,060,132 cycles # 2.913 GHz - 12,267,575,637 instructions # 2.50 insn per cycle - 1.690581680 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10119) (512y: 239) (512z: 0) +TOTAL : 1.767465 sec + 4,920,931,185 cycles # 2.779 GHz + 12,278,542,674 instructions # 2.50 insn per cycle + 1.771926617 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:10143) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.542959e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.556798e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.556798e+03 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.389874e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.403004e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.403004e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.180419 sec - 4,152,247,379 cycles # 1.902 GHz - 6,439,813,764 instructions # 1.55 insn per cycle - 2.195336855 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1603) (512y: 191) (512z: 9356) +TOTAL : 2.228912 sec + 4,146,930,402 cycles # 1.858 GHz + 6,446,453,346 instructions # 1.55 insn per cycle + 2.233245374 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1627) (512y: 191) (512z: 9356) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index e2653e70e0..2048a9698e 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-11-03_13:23:13 +DATE: 2023-11-03_19:12:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.066555e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.066940e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.067040e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.071850e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.072225e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.072335e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.448849 sec - 8,442,927,493 cycles # 3.084 GHz - 18,742,679,289 instructions # 2.22 insn per cycle - 2.849862735 seconds time elapsed +TOTAL : 2.421447 sec + 8,245,731,454 cycles # 3.012 GHz + 18,688,279,165 instructions # 2.27 insn per cycle + 2.797097094 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.254183e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.256391e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.256597e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.261920e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.263777e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.264034e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.999010 sec - 13,287,409,997 cycles # 3.074 GHz - 27,420,293,358 instructions # 2.06 insn per cycle - 4.379911015 seconds time elapsed +TOTAL : 3.993277 sec + 12,924,149,664 cycles # 2.993 GHz + 29,920,520,122 instructions # 2.32 insn per cycle + 4.373104302 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.063391e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.063614e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.063614e+01 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.414546e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.414780e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.414780e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 
6.548783 sec - 18,780,752,615 cycles # 2.869 GHz - 53,910,020,226 instructions # 2.87 insn per cycle - 6.554617751 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32434) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.278557 sec + 18,784,400,880 cycles # 2.990 GHz + 53,915,743,321 instructions # 2.87 insn per cycle + 6.282578284 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32447) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.680442e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.680533e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.680533e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.622225e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.622313e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.622313e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.146585 sec - 9,799,648,783 cycles # 3.113 GHz - 27,088,050,418 instructions # 2.76 insn per cycle - 3.159402614 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96429) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.260349 sec + 9,843,353,366 cycles # 3.016 GHz + 27,093,120,012 instructions # 2.75 insn per cycle + 3.264542212 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96441) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.639391e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.639978e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.639978e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.543297e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.543763e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.543763e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.456416 sec - 4,224,739,188 cycles # 2.899 GHz - 9,555,401,933 instructions # 2.26 insn per cycle - 1.469570493 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84372) (512y: 0) (512z: 0) +TOTAL : 1.494911 sec + 4,247,565,583 cycles # 2.835 GHz + 9,561,660,282 instructions # 2.25 insn per cycle + 1.498994646 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.135680e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.136237e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.136237e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.041064e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.041630e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.041630e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.282040 sec - 3,715,294,067 cycles # 2.898 GHz - 8,479,241,206 instructions # 2.28 insn per cycle - 1.291522613 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79967) (512y: 91) (512z: 0) +TOTAL : 1.312043 sec + 3,711,873,932 cycles # 2.822 GHz + 8,485,580,977 instructions # 2.29 insn per cycle + 1.316064551 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.777236e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.777800e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.777800e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.655846e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.656376e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.656376e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.404458 sec - 2,686,476,600 cycles # 1.914 GHz - 4,267,694,416 instructions # 1.59 insn per cycle - 1.415354808 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2260) (512y: 105) (512z:79105) +TOTAL : 1.450066 sec + 2,692,078,825 cycles # 1.852 GHz + 4,273,245,565 instructions # 1.59 insn per cycle + 1.454158841 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 025b178cf6..fbbae31086 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-11-03_14:03:14 +DATE: 2023-11-03_19:38:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.065666e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.066634e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.066634e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.071334e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.072304e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.072304e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.361861 sec - 8,209,630,411 cycles # 3.065 GHz - 18,786,762,328 instructions # 2.29 insn per cycle - 2.737224579 seconds time elapsed +TOTAL : 2.376547 sec + 8,066,576,997 cycles # 2.992 GHz + 17,224,378,863 instructions # 2.14 insn per cycle + 2.753340167 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.225131e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.256858e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.256858e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.219956e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.252584e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.252584e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.981554 sec - 13,248,550,244 cycles # 3.081 GHz - 30,399,077,328 instructions # 2.29 insn per cycle - 4.359768682 seconds time elapsed +TOTAL : 3.983566 sec + 12,755,700,095 cycles # 2.969 GHz + 26,780,853,821 instructions # 2.10 insn per cycle + 4.362214203 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -103,15 +103,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.471249e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.471484e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.471484e+01 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.520548e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.520796e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.520796e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 
-TOTAL : 6.236715 sec - 18,875,634,034 cycles # 3.025 GHz - 53,908,664,810 instructions # 2.86 insn per cycle - 6.240512438 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32434) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.203566 sec + 18,829,459,475 cycles # 3.034 GHz + 53,915,868,697 instructions # 2.86 insn per cycle + 6.207586404 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32447) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -130,15 +131,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.670534e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.670629e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.670629e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.632618e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.632708e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.632708e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.163057 sec - 9,804,224,210 cycles # 3.097 GHz - 27,086,274,445 instructions # 2.76 insn per cycle - 3.166923044 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96429) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.239187 sec + 9,805,468,555 cycles # 3.024 GHz + 27,094,086,958 instructions # 2.76 insn per cycle + 3.243245202 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96441) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 
6 tests. @@ -157,15 +159,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.592879e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.593315e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.593315e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.541893e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.542348e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.542348e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.471724 sec - 4,258,563,758 cycles # 2.887 GHz - 9,555,052,696 instructions # 2.24 insn per cycle - 1.475635999 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84372) (512y: 0) (512z: 0) +TOTAL : 1.496042 sec + 4,247,154,617 cycles # 2.833 GHz + 9,562,315,517 instructions # 2.25 insn per cycle + 1.500165545 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,15 +187,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.109707e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.110252e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.110252e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.062512e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.063083e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.063083e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.286979 sec - 3,729,066,198 cycles # 2.891 GHz - 8,478,886,169 instructions # 2.27 insn per cycle - 1.290608704 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79967) (512y: 91) (512z: 0) +TOTAL : 1.305609 sec + 3,707,362,205 cycles # 2.832 GHz + 8,486,374,508 instructions # 2.29 insn per cycle + 1.309600698 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -211,15 +215,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.737669e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.738260e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.738260e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.623189e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.623772e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.623772e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.415289 sec - 2,702,491,212 cycles # 1.905 GHz - 4,267,005,207 instructions # 1.58 insn per cycle - 1.419327297 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2260) (512y: 105) (512z:79105) +TOTAL : 1.463361 sec + 2,697,367,089 cycles # 1.839 GHz + 4,274,143,132 instructions # 1.58 insn per cycle + 1.467446249 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2284) (512y: 105) (512z:79105) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index d91b7a964f..c51993cada 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-11-03_13:24:31 +DATE: 2023-11-03_19:13:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.064830e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.065205e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.065307e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.063023e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.063394e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.063534e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.445677 sec - 8,430,603,165 cycles # 3.077 GHz - 17,728,518,966 instructions # 2.10 insn per cycle - 2.842706377 seconds time elapsed +TOTAL : 2.424921 sec + 8,232,683,158 cycles # 2.990 GHz + 17,655,317,796 instructions # 2.14 insn per cycle + 2.812107310 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe -p 1 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.276158e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.278341e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.278585e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.268141e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.269954e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.270195e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.987255 sec - 13,257,486,465 cycles # 3.075 GHz - 30,585,814,740 instructions # 2.31 insn per cycle - 4.367674825 seconds time elapsed +TOTAL : 3.993040 sec + 12,961,046,511 cycles # 3.002 GHz + 29,041,240,897 instructions # 2.24 insn per cycle + 4.374135451 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.272709e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.272950e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.272950e+01 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.423791e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.424026e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.424026e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 
6.386470 sec - 18,959,780,309 cycles # 2.970 GHz - 53,918,897,900 instructions # 2.84 insn per cycle - 6.392426466 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32049) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.281065 sec + 18,737,351,960 cycles # 2.982 GHz + 53,924,990,961 instructions # 2.88 insn per cycle + 6.285160496 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32062) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.689998e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.690092e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.690092e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.617244e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.617330e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.617330e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.132253 sec - 9,720,938,501 cycles # 3.102 GHz - 27,083,782,808 instructions # 2.79 insn per cycle - 3.144238123 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96272) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.274940 sec + 9,810,206,221 cycles # 2.993 GHz + 27,090,315,670 instructions # 2.76 insn per cycle + 3.279033724 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96284) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.616659e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.617157e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.617157e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.504500e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.504945e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.504945e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.465435 sec - 4,209,972,441 cycles # 2.873 GHz - 9,555,330,418 instructions # 2.27 insn per cycle - 1.479902065 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84460) (512y: 0) (512z: 0) +TOTAL : 1.511756 sec + 4,249,692,377 cycles # 2.805 GHz + 9,561,658,782 instructions # 2.25 insn per cycle + 1.515796071 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84478) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.138076e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.138707e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.138707e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.067567e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.068141e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.068141e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.279971 sec - 3,719,590,213 cycles # 2.903 GHz - 8,479,382,445 instructions # 2.28 insn per cycle - 1.294846019 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79990) (512y: 241) (512z: 0) +TOTAL : 1.304248 sec + 3,697,935,435 cycles # 2.828 GHz + 8,485,512,243 instructions # 2.29 insn per cycle + 1.308302011 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:80014) (512y: 241) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.717045e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.717595e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.717595e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.626044e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.626572e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.626572e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.424665 sec - 2,692,562,922 cycles # 1.889 GHz - 4,270,528,609 instructions # 1.59 insn per cycle - 1.435537142 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2145) (512y: 187) (512z:79110) +TOTAL : 1.462511 sec + 2,704,261,685 cycles # 1.846 GHz + 4,277,565,036 instructions # 1.58 insn per cycle + 1.466688212 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2169) (512y: 187) (512z:79110) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 15538f78ca..0a60ba6d62 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-11-03_13:25:48 +DATE: 2023-11-03_19:14:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.767911e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.768744e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.768980e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.757584e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.758488e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.758845e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.688375 sec - 5,951,848,603 cycles # 3.071 GHz - 11,790,908,447 instructions # 1.98 insn per cycle - 2.046537894 seconds time elapsed +TOTAL : 1.659896 sec + 5,702,631,198 cycles # 2.947 GHz + 11,810,983,379 instructions # 2.07 insn per cycle + 1.991424837 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.379848e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.380728e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.380818e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.332515e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.333177e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.333265e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.906118 sec - 6,749,188,338 cycles # 3.084 GHz - 13,265,647,719 instructions # 1.97 insn per cycle - 2.247715707 seconds time elapsed +TOTAL : 1.929687 sec + 6,546,483,377 cycles # 2.952 GHz + 14,155,312,120 instructions # 2.16 insn per cycle + 2.273547514 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.985771e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.986041e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.986041e+01 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.817807e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.818080e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.818080e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 
5.877729 sec - 17,890,232,313 cycles # 3.044 GHz - 53,582,796,261 instructions # 3.00 insn per cycle - 5.884006985 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20194) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.991502 sec + 17,897,297,418 cycles # 2.986 GHz + 53,590,305,749 instructions # 2.99 insn per cycle + 5.995609214 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.576937e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.577363e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.577363e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.535145e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.535592e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.535592e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.480853 sec - 4,574,156,529 cycles # 3.087 GHz - 13,756,406,040 instructions # 3.01 insn per cycle - 1.495727074 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96974) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.499015 sec + 4,559,682,745 cycles # 3.035 GHz + 13,762,791,022 instructions # 3.02 insn per cycle + 1.503172123 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.282438e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.284225e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.284225e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.101340e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.103065e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.103065e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.728895 sec - 2,121,768,847 cycles # 2.905 GHz - 4,810,848,757 instructions # 2.27 insn per cycle - 0.742146050 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84886) (512y: 0) (512z: 0) +TOTAL : 0.748885 sec + 2,136,693,329 cycles # 2.841 GHz + 4,817,082,222 instructions # 2.25 insn per cycle + 0.752876610 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.310636e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.312960e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.312960e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.112158e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.114365e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.114365e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.640831 sec - 1,856,094,515 cycles # 2.896 GHz - 4,267,918,984 instructions # 2.30 insn per cycle - 0.656015079 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:80586) (512y: 46) (512z: 0) +TOTAL : 0.656308 sec + 1,869,942,366 cycles # 2.835 GHz + 4,274,318,244 instructions # 2.29 insn per cycle + 0.660301551 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.495650e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.497949e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.497949e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.296564e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.298817e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.298817e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.709649 sec - 1,349,486,163 cycles # 1.899 GHz - 2,152,592,889 instructions # 1.60 insn per cycle - 0.722089483 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2854) (512y: 49) (512z:79298) +TOTAL : 0.728798 sec + 1,352,736,555 cycles # 1.847 GHz + 2,158,877,197 instructions # 1.60 insn per cycle + 0.732817833 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2878) (512y: 49) (512z:79298) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index 7a872855a6..17034b30a2 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-11-03_14:04:31 +DATE: 2023-11-03_19:39:36 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.809501e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.811290e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.811290e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.806522e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.808414e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.808414e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187094e-05 +- 9.825664e-06 ) GeV^-6 -TOTAL : 1.592651 sec - 5,720,568,208 cycles # 3.068 GHz - 12,197,897,724 instructions # 2.13 insn per cycle - 1.923051082 seconds time elapsed +TOTAL : 1.595844 sec + 5,598,060,641 cycles # 2.994 GHz + 11,899,085,664 instructions # 2.13 insn per cycle + 1.927316991 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.321289e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.334233e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.334233e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.306726e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.320071e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.320071e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856441e-04 +- 8.331096e-05 ) GeV^-6 -TOTAL : 1.869715 sec - 6,568,388,726 cycles # 3.060 GHz - 14,401,992,310 instructions # 2.19 insn per cycle - 2.203701933 seconds time elapsed +TOTAL : 1.896426 sec + 6,482,998,335 cycles # 2.990 GHz + 13,087,346,923 instructions # 2.02 insn per cycle + 2.228516012 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -103,15 +103,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.727810e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.728089e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.728089e+01 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.982697e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.982966e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.982966e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 
6.057945 sec - 17,910,983,389 cycles # 2.955 GHz - 53,583,204,369 instructions # 2.99 insn per cycle - 6.061887031 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20194) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.882902 sec + 17,886,003,642 cycles # 3.039 GHz + 53,589,820,489 instructions # 3.00 insn per cycle + 5.886864227 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -130,15 +131,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.402700e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.403121e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.403121e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.517559e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.518006e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.518006e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.554122 sec - 4,576,472,109 cycles # 2.938 GHz - 13,756,199,358 instructions # 3.01 insn per cycle - 1.558315371 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96974) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.508545 sec + 4,560,262,414 cycles # 3.016 GHz + 13,763,353,615 instructions # 3.02 insn per cycle + 1.512732617 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,15 +159,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.189371e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.191147e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.191147e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.047943e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.049624e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.049624e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.736358 sec - 2,124,251,465 cycles # 2.873 GHz - 4,810,297,342 instructions # 2.26 insn per cycle - 0.740176225 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84886) (512y: 0) (512z: 0) +TOTAL : 0.755133 sec + 2,153,006,129 cycles # 2.839 GHz + 4,818,213,561 instructions # 2.24 insn per cycle + 0.759225829 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,15 +187,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.312133e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.314635e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.314635e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.134004e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.136209e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.136209e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.637308 sec - 1,855,922,863 cycles # 2.897 GHz - 4,267,475,480 instructions # 2.30 insn per cycle - 0.641166139 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:80586) (512y: 46) (512z: 0) +TOTAL : 0.654556 sec + 1,870,329,136 cycles # 2.842 GHz + 4,274,869,931 instructions # 2.29 insn per cycle + 0.658687365 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -211,15 +215,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.529576e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.531852e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.531852e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.265196e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.267580e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.267580e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.703798 sec - 1,344,909,722 cycles # 1.902 GHz - 2,152,113,875 instructions # 1.60 insn per cycle - 0.707777505 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2854) (512y: 49) (512z:79298) +TOTAL : 0.732395 sec + 1,354,970,411 cycles # 1.842 GHz + 2,159,667,135 instructions # 1.59 insn per cycle + 0.736399157 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2878) (512y: 49) (512z:79298) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index 4090b5123c..9247dc6a21 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-11-03_13:26:45 +DATE: 2023-11-03_19:15:46 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.762192e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.763017e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.763276e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.757824e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.758656e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.758919e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.686323 sec - 5,942,489,542 cycles # 3.065 GHz - 11,726,530,507 instructions # 1.97 insn per cycle - 2.047701806 seconds time elapsed +TOTAL : 1.658123 sec + 5,805,953,943 cycles # 3.008 GHz + 12,018,291,784 instructions # 2.07 insn per cycle + 1.988767308 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe -p 1 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.338330e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.339105e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.339234e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.327280e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.327957e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.328041e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.912323 sec - 6,739,286,561 cycles # 3.079 GHz - 13,579,863,067 instructions # 2.02 insn per cycle - 2.245726170 seconds time elapsed +TOTAL : 1.929152 sec + 6,666,976,802 cycles # 3.013 GHz + 13,831,721,664 instructions # 2.07 insn per cycle + 2.269150647 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.113073e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.113355e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.113355e+01 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.798758e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.799028e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.799028e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 
5.799115 sec - 17,812,397,434 cycles # 3.072 GHz - 53,573,245,900 instructions # 3.01 insn per cycle - 5.805588787 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:20193) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.002749 sec + 17,897,748,334 cycles # 2.981 GHz + 53,583,210,251 instructions # 2.99 insn per cycle + 6.006727820 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:20206) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.587545e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.588072e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.588072e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.533102e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.533527e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.533527e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.475125 sec - 4,554,309,272 cycles # 3.084 GHz - 13,749,743,854 instructions # 3.02 insn per cycle - 1.487334628 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:96594) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.498905 sec + 4,550,573,846 cycles # 3.029 GHz + 13,756,139,320 instructions # 3.02 insn per cycle + 1.503009468 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96606) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.245882e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.247655e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.247655e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.049905e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.051589e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.051589e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.734261 sec - 2,124,964,575 cycles # 2.896 GHz - 4,812,728,309 instructions # 2.26 insn per cycle - 0.749171337 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:85341) (512y: 0) (512z: 0) +TOTAL : 0.753863 sec + 2,147,980,052 cycles # 2.837 GHz + 4,819,413,658 instructions # 2.24 insn per cycle + 0.757858909 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:85359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.153742e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.155987e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.155987e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 8.121398e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.123528e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.123528e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.653340 sec - 1,875,053,062 cycles # 2.865 GHz - 4,269,884,463 instructions # 2.28 insn per cycle - 0.663528756 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:81051) (512y: 26) (512z: 0) +TOTAL : 0.655569 sec + 1,875,337,702 cycles # 2.847 GHz + 4,276,013,202 instructions # 2.28 insn per cycle + 0.659452126 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:81075) (512y: 26) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.574090e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.576360e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.576360e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.258028e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.260328e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.260328e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.703514 sec - 1,352,429,724 cycles # 1.923 GHz - 2,159,038,912 instructions # 1.60 insn per cycle - 0.718100362 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3451) (512y: 34) (512z:79492) +TOTAL : 0.732438 sec + 1,358,895,231 cycles # 1.851 GHz + 2,165,631,438 instructions # 1.59 insn per cycle + 0.736476884 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3475) (512y: 34) (512z:79492) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index b8deba13b8..67db6760e6 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-11-03_13:27:42 +DATE: 2023-11-03_19:16:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.692075e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.692589e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.692738e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.697393e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.698008e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.698206e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.169362 sec - 7,653,818,833 cycles # 3.074 GHz - 16,944,539,564 instructions # 2.21 insn per cycle - 2.552070102 seconds time elapsed +TOTAL : 2.168263 sec + 7,466,161,453 cycles # 3.002 GHz + 16,782,968,221 instructions # 2.25 insn per cycle + 2.544374597 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe -p 1 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.115093e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.115408e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.115436e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.111494e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.111753e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.111788e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.394126 sec - 11,511,876,452 cycles # 3.097 GHz - 26,536,909,196 instructions # 2.31 insn per cycle - 3.774250196 seconds time elapsed +TOTAL : 3.403934 sec + 11,261,999,951 cycles # 3.015 GHz + 23,279,217,600 instructions # 2.07 insn per cycle + 3.795199307 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.974923e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.975145e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.975145e+01 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.891205e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.891420e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.891420e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 
6.621112 sec - 19,074,935,811 cycles # 2.880 GHz - 54,145,373,257 instructions # 2.84 insn per cycle - 6.624983367 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32053) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.695697 sec + 19,121,802,644 cycles # 2.855 GHz + 54,152,938,154 instructions # 2.83 insn per cycle + 6.699723618 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32066) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.658232e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.658328e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.658328e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.589938e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.590022e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.590022e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.185966 sec - 9,298,086,311 cycles # 2.915 GHz - 26,151,974,165 instructions # 2.81 insn per cycle - 3.193226772 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:95993) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.327144 sec + 9,411,187,085 cycles # 2.826 GHz + 26,159,441,613 instructions # 2.78 insn per cycle + 3.331341639 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:96005) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.761899e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.762352e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.762352e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.556465e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.556911e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.556911e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.405505 sec - 4,083,897,105 cycles # 2.898 GHz - 9,220,939,721 instructions # 2.26 insn per cycle - 1.413149403 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84137) (512y: 0) (512z: 0) +TOTAL : 1.488421 sec + 4,038,495,427 cycles # 2.707 GHz + 9,228,280,089 instructions # 2.29 insn per cycle + 1.492543554 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:84155) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.349536e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.350160e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.350160e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.276116e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.276827e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.276827e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.215980 sec - 3,534,568,745 cycles # 2.898 GHz - 8,168,436,140 instructions # 2.31 insn per cycle - 1.225628391 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79820) (512y: 79) (512z: 0) +TOTAL : 1.240270 sec + 3,525,917,357 cycles # 2.835 GHz + 8,175,363,577 instructions # 2.32 insn per cycle + 1.244573424 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79844) (512y: 79) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.882490e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.883154e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.883154e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.671636e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.672174e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.672174e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.362278 sec - 2,610,656,560 cycles # 1.911 GHz - 4,147,890,063 instructions # 1.59 insn per cycle - 1.372539515 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2021) (512y: 93) (512z:78760) +TOTAL : 1.443903 sec + 2,654,961,238 cycles # 1.834 GHz + 4,155,116,507 instructions # 1.57 insn per cycle + 1.448186385 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2045) (512y: 93) (512z:78760) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index 60f387590d..ba876e5994 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-11-03_13:28:57 +DATE: 2023-11-03_19:17:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.683617e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.684135e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.684388e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.679011e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.679665e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.679866e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.172775 sec - 7,425,569,558 cycles # 2.989 GHz - 15,063,628,728 instructions # 2.03 insn per cycle - 2.549783467 seconds time elapsed +TOTAL : 2.173579 sec + 7,474,410,637 cycles # 3.001 GHz + 15,946,585,145 instructions # 2.13 insn per cycle + 2.550103231 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe -p 1 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.112713e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.113034e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.113072e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.109202e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.109461e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.109492e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.397376 sec - 11,478,639,054 cycles # 3.085 GHz - 26,660,080,251 instructions # 2.32 insn per cycle - 3.777534329 seconds time elapsed +TOTAL : 3.402138 sec + 11,227,553,919 cycles # 3.005 GHz + 23,286,904,291 instructions # 2.07 insn per cycle + 3.792137186 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.976554e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.976766e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.976766e+01 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 7.862068e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.862272e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.862272e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 
6.619732 sec - 19,255,909,015 cycles # 2.908 GHz - 54,146,217,870 instructions # 2.81 insn per cycle - 6.623630619 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:32230) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.723543 sec + 19,074,467,052 cycles # 2.836 GHz + 54,156,087,092 instructions # 2.84 insn per cycle + 6.727488337 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:32243) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.632685e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.632772e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.632772e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.568667e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.568765e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.568765e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.236276 sec - 9,400,746,844 cycles # 2.902 GHz - 26,071,648,675 instructions # 2.77 insn per cycle - 3.240211773 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:95887) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.375716 sec + 9,382,313,393 cycles # 2.776 GHz + 26,079,058,590 instructions # 2.78 insn per cycle + 3.379999018 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:95899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.805141e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.805613e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.805613e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.662540e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.663002e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.663002e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.389557 sec - 4,052,419,327 cycles # 2.909 GHz - 9,205,965,348 instructions # 2.27 insn per cycle - 1.393548917 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83758) (512y: 0) (512z: 0) +TOTAL : 1.448110 sec + 4,074,555,185 cycles # 2.807 GHz + 9,213,769,276 instructions # 2.26 insn per cycle + 1.452285529 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:83776) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.398621e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.399244e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.399244e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 4.250454e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.251202e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.251202e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.202525 sec - 3,498,191,184 cycles # 2.901 GHz - 8,160,684,568 instructions # 2.33 insn per cycle - 1.206410282 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79349) (512y: 229) (512z: 0) +TOTAL : 1.248074 sec + 3,536,570,557 cycles # 2.826 GHz + 8,168,521,757 instructions # 2.31 insn per cycle + 1.252256213 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:79373) (512y: 229) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.825276e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.825930e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.825930e+02 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.691090e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.691677e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.691677e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.382476 sec - 2,614,942,717 cycles # 1.887 GHz - 4,146,069,189 instructions # 1.59 insn per cycle - 1.386689646 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1468) (512y: 175) (512z:78776) +TOTAL : 1.437256 sec + 2,622,132,529 cycles # 1.820 GHz + 4,153,851,791 instructions # 1.58 insn per cycle + 1.441375266 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1492) (512y: 175) (512z:78776) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index a2058aa09b..32c5e2345e 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-11-03_13:21:30 +DATE: 2023-11-03_19:11:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.677221e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.325089e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.707683e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.931878e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.341004e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.663503e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.444363 sec - 2,018,559,464 cycles # 3.017 GHz - 2,824,765,054 instructions # 1.40 insn per cycle - 0.739050843 seconds time elapsed +TOTAL : 0.446607 sec + 1,970,164,515 cycles # 2.938 GHz + 2,759,248,123 instructions # 1.40 insn per cycle + 0.729204009 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.257167e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.087055e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.516826e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.710415e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.163714e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.497427e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.526934 sec - 2,296,852,180 cycles # 3.002 GHz - 3,272,393,278 instructions # 1.42 insn per cycle - 0.822447233 seconds time elapsed +TOTAL : 0.523022 sec + 2,217,601,456 cycles # 2.938 GHz + 3,205,519,009 instructions # 1.45 insn per cycle + 0.813078242 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.098598e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.121368e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.121368e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.073669e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.096220e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.096220e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.509656 
sec - 4,704,275,482 cycles # 3.109 GHz - 13,460,137,213 instructions # 2.86 insn per cycle - 1.516168697 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 847) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.548155 sec + 4,698,700,649 cycles # 3.029 GHz + 13,467,797,998 instructions # 2.87 insn per cycle + 1.552304744 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.982331e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.057802e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.057802e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.948763e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.021816e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.021816e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.844686 sec - 2,623,720,298 cycles # 3.091 GHz - 7,549,133,073 instructions # 2.88 insn per cycle - 0.858775496 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3083) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 0.862036 sec + 2,624,478,574 cycles # 3.032 GHz + 7,556,486,050 instructions # 2.88 insn per cycle + 0.866308924 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3095) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.403395e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.625866e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.625866e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.306326e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.524533e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.524533e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.499871 sec - 1,470,509,842 cycles # 2.919 GHz - 3,115,984,743 instructions # 2.12 insn per cycle - 0.512575147 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2899) (512y: 0) (512z: 0) +TOTAL : 0.517734 sec + 1,480,526,951 cycles # 2.839 GHz + 3,123,082,416 instructions # 2.11 insn per cycle + 0.522085763 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.518653e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.770602e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.770602e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.669407e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.933881e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.933881e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.484813 sec - 1,332,633,375 cycles # 2.727 GHz - 2,977,844,779 instructions # 2.23 insn per cycle - 0.499587088 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 104) (512z: 0) +TOTAL : 0.468132 sec + 1,341,729,382 cycles # 2.844 GHz + 2,984,537,487 instructions # 2.22 insn per cycle + 0.472335074 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.570932e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.696607e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.696607e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.279474e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.384367e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.384367e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.657932 sec - 1,319,186,002 cycles # 1.993 GHz - 1,949,358,587 instructions # 1.48 insn per cycle - 0.672712115 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1348) (512y: 106) (512z: 2173) +TOTAL : 0.743758 sec + 1,327,382,690 cycles # 1.776 GHz + 1,956,119,028 instructions # 1.47 insn per cycle + 0.747985259 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index 910b708052..83cbc116b3 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-11-03_14:01:33 +DATE: 2023-11-03_19:36:51 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.664243e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.145947e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.145947e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.568026e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.132079e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.132079e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.469985 sec - 2,040,349,232 cycles # 3.003 GHz - 3,004,500,181 instructions # 1.47 insn per cycle - 0.738746623 seconds time elapsed +TOTAL : 0.473711 sec + 2,006,451,769 cycles # 2.929 GHz + 2,970,353,925 instructions # 1.48 insn per cycle + 0.742629859 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.301893e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.269698e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.269698e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.250433e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.283042e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.283042e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.746650 sec - 3,012,108,544 cycles # 3.028 GHz - 4,567,300,950 instructions # 1.52 insn per cycle - 1.052502726 seconds time elapsed +TOTAL : 0.748674 sec + 3,002,657,574 cycles # 2.966 GHz + 4,543,695,427 instructions # 1.51 insn per cycle + 1.069550305 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -103,15 +103,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.084702e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.107837e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.107837e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.069178e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.091931e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.091931e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 
1.535237 sec - 4,735,135,324 cycles # 3.077 GHz - 13,467,171,096 instructions # 2.84 insn per cycle - 1.539513895 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 847) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.560726 sec + 4,731,718,585 cycles # 3.025 GHz + 13,472,168,375 instructions # 2.85 insn per cycle + 1.565141837 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -130,15 +131,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.980959e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.055590e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.055590e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.899999e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.973174e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.973174e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.851887 sec - 2,650,539,837 cycles # 3.099 GHz - 7,597,895,175 instructions # 2.87 insn per cycle - 0.856130482 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3083) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 0.892296 sec + 2,670,244,018 cycles # 2.980 GHz + 7,605,526,435 instructions # 2.85 insn per cycle + 0.896907337 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3095) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,15 +159,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.369342e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.589036e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.589036e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.091835e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.296236e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.296236e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.510869 sec - 1,501,262,258 cycles # 2.918 GHz - 3,163,821,980 instructions # 2.11 insn per cycle - 0.515082973 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2899) (512y: 0) (512z: 0) +TOTAL : 0.561077 sec + 1,524,432,631 cycles # 2.698 GHz + 3,172,781,548 instructions # 2.08 insn per cycle + 0.565642937 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,15 +187,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.718302e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.984568e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.984568e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.608228e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.871141e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.871141e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.466300 sec - 1,367,345,452 cycles # 2.909 GHz - 3,028,055,122 instructions # 2.21 insn per cycle - 0.470678204 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 104) (512z: 0) +TOTAL : 0.483758 sec + 1,382,209,807 cycles # 2.835 GHz + 3,035,256,040 instructions # 2.20 insn per cycle + 0.488244630 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -211,15 +215,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.563950e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.686880e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.686880e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.425183e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.544675e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.544675e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.666406 sec - 1,352,029,392 cycles # 2.018 GHz - 1,988,030,302 instructions # 1.47 insn per cycle - 0.670887553 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1348) (512y: 106) (512z: 2173) +TOTAL : 0.707575 sec + 1,368,070,277 cycles # 1.923 GHz + 1,995,483,449 instructions # 1.46 insn per cycle + 0.712159059 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index 1d4e53af3a..5c16312148 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-11-03_13:21:48 +DATE: 2023-11-03_19:11:27 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.650966e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.233304e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.593275e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.898292e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.236740e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.548470e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.442305 sec - 1,990,347,518 cycles # 3.016 GHz - 2,813,021,989 instructions # 1.41 insn per cycle - 0.727558891 seconds time elapsed +TOTAL : 0.444696 sec + 1,938,726,844 cycles # 2.937 GHz + 2,756,323,630 instructions # 1.42 insn per cycle + 0.718363875 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.224080e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.963239e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.366729e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.682843e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.082328e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.409380e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.525687 sec - 2,309,475,640 cycles # 3.027 GHz - 3,304,899,924 instructions # 1.43 insn per cycle - 0.820449609 seconds time elapsed +TOTAL : 0.522202 sec + 2,220,530,283 cycles # 2.941 GHz + 3,184,953,404 instructions # 1.43 insn per cycle + 0.811776517 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.099687e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.122691e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.122691e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.070337e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.092872e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.092872e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.508019 
sec - 4,696,287,050 cycles # 3.107 GHz - 13,454,191,338 instructions # 2.86 insn per cycle - 1.514176499 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 836) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.552829 sec + 4,705,329,544 cycles # 3.023 GHz + 13,461,758,666 instructions # 2.86 insn per cycle + 1.556952692 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 849) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.005583e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.080670e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.080670e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.948045e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.021952e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.021952e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.835044 sec - 2,617,103,039 cycles # 3.120 GHz - 7,548,121,019 instructions # 2.88 insn per cycle - 0.847366392 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3076) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 0.862207 sec + 2,624,178,818 cycles # 3.031 GHz + 7,555,487,904 instructions # 2.88 insn per cycle + 0.866510467 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3088) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.376685e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.599716e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.599716e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.292100e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.512278e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.512278e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.503836 sec - 1,474,536,973 cycles # 2.904 GHz - 3,114,257,750 instructions # 2.11 insn per cycle - 0.515034850 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2882) (512y: 0) (512z: 0) +TOTAL : 0.519779 sec + 1,479,324,919 cycles # 2.825 GHz + 3,121,432,800 instructions # 2.11 insn per cycle + 0.524166869 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2900) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.729039e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.996518e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.996518e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.586783e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.851292e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.851292e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.457525 sec - 1,334,713,858 cycles # 2.891 GHz - 2,975,109,033 instructions # 2.23 insn per cycle - 0.470624542 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2646) (512y: 104) (512z: 0) +TOTAL : 0.479341 sec + 1,345,156,968 cycles # 2.785 GHz + 2,982,279,143 instructions # 2.22 insn per cycle + 0.483569808 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.554823e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.679178e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.679178e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.481639e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.600263e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.600263e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.661512 sec - 1,318,722,950 cycles # 1.981 GHz - 1,947,473,030 instructions # 1.48 insn per cycle - 0.674217858 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1324) (512y: 106) (512z: 2173) +TOTAL : 0.683909 sec + 1,326,826,217 cycles # 1.930 GHz + 1,955,120,469 instructions # 1.47 insn per cycle + 0.688253496 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1348) (512y: 106) (512z: 2173) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 18fde06e89..59e9dbfb13 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-11-03_13:22:05 +DATE: 2023-11-03_19:11:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.368545e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.195532e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.338037e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.904199e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.231536e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.359887e+08 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.439653 sec - 1,963,073,365 cycles # 3.010 GHz - 2,782,162,803 instructions # 1.42 insn per cycle - 0.724656523 seconds time elapsed +TOTAL : 0.438419 sec + 1,915,720,301 cycles # 2.940 GHz + 2,722,845,778 instructions # 1.42 insn per cycle + 0.708695201 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.233506e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.807053e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.953262e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.256707e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.834983e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.952518e+08 ) sec^-1 MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.474831 sec - 2,104,200,925 cycles # 3.006 GHz - 2,984,209,799 instructions # 1.42 insn per cycle - 0.758654985 seconds time elapsed +TOTAL : 0.473385 sec + 2,068,832,196 cycles # 2.955 GHz + 2,965,580,704 instructions # 1.43 insn per cycle + 0.757067346 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.156001e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.181933e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.181933e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.135878e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.161149e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.161149e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.434457 sec 
- 4,448,159,929 cycles # 3.093 GHz - 13,045,617,942 instructions # 2.93 insn per cycle - 1.440492141 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 732) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.462461 sec + 4,454,737,328 cycles # 3.039 GHz + 13,053,159,453 instructions # 2.93 insn per cycle + 1.466494148 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.114863e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.313630e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.313630e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.046237e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.238088e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.238088e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.542348 sec - 1,695,415,281 cycles # 3.104 GHz - 4,508,974,040 instructions # 2.66 insn per cycle - 0.553837720 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 0.557296 sec + 1,699,998,155 cycles # 3.031 GHz + 4,515,681,552 instructions # 2.66 insn per cycle + 0.561435544 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3601) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.108059e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.866586e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.866586e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.648399e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.355867e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.355867e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.285252 sec - 845,650,618 cycles # 2.925 GHz - 1,891,849,456 instructions # 2.24 insn per cycle - 0.297356991 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3473) (512y: 0) (512z: 0) +TOTAL : 0.311054 sec + 851,131,460 cycles # 2.704 GHz + 1,899,263,660 instructions # 2.23 insn per cycle + 0.315235937 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.553055e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.452732e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.452732e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.243995e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.098185e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.098185e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.266707 sec - 792,995,173 cycles # 2.930 GHz - 1,814,974,440 instructions # 2.29 insn per cycle - 0.278822798 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3311) (512y: 22) (512z: 0) +TOTAL : 0.282792 sec + 800,211,416 cycles # 2.794 GHz + 1,822,370,089 instructions # 2.28 insn per cycle + 0.286974618 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -190,10 +194,10 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 29,780,103 cycles # 2.640 GHz - 42,180,426 instructions # 1.42 insn per cycle - 0.018151168 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1944) (512y: 32) (512z: 2383) + 29,217,754 cycles # 2.652 GHz + 42,284,295 instructions # 1.45 insn per cycle + 0.011406114 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1969) (512y: 32) (512z: 2383) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index ca70adfc3f..f15afb12c1 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-11-03_14:01:50 +DATE: 2023-11-03_19:37:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.690473e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.051445e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.051445e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.572083e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.023629e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.023629e+07 ) sec^-1 MeanMatrixElemValue = ( 2.017654e+01 +- 1.429184e+01 ) GeV^-2 -TOTAL : 0.447544 sec - 1,995,094,462 cycles # 3.028 GHz - 2,944,796,314 instructions # 1.48 insn per cycle - 0.716219472 seconds time elapsed +TOTAL : 0.454387 sec + 1,955,352,187 cycles # 2.938 GHz + 2,863,812,902 instructions # 1.46 insn per cycle + 0.722319097 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.203183e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.581207e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.581207e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.087118e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.599283e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.599283e+07 ) sec^-1 MeanMatrixElemValue = ( 2.609942e+02 +- 2.115590e+02 ) GeV^-2 -TOTAL : 0.615303 sec - 2,561,439,318 cycles # 3.029 GHz - 3,834,138,920 instructions # 1.50 insn per cycle - 0.903022085 seconds time elapsed +TOTAL : 0.623566 sec + 2,498,674,729 cycles # 2.923 GHz + 3,766,117,574 instructions # 1.51 insn per cycle + 0.913465239 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -103,15 +103,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.159807e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.185784e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.185784e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.124937e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.150391e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.150391e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 
1.432391 sec - 4,462,709,360 cycles # 3.108 GHz - 13,049,905,565 instructions # 2.92 insn per cycle - 1.436417435 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 732) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.480709 sec + 4,471,348,915 cycles # 3.013 GHz + 13,056,806,498 instructions # 2.92 insn per cycle + 1.485019275 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -130,15 +131,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.079770e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.276056e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.276056e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.015036e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.208624e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.208624e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.552836 sec - 1,714,551,062 cycles # 3.082 GHz - 4,556,234,091 instructions # 2.66 insn per cycle - 0.556965782 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 0.567631 sec + 1,721,622,943 cycles # 3.014 GHz + 4,563,283,810 instructions # 2.65 insn per cycle + 0.571796628 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3601) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -157,15 +159,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.072468e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.823460e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.823460e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.904492e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.650265e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.650265e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.290519 sec - 863,549,988 cycles # 2.936 GHz - 1,927,930,259 instructions # 2.23 insn per cycle - 0.294678952 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3473) (512y: 0) (512z: 0) +TOTAL : 0.302253 sec + 872,846,100 cycles # 2.852 GHz + 1,935,401,156 instructions # 2.22 insn per cycle + 0.306655862 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -184,15 +187,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.461286e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.336369e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.336369e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.271441e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.120717e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.120717e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.274464 sec - 811,968,530 cycles # 2.921 GHz - 1,851,193,986 instructions # 2.28 insn per cycle - 0.278710334 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3311) (512y: 22) (512z: 0) +TOTAL : 0.285638 sec + 819,147,203 cycles # 2.831 GHz + 1,858,340,668 instructions # 2.27 insn per cycle + 0.289825539 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -207,10 +211,10 @@ OK (relative difference <= 5E-3) runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) - 37,821,224 cycles # 2.670 GHz - 49,845,872 instructions # 1.32 insn per cycle - 0.014789496 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1944) (512y: 32) (512z: 2383) + 37,779,421 cycles # 2.664 GHz + 50,267,131 instructions # 1.33 insn per cycle + 0.014729622 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1969) (512y: 32) (512z: 2383) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index acc7a9c5e0..c8e32c45f6 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-11-03_13:22:22 +DATE: 2023-11-03_19:12:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.321585e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.194004e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.323765e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.816263e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.233557e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.356584e+08 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.436518 sec - 1,946,354,316 cycles # 2.996 GHz - 2,660,085,025 instructions # 1.37 insn per cycle - 0.724399364 seconds time elapsed +TOTAL : 0.439029 sec + 1,906,384,387 cycles # 2.932 GHz + 2,668,630,925 instructions # 1.40 insn per cycle + 0.709025104 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.186399e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.779449e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.920598e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.165457e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.788318e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.899924e+08 ) sec^-1 MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.472348 sec - 2,134,571,311 cycles # 3.027 GHz - 2,989,315,923 instructions # 1.40 insn per cycle - 0.762610695 seconds time elapsed +TOTAL : 0.475153 sec + 2,060,825,458 cycles # 2.945 GHz + 2,959,751,148 instructions # 1.44 insn per cycle + 0.758667305 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.141093e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.167011e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.167011e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.129555e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.154905e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.154905e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.453735 sec 
- 4,446,221,332 cycles # 3.051 GHz - 13,027,008,274 instructions # 2.93 insn per cycle - 1.460116009 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 714) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.470613 sec + 4,452,780,841 cycles # 3.021 GHz + 13,033,295,085 instructions # 2.93 insn per cycle + 1.474743963 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.141538e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.343407e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.343407e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.000043e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.190804e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.190804e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.537937 sec - 1,685,741,136 cycles # 3.112 GHz - 4,505,064,132 instructions # 2.67 insn per cycle - 0.549338887 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3577) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 0.566289 sec + 1,691,331,084 cycles # 2.968 GHz + 4,511,809,710 instructions # 2.67 insn per cycle + 0.570477990 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.112669e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.888447e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.888447e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 5.392978e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.034440e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.034440e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.284460 sec - 844,730,834 cycles # 2.930 GHz - 1,888,937,622 instructions # 2.24 insn per cycle - 0.297127714 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3443) (512y: 0) (512z: 0) +TOTAL : 0.325119 sec + 853,124,200 cycles # 2.596 GHz + 1,896,337,755 instructions # 2.22 insn per cycle + 0.329328797 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3461) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.532166e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.412841e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.412841e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 6.399192e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.280649e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.280649e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.267501 sec - 794,418,594 cycles # 2.927 GHz - 1,810,991,128 instructions # 2.28 insn per cycle - 0.281613083 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3274) (512y: 22) (512z: 0) +TOTAL : 0.275864 sec + 799,266,525 cycles # 2.860 GHz + 1,818,357,527 instructions # 2.28 insn per cycle + 0.279975539 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3298) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -190,10 +194,10 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 29,224,854 cycles # 2.706 GHz - 41,299,770 instructions # 1.41 insn per cycle - 0.022176168 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1907) (512y: 32) (512z: 2383) + 28,695,242 cycles # 2.686 GHz + 41,682,313 instructions # 1.45 insn per cycle + 0.011083970 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1932) (512y: 32) (512z: 2383) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 90cb55044b..2f090614c3 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-11-03_13:22:38 +DATE: 2023-11-03_19:12:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.665308e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.271925e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.627429e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.924011e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.312316e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.652376e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.447989 sec - 1,974,780,319 cycles # 2.966 GHz - 2,819,670,600 instructions # 1.43 insn per cycle - 0.744502455 seconds time elapsed +TOTAL : 0.444193 sec + 1,982,708,723 cycles # 2.960 GHz + 2,773,326,834 instructions # 1.40 insn per cycle + 0.727594315 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.254771e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.104270e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.522137e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.716781e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.189044e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.525460e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.528549 sec - 2,299,640,622 cycles # 3.005 GHz - 3,256,375,279 instructions # 1.42 insn per cycle - 0.822763207 seconds time elapsed +TOTAL : 0.521362 sec + 2,209,841,644 cycles # 2.939 GHz + 3,173,284,555 instructions # 1.44 insn per cycle + 0.811280771 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.084952e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.107624e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.107624e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.069544e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.093797e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.093797e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.528964 sec 
- 4,726,820,201 cycles # 3.085 GHz - 13,462,870,049 instructions # 2.85 insn per cycle - 1.535237667 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 827) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.554251 sec + 4,735,824,731 cycles # 3.041 GHz + 13,470,683,397 instructions # 2.84 insn per cycle + 1.558385201 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 840) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.000087e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.074643e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.074643e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.965218e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.040121e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.040121e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.837218 sec - 2,595,361,513 cycles # 3.086 GHz - 7,381,862,026 instructions # 2.84 insn per cycle - 0.850044679 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3061) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 0.855114 sec + 2,601,303,673 cycles # 3.029 GHz + 7,389,579,625 instructions # 2.84 insn per cycle + 0.859411839 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3073) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.425089e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.650468e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.650468e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.103178e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.304731e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.304731e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.496959 sec - 1,461,826,401 cycles # 2.918 GHz - 3,051,511,691 instructions # 2.09 insn per cycle - 0.512707584 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2995) (512y: 0) (512z: 0) +TOTAL : 0.550950 sec + 1,470,989,933 cycles # 2.653 GHz + 3,058,765,662 instructions # 2.08 insn per cycle + 0.555184249 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3013) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.878630e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.164221e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.164221e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.774277e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.060098e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.060098e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.440640 sec - 1,298,008,305 cycles # 2.919 GHz - 2,925,673,041 instructions # 2.25 insn per cycle - 0.455740476 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2775) (512y: 110) (512z: 0) +TOTAL : 0.455765 sec + 1,309,522,407 cycles # 2.852 GHz + 2,933,428,757 instructions # 2.24 insn per cycle + 0.459981977 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2799) (512y: 110) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.305194e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.410658e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.410658e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.411920e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.526016e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.526016e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.732088 sec - 1,359,034,153 cycles # 1.846 GHz - 1,965,275,198 instructions # 1.45 insn per cycle - 0.747587285 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1676) (512y: 114) (512z: 2171) +TOTAL : 0.703353 sec + 1,366,582,014 cycles # 1.933 GHz + 1,972,774,215 instructions # 1.44 insn per cycle + 0.707707323 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1700) (512y: 114) (512z: 2171) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index dd8caa2559..f9fb6155f7 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -1,7 +1,7 @@ export CUDACPP_RUNTIME_ENABLEFPE=on Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -OMPFLAGS= +OMPFLAGS=-fopenmp AVX=512y FPTYPE=d HELINL=0 @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-11-03_13:22:56 +DATE: 2023-11-03_19:12:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.626602e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.144887e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.496577e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.886874e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.228157e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.568514e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.446727 sec - 1,947,084,355 cycles # 2.897 GHz - 2,696,453,178 instructions # 1.38 insn per cycle - 0.736802652 seconds time elapsed +TOTAL : 0.444068 sec + 1,946,521,853 cycles # 2.951 GHz + 2,755,422,178 instructions # 1.42 insn per cycle + 0.717280231 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.216372e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.927003e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.335966e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.675020e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.027076e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.349457e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.528575 sec - 2,276,047,250 cycles # 2.969 GHz - 3,176,726,798 instructions # 1.40 insn per cycle - 0.824470891 seconds time elapsed +TOTAL : 0.523198 sec + 2,222,274,900 cycles # 2.946 GHz + 3,198,191,753 instructions # 1.44 insn per cycle + 0.813003520 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -90,15 +90,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.073436e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.095874e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.095874e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.069395e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.091866e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.091866e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.544866 sec 
- 4,724,528,983 cycles # 3.051 GHz - 13,449,130,633 instructions # 2.85 insn per cycle - 1.551244322 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 814) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 1.554132 sec + 4,733,166,680 cycles # 3.039 GHz + 13,456,716,984 instructions # 2.84 insn per cycle + 1.558278315 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 827) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. @@ -116,15 +117,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.997011e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.072543e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.072543e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 1.963106e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.038064e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.038064e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.838344 sec - 2,598,952,948 cycles # 3.085 GHz - 7,385,835,720 instructions # 2.84 insn per cycle - 0.849552154 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 3050) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 0.856001 sec + 2,603,447,344 cycles # 3.028 GHz + 7,393,362,148 instructions # 2.84 insn per cycle + 0.860294166 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 3062) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -142,15 +144,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.391429e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.612881e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.612881e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.354162e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.573385e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.573385e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.501350 sec - 1,458,751,807 cycles # 2.887 GHz - 3,051,487,989 instructions # 2.09 insn per cycle - 0.512435660 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2972) (512y: 0) (512z: 0) +TOTAL : 0.509733 sec + 1,467,381,346 cycles # 2.859 GHz + 3,058,521,485 instructions # 2.08 insn per cycle + 0.513844239 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2990) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -168,15 +171,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.868802e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.154760e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.154760e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 3.783084e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.065773e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.065773e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.442043 sec - 1,299,923,563 cycles # 2.915 GHz - 2,926,182,502 instructions # 2.25 insn per cycle - 0.452442721 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2751) (512y: 110) (512z: 0) +TOTAL : 0.454796 sec + 1,307,019,802 cycles # 2.851 GHz + 2,934,565,738 instructions # 2.25 insn per cycle + 0.459066978 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2775) (512y: 110) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. 
@@ -194,15 +198,16 @@ Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.457990e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.573459e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.573459e+05 ) sec^-1 +OMP threads / `nproc --all` = 1 / 4 +EvtsPerSec[Rmb+ME] (23) = ( 2.408065e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.519741e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.519741e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.687239 sec - 1,356,674,081 cycles # 1.962 GHz - 1,964,861,606 instructions # 1.45 insn per cycle - 0.698495126 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1652) (512y: 114) (512z: 2171) +TOTAL : 0.704354 sec + 1,368,218,437 cycles # 1.933 GHz + 1,972,609,636 instructions # 1.44 insn per cycle + 0.708886358 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1676) (512y: 114) (512z: 2171) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest.exe [ PASSED ] 6 tests. From f53166d8d584eb1caa577f0ed3a8598d1c267910 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 3 Nov 2023 20:54:39 +0100 Subject: [PATCH 119/119] [oct23av] ** COMPLETE OCT23AV ** rerun 18 tmad tests (while rerunning also tput with FPEs enabled), usual ggttggg failures, no change in functionality or performance (*NB OpenMP is now again enabled by default*) (or maybe ~1-2% slower on average? 
anyway, keep OpenMP on as in the past) --- .../log_eemumu_mad_d_inl0_hrd0.txt | 134 ++++++++--------- .../log_eemumu_mad_f_inl0_hrd0.txt | 132 ++++++++--------- .../log_eemumu_mad_m_inl0_hrd0.txt | 134 ++++++++--------- .../log_ggtt_mad_d_inl0_hrd0.txt | 138 +++++++++--------- .../log_ggtt_mad_f_inl0_hrd0.txt | 136 ++++++++--------- .../log_ggtt_mad_m_inl0_hrd0.txt | 134 ++++++++--------- .../log_ggttg_mad_d_inl0_hrd0.txt | 134 ++++++++--------- .../log_ggttg_mad_f_inl0_hrd0.txt | 136 ++++++++--------- .../log_ggttg_mad_m_inl0_hrd0.txt | 132 ++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 138 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 130 ++++++++--------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 138 +++++++++--------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 6 +- .../log_ggttggg_mad_f_inl0_hrd0.txt | 10 +- .../log_ggttggg_mad_m_inl0_hrd0.txt | 14 +- .../log_gqttq_mad_d_inl0_hrd0.txt | 132 ++++++++--------- .../log_gqttq_mad_f_inl0_hrd0.txt | 134 ++++++++--------- .../log_gqttq_mad_m_inl0_hrd0.txt | 134 ++++++++--------- 18 files changed, 1023 insertions(+), 1023 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 77fe2ed306..bcf56600ba 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -3,8 +3,8 @@ CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,11 +15,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -28,12 +27,13 @@ make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2023-11-03_14:21:18 +DATE: 2023-11-03_19:52:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6322s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6243s - [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6373s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6287s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.49E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1903s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1817s - [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1807s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1728s + [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.03E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 
events) - [COUNTERS] PROGRAM TOTAL : 0.4441s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3520s - [COUNTERS] Fortran MEs ( 1 ) : 0.0921s for 90112 events => throughput is 9.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4217s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3352s + [COUNTERS] Fortran MEs ( 1 ) : 0.0865s for 90112 events => throughput is 1.04E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.2004s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1937s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 8192 events => throughput is 1.22E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1919s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1852s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 8192 events => throughput is 1.23E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4331s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3605s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0726s for 90112 events => throughput is 1.24E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4231s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3509s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0722s for 90112 events => throughput is 1.25E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.235602e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.217666e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.269361e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.241611e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1840s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1800s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0040s for 8192 events => throughput is 2.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1865s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1826s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.10E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3815s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3383s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0433s for 90112 events => throughput is 2.08E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3926s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3476s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0451s for 90112 events => throughput is 2.00E+06 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.980623e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.991197e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.041045e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.990100e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1815s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1785s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.72E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1864s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1832s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,8 +319,8 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3737s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3402s + [COUNTERS] PROGRAM TOTAL : 0.3800s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3465s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0335s for 90112 events => throughput is 
2.69E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.654499e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.603611e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.790648e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.718712e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1809s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1781s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.85E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1833s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1805s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.93E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3678s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3365s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0314s for 90112 events => throughput is 2.87E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3774s + [COUNTERS] Fortran 
Overhead ( 0 ) : 0.3449s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0324s for 90112 events => throughput is 2.78E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.813865e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.713996e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.819295e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.775269e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1827s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1792s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.38E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1890s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1855s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.35E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3813s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3425s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0388s for 90112 
events => throughput is 2.32E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3894s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3496s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0398s for 90112 events => throughput is 2.26E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.182323e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.190424e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.275304e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.183626e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5938s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5933s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.67E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5997s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5992s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.68E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7577s - 
[COUNTERS] Fortran Overhead ( 0 ) : 0.7528s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 90112 events => throughput is 1.86E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7696s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7647s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.85E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.222938e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.173877e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.926125e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.893710e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.694324e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.716630e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.376536e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.387595e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.707496e+07 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.739579e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.981585e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.929113e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.719109e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.693635e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.125260e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.118370e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 2e7a5e8f3d..ff3c2ae8d4 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/e CUDACPP_BUILDDIR='.' 
+ make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 - make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,25 +15,25 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2023-11-03_14:21:35 +DATE: 2023-11-03_19:52:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,8 +59,8 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6333s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6253s + [COUNTERS] PROGRAM TOTAL : 0.6418s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6338s [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.02E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1819s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1741s - [COUNTERS] Fortran MEs ( 1 ) : 0.0078s for 8192 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1827s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1742s + [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.67E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4167s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3314s - [COUNTERS] Fortran MEs ( 1 
) : 0.0854s for 90112 events => throughput is 1.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4264s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3383s + [COUNTERS] Fortran MEs ( 1 ) : 0.0882s for 90112 events => throughput is 1.02E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166087172673] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1877s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1816s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0061s for 8192 events => throughput is 1.35E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1909s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1845s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 8192 events => throughput is 1.27E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501907796603360E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4143s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3447s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0696s for 90112 events => throughput is 1.29E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4197s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3492s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0705s for 90112 events => throughput is 1.28E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.253827e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.260485e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.302410e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.240620e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165570339780] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1802s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1775s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.12E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1824s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1798s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.18E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905322826635E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3780s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3501s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0280s for 90112 events => throughput is 3.22E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3742s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3464s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0278s for 90112 events => throughput is 3.24E+06 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.148616e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.182676e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.259827e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.343050e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165593922979] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1855s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1832s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.52E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1914s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1892s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.72E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905316084181E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3667s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3419s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0249s for 90112 events => throughput is 3.62E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3767s + [COUNTERS] Fortran Overhead 
( 0 ) : 0.3513s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0254s for 90112 events => throughput is 3.55E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.440927e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.496883e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.573217e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.660390e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165593922979] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1842s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1820s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.77E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1867s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1844s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.57E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905316084181E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3681s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3435s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0245s for 90112 events 
=> throughput is 3.67E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3763s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3515s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0248s for 90112 events => throughput is 3.63E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.600067e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.562187e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.754576e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.601892e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166440400542] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1873s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1851s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.69E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1875s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1852s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.57E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,8 +471,8 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501908978565555E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3804s - 
[COUNTERS] Fortran Overhead ( 0 ) : 0.3549s + [COUNTERS] PROGRAM TOTAL : 0.3774s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3519s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0255s for 90112 events => throughput is 3.53E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.483435e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.223682e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.661856e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.583359e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166823487174] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5910s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5905s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.69E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5998s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5993s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.73E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501910542849674E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7804s - 
[COUNTERS] Fortran Overhead ( 0 ) : 0.7758s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.94E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7713s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7665s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 90112 events => throughput is 1.87E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.351128e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.583398e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.874991e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.881767e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.920407e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.997979e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.035596e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.043514e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.750747e+07 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.954785e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.223793e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.219791e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.397928e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.299152e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.418216e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.462264e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 988708e401..7741c53b46 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum CUDACPP_BUILDDIR='.' 
- - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,12 +15,12 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_14:21:52 +DATE: 2023-11-03_19:52:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6250s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6171s - [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6387s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6300s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.48E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1782s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1702s - [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1817s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1737s + [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.03E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross 
section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4157s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3308s - [COUNTERS] Fortran MEs ( 1 ) : 0.0849s for 90112 events => throughput is 1.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4238s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3373s + [COUNTERS] Fortran MEs ( 1 ) : 0.0865s for 90112 events => throughput is 1.04E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211734] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1873s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1808s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.25E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1953s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1883s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 8192 events => throughput is 1.17E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4169s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3443s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0727s for 90112 events => throughput is 1.24E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4433s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3677s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0757s for 90112 events => throughput is 1.19E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.191633e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.177056e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.168097e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.187091e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211728] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1824s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1786s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.12E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1964s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1922s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0042s for 8192 events => throughput is 1.95E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3831s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3402s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0429s for 90112 events => throughput is 2.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4148s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3686s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0462s for 90112 events => throughput is 1.95E+06 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.058337e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.000126e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.169456e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.127276e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1811s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1780s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.66E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1855s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1824s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.62E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3726s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3393s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0334s for 90112 events => throughput is 2.70E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3807s + [COUNTERS] Fortran Overhead 
( 0 ) : 0.3465s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0342s for 90112 events => throughput is 2.63E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.575244e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.610232e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.799275e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.645393e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1819s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1790s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.90E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1900s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1870s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.68E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3743s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3425s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0318s for 90112 events 
=> throughput is 2.83E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3808s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3484s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0325s for 90112 events => throughput is 2.78E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.740478e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.740865e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.970797e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.846547e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1830s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1795s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.35E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1860s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1826s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.46E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3836s - 
[COUNTERS] Fortran Overhead ( 0 ) : 0.3461s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0375s for 90112 events => throughput is 2.40E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3896s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3515s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0381s for 90112 events => throughput is 2.36E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.298518e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.142678e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.467182e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.406082e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169066587257] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5999s + [COUNTERS] PROGRAM TOTAL : 0.5998s [COUNTERS] Fortran Overhead ( 0 ) : 0.5993s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.61E+07 events/s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.60E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,8 +547,8 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919911173610E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 
events) - [COUNTERS] PROGRAM TOTAL : 0.7600s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7552s + [COUNTERS] PROGRAM TOTAL : 0.7721s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7672s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.85E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.215424e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.181977e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.900873e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.926668e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.713004e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.726329e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.350320e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.399920e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.721122e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
3.694690e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.891982e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.877527e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.744274e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.708142e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.113133e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.118945e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 1c17450a40..1c30dae812 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_14:22:09 +DATE: 2023-11-03_19:53:04 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3552s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3147s - [COUNTERS] Fortran MEs ( 1 ) : 0.0404s for 8192 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3686s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3264s + [COUNTERS] Fortran MEs ( 1 ) : 0.0422s for 8192 events => throughput is 1.94E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3067s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2663s - [COUNTERS] Fortran MEs ( 1 ) : 0.0403s for 8192 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3158s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2736s + [COUNTERS] Fortran MEs ( 1 ) : 0.0422s for 8192 events => throughput is 1.94E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6567s 
- [COUNTERS] Fortran Overhead ( 0 ) : 1.2097s - [COUNTERS] Fortran MEs ( 1 ) : 0.4470s for 90112 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6988s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2402s + [COUNTERS] Fortran MEs ( 1 ) : 0.4586s for 90112 events => throughput is 1.97E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600102] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3421s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3052s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0370s for 8192 events => throughput is 2.22E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3516s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3139s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0377s for 8192 events => throughput is 2.17E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775372] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6711s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2663s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4048s for 90112 events => throughput is 2.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7148s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2977s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4171s for 90112 events => throughput is 2.16E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.240136e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.143201e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.246301e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.178576e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600102] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3123s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2910s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0213s for 8192 events => throughput is 3.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3233s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3012s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0221s for 8192 events => throughput is 3.70E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775379] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4758s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2401s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2357s for 90112 events => throughput is 3.82E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5228s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2789s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2439s for 90112 events => throughput is 3.69E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.814458e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.615837e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.799058e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.718240e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2946s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2817s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0129s for 8192 events => throughput is 6.36E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3023s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2891s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0132s for 8192 events => throughput is 6.21E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3939s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2474s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1465s for 90112 events => throughput is 6.15E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4266s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2769s + [COUNTERS] CudaCpp MEs 
( 2 ) : 0.1496s for 90112 events => throughput is 6.02E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.003903e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.870487e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.235696e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.072305e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2986s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2870s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0116s for 8192 events => throughput is 7.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3006s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2887s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0119s for 8192 events => throughput is 6.89E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4648s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3245s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1403s for 90112 events => throughput is 6.42E+05 events/s + [COUNTERS] PROGRAM TOTAL 
: 1.4026s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2691s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1335s for 90112 events => throughput is 6.75E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.813758e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.610205e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.710973e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.622254e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3143s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2953s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0189s for 8192 events => throughput is 4.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3191s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2997s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0194s for 8192 events => throughput is 4.23E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.5031s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2812s - [COUNTERS] CudaCpp MEs ( 2 ) : 
0.2219s for 90112 events => throughput is 4.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5054s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2826s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2228s for 90112 events => throughput is 4.04E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.088501e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.911690e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.031349e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.045481e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6955s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6949s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.42E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7037s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7032s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.43E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6545s - 
[COUNTERS] Fortran Overhead ( 0 ) : 1.6481s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.42E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6871s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6805s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.37E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.084897e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.043596e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.690035e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.671088e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.998858e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.005777e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.074784e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.074802e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.994007e+07 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 3.019573e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.152245e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.147636e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.996363e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.014036e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.002988e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.011683e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 58819f13cf..7edcebceb9 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,25 +15,25 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2023-11-03_14:22:35 +DATE: 2023-11-03_19:53:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3596s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3190s - [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3667s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3245s + [COUNTERS] Fortran MEs ( 1 ) : 0.0422s for 8192 events => throughput is 1.94E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3065s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2661s - [COUNTERS] Fortran MEs ( 1 ) : 0.0404s for 8192 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3249s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2811s + [COUNTERS] Fortran MEs ( 1 ) : 0.0438s for 8192 events => throughput is 1.87E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM 
TOTAL : 1.6511s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2036s - [COUNTERS] Fortran MEs ( 1 ) : 0.4475s for 90112 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7464s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2748s + [COUNTERS] Fortran MEs ( 1 ) : 0.4716s for 90112 events => throughput is 1.91E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690706767555099] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3370s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3023s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0346s for 8192 events => throughput is 2.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3467s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3115s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0352s for 8192 events => throughput is 2.33E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782605295497] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6412s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2618s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3794s for 90112 events => throughput is 2.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6806s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2908s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3898s for 90112 events => throughput is 2.31E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.365750e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.279168e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.346162e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.299428e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690702885183541] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3004s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2856s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0148s for 8192 events => throughput is 5.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3091s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2943s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0148s for 8192 events => throughput is 5.53E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223778858016772] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3997s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2392s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1605s for 90112 events => throughput is 5.61E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4417s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2764s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1652s for 90112 events => throughput is 5.45E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.378211e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.234141e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.483452e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.323283e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690694374060818] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2833s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2757s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 8192 events => throughput is 1.08E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2903s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2825s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 8192 events => throughput is 1.06E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223775951815753] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3185s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2326s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0859s for 90112 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3699s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2805s + [COUNTERS] CudaCpp MEs 
( 2 ) : 0.0894s for 90112 events => throughput is 1.01E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.035352e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.010480e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.030223e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.003913e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690694374060818] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2832s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2762s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 8192 events => throughput is 1.16E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2913s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2842s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0071s for 8192 events => throughput is 1.15E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223775951815753] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3092s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2299s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0793s for 90112 events => throughput is 1.14E+06 events/s + [COUNTERS] PROGRAM TOTAL 
: 1.3439s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2627s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0812s for 90112 events => throughput is 1.11E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.114954e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.090791e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.111064e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.092579e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690698914467276] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2891s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2791s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0100s for 8192 events => throughput is 8.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2963s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2859s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0104s for 8192 events => throughput is 7.89E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223780273983500] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3496s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2394s - [COUNTERS] CudaCpp MEs ( 2 ) : 
0.1103s for 90112 events => throughput is 8.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3867s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2714s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1153s for 90112 events => throughput is 7.81E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.588175e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.366599e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.594492e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.487198e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690703397697980] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6957s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6951s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.34E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7024s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7018s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.52E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223786763175951] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6552s - 
[COUNTERS] Fortran Overhead ( 0 ) : 1.6498s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 90112 events => throughput is 1.65E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6918s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6861s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0056s for 90112 events => throughput is 1.60E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.278872e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.243778e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.958730e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.844714e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.818332e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.837802e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.760656e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.769339e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.821736e+07 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 5.775138e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.859017e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.863954e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.371288e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.397746e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.429668e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.449606e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index cb957582e7..30dac17633 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -3,9 +3,9 @@ CUDACPP_BUILDDIR='.' 
make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,11 +15,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_14:23:01 +DATE: 2023-11-03_19:53:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3676s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3246s - [COUNTERS] Fortran MEs ( 1 ) : 0.0430s for 8192 events => throughput is 1.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3561s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3153s + [COUNTERS] Fortran MEs ( 1 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3058s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2654s - [COUNTERS] Fortran MEs ( 1 ) : 0.0404s for 8192 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3103s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2697s + [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6499s 
- [COUNTERS] Fortran Overhead ( 0 ) : 1.2045s - [COUNTERS] Fortran MEs ( 1 ) : 0.4454s for 90112 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6795s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2275s + [COUNTERS] Fortran MEs ( 1 ) : 0.4521s for 90112 events => throughput is 1.99E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3442s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3072s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0370s for 8192 events => throughput is 2.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3522s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3138s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0384s for 8192 events => throughput is 2.13E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6618s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2548s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4070s for 90112 events => throughput is 2.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7156s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2968s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4188s for 90112 events => throughput is 2.15E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.187012e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.113023e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.190577e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.146418e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3104s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2894s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0210s for 8192 events => throughput is 3.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3199s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2989s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0210s for 8192 events => throughput is 3.90E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4727s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2421s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2305s for 90112 events => throughput is 3.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5211s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2871s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2339s for 90112 events => throughput is 3.85E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.829884e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.687467e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.857284e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.724259e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2928s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2801s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0127s for 8192 events => throughput is 6.44E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3043s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2913s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0129s for 8192 events => throughput is 6.33E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3718s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2318s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1400s for 90112 events => throughput is 6.44E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4192s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2746s + [COUNTERS] CudaCpp MEs 
( 2 ) : 0.1446s for 90112 events => throughput is 6.23E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.247774e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.051901e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.307806e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.195854e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2916s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2804s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0112s for 8192 events => throughput is 7.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2995s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2875s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0120s for 8192 events => throughput is 6.80E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3666s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2401s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1265s for 90112 events => throughput is 7.12E+05 events/s + [COUNTERS] PROGRAM TOTAL 
: 1.3959s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2664s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1296s for 90112 events => throughput is 6.96E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.866736e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.842430e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.056278e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.007264e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3059s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2875s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0184s for 8192 events => throughput is 4.45E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3146s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2960s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0186s for 8192 events => throughput is 4.40E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4513s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2472s - [COUNTERS] CudaCpp MEs ( 2 ) : 
0.2041s for 90112 events => throughput is 4.42E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5378s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3142s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2236s for 90112 events => throughput is 4.03E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.146707e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.894022e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.375036e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.946552e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708266690699] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.6924s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6918s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.47E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7067s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7061s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.46E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782303744791] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6484s - 
[COUNTERS] Fortran Overhead ( 0 ) : 1.6421s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.43E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6929s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6862s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 90112 events => throughput is 1.34E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.080978e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.049753e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.652267e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.613651e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.998863e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.019403e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.056186e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.060699e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.990687e+07 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.995962e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.134241e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.142982e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.001170e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.026315e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.023565e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.022885e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 5fb683acd0..d992721ecf 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -2,12 +2,12 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
- - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=avx2 + make USEBUILDDIR=1 AVX=512y +make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' @@ -17,13 +17,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_14:23:26 +DATE: 2023-11-03_19:54:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5401s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2255s - [COUNTERS] Fortran MEs ( 1 ) : 0.3146s for 8192 events => throughput is 2.60E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5463s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2264s + [COUNTERS] Fortran MEs ( 1 ) : 0.3199s for 8192 events => throughput is 2.56E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5343s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2195s - [COUNTERS] Fortran MEs ( 1 ) : 0.3148s for 8192 events => throughput is 2.60E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5423s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2222s + [COUNTERS] Fortran MEs ( 1 ) : 0.3201s for 8192 events => throughput is 2.56E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM 
TOTAL : 4.8199s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3740s - [COUNTERS] Fortran MEs ( 1 ) : 3.4459s for 90112 events => throughput is 2.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.9241s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4090s + [COUNTERS] Fortran MEs ( 1 ) : 3.5151s for 90112 events => throughput is 2.56E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470791E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8536s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5320s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3216s for 8192 events => throughput is 2.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8783s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5509s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3274s for 8192 events => throughput is 2.50E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.2956s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6974s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.5983s for 90112 events => throughput is 2.50E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3304s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7125s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6180s for 90112 events => throughput is 2.49E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.604218e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.563855e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.603357e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.539633e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470777E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5511s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3836s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1674s for 8192 events => throughput is 4.89E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5609s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3903s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1705s for 8192 events => throughput is 4.80E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.3574s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5195s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8379s for 90112 events => throughput is 4.90E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.4794s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5811s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8984s for 90112 events => throughput is 4.75E+04 events/s *** 
(2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.971222e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.820475e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.969548e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.874297e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3843s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3003s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0840s for 8192 events => throughput is 9.76E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3928s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3073s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0854s for 8192 events => throughput is 9.59E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.3465s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4370s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9096s for 90112 events => throughput is 9.91E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.4294s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4857s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.9437s for 90112 events => throughput is 9.55E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.000702e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.717012e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.988605e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.756457e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3645s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2903s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0743s for 8192 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3746s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2982s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0765s for 8192 events => throughput is 1.07E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.2510s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4337s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8173s for 90112 events => throughput is 1.10E+05 
events/s + [COUNTERS] PROGRAM TOTAL : 2.3655s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5058s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8597s for 90112 events => throughput is 1.05E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.115226e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.094100e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.100593e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.081248e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4242s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3213s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1029s for 8192 events => throughput is 7.96E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4370s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3297s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1073s for 8192 events => throughput is 7.64E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.6010s - [COUNTERS] Fortran Overhead ( 0 ) 
: 1.4694s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1317s for 90112 events => throughput is 7.96E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6869s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5079s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1790s for 90112 events => throughput is 7.64E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.959592e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.730653e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.012539e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.578143e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6558s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6503s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6799s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6745s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0053s for 8192 events => throughput is 1.53E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,8 +547,8 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 
1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8107s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7878s + [COUNTERS] PROGRAM TOTAL : 1.8667s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8438s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0229s for 90112 events => throughput is 3.94E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.612339e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.611230e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.865541e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.333105e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.627230e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.644038e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.237387e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.240451e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.658424e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.653799e+06 ) sec^-1 *** 
EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.247853e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.251657e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.609214e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.651458e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.745287e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.754830e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 6fbc1b8a0f..a339973536 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none - +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -16,10 +16,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_14:24:07 +DATE: 2023-11-03_19:55:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5354s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2217s - [COUNTERS] Fortran MEs ( 1 ) : 0.3138s for 8192 events => throughput is 2.61E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5498s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2259s + [COUNTERS] Fortran MEs ( 1 ) : 0.3239s for 8192 events => throughput is 2.53E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) 
*** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5335s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2184s - [COUNTERS] Fortran MEs ( 1 ) : 0.3151s for 8192 events => throughput is 2.60E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5475s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2238s + [COUNTERS] Fortran MEs ( 1 ) : 0.3236s for 8192 events => throughput is 2.53E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 4.9254s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4370s - [COUNTERS] Fortran MEs ( 1 ) : 3.4884s for 90112 events => throughput is 2.58E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.9843s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4284s + [COUNTERS] Fortran MEs ( 1 ) : 3.5559s for 90112 events => throughput is 2.53E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196349765248158E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8356s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5223s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3132s for 8192 events => throughput is 2.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8606s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5403s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3204s for 8192 events => throughput is 2.56E+04 events/s *** (2-none) 
Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310860767768514E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.1026s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6641s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4385s for 90112 events => throughput is 2.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2449s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7120s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5329s for 90112 events => throughput is 2.55E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.697383e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.612374e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.698783e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.564881e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196334183509370E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4030s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3093s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0937s for 8192 events => throughput is 8.74E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4339s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3327s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.1012s for 8192 events => throughput is 8.10E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847547651041E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.5866s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5178s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0689s for 90112 events => throughput is 8.43E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.5445s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4937s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0508s for 90112 events => throughput is 8.58E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.859401e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.676181e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.866780e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.776153e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196330801117323E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3050s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2617s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0434s for 8192 events => throughput is 1.89E+05 
events/s + [COUNTERS] PROGRAM TOTAL : 0.3149s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2698s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0450s for 8192 events => throughput is 1.82E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847326088065E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8700s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3958s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4742s for 90112 events => throughput is 1.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9260s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4419s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4841s for 90112 events => throughput is 1.86E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.846803e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.865505e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.911347e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.837629e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196330801117323E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.2945s - [COUNTERS] Fortran Overhead ( 0 ) 
: 0.2557s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0388s for 8192 events => throughput is 2.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3024s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2625s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0399s for 8192 events => throughput is 2.05E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847326088065E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8253s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3994s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4259s for 90112 events => throughput is 2.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8768s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4395s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4373s for 90112 events => throughput is 2.06E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.115953e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.065719e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.160440e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.103855e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196344079460428E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3177s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2677s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0500s for 8192 events => throughput is 1.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3291s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2768s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0523s for 8192 events => throughput is 1.57E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310857804286998E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.9549s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4085s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5464s for 90112 events => throughput is 1.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0319s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4573s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5745s for 90112 events => throughput is 1.57E+05 events/s *** 
(2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.606971e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.561181e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.622152e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.560141e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196349366365994E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6413s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6404s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 9.77E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6502s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6494s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 9.50E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310864949473968E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.7943s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7849s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0094s for 90112 events => throughput is 9.55E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8485s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8390s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.0096s for 90112 events => throughput is 9.41E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.320932e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.292780e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.861129e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.862148e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.664882e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.637111e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.397726e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.443658e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.622996e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.653596e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.534593e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.515346e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.497628e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.504423e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.618938e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.620516e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 49eb2706cd..0d971ecde6 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -4,8 +4,8 @@ CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,11 +15,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_14:24:43 +DATE: 2023-11-03_19:55:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5358s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2227s - [COUNTERS] Fortran MEs ( 1 ) : 0.3131s for 8192 events => throughput is 2.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5559s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2317s + [COUNTERS] Fortran MEs ( 1 ) : 0.3242s for 8192 events => throughput is 2.53E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5387s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2263s - [COUNTERS] Fortran MEs ( 1 ) : 0.3124s for 8192 events => throughput is 2.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5470s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2235s + [COUNTERS] Fortran MEs ( 1 ) : 0.3235s for 8192 events => throughput is 2.53E+04 
events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 4.8187s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3784s - [COUNTERS] Fortran MEs ( 1 ) : 3.4403s for 90112 events => throughput is 2.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.9714s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4219s + [COUNTERS] Fortran MEs ( 1 ) : 3.5496s for 90112 events => throughput is 2.54E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358763382007E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8638s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5380s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3258s for 8192 events => throughput is 2.51E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8877s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5532s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3345s for 8192 events => throughput is 2.45E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872835011053E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.2839s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6818s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6021s for 90112 events => throughput is 2.50E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.5218s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7614s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.7604s for 
90112 events => throughput is 2.40E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.559173e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.427313e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.573125e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.496439e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358804670396E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5484s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3821s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1663s for 8192 events => throughput is 4.93E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5567s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3892s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1675s for 8192 events => throughput is 4.89E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872836789727E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.3769s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5431s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8338s for 90112 events => throughput is 4.91E+04 events/s + [COUNTERS] PROGRAM TOTAL : 
3.4587s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5767s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8820s for 90112 events => throughput is 4.79E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.754175e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.968795e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.937414e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.959892e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3824s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2993s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0831s for 8192 events => throughput is 9.86E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3947s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3085s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0862s for 8192 events => throughput is 9.51E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.3687s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4491s - [COUNTERS] CudaCpp MEs ( 2 
) : 0.9197s for 90112 events => throughput is 9.80E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.4507s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4993s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9513s for 90112 events => throughput is 9.47E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.003131e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.685236e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.002381e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.962312e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3754s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2998s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0756s for 8192 events => throughput is 1.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3728s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2978s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0750s for 8192 events => throughput is 1.09E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] 
PROGRAM TOTAL : 2.2394s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4337s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8056s for 90112 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2949s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4713s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8237s for 90112 events => throughput is 1.09E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.143249e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.124486e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.145832e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.126890e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358757578441E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4319s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3251s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1068s for 8192 events => throughput is 7.67E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4452s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3335s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1117s for 8192 events => throughput is 7.34E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 
[8.1310872803699391E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.6523s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4789s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1734s for 90112 events => throughput is 7.68E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.7230s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5103s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2127s for 90112 events => throughput is 7.43E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.668189e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.441126e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.655247e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.419166e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,8 +514,8 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358102981245E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6518s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6464s + [COUNTERS] PROGRAM TOTAL : 0.6594s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6540s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 
[8.1310872068634174E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8128s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7900s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0227s for 90112 events => throughput is 3.96E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8526s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8298s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 90112 events => throughput is 3.94E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.620967e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.626262e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.820585e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.888012e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.642288e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.627419e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.233489e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.234131e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.645321e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.606969e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.248124e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.246896e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.649140e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.626608e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.718456e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.728520e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index f9d3c4043e..ba8c60f62e 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_14:25:24 +DATE: 2023-11-03_19:56:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3440s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2774s - [COUNTERS] Fortran MEs ( 1 ) : 4.0666s for 8192 events => throughput is 2.01E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4568s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2815s + [COUNTERS] Fortran MEs ( 1 ) : 4.1753s for 8192 events => throughput is 1.96E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3345s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2700s - [COUNTERS] Fortran MEs ( 1 ) : 4.0645s for 8192 events => throughput is 2.02E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.5175s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2787s + [COUNTERS] Fortran MEs ( 1 ) : 4.2387s for 8192 events => throughput is 1.93E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] 
PROGRAM TOTAL : 47.0034s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8824s - [COUNTERS] Fortran MEs ( 1 ) : 45.1210s for 90112 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 48.0120s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9235s + [COUNTERS] Fortran MEs ( 1 ) : 46.0885s for 90112 events => throughput is 1.96E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.6907s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5143s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1763s for 8192 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.7799s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4663s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3136s for 8192 events => throughput is 1.90E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421161E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 52.0839s - [COUNTERS] Fortran Overhead ( 0 ) : 5.9376s - [COUNTERS] CudaCpp MEs ( 2 ) : 46.1463s for 90112 events => throughput is 1.95E+03 events/s + [COUNTERS] PROGRAM TOTAL : 53.8857s + [COUNTERS] Fortran Overhead ( 0 ) : 6.1301s + [COUNTERS] CudaCpp MEs ( 2 ) : 47.7557s for 90112 events => throughput is 1.89E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.987220e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.953970e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.007452e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.950653e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352993E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.6638s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4426s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2211s for 8192 events => throughput is 3.69E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.8228s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5191s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3037s for 8192 events => throughput is 3.56E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 28.7170s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0525s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.6644s for 90112 events => throughput is 3.65E+03 events/s + [COUNTERS] PROGRAM TOTAL : 29.7001s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1956s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.5045s for 90112 events => throughput is 3.53E+03 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.799047e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.686347e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.786733e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.681541e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.1946s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2212s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9733s for 8192 events => throughput is 8.42E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.2608s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2531s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0077s for 8192 events => throughput is 8.13E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 13.5555s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8339s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.7216s for 90112 events => throughput is 8.40E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.8799s + [COUNTERS] Fortran 
Overhead ( 0 ) : 2.8850s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.9950s for 90112 events => throughput is 8.20E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.126786e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.425637e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.340374e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.448586e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.9555s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1027s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8529s for 8192 events => throughput is 9.61E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.0082s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1311s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8771s for 8192 events => throughput is 9.34E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 12.0489s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6894s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.3594s for 
90112 events => throughput is 9.63E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.4208s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7744s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.6464s for 90112 events => throughput is 9.34E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.899347e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.625406e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.909621e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.599473e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.3829s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3187s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0641s for 8192 events => throughput is 7.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.4768s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3764s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1005s for 8192 events => throughput is 7.44E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL 
: 14.5364s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9130s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.6234s for 90112 events => throughput is 7.75E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.3944s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0207s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.3737s for 90112 events => throughput is 7.28E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.702464e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.487218e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.627010e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.501573e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.8074s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7751s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0323s for 8192 events => throughput is 2.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8150s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7821s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 8192 events => throughput is 2.49E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 
[1.5803725748421161E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.7137s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3633s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3505s for 90112 events => throughput is 2.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.7813s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4228s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3586s for 90112 events => throughput is 2.51E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.295020e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.281506e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.520040e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.519229e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.113125e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.106281e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.159949e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.149081e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.117693e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.098811e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.156944e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.169654e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.118922e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.104970e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.436022e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.438070e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 23b30f1d97..2c58d8399d 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -16,10 +16,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_14:29:35 +DATE: 2023-11-03_20:00:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3585s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2729s - [COUNTERS] Fortran MEs ( 1 ) : 4.0855s for 8192 events => throughput is 2.01E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4730s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2806s + [COUNTERS] Fortran MEs ( 1 ) : 4.1924s for 8192 events => throughput is 1.95E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' 
./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3604s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2691s - [COUNTERS] Fortran MEs ( 1 ) : 4.0913s for 8192 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4924s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2814s + [COUNTERS] Fortran MEs ( 1 ) : 4.2110s for 8192 events => throughput is 1.95E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 46.9679s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8721s - [COUNTERS] Fortran MEs ( 1 ) : 45.0957s for 90112 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 48.0870s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9193s + [COUNTERS] Fortran MEs ( 1 ) : 46.1676s for 90112 events => throughput is 1.95E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277396490802749E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.2959s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2064s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.0894s for 8192 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.5167s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3246s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.1920s for 8192 events => throughput is 1.95E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to 
MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803774602344628E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 50.7315s - [COUNTERS] Fortran Overhead ( 0 ) : 5.8205s - [COUNTERS] CudaCpp MEs ( 2 ) : 44.9111s for 90112 events => throughput is 2.01E+03 events/s + [COUNTERS] PROGRAM TOTAL : 52.0969s + [COUNTERS] Fortran Overhead ( 0 ) : 5.9741s + [COUNTERS] CudaCpp MEs ( 2 ) : 46.1228s for 90112 events => throughput is 1.95E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.086896e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.036738e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.085135e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.035901e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277389126121586E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.4679s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3644s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1035s for 8192 events => throughput is 7.42E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5366s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3964s + [COUNTERS] CudaCpp MEs ( 2 ) : 
1.1402s for 8192 events => throughput is 7.18E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803771887543366E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 15.1623s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9574s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.2048s for 90112 events => throughput is 7.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.6834s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0490s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.6344s for 90112 events => throughput is 7.13E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.521286e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.385848e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.529131e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.336063e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277390198115864E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.3322s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7991s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5331s for 8192 events => throughput is 1.54E+04 events/s + [COUNTERS] 
PROGRAM TOTAL : 1.2706s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7693s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5013s for 8192 events => throughput is 1.63E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803774416711566E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.7860s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3594s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4267s for 90112 events => throughput is 1.66E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.9611s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4052s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.5558s for 90112 events => throughput is 1.62E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.733277e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.671775e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.713538e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.674155e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277390198115864E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.1199s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6914s - 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.4285s for 8192 events => throughput is 1.91E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.1460s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7047s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4413s for 8192 events => throughput is 1.86E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803774416711566E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.0143s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2786s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.7356s for 90112 events => throughput is 1.90E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.1917s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3395s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.8523s for 90112 events => throughput is 1.86E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.943790e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.912795e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.957674e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.909696e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277396394633404E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.3342s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8067s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5275s for 8192 events => throughput is 1.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.3662s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8206s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5456s for 8192 events => throughput is 1.50E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803777741065333E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 8.1909s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3908s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.8001s for 90112 events => throughput is 1.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.4389s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4516s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.9874s for 90112 events => throughput is 1.51E+04 
events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.490025e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.534307e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.444637e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.484518e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,8 +514,8 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277400478491260E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.7835s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7620s + [COUNTERS] PROGRAM TOTAL : 0.7763s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7549s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.82E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803779990154892E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.5744s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3391s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2353s for 90112 events => throughput is 3.83E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6207s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3864s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2342s for 90112 events => throughput is 3.85E+05 
events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.593797e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.582914e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.939181e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.939400e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.492205e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.483584e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.665669e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.662803e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.495594e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.489429e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.659964e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 8.631443e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.476367e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.463590e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.521263e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.531910e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 099daaf875..7032d72896 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,12 +1,12 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg CUDACPP_BUILDDIR='.' 
+ make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 - -make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z @@ -15,17 +15,17 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_14:32:52 +DATE: 2023-11-03_20:04:02 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.3416s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2711s - [COUNTERS] Fortran MEs ( 1 ) : 4.0705s for 8192 events => throughput is 2.01E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4626s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2774s + [COUNTERS] Fortran MEs ( 1 ) : 4.1852s for 8192 events => throughput is 1.96E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.3881s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2745s - [COUNTERS] Fortran MEs ( 1 ) : 4.1137s for 8192 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4427s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2777s + [COUNTERS] Fortran MEs ( 1 ) : 4.1649s for 8192 events => throughput is 1.97E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] 
PROGRAM TOTAL : 46.9881s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8639s - [COUNTERS] Fortran MEs ( 1 ) : 45.1242s for 90112 events => throughput is 2.00E+03 events/s + [COUNTERS] PROGRAM TOTAL : 48.3675s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9183s + [COUNTERS] Fortran MEs ( 1 ) : 46.4493s for 90112 events => throughput is 1.94E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277432965013E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.6258s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3913s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.2346s for 8192 events => throughput is 1.93E+03 events/s + [COUNTERS] PROGRAM TOTAL : 9.0356s + [COUNTERS] Fortran Overhead ( 0 ) : 4.6432s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3924s for 8192 events => throughput is 1.87E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725813026109E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 52.8410s - [COUNTERS] Fortran Overhead ( 0 ) : 6.0507s - [COUNTERS] CudaCpp MEs ( 2 ) : 46.7903s for 90112 events => throughput is 1.93E+03 events/s + [COUNTERS] PROGRAM TOTAL : 54.3841s + [COUNTERS] Fortran Overhead ( 0 ) : 6.2075s + [COUNTERS] CudaCpp MEs ( 2 ) : 48.1766s for 90112 events => throughput is 1.87E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.986406e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.891623e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.984071e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.924168e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277430934464E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.7478s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4757s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2721s for 8192 events => throughput is 3.61E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7893s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5036s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2857s for 8192 events => throughput is 3.58E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725816246317E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 28.8117s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0740s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.7377s for 90112 events => throughput is 3.64E+03 events/s + [COUNTERS] PROGRAM TOTAL : 29.4680s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1631s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.3048s for 90112 events => throughput is 3.56E+03 
events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.744041e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.703810e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.783870e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.713606e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.1797s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2161s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9636s for 8192 events => throughput is 8.50E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.2254s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2372s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9882s for 8192 events => throughput is 8.29E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 13.3593s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8150s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.5443s for 90112 events => throughput is 8.55E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.9261s + [COUNTERS] Fortran 
Overhead ( 0 ) : 2.8936s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.0324s for 90112 events => throughput is 8.17E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.773755e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.503062e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.783302e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.519397e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.9449s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0988s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8461s for 8192 events => throughput is 9.68E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.0128s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1323s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8805s for 8192 events => throughput is 9.30E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 12.1426s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7153s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.4273s for 
90112 events => throughput is 9.56E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.3871s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7750s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.6121s for 90112 events => throughput is 9.37E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.895193e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.683983e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.946582e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.679001e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.3960s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3284s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0677s for 8192 events => throughput is 7.67E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5013s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3879s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1134s for 8192 events => throughput is 7.36E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL 
: 14.7488s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9489s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.7999s for 90112 events => throughput is 7.64E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.3721s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0357s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.3363s for 90112 events => throughput is 7.30E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.775707e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.423059e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.783797e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.425324e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277293084707E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.8018s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7699s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0318s for 8192 events => throughput is 2.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8158s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7828s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0331s for 8192 events => throughput is 2.48E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 
[1.5803725738731039E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.7679s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4051s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3627s for 90112 events => throughput is 2.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.7756s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4130s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3626s for 90112 events => throughput is 2.49E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.302892e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.294705e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.535657e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.524485e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.112952e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.113307e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.154600e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.174133e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.122390e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.119833e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.169657e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.183136e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.108240e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.103258e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.438063e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.436179e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 52d8de89eb..568f545851 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,10 +1,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -17,9 +17,9 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_14:38:30 +DATE: 2023-11-03_20:09:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index d2dfd1e943..e844ee5b79 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,9 +1,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 @@ -15,25 +15,25 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2023-11-03_14:38:33 +DATE: 2023-11-03_20:09:50 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 5eb0659f4b..43bf5072f2 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' - - make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,17 +15,17 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_14:38:36 +DATE: 2023-11-03_20:09:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 405f8e65cf..2a2ae334de 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -2,9 +2,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,8 +15,8 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_14:37:04 +DATE: 2023-11-03_20:08:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3063s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2373s - [COUNTERS] Fortran MEs ( 1 ) : 0.0690s for 8192 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3085s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2380s + [COUNTERS] Fortran MEs ( 1 ) : 0.0705s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 
[XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2958s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2270s - [COUNTERS] Fortran MEs ( 1 ) : 0.0688s for 8192 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3042s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2333s + [COUNTERS] Fortran MEs ( 1 ) : 0.0708s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.1688s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4067s - [COUNTERS] Fortran MEs ( 1 ) : 0.7620s for 90112 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2114s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4363s + [COUNTERS] Fortran MEs ( 1 ) : 0.7751s for 90112 events => throughput is 1.16E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3839s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3089s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0750s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3922s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3158s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0765s for 8192 events => throughput is 1.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 
[XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.3142s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4904s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8238s for 90112 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3858s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5438s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8420s for 90112 events => throughput is 1.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.103427e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.080426e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.106392e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.086485e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3109s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2710s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0400s for 8192 events => throughput is 2.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3230s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2818s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0412s for 8192 events => throughput is 1.99E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 
@@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615872] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.9020s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4591s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4430s for 90112 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9553s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4982s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4571s for 90112 events => throughput is 1.97E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.011564e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.984398e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.005399e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.942353e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2791s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2559s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0232s for 8192 events => throughput is 3.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2839s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2604s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0236s for 8192 events => throughput is 3.47E+05 events/s *** 
(2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7026s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4480s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2545s for 90112 events => throughput is 3.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7384s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4779s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2605s for 90112 events => throughput is 3.46E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.416682e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.360936e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.463573e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.508331e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2738s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2532s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0206s for 8192 events => throughput is 3.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2789s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2578s + [COUNTERS] 
CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.88E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6787s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4482s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2305s for 90112 events => throughput is 3.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7099s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4763s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2336s for 90112 events => throughput is 3.86E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.889416e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.911581e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.892184e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.775740e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2969s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2660s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0310s for 8192 events => throughput is 2.65E+05 events/s + [COUNTERS] 
PROGRAM TOTAL : 0.3043s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2722s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8024s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4622s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3402s for 90112 events => throughput is 2.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8369s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4881s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3488s for 90112 events => throughput is 2.58E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.634842e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.489296e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.658963e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.512748e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6579s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6572s - [COUNTERS] CudaCpp MEs 
( 2 ) : 0.0007s for 8192 events => throughput is 1.16E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6694s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6687s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.19E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615869] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8560s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8484s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 90112 events => throughput is 1.18E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.9131s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9051s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 90112 events => throughput is 1.13E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.570932e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.578046e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.991191e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.918680e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.399212e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.385541e+07 ) sec^-1 
*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.526039e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.515910e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.388680e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.366310e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.791775e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.781318e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.378274e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.383694e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.781671e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.778819e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index f862c0da30..76ba714558 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -4,8 +4,8 @@ CUDACPP_BUILDDIR='.' 
make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
@@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_14:37:33 +DATE: 2023-11-03_20:08:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3209s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2467s - [COUNTERS] Fortran MEs ( 1 ) : 0.0742s for 8192 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3082s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2374s + [COUNTERS] Fortran MEs ( 1 ) : 0.0709s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3213s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2463s - [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3089s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2376s + [COUNTERS] Fortran MEs ( 1 ) : 0.0713s for 8192 events => throughput is 1.15E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 
2.1588s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4011s - [COUNTERS] Fortran MEs ( 1 ) : 0.7577s for 90112 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2176s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4416s + [COUNTERS] Fortran MEs ( 1 ) : 0.7760s for 90112 events => throughput is 1.16E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050316058770007] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3719s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3016s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0703s for 8192 events => throughput is 1.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3831s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3106s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0725s for 8192 events => throughput is 1.13E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182797520666] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2795s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4986s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7809s for 90112 events => throughput is 1.15E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3282s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5337s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7945s for 90112 events => throughput is 1.13E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.167125e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.150985e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.163234e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.152855e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313133963987] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2819s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2572s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 8192 events => throughput is 3.32E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2893s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2630s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0263s for 8192 events => throughput is 3.11E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179276862181] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7204s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4460s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2744s for 90112 events => throughput is 3.28E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7627s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4776s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2851s for 90112 events => throughput is 3.16E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.221816e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.058447e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.192435e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.117460e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313344346482] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2604s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2472s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0131s for 8192 events => throughput is 6.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2617s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2490s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0127s for 8192 events => throughput is 6.47E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179137376883] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.5832s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4492s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1340s for 90112 events => throughput is 6.72E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6264s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4861s + [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.1403s for 90112 events => throughput is 6.42E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.254203e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.320941e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.371108e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.304004e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313344346482] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2557s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2444s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0112s for 8192 events => throughput is 7.29E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2672s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2557s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0115s for 8192 events => throughput is 7.13E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179137376883] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.5487s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4231s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1256s for 90112 events => throughput is 7.18E+05 events/s + [COUNTERS] PROGRAM 
TOTAL : 1.6213s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4898s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1315s for 90112 events => throughput is 6.85E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.893256e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.800881e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.000656e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.852775e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050317064561834] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2612s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2460s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2707s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2550s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0157s for 8192 events => throughput is 5.22E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182143140752] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6093s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4384s - [COUNTERS] CudaCpp MEs ( 2 ) 
: 0.1709s for 90112 events => throughput is 5.27E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6597s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4813s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1784s for 90112 events => throughput is 5.05E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.858026e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.682841e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.045424e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.814031e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050319131407651] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6539s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6534s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.59E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6668s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6663s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.60E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801186038252196] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8472s - 
[COUNTERS] Fortran Overhead ( 0 ) : 1.8414s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 90112 events => throughput is 1.54E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.9031s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8970s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 90112 events => throughput is 1.46E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.787997e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.810157e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.403452e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.442986e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.801683e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.776377e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.703537e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.714442e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.843279e+07 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 4.784654e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.791364e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.791545e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.382815e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.353442e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.946674e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.984091e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 7a35cd56f1..d9f19e3972 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -15,17 +15,17 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_14:38:00 +DATE: 2023-11-03_20:09:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3006s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2316s - [COUNTERS] Fortran MEs ( 1 ) : 0.0690s for 8192 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3076s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2365s + [COUNTERS] Fortran MEs ( 1 ) : 0.0711s for 8192 events => throughput is 1.15E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < 
/tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3152s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2421s - [COUNTERS] Fortran MEs ( 1 ) : 0.0730s for 8192 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3048s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2341s + [COUNTERS] Fortran MEs ( 1 ) : 0.0707s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.1574s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4001s - [COUNTERS] Fortran MEs ( 1 ) : 0.7572s for 90112 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2173s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4390s + [COUNTERS] Fortran MEs ( 1 ) : 0.7783s for 90112 events => throughput is 1.16E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333282657206] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3817s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3059s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0758s for 8192 events => throughput is 1.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3915s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3150s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0766s for 8192 events => throughput is 1.07E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' 
./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182636608796] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.3116s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4934s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8182s for 90112 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4080s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5555s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8525s for 90112 events => throughput is 1.06E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.052255e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.026153e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.056069e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.029864e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333282657201] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3280s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2866s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0414s for 8192 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3183s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2784s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0399s for 8192 events => throughput is 2.05E+05 events/s *** (2-sse4) 
Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182636608810] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8866s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4572s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4293s for 90112 events => throughput is 2.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9529s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5041s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4488s for 90112 events => throughput is 2.01E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.074202e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.013366e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.071823e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.020889e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2788s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2560s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.59E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2872s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2636s + [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0235s for 8192 events => throughput is 3.48E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6909s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4404s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2504s for 90112 events => throughput is 3.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7508s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4909s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2599s for 90112 events => throughput is 3.47E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.556578e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.380135e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.556099e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.471740e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2718s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2516s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0202s for 8192 events => throughput is 4.06E+05 events/s + [COUNTERS] PROGRAM 
TOTAL : 0.2817s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2612s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0206s for 8192 events => throughput is 3.98E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6599s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4355s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2245s for 90112 events => throughput is 4.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7107s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4801s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2306s for 90112 events => throughput is 3.91E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.927162e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.890792e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.001830e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.973788e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2975s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2656s - [COUNTERS] CudaCpp MEs ( 2 
) : 0.0319s for 8192 events => throughput is 2.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3050s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2711s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0339s for 8192 events => throughput is 2.42E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8011s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4557s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3454s for 90112 events => throughput is 2.61E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8573s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4973s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3600s for 90112 events => throughput is 2.50E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.547378e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.438047e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.523060e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.395865e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,8 +514,8 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333301029693] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6577s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6570s + [COUNTERS] PROGRAM TOTAL : 0.6671s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6664s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.19E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182637219935] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8570s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8495s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 90112 events => throughput is 1.19E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.8923s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8845s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 90112 events => throughput is 1.16E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.565902e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.584492e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.977935e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.972938e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.387906e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.377134e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.500001e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.496287e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.390935e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.388325e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.764099e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.763560e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** 
Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.392060e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.382255e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.772015e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.773123e+07 ) sec^-1 TEST COMPLETED